diff mbox series

[nf,2/2] netfilter: nft_compat: protect lists between select_ops and init

Message ID 20190110162525.1117-1-ap420073@gmail.com
State Changes Requested
Delegated to: Pablo Neira
Headers show
Series netfilter: nft_compat: fix a race condition in nft_compat module | expand

Commit Message

Taehee Yoo Jan. 10, 2019, 4:25 p.m. UTC
nft_{match/target}_select_ops() find xt modules in the list. if the
module is found, it is just used without any holding module or
increasing reference counts. and the reference count is increased in
nft_{match/target}_init() function.
But, xt modules can be removed anytime if reference count is 0.

CPU0					CPU1
nft_{match/target}_select_ops
					nft_{match/target}_destroy
nft_{match/target}_init

The scenario is like this:
1. ->select_ops() found a module in the list and keeps an element of it
without increasing reference count.
2. ->destroy() frees an element of list.
3. nft_{match/target}_init try to use the pointer of element, but this is
already freed.

Test commands:
   while :
   do
	iptables-nft -t nat -I PREROUTING -m string --string ap --algo \
		kmp -j MASQUERADE &
	nft flush ruleset &
   done

Result:
[  682.340431] BUG: KASAN: use-after-free in nft_target_init+0xb96/0xbe0 [nft_compat]
[  682.340431] Read of size 4 at addr ffff8881079a73a0 by task iptables-nft/8136
[  682.340431]
[  682.340431] CPU: 1 PID: 8136 Comm: iptables-nft Not tainted 4.20.0+ #59
[  682.373297] Call Trace:
[  682.373297]  dump_stack+0xc9/0x16b
[  682.373297]  ? show_regs_print_info+0x5/0x5
[  682.373297]  ? kmsg_dump_rewind_nolock+0xd9/0xd9
[  682.402587]  ? nft_target_init+0xb96/0xbe0 [nft_compat]
[  682.402587]  print_address_description+0x6a/0x270
[  682.402587]  ? nft_target_init+0xb96/0xbe0 [nft_compat]
[  682.402587]  ? nft_target_init+0xb96/0xbe0 [nft_compat]
[  682.402587]  kasan_report+0x12a/0x16f
[  682.429087]  ? nft_target_init+0xb96/0xbe0 [nft_compat]
[  682.433653]  nft_target_init+0xb96/0xbe0 [nft_compat]
[  682.433653]  ? target_compat_from_user.isra.5+0x80/0x80 [nft_compat]
[  682.433653]  ? nf_tables_newrule+0xe46/0x2570 [nf_tables]
[  682.433653]  ? ksm_do_scan+0x2bc0/0x3300
[  682.433653]  ? hlock_class+0x140/0x140
[  682.433653]  ? nf_tables_newrule+0xe46/0x2570 [nf_tables]
[  682.465582]  ? memcpy+0x39/0x50
[  682.465582]  ? __kmalloc+0x134/0x2e0
[  682.465582]  ? nf_tables_newrule+0xe46/0x2570 [nf_tables]
[  682.465582]  nf_tables_newrule+0x10a4/0x2570 [nf_tables]
[ ... ]
[  682.828711]
[  682.828711] Allocated by task 8134:
[  682.828711]  kasan_kmalloc+0xa5/0xd0
[  682.828711]  kmem_cache_alloc_trace+0x117/0x290
[  682.828711]  nft_target_select_ops+0x481/0x8e0 [nft_compat]
[  682.828711]  nf_tables_expr_parse+0x2a1/0x510 [nf_tables]
[  682.828711]  nf_tables_newrule+0xd0c/0x2570 [nf_tables]
[  682.828711]  nfnetlink_rcv_batch+0xd2e/0x1550 [nfnetlink]
[  682.828711]  nfnetlink_rcv+0x2ee/0x350 [nfnetlink]
[  682.828711]  netlink_unicast+0x414/0x5e0
[  682.828711]  netlink_sendmsg+0x7b8/0xcf0
[  682.937632]  ___sys_sendmsg+0x689/0x990
[  682.937632]  __sys_sendmsg+0xd2/0x210
[  682.937632]  do_syscall_64+0x138/0x560
[  682.937632]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  682.937632]
[  682.937632] Freed by task 8140:
[  682.937632]  __kasan_slab_free+0x135/0x180
[  682.937632]  kfree+0xdb/0x280
[  682.937632]  rcu_process_callbacks+0x947/0x24d0
[  682.937632]  __do_softirq+0x2a5/0xa3b
[  682.937632]

Fixes: 0935d5588400 ("netfilter: nf_tables: asynchronous release")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
---
 net/netfilter/nft_compat.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

Comments

Florian Westphal Jan. 10, 2019, 9:59 p.m. UTC | #1
Taehee Yoo <ap420073@gmail.com> wrote:
> index 83adabdec900..8f3114871d20 100644
> --- a/net/netfilter/nft_compat.c
> +++ b/net/netfilter/nft_compat.c
> @@ -29,6 +29,7 @@ struct nft_xt {
>  	struct list_head	head;
>  	struct nft_expr_ops	ops;
>  	unsigned int		refcnt;
> +	bool			use;
>  
>  	/* Unlike other expressions, ops doesn't have static storage duration.
>  	 * nft core assumes they do.  We use kfree_rcu so that nft core can
> @@ -48,7 +49,7 @@ struct nft_xt_match_priv {
>  static bool nft_xt_put(struct nft_xt *xt)
>  {
>  	mutex_lock(&nft_xt_lock);
> -	if (--xt->refcnt == 0) {
> +	if (--xt->refcnt == 0 && !xt->use) {
>  		list_del(&xt->head);

I don't doubt your findings, but I would prefer if we do not have to add
these kinds of conditionals here.

The problem is that when nft_xt_put is called, all side
effects are supposed to have been completed.

At this point, list_del() should not be allowed anymore,
it should have taken place while we were still under transaction
mutex.

Do you see any issues with this alternate approach?
It adds deactivate hook to list_del, so after transaction mutex is
released, next 'add' will create a new compat expression rather
than trying to use the one that is scheduled for free.

diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -27,6 +27,7 @@ struct nft_xt {
 	struct list_head	head;
 	struct nft_expr_ops	ops;
 	unsigned int		refcnt;
+	unsigned int		listcnt;
 
 	/* Unlike other expressions, ops doesn't have static storage duration.
 	 * nft core assumes they do.  We use kfree_rcu so that nft core can
@@ -43,10 +44,15 @@ struct nft_xt_match_priv {
 	void *info;
 };
 
+static LIST_HEAD(nft_match_list);
+static LIST_HEAD(nft_target_list);
+static struct nft_expr_type nft_match_type;
+static struct nft_expr_type nft_target_type;
+
 static bool nft_xt_put(struct nft_xt *xt)
 {
 	if (--xt->refcnt == 0) {
-		list_del(&xt->head);
+		WARN_ON_ONCE(!list_empty(&xt->head));
 		kfree_rcu(xt, rcu_head);
 		return true;
 	}
@@ -540,6 +546,40 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
 	__nft_match_destroy(ctx, expr, nft_expr_priv(expr));
 }
 
+static void nft_compat_activate(const struct nft_ctx *ctx,
+				const struct nft_expr *expr,
+				struct list_head *h)
+{
+	struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops);
+
+	if (xt->listcnt == 0)
+		list_add(&xt->head, h);
+
+	xt->listcnt++;
+}
+
+static void nft_compat_activate_mt(const struct nft_ctx *ctx,
+				   const struct nft_expr *expr)
+{
+	nft_compat_activate(ctx, expr, &nft_match_list);
+}
+
+static void nft_compat_activate_tg(const struct nft_ctx *ctx,
+				   const struct nft_expr *expr)
+{
+	nft_compat_activate(ctx, expr, &nft_target_list);
+}
+
+static void nft_compat_deactivate(const struct nft_ctx *ctx,
+				  const struct nft_expr *expr)
+{
+	struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops);
+
+	WARN_ON_ONCE(xt->refcnt == 0);
+	if (--xt->listcnt == 0)
+		list_del_init(&xt->head);
+}
+
 static void
 nft_match_large_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
 {
@@ -734,10 +774,6 @@ static const struct nfnetlink_subsystem nfnl_compat_subsys = {
 	.cb		= nfnl_nft_compat_cb,
 };
 
-static LIST_HEAD(nft_match_list);
-
-static struct nft_expr_type nft_match_type;
-
 static bool nft_match_cmp(const struct xt_match *match,
 			  const char *name, u32 rev, u32 family)
 {
@@ -794,6 +830,8 @@ nft_match_select_ops(const struct nft_ctx *ctx,
 	nft_match->ops.eval = nft_match_eval;
 	nft_match->ops.init = nft_match_init;
 	nft_match->ops.destroy = nft_match_destroy;
+	nft_match->ops.activate = nft_compat_activate_mt;
+	nft_match->ops.deactivate = nft_compat_deactivate;
 	nft_match->ops.dump = nft_match_dump;
 	nft_match->ops.validate = nft_match_validate;
 	nft_match->ops.data = match;
@@ -810,6 +848,7 @@ nft_match_select_ops(const struct nft_ctx *ctx,
 
 	nft_match->ops.size = matchsize;
 
+	nft_match->listcnt = 1;
 	list_add(&nft_match->head, &nft_match_list);
 
 	return &nft_match->ops;
@@ -826,10 +865,6 @@ static struct nft_expr_type nft_match_type __read_mostly = {
 	.owner		= THIS_MODULE,
 };
 
-static LIST_HEAD(nft_target_list);
-
-static struct nft_expr_type nft_target_type;
-
 static bool nft_target_cmp(const struct xt_target *tg,
 			   const char *name, u32 rev, u32 family)
 {
@@ -898,6 +933,8 @@ nft_target_select_ops(const struct nft_ctx *ctx,
 	nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize));
 	nft_target->ops.init = nft_target_init;
 	nft_target->ops.destroy = nft_target_destroy;
+	nft_target->ops.activate = nft_compat_activate_tg;
+	nft_target->ops.deactivate = nft_compat_deactivate;
 	nft_target->ops.dump = nft_target_dump;
 	nft_target->ops.validate = nft_target_validate;
 	nft_target->ops.data = target;
@@ -907,6 +944,7 @@ nft_target_select_ops(const struct nft_ctx *ctx,
 	else
 		nft_target->ops.eval = nft_target_eval_xt;
 
+	nft_target->listcnt = 1;
 	list_add(&nft_target->head, &nft_target_list);
 
 	return &nft_target->ops;
Taehee Yoo Jan. 12, 2019, 5:21 p.m. UTC | #2
On Fri, 11 Jan 2019 at 06:59, Florian Westphal <fw@strlen.de> wrote:
>

Hi Florian!

> Taehee Yoo <ap420073@gmail.com> wrote:
> > index 83adabdec900..8f3114871d20 100644
> > --- a/net/netfilter/nft_compat.c
> > +++ b/net/netfilter/nft_compat.c
> > @@ -29,6 +29,7 @@ struct nft_xt {
> >       struct list_head        head;
> >       struct nft_expr_ops     ops;
> >       unsigned int            refcnt;
> > +     bool                    use;
> >
> >       /* Unlike other expressions, ops doesn't have static storage duration.
> >        * nft core assumes they do.  We use kfree_rcu so that nft core can
> > @@ -48,7 +49,7 @@ struct nft_xt_match_priv {
> >  static bool nft_xt_put(struct nft_xt *xt)
> >  {
> >       mutex_lock(&nft_xt_lock);
> > -     if (--xt->refcnt == 0) {
> > +     if (--xt->refcnt == 0 && !xt->use) {
> >               list_del(&xt->head);
>
> I don't doubt your findings, but I would prefer if we do not have to add
> these kinds of conditionals here.
>
> The problem is that when nft_xt_put is called, all side
> effects are supposed to have been completed.
>
> At this point, list_del() should not be allowed anymore,
> it should have taken place while we were still under transaction
> mutex.
>
> Do you see any issues with this alternate approach?
> It adds deactivate hook to list_del, so after transaction mutex is
> released, next 'add' will create a new compat expression rather
> than trying to use the one that is scheduled for free.
>

Thank you for review and sorry for late reply.
I have been testing your patch and I found a problem scenario.
So, I have been trying to find the fundamental problem.

As far as I know, transaction mutex is a per-net mutex.
So, the problem occurred in per-net scenario.

NET #0                          NET #1
->select_ops()
->init()
                                ->select_ops()
->deactivate()
->destroy()
    nft_xt_put()
       kfree_rcu(xt, rcu_head);
                                ->init() <-- use-after-free occurred.

Test commands:

SHELL #1

while :
do
        iptables-nft -t nat -I POSTROUTING -m string --string ap --algo \
                kmp -j MASQUERADE
        nft flush ruleset
done

SHELL #2

ip netns add vm1
ip netns exec vm1 bash

while :
do
        iptables-nft -t nat -I POSTROUTING -m string --string ap --algo \
                kmp -j MASQUERADE
        nft flush ruleset
done

Results:
[12090.543279] BUG: KASAN: use-after-free in
nft_target_init+0xb96/0xbe0 [nft_compat]
[12090.543279] Read of size 4 at addr ffff888111704f60 by task
iptables-nft/7273
[12090.543279] CPU: 0 PID: 7273 Comm: iptables-nft Not tainted 4.20.0+
#59
[12090.543279] Call Trace:
[12090.543279]  dump_stack+0xc9/0x16b
[12090.543279]  ? show_regs_print_info+0x5/0x5
[12090.543279]  ? kmsg_dump_rewind_nolock+0xd9/0xd9
[12090.543279]  ? nft_target_init+0xb96/0xbe0 [nft_compat]
[12090.543279]  print_address_description+0x6a/0x270
[12090.543279]  ? nft_target_init+0xb96/0xbe0 [nft_compat]
[12090.543279]  ? nft_target_init+0xb96/0xbe0 [nft_compat]
[12090.543279]  kasan_report+0x12a/0x16f
[12090.543279]  ? nft_target_init+0xb96/0xbe0 [nft_compat]
[12090.543279]  nft_target_init+0xb96/0xbe0 [nft_compat]
[ ... ]
[12090.543279]  nf_tables_newrule+0x10a4/0x2570 [nf_tables]
[ ... ]
[12090.543279] Allocated by task 7273:
[12090.543279]  kasan_kmalloc+0xa5/0xd0
[12090.543279]  kmem_cache_alloc_trace+0x117/0x290
[12090.543279]  nft_target_select_ops+0x481/0x990 [nft_compat]
[12090.543279]  nf_tables_expr_parse+0x2a1/0x510 [nf_tables]
[12090.543279]  nf_tables_newrule+0xd0c/0x2570 [nf_tables]
[12090.543279]  nfnetlink_rcv_batch+0xd2e/0x1550 [nfnetlink]
[12090.543279]  nfnetlink_rcv+0x2ee/0x350 [nfnetlink]
[12090.543279]  netlink_unicast+0x414/0x5e0
[12090.543279]  netlink_sendmsg+0x7b8/0xcf0
[12090.543279]  ___sys_sendmsg+0x689/0x990
[12090.543279]  __sys_sendmsg+0xd2/0x210
[12090.543279]  do_syscall_64+0x138/0x560
[12090.543279]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[12090.543279]
[12090.543279] Freed by task 1307:
[12090.543279]  __kasan_slab_free+0x135/0x180
[12090.543279]  kfree+0xdb/0x280
[12090.543279]  rcu_process_callbacks+0x947/0x24d0
[12090.543279]  __do_softirq+0x2a5/0xa3b

I agree with your idea that is to use a activate/deactivate callback.
So, Could you fix your patch for this scenario?

If my understanding is incorrect, please let me know.

Thanks again!

> diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
> --- a/net/netfilter/nft_compat.c
> +++ b/net/netfilter/nft_compat.c
> @@ -27,6 +27,7 @@ struct nft_xt {
>         struct list_head        head;
>         struct nft_expr_ops     ops;
>         unsigned int            refcnt;
> +       unsigned int            listcnt;
>
>         /* Unlike other expressions, ops doesn't have static storage duration.
>          * nft core assumes they do.  We use kfree_rcu so that nft core can
> @@ -43,10 +44,15 @@ struct nft_xt_match_priv {
>         void *info;
>  };
>
> +static LIST_HEAD(nft_match_list);
> +static LIST_HEAD(nft_target_list);
> +static struct nft_expr_type nft_match_type;
> +static struct nft_expr_type nft_target_type;
> +
>  static bool nft_xt_put(struct nft_xt *xt)
>  {
>         if (--xt->refcnt == 0) {
> -               list_del(&xt->head);
> +               WARN_ON_ONCE(!list_empty(&xt->head));
>                 kfree_rcu(xt, rcu_head);
>                 return true;
>         }
> @@ -540,6 +546,40 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
>         __nft_match_destroy(ctx, expr, nft_expr_priv(expr));
>  }
>
> +static void nft_compat_activate(const struct nft_ctx *ctx,
> +                               const struct nft_expr *expr,
> +                               struct list_head *h)
> +{
> +       struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops);
> +
> +       if (xt->listcnt == 0)
> +               list_add(&xt->head, h);
> +
> +       xt->listcnt++;
> +}
> +
> +static void nft_compat_activate_mt(const struct nft_ctx *ctx,
> +                                  const struct nft_expr *expr)
> +{
> +       nft_compat_activate(ctx, expr, &nft_match_list);
> +}
> +
> +static void nft_compat_activate_tg(const struct nft_ctx *ctx,
> +                                  const struct nft_expr *expr)
> +{
> +       nft_compat_activate(ctx, expr, &nft_target_list);
> +}
> +
> +static void nft_compat_deactivate(const struct nft_ctx *ctx,
> +                                 const struct nft_expr *expr)
> +{
> +       struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops);
> +
> +       WARN_ON_ONCE(xt->refcnt == 0);
> +       if (--xt->listcnt == 0)
> +               list_del_init(&xt->head);
> +}
> +
>  static void
>  nft_match_large_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
>  {
> @@ -734,10 +774,6 @@ static const struct nfnetlink_subsystem nfnl_compat_subsys = {
>         .cb             = nfnl_nft_compat_cb,
>  };
>
> -static LIST_HEAD(nft_match_list);
> -
> -static struct nft_expr_type nft_match_type;
> -
>  static bool nft_match_cmp(const struct xt_match *match,
>                           const char *name, u32 rev, u32 family)
>  {
> @@ -794,6 +830,8 @@ nft_match_select_ops(const struct nft_ctx *ctx,
>         nft_match->ops.eval = nft_match_eval;
>         nft_match->ops.init = nft_match_init;
>         nft_match->ops.destroy = nft_match_destroy;
> +       nft_match->ops.activate = nft_compat_activate_mt;
> +       nft_match->ops.deactivate = nft_compat_deactivate;
>         nft_match->ops.dump = nft_match_dump;
>         nft_match->ops.validate = nft_match_validate;
>         nft_match->ops.data = match;
> @@ -810,6 +848,7 @@ nft_match_select_ops(const struct nft_ctx *ctx,
>
>         nft_match->ops.size = matchsize;
>
> +       nft_match->listcnt = 1;
>         list_add(&nft_match->head, &nft_match_list);
>
>         return &nft_match->ops;
> @@ -826,10 +865,6 @@ static struct nft_expr_type nft_match_type __read_mostly = {
>         .owner          = THIS_MODULE,
>  };
>
> -static LIST_HEAD(nft_target_list);
> -
> -static struct nft_expr_type nft_target_type;
> -
>  static bool nft_target_cmp(const struct xt_target *tg,
>                            const char *name, u32 rev, u32 family)
>  {
> @@ -898,6 +933,8 @@ nft_target_select_ops(const struct nft_ctx *ctx,
>         nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize));
>         nft_target->ops.init = nft_target_init;
>         nft_target->ops.destroy = nft_target_destroy;
> +       nft_target->ops.activate = nft_compat_activate_tg;
> +       nft_target->ops.deactivate = nft_compat_deactivate;
>         nft_target->ops.dump = nft_target_dump;
>         nft_target->ops.validate = nft_target_validate;
>         nft_target->ops.data = target;
> @@ -907,6 +944,7 @@ nft_target_select_ops(const struct nft_ctx *ctx,
>         else
>                 nft_target->ops.eval = nft_target_eval_xt;
>
> +       nft_target->listcnt = 1;
>         list_add(&nft_target->head, &nft_target_list);
>
>         return &nft_target->ops;
diff mbox series

Patch

diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 83adabdec900..8f3114871d20 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -29,6 +29,7 @@  struct nft_xt {
 	struct list_head	head;
 	struct nft_expr_ops	ops;
 	unsigned int		refcnt;
+	bool			use;
 
 	/* Unlike other expressions, ops doesn't have static storage duration.
 	 * nft core assumes they do.  We use kfree_rcu so that nft core can
@@ -48,7 +49,7 @@  struct nft_xt_match_priv {
 static bool nft_xt_put(struct nft_xt *xt)
 {
 	mutex_lock(&nft_xt_lock);
-	if (--xt->refcnt == 0) {
+	if (--xt->refcnt == 0 && !xt->use) {
 		list_del(&xt->head);
 		kfree_rcu(xt, rcu_head);
 		mutex_unlock(&nft_xt_lock);
@@ -280,6 +281,7 @@  nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 	mutex_lock(&nft_xt_lock);
 	nft_xt = container_of(expr->ops, struct nft_xt, ops);
 	nft_xt->refcnt++;
+	nft_xt->use = false;
 	mutex_unlock(&nft_xt_lock);
 	return 0;
 }
@@ -495,6 +497,7 @@  __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 	mutex_lock(&nft_xt_lock);
 	nft_xt = container_of(expr->ops, struct nft_xt, ops);
 	nft_xt->refcnt++;
+	nft_xt->use = false;
 	mutex_unlock(&nft_xt_lock);
 	return 0;
 }
@@ -780,6 +783,7 @@  nft_match_select_ops(const struct nft_ctx *ctx,
 		struct xt_match *match = nft_match->ops.data;
 
 		if (nft_match_cmp(match, mt_name, rev, family)) {
+			nft_match->use = true;
 			mutex_unlock(&nft_xt_lock);
 			return &nft_match->ops;
 		}
@@ -803,6 +807,7 @@  nft_match_select_ops(const struct nft_ctx *ctx,
 	}
 
 	nft_match->refcnt = 0;
+	nft_match->use = true;
 	nft_match->ops.type = &nft_match_type;
 	nft_match->ops.eval = nft_match_eval;
 	nft_match->ops.init = nft_match_init;
@@ -885,6 +890,7 @@  nft_target_select_ops(const struct nft_ctx *ctx,
 			continue;
 
 		if (nft_target_cmp(target, tg_name, rev, family)) {
+			nft_target->use = true;
 			mutex_unlock(&nft_xt_lock);
 			return &nft_target->ops;
 		}
@@ -913,6 +919,7 @@  nft_target_select_ops(const struct nft_ctx *ctx,
 	}
 
 	nft_target->refcnt = 0;
+	nft_target->use = true;
 	nft_target->ops.type = &nft_target_type;
 	nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize));
 	nft_target->ops.init = nft_target_init;