Message ID | 20190110162525.1117-1-ap420073@gmail.com |
---|---|
State | Changes Requested |
Delegated to: | Pablo Neira |
Headers | show |
Series | netfilter: nft_compat: fix a race condition in nft_compat module | expand |
Taehee Yoo <ap420073@gmail.com> wrote: > index 83adabdec900..8f3114871d20 100644 > --- a/net/netfilter/nft_compat.c > +++ b/net/netfilter/nft_compat.c > @@ -29,6 +29,7 @@ struct nft_xt { > struct list_head head; > struct nft_expr_ops ops; > unsigned int refcnt; > + bool use; > > /* Unlike other expressions, ops doesn't have static storage duration. > * nft core assumes they do. We use kfree_rcu so that nft core can > @@ -48,7 +49,7 @@ struct nft_xt_match_priv { > static bool nft_xt_put(struct nft_xt *xt) > { > mutex_lock(&nft_xt_lock); > - if (--xt->refcnt == 0) { > + if (--xt->refcnt == 0 && !xt->use) { > list_del(&xt->head); I don't doubt your findings, but I would prefer if we do not have to add these kinds of conditionals here. The problem is that when nft_xt_put is called, all side effects are supposed to have been completed. At this point, list_del() should not be allowed anymore, it should have taken place while we were still under transaction mutex. Do you see any issues with this alternate approach? It adds deactivate hook to list_del, so after transaction mutex is released, next 'add' will create a new compat expression rather than trying to use the one that is scheduled for free. diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -27,6 +27,7 @@ struct nft_xt { struct list_head head; struct nft_expr_ops ops; unsigned int refcnt; + unsigned int listcnt; /* Unlike other expressions, ops doesn't have static storage duration. * nft core assumes they do. We use kfree_rcu so that nft core can @@ -43,10 +44,15 @@ struct nft_xt_match_priv { void *info; }; +static LIST_HEAD(nft_match_list); +static LIST_HEAD(nft_target_list); +static struct nft_expr_type nft_match_type; +static struct nft_expr_type nft_target_type; + static bool nft_xt_put(struct nft_xt *xt) { if (--xt->refcnt == 0) { - list_del(&xt->head); + WARN_ON_ONCE(!list_empty(&xt->head)); kfree_rcu(xt, rcu_head); return true; } @@ -540,6 +546,40 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) __nft_match_destroy(ctx, expr, nft_expr_priv(expr)); } +static void nft_compat_activate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + struct list_head *h) +{ + struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops); + + if (xt->listcnt == 0) + list_add(&xt->head, h); + + xt->listcnt++; +} + +static void nft_compat_activate_mt(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + nft_compat_activate(ctx, expr, &nft_match_list); +} + +static void nft_compat_activate_tg(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + nft_compat_activate(ctx, expr, &nft_target_list); +} + +static void nft_compat_deactivate(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops); + + WARN_ON_ONCE(xt->refcnt == 0); + if (--xt->listcnt == 0) + list_del_init(&xt->head); +} + static void nft_match_large_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { @@ -734,10 +774,6 @@ static const struct nfnetlink_subsystem nfnl_compat_subsys = { .cb = nfnl_nft_compat_cb, }; -static LIST_HEAD(nft_match_list); - -static struct nft_expr_type nft_match_type; - static bool nft_match_cmp(const struct xt_match *match, const char *name, u32 rev, u32 family) { @@ -794,6 +830,8 @@ nft_match_select_ops(const struct nft_ctx *ctx, nft_match->ops.eval = nft_match_eval; nft_match->ops.init = nft_match_init; nft_match->ops.destroy = nft_match_destroy; + nft_match->ops.activate = nft_compat_activate_mt; + nft_match->ops.deactivate = nft_compat_deactivate; nft_match->ops.dump = nft_match_dump; nft_match->ops.validate = nft_match_validate; nft_match->ops.data = match; @@ -810,6 +848,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, nft_match->ops.size = matchsize; + nft_match->listcnt = 1; list_add(&nft_match->head, &nft_match_list); return &nft_match->ops; @@ -826,10 +865,6 @@ static struct nft_expr_type nft_match_type __read_mostly = { .owner = THIS_MODULE, }; -static LIST_HEAD(nft_target_list); - -static struct nft_expr_type nft_target_type; - static bool nft_target_cmp(const struct xt_target *tg, const char *name, u32 rev, u32 family) { @@ -898,6 +933,8 @@ nft_target_select_ops(const struct nft_ctx *ctx, nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize)); nft_target->ops.init = nft_target_init; nft_target->ops.destroy = nft_target_destroy; + nft_target->ops.activate = nft_compat_activate_tg; + nft_target->ops.deactivate = nft_compat_deactivate; nft_target->ops.dump = nft_target_dump; nft_target->ops.validate = nft_target_validate; nft_target->ops.data = target; @@ -907,6 +944,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, else nft_target->ops.eval = nft_target_eval_xt; + nft_target->listcnt = 1; list_add(&nft_target->head, &nft_target_list); return &nft_target->ops;
On Fri, 11 Jan 2019 at 06:59, Florian Westphal <fw@strlen.de> wrote: > Hi Florian! > Taehee Yoo <ap420073@gmail.com> wrote: > > index 83adabdec900..8f3114871d20 100644 > > --- a/net/netfilter/nft_compat.c > > +++ b/net/netfilter/nft_compat.c > > @@ -29,6 +29,7 @@ struct nft_xt { > > struct list_head head; > > struct nft_expr_ops ops; > > unsigned int refcnt; > > + bool use; > > > > /* Unlike other expressions, ops doesn't have static storage duration. > > * nft core assumes they do. We use kfree_rcu so that nft core can > > @@ -48,7 +49,7 @@ struct nft_xt_match_priv { > > static bool nft_xt_put(struct nft_xt *xt) > > { > > mutex_lock(&nft_xt_lock); > > - if (--xt->refcnt == 0) { > > + if (--xt->refcnt == 0 && !xt->use) { > > list_del(&xt->head); > > I don't doubt your findings, but I would prefer if we do not have to add > these kinds of conditionals here. > > The problem is that when nft_xt_put is called, all side > effects are supposed to have been completed. > > At this point, list_del() should not be allowed anymore, > it should have taken place while we were still under transaction > mutex. > > Do you see any issues with this alternate approach? > It adds deactivate hook to list_del, so after transaction mutex is > released, next 'add' will create a new compat expression rather > than trying to use the one that is scheduled for free. > Thank you for review and sorry for late reply. I have been testing your patch and I found a problem scenario. So, I have been trying to find the fundamental problem. As far as I know, transaction mutex is a per-net mutex. So, the problem occurred in per-net scenario. NET #0 NET #1 ->select_ops() ->init() ->select_ops() ->deactivate() ->destroy() nft_xt_put() kfree_rcu(xt, rcu_head); ->init() <-- use-after-free occurred. Test commands: SHELL #1 while : do iptables-nft -t nat -I POSTROUTING -m string --string ap --algo \ kmp -j MASQUERADE nft flush ruleset done SHELL #2 ip netns add vm1 ip netns exec vm1 bash while : do iptables-nft -t nat -I POSTROUTING -m string --string ap --algo \ kmp -j MASQUERADE nft flush ruleset done Results: [12090.543279] BUG: KASAN: use-after-free in nft_target_init+0xb96/0xbe0 [nft_compat] [12090.543279] Read of size 4 at addr ffff888111704f60 by task iptables-nft/7273 [12090.543279] CPU: 0 PID: 7273 Comm: iptables-nft Not tainted 4.20.0+ #59 [12090.543279] Call Trace: [12090.543279] dump_stack+0xc9/0x16b [12090.543279] ? show_regs_print_info+0x5/0x5 [12090.543279] ? kmsg_dump_rewind_nolock+0xd9/0xd9 [12090.543279] ? nft_target_init+0xb96/0xbe0 [nft_compat] [12090.543279] print_address_description+0x6a/0x270 [12090.543279] ? nft_target_init+0xb96/0xbe0 [nft_compat] [12090.543279] ? nft_target_init+0xb96/0xbe0 [nft_compat] [12090.543279] kasan_report+0x12a/0x16f [12090.543279] ? nft_target_init+0xb96/0xbe0 [nft_compat] [12090.543279] nft_target_init+0xb96/0xbe0 [nft_compat] [ ... ] [12090.543279] nf_tables_newrule+0x10a4/0x2570 [nf_tables] [ ... ] [12090.543279] Allocated by task 7273: [12090.543279] kasan_kmalloc+0xa5/0xd0 [12090.543279] kmem_cache_alloc_trace+0x117/0x290 [12090.543279] nft_target_select_ops+0x481/0x990 [nft_compat] [12090.543279] nf_tables_expr_parse+0x2a1/0x510 [nf_tables] [12090.543279] nf_tables_newrule+0xd0c/0x2570 [nf_tables] [12090.543279] nfnetlink_rcv_batch+0xd2e/0x1550 [nfnetlink] [12090.543279] nfnetlink_rcv+0x2ee/0x350 [nfnetlink] [12090.543279] netlink_unicast+0x414/0x5e0 [12090.543279] netlink_sendmsg+0x7b8/0xcf0 [12090.543279] ___sys_sendmsg+0x689/0x990 [12090.543279] __sys_sendmsg+0xd2/0x210 [12090.543279] do_syscall_64+0x138/0x560 [12090.543279] entry_SYSCALL_64_after_hwframe+0x49/0xbe [12090.543279] [12090.543279] Freed by task 1307: [12090.543279] __kasan_slab_free+0x135/0x180 [12090.543279] kfree+0xdb/0x280 [12090.543279] rcu_process_callbacks+0x947/0x24d0 [12090.543279] __do_softirq+0x2a5/0xa3b I agree with your idea that is to use a activate/deactivate callback. So, Could you fix your patch for this scenario? If my understanding is incorrect, please let me know. Thanks again! > diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c > --- a/net/netfilter/nft_compat.c > +++ b/net/netfilter/nft_compat.c > @@ -27,6 +27,7 @@ struct nft_xt { > struct list_head head; > struct nft_expr_ops ops; > unsigned int refcnt; > + unsigned int listcnt; > > /* Unlike other expressions, ops doesn't have static storage duration. > * nft core assumes they do. We use kfree_rcu so that nft core can > @@ -43,10 +44,15 @@ struct nft_xt_match_priv { > void *info; > }; > > +static LIST_HEAD(nft_match_list); > +static LIST_HEAD(nft_target_list); > +static struct nft_expr_type nft_match_type; > +static struct nft_expr_type nft_target_type; > + > static bool nft_xt_put(struct nft_xt *xt) > { > if (--xt->refcnt == 0) { > - list_del(&xt->head); > + WARN_ON_ONCE(!list_empty(&xt->head)); > kfree_rcu(xt, rcu_head); > return true; > } > @@ -540,6 +546,40 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) > __nft_match_destroy(ctx, expr, nft_expr_priv(expr)); > } > > +static void nft_compat_activate(const struct nft_ctx *ctx, > + const struct nft_expr *expr, > + struct list_head *h) > +{ > + struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops); > + > + if (xt->listcnt == 0) > + list_add(&xt->head, h); > + > + xt->listcnt++; > +} > + > +static void nft_compat_activate_mt(const struct nft_ctx *ctx, > + const struct nft_expr *expr) > +{ > + nft_compat_activate(ctx, expr, &nft_match_list); > +} > + > +static void nft_compat_activate_tg(const struct nft_ctx *ctx, > + const struct nft_expr *expr) > +{ > + nft_compat_activate(ctx, expr, &nft_target_list); > +} > + > +static void nft_compat_deactivate(const struct nft_ctx *ctx, > + const struct nft_expr *expr) > +{ > + struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops); > + > + WARN_ON_ONCE(xt->refcnt == 0); > + if (--xt->listcnt == 0) > + list_del_init(&xt->head); > +} > + > static void > nft_match_large_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) > { > @@ -734,10 +774,6 @@ static const struct nfnetlink_subsystem nfnl_compat_subsys = { > .cb = nfnl_nft_compat_cb, > }; > > -static LIST_HEAD(nft_match_list); > - > -static struct nft_expr_type nft_match_type; > - > static bool nft_match_cmp(const struct xt_match *match, > const char *name, u32 rev, u32 family) > { > @@ -794,6 +830,8 @@ nft_match_select_ops(const struct nft_ctx *ctx, > nft_match->ops.eval = nft_match_eval; > nft_match->ops.init = nft_match_init; > nft_match->ops.destroy = nft_match_destroy; > + nft_match->ops.activate = nft_compat_activate_mt; > + nft_match->ops.deactivate = nft_compat_deactivate; > nft_match->ops.dump = nft_match_dump; > nft_match->ops.validate = nft_match_validate; > nft_match->ops.data = match; > @@ -810,6 +848,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, > > nft_match->ops.size = matchsize; > > + nft_match->listcnt = 1; > list_add(&nft_match->head, &nft_match_list); > > return &nft_match->ops; > @@ -826,10 +865,6 @@ static struct nft_expr_type nft_match_type __read_mostly = { > .owner = THIS_MODULE, > }; > > -static LIST_HEAD(nft_target_list); > - > -static struct nft_expr_type nft_target_type; > - > static bool nft_target_cmp(const struct xt_target *tg, > const char *name, u32 rev, u32 family) > { > @@ -898,6 +933,8 @@ nft_target_select_ops(const struct nft_ctx *ctx, > nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize)); > nft_target->ops.init = nft_target_init; > nft_target->ops.destroy = nft_target_destroy; > + nft_target->ops.activate = nft_compat_activate_tg; > + nft_target->ops.deactivate = nft_compat_deactivate; > nft_target->ops.dump = nft_target_dump; > nft_target->ops.validate = nft_target_validate; > nft_target->ops.data = target; > @@ -907,6 +944,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, > else > nft_target->ops.eval = nft_target_eval_xt; > > + nft_target->listcnt = 1; > list_add(&nft_target->head, &nft_target_list); > > return &nft_target->ops;
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 83adabdec900..8f3114871d20 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -29,6 +29,7 @@ struct nft_xt { struct list_head head; struct nft_expr_ops ops; unsigned int refcnt; + bool use; /* Unlike other expressions, ops doesn't have static storage duration. * nft core assumes they do. We use kfree_rcu so that nft core can @@ -48,7 +49,7 @@ struct nft_xt_match_priv { static bool nft_xt_put(struct nft_xt *xt) { mutex_lock(&nft_xt_lock); - if (--xt->refcnt == 0) { + if (--xt->refcnt == 0 && !xt->use) { list_del(&xt->head); kfree_rcu(xt, rcu_head); mutex_unlock(&nft_xt_lock); @@ -280,6 +281,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, mutex_lock(&nft_xt_lock); nft_xt = container_of(expr->ops, struct nft_xt, ops); nft_xt->refcnt++; + nft_xt->use = false; mutex_unlock(&nft_xt_lock); return 0; } @@ -495,6 +497,7 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, mutex_lock(&nft_xt_lock); nft_xt = container_of(expr->ops, struct nft_xt, ops); nft_xt->refcnt++; + nft_xt->use = false; mutex_unlock(&nft_xt_lock); return 0; } @@ -780,6 +783,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, struct xt_match *match = nft_match->ops.data; if (nft_match_cmp(match, mt_name, rev, family)) { + nft_match->use = true; mutex_unlock(&nft_xt_lock); return &nft_match->ops; } @@ -803,6 +807,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, } nft_match->refcnt = 0; + nft_match->use = true; nft_match->ops.type = &nft_match_type; nft_match->ops.eval = nft_match_eval; nft_match->ops.init = nft_match_init; @@ -885,6 +890,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, continue; if (nft_target_cmp(target, tg_name, rev, family)) { + nft_target->use = true; mutex_unlock(&nft_xt_lock); return &nft_target->ops; } @@ -913,6 +919,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, } nft_target->refcnt = 0; + nft_target->use = true; nft_target->ops.type = &nft_target_type; nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize)); nft_target->ops.init = nft_target_init;
nft_{match/target}_select_ops() find xt modules in the list. if the module is found, it is just used without any holding module or increasing reference counts. and the reference count is increased in nft_{match/target}_init() function. But, xt modules can be removed anytime if reference count is 0. CPU0 CPU1 nft_{match/target}_select_ops nft_{match/target}_destroy nft_{match/target}_init The scenario is like this: 1. ->select_ops() found a module in the list and keeps an element of it without increasing reference count. 2. ->destroy() frees an element of list. 3. nft_{match/target}_init try to use the pointer of element, but this is already freed. Test commands: while : do iptables-nft -t nat -I PREROUTING -m string --string ap --algo \ kmp -j MASQUERADE & nft flush ruleset & done Result: [ 682.340431] BUG: KASAN: use-after-free in nft_target_init+0xb96/0xbe0 [nft_compat] [ 682.340431] Read of size 4 at addr ffff8881079a73a0 by task iptables-nft/8136 [ 682.340431] [ 682.340431] CPU: 1 PID: 8136 Comm: iptables-nft Not tainted 4.20.0+ #59 [ 682.373297] Call Trace: [ 682.373297] dump_stack+0xc9/0x16b [ 682.373297] ? show_regs_print_info+0x5/0x5 [ 682.373297] ? kmsg_dump_rewind_nolock+0xd9/0xd9 [ 682.402587] ? nft_target_init+0xb96/0xbe0 [nft_compat] [ 682.402587] print_address_description+0x6a/0x270 [ 682.402587] ? nft_target_init+0xb96/0xbe0 [nft_compat] [ 682.402587] ? nft_target_init+0xb96/0xbe0 [nft_compat] [ 682.402587] kasan_report+0x12a/0x16f [ 682.429087] ? nft_target_init+0xb96/0xbe0 [nft_compat] [ 682.433653] nft_target_init+0xb96/0xbe0 [nft_compat] [ 682.433653] ? target_compat_from_user.isra.5+0x80/0x80 [nft_compat] [ 682.433653] ? nf_tables_newrule+0xe46/0x2570 [nf_tables] [ 682.433653] ? ksm_do_scan+0x2bc0/0x3300 [ 682.433653] ? hlock_class+0x140/0x140 [ 682.433653] ? nf_tables_newrule+0xe46/0x2570 [nf_tables] [ 682.465582] ? memcpy+0x39/0x50 [ 682.465582] ? __kmalloc+0x134/0x2e0 [ 682.465582] ? nf_tables_newrule+0xe46/0x2570 [nf_tables] [ 682.465582] nf_tables_newrule+0x10a4/0x2570 [nf_tables] [ ... ] [ 682.828711] [ 682.828711] Allocated by task 8134: [ 682.828711] kasan_kmalloc+0xa5/0xd0 [ 682.828711] kmem_cache_alloc_trace+0x117/0x290 [ 682.828711] nft_target_select_ops+0x481/0x8e0 [nft_compat] [ 682.828711] nf_tables_expr_parse+0x2a1/0x510 [nf_tables] [ 682.828711] nf_tables_newrule+0xd0c/0x2570 [nf_tables] [ 682.828711] nfnetlink_rcv_batch+0xd2e/0x1550 [nfnetlink] [ 682.828711] nfnetlink_rcv+0x2ee/0x350 [nfnetlink] [ 682.828711] netlink_unicast+0x414/0x5e0 [ 682.828711] netlink_sendmsg+0x7b8/0xcf0 [ 682.937632] ___sys_sendmsg+0x689/0x990 [ 682.937632] __sys_sendmsg+0xd2/0x210 [ 682.937632] do_syscall_64+0x138/0x560 [ 682.937632] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 682.937632] [ 682.937632] Freed by task 8140: [ 682.937632] __kasan_slab_free+0x135/0x180 [ 682.937632] kfree+0xdb/0x280 [ 682.937632] rcu_process_callbacks+0x947/0x24d0 [ 682.937632] __do_softirq+0x2a5/0xa3b [ 682.937632] Fixes: 0935d5588400 ("netfilter: nf_tables: asynchronous release") Signed-off-by: Taehee Yoo <ap420073@gmail.com> --- net/netfilter/nft_compat.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-)