Message ID | 20181005164242.9682-1-ap420073@gmail.com |
---|---|
State | Changes Requested |
Delegated to: | Pablo Neira |
Headers | show |
Series | netfilter: ipt_CLUSTERIP: fix bugs in ipt_CLUSTERIP | expand |
On Sat, Oct 06, 2018 at 01:42:42AM +0900, Taehee Yoo wrote: > diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c > index 2c8d313ae216..6ccabe6f74a6 100644 > --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c > +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c > @@ -59,7 +59,6 @@ struct clusterip_config { > struct rcu_head rcu; > > char ifname[IFNAMSIZ]; /* device ifname */ > - struct notifier_block notifier; /* refresh c->ifindex in it */ > }; > > #ifdef CONFIG_PROC_FS > @@ -118,8 +117,6 @@ clusterip_config_entry_put(struct net *net, struct clusterip_config *c) > spin_unlock(&cn->lock); > local_bh_enable(); > > - unregister_netdevice_notifier(&c->notifier); > - > return; > } > local_bh_enable(); > @@ -181,32 +178,37 @@ clusterip_netdev_event(struct notifier_block *this, unsigned long event, > void *ptr) > { > struct net_device *dev = netdev_notifier_info_to_dev(ptr); > + struct net *net = dev_net(dev); > + struct clusterip_net *cn = net_generic(net, clusterip_net_id); > struct clusterip_config *c; > > - c = container_of(this, struct clusterip_config, notifier); > - switch (event) { > - case NETDEV_REGISTER: > - if (!strcmp(dev->name, c->ifname)) { > - c->ifindex = dev->ifindex; > - dev_mc_add(dev, c->clustermac); > - } > - break; > - case NETDEV_UNREGISTER: > - if (dev->ifindex == c->ifindex) { > - dev_mc_del(dev, c->clustermac); > - c->ifindex = -1; > - } > - break; > - case NETDEV_CHANGENAME: > - if (!strcmp(dev->name, c->ifname)) { > - c->ifindex = dev->ifindex; > - dev_mc_add(dev, c->clustermac); > - } else if (dev->ifindex == c->ifindex) { > - dev_mc_del(dev, c->clustermac); > - c->ifindex = -1; > + spin_lock(&cn->lock); Do we need spin_lock_bh() here?
On Thu, 11 Oct 2018 at 02:32, Pablo Neira Ayuso <pablo@netfilter.org> wrote: > Hi Pablo, Thank you for review! > On Sat, Oct 06, 2018 at 01:42:42AM +0900, Taehee Yoo wrote: > > diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c > > index 2c8d313ae216..6ccabe6f74a6 100644 > > --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c > > +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c > > @@ -59,7 +59,6 @@ struct clusterip_config { > > struct rcu_head rcu; > > > > char ifname[IFNAMSIZ]; /* device ifname */ > > - struct notifier_block notifier; /* refresh c->ifindex in it */ > > }; > > > > #ifdef CONFIG_PROC_FS > > @@ -118,8 +117,6 @@ clusterip_config_entry_put(struct net *net, struct clusterip_config *c) > > spin_unlock(&cn->lock); > > local_bh_enable(); > > > > - unregister_netdevice_notifier(&c->notifier); > > - > > return; > > } > > local_bh_enable(); > > @@ -181,32 +178,37 @@ clusterip_netdev_event(struct notifier_block *this, unsigned long event, > > void *ptr) > > { > > struct net_device *dev = netdev_notifier_info_to_dev(ptr); > > + struct net *net = dev_net(dev); > > + struct clusterip_net *cn = net_generic(net, clusterip_net_id); > > struct clusterip_config *c; > > > > - c = container_of(this, struct clusterip_config, notifier); > > - switch (event) { > > - case NETDEV_REGISTER: > > - if (!strcmp(dev->name, c->ifname)) { > > - c->ifindex = dev->ifindex; > > - dev_mc_add(dev, c->clustermac); > > - } > > - break; > > - case NETDEV_UNREGISTER: > > - if (dev->ifindex == c->ifindex) { > > - dev_mc_del(dev, c->clustermac); > > - c->ifindex = -1; > > - } > > - break; > > - case NETDEV_CHANGENAME: > > - if (!strcmp(dev->name, c->ifname)) { > > - c->ifindex = dev->ifindex; > > - dev_mc_add(dev, c->clustermac); > > - } else if (dev->ifindex == c->ifindex) { > > - dev_mc_del(dev, c->clustermac); > > - c->ifindex = -1; > > + spin_lock(&cn->lock); > > Do we need spin_lock_bh() here? I checked that, you're right. config is modified in the BH. so, that should be spin_lock_bh(). I will send v2 patch Thanks!
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 2c8d313ae216..6ccabe6f74a6 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -59,7 +59,6 @@ struct clusterip_config { struct rcu_head rcu; char ifname[IFNAMSIZ]; /* device ifname */ - struct notifier_block notifier; /* refresh c->ifindex in it */ }; #ifdef CONFIG_PROC_FS @@ -118,8 +117,6 @@ clusterip_config_entry_put(struct net *net, struct clusterip_config *c) spin_unlock(&cn->lock); local_bh_enable(); - unregister_netdevice_notifier(&c->notifier); - return; } local_bh_enable(); @@ -181,32 +178,37 @@ clusterip_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct net *net = dev_net(dev); + struct clusterip_net *cn = net_generic(net, clusterip_net_id); struct clusterip_config *c; - c = container_of(this, struct clusterip_config, notifier); - switch (event) { - case NETDEV_REGISTER: - if (!strcmp(dev->name, c->ifname)) { - c->ifindex = dev->ifindex; - dev_mc_add(dev, c->clustermac); - } - break; - case NETDEV_UNREGISTER: - if (dev->ifindex == c->ifindex) { - dev_mc_del(dev, c->clustermac); - c->ifindex = -1; - } - break; - case NETDEV_CHANGENAME: - if (!strcmp(dev->name, c->ifname)) { - c->ifindex = dev->ifindex; - dev_mc_add(dev, c->clustermac); - } else if (dev->ifindex == c->ifindex) { - dev_mc_del(dev, c->clustermac); - c->ifindex = -1; + spin_lock(&cn->lock); + list_for_each_entry_rcu(c, &cn->configs, list) { + switch (event) { + case NETDEV_REGISTER: + if (!strcmp(dev->name, c->ifname)) { + c->ifindex = dev->ifindex; + dev_mc_add(dev, c->clustermac); + } + break; + case NETDEV_UNREGISTER: + if (dev->ifindex == c->ifindex) { + dev_mc_del(dev, c->clustermac); + c->ifindex = -1; + } + break; + case NETDEV_CHANGENAME: + if (!strcmp(dev->name, c->ifname)) { + c->ifindex = dev->ifindex; + dev_mc_add(dev, c->clustermac); + } else if (dev->ifindex == c->ifindex) { + dev_mc_del(dev, c->clustermac); + c->ifindex = -1; + } + break; } - break; } + spin_unlock(&cn->lock); return NOTIFY_DONE; } @@ -260,12 +262,8 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i, } #endif - c->notifier.notifier_call = clusterip_netdev_event; - err = register_netdevice_notifier(&c->notifier); - if (!err) { - refcount_set(&c->entries, 1); - return c; - } + refcount_set(&c->entries, 1); + return c; #ifdef CONFIG_PROC_FS proc_remove(c->pde); @@ -847,6 +845,10 @@ static struct pernet_operations clusterip_net_ops = { .size = sizeof(struct clusterip_net), }; +struct notifier_block cip_netdev_notifier = { + .notifier_call = clusterip_netdev_event +}; + static int __init clusterip_tg_init(void) { int ret; @@ -859,11 +861,17 @@ static int __init clusterip_tg_init(void) if (ret < 0) goto cleanup_subsys; + ret = register_netdevice_notifier(&cip_netdev_notifier); + if (ret < 0) + goto unregister_target; + pr_info("ClusterIP Version %s loaded successfully\n", CLUSTERIP_VERSION); return 0; +unregister_target: + xt_unregister_target(&clusterip_tg_reg); cleanup_subsys: unregister_pernet_subsys(&clusterip_net_ops); return ret; @@ -873,6 +881,7 @@ static void __exit clusterip_tg_exit(void) { pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION); + unregister_netdevice_notifier(&cip_netdev_notifier); xt_unregister_target(&clusterip_tg_reg); unregister_pernet_subsys(&clusterip_net_ops);
When network namespace is destroyed, cleanup_net() is called. cleanup_net() holds pernet_ops_rwsem then calls each ->exit callback. So that clusterip_tg_destroy() is called by cleanup_net(). And clusterip_tg_destroy() calls unregister_netdevice_notifier(). But both cleanup_net() and clusterip_tg_destroy() hold same lock(pernet_ops_rwsem). hence deadlock occurrs. After this patch, only 1 notifier is registered when module is inserted. And all of configs are added to per-net list. test commands: %ip netns add vm1 %ip netns exec vm1 bash %ip link set lo up %iptables -A INPUT -p tcp -i lo -d 192.168.0.5 --dport 80 \ -j CLUSTERIP --new --hashmode sourceip \ --clustermac 01:00:5e:00:00:20 --total-nodes 2 --local-node 1 %exit %ip netns del vm1 splat looks like: [ 341.809674] ============================================ [ 341.809674] WARNING: possible recursive locking detected [ 341.809674] 4.19.0-rc5+ #16 Tainted: G W [ 341.809674] -------------------------------------------- [ 341.809674] kworker/u4:2/87 is trying to acquire lock: [ 341.809674] 000000005da2d519 (pernet_ops_rwsem){++++}, at: unregister_netdevice_notifier+0x8c/0x460 [ 341.809674] [ 341.809674] but task is already holding lock: [ 341.809674] 000000005da2d519 (pernet_ops_rwsem){++++}, at: cleanup_net+0x119/0x900 [ 341.809674] [ 341.809674] other info that might help us debug this: [ 341.809674] Possible unsafe locking scenario: [ 341.809674] [ 341.809674] CPU0 [ 341.809674] ---- [ 341.809674] lock(pernet_ops_rwsem); [ 341.809674] lock(pernet_ops_rwsem); [ 341.809674] [ 341.809674] *** DEADLOCK *** [ 341.809674] [ 341.809674] May be due to missing lock nesting notation [ 341.809674] [ 341.809674] 3 locks held by kworker/u4:2/87: [ 341.809674] #0: 00000000d9df6c92 ((wq_completion)"%s""netns"){+.+.}, at: process_one_work+0xafe/0x1de0 [ 341.809674] #1: 00000000c2cbcee2 (net_cleanup_work){+.+.}, at: process_one_work+0xb60/0x1de0 [ 341.809674] #2: 000000005da2d519 (pernet_ops_rwsem){++++}, at: cleanup_net+0x119/0x900 [ 341.809674] [ 341.809674] stack backtrace: [ 341.809674] CPU: 1 PID: 87 Comm: kworker/u4:2 Tainted: G W 4.19.0-rc5+ #16 [ 341.809674] Workqueue: netns cleanup_net [ 341.809674] Call Trace: [ ... ] [ 342.070196] down_write+0x93/0x160 [ 342.070196] ? unregister_netdevice_notifier+0x8c/0x460 [ 342.070196] ? down_read+0x1e0/0x1e0 [ 342.070196] ? sched_clock_cpu+0x126/0x170 [ 342.070196] ? find_held_lock+0x39/0x1c0 [ 342.070196] unregister_netdevice_notifier+0x8c/0x460 [ 342.070196] ? register_netdevice_notifier+0x790/0x790 [ 342.070196] ? __local_bh_enable_ip+0xe9/0x1b0 [ 342.070196] ? __local_bh_enable_ip+0xe9/0x1b0 [ 342.070196] ? clusterip_tg_destroy+0x372/0x650 [ipt_CLUSTERIP] [ 342.070196] ? trace_hardirqs_on+0x93/0x210 [ 342.070196] ? __bpf_trace_preemptirq_template+0x10/0x10 [ 342.070196] ? clusterip_tg_destroy+0x372/0x650 [ipt_CLUSTERIP] [ 342.123094] clusterip_tg_destroy+0x3ad/0x650 [ipt_CLUSTERIP] [ 342.123094] ? clusterip_net_init+0x3d0/0x3d0 [ipt_CLUSTERIP] [ 342.123094] ? cleanup_match+0x17d/0x200 [ip_tables] [ 342.123094] ? xt_unregister_table+0x215/0x300 [x_tables] [ 342.123094] ? kfree+0xe2/0x2a0 [ 342.123094] cleanup_entry+0x1d5/0x2f0 [ip_tables] [ 342.123094] ? cleanup_match+0x200/0x200 [ip_tables] [ 342.123094] __ipt_unregister_table+0x9b/0x1a0 [ip_tables] [ 342.123094] iptable_filter_net_exit+0x43/0x80 [iptable_filter] [ 342.123094] ops_exit_list.isra.10+0x94/0x140 [ 342.123094] cleanup_net+0x45b/0x900 [ ... ] Fixes: 202f59afd441 ("netfilter: ipt_CLUSTERIP: do not hold dev") Signed-off-by: Taehee Yoo <ap420073@gmail.com> --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 71 +++++++++++++++++------------- 1 file changed, 40 insertions(+), 31 deletions(-)