Message ID | 1521467568-37876-1-git-send-email-sowmini.varadhan@oracle.com |
---|---|
State | Accepted, archived |
Delegated to: | David Miller |
Headers | show |
Series | [net-next] rds: tcp: remove register_netdevice_notifier infrastructure. | expand |
On 19.03.2018 16:52, Sowmini Varadhan wrote: > The netns deletion path does not need to wait for all net_devices > to be unregistered before dismantling rds_tcp state for the netns > (we are able to dismantle this state on module unload even when > all net_devices are active so there is no dependency here). > > This patch removes code related to netdevice notifiers and > refactors all the code needed to dismantle rds_tcp state > into a ->exit callback for the pernet_operations used with > register_pernet_device(). > > Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com> I just repeat my words: rds_tcp_listen_sock destruction looks nice and safe, since all the places the sockets is dereferenced use sk_callback_lock. So they don't miss rds_tcp_listen_sock = NULL, as rds_tcp_listen_stop() takes the lock too. rds_tcp_conn_list is populated from: 1)rds_tcp_accept_one(), which can't happen after we flushed the queue in rds_tcp_listen_stop(); 2)rds_sendmsg(), which is triggered by userspace, and that's impossible, when net is dead; 3)rds_ib_cm_handle_connect(), which call rds_conn_create() with init_net argument only. This may race with module unloading only, but this problem is already solved in RDS by rds_destroy_pending() check, which care about that. Reviewed-by: Kirill Tkhai <ktkhai@virtuozzo.com> (The only thing I don't know is the reason we need to destroy the sockets before last netdevice, but I haven't dived into that. Just to mention this...). Thanks, Sowmini. Kirill > --- > net/rds/tcp.c | 93 ++++++++++++++------------------------------------------- > 1 files changed, 23 insertions(+), 70 deletions(-) > > diff --git a/net/rds/tcp.c b/net/rds/tcp.c > index 08ea9cd..4f3a32c 100644 > --- a/net/rds/tcp.c > +++ b/net/rds/tcp.c > @@ -485,40 +485,6 @@ static __net_init int rds_tcp_init_net(struct net *net) > return err; > } > > -static void __net_exit rds_tcp_exit_net(struct net *net) > -{ > - struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); > - > - if (rtn->rds_tcp_sysctl) > - unregister_net_sysctl_table(rtn->rds_tcp_sysctl); > - > - if (net != &init_net && rtn->ctl_table) > - kfree(rtn->ctl_table); > - > - /* If rds_tcp_exit_net() is called as a result of netns deletion, > - * the rds_tcp_kill_sock() device notifier would already have cleaned > - * up the listen socket, thus there is no work to do in this function. > - * > - * If rds_tcp_exit_net() is called as a result of module unload, > - * i.e., due to rds_tcp_exit() -> unregister_pernet_subsys(), then > - * we do need to clean up the listen socket here. > - */ > - if (rtn->rds_tcp_listen_sock) { > - struct socket *lsock = rtn->rds_tcp_listen_sock; > - > - rtn->rds_tcp_listen_sock = NULL; > - rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w); > - } > -} > - > -static struct pernet_operations rds_tcp_net_ops = { > - .init = rds_tcp_init_net, > - .exit = rds_tcp_exit_net, > - .id = &rds_tcp_netid, > - .size = sizeof(struct rds_tcp_net), > - .async = true, > -}; > - > static void rds_tcp_kill_sock(struct net *net) > { > struct rds_tcp_connection *tc, *_tc; > @@ -546,40 +512,38 @@ static void rds_tcp_kill_sock(struct net *net) > rds_conn_destroy(tc->t_cpath->cp_conn); > } > > -void *rds_tcp_listen_sock_def_readable(struct net *net) > +static void __net_exit rds_tcp_exit_net(struct net *net) > { > struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); > - struct socket *lsock = rtn->rds_tcp_listen_sock; > > - if (!lsock) > - return NULL; > + rds_tcp_kill_sock(net); > > - return lsock->sk->sk_user_data; > + if (rtn->rds_tcp_sysctl) > + unregister_net_sysctl_table(rtn->rds_tcp_sysctl); > + > + if (net != &init_net && rtn->ctl_table) > + kfree(rtn->ctl_table); > } > > -static int rds_tcp_dev_event(struct notifier_block *this, > - unsigned long event, void *ptr) > +static struct pernet_operations rds_tcp_net_ops = { > + .init = rds_tcp_init_net, > + .exit = rds_tcp_exit_net, > + .id = &rds_tcp_netid, > + .size = sizeof(struct rds_tcp_net), > + .async = true, > +}; > + > +void *rds_tcp_listen_sock_def_readable(struct net *net) > { > - struct net_device *dev = netdev_notifier_info_to_dev(ptr); > + struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); > + struct socket *lsock = rtn->rds_tcp_listen_sock; > > - /* rds-tcp registers as a pernet subys, so the ->exit will only > - * get invoked after network acitivity has quiesced. We need to > - * clean up all sockets to quiesce network activity, and use > - * the unregistration of the per-net loopback device as a trigger > - * to start that cleanup. > - */ > - if (event == NETDEV_UNREGISTER_FINAL && > - dev->ifindex == LOOPBACK_IFINDEX) > - rds_tcp_kill_sock(dev_net(dev)); > + if (!lsock) > + return NULL; > > - return NOTIFY_DONE; > + return lsock->sk->sk_user_data; > } > > -static struct notifier_block rds_tcp_dev_notifier = { > - .notifier_call = rds_tcp_dev_event, > - .priority = -10, /* must be called after other network notifiers */ > -}; > - > /* when sysctl is used to modify some kernel socket parameters,this > * function resets the RDS connections in that netns so that we can > * restart with new parameters. The assumption is that such reset > @@ -625,9 +589,7 @@ static void rds_tcp_exit(void) > rds_tcp_set_unloading(); > synchronize_rcu(); > rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); > - unregister_pernet_subsys(&rds_tcp_net_ops); > - if (unregister_netdevice_notifier(&rds_tcp_dev_notifier)) > - pr_warn("could not unregister rds_tcp_dev_notifier\n"); > + unregister_pernet_device(&rds_tcp_net_ops); > rds_tcp_destroy_conns(); > rds_trans_unregister(&rds_tcp_transport); > rds_tcp_recv_exit(); > @@ -651,24 +613,15 @@ static int rds_tcp_init(void) > if (ret) > goto out_slab; > > - ret = register_pernet_subsys(&rds_tcp_net_ops); > + ret = register_pernet_device(&rds_tcp_net_ops); > if (ret) > goto out_recv; > > - ret = register_netdevice_notifier(&rds_tcp_dev_notifier); > - if (ret) { > - pr_warn("could not register rds_tcp_dev_notifier\n"); > - goto out_pernet; > - } > - > rds_trans_register(&rds_tcp_transport); > > rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); > > goto out; > - > -out_pernet: > - unregister_pernet_subsys(&rds_tcp_net_ops); > out_recv: > rds_tcp_recv_exit(); > out_slab: >
From: Sowmini Varadhan <sowmini.varadhan@oracle.com> Date: Mon, 19 Mar 2018 06:52:48 -0700 > The netns deletion path does not need to wait for all net_devices > to be unregistered before dismantling rds_tcp state for the netns > (we are able to dismantle this state on module unload even when > all net_devices are active so there is no dependency here). > > This patch removes code related to netdevice notifiers and > refactors all the code needed to dismantle rds_tcp state > into a ->exit callback for the pernet_operations used with > register_pernet_device(). > > Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com> Applied.
diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 08ea9cd..4f3a32c 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -485,40 +485,6 @@ static __net_init int rds_tcp_init_net(struct net *net) return err; } -static void __net_exit rds_tcp_exit_net(struct net *net) -{ - struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); - - if (rtn->rds_tcp_sysctl) - unregister_net_sysctl_table(rtn->rds_tcp_sysctl); - - if (net != &init_net && rtn->ctl_table) - kfree(rtn->ctl_table); - - /* If rds_tcp_exit_net() is called as a result of netns deletion, - * the rds_tcp_kill_sock() device notifier would already have cleaned - * up the listen socket, thus there is no work to do in this function. - * - * If rds_tcp_exit_net() is called as a result of module unload, - * i.e., due to rds_tcp_exit() -> unregister_pernet_subsys(), then - * we do need to clean up the listen socket here. - */ - if (rtn->rds_tcp_listen_sock) { - struct socket *lsock = rtn->rds_tcp_listen_sock; - - rtn->rds_tcp_listen_sock = NULL; - rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w); - } -} - -static struct pernet_operations rds_tcp_net_ops = { - .init = rds_tcp_init_net, - .exit = rds_tcp_exit_net, - .id = &rds_tcp_netid, - .size = sizeof(struct rds_tcp_net), - .async = true, -}; - static void rds_tcp_kill_sock(struct net *net) { struct rds_tcp_connection *tc, *_tc; @@ -546,40 +512,38 @@ static void rds_tcp_kill_sock(struct net *net) rds_conn_destroy(tc->t_cpath->cp_conn); } -void *rds_tcp_listen_sock_def_readable(struct net *net) +static void __net_exit rds_tcp_exit_net(struct net *net) { struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); - struct socket *lsock = rtn->rds_tcp_listen_sock; - if (!lsock) - return NULL; + rds_tcp_kill_sock(net); - return lsock->sk->sk_user_data; + if (rtn->rds_tcp_sysctl) + unregister_net_sysctl_table(rtn->rds_tcp_sysctl); + + if (net != &init_net && rtn->ctl_table) + kfree(rtn->ctl_table); } -static int rds_tcp_dev_event(struct notifier_block *this, - unsigned long event, void *ptr) +static struct pernet_operations rds_tcp_net_ops = { + .init = rds_tcp_init_net, + .exit = rds_tcp_exit_net, + .id = &rds_tcp_netid, + .size = sizeof(struct rds_tcp_net), + .async = true, +}; + +void *rds_tcp_listen_sock_def_readable(struct net *net) { - struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); + struct socket *lsock = rtn->rds_tcp_listen_sock; - /* rds-tcp registers as a pernet subys, so the ->exit will only - * get invoked after network acitivity has quiesced. We need to - * clean up all sockets to quiesce network activity, and use - * the unregistration of the per-net loopback device as a trigger - * to start that cleanup. - */ - if (event == NETDEV_UNREGISTER_FINAL && - dev->ifindex == LOOPBACK_IFINDEX) - rds_tcp_kill_sock(dev_net(dev)); + if (!lsock) + return NULL; - return NOTIFY_DONE; + return lsock->sk->sk_user_data; } -static struct notifier_block rds_tcp_dev_notifier = { - .notifier_call = rds_tcp_dev_event, - .priority = -10, /* must be called after other network notifiers */ -}; - /* when sysctl is used to modify some kernel socket parameters,this * function resets the RDS connections in that netns so that we can * restart with new parameters. The assumption is that such reset @@ -625,9 +589,7 @@ static void rds_tcp_exit(void) rds_tcp_set_unloading(); synchronize_rcu(); rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); - unregister_pernet_subsys(&rds_tcp_net_ops); - if (unregister_netdevice_notifier(&rds_tcp_dev_notifier)) - pr_warn("could not unregister rds_tcp_dev_notifier\n"); + unregister_pernet_device(&rds_tcp_net_ops); rds_tcp_destroy_conns(); rds_trans_unregister(&rds_tcp_transport); rds_tcp_recv_exit(); @@ -651,24 +613,15 @@ static int rds_tcp_init(void) if (ret) goto out_slab; - ret = register_pernet_subsys(&rds_tcp_net_ops); + ret = register_pernet_device(&rds_tcp_net_ops); if (ret) goto out_recv; - ret = register_netdevice_notifier(&rds_tcp_dev_notifier); - if (ret) { - pr_warn("could not register rds_tcp_dev_notifier\n"); - goto out_pernet; - } - rds_trans_register(&rds_tcp_transport); rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); goto out; - -out_pernet: - unregister_pernet_subsys(&rds_tcp_net_ops); out_recv: rds_tcp_recv_exit(); out_slab:
The netns deletion path does not need to wait for all net_devices to be unregistered before dismantling rds_tcp state for the netns (we are able to dismantle this state on module unload even when all net_devices are active so there is no dependency here). This patch removes code related to netdevice notifiers and refactors all the code needed to dismantle rds_tcp state into a ->exit callback for the pernet_operations used with register_pernet_device(). Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com> --- net/rds/tcp.c | 93 ++++++++++++++------------------------------------------- 1 files changed, 23 insertions(+), 70 deletions(-)