Message ID | 1480136078.8455.589.camel@edumazet-glaptop3.roam.corp.google.com |
---|---|
State | Accepted, archived |
Delegated to: | David Miller |
Headers | show |
> Oh well, this wont work, since sk->sk_destruct will be called from RCU > callback. > > Grabbing the mutex should not be done from netlink_sock_destruct() but > from netlink_release() > > Maybe this patch would be better : > > diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c > index 62bea4591054..cce10e3c9b68 100644 > --- a/net/netlink/af_netlink.c > +++ b/net/netlink/af_netlink.c > @@ -324,16 +324,6 @@ static void netlink_skb_set_owner_r(struct > sk_buff *skb, struct sock *sk) > > static void netlink_sock_destruct(struct sock *sk) > { > - struct netlink_sock *nlk = nlk_sk(sk); > - > - if (nlk->cb_running) { > - if (nlk->cb.done) > - nlk->cb.done(&nlk->cb); > - > - module_put(nlk->cb.module); > - kfree_skb(nlk->cb.skb); > - } > - > skb_queue_purge(&sk->sk_receive_queue); > > if (!sock_flag(sk, SOCK_DEAD)) { > @@ -456,8 +446,9 @@ static struct sock *netlink_lookup(struct net > *net, int protocol, u32 portid) > > rcu_read_lock(); > sk = __netlink_lookup(table, portid, net); > - if (sk) > - sock_hold(sk); > + if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) > + sk = NULL; > + > rcu_read_unlock(); > > return sk; > @@ -581,6 +572,7 @@ static int __netlink_create(struct net *net, > struct socket *sock, > } > init_waitqueue_head(&nlk->wait); > > + sock_set_flag(sk, SOCK_RCU_FREE); > sk->sk_destruct = netlink_sock_destruct; > sk->sk_protocol = protocol; > return 0; > @@ -645,13 +637,6 @@ static int netlink_create(struct net *net, struct > socket *sock, int protocol, > goto out; > } > > -static void deferred_put_nlk_sk(struct rcu_head *head) > -{ > - struct netlink_sock *nlk = container_of(head, struct netlink_sock, > rcu); > - > - sock_put(&nlk->sk); > -} > - > static int netlink_release(struct socket *sock) > { > struct sock *sk = sock->sk; > @@ -724,7 +709,19 @@ static int netlink_release(struct socket *sock) > local_bh_disable(); > sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); > local_bh_enable(); > - call_rcu(&nlk->rcu, deferred_put_nlk_sk); > + if (nlk->cb_running) { > + mutex_lock(nlk->cb_mutex); > + if (nlk->cb_running) { > + if (nlk->cb.done) > + nlk->cb.done(&nlk->cb); > + > + module_put(nlk->cb.module); > + kfree_skb(nlk->cb.skb); > + nlk->cb_running = false; > + } > + mutex_unlock(nlk->cb_mutex); > + } > + sock_put(sk); > return 0; > } > > diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h > index 3cfd6cc60504..5dc08a7b0a2b 100644 > --- a/net/netlink/af_netlink.h > +++ b/net/netlink/af_netlink.h > @@ -32,7 +32,6 @@ struct netlink_sock { > struct module *module; > > struct rhash_head node; > - struct rcu_head rcu; > }; > > static inline struct netlink_sock *nlk_sk(struct sock *sk) Thanks Eric! I'll try this and get back with results over this weekend. -- Qualcomm Innovation Center, Inc. The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project
On Fri, Nov 25, 2016 at 8:54 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote: > > Oh well, this wont work, since sk->sk_destruct will be called from RCU > callback. > > Grabbing the mutex should not be done from netlink_sock_destruct() but > from netlink_release() But you also change the behavior of cb.done(), currently it is called when the last sock ref is gone, with your patch it is called when the first sock is closed. No? I don't see why we need to get genl_lock in ->done() here, because we are already the last sock using it and module ref protects the ops from being removed via module, seems we are pretty safe without any lock.
On Sat, 2016-11-26 at 18:08 -0800, Cong Wang wrote: > On Fri, Nov 25, 2016 at 8:54 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote: > > > > Oh well, this wont work, since sk->sk_destruct will be called from RCU > > callback. > > > > Grabbing the mutex should not be done from netlink_sock_destruct() but > > from netlink_release() > > But you also change the behavior of cb.done(), currently it is called when the > last sock ref is gone, with your patch it is called when the first > sock is closed. No. It will be called when last refcount on the socket is released, sk_refcnt transitions to final 0. My patch changes the sock_hold() to the variant that makes sure sk_refcnt is not 0 before increase, otherwise a race can happen and release could be called twice. Classic refcounting stuff coupled to rcu rules. > No? Are you telling me inet_release() is called when we close() the first file descriptor ? fd1 = socket() fd2 = dup(fd1); close(fd2) -> release() ??? > > I don't see why we need to get genl_lock in ->done() here, because we are > already the last sock using it and module ref protects the ops from being > removed via module, seems we are pretty safe without any lock. Well, at least this exposes a real bug in Thomas patch. Removing the lock might be done for net-next, not stable branches.
On Sat, Nov 26, 2016 at 6:26 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote: > > Are you telling me inet_release() is called when we close() the first > file descriptor ? > > fd1 = socket() > fd2 = dup(fd1); > close(fd2) -> release() ??? Sorry, I didn't express myself clearly, I meant your change, if exclude the SOCK_RCU_FREE part, basically reverts this commit: commit 3f660d66dfbc13ea4b61d3865851b348444c24b4 Author: Herbert Xu <herbert@gondor.apana.org.au> Date: Thu May 3 03:17:14 2007 -0700 [NETLINK]: Kill CB only when socket is unused IOW, ->release() is called when the last sock fd ref is gone, but ->destructor() is called with the last sock ref is gone. They are very different. >> I don't see why we need to get genl_lock in ->done() here, because we are >> already the last sock using it and module ref protects the ops from being >> removed via module, seems we are pretty safe without any lock. > > Well, at least this exposes a real bug in Thomas patch. > > Removing the lock might be done for net-next, not stable branches. I am confused, what Subash reported is a kernel warning which can surely be fixed by removing genl lock (if it is correct, I need to double check), so why for net-next?
On Sat, 2016-11-26 at 22:28 -0800, Cong Wang wrote: > On Sat, Nov 26, 2016 at 6:26 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote: > > > > Are you telling me inet_release() is called when we close() the first > > file descriptor ? > > > > fd1 = socket() > > fd2 = dup(fd1); > > close(fd2) -> release() ??? > > Sorry, I didn't express myself clearly, I meant your change, > if exclude the SOCK_RCU_FREE part, basically reverts this commit: > > commit 3f660d66dfbc13ea4b61d3865851b348444c24b4 > Author: Herbert Xu <herbert@gondor.apana.org.au> > Date: Thu May 3 03:17:14 2007 -0700 > > [NETLINK]: Kill CB only when socket is unused > > IOW, ->release() is called when the last sock fd ref is gone, but ->destructor() > is called with the last sock ref is gone. They are very different. Hmm... > I am confused, what Subash reported is a kernel warning which can > surely be fixed by removing genl lock (if it is correct, I need to double > check), so why for net-next? Because Subash pointed to a buggy commit. We want to fix all issues bring by this commit, not only the immediate problem about mutex. I have no idea if we can safely remove the mutex from genl_lock_done() : The genl_lock() is not only protecting the socket itself, it might protect global data as well, or protect some kind of lock ordering among multiple mutexes. Have you checked all genl users, down to linux-4.0 , point where commit 21e4902aea80ef35a was added ? Herbert, Thomas, your help would be appreciated, thanks.
On Sun, Nov 27, 2016 at 8:23 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote: > On Sat, 2016-11-26 at 22:28 -0800, Cong Wang wrote: >> On Sat, Nov 26, 2016 at 6:26 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote: >> > >> > Are you telling me inet_release() is called when we close() the first >> > file descriptor ? >> > >> > fd1 = socket() >> > fd2 = dup(fd1); >> > close(fd2) -> release() ??? >> >> Sorry, I didn't express myself clearly, I meant your change, >> if exclude the SOCK_RCU_FREE part, basically reverts this commit: >> >> commit 3f660d66dfbc13ea4b61d3865851b348444c24b4 >> Author: Herbert Xu <herbert@gondor.apana.org.au> >> Date: Thu May 3 03:17:14 2007 -0700 >> >> [NETLINK]: Kill CB only when socket is unused >> >> IOW, ->release() is called when the last sock fd ref is gone, but ->destructor() >> is called with the last sock ref is gone. They are very different. > > Hmm... > > >> I am confused, what Subash reported is a kernel warning which can >> surely be fixed by removing genl lock (if it is correct, I need to double >> check), so why for net-next? > > Because Subash pointed to a buggy commit. > > We want to fix all issues bring by this commit, not only the immediate > problem about mutex. > > I have no idea if we can safely remove the mutex from genl_lock_done() : I meant removing it only for the destructor case, we definitely can't remove it for the dump case. > > The genl_lock() is not only protecting the socket itself, it might > protect global data as well, or protect some kind of lock ordering among > multiple mutexes. > > Have you checked all genl users, down to linux-4.0 , point where commit > 21e4902aea80ef35a was added ? > I just took a deeper look, some user calls rhashtable_destroy() in ->done(), so even removing that genl lock is not enough, perhaps we should just move it to a work struct like what Daniel does for the tcf_proto, but that is ugly... I don't know if RCU provides any API to execute the callback in process context.
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 62bea4591054..cce10e3c9b68 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -324,16 +324,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) static void netlink_sock_destruct(struct sock *sk) { - struct netlink_sock *nlk = nlk_sk(sk); - - if (nlk->cb_running) { - if (nlk->cb.done) - nlk->cb.done(&nlk->cb); - - module_put(nlk->cb.module); - kfree_skb(nlk->cb.skb); - } - skb_queue_purge(&sk->sk_receive_queue); if (!sock_flag(sk, SOCK_DEAD)) { @@ -456,8 +446,9 @@ static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) rcu_read_lock(); sk = __netlink_lookup(table, portid, net); - if (sk) - sock_hold(sk); + if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + sk = NULL; + rcu_read_unlock(); return sk; @@ -581,6 +572,7 @@ static int __netlink_create(struct net *net, struct socket *sock, } init_waitqueue_head(&nlk->wait); + sock_set_flag(sk, SOCK_RCU_FREE); sk->sk_destruct = netlink_sock_destruct; sk->sk_protocol = protocol; return 0; @@ -645,13 +637,6 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, goto out; } -static void deferred_put_nlk_sk(struct rcu_head *head) -{ - struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu); - - sock_put(&nlk->sk); -} - static int netlink_release(struct socket *sock) { struct sock *sk = sock->sk; @@ -724,7 +709,19 @@ static int netlink_release(struct socket *sock) local_bh_disable(); sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); local_bh_enable(); - call_rcu(&nlk->rcu, deferred_put_nlk_sk); + if (nlk->cb_running) { + mutex_lock(nlk->cb_mutex); + if (nlk->cb_running) { + if (nlk->cb.done) + nlk->cb.done(&nlk->cb); + + module_put(nlk->cb.module); + kfree_skb(nlk->cb.skb); + nlk->cb_running = false; + } + mutex_unlock(nlk->cb_mutex); + } + sock_put(sk); return 0; } diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index 3cfd6cc60504..5dc08a7b0a2b 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -32,7 +32,6 @@ struct netlink_sock { struct module *module; struct rhash_head node; - struct rcu_head rcu; }; static inline struct netlink_sock *nlk_sk(struct sock *sk)