diff mbox

[v3] netlink: Do not schedule work from sk_destruct

Message ID 20161205072820.GB10204@gondor.apana.org.au
State Accepted, archived
Delegated to: David Miller
Headers show

Commit Message

Herbert Xu Dec. 5, 2016, 7:28 a.m. UTC
On Mon, Dec 05, 2016 at 03:26:00PM +0800, Herbert Xu wrote:
> On Mon, Dec 05, 2016 at 03:19:46PM +0800, Herbert Xu wrote:
> >
> > Thanks for the patch.  It'll obviously work but I wanted avoid that
> > because it penalises the common path for the rare case.
> > 
> > Andrey, please try this patch and let me know if it's any better.
> > 
> > ---8<---
> > Subject: netlink: Do not schedule work from sk_destruct
> 
> Crap, I screwed it up again.  Here is a v2 which moves the atomic
> call into the RCU callback as otherwise the socket can be freed from
> another path while we await the RCU callback.

With the move it no longer makes sense to rename deferred_put_nlk_sk
so here is v3 which restores the original name.

---8<---
It is wrong to schedule a work from sk_destruct using the socket
as the memory reserve because the socket will be freed immediately
after the return from sk_destruct.

Instead we should do the deferral prior to sk_free.

This patch does just that.

Fixes: 707693c8a498 ("netlink: Call cb->done from a worker thread")
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Comments

Andrey Konovalov Dec. 5, 2016, 11:51 a.m. UTC | #1
On Mon, Dec 5, 2016 at 8:28 AM, Herbert Xu <herbert@gondor.apana.org.au> wrote:
> On Mon, Dec 05, 2016 at 03:26:00PM +0800, Herbert Xu wrote:
>> On Mon, Dec 05, 2016 at 03:19:46PM +0800, Herbert Xu wrote:
>> >
>> > Thanks for the patch.  It'll obviously work but I wanted avoid that
>> > because it penalises the common path for the rare case.
>> >
>> > Andrey, please try this patch and let me know if it's any better.
>> >
>> > ---8<---
>> > Subject: netlink: Do not schedule work from sk_destruct
>>
>> Crap, I screwed it up again.  Here is a v2 which moves the atomic
>> call into the RCU callback as otherwise the socket can be freed from
>> another path while we await the RCU callback.
>
> With the move it no longer makes sense to rename deferred_put_nlk_sk
> so here is v3 which restores the original name.
>
> ---8<---
> It is wrong to schedule a work from sk_destruct using the socket
> as the memory reserve because the socket will be freed immediately
> after the return from sk_destruct.
>
> Instead we should do the deferral prior to sk_free.
>
> This patch does just that.
>
> Fixes: 707693c8a498 ("netlink: Call cb->done from a worker thread")
> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
>
> diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
> index 602e5eb..246f29d 100644
> --- a/net/netlink/af_netlink.c
> +++ b/net/netlink/af_netlink.c
> @@ -322,11 +322,13 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
>         sk_mem_charge(sk, skb->truesize);
>  }
>
> -static void __netlink_sock_destruct(struct sock *sk)
> +static void netlink_sock_destruct(struct sock *sk)
>  {
>         struct netlink_sock *nlk = nlk_sk(sk);
>
>         if (nlk->cb_running) {
> +               if (nlk->cb.done)
> +                       nlk->cb.done(&nlk->cb);
>                 module_put(nlk->cb.module);
>                 kfree_skb(nlk->cb.skb);
>         }
> @@ -348,21 +350,7 @@ static void netlink_sock_destruct_work(struct work_struct *work)
>         struct netlink_sock *nlk = container_of(work, struct netlink_sock,
>                                                 work);
>
> -       nlk->cb.done(&nlk->cb);
> -       __netlink_sock_destruct(&nlk->sk);
> -}
> -
> -static void netlink_sock_destruct(struct sock *sk)
> -{
> -       struct netlink_sock *nlk = nlk_sk(sk);
> -
> -       if (nlk->cb_running && nlk->cb.done) {
> -               INIT_WORK(&nlk->work, netlink_sock_destruct_work);
> -               schedule_work(&nlk->work);
> -               return;
> -       }
> -
> -       __netlink_sock_destruct(sk);
> +       sk_free(&nlk->sk);
>  }
>
>  /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
> @@ -667,8 +655,18 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
>  static void deferred_put_nlk_sk(struct rcu_head *head)
>  {
>         struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu);
> +       struct sock *sk = &nlk->sk;
> +
> +       if (!atomic_dec_and_test(&sk->sk_refcnt))
> +               return;
> +
> +       if (nlk->cb_running && nlk->cb.done) {
> +               INIT_WORK(&nlk->work, netlink_sock_destruct_work);
> +               schedule_work(&nlk->work);
> +               return;
> +       }
>
> -       sock_put(&nlk->sk);
> +       sk_free(sk);
>  }
>
>  static int netlink_release(struct socket *sock)
> --
> Email: Herbert Xu <herbert@gondor.apana.org.au>
> Home Page: http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

Hi Herbert,

Tested the last version of your patch, the reports go away.

Thanks for the fix!

Tested-by: Andrey Konovalov <andreyknvl@google.com>
David Miller Dec. 6, 2016, 12:44 a.m. UTC | #2
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 5 Dec 2016 15:28:21 +0800

> It is wrong to schedule a work from sk_destruct using the socket
> as the memory reserve because the socket will be freed immediately
> after the return from sk_destruct.
> 
> Instead we should do the deferral prior to sk_free.
> 
> This patch does just that.
> 
> Fixes: 707693c8a498 ("netlink: Call cb->done from a worker thread")
> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Applied, thanks Herbert.
diff mbox

Patch

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 602e5eb..246f29d 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -322,11 +322,13 @@  static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
 	sk_mem_charge(sk, skb->truesize);
 }
 
-static void __netlink_sock_destruct(struct sock *sk)
+static void netlink_sock_destruct(struct sock *sk)
 {
 	struct netlink_sock *nlk = nlk_sk(sk);
 
 	if (nlk->cb_running) {
+		if (nlk->cb.done)
+			nlk->cb.done(&nlk->cb);
 		module_put(nlk->cb.module);
 		kfree_skb(nlk->cb.skb);
 	}
@@ -348,21 +350,7 @@  static void netlink_sock_destruct_work(struct work_struct *work)
 	struct netlink_sock *nlk = container_of(work, struct netlink_sock,
 						work);
 
-	nlk->cb.done(&nlk->cb);
-	__netlink_sock_destruct(&nlk->sk);
-}
-
-static void netlink_sock_destruct(struct sock *sk)
-{
-	struct netlink_sock *nlk = nlk_sk(sk);
-
-	if (nlk->cb_running && nlk->cb.done) {
-		INIT_WORK(&nlk->work, netlink_sock_destruct_work);
-		schedule_work(&nlk->work);
-		return;
-	}
-
-	__netlink_sock_destruct(sk);
+	sk_free(&nlk->sk);
 }
 
 /* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
@@ -667,8 +655,18 @@  static int netlink_create(struct net *net, struct socket *sock, int protocol,
 static void deferred_put_nlk_sk(struct rcu_head *head)
 {
 	struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu);
+	struct sock *sk = &nlk->sk;
+
+	if (!atomic_dec_and_test(&sk->sk_refcnt))
+		return;
+
+	if (nlk->cb_running && nlk->cb.done) {
+		INIT_WORK(&nlk->work, netlink_sock_destruct_work);
+		schedule_work(&nlk->work);
+		return;
+	}
 
-	sock_put(&nlk->sk);
+	sk_free(sk);
 }
 
 static int netlink_release(struct socket *sock)