Re: [PATCH] net: clean the sk_frag.page of new cloned socket

Message ID 2AD939572F25A448A3AE3CAEA61328C23694645C@BC-MAIL-MBX12.internal.baidu.com
State RFC, archived
Delegated to: David Miller
Series Re: [PATCH] net: clean the sk_frag.page of new cloned socket

Commit Message

Li RongQing Jan. 26, 2018, 2:09 a.m. UTC
> >  		if (newsk->sk_prot->sockets_allocated)
> >  			sk_sockets_allocated_inc(newsk);
> 
> Good catch.
> 
> I suspect this was discovered by some syzkaller/syzbot run ?
> 

No.

I am seeing a panic where a page is on both a task's task_frag.page and the
buddy free list. That should not happen; the page's lru.next is
0xdead000000100100 and lru.prev is 0xdead000000200200, so when the page is
later allocated from the buddy allocator the system panics in __list_del()
called from __rmqueue().
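For reference, those two values are the kernel's list poison constants:
list_del() plants them in a removed entry so that any later use traps.
Paraphrased from include/linux/poison.h and include/linux/list.h (a sketch
for illustration, not the exact upstream source):

#define POISON_POINTER_DELTA	0xdead000000000000UL	/* CONFIG_ILLEGAL_POINTER_VALUE on x86-64 */
#define LIST_POISON1	((void *)(0x00100100UL + POISON_POINTER_DELTA))
#define LIST_POISON2	((void *)(0x00200200UL + POISON_POINTER_DELTA))

static inline void __list_del(struct list_head *prev, struct list_head *next)
{
	next->prev = prev;	/* faults if the entry was already poisoned */
	prev->next = next;
}

static inline void list_del(struct list_head *entry)
{
	__list_del(entry->prev, entry->next);
	entry->next = LIST_POISON1;	/* lru.next = 0xdead000000100100 in the dump */
	entry->prev = LIST_POISON2;	/* lru.prev = 0xdead000000200200 */
}

So deleting an already-deleted page from a free list dereferences the poison
pointers inside __list_del(), which is consistent with RDI: dead000000200200
and R10: dead000000100100 in the backtrace: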

#0 [ffff881fff0c3850] machine_kexec at ffffffff8103cca8
 #1 [ffff881fff0c38a0] crash_kexec at ffffffff810c2443
 #2 [ffff881fff0c3968] oops_end at ffffffff816cae70
 #3 [ffff881fff0c3990] die at ffffffff810063eb
 #4 [ffff881fff0c39c0] do_general_protection at ffffffff816ca7ce
 #5 [ffff881fff0c39f0] general_protection at ffffffff816ca0d8
    [exception RIP: __rmqueue+120]
    RIP: ffffffff8113a918  RSP: ffff881fff0c3aa0  RFLAGS: 00010046
    RAX: ffff88207ffd8018  RBX: 0000000000000003  RCX: 0000000000000003
    RDX: 0000000000000001  RSI: ffffea006f4cf620  RDI: dead000000200200
    RBP: ffff881fff0c3b00   R8: ffff88207ffd8018   R9: 0000000000000000
    R10: dead000000100100  R11: ffffea007ecc6480  R12: ffffea006f4cf600
    R13: 0000000000000000  R14: 0000000000000003  R15: ffff88207ffd7e80
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0000
 #6 [ffff881fff0c3b08] get_page_from_freelist at ffffffff8113ce71
 #7 [ffff881fff0c3be0] __alloc_pages_nodemask at ffffffff8113d15f
 #8 [ffff881fff0c3d10] __alloc_page_frag at ffffffff815e2362
 #9 [ffff881fff0c3d40] __netdev_alloc_frag at ffffffff815e241b
#10 [ffff881fff0c3d58] __alloc_rx_skb at ffffffff815e2f91
#11 [ffff881fff0c3d78] __netdev_alloc_skb at ffffffff815e300b
#12 [ffff881fff0c3d90] ixgbe_clean_rx_irq at ffffffffa003a98f [ixgbe]
#13 [ffff881fff0c3df8] ixgbe_poll at ffffffffa003c233 [ixgbe]
#14 [ffff881fff0c3e70] net_rx_action at ffffffff815f2f09
#15 [ffff881fff0c3ec8] __do_softirq at ffffffff81064867
#16 [ffff881fff0c3f38] call_softirq at ffffffff816d3a9c
#17 [ffff881fff0c3f50] do_softirq at ffffffff81004e65
#18 [ffff881fff0c3f68] irq_exit at ffffffff81064b7d
#19 [ffff881fff0c3f78] do_IRQ at ffffffff816d4428

The page info is shown below (some fields elided):

crash> struct page ffffea006f4cf600 -x
struct page {
  flags = 0x2fffff00004000, 
  mapping = 0x0, 
  {
    {
      counters = 0x2ffffffff, 
      {
        {
          _mapcount = {
            counter = 0xffffffff
          }, 
          {
            inuse = 0xffff, 
            objects = 0x7fff, 
            frozen = 0x1
          }, 
          units = 0xffffffff
        }, 
        _count = {
          counter = 0x2
        }
      }
    }
  }, 
  {
    lru = {
      next = 0xdead000000100100, 
      prev = 0xdead000000200200
    }, 
  }, 
…..
  }
}
crash>


The page ffffea006f4cf600 is also referenced by another task's task_frag.page,
and that task's backtrace is shown below:

crash> task 8683|grep ffffea006f4cf600 -A3  
    page = 0xffffea006f4cf600, 
    offset = 32768, 
    size = 32768
  }, 
crash>

crash> bt 8683
PID: 8683   TASK: ffff881faa088000  CPU: 10  COMMAND: "mynode"
 #0 [ffff881fff145e78] crash_nmi_callback at ffffffff81031712
 #1 [ffff881fff145e88] nmi_handle at ffffffff816cafe9
 #2 [ffff881fff145ec8] do_nmi at ffffffff816cb0f0
 #3 [ffff881fff145ef0] end_repeat_nmi at ffffffff816ca4a1
    [exception RIP: _raw_spin_lock_irqsave+62]
    RIP: ffffffff816c9a9e  RSP: ffff881fa992b990  RFLAGS: 00000002
    RAX: 0000000000004358  RBX: ffff88207ffd7e80  RCX: 0000000000004358
    RDX: 0000000000004356  RSI: 0000000000000246  RDI: ffff88207ffd7ee8
    RBP: ffff881fa992b990   R8: 0000000000000000   R9: 00000000019a16e6
    R10: 0000000000004d24  R11: 0000000000004000  R12: 0000000000000242
    R13: 0000000000004d24  R14: 0000000000000001  R15: 0000000000000000
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
--- <NMI exception stack> ---
 #4 [ffff881fa992b990] _raw_spin_lock_irqsave at ffffffff816c9a9e
 #5 [ffff881fa992b998] get_page_from_freelist at ffffffff8113ce5f
 #6 [ffff881fa992ba70] __alloc_pages_nodemask at ffffffff8113d15f
 #7 [ffff881fa992bba0] alloc_pages_current at ffffffff8117ab29
 #8 [ffff881fa992bbe8] sk_page_frag_refill at ffffffff815dd310
 #9 [ffff881fa992bc18] tcp_sendmsg at ffffffff8163e4f3
#10 [ffff881fa992bcd8] inet_sendmsg at ffffffff81668434
#11 [ffff881fa992bd08] sock_sendmsg at ffffffff815d9719
#12 [ffff881fa992be58] SYSC_sendto at ffffffff815d9c81
#13 [ffff881fa992bf70] sys_sendto at ffffffff815da6ae
#14 [ffff881fa992bf80] system_call_fastpath at ffffffff816d2189
    RIP: 00007f5bfe1d804b  RSP: 00007f5bfa63b3b0  RFLAGS: 00000206
    RAX: 000000000000002c  RBX: ffffffff816d2189  RCX: 00007f5bfa63b420
    RDX: 0000000000002000  RSI: 000000000c096000  RDI: 0000000000000040
    RBP: 0000000000000000   R8: 0000000000000000   R9: 0000000000000000
    R10: 0000000000000000  R11: 0000000000000246  R12: ffffffff815da6ae
    R13: ffff881fa992bf78  R14: 000000000000a552  R15: 0000000000000016
    ORIG_RAX: 000000000000002c  CS: 0033  SS: 002b
crash>


My kernel is 3.10. I have not found the root cause; I am guessing at all kinds
of possibilities.

> I would rather move that in tcp_disconnect() that only fuzzers use, instead of
> doing this on every clone and slowing down normal users.
> 

Do you mean we should fix it like below:

Comments

Eric Dumazet Jan. 26, 2018, 3:03 a.m. UTC | #1
On Fri, 2018-01-26 at 02:09 +0000, Li,Rongqing wrote:
> > >  		if (newsk->sk_prot->sockets_allocated)
> > >  			sk_sockets_allocated_inc(newsk);
> > 
> > Good catch.
> > 
> > I suspect this was discovered by some syzkaller/syzbot run ?
> > 
> 
> 
> No.
> 
> I am seeing a panic where a page is on both a task's task_frag.page and the
> buddy free list. That should not happen; the page's lru.next is
> 0xdead000000100100 and lru.prev is 0xdead000000200200, so when the page is
> later allocated from the buddy allocator the system panics in __list_del()
> called from __rmqueue().
> 
> #0 [ffff881fff0c3850] machine_kexec at ffffffff8103cca8
>  #1 [ffff881fff0c38a0] crash_kexec at ffffffff810c2443
>  #2 [ffff881fff0c3968] oops_end at ffffffff816cae70
>  #3 [ffff881fff0c3990] die at ffffffff810063eb
>  #4 [ffff881fff0c39c0] do_general_protection at ffffffff816ca7ce
>  #5 [ffff881fff0c39f0] general_protection at ffffffff816ca0d8
>     [exception RIP: __rmqueue+120]
>     RIP: ffffffff8113a918  RSP: ffff881fff0c3aa0  RFLAGS: 00010046
>     RAX: ffff88207ffd8018  RBX: 0000000000000003  RCX: 0000000000000003
>     RDX: 0000000000000001  RSI: ffffea006f4cf620  RDI: dead000000200200
>     RBP: ffff881fff0c3b00   R8: ffff88207ffd8018   R9: 0000000000000000
>     R10: dead000000100100  R11: ffffea007ecc6480  R12: ffffea006f4cf600
>     R13: 0000000000000000  R14: 0000000000000003  R15: ffff88207ffd7e80
>     ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0000
>  #6 [ffff881fff0c3b08] get_page_from_freelist at ffffffff8113ce71
>  #7 [ffff881fff0c3be0] __alloc_pages_nodemask at ffffffff8113d15f
>  #8 [ffff881fff0c3d10] __alloc_page_frag at ffffffff815e2362
>  #9 [ffff881fff0c3d40] __netdev_alloc_frag at ffffffff815e241b
> #10 [ffff881fff0c3d58] __alloc_rx_skb at ffffffff815e2f91
> #11 [ffff881fff0c3d78] __netdev_alloc_skb at ffffffff815e300b
> #12 [ffff881fff0c3d90] ixgbe_clean_rx_irq at ffffffffa003a98f [ixgbe]
> #13 [ffff881fff0c3df8] ixgbe_poll at ffffffffa003c233 [ixgbe]
> #14 [ffff881fff0c3e70] net_rx_action at ffffffff815f2f09
> #15 [ffff881fff0c3ec8] __do_softirq at ffffffff81064867
> #16 [ffff881fff0c3f38] call_softirq at ffffffff816d3a9c
> #17 [ffff881fff0c3f50] do_softirq at ffffffff81004e65
> #18 [ffff881fff0c3f68] irq_exit at ffffffff81064b7d
> #19 [ffff881fff0c3f78] do_IRQ at ffffffff816d4428
> 
> The page info is shown below (some fields elided):
> 
> crash> struct page ffffea006f4cf600 -x
> struct page {
>   flags = 0x2fffff00004000, 
>   mapping = 0x0, 
>   {
>     {
>       counters = 0x2ffffffff, 
>       {
>         {
>           _mapcount = {
>             counter = 0xffffffff
>           }, 
>           {
>             inuse = 0xffff, 
>             objects = 0x7fff, 
>             frozen = 0x1
>           }, 
>           units = 0xffffffff
>         }, 
>         _count = {
>           counter = 0x2
>         }
>       }
>     }
>   }, 
>   {
>     lru = {
>       next = 0xdead000000100100, 
>       prev = 0xdead000000200200
>     }, 
>   }, 
> …..
>   }
> }
> crash>
> 
> 
> The page ffffea006f4cf600 is also referenced by another task's
> task_frag.page, and that task's backtrace is shown below:
> 
> crash> task 8683|grep ffffea006f4cf600 -A3  
>     page = 0xffffea006f4cf600, 
>     offset = 32768, 
>     size = 32768
>   }, 
> crash>
> 
> crash> bt 8683
> PID: 8683   TASK: ffff881faa088000  CPU: 10  COMMAND: "mynode"
>  #0 [ffff881fff145e78] crash_nmi_callback at ffffffff81031712
>  #1 [ffff881fff145e88] nmi_handle at ffffffff816cafe9
>  #2 [ffff881fff145ec8] do_nmi at ffffffff816cb0f0
>  #3 [ffff881fff145ef0] end_repeat_nmi at ffffffff816ca4a1
>     [exception RIP: _raw_spin_lock_irqsave+62]
>     RIP: ffffffff816c9a9e  RSP: ffff881fa992b990  RFLAGS: 00000002
>     RAX: 0000000000004358  RBX: ffff88207ffd7e80  RCX: 0000000000004358
>     RDX: 0000000000004356  RSI: 0000000000000246  RDI: ffff88207ffd7ee8
>     RBP: ffff881fa992b990   R8: 0000000000000000   R9: 00000000019a16e6
>     R10: 0000000000004d24  R11: 0000000000004000  R12: 0000000000000242
>     R13: 0000000000004d24  R14: 0000000000000001  R15: 0000000000000000
>     ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
> --- <NMI exception stack> ---
>  #4 [ffff881fa992b990] _raw_spin_lock_irqsave at ffffffff816c9a9e
>  #5 [ffff881fa992b998] get_page_from_freelist at ffffffff8113ce5f
>  #6 [ffff881fa992ba70] __alloc_pages_nodemask at ffffffff8113d15f
>  #7 [ffff881fa992bba0] alloc_pages_current at ffffffff8117ab29
>  #8 [ffff881fa992bbe8] sk_page_frag_refill at ffffffff815dd310
>  #9 [ffff881fa992bc18] tcp_sendmsg at ffffffff8163e4f3
> #10 [ffff881fa992bcd8] inet_sendmsg at ffffffff81668434
> #11 [ffff881fa992bd08] sock_sendmsg at ffffffff815d9719
> #12 [ffff881fa992be58] SYSC_sendto at ffffffff815d9c81
> #13 [ffff881fa992bf70] sys_sendto at ffffffff815da6ae
> #14 [ffff881fa992bf80] system_call_fastpath at ffffffff816d2189
>     RIP: 00007f5bfe1d804b  RSP: 00007f5bfa63b3b0  RFLAGS: 00000206
>     RAX: 000000000000002c  RBX: ffffffff816d2189  RCX: 00007f5bfa63b420
>     RDX: 0000000000002000  RSI: 000000000c096000  RDI: 0000000000000040
>     RBP: 0000000000000000   R8: 0000000000000000   R9: 0000000000000000
>     R10: 0000000000000000  R11: 0000000000000246  R12: ffffffff815da6ae
>     R13: ffff881fa992bf78  R14: 000000000000a552  R15: 0000000000000016
>     ORIG_RAX: 000000000000002c  CS: 0033  SS: 002b
> crash>
> 
> 
> My kernel is 3.10. I have not found the root cause; I am guessing at all
> kinds of possibilities.
> 

Have you backported 22a0e18eac7a9e986fec76c60fa4a2926d1291e2 ?


> > I would rather move that in tcp_disconnect() that only fuzzers use, instead of
> > doing this on every clone and slowing down normal users.
> > 
> 
> 
> Do you mean we should fix it like below:
> 
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index f08eebe60446..44f8320610ab 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -2431,6 +2431,12 @@ int tcp_disconnect(struct sock *sk, int flags)
>  
>         WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
>  
> +
> +       if (sk->sk_frag.page) {
> +               put_page(sk->sk_frag.page);
> +               sk->sk_frag.page = NULL;
> +       }
> +
>         sk->sk_error_report(sk);
>         return err;
>  }

Yes, something like that.
Eric Dumazet Jan. 26, 2018, 3:14 a.m. UTC | #2
On Fri, 2018-01-26 at 02:09 +0000, Li,Rongqing wrote:

> 
> crash> bt 8683
> PID: 8683   TASK: ffff881faa088000  CPU: 10  COMMAND: "mynode"
>  #0 [ffff881fff145e78] crash_nmi_callback at ffffffff81031712
>  #1 [ffff881fff145e88] nmi_handle at ffffffff816cafe9
>  #2 [ffff881fff145ec8] do_nmi at ffffffff816cb0f0
>  #3 [ffff881fff145ef0] end_repeat_nmi at ffffffff816ca4a1
>     [exception RIP: _raw_spin_lock_irqsave+62]
>     RIP: ffffffff816c9a9e  RSP: ffff881fa992b990  RFLAGS: 00000002
>     RAX: 0000000000004358  RBX: ffff88207ffd7e80  RCX: 0000000000004358
>     RDX: 0000000000004356  RSI: 0000000000000246  RDI: ffff88207ffd7ee8
>     RBP: ffff881fa992b990   R8: 0000000000000000   R9: 00000000019a16e6
>     R10: 0000000000004d24  R11: 0000000000004000  R12: 0000000000000242
>     R13: 0000000000004d24  R14: 0000000000000001  R15: 0000000000000000
>     ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
> --- <NMI exception stack> ---
>  #4 [ffff881fa992b990] _raw_spin_lock_irqsave at ffffffff816c9a9e
>  #5 [ffff881fa992b998] get_page_from_freelist at ffffffff8113ce5f
>  #6 [ffff881fa992ba70] __alloc_pages_nodemask at ffffffff8113d15f
>  #7 [ffff881fa992bba0] alloc_pages_current at ffffffff8117ab29
>  #8 [ffff881fa992bbe8] sk_page_frag_refill at ffffffff815dd310
>  #9 [ffff881fa992bc18] tcp_sendmsg at ffffffff8163e4f3
> #10 [ffff881fa992bcd8] inet_sendmsg at ffffffff81668434
> #11 [ffff881fa992bd08] sock_sendmsg at ffffffff815d9719
> #12 [ffff881fa992be58] SYSC_sendto at ffffffff815d9c81
> #13 [ffff881fa992bf70] sys_sendto at ffffffff815da6ae
> #14 [ffff881fa992bf80] system_call_fastpath at ffffffff816d2189
> 

Note that tcp_sendmsg() does not use sk->sk_frag, but the per-task
page.

Unless something changes sk->sk_allocation, which a user application
cannot do.
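For reference, the helper that makes that choice is sk_page_frag();
paraphrasing the 3.10-era version in include/net/sock.h (a sketch, not the
exact upstream source):

static inline struct page_frag *sk_page_frag(struct sock *sk)
{
	/* Blocking allocations (normal userspace sockets) share the
	 * per-task frag; atomic ones fall back to the per-socket frag. */
	if (sk->sk_allocation & __GFP_WAIT)
		return &current->task_frag;

	return &sk->sk_frag;
}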

Are you using a pristine upstream kernel ?
Li RongQing Jan. 26, 2018, 3:21 a.m. UTC | #3
> > My kernel is 3.10. I have not found the root cause; I am guessing at all
> > kinds of possibilities.
> 
> Have you backported 22a0e18eac7a9e986fec76c60fa4a2926d1291e2 ?
> 

When I saw this bug I found that commit and backported it,
but it does not seem related to my bug.
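For context, that commit ("net: properly release sk_frag.page", if I recall
the subject correctly) makes the generic socket destruction path drop the
per-socket frag page for every protocol, roughly like the helper below (a
sketch from memory, not the exact upstream diff; sk_frag_release() is a
made-up name for illustration):

/* Run at socket destruction so every protocol, not just TCP's destroy
 * path, releases its sk_frag page reference. Hypothetical helper name. */
static void sk_frag_release(struct sock *sk)
{
	if (sk->sk_frag.page) {
		put_page(sk->sk_frag.page);
		sk->sk_frag.page = NULL;
	}
}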


> > > I would rather move that in tcp_disconnect() that only fuzzers use,
> > > instead of doing this on every clone and slowing down normal users.
> > 
> > Do you mean we should fix it like below:
> > 
> > diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> > index f08eebe60446..44f8320610ab 100644
> > --- a/net/ipv4/tcp.c
> > +++ b/net/ipv4/tcp.c
> > @@ -2431,6 +2431,12 @@ int tcp_disconnect(struct sock *sk, int flags)
> > 
> >         WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
> > 
> > +
> > +       if (sk->sk_frag.page) {
> > +               put_page(sk->sk_frag.page);
> > +               sk->sk_frag.page = NULL;
> > +       }
> > +
> >         sk->sk_error_report(sk);
> >         return err;
> >  }
> 
> Yes, something like that.


Ok, thanks

-R
Li RongQing Jan. 26, 2018, 3:23 a.m. UTC | #4
> -----Original Message-----
> From: Eric Dumazet [mailto:eric.dumazet@gmail.com]
> Sent: January 26, 2018 11:14
> To: Li,Rongqing <lirongqing@baidu.com>; netdev@vger.kernel.org
> Cc: edumazet@google.com
> Subject: Re: Re: [PATCH] net: clean the sk_frag.page of new cloned socket
> 
> On Fri, 2018-01-26 at 02:09 +0000, Li,Rongqing wrote:
> 
> > crash> bt 8683
> > PID: 8683   TASK: ffff881faa088000  CPU: 10  COMMAND: "mynode"
> >  #0 [ffff881fff145e78] crash_nmi_callback at ffffffff81031712
> >  #1 [ffff881fff145e88] nmi_handle at ffffffff816cafe9
> >  #2 [ffff881fff145ec8] do_nmi at ffffffff816cb0f0
> >  #3 [ffff881fff145ef0] end_repeat_nmi at ffffffff816ca4a1
> >     [exception RIP: _raw_spin_lock_irqsave+62]
> >     RIP: ffffffff816c9a9e  RSP: ffff881fa992b990  RFLAGS: 00000002
> >     RAX: 0000000000004358  RBX: ffff88207ffd7e80  RCX: 0000000000004358
> >     RDX: 0000000000004356  RSI: 0000000000000246  RDI: ffff88207ffd7ee8
> >     RBP: ffff881fa992b990   R8: 0000000000000000   R9: 00000000019a16e6
> >     R10: 0000000000004d24  R11: 0000000000004000  R12: 0000000000000242
> >     R13: 0000000000004d24  R14: 0000000000000001  R15: 0000000000000000
> >     ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
> > --- <NMI exception stack> ---
> >  #4 [ffff881fa992b990] _raw_spin_lock_irqsave at ffffffff816c9a9e
> >  #5 [ffff881fa992b998] get_page_from_freelist at ffffffff8113ce5f
> >  #6 [ffff881fa992ba70] __alloc_pages_nodemask at ffffffff8113d15f
> >  #7 [ffff881fa992bba0] alloc_pages_current at ffffffff8117ab29
> >  #8 [ffff881fa992bbe8] sk_page_frag_refill at ffffffff815dd310
> >  #9 [ffff881fa992bc18] tcp_sendmsg at ffffffff8163e4f3
> > #10 [ffff881fa992bcd8] inet_sendmsg at ffffffff81668434
> > #11 [ffff881fa992bd08] sock_sendmsg at ffffffff815d9719
> > #12 [ffff881fa992be58] SYSC_sendto at ffffffff815d9c81
> > #13 [ffff881fa992bf70] sys_sendto at ffffffff815da6ae
> > #14 [ffff881fa992bf80] system_call_fastpath at ffffffff816d2189
> 
> Note that tcp_sendmsg() does not use sk->sk_frag, but the per-task page.
> 
> Unless something changes sk->sk_allocation, which a user application
> cannot do.
> 
> Are you using a pristine upstream kernel ?

No.

I do not know how to reproduce the bug; I have seen it twice in production.

-RongQing
Cong Wang Jan. 26, 2018, 5:16 a.m. UTC | #5
On Thu, Jan 25, 2018 at 7:14 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Fri, 2018-01-26 at 02:09 +0000, Li,Rongqing wrote:
>
>>
>> crash> bt 8683
>> PID: 8683   TASK: ffff881faa088000  CPU: 10  COMMAND: "mynode"
>>  #0 [ffff881fff145e78] crash_nmi_callback at ffffffff81031712
>>  #1 [ffff881fff145e88] nmi_handle at ffffffff816cafe9
>>  #2 [ffff881fff145ec8] do_nmi at ffffffff816cb0f0
>>  #3 [ffff881fff145ef0] end_repeat_nmi at ffffffff816ca4a1
>>     [exception RIP: _raw_spin_lock_irqsave+62]
>>     RIP: ffffffff816c9a9e  RSP: ffff881fa992b990  RFLAGS: 00000002
>>     RAX: 0000000000004358  RBX: ffff88207ffd7e80  RCX: 0000000000004358
>>     RDX: 0000000000004356  RSI: 0000000000000246  RDI: ffff88207ffd7ee8
>>     RBP: ffff881fa992b990   R8: 0000000000000000   R9: 00000000019a16e6
>>     R10: 0000000000004d24  R11: 0000000000004000  R12: 0000000000000242
>>     R13: 0000000000004d24  R14: 0000000000000001  R15: 0000000000000000
>>     ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
>> --- <NMI exception stack> ---
>>  #4 [ffff881fa992b990] _raw_spin_lock_irqsave at ffffffff816c9a9e
>>  #5 [ffff881fa992b998] get_page_from_freelist at ffffffff8113ce5f
>>  #6 [ffff881fa992ba70] __alloc_pages_nodemask at ffffffff8113d15f
>>  #7 [ffff881fa992bba0] alloc_pages_current at ffffffff8117ab29
>>  #8 [ffff881fa992bbe8] sk_page_frag_refill at ffffffff815dd310
>>  #9 [ffff881fa992bc18] tcp_sendmsg at ffffffff8163e4f3
>> #10 [ffff881fa992bcd8] inet_sendmsg at ffffffff81668434
>> #11 [ffff881fa992bd08] sock_sendmsg at ffffffff815d9719
>> #12 [ffff881fa992be58] SYSC_sendto at ffffffff815d9c81
>> #13 [ffff881fa992bf70] sys_sendto at ffffffff815da6ae
>> #14 [ffff881fa992bf80] system_call_fastpath at ffffffff816d2189
>>
>
> Note that tcp_sendmsg() does not use sk->sk_frag, but the per task
> page.
>
> Unless something changes sk->sk_allocation, which a user application
> can not do.
>

Some kernel TCP sockets use atomic allocation, e.g.
o2net_open_listening_sock().
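For example, roughly what such a kernel socket setup looks like, modeled on
o2net_open_listening_sock() in fs/ocfs2/cluster/tcp.c (a sketch, not the
exact upstream source):

struct socket *sock;
int ret;

ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
if (ret < 0)
	return ret;

/* With GFP_ATOMIC, sk_page_frag() returns &sk->sk_frag instead of
 * &current->task_frag, so this socket does use the per-socket frag. */
sock->sk->sk_allocation = GFP_ATOMIC;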

Patch

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f08eebe60446..44f8320610ab 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2431,6 +2431,12 @@  int tcp_disconnect(struct sock *sk, int flags)
 
        WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
 
+
+       if (sk->sk_frag.page) {
+               put_page(sk->sk_frag.page);
+               sk->sk_frag.page = NULL;
+       }
+
        sk->sk_error_report(sk);
        return err;
 }
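For reference, tcp_disconnect() is reached from userspace by calling
connect() on an established TCP socket with an AF_UNSPEC address, so the cost
of the extra put_page() lands only on that rarely used path; a minimal
userspace sketch:

/* Disconnecting a connected TCP socket via connect(AF_UNSPEC) ends up
 * in tcp_disconnect() in the kernel. Error handling elided. */
#include <string.h>
#include <sys/socket.h>

static int tcp_self_disconnect(int fd)
{
	struct sockaddr sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_family = AF_UNSPEC;
	return connect(fd, &sa, sizeof(sa));
}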