diff mbox series

[BUG,net-next] BUG triggered with GRO SKB list_head changes

Message ID 20180711223907.GA24943@sec
State RFC, archived
Delegated to: David Miller
Headers show
Series [BUG,net-next] BUG triggered with GRO SKB list_head changes | expand

Commit Message

Tyler Hicks July 11, 2018, 10:39 p.m. UTC
Starting with the following net-next commit, I see a BUG when starting a
LXD container inside of a KVM guest using virtio-net:

  d4546c2509b1 net: Convert GRO SKB handling to list_head.

Here's what the kernel spits out:

 kernel BUG at /var/scm/kernel/linux/include/linux/skbuff.h:2080!
 invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC PTI
 CPU: 0 PID: 1362 Comm: libvirtd Not tainted 4.18.0-rc2+ #69
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
 RIP: 0010:skb_pull+0x36/0x40
 Code: c6 77 24 29 f0 3b 87 84 00 00 00 89 87 80 00 00 00 72 17 89 f6 48 89 f0 48 03 87 d8 00 00 00 48 89 87 d8 00 00 00 c3 31 c0 c3 <0f> 0b 0f 1f 84 00 00 00 
00 00 0f 1f 44 00 00 39 b7 80 00 00 00 76 
 RSP: 0000:ffff96737f6039f0 EFLAGS: 00010297
 RAX: 000000009c66e2f2 RBX: 0000000000000000 RCX: 0000000000000501
 RDX: 0000000000000001 RSI: 000000000000000e RDI: ffff96737f7e3938
 RBP: ffff967379f40020 R08: 0000000000000000 R09: 0000000000000000
 R10: ffff96737f603988 R11: ffffffffc0461335 R12: ffff967379f409e0
 R13: ffff96737f7e3938 R14: 0000000000000000 R15: ffff967379e96ac0
 FS:  00007fc96087e640(0000) GS:ffff96737f600000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00007fc913608aa0 CR3: 000000005dacc001 CR4: 00000000001606f0
 Call Trace:
  <IRQ>
  br_dev_xmit+0xe1/0x3d0 [bridge]
  dev_hard_start_xmit+0xbc/0x3b0
  __dev_queue_xmit+0xb98/0xc30
  ip_finish_output2+0x3e5/0x670
  ? ip_output+0x7f/0x250
  ip_output+0x7f/0x250
  ? ip_fragment.constprop.5+0x80/0x80
  ip_forward+0x3e2/0x650
  ? ipv4_frags_init_net+0x130/0x130
  ip_rcv+0x2be/0x500
  ? ip_local_deliver_finish+0x3b0/0x3b0
  __netif_receive_skb_core+0x6a8/0xb30
  ? lock_acquire+0xab/0x200
  ? netif_receive_skb_internal+0x2a/0x380
  netif_receive_skb_internal+0x73/0x380
  ? napi_gro_complete+0xcf/0x1b0
  dev_gro_receive+0x374/0x730
  napi_gro_receive+0x4f/0x1d0
  receive_buf+0x4b6/0x1930 [virtio_net]
  ? detach_buf+0x69/0x120 [virtio_ring]
  virtnet_poll+0x122/0x2e0 [virtio_net]
  net_rx_action+0x207/0x450
  __do_softirq+0x149/0x4ea
  irq_exit+0xbf/0xd0
  do_IRQ+0x6c/0x130
  common_interrupt+0xf/0xf
  </IRQ>
 RIP: 0010:__radix_tree_lookup+0x28/0xe0
 Code: 00 00 53 49 89 ca 41 bb 40 00 00 00 4c 8b 47 50 4c 89 c0 83 e0 03 48 83 f8 01 0f 85 a8 00 00 00 4c 89 c0 48 83 e0 fe 0f b6 08 <4c> 89 d8 48 d3 e0 48 83 
e8 01 48 39 c6 76 11 e9 9f 00 00 00 4c 89 
 RSP: 0000:ffffae150048fcc0 EFLAGS: 00000282 ORIG_RAX: ffffffffffffffd9
 RAX: ffff96735d2ef908 RBX: 000000000000001f RCX: 0000000000000006
 RDX: 0000000000000000 RSI: 00000000000002e2 RDI: ffff96735d10b788
 RBP: 00000000000002e2 R08: ffff96735d2ef909 R09: 0000000000000000
 R10: 0000000000000000 R11: 0000000000000040 R12: 000000000000001f
 R13: ffffec01c15f3a80 R14: 000000000000001f R15: ffffae150048fd18
  __do_page_cache_readahead+0x11f/0x2e0
  filemap_fault+0x408/0x660
  ext4_filemap_fault+0x2f/0x40
  __do_fault+0x1f/0xd0
  __handle_mm_fault+0x915/0xfa0
  handle_mm_fault+0x1c2/0x390
  __do_page_fault+0x2f6/0x580
  ? async_page_fault+0x5/0x20
  async_page_fault+0x1b/0x20
 RIP: 0033:0x7fc913608aa0
 Code: Bad RIP value.
 RSP: 002b:00007ffcfa9c7f08 EFLAGS: 00010206
 RAX: 0000000000000000 RBX: 0000000000000003 RCX: 0000000000000080
 RDX: 0000000000000006 RSI: 00007fc913a74bf8 RDI: 00007fc913df9720
 RBP: 0000000000000001 R08: 000055df45795700 R09: 0000000000000000
 R10: 000055df4574c010 R11: 0000000000000001 R12: 00007ffcfa9c8c38
 R13: 00007ffcfa9c8c48 R14: 00007fc913dc3d70 R15: 000055df4578ab30
 Modules linked in: veth ebtable_filter ebtables ipt_MASQUERADE xt_CHECKSUM xt_comment xt_tcpudp iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack libcrc32c iptable_mangle iptable_filter bpfilter bridge stp llc fuse kvm_intel kvm irqbypass 9pnet_virtio 9pnet virtio_balloon ib_iser rdma_cm configfs iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ip_tables x_tables virtio_net net_failover virtio_blk failover crc32_pclmul crc32c_intel pcbc aesni_intel aes_x86_64 crypto_simd cryptd glue_helper virtio_pci psmouse virtio_ring virtio

I'm not very familiar with the GRO or IP fragmentation code but I was
able to identify that this change "fixes" the issue:


That's not the correct fix, as we wouldn't want to waste space with two
list implementations always being around, but I think it shows that
perhaps there is something in the call stack attempting to use both the
list_head list and the ip_defrag_offset at the same time and
unintentionally trouncing over the other member in the union.

I wish I had a proper fix but I suspect that someone more familiar with
this code will spot the issue quickly. I didn't see anything incorrect
in the list manipulations in the offending commit so some deeper
knowledge of the network stack is needed.

Tyler

Comments

Prashant Bhole July 12, 2018, 7:29 a.m. UTC | #1
On 7/12/2018 7:39 AM, Tyler Hicks wrote:
> Starting with the following net-next commit, I see a BUG when starting a
> LXD container inside of a KVM guest using virtio-net:
> 
>    d4546c2509b1 net: Convert GRO SKB handling to list_head.

Recently I encountered KASAN:use-after-free BUG and git bisect pointed 
to above commit. Looks like this is the same issue without KASAN 
enabled. I have submitted a bugfix for this BUG with Tyler Hicks in 
Reported-by tag.

-Prashant


> 
> Here's what the kernel spits out:
> 
>   kernel BUG at /var/scm/kernel/linux/include/linux/skbuff.h:2080!
>   invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC PTI
>   CPU: 0 PID: 1362 Comm: libvirtd Not tainted 4.18.0-rc2+ #69
>   Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
>   RIP: 0010:skb_pull+0x36/0x40
>   Code: c6 77 24 29 f0 3b 87 84 00 00 00 89 87 80 00 00 00 72 17 89 f6 48 89 f0 48 03 87 d8 00 00 00 48 89 87 d8 00 00 00 c3 31 c0 c3 <0f> 0b 0f 1f 84 00 00 00
> 00 00 0f 1f 44 00 00 39 b7 80 00 00 00 76
>   RSP: 0000:ffff96737f6039f0 EFLAGS: 00010297
>   RAX: 000000009c66e2f2 RBX: 0000000000000000 RCX: 0000000000000501
>   RDX: 0000000000000001 RSI: 000000000000000e RDI: ffff96737f7e3938
>   RBP: ffff967379f40020 R08: 0000000000000000 R09: 0000000000000000
>   R10: ffff96737f603988 R11: ffffffffc0461335 R12: ffff967379f409e0
>   R13: ffff96737f7e3938 R14: 0000000000000000 R15: ffff967379e96ac0
>   FS:  00007fc96087e640(0000) GS:ffff96737f600000(0000) knlGS:0000000000000000
>   CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>   CR2: 00007fc913608aa0 CR3: 000000005dacc001 CR4: 00000000001606f0
>   Call Trace:
>    <IRQ>
>    br_dev_xmit+0xe1/0x3d0 [bridge]
>    dev_hard_start_xmit+0xbc/0x3b0
>    __dev_queue_xmit+0xb98/0xc30
>    ip_finish_output2+0x3e5/0x670
>    ? ip_output+0x7f/0x250
>    ip_output+0x7f/0x250
>    ? ip_fragment.constprop.5+0x80/0x80
>    ip_forward+0x3e2/0x650
>    ? ipv4_frags_init_net+0x130/0x130
>    ip_rcv+0x2be/0x500
>    ? ip_local_deliver_finish+0x3b0/0x3b0
>    __netif_receive_skb_core+0x6a8/0xb30
>    ? lock_acquire+0xab/0x200
>    ? netif_receive_skb_internal+0x2a/0x380
>    netif_receive_skb_internal+0x73/0x380
>    ? napi_gro_complete+0xcf/0x1b0
>    dev_gro_receive+0x374/0x730
>    napi_gro_receive+0x4f/0x1d0
>    receive_buf+0x4b6/0x1930 [virtio_net]
>    ? detach_buf+0x69/0x120 [virtio_ring]
>    virtnet_poll+0x122/0x2e0 [virtio_net]
>    net_rx_action+0x207/0x450
>    __do_softirq+0x149/0x4ea
>    irq_exit+0xbf/0xd0
>    do_IRQ+0x6c/0x130
>    common_interrupt+0xf/0xf
>    </IRQ>
>   RIP: 0010:__radix_tree_lookup+0x28/0xe0
>   Code: 00 00 53 49 89 ca 41 bb 40 00 00 00 4c 8b 47 50 4c 89 c0 83 e0 03 48 83 f8 01 0f 85 a8 00 00 00 4c 89 c0 48 83 e0 fe 0f b6 08 <4c> 89 d8 48 d3 e0 48 83
> e8 01 48 39 c6 76 11 e9 9f 00 00 00 4c 89
>   RSP: 0000:ffffae150048fcc0 EFLAGS: 00000282 ORIG_RAX: ffffffffffffffd9
>   RAX: ffff96735d2ef908 RBX: 000000000000001f RCX: 0000000000000006
>   RDX: 0000000000000000 RSI: 00000000000002e2 RDI: ffff96735d10b788
>   RBP: 00000000000002e2 R08: ffff96735d2ef909 R09: 0000000000000000
>   R10: 0000000000000000 R11: 0000000000000040 R12: 000000000000001f
>   R13: ffffec01c15f3a80 R14: 000000000000001f R15: ffffae150048fd18
>    __do_page_cache_readahead+0x11f/0x2e0
>    filemap_fault+0x408/0x660
>    ext4_filemap_fault+0x2f/0x40
>    __do_fault+0x1f/0xd0
>    __handle_mm_fault+0x915/0xfa0
>    handle_mm_fault+0x1c2/0x390
>    __do_page_fault+0x2f6/0x580
>    ? async_page_fault+0x5/0x20
>    async_page_fault+0x1b/0x20
>   RIP: 0033:0x7fc913608aa0
>   Code: Bad RIP value.
>   RSP: 002b:00007ffcfa9c7f08 EFLAGS: 00010206
>   RAX: 0000000000000000 RBX: 0000000000000003 RCX: 0000000000000080
>   RDX: 0000000000000006 RSI: 00007fc913a74bf8 RDI: 00007fc913df9720
>   RBP: 0000000000000001 R08: 000055df45795700 R09: 0000000000000000
>   R10: 000055df4574c010 R11: 0000000000000001 R12: 00007ffcfa9c8c38
>   R13: 00007ffcfa9c8c48 R14: 00007fc913dc3d70 R15: 000055df4578ab30
>   Modules linked in: veth ebtable_filter ebtables ipt_MASQUERADE xt_CHECKSUM xt_comment xt_tcpudp iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack libcrc32c iptable_mangle iptable_filter bpfilter bridge stp llc fuse kvm_intel kvm irqbypass 9pnet_virtio 9pnet virtio_balloon ib_iser rdma_cm configfs iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ip_tables x_tables virtio_net net_failover virtio_blk failover crc32_pclmul crc32c_intel pcbc aesni_intel aes_x86_64 crypto_simd cryptd glue_helper virtio_pci psmouse virtio_ring virtio
> 
> I'm not very familiar with the GRO or IP fragmentation code but I was
> able to identify that this change "fixes" the issue:
> 
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 7ccc601b55d9..a5cea572a7f1 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -666,6 +666,7 @@ struct sk_buff {
>   			/* These two members must be first. */
>   			struct sk_buff		*next;
>   			struct sk_buff		*prev;
> +			struct list_head	list;
>   
>   			union {
>   				struct net_device	*dev;
> @@ -678,7 +679,6 @@ struct sk_buff {
>   			};
>   		};
>   		struct rb_node		rbnode; /* used in netem & tcp stack */
> -		struct list_head	list;
>   	};
>   	struct sock		*sk;
>   
> 
> That's not the correct fix, as we wouldn't want to waste space with two
> list implementations always being around, but I think it shows that
> perhaps there is something in the call stack attempting to use both the
> list_head list and the ip_defrag_offset at the same time and
> unintentionally trouncing over the other member in the union.
> 
> I wish I had a proper fix but I suspect that someone more familiar with
> this code will spot the issue quickly. I didn't see anything incorrect
> in the list manipulations in the offending commit so some deeper
> knowledge of the network stack is needed.
> 
> Tyler
>
diff mbox series

Patch

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7ccc601b55d9..a5cea572a7f1 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -666,6 +666,7 @@  struct sk_buff {
 			/* These two members must be first. */
 			struct sk_buff		*next;
 			struct sk_buff		*prev;
+			struct list_head	list;
 
 			union {
 				struct net_device	*dev;
@@ -678,7 +679,6 @@  struct sk_buff {
 			};
 		};
 		struct rb_node		rbnode; /* used in netem & tcp stack */
-		struct list_head	list;
 	};
 	struct sock		*sk;