diff mbox

net: heap out-of-bounds in fib6_clean_node/rt6_fill_node/fib6_age/fib6_prune_clone

Message ID 14c01aea-6c2f-6ba5-6aee-52c55f410da7@cumulusnetworks.com
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

David Ahern March 6, 2017, 11:41 p.m. UTC
On 3/6/17 11:51 AM, Dmitry Vyukov wrote:
> We hit it several thousand times, but we get only several dozens of
> crashes per day on ~80 VMs. So if you try to reproduce it on a single
> machine it can take days for a single crash.
> If you are ready to go that route, here are some instructions on
> setting up syzkaller:
> https://github.com/google/syzkaller
> You also need kernel built with CONFIG_KASAN.

Ack, and I have it set up on Ubuntu 16.10, which has a fairly new compiler.

> I am ready to help with resolving any issues.
> 
> Another possible route is if you give me a patch with some additional
> WARNINGs. Then I can deploy it to bots and collect stacks.

try the attached.

Comments

Dmitry Vyukov March 7, 2017, 8:43 a.m. UTC | #1
On Tue, Mar 7, 2017 at 12:41 AM, David Ahern <dsa@cumulusnetworks.com> wrote:
> On 3/6/17 11:51 AM, Dmitry Vyukov wrote:
>> We hit it several thousand times, but we get only several dozens of
>> crashes per day on ~80 VMs. So if you try to reproduce it on a single
>> machine it can take days for a single crash.
>> If you are ready to go that route, here are some instructions on
>> setting up syzkaller:
>> https://github.com/google/syzkaller
>> You also need kernel built with CONFIG_KASAN.
>
> Ack, and I have it set up on Ubuntu 16.10, which has a fairly new compiler.
>
>> I am ready to help with resolving any issues.
>>
>> Another possible route is if you give me a patch with some additional
>> WARNINGs. Then I can deploy it to bots and collect stacks.
>
> try the attached.


This is on c1ae3cfa0e89fa1a7ecc4c99031f5e9ae99d9201. No other kernel
output from your patch (pr_err).

------------[ cut here ]------------
WARNING: CPU: 1 PID: 30179 at net/ipv6/ip6_fib.c:158
rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158
Kernel panic - not syncing: panic_on_warn set ...

CPU: 1 PID: 30179 Comm: syz-executor3 Not tainted 4.11.0-rc1+ #310
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:16 [inline]
 dump_stack+0x2fb/0x3fd lib/dump_stack.c:52
 panic+0x20f/0x426 kernel/panic.c:180
 __warn+0x1c4/0x1e0 kernel/panic.c:541
 warn_slowpath_null+0x2c/0x40 kernel/panic.c:584
 rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158
 rt6_release+0x1ee/0x290 net/ipv6/ip6_fib.c:189
 fib6_add_rt2node net/ipv6/ip6_fib.c:922 [inline]
 fib6_add+0x1d51/0x3290 net/ipv6/ip6_fib.c:1081
 __ip6_ins_rt+0x60/0x80 net/ipv6/route.c:948
 ip6_route_add+0x1a7/0x310 net/ipv6/route.c:2130
 inet6_rtm_newroute+0x191/0x1b0 net/ipv6/route.c:3294
 rtnetlink_rcv_msg+0x609/0x860 net/core/rtnetlink.c:4104
 netlink_rcv_skb+0x2ab/0x390 net/netlink/af_netlink.c:2298
 rtnetlink_rcv+0x2a/0x40 net/core/rtnetlink.c:4110
 netlink_unicast_kernel net/netlink/af_netlink.c:1231 [inline]
 netlink_unicast+0x525/0x730 net/netlink/af_netlink.c:1257
 netlink_sendmsg+0xab3/0xe70 net/netlink/af_netlink.c:1803
 sock_sendmsg_nosec net/socket.c:633 [inline]
 sock_sendmsg+0xca/0x110 net/socket.c:643
 sock_write_iter+0x326/0x600 net/socket.c:846
 call_write_iter include/linux/fs.h:1733 [inline]
 do_iter_readv_writev fs/read_write.c:696 [inline]
 __do_readv_writev+0xbbc/0x10a0 fs/read_write.c:862
 do_readv_writev+0x13f/0x200 fs/read_write.c:894
 vfs_writev+0x87/0xc0 fs/read_write.c:921
 do_writev+0x110/0x2c0 fs/read_write.c:954
 SYSC_writev fs/read_write.c:1027 [inline]
 SyS_writev+0x27/0x30 fs/read_write.c:1024
 entry_SYSCALL_64_fastpath+0x1f/0xc2
RIP: 0033:0x4458d9
RSP: 002b:00007f31fcf33b58 EFLAGS: 00000292 ORIG_RAX: 0000000000000014
RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 00000000004458d9
RDX: 0000000000000001 RSI: 00000000207cd000 RDI: 0000000000000005
RBP: 00000000006e30c0 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000292 R12: 0000000000708000
R13: 0000000020fad000 R14: 0000000000001000 R15: 0000000000000003



------------[ cut here ]------------
WARNING: CPU: 2 PID: 31175 at net/ipv6/ip6_fib.c:158
rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158
Kernel panic - not syncing: panic_on_warn set ...

CPU: 2 PID: 31175 Comm: syz-executor1 Not tainted 4.11.0-rc1+ #310
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:16 [inline]
 dump_stack+0x2fb/0x3fd lib/dump_stack.c:52
 panic+0x20f/0x426 kernel/panic.c:180
 __warn+0x1c4/0x1e0 kernel/panic.c:541
 warn_slowpath_null+0x2c/0x40 kernel/panic.c:584
 rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158
 rt6_release+0x1ee/0x290 net/ipv6/ip6_fib.c:189
 fib6_add_rt2node net/ipv6/ip6_fib.c:922 [inline]
 fib6_add+0x1d51/0x3290 net/ipv6/ip6_fib.c:1081
kvm_vm_ioctl_deassign_device: device hasn't been assigned before, so
cannot be deassigned
 __ip6_ins_rt+0x60/0x80 net/ipv6/route.c:948
 ip6_route_add+0x1a7/0x310 net/ipv6/route.c:2130
 inet6_rtm_newroute+0x191/0x1b0 net/ipv6/route.c:3294
 rtnetlink_rcv_msg+0x609/0x860 net/core/rtnetlink.c:4104
 netlink_rcv_skb+0x2ab/0x390 net/netlink/af_netlink.c:2298
 rtnetlink_rcv+0x2a/0x40 net/core/rtnetlink.c:4110
 netlink_unicast_kernel net/netlink/af_netlink.c:1231 [inline]
 netlink_unicast+0x525/0x730 net/netlink/af_netlink.c:1257
 netlink_sendmsg+0xab3/0xe70 net/netlink/af_netlink.c:1803
 sock_sendmsg_nosec net/socket.c:633 [inline]
 sock_sendmsg+0xca/0x110 net/socket.c:643
 sock_write_iter+0x326/0x600 net/socket.c:846
 call_write_iter include/linux/fs.h:1733 [inline]
 do_iter_readv_writev fs/read_write.c:696 [inline]
 __do_readv_writev+0xbbc/0x10a0 fs/read_write.c:862
 do_readv_writev+0x13f/0x200 fs/read_write.c:894
 vfs_writev+0x87/0xc0 fs/read_write.c:921
 do_writev+0x110/0x2c0 fs/read_write.c:954
 SYSC_writev fs/read_write.c:1027 [inline]
 SyS_writev+0x27/0x30 fs/read_write.c:1024
 entry_SYSCALL_64_fastpath+0x1f/0xc2
RIP: 0033:0x4458d9
RSP: 002b:00007f1639006b58 EFLAGS: 00000292 ORIG_RAX: 0000000000000014
RAX: ffffffffffffffda RBX: 0000000000000019 RCX: 00000000004458d9
RDX: 0000000000000001 RSI: 00000000207cd000 RDI: 0000000000000019
RBP: 00000000006e30c0 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000292 R12: 0000000000708000
R13: 0000000000000010 R14: 0000000000000003 R15: 0000000000000000
Dmitry Vyukov March 7, 2017, 9:21 a.m. UTC | #2
On Tue, Mar 7, 2017 at 9:43 AM, Dmitry Vyukov <dvyukov@google.com> wrote:
> On Tue, Mar 7, 2017 at 12:41 AM, David Ahern <dsa@cumulusnetworks.com> wrote:
>> On 3/6/17 11:51 AM, Dmitry Vyukov wrote:
>>> We hit it several thousand times, but we get only several dozens of
>>> crashes per day on ~80 VMs. So if you try to reproduce it on a single
>>> machine it can take days for a single crash.
>>> If you are ready to go that route, here are some instructions on
>>> setting up syzkaller:
>>> https://github.com/google/syzkaller
>>> You also need kernel built with CONFIG_KASAN.
>>
>> Ack, and I have it set up on Ubuntu 16.10, which has a fairly new compiler.
>>
>>> I am ready to help with resolving any issues.
>>>
>>> Another possible route is if you give me a patch with some additional
>>> WARNINGs. Then I can deploy it to bots and collect stacks.
>>
>> try the attached.
>
>
> This is on c1ae3cfa0e89fa1a7ecc4c99031f5e9ae99d9201. No other kernel
> output from your patch (pr_err).
>
> ------------[ cut here ]------------
> WARNING: CPU: 1 PID: 30179 at net/ipv6/ip6_fib.c:158
> rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158
> Kernel panic - not syncing: panic_on_warn set ...
>
> CPU: 1 PID: 30179 Comm: syz-executor3 Not tainted 4.11.0-rc1+ #310
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
> Call Trace:
>  __dump_stack lib/dump_stack.c:16 [inline]
>  dump_stack+0x2fb/0x3fd lib/dump_stack.c:52
>  panic+0x20f/0x426 kernel/panic.c:180
>  __warn+0x1c4/0x1e0 kernel/panic.c:541
>  warn_slowpath_null+0x2c/0x40 kernel/panic.c:584
>  rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158
>  rt6_release+0x1ee/0x290 net/ipv6/ip6_fib.c:189
>  fib6_add_rt2node net/ipv6/ip6_fib.c:922 [inline]
>  fib6_add+0x1d51/0x3290 net/ipv6/ip6_fib.c:1081
>  __ip6_ins_rt+0x60/0x80 net/ipv6/route.c:948
>  ip6_route_add+0x1a7/0x310 net/ipv6/route.c:2130
>  inet6_rtm_newroute+0x191/0x1b0 net/ipv6/route.c:3294
>  rtnetlink_rcv_msg+0x609/0x860 net/core/rtnetlink.c:4104
>  netlink_rcv_skb+0x2ab/0x390 net/netlink/af_netlink.c:2298
>  rtnetlink_rcv+0x2a/0x40 net/core/rtnetlink.c:4110
>  netlink_unicast_kernel net/netlink/af_netlink.c:1231 [inline]
>  netlink_unicast+0x525/0x730 net/netlink/af_netlink.c:1257
>  netlink_sendmsg+0xab3/0xe70 net/netlink/af_netlink.c:1803
>  sock_sendmsg_nosec net/socket.c:633 [inline]
>  sock_sendmsg+0xca/0x110 net/socket.c:643
>  sock_write_iter+0x326/0x600 net/socket.c:846
>  call_write_iter include/linux/fs.h:1733 [inline]
>  do_iter_readv_writev fs/read_write.c:696 [inline]
>  __do_readv_writev+0xbbc/0x10a0 fs/read_write.c:862
>  do_readv_writev+0x13f/0x200 fs/read_write.c:894
>  vfs_writev+0x87/0xc0 fs/read_write.c:921
>  do_writev+0x110/0x2c0 fs/read_write.c:954
>  SYSC_writev fs/read_write.c:1027 [inline]
>  SyS_writev+0x27/0x30 fs/read_write.c:1024
>  entry_SYSCALL_64_fastpath+0x1f/0xc2
> RIP: 0033:0x4458d9
> RSP: 002b:00007f31fcf33b58 EFLAGS: 00000292 ORIG_RAX: 0000000000000014
> RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 00000000004458d9
> RDX: 0000000000000001 RSI: 00000000207cd000 RDI: 0000000000000005
> RBP: 00000000006e30c0 R08: 0000000000000000 R09: 0000000000000000
> R10: 0000000000000000 R11: 0000000000000292 R12: 0000000000708000
> R13: 0000000020fad000 R14: 0000000000001000 R15: 0000000000000003
>
>
>
> ------------[ cut here ]------------
> WARNING: CPU: 2 PID: 31175 at net/ipv6/ip6_fib.c:158
> rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158
> Kernel panic - not syncing: panic_on_warn set ...
>
> CPU: 2 PID: 31175 Comm: syz-executor1 Not tainted 4.11.0-rc1+ #310
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
> Call Trace:
>  __dump_stack lib/dump_stack.c:16 [inline]
>  dump_stack+0x2fb/0x3fd lib/dump_stack.c:52
>  panic+0x20f/0x426 kernel/panic.c:180
>  __warn+0x1c4/0x1e0 kernel/panic.c:541
>  warn_slowpath_null+0x2c/0x40 kernel/panic.c:584
>  rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158
>  rt6_release+0x1ee/0x290 net/ipv6/ip6_fib.c:189
>  fib6_add_rt2node net/ipv6/ip6_fib.c:922 [inline]
>  fib6_add+0x1d51/0x3290 net/ipv6/ip6_fib.c:1081
> kvm_vm_ioctl_deassign_device: device hasn't been assigned before, so
> cannot be deassigned
>  __ip6_ins_rt+0x60/0x80 net/ipv6/route.c:948
>  ip6_route_add+0x1a7/0x310 net/ipv6/route.c:2130
>  inet6_rtm_newroute+0x191/0x1b0 net/ipv6/route.c:3294
>  rtnetlink_rcv_msg+0x609/0x860 net/core/rtnetlink.c:4104
>  netlink_rcv_skb+0x2ab/0x390 net/netlink/af_netlink.c:2298
>  rtnetlink_rcv+0x2a/0x40 net/core/rtnetlink.c:4110
>  netlink_unicast_kernel net/netlink/af_netlink.c:1231 [inline]
>  netlink_unicast+0x525/0x730 net/netlink/af_netlink.c:1257
>  netlink_sendmsg+0xab3/0xe70 net/netlink/af_netlink.c:1803
>  sock_sendmsg_nosec net/socket.c:633 [inline]
>  sock_sendmsg+0xca/0x110 net/socket.c:643
>  sock_write_iter+0x326/0x600 net/socket.c:846
>  call_write_iter include/linux/fs.h:1733 [inline]
>  do_iter_readv_writev fs/read_write.c:696 [inline]
>  __do_readv_writev+0xbbc/0x10a0 fs/read_write.c:862
>  do_readv_writev+0x13f/0x200 fs/read_write.c:894
>  vfs_writev+0x87/0xc0 fs/read_write.c:921
>  do_writev+0x110/0x2c0 fs/read_write.c:954
>  SYSC_writev fs/read_write.c:1027 [inline]
>  SyS_writev+0x27/0x30 fs/read_write.c:1024
>  entry_SYSCALL_64_fastpath+0x1f/0xc2
> RIP: 0033:0x4458d9
> RSP: 002b:00007f1639006b58 EFLAGS: 00000292 ORIG_RAX: 0000000000000014
> RAX: ffffffffffffffda RBX: 0000000000000019 RCX: 00000000004458d9
> RDX: 0000000000000001 RSI: 00000000207cd000 RDI: 0000000000000019
> RBP: 00000000006e30c0 R08: 0000000000000000 R09: 0000000000000000
> R10: 0000000000000000 R11: 0000000000000292 R12: 0000000000708000
> R13: 0000000000000010 R14: 0000000000000003 R15: 0000000000000000



I've commented out that warning just to see if I can obtain more information.
Then I also got this:

------------[ cut here ]------------
WARNING: CPU: 2 PID: 3990 at net/ipv6/ip6_fib.c:991
fib6_add+0x2e12/0x3290 net/ipv6/ip6_fib.c:991 net/ipv6/ip6_fib.c:991
Kernel panic - not syncing: panic_on_warn set ...

CPU: 2 PID: 3990 Comm: kworker/2:4 Not tainted 4.11.0-rc1+ #311
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
Workqueue: ipv6_addrconf addrconf_dad_work
Call Trace:
 __dump_stack lib/dump_stack.c:16 [inline]
 __dump_stack lib/dump_stack.c:16 [inline] lib/dump_stack.c:52
 dump_stack+0x2fb/0x3fd lib/dump_stack.c:52 lib/dump_stack.c:52
 panic+0x20f/0x426 kernel/panic.c:180 kernel/panic.c:180
 __warn+0x1c4/0x1e0 kernel/panic.c:541 kernel/panic.c:541
 warn_slowpath_null+0x2c/0x40 kernel/panic.c:584 kernel/panic.c:584
 fib6_add+0x2e12/0x3290 net/ipv6/ip6_fib.c:991 net/ipv6/ip6_fib.c:991
 __ip6_ins_rt+0x60/0x80 net/ipv6/route.c:948 net/ipv6/route.c:948
 ip6_ins_rt+0x19b/0x220 net/ipv6/route.c:959 net/ipv6/route.c:959
 __ipv6_ifa_notify+0x62e/0x7a0 net/ipv6/addrconf.c:5485 net/ipv6/addrconf.c:5485
 ipv6_ifa_notify+0xdf/0x1d0 net/ipv6/addrconf.c:5518 net/ipv6/addrconf.c:5518
 addrconf_dad_completed+0xe6/0x950 net/ipv6/addrconf.c:3983
net/ipv6/addrconf.c:3983
 addrconf_dad_begin net/ipv6/addrconf.c:3797 [inline]
 addrconf_dad_begin net/ipv6/addrconf.c:3797 [inline] net/ipv6/addrconf.c:3897
 addrconf_dad_work+0x32a/0xea0 net/ipv6/addrconf.c:3897 net/ipv6/addrconf.c:3897
 process_one_work+0xc06/0x1c40 kernel/workqueue.c:2096 kernel/workqueue.c:2096
 worker_thread+0x223/0x19f0 kernel/workqueue.c:2230 kernel/workqueue.c:2230
 kthread+0x334/0x400 kernel/kthread.c:229 kernel/kthread.c:229
 ret_from_fork+0x31/0x40 arch/x86/entry/entry_64.S:430
arch/x86/entry/entry_64.S:430



And this without any preceding warnings:

==================================================================
BUG: KASAN: slab-out-of-bounds in fib6_age+0x3fd/0x480
net/ipv6/ip6_fib.c:1787 at addr ffff88004d4fbe54
Read of size 4 by task swapper/2/0
CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.11.0-rc1+ #311
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
Call Trace:
 <IRQ>
 __dump_stack lib/dump_stack.c:16 [inline]
 dump_stack+0x2fb/0x3fd lib/dump_stack.c:52
 kasan_object_err+0x1c/0x90 mm/kasan/report.c:166
 print_address_description mm/kasan/report.c:208 [inline]
 kasan_report_error mm/kasan/report.c:292 [inline]
 kasan_report.part.2+0x1b0/0x460 mm/kasan/report.c:314
 kasan_report mm/kasan/report.c:334 [inline]
 __asan_report_load4_noabort+0x29/0x30 mm/kasan/report.c:334
 fib6_age+0x3fd/0x480 net/ipv6/ip6_fib.c:1787
 fib6_clean_node+0x356/0x550 net/ipv6/ip6_fib.c:1665
 fib6_walk_continue+0x4b3/0x620 net/ipv6/ip6_fib.c:1594
 fib6_walk+0x91/0xf0 net/ipv6/ip6_fib.c:1639
 fib6_clean_tree+0x266/0x3a0 net/ipv6/ip6_fib.c:1711
 __fib6_clean_all+0x1e1/0x360 net/ipv6/ip6_fib.c:1727
 fib6_clean_all net/ipv6/ip6_fib.c:1738 [inline]
 fib6_run_gc+0x185/0x3d0 net/ipv6/ip6_fib.c:1835
 fib6_gc_timer_cb+0x1c/0x20 net/ipv6/ip6_fib.c:1850
 call_timer_fn+0x241/0x820 kernel/time/timer.c:1268
 expire_timers kernel/time/timer.c:1307 [inline]
 __run_timers+0x960/0xcf0 kernel/time/timer.c:1601
 run_timer_softirq+0x21/0x80 kernel/time/timer.c:1614
 __do_softirq+0x31f/0xbe7 kernel/softirq.c:284
 invoke_softirq kernel/softirq.c:364 [inline]
 irq_exit+0x1cc/0x200 kernel/softirq.c:405
 exiting_irq arch/x86/include/asm/apic.h:657 [inline]
 smp_apic_timer_interrupt+0x76/0xa0 arch/x86/kernel/apic/apic.c:962
 apic_timer_interrupt+0x93/0xa0 arch/x86/entry/entry_64.S:487
RIP: 0010:native_safe_halt+0x6/0x10 arch/x86/include/asm/irqflags.h:53
RSP: 0018:ffff880089437c10 EFLAGS: 00000286 ORIG_RAX: ffffffffffffff10
RAX: dffffc0000000000 RBX: 1ffff10011286f85 RCX: 0000000000000000
RDX: 1ffffffff0a18ebc RSI: 0000000000000001 RDI: ffffffff850c75e0
RBP: ffff880089437c10 R08: ffffed00113835c2 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 1ffff10011286fa9
R13: ffff880089437cc8 R14: ffffffff856973f8 R15: ffff880089437e68
 </IRQ>
 arch_safe_halt arch/x86/include/asm/paravirt.h:98 [inline]
 default_idle+0xbf/0x440 arch/x86/kernel/process.c:275
 arch_cpu_idle+0xa/0x10 arch/x86/kernel/process.c:266
 default_idle_call+0x36/0x90 kernel/sched/idle.c:97
 cpuidle_idle_call kernel/sched/idle.c:155 [inline]
 do_idle+0x373/0x520 kernel/sched/idle.c:244
 cpu_startup_entry+0x18/0x20 kernel/sched/idle.c:346
 start_secondary+0x36c/0x460 arch/x86/kernel/smpboot.c:275
 start_cpu+0x14/0x14 arch/x86/kernel/head_64.S:306
Object at ffff88004d4fbd40, in cache ip_dst_cache size: 216
Allocated:
PID = 8122
 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
 save_stack+0x43/0xd0 mm/kasan/kasan.c:513
 set_track mm/kasan/kasan.c:525 [inline]
 kasan_kmalloc+0xaa/0xd0 mm/kasan/kasan.c:616
 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:555
 kmem_cache_alloc+0x102/0x6e0 mm/slab.c:3572
 dst_alloc+0x11b/0x1a0 net/core/dst.c:209
 rt_dst_alloc+0xf0/0x580 net/ipv4/route.c:1482
 __mkroute_output net/ipv4/route.c:2165 [inline]
 __ip_route_output_key_hash+0xce3/0x2ca0 net/ipv4/route.c:2375
 __ip_route_output_key include/net/route.h:122 [inline]
 ip_route_output_flow+0x29/0xa0 net/ipv4/route.c:2461
 ip_route_output_key include/net/route.h:132 [inline]
 sctp_v4_get_dst+0x5d2/0x1570 net/sctp/protocol.c:458
 sctp_transport_route+0xa8/0x420 net/sctp/transport.c:292
 sctp_assoc_add_peer+0x5a5/0x1470 net/sctp/associola.c:653
 sctp_sendmsg+0x180d/0x3980 net/sctp/socket.c:1871
 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:761
 sock_sendmsg_nosec net/socket.c:633 [inline]
 sock_sendmsg+0xca/0x110 net/socket.c:643
 SYSC_sendto+0x660/0x810 net/socket.c:1685
 SyS_sendto+0x40/0x50 net/socket.c:1653
 entry_SYSCALL_64_fastpath+0x1f/0xc2
Freed:
PID = 2038
 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
 save_stack+0x43/0xd0 mm/kasan/kasan.c:513
 set_track mm/kasan/kasan.c:525 [inline]
 kasan_slab_free+0x6f/0xb0 mm/kasan/kasan.c:589
 __cache_free mm/slab.c:3514 [inline]
 kmem_cache_free+0x71/0x240 mm/slab.c:3774
 dst_destroy+0x211/0x340 net/core/dst.c:272
 dst_free include/net/dst.h:429 [inline]
 dst_rcu_free+0x152/0x190 include/net/dst.h:439
 __rcu_reclaim kernel/rcu/rcu.h:118 [inline]
 rcu_do_batch.isra.66+0xa31/0xe50 kernel/rcu/tree.c:2880
 invoke_rcu_callbacks kernel/rcu/tree.c:3143 [inline]
 __rcu_process_callbacks kernel/rcu/tree.c:3110 [inline]
 rcu_process_callbacks+0x45b/0xc50 kernel/rcu/tree.c:3127
 __do_softirq+0x31f/0xbe7 kernel/softirq.c:284
Disposed:
PID = 26270
 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
 save_stack+0x43/0xd0 mm/kasan/kasan.c:513
 set_track mm/kasan/kasan.c:525 [inline]
 kasan_set_rcu_track+0xcf/0xf0 mm/kasan/kasan.c:694
 __call_rcu.constprop.77+0x1d6/0x15a0 kernel/rcu/tree.c:3230
 call_rcu_sched+0x12/0x20 kernel/rcu/tree.c:3291
 rt_free net/ipv4/route.c:592 [inline]
 rt_cache_route+0xf5/0x130 net/ipv4/route.c:1365
 rt_set_nexthop.constprop.57+0x408/0xfa0 net/ipv4/route.c:1453
 __mkroute_output net/ipv4/route.c:2195 [inline]
 __ip_route_output_key_hash+0xe50/0x2ca0 net/ipv4/route.c:2375
 __ip_route_output_key include/net/route.h:122 [inline]
 ip_route_output_flow+0x29/0xa0 net/ipv4/route.c:2461
 ip_route_output_key include/net/route.h:132 [inline]
 sctp_v4_get_dst+0x5d2/0x1570 net/sctp/protocol.c:458
 sctp_transport_route+0xa8/0x420 net/sctp/transport.c:292
 sctp_assoc_add_peer+0x5a5/0x1470 net/sctp/associola.c:653
 sctp_process_param net/sctp/sm_make_chunk.c:2548 [inline]
 sctp_process_init+0xf71/0x2320 net/sctp/sm_make_chunk.c:2354
 sctp_sf_do_unexpected_init.isra.28+0x7b8/0x1470 net/sctp/sm_statefuns.c:1510
 sctp_sf_do_5_2_1_siminit+0x35/0x40 net/sctp/sm_statefuns.c:1199
 sctp_do_sm+0x1e5/0x6a30 net/sctp/sm_sideeffect.c:1144
 sctp_assoc_bh_rcv+0x285/0x4b0 net/sctp/associola.c:1063
 sctp_inq_push+0x22b/0x2e0 net/sctp/inqueue.c:95
 sctp_backlog_rcv+0x177/0xb40 net/sctp/input.c:350
 sk_backlog_rcv include/net/sock.h:896 [inline]
 __release_sock+0x126/0x3a0 net/core/sock.c:2058
 release_sock+0xa5/0x2b0 net/core/sock.c:2545
 sctp_sendmsg+0x2b05/0x3980 net/sctp/socket.c:2011
 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:761
 sock_sendmsg_nosec net/socket.c:633 [inline]
 sock_sendmsg+0xca/0x110 net/socket.c:643
 SYSC_sendto+0x660/0x810 net/socket.c:1685
 SyS_sendto+0x40/0x50 net/socket.c:1653
 entry_SYSCALL_64_fastpath+0x1f/0xc2
Memory state around the buggy address:
 ffff88004d4fbd00: fc fc fc fc fc fc fc fc 00 00 00 00 00 00 00 00
 ffff88004d4fbd80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
>ffff88004d4fbe00: 00 00 00 fc fc fc fc fc fc fc fc fc fc fc fc fc
                                                 ^
 ffff88004d4fbe80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
 ffff88004d4fbf00: 00 00 00 00 00 00 00 00 00 00 00 fc fc fc fc fc
==================================================================
David Ahern March 7, 2017, 5:17 p.m. UTC | #3
On 3/7/17 1:43 AM, Dmitry Vyukov wrote:
> This is on c1ae3cfa0e89fa1a7ecc4c99031f5e9ae99d9201. No other kernel
> output from your patch (pr_err).

Is the below supposed to be from the same qemu instance at the time of
the crash? cpu1 and cpu2 are both supposedly doing a route insert?


> 
> ------------[ cut here ]------------
> WARNING: CPU: 1 PID: 30179 at net/ipv6/ip6_fib.c:158
> rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158
> Kernel panic - not syncing: panic_on_warn set ...
> 
> CPU: 1 PID: 30179 Comm: syz-executor3 Not tainted 4.11.0-rc1+ #310
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
> Call Trace:
>  __dump_stack lib/dump_stack.c:16 [inline]
>  dump_stack+0x2fb/0x3fd lib/dump_stack.c:52
>  panic+0x20f/0x426 kernel/panic.c:180
>  __warn+0x1c4/0x1e0 kernel/panic.c:541
>  warn_slowpath_null+0x2c/0x40 kernel/panic.c:584
>  rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158
>  rt6_release+0x1ee/0x290 net/ipv6/ip6_fib.c:189
>  fib6_add_rt2node net/ipv6/ip6_fib.c:922 [inline]
>  fib6_add+0x1d51/0x3290 net/ipv6/ip6_fib.c:1081
>  __ip6_ins_rt+0x60/0x80 net/ipv6/route.c:948
>  ip6_route_add+0x1a7/0x310 net/ipv6/route.c:2130
>  inet6_rtm_newroute+0x191/0x1b0 net/ipv6/route.c:3294
>  rtnetlink_rcv_msg+0x609/0x860 net/core/rtnetlink.c:4104
>  netlink_rcv_skb+0x2ab/0x390 net/netlink/af_netlink.c:2298
>  rtnetlink_rcv+0x2a/0x40 net/core/rtnetlink.c:4110
>  netlink_unicast_kernel net/netlink/af_netlink.c:1231 [inline]
>  netlink_unicast+0x525/0x730 net/netlink/af_netlink.c:1257
>  netlink_sendmsg+0xab3/0xe70 net/netlink/af_netlink.c:1803
>  sock_sendmsg_nosec net/socket.c:633 [inline]
>  sock_sendmsg+0xca/0x110 net/socket.c:643
>  sock_write_iter+0x326/0x600 net/socket.c:846
>  call_write_iter include/linux/fs.h:1733 [inline]
>  do_iter_readv_writev fs/read_write.c:696 [inline]
>  __do_readv_writev+0xbbc/0x10a0 fs/read_write.c:862
>  do_readv_writev+0x13f/0x200 fs/read_write.c:894
>  vfs_writev+0x87/0xc0 fs/read_write.c:921
>  do_writev+0x110/0x2c0 fs/read_write.c:954
>  SYSC_writev fs/read_write.c:1027 [inline]
>  SyS_writev+0x27/0x30 fs/read_write.c:1024
>  entry_SYSCALL_64_fastpath+0x1f/0xc2
> RIP: 0033:0x4458d9
> RSP: 002b:00007f31fcf33b58 EFLAGS: 00000292 ORIG_RAX: 0000000000000014
> RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 00000000004458d9
> RDX: 0000000000000001 RSI: 00000000207cd000 RDI: 0000000000000005
> RBP: 00000000006e30c0 R08: 0000000000000000 R09: 0000000000000000
> R10: 0000000000000000 R11: 0000000000000292 R12: 0000000000708000
> R13: 0000000020fad000 R14: 0000000000001000 R15: 0000000000000003
> 
> 
> 
> ------------[ cut here ]------------
> WARNING: CPU: 2 PID: 31175 at net/ipv6/ip6_fib.c:158
> rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158
> Kernel panic - not syncing: panic_on_warn set ...
> 
> CPU: 2 PID: 31175 Comm: syz-executor1 Not tainted 4.11.0-rc1+ #310
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
> Call Trace:
>  __dump_stack lib/dump_stack.c:16 [inline]
>  dump_stack+0x2fb/0x3fd lib/dump_stack.c:52
>  panic+0x20f/0x426 kernel/panic.c:180
>  __warn+0x1c4/0x1e0 kernel/panic.c:541
>  warn_slowpath_null+0x2c/0x40 kernel/panic.c:584
>  rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158
>  rt6_release+0x1ee/0x290 net/ipv6/ip6_fib.c:189
>  fib6_add_rt2node net/ipv6/ip6_fib.c:922 [inline]
>  fib6_add+0x1d51/0x3290 net/ipv6/ip6_fib.c:1081
> kvm_vm_ioctl_deassign_device: device hasn't been assigned before, so
> cannot be deassigned
>  __ip6_ins_rt+0x60/0x80 net/ipv6/route.c:948
>  ip6_route_add+0x1a7/0x310 net/ipv6/route.c:2130
>  inet6_rtm_newroute+0x191/0x1b0 net/ipv6/route.c:3294
>  rtnetlink_rcv_msg+0x609/0x860 net/core/rtnetlink.c:4104
>  netlink_rcv_skb+0x2ab/0x390 net/netlink/af_netlink.c:2298
>  rtnetlink_rcv+0x2a/0x40 net/core/rtnetlink.c:4110
>  netlink_unicast_kernel net/netlink/af_netlink.c:1231 [inline]
>  netlink_unicast+0x525/0x730 net/netlink/af_netlink.c:1257
>  netlink_sendmsg+0xab3/0xe70 net/netlink/af_netlink.c:1803
>  sock_sendmsg_nosec net/socket.c:633 [inline]
>  sock_sendmsg+0xca/0x110 net/socket.c:643
>  sock_write_iter+0x326/0x600 net/socket.c:846
>  call_write_iter include/linux/fs.h:1733 [inline]
>  do_iter_readv_writev fs/read_write.c:696 [inline]
>  __do_readv_writev+0xbbc/0x10a0 fs/read_write.c:862
>  do_readv_writev+0x13f/0x200 fs/read_write.c:894
>  vfs_writev+0x87/0xc0 fs/read_write.c:921
>  do_writev+0x110/0x2c0 fs/read_write.c:954
>  SYSC_writev fs/read_write.c:1027 [inline]
>  SyS_writev+0x27/0x30 fs/read_write.c:1024
>  entry_SYSCALL_64_fastpath+0x1f/0xc2
> RIP: 0033:0x4458d9
> RSP: 002b:00007f1639006b58 EFLAGS: 00000292 ORIG_RAX: 0000000000000014
> RAX: ffffffffffffffda RBX: 0000000000000019 RCX: 00000000004458d9
> RDX: 0000000000000001 RSI: 00000000207cd000 RDI: 0000000000000019
> RBP: 00000000006e30c0 R08: 0000000000000000 R09: 0000000000000000
> R10: 0000000000000000 R11: 0000000000000292 R12: 0000000000708000
> R13: 0000000000000010 R14: 0000000000000003 R15: 0000000000000000
>
Dmitry Vyukov March 7, 2017, 5:45 p.m. UTC | #4
On Tue, Mar 7, 2017 at 6:17 PM, 'David Ahern' via syzkaller
<syzkaller@googlegroups.com> wrote:
> On 3/7/17 1:43 AM, Dmitry Vyukov wrote:
>> This is on c1ae3cfa0e89fa1a7ecc4c99031f5e9ae99d9201. No other kernel
>> output from your patch (pr_err).
>
> Is the below supposed to be from the same qemu instance at the time of
> the crash? cpu1 and cpu2 are both supposedly doing a route insert?


No, it's all from different instances.

>> ------------[ cut here ]------------
>> WARNING: CPU: 1 PID: 30179 at net/ipv6/ip6_fib.c:158
>> rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158
>> Kernel panic - not syncing: panic_on_warn set ...
>>
>> CPU: 1 PID: 30179 Comm: syz-executor3 Not tainted 4.11.0-rc1+ #310
>> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
>> Call Trace:
>>  __dump_stack lib/dump_stack.c:16 [inline]
>>  dump_stack+0x2fb/0x3fd lib/dump_stack.c:52
>>  panic+0x20f/0x426 kernel/panic.c:180
>>  __warn+0x1c4/0x1e0 kernel/panic.c:541
>>  warn_slowpath_null+0x2c/0x40 kernel/panic.c:584
>>  rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158
>>  rt6_release+0x1ee/0x290 net/ipv6/ip6_fib.c:189
>>  fib6_add_rt2node net/ipv6/ip6_fib.c:922 [inline]
>>  fib6_add+0x1d51/0x3290 net/ipv6/ip6_fib.c:1081
>>  __ip6_ins_rt+0x60/0x80 net/ipv6/route.c:948
>>  ip6_route_add+0x1a7/0x310 net/ipv6/route.c:2130
>>  inet6_rtm_newroute+0x191/0x1b0 net/ipv6/route.c:3294
>>  rtnetlink_rcv_msg+0x609/0x860 net/core/rtnetlink.c:4104
>>  netlink_rcv_skb+0x2ab/0x390 net/netlink/af_netlink.c:2298
>>  rtnetlink_rcv+0x2a/0x40 net/core/rtnetlink.c:4110
>>  netlink_unicast_kernel net/netlink/af_netlink.c:1231 [inline]
>>  netlink_unicast+0x525/0x730 net/netlink/af_netlink.c:1257
>>  netlink_sendmsg+0xab3/0xe70 net/netlink/af_netlink.c:1803
>>  sock_sendmsg_nosec net/socket.c:633 [inline]
>>  sock_sendmsg+0xca/0x110 net/socket.c:643
>>  sock_write_iter+0x326/0x600 net/socket.c:846
>>  call_write_iter include/linux/fs.h:1733 [inline]
>>  do_iter_readv_writev fs/read_write.c:696 [inline]
>>  __do_readv_writev+0xbbc/0x10a0 fs/read_write.c:862
>>  do_readv_writev+0x13f/0x200 fs/read_write.c:894
>>  vfs_writev+0x87/0xc0 fs/read_write.c:921
>>  do_writev+0x110/0x2c0 fs/read_write.c:954
>>  SYSC_writev fs/read_write.c:1027 [inline]
>>  SyS_writev+0x27/0x30 fs/read_write.c:1024
>>  entry_SYSCALL_64_fastpath+0x1f/0xc2
>> RIP: 0033:0x4458d9
>> RSP: 002b:00007f31fcf33b58 EFLAGS: 00000292 ORIG_RAX: 0000000000000014
>> RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 00000000004458d9
>> RDX: 0000000000000001 RSI: 00000000207cd000 RDI: 0000000000000005
>> RBP: 00000000006e30c0 R08: 0000000000000000 R09: 0000000000000000
>> R10: 0000000000000000 R11: 0000000000000292 R12: 0000000000708000
>> R13: 0000000020fad000 R14: 0000000000001000 R15: 0000000000000003
>>
>>
>>
>> ------------[ cut here ]------------
>> WARNING: CPU: 2 PID: 31175 at net/ipv6/ip6_fib.c:158
>> rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158
>> Kernel panic - not syncing: panic_on_warn set ...
>>
>> CPU: 2 PID: 31175 Comm: syz-executor1 Not tainted 4.11.0-rc1+ #310
>> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
>> Call Trace:
>>  __dump_stack lib/dump_stack.c:16 [inline]
>>  dump_stack+0x2fb/0x3fd lib/dump_stack.c:52
>>  panic+0x20f/0x426 kernel/panic.c:180
>>  __warn+0x1c4/0x1e0 kernel/panic.c:541
>>  warn_slowpath_null+0x2c/0x40 kernel/panic.c:584
>>  rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158
>>  rt6_release+0x1ee/0x290 net/ipv6/ip6_fib.c:189
>>  fib6_add_rt2node net/ipv6/ip6_fib.c:922 [inline]
>>  fib6_add+0x1d51/0x3290 net/ipv6/ip6_fib.c:1081
>> kvm_vm_ioctl_deassign_device: device hasn't been assigned before, so
>> cannot be deassigned
>>  __ip6_ins_rt+0x60/0x80 net/ipv6/route.c:948
>>  ip6_route_add+0x1a7/0x310 net/ipv6/route.c:2130
>>  inet6_rtm_newroute+0x191/0x1b0 net/ipv6/route.c:3294
>>  rtnetlink_rcv_msg+0x609/0x860 net/core/rtnetlink.c:4104
>>  netlink_rcv_skb+0x2ab/0x390 net/netlink/af_netlink.c:2298
>>  rtnetlink_rcv+0x2a/0x40 net/core/rtnetlink.c:4110
>>  netlink_unicast_kernel net/netlink/af_netlink.c:1231 [inline]
>>  netlink_unicast+0x525/0x730 net/netlink/af_netlink.c:1257
>>  netlink_sendmsg+0xab3/0xe70 net/netlink/af_netlink.c:1803
>>  sock_sendmsg_nosec net/socket.c:633 [inline]
>>  sock_sendmsg+0xca/0x110 net/socket.c:643
>>  sock_write_iter+0x326/0x600 net/socket.c:846
>>  call_write_iter include/linux/fs.h:1733 [inline]
>>  do_iter_readv_writev fs/read_write.c:696 [inline]
>>  __do_readv_writev+0xbbc/0x10a0 fs/read_write.c:862
>>  do_readv_writev+0x13f/0x200 fs/read_write.c:894
>>  vfs_writev+0x87/0xc0 fs/read_write.c:921
>>  do_writev+0x110/0x2c0 fs/read_write.c:954
>>  SYSC_writev fs/read_write.c:1027 [inline]
>>  SyS_writev+0x27/0x30 fs/read_write.c:1024
>>  entry_SYSCALL_64_fastpath+0x1f/0xc2
>> RIP: 0033:0x4458d9
>> RSP: 002b:00007f1639006b58 EFLAGS: 00000292 ORIG_RAX: 0000000000000014
>> RAX: ffffffffffffffda RBX: 0000000000000019 RCX: 00000000004458d9
>> RDX: 0000000000000001 RSI: 00000000207cd000 RDI: 0000000000000019
>> RBP: 00000000006e30c0 R08: 0000000000000000 R09: 0000000000000000
>> R10: 0000000000000000 R11: 0000000000000292 R12: 0000000000708000
>> R13: 0000000000000010 R14: 0000000000000003 R15: 0000000000000000
>>
>
> --
> You received this message because you are subscribed to the Google Groups "syzkaller" group.
> To unsubscribe from this group and stop receiving emails from it, send an email to syzkaller+unsubscribe@googlegroups.com.
> For more options, visit https://groups.google.com/d/optout.
David Ahern March 7, 2017, 5:57 p.m. UTC | #5
On 3/7/17 1:43 AM, Dmitry Vyukov wrote:
> This is on c1ae3cfa0e89fa1a7ecc4c99031f5e9ae99d9201. No other kernel
> output from your patch (pr_err).
> 
> ------------[ cut here ]------------
> WARNING: CPU: 1 PID: 30179 at net/ipv6/ip6_fib.c:158
> rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158
> Kernel panic - not syncing: panic_on_warn set ...

you have panic_on_warn set ...

> 
> CPU: 1 PID: 30179 Comm: syz-executor3 Not tainted 4.11.0-rc1+ #310
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
> Call Trace:
>  __dump_stack lib/dump_stack.c:16 [inline]
>  dump_stack+0x2fb/0x3fd lib/dump_stack.c:52
>  panic+0x20f/0x426 kernel/panic.c:180
>  __warn+0x1c4/0x1e0 kernel/panic.c:541
>  warn_slowpath_null+0x2c/0x40 kernel/panic.c:584
>  rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158

and this is my WARN_ON in rt6_rcu_free, which shows that an additional
change is needed

>  rt6_release+0x1ee/0x290 net/ipv6/ip6_fib.c:189
>  fib6_add_rt2node net/ipv6/ip6_fib.c:922 [inline]

in fib6_add_rt2node for the route replace path (whitespace damaged on
the copy-paste):

@@ -916,6 +919,7 @@ static int fib6_add_rt2node(struct fib6_node *fn,
struct rt6_info *rt,
                }
                nsiblings = iter->rt6i_nsiblings;
                fib6_purge_rt(iter, fn, info->nl_net);
+               iter->dst.flags &= ~DST_IN_FIB;
                rt6_release(iter);

                if (nsiblings) {
@@ -926,6 +930,7 @@ static int fib6_add_rt2node(struct fib6_node *fn,
struct rt6_info *rt,
                                if (rt6_qualify_for_ecmp(iter)) {
                                        *ins = iter->dst.rt6_next;
                                        fib6_purge_rt(iter, fn,
info->nl_net);
+                                       iter->dst.flags &= ~DST_IN_FIB;
                                        rt6_release(iter);
                                        nsiblings--;
                                } else {
David Ahern March 7, 2017, 6:03 p.m. UTC | #6
On 3/7/17 2:21 AM, Dmitry Vyukov wrote:
> I've commented out that warning just to see if I can obtain more information.
> Then I also got this:
> 
> ------------[ cut here ]------------
> WARNING: CPU: 2 PID: 3990 at net/ipv6/ip6_fib.c:991
> fib6_add+0x2e12/0x3290 net/ipv6/ip6_fib.c:991 net/ipv6/ip6_fib.c:991
> Kernel panic - not syncing: panic_on_warn set ...

again panic_on_warn is triggering ...

> 
> CPU: 2 PID: 3990 Comm: kworker/2:4 Not tainted 4.11.0-rc1+ #311
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
> Workqueue: ipv6_addrconf addrconf_dad_work
> Call Trace:
>  __dump_stack lib/dump_stack.c:16 [inline]
>  __dump_stack lib/dump_stack.c:16 [inline] lib/dump_stack.c:52
>  dump_stack+0x2fb/0x3fd lib/dump_stack.c:52 lib/dump_stack.c:52
>  panic+0x20f/0x426 kernel/panic.c:180 kernel/panic.c:180
>  __warn+0x1c4/0x1e0 kernel/panic.c:541 kernel/panic.c:541
>  warn_slowpath_null+0x2c/0x40 kernel/panic.c:584 kernel/panic.c:584
>  fib6_add+0x2e12/0x3290 net/ipv6/ip6_fib.c:991 net/ipv6/ip6_fib.c:991

on this warning:

/* dst.next really should not be set at this point */
if (rt->dst.next && rt->dst.next->ops->family != AF_INET6) {
        pr_warn("fib6_add: adding rt with bad next -- family %d dst
flags %x\n",
                rt->dst.next->ops->family, rt->dst.next->flags);

        WARN_ON(1);
}

You should have seen the pr_warn in the log preceding the WARN_ON dump.


>  __ip6_ins_rt+0x60/0x80 net/ipv6/route.c:948 net/ipv6/route.c:948
>  ip6_ins_rt+0x19b/0x220 net/ipv6/route.c:959 net/ipv6/route.c:959
>  __ipv6_ifa_notify+0x62e/0x7a0 net/ipv6/addrconf.c:5485 net/ipv6/addrconf.c:5485
>  ipv6_ifa_notify+0xdf/0x1d0 net/ipv6/addrconf.c:5518 net/ipv6/addrconf.c:5518
>  addrconf_dad_completed+0xe6/0x950 net/ipv6/addrconf.c:3983
> net/ipv6/addrconf.c:3983
>  addrconf_dad_begin net/ipv6/addrconf.c:3797 [inline]
>  addrconf_dad_begin net/ipv6/addrconf.c:3797 [inline] net/ipv6/addrconf.c:3897
>  addrconf_dad_work+0x32a/0xea0 net/ipv6/addrconf.c:3897 net/ipv6/addrconf.c:3897
>  process_one_work+0xc06/0x1c40 kernel/workqueue.c:2096 kernel/workqueue.c:2096
>  worker_thread+0x223/0x19f0 kernel/workqueue.c:2230 kernel/workqueue.c:2230
>  kthread+0x334/0x400 kernel/kthread.c:229 kernel/kthread.c:229
>  ret_from_fork+0x31/0x40 arch/x86/entry/entry_64.S:430
> arch/x86/entry/entry_64.S:430
> 
> 
> 
> And this without any preceding warnings:
> 
> ==================================================================
> BUG: KASAN: slab-out-of-bounds in fib6_age+0x3fd/0x480
> net/ipv6/ip6_fib.c:1787 at addr ffff88004d4fbe54

another ipv4 route in ipv6 fib walk
Dmitry Vyukov March 7, 2017, 6:13 p.m. UTC | #7
On Tue, Mar 7, 2017 at 7:03 PM, David Ahern <dsa@cumulusnetworks.com> wrote:
> On 3/7/17 2:21 AM, Dmitry Vyukov wrote:
>> I've commented out that warning just to see if I can obtain more information.
>> Then I also got this:
>>
>> ------------[ cut here ]------------
>> WARNING: CPU: 2 PID: 3990 at net/ipv6/ip6_fib.c:991
>> fib6_add+0x2e12/0x3290 net/ipv6/ip6_fib.c:991 net/ipv6/ip6_fib.c:991
>> Kernel panic - not syncing: panic_on_warn set ...
>
> again panic_on_warn is triggering ...
>
>>
>> CPU: 2 PID: 3990 Comm: kworker/2:4 Not tainted 4.11.0-rc1+ #311
>> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
>> Workqueue: ipv6_addrconf addrconf_dad_work
>> Call Trace:
>>  __dump_stack lib/dump_stack.c:16 [inline]
>>  __dump_stack lib/dump_stack.c:16 [inline] lib/dump_stack.c:52
>>  dump_stack+0x2fb/0x3fd lib/dump_stack.c:52 lib/dump_stack.c:52
>>  panic+0x20f/0x426 kernel/panic.c:180 kernel/panic.c:180
>>  __warn+0x1c4/0x1e0 kernel/panic.c:541 kernel/panic.c:541
>>  warn_slowpath_null+0x2c/0x40 kernel/panic.c:584 kernel/panic.c:584
>>  fib6_add+0x2e12/0x3290 net/ipv6/ip6_fib.c:991 net/ipv6/ip6_fib.c:991
>
> on this warning:
>
> /* dst.next really should not be set at this point */
> if (rt->dst.next && rt->dst.next->ops->family != AF_INET6) {
>         pr_warn("fib6_add: adding rt with bad next -- family %d dst
> flags %x\n",
>                 rt->dst.next->ops->family, rt->dst.next->flags);
>
>         WARN_ON(1);
> }
>
> You should have seen the pr_warn in the log preceding the WARN_ON dump.


Right. They all have the same "IPv6: fib6_add: adding rt with bad next
-- family 2 dst flags 6"

[  171.222795] IPv6: fib6_add: adding rt with bad next -- family 2 dst flags 6
[  171.223809] ------------[ cut here ]------------
[  171.224407] WARNING: CPU: 3 PID: 27 at net/ipv6/ip6_fib.c:991
fib6_add+0x2e12/0x3290
[  171.225327] Kernel panic - not syncing: panic_on_warn set ...
[  171.225327]
[  171.226066] CPU: 3 PID: 27 Comm: kworker/3:0 Not tainted 4.11.0-rc1+ #311
[  171.226304] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS Bochs 01/01/2011
[  171.226304] Workqueue: ipv6_addrconf addrconf_dad_work
[  171.226304] Call Trace:
[  171.226304]  dump_stack+0x2fb/0x3fd
[  171.226304]  ? arch_local_irq_restore+0x53/0x53
[  171.226304]  ? vprintk_emit+0x566/0x770
[  171.226304]  ? console_unlock+0xf50/0xf50
[  171.226304]  ? vprintk_emit+0x566/0x770
[  171.226304]  ? console_unlock+0xf50/0xf50
[  171.226304]  ? vprintk_emit+0x566/0x770
[  171.226304]  ? console_unlock+0xf50/0xf50
[  171.226304]  ? check_noncircular+0x20/0x20
[  171.226304]  ? trace_hardirqs_on+0xd/0x10
[  171.226304]  ? perf_trace_lock_acquire+0x141/0xa00
[  171.226304]  ? trace_hardirqs_off+0xd/0x10
[  171.226304]  ? quarantine_put+0xea/0x190
[  171.226304]  ? check_noncircular+0x20/0x20
[  171.236060]  ? vprintk_default+0x28/0x30
[  171.236662]  ? vprintk_func+0x47/0x90
[  171.236662]  ? printk+0xc8/0xf9
[  171.236662]  ? load_image_and_restore+0x134/0x134
[  171.236662]  ? pointer+0xac0/0xac0
[  171.236662]  panic+0x20f/0x426
[  171.236662]  ? copy_mm+0x1219/0x1219
[  171.236662]  ? vprintk_func+0x47/0x90
[  171.236662]  ? printk+0xc8/0xf9
[  171.236662]  ? fib6_add+0x2e12/0x3290
[  171.236662]  __warn+0x1c4/0x1e0
[  171.236662]  warn_slowpath_null+0x2c/0x40
[  171.236662]  fib6_add+0x2e12/0x3290
[  171.236662]  ? kasan_check_write+0x14/0x20
[  171.236662]  ? netlink_broadcast_filtered+0x734/0x1380
[  171.236662]  ? fib6_force_start_gc+0xf0/0xf0
[  171.236662]  ? netlink_has_listeners+0x450/0x450
[  171.236662]  ? memcpy+0x45/0x50
[  171.236662]  ? __nla_put+0x37/0x40
[  171.236662]  ? nla_put+0xf9/0x130
[  171.236662]  ? skb_put+0x149/0x1c0
[  171.236662]  ? kasan_check_write+0x14/0x20
[  171.236662]  ? do_raw_write_lock+0xbd/0x1e0
[  171.236662]  __ip6_ins_rt+0x60/0x80
[  171.236662]  ip6_ins_rt+0x19b/0x220
[  171.236662]  ? ip6_route_info_create+0x2380/0x2380
[  171.236662]  ? nlmsg_notify+0xaf/0x160
[  171.236662]  ? rtnl_notify+0xbb/0xe0
[  171.236662]  __ipv6_ifa_notify+0x62e/0x7a0
[  171.251057]  ipv6_ifa_notify+0xdf/0x1d0
[  171.251057]  ? __ipv6_ifa_notify+0x7a0/0x7a0
[  171.251057]  addrconf_dad_completed+0xe6/0x950
[  171.251057]  ? addrconf_verify_work+0x20/0x20
[  171.251057]  ? kasan_check_write+0x14/0x20
[  171.251057]  addrconf_dad_work+0x32a/0xea0
[  171.251057]  ? addrconf_ifdown+0x1ad0/0x1ad0
[  171.251057]  ? rcu_pm_notify+0xc0/0xc0
[  171.251057]  ? wq_update_unbound_numa+0x8d0/0x8d0
[  171.251057]  ? kasan_check_write+0x14/0x20
[  171.251057]  process_one_work+0xc06/0x1c40
[  171.251057]  ? process_one_work+0xb3d/0x1c40
[  171.251057]  ? pwq_dec_nr_in_flight+0x470/0x470
[  171.251057]  ? preempt_notifier_register+0x1f0/0x1f0
[  171.259856]  ? __schedule+0x893/0x22d0
[  171.259856]  ? kasan_check_write+0x14/0x20
[  171.259856]  ? worker_thread+0x47d/0x19f0
[  171.259856]  ? lock_set_class+0xc00/0xc00
[  171.259856]  ? worker_thread+0x467/0x19f0
[  171.259856]  ? lock_acquire+0x630/0x630
[  171.259856]  ? _raw_spin_unlock_irq+0x27/0x70
[  171.259856]  ? check_noncircular+0x20/0x20
[  171.259856]  ? mark_held_locks+0x100/0x100
[  171.259856]  ? trace_hardirqs_on_thunk+0x1a/0x1c
[  171.259856]  ? __schedule+0x22d0/0x22d0
[  171.259856]  ? do_raw_spin_trylock+0x1a0/0x1a0
[  171.259856]  ? do_raw_spin_lock+0xbd/0x1f0
[  171.259856]  worker_thread+0x223/0x19f0
[  171.259856]  ? process_one_work+0x1c40/0x1c40
[  171.259856]  ? lock_repin_lock+0x4a0/0x4a0
[  171.259856]  ? unwind_dump.isra.5.part.6+0x320/0x320
[  171.259856]  ? kasan_check_write+0x14/0x20
[  171.259856]  ? finish_task_switch+0x1ea/0x740
[  171.259856]  ? finish_task_switch+0x196/0x740
[  171.259856]  ? preempt_notifier_register+0x1f0/0x1f0
[  171.259856]  ? __schedule+0x893/0x22d0
[  171.259856]  ? lockdep_count_backward_deps+0x480/0x480
[  171.259856]  ? ret_from_fork+0x31/0x40
[  171.259856]  ? do_raw_spin_lock+0xbd/0x1f0
[  171.259856]  ? complete+0xbf/0x190
[  171.259856]  ? register_lock_class+0x1c30/0x1c30
[  171.276560]  ? __wake_up_common+0xb4/0x150
[  171.276560]  ? rcu_pm_notify+0xc0/0xc0
[  171.276560]  ? __schedule+0x22d0/0x22d0
[  171.276560]  ? __init_waitqueue_head+0x8a/0x120
[  171.276560]  ? __wake_up_bit+0x290/0x290
[  171.279715]  ? preempt_notifier_register+0x1f0/0x1f0
[  171.279715]  ? __kthread_parkme+0x173/0x240
[  171.279715]  kthread+0x334/0x400
[  171.279715]  ? process_one_work+0x1c40/0x1c40
[  171.279715]  ? kthread_create_on_node+0x110/0x110
[  171.279715]  ret_from_fork+0x31/0x40
[  171.279715] Dumping ftrace buffer:
[  171.279715]    (ftrace buffer empty)
[  171.279715] Kernel Offset: disabled
[  171.279715] Rebooting in 86400 seconds..




>>  __ip6_ins_rt+0x60/0x80 net/ipv6/route.c:948 net/ipv6/route.c:948
>>  ip6_ins_rt+0x19b/0x220 net/ipv6/route.c:959 net/ipv6/route.c:959
>>  __ipv6_ifa_notify+0x62e/0x7a0 net/ipv6/addrconf.c:5485 net/ipv6/addrconf.c:5485
>>  ipv6_ifa_notify+0xdf/0x1d0 net/ipv6/addrconf.c:5518 net/ipv6/addrconf.c:5518
>>  addrconf_dad_completed+0xe6/0x950 net/ipv6/addrconf.c:3983
>> net/ipv6/addrconf.c:3983
>>  addrconf_dad_begin net/ipv6/addrconf.c:3797 [inline]
>>  addrconf_dad_begin net/ipv6/addrconf.c:3797 [inline] net/ipv6/addrconf.c:3897
>>  addrconf_dad_work+0x32a/0xea0 net/ipv6/addrconf.c:3897 net/ipv6/addrconf.c:3897
>>  process_one_work+0xc06/0x1c40 kernel/workqueue.c:2096 kernel/workqueue.c:2096
>>  worker_thread+0x223/0x19f0 kernel/workqueue.c:2230 kernel/workqueue.c:2230
>>  kthread+0x334/0x400 kernel/kthread.c:229 kernel/kthread.c:229
>>  ret_from_fork+0x31/0x40 arch/x86/entry/entry_64.S:430
>> arch/x86/entry/entry_64.S:430
>>
>>
>>
>> And this without any preceding warnings:
>>
>> ==================================================================
>> BUG: KASAN: slab-out-of-bounds in fib6_age+0x3fd/0x480
>> net/ipv6/ip6_fib.c:1787 at addr ffff88004d4fbe54
>
> another ipv4 route in ipv6 fib walk
David Ahern April 25, 2017, 3:57 p.m. UTC | #8
On 3/7/17 2:21 AM, Dmitry Vyukov wrote:
> ------------[ cut here ]------------
> WARNING: CPU: 2 PID: 3990 at net/ipv6/ip6_fib.c:991
> fib6_add+0x2e12/0x3290 net/ipv6/ip6_fib.c:991 net/ipv6/ip6_fib.c:991
> Kernel panic - not syncing: panic_on_warn set ...
> 
> CPU: 2 PID: 3990 Comm: kworker/2:4 Not tainted 4.11.0-rc1+ #311
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
> Workqueue: ipv6_addrconf addrconf_dad_work
> Call Trace:
>  __dump_stack lib/dump_stack.c:16 [inline]
>  __dump_stack lib/dump_stack.c:16 [inline] lib/dump_stack.c:52
>  dump_stack+0x2fb/0x3fd lib/dump_stack.c:52 lib/dump_stack.c:52
>  panic+0x20f/0x426 kernel/panic.c:180 kernel/panic.c:180
>  __warn+0x1c4/0x1e0 kernel/panic.c:541 kernel/panic.c:541
>  warn_slowpath_null+0x2c/0x40 kernel/panic.c:584 kernel/panic.c:584
>  fib6_add+0x2e12/0x3290 net/ipv6/ip6_fib.c:991 net/ipv6/ip6_fib.c:991
>  __ip6_ins_rt+0x60/0x80 net/ipv6/route.c:948 net/ipv6/route.c:948
>  ip6_ins_rt+0x19b/0x220 net/ipv6/route.c:959 net/ipv6/route.c:959
>  __ipv6_ifa_notify+0x62e/0x7a0 net/ipv6/addrconf.c:5485 net/ipv6/addrconf.c:5485
>  ipv6_ifa_notify+0xdf/0x1d0 net/ipv6/addrconf.c:5518 net/ipv6/addrconf.c:5518
>  addrconf_dad_completed+0xe6/0x950 net/ipv6/addrconf.c:3983
> net/ipv6/addrconf.c:3983
>  addrconf_dad_begin net/ipv6/addrconf.c:3797 [inline]

Similarly for this one.
diff mbox

Patch

diff --git a/include/net/dst.h b/include/net/dst.h
index 049af33da3b6..d164eb8ceab8 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -58,6 +58,7 @@  struct dst_entry {
 #define DST_XFRM_TUNNEL		0x0080
 #define DST_XFRM_QUEUE		0x0100
 #define DST_METADATA		0x0200
+#define DST_IN_FIB		0x0400
 
 	short			error;
 
diff --git a/net/core/dst.c b/net/core/dst.c
index 960e503b5a52..c98447fe8510 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -232,6 +232,9 @@  void __dst_free(struct dst_entry *dst)
 {
 	spin_lock_bh(&dst_garbage.lock);
 	___dst_free(dst);
+if (dst->flags & DST_IN_FIB)
+	pr_warn("dst %p is marked as in fib\n", dst);
+//WARN_ON(dst->flags & DST_IN_FIB);
 	dst->next = dst_garbage.list;
 	dst_garbage.list = dst;
 	if (dst_garbage.timer_inc > DST_GC_INC) {
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index e4266746e4a2..a4d55ba00a43 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -155,6 +155,7 @@  static void node_free(struct fib6_node *fn)
 
 static void rt6_rcu_free(struct rt6_info *rt)
 {
+WARN_ON(rt->dst.flags & DST_IN_FIB);
 	call_rcu(&rt->dst.rcu_head, dst_rcu_free);
 }
 
@@ -878,6 +879,7 @@  static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 			return err;
 
 		rt->dst.rt6_next = iter;
+		rt->dst.flags |= DST_IN_FIB;
 		*ins = rt;
 		rt->rt6i_node = fn;
 		atomic_inc(&rt->rt6i_ref);
@@ -907,6 +909,7 @@  static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 		*ins = rt;
 		rt->rt6i_node = fn;
 		rt->dst.rt6_next = iter->dst.rt6_next;
+		rt->dst.flags |= DST_IN_FIB;
 		atomic_inc(&rt->rt6i_ref);
 		if (!info->skip_notify)
 			inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
@@ -974,6 +977,20 @@  int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 			 !atomic_read(&rt->dst.__refcnt)))
 		return -EINVAL;
 
+if (rt->dst.ops->family != AF_INET6) {
+	pr_warn("fib6_add: adding rt with family is %d dst flags %x\n",
+		rt->dst.ops->family, rt->dst.flags);
+
+	WARN_ON(1);
+}
+/* dst.next really should not be set at this point */
+if (rt->dst.next && rt->dst.next->ops->family != AF_INET6) {
+	pr_warn("fib6_add: adding rt with bad next -- family %d dst flags %x\n",
+		rt->dst.next->ops->family, rt->dst.next->flags);
+
+	WARN_ON(1);
+}
+
 	if (info->nlh) {
 		if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
 			allow_create = 0;
@@ -1444,6 +1461,7 @@  static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
 	read_unlock(&net->ipv6.fib6_walker_lock);
 
 	rt->dst.rt6_next = NULL;
+	rt->dst.flags &= ~DST_IN_FIB;
 
 	/* If it was last route, expunge its radix tree node */
 	if (!fn->leaf) {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 229bfcc451ef..e91d7871ccfc 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1135,6 +1135,8 @@  struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 
 		dst_hold(&uncached_rt->dst);
 
+		uncached_rt->dst.flags &= ~DST_IN_FIB;
+
 		trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
 		return uncached_rt;
 
@@ -1160,6 +1162,7 @@  struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 			dst_release(&rt->dst);
 		}
 
+		pcpu_rt->dst.flags &= ~DST_IN_FIB;
 		trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
 		return pcpu_rt;