Message ID | 14c01aea-6c2f-6ba5-6aee-52c55f410da7@cumulusnetworks.com |
---|---|
State | RFC, archived |
Delegated to: | David Miller |
Headers | show |
On Tue, Mar 7, 2017 at 12:41 AM, David Ahern <dsa@cumulusnetworks.com> wrote: > On 3/6/17 11:51 AM, Dmitry Vyukov wrote: >> We hit it several thousand times, but we get only several dozens of >> crashes per day on ~80 VMs. So if you try to reproduce it on a single >> machine it can take days for a single crash. >> If you are ready to go that route, here are some instructions on >> setting up syzkaller: >> https://github.com/google/syzkaller >> You also need kernel built with CONFIG_KASAN. > > ack and I have it setup on ubuntu 16.10 which has a fairly new compiler. > >> I am ready to help with resolving any issues. >> >> Another possible route is if you give me a patch with some additional >> WARNINGs. Then I can deploy it to bots and collect stacks. > > try the attached. This is on c1ae3cfa0e89fa1a7ecc4c99031f5e9ae99d9201. No other kernel output from your patch (pr_err). ------------[ cut here ]------------ WARNING: CPU: 1 PID: 30179 at net/ipv6/ip6_fib.c:158 rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158 Kernel panic - not syncing: panic_on_warn set ... 
CPU: 1 PID: 30179 Comm: syz-executor3 Not tainted 4.11.0-rc1+ #310 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x2fb/0x3fd lib/dump_stack.c:52 panic+0x20f/0x426 kernel/panic.c:180 __warn+0x1c4/0x1e0 kernel/panic.c:541 warn_slowpath_null+0x2c/0x40 kernel/panic.c:584 rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158 rt6_release+0x1ee/0x290 net/ipv6/ip6_fib.c:189 fib6_add_rt2node net/ipv6/ip6_fib.c:922 [inline] fib6_add+0x1d51/0x3290 net/ipv6/ip6_fib.c:1081 __ip6_ins_rt+0x60/0x80 net/ipv6/route.c:948 ip6_route_add+0x1a7/0x310 net/ipv6/route.c:2130 inet6_rtm_newroute+0x191/0x1b0 net/ipv6/route.c:3294 rtnetlink_rcv_msg+0x609/0x860 net/core/rtnetlink.c:4104 netlink_rcv_skb+0x2ab/0x390 net/netlink/af_netlink.c:2298 rtnetlink_rcv+0x2a/0x40 net/core/rtnetlink.c:4110 netlink_unicast_kernel net/netlink/af_netlink.c:1231 [inline] netlink_unicast+0x525/0x730 net/netlink/af_netlink.c:1257 netlink_sendmsg+0xab3/0xe70 net/netlink/af_netlink.c:1803 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg+0xca/0x110 net/socket.c:643 sock_write_iter+0x326/0x600 net/socket.c:846 call_write_iter include/linux/fs.h:1733 [inline] do_iter_readv_writev fs/read_write.c:696 [inline] __do_readv_writev+0xbbc/0x10a0 fs/read_write.c:862 do_readv_writev+0x13f/0x200 fs/read_write.c:894 vfs_writev+0x87/0xc0 fs/read_write.c:921 do_writev+0x110/0x2c0 fs/read_write.c:954 SYSC_writev fs/read_write.c:1027 [inline] SyS_writev+0x27/0x30 fs/read_write.c:1024 entry_SYSCALL_64_fastpath+0x1f/0xc2 RIP: 0033:0x4458d9 RSP: 002b:00007f31fcf33b58 EFLAGS: 00000292 ORIG_RAX: 0000000000000014 RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 00000000004458d9 RDX: 0000000000000001 RSI: 00000000207cd000 RDI: 0000000000000005 RBP: 00000000006e30c0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000292 R12: 0000000000708000 R13: 0000000020fad000 R14: 0000000000001000 R15: 0000000000000003 
------------[ cut here ]------------ WARNING: CPU: 2 PID: 31175 at net/ipv6/ip6_fib.c:158 rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158 Kernel panic - not syncing: panic_on_warn set ... CPU: 2 PID: 31175 Comm: syz-executor1 Not tainted 4.11.0-rc1+ #310 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x2fb/0x3fd lib/dump_stack.c:52 panic+0x20f/0x426 kernel/panic.c:180 __warn+0x1c4/0x1e0 kernel/panic.c:541 warn_slowpath_null+0x2c/0x40 kernel/panic.c:584 rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158 rt6_release+0x1ee/0x290 net/ipv6/ip6_fib.c:189 fib6_add_rt2node net/ipv6/ip6_fib.c:922 [inline] fib6_add+0x1d51/0x3290 net/ipv6/ip6_fib.c:1081 kvm_vm_ioctl_deassign_device: device hasn't been assigned before, so cannot be deassigned __ip6_ins_rt+0x60/0x80 net/ipv6/route.c:948 ip6_route_add+0x1a7/0x310 net/ipv6/route.c:2130 inet6_rtm_newroute+0x191/0x1b0 net/ipv6/route.c:3294 rtnetlink_rcv_msg+0x609/0x860 net/core/rtnetlink.c:4104 netlink_rcv_skb+0x2ab/0x390 net/netlink/af_netlink.c:2298 rtnetlink_rcv+0x2a/0x40 net/core/rtnetlink.c:4110 netlink_unicast_kernel net/netlink/af_netlink.c:1231 [inline] netlink_unicast+0x525/0x730 net/netlink/af_netlink.c:1257 netlink_sendmsg+0xab3/0xe70 net/netlink/af_netlink.c:1803 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg+0xca/0x110 net/socket.c:643 sock_write_iter+0x326/0x600 net/socket.c:846 call_write_iter include/linux/fs.h:1733 [inline] do_iter_readv_writev fs/read_write.c:696 [inline] __do_readv_writev+0xbbc/0x10a0 fs/read_write.c:862 do_readv_writev+0x13f/0x200 fs/read_write.c:894 vfs_writev+0x87/0xc0 fs/read_write.c:921 do_writev+0x110/0x2c0 fs/read_write.c:954 SYSC_writev fs/read_write.c:1027 [inline] SyS_writev+0x27/0x30 fs/read_write.c:1024 entry_SYSCALL_64_fastpath+0x1f/0xc2 RIP: 0033:0x4458d9 RSP: 002b:00007f1639006b58 EFLAGS: 00000292 ORIG_RAX: 0000000000000014 RAX: ffffffffffffffda RBX: 0000000000000019 RCX: 
00000000004458d9 RDX: 0000000000000001 RSI: 00000000207cd000 RDI: 0000000000000019 RBP: 00000000006e30c0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000292 R12: 0000000000708000 R13: 0000000000000010 R14: 0000000000000003 R15: 0000000000000000
On Tue, Mar 7, 2017 at 9:43 AM, Dmitry Vyukov <dvyukov@google.com> wrote: > On Tue, Mar 7, 2017 at 12:41 AM, David Ahern <dsa@cumulusnetworks.com> wrote: >> On 3/6/17 11:51 AM, Dmitry Vyukov wrote: >>> We hit it several thousand times, but we get only several dozens of >>> crashes per day on ~80 VMs. So if you try to reproduce it on a single >>> machine it can take days for a single crash. >>> If you are ready to go that route, here are some instructions on >>> setting up syzkaller: >>> https://github.com/google/syzkaller >>> You also need kernel built with CONFIG_KASAN. >> >> ack and I have it setup on ubuntu 16.10 which has a fairly new compiler. >> >>> I am ready to help with resolving any issues. >>> >>> Another possible route is if you give me a patch with some additional >>> WARNINGs. Then I can deploy it to bots and collect stacks. >> >> try the attached. > > > This is on c1ae3cfa0e89fa1a7ecc4c99031f5e9ae99d9201. No other kernel > output from your patch (pr_err). > > ------------[ cut here ]------------ > WARNING: CPU: 1 PID: 30179 at net/ipv6/ip6_fib.c:158 > rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158 > Kernel panic - not syncing: panic_on_warn set ... 
> > CPU: 1 PID: 30179 Comm: syz-executor3 Not tainted 4.11.0-rc1+ #310 > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 > Call Trace: > __dump_stack lib/dump_stack.c:16 [inline] > dump_stack+0x2fb/0x3fd lib/dump_stack.c:52 > panic+0x20f/0x426 kernel/panic.c:180 > __warn+0x1c4/0x1e0 kernel/panic.c:541 > warn_slowpath_null+0x2c/0x40 kernel/panic.c:584 > rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158 > rt6_release+0x1ee/0x290 net/ipv6/ip6_fib.c:189 > fib6_add_rt2node net/ipv6/ip6_fib.c:922 [inline] > fib6_add+0x1d51/0x3290 net/ipv6/ip6_fib.c:1081 > __ip6_ins_rt+0x60/0x80 net/ipv6/route.c:948 > ip6_route_add+0x1a7/0x310 net/ipv6/route.c:2130 > inet6_rtm_newroute+0x191/0x1b0 net/ipv6/route.c:3294 > rtnetlink_rcv_msg+0x609/0x860 net/core/rtnetlink.c:4104 > netlink_rcv_skb+0x2ab/0x390 net/netlink/af_netlink.c:2298 > rtnetlink_rcv+0x2a/0x40 net/core/rtnetlink.c:4110 > netlink_unicast_kernel net/netlink/af_netlink.c:1231 [inline] > netlink_unicast+0x525/0x730 net/netlink/af_netlink.c:1257 > netlink_sendmsg+0xab3/0xe70 net/netlink/af_netlink.c:1803 > sock_sendmsg_nosec net/socket.c:633 [inline] > sock_sendmsg+0xca/0x110 net/socket.c:643 > sock_write_iter+0x326/0x600 net/socket.c:846 > call_write_iter include/linux/fs.h:1733 [inline] > do_iter_readv_writev fs/read_write.c:696 [inline] > __do_readv_writev+0xbbc/0x10a0 fs/read_write.c:862 > do_readv_writev+0x13f/0x200 fs/read_write.c:894 > vfs_writev+0x87/0xc0 fs/read_write.c:921 > do_writev+0x110/0x2c0 fs/read_write.c:954 > SYSC_writev fs/read_write.c:1027 [inline] > SyS_writev+0x27/0x30 fs/read_write.c:1024 > entry_SYSCALL_64_fastpath+0x1f/0xc2 > RIP: 0033:0x4458d9 > RSP: 002b:00007f31fcf33b58 EFLAGS: 00000292 ORIG_RAX: 0000000000000014 > RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 00000000004458d9 > RDX: 0000000000000001 RSI: 00000000207cd000 RDI: 0000000000000005 > RBP: 00000000006e30c0 R08: 0000000000000000 R09: 0000000000000000 > R10: 0000000000000000 R11: 0000000000000292 R12: 
0000000000708000 > R13: 0000000020fad000 R14: 0000000000001000 R15: 0000000000000003 > > > > ------------[ cut here ]------------ > WARNING: CPU: 2 PID: 31175 at net/ipv6/ip6_fib.c:158 > rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158 > Kernel panic - not syncing: panic_on_warn set ... > > CPU: 2 PID: 31175 Comm: syz-executor1 Not tainted 4.11.0-rc1+ #310 > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 > Call Trace: > __dump_stack lib/dump_stack.c:16 [inline] > dump_stack+0x2fb/0x3fd lib/dump_stack.c:52 > panic+0x20f/0x426 kernel/panic.c:180 > __warn+0x1c4/0x1e0 kernel/panic.c:541 > warn_slowpath_null+0x2c/0x40 kernel/panic.c:584 > rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158 > rt6_release+0x1ee/0x290 net/ipv6/ip6_fib.c:189 > fib6_add_rt2node net/ipv6/ip6_fib.c:922 [inline] > fib6_add+0x1d51/0x3290 net/ipv6/ip6_fib.c:1081 > kvm_vm_ioctl_deassign_device: device hasn't been assigned before, so > cannot be deassigned > __ip6_ins_rt+0x60/0x80 net/ipv6/route.c:948 > ip6_route_add+0x1a7/0x310 net/ipv6/route.c:2130 > inet6_rtm_newroute+0x191/0x1b0 net/ipv6/route.c:3294 > rtnetlink_rcv_msg+0x609/0x860 net/core/rtnetlink.c:4104 > netlink_rcv_skb+0x2ab/0x390 net/netlink/af_netlink.c:2298 > rtnetlink_rcv+0x2a/0x40 net/core/rtnetlink.c:4110 > netlink_unicast_kernel net/netlink/af_netlink.c:1231 [inline] > netlink_unicast+0x525/0x730 net/netlink/af_netlink.c:1257 > netlink_sendmsg+0xab3/0xe70 net/netlink/af_netlink.c:1803 > sock_sendmsg_nosec net/socket.c:633 [inline] > sock_sendmsg+0xca/0x110 net/socket.c:643 > sock_write_iter+0x326/0x600 net/socket.c:846 > call_write_iter include/linux/fs.h:1733 [inline] > do_iter_readv_writev fs/read_write.c:696 [inline] > __do_readv_writev+0xbbc/0x10a0 fs/read_write.c:862 > do_readv_writev+0x13f/0x200 fs/read_write.c:894 > vfs_writev+0x87/0xc0 fs/read_write.c:921 > do_writev+0x110/0x2c0 fs/read_write.c:954 > SYSC_writev fs/read_write.c:1027 [inline] > SyS_writev+0x27/0x30 fs/read_write.c:1024 > 
entry_SYSCALL_64_fastpath+0x1f/0xc2 > RIP: 0033:0x4458d9 > RSP: 002b:00007f1639006b58 EFLAGS: 00000292 ORIG_RAX: 0000000000000014 > RAX: ffffffffffffffda RBX: 0000000000000019 RCX: 00000000004458d9 > RDX: 0000000000000001 RSI: 00000000207cd000 RDI: 0000000000000019 > RBP: 00000000006e30c0 R08: 0000000000000000 R09: 0000000000000000 > R10: 0000000000000000 R11: 0000000000000292 R12: 0000000000708000 > R13: 0000000000000010 R14: 0000000000000003 R15: 0000000000000000 I've commented out that warning just to see if I can obtain more information. Then I also got this: ------------[ cut here ]------------ WARNING: CPU: 2 PID: 3990 at net/ipv6/ip6_fib.c:991 fib6_add+0x2e12/0x3290 net/ipv6/ip6_fib.c:991 net/ipv6/ip6_fib.c:991 Kernel panic - not syncing: panic_on_warn set ... CPU: 2 PID: 3990 Comm: kworker/2:4 Not tainted 4.11.0-rc1+ #311 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Workqueue: ipv6_addrconf addrconf_dad_work Call Trace: __dump_stack lib/dump_stack.c:16 [inline] __dump_stack lib/dump_stack.c:16 [inline] lib/dump_stack.c:52 dump_stack+0x2fb/0x3fd lib/dump_stack.c:52 lib/dump_stack.c:52 panic+0x20f/0x426 kernel/panic.c:180 kernel/panic.c:180 __warn+0x1c4/0x1e0 kernel/panic.c:541 kernel/panic.c:541 warn_slowpath_null+0x2c/0x40 kernel/panic.c:584 kernel/panic.c:584 fib6_add+0x2e12/0x3290 net/ipv6/ip6_fib.c:991 net/ipv6/ip6_fib.c:991 __ip6_ins_rt+0x60/0x80 net/ipv6/route.c:948 net/ipv6/route.c:948 ip6_ins_rt+0x19b/0x220 net/ipv6/route.c:959 net/ipv6/route.c:959 __ipv6_ifa_notify+0x62e/0x7a0 net/ipv6/addrconf.c:5485 net/ipv6/addrconf.c:5485 ipv6_ifa_notify+0xdf/0x1d0 net/ipv6/addrconf.c:5518 net/ipv6/addrconf.c:5518 addrconf_dad_completed+0xe6/0x950 net/ipv6/addrconf.c:3983 net/ipv6/addrconf.c:3983 addrconf_dad_begin net/ipv6/addrconf.c:3797 [inline] addrconf_dad_begin net/ipv6/addrconf.c:3797 [inline] net/ipv6/addrconf.c:3897 addrconf_dad_work+0x32a/0xea0 net/ipv6/addrconf.c:3897 net/ipv6/addrconf.c:3897 process_one_work+0xc06/0x1c40 
kernel/workqueue.c:2096 kernel/workqueue.c:2096 worker_thread+0x223/0x19f0 kernel/workqueue.c:2230 kernel/workqueue.c:2230 kthread+0x334/0x400 kernel/kthread.c:229 kernel/kthread.c:229 ret_from_fork+0x31/0x40 arch/x86/entry/entry_64.S:430 arch/x86/entry/entry_64.S:430 And this without any preceding warnings: ================================================================== BUG: KASAN: slab-out-of-bounds in fib6_age+0x3fd/0x480 net/ipv6/ip6_fib.c:1787 at addr ffff88004d4fbe54 Read of size 4 by task swapper/2/0 CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.11.0-rc1+ #311 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: <IRQ> __dump_stack lib/dump_stack.c:16 [inline] dump_stack+0x2fb/0x3fd lib/dump_stack.c:52 kasan_object_err+0x1c/0x90 mm/kasan/report.c:166 print_address_description mm/kasan/report.c:208 [inline] kasan_report_error mm/kasan/report.c:292 [inline] kasan_report.part.2+0x1b0/0x460 mm/kasan/report.c:314 kasan_report mm/kasan/report.c:334 [inline] __asan_report_load4_noabort+0x29/0x30 mm/kasan/report.c:334 fib6_age+0x3fd/0x480 net/ipv6/ip6_fib.c:1787 fib6_clean_node+0x356/0x550 net/ipv6/ip6_fib.c:1665 fib6_walk_continue+0x4b3/0x620 net/ipv6/ip6_fib.c:1594 fib6_walk+0x91/0xf0 net/ipv6/ip6_fib.c:1639 fib6_clean_tree+0x266/0x3a0 net/ipv6/ip6_fib.c:1711 __fib6_clean_all+0x1e1/0x360 net/ipv6/ip6_fib.c:1727 fib6_clean_all net/ipv6/ip6_fib.c:1738 [inline] fib6_run_gc+0x185/0x3d0 net/ipv6/ip6_fib.c:1835 fib6_gc_timer_cb+0x1c/0x20 net/ipv6/ip6_fib.c:1850 call_timer_fn+0x241/0x820 kernel/time/timer.c:1268 expire_timers kernel/time/timer.c:1307 [inline] __run_timers+0x960/0xcf0 kernel/time/timer.c:1601 run_timer_softirq+0x21/0x80 kernel/time/timer.c:1614 __do_softirq+0x31f/0xbe7 kernel/softirq.c:284 invoke_softirq kernel/softirq.c:364 [inline] irq_exit+0x1cc/0x200 kernel/softirq.c:405 exiting_irq arch/x86/include/asm/apic.h:657 [inline] smp_apic_timer_interrupt+0x76/0xa0 arch/x86/kernel/apic/apic.c:962 
apic_timer_interrupt+0x93/0xa0 arch/x86/entry/entry_64.S:487 RIP: 0010:native_safe_halt+0x6/0x10 arch/x86/include/asm/irqflags.h:53 RSP: 0018:ffff880089437c10 EFLAGS: 00000286 ORIG_RAX: ffffffffffffff10 RAX: dffffc0000000000 RBX: 1ffff10011286f85 RCX: 0000000000000000 RDX: 1ffffffff0a18ebc RSI: 0000000000000001 RDI: ffffffff850c75e0 RBP: ffff880089437c10 R08: ffffed00113835c2 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 1ffff10011286fa9 R13: ffff880089437cc8 R14: ffffffff856973f8 R15: ffff880089437e68 </IRQ> arch_safe_halt arch/x86/include/asm/paravirt.h:98 [inline] default_idle+0xbf/0x440 arch/x86/kernel/process.c:275 arch_cpu_idle+0xa/0x10 arch/x86/kernel/process.c:266 default_idle_call+0x36/0x90 kernel/sched/idle.c:97 cpuidle_idle_call kernel/sched/idle.c:155 [inline] do_idle+0x373/0x520 kernel/sched/idle.c:244 cpu_startup_entry+0x18/0x20 kernel/sched/idle.c:346 start_secondary+0x36c/0x460 arch/x86/kernel/smpboot.c:275 start_cpu+0x14/0x14 arch/x86/kernel/head_64.S:306 Object at ffff88004d4fbd40, in cache ip_dst_cache size: 216 Allocated: PID = 8122 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 save_stack+0x43/0xd0 mm/kasan/kasan.c:513 set_track mm/kasan/kasan.c:525 [inline] kasan_kmalloc+0xaa/0xd0 mm/kasan/kasan.c:616 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:555 kmem_cache_alloc+0x102/0x6e0 mm/slab.c:3572 dst_alloc+0x11b/0x1a0 net/core/dst.c:209 rt_dst_alloc+0xf0/0x580 net/ipv4/route.c:1482 __mkroute_output net/ipv4/route.c:2165 [inline] __ip_route_output_key_hash+0xce3/0x2ca0 net/ipv4/route.c:2375 __ip_route_output_key include/net/route.h:122 [inline] ip_route_output_flow+0x29/0xa0 net/ipv4/route.c:2461 ip_route_output_key include/net/route.h:132 [inline] sctp_v4_get_dst+0x5d2/0x1570 net/sctp/protocol.c:458 sctp_transport_route+0xa8/0x420 net/sctp/transport.c:292 sctp_assoc_add_peer+0x5a5/0x1470 net/sctp/associola.c:653 sctp_sendmsg+0x180d/0x3980 net/sctp/socket.c:1871 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:761 
sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg+0xca/0x110 net/socket.c:643 SYSC_sendto+0x660/0x810 net/socket.c:1685 SyS_sendto+0x40/0x50 net/socket.c:1653 entry_SYSCALL_64_fastpath+0x1f/0xc2 Freed: PID = 2038 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 save_stack+0x43/0xd0 mm/kasan/kasan.c:513 set_track mm/kasan/kasan.c:525 [inline] kasan_slab_free+0x6f/0xb0 mm/kasan/kasan.c:589 __cache_free mm/slab.c:3514 [inline] kmem_cache_free+0x71/0x240 mm/slab.c:3774 dst_destroy+0x211/0x340 net/core/dst.c:272 dst_free include/net/dst.h:429 [inline] dst_rcu_free+0x152/0x190 include/net/dst.h:439 __rcu_reclaim kernel/rcu/rcu.h:118 [inline] rcu_do_batch.isra.66+0xa31/0xe50 kernel/rcu/tree.c:2880 invoke_rcu_callbacks kernel/rcu/tree.c:3143 [inline] __rcu_process_callbacks kernel/rcu/tree.c:3110 [inline] rcu_process_callbacks+0x45b/0xc50 kernel/rcu/tree.c:3127 __do_softirq+0x31f/0xbe7 kernel/softirq.c:284 Disposed: PID = 26270 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 save_stack+0x43/0xd0 mm/kasan/kasan.c:513 set_track mm/kasan/kasan.c:525 [inline] kasan_set_rcu_track+0xcf/0xf0 mm/kasan/kasan.c:694 __call_rcu.constprop.77+0x1d6/0x15a0 kernel/rcu/tree.c:3230 call_rcu_sched+0x12/0x20 kernel/rcu/tree.c:3291 rt_free net/ipv4/route.c:592 [inline] rt_cache_route+0xf5/0x130 net/ipv4/route.c:1365 rt_set_nexthop.constprop.57+0x408/0xfa0 net/ipv4/route.c:1453 __mkroute_output net/ipv4/route.c:2195 [inline] __ip_route_output_key_hash+0xe50/0x2ca0 net/ipv4/route.c:2375 __ip_route_output_key include/net/route.h:122 [inline] ip_route_output_flow+0x29/0xa0 net/ipv4/route.c:2461 ip_route_output_key include/net/route.h:132 [inline] sctp_v4_get_dst+0x5d2/0x1570 net/sctp/protocol.c:458 sctp_transport_route+0xa8/0x420 net/sctp/transport.c:292 sctp_assoc_add_peer+0x5a5/0x1470 net/sctp/associola.c:653 sctp_process_param net/sctp/sm_make_chunk.c:2548 [inline] sctp_process_init+0xf71/0x2320 net/sctp/sm_make_chunk.c:2354 
sctp_sf_do_unexpected_init.isra.28+0x7b8/0x1470 net/sctp/sm_statefuns.c:1510 sctp_sf_do_5_2_1_siminit+0x35/0x40 net/sctp/sm_statefuns.c:1199 sctp_do_sm+0x1e5/0x6a30 net/sctp/sm_sideeffect.c:1144 sctp_assoc_bh_rcv+0x285/0x4b0 net/sctp/associola.c:1063 sctp_inq_push+0x22b/0x2e0 net/sctp/inqueue.c:95 sctp_backlog_rcv+0x177/0xb40 net/sctp/input.c:350 sk_backlog_rcv include/net/sock.h:896 [inline] __release_sock+0x126/0x3a0 net/core/sock.c:2058 release_sock+0xa5/0x2b0 net/core/sock.c:2545 sctp_sendmsg+0x2b05/0x3980 net/sctp/socket.c:2011 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:761 sock_sendmsg_nosec net/socket.c:633 [inline] sock_sendmsg+0xca/0x110 net/socket.c:643 SYSC_sendto+0x660/0x810 net/socket.c:1685 SyS_sendto+0x40/0x50 net/socket.c:1653 entry_SYSCALL_64_fastpath+0x1f/0xc2 Memory state around the buggy address: ffff88004d4fbd00: fc fc fc fc fc fc fc fc 00 00 00 00 00 00 00 00 ffff88004d4fbd80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >ffff88004d4fbe00: 00 00 00 fc fc fc fc fc fc fc fc fc fc fc fc fc ^ ffff88004d4fbe80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff88004d4fbf00: 00 00 00 00 00 00 00 00 00 00 00 fc fc fc fc fc ==================================================================
On 3/7/17 1:43 AM, Dmitry Vyukov wrote: > This is on c1ae3cfa0e89fa1a7ecc4c99031f5e9ae99d9201. No other kernel > output from your patch (pr_err). Is the below supposed to be from the same qemu instance at the time of the crash? cpu1 and cpu2 are both supposedly doing a route insert? > > ------------[ cut here ]------------ > WARNING: CPU: 1 PID: 30179 at net/ipv6/ip6_fib.c:158 > rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158 > Kernel panic - not syncing: panic_on_warn set ... > > CPU: 1 PID: 30179 Comm: syz-executor3 Not tainted 4.11.0-rc1+ #310 > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 > Call Trace: > __dump_stack lib/dump_stack.c:16 [inline] > dump_stack+0x2fb/0x3fd lib/dump_stack.c:52 > panic+0x20f/0x426 kernel/panic.c:180 > __warn+0x1c4/0x1e0 kernel/panic.c:541 > warn_slowpath_null+0x2c/0x40 kernel/panic.c:584 > rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158 > rt6_release+0x1ee/0x290 net/ipv6/ip6_fib.c:189 > fib6_add_rt2node net/ipv6/ip6_fib.c:922 [inline] > fib6_add+0x1d51/0x3290 net/ipv6/ip6_fib.c:1081 > __ip6_ins_rt+0x60/0x80 net/ipv6/route.c:948 > ip6_route_add+0x1a7/0x310 net/ipv6/route.c:2130 > inet6_rtm_newroute+0x191/0x1b0 net/ipv6/route.c:3294 > rtnetlink_rcv_msg+0x609/0x860 net/core/rtnetlink.c:4104 > netlink_rcv_skb+0x2ab/0x390 net/netlink/af_netlink.c:2298 > rtnetlink_rcv+0x2a/0x40 net/core/rtnetlink.c:4110 > netlink_unicast_kernel net/netlink/af_netlink.c:1231 [inline] > netlink_unicast+0x525/0x730 net/netlink/af_netlink.c:1257 > netlink_sendmsg+0xab3/0xe70 net/netlink/af_netlink.c:1803 > sock_sendmsg_nosec net/socket.c:633 [inline] > sock_sendmsg+0xca/0x110 net/socket.c:643 > sock_write_iter+0x326/0x600 net/socket.c:846 > call_write_iter include/linux/fs.h:1733 [inline] > do_iter_readv_writev fs/read_write.c:696 [inline] > __do_readv_writev+0xbbc/0x10a0 fs/read_write.c:862 > do_readv_writev+0x13f/0x200 fs/read_write.c:894 > vfs_writev+0x87/0xc0 fs/read_write.c:921 > do_writev+0x110/0x2c0 fs/read_write.c:954 > 
SYSC_writev fs/read_write.c:1027 [inline] > SyS_writev+0x27/0x30 fs/read_write.c:1024 > entry_SYSCALL_64_fastpath+0x1f/0xc2 > RIP: 0033:0x4458d9 > RSP: 002b:00007f31fcf33b58 EFLAGS: 00000292 ORIG_RAX: 0000000000000014 > RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 00000000004458d9 > RDX: 0000000000000001 RSI: 00000000207cd000 RDI: 0000000000000005 > RBP: 00000000006e30c0 R08: 0000000000000000 R09: 0000000000000000 > R10: 0000000000000000 R11: 0000000000000292 R12: 0000000000708000 > R13: 0000000020fad000 R14: 0000000000001000 R15: 0000000000000003 > > > > ------------[ cut here ]------------ > WARNING: CPU: 2 PID: 31175 at net/ipv6/ip6_fib.c:158 > rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158 > Kernel panic - not syncing: panic_on_warn set ... > > CPU: 2 PID: 31175 Comm: syz-executor1 Not tainted 4.11.0-rc1+ #310 > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 > Call Trace: > __dump_stack lib/dump_stack.c:16 [inline] > dump_stack+0x2fb/0x3fd lib/dump_stack.c:52 > panic+0x20f/0x426 kernel/panic.c:180 > __warn+0x1c4/0x1e0 kernel/panic.c:541 > warn_slowpath_null+0x2c/0x40 kernel/panic.c:584 > rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158 > rt6_release+0x1ee/0x290 net/ipv6/ip6_fib.c:189 > fib6_add_rt2node net/ipv6/ip6_fib.c:922 [inline] > fib6_add+0x1d51/0x3290 net/ipv6/ip6_fib.c:1081 > kvm_vm_ioctl_deassign_device: device hasn't been assigned before, so > cannot be deassigned > __ip6_ins_rt+0x60/0x80 net/ipv6/route.c:948 > ip6_route_add+0x1a7/0x310 net/ipv6/route.c:2130 > inet6_rtm_newroute+0x191/0x1b0 net/ipv6/route.c:3294 > rtnetlink_rcv_msg+0x609/0x860 net/core/rtnetlink.c:4104 > netlink_rcv_skb+0x2ab/0x390 net/netlink/af_netlink.c:2298 > rtnetlink_rcv+0x2a/0x40 net/core/rtnetlink.c:4110 > netlink_unicast_kernel net/netlink/af_netlink.c:1231 [inline] > netlink_unicast+0x525/0x730 net/netlink/af_netlink.c:1257 > netlink_sendmsg+0xab3/0xe70 net/netlink/af_netlink.c:1803 > sock_sendmsg_nosec net/socket.c:633 [inline] > 
sock_sendmsg+0xca/0x110 net/socket.c:643 > sock_write_iter+0x326/0x600 net/socket.c:846 > call_write_iter include/linux/fs.h:1733 [inline] > do_iter_readv_writev fs/read_write.c:696 [inline] > __do_readv_writev+0xbbc/0x10a0 fs/read_write.c:862 > do_readv_writev+0x13f/0x200 fs/read_write.c:894 > vfs_writev+0x87/0xc0 fs/read_write.c:921 > do_writev+0x110/0x2c0 fs/read_write.c:954 > SYSC_writev fs/read_write.c:1027 [inline] > SyS_writev+0x27/0x30 fs/read_write.c:1024 > entry_SYSCALL_64_fastpath+0x1f/0xc2 > RIP: 0033:0x4458d9 > RSP: 002b:00007f1639006b58 EFLAGS: 00000292 ORIG_RAX: 0000000000000014 > RAX: ffffffffffffffda RBX: 0000000000000019 RCX: 00000000004458d9 > RDX: 0000000000000001 RSI: 00000000207cd000 RDI: 0000000000000019 > RBP: 00000000006e30c0 R08: 0000000000000000 R09: 0000000000000000 > R10: 0000000000000000 R11: 0000000000000292 R12: 0000000000708000 > R13: 0000000000000010 R14: 0000000000000003 R15: 0000000000000000 >
On Tue, Mar 7, 2017 at 6:17 PM, 'David Ahern' via syzkaller <syzkaller@googlegroups.com> wrote: > On 3/7/17 1:43 AM, Dmitry Vyukov wrote: >> This is on c1ae3cfa0e89fa1a7ecc4c99031f5e9ae99d9201. No other kernel >> output from your patch (pr_err). > > Is the below supposed to be from the same qemu instance at the time of > the crash? cpu1 and cpu2 are both supposedly doing a route insert? No, it's all from different instances. >> ------------[ cut here ]------------ >> WARNING: CPU: 1 PID: 30179 at net/ipv6/ip6_fib.c:158 >> rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158 >> Kernel panic - not syncing: panic_on_warn set ... >> >> CPU: 1 PID: 30179 Comm: syz-executor3 Not tainted 4.11.0-rc1+ #310 >> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 >> Call Trace: >> __dump_stack lib/dump_stack.c:16 [inline] >> dump_stack+0x2fb/0x3fd lib/dump_stack.c:52 >> panic+0x20f/0x426 kernel/panic.c:180 >> __warn+0x1c4/0x1e0 kernel/panic.c:541 >> warn_slowpath_null+0x2c/0x40 kernel/panic.c:584 >> rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158 >> rt6_release+0x1ee/0x290 net/ipv6/ip6_fib.c:189 >> fib6_add_rt2node net/ipv6/ip6_fib.c:922 [inline] >> fib6_add+0x1d51/0x3290 net/ipv6/ip6_fib.c:1081 >> __ip6_ins_rt+0x60/0x80 net/ipv6/route.c:948 >> ip6_route_add+0x1a7/0x310 net/ipv6/route.c:2130 >> inet6_rtm_newroute+0x191/0x1b0 net/ipv6/route.c:3294 >> rtnetlink_rcv_msg+0x609/0x860 net/core/rtnetlink.c:4104 >> netlink_rcv_skb+0x2ab/0x390 net/netlink/af_netlink.c:2298 >> rtnetlink_rcv+0x2a/0x40 net/core/rtnetlink.c:4110 >> netlink_unicast_kernel net/netlink/af_netlink.c:1231 [inline] >> netlink_unicast+0x525/0x730 net/netlink/af_netlink.c:1257 >> netlink_sendmsg+0xab3/0xe70 net/netlink/af_netlink.c:1803 >> sock_sendmsg_nosec net/socket.c:633 [inline] >> sock_sendmsg+0xca/0x110 net/socket.c:643 >> sock_write_iter+0x326/0x600 net/socket.c:846 >> call_write_iter include/linux/fs.h:1733 [inline] >> do_iter_readv_writev fs/read_write.c:696 [inline] >> 
__do_readv_writev+0xbbc/0x10a0 fs/read_write.c:862 >> do_readv_writev+0x13f/0x200 fs/read_write.c:894 >> vfs_writev+0x87/0xc0 fs/read_write.c:921 >> do_writev+0x110/0x2c0 fs/read_write.c:954 >> SYSC_writev fs/read_write.c:1027 [inline] >> SyS_writev+0x27/0x30 fs/read_write.c:1024 >> entry_SYSCALL_64_fastpath+0x1f/0xc2 >> RIP: 0033:0x4458d9 >> RSP: 002b:00007f31fcf33b58 EFLAGS: 00000292 ORIG_RAX: 0000000000000014 >> RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 00000000004458d9 >> RDX: 0000000000000001 RSI: 00000000207cd000 RDI: 0000000000000005 >> RBP: 00000000006e30c0 R08: 0000000000000000 R09: 0000000000000000 >> R10: 0000000000000000 R11: 0000000000000292 R12: 0000000000708000 >> R13: 0000000020fad000 R14: 0000000000001000 R15: 0000000000000003 >> >> >> >> ------------[ cut here ]------------ >> WARNING: CPU: 2 PID: 31175 at net/ipv6/ip6_fib.c:158 >> rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158 >> Kernel panic - not syncing: panic_on_warn set ... >> >> CPU: 2 PID: 31175 Comm: syz-executor1 Not tainted 4.11.0-rc1+ #310 >> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 >> Call Trace: >> __dump_stack lib/dump_stack.c:16 [inline] >> dump_stack+0x2fb/0x3fd lib/dump_stack.c:52 >> panic+0x20f/0x426 kernel/panic.c:180 >> __warn+0x1c4/0x1e0 kernel/panic.c:541 >> warn_slowpath_null+0x2c/0x40 kernel/panic.c:584 >> rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158 >> rt6_release+0x1ee/0x290 net/ipv6/ip6_fib.c:189 >> fib6_add_rt2node net/ipv6/ip6_fib.c:922 [inline] >> fib6_add+0x1d51/0x3290 net/ipv6/ip6_fib.c:1081 >> kvm_vm_ioctl_deassign_device: device hasn't been assigned before, so >> cannot be deassigned >> __ip6_ins_rt+0x60/0x80 net/ipv6/route.c:948 >> ip6_route_add+0x1a7/0x310 net/ipv6/route.c:2130 >> inet6_rtm_newroute+0x191/0x1b0 net/ipv6/route.c:3294 >> rtnetlink_rcv_msg+0x609/0x860 net/core/rtnetlink.c:4104 >> netlink_rcv_skb+0x2ab/0x390 net/netlink/af_netlink.c:2298 >> rtnetlink_rcv+0x2a/0x40 net/core/rtnetlink.c:4110 >> 
netlink_unicast_kernel net/netlink/af_netlink.c:1231 [inline] >> netlink_unicast+0x525/0x730 net/netlink/af_netlink.c:1257 >> netlink_sendmsg+0xab3/0xe70 net/netlink/af_netlink.c:1803 >> sock_sendmsg_nosec net/socket.c:633 [inline] >> sock_sendmsg+0xca/0x110 net/socket.c:643 >> sock_write_iter+0x326/0x600 net/socket.c:846 >> call_write_iter include/linux/fs.h:1733 [inline] >> do_iter_readv_writev fs/read_write.c:696 [inline] >> __do_readv_writev+0xbbc/0x10a0 fs/read_write.c:862 >> do_readv_writev+0x13f/0x200 fs/read_write.c:894 >> vfs_writev+0x87/0xc0 fs/read_write.c:921 >> do_writev+0x110/0x2c0 fs/read_write.c:954 >> SYSC_writev fs/read_write.c:1027 [inline] >> SyS_writev+0x27/0x30 fs/read_write.c:1024 >> entry_SYSCALL_64_fastpath+0x1f/0xc2 >> RIP: 0033:0x4458d9 >> RSP: 002b:00007f1639006b58 EFLAGS: 00000292 ORIG_RAX: 0000000000000014 >> RAX: ffffffffffffffda RBX: 0000000000000019 RCX: 00000000004458d9 >> RDX: 0000000000000001 RSI: 00000000207cd000 RDI: 0000000000000019 >> RBP: 00000000006e30c0 R08: 0000000000000000 R09: 0000000000000000 >> R10: 0000000000000000 R11: 0000000000000292 R12: 0000000000708000 >> R13: 0000000000000010 R14: 0000000000000003 R15: 0000000000000000 >> > > -- > You received this message because you are subscribed to the Google Groups "syzkaller" group. > To unsubscribe from this group and stop receiving emails from it, send an email to syzkaller+unsubscribe@googlegroups.com. > For more options, visit https://groups.google.com/d/optout.
On 3/7/17 1:43 AM, Dmitry Vyukov wrote: > This is on c1ae3cfa0e89fa1a7ecc4c99031f5e9ae99d9201. No other kernel > output from your patch (pr_err). > > ------------[ cut here ]------------ > WARNING: CPU: 1 PID: 30179 at net/ipv6/ip6_fib.c:158 > rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158 > Kernel panic - not syncing: panic_on_warn set ... you have panic_on_warn set ... > > CPU: 1 PID: 30179 Comm: syz-executor3 Not tainted 4.11.0-rc1+ #310 > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 > Call Trace: > __dump_stack lib/dump_stack.c:16 [inline] > dump_stack+0x2fb/0x3fd lib/dump_stack.c:52 > panic+0x20f/0x426 kernel/panic.c:180 > __warn+0x1c4/0x1e0 kernel/panic.c:541 > warn_slowpath_null+0x2c/0x40 kernel/panic.c:584 > rt6_rcu_free+0x61/0x70 net/ipv6/ip6_fib.c:158 and this is my WARN_ON in rt6_rcu_free, which shows that an additional change is needed > rt6_release+0x1ee/0x290 net/ipv6/ip6_fib.c:189 > fib6_add_rt2node net/ipv6/ip6_fib.c:922 [inline] The additional change is in fib6_add_rt2node, for the route replace path (whitespace damaged on the copy-paste): @@ -916,6 +919,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, } nsiblings = iter->rt6i_nsiblings; fib6_purge_rt(iter, fn, info->nl_net); + iter->dst.flags &= ~DST_IN_FIB; rt6_release(iter); if (nsiblings) { @@ -926,6 +930,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, if (rt6_qualify_for_ecmp(iter)) { *ins = iter->dst.rt6_next; fib6_purge_rt(iter, fn, info->nl_net); + iter->dst.flags &= ~DST_IN_FIB; rt6_release(iter); nsiblings--; } else {
On 3/7/17 2:21 AM, Dmitry Vyukov wrote:
> I've commented that warning just to see I can obtain more information.
> Then I also got this:
>
> ------------[ cut here ]------------
> WARNING: CPU: 2 PID: 3990 at net/ipv6/ip6_fib.c:991
> fib6_add+0x2e12/0x3290 net/ipv6/ip6_fib.c:991 net/ipv6/ip6_fib.c:991
> Kernel panic - not syncing: panic_on_warn set ...

again panic_on_warn is triggering ...

>
> CPU: 2 PID: 3990 Comm: kworker/2:4 Not tainted 4.11.0-rc1+ #311
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
> Workqueue: ipv6_addrconf addrconf_dad_work
> Call Trace:
> __dump_stack lib/dump_stack.c:16 [inline]
> __dump_stack lib/dump_stack.c:16 [inline] lib/dump_stack.c:52
> dump_stack+0x2fb/0x3fd lib/dump_stack.c:52 lib/dump_stack.c:52
> panic+0x20f/0x426 kernel/panic.c:180 kernel/panic.c:180
> __warn+0x1c4/0x1e0 kernel/panic.c:541 kernel/panic.c:541
> warn_slowpath_null+0x2c/0x40 kernel/panic.c:584 kernel/panic.c:584
> fib6_add+0x2e12/0x3290 net/ipv6/ip6_fib.c:991 net/ipv6/ip6_fib.c:991

on this warning:

/* dst.next really should not be set at this point */
if (rt->dst.next && rt->dst.next->ops->family != AF_INET6) {
	pr_warn("fib6_add: adding rt with bad next -- family %d dst flags %x\n",
		rt->dst.next->ops->family, rt->dst.next->flags);

	WARN_ON(1);
}

You should have seen the pr_warn in the log preceding the WARN_ON dump.

> __ip6_ins_rt+0x60/0x80 net/ipv6/route.c:948 net/ipv6/route.c:948
> ip6_ins_rt+0x19b/0x220 net/ipv6/route.c:959 net/ipv6/route.c:959
> __ipv6_ifa_notify+0x62e/0x7a0 net/ipv6/addrconf.c:5485 net/ipv6/addrconf.c:5485
> ipv6_ifa_notify+0xdf/0x1d0 net/ipv6/addrconf.c:5518 net/ipv6/addrconf.c:5518
> addrconf_dad_completed+0xe6/0x950 net/ipv6/addrconf.c:3983
> net/ipv6/addrconf.c:3983
> addrconf_dad_begin net/ipv6/addrconf.c:3797 [inline]
> addrconf_dad_begin net/ipv6/addrconf.c:3797 [inline] net/ipv6/addrconf.c:3897
> addrconf_dad_work+0x32a/0xea0 net/ipv6/addrconf.c:3897 net/ipv6/addrconf.c:3897
> process_one_work+0xc06/0x1c40 kernel/workqueue.c:2096 kernel/workqueue.c:2096
> worker_thread+0x223/0x19f0 kernel/workqueue.c:2230 kernel/workqueue.c:2230
> kthread+0x334/0x400 kernel/kthread.c:229 kernel/kthread.c:229
> ret_from_fork+0x31/0x40 arch/x86/entry/entry_64.S:430
> arch/x86/entry/entry_64.S:430
>
>
>
> And this without any preceding warnings:
>
> ==================================================================
> BUG: KASAN: slab-out-of-bounds in fib6_age+0x3fd/0x480
> net/ipv6/ip6_fib.c:1787 at addr ffff88004d4fbe54

another ipv4 route in ipv6 fib walk
On Tue, Mar 7, 2017 at 7:03 PM, David Ahern <dsa@cumulusnetworks.com> wrote:
> On 3/7/17 2:21 AM, Dmitry Vyukov wrote:
>> I've commented that warning just to see I can obtain more information.
>> Then I also got this:
>>
>> ------------[ cut here ]------------
>> WARNING: CPU: 2 PID: 3990 at net/ipv6/ip6_fib.c:991
>> fib6_add+0x2e12/0x3290 net/ipv6/ip6_fib.c:991 net/ipv6/ip6_fib.c:991
>> Kernel panic - not syncing: panic_on_warn set ...
>
> again panic_on_warn is triggering ...
>
>>
>> CPU: 2 PID: 3990 Comm: kworker/2:4 Not tainted 4.11.0-rc1+ #311
>> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
>> Workqueue: ipv6_addrconf addrconf_dad_work
>> Call Trace:
>> __dump_stack lib/dump_stack.c:16 [inline]
>> __dump_stack lib/dump_stack.c:16 [inline] lib/dump_stack.c:52
>> dump_stack+0x2fb/0x3fd lib/dump_stack.c:52 lib/dump_stack.c:52
>> panic+0x20f/0x426 kernel/panic.c:180 kernel/panic.c:180
>> __warn+0x1c4/0x1e0 kernel/panic.c:541 kernel/panic.c:541
>> warn_slowpath_null+0x2c/0x40 kernel/panic.c:584 kernel/panic.c:584
>> fib6_add+0x2e12/0x3290 net/ipv6/ip6_fib.c:991 net/ipv6/ip6_fib.c:991
>
> on this warning:
>
> /* dst.next really should not be set at this point */
> if (rt->dst.next && rt->dst.next->ops->family != AF_INET6) {
> pr_warn("fib6_add: adding rt with bad next -- family %d dst
> flags %x\n",
> rt->dst.next->ops->family, rt->dst.next->flags);
>
> WARN_ON(1);
> }
>
> You should have seen the pr_warn in the log preceding the WARN_ON dump.

Right. They all have the same "IPv6: fib6_add: adding rt with bad next -- family 2 dst flags 6"

[ 171.222795] IPv6: fib6_add: adding rt with bad next -- family 2 dst flags 6
[ 171.223809] ------------[ cut here ]------------
[ 171.224407] WARNING: CPU: 3 PID: 27 at net/ipv6/ip6_fib.c:991 fib6_add+0x2e12/0x3290
[ 171.225327] Kernel panic - not syncing: panic_on_warn set ...
[ 171.225327]
[ 171.226066] CPU: 3 PID: 27 Comm: kworker/3:0 Not tainted 4.11.0-rc1+ #311
[ 171.226304] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
[ 171.226304] Workqueue: ipv6_addrconf addrconf_dad_work
[ 171.226304] Call Trace:
[ 171.226304] dump_stack+0x2fb/0x3fd
[ 171.226304] ? arch_local_irq_restore+0x53/0x53
[ 171.226304] ? vprintk_emit+0x566/0x770
[ 171.226304] ? console_unlock+0xf50/0xf50
[ 171.226304] ? vprintk_emit+0x566/0x770
[ 171.226304] ? console_unlock+0xf50/0xf50
[ 171.226304] ? vprintk_emit+0x566/0x770
[ 171.226304] ? console_unlock+0xf50/0xf50
[ 171.226304] ? check_noncircular+0x20/0x20
[ 171.226304] ? trace_hardirqs_on+0xd/0x10
[ 171.226304] ? perf_trace_lock_acquire+0x141/0xa00
[ 171.226304] ? trace_hardirqs_off+0xd/0x10
[ 171.226304] ? quarantine_put+0xea/0x190
[ 171.226304] ? check_noncircular+0x20/0x20
[ 171.236060] ? vprintk_default+0x28/0x30
[ 171.236662] ? vprintk_func+0x47/0x90
[ 171.236662] ? printk+0xc8/0xf9
[ 171.236662] ? load_image_and_restore+0x134/0x134
[ 171.236662] ? pointer+0xac0/0xac0
[ 171.236662] panic+0x20f/0x426
[ 171.236662] ? copy_mm+0x1219/0x1219
[ 171.236662] ? vprintk_func+0x47/0x90
[ 171.236662] ? printk+0xc8/0xf9
[ 171.236662] ? fib6_add+0x2e12/0x3290
[ 171.236662] __warn+0x1c4/0x1e0
[ 171.236662] warn_slowpath_null+0x2c/0x40
[ 171.236662] fib6_add+0x2e12/0x3290
[ 171.236662] ? kasan_check_write+0x14/0x20
[ 171.236662] ? netlink_broadcast_filtered+0x734/0x1380
[ 171.236662] ? fib6_force_start_gc+0xf0/0xf0
[ 171.236662] ? netlink_has_listeners+0x450/0x450
[ 171.236662] ? memcpy+0x45/0x50
[ 171.236662] ? __nla_put+0x37/0x40
[ 171.236662] ? nla_put+0xf9/0x130
[ 171.236662] ? skb_put+0x149/0x1c0
[ 171.236662] ? kasan_check_write+0x14/0x20
[ 171.236662] ? do_raw_write_lock+0xbd/0x1e0
[ 171.236662] __ip6_ins_rt+0x60/0x80
[ 171.236662] ip6_ins_rt+0x19b/0x220
[ 171.236662] ? ip6_route_info_create+0x2380/0x2380
[ 171.236662] ? nlmsg_notify+0xaf/0x160
[ 171.236662] ? rtnl_notify+0xbb/0xe0
[ 171.236662] __ipv6_ifa_notify+0x62e/0x7a0
[ 171.251057] ipv6_ifa_notify+0xdf/0x1d0
[ 171.251057] ? __ipv6_ifa_notify+0x7a0/0x7a0
[ 171.251057] addrconf_dad_completed+0xe6/0x950
[ 171.251057] ? addrconf_verify_work+0x20/0x20
[ 171.251057] ? kasan_check_write+0x14/0x20
[ 171.251057] addrconf_dad_work+0x32a/0xea0
[ 171.251057] ? addrconf_ifdown+0x1ad0/0x1ad0
[ 171.251057] ? rcu_pm_notify+0xc0/0xc0
[ 171.251057] ? wq_update_unbound_numa+0x8d0/0x8d0
[ 171.251057] ? kasan_check_write+0x14/0x20
[ 171.251057] process_one_work+0xc06/0x1c40
[ 171.251057] ? process_one_work+0xb3d/0x1c40
[ 171.251057] ? pwq_dec_nr_in_flight+0x470/0x470
[ 171.251057] ? preempt_notifier_register+0x1f0/0x1f0
[ 171.259856] ? __schedule+0x893/0x22d0
[ 171.259856] ? kasan_check_write+0x14/0x20
[ 171.259856] ? worker_thread+0x47d/0x19f0
[ 171.259856] ? lock_set_class+0xc00/0xc00
[ 171.259856] ? worker_thread+0x467/0x19f0
[ 171.259856] ? lock_acquire+0x630/0x630
[ 171.259856] ? _raw_spin_unlock_irq+0x27/0x70
[ 171.259856] ? check_noncircular+0x20/0x20
[ 171.259856] ? mark_held_locks+0x100/0x100
[ 171.259856] ? trace_hardirqs_on_thunk+0x1a/0x1c
[ 171.259856] ? __schedule+0x22d0/0x22d0
[ 171.259856] ? do_raw_spin_trylock+0x1a0/0x1a0
[ 171.259856] ? do_raw_spin_lock+0xbd/0x1f0
[ 171.259856] worker_thread+0x223/0x19f0
[ 171.259856] ? process_one_work+0x1c40/0x1c40
[ 171.259856] ? lock_repin_lock+0x4a0/0x4a0
[ 171.259856] ? unwind_dump.isra.5.part.6+0x320/0x320
[ 171.259856] ? kasan_check_write+0x14/0x20
[ 171.259856] ? finish_task_switch+0x1ea/0x740
[ 171.259856] ? finish_task_switch+0x196/0x740
[ 171.259856] ? preempt_notifier_register+0x1f0/0x1f0
[ 171.259856] ? __schedule+0x893/0x22d0
[ 171.259856] ? lockdep_count_backward_deps+0x480/0x480
[ 171.259856] ? ret_from_fork+0x31/0x40
[ 171.259856] ? do_raw_spin_lock+0xbd/0x1f0
[ 171.259856] ? complete+0xbf/0x190
[ 171.259856] ? register_lock_class+0x1c30/0x1c30
[ 171.276560] ? __wake_up_common+0xb4/0x150
[ 171.276560] ? rcu_pm_notify+0xc0/0xc0
[ 171.276560] ? __schedule+0x22d0/0x22d0
[ 171.276560] ? __init_waitqueue_head+0x8a/0x120
[ 171.276560] ? __wake_up_bit+0x290/0x290
[ 171.279715] ? preempt_notifier_register+0x1f0/0x1f0
[ 171.279715] ? __kthread_parkme+0x173/0x240
[ 171.279715] kthread+0x334/0x400
[ 171.279715] ? process_one_work+0x1c40/0x1c40
[ 171.279715] ? kthread_create_on_node+0x110/0x110
[ 171.279715] ret_from_fork+0x31/0x40
[ 171.279715] Dumping ftrace buffer:
[ 171.279715] (ftrace buffer empty)
[ 171.279715] Kernel Offset: disabled
[ 171.279715] Rebooting in 86400 seconds..

>> __ip6_ins_rt+0x60/0x80 net/ipv6/route.c:948 net/ipv6/route.c:948
>> ip6_ins_rt+0x19b/0x220 net/ipv6/route.c:959 net/ipv6/route.c:959
>> __ipv6_ifa_notify+0x62e/0x7a0 net/ipv6/addrconf.c:5485 net/ipv6/addrconf.c:5485
>> ipv6_ifa_notify+0xdf/0x1d0 net/ipv6/addrconf.c:5518 net/ipv6/addrconf.c:5518
>> addrconf_dad_completed+0xe6/0x950 net/ipv6/addrconf.c:3983
>> net/ipv6/addrconf.c:3983
>> addrconf_dad_begin net/ipv6/addrconf.c:3797 [inline]
>> addrconf_dad_begin net/ipv6/addrconf.c:3797 [inline] net/ipv6/addrconf.c:3897
>> addrconf_dad_work+0x32a/0xea0 net/ipv6/addrconf.c:3897 net/ipv6/addrconf.c:3897
>> process_one_work+0xc06/0x1c40 kernel/workqueue.c:2096 kernel/workqueue.c:2096
>> worker_thread+0x223/0x19f0 kernel/workqueue.c:2230 kernel/workqueue.c:2230
>> kthread+0x334/0x400 kernel/kthread.c:229 kernel/kthread.c:229
>> ret_from_fork+0x31/0x40 arch/x86/entry/entry_64.S:430
>> arch/x86/entry/entry_64.S:430
>>
>>
>>
>> And this without any preceding warnings:
>>
>> ==================================================================
>> BUG: KASAN: slab-out-of-bounds in fib6_age+0x3fd/0x480
>> net/ipv6/ip6_fib.c:1787 at addr ffff88004d4fbe54
>
> another ipv4 route in ipv6 fib walk
On 3/7/17 2:21 AM, Dmitry Vyukov wrote:
> ------------[ cut here ]------------
> WARNING: CPU: 2 PID: 3990 at net/ipv6/ip6_fib.c:991
> fib6_add+0x2e12/0x3290 net/ipv6/ip6_fib.c:991 net/ipv6/ip6_fib.c:991
> Kernel panic - not syncing: panic_on_warn set ...
>
> CPU: 2 PID: 3990 Comm: kworker/2:4 Not tainted 4.11.0-rc1+ #311
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
> Workqueue: ipv6_addrconf addrconf_dad_work
> Call Trace:
> __dump_stack lib/dump_stack.c:16 [inline]
> __dump_stack lib/dump_stack.c:16 [inline] lib/dump_stack.c:52
> dump_stack+0x2fb/0x3fd lib/dump_stack.c:52 lib/dump_stack.c:52
> panic+0x20f/0x426 kernel/panic.c:180 kernel/panic.c:180
> __warn+0x1c4/0x1e0 kernel/panic.c:541 kernel/panic.c:541
> warn_slowpath_null+0x2c/0x40 kernel/panic.c:584 kernel/panic.c:584
> fib6_add+0x2e12/0x3290 net/ipv6/ip6_fib.c:991 net/ipv6/ip6_fib.c:991
> __ip6_ins_rt+0x60/0x80 net/ipv6/route.c:948 net/ipv6/route.c:948
> ip6_ins_rt+0x19b/0x220 net/ipv6/route.c:959 net/ipv6/route.c:959
> __ipv6_ifa_notify+0x62e/0x7a0 net/ipv6/addrconf.c:5485 net/ipv6/addrconf.c:5485
> ipv6_ifa_notify+0xdf/0x1d0 net/ipv6/addrconf.c:5518 net/ipv6/addrconf.c:5518
> addrconf_dad_completed+0xe6/0x950 net/ipv6/addrconf.c:3983
> net/ipv6/addrconf.c:3983
> addrconf_dad_begin net/ipv6/addrconf.c:3797 [inline]

Similarly for this one.
diff --git a/include/net/dst.h b/include/net/dst.h
index 049af33da3b6..d164eb8ceab8 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -58,6 +58,7 @@ struct dst_entry {
 #define DST_XFRM_TUNNEL		0x0080
 #define DST_XFRM_QUEUE		0x0100
 #define DST_METADATA		0x0200
+#define DST_IN_FIB		0x0400
 
 	short			error;
 
diff --git a/net/core/dst.c b/net/core/dst.c
index 960e503b5a52..c98447fe8510 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -232,6 +232,9 @@ void __dst_free(struct dst_entry *dst)
 {
 	spin_lock_bh(&dst_garbage.lock);
 	___dst_free(dst);
+if (dst->flags & DST_IN_FIB)
+	pr_warn("dst %p is marked as in fib\n", dst);
+//WARN_ON(dst->flags & DST_IN_FIB);
 	dst->next = dst_garbage.list;
 	dst_garbage.list = dst;
 	if (dst_garbage.timer_inc > DST_GC_INC) {
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index e4266746e4a2..a4d55ba00a43 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -155,6 +155,7 @@ static void node_free(struct fib6_node *fn)
 
 static void rt6_rcu_free(struct rt6_info *rt)
 {
+WARN_ON(rt->dst.flags & DST_IN_FIB);
 	call_rcu(&rt->dst.rcu_head, dst_rcu_free);
 }
 
@@ -878,6 +879,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 			return err;
 
 		rt->dst.rt6_next = iter;
+		rt->dst.flags |= DST_IN_FIB;
 		*ins = rt;
 		rt->rt6i_node = fn;
 		atomic_inc(&rt->rt6i_ref);
@@ -907,6 +909,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 		*ins = rt;
 		rt->rt6i_node = fn;
 		rt->dst.rt6_next = iter->dst.rt6_next;
+		rt->dst.flags |= DST_IN_FIB;
 		atomic_inc(&rt->rt6i_ref);
 		if (!info->skip_notify)
 			inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
@@ -974,6 +977,20 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 			 !atomic_read(&rt->dst.__refcnt)))
 		return -EINVAL;
 
+if (rt->dst.ops->family != AF_INET6) {
+	pr_warn("fib6_add: adding rt with family is %d dst flags %x\n",
+		rt->dst.ops->family, rt->dst.flags);
+
+	WARN_ON(1);
+}
+/* dst.next really should not be set at this point */
+if (rt->dst.next && rt->dst.next->ops->family != AF_INET6) {
+	pr_warn("fib6_add: adding rt with bad next -- family %d dst flags %x\n",
+		rt->dst.next->ops->family, rt->dst.next->flags);
+
+	WARN_ON(1);
+}
+
 	if (info->nlh) {
 		if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
 			allow_create = 0;
@@ -1444,6 +1461,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
 	read_unlock(&net->ipv6.fib6_walker_lock);
 
 	rt->dst.rt6_next = NULL;
+	rt->dst.flags &= ~DST_IN_FIB;
 
 	/* If it was last route, expunge its radix tree node */
 	if (!fn->leaf) {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 229bfcc451ef..e91d7871ccfc 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1135,6 +1135,8 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 
 		dst_hold(&uncached_rt->dst);
 
+		uncached_rt->dst.flags &= ~DST_IN_FIB;
+
 		trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
 		return uncached_rt;
 
@@ -1160,6 +1162,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 			dst_release(&rt->dst);
 		}
 
+		pcpu_rt->dst.flags &= ~DST_IN_FIB;
 		trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
 		return pcpu_rt;
 