diff mbox

net: heap out-of-bounds in fib6_clean_node/rt6_fill_node/fib6_age/fib6_prune_clone

Message ID 328b1fa7-2d97-6ae3-3b87-e33a0d564ad9@cumulusnetworks.com
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

David Ahern March 7, 2017, 6:43 p.m. UTC
On 3/7/17 11:13 AM, Dmitry Vyukov wrote:
>> on this warning:
>>
>> /* dst.next really should not be set at this point */
>> if (rt->dst.next && rt->dst.next->ops->family != AF_INET6) {
>>         pr_warn("fib6_add: adding rt with bad next -- family %d dst
>> flags %x\n",
>>                 rt->dst.next->ops->family, rt->dst.next->flags);
>>
>>         WARN_ON(1);
>> }
>>
>> You should have seen the pr_warn in the log preceding the WARN_ON dump.
> 
> Right. They all have the same "IPv6: fib6_add: adding rt with bad next
> -- family 2 dst flags 6"

remove the previous changes and try the attached.

Comments

Dmitry Vyukov March 7, 2017, 7:02 p.m. UTC | #1
On Tue, Mar 7, 2017 at 7:43 PM, David Ahern <dsa@cumulusnetworks.com> wrote:
> On 3/7/17 11:13 AM, Dmitry Vyukov wrote:
>>> on this warning:
>>>
>>> /* dst.next really should not be set at this point */
>>> if (rt->dst.next && rt->dst.next->ops->family != AF_INET6) {
>>>         pr_warn("fib6_add: adding rt with bad next -- family %d dst
>>> flags %x\n",
>>>                 rt->dst.next->ops->family, rt->dst.next->flags);
>>>
>>>         WARN_ON(1);
>>> }
>>>
>>> You should have seen the pr_warn in the log preceding the WARN_ON dump.
>>
>> Right. They all have the same "IPv6: fib6_add: adding rt with bad next
>> -- family 2 dst flags 6"
>
> remove the previous changes and try the attached.


Doing this now.
FWIW I've also applied your last patch with missing "iter->dst.flags
&= ~DST_IN_FIB;" and restored the warning in rt6_rcu_free and it did
not fire (in a limited run). I only saw the "WARNING in fib6_add" that
I already reported.
Dmitry Vyukov March 7, 2017, 7:30 p.m. UTC | #2
On Tue, Mar 7, 2017 at 8:02 PM, Dmitry Vyukov <dvyukov@google.com> wrote:
> On Tue, Mar 7, 2017 at 7:43 PM, David Ahern <dsa@cumulusnetworks.com> wrote:
>> On 3/7/17 11:13 AM, Dmitry Vyukov wrote:
>>>> on this warning:
>>>>
>>>> /* dst.next really should not be set at this point */
>>>> if (rt->dst.next && rt->dst.next->ops->family != AF_INET6) {
>>>>         pr_warn("fib6_add: adding rt with bad next -- family %d dst
>>>> flags %x\n",
>>>>                 rt->dst.next->ops->family, rt->dst.next->flags);
>>>>
>>>>         WARN_ON(1);
>>>> }
>>>>
>>>> You should have seen the pr_warn in the log preceding the WARN_ON dump.
>>>
>>> Right. They all have the same "IPv6: fib6_add: adding rt with bad next
>>> -- family 2 dst flags 6"
>>
>> remove the previous changes and try the attached.
>
>
> Doing this now.
> FWIW I've also applied your last patch with missing "iter->dst.flags
> &= ~DST_IN_FIB;" and restored the warning in rt6_rcu_free and it did
> not fire (in a limited run). I only saw the "WARNING in fib6_add" that
> I already reported.


So far I've hit only:
[ 1103.840031] BUG: KASAN: slab-out-of-bounds in fib6_age+0x3fd/0x480
at addr ffff8800799d2254
without any preceding warnings.
But note that since the kernel is heavily stressed I can only reliably
get any pr_err output if it happens right before BUG/WARNING. Anything
that happens minutes before will be lost because there are tons of
output.
Dmitry Vyukov March 7, 2017, 8 p.m. UTC | #3
On Tue, Mar 7, 2017 at 8:30 PM, Dmitry Vyukov <dvyukov@google.com> wrote:
>>> On 3/7/17 11:13 AM, Dmitry Vyukov wrote:
>>>>> on this warning:
>>>>>
>>>>> /* dst.next really should not be set at this point */
>>>>> if (rt->dst.next && rt->dst.next->ops->family != AF_INET6) {
>>>>>         pr_warn("fib6_add: adding rt with bad next -- family %d dst
>>>>> flags %x\n",
>>>>>                 rt->dst.next->ops->family, rt->dst.next->flags);
>>>>>
>>>>>         WARN_ON(1);
>>>>> }
>>>>>
>>>>> You should have seen the pr_warn in the log preceding the WARN_ON dump.
>>>>
>>>> Right. They all have the same "IPv6: fib6_add: adding rt with bad next
>>>> -- family 2 dst flags 6"
>>>
>>> remove the previous changes and try the attached.
>>
>>
>> Doing this now.
>> FWIW I've also applied your last patch with missing "iter->dst.flags
>> &= ~DST_IN_FIB;" and restored the warning in rt6_rcu_free and it did
>> not fire (in a limited run). I only saw the "WARNING in fib6_add" that
>> I already reported.
>
>
> So far I've hit only:
> [ 1103.840031] BUG: KASAN: slab-out-of-bounds in fib6_age+0x3fd/0x480
> at addr ffff8800799d2254
> without any preceding warnings.
> But note that since the kernel is heavily stressed I can only reliably
> get any pr_err output if it happens right before BUG/WARNING. Anything
> that happens minutes before will be lost because there are tons of
> output.



So far 6 "KASAN: slab-out-of-bounds Read in fib6_age" but no other warnings.
Dmitry Vyukov March 8, 2017, 11:55 a.m. UTC | #4
On Tue, Mar 7, 2017 at 9:00 PM, Dmitry Vyukov <dvyukov@google.com> wrote:
> On Tue, Mar 7, 2017 at 8:30 PM, Dmitry Vyukov <dvyukov@google.com> wrote:
>>>> On 3/7/17 11:13 AM, Dmitry Vyukov wrote:
>>>>>> on this warning:
>>>>>>
>>>>>> /* dst.next really should not be set at this point */
>>>>>> if (rt->dst.next && rt->dst.next->ops->family != AF_INET6) {
>>>>>>         pr_warn("fib6_add: adding rt with bad next -- family %d dst
>>>>>> flags %x\n",
>>>>>>                 rt->dst.next->ops->family, rt->dst.next->flags);
>>>>>>
>>>>>>         WARN_ON(1);
>>>>>> }
>>>>>>
>>>>>> You should have seen the pr_warn in the log preceding the WARN_ON dump.
>>>>>
>>>>> Right. They all have the same "IPv6: fib6_add: adding rt with bad next
>>>>> -- family 2 dst flags 6"
>>>>
>>>> remove the previous changes and try the attached.
>>>
>>>
>>> Doing this now.
>>> FWIW I've also applied your last patch with missing "iter->dst.flags
>>> &= ~DST_IN_FIB;" and restored the warning in rt6_rcu_free and it did
>>> not fire (in a limited run). I only saw the "WARNING in fib6_add" that
>>> I already reported.
>>
>>
>> So far I've hit only:
>> [ 1103.840031] BUG: KASAN: slab-out-of-bounds in fib6_age+0x3fd/0x480
>> at addr ffff8800799d2254
>> without any preceding warnings.
>> But note that since the kernel is heavily stressed I can only reliably
>> get any pr_err output if it happens right before BUG/WARNING. Anything
>> that happens minutes before will be lost because there are tons of
>> output.
>
>
>
> So far 6 "KASAN: slab-out-of-bounds Read in fib6_age" but no other warnings.


I've got a bunch of the crashes that I was getting previously, but no
new warnings.
Dmitry Vyukov March 27, 2017, 12:42 p.m. UTC | #5
On Wed, Mar 8, 2017 at 12:55 PM, Dmitry Vyukov <dvyukov@google.com> wrote:
> On Tue, Mar 7, 2017 at 9:00 PM, Dmitry Vyukov <dvyukov@google.com> wrote:
>> On Tue, Mar 7, 2017 at 8:30 PM, Dmitry Vyukov <dvyukov@google.com> wrote:
>>>>> On 3/7/17 11:13 AM, Dmitry Vyukov wrote:
>>>>>>> on this warning:
>>>>>>>
>>>>>>> /* dst.next really should not be set at this point */
>>>>>>> if (rt->dst.next && rt->dst.next->ops->family != AF_INET6) {
>>>>>>>         pr_warn("fib6_add: adding rt with bad next -- family %d dst
>>>>>>> flags %x\n",
>>>>>>>                 rt->dst.next->ops->family, rt->dst.next->flags);
>>>>>>>
>>>>>>>         WARN_ON(1);
>>>>>>> }
>>>>>>>
>>>>>>> You should have seen the pr_warn in the log preceding the WARN_ON dump.
>>>>>>
>>>>>> Right. They all have the same "IPv6: fib6_add: adding rt with bad next
>>>>>> -- family 2 dst flags 6"
>>>>>
>>>>> remove the previous changes and try the attached.
>>>>
>>>>
>>>> Doing this now.
>>>> FWIW I've also applied your last patch with missing "iter->dst.flags
>>>> &= ~DST_IN_FIB;" and restored the warning in rt6_rcu_free and it did
>>>> not fire (in a limited run). I only saw the "WARNING in fib6_add" that
>>>> I already reported.
>>>
>>>
>>> So far I've hit only:
>>> [ 1103.840031] BUG: KASAN: slab-out-of-bounds in fib6_age+0x3fd/0x480
>>> at addr ffff8800799d2254
>>> without any preceding warnings.
>>> But note that since the kernel is heavily stressed I can only reliably
>>> get any pr_err output if it happens right before BUG/WARNING. Anything
>>> that happens minutes before will be lost because there are tons of
>>> output.
>>
>>
>>
>> So far 6 "KASAN: slab-out-of-bounds Read in fib6_age" but no other warnings.
>
>
> I've got a bunch of the crashes that I was getting previously, but no
> new warnings.



A friendly ping. This still happens all the time for us.

I also see the following warning, not sure if it's related or not:

on 0dc82fa59b9d82469799c354d3307d48e13d5d5e:

#if RT6_DEBUG >= 2
        if (rt->dst.obsolete > 0) {
                WARN_ON(fn);
                return -ENOENT;
        }
#endif

------------[ cut here ]------------
WARNING: CPU: 1 PID: 23535 at net/ipv6/ip6_fib.c:1472
fib6_del+0x923/0x14d0 net/ipv6/ip6_fib.c:1472
CPU: 1 PID: 23535 Comm: syz-executor3 Not tainted 4.11.0-rc3+ #517
Hardware name: Google Google Compute Engine/Google Compute Engine,
BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:16 [inline]
 dump_stack+0x2ee/0x3ef lib/dump_stack.c:52
 panic+0x1fb/0x412 kernel/panic.c:180
 __warn+0x1c4/0x1e0 kernel/panic.c:541
 warn_slowpath_null+0x2c/0x40 kernel/panic.c:584
 fib6_del+0x923/0x14d0 net/ipv6/ip6_fib.c:1472
 __ip6_del_rt+0x100/0x160 net/ipv6/route.c:2153
 ip6_del_rt+0x140/0x1b0 net/ipv6/route.c:2166
 __ipv6_ifa_notify+0x269/0x780 net/ipv6/addrconf.c:5506
 ipv6_ifa_notify+0xdf/0x1d0 net/ipv6/addrconf.c:5518
 ipv6_del_addr+0x62b/0xa80 net/ipv6/addrconf.c:1175
 inet6_addr_del+0x348/0x5b0 net/ipv6/addrconf.c:2853
 addrconf_del_ifaddr+0x154/0x1e0 net/ipv6/addrconf.c:2898
 inet6_ioctl+0x86/0x1e0 net/ipv6/af_inet6.c:525
 sock_do_ioctl+0x65/0xb0 net/socket.c:906
 sock_ioctl+0x2c2/0x440 net/socket.c:1004
 vfs_ioctl fs/ioctl.c:45 [inline]
 do_vfs_ioctl+0x1bf/0x1790 fs/ioctl.c:685
 SYSC_ioctl fs/ioctl.c:700 [inline]
 SyS_ioctl+0x8f/0xc0 fs/ioctl.c:691
 entry_SYSCALL_64_fastpath+0x1f/0xc2
RIP: 0033:0x44fb79
RSP: 002b:00007f4b299bfb58 EFLAGS: 00000212 ORIG_RAX: 0000000000000010
RAX: ffffffffffffffda RBX: 0000000000008936 RCX: 000000000044fb79
RDX: 0000000020000000 RSI: 0000000000008936 RDI: 000000000000001a
RBP: 000000000000001a R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000212 R12: 0000000000708000
R13: 0000000000000001 R14: 0000000000000000 R15: 0000000000000000
David Ahern March 27, 2017, 1:57 p.m. UTC | #6
On 3/27/17 6:42 AM, Dmitry Vyukov wrote:
> A friendly ping. This still happens all the time for us.

Haven't looked at this in a couple of weeks. I have syzkaller installed
on a machine locally and never was able to reproduce this ipv6 problem.
I am using a jessie rootfs; from the syzkaller files I take it you are
using wheezy. Should not matter but as I recall there are differences in
sysctl settings. Regardless, can you send me the output of 'sysctl
net.ipv6'?

It is spring break week here, and I am taking a couple of days off. With
netdev next week, I realistically won't have time to come back to this
for 2-3 weeks.
Andrey Konovalov April 18, 2017, 8:43 p.m. UTC | #7
Hi!

I've finally managed to reproduce one of the crashes on commit
4f7d029b9bf009fbee76bb10c0c4351a1870d2f3 (4.11-rc7).

I'm not sure if this bug has the same root cause as the first one
reported in this thread, but it definitely has to do with ipv6
routing.

C reproducer, syzkaller program and my .config are attached.

Thanks!

kasan: CONFIG_KASAN_INLINE enabled
kasan: GPF could be caused by NULL-ptr deref or user memory access
general protection fault: 0000 [#1] SMP KASAN
Modules linked in:
CPU: 1 PID: 4035 Comm: a.out Not tainted 4.11.0-rc7+ #250
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
task: ffff880069809600 task.stack: ffff880062dc8000
RIP: 0010:ip6_rt_cache_alloc+0xa6/0x560 net/ipv6/route.c:975
RSP: 0018:ffff880062dced30 EFLAGS: 00010206
RAX: dffffc0000000000 RBX: ffff8800670561c0 RCX: 0000000000000006
RDX: 0000000000000003 RSI: ffff880062dcfb28 RDI: 0000000000000018
RBP: ffff880062dced68 R08: 0000000000000001 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: ffff880062dcfb28 R14: dffffc0000000000 R15: 0000000000000000
FS:  00007feebe37e7c0(0000) GS:ffff88006cb00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000205a0fe4 CR3: 000000006b5c9000 CR4: 00000000000006e0
Call Trace:
 ip6_pol_route+0x1512/0x1f20 net/ipv6/route.c:1128
 ip6_pol_route_output+0x4c/0x60 net/ipv6/route.c:1212
 fib6_rule_action+0x261/0x8a0 net/ipv6/fib6_rules.c:100
 fib_rules_lookup+0x3be/0xbc0 net/core/fib_rules.c:265
 fib6_rule_lookup+0x175/0x360 net/ipv6/fib6_rules.c:44
 ip6_route_output_flags+0x260/0x2f0 net/ipv6/route.c:1240
 ip6_route_output ./include/net/ip6_route.h:79
 ip6_dst_lookup_tail+0xd5e/0x18b0 net/ipv6/ip6_output.c:959
 ip6_dst_lookup_flow+0xb1/0x260 net/ipv6/ip6_output.c:1082
 rawv6_sendmsg+0x11b2/0x42e0 net/ipv6/raw.c:903
 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:762
 sock_sendmsg_nosec net/socket.c:633
 sock_sendmsg+0xca/0x110 net/socket.c:643
 SYSC_sendto+0x660/0x810 net/socket.c:1696
 SyS_sendto+0x40/0x50 net/socket.c:1664
 entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:204
RIP: 0033:0x7feebda90b79
RSP: 002b:000000000072fee8 EFLAGS: 00000206 ORIG_RAX: 000000000000002c
RAX: ffffffffffffffda RBX: 00007ffe1f920180 RCX: 00007feebda90b79
RDX: 0000000000000000 RSI: 0000000020fd0fd0 RDI: 0000000000000004
RBP: 0000000000400f30 R08: 00000000205a0fe4 R09: 000000000000001c
R10: 0000000000000800 R11: 0000000000000206 R12: 0000000000000000
R13: 00007ffe1f920180 R14: 0000000000000000 R15: 0000000000000000
Code: 03 80 3c 02 00 0f 85 37 04 00 00 4d 8b 64 24 40 e8 90 dd 82 fd
49 8d 7c 24 18 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <80>
3c 02 00 0f 85 61 04 00 00 49 8b 74 24 18 48 b8 00 00 00 00
RIP: ip6_rt_cache_alloc+0xa6/0x560 RSP: ffff880062dced30
---[ end trace 9f58077ffa8cf9c0 ]---
David Ahern April 18, 2017, 11:20 p.m. UTC | #8
On 4/18/17 2:43 PM, Andrey Konovalov wrote:
> I've finally managed to reproduce one of the crashes on commit
> 4f7d029b9bf009fbee76bb10c0c4351a1870d2f3 (4.11-rc7).
> 
> I'm not sure if this bug has the same root cause as the first one
> reported in this thread, but it definitely has to do with ipv6
> routing.
> 
> C reproducer, syzkaller program and my .config are attached.
> 
> Thanks!
> 
> kasan: CONFIG_KASAN_INLINE enabled
> kasan: GPF could be caused by NULL-ptr deref or user memory access
> general protection fault: 0000 [#1] SMP KASAN
> Modules linked in:
> CPU: 1 PID: 4035 Comm: a.out Not tainted 4.11.0-rc7+ #250
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
> task: ffff880069809600 task.stack: ffff880062dc8000
> RIP: 0010:ip6_rt_cache_alloc+0xa6/0x560 net/ipv6/route.c:975

From a quick glance seems to be a different bug than Dmitry's.
Andrey Konovalov April 19, 2017, 1:09 a.m. UTC | #9
On Wed, Apr 19, 2017 at 1:20 AM, David Ahern <dsa@cumulusnetworks.com> wrote:
> On 4/18/17 2:43 PM, Andrey Konovalov wrote:
>> I've finally managed to reproduce one of the crashes on commit
>> 4f7d029b9bf009fbee76bb10c0c4351a1870d2f3 (4.11-rc7).
>>
>> I'm not sure if this bug has the same root cause as the first one
>> reported in this thread, but it definitely has to do with ipv6
>> routing.
>>
>> C reproducer, syzkaller program and my .config are attached.

Just FYI, the reproducer uses interface number 9 inside a user
namespace, which is apparently ip6gre0.

1: lo: <LOOPBACK> mtu 65536 qdisc noop state DOWN mode DEFAULT qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
2: tunl0@NONE: <NOARP> mtu 1480 qdisc noop state DOWN mode DEFAULT qlen 1000
    link/ipip 0.0.0.0 brd 0.0.0.0
3: gre0@NONE: <NOARP> mtu 1476 qdisc noop state DOWN mode DEFAULT qlen 1000
    link/gre 0.0.0.0 brd 0.0.0.0
4: gretap0@NONE: <BROADCAST,MULTICAST> mtu 1462 qdisc noop state DOWN
mode DEFAULT qlen 1000
    link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff
5: ip_vti0@NONE: <NOARP> mtu 1332 qdisc noop state DOWN mode DEFAULT qlen 1000
    link/ipip 0.0.0.0 brd 0.0.0.0
6: ip6_vti0@NONE: <NOARP> mtu 1500 qdisc noop state DOWN mode DEFAULT qlen 1000
    link/tunnel6 :: brd ::
7: sit0@NONE: <NOARP> mtu 1480 qdisc noop state DOWN mode DEFAULT qlen 1000
    link/sit 0.0.0.0 brd 0.0.0.0
8: ip6tnl0@NONE: <NOARP> mtu 1452 qdisc noop state DOWN mode DEFAULT qlen 1000
    link/tunnel6 :: brd ::
9: ip6gre0@NONE: <NOARP> mtu 1448 qdisc noop state DOWN mode DEFAULT qlen 1000
    link/[823] 00:00:00:00:00:00:00:00:00:00:00:00:00:00:00:00 brd
00:00:00:00:00:00:00:00:00:00:00:00:00:00:00:00

>>
>> Thanks!
>>
>> kasan: CONFIG_KASAN_INLINE enabled
>> kasan: GPF could be caused by NULL-ptr deref or user memory access
>> general protection fault: 0000 [#1] SMP KASAN
>> Modules linked in:
>> CPU: 1 PID: 4035 Comm: a.out Not tainted 4.11.0-rc7+ #250
>> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
>> task: ffff880069809600 task.stack: ffff880062dc8000
>> RIP: 0010:ip6_rt_cache_alloc+0xa6/0x560 net/ipv6/route.c:975
>
> From a quick glance seems to be a different bug than Dmitry's.

It might be.

>
David Ahern April 19, 2017, 4:09 p.m. UTC | #10
On 4/18/17 2:43 PM, Andrey Konovalov wrote:
> Hi!
> 
> I've finally managed to reproduce one of the crashes on commit
> 4f7d029b9bf009fbee76bb10c0c4351a1870d2f3 (4.11-rc7).
> 
> I'm not sure if this bug has the same root cause as the first one
> reported in this thread, but it definitely has to do with ipv6
> routing.
> 
> C reproducer, syzkaller program and my .config are attached.

built a kernel with that config. booted the vm. ran the program. nada.

strace is showing:

clone(child_stack=0x72ffb0,
flags=CLONE_NEWUTS|CLONE_NEWUSER|CLONE_NEWPID|CLONE_NEWNET) = -1 EINVAL
(Invalid argument)
Andrey Konovalov April 19, 2017, 4:12 p.m. UTC | #11
On Wed, Apr 19, 2017 at 6:09 PM, David Ahern <dsa@cumulusnetworks.com> wrote:
> On 4/18/17 2:43 PM, Andrey Konovalov wrote:
>> Hi!
>>
>> I've finally managed to reproduce one of the crashes on commit
>> 4f7d029b9bf009fbee76bb10c0c4351a1870d2f3 (4.11-rc7).
>>
>> I'm not sure if this bug has the same root cause as the first one
>> reported in this thread, but it definitely has to do with ipv6
>> routing.
>>
>> C reproducer, syzkaller program and my .config are attached.
>
> built a kernel with that config. booted the vm. ran the program. nada.
>
> strace is showing:
>
> clone(child_stack=0x72ffb0,
> flags=CLONE_NEWUTS|CLONE_NEWUSER|CLONE_NEWPID|CLONE_NEWNET) = -1 EINVAL
> (Invalid argument)
>

That's weird. I usually see this when I have CONFIG_USER_NS disabled.

Anyway, I just finished simplifying the reproducer. Give this one a try.
David Ahern April 19, 2017, 4:29 p.m. UTC | #12
On 4/19/17 10:12 AM, Andrey Konovalov wrote:
> That's weird. I usually see this when I have CONFIG_USER_NS disabled.

I bungled the movement of .config between servers. reproduced. will
investigate.
David Ahern April 25, 2017, 3:51 p.m. UTC | #13
On 4/18/17 2:43 PM, Andrey Konovalov wrote:
> kasan: CONFIG_KASAN_INLINE enabled
> kasan: GPF could be caused by NULL-ptr deref or user memory access
> general protection fault: 0000 [#1] SMP KASAN
> Modules linked in:
> CPU: 1 PID: 4035 Comm: a.out Not tainted 4.11.0-rc7+ #250
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
> task: ffff880069809600 task.stack: ffff880062dc8000
> RIP: 0010:ip6_rt_cache_alloc+0xa6/0x560 net/ipv6/route.c:975
> RSP: 0018:ffff880062dced30 EFLAGS: 00010206
> RAX: dffffc0000000000 RBX: ffff8800670561c0 RCX: 0000000000000006
> RDX: 0000000000000003 RSI: ffff880062dcfb28 RDI: 0000000000000018
> RBP: ffff880062dced68 R08: 0000000000000001 R09: 0000000000000000
> R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
> R13: ffff880062dcfb28 R14: dffffc0000000000 R15: 0000000000000000
> FS:  00007feebe37e7c0(0000) GS:ffff88006cb00000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 00000000205a0fe4 CR3: 000000006b5c9000 CR4: 00000000000006e0
> Call Trace:
>  ip6_pol_route+0x1512/0x1f20 net/ipv6/route.c:1128

This one is fixed by:

commit 557c44be917c322860665be3d28376afa84aa936
Author: David Ahern <dsa@cumulusnetworks.com>
Date:   Wed Apr 19 14:19:43 2017 -0700

    net: ipv6: RTF_PCPU should not be settable from userspace
diff mbox

Patch

diff --git a/include/net/dst.h b/include/net/dst.h
index 049af33da3b6..d164eb8ceab8 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -58,6 +58,7 @@  struct dst_entry {
 #define DST_XFRM_TUNNEL		0x0080
 #define DST_XFRM_QUEUE		0x0100
 #define DST_METADATA		0x0200
+#define DST_IN_FIB		0x0400
 
 	short			error;
 
diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index c84b3287e38b..cd0df8f76420 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -15,6 +15,7 @@  struct dst_ops {
 	unsigned short		family;
 	unsigned int		gc_thresh;
 
+	void			(*dump)(struct dst_entry *);
 	int			(*gc)(struct dst_ops *ops);
 	struct dst_entry *	(*check)(struct dst_entry *, __u32 cookie);
 	unsigned int		(*default_advmss)(const struct dst_entry *);
diff --git a/net/core/dst.c b/net/core/dst.c
index 960e503b5a52..c98447fe8510 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -232,6 +232,9 @@  void __dst_free(struct dst_entry *dst)
 {
 	spin_lock_bh(&dst_garbage.lock);
 	___dst_free(dst);
+if (dst->flags & DST_IN_FIB)
+	pr_warn("dst %p is marked as in fib\n", dst);
+//WARN_ON(dst->flags & DST_IN_FIB);
 	dst->next = dst_garbage.list;
 	dst_garbage.list = dst;
 	if (dst_garbage.timer_inc > DST_GC_INC) {
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index e4266746e4a2..d4539d9a463e 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -155,6 +155,7 @@  static void node_free(struct fib6_node *fn)
 
 static void rt6_rcu_free(struct rt6_info *rt)
 {
+WARN_ON(rt->dst.flags & DST_IN_FIB);
 	call_rcu(&rt->dst.rcu_head, dst_rcu_free);
 }
 
@@ -878,6 +879,7 @@  static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 			return err;
 
 		rt->dst.rt6_next = iter;
+		rt->dst.flags |= DST_IN_FIB;
 		*ins = rt;
 		rt->rt6i_node = fn;
 		atomic_inc(&rt->rt6i_ref);
@@ -907,6 +909,7 @@  static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 		*ins = rt;
 		rt->rt6i_node = fn;
 		rt->dst.rt6_next = iter->dst.rt6_next;
+		rt->dst.flags |= DST_IN_FIB;
 		atomic_inc(&rt->rt6i_ref);
 		if (!info->skip_notify)
 			inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
@@ -916,6 +919,7 @@  static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 		}
 		nsiblings = iter->rt6i_nsiblings;
 		fib6_purge_rt(iter, fn, info->nl_net);
+		iter->dst.flags &= ~DST_IN_FIB;
 		rt6_release(iter);
 
 		if (nsiblings) {
@@ -926,6 +930,7 @@  static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 				if (rt6_qualify_for_ecmp(iter)) {
 					*ins = iter->dst.rt6_next;
 					fib6_purge_rt(iter, fn, info->nl_net);
+					iter->dst.flags &= ~DST_IN_FIB;
 					rt6_release(iter);
 					nsiblings--;
 				} else {
@@ -974,6 +979,21 @@  int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 			 !atomic_read(&rt->dst.__refcnt)))
 		return -EINVAL;
 
+if (rt->dst.ops->family != AF_INET6) {
+	pr_warn("fib6_add: adding rt with family is %d dst flags %x\n",
+		rt->dst.ops->family, rt->dst.flags);
+
+	WARN_ON(1);
+}
+/* dst.next really should not be set at this point */
+if (rt->dst.next && rt->dst.next->ops->family != AF_INET6) {
+	pr_warn("fib6_add: adding rt with bad next -- family %d dst flags %x\n",
+		rt->dst.next->ops->family, rt->dst.next->flags);
+
+	if (rt->dst.ops->dump)
+		rt->dst.ops->dump(&rt->dst);
+}
+
 	if (info->nlh) {
 		if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
 			allow_create = 0;
@@ -1444,6 +1464,7 @@  static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
 	read_unlock(&net->ipv6.fib6_walker_lock);
 
 	rt->dst.rt6_next = NULL;
+	rt->dst.flags &= ~DST_IN_FIB;
 
 	/* If it was last route, expunge its radix tree node */
 	if (!fn->leaf) {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 229bfcc451ef..e83b5ef7fbcd 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -238,10 +238,22 @@  static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
 	__ipv6_confirm_neigh(dev, daddr);
 }
 
+static void ip6_dst_dump(struct dst_entry *dst)
+{
+	struct rt6_info *rt = (struct rt6_info *) dst;
+
+	pr_warn("rt %p: dev %s gw %pI6c dst %pI6c/%d src %pI6c prefsrc %pI6c flags %x rt6i_nsiblings %u\n",
+		rt, rt->rt6i_idev ? rt->rt6i_idev->dev->name : "<unknown>",
+		&rt->rt6i_gateway, &rt->rt6i_dst.addr, rt->rt6i_dst.plen,
+		&rt->rt6i_src.addr, &rt->rt6i_prefsrc.addr,
+		rt->rt6i_flags, rt->rt6i_nsiblings);
+}
+
 static struct dst_ops ip6_dst_ops_template = {
 	.family			=	AF_INET6,
 	.gc			=	ip6_dst_gc,
 	.gc_thresh		=	1024,
+	.dump			=	ip6_dst_dump,
 	.check			=	ip6_dst_check,
 	.default_advmss		=	ip6_default_advmss,
 	.mtu			=	ip6_mtu,
@@ -1135,6 +1147,8 @@  struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 
 		dst_hold(&uncached_rt->dst);
 
+		uncached_rt->dst.flags &= ~DST_IN_FIB;
+
 		trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
 		return uncached_rt;
 
@@ -1160,6 +1174,7 @@  struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 			dst_release(&rt->dst);
 		}
 
+		pcpu_rt->dst.flags &= ~DST_IN_FIB;
 		trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
 		return pcpu_rt;