diff mbox

[ovs-dev] pkt reassemble: fix kernel panic for ovs reassemble

Message ID 6DAF063A35010343823807B082E5681F1A71E195@mbx05.360buyAD.local
State Superseded
Delegated to: Joe Stringer
Headers show

Commit Message

Zhike Wang June 22, 2017, 1:54 a.m. UTC
OVS and the kernel stack would add frag_queue entries to the same
netns_frags list. As a result, OVS and the kernel may access the
frag_queue without the correct lock. Also, struct ipq may be different
on kernels older than 4.3, which leads to invalid pointer access.

The fix creates a dedicated netns_frags for OVS.

Signed-off-by: wangzhike <wangzhike@jd.com>
---
datapath/datapath.c                                | 22 +++---
datapath/datapath.h                                |  6 ++
datapath/linux/compat/include/net/inet_frag.h      | 18 -----
datapath/linux/compat/include/net/ip.h             |  4 ++
.../include/net/netfilter/ipv6/nf_defrag_ipv6.h    |  4 ++
datapath/linux/compat/inet_fragment.c              | 83 ----------------------
datapath/linux/compat/ip_fragment.c                | 66 ++++++++++++++---
datapath/linux/compat/nf_conntrack_reasm.c         | 58 +++++++++++++--
8 files changed, 138 insertions(+), 123 deletions(-)

+static struct netns_frags *get_netns_frags6_from_net(struct net *net)
+{
+#ifdef HAVE_INET_FRAG_LRU_MOVE
+       struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+       return &(ovs_net->nf_frags);
+#else
+       return &(net->nf_frag.frags);
+#endif
+}
+
+static struct net *get_net_from_netns_frags6(struct netns_frags *frags)
+{
+       struct net *net;
+#ifdef HAVE_INET_FRAG_LRU_MOVE
+       struct ovs_net *ovs_net;
+
+       ovs_net = container_of(frags, struct ovs_net, nf_frags);
+       net = ovs_net->net;
+#else
+       net = container_of(frags, struct net, nf_frag.frags);
+#endif
+       return net;
+}
+
static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
{
       return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
@@ -105,7 +131,7 @@ static void nf_ct_frag6_expire(unsigned long data)
       struct net *net;
        fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
-        net = container_of(fq->q.net, struct net, nf_frag.frags);
+       net = get_net_from_netns_frags6(fq->q.net);
        ip6_expire_frag_queue(net, fq, &nf_frags);
}
@@ -118,6 +144,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id,
       struct inet_frag_queue *q;
       struct ip6_create_arg arg;
       unsigned int hash;
+       struct netns_frags *frags;
        arg.id = id;
       arg.user = user;
@@ -132,7 +159,8 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id,
#endif
       hash = nf_hash_frag(id, src, dst);
-        q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);
+       frags = get_netns_frags6_from_net(net);
+       q = inet_frag_find(frags, &nf_frags, &arg, hash);
       local_bh_enable();
       if (IS_ERR_OR_NULL(q)) {
                inet_frag_maybe_warn_overflow(q, pr_fmt());
@@ -506,6 +534,7 @@ int rpl_nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
       struct frag_queue *fq;
       struct ipv6hdr *hdr;
       u8 prevhdr;
+       struct netns_frags *frags;
        /* Jumbo payload inhibits frag. header */
       if (ipv6_hdr(skb)->payload_len == 0) {
@@ -524,9 +553,10 @@ int rpl_nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
       fhdr = (struct frag_hdr *)skb_transport_header(skb);
 /* See ip_evictor(). */
+       frags = get_netns_frags6_from_net(net);
#ifdef HAVE_INET_FRAG_EVICTOR
       local_bh_disable();
-        inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false);
+       inet_frag_evictor(frags, &nf_frags, false);
       local_bh_enable();
#endif
@@ -567,7 +597,27 @@ static int nf_ct_net_init(struct net *net)
 static void nf_ct_net_exit(struct net *net)
{
-        inet_frags_exit_net(&net->nf_frag.frags, &nf_frags);
+}
+
+void ovs_netns_frags6_init(struct net *net)
+{
+#ifdef HAVE_INET_FRAG_LRU_MOVE
+       struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+
+       ovs_net->nf_frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
+       ovs_net->nf_frags.low_thresh = IPV6_FRAG_LOW_THRESH;
+       ovs_net->nf_frags.timeout = IPV6_FRAG_TIMEOUT;
+
+       inet_frags_init_net(&(ovs_net->nf_frags));
+#endif
+}
+
+void ovs_netns_frags6_exit(struct net *net)
+{
+       struct netns_frags *frags;
+
+       frags = get_netns_frags6_from_net(net);
+       inet_frags_exit_net(frags, &nf_frags);
}
 static struct pernet_operations nf_ct_net_ops = {
--
1.8.3.1

Comments

Joe Stringer June 23, 2017, 12:20 a.m. UTC | #1
On 21 June 2017 at 18:54, 王志克 <wangzhike@jd.com> wrote:
> Ovs and kernel stack would add frag_queue to same netns_frags list.
> As result, ovs and kernel may access the fraq_queue without correct
> lock. Also the struct ipq may be different on kernel(older than 4.3),
> which leads to invalid pointer access.
>
> The fix creates specific netns_frags for ovs.
>
> Signed-off-by: wangzhike <wangzhike@jd.com>
> ---

Hi,

It looks like the whitespace has been corrupted in this version of the
patch that you sent, I cannot apply it. Probably your email client
mistreats it when sending the email out. A reliable method to send
patches correctly via email is to use the commandline client 'git
send-email'. This is the preferred method. If you are unable to set
that up, consider attaching the patch to the email (or send a pull
request on GitHub).

Cheers,
Joe
Zhike Wang June 23, 2017, 12:53 a.m. UTC | #2
Hi Joe,

Please check the attachment. Thanks.

Br,
Wang Zhike

-----邮件原件-----
发件人: Joe Stringer [mailto:joe@ovn.org] 
发送时间: 2017年6月23日 8:20
收件人: 王志克
抄送: dev@openvswitch.org
主题: Re: [ovs-dev] [PATCH] pkt reassemble: fix kernel panic for ovs reassemble

On 21 June 2017 at 18:54, 王志克 <wangzhike@jd.com> wrote:
> Ovs and kernel stack would add frag_queue to same netns_frags list.

> As result, ovs and kernel may access the fraq_queue without correct 

> lock. Also the struct ipq may be different on kernel(older than 4.3), 

> which leads to invalid pointer access.

>

> The fix creates specific netns_frags for ovs.

>

> Signed-off-by: wangzhike <wangzhike@jd.com>

> ---


Hi,

It looks like the whitespace has been corrupted in this version of the patch that you sent, I cannot apply it. Probably your email client mistreats it when sending the email out. A reliable method to send patches correctly via email is to use the commandline client 'git send-email'. This is the preferred method. If you are unable to set that up, consider attaching the patch to the email (or send a pull request on GitHub).

Cheers,
Joe
Gregory Rose June 23, 2017, 8:58 p.m. UTC | #3
On 06/22/2017 05:53 PM, 王志克 wrote:
> Hi Joe,
>
> Please check the attachment. Thanks.

The attachment is only this:
Joe Stringer June 23, 2017, 9:14 p.m. UTC | #4
Hi Wang Zhike,

I'd like it if others like Greg could take a look as well, since this
code is delicate. The more review it gets, the better. It seems like
maybe the version of your email that goes to the list does not get the
attachment. Perhaps you could try sending the patch using git
send-email or putting the patch on GitHub instead, and linking to it
here.

For what it's worth, I did run your patch for a while and it seemed
OK, but when I tried again today on an Ubuntu Trusty (Linux
3.13.0-119-generic) box, running make check-kmod, I saw an issue with
get_next_timer_interrupt():

[181250.892557] BUG: unable to handle kernel paging request at ffffffffa03317e0
[181250.892557] IP: [<ffffffff81079606>] get_next_timer_interrupt+0x86/0x250
[181250.892557] PGD 1c11067 PUD 1c12063 PMD 1381a2067 PTE 0
[181250.892557] Oops: 0000 [#1] SMP
[181250.892557] Modules linked in: nf_nat_ipv6 nf_nat_ipv4 nf_nat
gre(-) nf_conntrack_ipv6 nf_conntrack_ipv4 nf_defrag_ipv6
nf_defrag_ipv4 nf_conntrack_netlink nfnetlink nf_conntrack bonding
8021q garp stp mrp llc veth nfsd auth_rpcgss nfs_acl nfs lockd sunrpc
fscache dm_crypt kvm_intel kvm serio_raw netconsole configfs
crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel
aes_x86_64 lrw gf128mul glue_helper ablk_helper cryptd psmouse floppy
ahci libahci [last unloaded: libcrc32c]
[181250.892557] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G           OX
3.13.0-119-generic #166-Ubuntu
[181250.892557] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS Bochs 01/01/2011
[181250.892557] task: ffffffff81c15480 ti: ffffffff81c00000 task.ti:
ffffffff81c00000
[181250.892557] RIP: 0010:[<ffffffff81079606>]  [<ffffffff81079606>]
get_next_timer_interrupt+0x86/0x250
[181250.892557] RSP: 0018:ffffffff81c01e00  EFLAGS: 00010002
[181250.892557] RAX: ffffffffa03317c8 RBX: 0000000102b245da RCX:
00000000000000db
[181250.892557] RDX: ffffffff81ebac58 RSI: 00000000000000db RDI:
0000000102b245db
[181250.892557] RBP: ffffffff81c01e48 R08: 0000000000c88c1c R09:
0000000000000000
[181250.892557] R10: 0000000000000000 R11: 0000000000000000 R12:
0000000142b245d9
[181250.892557] R13: ffffffff81eb9e80 R14: 0000000102b245da R15:
0000000000cd63e8
[181250.892557] FS:  0000000000000000(0000) GS:ffff88013fc00000(0000)
knlGS:0000000000000000
[181250.892557] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[181250.892557] CR2: ffffffffa03317e0 CR3: 000000003707f000 CR4:
00000000000006f0
[181250.892557] Stack:
[181250.892557]  0000000000000000 ffffffff81c01e30 ffffffff810a3af5
ffff88013fc13bc0
[181250.892557]  ffff88013fc0dce0 0000000102b245da 0000000000000000
00000063ae154000
[181250.892557]  0000000000cd63e8 ffffffff81c01ea8 ffffffff810da655
0000a4d8c2cb6200
[181250.892557] Call Trace:
[181250.892557]  [<ffffffff810a3af5>] ? set_next_entity+0x95/0xb0
[181250.892557]  [<ffffffff810da655>] tick_nohz_stop_sched_tick+0x1e5/0x340
[181250.892557]  [<ffffffff810da851>] __tick_nohz_idle_enter+0xa1/0x160
[181250.892557]  [<ffffffff810dab4d>] tick_nohz_idle_enter+0x3d/0x70
[181250.892557]  [<ffffffff810c2af7>] cpu_startup_entry+0x87/0x2b0
[181250.892557]  [<ffffffff8171b387>] rest_init+0x77/0x80
[181250.892557]  [<ffffffff81d34f6a>] start_kernel+0x432/0x43d
[181250.892557]  [<ffffffff81d34941>] ? repair_env_string+0x5c/0x5c
[181250.892557]  [<ffffffff81d34120>] ? early_idt_handler_array+0x120/0x120
[181250.892557]  [<ffffffff81d345ee>] x86_64_start_reservations+0x2a/0x2c
[181250.892557]  [<ffffffff81d34733>] x86_64_start_kernel+0x143/0x152
[181250.892557] Code: 8b 7d 10 4d 8b 75 18 4c 39 f7 78 5c 40 0f b6 cf
89 ce 48 63 c6 48 c1 e0 04 49 8d 54 05 00 48 8b 42 28 48 83 c2 28 48
39 d0 74 0e <f6> 40 18 01 74 24 48 8b 00 48 39 d0 75 f2 83 c6 01 40 0f
b6 f6
[181250.892557] RIP  [<ffffffff81079606>] get_next_timer_interrupt+0x86/0x250
[181250.892557]  RSP <ffffffff81c01e00>
[181250.892557] CR2: ffffffffa03317e0

It seems like perhaps a fragment timer signed up by OVS is still
remaining when the OVS module is unloaded, so it may attempt to clean
up an entry using OVS code but the OVS code has been unloaded at that
point. This might be related to the IPv6 cvlan test - that seems to be
where my VM froze and went to 100% CPU, but I would think that the
IPv6 fragmentation cleanup test is more likely to cause this, since
it leaves fragments behind in the cache after the test finishes. I've
only hit this when running all of the tests in make check-kmod.

Cheers,
Joe

On 22 June 2017 at 17:53, 王志克 <wangzhike@jd.com> wrote:
> Hi Joe,
>
> Please check the attachment. Thanks.
>
> Br,
> Wang Zhike
>
> -----邮件原件-----
> 发件人: Joe Stringer [mailto:joe@ovn.org]
> 发送时间: 2017年6月23日 8:20
> 收件人: 王志克
> 抄送: dev@openvswitch.org
> 主题: Re: [ovs-dev] [PATCH] pkt reassemble: fix kernel panic for ovs reassemble
>
> On 21 June 2017 at 18:54, 王志克 <wangzhike@jd.com> wrote:
>> Ovs and kernel stack would add frag_queue to same netns_frags list.
>> As result, ovs and kernel may access the fraq_queue without correct
>> lock. Also the struct ipq may be different on kernel(older than 4.3),
>> which leads to invalid pointer access.
>>
>> The fix creates specific netns_frags for ovs.
>>
>> Signed-off-by: wangzhike <wangzhike@jd.com>
>> ---
>
> Hi,
>
> It looks like the whitespace has been corrupted in this version of the patch that you sent, I cannot apply it. Probably your email client mistreats it when sending the email out. A reliable method to send patches correctly via email is to use the commandline client 'git send-email'. This is the preferred method. If you are unable to set that up, consider attaching the patch to the email (or send a pull request on GitHub).
>
> Cheers,
> Joe
Zhike Wang June 26, 2017, 11:56 a.m. UTC | #5
Hi Joe,

I will try to check how to send the patch. Maybe tomorrow since I am quite busy now.

Regarding the crash, I can reproduce it even with official OVS, such as OVS 2.6.0 (I just run check-kmod in a loop until the kernel panics), so it is not related to the new fix.

Br,
Wang Zhike

-----邮件原件-----
发件人: Joe Stringer [mailto:joe@ovn.org] 
发送时间: 2017年6月24日 5:15
收件人: 王志克
抄送: dev@openvswitch.org
主题: Re: 答复: [ovs-dev] [PATCH] pkt reassemble: fix kernel panic for ovs reassemble

Hi Wang Zhike,

I'd like if others like Greg could take a look as well, since this code is delicate. The more review it gets, the better. It seems like maybe the version of your email that goes to the list does not get the attachment. Perhaps you could try sending the patch using git send-email or putting the patch on GitHub instead, and linking to it here.

For what it's worth, I did run your patch for a while and it seemed OK, but when I tried again today on an Ubuntu Trusty (Linux
3.13.0-119-generic) box, running make check-kmod, I saw an issue with
get_next_timer_interrupt():

[181250.892557] BUG: unable to handle kernel paging request at ffffffffa03317e0 [181250.892557] IP: [<ffffffff81079606>] get_next_timer_interrupt+0x86/0x250
[181250.892557] PGD 1c11067 PUD 1c12063 PMD 1381a2067 PTE 0 [181250.892557] Oops: 0000 [#1] SMP [181250.892557] Modules linked in: nf_nat_ipv6 nf_nat_ipv4 nf_nat
gre(-) nf_conntrack_ipv6 nf_conntrack_ipv4 nf_defrag_ipv6
nf_defrag_ipv4 nf_conntrack_netlink nfnetlink nf_conntrack bonding 8021q garp stp mrp llc veth nfsd auth_rpcgss nfs_acl nfs lockd sunrpc fscache dm_crypt kvm_intel kvm serio_raw netconsole configfs crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel
aes_x86_64 lrw gf128mul glue_helper ablk_helper cryptd psmouse floppy ahci libahci [last unloaded: libcrc32c]
[181250.892557] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G           OX
3.13.0-119-generic #166-Ubuntu
[181250.892557] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 [181250.892557] task: ffffffff81c15480 ti: ffffffff81c00000 task.ti:
ffffffff81c00000
[181250.892557] RIP: 0010:[<ffffffff81079606>]  [<ffffffff81079606>]
get_next_timer_interrupt+0x86/0x250
[181250.892557] RSP: 0018:ffffffff81c01e00  EFLAGS: 00010002 [181250.892557] RAX: ffffffffa03317c8 RBX: 0000000102b245da RCX:
00000000000000db
[181250.892557] RDX: ffffffff81ebac58 RSI: 00000000000000db RDI:
0000000102b245db
[181250.892557] RBP: ffffffff81c01e48 R08: 0000000000c88c1c R09:
0000000000000000
[181250.892557] R10: 0000000000000000 R11: 0000000000000000 R12:
0000000142b245d9
[181250.892557] R13: ffffffff81eb9e80 R14: 0000000102b245da R15:
0000000000cd63e8
[181250.892557] FS:  0000000000000000(0000) GS:ffff88013fc00000(0000)
knlGS:0000000000000000
[181250.892557] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b [181250.892557] CR2: ffffffffa03317e0 CR3: 000000003707f000 CR4:
00000000000006f0
[181250.892557] Stack:
[181250.892557]  0000000000000000 ffffffff81c01e30 ffffffff810a3af5
ffff88013fc13bc0
[181250.892557]  ffff88013fc0dce0 0000000102b245da 0000000000000000
00000063ae154000
[181250.892557]  0000000000cd63e8 ffffffff81c01ea8 ffffffff810da655
0000a4d8c2cb6200
[181250.892557] Call Trace:
[181250.892557]  [<ffffffff810a3af5>] ? set_next_entity+0x95/0xb0 [181250.892557]  [<ffffffff810da655>] tick_nohz_stop_sched_tick+0x1e5/0x340
[181250.892557]  [<ffffffff810da851>] __tick_nohz_idle_enter+0xa1/0x160 [181250.892557]  [<ffffffff810dab4d>] tick_nohz_idle_enter+0x3d/0x70 [181250.892557]  [<ffffffff810c2af7>] cpu_startup_entry+0x87/0x2b0 [181250.892557]  [<ffffffff8171b387>] rest_init+0x77/0x80 [181250.892557]  [<ffffffff81d34f6a>] start_kernel+0x432/0x43d [181250.892557]  [<ffffffff81d34941>] ? repair_env_string+0x5c/0x5c [181250.892557]  [<ffffffff81d34120>] ? early_idt_handler_array+0x120/0x120
[181250.892557]  [<ffffffff81d345ee>] x86_64_start_reservations+0x2a/0x2c
[181250.892557]  [<ffffffff81d34733>] x86_64_start_kernel+0x143/0x152 [181250.892557] Code: 8b 7d 10 4d 8b 75 18 4c 39 f7 78 5c 40 0f b6 cf
89 ce 48 63 c6 48 c1 e0 04 49 8d 54 05 00 48 8b 42 28 48 83 c2 28 48
39 d0 74 0e <f6> 40 18 01 74 24 48 8b 00 48 39 d0 75 f2 83 c6 01 40 0f
b6 f6
[181250.892557] RIP  [<ffffffff81079606>] get_next_timer_interrupt+0x86/0x250
[181250.892557]  RSP <ffffffff81c01e00>
[181250.892557] CR2: ffffffffa03317e0

It seems like perhaps a fragment timer signed up by OVS is still remaining when the OVS module is unloaded, so it may attempt to clean up an entry using OVS code but the OVS code has been unloaded at that point. This might be related to IPv6 cvlan test - that seems to be where my VM froze and went to 100% CPU, but I would think that the
IPv6 fragmentation cleanup test is a more likely to cause this, since it leaves fragments behind in the cache after the test finishes. I've only hit this when running all of the tests in make check-kmod.

Cheers,
Joe

On 22 June 2017 at 17:53, 王志克 <wangzhike@jd.com> wrote:
> Hi Joe,

>

> Please check the attachment. Thanks.

>

> Br,

> Wang Zhike

>

> -----邮件原件-----

> 发件人: Joe Stringer [mailto:joe@ovn.org]

> 发送时间: 2017年6月23日 8:20

> 收件人: 王志克

> 抄送: dev@openvswitch.org

> 主题: Re: [ovs-dev] [PATCH] pkt reassemble: fix kernel panic for ovs 

> reassemble

>

> On 21 June 2017 at 18:54, 王志克 <wangzhike@jd.com> wrote:

>> Ovs and kernel stack would add frag_queue to same netns_frags list.

>> As result, ovs and kernel may access the fraq_queue without correct 

>> lock. Also the struct ipq may be different on kernel(older than 4.3), 

>> which leads to invalid pointer access.

>>

>> The fix creates specific netns_frags for ovs.

>>

>> Signed-off-by: wangzhike <wangzhike@jd.com>

>> ---

>

> Hi,

>

> It looks like the whitespace has been corrupted in this version of the patch that you sent, I cannot apply it. Probably your email client mistreats it when sending the email out. A reliable method to send patches correctly via email is to use the commandline client 'git send-email'. This is the preferred method. If you are unable to set that up, consider attaching the patch to the email (or send a pull request on GitHub).

>

> Cheers,

> Joe
Gregory Rose June 26, 2017, 3:02 p.m. UTC | #6
On 06/26/2017 04:56 AM, 王志克 wrote:
> Hi Joe,
>
> I will try to check how to send the patch. Maybe tomorrow since I am quite busy now.
>
> Regarding the crash, I can reproduce it even with official OVS, like ovs2.6.0. (I just run the check kmod in a loop until kernel panic). So it is not related to the new fix.

I'm running the check-kmod test in a loop and will see if I can get a repro.

Thanks,

- Greg

>
> Br,
> Wang Zhike
>
> -----邮件原件-----
> 发件人: Joe Stringer [mailto:joe@ovn.org]
> 发送时间: 2017年6月24日 5:15
> 收件人: 王志克
> 抄送: dev@openvswitch.org
> 主题: Re: 答复: [ovs-dev] [PATCH] pkt reassemble: fix kernel panic for ovs reassemble
>
> Hi Wang Zhike,
>
> I'd like if others like Greg could take a look as well, since this code is delicate. The more review it gets, the better. It seems like maybe the version of your email that goes to the list does not get the attachment. Perhaps you could try sending the patch using git send-email or putting the patch on GitHub instead, and linking to it here.
>
> For what it's worth, I did run your patch for a while and it seemed OK, but when I tried again today on an Ubuntu Trusty (Linux
> 3.13.0-119-generic) box, running make check-kmod, I saw an issue with
> get_next_timer_interrupt():
>
> [181250.892557] BUG: unable to handle kernel paging request at ffffffffa03317e0 [181250.892557] IP: [<ffffffff81079606>] get_next_timer_interrupt+0x86/0x250
> [181250.892557] PGD 1c11067 PUD 1c12063 PMD 1381a2067 PTE 0 [181250.892557] Oops: 0000 [#1] SMP [181250.892557] Modules linked in: nf_nat_ipv6 nf_nat_ipv4 nf_nat
> gre(-) nf_conntrack_ipv6 nf_conntrack_ipv4 nf_defrag_ipv6
> nf_defrag_ipv4 nf_conntrack_netlink nfnetlink nf_conntrack bonding 8021q garp stp mrp llc veth nfsd auth_rpcgss nfs_acl nfs lockd sunrpc fscache dm_crypt kvm_intel kvm serio_raw netconsole configfs crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel
> aes_x86_64 lrw gf128mul glue_helper ablk_helper cryptd psmouse floppy ahci libahci [last unloaded: libcrc32c]
> [181250.892557] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G           OX
> 3.13.0-119-generic #166-Ubuntu
> [181250.892557] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 [181250.892557] task: ffffffff81c15480 ti: ffffffff81c00000 task.ti:
> ffffffff81c00000
> [181250.892557] RIP: 0010:[<ffffffff81079606>]  [<ffffffff81079606>]
> get_next_timer_interrupt+0x86/0x250
> [181250.892557] RSP: 0018:ffffffff81c01e00  EFLAGS: 00010002 [181250.892557] RAX: ffffffffa03317c8 RBX: 0000000102b245da RCX:
> 00000000000000db
> [181250.892557] RDX: ffffffff81ebac58 RSI: 00000000000000db RDI:
> 0000000102b245db
> [181250.892557] RBP: ffffffff81c01e48 R08: 0000000000c88c1c R09:
> 0000000000000000
> [181250.892557] R10: 0000000000000000 R11: 0000000000000000 R12:
> 0000000142b245d9
> [181250.892557] R13: ffffffff81eb9e80 R14: 0000000102b245da R15:
> 0000000000cd63e8
> [181250.892557] FS:  0000000000000000(0000) GS:ffff88013fc00000(0000)
> knlGS:0000000000000000
> [181250.892557] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b [181250.892557] CR2: ffffffffa03317e0 CR3: 000000003707f000 CR4:
> 00000000000006f0
> [181250.892557] Stack:
> [181250.892557]  0000000000000000 ffffffff81c01e30 ffffffff810a3af5
> ffff88013fc13bc0
> [181250.892557]  ffff88013fc0dce0 0000000102b245da 0000000000000000
> 00000063ae154000
> [181250.892557]  0000000000cd63e8 ffffffff81c01ea8 ffffffff810da655
> 0000a4d8c2cb6200
> [181250.892557] Call Trace:
> [181250.892557]  [<ffffffff810a3af5>] ? set_next_entity+0x95/0xb0 [181250.892557]  [<ffffffff810da655>] tick_nohz_stop_sched_tick+0x1e5/0x340
> [181250.892557]  [<ffffffff810da851>] __tick_nohz_idle_enter+0xa1/0x160 [181250.892557]  [<ffffffff810dab4d>] tick_nohz_idle_enter+0x3d/0x70 [181250.892557]  [<ffffffff810c2af7>] cpu_startup_entry+0x87/0x2b0 [181250.892557]  [<ffffffff8171b387>] rest_init+0x77/0x80 [181250.892557]  [<ffffffff81d34f6a>] start_kernel+0x432/0x43d [181250.892557]  [<ffffffff81d34941>] ? repair_env_string+0x5c/0x5c [181250.892557]  [<ffffffff81d34120>] ? early_idt_handler_array+0x120/0x120
> [181250.892557]  [<ffffffff81d345ee>] x86_64_start_reservations+0x2a/0x2c
> [181250.892557]  [<ffffffff81d34733>] x86_64_start_kernel+0x143/0x152 [181250.892557] Code: 8b 7d 10 4d 8b 75 18 4c 39 f7 78 5c 40 0f b6 cf
> 89 ce 48 63 c6 48 c1 e0 04 49 8d 54 05 00 48 8b 42 28 48 83 c2 28 48
> 39 d0 74 0e <f6> 40 18 01 74 24 48 8b 00 48 39 d0 75 f2 83 c6 01 40 0f
> b6 f6
> [181250.892557] RIP  [<ffffffff81079606>] get_next_timer_interrupt+0x86/0x250
> [181250.892557]  RSP <ffffffff81c01e00>
> [181250.892557] CR2: ffffffffa03317e0
>
> It seems like perhaps a fragment timer signed up by OVS is still remaining when the OVS module is unloaded, so it may attempt to clean up an entry using OVS code but the OVS code has been unloaded at that point. This might be related to IPv6 cvlan test - that seems to be where my VM froze and went to 100% CPU, but I would think that the
> IPv6 fragmentation cleanup test is a more likely to cause this, since it leaves fragments behind in the cache after the test finishes. I've only hit this when running all of the tests in make check-kmod.
>
> Cheers,
> Joe
>
> On 22 June 2017 at 17:53, 王志克 <wangzhike@jd.com> wrote:
> > Hi Joe,
> >
> > Please check the attachment. Thanks.
> >
> > Br,
> > Wang Zhike
> >
> > -----邮件原件-----
> > 发件人: Joe Stringer [mailto:joe@ovn.org]
> > 发送时间: 2017年6月23日 8:20
> > 收件人: 王志克
> > 抄送: dev@openvswitch.org
> > 主题: Re: [ovs-dev] [PATCH] pkt reassemble: fix kernel panic for ovs
> > reassemble
> >
> > On 21 June 2017 at 18:54, 王志克 <wangzhike@jd.com> wrote:
> >> Ovs and kernel stack would add frag_queue to same netns_frags list.
> >> As result, ovs and kernel may access the fraq_queue without correct
> >> lock. Also the struct ipq may be different on kernel(older than 4.3),
> >> which leads to invalid pointer access.
> >>
> >> The fix creates specific netns_frags for ovs.
> >>
> >> Signed-off-by: wangzhike <wangzhike@jd.com>
> >> ---
> >
> > Hi,
> >
> > It looks like the whitespace has been corrupted in this version of the patch that you sent, I cannot apply it. Probably your email client mistreats it when sending the email out. A reliable method to send patches correctly via email is to use the commandline client 'git send-email'. This is the preferred method. If you are unable to set that up, consider attaching the patch to the email (or send a pull request on GitHub).
> >
> > Cheers,
> > Joe
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Gregory Rose June 26, 2017, 10:25 p.m. UTC | #7
On 06/26/2017 04:56 AM, 王志克 wrote:
> Hi Joe,
>
> I will try to check how to send the patch. Maybe tomorrow since I am quite busy now.
>
> Regarding the crash, I can reproduce it even with official OVS, like ovs2.6.0. (I just run the check kmod in a loop until kernel panic). So it is not related to the new fix.
>
> Br,
> Wang Zhike
I've been running 'make check-kmod' in a continuous loop on 3 virtual machines since this morning.  So far no kernel splats but plenty of errors:

This is on the Ubuntu machine running 4.0 kernel:

ERROR: 66 tests were run,
24 failed unexpectedly.
23 tests were skipped.
## -------------------------------------- ##
## system-kmod-testsuite.log was created. ##
## -------------------------------------- ##

Please send `tests/system-kmod-testsuite.log' and all information you think might help:

    To: <bugs@openvswitch.org>
       Subject: [openvswitch 2.7.90] system-kmod-testsuite: 16 17 35 57 58 59 60 61 62 63 70 71 72 75 76 81 82 83 84 85 86 87 88 89 failed

Centos 7.2 running 4.9.24 kernel:

## ------------- ##
## Test results. ##
## ------------- ##

ERROR: 76 tests were run,
34 failed unexpectedly.
13 tests were skipped.
## -------------------------------------- ##
## system-kmod-testsuite.log was created. ##
## -------------------------------------- ##

Please send `tests/system-kmod-testsuite.log' and all information you think might help:

    To: <bugs@openvswitch.org>
       Subject: [openvswitch 2.7.90] system-kmod-testsuite: 2 14 15 20 21 22 23 24 25 26 27 28 29 30 31 32 47 48 49 50 51 57 59 60 61 62 70 71 75 76 84 85 86 87 failed

Centos 7.2 running 4.10.17 kernel:

## ------------- ##
## Test results. ##
## ------------- ##

ERROR: 74 tests were run,
34 failed unexpectedly.
15 tests were skipped.
## -------------------------------------- ##
## system-kmod-testsuite.log was created. ##
## -------------------------------------- ##

Please send `tests/system-kmod-testsuite.log' and all information you think might help:

    To: <bugs@openvswitch.org>
       Subject: [openvswitch 2.7.90] system-kmod-testsuite: 2 14 15 20 21 22 23 24 25 26 27 28 29 30 31 32 47 48 49 50 51 57 59 60 61 62 70 71 75 76 84 85 86 87 failed

I confess to not spending a lot of time running check-kmod.  I certainly intend to in the future.

- Greg

>
> -----邮件原件-----
> 发件人: Joe Stringer [mailto:joe@ovn.org]
> 发送时间: 2017年6月24日 5:15
> 收件人: 王志克
> 抄送: dev@openvswitch.org
> 主题: Re: 答复: [ovs-dev] [PATCH] pkt reassemble: fix kernel panic for ovs reassemble
>
> Hi Wang Zhike,
>
> I'd like if others like Greg could take a look as well, since this code is delicate. The more review it gets, the better. It seems like maybe the version of your email that goes to the list does not get the attachment. Perhaps you could try sending the patch using git send-email or putting the patch on GitHub instead, and linking to it here.
>
> For what it's worth, I did run your patch for a while and it seemed OK, but when I tried again today on an Ubuntu Trusty (Linux
> 3.13.0-119-generic) box, running make check-kmod, I saw an issue with
> get_next_timer_interrupt():
>
> [181250.892557] BUG: unable to handle kernel paging request at ffffffffa03317e0 [181250.892557] IP: [<ffffffff81079606>] get_next_timer_interrupt+0x86/0x250
> [181250.892557] PGD 1c11067 PUD 1c12063 PMD 1381a2067 PTE 0 [181250.892557] Oops: 0000 [#1] SMP [181250.892557] Modules linked in: nf_nat_ipv6 nf_nat_ipv4 nf_nat
> gre(-) nf_conntrack_ipv6 nf_conntrack_ipv4 nf_defrag_ipv6
> nf_defrag_ipv4 nf_conntrack_netlink nfnetlink nf_conntrack bonding 8021q garp stp mrp llc veth nfsd auth_rpcgss nfs_acl nfs lockd sunrpc fscache dm_crypt kvm_intel kvm serio_raw netconsole configfs crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel
> aes_x86_64 lrw gf128mul glue_helper ablk_helper cryptd psmouse floppy ahci libahci [last unloaded: libcrc32c]
> [181250.892557] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G           OX
> 3.13.0-119-generic #166-Ubuntu
> [181250.892557] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 [181250.892557] task: ffffffff81c15480 ti: ffffffff81c00000 task.ti:
> ffffffff81c00000
> [181250.892557] RIP: 0010:[<ffffffff81079606>]  [<ffffffff81079606>]
> get_next_timer_interrupt+0x86/0x250
> [181250.892557] RSP: 0018:ffffffff81c01e00  EFLAGS: 00010002 [181250.892557] RAX: ffffffffa03317c8 RBX: 0000000102b245da RCX:
> 00000000000000db
> [181250.892557] RDX: ffffffff81ebac58 RSI: 00000000000000db RDI:
> 0000000102b245db
> [181250.892557] RBP: ffffffff81c01e48 R08: 0000000000c88c1c R09:
> 0000000000000000
> [181250.892557] R10: 0000000000000000 R11: 0000000000000000 R12:
> 0000000142b245d9
> [181250.892557] R13: ffffffff81eb9e80 R14: 0000000102b245da R15:
> 0000000000cd63e8
> [181250.892557] FS:  0000000000000000(0000) GS:ffff88013fc00000(0000)
> knlGS:0000000000000000
> [181250.892557] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b [181250.892557] CR2: ffffffffa03317e0 CR3: 000000003707f000 CR4:
> 00000000000006f0
> [181250.892557] Stack:
> [181250.892557]  0000000000000000 ffffffff81c01e30 ffffffff810a3af5
> ffff88013fc13bc0
> [181250.892557]  ffff88013fc0dce0 0000000102b245da 0000000000000000
> 00000063ae154000
> [181250.892557]  0000000000cd63e8 ffffffff81c01ea8 ffffffff810da655
> 0000a4d8c2cb6200
> [181250.892557] Call Trace:
> [181250.892557]  [<ffffffff810a3af5>] ? set_next_entity+0x95/0xb0 [181250.892557]  [<ffffffff810da655>] tick_nohz_stop_sched_tick+0x1e5/0x340
> [181250.892557]  [<ffffffff810da851>] __tick_nohz_idle_enter+0xa1/0x160 [181250.892557]  [<ffffffff810dab4d>] tick_nohz_idle_enter+0x3d/0x70 [181250.892557]  [<ffffffff810c2af7>] cpu_startup_entry+0x87/0x2b0 [181250.892557]  [<ffffffff8171b387>] rest_init+0x77/0x80 [181250.892557]  [<ffffffff81d34f6a>] start_kernel+0x432/0x43d [181250.892557]  [<ffffffff81d34941>] ? repair_env_string+0x5c/0x5c [181250.892557]  [<ffffffff81d34120>] ? early_idt_handler_array+0x120/0x120
> [181250.892557]  [<ffffffff81d345ee>] x86_64_start_reservations+0x2a/0x2c
> [181250.892557]  [<ffffffff81d34733>] x86_64_start_kernel+0x143/0x152 [181250.892557] Code: 8b 7d 10 4d 8b 75 18 4c 39 f7 78 5c 40 0f b6 cf
> 89 ce 48 63 c6 48 c1 e0 04 49 8d 54 05 00 48 8b 42 28 48 83 c2 28 48
> 39 d0 74 0e <f6> 40 18 01 74 24 48 8b 00 48 39 d0 75 f2 83 c6 01 40 0f
> b6 f6
> [181250.892557] RIP  [<ffffffff81079606>] get_next_timer_interrupt+0x86/0x250
> [181250.892557]  RSP <ffffffff81c01e00>
> [181250.892557] CR2: ffffffffa03317e0
>
> It seems like perhaps a fragment timer signed up by OVS is still remaining when the OVS module is unloaded, so it may attempt to clean up an entry using OVS code but the OVS code has been unloaded at that point. This might be related to IPv6 cvlan test - that seems to be where my VM froze and went to 100% CPU, but I would think that the
> IPv6 fragmentation cleanup test is a more likely to cause this, since it leaves fragments behind in the cache after the test finishes. I've only hit this when running all of the tests in make check-kmod.
>
> Cheers,
> Joe
>
> On 22 June 2017 at 17:53, 王志克 <wangzhike@jd.com> wrote:
> > Hi Joe,
> >
> > Please check the attachment. Thanks.
> >
> > Br,
> > Wang Zhike
> >
> > -----邮件原件-----
> > 发件人: Joe Stringer [mailto:joe@ovn.org]
> > 发送时间: 2017年6月23日 8:20
> > 收件人: 王志克
> > 抄送: dev@openvswitch.org
> > 主题: Re: [ovs-dev] [PATCH] pkt reassemble: fix kernel panic for ovs
> > reassemble
> >
> > On 21 June 2017 at 18:54, 王志克 <wangzhike@jd.com> wrote:
> >> Ovs and kernel stack would add frag_queue to same netns_frags list.
> >> As result, ovs and kernel may access the fraq_queue without correct
> >> lock. Also the struct ipq may be different on kernel(older than 4.3),
> >> which leads to invalid pointer access.
> >>
> >> The fix creates specific netns_frags for ovs.
> >>
> >> Signed-off-by: wangzhike <wangzhike@jd.com>
> >> ---
> >
> > Hi,
> >
> > It looks like the whitespace has been corrupted in this version of the patch that you sent, I cannot apply it. Probably your email client mistreats it when sending the email out. A reliable method to send patches correctly via email is to use the commandline client 'git send-email'. This is the preferred method. If you are unable to set that up, consider attaching the patch to the email (or send a pull request on GitHub).
> >
> > Cheers,
> > Joe
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Zhike Wang June 27, 2017, 12:51 a.m. UTC | #8
Hi Greg,

The exact issue occurred on the 20th test of check-kmod (sometimes there are other kernel issues: the kernel just hangs without a panic). This is OVS 2.6.0 on CentOS 7.2 with kernel 3.10.0-327.el7.x86_64. Some info is below, which I hope is helpful.

datapath-sanity

  1: datapath - ping between two ports               ok
  2: datapath - http between two ports               ok
  3: datapath - ping between two ports on vlan       ok
  4: datapath - ping6 between two ports              ok
  5: datapath - ping6 between two ports on vlan      ok
  6: datapath - ping over vxlan tunnel               FAILED (system-traffic.at:159)
  7: datapath - ping over gre tunnel                 FAILED (system-traffic.at:199)
  8: datapath - ping over geneve tunnel              skipped (system-traffic.at:213)
  9: datapath - basic truncate action                ok
 10: datapath - truncate and output to gre tunnel    FAILED (system-traffic.at:445)
 11: conntrack - controller                          FAILED (system-traffic.at:522)
 12: conntrack - IPv4 HTTP                           ok
 13: conntrack - IPv6 HTTP                           ok
 14: conntrack - IPv4 ping                           ok
 15: conntrack - IPv6 ping                           ok
 16: conntrack - commit, recirc                      ok
 17: conntrack - preserve registers                  ok
 18: conntrack - invalid                             ok
 19: conntrack - zones                               ok
 20: conntrack - zones from field ....(system crash...)


[root@localhost vmcore-127.0.0.1-2017-06-25-23:17:12]# ls
analyzer      backtrace  count      last_occurrence  os_info     runlevel  type  username  vmcore
architecture  component  event_log  machineid        os_release  time      uid   uuid      vmcore-dmesg.txt
[root@localhost vmcore-127.0.0.1-2017-06-25-23:17:12]# cat backtrace

Version: 3.10.0-327.el7.x86_64
BUG: unable to handle kernel paging request at ffffffffa0715ae8
IP: [<ffffffff8108e6a7>] get_next_timer_interrupt+0x97/0x270
PGD 194d067 PUD 194e063 PMD b746f067 PTE 0
Oops: 0000 [#1] SMP
Modules linked in: nf_nat_ftp nf_conntrack_ftp nf_conntrack_netlink nfnetlink ip_gre ip_tunnel gre vxlan ip6_udp_tunnel udp_tunnel 8021q garp mrp veth xt_CHECKSUM ipt_MASQUERADE nf_nat_masquerade_ipv4 tun ip6t_rpfilter ip6t_REJECT ipt_REJECT xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw iptable_filter vmw_vsock_vmci_transport vsock bnep dm_mirror dm_region_hash dm_log dm_mod snd_seq_midi snd_seq_midi_event snd_ens1371 snd_rawmidi coretemp snd_ac97_codec ac97_bus crc32_pclmul snd_seq ghash_clmulni_intel ppdev
 snd_seq_device cryptd btusb snd_pcm bluetooth snd_timer snd soundcore sg vmw_balloon rfkill pcspkr parport_pc parport i2c_piix4 vmw_vmci shpchp nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sr_mod cdrom ata_generic sd_mod crc_t10dif crct10dif_generic pata_acpi crct10dif_pclmul crct10dif_common crc32c_intel serio_raw vmwgfx drm_kms_helper ttm mptspi scsi_transport_spi e1000 mptscsih mptbase drm i2c_core ata_piix libata [last unloaded: openvswitch]
CPU: 1 PID: 0 Comm: swapper/1 Tainted: G           OE  ------------   3.10.0-327.el7.x86_64 #1
Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015
task: ffff8800b9a81700 ti: ffff8800b9a8c000 task.ti: ffff8800b9a8c000
RIP: 0010:[<ffffffff8108e6a7>]  [<ffffffff8108e6a7>] get_next_timer_interrupt+0x97/0x270
RSP: 0018:ffff8800b9a8fdd8  EFLAGS: 00010012
RAX: ffffffffa0715ad0 RBX: 00000863b6f08300 RCX: ffff8800b95a8d08
RDX: 00000000000000ce RSI: 00000000000000ce RDI: 0000000100882cce
RBP: ffff8800b9a8fe30 R08: 0000000000000202 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000001 R12: 0000000100882ccd
R13: 7fffffffffffffff R14: ffff8800b95a8000 R15: 0000000100882ccd
FS:  0000000000000000(0000) GS:ffff8800bb620000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffffffffa0715ae8 CR3: 00000000b64d8000 CR4: 00000000003407e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Stack:
 ffff8800b9f5e780 0000000000000000 ffff8800b9a8dfd8 ffff8800b9a8fe10
 ffff8800b9a8fe48 20cc1170855d3261 ffff8800bb62dbc0 00000863b6f08300
 0000000000000001 ffff8800bb62cf00 0000000100882ccd ffff8800b9a8fe88
Call Trace:
 [<ffffffff810e0978>] tick_nohz_stop_sched_tick+0x1e8/0x2e0
 [<ffffffff8101cd15>] ? native_sched_clock+0x35/0x80
 [<ffffffff810e0b0e>] __tick_nohz_idle_enter+0x9e/0x150
 [<ffffffff810e102d>] tick_nohz_idle_enter+0x3d/0x70
 [<ffffffff810d615e>] cpu_startup_entry+0x9e/0x290
 [<ffffffff810475fa>] start_secondary+0x1ba/0x230
Code: 18 49 8b 7e 10 48 39 cf 48 89 ca 78 5a 40 0f b6 d7 89 d6 48 63 c6 48 c1 e0 04 49 8d 0c 06 48 8b 41 28 48 83 c1 28 48 39 c8 74 0e <f6> 40 18 01 74 23 48 8b 00 48 39 c8 75 f2 83 c6 01 40 0f b6 f6
RIP  [<ffffffff8108e6a7>] get_next_timer_interrupt+0x97/0x270
 RSP <ffff8800b9a8fdd8>


Wang Zhike

-----邮件原件-----
发件人: Greg Rose [mailto:gvrose8192@gmail.com] 
发送时间: 2017年6月27日 6:26
收件人: 王志克
抄送: dev@openvswitch.org; Joe Stringer
主题: Re: [ovs-dev] 答复: 答复: [PATCH] pkt reassemble: fix kernel panic for ovs reassemble

On 06/26/2017 04:56 AM, 王志克 wrote:
> Hi Joe,

>

> I will try to check how to send the patch. Maybe tomorrow since I am quite busy now.

>

> Regarding the crash, I can reproduce it even with official OVS, like ovs2.6.0. (I just run the check kmod in a loop until kernel panic). So it is not related to the new fix.

>

> Br,

> Wang Zhike

I've been running 'make check-kmod' in a continuous loop on 3 virtual machines since this morning.  So far no kernel splats but plenty of errors:

This is on the Ubuntu machine running 4.0 kernel:

ERROR: 66 tests were run,
24 failed unexpectedly.
23 tests were skipped.
## -------------------------------------- ##
## system-kmod-testsuite.log was created. ##
## -------------------------------------- ##

Please send `tests/system-kmod-testsuite.log' and all information you think might help:

    To: <bugs@openvswitch.org>
       Subject: [openvswitch 2.7.90] system-kmod-testsuite: 16 17 35 57 58 59 60 61 62 63 70 71 72 75 76 81 82 83 84 85 86 87 88 89 failed

Centos 7.2 running 4.9.24 kernel:

## ------------- ##
## Test results. ##
## ------------- ##

ERROR: 76 tests were run,
34 failed unexpectedly.
13 tests were skipped.
## -------------------------------------- ## ## system-kmod-testsuite.log was created. ## ## -------------------------------------- ##

Please send `tests/system-kmod-testsuite.log' and all information you think might help:

    To: <bugs@openvswitch.org>
       Subject: [openvswitch 2.7.90] system-kmod-testsuite: 2 14 15 20 21 22 23 24 25 26 27 28 29 30 31 32 47 48 49 50 51 57 59 60 61 62 70 71 75 76 84 85 86 87 failed

Centos 7.2 running 4.10.17 kernel:

## ------------- ##
## Test results. ##
## ------------- ##

ERROR: 74 tests were run,
34 failed unexpectedly.
15 tests were skipped.
## -------------------------------------- ## ## system-kmod-testsuite.log was created. ## ## -------------------------------------- ##

Please send `tests/system-kmod-testsuite.log' and all information you think might help:

    To: <bugs@openvswitch.org>
       Subject: [openvswitch 2.7.90] system-kmod-testsuite: 2 14 15 20 21 22 23 24 25 26 27 28 29 30 31 32 47 48 49 50 51 57 59 60 61 62 70 71 75 76 84 85 86 87 failed

I confess to not spending a lot of time running check-kmod.  I certainly intend to in the future.

- Greg

>

> -----邮件原件-----

> 发件人: Joe Stringer [mailto:joe@ovn.org]

> 发送时间: 2017年6月24日 5:15

> 收件人: 王志克

> 抄送: dev@openvswitch.org

> 主题: Re: 答复: [ovs-dev] [PATCH] pkt reassemble: fix kernel panic for ovs 

> reassemble

>

> Hi Wang Zhike,

>

> I'd like if others like Greg could take a look as well, since this code is delicate. The more review it gets, the better. It seems like maybe the version of your email that goes to the list does not get the attachment. Perhaps you could try sending the patch using git send-email or putting the patch on GitHub instead, and linking to it here.

>

> For what it's worth, I did run your patch for a while and it seemed 

> OK, but when I tried again today on an Ubuntu Trusty (Linux

> 3.13.0-119-generic) box, running make check-kmod, I saw an issue with

> get_next_timer_interrupt():

>

> [181250.892557] BUG: unable to handle kernel paging request at 

> ffffffffa03317e0 [181250.892557] IP: [<ffffffff81079606>] 

> get_next_timer_interrupt+0x86/0x250

> [181250.892557] PGD 1c11067 PUD 1c12063 PMD 1381a2067 PTE 0 

> [181250.892557] Oops: 0000 [#1] SMP [181250.892557] Modules linked in: 

> nf_nat_ipv6 nf_nat_ipv4 nf_nat

> gre(-) nf_conntrack_ipv6 nf_conntrack_ipv4 nf_defrag_ipv6

> nf_defrag_ipv4 nf_conntrack_netlink nfnetlink nf_conntrack bonding 

> 8021q garp stp mrp llc veth nfsd auth_rpcgss nfs_acl nfs lockd sunrpc 

> fscache dm_crypt kvm_intel kvm serio_raw netconsole configfs 

> crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel

> aes_x86_64 lrw gf128mul glue_helper ablk_helper cryptd psmouse floppy ahci libahci [last unloaded: libcrc32c]

> [181250.892557] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G           OX

> 3.13.0-119-generic #166-Ubuntu

> [181250.892557] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 [181250.892557] task: ffffffff81c15480 ti: ffffffff81c00000 task.ti:

> ffffffff81c00000

> [181250.892557] RIP: 0010:[<ffffffff81079606>]  [<ffffffff81079606>]

> get_next_timer_interrupt+0x86/0x250

> [181250.892557] RSP: 0018:ffffffff81c01e00  EFLAGS: 00010002 [181250.892557] RAX: ffffffffa03317c8 RBX: 0000000102b245da RCX:

> 00000000000000db

> [181250.892557] RDX: ffffffff81ebac58 RSI: 00000000000000db RDI:

> 0000000102b245db

> [181250.892557] RBP: ffffffff81c01e48 R08: 0000000000c88c1c R09:

> 0000000000000000

> [181250.892557] R10: 0000000000000000 R11: 0000000000000000 R12:

> 0000000142b245d9

> [181250.892557] R13: ffffffff81eb9e80 R14: 0000000102b245da R15:

> 0000000000cd63e8

> [181250.892557] FS:  0000000000000000(0000) GS:ffff88013fc00000(0000)

> knlGS:0000000000000000

> [181250.892557] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b [181250.892557] CR2: ffffffffa03317e0 CR3: 000000003707f000 CR4:

> 00000000000006f0

> [181250.892557] Stack:

> [181250.892557]  0000000000000000 ffffffff81c01e30 ffffffff810a3af5

> ffff88013fc13bc0

> [181250.892557]  ffff88013fc0dce0 0000000102b245da 0000000000000000

> 00000063ae154000

> [181250.892557]  0000000000cd63e8 ffffffff81c01ea8 ffffffff810da655

> 0000a4d8c2cb6200

> [181250.892557] Call Trace:

> [181250.892557]  [<ffffffff810a3af5>] ? set_next_entity+0x95/0xb0 

> [181250.892557]  [<ffffffff810da655>] 

> tick_nohz_stop_sched_tick+0x1e5/0x340

> [181250.892557]  [<ffffffff810da851>] 

> __tick_nohz_idle_enter+0xa1/0x160 [181250.892557]  

> [<ffffffff810dab4d>] tick_nohz_idle_enter+0x3d/0x70 [181250.892557]  

> [<ffffffff810c2af7>] cpu_startup_entry+0x87/0x2b0 [181250.892557]  

> [<ffffffff8171b387>] rest_init+0x77/0x80 [181250.892557]  

> [<ffffffff81d34f6a>] start_kernel+0x432/0x43d [181250.892557]  

> [<ffffffff81d34941>] ? repair_env_string+0x5c/0x5c [181250.892557]  

> [<ffffffff81d34120>] ? early_idt_handler_array+0x120/0x120

> [181250.892557]  [<ffffffff81d345ee>] 

> x86_64_start_reservations+0x2a/0x2c

> [181250.892557]  [<ffffffff81d34733>] x86_64_start_kernel+0x143/0x152 

> [181250.892557] Code: 8b 7d 10 4d 8b 75 18 4c 39 f7 78 5c 40 0f b6 cf

> 89 ce 48 63 c6 48 c1 e0 04 49 8d 54 05 00 48 8b 42 28 48 83 c2 28 48

> 39 d0 74 0e <f6> 40 18 01 74 24 48 8b 00 48 39 d0 75 f2 83 c6 01 40 0f

> b6 f6

> [181250.892557] RIP  [<ffffffff81079606>] 

> get_next_timer_interrupt+0x86/0x250

> [181250.892557]  RSP <ffffffff81c01e00> [181250.892557] CR2: 

> ffffffffa03317e0

>

> It seems like perhaps a fragment timer signed up by OVS is still 

> remaining when the OVS module is unloaded, so it may attempt to clean 

> up an entry using OVS code but the OVS code has been unloaded at that 

> point. This might be related to IPv6 cvlan test - that seems to be 

> where my VM froze and went to 100% CPU, but I would think that the

> IPv6 fragmentation cleanup test is a more likely to cause this, since it leaves fragments behind in the cache after the test finishes. I've only hit this when running all of the tests in make check-kmod.

>

> Cheers,

> Joe

>

> On 22 June 2017 at 17:53, 王志克 <wangzhike@jd.com> wrote:

> > Hi Joe,

> >

> > Please check the attachment. Thanks.

> >

> > Br,

> > Wang Zhike

> >

> > -----邮件原件-----

> > 发件人: Joe Stringer [mailto:joe@ovn.org]

> > 发送时间: 2017年6月23日 8:20

> > 收件人: 王志克

> > 抄送: dev@openvswitch.org

> > 主题: Re: [ovs-dev] [PATCH] pkt reassemble: fix kernel panic for ovs 

> > reassemble

> >

> > On 21 June 2017 at 18:54, 王志克 <wangzhike@jd.com> wrote:

> >> Ovs and kernel stack would add frag_queue to same netns_frags list.

> >> As result, ovs and kernel may access the fraq_queue without correct 

> >> lock. Also the struct ipq may be different on kernel(older than 

> >> 4.3), which leads to invalid pointer access.

> >>

> >> The fix creates specific netns_frags for ovs.

> >>

> >> Signed-off-by: wangzhike <wangzhike@jd.com>

> >> ---

> >

> > Hi,

> >

> > It looks like the whitespace has been corrupted in this version of the patch that you sent, I cannot apply it. Probably your email client mistreats it when sending the email out. A reliable method to send patches correctly via email is to use the commandline client 'git send-email'. This is the preferred method. If you are unable to set that up, consider attaching the patch to the email (or send a pull request on GitHub).

> >

> > Cheers,

> > Joe

> _______________________________________________

> dev mailing list

> dev@openvswitch.org

> https://mail.openvswitch.org/mailman/listinfo/ovs-dev

>
Zhike Wang June 27, 2017, 4:05 a.m. UTC | #9
Hi Joe, Greg,

I tried to create a pull request, please check whether it works. Thanks.

https://github.com/openvswitch/ovs/pull/187

Br,
Wang Zhike
-----Original Message-----
From: Joe Stringer [mailto:joe@ovn.org] 

Sent: Saturday, June 24, 2017 5:15 AM
To: 王志克
Cc: dev@openvswitch.org
Subject: Re: 答复: [ovs-dev] [PATCH] pkt reassemble: fix kernel panic for ovs reassemble

Hi Wang Zhike,

I'd like if others like Greg could take a look as well, since this
code is delicate. The more review it gets, the better. It seems like
maybe the version of your email that goes to the list does not get the
attachment. Perhaps you could try sending the patch using git
send-email or putting the patch on GitHub instead, and linking to it
here.

For what it's worth, I did run your patch for a while and it seemed
OK, but when I tried again today on an Ubuntu Trusty (Linux
3.13.0-119-generic) box, running make check-kmod, I saw an issue with
get_next_timer_interrupt():

[181250.892557] BUG: unable to handle kernel paging request at ffffffffa03317e0
[181250.892557] IP: [<ffffffff81079606>] get_next_timer_interrupt+0x86/0x250
[181250.892557] PGD 1c11067 PUD 1c12063 PMD 1381a2067 PTE 0
[181250.892557] Oops: 0000 [#1] SMP
[181250.892557] Modules linked in: nf_nat_ipv6 nf_nat_ipv4 nf_nat
gre(-) nf_conntrack_ipv6 nf_conntrack_ipv4 nf_defrag_ipv6
nf_defrag_ipv4 nf_conntrack_netlink nfnetlink nf_conntrack bonding
8021q garp stp mrp llc veth nfsd auth_rpcgss nfs_acl nfs lockd sunrpc
fscache dm_crypt kvm_intel kvm serio_raw netconsole configfs
crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel
aes_x86_64 lrw gf128mul glue_helper ablk_helper cryptd psmouse floppy
ahci libahci [last unloaded: libcrc32c]
[181250.892557] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G           OX
3.13.0-119-generic #166-Ubuntu
[181250.892557] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS Bochs 01/01/2011
[181250.892557] task: ffffffff81c15480 ti: ffffffff81c00000 task.ti:
ffffffff81c00000
[181250.892557] RIP: 0010:[<ffffffff81079606>]  [<ffffffff81079606>]
get_next_timer_interrupt+0x86/0x250
[181250.892557] RSP: 0018:ffffffff81c01e00  EFLAGS: 00010002
[181250.892557] RAX: ffffffffa03317c8 RBX: 0000000102b245da RCX:
00000000000000db
[181250.892557] RDX: ffffffff81ebac58 RSI: 00000000000000db RDI:
0000000102b245db
[181250.892557] RBP: ffffffff81c01e48 R08: 0000000000c88c1c R09:
0000000000000000
[181250.892557] R10: 0000000000000000 R11: 0000000000000000 R12:
0000000142b245d9
[181250.892557] R13: ffffffff81eb9e80 R14: 0000000102b245da R15:
0000000000cd63e8
[181250.892557] FS:  0000000000000000(0000) GS:ffff88013fc00000(0000)
knlGS:0000000000000000
[181250.892557] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[181250.892557] CR2: ffffffffa03317e0 CR3: 000000003707f000 CR4:
00000000000006f0
[181250.892557] Stack:
[181250.892557]  0000000000000000 ffffffff81c01e30 ffffffff810a3af5
ffff88013fc13bc0
[181250.892557]  ffff88013fc0dce0 0000000102b245da 0000000000000000
00000063ae154000
[181250.892557]  0000000000cd63e8 ffffffff81c01ea8 ffffffff810da655
0000a4d8c2cb6200
[181250.892557] Call Trace:
[181250.892557]  [<ffffffff810a3af5>] ? set_next_entity+0x95/0xb0
[181250.892557]  [<ffffffff810da655>] tick_nohz_stop_sched_tick+0x1e5/0x340
[181250.892557]  [<ffffffff810da851>] __tick_nohz_idle_enter+0xa1/0x160
[181250.892557]  [<ffffffff810dab4d>] tick_nohz_idle_enter+0x3d/0x70
[181250.892557]  [<ffffffff810c2af7>] cpu_startup_entry+0x87/0x2b0
[181250.892557]  [<ffffffff8171b387>] rest_init+0x77/0x80
[181250.892557]  [<ffffffff81d34f6a>] start_kernel+0x432/0x43d
[181250.892557]  [<ffffffff81d34941>] ? repair_env_string+0x5c/0x5c
[181250.892557]  [<ffffffff81d34120>] ? early_idt_handler_array+0x120/0x120
[181250.892557]  [<ffffffff81d345ee>] x86_64_start_reservations+0x2a/0x2c
[181250.892557]  [<ffffffff81d34733>] x86_64_start_kernel+0x143/0x152
[181250.892557] Code: 8b 7d 10 4d 8b 75 18 4c 39 f7 78 5c 40 0f b6 cf
89 ce 48 63 c6 48 c1 e0 04 49 8d 54 05 00 48 8b 42 28 48 83 c2 28 48
39 d0 74 0e <f6> 40 18 01 74 24 48 8b 00 48 39 d0 75 f2 83 c6 01 40 0f
b6 f6
[181250.892557] RIP  [<ffffffff81079606>] get_next_timer_interrupt+0x86/0x250
[181250.892557]  RSP <ffffffff81c01e00>
[181250.892557] CR2: ffffffffa03317e0

It seems like perhaps a fragment timer signed up by OVS is still
remaining when the OVS module is unloaded, so it may attempt to clean
up an entry using OVS code but the OVS code has been unloaded at that
point. This might be related to IPv6 cvlan test - that seems to be
where my VM froze and went to 100% CPU, but I would think that the
IPv6 fragmentation cleanup test is more likely to cause this, since
it leaves fragments behind in the cache after the test finishes. I've
only hit this when running all of the tests in make check-kmod.

Cheers,
Joe

On 22 June 2017 at 17:53, 王志克 <wangzhike@jd.com> wrote:
> Hi Joe,

>

> Please check the attachment. Thanks.

>

> Br,

> Wang Zhike

>

> -----邮件原件-----

> 发件人: Joe Stringer [mailto:joe@ovn.org]

> 发送时间: 2017年6月23日 8:20

> 收件人: 王志克

> 抄送: dev@openvswitch.org

> 主题: Re: [ovs-dev] [PATCH] pkt reassemble: fix kernel panic for ovs reassemble

>

> On 21 June 2017 at 18:54, 王志克 <wangzhike@jd.com> wrote:

>> Ovs and kernel stack would add frag_queue to same netns_frags list.

>> As result, ovs and kernel may access the fraq_queue without correct

>> lock. Also the struct ipq may be different on kernel(older than 4.3),

>> which leads to invalid pointer access.

>>

>> The fix creates specific netns_frags for ovs.

>>

>> Signed-off-by: wangzhike <wangzhike@jd.com>

>> ---

>

> Hi,

>

> It looks like the whitespace has been corrupted in this version of the patch that you sent, I cannot apply it. Probably your email client mistreats it when sending the email out. A reliable method to send patches correctly via email is to use the commandline client 'git send-email'. This is the preferred method. If you are unable to set that up, consider attaching the patch to the email (or send a pull request on GitHub).

>

> Cheers,

> Joe
Gregory Rose June 27, 2017, 2:52 p.m. UTC | #10
On 06/26/2017 05:51 PM, 王志克 wrote:
> Hi Greg,

>

> The exact issue occured on the 20th of check-kmod (sometimes there are other kernel issue: kernel just hangs but without panic). OVS2.6.0 on CentOS7.2 with kernel 3.10.0-327.el7.x86_64. Some info below, which hopes helpful.


OK, I'll try with that kernel.  The three VMs I have that are running the test are still up and running after overnight.  So let me try the base install  kernel.

Thanks,

- Greg

>

> datapath-sanity

>

>    1: datapath - ping between two ports               ok

>    2: datapath - http between two ports               ok

>    3: datapath - ping between two ports on vlan       ok

>    4: datapath - ping6 between two ports              ok

>    5: datapath - ping6 between two ports on vlan      ok

>    6: datapath - ping over vxlan tunnel               FAILED (system-traffic.at:159)

>    7: datapath - ping over gre tunnel                 FAILED (system-traffic.at:199)

>    8: datapath - ping over geneve tunnel              skipped (system-traffic.at:213)

>    9: datapath - basic truncate action                ok

>   10: datapath - truncate and output to gre tunnel    FAILED (system-traffic.at:445)

>   11: conntrack - controller                          FAILED (system-traffic.at:522)

>   12: conntrack - IPv4 HTTP                           ok

>   13: conntrack - IPv6 HTTP                           ok

>   14: conntrack - IPv4 ping                           ok

>   15: conntrack - IPv6 ping                           ok

>   16: conntrack - commit, recirc                      ok

>   17: conntrack - preserve registers                  ok

>   18: conntrack - invalid                             ok

>   19: conntrack - zones                               ok

>   20: conntrack - zones from field ....(system crash...)

>

>

> [root@localhost vmcore-127.0.0.1-2017-06-25-23:17:12]# ls

> analyzer      backtrace  count      last_occurrence  os_info     runlevel  type  username  vmcore

> architecture  component  event_log  machineid        os_release  time      uid   uuid      vmcore-dmesg.txt

> [root@localhost vmcore-127.0.0.1-2017-06-25-23:17:12]# cat backtrace

>

> Version: 3.10.0-327.el7.x86_64

> BUG: unable to handle kernel paging request at ffffffffa0715ae8

> IP: [<ffffffff8108e6a7>] get_next_timer_interrupt+0x97/0x270

> PGD 194d067 PUD 194e063 PMD b746f067 PTE 0

> Oops: 0000 [#1] SMP

> Modules linked in: nf_nat_ftp nf_conntrack_ftp nf_conntrack_netlink nfnetlink ip_gre ip_tunnel gre vxlan ip6_udp_tunnel udp_tunnel 8021q garp m                                                                                              rp veth xt_CHECKSUM ipt_MASQUERADE nf_nat_masquerade_ipv4 tun ip6t_rpfilter ip6t_REJECT ipt_REJECT xt_conntrack ebtable_nat ebtable_broute brid                                                                                              ge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6                                                                                              table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_ra                                                                                              w iptable_filter vmw_vsock_vmci_transport vsock bnep dm_mirror dm_region_hash dm_log dm_mod snd_seq_midi snd_seq_midi_event snd_ens1371 snd_raw                                                                                              midi coretemp snd_ac97_codec ac97_bus crc32_pclmul snd_seq ghash_clmulni_intel ppdev

>   snd_seq_device cryptd btusb snd_pcm bluetooth snd_timer snd soundcore sg vmw_balloon rfkill pcspkr parport_pc parport i2c_piix4 vmw_vmci shpch                                                                                              p nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sr_mod cdrom ata_generic sd_mod crc_t10dif crct10dif_generic pata_acpi cr                                                                                              ct10dif_pclmul crct10dif_common crc32c_intel serio_raw vmwgfx drm_kms_helper ttm mptspi scsi_transport_spi e1000 mptscsih mptbase drm i2c_core                                                                                               ata_piix libata [last unloaded: openvswitch]

> CPU: 1 PID: 0 Comm: swapper/1 Tainted: G           OE  ------------   3.10.0-327.el7.x86_64 #1

> Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015

> task: ffff8800b9a81700 ti: ffff8800b9a8c000 task.ti: ffff8800b9a8c000

> RIP: 0010:[<ffffffff8108e6a7>]  [<ffffffff8108e6a7>] get_next_timer_interrupt+0x97/0x270

> RSP: 0018:ffff8800b9a8fdd8  EFLAGS: 00010012

> RAX: ffffffffa0715ad0 RBX: 00000863b6f08300 RCX: ffff8800b95a8d08

> RDX: 00000000000000ce RSI: 00000000000000ce RDI: 0000000100882cce

> RBP: ffff8800b9a8fe30 R08: 0000000000000202 R09: 0000000000000000

> R10: 0000000000000000 R11: 0000000000000001 R12: 0000000100882ccd

> R13: 7fffffffffffffff R14: ffff8800b95a8000 R15: 0000000100882ccd

> FS:  0000000000000000(0000) GS:ffff8800bb620000(0000) knlGS:0000000000000000

> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033

> CR2: ffffffffa0715ae8 CR3: 00000000b64d8000 CR4: 00000000003407e0

> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000

> DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400

> Stack:

>   ffff8800b9f5e780 0000000000000000 ffff8800b9a8dfd8 ffff8800b9a8fe10

>   ffff8800b9a8fe48 20cc1170855d3261 ffff8800bb62dbc0 00000863b6f08300

>   0000000000000001 ffff8800bb62cf00 0000000100882ccd ffff8800b9a8fe88

> Call Trace:

>   [<ffffffff810e0978>] tick_nohz_stop_sched_tick+0x1e8/0x2e0

>   [<ffffffff8101cd15>] ? native_sched_clock+0x35/0x80

>   [<ffffffff810e0b0e>] __tick_nohz_idle_enter+0x9e/0x150

>   [<ffffffff810e102d>] tick_nohz_idle_enter+0x3d/0x70

>   [<ffffffff810d615e>] cpu_startup_entry+0x9e/0x290

>   [<ffffffff810475fa>] start_secondary+0x1ba/0x230

> Code: 18 49 8b 7e 10 48 39 cf 48 89 ca 78 5a 40 0f b6 d7 89 d6 48 63 c6 48 c1 e0 04 49 8d 0c 06 48 8b 41 28 48 83 c1 28 48 39 c8 74 0e <f6> 40                                                                                               18 01 74 23 48 8b 00 48 39 c8 75 f2 83 c6 01 40 0f b6 f6

> RIP  [<ffffffff8108e6a7>] get_next_timer_interrupt+0x97/0x270

>   RSP <ffff8800b9a8fdd8>

>

>

> Wang Zhike

>

> -----邮件原件-----

> 发件人: Greg Rose [mailto:gvrose8192@gmail.com]

> 发送时间: 2017年6月27日 6:26

> 收件人: 王志克

> 抄送: dev@openvswitch.org; Joe Stringer

> 主题: Re: [ovs-dev] 答复: 答复: [PATCH] pkt reassemble: fix kernel panic for ovs reassemble

>

> On 06/26/2017 04:56 AM, 王志克 wrote:

> > Hi Joe,

> >

> > I will try to check how to send the patch. Maybe tomorrow since I am quite busy now.

> >

> > Regarding the crash, I can reproduce it even with official OVS, like ovs2.6.0. (I just run the check kmod in a loop until kernel panic). So it is not related to the new fix.

> >

> > Br,

> > Wang Zhike

> I've been running 'make check-kmod' in a continuous loop on 3 virtual machines since this morning.  So far no kernel splats but plenty of errors:

>

> This is on the Ubuntu machine running 4.0 kernel:

>

> ERROR: 66 tests were run,

> 24 failed unexpectedly.

> 23 tests were skipped.

> ## -------------------------------------- ## ## system-kmod-testsuite.log was created. ## ## -------------------------------------- ##

>

> Please send `tests/system-kmod-testsuite.log' and all information you think might help:

>

>      To: <bugs@openvswitch.org>

>         Subject: [openvswitch 2.7.90] system-kmod-testsuite: 16 17 35 57 58 59 60 61 62 63 70 71 72 75 76 81 82 83 84 85 86 87 88 89 failed

>

> Centos 7.2 running 4.9.24 kernel:

>

> ## ------------- ##

> ## Test results. ##

> ## ------------- ##

>

> ERROR: 76 tests were run,

> 34 failed unexpectedly.

> 13 tests were skipped.

> ## -------------------------------------- ## ## system-kmod-testsuite.log was created. ## ## -------------------------------------- ##

>

> Please send `tests/system-kmod-testsuite.log' and all information you think might help:

>

>      To: <bugs@openvswitch.org>

>         Subject: [openvswitch 2.7.90] system-kmod-testsuite: 2 14 15 20 21 22 23 24 25 26 27 28 29 30 31 32 47 48 49 50 51 57 59 60 61 62 70 71 75 76 84 85 86 87 failed

>

> Centos 7.2 running 4.10.17 kernel:

>

> ## ------------- ##

> ## Test results. ##

> ## ------------- ##

>

> ERROR: 74 tests were run,

> 34 failed unexpectedly.

> 15 tests were skipped.

> ## -------------------------------------- ## ## system-kmod-testsuite.log was created. ## ## -------------------------------------- ##

>

> Please send `tests/system-kmod-testsuite.log' and all information you think might help:

>

>      To: <bugs@openvswitch.org>

>         Subject: [openvswitch 2.7.90] system-kmod-testsuite: 2 14 15 20 21 22 23 24 25 26 27 28 29 30 31 32 47 48 49 50 51 57 59 60 61 62 70 71 75 76 84 85 86 87 failed

>

> I confess to not spending a lot of time running check-kmod.  I certainly intend to in the future.

>

> - Greg

>

> >

> > -----邮件原件-----

> > 发件人: Joe Stringer [mailto:joe@ovn.org]

> > 发送时间: 2017年6月24日 5:15

> > 收件人: 王志克

> > 抄送: dev@openvswitch.org

> > 主题: Re: 答复: [ovs-dev] [PATCH] pkt reassemble: fix kernel panic for ovs

> > reassemble

> >

> > Hi Wang Zhike,

> >

> > I'd like if others like Greg could take a look as well, since this code is delicate. The more review it gets, the better. It seems like maybe the version of your email that goes to the list does not get the attachment. Perhaps you could try sending the patch using git send-email or putting the patch on GitHub instead, and linking to it here.

> >

> > For what it's worth, I did run your patch for a while and it seemed

> > OK, but when I tried again today on an Ubuntu Trusty (Linux

> > 3.13.0-119-generic) box, running make check-kmod, I saw an issue with

> > get_next_timer_interrupt():

> >

> > [181250.892557] BUG: unable to handle kernel paging request at

> > ffffffffa03317e0 [181250.892557] IP: [<ffffffff81079606>]

> > get_next_timer_interrupt+0x86/0x250

> > [181250.892557] PGD 1c11067 PUD 1c12063 PMD 1381a2067 PTE 0

> > [181250.892557] Oops: 0000 [#1] SMP [181250.892557] Modules linked in:

> > nf_nat_ipv6 nf_nat_ipv4 nf_nat

> > gre(-) nf_conntrack_ipv6 nf_conntrack_ipv4 nf_defrag_ipv6

> > nf_defrag_ipv4 nf_conntrack_netlink nfnetlink nf_conntrack bonding

> > 8021q garp stp mrp llc veth nfsd auth_rpcgss nfs_acl nfs lockd sunrpc

> > fscache dm_crypt kvm_intel kvm serio_raw netconsole configfs

> > crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel

> > aes_x86_64 lrw gf128mul glue_helper ablk_helper cryptd psmouse floppy ahci libahci [last unloaded: libcrc32c]

> > [181250.892557] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G           OX

> > 3.13.0-119-generic #166-Ubuntu

> > [181250.892557] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 [181250.892557] task: ffffffff81c15480 ti: ffffffff81c00000 task.ti:

> > ffffffff81c00000

> > [181250.892557] RIP: 0010:[<ffffffff81079606>]  [<ffffffff81079606>]

> > get_next_timer_interrupt+0x86/0x250

> > [181250.892557] RSP: 0018:ffffffff81c01e00  EFLAGS: 00010002 [181250.892557] RAX: ffffffffa03317c8 RBX: 0000000102b245da RCX:

> > 00000000000000db

> > [181250.892557] RDX: ffffffff81ebac58 RSI: 00000000000000db RDI:

> > 0000000102b245db

> > [181250.892557] RBP: ffffffff81c01e48 R08: 0000000000c88c1c R09:

> > 0000000000000000

> > [181250.892557] R10: 0000000000000000 R11: 0000000000000000 R12:

> > 0000000142b245d9

> > [181250.892557] R13: ffffffff81eb9e80 R14: 0000000102b245da R15:

> > 0000000000cd63e8

> > [181250.892557] FS:  0000000000000000(0000) GS:ffff88013fc00000(0000)

> > knlGS:0000000000000000

> > [181250.892557] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b [181250.892557] CR2: ffffffffa03317e0 CR3: 000000003707f000 CR4:

> > 00000000000006f0

> > [181250.892557] Stack:

> > [181250.892557]  0000000000000000 ffffffff81c01e30 ffffffff810a3af5

> > ffff88013fc13bc0

> > [181250.892557]  ffff88013fc0dce0 0000000102b245da 0000000000000000

> > 00000063ae154000

> > [181250.892557]  0000000000cd63e8 ffffffff81c01ea8 ffffffff810da655

> > 0000a4d8c2cb6200

> > [181250.892557] Call Trace:

> > [181250.892557]  [<ffffffff810a3af5>] ? set_next_entity+0x95/0xb0

> > [181250.892557]  [<ffffffff810da655>]

> > tick_nohz_stop_sched_tick+0x1e5/0x340

> > [181250.892557]  [<ffffffff810da851>]

> > __tick_nohz_idle_enter+0xa1/0x160 [181250.892557]

> > [<ffffffff810dab4d>] tick_nohz_idle_enter+0x3d/0x70 [181250.892557]

> > [<ffffffff810c2af7>] cpu_startup_entry+0x87/0x2b0 [181250.892557]

> > [<ffffffff8171b387>] rest_init+0x77/0x80 [181250.892557]

> > [<ffffffff81d34f6a>] start_kernel+0x432/0x43d [181250.892557]

> > [<ffffffff81d34941>] ? repair_env_string+0x5c/0x5c [181250.892557]

> > [<ffffffff81d34120>] ? early_idt_handler_array+0x120/0x120

> > [181250.892557]  [<ffffffff81d345ee>]

> > x86_64_start_reservations+0x2a/0x2c

> > [181250.892557]  [<ffffffff81d34733>] x86_64_start_kernel+0x143/0x152

> > [181250.892557] Code: 8b 7d 10 4d 8b 75 18 4c 39 f7 78 5c 40 0f b6 cf

> > 89 ce 48 63 c6 48 c1 e0 04 49 8d 54 05 00 48 8b 42 28 48 83 c2 28 48

> > 39 d0 74 0e <f6> 40 18 01 74 24 48 8b 00 48 39 d0 75 f2 83 c6 01 40 0f

> > b6 f6

> > [181250.892557] RIP  [<ffffffff81079606>]

> > get_next_timer_interrupt+0x86/0x250

> > [181250.892557]  RSP <ffffffff81c01e00> [181250.892557] CR2:

> > ffffffffa03317e0

> >

> > It seems like perhaps a fragment timer signed up by OVS is still

> > remaining when the OVS module is unloaded, so it may attempt to clean

> > up an entry using OVS code but the OVS code has been unloaded at that

> > point. This might be related to IPv6 cvlan test - that seems to be

> > where my VM froze and went to 100% CPU, but I would think that the

> > IPv6 fragmentation cleanup test is more likely to cause this, since it leaves fragments behind in the cache after the test finishes. I've only hit this when running all of the tests in make check-kmod.

> >

> > Cheers,

> > Joe

> >

> > On 22 June 2017 at 17:53, 王志克 <wangzhike@jd.com> wrote:

> >> Hi Joe,

> >>

> >> Please check the attachment. Thanks.

> >>

> >> Br,

> >> Wang Zhike

> >>

> >> -----邮件原件-----

> >> 发件人: Joe Stringer [mailto:joe@ovn.org]

> >> 发送时间: 2017年6月23日 8:20

> >> 收件人: 王志克

> >> 抄送: dev@openvswitch.org

> >> 主题: Re: [ovs-dev] [PATCH] pkt reassemble: fix kernel panic for ovs

> >> reassemble

> >>

> >> On 21 June 2017 at 18:54, 王志克 <wangzhike@jd.com> wrote:

> >>> Ovs and kernel stack would add frag_queue to same netns_frags list.

> >>> As a result, ovs and kernel may access the frag_queue without correct

> >>> lock. Also the struct ipq may be different on kernel(older than

> >>> 4.3), which leads to invalid pointer access.

> >>>

> >>> The fix creates specific netns_frags for ovs.

> >>>

> >>> Signed-off-by: wangzhike <wangzhike@jd.com>

> >>> ---

> >>

> >> Hi,

> >>

> >> It looks like the whitespace has been corrupted in this version of the patch that you sent, I cannot apply it. Probably your email client mistreats it when sending the email out. A reliable method to send patches correctly via email is to use the commandline client 'git send-email'. This is the preferred method. If you are unable to set that up, consider attaching the patch to the email (or send a pull request on GitHub).

> >>

> >> Cheers,

> >> Joe

> > _______________________________________________

> > dev mailing list

> > dev@openvswitch.org

> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev

> >

>
Gregory Rose June 27, 2017, 11:53 p.m. UTC | #11
On 06/26/2017 09:05 PM, 王志克 wrote:
> Hi Joe, Greg,
> 
> I tried to create a pull request, please check whether it works. Thanks.
> 
> https://github.com/openvswitch/ovs/pull/187

I've been trying to test this on a Centos 7.2 3.10 kernel but am
running into unrelated build issues.  I'll see if I can get a repro
when I've fixed those.

Thanks,

- Greg

> 
> Br,
> Wang Zhike
> -----Original Message-----
> From: Joe Stringer [mailto:joe@ovn.org]
> Sent: Saturday, June 24, 2017 5:15 AM
> To: 王志克
> Cc: dev@openvswitch.org
> Subject: Re: 答复: [ovs-dev] [PATCH] pkt reassemble: fix kernel panic for ovs reassemble
> 
> Hi Wang Zhike,
> 
> I'd like if others like Greg could take a look as well, since this
> code is delicate. The more review it gets, the better. It seems like
> maybe the version of your email that goes to the list does not get the
> attachment. Perhaps you could try sending the patch using git
> send-email or putting the patch on GitHub instead, and linking to it
> here.
> 
> For what it's worth, I did run your patch for a while and it seemed
> OK, but when I tried again today on an Ubuntu Trusty (Linux
> 3.13.0-119-generic) box, running make check-kmod, I saw an issue with
> get_next_timer_interrupt():
> 
> [181250.892557] BUG: unable to handle kernel paging request at ffffffffa03317e0
> [181250.892557] IP: [<ffffffff81079606>] get_next_timer_interrupt+0x86/0x250
> [181250.892557] PGD 1c11067 PUD 1c12063 PMD 1381a2067 PTE 0
> [181250.892557] Oops: 0000 [#1] SMP
> [181250.892557] Modules linked in: nf_nat_ipv6 nf_nat_ipv4 nf_nat
> gre(-) nf_conntrack_ipv6 nf_conntrack_ipv4 nf_defrag_ipv6
> nf_defrag_ipv4 nf_conntrack_netlink nfnetlink nf_conntrack bonding
> 8021q garp stp mrp llc veth nfsd auth_rpcgss nfs_acl nfs lockd sunrpc
> fscache dm_crypt kvm_intel kvm serio_raw netconsole configfs
> crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel
> aes_x86_64 lrw gf128mul glue_helper ablk_helper cryptd psmouse floppy
> ahci libahci [last unloaded: libcrc32c]
> [181250.892557] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G           OX
> 3.13.0-119-generic #166-Ubuntu
> [181250.892557] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
> BIOS Bochs 01/01/2011
> [181250.892557] task: ffffffff81c15480 ti: ffffffff81c00000 task.ti:
> ffffffff81c00000
> [181250.892557] RIP: 0010:[<ffffffff81079606>]  [<ffffffff81079606>]
> get_next_timer_interrupt+0x86/0x250
> [181250.892557] RSP: 0018:ffffffff81c01e00  EFLAGS: 00010002
> [181250.892557] RAX: ffffffffa03317c8 RBX: 0000000102b245da RCX:
> 00000000000000db
> [181250.892557] RDX: ffffffff81ebac58 RSI: 00000000000000db RDI:
> 0000000102b245db
> [181250.892557] RBP: ffffffff81c01e48 R08: 0000000000c88c1c R09:
> 0000000000000000
> [181250.892557] R10: 0000000000000000 R11: 0000000000000000 R12:
> 0000000142b245d9
> [181250.892557] R13: ffffffff81eb9e80 R14: 0000000102b245da R15:
> 0000000000cd63e8
> [181250.892557] FS:  0000000000000000(0000) GS:ffff88013fc00000(0000)
> knlGS:0000000000000000
> [181250.892557] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
> [181250.892557] CR2: ffffffffa03317e0 CR3: 000000003707f000 CR4:
> 00000000000006f0
> [181250.892557] Stack:
> [181250.892557]  0000000000000000 ffffffff81c01e30 ffffffff810a3af5
> ffff88013fc13bc0
> [181250.892557]  ffff88013fc0dce0 0000000102b245da 0000000000000000
> 00000063ae154000
> [181250.892557]  0000000000cd63e8 ffffffff81c01ea8 ffffffff810da655
> 0000a4d8c2cb6200
> [181250.892557] Call Trace:
> [181250.892557]  [<ffffffff810a3af5>] ? set_next_entity+0x95/0xb0
> [181250.892557]  [<ffffffff810da655>] tick_nohz_stop_sched_tick+0x1e5/0x340
> [181250.892557]  [<ffffffff810da851>] __tick_nohz_idle_enter+0xa1/0x160
> [181250.892557]  [<ffffffff810dab4d>] tick_nohz_idle_enter+0x3d/0x70
> [181250.892557]  [<ffffffff810c2af7>] cpu_startup_entry+0x87/0x2b0
> [181250.892557]  [<ffffffff8171b387>] rest_init+0x77/0x80
> [181250.892557]  [<ffffffff81d34f6a>] start_kernel+0x432/0x43d
> [181250.892557]  [<ffffffff81d34941>] ? repair_env_string+0x5c/0x5c
> [181250.892557]  [<ffffffff81d34120>] ? early_idt_handler_array+0x120/0x120
> [181250.892557]  [<ffffffff81d345ee>] x86_64_start_reservations+0x2a/0x2c
> [181250.892557]  [<ffffffff81d34733>] x86_64_start_kernel+0x143/0x152
> [181250.892557] Code: 8b 7d 10 4d 8b 75 18 4c 39 f7 78 5c 40 0f b6 cf
> 89 ce 48 63 c6 48 c1 e0 04 49 8d 54 05 00 48 8b 42 28 48 83 c2 28 48
> 39 d0 74 0e <f6> 40 18 01 74 24 48 8b 00 48 39 d0 75 f2 83 c6 01 40 0f
> b6 f6
> [181250.892557] RIP  [<ffffffff81079606>] get_next_timer_interrupt+0x86/0x250
> [181250.892557]  RSP <ffffffff81c01e00>
> [181250.892557] CR2: ffffffffa03317e0
> 
> It seems like perhaps a fragment timer signed up by OVS is still
> remaining when the OVS module is unloaded, so it may attempt to clean
> up an entry using OVS code but the OVS code has been unloaded at that
> point. This might be related to IPv6 cvlan test - that seems to be
> where my VM froze and went to 100% CPU, but I would think that the
> IPv6 fragmentation cleanup test is more likely to cause this, since
> it leaves fragments behind in the cache after the test finishes. I've
> only hit this when running all of the tests in make check-kmod.
> 
> Cheers,
> Joe
> 
> On 22 June 2017 at 17:53, 王志克 <wangzhike@jd.com> wrote:
>> Hi Joe,
>>
>> Please check the attachment. Thanks.
>>
>> Br,
>> Wang Zhike
>>
>> -----邮件原件-----
>> 发件人: Joe Stringer [mailto:joe@ovn.org]
>> 发送时间: 2017年6月23日 8:20
>> 收件人: 王志克
>> 抄送: dev@openvswitch.org
>> 主题: Re: [ovs-dev] [PATCH] pkt reassemble: fix kernel panic for ovs reassemble
>>
>> On 21 June 2017 at 18:54, 王志克 <wangzhike@jd.com> wrote:
>>> Ovs and kernel stack would add frag_queue to same netns_frags list.
>>> As a result, ovs and kernel may access the frag_queue without correct
>>> lock. Also the struct ipq may be different on kernel(older than 4.3),
>>> which leads to invalid pointer access.
>>>
>>> The fix creates specific netns_frags for ovs.
>>>
>>> Signed-off-by: wangzhike <wangzhike@jd.com>
>>> ---
>>
>> Hi,
>>
>> It looks like the whitespace has been corrupted in this version of the patch that you sent, I cannot apply it. Probably your email client mistreats it when sending the email out. A reliable method to send patches correctly via email is to use the commandline client 'git send-email'. This is the preferred method. If you are unable to set that up, consider attaching the patch to the email (or send a pull request on GitHub).
>>
>> Cheers,
>> Joe
Gregory Rose June 28, 2017, 8:29 p.m. UTC | #12
On 06/26/2017 05:51 PM, 王志克 wrote:
> Hi Greg,
> 
> The exact issue occurred on the 20th test of check-kmod (sometimes there are other kernel issues: the kernel just hangs without a panic). OVS 2.6.0 on CentOS 7.2 with kernel 3.10.0-327.el7.x86_64. Some info below, which I hope is helpful.
> 
> datapath-sanity
> 
>    1: datapath - ping between two ports               ok
>    2: datapath - http between two ports               ok
>    3: datapath - ping between two ports on vlan       ok
>    4: datapath - ping6 between two ports              ok
>    5: datapath - ping6 between two ports on vlan      ok
>    6: datapath - ping over vxlan tunnel               FAILED (system-traffic.at:159)
>    7: datapath - ping over gre tunnel                 FAILED (system-traffic.at:199)
>    8: datapath - ping over geneve tunnel              skipped (system-traffic.at:213)
>    9: datapath - basic truncate action                ok
>   10: datapath - truncate and output to gre tunnel    FAILED (system-traffic.at:445)
>   11: conntrack - controller                          FAILED (system-traffic.at:522)
>   12: conntrack - IPv4 HTTP                           ok
>   13: conntrack - IPv6 HTTP                           ok
>   14: conntrack - IPv4 ping                           ok
>   15: conntrack - IPv6 ping                           ok
>   16: conntrack - commit, recirc                      ok
>   17: conntrack - preserve registers                  ok
>   18: conntrack - invalid                             ok
>   19: conntrack - zones                               ok
>   20: conntrack - zones from field ....(system crash...)
> 
> 

[snipped]

Hi Wang,

I am having some definite problems trying to get this to repro.  I can't even get
openvswitch-2.6.0 to build.  I am running into numerous compatibility layer issues
with netfilter and the net_ns (!!!!!!!!!!!!) code that prevent compilation, much
less getting any check-kmod tests to run.  It's a complete mess.

Can you point me to a link with an openvswitch 2.6 tarball that builds on your Centos7.2
3.10.0-327.el7.x86_64  kernel?

I'm building on Centos 7.2 as well - using the 3.10.0-514.el7.x86_64 kernel myself but that shouldn't
matter.  Or if it does then that is an important detail.

Thanks,

- Greg
Zhike Wang June 29, 2017, 12:53 a.m. UTC | #13
Hi Greg,

I just downloaded the official tarball:
wget http://openvswitch.org/releases/openvswitch-2.6.0.tar.gz

Then I compiled it as below (I do not see any compile issues):

./configure --with-linux=/lib/modules/$(uname -r)/build
make
make install
make modules_install

Br,
Wang Zhike
-----Original Message-----
From: Greg Rose [mailto:gvrose8192@gmail.com] 

Sent: Thursday, June 29, 2017 4:29 AM
To: 王志克
Cc: dev@openvswitch.org; Joe Stringer
Subject: Re: 答复: [ovs-dev] 答复: 答复: [PATCH] pkt reassemble: fix kernel panic for ovs reassemble

On 06/26/2017 05:51 PM, 王志克 wrote:
> Hi Greg,

> 

> The exact issue occurred on the 20th test of check-kmod (sometimes there are other kernel issues: the kernel just hangs without a panic). OVS 2.6.0 on CentOS 7.2 with kernel 3.10.0-327.el7.x86_64. Some info below, which I hope is helpful.

> 

> datapath-sanity

> 

>    1: datapath - ping between two ports               ok

>    2: datapath - http between two ports               ok

>    3: datapath - ping between two ports on vlan       ok

>    4: datapath - ping6 between two ports              ok

>    5: datapath - ping6 between two ports on vlan      ok

>    6: datapath - ping over vxlan tunnel               FAILED (system-traffic.at:159)

>    7: datapath - ping over gre tunnel                 FAILED (system-traffic.at:199)

>    8: datapath - ping over geneve tunnel              skipped (system-traffic.at:213)

>    9: datapath - basic truncate action                ok

>   10: datapath - truncate and output to gre tunnel    FAILED (system-traffic.at:445)

>   11: conntrack - controller                          FAILED (system-traffic.at:522)

>   12: conntrack - IPv4 HTTP                           ok

>   13: conntrack - IPv6 HTTP                           ok

>   14: conntrack - IPv4 ping                           ok

>   15: conntrack - IPv6 ping                           ok

>   16: conntrack - commit, recirc                      ok

>   17: conntrack - preserve registers                  ok

>   18: conntrack - invalid                             ok

>   19: conntrack - zones                               ok

>   20: conntrack - zones from field ....(system crash...)

> 

> 


[snipped]

Hi Wang,

I am having some definite problems trying to get this to repro.  I can't even get
openvswitch-2.6.0 to build.  I am running into numerous compatibility layer issues
with netfilter and the net_ns (!!!!!!!!!!!!) code that prevent compilation, much
less getting any check-kmod tests to run.  It's a complete mess.

Can you point me to a link with an openvswitch 2.6 tarball that builds on your Centos7.2
3.10.0-327.el7.x86_64  kernel?

I'm building on Centos 7.2 as well - using the 3.10.0-514.el7.x86_64 kernel myself but that shouldn't
matter.  Or if it does then that is an important detail.

Thanks,

- Greg
Gregory Rose June 29, 2017, 5:22 p.m. UTC | #14
On 06/28/2017 05:53 PM, 王志克 wrote:
> Hi Greg,
> 
> I just downloaded the official tarball:
> wget http://openvswitch.org/releases/openvswitch-2.6.0.tar.gz
> 
> Then I compiled it as below (I do not see any compile issues):
> 
> ./configure --with-linux=/lib/modules/$(uname -r)/build
> make
> make install
> make modules_install
> 
> Br,
> Wang Zhike

Weird... below is what I get at the compile phase when I follow the same steps.  Let me
try a completely fresh installation of Centos 7.2 on a new VM.  Perhaps something has muddled
the build environments for the VM I'm using.

I'll try that and see if I can get something going.

Thanks,

- Greg

In file included from include/net/inet_sock.h:24:0,
                  from include/net/ip.h:30,
                  from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/compat/include/net/ip.h:4,
                  from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/compat/include/linux/netfilter_ipv6.h:7,
                  from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/actions.c:25:
/home/gvrose/prj/openvswitch-2.6.0/datapath/linux/compat/include/linux/netdevice.h:125:34: error: conflicting types for 
‘netdev_notifier_info_to_dev’
  static inline struct net_device *netdev_notifier_info_to_dev(void *info)
                                   ^
In file included from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/compat/include/linux/netdevice.h:4:0,
                  from include/net/inet_sock.h:24,
                  from include/net/ip.h:30,
                  from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/compat/include/net/ip.h:4,
                  from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/compat/include/linux/netfilter_ipv6.h:7,
                  from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/actions.c:25:
include/linux/netdevice.h:2257:1: note: previous definition of ‘netdev_notifier_info_to_dev’ was here
  netdev_notifier_info_to_dev(const struct netdev_notifier_info *info)
  ^
In file included from include/uapi/linux/if_arp.h:26:0,
                  from include/linux/if_arp.h:27,
                  from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/datapath.c:23:
/home/gvrose/prj/openvswitch-2.6.0/datapath/linux/compat/include/linux/netdevice.h:125:34: error: conflicting types for 
‘netdev_notifier_info_to_dev’
  static inline struct net_device *netdev_notifier_info_to_dev(void *info)
                                   ^
In file included from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/compat/include/linux/netdevice.h:4:0,
                  from include/uapi/linux/if_arp.h:26,
                  from include/linux/if_arp.h:27,
                  from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/datapath.c:23:
include/linux/netdevice.h:2257:1: note: previous definition of ‘netdev_notifier_info_to_dev’ was here
  netdev_notifier_info_to_dev(const struct netdev_notifier_info *info)
  ^
In file included from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/dp_notify.c:19:0:
/home/gvrose/prj/openvswitch-2.6.0/datapath/linux/compat/include/linux/netdevice.h:125:34: error: conflicting types for 
‘netdev_notifier_info_to_dev’
  static inline struct net_device *netdev_notifier_info_to_dev(void *info)
                                   ^
In file included from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/compat/include/linux/netdevice.h:4:0,
                  from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/dp_notify.c:19:
include/linux/netdevice.h:2257:1: note: previous definition of ‘netdev_notifier_info_to_dev’ was here
  netdev_notifier_info_to_dev(const struct netdev_notifier_info *info)
  ^
In file included from include/net/sock.h:51:0,
                  from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/compat/include/net/sock.h:4,
                  from include/linux/tcp.h:23,
                  from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/conntrack.c:21:
/home/gvrose/prj/openvswitch-2.6.0/datapath/linux/compat/include/linux/netdevice.h:125:34: error: conflicting types for 
‘netdev_notifier_info_to_dev’
  static inline struct net_device *netdev_notifier_info_to_dev(void *info)
                                   ^
In file included from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/compat/include/linux/netdevice.h:4:0,
                  from include/net/sock.h:51,
                  from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/compat/include/net/sock.h:4,
                  from include/linux/tcp.h:23,
                  from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/conntrack.c:21:
include/linux/netdevice.h:2257:1: note: previous definition of ‘netdev_notifier_info_to_dev’ was here
  netdev_notifier_info_to_dev(const struct netdev_notifier_info *info)
  ^
make[5]: *** [/home/gvrose/prj/openvswitch-2.6.0/datapath/linux/dp_notify.o] Error 1
make[5]: *** Waiting for unfinished jobs....
make[5]: *** [/home/gvrose/prj/openvswitch-2.6.0/datapath/linux/actions.o] Error 1
make[5]: *** [/home/gvrose/prj/openvswitch-2.6.0/datapath/linux/datapath.o] Error 1
make[5]: *** [/home/gvrose/prj/openvswitch-2.6.0/datapath/linux/conntrack.o] Error 1
make[4]: *** [_module_/home/gvrose/prj/openvswitch-2.6.0/datapath/linux] Error 2
make[4]: Leaving directory `/usr/src/kernels/3.10.0-514.16.1.el7.x86_64'
make[3]: *** [default] Error 2
make[3]: Leaving directory `/home/gvrose/prj/openvswitch-2.6.0/datapath/linux'
make[2]: *** [all-recursive] Error 1
make[2]: Leaving directory `/home/gvrose/prj/openvswitch-2.6.0/datapath'
make[1]: *** [all-recursive] Error 1
make[1]: Leaving directory `/home/gvrose/prj/openvswitch-2.6.0'
make: *** [all] Error 2

> -----Original Message-----
> From: Greg Rose [mailto:gvrose8192@gmail.com]
> Sent: Thursday, June 29, 2017 4:29 AM
> To: 王志克
> Cc: dev@openvswitch.org; Joe Stringer
> Subject: Re: 答复: [ovs-dev] 答复: 答复: [PATCH] pkt reassemble: fix kernel panic for ovs reassemble
> 
> On 06/26/2017 05:51 PM, 王志克 wrote:
>> Hi Greg,
>>
>> The exact issue occurred on the 20th test of check-kmod (sometimes there are other kernel issues: the kernel just hangs without a panic). OVS 2.6.0 on CentOS 7.2 with kernel 3.10.0-327.el7.x86_64. Some info below, which I hope is helpful.
>>
>> datapath-sanity
>>
>>     1: datapath - ping between two ports               ok
>>     2: datapath - http between two ports               ok
>>     3: datapath - ping between two ports on vlan       ok
>>     4: datapath - ping6 between two ports              ok
>>     5: datapath - ping6 between two ports on vlan      ok
>>     6: datapath - ping over vxlan tunnel               FAILED (system-traffic.at:159)
>>     7: datapath - ping over gre tunnel                 FAILED (system-traffic.at:199)
>>     8: datapath - ping over geneve tunnel              skipped (system-traffic.at:213)
>>     9: datapath - basic truncate action                ok
>>    10: datapath - truncate and output to gre tunnel    FAILED (system-traffic.at:445)
>>    11: conntrack - controller                          FAILED (system-traffic.at:522)
>>    12: conntrack - IPv4 HTTP                           ok
>>    13: conntrack - IPv6 HTTP                           ok
>>    14: conntrack - IPv4 ping                           ok
>>    15: conntrack - IPv6 ping                           ok
>>    16: conntrack - commit, recirc                      ok
>>    17: conntrack - preserve registers                  ok
>>    18: conntrack - invalid                             ok
>>    19: conntrack - zones                               ok
>>    20: conntrack - zones from field ....(system crash...)
>>
>>
> 
> [snipped]
> 
> Hi Wang,
> 
> I am having some definite problems trying to get this to repro.  I can't even get
> openvswitch-2.6.0 to build.  I am running into numerous compatibility layer issues
> with netfilter and the net_ns (!!!!!!!!!!!!) code that prevent compilation, much
> less getting any check-kmod tests to run.  It's a complete mess.
> 
> Can you point me to a link with an openvswitch 2.6 tarball that builds on your Centos7.2
> 3.10.0-327.el7.x86_64  kernel?
> 
> I'm building on Centos 7.2 as well - using the 3.10.0-514.el7.x86_64 kernel myself but that shouldn't
> matter.  Or if it does then that is an important detail.
> 
> Thanks,
> 
> - Greg
>
Zhike Wang July 7, 2017, 3:25 a.m. UTC | #15
Hi Greg,

Any progress?

Thanks.

Br,
Wang Zhike

-----Original Message-----
From: Greg Rose [mailto:gvrose8192@gmail.com] 

Sent: Friday, June 30, 2017 1:23 AM
To: 王志克
Cc: dev@openvswitch.org; Joe Stringer
Subject: Re: 答复: [ovs-dev] 答复: 答复: [PATCH] pkt reassemble: fix kernel panic for ovs reassemble

On 06/28/2017 05:53 PM, 王志克 wrote:
> Hi Greg,

> 

> I just downloaded the official tarball:

> wget http://openvswitch.org/releases/openvswitch-2.6.0.tar.gz

> 

> Then I compiled it as below (I do not see any compile issues):

> 

> ./configure --with-linux=/lib/modules/$(uname -r)/build

> make

> make install

> make modules_install

> 

> Br,

> Wang Zhike


Weird... below is what I get at the compile phase when I follow the same steps.  Let me
try a completely fresh installation of Centos 7.2 on a new VM.  Perhaps something has muddled
the build environments for the VM I'm using.

I'll try that and see if I can get something going.

Thanks,

- Greg

In file included from include/net/inet_sock.h:24:0,
                  from include/net/ip.h:30,
                  from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/compat/include/net/ip.h:4,
                  from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/compat/include/linux/netfilter_ipv6.h:7,
                  from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/actions.c:25:
/home/gvrose/prj/openvswitch-2.6.0/datapath/linux/compat/include/linux/netdevice.h:125:34: error: conflicting types for 
‘netdev_notifier_info_to_dev’
  static inline struct net_device *netdev_notifier_info_to_dev(void *info)
                                   ^
In file included from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/compat/include/linux/netdevice.h:4:0,
                  from include/net/inet_sock.h:24,
                  from include/net/ip.h:30,
                  from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/compat/include/net/ip.h:4,
                  from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/compat/include/linux/netfilter_ipv6.h:7,
                  from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/actions.c:25:
include/linux/netdevice.h:2257:1: note: previous definition of ‘netdev_notifier_info_to_dev’ was here
  netdev_notifier_info_to_dev(const struct netdev_notifier_info *info)
  ^
In file included from include/uapi/linux/if_arp.h:26:0,
                  from include/linux/if_arp.h:27,
                  from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/datapath.c:23:
/home/gvrose/prj/openvswitch-2.6.0/datapath/linux/compat/include/linux/netdevice.h:125:34: error: conflicting types for 
‘netdev_notifier_info_to_dev’
  static inline struct net_device *netdev_notifier_info_to_dev(void *info)
                                   ^
In file included from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/compat/include/linux/netdevice.h:4:0,
                  from include/uapi/linux/if_arp.h:26,
                  from include/linux/if_arp.h:27,
                  from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/datapath.c:23:
include/linux/netdevice.h:2257:1: note: previous definition of ‘netdev_notifier_info_to_dev’ was here
  netdev_notifier_info_to_dev(const struct netdev_notifier_info *info)
  ^
In file included from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/dp_notify.c:19:0:
/home/gvrose/prj/openvswitch-2.6.0/datapath/linux/compat/include/linux/netdevice.h:125:34: error: conflicting types for 
‘netdev_notifier_info_to_dev’
  static inline struct net_device *netdev_notifier_info_to_dev(void *info)
                                   ^
In file included from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/compat/include/linux/netdevice.h:4:0,
                  from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/dp_notify.c:19:
include/linux/netdevice.h:2257:1: note: previous definition of ‘netdev_notifier_info_to_dev’ was here
  netdev_notifier_info_to_dev(const struct netdev_notifier_info *info)
  ^
In file included from include/net/sock.h:51:0,
                  from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/compat/include/net/sock.h:4,
                  from include/linux/tcp.h:23,
                  from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/conntrack.c:21:
/home/gvrose/prj/openvswitch-2.6.0/datapath/linux/compat/include/linux/netdevice.h:125:34: error: conflicting types for 
‘netdev_notifier_info_to_dev’
  static inline struct net_device *netdev_notifier_info_to_dev(void *info)
                                   ^
In file included from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/compat/include/linux/netdevice.h:4:0,
                  from include/net/sock.h:51,
                  from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/compat/include/net/sock.h:4,
                  from include/linux/tcp.h:23,
                  from /home/gvrose/prj/openvswitch-2.6.0/datapath/linux/conntrack.c:21:
include/linux/netdevice.h:2257:1: note: previous definition of ‘netdev_notifier_info_to_dev’ was here
  netdev_notifier_info_to_dev(const struct netdev_notifier_info *info)
  ^
make[5]: *** [/home/gvrose/prj/openvswitch-2.6.0/datapath/linux/dp_notify.o] Error 1
make[5]: *** Waiting for unfinished jobs....
make[5]: *** [/home/gvrose/prj/openvswitch-2.6.0/datapath/linux/actions.o] Error 1
make[5]: *** [/home/gvrose/prj/openvswitch-2.6.0/datapath/linux/datapath.o] Error 1
make[5]: *** [/home/gvrose/prj/openvswitch-2.6.0/datapath/linux/conntrack.o] Error 1
make[4]: *** [_module_/home/gvrose/prj/openvswitch-2.6.0/datapath/linux] Error 2
make[4]: Leaving directory `/usr/src/kernels/3.10.0-514.16.1.el7.x86_64'
make[3]: *** [default] Error 2
make[3]: Leaving directory `/home/gvrose/prj/openvswitch-2.6.0/datapath/linux'
make[2]: *** [all-recursive] Error 1
make[2]: Leaving directory `/home/gvrose/prj/openvswitch-2.6.0/datapath'
make[1]: *** [all-recursive] Error 1
make[1]: Leaving directory `/home/gvrose/prj/openvswitch-2.6.0'
make: *** [all] Error 2

> -----Original Message-----

> From: Greg Rose [mailto:gvrose8192@gmail.com]

> Sent: Thursday, June 29, 2017 4:29 AM

> To: 王志克

> Cc: dev@openvswitch.org; Joe Stringer

> Subject: Re: 答复: [ovs-dev] 答复: 答复: [PATCH] pkt reassemble: fix kernel panic for ovs reassemble

> 

> On 06/26/2017 05:51 PM, 王志克 wrote:

>> Hi Greg,

>>

>> The exact issue occurred on the 20th test of check-kmod (sometimes there are other kernel issues: the kernel just hangs without a panic). OVS 2.6.0 on CentOS 7.2 with kernel 3.10.0-327.el7.x86_64. Some info below, which I hope is helpful.

>>

>> datapath-sanity

>>

>>     1: datapath - ping between two ports               ok

>>     2: datapath - http between two ports               ok

>>     3: datapath - ping between two ports on vlan       ok

>>     4: datapath - ping6 between two ports              ok

>>     5: datapath - ping6 between two ports on vlan      ok

>>     6: datapath - ping over vxlan tunnel               FAILED (system-traffic.at:159)

>>     7: datapath - ping over gre tunnel                 FAILED (system-traffic.at:199)

>>     8: datapath - ping over geneve tunnel              skipped (system-traffic.at:213)

>>     9: datapath - basic truncate action                ok

>>    10: datapath - truncate and output to gre tunnel    FAILED (system-traffic.at:445)

>>    11: conntrack - controller                          FAILED (system-traffic.at:522)

>>    12: conntrack - IPv4 HTTP                           ok

>>    13: conntrack - IPv6 HTTP                           ok

>>    14: conntrack - IPv4 ping                           ok

>>    15: conntrack - IPv6 ping                           ok

>>    16: conntrack - commit, recirc                      ok

>>    17: conntrack - preserve registers                  ok

>>    18: conntrack - invalid                             ok

>>    19: conntrack - zones                               ok

>>    20: conntrack - zones from field ....(system crash...)

>>

>>

> 

> [snipped]

> 

> Hi Wang,

> 

> I am having some definite problems trying to get this to repro.  I can't even get

> openvswitch-2.6.0 to build.  I am running into numerous compatibility layer issues

> with netfilter and the net_ns (!!!!!!!!!!!!) code that prevent compilation, much

> less getting any check-kmod tests to run.  It's a complete mess.

> 

> Can you point me to a link with an openvswitch 2.6 tarball that builds on your Centos7.2

> 3.10.0-327.el7.x86_64  kernel?

> 

> I'm building on Centos 7.2 as well - using the 3.10.0-514.el7.x86_64 kernel myself but that shouldn't

> matter.  Or if it does then that is an important detail.

> 

> Thanks,

> 

> - Greg

>
Gregory Rose July 12, 2017, 3 p.m. UTC | #16
On 07/06/2017 08:25 PM, 王志克 wrote:
> Hi Greg,
> 
> Any progress?
> 
> Thanks.
> 
> Br,
> Wang Zhike
> 

Hi Wang,

I've been out of town since June 30 but will try to find some time to take this up again this week.

Thanks,

- Greg
diff mbox

Patch

diff --git a/datapath/datapath.c b/datapath/datapath.c
index c85029c..82cad74 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -2297,6 +2297,8 @@  static int __net_init ovs_init_net(struct net *net)
       INIT_LIST_HEAD(&ovs_net->dps);
       INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
       ovs_ct_init(net);
+       ovs_netns_frags_init(net);
+       ovs_netns_frags6_init(net);
       return 0;
}
@@ -2332,6 +2334,8 @@  static void __net_exit ovs_exit_net(struct net *dnet)
       struct net *net;
       LIST_HEAD(head);
+       ovs_netns_frags6_exit(dnet);
+       ovs_netns_frags_exit(dnet);
       ovs_ct_exit(dnet);
       ovs_lock();
       list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
@@ -2368,13 +2372,9 @@  static int __init dp_init(void)
        pr_info("Open vSwitch switching datapath %s\n", VERSION);
-        err = compat_init();
-        if (err)
-                 goto error;
-
       err = action_fifos_init();
       if (err)
-                 goto error_compat_exit;
+                goto error;
        err = ovs_internal_dev_rtnl_link_register();
       if (err)
@@ -2392,10 +2392,14 @@  static int __init dp_init(void)
       if (err)
                goto error_vport_exit;
-        err = register_netdevice_notifier(&ovs_dp_device_notifier);
+       err = compat_init();
       if (err)
                goto error_netns_exit;
+       err = register_netdevice_notifier(&ovs_dp_device_notifier);
+       if (err)
+                goto error_compat_exit;
+
       err = ovs_netdev_init();
       if (err)
                goto error_unreg_notifier;
@@ -2410,6 +2414,8 @@  error_unreg_netdev:
       ovs_netdev_exit();
error_unreg_notifier:
       unregister_netdevice_notifier(&ovs_dp_device_notifier);
+error_compat_exit:
+       compat_exit();
error_netns_exit:
       unregister_pernet_device(&ovs_net_ops);
error_vport_exit:
@@ -2420,8 +2426,6 @@  error_unreg_rtnl_link:
       ovs_internal_dev_rtnl_link_unregister();
error_action_fifos_exit:
       action_fifos_exit();
-error_compat_exit:
-        compat_exit();
error:
       return err;
}
@@ -2431,13 +2435,13 @@  static void dp_cleanup(void)
       dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
       ovs_netdev_exit();
       unregister_netdevice_notifier(&ovs_dp_device_notifier);
+       compat_exit();
       unregister_pernet_device(&ovs_net_ops);
       rcu_barrier();
       ovs_vport_exit();
       ovs_flow_exit();
       ovs_internal_dev_rtnl_link_unregister();
       action_fifos_exit();
-        compat_exit();
}
 module_init(dp_init);
diff --git a/datapath/datapath.h b/datapath/datapath.h
index b835ada..8849625 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -141,6 +141,12 @@  struct ovs_net {
        /* Module reference for configuring conntrack. */
       bool xt_label;
+
+#ifdef HAVE_INET_FRAG_LRU_MOVE
+       struct net *net;
+       struct netns_frags ipv4_frags;
+       struct netns_frags nf_frags;
+#endif
};
 extern unsigned int ovs_net_id;
diff --git a/datapath/linux/compat/include/net/inet_frag.h b/datapath/linux/compat/include/net/inet_frag.h
index 01d79ad..34078c8 100644
--- a/datapath/linux/compat/include/net/inet_frag.h
+++ b/datapath/linux/compat/include/net/inet_frag.h
@@ -52,22 +52,4 @@  static inline int rpl_inet_frags_init(struct inet_frags *frags)
#define inet_frags_init rpl_inet_frags_init
#endif
-#ifndef HAVE_CORRECT_MRU_HANDLING
-/* We reuse the upstream inet_fragment.c common code for managing fragment
- * stores, However we actually store the fragments within our own 'inet_frags'
- * structures (in {ip_fragment,nf_conntrack_reasm}.c). When unloading the OVS
- * kernel module, we need to flush all of the remaining fragments from these
- * caches, or else we will panic with the following sequence of events:
- *
- * 1) A fragment for a packet arrives and is cached in inet_frags. This
- *    starts a timer to ensure the fragment does not hang around forever.
- * 2) openvswitch module is unloaded.
- * 3) The timer for the fragment fires, calling into backported OVS code
- *    to free the fragment.
- * 4) BUG: unable to handle kernel paging request at ffffffffc03c01e0
- */
-void rpl_inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f);
-#define inet_frags_exit_net rpl_inet_frags_exit_net
-#endif
-
#endif /* inet_frag.h */
diff --git a/datapath/linux/compat/include/net/ip.h b/datapath/linux/compat/include/net/ip.h
index b188996..ad5ac33 100644
--- a/datapath/linux/compat/include/net/ip.h
+++ b/datapath/linux/compat/include/net/ip.h
@@ -97,6 +97,8 @@  int rpl_ip_defrag(struct net *net, struct sk_buff *skb, u32 user);
#define ip_defrag rpl_ip_defrag
int __init rpl_ipfrag_init(void);
void rpl_ipfrag_fini(void);
+void ovs_netns_frags_init(struct net *net);
+void ovs_netns_frags_exit(struct net *net);
 #else /* HAVE_CORRECT_MRU_HANDLING */
@@ -131,6 +133,8 @@  static inline int rpl_ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
  * compat_{in,ex}it() can be no-ops. */
static inline int rpl_ipfrag_init(void) { return 0; }
static inline void rpl_ipfrag_fini(void) { }
+static inline void ovs_netns_frags_init(struct net *net) { }
+static inline void ovs_netns_frags_exit(struct net *net) { }
#endif /* HAVE_CORRECT_MRU_HANDLING */
 #define ipfrag_init rpl_ipfrag_init
diff --git a/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h
index 2ab6c0a..c4c0f79 100644
--- a/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h
+++ b/datapath/linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h
@@ -28,9 +28,13 @@  int rpl_nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user);
  */
int __init rpl_nf_ct_frag6_init(void);
void rpl_nf_ct_frag6_cleanup(void);
+void ovs_netns_frags6_init(struct net *net);
+void ovs_netns_frags6_exit(struct net *net);
#else /* !OVS_NF_DEFRAG6_BACKPORT */
static inline int __init rpl_nf_ct_frag6_init(void) { return 0; }
static inline void rpl_nf_ct_frag6_cleanup(void) { }
+static inline void ovs_netns_frags6_init(struct net *net) { }
+static inline void ovs_netns_frags6_exit(struct net *net) { }
#endif /* OVS_NF_DEFRAG6_BACKPORT */
#define nf_ct_frag6_init rpl_nf_ct_frag6_init
#define nf_ct_frag6_cleanup rpl_nf_ct_frag6_cleanup
diff --git a/datapath/linux/compat/inet_fragment.c b/datapath/linux/compat/inet_fragment.c
index f05e617..21736e6 100644
--- a/datapath/linux/compat/inet_fragment.c
+++ b/datapath/linux/compat/inet_fragment.c
@@ -27,88 +27,5 @@ 
#include <net/inet_frag.h>
#include <net/inet_ecn.h>
-#ifdef HAVE_INET_FRAGS_WITH_FRAGS_WORK
-static bool inet_fragq_should_evict(const struct inet_frag_queue *q)
-{
-        return q->net->low_thresh == 0 ||
-               frag_mem_limit(q->net) >= q->net->low_thresh;
-}
-
-static unsigned int
-inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
-{
-        struct inet_frag_queue *fq;
-        struct hlist_node *n;
-        unsigned int evicted = 0;
-        HLIST_HEAD(expired);
-
-        spin_lock(&hb->chain_lock);
-
-        hlist_for_each_entry_safe(fq, n, &hb->chain, list) {
-                 if (!inet_fragq_should_evict(fq))
-                           continue;
-
-                 if (!del_timer(&fq->timer))
-                           continue;
-
-#ifdef HAVE_INET_FRAG_QUEUE_WITH_LIST_EVICTOR
-                 hlist_add_head(&fq->list_evictor, &expired);
-#else
-                 hlist_del(&fq->list);
-                 hlist_add_head(&fq->list, &expired);
-#endif
-                 ++evicted;
-        }
-
-        spin_unlock(&hb->chain_lock);
-
-#ifdef HAVE_INET_FRAG_QUEUE_WITH_LIST_EVICTOR
-        hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
-#else
-        hlist_for_each_entry_safe(fq, n, &expired, list)
-#endif
-                 f->frag_expire((unsigned long) fq);
-
-        return evicted;
-}
-
-void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
-{
-        int thresh = nf->low_thresh;
-        unsigned int seq;
-        int i;
-
-        nf->low_thresh = 0;
-
-evict_again:
-        local_bh_disable();
-        seq = read_seqbegin(&f->rnd_seqlock);
-
-        for (i = 0; i < INETFRAGS_HASHSZ ; i++)
-                 inet_evict_bucket(f, &f->hash[i]);
-
-        local_bh_enable();
-        cond_resched();
-
-        if (read_seqretry(&f->rnd_seqlock, seq) ||
-            percpu_counter_sum(&nf->mem))
-                 goto evict_again;
-
-        nf->low_thresh = thresh;
-}
-#else /* HAVE_INET_FRAGS_WITH_FRAGS_WORK */
-void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
-{
-        int thresh = nf->low_thresh;
-
-        nf->low_thresh = 0;
-
-        local_bh_disable();
-        inet_frag_evictor(nf, f, true);
-        local_bh_enable();
-
-        nf->low_thresh = thresh;
-}
-#endif /* HAVE_INET_FRAGS_WITH_FRAGS_WORK */
 #endif /* !HAVE_CORRECT_MRU_HANDLING */
diff --git a/datapath/linux/compat/ip_fragment.c b/datapath/linux/compat/ip_fragment.c
index 47b51b5..8f2012b 100644
--- a/datapath/linux/compat/ip_fragment.c
+++ b/datapath/linux/compat/ip_fragment.c
@@ -57,6 +57,8 @@ 
#include <net/inet_ecn.h>
#include <net/vrf.h>
#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+#include <net/netns/generic.h>
+#include "datapath.h"
 /* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6
  * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
@@ -107,6 +109,51 @@  struct ip4_create_arg {
       int vif;
};
+static struct netns_frags *get_netns_frags_from_net(struct net *net)
+{
+#ifdef HAVE_INET_FRAG_LRU_MOVE
+       struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+       return &(ovs_net->ipv4_frags);
+#else
+       return &(net->ipv4.frags);
+#endif
+}
+
+static struct net *get_net_from_netns_frags(struct netns_frags *frags)
+{
+       struct net *net;
+#ifdef HAVE_INET_FRAG_LRU_MOVE
+       struct ovs_net *ovs_net;
+
+       ovs_net = container_of(frags, struct ovs_net, ipv4_frags);
+       net = ovs_net->net;
+#else
+       net = container_of(frags, struct net, ipv4.frags);
+#endif
+       return net;
+}
+
+void ovs_netns_frags_init(struct net *net)
+{
+#ifdef HAVE_INET_FRAG_LRU_MOVE
+       struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+
+       ovs_net->ipv4_frags.high_thresh = 4 * 1024 * 1024;
+       ovs_net->ipv4_frags.low_thresh = 3 * 1024 * 1024;
+       ovs_net->ipv4_frags.timeout = IP_FRAG_TIME;
+       inet_frags_init_net(&(ovs_net->ipv4_frags));
+       ovs_net->net = net;
+#endif
+}
+
+void ovs_netns_frags_exit(struct net *net)
+{
+       struct netns_frags *frags;
+
+       frags = get_netns_frags_from_net(net);
+       inet_frags_exit_net(frags, &ip4_frags);
+}
+
static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
{
       net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd));
@@ -158,9 +205,7 @@  static void ip4_frag_init(struct inet_frag_queue *q, void *a)
#endif
{
       struct ipq *qp = container_of(q, struct ipq, q);
-        struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4,
-                                                    frags);
-        struct net *net = container_of(ipv4, struct net, ipv4);
+       struct net *net = get_net_from_netns_frags(q->net);
        const struct ip4_create_arg *arg = a;
@@ -219,7 +264,7 @@  static void ip_expire(unsigned long arg)
       struct net *net;
        qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
-        net = container_of(qp->q.net, struct net, ipv4.frags);
+       net = get_net_from_netns_frags(qp->q.net);
        spin_lock(&qp->q.lock);
@@ -278,8 +323,10 @@  out:
static void ip_evictor(struct net *net)
{
       int evicted;
+       struct netns_frags *frags;
-        evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags, false);
+       frags = get_netns_frags_from_net(net);
+       evicted = inet_frag_evictor(frags, &ip4_frags, false);
       if (evicted)
                IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted);
}
@@ -294,6 +341,7 @@  static struct ipq *ip_find(struct net *net, struct iphdr *iph,
       struct inet_frag_queue *q;
       struct ip4_create_arg arg;
       unsigned int hash;
+       struct netns_frags *frags;
        arg.iph = iph;
       arg.user = user;
@@ -304,7 +352,8 @@  static struct ipq *ip_find(struct net *net, struct iphdr *iph,
#endif
       hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
-        q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
+       frags = get_netns_frags_from_net(net);
+       q = inet_frag_find(frags, &ip4_frags, &arg, hash);
       if (IS_ERR_OR_NULL(q)) {
                inet_frag_maybe_warn_overflow(q, pr_fmt());
                return NULL;
@@ -333,7 +382,7 @@  static int ip_frag_too_far(struct ipq *qp)
       if (rc) {
                struct net *net;
-                 net = container_of(qp->q.net, struct net, ipv4.frags);
+                net = get_net_from_netns_frags(qp->q.net);
                IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
       }
@@ -566,7 +615,7 @@  err:
static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
                           struct net_device *dev)
{
-        struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
+       struct net *net = get_net_from_netns_frags(qp->q.net);
       struct iphdr *iph;
       struct sk_buff *fp, *head = qp->q.fragments;
       int len;
@@ -738,7 +787,6 @@  static int __net_init ipv4_frags_init_net(struct net *net)
 static void __net_exit ipv4_frags_exit_net(struct net *net)
{
-        inet_frags_exit_net(&net->ipv4.frags, &ip4_frags);
}
 static struct pernet_operations ip4_frags_ops = {
diff --git a/datapath/linux/compat/nf_conntrack_reasm.c b/datapath/linux/compat/nf_conntrack_reasm.c
index 0da9463..ea153c3 100644
--- a/datapath/linux/compat/nf_conntrack_reasm.c
+++ b/datapath/linux/compat/nf_conntrack_reasm.c
@@ -53,6 +53,8 @@ 
#include <linux/kernel.h>
#include <linux/module.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#include <net/netns/generic.h>
+#include "datapath.h"
 #ifdef OVS_NF_DEFRAG6_BACKPORT
@@ -68,6 +70,30 @@  struct nf_ct_frag6_skb_cb
 static struct inet_frags nf_frags;