diff mbox series

[Bionic/Cosmic/Disco/Unstable,SRU,1/1] UBUNTU: SAUCE: openvswitch: Disable eventmask support on 32-bit

Message ID 20190311073643.11829-2-juergh@canonical.com
State New
Headers show
Series openvswitch: kernel oops destroying interfaces on i386 (LP #1736390) | expand

Commit Message

Juerg Haefliger March 11, 2019, 7:36 a.m. UTC
BugLink: https://bugs.launchpad.net/bugs/1736390

Openvswitch eventmask support for CT actions was introduced with commit
120645513f55 ("openvswitch: Add eventmask support to CT action."). This
commit introduces a regression on i386 which results in a kernel crash.
Fix that by disabling eventmask support on i386.

Per upstream, the result of *not* having this "will cause additional CPU
use and potential buffering issues for CT event monitors in userspace."

https://lore.kernel.org/lkml/E891AED6-8406-4914-BDC3-92248F77C63B@ovn.org/

Fixes: 120645513f55 ("openvswitch: Add eventmask support to CT action.")
Signed-off-by: Juerg Haefliger <juergh@canonical.com>
---
 include/uapi/linux/openvswitch.h |  2 ++
 net/openvswitch/conntrack.c      | 12 ++++++++++++
 2 files changed, 14 insertions(+)

Comments

Thadeu Lima de Souza Cascardo March 11, 2019, 11:53 a.m. UTC | #1
On Mon, Mar 11, 2019 at 08:36:43AM +0100, Juerg Haefliger wrote:
> BugLink: https://bugs.launchpad.net/bugs/1736390
> 
> Openvswitch eventmask support for CT actions was introduced with commit
> 120645513f55 ("openvswitch: Add eventmask support to CT action."). This
> commit introduces a regression on i386 which results in a kernel crash.
> Fix that by disabling eventmask support on i386.
> 
> Per upstream, the result of *not* having this "will cause additional CPU
> use and potential buffering issues for CT event monitors in userspace."
> 
> https://lore.kernel.org/lkml/E891AED6-8406-4914-BDC3-92248F77C63B@ovn.org/
> 
> Fixes: 120645513f55 ("openvswitch: Add eventmask support to CT action.")
> Signed-off-by: Juerg Haefliger <juergh@canonical.com>
> ---
>  include/uapi/linux/openvswitch.h |  2 ++
>  net/openvswitch/conntrack.c      | 12 ++++++++++++
>  2 files changed, 14 insertions(+)
> 

I have gone through the bug, the ovs thread and the ovs patch, with some of the
explanation from Jarno on why this should only be disabled on i386.

I am still not very comfortable with this, as this seems to be corrupting
memory on different places, by looking at the backtrace you used on the ovs-dev
mailing list.

In any case, I failed to identify the test results with the patch applied on
the bug. Can you tell more about them?

Cascardo.

> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
> index dcfab5e3b55c..9da2942d7f7e 100644
> --- a/include/uapi/linux/openvswitch.h
> +++ b/include/uapi/linux/openvswitch.h
> @@ -744,7 +744,9 @@ enum ovs_ct_attr {
>  				   related connections. */
>  	OVS_CT_ATTR_NAT,        /* Nested OVS_NAT_ATTR_* */
>  	OVS_CT_ATTR_FORCE_COMMIT,  /* No argument */
> +#ifndef CONFIG_X86_32
>  	OVS_CT_ATTR_EVENTMASK,  /* u32 mask of IPCT_* events. */
> +#endif
>  	__OVS_CT_ATTR_MAX
>  };
>  
> diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
> index 8f04bd6e44bb..0c44fb56317e 100644
> --- a/net/openvswitch/conntrack.c
> +++ b/net/openvswitch/conntrack.c
> @@ -66,9 +66,13 @@ struct ovs_conntrack_info {
>  	u8 commit : 1;
>  	u8 nat : 3;                 /* enum ovs_ct_nat */
>  	u8 force : 1;
> +#ifndef CONFIG_X86_32
>  	u8 have_eventmask : 1;
> +#endif
>  	u16 family;
> +#ifndef CONFIG_X86_32
>  	u32 eventmask;              /* Mask of 1 << IPCT_*. */
> +#endif
>  	struct md_mark mark;
>  	struct md_labels labels;
>  #ifdef CONFIG_NF_NAT_NEEDED
> @@ -1054,6 +1058,7 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
>  	if (!ct)
>  		return 0;
>  
> +#ifndef CONFIG_X86_32
>  	/* Set the conntrack event mask if given.  NEW and DELETE events have
>  	 * their own groups, but the NFNLGRP_CONNTRACK_UPDATE group listener
>  	 * typically would receive many kinds of updates.  Setting the event
> @@ -1067,6 +1072,7 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
>  		if (cache)
>  			cache->ctmask = info->eventmask;
>  	}
> +#endif
>  
>  	/* Apply changes before confirming the connection so that the initial
>  	 * conntrack NEW netlink event carries the values given in the CT
> @@ -1340,8 +1346,10 @@ static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = {
>  	/* NAT length is checked when parsing the nested attributes. */
>  	[OVS_CT_ATTR_NAT]	= { .minlen = 0, .maxlen = INT_MAX },
>  #endif
> +#ifndef CONFIG_X86_32
>  	[OVS_CT_ATTR_EVENTMASK]	= { .minlen = sizeof(u32),
>  				    .maxlen = sizeof(u32) },
> +#endif
>  };
>  
>  static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
> @@ -1423,10 +1431,12 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
>  			break;
>  		}
>  #endif
> +#ifndef CONFIG_X86_32
>  		case OVS_CT_ATTR_EVENTMASK:
>  			info->have_eventmask = true;
>  			info->eventmask = nla_get_u32(a);
>  			break;
> +#endif
>  
>  		default:
>  			OVS_NLERR(log, "Unknown conntrack attr (%d)",
> @@ -1627,9 +1637,11 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
>  				   ct_info->helper->name))
>  			return -EMSGSIZE;
>  	}
> +#ifndef CONFIG_X86_32
>  	if (ct_info->have_eventmask &&
>  	    nla_put_u32(skb, OVS_CT_ATTR_EVENTMASK, ct_info->eventmask))
>  		return -EMSGSIZE;
> +#endif
>  
>  #ifdef CONFIG_NF_NAT_NEEDED
>  	if (ct_info->nat && !ovs_ct_nat_to_attr(ct_info, skb))
> -- 
> 2.19.1
> 
> 
> -- 
> kernel-team mailing list
> kernel-team@lists.ubuntu.com
> https://lists.ubuntu.com/mailman/listinfo/kernel-team
Juerg Haefliger April 5, 2019, 6:10 a.m. UTC | #2
On Mon, 11 Mar 2019 08:53:22 -0300
Thadeu Lima de Souza Cascardo <cascardo@canonical.com> wrote:

> On Mon, Mar 11, 2019 at 08:36:43AM +0100, Juerg Haefliger wrote:
> > BugLink: https://bugs.launchpad.net/bugs/1736390
> > 
> > Openvswitch eventmask support for CT actions was introduced with commit
> > 120645513f55 ("openvswitch: Add eventmask support to CT action."). This
> > commit introduces a regression on i386 which results in a kernel crash.
> > Fix that by disabling eventmask support on i386.
> > 
> > Per upstream, the result of *not* having this "will cause additional CPU
> > use and potential buffering issues for CT event monitors in userspace."
> > 
> > https://lore.kernel.org/lkml/E891AED6-8406-4914-BDC3-92248F77C63B@ovn.org/
> > 
> > Fixes: 120645513f55 ("openvswitch: Add eventmask support to CT action.")
> > Signed-off-by: Juerg Haefliger <juergh@canonical.com>
> > ---
> >  include/uapi/linux/openvswitch.h |  2 ++
> >  net/openvswitch/conntrack.c      | 12 ++++++++++++
> >  2 files changed, 14 insertions(+)
> >   
> 
> I have gone through the bug, the ovs thread and the ovs patch, with some of the
> explanation from Jarno on why this should only be disabled on i386.
> 
> I am still not very comfortable with this, as this seems to be corrupting
> memory on different places, by looking at the backtrace you used on the ovs-dev
> mailing list.

Yes. That's why I want to disable it completely on i386 until upstream comes
up with a root-cause/fix (which they haven't so far).


> In any case, I failed to identify the test results with the patch applied on
> the bug. Can you tell more about them?

The simple case is to just add and delete an OVS bridge repeatedly which kills
the (i386) machine without this patch.

...Juerg


 
> Cascardo.
> 
> > diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
> > index dcfab5e3b55c..9da2942d7f7e 100644
> > --- a/include/uapi/linux/openvswitch.h
> > +++ b/include/uapi/linux/openvswitch.h
> > @@ -744,7 +744,9 @@ enum ovs_ct_attr {
> >  				   related connections. */
> >  	OVS_CT_ATTR_NAT,        /* Nested OVS_NAT_ATTR_* */
> >  	OVS_CT_ATTR_FORCE_COMMIT,  /* No argument */
> > +#ifndef CONFIG_X86_32
> >  	OVS_CT_ATTR_EVENTMASK,  /* u32 mask of IPCT_* events. */
> > +#endif
> >  	__OVS_CT_ATTR_MAX
> >  };
> >  
> > diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
> > index 8f04bd6e44bb..0c44fb56317e 100644
> > --- a/net/openvswitch/conntrack.c
> > +++ b/net/openvswitch/conntrack.c
> > @@ -66,9 +66,13 @@ struct ovs_conntrack_info {
> >  	u8 commit : 1;
> >  	u8 nat : 3;                 /* enum ovs_ct_nat */
> >  	u8 force : 1;
> > +#ifndef CONFIG_X86_32
> >  	u8 have_eventmask : 1;
> > +#endif
> >  	u16 family;
> > +#ifndef CONFIG_X86_32
> >  	u32 eventmask;              /* Mask of 1 << IPCT_*. */
> > +#endif
> >  	struct md_mark mark;
> >  	struct md_labels labels;
> >  #ifdef CONFIG_NF_NAT_NEEDED
> > @@ -1054,6 +1058,7 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
> >  	if (!ct)
> >  		return 0;
> >  
> > +#ifndef CONFIG_X86_32
> >  	/* Set the conntrack event mask if given.  NEW and DELETE events have
> >  	 * their own groups, but the NFNLGRP_CONNTRACK_UPDATE group listener
> >  	 * typically would receive many kinds of updates.  Setting the event
> > @@ -1067,6 +1072,7 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
> >  		if (cache)
> >  			cache->ctmask = info->eventmask;
> >  	}
> > +#endif
> >  
> >  	/* Apply changes before confirming the connection so that the initial
> >  	 * conntrack NEW netlink event carries the values given in the CT
> > @@ -1340,8 +1346,10 @@ static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = {
> >  	/* NAT length is checked when parsing the nested attributes. */
> >  	[OVS_CT_ATTR_NAT]	= { .minlen = 0, .maxlen = INT_MAX },
> >  #endif
> > +#ifndef CONFIG_X86_32
> >  	[OVS_CT_ATTR_EVENTMASK]	= { .minlen = sizeof(u32),
> >  				    .maxlen = sizeof(u32) },
> > +#endif
> >  };
> >  
> >  static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
> > @@ -1423,10 +1431,12 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
> >  			break;
> >  		}
> >  #endif
> > +#ifndef CONFIG_X86_32
> >  		case OVS_CT_ATTR_EVENTMASK:
> >  			info->have_eventmask = true;
> >  			info->eventmask = nla_get_u32(a);
> >  			break;
> > +#endif
> >  
> >  		default:
> >  			OVS_NLERR(log, "Unknown conntrack attr (%d)",
> > @@ -1627,9 +1637,11 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
> >  				   ct_info->helper->name))
> >  			return -EMSGSIZE;
> >  	}
> > +#ifndef CONFIG_X86_32
> >  	if (ct_info->have_eventmask &&
> >  	    nla_put_u32(skb, OVS_CT_ATTR_EVENTMASK, ct_info->eventmask))
> >  		return -EMSGSIZE;
> > +#endif
> >  
> >  #ifdef CONFIG_NF_NAT_NEEDED
> >  	if (ct_info->nat && !ovs_ct_nat_to_attr(ct_info, skb))
> > -- 
> > 2.19.1
> > 
> > 
> > -- 
> > kernel-team mailing list
> > kernel-team@lists.ubuntu.com
> > https://lists.ubuntu.com/mailman/listinfo/kernel-team
Andrea Righi April 5, 2019, 7:07 a.m. UTC | #3
On Fri, Apr 05, 2019 at 08:10:42AM +0200, Juerg Haefliger wrote:
> On Mon, 11 Mar 2019 08:53:22 -0300
> Thadeu Lima de Souza Cascardo <cascardo@canonical.com> wrote:
> 
> > On Mon, Mar 11, 2019 at 08:36:43AM +0100, Juerg Haefliger wrote:
> > > BugLink: https://bugs.launchpad.net/bugs/1736390
> > > 
> > > Openvswitch eventmask support for CT actions was introduced with commit
> > > 120645513f55 ("openvswitch: Add eventmask support to CT action."). This
> > > commit introduces a regression on i386 which results in a kernel crash.
> > > Fix that by disabling eventmask support on i386.
> > > 
> > > Per upstream, the result of *not* having this "will cause additional CPU
> > > use and potential buffering issues for CT event monitors in userspace."
> > > 
> > > https://lore.kernel.org/lkml/E891AED6-8406-4914-BDC3-92248F77C63B@ovn.org/
> > > 
> > > Fixes: 120645513f55 ("openvswitch: Add eventmask support to CT action.")
> > > Signed-off-by: Juerg Haefliger <juergh@canonical.com>
> > > ---
> > >  include/uapi/linux/openvswitch.h |  2 ++
> > >  net/openvswitch/conntrack.c      | 12 ++++++++++++
> > >  2 files changed, 14 insertions(+)
> > >   
> > 
> > I have gone through the bug, the ovs thread and the ovs patch, with some of the
> > explanation from Jarno on why this should only be disabled on i386.
> > 
> > I am still not very comfortable with this, as this seems to be corrupting
> > memory on different places, by looking at the backtrace you used on the ovs-dev
> > mailing list.
> 
> Yes. That's why I want to disable it completely on i386 until upstream comes
> up with a root-cause/fix (which they haven't so far).
> 
> 
> > In any case, I failed to identify the test results with the patch applied on
> > the bug. Can you tell more about them?
> 
> The simple case is to just add and delete an OVS bridge repeatedly which kills
> the (i386) machine without this patch.
> 
> ...Juerg

This bug might be a duplicate of LP: #1813244.

I've tried to reproduce the problem creating and deleting a bridge in a
busy loop and with LP: #1813244's fix applied the bug doesn't seem to
happen anymore.

-Andrea
diff mbox series

Patch

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index dcfab5e3b55c..9da2942d7f7e 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -744,7 +744,9 @@  enum ovs_ct_attr {
 				   related connections. */
 	OVS_CT_ATTR_NAT,        /* Nested OVS_NAT_ATTR_* */
 	OVS_CT_ATTR_FORCE_COMMIT,  /* No argument */
+#ifndef CONFIG_X86_32
 	OVS_CT_ATTR_EVENTMASK,  /* u32 mask of IPCT_* events. */
+#endif
 	__OVS_CT_ATTR_MAX
 };
 
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 8f04bd6e44bb..0c44fb56317e 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -66,9 +66,13 @@  struct ovs_conntrack_info {
 	u8 commit : 1;
 	u8 nat : 3;                 /* enum ovs_ct_nat */
 	u8 force : 1;
+#ifndef CONFIG_X86_32
 	u8 have_eventmask : 1;
+#endif
 	u16 family;
+#ifndef CONFIG_X86_32
 	u32 eventmask;              /* Mask of 1 << IPCT_*. */
+#endif
 	struct md_mark mark;
 	struct md_labels labels;
 #ifdef CONFIG_NF_NAT_NEEDED
@@ -1054,6 +1058,7 @@  static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
 	if (!ct)
 		return 0;
 
+#ifndef CONFIG_X86_32
 	/* Set the conntrack event mask if given.  NEW and DELETE events have
 	 * their own groups, but the NFNLGRP_CONNTRACK_UPDATE group listener
 	 * typically would receive many kinds of updates.  Setting the event
@@ -1067,6 +1072,7 @@  static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
 		if (cache)
 			cache->ctmask = info->eventmask;
 	}
+#endif
 
 	/* Apply changes before confirming the connection so that the initial
 	 * conntrack NEW netlink event carries the values given in the CT
@@ -1340,8 +1346,10 @@  static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = {
 	/* NAT length is checked when parsing the nested attributes. */
 	[OVS_CT_ATTR_NAT]	= { .minlen = 0, .maxlen = INT_MAX },
 #endif
+#ifndef CONFIG_X86_32
 	[OVS_CT_ATTR_EVENTMASK]	= { .minlen = sizeof(u32),
 				    .maxlen = sizeof(u32) },
+#endif
 };
 
 static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
@@ -1423,10 +1431,12 @@  static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
 			break;
 		}
 #endif
+#ifndef CONFIG_X86_32
 		case OVS_CT_ATTR_EVENTMASK:
 			info->have_eventmask = true;
 			info->eventmask = nla_get_u32(a);
 			break;
+#endif
 
 		default:
 			OVS_NLERR(log, "Unknown conntrack attr (%d)",
@@ -1627,9 +1637,11 @@  int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
 				   ct_info->helper->name))
 			return -EMSGSIZE;
 	}
+#ifndef CONFIG_X86_32
 	if (ct_info->have_eventmask &&
 	    nla_put_u32(skb, OVS_CT_ATTR_EVENTMASK, ct_info->eventmask))
 		return -EMSGSIZE;
+#endif
 
 #ifdef CONFIG_NF_NAT_NEEDED
 	if (ct_info->nat && !ovs_ct_nat_to_attr(ct_info, skb))