diff mbox

[ovs-dev] compat: Remove rpl_dev_queue_xmit() backport.

Message ID 20170209005036.2864-1-joe@ovn.org
State Rejected
Headers show

Commit Message

Joe Stringer Feb. 9, 2017, 12:50 a.m. UTC
The MPLS portions of this were inadvertently broken in v2.4 due to
433637881ca5 ("datapath: define compat __skb_gso_segment()") which
inverts the supports_mpls_gso() logic, then when rpl_dev_queue_xmit()
backport dropped its VLAN portion in v2.6, the whole function became a
no-op - since b63bf2488209 ("datapath: remove VLAN compat code from GSO").

Apparently the MPLS side of this code never worked in a released version
of OVS and no-one noticed, so remove it.

Signed-off-by: Joe Stringer <joe@ovn.org>
---
CC: Simon Horman <horms@verge.net.au>

Simon, I'm not sure what the repercussions are of this but by my reading
of the code the extra skb_gso_segment compat code here is never invoked
anyway so I figure we can just drop it. Looks like it might've been
fine when originally submitted, but by the time v2.4 came out it was
already nerfed >.<

That said, if we just need to fix the version check to get it working
again then that's an option. Also, I see that the upstream commit
48d2ab609b6b ("net: mpls: Fixups for GSO") will further change the way
that OVS deals with MPLS GSO, so perhaps we'll actually need to bring
back a backport for handling those cases, when that patch is backported
to the OVS tree.
---
 datapath/linux/compat/gso.c                     | 91 -------------------------
 datapath/linux/compat/include/linux/netdevice.h |  5 --
 2 files changed, 96 deletions(-)

Comments

Simon Horman Feb. 9, 2017, 10:20 a.m. UTC | #1
Hi Joe,

On Wed, Feb 08, 2017 at 04:50:36PM -0800, Joe Stringer wrote:
> The MPLS portions of this were inadvertently broken in v2.4 due to
> 433637881ca5 ("datapath: define compat __skb_gso_segment()") which
> inverts the supports_mpls_gso() logic, then when rpl_dev_queue_xmit()
> backport dropped its VLAN portion in v2.6, the whole function became a
> no-op - since b63bf2488209 ("datapath: remove VLAN compat code from GSO").
> 
> Apparently the MPLS side of this code never worked in a released version
> of OVS and no-one noticed, so remove it.
> 
> Signed-off-by: Joe Stringer <joe@ovn.org>
> ---
> CC: Simon Horman <horms@verge.net.au>
> 
> Simon, I'm not sure what the repercussions are of this but by my reading
> of the code the extra skb_gso_segment compat code here is never invoked
> anyway so I figure we can just drop it. Looks like it might've been
> fine when originally submitted, but by the time v2.4 came out it was
> already nerfed >.<
>
> That said, if we just need to fix the version check to get it working
> again then that's an option.

True. I'm pretty sure I exercised the code in question at some point so I
think it ought to work modulo being disabled. But of course some testing
would be required.  Probably this would be worth doing as unexpected GSO
failures are somewhat hard to track down IMHO.  That said, I don't feel
strongly about this given that it only affects what by now are rather old
kernels and no one seems to have reported a problem.

Do you have any view on how much longer kernels older than v3.19 will be
supported by OvS?

> Also, I see that the upstream commit
> 48d2ab609b6b ("net: mpls: Fixups for GSO") will further change the way
> that OVS deals with MPLS GSO, so perhaps we'll actually need to bring
> back a backport for handling those cases, when that patch is backported
> to the OVS tree.

Yes, it seems likely that OvS would need some compatibility code to handle
that change - and IIRC some related changes to catch all the corner cases.

> ---
>  datapath/linux/compat/gso.c                     | 91 -------------------------
>  datapath/linux/compat/include/linux/netdevice.h |  5 --
>  2 files changed, 96 deletions(-)
> 
> diff --git a/datapath/linux/compat/gso.c b/datapath/linux/compat/gso.c
> index 48a56b9f5d5f..2638f1114ede 100644
> --- a/datapath/linux/compat/gso.c
> +++ b/datapath/linux/compat/gso.c
> @@ -41,97 +41,6 @@
>  
>  #include "gso.h"
>  
> -#ifdef OVS_USE_COMPAT_GSO_SEGMENTATION
> -/* Strictly this is not needed and will be optimised out
> - * as this code is guarded by if LINUX_VERSION_CODE < KERNEL_VERSION(3,19,0).
> - * It is here to make things explicit should the compatibility
> - * code be extended in some way prior extending its life-span
> - * beyond v3.19.
> - */
> -static bool supports_mpls_gso(void)
> -{
> -/* MPLS GSO was introduced in v3.11, however it was not correctly
> - * activated using mpls_features until v3.19. */
> -#ifdef OVS_USE_COMPAT_GSO_SEGMENTATION
> -	return true;
> -#else
> -	return false;
> -#endif
> -}
> -
> -int rpl_dev_queue_xmit(struct sk_buff *skb)
> -{
> -#undef dev_queue_xmit
> -	int err = -ENOMEM;
> -	bool mpls;
> -
> -	mpls = false;
> -
> -	/* Avoid traversing any VLAN tags that are present to determine if
> -	 * the ethtype is MPLS. Instead compare the mac_len (end of L2) and
> -	 * skb_network_offset() (beginning of L3) whose inequality will
> -	 * indicate the presence of an MPLS label stack. */
> -	if (skb->mac_len != skb_network_offset(skb) && !supports_mpls_gso())
> -		mpls = true;
> -
> -	if (mpls) {
> -		int features;
> -
> -		features = netif_skb_features(skb);
> -
> -		/* As of v3.11 the kernel provides an mpls_features field in
> -		 * struct net_device which allows devices to advertise which
> -		 * features its supports for MPLS. This value defaults to
> -		 * NETIF_F_SG and as of v3.19.
> -		 *
> -		 * This compatibility code is intended for kernels older
> -		 * than v3.19 that do not support MPLS GSO and do not
> -		 * use mpls_features. Thus this code uses NETIF_F_SG
> -		 * directly in place of mpls_features.
> -		 */
> -		if (mpls)
> -			features &= NETIF_F_SG;
> -
> -		if (netif_needs_gso(skb, features)) {
> -			struct sk_buff *nskb;
> -
> -			nskb = skb_gso_segment(skb, features);
> -			if (!nskb) {
> -				if (unlikely(skb_cloned(skb) &&
> -				    pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
> -					goto drop;
> -
> -				skb_shinfo(skb)->gso_type &= ~SKB_GSO_DODGY;
> -				goto xmit;
> -			}
> -
> -			if (IS_ERR(nskb)) {
> -				err = PTR_ERR(nskb);
> -				goto drop;
> -			}
> -			consume_skb(skb);
> -			skb = nskb;
> -
> -			do {
> -				nskb = skb->next;
> -				skb->next = NULL;
> -				err = dev_queue_xmit(skb);
> -				skb = nskb;
> -			} while (skb);
> -
> -			return err;
> -		}
> -	}
> -xmit:
> -	return dev_queue_xmit(skb);
> -
> -drop:
> -	kfree_skb(skb);
> -	return err;
> -}
> -EXPORT_SYMBOL_GPL(rpl_dev_queue_xmit);
> -#endif /* OVS_USE_COMPAT_GSO_SEGMENTATION */
> -
>  #ifndef USE_UPSTREAM_TUNNEL_GSO
>  static __be16 __skb_network_protocol(struct sk_buff *skb)
>  {
> diff --git a/datapath/linux/compat/include/linux/netdevice.h b/datapath/linux/compat/include/linux/netdevice.h
> index 75315dc16e02..dc05ee9a9ca7 100644
> --- a/datapath/linux/compat/include/linux/netdevice.h
> +++ b/datapath/linux/compat/include/linux/netdevice.h
> @@ -111,11 +111,6 @@ static inline int rpl_netdev_master_upper_dev_link(struct net_device *dev,
>  
>  #endif
>  
> -#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0)
> -#define dev_queue_xmit rpl_dev_queue_xmit
> -int rpl_dev_queue_xmit(struct sk_buff *skb);
> -#endif
> -
>  #if LINUX_VERSION_CODE < KERNEL_VERSION(3,11,0)
>  static inline struct net_device *rpl_netdev_notifier_info_to_dev(void *info)
>  {
> -- 
> 2.11.0
>
Joe Stringer Feb. 10, 2017, 12:32 a.m. UTC | #2
On 9 February 2017 at 02:20, Simon Horman <simon.horman@netronome.com> wrote:
> Hi Joe,
>
> On Wed, Feb 08, 2017 at 04:50:36PM -0800, Joe Stringer wrote:
>> The MPLS portions of this were inadvertently broken in v2.4 due to
>> 433637881ca5 ("datapath: define compat __skb_gso_segment()") which
>> inverts the supports_mpls_gso() logic, then when rpl_dev_queue_xmit()
>> backport dropped its VLAN portion in v2.6, the whole function became a
>> no-op - since b63bf2488209 ("datapath: remove VLAN compat code from GSO").
>>
>> Apparently the MPLS side of this code never worked in a released version
>> of OVS and no-one noticed, so remove it.
>>
>> Signed-off-by: Joe Stringer <joe@ovn.org>
>> ---
>> CC: Simon Horman <horms@verge.net.au>
>>
>> Simon, I'm not sure what the repercussions are of this but by my reading
>> of the code the extra skb_gso_segment compat code here is never invoked
>> anyway so I figure we can just drop it. Looks like it might've been
>> fine when originally submitted, but by the time v2.4 came out it was
>> already nerfed >.<
>>
>> That said, if we just need to fix the version check to get it working
>> again then that's an option.
>
> True. I'm pretty sure I exercised the code in question at some point so I
> think it ought to work modulo being disabled. But of course some testing
> would be required.  Probably this would be worth doing as unexpected GSO
> failures are somewhat hard to track down IMHO.  That said, I don't feel
> strongly about this given that it only affects what by now are rather old
> kernels and no one seems to have reported a problem.

OK. Do you have an idea of what the behaviour is given that this
backport isn't working correctly? If it's just slower but with the
correct behaviour, then given no-one noticed I think it's safe to just
remove. If there's packet corruption or drop (or worse, crash), then
we probably want to do better.

> Do you have any view on how much longer kernels older than v3.19 will be
> supported by OvS?

I don't have a concrete answer for you on that. One way to look at it
is that supporting new features on these older kernels takes a certain
amount of effort. If lots of people are running those kernels (eg
CentOS w/ 3.10 or Ubuntu w/ 3.13), and the effort is reasonably small
then it's probably worthwhile to continue to backport. If those
kernels are less widely used, or if backporting newer code to those
older kernels becomes a massive undertaking, then it'd be worth
considering which kernels are worth spending time on. To some degree,
we can make this determination on a feature-by-feature basis (eg, we
add a new feature but only enable it if your kernel is X or newer);
but there are dangers there around code complexity and debuggability
of different feature combinations so this approach is typically
avoided.

>> Also, I see that the upstream commit
>> 48d2ab609b6b ("net: mpls: Fixups for GSO") will further change the way
>> that OVS deals with MPLS GSO, so perhaps we'll actually need to bring
>> back a backport for handling those cases, when that patch is backported
>> to the OVS tree.
>
> Yes, it seems likely that OvS would need some compatibility code to handle
> that change - and IIRC some related changes to catch all the corner cases.

If you're referring to the followup fixes for that patch, then yes. If
you've got something else in mind regarding corner cases, I'd be
interested to hear what those are.
Simon Horman Feb. 10, 2017, 3:45 p.m. UTC | #3
On Thu, Feb 09, 2017 at 04:32:08PM -0800, Joe Stringer wrote:
> On 9 February 2017 at 02:20, Simon Horman <simon.horman@netronome.com> wrote:
> > Hi Joe,
> >
> > On Wed, Feb 08, 2017 at 04:50:36PM -0800, Joe Stringer wrote:
> >> The MPLS portions of this were inadvertently broken in v2.4 due to
> >> 433637881ca5 ("datapath: define compat __skb_gso_segment()") which
> >> inverts the supports_mpls_gso() logic, then when rpl_dev_queue_xmit()
> >> backport dropped its VLAN portion in v2.6, the whole function became a
> >> no-op - since b63bf2488209 ("datapath: remove VLAN compat code from GSO").
> >>
> >> Apparently the MPLS side of this code never worked in a released version
> >> of OVS and no-one noticed, so remove it.
> >>
> >> Signed-off-by: Joe Stringer <joe@ovn.org>
> >> ---
> >> CC: Simon Horman <horms@verge.net.au>
> >>
> >> Simon, I'm not sure what the repercussions are of this but by my reading
> >> of the code the extra skb_gso_segment compat code here is never invoked
> >> anyway so I figure we can just drop it. Looks like it might've been
> >> fine when originally submitted, but by the time v2.4 came out it was
> >> already nerfed >.<
> >>
> >> That said, if we just need to fix the version check to get it working
> >> again then that's an option.
> >
> > True. I'm pretty sure I exercised the code in question at some point so I
> > think it ought to work modulo being disabled. But of course some testing
> > would be required.  Probably this would be worth doing as unexpected GSO
> > failures are somewhat hard to track down IMHO.  That said, I don't feel
> > strongly about this given that it only affects what by now are rather old
> > kernels and no one seems to have reported a problem.
> 
> OK. Do you have an idea of what the behaviour is given that this
> backport isn't working correctly? If it's just slower but with the
> correct behaviour, then given no-one noticed I think it's safe to just
> remove. If there's packet corruption or drop (or worse, crash), then
> we probably want to do better.

I would expect over-sized packets to be dropped - GSO skbs that have not
been segmented.

This may or may not result in the sender re-transmitting. If retransmit
occurs then communication would continue albeit at a greatly reduced speed.

> > Do you have any view on how much longer kernels older than v3.19 will be
> > supported by OvS?
> 
> I don't have a concrete answer for you on that. One way to look at it
> is that supporting new features on these older kernels takes a certain
> amount of effort. If lots of people are running those kernels (eg
> CentOS w/ 3.10 or Ubuntu w/ 3.13), and the effort is reasonably small
> then it's probably worthwhile to continue to backport. If those
> kernels are less widely used, or if backporting newer code to those
> older kernels becomes a massive undertaking, then it'd be worth
> considering which kernels are worth spending time on. To some degree,
> we can make this determination on a feature-by-feature basis (eg, we
> add a new feature but only enable it if your kernel is X or newer);
> but there are dangers there around code complexity and debuggability
> of different feature combinations so this approach is typically
> avoided.

I agree with the above.

FWIW, I think detecting this feature at runtime may be not a significant
win over fixing the compat code.

> >> Also, I see that the upstream commit
> >> 48d2ab609b6b ("net: mpls: Fixups for GSO") will further change the way
> >> that OVS deals with MPLS GSO, so perhaps we'll actually need to bring
> >> back a backport for handling those cases, when that patch is backported
> >> to the OVS tree.
> >
> > Yes, it seems likely that OvS would need some compatibility code to handle
> > that change - and IIRC some related changes to catch all the corner cases.
> 
> If you're referring to the followup fixes for that patch, then yes. If
> you've got something else in mind regarding corner cases, I'd be
> interested to hear what those are.

I was referring to the followup patches.
Joe Stringer March 2, 2017, 11 p.m. UTC | #4
On 8 February 2017 at 16:50, Joe Stringer <joe@ovn.org> wrote:
> The MPLS portions of this were inadvertently broken in v2.4 due to
> 433637881ca5 ("datapath: define compat __skb_gso_segment()") which
> inverts the supports_mpls_gso() logic, then when rpl_dev_queue_xmit()
> backport dropped its VLAN portion in v2.6, the whole function became a
> no-op - since b63bf2488209 ("datapath: remove VLAN compat code from GSO").
>
> Apparently the MPLS side of this code never worked in a released version
> of OVS and no-one noticed, so remove it.
>
> Signed-off-by: Joe Stringer <joe@ovn.org>

Picking this thread back up... it's a bit elaborate, but as per my
understanding there were basically three important versions of the
MPLS GSO code in upstream Linux to date:

Linux earlier than 3.19 had a broken implementation.
Linux 3.19 up until 4.9 had a version that worked OK for OVS
codepaths, but was broken in other paths.
Linux 4.9 and later should be correct in all cases[0 + fixes].

Then we have our OVS out-of-tree backport of the upstream OVS module.
Today, the MPLS code in OVS matches roughly Linux 4.8. Prior to 3.19,
we backported MPLS GSO, but there was a problem with the activation of
the code[1] so MPLS GSO is broken with out-of-tree against kernels up
to 3.19. For kernels 3.19 to 4.8, the MPLS GSO works correctly in the
OVS tree. Linux 4.9 expects the skb to be prepared differently for
MPLS GSO to work, so the out-of-tree module in conjunction with that
version will be broken. When we backport this patch[0], that should be
fixed---but we'll need to take care to do it in a way that still works
on earlier versions..

This affects OVS 2.7 - users wanting to run OVS 2.7 with out-of-tree
module with MPLS GSO (including loading the mpls_gso module) and a
Linux 4.9 kernel may observe some issues like this when using MPLS
push/pop. As far as I can tell, the most likely affected users would
be those running recent Fedora releases but I don't think that they
use the DKMS module by default. Given how elaborate this chain of
requirements is, I'm not so strongly concerned about addressing this
before bringing the three or four later backport series into the
tree.. they've been blocking for pretty long already. It'd definitely
be nice to fix it soon though.

Yi-Hung has been doing the investigation on this, thanks for providing
the details and I hope I portrayed it correctly to the list here!

[0] https://github.com/torvalds/linux/commit/48d2ab609b6bbecb7698487c8579bc40de9d6dfa#diff-a6103578af113225119317a107fe30ec
[1] http://patchwork.ozlabs.org/patch/725891/
Simon Horman March 20, 2017, 1:36 p.m. UTC | #5
On Thu, Mar 02, 2017 at 03:00:37PM -0800, Joe Stringer wrote:
> On 8 February 2017 at 16:50, Joe Stringer <joe@ovn.org> wrote:
> > The MPLS portions of this were inadvertently broken in v2.4 due to
> > 433637881ca5 ("datapath: define compat __skb_gso_segment()") which
> > inverts the supports_mpls_gso() logic, then when rpl_dev_queue_xmit()
> > backport dropped its VLAN portion in v2.6, the whole function became a
> > no-op - since b63bf2488209 ("datapath: remove VLAN compat code from GSO").
> >
> > Apparently the MPLS side of this code never worked in a released version
> > of OVS and no-one noticed, so remove it.
> >
> > Signed-off-by: Joe Stringer <joe@ovn.org>
> 
> Picking this thread back up... it's a bit elaborate, but as per my
> understanding there were basically three important versions of the
> MPLS GSO code in upstream Linux to date:
> 
> Linux earlier than 3.19 had a broken implementation.
> Linux 3.19 up until 4.9 had a version that worked OK for OVS
> codepaths, but was broken in other paths.
> Linux 4.9 and later should be correct in all cases[0 + fixes].
> 
> Then we have our OVS out-of-tree backport of the upstream OVS module.
> Today, the MPLS code in OVS matches roughly Linux 4.8. Prior to 3.19,
> we backported MPLS GSO, but there was a problem with the activation of
> the code[1] so MPLS GSO is broken with out-of-tree against kernels up
> to 3.19. For kernels 3.19 to 4.8, the MPLS GSO works correctly in the
> OVS tree. Linux 4.9 expects the skb to be prepared differently for
> MPLS GSO to work, so the out-of-tree module in conjunction with that
> version will be broken. When we backport this patch[0], that should be
> fixed---but we'll need to take care to do it in a way that still works
> on earlier versions..
> 
> This affects OVS 2.7 - users wanting to run OVS 2.7 with out-of-tree
> module with MPLS GSO (including loading the mpls_gso module) and a
> Linux 4.9 kernel may observe some issues like this when using MPLS
> push/pop. As far as I can tell, the most likely affected users would
> be those running recent Fedora releases but I don't think that they
> use the DKMS module by default. Given how elaborate this chain of
> requirements is, I'm not so strongly concerned about addressing this
> before bringing the three or four later backport series into the
> tree.. they've been blocking for pretty long already. It'd definitely
> be nice to fix it soon though.
> 
> Yi-Hung has been doing the investigation on this, thanks for providing
> the details and I hope I portrayed it correctly to the list here!

Am I correct in understanding that you would like to:
1. Address outstanding backports and then
2. Address the problem discussed in this thread, particularly for
   OVS2.7 + Linux 4.9

If so that sounds reasonable to me. I'd be happy to assist with this work
but I suspect that two (or more) heads will be better than one given the
complexity of getting it correct for all the relevant kernel versions.

> [0] https://github.com/torvalds/linux/commit/48d2ab609b6bbecb7698487c8579bc40de9d6dfa#diff-a6103578af113225119317a107fe30ec
> [1] http://patchwork.ozlabs.org/patch/725891/
>
Joe Stringer March 20, 2017, 7:37 p.m. UTC | #6
On 20 March 2017 at 06:36, Simon Horman <simon.horman@netronome.com> wrote:
> On Thu, Mar 02, 2017 at 03:00:37PM -0800, Joe Stringer wrote:
>> On 8 February 2017 at 16:50, Joe Stringer <joe@ovn.org> wrote:
>> > The MPLS portions of this were inadvertently broken in v2.4 due to
>> > 433637881ca5 ("datapath: define compat __skb_gso_segment()") which
>> > inverts the supports_mpls_gso() logic, then when rpl_dev_queue_xmit()
>> > backport dropped its VLAN portion in v2.6, the whole function became a
>> > no-op - since b63bf2488209 ("datapath: remove VLAN compat code from GSO").
>> >
>> > Apparently the MPLS side of this code never worked in a released version
>> > of OVS and no-one noticed, so remove it.
>> >
>> > Signed-off-by: Joe Stringer <joe@ovn.org>
>>
>> Picking this thread back up... it's a bit elaborate, but as per my
>> understanding there were basically three important versions of the
>> MPLS GSO code in upstream Linux to date:
>>
>> Linux earlier than 3.19 had a broken implementation.
>> Linux 3.19 up until 4.9 had a version that worked OK for OVS
>> codepaths, but was broken in other paths.
>> Linux 4.9 and later should be correct in all cases[0 + fixes].
>>
>> Then we have our OVS out-of-tree backport of the upstream OVS module.
>> Today, the MPLS code in OVS matches roughly Linux 4.8. Prior to 3.19,
>> we backported MPLS GSO, but there was a problem with the activation of
>> the code[1] so MPLS GSO is broken with out-of-tree against kernels up
>> to 3.19. For kernels 3.19 to 4.8, the MPLS GSO works correctly in the
>> OVS tree. Linux 4.9 expects the skb to be prepared differently for
>> MPLS GSO to work, so the out-of-tree module in conjunction with that
>> version will be broken. When we backport this patch[0], that should be
>> fixed---but we'll need to take care to do it in a way that still works
>> on earlier versions..
>>
>> This affects OVS 2.7 - users wanting to run OVS 2.7 with out-of-tree
>> module with MPLS GSO (including loading the mpls_gso module) and a
>> Linux 4.9 kernel may observe some issues like this when using MPLS
>> push/pop. As far as I can tell, the most likely affected users would
>> be those running recent Fedora releases but I don't think that they
>> use the DKMS module by default. Given how elaborate this chain of
>> requirements is, I'm not so strongly concerned about addressing this
>> before bringing the three or four later backport series into the
>> tree.. they've been blocking for pretty long already. It'd definitely
>> be nice to fix it soon though.
>>
>> Yi-Hung has been doing the investigation on this, thanks for providing
>> the details and I hope I portrayed it correctly to the list here!
>
> Am I correct in understanding that you would like to:
> 1. Address outstanding backports and then
> 2. Address the problem discussed in this thread, particularly for
>    OVS2.7 + Linux 4.9
>
> If so that sounds reasonable to me. I'd be happy to assist with this work
> but I suspect that two (or more) heads will be better than one given the
> complexity of getting it correct for all the relevant kernel versions.

Correct. At this point I've reviewed and applied most backport changes
up until the latest changes on net/net-next over the past few weeks,
with the exception of these MPLS changes. I agree that this stuff is
tricky and it would be helpful to have some assistance, thanks for the
offer. I believe that Yi-Hung should be able to share a patch soon,
and we can continue the discussion from there.
Yi-Hung Wei April 3, 2017, 11:31 p.m. UTC | #7
Hi Simon,

I posted a patch series[1,2,3] to backport the MPLS GSO upstream
commits. It would be great if you could provide some feedback on that.
I have tested the backport series on kernel 4.4, and 4.9 with
the similar setup as in [3]. To trigger MPLS GSO, I use iperf instead of
the ping test in [3].

Thanks,

[1] https://patchwork.ozlabs.org/patch/746673/
[2] https://patchwork.ozlabs.org/patch/746674/
[3] https://patchwork.ozlabs.org/patch/746675/

-Yi-Hung

On Mon, Mar 20, 2017 at 12:37 PM, Joe Stringer <joe@ovn.org> wrote:
> On 20 March 2017 at 06:36, Simon Horman <simon.horman@netronome.com> wrote:
>> On Thu, Mar 02, 2017 at 03:00:37PM -0800, Joe Stringer wrote:
>>> On 8 February 2017 at 16:50, Joe Stringer <joe@ovn.org> wrote:
>>> > The MPLS portions of this were inadvertently broken in v2.4 due to
>>> > 433637881ca5 ("datapath: define compat __skb_gso_segment()") which
>>> > inverts the supports_mpls_gso() logic, then when rpl_dev_queue_xmit()
>>> > backport dropped its VLAN portion in v2.6, the whole function became a
>>> > no-op - since b63bf2488209 ("datapath: remove VLAN compat code from GSO").
>>> >
>>> > Apparently the MPLS side of this code never worked in a released version
>>> > of OVS and no-one noticed, so remove it.
>>> >
>>> > Signed-off-by: Joe Stringer <joe@ovn.org>
>>>
>>> Picking this thread back up... it's a bit elaborate, but as per my
>>> understanding there were basically three important versions of the
>>> MPLS GSO code in upstream Linux to date:
>>>
>>> Linux earlier than 3.19 had a broken implementation.
>>> Linux 3.19 up until 4.9 had a version that worked OK for OVS
>>> codepaths, but was broken in other paths.
>>> Linux 4.9 and later should be correct in all cases[0 + fixes].
>>>
>>> Then we have our OVS out-of-tree backport of the upstream OVS module.
>>> Today, the MPLS code in OVS matches roughly Linux 4.8. Prior to 3.19,
>>> we backported MPLS GSO, but there was a problem with the activation of
>>> the code[1] so MPLS GSO is broken with out-of-tree against kernels up
>>> to 3.19. For kernels 3.19 to 4.8, the MPLS GSO works correctly in the
>>> OVS tree. Linux 4.9 expects the skb to be prepared differently for
>>> MPLS GSO to work, so the out-of-tree module in conjunction with that
>>> version will be broken. When we backport this patch[0], that should be
>>> fixed---but we'll need to take care to do it in a way that still works
>>> on earlier versions..
>>>
>>> This affects OVS 2.7 - users wanting to run OVS 2.7 with out-of-tree
>>> module with MPLS GSO (including loading the mpls_gso module) and a
>>> Linux 4.9 kernel may observe some issues like this when using MPLS
>>> push/pop. As far as I can tell, the most likely affected users would
>>> be those running recent Fedora releases but I don't think that they
>>> use the DKMS module by default. Given how elaborate this chain of
>>> requirements is, I'm not so strongly concerned about addressing this
>>> before bringing the three or four later backport series into the
>>> tree.. they've been blocking for pretty long already. It'd definitely
>>> be nice to fix it soon though.
>>>
>>> Yi-Hung has been doing the investigation on this, thanks for providing
>>> the details and I hope I portrayed it correctly to the list here!
>>
>> Am I correct in understanding that you would like to:
>> 1. Address outstanding backports and then
>> 2. Address the problem discussed in this thread, particularly for
>>    OVS2.7 + Linux 4.9
>>
>> If so that sounds reasonable to me. I'd be happy to assist with this work
>> but I suspect that two (or more) heads will be better than one given the
>> complexity of getting it correct for all the relevant kernel versions.
>
> Correct. At this point I've reviewed and applied most backport changes
> up until the latest changes on net/net-next over the past few weeks,
> with the exception of these MPLS changes. I agree that this stuff is
> tricky and it would be helpful to have some assistance, thanks for the
> offer. I believe that Yi-Hung should be able to share a patch soon,
> and we can continue the discussion from there.
diff mbox

Patch

diff --git a/datapath/linux/compat/gso.c b/datapath/linux/compat/gso.c
index 48a56b9f5d5f..2638f1114ede 100644
--- a/datapath/linux/compat/gso.c
+++ b/datapath/linux/compat/gso.c
@@ -41,97 +41,6 @@ 
 
 #include "gso.h"
 
-#ifdef OVS_USE_COMPAT_GSO_SEGMENTATION
-/* Strictly this is not needed and will be optimised out
- * as this code is guarded by if LINUX_VERSION_CODE < KERNEL_VERSION(3,19,0).
- * It is here to make things explicit should the compatibility
- * code be extended in some way prior extending its life-span
- * beyond v3.19.
- */
-static bool supports_mpls_gso(void)
-{
-/* MPLS GSO was introduced in v3.11, however it was not correctly
- * activated using mpls_features until v3.19. */
-#ifdef OVS_USE_COMPAT_GSO_SEGMENTATION
-	return true;
-#else
-	return false;
-#endif
-}
-
-int rpl_dev_queue_xmit(struct sk_buff *skb)
-{
-#undef dev_queue_xmit
-	int err = -ENOMEM;
-	bool mpls;
-
-	mpls = false;
-
-	/* Avoid traversing any VLAN tags that are present to determine if
-	 * the ethtype is MPLS. Instead compare the mac_len (end of L2) and
-	 * skb_network_offset() (beginning of L3) whose inequality will
-	 * indicate the presence of an MPLS label stack. */
-	if (skb->mac_len != skb_network_offset(skb) && !supports_mpls_gso())
-		mpls = true;
-
-	if (mpls) {
-		int features;
-
-		features = netif_skb_features(skb);
-
-		/* As of v3.11 the kernel provides an mpls_features field in
-		 * struct net_device which allows devices to advertise which
-		 * features its supports for MPLS. This value defaults to
-		 * NETIF_F_SG and as of v3.19.
-		 *
-		 * This compatibility code is intended for kernels older
-		 * than v3.19 that do not support MPLS GSO and do not
-		 * use mpls_features. Thus this code uses NETIF_F_SG
-		 * directly in place of mpls_features.
-		 */
-		if (mpls)
-			features &= NETIF_F_SG;
-
-		if (netif_needs_gso(skb, features)) {
-			struct sk_buff *nskb;
-
-			nskb = skb_gso_segment(skb, features);
-			if (!nskb) {
-				if (unlikely(skb_cloned(skb) &&
-				    pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
-					goto drop;
-
-				skb_shinfo(skb)->gso_type &= ~SKB_GSO_DODGY;
-				goto xmit;
-			}
-
-			if (IS_ERR(nskb)) {
-				err = PTR_ERR(nskb);
-				goto drop;
-			}
-			consume_skb(skb);
-			skb = nskb;
-
-			do {
-				nskb = skb->next;
-				skb->next = NULL;
-				err = dev_queue_xmit(skb);
-				skb = nskb;
-			} while (skb);
-
-			return err;
-		}
-	}
-xmit:
-	return dev_queue_xmit(skb);
-
-drop:
-	kfree_skb(skb);
-	return err;
-}
-EXPORT_SYMBOL_GPL(rpl_dev_queue_xmit);
-#endif /* OVS_USE_COMPAT_GSO_SEGMENTATION */
-
 #ifndef USE_UPSTREAM_TUNNEL_GSO
 static __be16 __skb_network_protocol(struct sk_buff *skb)
 {
diff --git a/datapath/linux/compat/include/linux/netdevice.h b/datapath/linux/compat/include/linux/netdevice.h
index 75315dc16e02..dc05ee9a9ca7 100644
--- a/datapath/linux/compat/include/linux/netdevice.h
+++ b/datapath/linux/compat/include/linux/netdevice.h
@@ -111,11 +111,6 @@  static inline int rpl_netdev_master_upper_dev_link(struct net_device *dev,
 
 #endif
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0)
-#define dev_queue_xmit rpl_dev_queue_xmit
-int rpl_dev_queue_xmit(struct sk_buff *skb);
-#endif
-
 #if LINUX_VERSION_CODE < KERNEL_VERSION(3,11,0)
 static inline struct net_device *rpl_netdev_notifier_info_to_dev(void *info)
 {