diff mbox

[net-next,1/2] net: More fine-grained support for encapsulated GSO features

Message ID 1366683556-4956-2-git-send-email-horms@verge.net.au
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Simon Horman April 23, 2013, 2:19 a.m. UTC
"net: Add support for hardware-offloaded encapsulation" introduced
the encapsulation field of struct skb, which when set provides hints
that GSO should handle an skb that encapsulates a packet.

This patch adds an encapsulation_features field which provides
a hint to dev dev_hard_start_xmit() that harware-offload encapsulation
features should be used. Previously this hint was provided by the
encapsulation field.

The other uses of the encapsulation field are left unchanged.

The two in-tree locations that set the encapsulation have been updated to
also set encapsulation_field.

The motivation for this is to provide support segmentation of GSO MPLS skbs.
This may be necessary when a non-MPLS GSO skb is turned into an MPLS GSO
skb by Open vSwtich and its MPLS push action.

In this case it  harware-offload encapsulation features should be used,
actually to be more exact software segmentation should be selected, but
other hints provided by the encapsulation field are not applicable.

Cc: Joseph Gasparakis <joseph.gasparakis@intel.com>
Cc: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Cc: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/linux/skbuff.h | 9 ++++++++-
 net/core/dev.c         | 2 +-
 net/core/skbuff.c      | 1 +
 net/ipv4/gre.c         | 1 +
 net/ipv4/ipip.c        | 1 +
 5 files changed, 12 insertions(+), 2 deletions(-)

Comments

Joseph Gasparakis April 23, 2013, 9 p.m. UTC | #1
On Mon, 22 Apr 2013, Simon Horman wrote:

> "net: Add support for hardware-offloaded encapsulation" introduced
> the encapsulation field of struct skb, which when set provides hints
> that GSO should handle an skb that encapsulates a packet.
> 
> This patch adds an encapsulation_features field which provides
> a hint to dev dev_hard_start_xmit() that harware-offload encapsulation
> features should be used. Previously this hint was provided by the
> encapsulation field.
> 
> The other uses of the encapsulation field are left unchanged.
> 
> The two in-tree locations that set the encapsulation have been updated to
> also set encapsulation_field.
> 
> The motivation for this is to provide support segmentation of GSO MPLS skbs.
> This may be necessary when a non-MPLS GSO skb is turned into an MPLS GSO
> skb by Open vSwtich and its MPLS push action.
> 
> In this case it  harware-offload encapsulation features should be used,
> actually to be more exact software segmentation should be selected, but
> other hints provided by the encapsulation field are not applicable.
> 
> Cc: Joseph Gasparakis <joseph.gasparakis@intel.com>
> Cc: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
> Cc: Alexander Duyck <alexander.h.duyck@intel.com>
> Signed-off-by: Simon Horman <horms@verge.net.au>
> ---
>  include/linux/skbuff.h | 9 ++++++++-
>  net/core/dev.c         | 2 +-
>  net/core/skbuff.c      | 1 +
>  net/ipv4/gre.c         | 1 +
>  net/ipv4/ipip.c        | 1 +
>  5 files changed, 12 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 2e0ced1..d9ec1de 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -494,7 +494,14 @@ struct sk_buff {
>  	 * headers if needed
>  	 */
>  	__u8			encapsulation:1;
> -	/* 7/9 bit hole (depending on ndisc_nodetype presence) */
> +	/* Encapsulation protocol and NIC drivers should use
> +	 * this flag to indicate to each other if the skb contains
> +	 * encapsulated packet and GSO should use encapsulation features
> +	 * instead of standard features for the netdev. This is typically
> +	 * a subset of cases where skb->encapsulation is set.
> +	 */
> +	__u8			encapsulation_features:1;
> +	/* 6/8 bit hole (depending on ndisc_nodetype presence) */
>  	kmemcheck_bitfield_end(flags2);
>  
>  #ifdef CONFIG_NET_DMA
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 9e26b8d..53236c5 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -2498,7 +2498,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
>  		 * hardware encapsulation features instead of standard
>  		 * features for the netdev
>  		 */
> -		if (skb->encapsulation)
> +		if (skb->encapsulation_features)
>  			features &= dev->hw_enc_features;
>  
>  		if (netif_needs_gso(skb, features)) {
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 898cf5c..f23d136 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -708,6 +708,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
>  	new->l4_rxhash		= old->l4_rxhash;
>  	new->no_fcs		= old->no_fcs;
>  	new->encapsulation	= old->encapsulation;
> +	new->encapsulation_features = old->encapsulation_features;
>  #ifdef CONFIG_XFRM
>  	new->sp			= secpath_get(old->sp);
>  #endif
> diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
> index 0ae998b..8420f29 100644
> --- a/net/ipv4/gre.c
> +++ b/net/ipv4/gre.c
> @@ -157,6 +157,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
>  	}
>  
>  	skb->encapsulation = 0;
> +	skb->encapsulation_features = 0;
>  
>  	if (unlikely(!pskb_may_pull(skb, ghl)))
>  		goto out;
> diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
> index 77bfcce..a6db3c0 100644
> --- a/net/ipv4/ipip.c
> +++ b/net/ipv4/ipip.c
> @@ -220,6 +220,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
>  	if (likely(!skb->encapsulation)) {
>  		skb_reset_inner_headers(skb);
>  		skb->encapsulation = 1;
> +		skb->encapsulation_features = 1;
>  	}
>  
>  	ip_tunnel_xmit(skb, dev, tiph);
> 

Any particular reason to introduce skb->encapsulation_features instead of 
using the existing skb->encapsulation? Also I don't see it used in your 
second patch either.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Simon Horman April 25, 2013, 7:36 a.m. UTC | #2
On Tue, Apr 23, 2013 at 02:00:19PM -0700, Joseph Gasparakis wrote:
> 
> 
> On Mon, 22 Apr 2013, Simon Horman wrote:
> 
> > "net: Add support for hardware-offloaded encapsulation" introduced
> > the encapsulation field of struct skb, which when set provides hints
> > that GSO should handle an skb that encapsulates a packet.
> > 
> > This patch adds an encapsulation_features field which provides
> > a hint to dev dev_hard_start_xmit() that harware-offload encapsulation
> > features should be used. Previously this hint was provided by the
> > encapsulation field.
> > 
> > The other uses of the encapsulation field are left unchanged.
> > 
> > The two in-tree locations that set the encapsulation have been updated to
> > also set encapsulation_field.
> > 
> > The motivation for this is to provide support segmentation of GSO MPLS skbs.
> > This may be necessary when a non-MPLS GSO skb is turned into an MPLS GSO
> > skb by Open vSwtich and its MPLS push action.
> > 
> > In this case it  harware-offload encapsulation features should be used,
> > actually to be more exact software segmentation should be selected, but
> > other hints provided by the encapsulation field are not applicable.
> > 
> > Cc: Joseph Gasparakis <joseph.gasparakis@intel.com>
> > Cc: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
> > Cc: Alexander Duyck <alexander.h.duyck@intel.com>
> > Signed-off-by: Simon Horman <horms@verge.net.au>
> > ---
> >  include/linux/skbuff.h | 9 ++++++++-
> >  net/core/dev.c         | 2 +-
> >  net/core/skbuff.c      | 1 +
> >  net/ipv4/gre.c         | 1 +
> >  net/ipv4/ipip.c        | 1 +
> >  5 files changed, 12 insertions(+), 2 deletions(-)
> > 
> > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> > index 2e0ced1..d9ec1de 100644
> > --- a/include/linux/skbuff.h
> > +++ b/include/linux/skbuff.h
> > @@ -494,7 +494,14 @@ struct sk_buff {
> >  	 * headers if needed
> >  	 */
> >  	__u8			encapsulation:1;
> > -	/* 7/9 bit hole (depending on ndisc_nodetype presence) */
> > +	/* Encapsulation protocol and NIC drivers should use
> > +	 * this flag to indicate to each other if the skb contains
> > +	 * encapsulated packet and GSO should use encapsulation features
> > +	 * instead of standard features for the netdev. This is typically
> > +	 * a subset of cases where skb->encapsulation is set.
> > +	 */
> > +	__u8			encapsulation_features:1;
> > +	/* 6/8 bit hole (depending on ndisc_nodetype presence) */
> >  	kmemcheck_bitfield_end(flags2);
> >  
> >  #ifdef CONFIG_NET_DMA
> > diff --git a/net/core/dev.c b/net/core/dev.c
> > index 9e26b8d..53236c5 100644
> > --- a/net/core/dev.c
> > +++ b/net/core/dev.c
> > @@ -2498,7 +2498,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
> >  		 * hardware encapsulation features instead of standard
> >  		 * features for the netdev
> >  		 */
> > -		if (skb->encapsulation)
> > +		if (skb->encapsulation_features)
> >  			features &= dev->hw_enc_features;
> >  
> >  		if (netif_needs_gso(skb, features)) {
> > diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> > index 898cf5c..f23d136 100644
> > --- a/net/core/skbuff.c
> > +++ b/net/core/skbuff.c
> > @@ -708,6 +708,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
> >  	new->l4_rxhash		= old->l4_rxhash;
> >  	new->no_fcs		= old->no_fcs;
> >  	new->encapsulation	= old->encapsulation;
> > +	new->encapsulation_features = old->encapsulation_features;
> >  #ifdef CONFIG_XFRM
> >  	new->sp			= secpath_get(old->sp);
> >  #endif
> > diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
> > index 0ae998b..8420f29 100644
> > --- a/net/ipv4/gre.c
> > +++ b/net/ipv4/gre.c
> > @@ -157,6 +157,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
> >  	}
> >  
> >  	skb->encapsulation = 0;
> > +	skb->encapsulation_features = 0;
> >  
> >  	if (unlikely(!pskb_may_pull(skb, ghl)))
> >  		goto out;
> > diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
> > index 77bfcce..a6db3c0 100644
> > --- a/net/ipv4/ipip.c
> > +++ b/net/ipv4/ipip.c
> > @@ -220,6 +220,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
> >  	if (likely(!skb->encapsulation)) {
> >  		skb_reset_inner_headers(skb);
> >  		skb->encapsulation = 1;
> > +		skb->encapsulation_features = 1;
> >  	}
> >  
> >  	ip_tunnel_xmit(skb, dev, tiph);
> > 
> 
> Any particular reason to introduce skb->encapsulation_features instead of 
> using the existing skb->encapsulation? Also I don't see it used in your 
> second patch either.

My reasoning is that skb->encapsulation seems to alter the behaviour of
many different locations and I'm not sure that any of them, other than the
one in dev_hard_start_xmit() make sense for MPLS.

For example the following in inet_gso_segment()

	tunnel = !!skb->encapsulation;
	...
	do {
		...
		if (!tunnel && proto == IPPROTO_UDP) {
			iph->id = htons(id);
			iph->frag_off = htons(offset >> 3);
			if (skb->next != NULL)
				iph->frag_off |= htons(IP_MF);
			offset += (skb->len - skb->mac_len - iph->ihl * 4);
		} else  {
			iph->id = htons(id++);
		}
		...

On reflection I need to examine the relevant code-paths more closely, but I
believe the !tunnel portion of the above code is intended to help effect
GSO segmentation UDP tunnelling protocols and is not relevant to MPLS.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Simon Horman April 25, 2013, 8:03 a.m. UTC | #3
On Thu, Apr 25, 2013 at 04:36:44PM +0900, Simon Horman wrote:
> On Tue, Apr 23, 2013 at 02:00:19PM -0700, Joseph Gasparakis wrote:
> > 
> > 
> > On Mon, 22 Apr 2013, Simon Horman wrote:
> > 
> > > "net: Add support for hardware-offloaded encapsulation" introduced
> > > the encapsulation field of struct skb, which when set provides hints
> > > that GSO should handle an skb that encapsulates a packet.
> > > 
> > > This patch adds an encapsulation_features field which provides
> > > a hint to dev dev_hard_start_xmit() that harware-offload encapsulation
> > > features should be used. Previously this hint was provided by the
> > > encapsulation field.
> > > 
> > > The other uses of the encapsulation field are left unchanged.
> > > 
> > > The two in-tree locations that set the encapsulation have been updated to
> > > also set encapsulation_field.
> > > 
> > > The motivation for this is to provide support segmentation of GSO MPLS skbs.
> > > This may be necessary when a non-MPLS GSO skb is turned into an MPLS GSO
> > > skb by Open vSwtich and its MPLS push action.
> > > 
> > > In this case it  harware-offload encapsulation features should be used,
> > > actually to be more exact software segmentation should be selected, but
> > > other hints provided by the encapsulation field are not applicable.
> > > 
> > > Cc: Joseph Gasparakis <joseph.gasparakis@intel.com>
> > > Cc: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
> > > Cc: Alexander Duyck <alexander.h.duyck@intel.com>
> > > Signed-off-by: Simon Horman <horms@verge.net.au>
> > > ---
> > >  include/linux/skbuff.h | 9 ++++++++-
> > >  net/core/dev.c         | 2 +-
> > >  net/core/skbuff.c      | 1 +
> > >  net/ipv4/gre.c         | 1 +
> > >  net/ipv4/ipip.c        | 1 +
> > >  5 files changed, 12 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> > > index 2e0ced1..d9ec1de 100644
> > > --- a/include/linux/skbuff.h
> > > +++ b/include/linux/skbuff.h
> > > @@ -494,7 +494,14 @@ struct sk_buff {
> > >  	 * headers if needed
> > >  	 */
> > >  	__u8			encapsulation:1;
> > > -	/* 7/9 bit hole (depending on ndisc_nodetype presence) */
> > > +	/* Encapsulation protocol and NIC drivers should use
> > > +	 * this flag to indicate to each other if the skb contains
> > > +	 * encapsulated packet and GSO should use encapsulation features
> > > +	 * instead of standard features for the netdev. This is typically
> > > +	 * a subset of cases where skb->encapsulation is set.
> > > +	 */
> > > +	__u8			encapsulation_features:1;
> > > +	/* 6/8 bit hole (depending on ndisc_nodetype presence) */
> > >  	kmemcheck_bitfield_end(flags2);
> > >  
> > >  #ifdef CONFIG_NET_DMA
> > > diff --git a/net/core/dev.c b/net/core/dev.c
> > > index 9e26b8d..53236c5 100644
> > > --- a/net/core/dev.c
> > > +++ b/net/core/dev.c
> > > @@ -2498,7 +2498,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
> > >  		 * hardware encapsulation features instead of standard
> > >  		 * features for the netdev
> > >  		 */
> > > -		if (skb->encapsulation)
> > > +		if (skb->encapsulation_features)
> > >  			features &= dev->hw_enc_features;
> > >  
> > >  		if (netif_needs_gso(skb, features)) {
> > > diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> > > index 898cf5c..f23d136 100644
> > > --- a/net/core/skbuff.c
> > > +++ b/net/core/skbuff.c
> > > @@ -708,6 +708,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
> > >  	new->l4_rxhash		= old->l4_rxhash;
> > >  	new->no_fcs		= old->no_fcs;
> > >  	new->encapsulation	= old->encapsulation;
> > > +	new->encapsulation_features = old->encapsulation_features;
> > >  #ifdef CONFIG_XFRM
> > >  	new->sp			= secpath_get(old->sp);
> > >  #endif
> > > diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
> > > index 0ae998b..8420f29 100644
> > > --- a/net/ipv4/gre.c
> > > +++ b/net/ipv4/gre.c
> > > @@ -157,6 +157,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
> > >  	}
> > >  
> > >  	skb->encapsulation = 0;
> > > +	skb->encapsulation_features = 0;
> > >  
> > >  	if (unlikely(!pskb_may_pull(skb, ghl)))
> > >  		goto out;
> > > diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
> > > index 77bfcce..a6db3c0 100644
> > > --- a/net/ipv4/ipip.c
> > > +++ b/net/ipv4/ipip.c
> > > @@ -220,6 +220,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
> > >  	if (likely(!skb->encapsulation)) {
> > >  		skb_reset_inner_headers(skb);
> > >  		skb->encapsulation = 1;
> > > +		skb->encapsulation_features = 1;
> > >  	}
> > >  
> > >  	ip_tunnel_xmit(skb, dev, tiph);
> > > 
> > 
> > Any particular reason to introduce skb->encapsulation_features instead of 
> > using the existing skb->encapsulation? Also I don't see it used in your 
> > second patch either.
> 
> My reasoning is that skb->encapsulation seems to alter the behaviour of
> many different locations and I'm not sure that any of them, other than the
> one in dev_hard_start_xmit() make sense for MPLS.
> 
> For example the following in inet_gso_segment()
> 
> 	tunnel = !!skb->encapsulation;
> 	...
> 	do {
> 		...
> 		if (!tunnel && proto == IPPROTO_UDP) {
> 			iph->id = htons(id);
> 			iph->frag_off = htons(offset >> 3);
> 			if (skb->next != NULL)
> 				iph->frag_off |= htons(IP_MF);
> 			offset += (skb->len - skb->mac_len - iph->ihl * 4);
> 		} else  {
> 			iph->id = htons(id++);
> 		}
> 		...
> 
> On reflection I need to examine the relevant code-paths more closely, but I
> believe the !tunnel portion of the above code is intended to help effect
> GSO segmentation UDP tunnelling protocols and is not relevant to MPLS.

I forgot to mention in my previous email that this change is used in the
patch "[PATCH v2.26] datapath: Add basic MPLS support to kernel".
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jesse Gross April 26, 2013, 11:03 p.m. UTC | #4
On Thu, Apr 25, 2013 at 12:36 AM, Simon Horman <horms@verge.net.au> wrote:
> On Tue, Apr 23, 2013 at 02:00:19PM -0700, Joseph Gasparakis wrote:
>> Any particular reason to introduce skb->encapsulation_features instead of
>> using the existing skb->encapsulation? Also I don't see it used in your
>> second patch either.
>
> My reasoning is that skb->encapsulation seems to alter the behaviour of
> many different locations and I'm not sure that any of them, other than the
> one in dev_hard_start_xmit() make sense for MPLS.

The problem is the meaning of skb->encapsulation isn't really defined
clearly and I'm certain that the current implementation is not going
to work in the future. Depending on your perspective, vlans, MPLS,
tunnels, etc. can all be considered forms of encapsulation but clearly
there are many NICs that have different capabilities across those. I
believe the intention here was really to describe L3 tunnels as
encapsulation, in which case MPLS really shouldn't be using this
mechanism at all.

Now there is some overlap, especially today since most currently
shipping silicon wasn't designed to support tunnels and so is using
some form of offset based offloads. In that case, all forms of
encapsulation are pretty similar. However, in the future that won't be
the case as support for specific protocols is implemented for higher
performance and richer support. When that happens, not only will MPLS
and tunnels have different capabilities but various forms tunnels
might as well.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Simon Horman April 30, 2013, 3:21 a.m. UTC | #5
On Fri, Apr 26, 2013 at 04:03:21PM -0700, Jesse Gross wrote:
> On Thu, Apr 25, 2013 at 12:36 AM, Simon Horman <horms@verge.net.au> wrote:
> > On Tue, Apr 23, 2013 at 02:00:19PM -0700, Joseph Gasparakis wrote:
> >> Any particular reason to introduce skb->encapsulation_features instead of
> >> using the existing skb->encapsulation? Also I don't see it used in your
> >> second patch either.
> >
> > My reasoning is that skb->encapsulation seems to alter the behaviour of
> > many different locations and I'm not sure that any of them, other than the
> > one in dev_hard_start_xmit() make sense for MPLS.
> 
> The problem is the meaning of skb->encapsulation isn't really defined
> clearly and I'm certain that the current implementation is not going
> to work in the future. Depending on your perspective, vlans, MPLS,
> tunnels, etc. can all be considered forms of encapsulation but clearly
> there are many NICs that have different capabilities across those. I
> believe the intention here was really to describe L3 tunnels as
> encapsulation, in which case MPLS really shouldn't be using this
> mechanism at all.
> 
> Now there is some overlap, especially today since most currently
> shipping silicon wasn't designed to support tunnels and so is using
> some form of offset based offloads. In that case, all forms of
> encapsulation are pretty similar. However, in the future that won't be
> the case as support for specific protocols is implemented for higher
> performance and richer support. When that happens, not only will MPLS
> and tunnels have different capabilities but various forms tunnels
> might as well.

Wouldn't be possible to describe those differences using,
dev->hw_enc_features? I assumed that was its purpose.

The intention of my patch was allow MPLS to utilise
dev->hw_enc_features without any of the other implications of
skb->encapsulation.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jesse Gross April 30, 2013, 4:19 p.m. UTC | #6
On Mon, Apr 29, 2013 at 8:21 PM, Simon Horman <horms@verge.net.au> wrote:
> On Fri, Apr 26, 2013 at 04:03:21PM -0700, Jesse Gross wrote:
>> On Thu, Apr 25, 2013 at 12:36 AM, Simon Horman <horms@verge.net.au> wrote:
>> > On Tue, Apr 23, 2013 at 02:00:19PM -0700, Joseph Gasparakis wrote:
>> >> Any particular reason to introduce skb->encapsulation_features instead of
>> >> using the existing skb->encapsulation? Also I don't see it used in your
>> >> second patch either.
>> >
>> > My reasoning is that skb->encapsulation seems to alter the behaviour of
>> > many different locations and I'm not sure that any of them, other than the
>> > one in dev_hard_start_xmit() make sense for MPLS.
>>
>> The problem is the meaning of skb->encapsulation isn't really defined
>> clearly and I'm certain that the current implementation is not going
>> to work in the future. Depending on your perspective, vlans, MPLS,
>> tunnels, etc. can all be considered forms of encapsulation but clearly
>> there are many NICs that have different capabilities across those. I
>> believe the intention here was really to describe L3 tunnels as
>> encapsulation, in which case MPLS really shouldn't be using this
>> mechanism at all.
>>
>> Now there is some overlap, especially today since most currently
>> shipping silicon wasn't designed to support tunnels and so is using
>> some form of offset based offloads. In that case, all forms of
>> encapsulation are pretty similar. However, in the future that won't be
>> the case as support for specific protocols is implemented for higher
>> performance and richer support. When that happens, not only will MPLS
>> and tunnels have different capabilities but various forms tunnels
>> might as well.
>
> Wouldn't be possible to describe those differences using,
> dev->hw_enc_features? I assumed that was its purpose.

If there truly are differences between the offload capabilities of
MPLS and L3 tunnels then no, it's not possible, because it's a single
field. It's certainly not a valid assumption that a NIC that can do
TSO over GRE can also do it over MPLS.

However, it's unlikely that there are truly significant differences
between various encapsulation formats on a per-feature basis. I think
what we need to do is separate out the ability to understand the
headers from the capabilities so you have two fields: header (none,
VLAN, QinQ, MPLS, VXLAN, GRE, etc.) and feature (checksum, SG, TSO,
etc.) rather than the product of each. Otherwise, we end up with a ton
of different combinations.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Simon Horman May 1, 2013, 7:50 a.m. UTC | #7
On Tue, Apr 30, 2013 at 09:19:51AM -0700, Jesse Gross wrote:
> On Mon, Apr 29, 2013 at 8:21 PM, Simon Horman <horms@verge.net.au> wrote:
> > On Fri, Apr 26, 2013 at 04:03:21PM -0700, Jesse Gross wrote:
> >> On Thu, Apr 25, 2013 at 12:36 AM, Simon Horman <horms@verge.net.au> wrote:
> >> > On Tue, Apr 23, 2013 at 02:00:19PM -0700, Joseph Gasparakis wrote:
> >> >> Any particular reason to introduce skb->encapsulation_features instead of
> >> >> using the existing skb->encapsulation? Also I don't see it used in your
> >> >> second patch either.
> >> >
> >> > My reasoning is that skb->encapsulation seems to alter the behaviour of
> >> > many different locations and I'm not sure that any of them, other than the
> >> > one in dev_hard_start_xmit() make sense for MPLS.
> >>
> >> The problem is the meaning of skb->encapsulation isn't really defined
> >> clearly and I'm certain that the current implementation is not going
> >> to work in the future. Depending on your perspective, vlans, MPLS,
> >> tunnels, etc. can all be considered forms of encapsulation but clearly
> >> there are many NICs that have different capabilities across those. I
> >> believe the intention here was really to describe L3 tunnels as
> >> encapsulation, in which case MPLS really shouldn't be using this
> >> mechanism at all.
> >>
> >> Now there is some overlap, especially today since most currently
> >> shipping silicon wasn't designed to support tunnels and so is using
> >> some form of offset based offloads. In that case, all forms of
> >> encapsulation are pretty similar. However, in the future that won't be
> >> the case as support for specific protocols is implemented for higher
> >> performance and richer support. When that happens, not only will MPLS
> >> and tunnels have different capabilities but various forms tunnels
> >> might as well.
> >
> > Wouldn't be possible to describe those differences using,
> > dev->hw_enc_features? I assumed that was its purpose.
> 
> If there truly are differences between the offload capabilities of
> MPLS and L3 tunnels then no, it's not possible, because it's a single
> field. It's certainly not a valid assumption that a NIC that can do
> TSO over GRE can also do it over MPLS.
> 
> However, it's unlikely that there are truly significant differences
> between various encapsulation formats on a per-feature basis. I think
> what we need to do is separate out the ability to understand the
> headers from the capabilities so you have two fields: header (none,
> VLAN, QinQ, MPLS, VXLAN, GRE, etc.) and feature (checksum, SG, TSO,
> etc.) rather than the product of each. Otherwise, we end up with a ton
> of different combinations.

I'm not quite sure that I follow.

Is your idea to replace skb->encapsulation (a single bit) with
a field that corresponds to the outer-most (encapsulation) header in use
and has bits for none, VLAN, QinQ, MPLS, VXLAN, GRE, etc...?

If so, I believe that would solve the problem I was trying to address
with this patch.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jesse Gross May 1, 2013, 6:16 p.m. UTC | #8
On Wed, May 1, 2013 at 12:50 AM, Simon Horman <horms@verge.net.au> wrote:
> On Tue, Apr 30, 2013 at 09:19:51AM -0700, Jesse Gross wrote:
>> On Mon, Apr 29, 2013 at 8:21 PM, Simon Horman <horms@verge.net.au> wrote:
>> > On Fri, Apr 26, 2013 at 04:03:21PM -0700, Jesse Gross wrote:
>> >> On Thu, Apr 25, 2013 at 12:36 AM, Simon Horman <horms@verge.net.au> wrote:
>> >> > On Tue, Apr 23, 2013 at 02:00:19PM -0700, Joseph Gasparakis wrote:
>> >> >> Any particular reason to introduce skb->encapsulation_features instead of
>> >> >> using the existing skb->encapsulation? Also I don't see it used in your
>> >> >> second patch either.
>> >> >
>> >> > My reasoning is that skb->encapsulation seems to alter the behaviour of
>> >> > many different locations and I'm not sure that any of them, other than the
>> >> > one in dev_hard_start_xmit() make sense for MPLS.
>> >>
>> >> The problem is the meaning of skb->encapsulation isn't really defined
>> >> clearly and I'm certain that the current implementation is not going
>> >> to work in the future. Depending on your perspective, vlans, MPLS,
>> >> tunnels, etc. can all be considered forms of encapsulation but clearly
>> >> there are many NICs that have different capabilities across those. I
>> >> believe the intention here was really to describe L3 tunnels as
>> >> encapsulation, in which case MPLS really shouldn't be using this
>> >> mechanism at all.
>> >>
>> >> Now there is some overlap, especially today since most currently
>> >> shipping silicon wasn't designed to support tunnels and so is using
>> >> some form of offset based offloads. In that case, all forms of
>> >> encapsulation are pretty similar. However, in the future that won't be
>> >> the case as support for specific protocols is implemented for higher
>> >> performance and richer support. When that happens, not only will MPLS
>> >> and tunnels have different capabilities but various forms tunnels
>> >> might as well.
>> >
>> > Wouldn't be possible to describe those differences using,
>> > dev->hw_enc_features? I assumed that was its purpose.
>>
>> If there truly are differences between the offload capabilities of
>> MPLS and L3 tunnels then no, it's not possible, because it's a single
>> field. It's certainly not a valid assumption that a NIC that can do
>> TSO over GRE can also do it over MPLS.
>>
>> However, it's unlikely that there are truly significant differences
>> between various encapsulation formats on a per-feature basis. I think
>> what we need to do is separate out the ability to understand the
>> headers from the capabilities so you have two fields: header (none,
>> VLAN, QinQ, MPLS, VXLAN, GRE, etc.) and feature (checksum, SG, TSO,
>> etc.) rather than the product of each. Otherwise, we end up with a ton
>> of different combinations.
>
> I'm not quite sure that I follow.
>
> Is your idea to replace skb->encapsulation (a single bit) with
> a field that corresponds to the outer-most (encapsulation) header in use
> and has bits for none, VLAN, QinQ, MPLS, VXLAN, GRE, etc...?

No, I'm talking about netdev features. You can already tell the
encapsulation type of a packet by looking at the EtherType.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Simon Horman May 1, 2013, 10:57 p.m. UTC | #9
On Wed, May 01, 2013 at 11:16:40AM -0700, Jesse Gross wrote:
> On Wed, May 1, 2013 at 12:50 AM, Simon Horman <horms@verge.net.au> wrote:
> > On Tue, Apr 30, 2013 at 09:19:51AM -0700, Jesse Gross wrote:
> >> On Mon, Apr 29, 2013 at 8:21 PM, Simon Horman <horms@verge.net.au> wrote:
> >> > On Fri, Apr 26, 2013 at 04:03:21PM -0700, Jesse Gross wrote:
> >> >> On Thu, Apr 25, 2013 at 12:36 AM, Simon Horman <horms@verge.net.au> wrote:
> >> >> > On Tue, Apr 23, 2013 at 02:00:19PM -0700, Joseph Gasparakis wrote:
> >> >> >> Any particular reason to introduce skb->encapsulation_features instead of
> >> >> >> using the existing skb->encapsulation? Also I don't see it used in your
> >> >> >> second patch either.
> >> >> >
> >> >> > My reasoning is that skb->encapsulation seems to alter the behaviour of
> >> >> > many different locations and I'm not sure that any of them, other than the
> >> >> > one in dev_hard_start_xmit() make sense for MPLS.
> >> >>
> >> >> The problem is the meaning of skb->encapsulation isn't really defined
> >> >> clearly and I'm certain that the current implementation is not going
> >> >> to work in the future. Depending on your perspective, vlans, MPLS,
> >> >> tunnels, etc. can all be considered forms of encapsulation but clearly
> >> >> there are many NICs that have different capabilities across those. I
> >> >> believe the intention here was really to describe L3 tunnels as
> >> >> encapsulation, in which case MPLS really shouldn't be using this
> >> >> mechanism at all.
> >> >>
> >> >> Now there is some overlap, especially today since most currently
> >> >> shipping silicon wasn't designed to support tunnels and so is using
> >> >> some form of offset based offloads. In that case, all forms of
> >> >> encapsulation are pretty similar. However, in the future that won't be
> >> >> the case as support for specific protocols is implemented for higher
> >> >> performance and richer support. When that happens, not only will MPLS
> >> >> and tunnels have different capabilities but various forms tunnels
> >> >> might as well.
> >> >
> >> > Wouldn't be possible to describe those differences using,
> >> > dev->hw_enc_features? I assumed that was its purpose.
> >>
> >> If there truly are differences between the offload capabilities of
> >> MPLS and L3 tunnels then no, it's not possible, because it's a single
> >> field. It's certainly not a valid assumption that a NIC that can do
> >> TSO over GRE can also do it over MPLS.
> >>
> >> However, it's unlikely that there are truly significant differences
> >> between various encapsulation formats on a per-feature basis. I think
> >> what we need to do is separate out the ability to understand the
> >> headers from the capabilities so you have two fields: header (none,
> >> VLAN, QinQ, MPLS, VXLAN, GRE, etc.) and feature (checksum, SG, TSO,
> >> etc.) rather than the product of each. Otherwise, we end up with a ton
> >> of different combinations.
> >
> > I'm not quite sure that I follow.
> >
> > Is your idea to replace skb->encapsulation (a single bit) with
> > a field that corresponds to the outer-most (encapsulation) header in use
> > and has bits for none, VLAN, QinQ, MPLS, VXLAN, GRE, etc...?
> 
> No, I'm talking about netdev features. You can already tell the
> encapsulation type of a packet by looking at the EtherType.

Now I am completely confused about what are the two fields that you
refer to in your previous email.


In regards to looking ath the ethernet type:

One of the tricky parts of MPLS is that the packet itself does not contain
the ethernet type or any other way of knowing the type of the inner-packet.
Information that is needed for GSO.

My proposal to get around this is to leave skb->protocol as the
original, in the case we are interested in non-MPLS, ethernet type.

The MPLS ethertype is in in the packet itself, however checking
that seems expensive.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jesse Gross May 2, 2013, 4:53 a.m. UTC | #10
On Wed, May 1, 2013 at 3:57 PM, Simon Horman <horms@verge.net.au> wrote:
> On Wed, May 01, 2013 at 11:16:40AM -0700, Jesse Gross wrote:
>> On Wed, May 1, 2013 at 12:50 AM, Simon Horman <horms@verge.net.au> wrote:
>> > On Tue, Apr 30, 2013 at 09:19:51AM -0700, Jesse Gross wrote:
>> >> On Mon, Apr 29, 2013 at 8:21 PM, Simon Horman <horms@verge.net.au> wrote:
>> >> > On Fri, Apr 26, 2013 at 04:03:21PM -0700, Jesse Gross wrote:
>> >> >> On Thu, Apr 25, 2013 at 12:36 AM, Simon Horman <horms@verge.net.au> wrote:
>> >> >> > On Tue, Apr 23, 2013 at 02:00:19PM -0700, Joseph Gasparakis wrote:
>> >> >> >> Any particular reason to introduce skb->encapsulation_features instead of
>> >> >> >> using the existing skb->encapsulation? Also I don't see it used in your
>> >> >> >> second patch either.
>> >> >> >
>> >> >> > My reasoning is that skb->encapsulation seems to alter the behaviour of
>> >> >> > many different locations and I'm not sure that any of them, other than the
>> >> >> > one in dev_hard_start_xmit() make sense for MPLS.
>> >> >>
>> >> >> The problem is the meaning of skb->encapsulation isn't really defined
>> >> >> clearly and I'm certain that the current implementation is not going
>> >> >> to work in the future. Depending on your perspective, vlans, MPLS,
>> >> >> tunnels, etc. can all be considered forms of encapsulation but clearly
>> >> >> there are many NICs that have different capabilities across those. I
>> >> >> believe the intention here was really to describe L3 tunnels as
>> >> >> encapsulation, in which case MPLS really shouldn't be using this
>> >> >> mechanism at all.
>> >> >>
>> >> >> Now there is some overlap, especially today since most currently
>> >> >> shipping silicon wasn't designed to support tunnels and so is using
>> >> >> some form of offset based offloads. In that case, all forms of
>> >> >> encapsulation are pretty similar. However, in the future that won't be
>> >> >> the case as support for specific protocols is implemented for higher
>> >> >> performance and richer support. When that happens, not only will MPLS
>> >> >> and tunnels have different capabilities but various forms tunnels
>> >> >> might as well.
>> >> >
>> >> > Wouldn't be possible to describe those differences using,
>> >> > dev->hw_enc_features? I assumed that was its purpose.
>> >>
>> >> If there truly are differences between the offload capabilities of
>> >> MPLS and L3 tunnels then no, it's not possible, because it's a single
>> >> field. It's certainly not a valid assumption that a NIC that can do
>> >> TSO over GRE can also do it over MPLS.
>> >>
>> >> However, it's unlikely that there are truly significant differences
>> >> between various encapsulation formats on a per-feature basis. I think
>> >> what we need to do is separate out the ability to understand the
>> >> headers from the capabilities so you have two fields: header (none,
>> >> VLAN, QinQ, MPLS, VXLAN, GRE, etc.) and feature (checksum, SG, TSO,
>> >> etc.) rather than the product of each. Otherwise, we end up with a ton
>> >> of different combinations.
>> >
>> > I'm not quite sure that I follow.
>> >
>> > Is your idea to replace skb->encapsulation (a single bit) with
>> > a field that corresponds to the outer-most (encapsulation) header in use
>> > and has bits for none, VLAN, QinQ, MPLS, VXLAN, GRE, etc...?
>>
>> No, I'm talking about netdev features. You can already tell the
>> encapsulation type of a packet by looking at the EtherType.
>
> Now I am completely confused about what are the two fields that you
> refer to in your previous email.

I have always been referring to the netdev features for various
protocol types. This is because considering MPLS as a form of
encapsulation for the purpose of offloads buckets too many protocols
into the same set and NICs will have varying features for those.
Trying to avoid this by having a bit for offloadable encapsulations is
just going to be very confusing and not very future proof.

> In regards to looking ath the ethernet type:
>
> One of the tricky parts of MPLS is that the packet itself does not contain
> the ethernet type or any other way of knowing the type of the inner-packet.
> Information that is needed for GSO.

I'm aware of that. However, you were referring to the type of
encapsulation. It is easy to determine that a packet is MPLS.

> My proposal to get around this is to leave skb->protocol as the
> original, in the case we are interested in non-MPLS, ethernet type.

At the very least, this is not consistent with how it is currently
handled (for example, with VLANs) and seems difficult to do properly.
However, I have not seen any further analysis since the last time that
we discussed this.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Simon Horman May 2, 2013, 5:31 a.m. UTC | #11
On Wed, May 01, 2013 at 09:53:42PM -0700, Jesse Gross wrote:
> On Wed, May 1, 2013 at 3:57 PM, Simon Horman <horms@verge.net.au> wrote:
> > On Wed, May 01, 2013 at 11:16:40AM -0700, Jesse Gross wrote:
> >> On Wed, May 1, 2013 at 12:50 AM, Simon Horman <horms@verge.net.au> wrote:
> >> > On Tue, Apr 30, 2013 at 09:19:51AM -0700, Jesse Gross wrote:
> >> >> On Mon, Apr 29, 2013 at 8:21 PM, Simon Horman <horms@verge.net.au> wrote:
> >> >> > On Fri, Apr 26, 2013 at 04:03:21PM -0700, Jesse Gross wrote:
> >> >> >> On Thu, Apr 25, 2013 at 12:36 AM, Simon Horman <horms@verge.net.au> wrote:
> >> >> >> > On Tue, Apr 23, 2013 at 02:00:19PM -0700, Joseph Gasparakis wrote:
> >> >> >> >> Any particular reason to introduce skb->encapsulation_features instead of
> >> >> >> >> using the existing skb->encapsulation? Also I don't see it used in your
> >> >> >> >> second patch either.
> >> >> >> >
> >> >> >> > My reasoning is that skb->encapsulation seems to alter the behaviour of
> >> >> >> > many different locations and I'm not sure that any of them, other than the
> >> >> >> > one in dev_hard_start_xmit() make sense for MPLS.
> >> >> >>
> >> >> >> The problem is the meaning of skb->encapsulation isn't really defined
> >> >> >> clearly and I'm certain that the current implementation is not going
> >> >> >> to work in the future. Depending on your perspective, vlans, MPLS,
> >> >> >> tunnels, etc. can all be considered forms of encapsulation but clearly
> >> >> >> there are many NICs that have different capabilities across those. I
> >> >> >> believe the intention here was really to describe L3 tunnels as
> >> >> >> encapsulation, in which case MPLS really shouldn't be using this
> >> >> >> mechanism at all.
> >> >> >>
> >> >> >> Now there is some overlap, especially today since most currently
> >> >> >> shipping silicon wasn't designed to support tunnels and so is using
> >> >> >> some form of offset based offloads. In that case, all forms of
> >> >> >> encapsulation are pretty similar. However, in the future that won't be
> >> >> >> the case as support for specific protocols is implemented for higher
> >> >> >> performance and richer support. When that happens, not only will MPLS
> >> >> >> and tunnels have different capabilities but various forms tunnels
> >> >> >> might as well.
> >> >> >
> >> >> > Wouldn't be possible to describe those differences using,
> >> >> > dev->hw_enc_features? I assumed that was its purpose.
> >> >>
> >> >> If there truly are differences between the offload capabilities of
> >> >> MPLS and L3 tunnels then no, it's not possible, because it's a single
> >> >> field. It's certainly not a valid assumption that a NIC that can do
> >> >> TSO over GRE can also do it over MPLS.
> >> >>
> >> >> However, it's unlikely that there are truly significant differences
> >> >> between various encapsulation formats on a per-feature basis. I think
> >> >> what we need to do is separate out the ability to understand the
> >> >> headers from the capabilities so you have two fields: header (none,
> >> >> VLAN, QinQ, MPLS, VXLAN, GRE, etc.) and feature (checksum, SG, TSO,
> >> >> etc.) rather than the product of each. Otherwise, we end up with a ton
> >> >> of different combinations.
> >> >
> >> > I'm not quite sure that I follow.
> >> >
> >> > Is your idea to replace skb->encapsulation (a single bit) with
> >> > a field that corresponds to the outer-most (encapsulation) header in use
> >> > and has bits for none, VLAN, QinQ, MPLS, VXLAN, GRE, etc...?
> >>
> >> No, I'm talking about netdev features. You can already tell the
> >> encapsulation type of a packet by looking at the EtherType.
> >
> > Now I am completely confused about what are the two fields that you
> > refer to in your previous email.
> 
> I have always been referring to the netdev features for various
> protocol types. This is because considering MPLS as a form of
> encapsulation for the purpose of offloads buckets too many protocols
> into the same set and NICs will have varying features for those.
> Trying to avoid this by having a bit for offloadable encapsulations is
> just going to be very confusing and not very future proof.
> 
> > In regards to looking ath the ethernet type:
> >
> > One of the tricky parts of MPLS is that the packet itself does not contain
> > the ethernet type or any other way of knowing the type of the inner-packet.
> > Information that is needed for GSO.
> 
> I'm aware of that. However, you were referring to the type of
> encapsulation. It is easy to determine that a packet is MPLS.
> 
> > My proposal to get around this is to leave skb->protocol as the
> > original, in the case we are interested in non-MPLS, ethernet type.
> 
> At the very least, this is not consistent with how it is currently
> handled (for example, with VLANs) and seems difficult to do properly.
> However, I have not seen any further analysis since the last time that
> we discussed this.

Unfortunately my efforts to solicit feedback from others regarding
that have not been successful.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Simon Horman May 2, 2013, 2:39 p.m. UTC | #12
On Thu, May 02, 2013 at 02:31:44PM +0900, Simon Horman wrote:
> On Wed, May 01, 2013 at 09:53:42PM -0700, Jesse Gross wrote:
> > On Wed, May 1, 2013 at 3:57 PM, Simon Horman <horms@verge.net.au> wrote:
> > > On Wed, May 01, 2013 at 11:16:40AM -0700, Jesse Gross wrote:
> > >> On Wed, May 1, 2013 at 12:50 AM, Simon Horman <horms@verge.net.au> wrote:
> > >> > On Tue, Apr 30, 2013 at 09:19:51AM -0700, Jesse Gross wrote:
> > >> >> On Mon, Apr 29, 2013 at 8:21 PM, Simon Horman <horms@verge.net.au> wrote:
> > >> >> > On Fri, Apr 26, 2013 at 04:03:21PM -0700, Jesse Gross wrote:
> > >> >> >> On Thu, Apr 25, 2013 at 12:36 AM, Simon Horman <horms@verge.net.au> wrote:
> > >> >> >> > On Tue, Apr 23, 2013 at 02:00:19PM -0700, Joseph Gasparakis wrote:
> > >> >> >> >> Any particular reason to introduce skb->encapsulation_features instead of
> > >> >> >> >> using the existing skb->encapsulation? Also I don't see it used in your
> > >> >> >> >> second patch either.
> > >> >> >> >
> > >> >> >> > My reasoning is that skb->encapsulation seems to alter the behaviour of
> > >> >> >> > many different locations and I'm not sure that any of them, other than the
> > >> >> >> > one in dev_hard_start_xmit() make sense for MPLS.
> > >> >> >>
> > >> >> >> The problem is the meaning of skb->encapsulation isn't really defined
> > >> >> >> clearly and I'm certain that the current implementation is not going
> > >> >> >> to work in the future. Depending on your perspective, vlans, MPLS,
> > >> >> >> tunnels, etc. can all be considered forms of encapsulation but clearly
> > >> >> >> there are many NICs that have different capabilities across those. I
> > >> >> >> believe the intention here was really to describe L3 tunnels as
> > >> >> >> encapsulation, in which case MPLS really shouldn't be using this
> > >> >> >> mechanism at all.
> > >> >> >>
> > >> >> >> Now there is some overlap, especially today since most currently
> > >> >> >> shipping silicon wasn't designed to support tunnels and so is using
> > >> >> >> some form of offset based offloads. In that case, all forms of
> > >> >> >> encapsulation are pretty similar. However, in the future that won't be
> > >> >> >> the case as support for specific protocols is implemented for higher
> > >> >> >> performance and richer support. When that happens, not only will MPLS
> > >> >> >> and tunnels have different capabilities but various forms tunnels
> > >> >> >> might as well.
> > >> >> >
> > >> >> > Wouldn't be possible to describe those differences using,
> > >> >> > dev->hw_enc_features? I assumed that was its purpose.
> > >> >>
> > >> >> If there truly are differences between the offload capabilities of
> > >> >> MPLS and L3 tunnels then no, it's not possible, because it's a single
> > >> >> field. It's certainly not a valid assumption that a NIC that can do
> > >> >> TSO over GRE can also do it over MPLS.
> > >> >>
> > >> >> However, it's unlikely that there are truly significant differences
> > >> >> between various encapsulation formats on a per-feature basis. I think
> > >> >> what we need to do is separate out the ability to understand the
> > >> >> headers from the capabilities so you have two fields: header (none,
> > >> >> VLAN, QinQ, MPLS, VXLAN, GRE, etc.) and feature (checksum, SG, TSO,
> > >> >> etc.) rather than the product of each. Otherwise, we end up with a ton
> > >> >> of different combinations.
> > >> >
> > >> > I'm not quite sure that I follow.
> > >> >
> > >> > Is your idea to replace skb->encapsulation (a single bit) with
> > >> > a field that corresponds to the outer-most (encapsulation) header in use
> > >> > and has bits for none, VLAN, QinQ, MPLS, VXLAN, GRE, etc...?
> > >>
> > >> No, I'm talking about netdev features. You can already tell the
> > >> encapsulation type of a packet by looking at the EtherType.
> > >
> > > Now I am completely confused about what are the two fields that you
> > > refer to in your previous email.
> > 
> > I have always been referring to the netdev features for various
> > protocol types. This is because considering MPLS as a form of
> > encapsulation for the purpose of offloads buckets too many protocols
> > into the same set and NICs will have varying features for those.
> > Trying to avoid this by having a bit for offloadable encapsulations is
> > just going to be very confusing and not very future proof.

I understand your point regarding a magic bit for encapsulation not
being particularly future-proof. I agree that it is reasonable to expect
that a NIC may support an offload for one of the growing list of
supported encapsulation protocols and not another.

However, as tedious as this may be, I am rather confused about what your
proposal above is.

> > > In regards to looking ath the ethernet type:
> > >
> > > One of the tricky parts of MPLS is that the packet itself does not contain
> > > the ethernet type or any other way of knowing the type of the inner-packet.
> > > Information that is needed for GSO.
> > 
> > I'm aware of that. However, you were referring to the type of
> > encapsulation. It is easy to determine that a packet is MPLS.
> > 
> > > My proposal to get around this is to leave skb->protocol as the
> > > original, in the case we are interested in non-MPLS, ethernet type.
> > 
> > At the very least, this is not consistent with how it is currently
> > handled (for example, with VLANs) and seems difficult to do properly.
> > However, I have not seen any further analysis since the last time that
> > we discussed this.
> 
> Unfortunately my efforts to solicit feedback from others regarding
> that have not been successful.

An idea I have for the treatment of skb->protocol and friends is to add
skb->inner_protocol.  It could be set to the inner protocol, if known, for
protocols such as MPLS which don't include that information in the packet.
It would allow skb->protocol to be set to the MPLS ethernet type
corresponding to the ethernet type of the packet.

From here I see two options:

1.  Offloads could be registered for MPLS unicast and multicast.  And the
    registered MPLS GSO segmentation call-back could set and restore
    skb->protocol before and after calling skb_mac_gso_segment().

    The MPLS GSO segmentation callback could also calculate features
    to pass to skb_mac_gso_segment() by some means.

2. Teach skb_network_protocol() about skb->inner_protocol.
   And most likely teach netif_skb_features() about
   skb->protocol == ETH_P_MPLS*.

   I'm not entirely sure how to avoid overhead for non-MPLS packets using
   this approach.

I believe the above could be achieved without using skb->encapsulation
in newly added code.

Your features proposal above not withstanding, in the current scheme of
things, it would seem that it would be appropriate to add SKB_GSO_GRE -
currently not supported by any hardware - and set skb_shinfo(skb)->gso_type
= SKB_GSO_GRE in the datapath. I think this should be sufficient to trigger
a call to skb_mac_gso_segment() in dev_hard_start_xmit().
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jesse Gross May 2, 2013, 4:53 p.m. UTC | #13
On Wed, May 1, 2013 at 10:31 PM, Simon Horman <horms@verge.net.au> wrote:
> On Wed, May 01, 2013 at 09:53:42PM -0700, Jesse Gross wrote:
>> On Wed, May 1, 2013 at 3:57 PM, Simon Horman <horms@verge.net.au> wrote:
>> > On Wed, May 01, 2013 at 11:16:40AM -0700, Jesse Gross wrote:
>> >> On Wed, May 1, 2013 at 12:50 AM, Simon Horman <horms@verge.net.au> wrote:
>> >> > On Tue, Apr 30, 2013 at 09:19:51AM -0700, Jesse Gross wrote:
>> >> >> On Mon, Apr 29, 2013 at 8:21 PM, Simon Horman <horms@verge.net.au> wrote:
>> >> >> > On Fri, Apr 26, 2013 at 04:03:21PM -0700, Jesse Gross wrote:
>> >> >> >> On Thu, Apr 25, 2013 at 12:36 AM, Simon Horman <horms@verge.net.au> wrote:
>> >> >> >> > On Tue, Apr 23, 2013 at 02:00:19PM -0700, Joseph Gasparakis wrote:
>> >> >> >> >> Any particular reason to introduce skb->encapsulation_features instead of
>> >> >> >> >> using the existing skb->encapsulation? Also I don't see it used in your
>> >> >> >> >> second patch either.
>> >> >> >> >
>> >> >> >> > My reasoning is that skb->encapsulation seems to alter the behaviour of
>> >> >> >> > many different locations and I'm not sure that any of them, other than the
>> >> >> >> > one in dev_hard_start_xmit() make sense for MPLS.
>> >> >> >>
>> >> >> >> The problem is the meaning of skb->encapsulation isn't really defined
>> >> >> >> clearly and I'm certain that the current implementation is not going
>> >> >> >> to work in the future. Depending on your perspective, vlans, MPLS,
>> >> >> >> tunnels, etc. can all be considered forms of encapsulation but clearly
>> >> >> >> there are many NICs that have different capabilities across those. I
>> >> >> >> believe the intention here was really to describe L3 tunnels as
>> >> >> >> encapsulation, in which case MPLS really shouldn't be using this
>> >> >> >> mechanism at all.
>> >> >> >>
>> >> >> >> Now there is some overlap, especially today since most currently
>> >> >> >> shipping silicon wasn't designed to support tunnels and so is using
>> >> >> >> some form of offset based offloads. In that case, all forms of
>> >> >> >> encapsulation are pretty similar. However, in the future that won't be
>> >> >> >> the case as support for specific protocols is implemented for higher
>> >> >> >> performance and richer support. When that happens, not only will MPLS
>> >> >> >> and tunnels have different capabilities but various forms tunnels
>> >> >> >> might as well.
>> >> >> >
>> >> >> > Wouldn't be possible to describe those differences using,
>> >> >> > dev->hw_enc_features? I assumed that was its purpose.
>> >> >>
>> >> >> If there truly are differences between the offload capabilities of
>> >> >> MPLS and L3 tunnels then no, it's not possible, because it's a single
>> >> >> field. It's certainly not a valid assumption that a NIC that can do
>> >> >> TSO over GRE can also do it over MPLS.
>> >> >>
>> >> >> However, it's unlikely that there are truly significant differences
>> >> >> between various encapsulation formats on a per-feature basis. I think
>> >> >> what we need to do is separate out the ability to understand the
>> >> >> headers from the capabilities so you have two fields: header (none,
>> >> >> VLAN, QinQ, MPLS, VXLAN, GRE, etc.) and feature (checksum, SG, TSO,
>> >> >> etc.) rather than the product of each. Otherwise, we end up with a ton
>> >> >> of different combinations.
>> >> >
>> >> > I'm not quite sure that I follow.
>> >> >
>> >> > Is your idea to replace skb->encapsulation (a single bit) with
>> >> > a field that corresponds to the outer-most (encapsulation) header in use
>> >> > and has bits for none, VLAN, QinQ, MPLS, VXLAN, GRE, etc...?
>> >>
>> >> No, I'm talking about netdev features. You can already tell the
>> >> encapsulation type of a packet by looking at the EtherType.
>> >
>> > Now I am completely confused about what are the two fields that you
>> > refer to in your previous email.
>>
>> I have always been referring to the netdev features for various
>> protocol types. This is because considering MPLS as a form of
>> encapsulation for the purpose of offloads buckets too many protocols
>> into the same set and NICs will have varying features for those.
>> Trying to avoid this by having a bit for offloadable encapsulations is
>> just going to be very confusing and not very future proof.
>>
>> > In regards to looking ath the ethernet type:
>> >
>> > One of the tricky parts of MPLS is that the packet itself does not contain
>> > the ethernet type or any other way of knowing the type of the inner-packet.
>> > Information that is needed for GSO.
>>
>> I'm aware of that. However, you were referring to the type of
>> encapsulation. It is easy to determine that a packet is MPLS.
>>
>> > My proposal to get around this is to leave skb->protocol as the
>> > original, in the case we are interested in non-MPLS, ethernet type.
>>
>> At the very least, this is not consistent with how it is currently
>> handled (for example, with VLANs) and seems difficult to do properly.
>> However, I have not seen any further analysis since the last time that
>> we discussed this.
>
> Unfortunately my efforts to solicit feedback from others regarding
> that have not been successful.

Give me a break, Simon. These are your patches and therefore it is
your responsibility to do the analysis. This has come up over and over
again and I'm not happy about it.

Rather than trying to respond to me as quickly as possible, you need
to slow down, take a step back, and think about the correct direction.
Given that some of these patches have revision numbers in the 20s you
have nothing to complain about as far as receiving help.

In order to assist you in this, I'm going to drop all of your pending
patches from my queue and not respond to anything further from you
until the merge window is open again.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Simon Horman May 2, 2013, 11:55 p.m. UTC | #14
On Thu, May 02, 2013 at 09:53:25AM -0700, Jesse Gross wrote:
> On Wed, May 1, 2013 at 10:31 PM, Simon Horman <horms@verge.net.au> wrote:
> > On Wed, May 01, 2013 at 09:53:42PM -0700, Jesse Gross wrote:
> >> On Wed, May 1, 2013 at 3:57 PM, Simon Horman <horms@verge.net.au> wrote:
> >> > On Wed, May 01, 2013 at 11:16:40AM -0700, Jesse Gross wrote:
> >> >> On Wed, May 1, 2013 at 12:50 AM, Simon Horman <horms@verge.net.au> wrote:
> >> >> > On Tue, Apr 30, 2013 at 09:19:51AM -0700, Jesse Gross wrote:
> >> >> >> On Mon, Apr 29, 2013 at 8:21 PM, Simon Horman <horms@verge.net.au> wrote:
> >> >> >> > On Fri, Apr 26, 2013 at 04:03:21PM -0700, Jesse Gross wrote:
> >> >> >> >> On Thu, Apr 25, 2013 at 12:36 AM, Simon Horman <horms@verge.net.au> wrote:
> >> >> >> >> > On Tue, Apr 23, 2013 at 02:00:19PM -0700, Joseph Gasparakis wrote:
> >> >> >> >> >> Any particular reason to introduce skb->encapsulation_features instead of
> >> >> >> >> >> using the existing skb->encapsulation? Also I don't see it used in your
> >> >> >> >> >> second patch either.
> >> >> >> >> >
> >> >> >> >> > My reasoning is that skb->encapsulation seems to alter the behaviour of
> >> >> >> >> > many different locations and I'm not sure that any of them, other than the
> >> >> >> >> > one in dev_hard_start_xmit() make sense for MPLS.
> >> >> >> >>
> >> >> >> >> The problem is the meaning of skb->encapsulation isn't really defined
> >> >> >> >> clearly and I'm certain that the current implementation is not going
> >> >> >> >> to work in the future. Depending on your perspective, vlans, MPLS,
> >> >> >> >> tunnels, etc. can all be considered forms of encapsulation but clearly
> >> >> >> >> there are many NICs that have different capabilities across those. I
> >> >> >> >> believe the intention here was really to describe L3 tunnels as
> >> >> >> >> encapsulation, in which case MPLS really shouldn't be using this
> >> >> >> >> mechanism at all.
> >> >> >> >>
> >> >> >> >> Now there is some overlap, especially today since most currently
> >> >> >> >> shipping silicon wasn't designed to support tunnels and so is using
> >> >> >> >> some form of offset based offloads. In that case, all forms of
> >> >> >> >> encapsulation are pretty similar. However, in the future that won't be
> >> >> >> >> the case as support for specific protocols is implemented for higher
> >> >> >> >> performance and richer support. When that happens, not only will MPLS
> >> >> >> >> and tunnels have different capabilities but various forms tunnels
> >> >> >> >> might as well.
> >> >> >> >
> >> >> >> > Wouldn't be possible to describe those differences using,
> >> >> >> > dev->hw_enc_features? I assumed that was its purpose.
> >> >> >>
> >> >> >> If there truly are differences between the offload capabilities of
> >> >> >> MPLS and L3 tunnels then no, it's not possible, because it's a single
> >> >> >> field. It's certainly not a valid assumption that a NIC that can do
> >> >> >> TSO over GRE can also do it over MPLS.
> >> >> >>
> >> >> >> However, it's unlikely that there are truly significant differences
> >> >> >> between various encapsulation formats on a per-feature basis. I think
> >> >> >> what we need to do is separate out the ability to understand the
> >> >> >> headers from the capabilities so you have two fields: header (none,
> >> >> >> VLAN, QinQ, MPLS, VXLAN, GRE, etc.) and feature (checksum, SG, TSO,
> >> >> >> etc.) rather than the product of each. Otherwise, we end up with a ton
> >> >> >> of different combinations.
> >> >> >
> >> >> > I'm not quite sure that I follow.
> >> >> >
> >> >> > Is your idea to replace skb->encapsulation (a single bit) with
> >> >> > a field that corresponds to the outer-most (encapsulation) header in use
> >> >> > and has bits for none, VLAN, QinQ, MPLS, VXLAN, GRE, etc...?
> >> >>
> >> >> No, I'm talking about netdev features. You can already tell the
> >> >> encapsulation type of a packet by looking at the EtherType.
> >> >
> >> > Now I am completely confused about what are the two fields that you
> >> > refer to in your previous email.
> >>
> >> I have always been referring to the netdev features for various
> >> protocol types. This is because considering MPLS as a form of
> >> encapsulation for the purpose of offloads buckets too many protocols
> >> into the same set and NICs will have varying features for those.
> >> Trying to avoid this by having a bit for offloadable encapsulations is
> >> just going to be very confusing and not very future proof.
> >>
> >> > In regards to looking ath the ethernet type:
> >> >
> >> > One of the tricky parts of MPLS is that the packet itself does not contain
> >> > the ethernet type or any other way of knowing the type of the inner-packet.
> >> > Information that is needed for GSO.
> >>
> >> I'm aware of that. However, you were referring to the type of
> >> encapsulation. It is easy to determine that a packet is MPLS.
> >>
> >> > My proposal to get around this is to leave skb->protocol as the
> >> > original, in the case we are interested in non-MPLS, ethernet type.
> >>
> >> At the very least, this is not consistent with how it is currently
> >> handled (for example, with VLANs) and seems difficult to do properly.
> >> However, I have not seen any further analysis since the last time that
> >> we discussed this.
> >
> > Unfortunately my efforts to solicit feedback from others regarding
> > that have not been successful.
> 
> Give me a break, Simon. These are your patches and therefore it is
> your responsibility to do the analysis. This has come up over and over
> again and I'm not happy about it.
> 
> Rather than trying to respond to me as quickly as possible, you need
> to slow down, take a step back, and think about the correct direction.
> Given that some of these patches have revision numbers in the 20s you
> have nothing to complain about as far as receiving help.
> 
> In order to assist you in this, I'm going to drop all of your pending
> patches from my queue and not respond to anything further from you
> until the merge window is open again.

I apologise, the comment I made above was unnecessary.

I accept your criticism in regards to responding to quickly.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2e0ced1..d9ec1de 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -494,7 +494,14 @@  struct sk_buff {
 	 * headers if needed
 	 */
 	__u8			encapsulation:1;
-	/* 7/9 bit hole (depending on ndisc_nodetype presence) */
+	/* Encapsulation protocol and NIC drivers should use
+	 * this flag to indicate to each other if the skb contains
+	 * encapsulated packet and GSO should use encapsulation features
+	 * instead of standard features for the netdev. This is typically
+	 * a subset of cases where skb->encapsulation is set.
+	 */
+	__u8			encapsulation_features:1;
+	/* 6/8 bit hole (depending on ndisc_nodetype presence) */
 	kmemcheck_bitfield_end(flags2);
 
 #ifdef CONFIG_NET_DMA
diff --git a/net/core/dev.c b/net/core/dev.c
index 9e26b8d..53236c5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2498,7 +2498,7 @@  int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 		 * hardware encapsulation features instead of standard
 		 * features for the netdev
 		 */
-		if (skb->encapsulation)
+		if (skb->encapsulation_features)
 			features &= dev->hw_enc_features;
 
 		if (netif_needs_gso(skb, features)) {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 898cf5c..f23d136 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -708,6 +708,7 @@  static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->l4_rxhash		= old->l4_rxhash;
 	new->no_fcs		= old->no_fcs;
 	new->encapsulation	= old->encapsulation;
+	new->encapsulation_features = old->encapsulation_features;
 #ifdef CONFIG_XFRM
 	new->sp			= secpath_get(old->sp);
 #endif
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
index 0ae998b..8420f29 100644
--- a/net/ipv4/gre.c
+++ b/net/ipv4/gre.c
@@ -157,6 +157,7 @@  static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 	}
 
 	skb->encapsulation = 0;
+	skb->encapsulation_features = 0;
 
 	if (unlikely(!pskb_may_pull(skb, ghl)))
 		goto out;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 77bfcce..a6db3c0 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -220,6 +220,7 @@  static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (likely(!skb->encapsulation)) {
 		skb_reset_inner_headers(skb);
 		skb->encapsulation = 1;
+		skb->encapsulation_features = 1;
 	}
 
 	ip_tunnel_xmit(skb, dev, tiph);