diff mbox

[Trusty,SRU] net-gro: restore frag0 optimization

Message ID 23550.1405716136@localhost.localdomain
State New
Headers show

Commit Message

Jay Vosburgh July 18, 2014, 8:42 p.m. UTC
From: Eric Dumazet <edumazet@google.com>

BugLink: http://bugs.launchpad.net/bugs/1344323

Main difference between napi_frags_skb() and napi_gro_receive() is that
the later is called while ethernet header was already pulled by the NIC
driver (eth_type_trans() was called before napi_gro_receive())

Jerry Chu in commit 299603e8370a ("net-gro: Prepare GRO stack for the
upcoming tunneling support") tried to remove this difference by calling
eth_type_trans() from napi_frags_skb() instead of doing this later from
napi_frags_finish()

Goal was that napi_gro_complete() could call
ptype->callbacks.gro_complete(skb, 0)  (offset of first network header =
0)

Also, xxx_gro_receive() handlers all use off = skb_gro_offset(skb) to
point to their own header, for the current skb and ones held in gro_list

Problem is this cleanup work defeated the frag0 optimization:
It turns out the consecutive pskb_may_pull() calls are too expensive.

This patch brings back the frag0 stuff in napi_frags_skb().

As all skb have their mac header in skb head, we no longer need
skb_gro_mac_header()

Reported-by: Michal Schmidt <mschmidt@redhat.com>
Fixes: 299603e8370a ("net-gro: Prepare GRO stack for the upcoming tunneling support")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Jerry Chu <hkchu@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(backported from commit a50e233c50dbc881abaa0e4070789064e8d12d70 upstream)
Signed-off-by: Jay Vosburgh <jay.vosburgh@canonical.com>

---
 include/linux/netdevice.h |  5 ---
 net/core/dev.c            | 92 ++++++++++++++++++++++++++++++-----------------
 2 files changed, 60 insertions(+), 37 deletions(-)

Comments

Tim Gardner July 21, 2014, 3:09 p.m. UTC | #1

Brad Figg July 21, 2014, 4:12 p.m. UTC | #2
On 07/18/2014 01:42 PM, Jay Vosburgh wrote:
> 
> From: Eric Dumazet <edumazet@google.com>
> 
> BugLink: http://bugs.launchpad.net/bugs/1344323
> 
> Main difference between napi_frags_skb() and napi_gro_receive() is that
> the later is called while ethernet header was already pulled by the NIC
> driver (eth_type_trans() was called before napi_gro_receive())
> 
> Jerry Chu in commit 299603e8370a ("net-gro: Prepare GRO stack for the
> upcoming tunneling support") tried to remove this difference by calling
> eth_type_trans() from napi_frags_skb() instead of doing this later from
> napi_frags_finish()
> 
> Goal was that napi_gro_complete() could call
> ptype->callbacks.gro_complete(skb, 0)  (offset of first network header =
> 0)
> 
> Also, xxx_gro_receive() handlers all use off = skb_gro_offset(skb) to
> point to their own header, for the current skb and ones held in gro_list
> 
> Problem is this cleanup work defeated the frag0 optimization:
> It turns out the consecutive pskb_may_pull() calls are too expensive.
> 
> This patch brings back the frag0 stuff in napi_frags_skb().
> 
> As all skb have their mac header in skb head, we no longer need
> skb_gro_mac_header()
> 
> Reported-by: Michal Schmidt <mschmidt@redhat.com>
> Fixes: 299603e8370a ("net-gro: Prepare GRO stack for the upcoming tunneling support")
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Jerry Chu <hkchu@google.com>
> Signed-off-by: David S. Miller <davem@davemloft.net>
> (backported from commit a50e233c50dbc881abaa0e4070789064e8d12d70 upstream)
> Signed-off-by: Jay Vosburgh <jay.vosburgh@canonical.com>
> 
> ---
>  include/linux/netdevice.h |  5 ---
>  net/core/dev.c            | 92 ++++++++++++++++++++++++++++++-----------------
>  2 files changed, 60 insertions(+), 37 deletions(-)
> 
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index ee0677f..c46b3b2 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -1911,11 +1911,6 @@ static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen,
>  	return skb->data + offset;
>  }
>  
> -static inline void *skb_gro_mac_header(struct sk_buff *skb)
> -{
> -	return NAPI_GRO_CB(skb)->frag0 ?: skb_mac_header(skb);
> -}
> -
>  static inline void *skb_gro_network_header(struct sk_buff *skb)
>  {
>  	return (NAPI_GRO_CB(skb)->frag0 ?: skb->data) +
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 3eca989..c5dac8c 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -3817,10 +3817,10 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
>  		diffs |= p->vlan_tci ^ skb->vlan_tci;
>  		if (maclen == ETH_HLEN)
>  			diffs |= compare_ether_header(skb_mac_header(p),
> -						      skb_gro_mac_header(skb));
> +						      skb_mac_header(skb));
>  		else if (!diffs)
>  			diffs = memcmp(skb_mac_header(p),
> -				       skb_gro_mac_header(skb),
> +				       skb_mac_header(skb),
>  				       maclen);
>  		NAPI_GRO_CB(p)->same_flow = !diffs;
>  		NAPI_GRO_CB(p)->flush = 0;
> @@ -3844,6 +3844,27 @@ static void skb_gro_reset_offset(struct sk_buff *skb)
>  	}
>  }
>  
> +static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
> +{
> +	struct skb_shared_info *pinfo = skb_shinfo(skb);
> +
> +	BUG_ON(skb->end - skb->tail < grow);
> +
> +	memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
> +
> +	skb->data_len -= grow;
> +	skb->tail += grow;
> +
> +	pinfo->frags[0].page_offset += grow;
> +	skb_frag_size_sub(&pinfo->frags[0], grow);
> +
> +	if (unlikely(!skb_frag_size(&pinfo->frags[0]))) {
> +	       skb_frag_unref(skb, 0);
> +	       memmove(pinfo->frags, pinfo->frags + 1,
> +		       --pinfo->nr_frags * sizeof(pinfo->frags[0]));
> +	}
> +}
> +
>  static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
>  {
>  	struct sk_buff **pp = NULL;
> @@ -3852,6 +3873,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
>  	struct list_head *head = &offload_base;
>  	int same_flow;
>  	enum gro_result ret;
> +	int grow;
>  
>  	if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
>  		goto normal;
> @@ -3859,7 +3881,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
>  	if (skb_is_gso(skb) || skb_has_frag_list(skb))
>  		goto normal;
>  
> -	skb_gro_reset_offset(skb);
>  	gro_list_prepare(napi, skb);
>  	NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */
>  
> @@ -3924,27 +3945,9 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
>  	ret = GRO_HELD;
>  
>  pull:
> -	if (skb_headlen(skb) < skb_gro_offset(skb)) {
> -		int grow = skb_gro_offset(skb) - skb_headlen(skb);
> -
> -		BUG_ON(skb->end - skb->tail < grow);
> -
> -		memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
> -
> -		skb->tail += grow;
> -		skb->data_len -= grow;
> -
> -		skb_shinfo(skb)->frags[0].page_offset += grow;
> -		skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow);
> -
> -		if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) {
> -			skb_frag_unref(skb, 0);
> -			memmove(skb_shinfo(skb)->frags,
> -				skb_shinfo(skb)->frags + 1,
> -				--skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
> -		}
> -	}
> -
> +	grow = skb_gro_offset(skb) - skb_headlen(skb);
> +	if (grow > 0)
> +		gro_pull_from_frag0(skb, grow);
>  ok:
>  	return ret;
>  
> @@ -4010,6 +4013,8 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
>  
>  gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
>  {
> +	skb_gro_reset_offset(skb);
> +
>  	return napi_skb_finish(dev_gro_receive(napi, skb), skb);
>  }
>  EXPORT_SYMBOL(napi_gro_receive);
> @@ -4040,12 +4045,16 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
>  }
>  EXPORT_SYMBOL(napi_get_frags);
>  
> -static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
> -			       gro_result_t ret)
> +static gro_result_t napi_frags_finish(struct napi_struct *napi,
> +				      struct sk_buff *skb,
> +				      gro_result_t ret)
>  {
>  	switch (ret) {
>  	case GRO_NORMAL:
> -		if (netif_receive_skb(skb))
> +	case GRO_HELD:
> +		__skb_push(skb, ETH_HLEN);
> +		skb->protocol = eth_type_trans(skb, skb->dev);
> +		if (ret == GRO_NORMAL && netif_receive_skb(skb))
>  			ret = GRO_DROP;
>  		break;
>  
> @@ -4054,7 +4063,6 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *
>  		napi_reuse_skb(napi, skb);
>  		break;
>  
> -	case GRO_HELD:
>  	case GRO_MERGED:
>  		break;
>  	}
> @@ -4065,14 +4073,34 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *
>  static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
>  {
>  	struct sk_buff *skb = napi->skb;
> +	const struct ethhdr *eth;
> +	unsigned int hlen = sizeof(*eth);
>  
>  	napi->skb = NULL;
>  
> -	if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) {
> -		napi_reuse_skb(napi, skb);
> -		return NULL;
> +	skb_reset_mac_header(skb);
> +	skb_gro_reset_offset(skb);
> +
> +	eth = skb_gro_header_fast(skb, 0);
> +	if (unlikely(skb_gro_header_hard(skb, hlen))) {
> +		eth = skb_gro_header_slow(skb, hlen, 0);
> +		if (unlikely(!eth)) {
> +			napi_reuse_skb(napi, skb);
> +			return NULL;
> +		}
> +	} else {
> +		gro_pull_from_frag0(skb, hlen);
> +		NAPI_GRO_CB(skb)->frag0 += hlen;
> +		NAPI_GRO_CB(skb)->frag0_len -= hlen;
>  	}
> -	skb->protocol = eth_type_trans(skb, skb->dev);
> +	__skb_pull(skb, hlen);
> +
> +	/*
> +	 * This works because the only protocols we care about don't require
> +	 * special handling.
> +	 * We'll fix it up properly in napi_frags_finish()
> +	 */
> +	skb->protocol = eth->h_proto;
>  
>  	return skb;
>  }
>
Tim Gardner July 21, 2014, 4:16 p.m. UTC | #3

diff mbox

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ee0677f..c46b3b2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1911,11 +1911,6 @@  static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen,
 	return skb->data + offset;
 }
 
-static inline void *skb_gro_mac_header(struct sk_buff *skb)
-{
-	return NAPI_GRO_CB(skb)->frag0 ?: skb_mac_header(skb);
-}
-
 static inline void *skb_gro_network_header(struct sk_buff *skb)
 {
 	return (NAPI_GRO_CB(skb)->frag0 ?: skb->data) +
diff --git a/net/core/dev.c b/net/core/dev.c
index 3eca989..c5dac8c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3817,10 +3817,10 @@  static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
 		diffs |= p->vlan_tci ^ skb->vlan_tci;
 		if (maclen == ETH_HLEN)
 			diffs |= compare_ether_header(skb_mac_header(p),
-						      skb_gro_mac_header(skb));
+						      skb_mac_header(skb));
 		else if (!diffs)
 			diffs = memcmp(skb_mac_header(p),
-				       skb_gro_mac_header(skb),
+				       skb_mac_header(skb),
 				       maclen);
 		NAPI_GRO_CB(p)->same_flow = !diffs;
 		NAPI_GRO_CB(p)->flush = 0;
@@ -3844,6 +3844,27 @@  static void skb_gro_reset_offset(struct sk_buff *skb)
 	}
 }
 
+static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
+{
+	struct skb_shared_info *pinfo = skb_shinfo(skb);
+
+	BUG_ON(skb->end - skb->tail < grow);
+
+	memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
+
+	skb->data_len -= grow;
+	skb->tail += grow;
+
+	pinfo->frags[0].page_offset += grow;
+	skb_frag_size_sub(&pinfo->frags[0], grow);
+
+	if (unlikely(!skb_frag_size(&pinfo->frags[0]))) {
+	       skb_frag_unref(skb, 0);
+	       memmove(pinfo->frags, pinfo->frags + 1,
+		       --pinfo->nr_frags * sizeof(pinfo->frags[0]));
+	}
+}
+
 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct sk_buff **pp = NULL;
@@ -3852,6 +3873,7 @@  static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	struct list_head *head = &offload_base;
 	int same_flow;
 	enum gro_result ret;
+	int grow;
 
 	if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
 		goto normal;
@@ -3859,7 +3881,6 @@  static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (skb_is_gso(skb) || skb_has_frag_list(skb))
 		goto normal;
 
-	skb_gro_reset_offset(skb);
 	gro_list_prepare(napi, skb);
 	NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */
 
@@ -3924,27 +3945,9 @@  static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	ret = GRO_HELD;
 
 pull:
-	if (skb_headlen(skb) < skb_gro_offset(skb)) {
-		int grow = skb_gro_offset(skb) - skb_headlen(skb);
-
-		BUG_ON(skb->end - skb->tail < grow);
-
-		memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
-
-		skb->tail += grow;
-		skb->data_len -= grow;
-
-		skb_shinfo(skb)->frags[0].page_offset += grow;
-		skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow);
-
-		if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) {
-			skb_frag_unref(skb, 0);
-			memmove(skb_shinfo(skb)->frags,
-				skb_shinfo(skb)->frags + 1,
-				--skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
-		}
-	}
-
+	grow = skb_gro_offset(skb) - skb_headlen(skb);
+	if (grow > 0)
+		gro_pull_from_frag0(skb, grow);
 ok:
 	return ret;
 
@@ -4010,6 +4013,8 @@  static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 
 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
+	skb_gro_reset_offset(skb);
+
 	return napi_skb_finish(dev_gro_receive(napi, skb), skb);
 }
 EXPORT_SYMBOL(napi_gro_receive);
@@ -4040,12 +4045,16 @@  struct sk_buff *napi_get_frags(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(napi_get_frags);
 
-static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
-			       gro_result_t ret)
+static gro_result_t napi_frags_finish(struct napi_struct *napi,
+				      struct sk_buff *skb,
+				      gro_result_t ret)
 {
 	switch (ret) {
 	case GRO_NORMAL:
-		if (netif_receive_skb(skb))
+	case GRO_HELD:
+		__skb_push(skb, ETH_HLEN);
+		skb->protocol = eth_type_trans(skb, skb->dev);
+		if (ret == GRO_NORMAL && netif_receive_skb(skb))
 			ret = GRO_DROP;
 		break;
 
@@ -4054,7 +4063,6 @@  static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *
 		napi_reuse_skb(napi, skb);
 		break;
 
-	case GRO_HELD:
 	case GRO_MERGED:
 		break;
 	}
@@ -4065,14 +4073,34 @@  static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *
 static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
 {
 	struct sk_buff *skb = napi->skb;
+	const struct ethhdr *eth;
+	unsigned int hlen = sizeof(*eth);
 
 	napi->skb = NULL;
 
-	if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr)))) {
-		napi_reuse_skb(napi, skb);
-		return NULL;
+	skb_reset_mac_header(skb);
+	skb_gro_reset_offset(skb);
+
+	eth = skb_gro_header_fast(skb, 0);
+	if (unlikely(skb_gro_header_hard(skb, hlen))) {
+		eth = skb_gro_header_slow(skb, hlen, 0);
+		if (unlikely(!eth)) {
+			napi_reuse_skb(napi, skb);
+			return NULL;
+		}
+	} else {
+		gro_pull_from_frag0(skb, hlen);
+		NAPI_GRO_CB(skb)->frag0 += hlen;
+		NAPI_GRO_CB(skb)->frag0_len -= hlen;
 	}
-	skb->protocol = eth_type_trans(skb, skb->dev);
+	__skb_pull(skb, hlen);
+
+	/*
+	 * This works because the only protocols we care about don't require
+	 * special handling.
+	 * We'll fix it up properly in napi_frags_finish()
+	 */
+	skb->protocol = eth->h_proto;
 
 	return skb;
 }