Patchwork [net-next] bnx2x: fix GRO parameters

login
register
mail settings
Submitter Eric Dumazet
Date Jan. 16, 2013, 5:37 a.m.
Message ID <1358314662.19956.51.camel@edumazet-glaptop>
Download mbox | patch
Permalink /patch/212392/
State Superseded
Delegated to: David Miller
Headers show

Comments

Eric Dumazet - Jan. 16, 2013, 5:37 a.m.
From: Eric Dumazet <edumazet@google.com>

bnx2x does an internal GRO pass but doesn't provide gso_segs, thus
breaking qdisc_pkt_len_init() in case ingress qdisc is used.

We store gso_segs in NAPI_GRO_CB(skb)->count, where tcp_gro_complete()
expects to find the number of aggregated segments.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Yuval Mintz <yuvalmin@broadcom.com>
Cc: Eilon Greenstein <eilong@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c |   44 +++++++-------
 1 file changed, 24 insertions(+), 20 deletions(-)



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yuval Mintz - Jan. 16, 2013, 7:01 a.m.
> -static u16 bnx2x_set_lro_mss(struct bnx2x *bp, u16 parsing_flags,
> -			     u16 len_on_bd)
> +static void bnx2x_set_gro_params(struct sk_buff *skb, struct bnx2x *bp,
> +				 u16 parsing_flags, u16 len_on_bd,
> +				 unsigned int pkt_len)

This is purely semantic, but our convention is for `struct bnx2x' to be
the first argument in our functions.

>  {
>  	/*
> -	 * TPA arrgregation won't have either IP options or TCP options
> +	 * TPA aggregation won't have either IP options or TCP options
>  	 * other than timestamp or IPv6 extension headers.
>  	 */
>  	u16 hdrs_len = ETH_HLEN + sizeof(struct tcphdr);

TPA_MODE_LRO indicates that an LRO aggregation was made by our FW. It seems
like your patch  eliminates the difference in configuration between the two
(GRO and LRO)

perhaps instead we should do something like:

+ static void bnx2x_set_lro_params(struct bnx2x *bp, struct sk_buff *skb,
+ 				   u16 parsing_flags, u16 len_on_bd,
+ 				   unsigned int pkt_len,
+				   bnx2x_tpa_mode_t mode)

And arrange its suggested code so that only gso_size will be set for LRO.

>  
>  	if (GET_FLAG(parsing_flags, PARSING_FLAGS_OVER_ETHERNET_PROTOCOL) ==
> -	    PRS_FLAG_OVERETH_IPV6)
> +	    PRS_FLAG_OVERETH_IPV6) {
>  		hdrs_len += sizeof(struct ipv6hdr);
> -	else /* IPv4 */
> +		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
> +	} else {
>  		hdrs_len += sizeof(struct iphdr);
> -
> +		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
> +	}
>  



>  #ifdef BNX2X_STOP_ON_ERROR
> @@ -651,7 +655,7 @@ static void bnx2x_gro_receive(struct bnx2x *bp, struct bnx2x_fastpath *fp,
>  			       struct sk_buff *skb)
>  {
>  #ifdef CONFIG_INET
> -	if (fp->mode == TPA_MODE_GRO && skb_shinfo(skb)->gso_size) {
> +	if (skb_shinfo(skb)->gso_size) {

This also seems like an incorrect removal, as TPA_MODE_LRO is (again)
a feasible option, and we wouldn't want the a `gro_complete' here.

>  		skb_set_network_header(skb, 0);
>  		switch (be16_to_cpu(skb->protocol)) {
>  		case ETH_P_IP:
> 

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yuval Mintz - Jan. 16, 2013, 2:38 p.m.
>>> -static u16 bnx2x_set_lro_mss(struct bnx2x *bp, u16 parsing_flags,
>>> -			     u16 len_on_bd)
>>> +static void bnx2x_set_gro_params(struct sk_buff *skb, struct bnx2x *bp,
>>> +				 u16 parsing_flags, u16 len_on_bd,
>>> +				 unsigned int pkt_len)
>>
>> This is purely semantic, but our convention is for `struct bnx2x' to be
>> the first argument in our functions.
>>
>>>  {
>>>  	/*
>>> -	 * TPA arrgregation won't have either IP options or TCP options
>>> +	 * TPA aggregation won't have either IP options or TCP options
>>>  	 * other than timestamp or IPv6 extension headers.
>>>  	 */
>>>  	u16 hdrs_len = ETH_HLEN + sizeof(struct tcphdr);
>>
>> TPA_MODE_LRO indicates that an LRO aggregation was made by our FW. It seems
>> like your patch  eliminates the difference in configuration between the two
>> (GRO and LRO)
>>
> 
> Thats the case, since you call GRO functions even if 'LRO ' is ON
> 
> I specifically had to remove the tests you guys do.
> 
>> perhaps instead we should do something like:
>>
>> + static void bnx2x_set_lro_params(struct bnx2x *bp, struct sk_buff *skb,
>> + 				   u16 parsing_flags, u16 len_on_bd,
>> + 				   unsigned int pkt_len,
>> +				   bnx2x_tpa_mode_t mode)
>>
>> And arrange its suggested code so that only gso_size will be set for LRO.
>>
> 
> I fail to understand why adding a conditional would change something.
> 
> Setting it is needed for GRO as well.
> 

I was a bit unclear - I meant the for LRO, only gso_size will be set
(while for GRO it will be set as well as the additional GRO fields)

>>>  
>>>  	if (GET_FLAG(parsing_flags, PARSING_FLAGS_OVER_ETHERNET_PROTOCOL) ==
>>> -	    PRS_FLAG_OVERETH_IPV6)
>>> +	    PRS_FLAG_OVERETH_IPV6) {
>>>  		hdrs_len += sizeof(struct ipv6hdr);
>>> -	else /* IPv4 */
>>> +		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
>>> +	} else {
>>>  		hdrs_len += sizeof(struct iphdr);
>>> -
>>> +		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
>>> +	}
>>>  
>>
>>
>>
>>>  #ifdef BNX2X_STOP_ON_ERROR
>>> @@ -651,7 +655,7 @@ static void bnx2x_gro_receive(struct bnx2x *bp, struct bnx2x_fastpath *fp,
>>>  			       struct sk_buff *skb)
>>>  {
>>>  #ifdef CONFIG_INET
>>> -	if (fp->mode == TPA_MODE_GRO && skb_shinfo(skb)->gso_size) {
>>> +	if (skb_shinfo(skb)->gso_size) {
>>
>> This also seems like an incorrect removal, as TPA_MODE_LRO is (again)
>> a feasible option, and we wouldn't want the a `gro_complete' here.
>>
> 
> 
> Problem is : You call GRO functions, faking a GRO packet.
> 
> You must therefore exactly present same attributes in skb than after a
> true software GRO step.
> 
> I did my tests booting a standard driver, that is with LRO on.

I think we have a misunderstanding here - when LRO is on, our FW works in
TPA_MODE_LRO (LRO/GRO FW are mutually exclusive). In that mode, the bnx2x
driver will not try to 'fake' GRO packets in the same manner.


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet - Jan. 16, 2013, 3:29 p.m.
On Wed, 2013-01-16 at 09:01 +0200, Yuval Mintz wrote:
> > -static u16 bnx2x_set_lro_mss(struct bnx2x *bp, u16 parsing_flags,
> > -			     u16 len_on_bd)
> > +static void bnx2x_set_gro_params(struct sk_buff *skb, struct bnx2x *bp,
> > +				 u16 parsing_flags, u16 len_on_bd,
> > +				 unsigned int pkt_len)
> 
> This is purely semantic, but our convention is for `struct bnx2x' to be
> the first argument in our functions.
> 
> >  {
> >  	/*
> > -	 * TPA arrgregation won't have either IP options or TCP options
> > +	 * TPA aggregation won't have either IP options or TCP options
> >  	 * other than timestamp or IPv6 extension headers.
> >  	 */
> >  	u16 hdrs_len = ETH_HLEN + sizeof(struct tcphdr);
> 
> TPA_MODE_LRO indicates that an LRO aggregation was made by our FW. It seems
> like your patch  eliminates the difference in configuration between the two
> (GRO and LRO)
> 

Thats the case, since you call GRO functions even if 'LRO ' is ON

I specifically had to remove the tests you guys do.

> perhaps instead we should do something like:
> 
> + static void bnx2x_set_lro_params(struct bnx2x *bp, struct sk_buff *skb,
> + 				   u16 parsing_flags, u16 len_on_bd,
> + 				   unsigned int pkt_len,
> +				   bnx2x_tpa_mode_t mode)
> 
> And arrange its suggested code so that only gso_size will be set for LRO.
> 

I fail to understand why adding a conditional would change something.

Setting it is needed for GRO as well.

> >  
> >  	if (GET_FLAG(parsing_flags, PARSING_FLAGS_OVER_ETHERNET_PROTOCOL) ==
> > -	    PRS_FLAG_OVERETH_IPV6)
> > +	    PRS_FLAG_OVERETH_IPV6) {
> >  		hdrs_len += sizeof(struct ipv6hdr);
> > -	else /* IPv4 */
> > +		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
> > +	} else {
> >  		hdrs_len += sizeof(struct iphdr);
> > -
> > +		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
> > +	}
> >  
> 
> 
> 
> >  #ifdef BNX2X_STOP_ON_ERROR
> > @@ -651,7 +655,7 @@ static void bnx2x_gro_receive(struct bnx2x *bp, struct bnx2x_fastpath *fp,
> >  			       struct sk_buff *skb)
> >  {
> >  #ifdef CONFIG_INET
> > -	if (fp->mode == TPA_MODE_GRO && skb_shinfo(skb)->gso_size) {
> > +	if (skb_shinfo(skb)->gso_size) {
> 
> This also seems like an incorrect removal, as TPA_MODE_LRO is (again)
> a feasible option, and we wouldn't want the a `gro_complete' here.
> 


Problem is : You call GRO functions, faking a GRO packet.

You must therefore exactly present same attributes in skb than after a
true software GRO step.

I did my tests booting a standard driver, that is with LRO on.



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet - Jan. 16, 2013, 4:50 p.m.
On Wed, 2013-01-16 at 16:38 +0200, Yuval Mintz wrote:

> I think we have a misunderstanding here - when LRO is on, our FW works in
> TPA_MODE_LRO (LRO/GRO FW are mutually exclusive). In that mode, the bnx2x
> driver will not try to 'fake' GRO packets in the same manner.

I think you didn't really understand the issue then.

Call the hardware assist Receive Offload "LRO" if you really want, even
if not using net/ipv4/inet_lro.c , but provide :

gso_segs and gso_type as well. (Its useful for various things, check
netif_skb_features() for another example)

Or else we need to parse the headers again to check IPv4/ v6 / TCP / UDP
later.

There is no downside providing gso_segs and gso_type, you have all
needed information as shown in the patch I cooked and tested.

Our goal is to remove CONFIG_INET_LRO, so all multi-segments packets
should have the same set of parameters (gso_size, gso_segs, gso_type)



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yuval Mintz - Jan. 17, 2013, 7:16 a.m.
> There is no downside providing gso_segs and gso_type, you have all
> needed information as shown in the patch I cooked and tested.
> 
> Our goal is to remove CONFIG_INET_LRO, so all multi-segments packets
> should have the same set of parameters (gso_size, gso_segs, gso_type)

I concur - I'll make the semantic modification to your patch and re-submit
it later on today.

Thanks,
Yuval


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 18fc26e..e9bea2e 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -439,31 +439,37 @@  static void bnx2x_tpa_start(struct bnx2x_fastpath *fp, u16 queue,
  */
 #define TPA_TSTAMP_OPT_LEN	12
 /**
- * bnx2x_set_lro_mss - calculate the approximate value of the MSS
+ * bnx2x_set_gro_params - compute GRO values
  *
+ * @skb:		packet skb
  * @bp:			driver handle
  * @parsing_flags:	parsing flags from the START CQE
  * @len_on_bd:		total length of the first packet for the
  *			aggregation.
+ * @pkt_len:		length of all segments
  *
  * Approximate value of the MSS for this aggregation calculated using
  * the first packet of it.
+ * Compute number of aggregated segments, and gso_type
  */
-static u16 bnx2x_set_lro_mss(struct bnx2x *bp, u16 parsing_flags,
-			     u16 len_on_bd)
+static void bnx2x_set_gro_params(struct sk_buff *skb, struct bnx2x *bp,
+				 u16 parsing_flags, u16 len_on_bd,
+				 unsigned int pkt_len)
 {
 	/*
-	 * TPA arrgregation won't have either IP options or TCP options
+	 * TPA aggregation won't have either IP options or TCP options
 	 * other than timestamp or IPv6 extension headers.
 	 */
 	u16 hdrs_len = ETH_HLEN + sizeof(struct tcphdr);
 
 	if (GET_FLAG(parsing_flags, PARSING_FLAGS_OVER_ETHERNET_PROTOCOL) ==
-	    PRS_FLAG_OVERETH_IPV6)
+	    PRS_FLAG_OVERETH_IPV6) {
 		hdrs_len += sizeof(struct ipv6hdr);
-	else /* IPv4 */
+		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+	} else {
 		hdrs_len += sizeof(struct iphdr);
-
+		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+	}
 
 	/* Check if there was a TCP timestamp, if there is it's will
 	 * always be 12 bytes length: nop nop kind length echo val.
@@ -473,7 +479,13 @@  static u16 bnx2x_set_lro_mss(struct bnx2x *bp, u16 parsing_flags,
 	if (parsing_flags & PARSING_FLAGS_TIME_STAMP_EXIST_FLAG)
 		hdrs_len += TPA_TSTAMP_OPT_LEN;
 
-	return len_on_bd - hdrs_len;
+	skb_shinfo(skb)->gso_size = len_on_bd - hdrs_len;
+
+	/* tcp_gro_complete() will copy NAPI_GRO_CB(skb)->count
+	 * to skb_shinfo(skb)->gso_segs
+	 */
+	NAPI_GRO_CB(skb)->count = DIV_ROUND_UP(pkt_len - hdrs_len,
+					       skb_shinfo(skb)->gso_size);
 }
 
 static int bnx2x_alloc_rx_sge(struct bnx2x *bp,
@@ -527,18 +539,10 @@  static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp,
 	}
 
 	/* This is needed in order to enable forwarding support */
-	if (frag_size) {
-		skb_shinfo(skb)->gso_size = bnx2x_set_lro_mss(bp,
-					tpa_info->parsing_flags, len_on_bd);
+	if (frag_size)
+		bnx2x_set_gro_params(skb, bp, tpa_info->parsing_flags,
+				     len_on_bd, le16_to_cpu(cqe->pkt_len));
 
-		/* set for GRO */
-		if (fp->mode == TPA_MODE_GRO && skb_shinfo(skb)->gso_size)
-			skb_shinfo(skb)->gso_type =
-			    (GET_FLAG(tpa_info->parsing_flags,
-				      PARSING_FLAGS_OVER_ETHERNET_PROTOCOL) ==
-						PRS_FLAG_OVERETH_IPV6) ?
-				SKB_GSO_TCPV6 : SKB_GSO_TCPV4;
-	}
 
 
 #ifdef BNX2X_STOP_ON_ERROR
@@ -651,7 +655,7 @@  static void bnx2x_gro_receive(struct bnx2x *bp, struct bnx2x_fastpath *fp,
 			       struct sk_buff *skb)
 {
 #ifdef CONFIG_INET
-	if (fp->mode == TPA_MODE_GRO && skb_shinfo(skb)->gso_size) {
+	if (skb_shinfo(skb)->gso_size) {
 		skb_set_network_header(skb, 0);
 		switch (be16_to_cpu(skb->protocol)) {
 		case ETH_P_IP: