diff mbox

bnx2x: fix checksum validation

Message ID 1339581004.22704.340.camel@edumazet-glaptop
State Accepted, archived
Delegated to: David Miller
Headers show

Commit Message

Eric Dumazet June 13, 2012, 9:50 a.m. UTC
From: Eric Dumazet <edumazet@google.com>

bnx2x driver incorrectly sets ip_summed to CHECKSUM_UNNECESSARY on
encapsulated segments. TCP stack happily accepts frames with bad
checksums, if they are inside a GRE or IPIP encapsulation.

Our understanding is that if no IP or L4 csum validation was done by the
hardware, we should leave ip_summed as is (CHECKSUM_NONE), since
hardware doesn't provide CHECKSUM_COMPLETE support in its cqe.

Then, if IP/L4 checksumming was done by the hardware, set
CHECKSUM_UNNECESSARY if no error was flagged.

Patch based on findings and analysis from Robert Evans

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Eilon Greenstein <eilong@broadcom.com>
Cc: Yaniv Rosner <yanivr@broadcom.com>
Cc: Merav Sicron <meravs@broadcom.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Robert Evans <evansr@google.com>
Cc: Willem de Bruijn <willemb@google.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h     |   15 -------
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c |   27 ++++++++++----
 2 files changed, 21 insertions(+), 21 deletions(-)



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Eilon Greenstein June 13, 2012, 1:42 p.m. UTC | #1
On Wed, 2012-06-13 at 11:50 +0200, Eric Dumazet wrote:
> From: Eric Dumazet <edumazet@google.com>
> 
> bnx2x driver incorrectly sets ip_summed to CHECKSUM_UNNECESSARY on
> encapsulated segments. TCP stack happily accepts frames with bad
> checksums, if they are inside a GRE or IPIP encapsulation.

So what you are saying is that the indication that the checksum is valid
is interpreted as the encapsulated checksum and not just the IP
header... This was not the intention of this code, it was meant to
indicate that the IP header is valid.

> Our understanding is that if no IP or L4 csum validation was done by the
> hardware, we should leave ip_summed as is (CHECKSUM_NONE), since
> hardware doesn't provide CHECKSUM_COMPLETE support in its cqe.
> 
> Then, if IP/L4 checksumming was done by the hardware, set
> CHECKSUM_UNNECESSARY if no error was flagged.
> 
> Patch based on findings and analysis from Robert Evans

I must admit that the code looks much better with this change. The only
down side is that there is no longer IP checksum offload for pure IP
packets, but that's negligible. The only thing that bothers me is that
there is no way to indicate anything about the encapsulated packet
separately from the outer header. Is that the way we want to keep it?

> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Eilon Greenstein <eilong@broadcom.com>
> Cc: Yaniv Rosner <yanivr@broadcom.com>
> Cc: Merav Sicron <meravs@broadcom.com>
> Cc: Tom Herbert <therbert@google.com>
> Cc: Robert Evans <evansr@google.com>
> Cc: Willem de Bruijn <willemb@google.com>
> ---
>  drivers/net/ethernet/broadcom/bnx2x/bnx2x.h     |   15 -------
>  drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c |   27 ++++++++++----
>  2 files changed, 21 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
> index e30e2a2..7de8241 100644
> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
> @@ -747,21 +747,6 @@ struct bnx2x_fastpath {
>  
>  #define ETH_RX_ERROR_FALGS		ETH_FAST_PATH_RX_CQE_PHY_DECODE_ERR_FLG
>  
> -#define BNX2X_IP_CSUM_ERR(cqe) \
> -			(!((cqe)->fast_path_cqe.status_flags & \
> -			   ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG) && \
> -			 ((cqe)->fast_path_cqe.type_error_flags & \
> -			  ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG))
> -
> -#define BNX2X_L4_CSUM_ERR(cqe) \
> -			(!((cqe)->fast_path_cqe.status_flags & \
> -			   ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG) && \
> -			 ((cqe)->fast_path_cqe.type_error_flags & \
> -			  ETH_FAST_PATH_RX_CQE_L4_BAD_XSUM_FLG))
> -
> -#define BNX2X_RX_CSUM_OK(cqe) \
> -			(!(BNX2X_L4_CSUM_ERR(cqe) || BNX2X_IP_CSUM_ERR(cqe)))
> -
>  #define BNX2X_PRS_FLAG_OVERETH_IPV4(flags) \
>  				(((le16_to_cpu(flags) & \
>  				   PARSING_FLAGS_OVER_ETHERNET_PROTOCOL) >> \
> diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> index ad0743b..cbc56f2 100644
> --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
> @@ -617,6 +617,25 @@ static int bnx2x_alloc_rx_data(struct bnx2x *bp,
>  	return 0;
>  }
>  
> +static void bnx2x_csum_validate(struct sk_buff *skb, union eth_rx_cqe *cqe,
> +				struct bnx2x_fastpath *fp)
> +{
> +	/* Do nothing if no IP/L4 csum validation was done */
> +
> +	if (cqe->fast_path_cqe.status_flags &
> +	    (ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG |
> +	     ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG))
> +		return;
> +
> +	/* If both IP/L4 validation were done, check if an error was found. */
> +
> +	if (cqe->fast_path_cqe.type_error_flags &
> +	    (ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG |
> +	     ETH_FAST_PATH_RX_CQE_L4_BAD_XSUM_FLG))
> +		fp->eth_q_stats.hw_csum_err++;
> +	else
> +		skb->ip_summed = CHECKSUM_UNNECESSARY;
> +}
>  
>  int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
>  {
> @@ -806,13 +825,9 @@ reuse_rx:
>  
>  		skb_checksum_none_assert(skb);
>  
> -		if (bp->dev->features & NETIF_F_RXCSUM) {
> +		if (bp->dev->features & NETIF_F_RXCSUM)
> +			bnx2x_csum_validate(skb, cqe, fp);
>  
> -			if (likely(BNX2X_RX_CSUM_OK(cqe)))
> -				skb->ip_summed = CHECKSUM_UNNECESSARY;
> -			else
> -				fp->eth_q_stats.hw_csum_err++;
> -		}
>  
>  		skb_record_rx_queue(skb, fp->rx_queue);
>  
> 
> 
> 




--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet June 13, 2012, 2:10 p.m. UTC | #2
On Wed, 2012-06-13 at 16:42 +0300, Eilon Greenstein wrote:

> So what you are saying is that the indication that the checksum is valid
> is interpreted as the encapsulated checksum and not just the IP
> header... This was not the intention of this code, it was meant to
> indicate that the IP header is valid.
> 

IP header is always checked by our stack.

Its so fast that there is no point spending a bit in skb, and testing
this bit.

Yes, CHECKSUM_UNNECESSARY really means everything was checked up to L4

> I must admit that the code looks much better with this change. The only
> down side is that there is no longer IP checksum offload for pure IP
> packets, but that's negligible. The only thing that bothers me is that
> there is no way to indicate anything about the encapsulated packet
> separately from the outer header. Is that the way we want to keep it?

Some NIC allow for csum L3 offloading, providing the 16bit csum.

We then can :

skb->csum = csum;
skb->ip_summed = CHECKSUM_COMPLETE;

Check drivers/net/ethernet/ibm/ehea/ehea_main.c,
drivers/net/ethernet/marvell/skge.c ,
drivers/net/ethernet/marvell/sky2.c , ... for examples.


When a layer is removed(pulled), we can use skb_postpull_rcsum() and
friends (check net/ipv4/ip_gre.c for example) to keep skb->csum up2date.


If bnx2x provides this csum (for non TCP/UCP packets or fragments), you
could fallback to this CHECKSUM_COMPLETE stuff.



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eilon Greenstein June 13, 2012, 2:21 p.m. UTC | #3
On Wed, 2012-06-13 at 16:10 +0200, Eric Dumazet wrote:
> On Wed, 2012-06-13 at 16:42 +0300, Eilon Greenstein wrote:
> 
> > So what you are saying is that the indication that the checksum is valid
> > is interpreted as the encapsulated checksum and not just the IP
> > header... This was not the intention of this code, it was meant to
> > indicate that the IP header is valid.
> > 
> 
> IP header is always checked by our stack.
> 
> Its so fast that there is no point spending a bit in skb, and testing
> this bit.
> 
> Yes, CHECKSUM_UNNECESSARY really means everything was checked up to L4
> 
> > I must admit that the code looks much better with this change. The only
> > down side is that there is no longer IP checksum offload for pure IP
> > packets, but that's negligible. The only thing that bothers me is that
> > there is no way to indicate anything about the encapsulated packet
> > separately from the outer header. Is that the way we want to keep it?
> 
> Some NIC allow for csum L3 offloading, providing the 16bit csum.
> 
> We then can :
> 
> skb->csum = csum;
> skb->ip_summed = CHECKSUM_COMPLETE;
> 
> Check drivers/net/ethernet/ibm/ehea/ehea_main.c,
> drivers/net/ethernet/marvell/skge.c ,
> drivers/net/ethernet/marvell/sky2.c , ... for examples.
> 
> 
> When a layer is removed(pulled), we can use skb_postpull_rcsum() and
> friends (check net/ipv4/ip_gre.c for example) to keep skb->csum up2date.
> 
> 
> If bnx2x provides this csum (for non TCP/UCP packets or fragments), you
> could fallback to this CHECKSUM_COMPLETE stuff.
> 
> 

Thanks Erik. This leaves me with just one thing to add:
Acked-by: Eilon Greenstein <eilong@broadcom.com>



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller June 13, 2012, 10:59 p.m. UTC | #4
From: "Eilon Greenstein" <eilong@broadcom.com>
Date: Wed, 13 Jun 2012 17:21:03 +0300

> Thanks Erik. This leaves me with just one thing to add:
> Acked-by: Eilon Greenstein <eilong@broadcom.com>

Applied, and queued up for -stable, thanks.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index e30e2a2..7de8241 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -747,21 +747,6 @@  struct bnx2x_fastpath {
 
 #define ETH_RX_ERROR_FALGS		ETH_FAST_PATH_RX_CQE_PHY_DECODE_ERR_FLG
 
-#define BNX2X_IP_CSUM_ERR(cqe) \
-			(!((cqe)->fast_path_cqe.status_flags & \
-			   ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG) && \
-			 ((cqe)->fast_path_cqe.type_error_flags & \
-			  ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG))
-
-#define BNX2X_L4_CSUM_ERR(cqe) \
-			(!((cqe)->fast_path_cqe.status_flags & \
-			   ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG) && \
-			 ((cqe)->fast_path_cqe.type_error_flags & \
-			  ETH_FAST_PATH_RX_CQE_L4_BAD_XSUM_FLG))
-
-#define BNX2X_RX_CSUM_OK(cqe) \
-			(!(BNX2X_L4_CSUM_ERR(cqe) || BNX2X_IP_CSUM_ERR(cqe)))
-
 #define BNX2X_PRS_FLAG_OVERETH_IPV4(flags) \
 				(((le16_to_cpu(flags) & \
 				   PARSING_FLAGS_OVER_ETHERNET_PROTOCOL) >> \
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index ad0743b..cbc56f2 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -617,6 +617,25 @@  static int bnx2x_alloc_rx_data(struct bnx2x *bp,
 	return 0;
 }
 
+static void bnx2x_csum_validate(struct sk_buff *skb, union eth_rx_cqe *cqe,
+				struct bnx2x_fastpath *fp)
+{
+	/* Do nothing if no IP/L4 csum validation was done */
+
+	if (cqe->fast_path_cqe.status_flags &
+	    (ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG |
+	     ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG))
+		return;
+
+	/* If both IP/L4 validation were done, check if an error was found. */
+
+	if (cqe->fast_path_cqe.type_error_flags &
+	    (ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG |
+	     ETH_FAST_PATH_RX_CQE_L4_BAD_XSUM_FLG))
+		fp->eth_q_stats.hw_csum_err++;
+	else
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+}
 
 int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
 {
@@ -806,13 +825,9 @@  reuse_rx:
 
 		skb_checksum_none_assert(skb);
 
-		if (bp->dev->features & NETIF_F_RXCSUM) {
+		if (bp->dev->features & NETIF_F_RXCSUM)
+			bnx2x_csum_validate(skb, cqe, fp);
 
-			if (likely(BNX2X_RX_CSUM_OK(cqe)))
-				skb->ip_summed = CHECKSUM_UNNECESSARY;
-			else
-				fp->eth_q_stats.hw_csum_err++;
-		}
 
 		skb_record_rx_queue(skb, fp->rx_queue);