diff mbox

[net-next,09/14] igb: Report L4 Rx hash via skb->l4_rxhash

Message ID 1358422519-20981-10-git-send-email-jeffrey.t.kirsher@intel.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Kirsher, Jeffrey T Jan. 17, 2013, 11:35 a.m. UTC
From: Alexander Duyck <alexander.h.duyck@intel.com>

This change makes it so that we report when the Rx hash data is based on L4
protocol inputs, specifically TCP or UDP port numbers.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/e1000_82575.h |  9 +++++++++
 drivers/net/ethernet/intel/igb/igb_main.c    | 21 +++++++++++++++++++--
 2 files changed, 28 insertions(+), 2 deletions(-)

Comments

Eric Dumazet Jan. 17, 2013, 2:19 p.m. UTC | #1
On Thu, 2013-01-17 at 03:35 -0800, Jeff Kirsher wrote:
> From: Alexander Duyck <alexander.h.duyck@intel.com>
> 
> This change makes it so that we report when the Rx hash data is based on L4
> protocol inputs, specifically TCP or UDP port numbers.
> 
> Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
> Tested-by: Aaron Brown <aaron.f.brown@intel.com>
> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
> ---
>  drivers/net/ethernet/intel/igb/e1000_82575.h |  9 +++++++++
>  drivers/net/ethernet/intel/igb/igb_main.c    | 21 +++++++++++++++++++--
>  2 files changed, 28 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.h b/drivers/net/ethernet/intel/igb/e1000_82575.h
> index 444f6f5..fa13e70 100644
> --- a/drivers/net/ethernet/intel/igb/e1000_82575.h
> +++ b/drivers/net/ethernet/intel/igb/e1000_82575.h
> @@ -112,11 +112,20 @@ union e1000_adv_rx_desc {
>  	} wb;  /* writeback */
>  };
>  
> +#define E1000_RXDADV_RSSTYPE_MASK	0x0000000F
>  #define E1000_RXDADV_HDRBUFLEN_MASK      0x7FE0
>  #define E1000_RXDADV_HDRBUFLEN_SHIFT     5
>  #define E1000_RXDADV_STAT_TS             0x10000 /* Pkt was time stamped */
>  #define E1000_RXDADV_STAT_TSIP           0x08000 /* timestamp in packet */
>  
> +/* RSS Hash results */
> +#define E1000_RXDADV_RSSTYPE_IPV4_TCP	0x00000001
> +#define E1000_RXDADV_RSSTYPE_IPV6_TCP	0x00000003
> +#define E1000_RXDADV_RSSTYPE_IPV6_TCP_EX 0x00000006
> +#define E1000_RXDADV_RSSTYPE_IPV4_UDP	0x00000007
> +#define E1000_RXDADV_RSSTYPE_IPV6_UDP	0x00000008
> +#define E1000_RXDADV_RSSTYPE_IPV6_UDP_EX 0x00000009
> +
>  /* Transmit Descriptor - Advanced */
>  union e1000_adv_tx_desc {
>  	struct {
> diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
> index a9cb84a..2c66ec8 100644
> --- a/drivers/net/ethernet/intel/igb/igb_main.c
> +++ b/drivers/net/ethernet/intel/igb/igb_main.c
> @@ -6280,12 +6280,29 @@ static inline void igb_rx_checksum(struct igb_ring *ring,
>  		le32_to_cpu(rx_desc->wb.upper.status_error));
>  }
>  
> +#define IGB_RSS_L4TYPES \
> +	(((u16)1 << E1000_RXDADV_RSSTYPE_IPV4_TCP) | \
> +	 ((u16)1 << E1000_RXDADV_RSSTYPE_IPV4_UDP) | \
> +	 ((u16)1 << E1000_RXDADV_RSSTYPE_IPV6_TCP) | \
> +	 ((u16)1 << E1000_RXDADV_RSSTYPE_IPV6_UDP) | \
> +	 ((u16)1 << E1000_RXDADV_RSSTYPE_IPV6_TCP_EX) | \
> +	 ((u16)1 << E1000_RXDADV_RSSTYPE_IPV6_UDP_EX))
> +
>  static inline void igb_rx_hash(struct igb_ring *ring,
>  			       union e1000_adv_rx_desc *rx_desc,
>  			       struct sk_buff *skb)
>  {
> -	if (ring->netdev->features & NETIF_F_RXHASH)
> -		skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
> +	u16 rss_type;
> +
> +	if (!(ring->netdev->features & NETIF_F_RXHASH))
> +		return;
> +
> +	skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
> +
> +	rss_type = le16_to_cpu(rx_desc->wb.lower.lo_dword.pkt_info) &
> +		   E1000_RXDADV_RSSTYPE_MASK;
> +
> +	skb->l4_rxhash = (IGB_RSS_L4TYPES >> rss_type) & 0x1;
>  }
>  

Problem is that we should not set l4_rxhash for UDP traffic, as it might
contains encapsulated protocol.

Also, is IGB really using the ports in the rss for UDP packets ?




--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Duyck, Alexander H Jan. 17, 2013, 5:07 p.m. UTC | #2
On 01/17/2013 06:19 AM, Eric Dumazet wrote:
> On Thu, 2013-01-17 at 03:35 -0800, Jeff Kirsher wrote:
>> From: Alexander Duyck <alexander.h.duyck@intel.com>
>>
>> This change makes it so that we report when the Rx hash data is based on L4
>> protocol inputs, specifically TCP or UDP port numbers.
>>
>> Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
>> Tested-by: Aaron Brown <aaron.f.brown@intel.com>
>> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
>> ---
>>  drivers/net/ethernet/intel/igb/e1000_82575.h |  9 +++++++++
>>  drivers/net/ethernet/intel/igb/igb_main.c    | 21 +++++++++++++++++++--
>>  2 files changed, 28 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.h b/drivers/net/ethernet/intel/igb/e1000_82575.h
>> index 444f6f5..fa13e70 100644
>> --- a/drivers/net/ethernet/intel/igb/e1000_82575.h
>> +++ b/drivers/net/ethernet/intel/igb/e1000_82575.h
>> @@ -112,11 +112,20 @@ union e1000_adv_rx_desc {
>>  	} wb;  /* writeback */
>>  };
>>  
>> +#define E1000_RXDADV_RSSTYPE_MASK	0x0000000F
>>  #define E1000_RXDADV_HDRBUFLEN_MASK      0x7FE0
>>  #define E1000_RXDADV_HDRBUFLEN_SHIFT     5
>>  #define E1000_RXDADV_STAT_TS             0x10000 /* Pkt was time stamped */
>>  #define E1000_RXDADV_STAT_TSIP           0x08000 /* timestamp in packet */
>>  
>> +/* RSS Hash results */
>> +#define E1000_RXDADV_RSSTYPE_IPV4_TCP	0x00000001
>> +#define E1000_RXDADV_RSSTYPE_IPV6_TCP	0x00000003
>> +#define E1000_RXDADV_RSSTYPE_IPV6_TCP_EX 0x00000006
>> +#define E1000_RXDADV_RSSTYPE_IPV4_UDP	0x00000007
>> +#define E1000_RXDADV_RSSTYPE_IPV6_UDP	0x00000008
>> +#define E1000_RXDADV_RSSTYPE_IPV6_UDP_EX 0x00000009
>> +
>>  /* Transmit Descriptor - Advanced */
>>  union e1000_adv_tx_desc {
>>  	struct {
>> diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
>> index a9cb84a..2c66ec8 100644
>> --- a/drivers/net/ethernet/intel/igb/igb_main.c
>> +++ b/drivers/net/ethernet/intel/igb/igb_main.c
>> @@ -6280,12 +6280,29 @@ static inline void igb_rx_checksum(struct igb_ring *ring,
>>  		le32_to_cpu(rx_desc->wb.upper.status_error));
>>  }
>>  
>> +#define IGB_RSS_L4TYPES \
>> +	(((u16)1 << E1000_RXDADV_RSSTYPE_IPV4_TCP) | \
>> +	 ((u16)1 << E1000_RXDADV_RSSTYPE_IPV4_UDP) | \
>> +	 ((u16)1 << E1000_RXDADV_RSSTYPE_IPV6_TCP) | \
>> +	 ((u16)1 << E1000_RXDADV_RSSTYPE_IPV6_UDP) | \
>> +	 ((u16)1 << E1000_RXDADV_RSSTYPE_IPV6_TCP_EX) | \
>> +	 ((u16)1 << E1000_RXDADV_RSSTYPE_IPV6_UDP_EX))
>> +
>>  static inline void igb_rx_hash(struct igb_ring *ring,
>>  			       union e1000_adv_rx_desc *rx_desc,
>>  			       struct sk_buff *skb)
>>  {
>> -	if (ring->netdev->features & NETIF_F_RXHASH)
>> -		skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
>> +	u16 rss_type;
>> +
>> +	if (!(ring->netdev->features & NETIF_F_RXHASH))
>> +		return;
>> +
>> +	skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
>> +
>> +	rss_type = le16_to_cpu(rx_desc->wb.lower.lo_dword.pkt_info) &
>> +		   E1000_RXDADV_RSSTYPE_MASK;
>> +
>> +	skb->l4_rxhash = (IGB_RSS_L4TYPES >> rss_type) & 0x1;
>>  }
>>  
> 
> Problem is that we should not set l4_rxhash for UDP traffic, as it might
> contains encapsulated protocol.

Isn't the same true of TCP?  I believe STT is intended to run over the
TCP protocol, or am I getting ahead of myself since STT is not supported
by the kernel?

> Also, is IGB really using the ports in the rss for UDP packets ?

Not by default.  The default is to only hash on the IP header for UDP
packets.

As such the default would only be setting the l4_rxhash on TCP frames
only.  The user would have to specifically request L4 port hashing for
UDP via the "ethtool -N" command for configuring rx-flow-hash.

Thanks,

Alex



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet Jan. 17, 2013, 5:14 p.m. UTC | #3
On Thu, 2013-01-17 at 09:07 -0800, Alexander Duyck wrote:

> Isn't the same true of TCP?  I believe STT is intended to run over the
> TCP protocol, or am I getting ahead of myself since STT is not supported
> by the kernel?
> 

probably ;)

> > Also, is IGB really using the ports in the rss for UDP packets ?
> 
> Not by default.  The default is to only hash on the IP header for UDP
> packets.
> 
> As such the default would only be setting the l4_rxhash on TCP frames
> only.  The user would have to specifically request L4 port hashing for
> UDP via the "ethtool -N" command for configuring rx-flow-hash.

So you should rewrite this patch ?

Or have I missed something ?


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Duyck, Alexander H Jan. 17, 2013, 5:23 p.m. UTC | #4
On 01/17/2013 09:14 AM, Eric Dumazet wrote:
> On Thu, 2013-01-17 at 09:07 -0800, Alexander Duyck wrote:
> 
>> Isn't the same true of TCP?  I believe STT is intended to run over the
>> TCP protocol, or am I getting ahead of myself since STT is not supported
>> by the kernel?
>>
> 
> probably ;)
> 
>>> Also, is IGB really using the ports in the rss for UDP packets ?
>>
>> Not by default.  The default is to only hash on the IP header for UDP
>> packets.
>>
>> As such the default would only be setting the l4_rxhash on TCP frames
>> only.  The user would have to specifically request L4 port hashing for
>> UDP via the "ethtool -N" command for configuring rx-flow-hash.
> 
> So you should rewrite this patch ?
> 
> Or have I missed something ?

I'm probably going to scrap it.  No point in rewriting it.

I has assumed that there were other uses for the l4_rxhash value.  If
all it is meant for is to indicate that the inner header of a tunnel was
used to compute the hash then there isn't much point to adding support
for this in igb/ixgbe since they don't support any inner header hashing.

It might add value at some point to rename the l4_rxhash flag to
something else though since it seems like there are now tunnels that are
encapsulated inside of l4 headers and it is going to get confusing.

Thanks,

Alex

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jesse Gross Jan. 17, 2013, 6:46 p.m. UTC | #5
On Thu, Jan 17, 2013 at 9:23 AM, Alexander Duyck
<alexander.h.duyck@intel.com> wrote:
> On 01/17/2013 09:14 AM, Eric Dumazet wrote:
>> On Thu, 2013-01-17 at 09:07 -0800, Alexander Duyck wrote:
>>
>>> Isn't the same true of TCP?  I believe STT is intended to run over the
>>> TCP protocol, or am I getting ahead of myself since STT is not supported
>>> by the kernel?
>>>
>>
>> probably ;)
>>
>>>> Also, is IGB really using the ports in the rss for UDP packets ?
>>>
>>> Not by default.  The default is to only hash on the IP header for UDP
>>> packets.
>>>
>>> As such the default would only be setting the l4_rxhash on TCP frames
>>> only.  The user would have to specifically request L4 port hashing for
>>> UDP via the "ethtool -N" command for configuring rx-flow-hash.
>>
>> So you should rewrite this patch ?
>>
>> Or have I missed something ?
>
> I'm probably going to scrap it.  No point in rewriting it.
>
> I has assumed that there were other uses for the l4_rxhash value.  If
> all it is meant for is to indicate that the inner header of a tunnel was
> used to compute the hash then there isn't much point to adding support
> for this in igb/ixgbe since they don't support any inner header hashing.
>
> It might add value at some point to rename the l4_rxhash flag to
> something else though since it seems like there are now tunnels that are
> encapsulated inside of l4 headers and it is going to get confusing.

I think there is value in setting it for TCP since it is used by
things like RPS and VXLAN (on the encapsulation side, to generate the
source port).

Ultimately, the semantics that I think we are looking for here is
whether the software flow dissector would look deeper into the packet
than the NIC.  In the case of STT, the answer to this is going to be
no, so there's no reason to not set the flag.  Even for protocols like
VXLAN, in software we don't currently look deeply into the packet on
receive, so we could set it there as well.  Both of these protocols
also have the advantage that the outer header contains an inner hash.

In the case of UDP tunnels, it would be nice to use the ports for RSS
since they often do contain these hashes.  We just have to make sure
that they aren't IP fragments.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.h b/drivers/net/ethernet/intel/igb/e1000_82575.h
index 444f6f5..fa13e70 100644
--- a/drivers/net/ethernet/intel/igb/e1000_82575.h
+++ b/drivers/net/ethernet/intel/igb/e1000_82575.h
@@ -112,11 +112,20 @@  union e1000_adv_rx_desc {
 	} wb;  /* writeback */
 };
 
+#define E1000_RXDADV_RSSTYPE_MASK	0x0000000F
 #define E1000_RXDADV_HDRBUFLEN_MASK      0x7FE0
 #define E1000_RXDADV_HDRBUFLEN_SHIFT     5
 #define E1000_RXDADV_STAT_TS             0x10000 /* Pkt was time stamped */
 #define E1000_RXDADV_STAT_TSIP           0x08000 /* timestamp in packet */
 
+/* RSS Hash results */
+#define E1000_RXDADV_RSSTYPE_IPV4_TCP	0x00000001
+#define E1000_RXDADV_RSSTYPE_IPV6_TCP	0x00000003
+#define E1000_RXDADV_RSSTYPE_IPV6_TCP_EX 0x00000006
+#define E1000_RXDADV_RSSTYPE_IPV4_UDP	0x00000007
+#define E1000_RXDADV_RSSTYPE_IPV6_UDP	0x00000008
+#define E1000_RXDADV_RSSTYPE_IPV6_UDP_EX 0x00000009
+
 /* Transmit Descriptor - Advanced */
 union e1000_adv_tx_desc {
 	struct {
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index a9cb84a..2c66ec8 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -6280,12 +6280,29 @@  static inline void igb_rx_checksum(struct igb_ring *ring,
 		le32_to_cpu(rx_desc->wb.upper.status_error));
 }
 
+#define IGB_RSS_L4TYPES \
+	(((u16)1 << E1000_RXDADV_RSSTYPE_IPV4_TCP) | \
+	 ((u16)1 << E1000_RXDADV_RSSTYPE_IPV4_UDP) | \
+	 ((u16)1 << E1000_RXDADV_RSSTYPE_IPV6_TCP) | \
+	 ((u16)1 << E1000_RXDADV_RSSTYPE_IPV6_UDP) | \
+	 ((u16)1 << E1000_RXDADV_RSSTYPE_IPV6_TCP_EX) | \
+	 ((u16)1 << E1000_RXDADV_RSSTYPE_IPV6_UDP_EX))
+
 static inline void igb_rx_hash(struct igb_ring *ring,
 			       union e1000_adv_rx_desc *rx_desc,
 			       struct sk_buff *skb)
 {
-	if (ring->netdev->features & NETIF_F_RXHASH)
-		skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
+	u16 rss_type;
+
+	if (!(ring->netdev->features & NETIF_F_RXHASH))
+		return;
+
+	skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
+
+	rss_type = le16_to_cpu(rx_desc->wb.lower.lo_dword.pkt_info) &
+		   E1000_RXDADV_RSSTYPE_MASK;
+
+	skb->l4_rxhash = (IGB_RSS_L4TYPES >> rss_type) & 0x1;
 }
 
 /**