diff mbox series

[net-next,11/12] net: atlantic: implement UDP GSO offload

Message ID e85100822a4656332c8aa208a2e98af3df12e325.1572610156.git.irusskikh@marvell.com
State Changes Requested
Delegated to: David Miller
Headers show
Series Aquantia Marvell atlantic driver updates | expand

Commit Message

Igor Russkikh Nov. 1, 2019, 12:17 p.m. UTC
Atlantic hardware supports UDP hardware segmentation offload.
This allows the user to specify one large contiguous buffer of data,
which is then split automatically into multiple UDP packets
of the specified size.

Bulk sending of large UDP streams lowers CPU usage and increases
bandwidth.

We took measurements both with the udpgso_bench_tx test tool and with a
modified iperf3 measurement tool (4 streams, multithreaded, 200-byte packet
size) over an AQC<->AQC 10G link. Flow control was disabled to prevent the
RX side from affecting the measurements.

No UDP GSO:
	iperf3 -c 10.0.1.2 -u -b0 -l 200 -P4 --multithread
UDP GSO:
	iperf3 -c 10.0.1.2 -u -b0 -l 12600 --udp-lso 200 -P4 --multithread

Mode          CPU   iperf speed    Line speed   Packets per second
-------------------------------------------------------------
NO UDP GSO    350%   3.07 Gbps      3.8 Gbps     1,919,419
SW UDP GSO    200%   5.55 Gbps      6.4 Gbps     3,286,144
HW UDP GSO    90%    6.80 Gbps      8.4 Gbps     4,273,117

Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
---
 .../device_drivers/aquantia/atlantic.txt         | 15 +++++++++++++++
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c  | 16 +++++++++++++---
 drivers/net/ethernet/aquantia/atlantic/aq_ring.h |  7 ++++---
 .../aquantia/atlantic/hw_atl/hw_atl_a0.c         |  2 +-
 .../aquantia/atlantic/hw_atl/hw_atl_b0.c         | 11 +++++++----
 5 files changed, 40 insertions(+), 11 deletions(-)

Comments

Eric Dumazet Nov. 1, 2019, 4:59 p.m. UTC | #1
On 11/1/19 5:17 AM, Igor Russkikh wrote:
> atlantic hardware does support UDP hardware segmentation offload.
> This allows user to specify one large contiguous buffer with data
> which then will be split automagically into multiple UDP packets
> of specified size.


>  
> @@ -484,11 +485,19 @@ unsigned int aq_nic_map_skb(struct aq_nic_s *self, struct sk_buff *skb,
>  
>  	if (unlikely(skb_is_gso(skb))) {
>  		dx_buff->mss = skb_shinfo(skb)->gso_size;
> -		dx_buff->is_gso = 1U;
> +		if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
> +			dx_buff->is_gso_tcp = 1U;
> +			dx_buff->len_l4 = tcp_hdrlen(skb);
> +		} else if (ip_hdr(skb)->protocol == IPPROTO_UDP) {
> +			dx_buff->is_gso_udp = 1U;
> +			dx_buff->len_l4 = sizeof(struct udphdr);
> +			/* UDP GSO Hardware does not replace packet length. */
> +			udp_hdr(skb)->len = htons(dx_buff->mss +
> +						  dx_buff->len_l4);
> +		}

Have you tested IPv6 ?


>  		dx_buff->len_pkt = skb->len;
>  		dx_buff->len_l2 = ETH_HLEN;
>  		dx_buff->len_l3 = ip_hdrlen(skb);
> -		dx_buff->len_l4 = tcp_hdrlen(skb);
>  		dx_buff->eop_index = 0xffffU;
>  		dx_buff->is_ipv6 =
>  			(ip_hdr(skb)->version == 6) ? 1U : 0U;

I am asking because you seem to test IPv6 here, so blindly using ip_hdr(skb)->protocol
few lines above is weird.
Igor Russkikh Nov. 4, 2019, 10:11 p.m. UTC | #2
>> +			/* UDP GSO Hardware does not replace packet length. */
>> +			udp_hdr(skb)->len = htons(dx_buff->mss +
>> +						  dx_buff->len_l4);
>> +		}
> 
> Have you tested IPv6 ?
> 
> 
>>  		dx_buff->len_pkt = skb->len;
>>  		dx_buff->len_l2 = ETH_HLEN;
>>  		dx_buff->len_l3 = ip_hdrlen(skb);
>> -		dx_buff->len_l4 = tcp_hdrlen(skb);
>>  		dx_buff->eop_index = 0xffffU;
>>  		dx_buff->is_ipv6 =
>>  			(ip_hdr(skb)->version == 6) ? 1U : 0U;
> 
> I am asking because you seem to test IPv6 here, so blindly using ip_hdr(skb)->protocol
> few lines above is weird.
> 

Hi Eric, thanks, indeed it'll screw up on ipv6.
HW should be ip6 capable, I'll retest and fix this.
diff mbox series

Patch

diff --git a/Documentation/networking/device_drivers/aquantia/atlantic.txt b/Documentation/networking/device_drivers/aquantia/atlantic.txt
index ef3d8c749d4c..d614250e37d5 100644
--- a/Documentation/networking/device_drivers/aquantia/atlantic.txt
+++ b/Documentation/networking/device_drivers/aquantia/atlantic.txt
@@ -325,6 +325,21 @@  Supported ethtool options
  Example:
  ethtool -N eth0 flow-type udp4 action 0 loc 32
 
+ UDP GSO hardware offload
+ ---------------------------------
+ UDP GSO allows boosting UDP tx rates by offloading UDP header allocation
+ into hardware. A special userspace socket option is required for this;
+ it can be validated with /kernel/tools/testing/selftests/net/:
+
+    udpgso_bench_tx -u -4 -D 10.0.1.1 -s 6300 -S 100
+
+ This sends out 100-byte UDP packets formed from a single
+ 6300-byte user buffer.
+
+ UDP GSO is configured by:
+
+    ethtool -K eth0 tx-udp-segmentation on
+
  Private flags (testing)
  ---------------------------------
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 7ad8eb535d28..742ee5fe003e 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -309,6 +309,7 @@  void aq_nic_ndev_init(struct aq_nic_s *self)
 	self->ndev->vlan_features |= NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
 				     NETIF_F_RXHASH | NETIF_F_SG |
 				     NETIF_F_LRO | NETIF_F_TSO;
+	self->ndev->gso_partial_features = NETIF_F_GSO_UDP_L4;
 	self->ndev->priv_flags = aq_hw_caps->hw_priv_flags;
 	self->ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
 
@@ -484,11 +485,19 @@  unsigned int aq_nic_map_skb(struct aq_nic_s *self, struct sk_buff *skb,
 
 	if (unlikely(skb_is_gso(skb))) {
 		dx_buff->mss = skb_shinfo(skb)->gso_size;
-		dx_buff->is_gso = 1U;
+		if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
+			dx_buff->is_gso_tcp = 1U;
+			dx_buff->len_l4 = tcp_hdrlen(skb);
+		} else if (ip_hdr(skb)->protocol == IPPROTO_UDP) {
+			dx_buff->is_gso_udp = 1U;
+			dx_buff->len_l4 = sizeof(struct udphdr);
+			/* UDP GSO Hardware does not replace packet length. */
+			udp_hdr(skb)->len = htons(dx_buff->mss +
+						  dx_buff->len_l4);
+		}
 		dx_buff->len_pkt = skb->len;
 		dx_buff->len_l2 = ETH_HLEN;
 		dx_buff->len_l3 = ip_hdrlen(skb);
-		dx_buff->len_l4 = tcp_hdrlen(skb);
 		dx_buff->eop_index = 0xffffU;
 		dx_buff->is_ipv6 =
 			(ip_hdr(skb)->version == 6) ? 1U : 0U;
@@ -597,7 +606,8 @@  unsigned int aq_nic_map_skb(struct aq_nic_s *self, struct sk_buff *skb,
 	     --ret, dx = aq_ring_next_dx(ring, dx)) {
 		dx_buff = &ring->buff_ring[dx];
 
-		if (!dx_buff->is_gso && !dx_buff->is_vlan && dx_buff->pa) {
+		if (!(dx_buff->is_gso_tcp | dx_buff->is_gso_udp) &&
+		    !dx_buff->is_vlan && dx_buff->pa) {
 			if (unlikely(dx_buff->is_sop)) {
 				dma_unmap_single(aq_nic_get_dev(self),
 						 dx_buff->pa,
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
index be3702a4dcc9..991e4d31b094 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h
@@ -65,19 +65,20 @@  struct __packed aq_ring_buff_s {
 	};
 	union {
 		struct {
-			u16 len;
+			u32 len:16;
 			u32 is_ip_cso:1;
 			u32 is_udp_cso:1;
 			u32 is_tcp_cso:1;
 			u32 is_cso_err:1;
 			u32 is_sop:1;
 			u32 is_eop:1;
-			u32 is_gso:1;
+			u32 is_gso_tcp:1;
+			u32 is_gso_udp:1;
 			u32 is_mapped:1;
 			u32 is_cleaned:1;
 			u32 is_error:1;
 			u32 is_vlan:1;
-			u32 rsvd3:5;
+			u32 rsvd3:4;
 			u16 eop_index;
 			u16 rsvd4;
 		};
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
index 03b62d7d9f1a..9b1062b8af64 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
@@ -454,7 +454,7 @@  static int hw_atl_a0_hw_ring_tx_xmit(struct aq_hw_s *self,
 
 		buff = &ring->buff_ring[ring->sw_tail];
 
-		if (buff->is_gso) {
+		if (buff->is_gso_tcp) {
 			txd->ctl |= (buff->len_l3 << 31) |
 				(buff->len_l2 << 24) |
 				HW_ATL_A0_TXD_CTL_CMD_TCP |
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index e4de258a5c19..2a8f84064701 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -43,7 +43,9 @@ 
 			NETIF_F_NTUPLE |  \
 			NETIF_F_HW_VLAN_CTAG_FILTER | \
 			NETIF_F_HW_VLAN_CTAG_RX |     \
-			NETIF_F_HW_VLAN_CTAG_TX,      \
+			NETIF_F_HW_VLAN_CTAG_TX |     \
+			NETIF_F_GSO_UDP_L4      |     \
+			NETIF_F_GSO_PARTIAL,          \
 	.hw_priv_flags = IFF_UNICAST_FLT, \
 	.flow_control = true,		  \
 	.mtu = HW_ATL_B0_MTU_JUMBO,	  \
@@ -531,8 +533,9 @@  static int hw_atl_b0_hw_ring_tx_xmit(struct aq_hw_s *self,
 
 		buff = &ring->buff_ring[ring->sw_tail];
 
-		if (buff->is_gso) {
-			txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_TCP;
+		if (buff->is_gso_tcp || buff->is_gso_udp) {
+			if (buff->is_gso_tcp)
+				txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_TCP;
 			txd->ctl |= HW_ATL_B0_TXD_CTL_DESC_TYPE_TXC;
 			txd->ctl |= (buff->len_l3 << 31) |
 				    (buff->len_l2 << 24);
@@ -552,7 +555,7 @@  static int hw_atl_b0_hw_ring_tx_xmit(struct aq_hw_s *self,
 			txd->ctl |= buff->vlan_tx_tag << 4;
 			is_vlan = true;
 		}
-		if (!buff->is_gso && !buff->is_vlan) {
+		if (!buff->is_gso_tcp && !buff->is_gso_udp && !buff->is_vlan) {
 			buff_pa_len = buff->len;
 
 			txd->buf_addr = buff->pa;