diff mbox series

[ovs-dev] netdev-dpdk: Fallback to non tunnel offloading API.

Message ID 20240327165054.1441010-1-david.marchand@redhat.com
State Superseded
Headers show
Series [ovs-dev] netdev-dpdk: Fallback to non tunnel offloading API. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/intel-ovs-compilation success test: success

Commit Message

David Marchand March 27, 2024, 4:50 p.m. UTC
The outer checksum offloading API in DPDK is ambiguous and was
added by Intel folks with the assumption that any outer offloading
always goes with an inner offloading request.

With net/i40e and net/ice drivers, requesting outer ip checksum with a
tunnel context but no inner offloading request triggers a Tx failure
associated with a port MDD event.
2024-03-27T16:02:07.084Z|00018|dpdk|WARN|ice_interrupt_handler(): OICR:
	MDD event

To avoid this situation, if no checksum or segmentation offloading is
requested on the inner part of a packet, fallback to "normal" (non outer)
offloading request.
And outer offloading can be re-enabled for net/i40e and netice.

Fixes: 084c8087292c ("userspace: Support VXLAN and GENEVE TSO.")
Signed-off-by: David Marchand <david.marchand@redhat.com>
---
 lib/netdev-dpdk.c | 84 +++++++++++++++++++++++------------------------
 1 file changed, 42 insertions(+), 42 deletions(-)

Comments

Jun Wang March 28, 2024, 4:33 a.m. UTC | #1
I validated this modification on my x710 network card, but I found that
the outer UDP checksum of the transmitted packets is incorrect, leading 
to communication abnormalities. I think it's necessary to disable the outer
UDP checksum because although the capability reported by DPDK 
indicates support, in reality, the hardware doesn't actually support offloading,
resulting in outer UDP checksum errors.

tx_geneve_tso_offload="false", tx_ip_csum_offload="true", tx_out_ip_csum_offload="true",
tx_out_udp_csum_offload="true", tx_sctp_csum_offload="true", tx_tcp_csum_offload="true", 
tx_tcp_seg_offload="false", tx_udp_csum_offload="true", tx_vxlan_tso_offload="false"



junwang01@cestc.cn

From: David Marchand
Date: 2024-03-28 00:50
To: dev
CC: Mike Pattrick; Ilya Maximets; Jun Wang
Subject: [PATCH] netdev-dpdk: Fallback to non tunnel offloading API.
The outer checksum offloading API in DPDK is ambiguous and was
added by Intel folks with the assumption that any outer offloading
always goes with an inner offloading request.

With net/i40e and net/ice drivers, requesting outer ip checksum with a
tunnel context but no inner offloading request triggers a Tx failure
associated with a port MDD event.
2024-03-27T16:02:07.084Z|00018|dpdk|WARN|ice_interrupt_handler(): OICR:
MDD event

To avoid this situation, if no checksum or segmentation offloading is
requested on the inner part of a packet, fallback to "normal" (non outer)
offloading request.
And outer offloading can be re-enabled for net/i40e and netice.

Fixes: 084c8087292c ("userspace: Support VXLAN and GENEVE TSO.")
Signed-off-by: David Marchand <david.marchand@redhat.com>
---
lib/netdev-dpdk.c | 84 +++++++++++++++++++++++------------------------
1 file changed, 42 insertions(+), 42 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 2111f77681..939817474c 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1354,18 +1354,6 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
         info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
     }
-    if (!strcmp(info.driver_name, "net_ice")
-        || !strcmp(info.driver_name, "net_i40e")) {
-        /* FIXME: Driver advertises the capability but doesn't seem
-         * to actually support it correctly.  Can remove this once
-         * the driver is fixed on DPDK side. */
-        VLOG_INFO("%s: disabled Tx outer udp checksum offloads for a "
-                  "net/ice or net/i40e port.", netdev_get_name(&dev->up));
-        info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM;
-        info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO;
-        info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO;
-    }
-
     if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM) {
         dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD;
     } else {
@@ -2584,20 +2572,20 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
     struct dp_packet *pkt = CONTAINER_OF(mbuf, struct dp_packet, mbuf);
     struct tcp_header *th;
-    const uint64_t all_requests = (RTE_MBUF_F_TX_IP_CKSUM |
-                                   RTE_MBUF_F_TX_L4_MASK  |
-                                   RTE_MBUF_F_TX_OUTER_IP_CKSUM  |
-                                   RTE_MBUF_F_TX_OUTER_UDP_CKSUM |
-                                   RTE_MBUF_F_TX_TCP_SEG);
-    const uint64_t all_marks = (RTE_MBUF_F_TX_IPV4 |
-                                RTE_MBUF_F_TX_IPV6 |
-                                RTE_MBUF_F_TX_OUTER_IPV4 |
-                                RTE_MBUF_F_TX_OUTER_IPV6 |
-                                RTE_MBUF_F_TX_TUNNEL_MASK);
-
-    if (!(mbuf->ol_flags & all_requests)) {
+    const uint64_t all_inner_requests = (RTE_MBUF_F_TX_IP_CKSUM |
+                                         RTE_MBUF_F_TX_L4_MASK |
+                                         RTE_MBUF_F_TX_TCP_SEG);
+    const uint64_t all_outer_requests = (RTE_MBUF_F_TX_OUTER_IP_CKSUM  |
+                                          RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
+    const uint64_t all_inner_marks = (RTE_MBUF_F_TX_IPV4 |
+                                      RTE_MBUF_F_TX_IPV6);
+    const uint64_t all_outer_marks = (RTE_MBUF_F_TX_OUTER_IPV4 |
+                                      RTE_MBUF_F_TX_OUTER_IPV6 |
+                                      RTE_MBUF_F_TX_TUNNEL_MASK);
+
+    if (!(mbuf->ol_flags & (all_inner_requests | all_outer_requests))) {
         /* No offloads requested, no marks should be set. */
-        mbuf->ol_flags &= ~all_marks;
+        mbuf->ol_flags &= ~(all_inner_marks | all_outer_marks);
         uint64_t unexpected = mbuf->ol_flags & RTE_MBUF_F_TX_OFFLOAD_MASK;
         if (OVS_UNLIKELY(unexpected)) {
@@ -2610,32 +2598,44 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
         return true;
     }
+    const uint64_t tunnel_type = mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK;
+    if (OVS_UNLIKELY(tunnel_type
+                     && tunnel_type != RTE_MBUF_F_TX_TUNNEL_GENEVE
+                     && tunnel_type != RTE_MBUF_F_TX_TUNNEL_VXLAN)) {
+        VLOG_WARN_RL(&rl, "%s: Unexpected tunnel type: %#"PRIx64,
+                     netdev_get_name(&dev->up), tunnel_type);
+        netdev_dpdk_mbuf_dump(netdev_get_name(&dev->up),
+                              "Packet with unexpected tunnel type", mbuf);
+        return false;
+    }
+
     /* If packet is vxlan or geneve tunnel packet, calculate outer
      * l2 len and outer l3 len. Inner l2/l3/l4 len are calculated
      * before. */
-    const uint64_t tunnel_type = mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK;
-    if (tunnel_type == RTE_MBUF_F_TX_TUNNEL_GENEVE ||
-        tunnel_type == RTE_MBUF_F_TX_TUNNEL_VXLAN) {
+    if ((tunnel_type == RTE_MBUF_F_TX_TUNNEL_GENEVE ||
+         tunnel_type == RTE_MBUF_F_TX_TUNNEL_VXLAN) &&
+        mbuf->ol_flags & all_inner_requests) {
+
         mbuf->outer_l2_len = (char *) dp_packet_l3(pkt) -
                  (char *) dp_packet_eth(pkt);
         mbuf->outer_l3_len = (char *) dp_packet_l4(pkt) -
                  (char *) dp_packet_l3(pkt);
+    } else {
+        if (OVS_UNLIKELY(!(mbuf->ol_flags & all_inner_requests))) {
+            /* If no inner offloading is requesting, fallback to non tunneling
+             * checksum offloads. */
-        /* If neither inner checksums nor TSO is requested, inner marks
-         * should not be set. */
-        if (!(mbuf->ol_flags & (RTE_MBUF_F_TX_IP_CKSUM |
-                                RTE_MBUF_F_TX_L4_MASK  |
-                                RTE_MBUF_F_TX_TCP_SEG))) {
-            mbuf->ol_flags &= ~(RTE_MBUF_F_TX_IPV4 |
-                                RTE_MBUF_F_TX_IPV6);
+            if (mbuf->ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM) {
+                mbuf->ol_flags |= RTE_MBUF_F_TX_IP_CKSUM;
+                mbuf->ol_flags |= RTE_MBUF_F_TX_IPV4;
+            }
+            if (mbuf->ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM) {
+                mbuf->ol_flags |= RTE_MBUF_F_TX_UDP_CKSUM;
+                mbuf->ol_flags |= (mbuf->ol_flags & RTE_MBUF_F_TX_OUTER_IPV4) ?
+                                  RTE_MBUF_F_TX_IPV4 : RTE_MBUF_F_TX_IPV6;
+            }
+            mbuf->ol_flags &= ~(all_outer_requests | all_outer_marks);
         }
-    } else if (OVS_UNLIKELY(tunnel_type)) {
-        VLOG_WARN_RL(&rl, "%s: Unexpected tunnel type: %#"PRIx64,
-                     netdev_get_name(&dev->up), tunnel_type);
-        netdev_dpdk_mbuf_dump(netdev_get_name(&dev->up),
-                              "Packet with unexpected tunnel type", mbuf);
-        return false;
-    } else {
         mbuf->l2_len = (char *) dp_packet_l3(pkt) -
                (char *) dp_packet_eth(pkt);
         mbuf->l3_len = (char *) dp_packet_l4(pkt) -
David Marchand March 28, 2024, 8:49 a.m. UTC | #2
On Wed, Mar 27, 2024 at 5:51 PM David Marchand
<david.marchand@redhat.com> wrote:
>      /* If packet is vxlan or geneve tunnel packet, calculate outer
>       * l2 len and outer l3 len. Inner l2/l3/l4 len are calculated
>       * before. */
> -    const uint64_t tunnel_type = mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK;
> -    if (tunnel_type == RTE_MBUF_F_TX_TUNNEL_GENEVE ||
> -        tunnel_type == RTE_MBUF_F_TX_TUNNEL_VXLAN) {
> +    if ((tunnel_type == RTE_MBUF_F_TX_TUNNEL_GENEVE ||
> +         tunnel_type == RTE_MBUF_F_TX_TUNNEL_VXLAN) &&
> +        mbuf->ol_flags & all_inner_requests) {
> +
>          mbuf->outer_l2_len = (char *) dp_packet_l3(pkt) -
>                   (char *) dp_packet_eth(pkt);
>          mbuf->outer_l3_len = (char *) dp_packet_l4(pkt) -
>                   (char *) dp_packet_l3(pkt);
> +    } else {
> +        if (OVS_UNLIKELY(!(mbuf->ol_flags & all_inner_requests))) {
> +            /* If no inner offloading is requesting, fallback to non tunneling
> +             * checksum offloads. */

Inner marks must be reset before converting outer marks.

Otherwise, this results (with IPv4 traffic encapsulated in IPv6 geneve
tunnel) in such a ol_flags combination:
RTE_MBUF_F_RX_RSS_HASH
RTE_MBUF_F_TX_UDP_CKSUM
RTE_MBUF_F_TX_IP_CKSUM
RTE_MBUF_F_TX_IPV4
RTE_MBUF_F_TX_IPV6

v2 in preparation.


>
> -        /* If neither inner checksums nor TSO is requested, inner marks
> -         * should not be set. */
> -        if (!(mbuf->ol_flags & (RTE_MBUF_F_TX_IP_CKSUM |
> -                                RTE_MBUF_F_TX_L4_MASK  |
> -                                RTE_MBUF_F_TX_TCP_SEG))) {
> -            mbuf->ol_flags &= ~(RTE_MBUF_F_TX_IPV4 |
> -                                RTE_MBUF_F_TX_IPV6);
> +            if (mbuf->ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM) {
> +                mbuf->ol_flags |= RTE_MBUF_F_TX_IP_CKSUM;
> +                mbuf->ol_flags |= RTE_MBUF_F_TX_IPV4;
> +            }
> +            if (mbuf->ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM) {
> +                mbuf->ol_flags |= RTE_MBUF_F_TX_UDP_CKSUM;
> +                mbuf->ol_flags |= (mbuf->ol_flags & RTE_MBUF_F_TX_OUTER_IPV4) ?
> +                                  RTE_MBUF_F_TX_IPV4 : RTE_MBUF_F_TX_IPV6;
> +            }
> +            mbuf->ol_flags &= ~(all_outer_requests | all_outer_marks);
David Marchand March 28, 2024, 8:56 a.m. UTC | #3
On Thu, Mar 28, 2024 at 5:40 AM junwang01@cestc.cn <junwang01@cestc.cn> wrote:
>
> I validated this modification on my x710 network card, but I found that
> the outer UDP checksum of the transmitted packets is incorrect, leading
> to communication abnormalities. I think it's necessary to disable the outer
> UDP checksum because although the capability reported by DPDK
> indicates support, in reality, the hardware doesn't actually support offloading,
> resulting in outer UDP checksum errors.
>
> tx_geneve_tso_offload="false", tx_ip_csum_offload="true", tx_out_ip_csum_offload="true",
> tx_out_udp_csum_offload="true", tx_sctp_csum_offload="true", tx_tcp_csum_offload="true",
> tx_tcp_seg_offload="false", tx_udp_csum_offload="true", tx_vxlan_tso_offload="false"

Well, good timing, thanks for the report.

I was testing ipv6 in ipv4 (which seemed to work) and I realised
something is wrong at the outer -> inner conversion by looking at the
ol_flags in my debug prints.
Now, trying the opposite (ipv4 in ipv6), I think I reproduce your
issue with a E810 nic:

04:50:46.211854 50:7c:6f:3c:0c:26 > 50:7c:6f:3c:10:5a, ethertype IPv6
(0x86dd), length 168: (hlim 64, next-header UDP (17) payload length:
114) 2001:4e48::2.39854 > 2001::1.geneve: [bad udp cksum 0x89ad ->
0xfadd!] Geneve, Flags [none], vni 0x0, proto TEB (0x6558)
    52:54:00:00:11:01 > 4e:a9:1d:ce:85:4a, ethertype IPv4 (0x0800),
length 98: (tos 0x0, ttl 64, id 9408, offset 0, flags [DF], proto ICMP
(1), length 84)
    172.31.22.2 > 172.31.22.1: ICMP echo request, id 1442, seq 9, length 64

Please have a try with the v2 (I'll post soon).

If it still fails, can you provide a reproducer (ideally without OVN
to reduce the scope)?
diff mbox series

Patch

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 2111f77681..939817474c 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1354,18 +1354,6 @@  dpdk_eth_dev_init(struct netdev_dpdk *dev)
         info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
     }
 
-    if (!strcmp(info.driver_name, "net_ice")
-        || !strcmp(info.driver_name, "net_i40e")) {
-        /* FIXME: Driver advertises the capability but doesn't seem
-         * to actually support it correctly.  Can remove this once
-         * the driver is fixed on DPDK side. */
-        VLOG_INFO("%s: disabled Tx outer udp checksum offloads for a "
-                  "net/ice or net/i40e port.", netdev_get_name(&dev->up));
-        info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM;
-        info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO;
-        info.tx_offload_capa &= ~RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO;
-    }
-
     if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM) {
         dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD;
     } else {
@@ -2584,20 +2572,20 @@  netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
     struct dp_packet *pkt = CONTAINER_OF(mbuf, struct dp_packet, mbuf);
     struct tcp_header *th;
 
-    const uint64_t all_requests = (RTE_MBUF_F_TX_IP_CKSUM |
-                                   RTE_MBUF_F_TX_L4_MASK  |
-                                   RTE_MBUF_F_TX_OUTER_IP_CKSUM  |
-                                   RTE_MBUF_F_TX_OUTER_UDP_CKSUM |
-                                   RTE_MBUF_F_TX_TCP_SEG);
-    const uint64_t all_marks = (RTE_MBUF_F_TX_IPV4 |
-                                RTE_MBUF_F_TX_IPV6 |
-                                RTE_MBUF_F_TX_OUTER_IPV4 |
-                                RTE_MBUF_F_TX_OUTER_IPV6 |
-                                RTE_MBUF_F_TX_TUNNEL_MASK);
-
-    if (!(mbuf->ol_flags & all_requests)) {
+    const uint64_t all_inner_requests = (RTE_MBUF_F_TX_IP_CKSUM |
+                                         RTE_MBUF_F_TX_L4_MASK |
+                                         RTE_MBUF_F_TX_TCP_SEG);
+    const uint64_t all_outer_requests = (RTE_MBUF_F_TX_OUTER_IP_CKSUM  |
+                                          RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
+    const uint64_t all_inner_marks = (RTE_MBUF_F_TX_IPV4 |
+                                      RTE_MBUF_F_TX_IPV6);
+    const uint64_t all_outer_marks = (RTE_MBUF_F_TX_OUTER_IPV4 |
+                                      RTE_MBUF_F_TX_OUTER_IPV6 |
+                                      RTE_MBUF_F_TX_TUNNEL_MASK);
+
+    if (!(mbuf->ol_flags & (all_inner_requests | all_outer_requests))) {
         /* No offloads requested, no marks should be set. */
-        mbuf->ol_flags &= ~all_marks;
+        mbuf->ol_flags &= ~(all_inner_marks | all_outer_marks);
 
         uint64_t unexpected = mbuf->ol_flags & RTE_MBUF_F_TX_OFFLOAD_MASK;
         if (OVS_UNLIKELY(unexpected)) {
@@ -2610,32 +2598,44 @@  netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
         return true;
     }
 
+    const uint64_t tunnel_type = mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK;
+    if (OVS_UNLIKELY(tunnel_type
+                     && tunnel_type != RTE_MBUF_F_TX_TUNNEL_GENEVE
+                     && tunnel_type != RTE_MBUF_F_TX_TUNNEL_VXLAN)) {
+        VLOG_WARN_RL(&rl, "%s: Unexpected tunnel type: %#"PRIx64,
+                     netdev_get_name(&dev->up), tunnel_type);
+        netdev_dpdk_mbuf_dump(netdev_get_name(&dev->up),
+                              "Packet with unexpected tunnel type", mbuf);
+        return false;
+    }
+
     /* If packet is vxlan or geneve tunnel packet, calculate outer
      * l2 len and outer l3 len. Inner l2/l3/l4 len are calculated
      * before. */
-    const uint64_t tunnel_type = mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK;
-    if (tunnel_type == RTE_MBUF_F_TX_TUNNEL_GENEVE ||
-        tunnel_type == RTE_MBUF_F_TX_TUNNEL_VXLAN) {
+    if ((tunnel_type == RTE_MBUF_F_TX_TUNNEL_GENEVE ||
+         tunnel_type == RTE_MBUF_F_TX_TUNNEL_VXLAN) &&
+        mbuf->ol_flags & all_inner_requests) {
+
         mbuf->outer_l2_len = (char *) dp_packet_l3(pkt) -
                  (char *) dp_packet_eth(pkt);
         mbuf->outer_l3_len = (char *) dp_packet_l4(pkt) -
                  (char *) dp_packet_l3(pkt);
+    } else {
+        if (OVS_UNLIKELY(!(mbuf->ol_flags & all_inner_requests))) {
+            /* If no inner offloading is requesting, fallback to non tunneling
+             * checksum offloads. */
 
-        /* If neither inner checksums nor TSO is requested, inner marks
-         * should not be set. */
-        if (!(mbuf->ol_flags & (RTE_MBUF_F_TX_IP_CKSUM |
-                                RTE_MBUF_F_TX_L4_MASK  |
-                                RTE_MBUF_F_TX_TCP_SEG))) {
-            mbuf->ol_flags &= ~(RTE_MBUF_F_TX_IPV4 |
-                                RTE_MBUF_F_TX_IPV6);
+            if (mbuf->ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM) {
+                mbuf->ol_flags |= RTE_MBUF_F_TX_IP_CKSUM;
+                mbuf->ol_flags |= RTE_MBUF_F_TX_IPV4;
+            }
+            if (mbuf->ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM) {
+                mbuf->ol_flags |= RTE_MBUF_F_TX_UDP_CKSUM;
+                mbuf->ol_flags |= (mbuf->ol_flags & RTE_MBUF_F_TX_OUTER_IPV4) ?
+                                  RTE_MBUF_F_TX_IPV4 : RTE_MBUF_F_TX_IPV6;
+            }
+            mbuf->ol_flags &= ~(all_outer_requests | all_outer_marks);
         }
-    } else if (OVS_UNLIKELY(tunnel_type)) {
-        VLOG_WARN_RL(&rl, "%s: Unexpected tunnel type: %#"PRIx64,
-                     netdev_get_name(&dev->up), tunnel_type);
-        netdev_dpdk_mbuf_dump(netdev_get_name(&dev->up),
-                              "Packet with unexpected tunnel type", mbuf);
-        return false;
-    } else {
         mbuf->l2_len = (char *) dp_packet_l3(pkt) -
                (char *) dp_packet_eth(pkt);
         mbuf->l3_len = (char *) dp_packet_l4(pkt) -