diff mbox series

[ovs-dev,v13,2/2] userspace: Enable tunnel tests with tso.

Message ID 20240117192631.253439-2-mkp@redhat.com
State Accepted
Commit 85bcbbed839a8a13b2ac77dabc4ae78293d881e8
Delegated to: Ilya Maximets
Headers show
Series [ovs-dev,v13,1/2] userspace: Support vxlan and geneve tso. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/intel-ovs-compilation fail test: fail

Commit Message

Mike Pattrick Jan. 17, 2024, 7:26 p.m. UTC
This patch enables most of the tunnel tests in the testsuite, and adds a
large TCP transfer to a vxlan and geneve test to verify TSO
functionality. Some additional changes were required to accommodate these
changes with netdev-linux interfaces. The test for vlan over vxlan is
purposely not enabled as the traffic produced by this test gives
incorrect values in the vnet header.

Signed-off-by: Mike Pattrick <mkp@redhat.com>
---
v10:
 - Software TCP checksums now support encapsulated TSO case
 - Redundant inner offset code was removed
v11:
 - Corrected logic in netdev-linux vnet prepend code
v12:
 - Submitted by Dexia
v13:
 - Corrected formatting
 - Added vxlan and geneve ipv6 tcp tests
 - Corrected order of applying checksums in dp_packet_ol_send_prepare
   \_ Previously, we could apply the outer checksum and then modify the
      inner.
 - Corrected special case of ipv4 in ipv6 with tso in netdev-linux
   \_ This solution isn't glamourous, but can be refactored later.
---
 lib/dp-packet.c                  |  8 ++-
 lib/dp-packet.h                  | 63 ++++++++++++++++-----
 lib/dpif-netdev-extract-avx512.c |  8 +--
 lib/flow.c                       | 12 +---
 lib/netdev-linux.c               | 47 +++++++++++++---
 lib/netdev-native-tnl.c          | 27 +++++----
 lib/packets.c                    | 94 +++++++++++++++++++++++++-------
 tests/system-traffic.at          | 55 ++++++++++++-------
 8 files changed, 222 insertions(+), 92 deletions(-)

Comments

Simon Horman Jan. 17, 2024, 9:05 p.m. UTC | #1
On Wed, Jan 17, 2024 at 02:26:31PM -0500, Mike Pattrick wrote:
> This patch enables most of the tunnel tests in the testsuite, and adds a
> large TCP transfer to a vxlan and geneve test to verify TSO
> functionality. Some additional changes were required to accommodate these
> changes with netdev-linux interfaces. The test for vlan over vxlan is
> purposely not enabled as the traffic produced by this test gives
> incorrect values in the vnet header.
> 
> Signed-off-by: Mike Pattrick <mkp@redhat.com>

Acked-by: Simon Horman <horms@ovn.org>
diff mbox series

Patch

diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index e7738c37a..9635cac8b 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -578,7 +578,6 @@  dp_packet_ol_send_prepare(struct dp_packet *p, uint64_t flags)
 
     if (dp_packet_hwol_is_tunnel_geneve(p) ||
         dp_packet_hwol_is_tunnel_vxlan(p)) {
-        dp_packet_tnl_outer_ol_send_prepare(p, flags);
         tnl_inner = true;
     }
 
@@ -593,6 +592,9 @@  dp_packet_ol_send_prepare(struct dp_packet *p, uint64_t flags)
     }
 
     if (!dp_packet_hwol_tx_l4_checksum(p)) {
+        if (tnl_inner) {
+            dp_packet_tnl_outer_ol_send_prepare(p, flags);
+        }
         return;
     }
 
@@ -617,4 +619,8 @@  dp_packet_ol_send_prepare(struct dp_packet *p, uint64_t flags)
         dp_packet_ol_set_l4_csum_good(p);
         dp_packet_hwol_reset_tx_l4_csum(p);
     }
+
+    if (tnl_inner) {
+        dp_packet_tnl_outer_ol_send_prepare(p, flags);
+    }
 }
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index c626acfbd..70d4bc3ed 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -432,6 +432,8 @@  dp_packet_reset_offsets(struct dp_packet *b)
     b->l2_5_ofs = UINT16_MAX;
     b->l3_ofs = UINT16_MAX;
     b->l4_ofs = UINT16_MAX;
+    b->inner_l3_ofs = UINT16_MAX;
+    b->inner_l4_ofs = UINT16_MAX;
 }
 
 static inline uint16_t
@@ -529,6 +531,16 @@  dp_packet_inner_l4(const struct dp_packet *b)
            : NULL;
 }
 
+static inline size_t
+dp_packet_inner_l4_size(const struct dp_packet *b)
+{
+    return OVS_LIKELY(b->l4_ofs != UINT16_MAX)
+           ? (const char *) dp_packet_tail(b)
+           - (const char *) dp_packet_inner_l4(b)
+           - dp_packet_l2_pad_size(b)
+           : 0;
+}
+
 static inline const void *
 dp_packet_get_tcp_payload(const struct dp_packet *b)
 {
@@ -865,14 +877,6 @@  dp_packet_set_data(struct dp_packet *b, void *data)
     }
 }
 
-static inline void
-dp_packet_reset_packet(struct dp_packet *b, int off)
-{
-    dp_packet_set_size(b, dp_packet_size(b) - off);
-    dp_packet_set_data(b, ((unsigned char *) dp_packet_data(b) + off));
-    dp_packet_reset_offsets(b);
-}
-
 enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets in a batch. */
 
 struct dp_packet_batch {
@@ -1142,6 +1146,20 @@  dp_packet_hwol_l4_is_sctp(struct dp_packet *b)
             DP_PACKET_OL_TX_SCTP_CKSUM;
 }
 
+/* Returns 'true' if packet 'b' is marked as having an outer IPv6 header. */
+static inline bool
+dp_packet_hwol_is_outer_ipv6(const struct dp_packet *b)
+{
+    return *dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_OUTER_IPV6;
+}
+
+/* Returns 'true' if packet 'b' is marked as having an outer IPv4 header. */
+static inline bool
+dp_packet_hwol_is_outer_ipv4(const struct dp_packet *b)
+{
+    return *dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_OUTER_IPV4;
+}
+
 /* Returns 'true' if packet 'b' is marked for tunnel GENEVE
  * checksum offloading. */
 static inline bool
@@ -1402,21 +1420,36 @@  dp_packet_ol_reset_l4_csum_good(struct dp_packet *p)
     }
 }
 
-/* Marks packet 'p' with good integrity if the 'start' and 'offset'
- * matches with the 'csum_start' and 'csum_offset' in packet 'p'.
- * The 'start' is the offset from the begin of the packet headers.
- * The 'offset' is the offset from start to place the checksum.
+/* Marks packet 'p' with good integrity if checksum offload locations
+ * were provided. In the case of encapsulated packets, these values may
+ * be deeper into the packet than OVS might expect. But the packet
+ * should still be considered to have good integrity.
+ * The 'csum_start' is the offset from the begin of the packet headers.
+ * The 'csum_offset' is the offset from start to place the checksum.
  * The csum_start and csum_offset fields are set from the virtio_net_hdr
  * struct that may be provided by a netdev on packet ingress. */
 static inline void
-dp_packet_ol_l4_csum_check_partial(struct dp_packet *p, uint16_t start,
-                             uint16_t offset)
+dp_packet_ol_l4_csum_check_partial(struct dp_packet *p)
 {
-    if (p->csum_start == start && p->csum_offset == offset) {
+    if (p->csum_start && p->csum_offset) {
         dp_packet_ol_set_l4_csum_partial(p);
     }
 }
 
+static inline void
+dp_packet_reset_packet(struct dp_packet *b, int off)
+{
+    dp_packet_set_size(b, dp_packet_size(b) - off);
+    dp_packet_set_data(b, ((unsigned char *) dp_packet_data(b) + off));
+    dp_packet_reset_offsets(b);
+
+    if (b->csum_start >= off && b->csum_offset) {
+        /* Adjust values for decapsulation. */
+        b->csum_start -= off;
+        dp_packet_ol_set_l4_csum_partial(b);
+    }
+}
+
 static inline uint32_t ALWAYS_INLINE
 dp_packet_calc_hash_ipv4(const uint8_t *pkt, const uint16_t l3_ofs,
                          uint32_t hash)
diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index 1bc7e8d0e..57ca4c71b 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -776,9 +776,7 @@  mfex_ipv6_set_hwol(struct dp_packet *pkt)
 static void
 mfex_tcp_set_hwol(struct dp_packet *pkt)
 {
-    dp_packet_ol_l4_csum_check_partial(pkt, pkt->l4_ofs,
-                                 offsetof(struct tcp_header,
-                                          tcp_csum));
+    dp_packet_ol_l4_csum_check_partial(pkt);
     if (dp_packet_l4_checksum_good(pkt)
         || dp_packet_ol_l4_csum_partial(pkt)) {
         dp_packet_hwol_set_csum_tcp(pkt);
@@ -788,9 +786,7 @@  mfex_tcp_set_hwol(struct dp_packet *pkt)
 static void
 mfex_udp_set_hwol(struct dp_packet *pkt)
 {
-    dp_packet_ol_l4_csum_check_partial(pkt, pkt->l4_ofs,
-                                 offsetof(struct udp_header,
-                                          udp_csum));
+    dp_packet_ol_l4_csum_check_partial(pkt);
     if (dp_packet_l4_checksum_good(pkt)
         || dp_packet_ol_l4_csum_partial(pkt)) {
         dp_packet_hwol_set_csum_udp(pkt);
diff --git a/lib/flow.c b/lib/flow.c
index 82d93570a..8e3402388 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -1054,9 +1054,7 @@  miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
                     } else if (dl_type == htons(ETH_TYPE_IPV6)) {
                         dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
                     }
-                    dp_packet_ol_l4_csum_check_partial(packet, packet->l4_ofs,
-                                                 offsetof(struct tcp_header,
-                                                          tcp_csum));
+                    dp_packet_ol_l4_csum_check_partial(packet);
                     if (dp_packet_l4_checksum_good(packet)
                         || dp_packet_ol_l4_csum_partial(packet)) {
                         dp_packet_hwol_set_csum_tcp(packet);
@@ -1076,9 +1074,7 @@  miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
                 } else if (dl_type == htons(ETH_TYPE_IPV6)) {
                     dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
                 }
-                dp_packet_ol_l4_csum_check_partial(packet, packet->l4_ofs,
-                                             offsetof(struct udp_header,
-                                                      udp_csum));
+                dp_packet_ol_l4_csum_check_partial(packet);
                 if (dp_packet_l4_checksum_good(packet)
                     || dp_packet_ol_l4_csum_partial(packet)) {
                     dp_packet_hwol_set_csum_udp(packet);
@@ -1092,9 +1088,7 @@  miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
                 miniflow_push_be16(mf, tp_dst, sctp->sctp_dst);
                 miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
                 miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
-                dp_packet_ol_l4_csum_check_partial(packet, packet->l4_ofs,
-                                             offsetof(struct sctp_header,
-                                                      sctp_csum));
+                dp_packet_ol_l4_csum_check_partial(packet);
                 if (dp_packet_l4_checksum_good(packet)
                     || dp_packet_ol_l4_csum_partial(packet)) {
                     dp_packet_hwol_set_csum_sctp(packet);
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index e79a43260..1b2e5b6c2 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -7145,8 +7145,12 @@  netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu)
     if (dp_packet_hwol_is_tso(b)) {
         uint16_t tso_segsz = dp_packet_get_tso_segsz(b);
         struct tcp_header *tcp = dp_packet_l4(b);
+        struct tcp_header *inner_tcp = dp_packet_inner_l4(b);
+        if (inner_tcp) {
+            tcp = inner_tcp;
+        }
         int tcp_hdr_len = TCP_OFFSET(tcp->tcp_ctl) * 4;
-        int hdr_len = ((char *) dp_packet_l4(b) - (char *) dp_packet_eth(b))
+        int hdr_len = ((char *) tcp - (char *) dp_packet_eth(b))
                       + tcp_hdr_len;
         int max_packet_len = mtu + ETH_HEADER_LEN + VLAN_HEADER_LEN;
 
@@ -7164,17 +7168,35 @@  netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu)
         } else if (dp_packet_hwol_tx_ipv6(b)) {
             vnet->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
         }
-
     } else {
         vnet->hdr_len = 0;
         vnet->gso_size = 0;
         vnet->gso_type = VIRTIO_NET_HDR_GSO_NONE;
     }
 
-    if (dp_packet_l4_checksum_good(b)) {
+    bool l4_is_good = dp_packet_l4_checksum_good(b);
+
+    if ((dp_packet_hwol_is_tunnel_vxlan(b) ||
+         dp_packet_hwol_is_tunnel_geneve(b)) &&
+        dp_packet_hwol_tx_l4_checksum(b)) {
+        /* This condition is needed because dp-packet doesn't currently track
+         * outer and inner checksum statuses seperately. In the case of these
+         * two tunnel types we can end up setting outer l4 as good but still
+         * need to complete the inner l4. */
+        l4_is_good = !(dp_packet_hwol_l4_is_tcp(b) ||
+                       dp_packet_hwol_l4_is_udp(b));
+    }
+
+    if (l4_is_good) {
         /* The packet has good L4 checksum. No need to validate again. */
         vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0;
         vnet->flags = VIRTIO_NET_HDR_F_DATA_VALID;
+        if (!dp_packet_ip_checksum_good(b)) {
+            /* It is possible that L4 is good but the IP checksum isn't
+             * complete. For example in the case of UDP encapsulation of an ARP
+             * packet where the UDP checksum is 0. */
+            dp_packet_ip_set_header_csum(b, false);
+        }
     } else if (dp_packet_hwol_tx_l4_checksum(b)) {
         /* The csum calculation is offloaded. */
         if (dp_packet_hwol_l4_is_tcp(b)) {
@@ -7192,20 +7214,28 @@  netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu)
              * the TCP pseudo header, so that replacing it by the ones
              * complement checksum of the TCP header and body will give
              * the correct result. */
+            void *l3_off = dp_packet_inner_l3(b);
+            void *l4_off = dp_packet_inner_l4(b);
+
+            if (!l3_off || !l4_off) {
+                l3_off = dp_packet_l3(b);
+                l4_off = dp_packet_l4(b);
+            }
 
-            struct tcp_header *tcp_hdr = dp_packet_l4(b);
+            struct tcp_header *tcp_hdr = l4_off;
             ovs_be16 csum = 0;
             if (dp_packet_hwol_is_ipv4(b)) {
-                const struct ip_header *ip_hdr = dp_packet_l3(b);
+                const struct ip_header *ip_hdr = l3_off;
                 csum = ~csum_finish(packet_csum_pseudoheader(ip_hdr));
             } else if (dp_packet_hwol_tx_ipv6(b)) {
-                const struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(b);
+                const struct ovs_16aligned_ip6_hdr *ip6_hdr = l3_off;
                 csum = ~csum_finish(packet_csum_pseudoheader6(ip6_hdr));
             }
 
             tcp_hdr->tcp_csum = csum;
             vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
-            vnet->csum_start = (OVS_FORCE __virtio16) b->l4_ofs;
+            vnet->csum_start = (OVS_FORCE __virtio16) ((char *) l4_off -
+                                    (char *) dp_packet_data(b));
             vnet->csum_offset = (OVS_FORCE __virtio16) __builtin_offsetof(
                                     struct tcp_header, tcp_csum);
         } else if (dp_packet_hwol_l4_is_udp(b)) {
@@ -7222,7 +7252,8 @@  netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu)
 
             udp_hdr->udp_csum = csum;
             vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
-            vnet->csum_start = (OVS_FORCE __virtio16) b->l4_ofs;
+            vnet->csum_start = (OVS_FORCE __virtio16) ((char *) udp_hdr -
+                                    (char *) dp_packet_data(b));;
             vnet->csum_offset = (OVS_FORCE __virtio16) __builtin_offsetof(
                                     struct udp_header, udp_csum);
         } else if (dp_packet_hwol_l4_is_sctp(b)) {
diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
index fa87c6281..0d6d803fe 100644
--- a/lib/netdev-native-tnl.c
+++ b/lib/netdev-native-tnl.c
@@ -215,7 +215,8 @@  udp_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
     }
 
     if (udp->udp_csum) {
-        if (OVS_UNLIKELY(!dp_packet_l4_checksum_good(packet))) {
+        if (OVS_LIKELY(!dp_packet_ol_l4_csum_partial(packet)) &&
+            OVS_UNLIKELY(!dp_packet_l4_checksum_good(packet))) {
             uint32_t csum;
             if (netdev_tnl_is_header_ipv6(dp_packet_data(packet))) {
                 csum = packet_csum_pseudoheader6(dp_packet_l3(packet));
@@ -292,18 +293,11 @@  dp_packet_tnl_ol_process(struct dp_packet *packet,
             dp_packet_set_l2_len(packet, (char *) dp_packet_l3(packet) -
                                          (char *) dp_packet_eth(packet) +
                                          GENEVE_BASE_HLEN + opt_len);
-
-            packet->inner_l3_ofs = packet->l3_ofs + GENEVE_BASE_HLEN + opt_len;
-            packet->inner_l4_ofs = packet->l4_ofs + GENEVE_BASE_HLEN + opt_len;
-
         } else if (data->tnl_type == OVS_VPORT_TYPE_VXLAN) {
             dp_packet_hwol_set_tunnel_vxlan(packet);
             dp_packet_set_l2_len(packet, (char *) dp_packet_l3(packet) -
                                          (char *) dp_packet_eth(packet) +
                                          VXLAN_HLEN);
-
-            packet->inner_l3_ofs = packet->l3_ofs + VXLAN_HLEN;
-            packet->inner_l4_ofs = packet->l4_ofs + VXLAN_HLEN;
         }
     }
 }
@@ -313,6 +307,8 @@  netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,
                            struct dp_packet *packet,
                            const struct ovs_action_push_tnl *data)
 {
+    uint16_t l3_ofs = packet->l3_ofs;
+    uint16_t l4_ofs = packet->l4_ofs;
     struct udp_header *udp;
     int ip_tot_size;
 
@@ -332,13 +328,20 @@  netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,
         } else {
             dp_packet_hwol_set_csum_udp(packet);
         }
-    } else {
-        dp_packet_ol_set_l4_csum_good(packet);
     }
 
-    packet->inner_l3_ofs += packet->l4_ofs;
-    packet->inner_l4_ofs += packet->l4_ofs;
+    if (packet->csum_start && packet->csum_offset) {
+        dp_packet_ol_set_l4_csum_partial(packet);
+    } else if (!udp->udp_csum) {
+        dp_packet_ol_set_l4_csum_good(packet);
+    }
 
+    if (l3_ofs != UINT16_MAX) {
+        packet->inner_l3_ofs = l3_ofs + data->header_len;
+    }
+    if (l4_ofs != UINT16_MAX) {
+        packet->inner_l4_ofs = l4_ofs + data->header_len;
+    }
 }
 
 static void *
diff --git a/lib/packets.c b/lib/packets.c
index d9e41346e..f23d25420 100644
--- a/lib/packets.c
+++ b/lib/packets.c
@@ -1999,21 +1999,44 @@  IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6)
 void
 packet_tcp_complete_csum(struct dp_packet *p, bool inner)
 {
-    struct tcp_header *tcp = (inner) ? dp_packet_inner_l4(p) : dp_packet_l4(p);
+    struct tcp_header *tcp;
+    size_t tcp_sz;
+    void *ip_hdr;
+    bool is_v4;
+
+    if (inner) {
+        tcp = dp_packet_inner_l4(p);
+        ip_hdr = dp_packet_inner_l3(p);
+        tcp_sz = dp_packet_inner_l4_size(p);
+    } else {
+        tcp = dp_packet_l4(p);
+        ip_hdr = dp_packet_l3(p);
+        tcp_sz = dp_packet_l4_size(p);
+    }
+
+    if (!inner && dp_packet_hwol_is_outer_ipv6(p)) {
+        is_v4 = false;
+    } else if (!inner && dp_packet_hwol_is_outer_ipv4(p)) {
+        is_v4 = true;
+    } else if (dp_packet_hwol_is_ipv4(p)) {
+        is_v4 = true;
+    } else if (dp_packet_hwol_tx_ipv6(p)) {
+        is_v4 = false;
+    } else {
+        OVS_NOT_REACHED();
+    }
 
     tcp->tcp_csum = 0;
-    if (dp_packet_hwol_is_ipv4(p)) {
-        struct ip_header *ip = dp_packet_l3(p);
+    if (is_v4) {
+        struct ip_header *ip = ip_hdr;
 
         tcp->tcp_csum = csum_finish(csum_continue(packet_csum_pseudoheader(ip),
-                                                  tcp, dp_packet_l4_size(p)));
-    } else if (dp_packet_hwol_tx_ipv6(p)) {
-        struct ovs_16aligned_ip6_hdr *ip6 = dp_packet_l3(p);
+                                                  tcp, tcp_sz));
+    } else {
+        struct ovs_16aligned_ip6_hdr *ip6 = ip_hdr;
 
         tcp->tcp_csum = packet_csum_upperlayer6(ip6, tcp, ip6->ip6_nxt,
-                                                dp_packet_l4_size(p));
-    } else {
-        OVS_NOT_REACHED();
+                                                tcp_sz);
     }
 }
 
@@ -2022,26 +2045,49 @@  packet_tcp_complete_csum(struct dp_packet *p, bool inner)
 void
 packet_udp_complete_csum(struct dp_packet *p, bool inner)
 {
-    struct udp_header *udp = (inner) ? dp_packet_inner_l4(p) : dp_packet_l4(p);
+    struct udp_header *udp;
+    size_t udp_sz;
+    void *ip_hdr;
+    bool is_v4;
+
+    if (inner) {
+        udp = dp_packet_inner_l4(p);
+        ip_hdr = dp_packet_inner_l3(p);
+        udp_sz = dp_packet_inner_l4_size(p);
+    } else {
+        udp = dp_packet_l4(p);
+        ip_hdr = dp_packet_l3(p);
+        udp_sz = dp_packet_l4_size(p);
+    }
 
     /* Skip csum calculation if the udp_csum is zero. */
     if (!udp->udp_csum) {
         return;
     }
 
+    if (!inner && dp_packet_hwol_is_outer_ipv6(p)) {
+        is_v4 = false;
+    } else if (!inner && dp_packet_hwol_is_outer_ipv4(p)) {
+        is_v4 = true;
+    } else if (dp_packet_hwol_is_ipv4(p)) {
+        is_v4 = true;
+    } else if (dp_packet_hwol_tx_ipv6(p)) {
+        is_v4 = false;
+    } else {
+        OVS_NOT_REACHED();
+    }
+
     udp->udp_csum = 0;
-    if (dp_packet_hwol_is_ipv4(p)) {
-        struct ip_header *ip = dp_packet_l3(p);
+    if (is_v4) {
+        struct ip_header *ip = ip_hdr;
 
         udp->udp_csum = csum_finish(csum_continue(packet_csum_pseudoheader(ip),
-                                                  udp, dp_packet_l4_size(p)));
-    } else if (dp_packet_hwol_tx_ipv6(p)) {
-        struct ovs_16aligned_ip6_hdr *ip6 = dp_packet_l3(p);
+                                                  udp, udp_sz));
+    } else {
+        struct ovs_16aligned_ip6_hdr *ip6 = ip_hdr;
 
         udp->udp_csum = packet_csum_upperlayer6(ip6, udp, ip6->ip6_nxt,
-                                                dp_packet_l4_size(p));
-    } else {
-        OVS_NOT_REACHED();
+                                                udp_sz);
     }
 
     if (!udp->udp_csum) {
@@ -2054,10 +2100,18 @@  packet_udp_complete_csum(struct dp_packet *p, bool inner)
 void
 packet_sctp_complete_csum(struct dp_packet *p, bool inner)
 {
-    struct sctp_header *sh = (inner) ? dp_packet_inner_l4(p) : dp_packet_l4(p);
-    uint16_t tp_len = dp_packet_l4_size(p);
+    struct sctp_header *sh;
+    uint16_t tp_len;
     ovs_be32 csum;
 
+    if (inner) {
+        sh = dp_packet_inner_l4(p);
+        tp_len = dp_packet_inner_l4_size(p);
+    } else {
+        sh = dp_packet_l4(p);
+        tp_len = dp_packet_l4_size(p);
+    }
+
     put_16aligned_be32(&sh->sctp_csum, 0);
     csum = crc32c((void *) sh, tp_len);
     put_16aligned_be32(&sh->sctp_csum, csum);
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index 283706c6e..f363a778c 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -292,7 +292,6 @@  OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
 AT_SETUP([datapath - ping over vxlan tunnel])
-OVS_CHECK_TUNNEL_TSO()
 OVS_CHECK_VXLAN()
 
 OVS_TRAFFIC_VSWITCHD_START()
@@ -330,6 +329,15 @@  NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -W 2 10.1.1.100 | FORMAT_PI
 3 packets transmitted, 3 received, 0% packet loss, time 0ms
 ])
 
+dnl Check large bidirectional TCP.
+AT_CHECK([dd if=/dev/urandom of=payload.bin bs=60000 count=1 2> /dev/null])
+OVS_DAEMONIZE([nc -l 10.1.1.100 1234 > data], [nc.pid])
+NS_CHECK_EXEC([at_ns0], [nc $NC_EOF_OPT 10.1.1.100 1234 < payload.bin])
+
+dnl Wait until transfer completes before checking.
+OVS_WAIT_WHILE([kill -0 $(cat nc.pid)])
+AT_CHECK([diff -q payload.bin data], [0])
+
 OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
@@ -381,7 +389,6 @@  OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
 AT_SETUP([datapath - ping over vxlan6 tunnel])
-OVS_CHECK_TUNNEL_TSO()
 OVS_CHECK_VXLAN_UDP6ZEROCSUM()
 
 OVS_TRAFFIC_VSWITCHD_START()
@@ -421,11 +428,18 @@  NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -W 2 10.1.1.100 | FORMAT_PI
 3 packets transmitted, 3 received, 0% packet loss, time 0ms
 ])
 
+dnl Check large bidirectional TCP.
+AT_CHECK([dd if=/dev/urandom of=payload.bin bs=60000 count=1 2> /dev/null])
+OVS_DAEMONIZE([nc -l 10.1.1.100 1234 > data], [nc.pid])
+NS_CHECK_EXEC([at_ns0], [nc $NC_EOF_OPT 10.1.1.100 1234 < payload.bin])
+
+dnl Wait until transfer completes before checking.
+OVS_WAIT_WHILE([kill -0 $(cat nc.pid)])
+AT_CHECK([diff -q payload.bin data], [0])
 OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
 AT_SETUP([datapath - ping over gre tunnel])
-OVS_CHECK_TUNNEL_TSO()
 OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15)
 OVS_CHECK_GRE()
 
@@ -467,7 +481,6 @@  OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
 AT_SETUP([datapath - ping over ip6gre L2 tunnel])
-OVS_CHECK_TUNNEL_TSO()
 OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15)
 OVS_CHECK_GRE()
 OVS_CHECK_ERSPAN()
@@ -508,7 +521,6 @@  AT_CLEANUP
 
 
 AT_SETUP([datapath - ping over erspan v1 tunnel])
-OVS_CHECK_TUNNEL_TSO()
 OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15)
 OVS_CHECK_GRE()
 OVS_CHECK_ERSPAN()
@@ -545,7 +557,6 @@  OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
 AT_SETUP([datapath - ping over erspan v2 tunnel])
-OVS_CHECK_TUNNEL_TSO()
 OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15)
 OVS_CHECK_GRE()
 OVS_CHECK_ERSPAN()
@@ -582,7 +593,6 @@  OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
 AT_SETUP([datapath - ping over ip6erspan v1 tunnel])
-OVS_CHECK_TUNNEL_TSO()
 OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15)
 OVS_CHECK_GRE()
 OVS_CHECK_ERSPAN()
@@ -622,7 +632,6 @@  OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
 AT_SETUP([datapath - ping over ip6erspan v2 tunnel])
-OVS_CHECK_TUNNEL_TSO()
 OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15)
 OVS_CHECK_GRE()
 OVS_CHECK_ERSPAN()
@@ -663,7 +672,6 @@  OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
 AT_SETUP([datapath - ping over geneve tunnel])
-OVS_CHECK_TUNNEL_TSO()
 OVS_CHECK_GENEVE()
 
 OVS_TRAFFIC_VSWITCHD_START()
@@ -701,11 +709,19 @@  NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -W 2 10.1.1.100 | FORMAT_PI
 3 packets transmitted, 3 received, 0% packet loss, time 0ms
 ])
 
+dnl Check large bidirectional TCP.
+AT_CHECK([dd if=/dev/urandom of=payload.bin bs=60000 count=1 2> /dev/null])
+OVS_DAEMONIZE([nc -l 10.1.1.100 1234 > data], [nc.pid])
+NS_CHECK_EXEC([at_ns0], [nc $NC_EOF_OPT 10.1.1.100 1234 < payload.bin])
+
+dnl Wait until transfer completes before checking.
+OVS_WAIT_WHILE([kill -0 $(cat nc.pid)])
+AT_CHECK([diff -q payload.bin data], [0])
+
 OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
 AT_SETUP([datapath - ping over geneve tunnel, delete flow regression])
-OVS_CHECK_TUNNEL_TSO()
 OVS_CHECK_GENEVE()
 
 OVS_TRAFFIC_VSWITCHD_START()
@@ -760,7 +776,6 @@  OVS_TRAFFIC_VSWITCHD_STOP(["/|ERR|/d
 AT_CLEANUP
 
 AT_SETUP([datapath - flow resume with geneve tun_metadata])
-OVS_CHECK_TUNNEL_TSO()
 OVS_CHECK_GENEVE()
 
 OVS_TRAFFIC_VSWITCHD_START()
@@ -812,7 +827,6 @@  OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
 AT_SETUP([datapath - ping over geneve6 tunnel])
-OVS_CHECK_TUNNEL_TSO()
 OVS_CHECK_GENEVE_UDP6ZEROCSUM()
 
 OVS_TRAFFIC_VSWITCHD_START()
@@ -852,12 +866,19 @@  NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -W 2 10.1.1.100 | FORMAT_PI
 3 packets transmitted, 3 received, 0% packet loss, time 0ms
 ])
 
+dnl Check large bidirectional TCP.
+AT_CHECK([dd if=/dev/urandom of=payload.bin bs=60000 count=1 2> /dev/null])
+OVS_DAEMONIZE([nc -l 10.1.1.100 1234 > data], [nc.pid])
+NS_CHECK_EXEC([at_ns0], [nc $NC_EOF_OPT 10.1.1.100 1234 < payload.bin])
+
+dnl Wait until transfer completes before checking.
+OVS_WAIT_WHILE([kill -0 $(cat nc.pid)])
+AT_CHECK([diff -q payload.bin data], [0])
 OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
 AT_SETUP([datapath - slow_action on geneve6 tunnel])
 AT_SKIP_IF([test $HAVE_TCPDUMP = no])
-OVS_CHECK_TUNNEL_TSO()
 OVS_CHECK_GENEVE_UDP6ZEROCSUM()
 
 OVS_TRAFFIC_VSWITCHD_START()
@@ -981,7 +1002,6 @@  OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
 AT_SETUP([datapath - ping over gre tunnel by simulated packets])
-OVS_CHECK_TUNNEL_TSO()
 OVS_CHECK_MIN_KERNEL(3, 10)
 
 OVS_TRAFFIC_VSWITCHD_START()
@@ -1028,7 +1048,6 @@  OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
 AT_SETUP([datapath - ping over erspan v1 tunnel by simulated packets])
-OVS_CHECK_TUNNEL_TSO()
 OVS_CHECK_MIN_KERNEL(3, 10)
 
 OVS_TRAFFIC_VSWITCHD_START()
@@ -1077,7 +1096,6 @@  OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
 AT_SETUP([datapath - ping over erspan v2 tunnel by simulated packets])
-OVS_CHECK_TUNNEL_TSO()
 OVS_CHECK_MIN_KERNEL(3, 10)
 
 OVS_TRAFFIC_VSWITCHD_START()
@@ -1131,7 +1149,6 @@  OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
 AT_SETUP([datapath - ping over ip6erspan v1 tunnel by simulated packets])
-OVS_CHECK_TUNNEL_TSO()
 OVS_CHECK_MIN_KERNEL(3, 10)
 
 OVS_TRAFFIC_VSWITCHD_START()
@@ -1187,7 +1204,6 @@  OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
 AT_SETUP([datapath - ping over ip6erspan v2 tunnel by simulated packets])
-OVS_CHECK_TUNNEL_TSO()
 OVS_CHECK_MIN_KERNEL(3, 10)
 
 OVS_TRAFFIC_VSWITCHD_START()
@@ -1242,7 +1258,6 @@  OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
 AT_SETUP([datapath - ping over srv6 tunnel])
-OVS_CHECK_TUNNEL_TSO()
 OVS_CHECK_SRV6()
 
 OVS_TRAFFIC_VSWITCHD_START()
@@ -1304,7 +1319,6 @@  OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
 AT_SETUP([datapath - ping6 over srv6 tunnel])
-OVS_CHECK_TUNNEL_TSO()
 OVS_CHECK_SRV6()
 
 OVS_TRAFFIC_VSWITCHD_START()
@@ -8062,7 +8076,6 @@  AT_CLEANUP
 
 AT_SETUP([conntrack - can match and clear ct_state from outside OVS])
 CHECK_CONNTRACK_LOCAL_STACK()
-OVS_CHECK_TUNNEL_TSO()
 OVS_CHECK_GENEVE()
 
 OVS_TRAFFIC_VSWITCHD_START()