diff mbox series

[ovs-dev] Userspace: Software fallback for UDP encapsulated TCP segmentation.

Message ID 20240124181435.665698-1-mkp@redhat.com
State Superseded
Headers show
Series [ovs-dev] Userspace: Software fallback for UDP encapsulated TCP segmentation. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test fail github build: failed
ovsrobot/intel-ovs-compilation success test: success

Commit Message

Mike Pattrick Jan. 24, 2024, 6:14 p.m. UTC
When sending packets that are flagged as requiring segmentation to an
interface that doens't support this feature, send the packet to the TSO
software fallback instead of dropping it.

Signed-off-by: Mike Pattrick <mkp@redhat.com>
---
 lib/dp-packet-gso.c     | 73 +++++++++++++++++++++++++++++++++--------
 lib/dp-packet.h         | 26 +++++++++++++++
 lib/netdev-native-tnl.c |  8 +++++
 lib/netdev.c            | 19 -----------
 tests/system-traffic.at | 48 +++++++++++++++++++++++++++
 5 files changed, 142 insertions(+), 32 deletions(-)

Comments

Mike Pattrick Jan. 24, 2024, 9:28 p.m. UTC | #1
On Wed, Jan 24, 2024 at 1:14 PM Mike Pattrick <mkp@redhat.com> wrote:
>
> When sending packets that are flagged as requiring segmentation to an
> interface that doens't support this feature, send the packet to the TSO
> software fallback instead of dropping it.
>
> Signed-off-by: Mike Pattrick <mkp@redhat.com>

For some reason, this patch fails github CI but not Intel CI and
didn't have any errors when I tested it. I'm looking into the issue.
diff mbox series

Patch

diff --git a/lib/dp-packet-gso.c b/lib/dp-packet-gso.c
index 847685ad9..f25abf436 100644
--- a/lib/dp-packet-gso.c
+++ b/lib/dp-packet-gso.c
@@ -47,6 +47,8 @@  dp_packet_gso_seg_new(const struct dp_packet *p, size_t hdr_len,
     seg->l2_5_ofs = p->l2_5_ofs;
     seg->l3_ofs = p->l3_ofs;
     seg->l4_ofs = p->l4_ofs;
+    seg->inner_l3_ofs = p->inner_l3_ofs;
+    seg->inner_l4_ofs = p->inner_l4_ofs;
 
     /* The protocol headers remain the same, so preserve hash and mark. */
     *dp_packet_rss_ptr(seg) = *dp_packet_rss_ptr(p);
@@ -71,7 +73,12 @@  dp_packet_gso_nr_segs(struct dp_packet *p)
     const char *data_tail;
     const char *data_pos;
 
-    data_pos = dp_packet_get_tcp_payload(p);
+    if (dp_packet_hwol_is_tunnel_vxlan(p) ||
+        dp_packet_hwol_is_tunnel_geneve(p)) {
+        data_pos = dp_packet_get_inner_tcp_payload(p);
+    } else {
+        data_pos = dp_packet_get_tcp_payload(p);
+    }
     data_tail = (char *) dp_packet_tail(p) - dp_packet_l2_pad_size(p);
 
     return DIV_ROUND_UP(data_tail - data_pos, segsz);
@@ -91,12 +98,15 @@  dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
     struct tcp_header *tcp_hdr;
     struct ip_header *ip_hdr;
     struct dp_packet *seg;
+    const char *data_pos;
     uint16_t tcp_offset;
     uint16_t tso_segsz;
+    uint16_t ip_id = 0;
     uint32_t tcp_seq;
-    uint16_t ip_id;
+    bool outer_ipv4;
     int hdr_len;
     int seg_len;
+    bool tnl;
 
     tso_segsz = dp_packet_get_tso_segsz(p);
     if (!tso_segsz) {
@@ -105,20 +115,35 @@  dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
         return false;
     }
 
-    tcp_hdr = dp_packet_l4(p);
-    tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl);
-    tcp_seq = ntohl(get_16aligned_be32(&tcp_hdr->tcp_seq));
-    hdr_len = ((char *) dp_packet_l4(p) - (char *) dp_packet_eth(p))
-              + tcp_offset * 4;
-    ip_id = 0;
-    if (dp_packet_hwol_is_ipv4(p)) {
+    if (dp_packet_hwol_is_tunnel_vxlan(p) ||
+            dp_packet_hwol_is_tunnel_geneve(p)) {
+        data_pos =  dp_packet_get_inner_tcp_payload(p);
+        outer_ipv4 = dp_packet_hwol_is_outer_ipv4(p);
+        tcp_hdr = dp_packet_inner_l4(p);
+        ip_hdr = dp_packet_inner_l3(p);
+        tnl = true;
+        if (outer_ipv4) {
+            ip_id = ntohs(((struct ip_header *) dp_packet_l3(p))->ip_id);
+        } else if (dp_packet_hwol_is_ipv4(p)) {
+            ip_id = ntohs(ip_hdr->ip_id);
+        }
+    } else {
+        data_pos = dp_packet_get_tcp_payload(p);
+        outer_ipv4 = dp_packet_hwol_is_ipv4(p);
+        tcp_hdr = dp_packet_l4(p);
         ip_hdr = dp_packet_l3(p);
-        ip_id = ntohs(ip_hdr->ip_id);
+        tnl = false;
+        if (outer_ipv4) {
+            ip_id = ntohs(ip_hdr->ip_id);
+        }
     }
 
+    tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl);
+    tcp_seq = ntohl(get_16aligned_be32(&tcp_hdr->tcp_seq));
+    hdr_len = ((char *) tcp_hdr - (char *) dp_packet_eth(p))
+              + tcp_offset * 4;
     const char *data_tail = (char *) dp_packet_tail(p)
                             - dp_packet_l2_pad_size(p);
-    const char *data_pos = dp_packet_get_tcp_payload(p);
     int n_segs = dp_packet_gso_nr_segs(p);
 
     for (int i = 0; i < n_segs; i++) {
@@ -130,8 +155,26 @@  dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
         seg = dp_packet_gso_seg_new(p, hdr_len, data_pos, seg_len);
         data_pos += seg_len;
 
+        if (tnl) {
+            /* Update tunnel L3 header. */
+            if (dp_packet_hwol_is_ipv4(seg)) {
+                ip_hdr = dp_packet_inner_l3(seg);
+                ip_hdr->ip_tot_len = htons(sizeof *ip_hdr +
+                                           dp_packet_inner_l4_size(seg));
+                ip_hdr->ip_id = htons(ip_id);
+                ip_hdr->ip_csum = 0;
+                ip_id++;
+            } else {
+                struct ovs_16aligned_ip6_hdr *ip6_hdr;
+
+                ip6_hdr = dp_packet_inner_l3(seg);
+                ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen
+                    = htons(dp_packet_inner_l3_size(seg) - sizeof *ip6_hdr);
+            }
+        }
+
         /* Update L3 header. */
-        if (dp_packet_hwol_is_ipv4(seg)) {
+        if (outer_ipv4) {
             ip_hdr = dp_packet_l3(seg);
             ip_hdr->ip_tot_len = htons(sizeof *ip_hdr +
                                        dp_packet_l4_size(seg));
@@ -146,7 +189,11 @@  dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
         }
 
         /* Update L4 header. */
-        tcp_hdr = dp_packet_l4(seg);
+        if (tnl) {
+            tcp_hdr = dp_packet_inner_l4(seg);
+        } else {
+            tcp_hdr = dp_packet_l4(seg);
+        }
         put_16aligned_be32(&tcp_hdr->tcp_seq, htonl(tcp_seq));
         tcp_seq += seg_len;
         if (OVS_LIKELY(i < (n_segs - 1))) {
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 939bec5c8..32618d89d 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -526,6 +526,16 @@  dp_packet_inner_l3(const struct dp_packet *b)
            : NULL;
 }
 
+static inline size_t
+dp_packet_inner_l3_size(const struct dp_packet *b)
+{
+    return OVS_LIKELY(b->inner_l3_ofs != UINT16_MAX)
+           ? (const char *) dp_packet_tail(b)
+           - (const char *) dp_packet_inner_l3(b)
+           - dp_packet_l2_pad_size(b)
+           : 0;
+}
+
 static inline void *
 dp_packet_inner_l4(const struct dp_packet *b)
 {
@@ -560,6 +570,22 @@  dp_packet_get_tcp_payload(const struct dp_packet *b)
     return NULL;
 }
 
+static inline const void *
+dp_packet_get_inner_tcp_payload(const struct dp_packet *b)
+{
+    size_t l4_size = dp_packet_inner_l4_size(b);
+
+    if (OVS_LIKELY(l4_size >= TCP_HEADER_LEN)) {
+        struct tcp_header *tcp = dp_packet_inner_l4(b);
+        int tcp_len = TCP_OFFSET(tcp->tcp_ctl) * 4;
+
+        if (OVS_LIKELY(tcp_len >= TCP_HEADER_LEN && tcp_len <= l4_size)) {
+            return (const char *) tcp + tcp_len;
+        }
+    }
+    return NULL;
+}
+
 static inline uint32_t
 dp_packet_get_tcp_payload_length(const struct dp_packet *pkt)
 {
diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
index 0d6d803fe..76aaa41c9 100644
--- a/lib/netdev-native-tnl.c
+++ b/lib/netdev-native-tnl.c
@@ -342,6 +342,14 @@  netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,
     if (l4_ofs != UINT16_MAX) {
         packet->inner_l4_ofs = l4_ofs + data->header_len;
     }
+
+    if (dp_packet_hwol_is_tso(packet)) {
+        uint16_t tso_segsz = dp_packet_get_tso_segsz(packet);
+        if (tso_segsz > data->header_len) {
+            tso_segsz -= data->header_len;
+            dp_packet_set_tso_segsz(packet, tso_segsz);
+        }
+    }
 }
 
 static void *
diff --git a/lib/netdev.c b/lib/netdev.c
index f2d921ed6..fc61ab1fd 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -69,8 +69,6 @@  COVERAGE_DEFINE(netdev_received);
 COVERAGE_DEFINE(netdev_sent);
 COVERAGE_DEFINE(netdev_add_router);
 COVERAGE_DEFINE(netdev_get_stats);
-COVERAGE_DEFINE(netdev_vxlan_tso_drops);
-COVERAGE_DEFINE(netdev_geneve_tso_drops);
 COVERAGE_DEFINE(netdev_push_header_drops);
 COVERAGE_DEFINE(netdev_soft_seg_good);
 COVERAGE_DEFINE(netdev_soft_seg_drops);
@@ -914,23 +912,6 @@  netdev_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch,
         !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) {
         DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
             if (dp_packet_hwol_is_tso(packet)) {
-                if (dp_packet_hwol_is_tunnel_vxlan(packet)
-                    && !(netdev_flags & NETDEV_TX_VXLAN_TNL_TSO)) {
-                        VLOG_WARN_RL(&rl, "%s: No VXLAN TSO support",
-                                     netdev_get_name(netdev));
-                        COVERAGE_INC(netdev_vxlan_tso_drops);
-                        dp_packet_delete_batch(batch, true);
-                        return false;
-                }
-
-                if (dp_packet_hwol_is_tunnel_geneve(packet)
-                    && !(netdev_flags & NETDEV_TX_GENEVE_TNL_TSO)) {
-                        VLOG_WARN_RL(&rl, "%s: No GENEVE TSO support",
-                                     netdev_get_name(netdev));
-                        COVERAGE_INC(netdev_geneve_tso_drops);
-                        dp_packet_delete_batch(batch, true);
-                        return false;
-                }
                 return netdev_send_tso(netdev, qid, batch, concurrent_txq);
             }
         }
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index f363a778c..ff61fec91 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -341,6 +341,54 @@  AT_CHECK([diff -q payload.bin data], [0])
 OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
+AT_SETUP([datapath - tcp over vxlan tunnel with software fallback])
+AT_SKIP_IF([test $HAVE_NC = no])
+OVS_CHECK_VXLAN()
+
+OVS_TRAFFIC_VSWITCHD_START()
+ADD_BR([br-underlay])
+
+AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"])
+AT_CHECK([ovs-ofctl add-flow br-underlay "actions=normal"])
+
+ADD_NAMESPACES(at_ns0)
+
+dnl Set up underlay link from host into the namespace using veth pair.
+ADD_VETH(p0, at_ns0, br-underlay, "172.31.1.1/24")
+AT_CHECK([ip addr add dev br-underlay "172.31.1.100/24"])
+AT_CHECK([ip link set dev br-underlay up])
+
+dnl Test the case where only one side has all checksum and tso offload disabled.
+AT_CHECK([ethtool -K ovs-p0 tso off], [0], [ignore], [ignore])
+AT_CHECK([ethtool -K ovs-p0 sg off], [0], [ignore], [ignore])
+
+dnl Reinitialize.
+AT_CHECK([ovs-vsctl del-port ovs-p0])
+AT_CHECK([ovs-vsctl add-port br-underlay ovs-p0])
+
+dnl Set up tunnel endpoints on OVS outside the namespace and with a native
+dnl linux device inside the namespace.
+ADD_OVS_TUNNEL([vxlan], [br0], [at_vxlan0], [172.31.1.1], [10.1.1.100/24])
+ADD_NATIVE_TUNNEL([vxlan], [at_vxlan1], [at_ns0], [172.31.1.100], [10.1.1.1/24],
+                  [id 0 dstport 4789])
+
+dnl First, check the underlay.
+NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -W 2 172.31.1.100 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+dnl Large TCP transfer aimed towards ovs-p0, which has TSO disabled.
+AT_CHECK([dd if=/dev/urandom of=payload.bin bs=60000 count=1 2> /dev/null])
+NETNS_DAEMONIZE([at_ns0], [nc -l 10.1.1.1 1234 > data2], [nc2.pid])
+AT_CHECK([nc $NC_EOF_OPT 10.1.1.1 1234 < payload.bin])
+
+dnl Wait until transfer completes before checking.
+OVS_WAIT_WHILE([kill -0 $(cat nc2.pid)])
+AT_CHECK([diff -q payload.bin data2], [0])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
 AT_SETUP([datapath - ping vlan over vxlan tunnel])
 OVS_CHECK_TUNNEL_TSO()
 OVS_CHECK_VXLAN()