diff mbox series

[ovs-dev] Userspace: Software fallback for UDP encapsulated TCP segmentation.

Message ID 20240221040855.271921-1-mkp@redhat.com
State Superseded
Headers show
Series [ovs-dev] Userspace: Software fallback for UDP encapsulated TCP segmentation. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test fail github build: failed
ovsrobot/intel-ovs-compilation success test: success
ovsrobot/github-robot-_Build_and_Test success github build: passed

Commit Message

Mike Pattrick Feb. 21, 2024, 4:08 a.m. UTC
When sending packets that are flagged as requiring segmentation to an
interface that doesn't support this feature, send the packet to the TSO
software fallback instead of dropping it.

Signed-off-by: Mike Pattrick <mkp@redhat.com>
---
 lib/dp-packet-gso.c     | 73 +++++++++++++++++++++++++++++++++--------
 lib/dp-packet.h         | 26 +++++++++++++++
 lib/netdev-native-tnl.c |  8 +++++
 lib/netdev.c            | 37 +++++++++------------
 tests/system-traffic.at | 58 ++++++++++++++++++++++++++++++++
 5 files changed, 167 insertions(+), 35 deletions(-)

Comments

Mike Pattrick Feb. 21, 2024, 6:09 p.m. UTC | #1
On Tue, Feb 20, 2024 at 11:09 PM Mike Pattrick <mkp@redhat.com> wrote:
>
> When sending packets that are flagged as requiring segmentation to an
> interface that doens't support this feature, send the packet to the TSO
> software fallback instead of dropping it.
>
> Signed-off-by: Mike Pattrick <mkp@redhat.com>

Recheck-request: github-robot

> ---
>  lib/dp-packet-gso.c     | 73 +++++++++++++++++++++++++++++++++--------
>  lib/dp-packet.h         | 26 +++++++++++++++
>  lib/netdev-native-tnl.c |  8 +++++
>  lib/netdev.c            | 37 +++++++++------------
>  tests/system-traffic.at | 58 ++++++++++++++++++++++++++++++++
>  5 files changed, 167 insertions(+), 35 deletions(-)
>
> diff --git a/lib/dp-packet-gso.c b/lib/dp-packet-gso.c
> index 847685ad9..f25abf436 100644
> --- a/lib/dp-packet-gso.c
> +++ b/lib/dp-packet-gso.c
> @@ -47,6 +47,8 @@ dp_packet_gso_seg_new(const struct dp_packet *p, size_t hdr_len,
>      seg->l2_5_ofs = p->l2_5_ofs;
>      seg->l3_ofs = p->l3_ofs;
>      seg->l4_ofs = p->l4_ofs;
> +    seg->inner_l3_ofs = p->inner_l3_ofs;
> +    seg->inner_l4_ofs = p->inner_l4_ofs;
>
>      /* The protocol headers remain the same, so preserve hash and mark. */
>      *dp_packet_rss_ptr(seg) = *dp_packet_rss_ptr(p);
> @@ -71,7 +73,12 @@ dp_packet_gso_nr_segs(struct dp_packet *p)
>      const char *data_tail;
>      const char *data_pos;
>
> -    data_pos = dp_packet_get_tcp_payload(p);
> +    if (dp_packet_hwol_is_tunnel_vxlan(p) ||
> +        dp_packet_hwol_is_tunnel_geneve(p)) {
> +        data_pos = dp_packet_get_inner_tcp_payload(p);
> +    } else {
> +        data_pos = dp_packet_get_tcp_payload(p);
> +    }
>      data_tail = (char *) dp_packet_tail(p) - dp_packet_l2_pad_size(p);
>
>      return DIV_ROUND_UP(data_tail - data_pos, segsz);
> @@ -91,12 +98,15 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
>      struct tcp_header *tcp_hdr;
>      struct ip_header *ip_hdr;
>      struct dp_packet *seg;
> +    const char *data_pos;
>      uint16_t tcp_offset;
>      uint16_t tso_segsz;
> +    uint16_t ip_id = 0;
>      uint32_t tcp_seq;
> -    uint16_t ip_id;
> +    bool outer_ipv4;
>      int hdr_len;
>      int seg_len;
> +    bool tnl;
>
>      tso_segsz = dp_packet_get_tso_segsz(p);
>      if (!tso_segsz) {
> @@ -105,20 +115,35 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
>          return false;
>      }
>
> -    tcp_hdr = dp_packet_l4(p);
> -    tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl);
> -    tcp_seq = ntohl(get_16aligned_be32(&tcp_hdr->tcp_seq));
> -    hdr_len = ((char *) dp_packet_l4(p) - (char *) dp_packet_eth(p))
> -              + tcp_offset * 4;
> -    ip_id = 0;
> -    if (dp_packet_hwol_is_ipv4(p)) {
> +    if (dp_packet_hwol_is_tunnel_vxlan(p) ||
> +            dp_packet_hwol_is_tunnel_geneve(p)) {
> +        data_pos =  dp_packet_get_inner_tcp_payload(p);
> +        outer_ipv4 = dp_packet_hwol_is_outer_ipv4(p);
> +        tcp_hdr = dp_packet_inner_l4(p);
> +        ip_hdr = dp_packet_inner_l3(p);
> +        tnl = true;
> +        if (outer_ipv4) {
> +            ip_id = ntohs(((struct ip_header *) dp_packet_l3(p))->ip_id);
> +        } else if (dp_packet_hwol_is_ipv4(p)) {
> +            ip_id = ntohs(ip_hdr->ip_id);
> +        }
> +    } else {
> +        data_pos = dp_packet_get_tcp_payload(p);
> +        outer_ipv4 = dp_packet_hwol_is_ipv4(p);
> +        tcp_hdr = dp_packet_l4(p);
>          ip_hdr = dp_packet_l3(p);
> -        ip_id = ntohs(ip_hdr->ip_id);
> +        tnl = false;
> +        if (outer_ipv4) {
> +            ip_id = ntohs(ip_hdr->ip_id);
> +        }
>      }
>
> +    tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl);
> +    tcp_seq = ntohl(get_16aligned_be32(&tcp_hdr->tcp_seq));
> +    hdr_len = ((char *) tcp_hdr - (char *) dp_packet_eth(p))
> +              + tcp_offset * 4;
>      const char *data_tail = (char *) dp_packet_tail(p)
>                              - dp_packet_l2_pad_size(p);
> -    const char *data_pos = dp_packet_get_tcp_payload(p);
>      int n_segs = dp_packet_gso_nr_segs(p);
>
>      for (int i = 0; i < n_segs; i++) {
> @@ -130,8 +155,26 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
>          seg = dp_packet_gso_seg_new(p, hdr_len, data_pos, seg_len);
>          data_pos += seg_len;
>
> +        if (tnl) {
> +            /* Update tunnel L3 header. */
> +            if (dp_packet_hwol_is_ipv4(seg)) {
> +                ip_hdr = dp_packet_inner_l3(seg);
> +                ip_hdr->ip_tot_len = htons(sizeof *ip_hdr +
> +                                           dp_packet_inner_l4_size(seg));
> +                ip_hdr->ip_id = htons(ip_id);
> +                ip_hdr->ip_csum = 0;
> +                ip_id++;
> +            } else {
> +                struct ovs_16aligned_ip6_hdr *ip6_hdr;
> +
> +                ip6_hdr = dp_packet_inner_l3(seg);
> +                ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen
> +                    = htons(dp_packet_inner_l3_size(seg) - sizeof *ip6_hdr);
> +            }
> +        }
> +
>          /* Update L3 header. */
> -        if (dp_packet_hwol_is_ipv4(seg)) {
> +        if (outer_ipv4) {
>              ip_hdr = dp_packet_l3(seg);
>              ip_hdr->ip_tot_len = htons(sizeof *ip_hdr +
>                                         dp_packet_l4_size(seg));
> @@ -146,7 +189,11 @@ dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
>          }
>
>          /* Update L4 header. */
> -        tcp_hdr = dp_packet_l4(seg);
> +        if (tnl) {
> +            tcp_hdr = dp_packet_inner_l4(seg);
> +        } else {
> +            tcp_hdr = dp_packet_l4(seg);
> +        }
>          put_16aligned_be32(&tcp_hdr->tcp_seq, htonl(tcp_seq));
>          tcp_seq += seg_len;
>          if (OVS_LIKELY(i < (n_segs - 1))) {
> diff --git a/lib/dp-packet.h b/lib/dp-packet.h
> index 2fa17d814..c3dabc5b0 100644
> --- a/lib/dp-packet.h
> +++ b/lib/dp-packet.h
> @@ -529,6 +529,16 @@ dp_packet_inner_l3(const struct dp_packet *b)
>             : NULL;
>  }
>
> +static inline size_t
> +dp_packet_inner_l3_size(const struct dp_packet *b)
> +{
> +    return OVS_LIKELY(b->inner_l3_ofs != UINT16_MAX)
> +           ? (const char *) dp_packet_tail(b)
> +           - (const char *) dp_packet_inner_l3(b)
> +           - dp_packet_l2_pad_size(b)
> +           : 0;
> +}
> +
>  static inline void *
>  dp_packet_inner_l4(const struct dp_packet *b)
>  {
> @@ -563,6 +573,22 @@ dp_packet_get_tcp_payload(const struct dp_packet *b)
>      return NULL;
>  }
>
> +static inline const void *
> +dp_packet_get_inner_tcp_payload(const struct dp_packet *b)
> +{
> +    size_t l4_size = dp_packet_inner_l4_size(b);
> +
> +    if (OVS_LIKELY(l4_size >= TCP_HEADER_LEN)) {
> +        struct tcp_header *tcp = dp_packet_inner_l4(b);
> +        int tcp_len = TCP_OFFSET(tcp->tcp_ctl) * 4;
> +
> +        if (OVS_LIKELY(tcp_len >= TCP_HEADER_LEN && tcp_len <= l4_size)) {
> +            return (const char *) tcp + tcp_len;
> +        }
> +    }
> +    return NULL;
> +}
> +
>  static inline uint32_t
>  dp_packet_get_tcp_payload_length(const struct dp_packet *pkt)
>  {
> diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
> index dee9ab344..369b92acc 100644
> --- a/lib/netdev-native-tnl.c
> +++ b/lib/netdev-native-tnl.c
> @@ -348,6 +348,14 @@ netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,
>      if (l4_ofs != UINT16_MAX) {
>          packet->inner_l4_ofs = l4_ofs + data->header_len;
>      }
> +
> +    if (dp_packet_hwol_is_tso(packet)) {
> +        uint16_t tso_segsz = dp_packet_get_tso_segsz(packet);
> +        if (tso_segsz > data->header_len) {
> +            tso_segsz -= data->header_len;
> +            dp_packet_set_tso_segsz(packet, tso_segsz);
> +        }
> +    }
>  }
>
>  static void *
> diff --git a/lib/netdev.c b/lib/netdev.c
> index f2d921ed6..1d59bbe5d 100644
> --- a/lib/netdev.c
> +++ b/lib/netdev.c
> @@ -69,8 +69,6 @@ COVERAGE_DEFINE(netdev_received);
>  COVERAGE_DEFINE(netdev_sent);
>  COVERAGE_DEFINE(netdev_add_router);
>  COVERAGE_DEFINE(netdev_get_stats);
> -COVERAGE_DEFINE(netdev_vxlan_tso_drops);
> -COVERAGE_DEFINE(netdev_geneve_tso_drops);
>  COVERAGE_DEFINE(netdev_push_header_drops);
>  COVERAGE_DEFINE(netdev_soft_seg_good);
>  COVERAGE_DEFINE(netdev_soft_seg_drops);
> @@ -910,28 +908,23 @@ netdev_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch,
>      struct dp_packet *packet;
>      int error;
>
> -    if (userspace_tso_enabled() &&
> -        !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) {
> -        DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> -            if (dp_packet_hwol_is_tso(packet)) {
> -                if (dp_packet_hwol_is_tunnel_vxlan(packet)
> -                    && !(netdev_flags & NETDEV_TX_VXLAN_TNL_TSO)) {
> -                        VLOG_WARN_RL(&rl, "%s: No VXLAN TSO support",
> -                                     netdev_get_name(netdev));
> -                        COVERAGE_INC(netdev_vxlan_tso_drops);
> -                        dp_packet_delete_batch(batch, true);
> -                        return false;
> +    if (userspace_tso_enabled()) {
> +        if (!(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) {
> +            DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> +                if (dp_packet_hwol_is_tso(packet)) {
> +                    return netdev_send_tso(netdev, qid, batch, concurrent_txq);
>                  }
> -
> -                if (dp_packet_hwol_is_tunnel_geneve(packet)
> -                    && !(netdev_flags & NETDEV_TX_GENEVE_TNL_TSO)) {
> -                        VLOG_WARN_RL(&rl, "%s: No GENEVE TSO support",
> -                                     netdev_get_name(netdev));
> -                        COVERAGE_INC(netdev_geneve_tso_drops);
> -                        dp_packet_delete_batch(batch, true);
> -                        return false;
> +            }
> +        } else if (!(netdev_flags & (NETDEV_TX_VXLAN_TNL_TSO |
> +                                     NETDEV_TX_GENEVE_TNL_TSO))) {
> +            DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
> +                if (!dp_packet_hwol_is_tso(packet)) {
> +                    continue;
> +                }
> +                if (dp_packet_hwol_is_tunnel_vxlan(packet) ||
> +                    dp_packet_hwol_is_tunnel_geneve(packet)) {
> +                    return netdev_send_tso(netdev, qid, batch, concurrent_txq);
>                  }
> -                return netdev_send_tso(netdev, qid, batch, concurrent_txq);
>              }
>          }
>      }
> diff --git a/tests/system-traffic.at b/tests/system-traffic.at
> index 98e494abf..c1681506e 100644
> --- a/tests/system-traffic.at
> +++ b/tests/system-traffic.at
> @@ -351,6 +351,64 @@ OVS_WAIT_UNTIL([diff -q payload.bin udp_data])
>  OVS_TRAFFIC_VSWITCHD_STOP
>  AT_CLEANUP
>
> +AT_SETUP([datapath - tcp over vxlan tunnel with software fallback])
> +AT_SKIP_IF([test $HAVE_NC = no])
> +OVS_CHECK_VXLAN()
> +
> +OVS_TRAFFIC_VSWITCHD_START()
> +ADD_BR([br-underlay])
> +
> +AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"])
> +AT_CHECK([ovs-ofctl add-flow br-underlay "actions=normal"])
> +
> +ADD_NAMESPACES(at_ns0)
> +
> +dnl Set up underlay link from host into the namespace using veth pair.
> +ADD_VETH(p0, at_ns0, br-underlay, "172.31.1.1/24")
> +AT_CHECK([ip addr add dev br-underlay "172.31.1.100/24"])
> +AT_CHECK([ip link set dev br-underlay up])
> +
> +dnl Test the case where only one side has all checksum and tso offload disabled.
> +AT_CHECK([ethtool -K ovs-p0 tso off], [0], [ignore], [ignore])
> +AT_CHECK([ethtool -K ovs-p0 sg off], [0], [ignore], [ignore])
> +
> +dnl Reinitialize.
> +AT_CHECK([ovs-vsctl del-port ovs-p0])
> +AT_CHECK([ovs-vsctl add-port br-underlay ovs-p0])
> +
> +dnl Set up tunnel endpoints on OVS outside the namespace and with a native
> +dnl linux device inside the namespace.
> +ADD_OVS_TUNNEL([vxlan], [br0], [at_vxlan0], [172.31.1.1], [10.1.1.100/24])
> +ADD_NATIVE_TUNNEL([vxlan], [at_vxlan1], [at_ns0], [172.31.1.100], [10.1.1.1/24],
> +                  [id 0 dstport 4789])
> +
> +dnl First, check the underlay.
> +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -W 2 172.31.1.100 | FORMAT_PING], [0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +dnl Check that the tunnel is up.
> +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -W 2 10.1.1.100 | FORMAT_PING], [0], [dnl
> +3 packets transmitted, 3 received, 0% packet loss, time 0ms
> +])
> +
> +dnl Initialize the listener before it is needed.
> +NETNS_DAEMONIZE([at_ns0], [nc -l 10.1.1.1 1234 > data2], [nc.pid])
> +
> +dnl Verify that ncat is ready.
> +OVS_WAIT_UNTIL([NS_EXEC([at_ns0], [netstat -ln | grep :1234])])
> +
> +dnl Large TCP transfer aimed towards ovs-p0, which has TSO disabled.
> +AT_CHECK([dd if=/dev/urandom of=payload.bin bs=60000 count=1 2> /dev/null])
> +AT_CHECK([nc $NC_EOF_OPT 10.1.1.1 1234 < payload.bin])
> +
> +dnl Wait until transfer completes before checking.
> +OVS_WAIT_WHILE([kill -0 $(cat nc.pid)])
> +AT_CHECK([diff -q payload.bin data2], [0])
> +
> +OVS_TRAFFIC_VSWITCHD_STOP
> +AT_CLEANUP
> +
>  AT_SETUP([datapath - ping vlan over vxlan tunnel])
>  OVS_CHECK_TUNNEL_TSO()
>  OVS_CHECK_VXLAN()
> --
> 2.39.3
>
Simon Horman March 27, 2024, 5:39 p.m. UTC | #2
On Tue, Feb 20, 2024 at 11:08:55PM -0500, Mike Pattrick wrote:
> When sending packets that are flagged as requiring segmentation to an
> interface that doens't support this feature, send the packet to the TSO
> software fallback instead of dropping it.
> 
> Signed-off-by: Mike Pattrick <mkp@redhat.com>

Hi Mike,

Can I confirm that from your PoV this patch is still awaiting review?
I ask because it's been sitting around for a while now.
Mike Pattrick March 27, 2024, 9:07 p.m. UTC | #3
On Wed, Mar 27, 2024 at 1:39 PM Simon Horman <horms@ovn.org> wrote:
>
> On Tue, Feb 20, 2024 at 11:08:55PM -0500, Mike Pattrick wrote:
> > When sending packets that are flagged as requiring segmentation to an
> > interface that doens't support this feature, send the packet to the TSO
> > software fallback instead of dropping it.
> >
> > Signed-off-by: Mike Pattrick <mkp@redhat.com>
>
> Hi Mike,
>
> Can I confirm that from your PoV this patch is still awaiting review?
> I ask because it's been sitting around for a while now.

I believe this patch now needs to be modified for the recent
recirculation change. I'll update it for that, give it another once
over, and resubmit.

-M


>
Ilya Maximets March 27, 2024, 9:15 p.m. UTC | #4
On 3/27/24 22:07, Mike Pattrick wrote:
> On Wed, Mar 27, 2024 at 1:39 PM Simon Horman <horms@ovn.org> wrote:
>>
>> On Tue, Feb 20, 2024 at 11:08:55PM -0500, Mike Pattrick wrote:
>>> When sending packets that are flagged as requiring segmentation to an
>>> interface that doens't support this feature, send the packet to the TSO
>>> software fallback instead of dropping it.
>>>
>>> Signed-off-by: Mike Pattrick <mkp@redhat.com>
>>
>> Hi Mike,
>>
>> Can I confirm that from your PoV this patch is still awaiting review?
>> I ask because it's been sitting around for a while now.
> 
> I believe this patch now needs to be modified for the recent
> recirculation change. I'll update it for that, give it another once
> over, and resubmit.

While at it, please, check that the outer UDP header is correctly
updated.  It is likely not, according to the tests done here:
  https://github.com/openvswitch/ovs-issues/issues/321#issuecomment-2008592610
It seems to keep a length that is larger than the IP payload size.

If so, some unit tests for this functionality would be nice to have
as well with the actual comparison of headers for different packets.

Best regards, Ilya Maximets.
diff mbox series

Patch

diff --git a/lib/dp-packet-gso.c b/lib/dp-packet-gso.c
index 847685ad9..f25abf436 100644
--- a/lib/dp-packet-gso.c
+++ b/lib/dp-packet-gso.c
@@ -47,6 +47,8 @@  dp_packet_gso_seg_new(const struct dp_packet *p, size_t hdr_len,
     seg->l2_5_ofs = p->l2_5_ofs;
     seg->l3_ofs = p->l3_ofs;
     seg->l4_ofs = p->l4_ofs;
+    seg->inner_l3_ofs = p->inner_l3_ofs;
+    seg->inner_l4_ofs = p->inner_l4_ofs;
 
     /* The protocol headers remain the same, so preserve hash and mark. */
     *dp_packet_rss_ptr(seg) = *dp_packet_rss_ptr(p);
@@ -71,7 +73,12 @@  dp_packet_gso_nr_segs(struct dp_packet *p)
     const char *data_tail;
     const char *data_pos;
 
-    data_pos = dp_packet_get_tcp_payload(p);
+    if (dp_packet_hwol_is_tunnel_vxlan(p) ||
+        dp_packet_hwol_is_tunnel_geneve(p)) {
+        data_pos = dp_packet_get_inner_tcp_payload(p);
+    } else {
+        data_pos = dp_packet_get_tcp_payload(p);
+    }
     data_tail = (char *) dp_packet_tail(p) - dp_packet_l2_pad_size(p);
 
     return DIV_ROUND_UP(data_tail - data_pos, segsz);
@@ -91,12 +98,15 @@  dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
     struct tcp_header *tcp_hdr;
     struct ip_header *ip_hdr;
     struct dp_packet *seg;
+    const char *data_pos;
     uint16_t tcp_offset;
     uint16_t tso_segsz;
+    uint16_t ip_id = 0;
     uint32_t tcp_seq;
-    uint16_t ip_id;
+    bool outer_ipv4;
     int hdr_len;
     int seg_len;
+    bool tnl;
 
     tso_segsz = dp_packet_get_tso_segsz(p);
     if (!tso_segsz) {
@@ -105,20 +115,35 @@  dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
         return false;
     }
 
-    tcp_hdr = dp_packet_l4(p);
-    tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl);
-    tcp_seq = ntohl(get_16aligned_be32(&tcp_hdr->tcp_seq));
-    hdr_len = ((char *) dp_packet_l4(p) - (char *) dp_packet_eth(p))
-              + tcp_offset * 4;
-    ip_id = 0;
-    if (dp_packet_hwol_is_ipv4(p)) {
+    if (dp_packet_hwol_is_tunnel_vxlan(p) ||
+            dp_packet_hwol_is_tunnel_geneve(p)) {
+        data_pos =  dp_packet_get_inner_tcp_payload(p);
+        outer_ipv4 = dp_packet_hwol_is_outer_ipv4(p);
+        tcp_hdr = dp_packet_inner_l4(p);
+        ip_hdr = dp_packet_inner_l3(p);
+        tnl = true;
+        if (outer_ipv4) {
+            ip_id = ntohs(((struct ip_header *) dp_packet_l3(p))->ip_id);
+        } else if (dp_packet_hwol_is_ipv4(p)) {
+            ip_id = ntohs(ip_hdr->ip_id);
+        }
+    } else {
+        data_pos = dp_packet_get_tcp_payload(p);
+        outer_ipv4 = dp_packet_hwol_is_ipv4(p);
+        tcp_hdr = dp_packet_l4(p);
         ip_hdr = dp_packet_l3(p);
-        ip_id = ntohs(ip_hdr->ip_id);
+        tnl = false;
+        if (outer_ipv4) {
+            ip_id = ntohs(ip_hdr->ip_id);
+        }
     }
 
+    tcp_offset = TCP_OFFSET(tcp_hdr->tcp_ctl);
+    tcp_seq = ntohl(get_16aligned_be32(&tcp_hdr->tcp_seq));
+    hdr_len = ((char *) tcp_hdr - (char *) dp_packet_eth(p))
+              + tcp_offset * 4;
     const char *data_tail = (char *) dp_packet_tail(p)
                             - dp_packet_l2_pad_size(p);
-    const char *data_pos = dp_packet_get_tcp_payload(p);
     int n_segs = dp_packet_gso_nr_segs(p);
 
     for (int i = 0; i < n_segs; i++) {
@@ -130,8 +155,26 @@  dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
         seg = dp_packet_gso_seg_new(p, hdr_len, data_pos, seg_len);
         data_pos += seg_len;
 
+        if (tnl) {
+            /* Update tunnel L3 header. */
+            if (dp_packet_hwol_is_ipv4(seg)) {
+                ip_hdr = dp_packet_inner_l3(seg);
+                ip_hdr->ip_tot_len = htons(sizeof *ip_hdr +
+                                           dp_packet_inner_l4_size(seg));
+                ip_hdr->ip_id = htons(ip_id);
+                ip_hdr->ip_csum = 0;
+                ip_id++;
+            } else {
+                struct ovs_16aligned_ip6_hdr *ip6_hdr;
+
+                ip6_hdr = dp_packet_inner_l3(seg);
+                ip6_hdr->ip6_ctlun.ip6_un1.ip6_un1_plen
+                    = htons(dp_packet_inner_l3_size(seg) - sizeof *ip6_hdr);
+            }
+        }
+
         /* Update L3 header. */
-        if (dp_packet_hwol_is_ipv4(seg)) {
+        if (outer_ipv4) {
             ip_hdr = dp_packet_l3(seg);
             ip_hdr->ip_tot_len = htons(sizeof *ip_hdr +
                                        dp_packet_l4_size(seg));
@@ -146,7 +189,11 @@  dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
         }
 
         /* Update L4 header. */
-        tcp_hdr = dp_packet_l4(seg);
+        if (tnl) {
+            tcp_hdr = dp_packet_inner_l4(seg);
+        } else {
+            tcp_hdr = dp_packet_l4(seg);
+        }
         put_16aligned_be32(&tcp_hdr->tcp_seq, htonl(tcp_seq));
         tcp_seq += seg_len;
         if (OVS_LIKELY(i < (n_segs - 1))) {
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 2fa17d814..c3dabc5b0 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -529,6 +529,16 @@  dp_packet_inner_l3(const struct dp_packet *b)
            : NULL;
 }
 
+static inline size_t
+dp_packet_inner_l3_size(const struct dp_packet *b)
+{
+    return OVS_LIKELY(b->inner_l3_ofs != UINT16_MAX)
+           ? (const char *) dp_packet_tail(b)
+           - (const char *) dp_packet_inner_l3(b)
+           - dp_packet_l2_pad_size(b)
+           : 0;
+}
+
 static inline void *
 dp_packet_inner_l4(const struct dp_packet *b)
 {
@@ -563,6 +573,22 @@  dp_packet_get_tcp_payload(const struct dp_packet *b)
     return NULL;
 }
 
+static inline const void *
+dp_packet_get_inner_tcp_payload(const struct dp_packet *b)
+{
+    size_t l4_size = dp_packet_inner_l4_size(b);
+
+    if (OVS_LIKELY(l4_size >= TCP_HEADER_LEN)) {
+        struct tcp_header *tcp = dp_packet_inner_l4(b);
+        int tcp_len = TCP_OFFSET(tcp->tcp_ctl) * 4;
+
+        if (OVS_LIKELY(tcp_len >= TCP_HEADER_LEN && tcp_len <= l4_size)) {
+            return (const char *) tcp + tcp_len;
+        }
+    }
+    return NULL;
+}
+
 static inline uint32_t
 dp_packet_get_tcp_payload_length(const struct dp_packet *pkt)
 {
diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
index dee9ab344..369b92acc 100644
--- a/lib/netdev-native-tnl.c
+++ b/lib/netdev-native-tnl.c
@@ -348,6 +348,14 @@  netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,
     if (l4_ofs != UINT16_MAX) {
         packet->inner_l4_ofs = l4_ofs + data->header_len;
     }
+
+    if (dp_packet_hwol_is_tso(packet)) {
+        uint16_t tso_segsz = dp_packet_get_tso_segsz(packet);
+        if (tso_segsz > data->header_len) {
+            tso_segsz -= data->header_len;
+            dp_packet_set_tso_segsz(packet, tso_segsz);
+        }
+    }
 }
 
 static void *
diff --git a/lib/netdev.c b/lib/netdev.c
index f2d921ed6..1d59bbe5d 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -69,8 +69,6 @@  COVERAGE_DEFINE(netdev_received);
 COVERAGE_DEFINE(netdev_sent);
 COVERAGE_DEFINE(netdev_add_router);
 COVERAGE_DEFINE(netdev_get_stats);
-COVERAGE_DEFINE(netdev_vxlan_tso_drops);
-COVERAGE_DEFINE(netdev_geneve_tso_drops);
 COVERAGE_DEFINE(netdev_push_header_drops);
 COVERAGE_DEFINE(netdev_soft_seg_good);
 COVERAGE_DEFINE(netdev_soft_seg_drops);
@@ -910,28 +908,23 @@  netdev_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch,
     struct dp_packet *packet;
     int error;
 
-    if (userspace_tso_enabled() &&
-        !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) {
-        DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
-            if (dp_packet_hwol_is_tso(packet)) {
-                if (dp_packet_hwol_is_tunnel_vxlan(packet)
-                    && !(netdev_flags & NETDEV_TX_VXLAN_TNL_TSO)) {
-                        VLOG_WARN_RL(&rl, "%s: No VXLAN TSO support",
-                                     netdev_get_name(netdev));
-                        COVERAGE_INC(netdev_vxlan_tso_drops);
-                        dp_packet_delete_batch(batch, true);
-                        return false;
+    if (userspace_tso_enabled()) {
+        if (!(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) {
+            DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
+                if (dp_packet_hwol_is_tso(packet)) {
+                    return netdev_send_tso(netdev, qid, batch, concurrent_txq);
                 }
-
-                if (dp_packet_hwol_is_tunnel_geneve(packet)
-                    && !(netdev_flags & NETDEV_TX_GENEVE_TNL_TSO)) {
-                        VLOG_WARN_RL(&rl, "%s: No GENEVE TSO support",
-                                     netdev_get_name(netdev));
-                        COVERAGE_INC(netdev_geneve_tso_drops);
-                        dp_packet_delete_batch(batch, true);
-                        return false;
+            }
+        } else if (!(netdev_flags & (NETDEV_TX_VXLAN_TNL_TSO |
+                                     NETDEV_TX_GENEVE_TNL_TSO))) {
+            DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
+                if (!dp_packet_hwol_is_tso(packet)) {
+                    continue;
+                }
+                if (dp_packet_hwol_is_tunnel_vxlan(packet) ||
+                    dp_packet_hwol_is_tunnel_geneve(packet)) {
+                    return netdev_send_tso(netdev, qid, batch, concurrent_txq);
                 }
-                return netdev_send_tso(netdev, qid, batch, concurrent_txq);
             }
         }
     }
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index 98e494abf..c1681506e 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -351,6 +351,64 @@  OVS_WAIT_UNTIL([diff -q payload.bin udp_data])
 OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
+AT_SETUP([datapath - tcp over vxlan tunnel with software fallback])
+AT_SKIP_IF([test $HAVE_NC = no])
+OVS_CHECK_VXLAN()
+
+OVS_TRAFFIC_VSWITCHD_START()
+ADD_BR([br-underlay])
+
+AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"])
+AT_CHECK([ovs-ofctl add-flow br-underlay "actions=normal"])
+
+ADD_NAMESPACES(at_ns0)
+
+dnl Set up underlay link from host into the namespace using veth pair.
+ADD_VETH(p0, at_ns0, br-underlay, "172.31.1.1/24")
+AT_CHECK([ip addr add dev br-underlay "172.31.1.100/24"])
+AT_CHECK([ip link set dev br-underlay up])
+
+dnl Test the case where only one side has all checksum and tso offload disabled.
+AT_CHECK([ethtool -K ovs-p0 tso off], [0], [ignore], [ignore])
+AT_CHECK([ethtool -K ovs-p0 sg off], [0], [ignore], [ignore])
+
+dnl Reinitialize.
+AT_CHECK([ovs-vsctl del-port ovs-p0])
+AT_CHECK([ovs-vsctl add-port br-underlay ovs-p0])
+
+dnl Set up tunnel endpoints on OVS outside the namespace and with a native
+dnl linux device inside the namespace.
+ADD_OVS_TUNNEL([vxlan], [br0], [at_vxlan0], [172.31.1.1], [10.1.1.100/24])
+ADD_NATIVE_TUNNEL([vxlan], [at_vxlan1], [at_ns0], [172.31.1.100], [10.1.1.1/24],
+                  [id 0 dstport 4789])
+
+dnl First, check the underlay.
+NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -W 2 172.31.1.100 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+dnl Check that the tunnel is up.
+NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -W 2 10.1.1.100 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+dnl Initialize the listener before it is needed.
+NETNS_DAEMONIZE([at_ns0], [nc -l 10.1.1.1 1234 > data2], [nc.pid])
+
+dnl Verify that ncat is ready.
+OVS_WAIT_UNTIL([NS_EXEC([at_ns0], [netstat -ln | grep :1234])])
+
+dnl Large TCP transfer aimed towards ovs-p0, which has TSO disabled.
+AT_CHECK([dd if=/dev/urandom of=payload.bin bs=60000 count=1 2> /dev/null])
+AT_CHECK([nc $NC_EOF_OPT 10.1.1.1 1234 < payload.bin])
+
+dnl Wait until transfer completes before checking.
+OVS_WAIT_WHILE([kill -0 $(cat nc.pid)])
+AT_CHECK([diff -q payload.bin data2], [0])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
 AT_SETUP([datapath - ping vlan over vxlan tunnel])
 OVS_CHECK_TUNNEL_TSO()
 OVS_CHECK_VXLAN()