diff mbox series

[ovs-dev,v4,1/2] userspace: Support GRE TSO.

Message ID 20250108203128.215022-1-mkp@redhat.com
State Superseded
Headers show
Series [ovs-dev,v4,1/2] userspace: Support GRE TSO. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed

Commit Message

Mike Pattrick Jan. 8, 2025, 8:31 p.m. UTC
This patch extends the userspace datapath's support for tunnel TSO from
only supporting VXLAN and Geneve to also supporting GRE tunnels. There
is also a software fallback for cases where the egress netdev does not
support this feature.

Signed-off-by: Mike Pattrick <mkp@redhat.com>

---
v2:
 - Corrected logic in reset_tcp_seg
 - Sequenced GRE tunnels will now drop sequencing on TSO.
 - Documentation removed.
v3:
 - Updated dpdk netdev for gre tunnel offload
 - Corrected recirculate call
 - Added new helper function for offload tunnel type
v4:
 - Corrected system test comment

Signed-off-by: Mike Pattrick <mkp@redhat.com>
---
 lib/dp-packet-gso.c     |  30 +++++++----
 lib/dp-packet.c         |   2 +
 lib/dp-packet.h         |  31 +++++++++++
 lib/dpif-netdev.c       |   4 +-
 lib/netdev-dpdk.c       |  15 ++++++
 lib/netdev-native-tnl.c |  39 ++++++++++----
 lib/netdev-provider.h   |   1 +
 lib/netdev.c            |  16 +++---
 tests/dpif-netdev.at    |  85 +++++++++++++++++++++++++++----
 tests/system-traffic.at | 110 +++++++++++++++++++++++++++++++++++++++-
 10 files changed, 293 insertions(+), 40 deletions(-)

Comments

David Marchand Jan. 15, 2025, 3:59 p.m. UTC | #1
On Wed, Jan 8, 2025 at 9:32 PM Mike Pattrick <mkp@redhat.com> wrote:
>
> This patch extends the userspace datapaths support of tunnel tso from
> only supporting VxLAN and Geneve to also supporting GRE tunnels. There
> is also a software fallback for cases where the egress netdev does not
> support this feature.
>
> Signed-off-by: Mike Pattrick <mkp@redhat.com>
>
> ---
> v2:
>  - Corrected logic in reset_tcp_seg
>  - Sequenced GRE tunnels will now drop sequencing on TSO.
>  - Documentation removed.
> v3:
>  - Updated dpdk netdev for gre tunnel offload
>  - Corrected recirculate call
>  - Added new helper function for offload tunnel type
> v4:
>  - Corrected system test comment
>
> Signed-off-by: Mike Pattrick <mkp@redhat.com>

This patch lgtm.
We could enable more unit tests, but it is not a blocker.

Reviewed-by: David Marchand <david.marchand@redhat.com>
diff mbox series

Patch

diff --git a/lib/dp-packet-gso.c b/lib/dp-packet-gso.c
index 04ebb19da..235635977 100644
--- a/lib/dp-packet-gso.c
+++ b/lib/dp-packet-gso.c
@@ -73,8 +73,7 @@  dp_packet_gso_nr_segs(struct dp_packet *p)
     const char *data_tail;
     const char *data_pos;
 
-    if (dp_packet_hwol_is_tunnel_vxlan(p) ||
-        dp_packet_hwol_is_tunnel_geneve(p)) {
+    if (dp_packet_hwol_is_tunnel(p)) {
         data_pos = dp_packet_get_inner_tcp_payload(p);
     } else {
         data_pos = dp_packet_get_tcp_payload(p);
@@ -105,7 +104,9 @@  dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
     bool outer_ipv4;
     int hdr_len;
     int seg_len;
-    bool tnl;
+    bool udp_tnl = dp_packet_hwol_is_tunnel_vxlan(p) ||
+                   dp_packet_hwol_is_tunnel_geneve(p);
+    bool gre_tnl = dp_packet_hwol_is_tunnel_gre(p);
 
     tso_segsz = dp_packet_get_tso_segsz(p);
     if (!tso_segsz) {
@@ -114,11 +115,9 @@  dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
         return false;
     }
 
-    if (dp_packet_hwol_is_tunnel_vxlan(p) ||
-        dp_packet_hwol_is_tunnel_geneve(p)) {
+    if (udp_tnl || gre_tnl) {
         outer_ipv4 = dp_packet_hwol_is_outer_ipv4(p);
         tcp_hdr = dp_packet_inner_l4(p);
-        tnl = true;
 
         if (outer_ipv4) {
             outer_ip_id = ntohs(((struct ip_header *) dp_packet_l3(p))->ip_id);
@@ -130,7 +129,6 @@  dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
     } else {
         outer_ipv4 = dp_packet_hwol_is_ipv4(p);
         tcp_hdr = dp_packet_l4(p);
-        tnl = false;
 
         if (outer_ipv4) {
             struct ip_header *ip_hdr = dp_packet_l3(p);
@@ -156,13 +154,15 @@  dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
         seg = dp_packet_gso_seg_new(p, hdr_len, data_pos, seg_len);
         data_pos += seg_len;
 
-        if (tnl) {
+        if (udp_tnl) {
             /* Update tunnel UDP header length. */
             struct udp_header *tnl_hdr;
 
             tnl_hdr = dp_packet_l4(seg);
             tnl_hdr->udp_len = htons(dp_packet_l4_size(seg));
+        }
 
+        if (udp_tnl || gre_tnl) {
             /* Update tunnel inner L3 header. */
             if (dp_packet_hwol_is_ipv4(seg)) {
                 struct ip_header *ip_hdr = dp_packet_inner_l3(seg);
@@ -194,7 +194,7 @@  dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
         }
 
         /* Update L4 header. */
-        if (tnl) {
+        if (udp_tnl || gre_tnl) {
             tcp_hdr = dp_packet_inner_l4(seg);
         } else {
             tcp_hdr = dp_packet_l4(seg);
@@ -208,6 +208,18 @@  dp_packet_gso(struct dp_packet *p, struct dp_packet_batch **batches)
             tcp_hdr->tcp_ctl = TCP_CTL(tcp_flags, tcp_offset);
         }
 
+        if (gre_tnl) {
+            struct gre_base_hdr *ghdr;
+
+            ghdr = dp_packet_l4(seg);
+
+            if (ghdr->flags & htons(GRE_CSUM)) {
+                ovs_be16 *csum_opt = (ovs_be16 *) (ghdr + 1);
+                *csum_opt = 0;
+                *csum_opt = csum(ghdr, dp_packet_l4_size(seg));
+            }
+        }
+
         if (dp_packet_batch_is_full(curr_batch)) {
             curr_batch++;
         }
diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index df7bf8e6b..dad0d7be3 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -604,6 +604,8 @@  dp_packet_ol_send_prepare(struct dp_packet *p, uint64_t flags)
                        NETDEV_TX_OFFLOAD_SCTP_CKSUM |
                        NETDEV_TX_OFFLOAD_IPV4_CKSUM);
         }
+    } else if (dp_packet_hwol_is_tunnel_gre(p)) {
+        tnl_inner = true;
     }
 
     if (dp_packet_hwol_tx_ip_csum(p)) {
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 4afbbe722..f94a82b07 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -104,6 +104,9 @@  enum dp_packet_offload_mask {
     /* Offload tunnel packet, outer header is IPv6. */
     DEF_OL_FLAG(DP_PACKET_OL_TX_OUTER_IPV6,
                 RTE_MBUF_F_TX_OUTER_IPV6, 0x40000),
+    /* Offload packet is GRE tunnel. */
+    DEF_OL_FLAG(DP_PACKET_OL_TX_TUNNEL_GRE,
+                RTE_MBUF_F_TX_TUNNEL_GRE, 0x80000),
 
     /* Adding new field requires adding to DP_PACKET_OL_SUPPORTED_MASK. */
 };
@@ -123,6 +126,7 @@  enum dp_packet_offload_mask {
                                      DP_PACKET_OL_TX_IP_CKSUM        | \
                                      DP_PACKET_OL_TX_TUNNEL_GENEVE   | \
                                      DP_PACKET_OL_TX_TUNNEL_VXLAN    | \
+                                     DP_PACKET_OL_TX_TUNNEL_GRE      | \
                                      DP_PACKET_OL_TX_OUTER_IPV4      | \
                                      DP_PACKET_OL_TX_OUTER_IP_CKSUM  | \
                                      DP_PACKET_OL_TX_OUTER_UDP_CKSUM | \
@@ -1171,6 +1175,22 @@  dp_packet_hwol_is_tunnel_vxlan(struct dp_packet *b)
     return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_TUNNEL_VXLAN);
 }
 
+/* Returns 'true' if packet 'b' is marked for GRE tunnel offloading. */
+static inline bool
+dp_packet_hwol_is_tunnel_gre(struct dp_packet *b)
+{
+    return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_TUNNEL_GRE);
+}
+
+/* Returns 'true' if packet 'b' is marked for any tunnel offloading. */
+static inline bool
+dp_packet_hwol_is_tunnel(struct dp_packet *b)
+{
+    return !!(*dp_packet_ol_flags_ptr(b) & (DP_PACKET_OL_TX_TUNNEL_VXLAN |
+                                            DP_PACKET_OL_TX_TUNNEL_GRE |
+                                            DP_PACKET_OL_TX_TUNNEL_GENEVE));
+}
+
 /* Returns 'true' if packet 'b' is marked for outer IPv4 checksum offload. */
 static inline bool
 dp_packet_hwol_is_outer_ipv4_cksum(const struct dp_packet *b)
@@ -1289,11 +1309,19 @@  dp_packet_hwol_set_tunnel_vxlan(struct dp_packet *b)
     *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TUNNEL_VXLAN;
 }
 
+/* Mark packet 'b' for GRE tunnel offloading. */
+static inline void
+dp_packet_hwol_set_tunnel_gre(struct dp_packet *b)
+{
+    *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TUNNEL_GRE;
+}
+
 /* Clears tunnel offloading marks. */
 static inline void
 dp_packet_hwol_reset_tunnel(struct dp_packet *b)
 {
     *dp_packet_ol_flags_ptr(b) &= ~(DP_PACKET_OL_TX_TUNNEL_VXLAN |
+                                    DP_PACKET_OL_TX_TUNNEL_GRE |
                                     DP_PACKET_OL_TX_TUNNEL_GENEVE);
 }
 
@@ -1352,6 +1380,9 @@  dp_packet_hwol_reset_tcp_seg(struct dp_packet *p)
             ol_flags |= DP_PACKET_OL_TX_OUTER_IP_CKSUM;
         }
         ol_flags |= DP_PACKET_OL_TX_OUTER_UDP_CKSUM;
+    } else if (ol_flags & DP_PACKET_OL_TX_TUNNEL_GRE &&
+               ol_flags & DP_PACKET_OL_TX_OUTER_IPV4) {
+        ol_flags |= DP_PACKET_OL_TX_OUTER_IP_CKSUM;
     }
 
     *dp_packet_ol_flags_ptr(p) = ol_flags;
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 2a529f272..b572fab23 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -8928,9 +8928,7 @@  dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd,
     struct dp_packet *packet;
 
     DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, packets) {
-        if (dp_packet_hwol_is_tunnel_geneve(packet) ||
-                dp_packet_hwol_is_tunnel_vxlan(packet)) {
-
+        if (dp_packet_hwol_is_tunnel(packet)) {
             if (dp_packet_hwol_is_tso(packet)) {
                 /* Can't perform GSO in the middle of a pipeline. */
                 COVERAGE_INC(datapath_drop_tunnel_tso_recirc);
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 449c660a7..9d7f9dc76 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -421,6 +421,7 @@  enum dpdk_hw_ol_features {
     NETDEV_TX_GENEVE_TNL_TSO_OFFLOAD = 1 << 9,
     NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD = 1 << 10,
     NETDEV_TX_OUTER_UDP_CKSUM_OFFLOAD = 1 << 11,
+    NETDEV_TX_GRE_TNL_TSO_OFFLOAD = 1 << 12,
 };
 
 enum dpdk_rx_steer_flags {
@@ -1091,6 +1092,8 @@  netdev_dpdk_update_netdev_flags(struct netdev_dpdk *dev)
                                    NETDEV_TX_OFFLOAD_TCP_TSO);
     netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD,
                                    NETDEV_TX_VXLAN_TNL_TSO);
+    netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_GRE_TNL_TSO_OFFLOAD,
+                                   NETDEV_TX_GRE_TNL_TSO);
     netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_GENEVE_TNL_TSO_OFFLOAD,
                                    NETDEV_TX_GENEVE_TNL_TSO);
     netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD,
@@ -1158,6 +1161,10 @@  dpdk_eth_dev_port_config(struct netdev_dpdk *dev,
         conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_GENEVE_TNL_TSO;
     }
 
+    if (dev->hw_ol_features & NETDEV_TX_GRE_TNL_TSO_OFFLOAD) {
+        conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO;
+    }
+
     if (dev->hw_ol_features & NETDEV_TX_OUTER_IP_CKSUM_OFFLOAD) {
         conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM;
     }
@@ -1434,6 +1441,13 @@  dpdk_eth_dev_init(struct netdev_dpdk *dev)
             VLOG_WARN("%s: Tx Geneve tunnel TSO offload is not supported.",
                       netdev_get_name(&dev->up));
         }
+
+        if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_GRE_TNL_TSO) {
+            dev->hw_ol_features |= NETDEV_TX_GRE_TNL_TSO_OFFLOAD;
+        } else {
+            VLOG_WARN("%s: Tx GRE tunnel TSO offload is not supported.",
+                      netdev_get_name(&dev->up));
+        }
     }
 
     n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq);
@@ -2629,6 +2643,7 @@  netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
     const uint64_t tunnel_type = mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK;
     if (OVS_UNLIKELY(tunnel_type &&
                      tunnel_type != RTE_MBUF_F_TX_TUNNEL_GENEVE &&
+                     tunnel_type != RTE_MBUF_F_TX_TUNNEL_GRE &&
                      tunnel_type != RTE_MBUF_F_TX_TUNNEL_VXLAN)) {
         VLOG_WARN_RL(&rl, "%s: Unexpected tunnel type: %#"PRIx64,
                      netdev_get_name(&dev->up), tunnel_type);
diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
index ede5e1686..66398a741 100644
--- a/lib/netdev-native-tnl.c
+++ b/lib/netdev-native-tnl.c
@@ -194,8 +194,7 @@  netdev_tnl_push_ip_header(struct dp_packet *packet, const void *header,
         packet_set_ipv6_flow_label(&ip6->ip6_flow, ipv6_label);
         packet->l4_ofs = dp_packet_size(packet) - *ip_tot_size;
 
-        if (dp_packet_hwol_is_tunnel_geneve(packet) ||
-            dp_packet_hwol_is_tunnel_vxlan(packet)) {
+        if (dp_packet_hwol_is_tunnel(packet)) {
             dp_packet_hwol_set_tx_outer_ipv6(packet);
         } else {
             dp_packet_hwol_set_tx_ipv6(packet);
@@ -207,8 +206,7 @@  netdev_tnl_push_ip_header(struct dp_packet *packet, const void *header,
         ip = netdev_tnl_ip_hdr(eth);
         ip->ip_tot_len = htons(*ip_tot_size);
         /* Postpone checksum to when the packet is pushed to the port. */
-        if (dp_packet_hwol_is_tunnel_geneve(packet) ||
-            dp_packet_hwol_is_tunnel_vxlan(packet)) {
+        if (dp_packet_hwol_is_tunnel(packet)) {
             dp_packet_hwol_set_tx_outer_ipv4(packet);
             dp_packet_hwol_set_tx_outer_ipv4_csum(packet);
         } else {
@@ -271,7 +269,9 @@  dp_packet_tnl_ol_process(struct dp_packet *packet,
         ip = dp_packet_l3(packet);
 
         if (data->tnl_type == OVS_VPORT_TYPE_GENEVE ||
-            data->tnl_type == OVS_VPORT_TYPE_VXLAN) {
+            data->tnl_type == OVS_VPORT_TYPE_VXLAN ||
+            data->tnl_type == OVS_VPORT_TYPE_GRE ||
+            data->tnl_type == OVS_VPORT_TYPE_IP6GRE) {
 
             if (IP_VER(ip->ip_ihl_ver) == 4) {
                 dp_packet_hwol_set_tx_ipv4(packet);
@@ -286,6 +286,9 @@  dp_packet_tnl_ol_process(struct dp_packet *packet,
         dp_packet_hwol_set_tunnel_geneve(packet);
     } else if (data->tnl_type == OVS_VPORT_TYPE_VXLAN) {
         dp_packet_hwol_set_tunnel_vxlan(packet);
+    } else if (data->tnl_type == OVS_VPORT_TYPE_GRE ||
+               data->tnl_type == OVS_VPORT_TYPE_IP6GRE) {
+        dp_packet_hwol_set_tunnel_gre(packet);
     }
 }
 
@@ -535,9 +538,13 @@  netdev_gre_push_header(const struct netdev *netdev,
                        const struct ovs_action_push_tnl *data)
 {
     struct netdev_vport *dev = netdev_vport_cast(netdev);
+    uint16_t l3_ofs = packet->l3_ofs;
+    uint16_t l4_ofs = packet->l4_ofs;
     struct gre_base_hdr *greh;
     int ip_tot_size;
 
+    dp_packet_tnl_ol_process(packet, data);
+
     greh = netdev_tnl_push_ip_header(packet, data->header, data->header_len,
                                      &ip_tot_size, 0);
 
@@ -547,11 +554,23 @@  netdev_gre_push_header(const struct netdev *netdev,
     }
 
     if (greh->flags & htons(GRE_SEQ)) {
-        /* Last 4 byte is GRE seqno */
-        int seq_ofs = gre_header_len(greh->flags) - 4;
-        ovs_16aligned_be32 *seq_opt =
-            ALIGNED_CAST(ovs_16aligned_be32 *, (char *)greh + seq_ofs);
-        put_16aligned_be32(seq_opt, htonl(atomic_count_inc(&dev->gre_seqno)));
+        if (!dp_packet_hwol_is_tso(packet)) {
+            /* The last 4 bytes are the GRE seqno. */
+            int seq_ofs = gre_header_len(greh->flags) - 4;
+            ovs_16aligned_be32 *seq_opt =
+                ALIGNED_CAST(ovs_16aligned_be32 *, (char *) greh + seq_ofs);
+            put_16aligned_be32(seq_opt,
+                               htonl(atomic_count_inc(&dev->gre_seqno)));
+        } else {
+            VLOG_WARN_RL(&err_rl, "Cannot use GRE Sequence numbers with TSO.");
+        }
+    }
+
+    if (l3_ofs != UINT16_MAX) {
+        packet->inner_l3_ofs = l3_ofs + data->header_len;
+    }
+    if (l4_ofs != UINT16_MAX) {
+        packet->inner_l4_ofs = l4_ofs + data->header_len;
     }
 }
 
diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
index 22840a058..5ae379469 100644
--- a/lib/netdev-provider.h
+++ b/lib/netdev-provider.h
@@ -47,6 +47,7 @@  enum netdev_ol_flags {
     NETDEV_TX_GENEVE_TNL_TSO = 1 << 6,
     NETDEV_TX_OFFLOAD_OUTER_IP_CKSUM = 1 << 7,
     NETDEV_TX_OFFLOAD_OUTER_UDP_CKSUM = 1 << 8,
+    NETDEV_TX_GRE_TNL_TSO = 1 << 9,
 };
 
 /* A network device (e.g. an Ethernet device).
diff --git a/lib/netdev.c b/lib/netdev.c
index 02beac9d0..9dd94ebdd 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -916,11 +916,11 @@  netdev_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch,
                 }
             }
         } else if (!(netdev_flags & (NETDEV_TX_VXLAN_TNL_TSO |
+                                     NETDEV_TX_GRE_TNL_TSO |
                                      NETDEV_TX_GENEVE_TNL_TSO))) {
             DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
                 if (dp_packet_hwol_is_tso(packet) &&
-                    (dp_packet_hwol_is_tunnel_vxlan(packet) ||
-                     dp_packet_hwol_is_tunnel_geneve(packet))) {
+                    dp_packet_hwol_is_tunnel(packet)) {
                     return netdev_send_tso(netdev, qid, batch, concurrent_txq);
                 }
             }
@@ -1011,6 +1011,8 @@  netdev_push_header(const struct netdev *netdev,
     DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) {
         if (OVS_UNLIKELY(data->tnl_type != OVS_VPORT_TYPE_GENEVE &&
                          data->tnl_type != OVS_VPORT_TYPE_VXLAN &&
+                         data->tnl_type != OVS_VPORT_TYPE_GRE &&
+                         data->tnl_type != OVS_VPORT_TYPE_IP6GRE &&
                          dp_packet_hwol_is_tso(packet))) {
             COVERAGE_INC(netdev_push_header_drops);
             dp_packet_delete(packet);
@@ -1019,16 +1021,17 @@  netdev_push_header(const struct netdev *netdev,
                          netdev_get_name(netdev), netdev_get_type(netdev));
         } else {
             if (data->tnl_type != OVS_VPORT_TYPE_GENEVE &&
-                data->tnl_type != OVS_VPORT_TYPE_VXLAN) {
+                data->tnl_type != OVS_VPORT_TYPE_VXLAN &&
+                data->tnl_type != OVS_VPORT_TYPE_GRE &&
+                data->tnl_type != OVS_VPORT_TYPE_IP6GRE) {
                 dp_packet_ol_send_prepare(packet, 0);
-            } else if (dp_packet_hwol_is_tunnel_geneve(packet) ||
-                       dp_packet_hwol_is_tunnel_vxlan(packet)) {
+            } else if (dp_packet_hwol_is_tunnel(packet)) {
                 if (dp_packet_hwol_is_tso(packet)) {
                     COVERAGE_INC(netdev_push_header_drops);
                     dp_packet_delete(packet);
                     VLOG_WARN_RL(&rl, "%s: Tunneling packets with TSO is not "
                                       "supported with multiple levels of "
-                                      "VXLAN or GENEVE encapsulation.",
+                                      "VXLAN, GENEVE, or GRE encapsulation.",
                                  netdev_get_name(netdev));
                     continue;
                 }
@@ -1480,6 +1483,7 @@  netdev_get_status(const struct netdev *netdev, struct smap *smap)
         OL_ADD_STAT("sctp_csum", NETDEV_TX_OFFLOAD_SCTP_CKSUM);
         OL_ADD_STAT("tcp_seg", NETDEV_TX_OFFLOAD_TCP_TSO);
         OL_ADD_STAT("vxlan_tso", NETDEV_TX_VXLAN_TNL_TSO);
+        OL_ADD_STAT("gre_tso", NETDEV_TX_GRE_TNL_TSO);
         OL_ADD_STAT("geneve_tso", NETDEV_TX_GENEVE_TNL_TSO);
         OL_ADD_STAT("out_ip_csum", NETDEV_TX_OFFLOAD_OUTER_IP_CKSUM);
         OL_ADD_STAT("out_udp_csum", NETDEV_TX_OFFLOAD_OUTER_UDP_CKSUM);
diff --git a/tests/dpif-netdev.at b/tests/dpif-netdev.at
index 36cea6aa9..60060ee2e 100644
--- a/tests/dpif-netdev.at
+++ b/tests/dpif-netdev.at
@@ -658,11 +658,11 @@  OVS_VSWITCHD_START(
                      other-config:datapath-id=1234 fail-mode=secure])
 
 AT_CHECK([ovs-vsctl get interface p1 status | sed -n 's/^{\(.*\).*}$/\1/p'], [0], [dnl
-tx_geneve_tso_offload="false", tx_ip_csum_offload="false", tx_out_ip_csum_offload="false", tx_out_udp_csum_offload="false", tx_sctp_csum_offload="false", tx_tcp_csum_offload="false", tx_tcp_seg_offload="false", tx_udp_csum_offload="false", tx_vxlan_tso_offload="false"
+tx_geneve_tso_offload="false", tx_gre_tso_offload="false", tx_ip_csum_offload="false", tx_out_ip_csum_offload="false", tx_out_udp_csum_offload="false", tx_sctp_csum_offload="false", tx_tcp_csum_offload="false", tx_tcp_seg_offload="false", tx_udp_csum_offload="false", tx_vxlan_tso_offload="false"
 ], [])
 
 AT_CHECK([ovs-vsctl get interface br0 status | sed -n 's/^{\(.*\).*}$/\1/p'], [0], [dnl
-tx_geneve_tso_offload="false", tx_ip_csum_offload="false", tx_out_ip_csum_offload="false", tx_out_udp_csum_offload="false", tx_sctp_csum_offload="false", tx_tcp_csum_offload="false", tx_tcp_seg_offload="false", tx_udp_csum_offload="false", tx_vxlan_tso_offload="false"
+tx_geneve_tso_offload="false", tx_gre_tso_offload="false", tx_ip_csum_offload="false", tx_out_ip_csum_offload="false", tx_out_udp_csum_offload="false", tx_sctp_csum_offload="false", tx_tcp_csum_offload="false", tx_tcp_seg_offload="false", tx_udp_csum_offload="false", tx_vxlan_tso_offload="false"
 ], [])
 
 OVS_VSWITCHD_STOP
@@ -937,15 +937,26 @@  AT_CHECK([ovs-vsctl add-br int-br -- set bridge int-br datapath_type=dummy \
                        options:csum=true ofport_request=4 \
                     -- add-port int-br t4 -- set Interface t4 type=geneve \
                        options:remote_ip=2001:cafe::93 options:key=123 \
-                       options:csum=true ofport_request=5], [0])
+                       options:csum=true ofport_request=5 \
+                    -- add-port int-br t5 -- set Interface t5 type=gre \
+                       options:remote_ip=2001:cafe::93 options:key=123 \
+                       options:csum=true ofport_request=6 \
+                    -- add-port int-br t6 -- set Interface t6 type=gre \
+                       options:remote_ip=1.1.2.92 options:key=123 \
+                       options:csum=false ofport_request=7], [0])
 
-flow_s="eth(src=8a:bf:7e:2f:05:84,dst=0a:8f:39:4f:e0:73),eth_type(0x0800),
-        ipv4(src=192.168.123.2,dst=192.168.123.1,proto=6,tos=1,ttl=64,frag=no),
-        tcp(src=54392,dst=5201),tcp_flags(ack)"
+dnl The final tunnel intentionally has checksum turned off to exercise a
+dnl different code path; there is no GRE checksum offload anyway.
 
-flow_s_v6="eth(src=8a:bf:7e:2f:05:84,dst=0a:8f:39:4f:e0:73),eth_type(0x86dd),
-           ipv6(src=2001:cafe::88,dst=2001:cafe::92,proto=6),
-           tcp(src=54392,dst=5201),tcp_flags(ack)"
+m4_define([IPV4_TSO], [m4_join([,],
+  [eth(src=8a:bf:7e:2f:05:84,dst=0a:8f:39:4f:e0:73),eth_type(0x0800)],
+  [ipv4(src=192.168.123.2,dst=192.168.123.1,proto=6,tos=1,ttl=64,frag=no)],
+  [tcp(src=54392,dst=5201),tcp_flags(ack)])])
+
+m4_define([IPV6_TSO], [m4_join([,],
+  [eth(src=8a:bf:7e:2f:05:84,dst=0a:8f:39:4f:e0:73),eth_type(0x86dd)],
+  [ipv6(src=2001:cafe::88,dst=2001:cafe::92,proto=6)],
+  [tcp(src=54392,dst=5201),tcp_flags(ack)])])
 
 dnl Setup dummy interface tunnel connectivity.
 AT_CHECK([ovs-appctl netdev-dummy/ip4addr br1 1.1.2.88/24], [0], [OK
@@ -968,9 +979,9 @@  AT_CHECK([ovs-vsctl set Interface p1 options:tx_pcap=p1.pcap -- \
                     set Interface int-br options:ol_ip_csum_set_good=false -- \
                     set Interface int-br options:ol_tso_segsz=500])
 
-AT_CHECK([ovs-appctl netdev-dummy/receive int-br "in_port(2),${flow_s}" \
+AT_CHECK([ovs-appctl netdev-dummy/receive int-br "IPV4_TSO" \
           --len 2054])
-AT_CHECK([ovs-appctl netdev-dummy/receive int-br "in_port(2),${flow_s_v6}" \
+AT_CHECK([ovs-appctl netdev-dummy/receive int-br "IPV6_TSO" \
           --len 2074])
 
 dnl Check that first we have the following packets:
@@ -984,10 +995,26 @@  dnl - IPv6 Geneve tunnel with IPv4 payload
 dnl - IPv6 Geneve tunnel with IPv6 payload
 dnl - IPv6 Geneve tunnel with IPv4 payload
 dnl - IPv6 Geneve tunnel with IPv6 payload
+dnl - IPv4 GRE tunnel with IPv4 payload
+dnl - IPv4 GRE tunnel with IPv6 payload
+dnl - IPv6 GRE tunnel with IPv4 payload
+dnl - IPv6 GRE tunnel with IPv6 payload
 dnl These are sorted since OVS may send payloads to the tunnels in any order.
 zero400=$(printf '0%.0s' $(seq 800))
 zero100=$(printf '0%.0s' $(seq 200))
 AT_CHECK_UNQUOTED([ovs-pcap p1.pcap | sort], [0], [dnl
+[aabbcc000001aa55aa55000308004500025a00004000402f31c0010102580101025c200065580000007b0a8f394fe0738abf]dnl
+[7e2f058486dd60000000020806002001cafe0000000000000000000000882001cafe000000000000000000000092d4781451]dnl
+[000000000000000050100000edfd0000${zero100}${zero400}]
+[aabbcc000001aa55aa55000308004500025a00014000402f31bf010102580101025c200065580000007b0a8f394fe0738abf]dnl
+[7e2f058486dd60000000020806002001cafe0000000000000000000000882001cafe000000000000000000000092d4781451]dnl
+[000001f40000000050100000ec090000${zero100}${zero400}]
+[aabbcc000001aa55aa55000308004500025a00024000402f31be010102580101025c200065580000007b0a8f394fe0738abf]dnl
+[7e2f058486dd60000000020806002001cafe0000000000000000000000882001cafe000000000000000000000092d4781451]dnl
+[000003e80000000050100000ea150000${zero100}${zero400}]
+[aabbcc000001aa55aa55000308004500025a00034000402f31bd010102580101025c200065580000007b0a8f394fe0738abf]dnl
+[7e2f058486dd60000000020806002001cafe0000000000000000000000882001cafe000000000000000000000092d4781451]dnl
+[000005dc0000000050100000e8210000${zero100}${zero400}]
 [aabbcc000001aa55aa55000308004500026200004000401131d6010102580101025ce01312b5024e5f360800000000007b00]dnl
 [0a8f394fe0738abf7e2f058486dd60000000020806002001cafe0000000000000000000000882001cafe0000000000000000]dnl
 [00000092d4781451000000000000000050100000edfd0000${zero100}${zero400}]
@@ -1012,6 +1039,18 @@  AT_CHECK_UNQUOTED([ovs-pcap p1.pcap | sort], [0], [dnl
 [aabbcc000001aa55aa55000308004500026200034000401131d3010102580101025ce01317c1024efcd10000655800007b00]dnl
 [0a8f394fe0738abf7e2f058486dd60000000020806002001cafe0000000000000000000000882001cafe0000000000000000]dnl
 [00000092d4781451000005dc0000000050100000e8210000${zero100}${zero400}]
+[aabbcc000001aa55aa55000308004501024600004000402f31d3010102580101025c200065580000007b0a8f394fe0738abf]dnl
+[7e2f058408004501021c0000000040060187c0a87b02c0a87b01d47814510000000000000000501000004dc20000]dnl
+[${zero100}${zero400}]
+[aabbcc000001aa55aa55000308004501024600014000402f31d2010102580101025c200065580000007b0a8f394fe0738abf]dnl
+[7e2f058408004501021c0001000040060186c0a87b02c0a87b01d4781451000001f400000000501000004bce0000]dnl
+[${zero100}${zero400}]
+[aabbcc000001aa55aa55000308004501024600024000402f31d1010102580101025c200065580000007b0a8f394fe0738abf]dnl
+[7e2f058408004501021c0002000040060185c0a87b02c0a87b01d4781451000003e8000000005010000049da0000]dnl
+[${zero100}${zero400}]
+[aabbcc000001aa55aa55000308004501024600034000402f31d0010102580101025c200065580000007b0a8f394fe0738abf]dnl
+[7e2f058408004501021c0003000040060184c0a87b02c0a87b01d4781451000005dc000000005010000047e60000]dnl
+[${zero100}${zero400}]
 [aabbcc000001aa55aa55000308004501024e00004000401131e9010102580101025ce01312b5023abd990800000000007b00]dnl
 [0a8f394fe0738abf7e2f058408004501021c0000000040060187c0a87b02c0a87b01d4781451000000000000000050100000]dnl
 [4dc20000${zero100}${zero400}]
@@ -1036,6 +1075,18 @@  AT_CHECK_UNQUOTED([ovs-pcap p1.pcap | sort], [0], [dnl
 [aabbcc000001aa55aa55000308004501024e00034000401131e6010102580101025ce01317c1023a5b350000655800007b00]dnl
 [0a8f394fe0738abf7e2f058408004501021c0003000040060184c0a87b02c0a87b01d4781451000005dc0000000050100000]dnl
 [47e60000${zero100}${zero400}]
+[aabbcc000006aa55aa55000386dd60000000024a2f402001cafe0000000000000000000000882001cafe0000000000000000]dnl
+[00000093a0006558da8e00000000007b0a8f394fe0738abf7e2f058486dd60000000020806002001cafe0000000000000000]dnl
+[000000882001cafe000000000000000000000092d4781451000005dc0000000050100000e8210000${zero100}${zero400}]
+[aabbcc000006aa55aa55000386dd60000000024a2f402001cafe0000000000000000000000882001cafe0000000000000000]dnl
+[00000093a0006558dc8200000000007b0a8f394fe0738abf7e2f058486dd60000000020806002001cafe0000000000000000]dnl
+[000000882001cafe000000000000000000000092d4781451000003e80000000050100000ea150000${zero100}${zero400}]
+[aabbcc000006aa55aa55000386dd60000000024a2f402001cafe0000000000000000000000882001cafe0000000000000000]dnl
+[00000093a0006558de7600000000007b0a8f394fe0738abf7e2f058486dd60000000020806002001cafe0000000000000000]dnl
+[000000882001cafe000000000000000000000092d4781451000001f40000000050100000ec090000${zero100}${zero400}]
+[aabbcc000006aa55aa55000386dd60000000024a2f402001cafe0000000000000000000000882001cafe0000000000000000]dnl
+[00000093a0006558e06a00000000007b0a8f394fe0738abf7e2f058486dd60000000020806002001cafe0000000000000000]dnl
+[000000882001cafe000000000000000000000092d4781451000000000000000050100000edfd0000${zero100}${zero400}]
 [aabbcc000006aa55aa55000386dd60000000024e11402001cafe0000000000000000000000882001cafe0000000000000000]dnl
 [00000093e01312b5024e8ed10800000000007b000a8f394fe0738abf7e2f058486dd60000000020806002001cafe00000000]dnl
 [00000000000000882001cafe000000000000000000000092d4781451000000000000000050100000edfd0000${zero100}]dnl
@@ -1068,6 +1119,18 @@  AT_CHECK_UNQUOTED([ovs-pcap p1.pcap | sort], [0], [dnl
 [00000093e01317c1024e2c6d0000655800007b000a8f394fe0738abf7e2f058486dd60000000020806002001cafe00000000]dnl
 [00000000000000882001cafe000000000000000000000092d4781451000005dc0000000050100000e8210000${zero100}]dnl
 [${zero400}]
+[aabbcc000006aa55aa55000386dd6010000002362f402001cafe0000000000000000000000882001cafe0000000000000000]dnl
+[00000093a00065583a4e00000000007b0a8f394fe0738abf7e2f058408004501021c0003000040060184c0a87b02c0a87b01]dnl
+[d4781451000005dc000000005010000047e60000${zero100}${zero400}]
+[aabbcc000006aa55aa55000386dd6010000002362f402001cafe0000000000000000000000882001cafe0000000000000000]dnl
+[00000093a00065583c4300000000007b0a8f394fe0738abf7e2f058408004501021c0002000040060185c0a87b02c0a87b01]dnl
+[d4781451000003e8000000005010000049da0000${zero100}${zero400}]
+[aabbcc000006aa55aa55000386dd6010000002362f402001cafe0000000000000000000000882001cafe0000000000000000]dnl
+[00000093a00065583e3800000000007b0a8f394fe0738abf7e2f058408004501021c0001000040060186c0a87b02c0a87b01]dnl
+[d4781451000001f400000000501000004bce0000${zero100}${zero400}]
+[aabbcc000006aa55aa55000386dd6010000002362f402001cafe0000000000000000000000882001cafe0000000000000000]dnl
+[00000093a0006558402d00000000007b0a8f394fe0738abf7e2f058408004501021c0000000040060187c0a87b02c0a87b01]dnl
+[d47814510000000000000000501000004dc20000${zero100}${zero400}]
 [aabbcc000006aa55aa55000386dd60100000023a11402001cafe0000000000000000000000882001cafe0000000000000000]dnl
 [00000093e01312b5023aed340800000000007b000a8f394fe0738abf7e2f058408004501021c0000000040060187c0a87b02]dnl
 [c0a87b01d47814510000000000000000501000004dc20000${zero100}${zero400}]
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index 16de8da20..e6e63cf78 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -557,7 +557,6 @@  OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
 AT_SETUP([datapath - ping over gre tunnel])
-OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15)
 OVS_CHECK_GRE()
 
 OVS_TRAFFIC_VSWITCHD_START()
@@ -615,6 +614,96 @@  OVS_WAIT_UNTIL([diff -q payload.bin udp_data])
 OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
 
+AT_SETUP([datapath - tcp over gre tunnel with software fallback])
+AT_SKIP_IF([test $HAVE_NC = no])
+AT_SKIP_IF([test $HAVE_TCPDUMP = no])
+OVS_CHECK_GRE()
+
+dnl This test is only valid with TSO. If the kernel segments the packets, the
+dnl packet lengths in the final test will be different.
+m4_ifndef([CHECK_SYSTEM_TSO], [AT_SKIP_IF(:)])
+
+OVS_TRAFFIC_VSWITCHD_START()
+ADD_BR([br-underlay])
+
+AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"])
+AT_CHECK([ovs-ofctl add-flow br-underlay "actions=normal"])
+
+ADD_NAMESPACES(at_ns0)
+
+dnl Set up underlay link from host into the namespace using veth pair.
+ADD_VETH(p0, at_ns0, br-underlay, "172.31.1.1/24")
+AT_CHECK([ip addr add dev br-underlay "172.31.1.100/24"])
+AT_CHECK([ip link set dev br-underlay up])
+
+dnl Test the case where one side has TSO and scatter-gather offload disabled.
+AT_CHECK([ethtool -K ovs-p0 tso off], [0], [ignore], [ignore])
+AT_CHECK([ethtool -K ovs-p0 sg off], [0], [ignore], [ignore])
+
+dnl Reinitialize.
+AT_CHECK([ovs-vsctl del-port ovs-p0])
+AT_CHECK([ovs-vsctl add-port br-underlay ovs-p0])
+
+dnl Set up tunnel endpoints on OVS outside the namespace and with a native
+dnl linux device inside the namespace.
+ADD_OVS_TUNNEL([gre], [br0], [at_gre0], [172.31.1.1], [10.1.1.100/24])
+ADD_NATIVE_TUNNEL([gretap], [at_gre1], [at_ns0], [172.31.1.100], [10.1.1.1/24])
+
+dnl Set MTU for tunnel to generate 1500 byte packets.
+AT_CHECK([ip link set dev br0 mtu 1400])
+
+dnl First, check the underlay.
+NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -W 2 172.31.1.100 | FORMAT_PING],
+              [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+dnl Check that the tunnel is up.
+NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -W 2 10.1.1.100 | FORMAT_PING],
+              [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+dnl Start tcpdump to capture the encapsulated packets.
+OVS_DAEMONIZE([tcpdump -i ovs-p0 -w p0.pcap], [tcpdump.pid])
+
+dnl Wait until the pcap is written, which happens after the interface
+dnl is opened by tcpdump.
+OVS_WAIT_UNTIL([test -e p0.pcap])
+
+dnl Initialize the listener before it is needed.
+NETNS_DAEMONIZE([at_ns0], [nc -l 10.1.1.1 1234 > data2], [nc.pid])
+
+dnl Verify that ncat is ready.
+OVS_WAIT_UNTIL([NS_EXEC([at_ns0], [netstat -ln | grep :1234])])
+
+dnl Large TCP transfer aimed towards ovs-p0, which has TSO disabled.
+AT_CHECK([dd if=/dev/urandom of=payload.bin bs=60000 count=1 2> /dev/null])
+AT_CHECK([nc $NC_EOF_OPT 10.1.1.1 1234 < payload.bin])
+
+dnl Wait until transfer completes before checking.
+OVS_WAIT_WHILE([kill -0 $(cat nc.pid)])
+AT_CHECK([diff -q payload.bin data2], [0])
+OVS_WAIT_WHILE([test $(stat -c %s p0.pcap) -le 68000 ])
+
+dnl Stop tcpdump and verify the results.
+AT_CHECK([kill -15 $(cat tcpdump.pid)])
+OVS_WAIT_WHILE([kill -0 $(cat tcpdump.pid)])
+
+dnl The exact number of packets sent will vary, but we check that the largest
+dnl segments have the correct lengths and certain other fields.
+AT_CHECK([test $(ovs-pcap p0.pcap | grep -Ec dnl
+"^.{24}0800"dnl Ethernet
+"4500059e....4000..2f....ac1f0164ac1f0101"dnl IP(len=1438, DF, GRE, 172.31.1.100->172.31.1.1)
+"00006558"dnl GRE(flags=0, proto=0x6558)
+".{24}0800"dnl Ethernet
+"45000578....4000..06....0a0101640a010101"dnl IP(len=1400, DF, TCP, 10.1.1.100->10.1.1.1)
+"....04d2............................0000"dnl TCP(dport=1234
+) -ge 20])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
 AT_SETUP([datapath - ping over ip6gre L2 tunnel])
 OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15)
 OVS_CHECK_GRE()
@@ -651,6 +740,25 @@  dnl Okay, now check the overlay with different packet sizes
 NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -W 2 10.1.1.100 | FORMAT_PING], [0], [dnl
 3 packets transmitted, 3 received, 0% packet loss, time 0ms
 ])
+
+dnl Start ncat listeners.
+OVS_DAEMONIZE([nc -l 10.1.1.100 1234 > tcp_data], [nc.pid])
+NETNS_DAEMONIZE([at_ns0], [nc -l -u 10.1.1.1 4321 > udp_data], [nc2.pid])
+
+dnl Verify that ncat is ready.
+OVS_WAIT_UNTIL([netstat -ln | grep :1234])
+OVS_WAIT_UNTIL([NS_EXEC([at_ns0], [netstat -ln | grep :4321])])
+
+dnl Check large bidirectional TCP.
+AT_CHECK([dd if=/dev/urandom of=payload.bin bs=60000 count=1 2> /dev/null])
+NS_CHECK_EXEC([at_ns0], [nc $NC_EOF_OPT 10.1.1.100 1234 < payload.bin])
+OVS_WAIT_UNTIL([diff -q payload.bin tcp_data])
+
+dnl Check UDP.
+AT_CHECK([dd if=/dev/urandom of=payload.bin bs=600 count=1 2> /dev/null])
+AT_CHECK([nc $NC_EOF_OPT -u 10.1.1.1 4321 < payload.bin])
+OVS_WAIT_UNTIL([diff -q payload.bin udp_data])
+
 OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP