diff mbox series

[ovs-dev,[PATCH,RFC] 14/17] Enable L4 csum offloading by default.

Message ID 20211207165156.705727-15-fbl@sysclose.org
State RFC
Headers show
Series [ovs-dev,[PATCH,RFC] 14/17] Enable L4 csum offloading by default. | expand

Commit Message

Flavio Leitner Dec. 7, 2021, 4:51 p.m. UTC
The netdev receiving packets is supposed to provide the flags
indicating if the L4 csum was verified and it is OK or BAD,
otherwise the stack will check when appropriate by software.

If the packet comes with good checksum, then postpone the
checksum calculation to the egress device if needed.

When encapsulating a packet with that flag, set the checksum
of the inner L4 header since that is not yet supported.

Calculate the L4 csum when the packet is going to be sent over
a device that doesn't support the feature.

Linux tap devices allow enabling L3 and L4 offload, so this
patch enables the feature. However, the Linux socket interface
remains disabled because the API doesn't allow enabling
those features without enabling TSO too.

Signed-off-by: Flavio Leitner <fbl@sysclose.org>
---
 lib/conntrack.c         |  16 +--
 lib/dp-packet.c         |  23 +++-
 lib/dp-packet.h         |  56 ++++++++
 lib/flow.c              |  21 +++
 lib/netdev-dpdk.c       | 157 ++++++++++++++-------
 lib/netdev-linux.c      | 295 +++++++++++++++++++++-------------------
 lib/netdev-native-tnl.c |  32 +----
 lib/netdev.c            |  40 ++----
 lib/packets.c           | 174 +++++++++++++++++++-----
 lib/packets.h           |   3 +
 10 files changed, 527 insertions(+), 290 deletions(-)
diff mbox series

Patch

diff --git a/lib/conntrack.c b/lib/conntrack.c
index 5b4ca4dfc..c12b03538 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -2103,14 +2103,10 @@  conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type,
     }
 
     if (ok) {
-        bool hwol_bad_l4_csum = dp_packet_ol_l4_csum_bad(pkt);
-        if (!hwol_bad_l4_csum) {
-            bool  hwol_good_l4_csum = dp_packet_ol_l4_csum_good(pkt)
-                                      || dp_packet_ol_tx_l4_csum(pkt);
-            /* Validate the checksum only when hwol is not supported. */
+        if (!dp_packet_ol_l4_csum_bad(pkt)) {
             if (extract_l4(&ctx->key, l4, dp_packet_l4_size(pkt),
-                           &ctx->icmp_related, l3, !hwol_good_l4_csum,
-                           NULL)) {
+                           &ctx->icmp_related, l3,
+                           !dp_packet_ol_l4_csum_good(pkt), NULL)) {
                 ctx->hash = conn_key_hash(&ctx->key, ct->hash_basis);
                 return true;
             }
@@ -3421,8 +3417,10 @@  handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
             adj_seqnum(&th->tcp_seq, ec->seq_skew);
     }
 
-    th->tcp_csum = 0;
-    if (!dp_packet_ol_tx_l4_csum(pkt)) {
+    if (dp_packet_ol_tx_tcp_csum(pkt)) {
+        dp_packet_ol_reset_l4_csum_good(pkt);
+    } else {
+        th->tcp_csum = 0;
         if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) {
             th->tcp_csum = packet_csum_upperlayer6(nh6, th, ctx->key.nw_proto,
                                dp_packet_l4_size(pkt));
diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index 369f3561e..8a1bf221a 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -38,6 +38,9 @@  dp_packet_init__(struct dp_packet *p, size_t allocated, enum dp_packet_source so
     dp_packet_init_specific(p);
     /* By default assume the packet type to be Ethernet. */
     p->packet_type = htonl(PT_ETH);
+    /* Reset csum start and offset. */
+    p->csum_start = 0;
+    p->csum_offset = 0;
 }
 
 static void
@@ -188,7 +191,7 @@  dp_packet_clone_with_headroom(const struct dp_packet *p, size_t headroom)
                                                     dp_packet_size(p),
                                                     headroom);
     /* Copy the following fields into the returned buffer: l2_pad_size,
-     * l2_5_ofs, l3_ofs, l4_ofs, cutlen, packet_type and md. */
+     * l2_5_ofs, l3_ofs, ..., cutlen, packet_type and md. */
     memcpy(&new_buffer->l2_pad_size, &p->l2_pad_size,
             sizeof(struct dp_packet) -
             offsetof(struct dp_packet, l2_pad_size));
@@ -517,4 +520,22 @@  dp_packet_ol_send_prepare(struct dp_packet *p, const uint64_t flags) {
         dp_packet_ip_set_header_csum(p);
         dp_packet_ol_set_ip_csum_good(p);
     }
+
+    if (dp_packet_ol_l4_csum_good(p) || !dp_packet_ol_tx_l4_csum(p)) {
+        return;
+    }
+
+    if (dp_packet_ol_tx_tcp_csum(p)
+        && !(flags & NETDEV_OFFLOAD_TX_TCP_CSUM)) {
+        packet_tcp_complete_csum(p);
+        dp_packet_ol_set_l4_csum_good(p);
+    } else if (dp_packet_ol_tx_udp_csum(p)
+        && !(flags & NETDEV_OFFLOAD_TX_UDP_CSUM)) {
+        packet_udp_complete_csum(p);
+        dp_packet_ol_set_l4_csum_good(p);
+    } else if (!(flags & NETDEV_OFFLOAD_TX_SCTP_CSUM)
+        && dp_packet_ol_tx_sctp_csum(p)) {
+        packet_sctp_complete_csum(p);
+        dp_packet_ol_set_l4_csum_good(p);
+    }
 }
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 278be172e..51f98ab9a 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -138,6 +138,8 @@  struct dp_packet {
                                       or UINT16_MAX. */
     uint32_t cutlen;               /* length in bytes to cut from the end. */
     ovs_be32 packet_type;          /* Packet type as defined in OpenFlow */
+    uint16_t csum_start;           /* Position to start checksumming from. */
+    uint16_t csum_offset;          /* Offset to place checksum. */
     union {
         struct pkt_metadata md;
         uint64_t data[DP_PACKET_CONTEXT_SIZE / 8];
@@ -981,6 +983,13 @@  dp_packet_ol_tx_ipv4(const struct dp_packet *p)
     return !!(*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_TX_IPV4);
 }
 
+/* Returns 'true' if packet 'p' is marked as IPv6. */
+static inline bool
+dp_packet_ol_tx_ipv6(const struct dp_packet *p)
+{
+    return !!(*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_TX_IPV6);
+}
+
 /* Returns 'true' if packet 'p' is marked for TCP checksum offloading. */
 static inline bool
 dp_packet_ol_tx_tcp_csum(const struct dp_packet *p)
@@ -1111,6 +1120,8 @@  dp_packet_ip_set_header_csum(struct dp_packet *p)
     ip->ip_csum = csum(ip, sizeof *ip);
 }
 
+/* Returns 'true' if the packet 'p' has good integrity and the
+ * checksum in it is correct. */
 static inline bool
 dp_packet_ol_l4_csum_good(const struct dp_packet *p)
 {
@@ -1125,6 +1136,51 @@  dp_packet_ol_l4_csum_bad(const struct dp_packet *p)
             DP_PACKET_OL_RX_L4_CSUM_BAD;
 }
 
+/* Returns 'true' if the packet has good integrity though the
+ * checksum in the packet 'p' is not complete. */
+static inline bool
+dp_packet_ol_l4_csum_partial(const struct dp_packet *p)
+{
+    return (*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_RX_L4_CSUM_MASK) ==
+            DP_PACKET_OL_RX_L4_CSUM_MASK;
+}
+
+/* Marks packet 'p' with good integrity though the checksum in the
+ * packet is not complete. */
+static inline void
+dp_packet_ol_set_l4_csum_partial(const struct dp_packet *p)
+{
+    *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RX_L4_CSUM_MASK;
+}
+
+/* Marks packet 'p' with good L4 checksum. */
+static inline void
+dp_packet_ol_set_l4_csum_good(const struct dp_packet *p)
+{
+    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_L4_CSUM_BAD;
+    *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RX_L4_CSUM_GOOD;
+}
+
+/* Resets the good L4 checksum flag in packet 'p', forcing the
+ * checksum to be verified or recomputed later. */
+static inline void
+dp_packet_ol_reset_l4_csum_good(const struct dp_packet *p)
+{
+    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_L4_CSUM_GOOD;
+}
+
+/* Marks packet 'p' with good integrity if the 'start' and 'offset'
+ * matches with the 'csum_start' and 'csum_offset' in packet 'p'.
+ * The 'start' is the offset from the begin of the packet headers.
+ * The 'offset' is the offset from start to place the checksum. */
+static inline void
+dp_packet_ol_vnet_csum_check(const struct dp_packet *p, uint16_t start,
+                             uint16_t offset)
+{
+    if (p->csum_start == start && p->csum_offset == offset) {
+        dp_packet_ol_set_l4_csum_partial(p);
+    }
+}
+
 #ifdef  __cplusplus
 }
 #endif
diff --git a/lib/flow.c b/lib/flow.c
index bad1e1a17..e0ab80bbf 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -1022,6 +1022,12 @@  miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
                     miniflow_push_be16(mf, tp_dst, tcp->tcp_dst);
                     miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
                     miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
+                    dp_packet_ol_vnet_csum_check(packet, packet->l4_ofs,
+                              __builtin_offsetof(struct tcp_header, tcp_csum));
+                    if (dp_packet_ol_l4_csum_good(packet)
+                        || dp_packet_ol_l4_csum_partial(packet)) {
+                        dp_packet_ol_set_tx_tcp_csum(packet);
+                    }
                 }
             }
         } else if (OVS_LIKELY(nw_proto == IPPROTO_UDP)) {
@@ -1032,6 +1038,12 @@  miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
                 miniflow_push_be16(mf, tp_dst, udp->udp_dst);
                 miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
                 miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
+                dp_packet_ol_vnet_csum_check(packet, packet->l4_ofs,
+                              __builtin_offsetof(struct udp_header, udp_csum));
+                if (dp_packet_ol_l4_csum_good(packet)
+                    || dp_packet_ol_l4_csum_partial(packet)) {
+                    dp_packet_ol_set_tx_udp_csum(packet);
+                }
             }
         } else if (OVS_LIKELY(nw_proto == IPPROTO_SCTP)) {
             if (OVS_LIKELY(size >= SCTP_HEADER_LEN)) {
@@ -1041,6 +1053,12 @@  miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
                 miniflow_push_be16(mf, tp_dst, sctp->sctp_dst);
                 miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
                 miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
+                dp_packet_ol_vnet_csum_check(packet, packet->l4_ofs,
+                            __builtin_offsetof(struct sctp_header, sctp_csum));
+                if (dp_packet_ol_l4_csum_good(packet)
+                    || dp_packet_ol_l4_csum_partial(packet)) {
+                    dp_packet_ol_set_tx_sctp_csum(packet);
+                }
             }
         } else if (OVS_LIKELY(nw_proto == IPPROTO_ICMP)) {
             if (OVS_LIKELY(size >= ICMP_HEADER_LEN)) {
@@ -3146,6 +3164,7 @@  flow_compose_l4_csum(struct dp_packet *p, const struct flow *flow,
             tcp->tcp_csum = 0;
             tcp->tcp_csum = csum_finish(csum_continue(pseudo_hdr_csum,
                                                       tcp, l4_len));
+            dp_packet_ol_set_l4_csum_good(p);
         } else if (flow->nw_proto == IPPROTO_UDP) {
             struct udp_header *udp = dp_packet_l4(p);
 
@@ -3155,6 +3174,8 @@  flow_compose_l4_csum(struct dp_packet *p, const struct flow *flow,
             if (!udp->udp_csum) {
                 udp->udp_csum = htons(0xffff);
             }
+
+            dp_packet_ol_set_l4_csum_good(p);
         } else if (flow->nw_proto == IPPROTO_ICMP) {
             struct icmp_header *icmp = dp_packet_l4(p);
 
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 435b17c8f..c7e09b973 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -145,16 +145,6 @@  typedef uint16_t dpdk_port_t;
 
 #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
 
-/* List of required flags advertised by the hardware that will be used
- * if TSO is enabled. Ideally this should include DEV_TX_OFFLOAD_SCTP_CKSUM.
- * However, very few drivers supports that the moment and SCTP is not a
- * widely used protocol as TCP and UDP, so it's optional. */
-#define DPDK_TX_TSO_OFFLOAD_FLAGS (DEV_TX_OFFLOAD_TCP_TSO        \
-                                   | DEV_TX_OFFLOAD_TCP_CKSUM    \
-                                   | DEV_TX_OFFLOAD_UDP_CKSUM    \
-                                   | DEV_TX_OFFLOAD_IPV4_CKSUM)
-
-
 static const struct rte_eth_conf port_conf = {
     .rxmode = {
         .split_hdr_size = 0,
@@ -397,8 +387,10 @@  enum dpdk_hw_ol_features {
     NETDEV_RX_HW_CRC_STRIP = 1 << 1,
     NETDEV_RX_HW_SCATTER = 1 << 2,
     NETDEV_TX_IPV4_CKSUM_OFFLOAD = 1 << 3,
-    NETDEV_TX_TSO_OFFLOAD = 1 << 4,
-    NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 5,
+    NETDEV_TX_TCP_CKSUM_OFFLOAD = 1 << 4,
+    NETDEV_TX_UDP_CKSUM_OFFLOAD = 1 << 5,
+    NETDEV_TX_SCTP_CKSUM_OFFLOAD = 1 << 6,
+    NETDEV_TX_TSO_OFFLOAD = 1 << 7,
 };
 
 /*
@@ -951,6 +943,35 @@  dpdk_watchdog(void *dummy OVS_UNUSED)
     return NULL;
 }
 
+static void
+netdev_dpdk_update_netdev_flag(struct netdev_dpdk *dev,
+                               enum dpdk_hw_ol_features hw_ol_features,
+                               enum netdev_ol_flags flag)
+{
+    struct netdev *netdev = &dev->up;
+
+    if (dev->hw_ol_features & hw_ol_features) {
+        netdev->ol_flags |= flag;
+    } else {
+        netdev->ol_flags &= ~flag;
+    }
+}
+
+static void
+netdev_dpdk_update_netdev_flags(struct netdev_dpdk *dev)
+{
+    netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_IPV4_CKSUM_OFFLOAD,
+                                   NETDEV_OFFLOAD_TX_IPV4_CSUM);
+    netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_TCP_CKSUM_OFFLOAD,
+                                   NETDEV_OFFLOAD_TX_TCP_CSUM);
+    netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_UDP_CKSUM_OFFLOAD,
+                                   NETDEV_OFFLOAD_TX_UDP_CSUM);
+    netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_SCTP_CKSUM_OFFLOAD,
+                                   NETDEV_OFFLOAD_TX_SCTP_CSUM);
+    netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_TSO_OFFLOAD,
+                                   NETDEV_OFFLOAD_TX_TCP_TSO);
+}
+
 static int
 dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
 {
@@ -987,11 +1008,20 @@  dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
         conf.txmode.offloads |= DEV_TX_OFFLOAD_IPV4_CKSUM;
     }
 
+    if (dev->hw_ol_features & NETDEV_TX_TCP_CKSUM_OFFLOAD) {
+        conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_CKSUM;
+    }
+
+    if (dev->hw_ol_features & NETDEV_TX_UDP_CKSUM_OFFLOAD) {
+        conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
+    }
+
+    if (dev->hw_ol_features & NETDEV_TX_SCTP_CKSUM_OFFLOAD) {
+        conf.txmode.offloads |= DEV_TX_OFFLOAD_SCTP_CKSUM;
+    }
+
     if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
-        conf.txmode.offloads |= DPDK_TX_TSO_OFFLOAD_FLAGS;
-        if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) {
-            conf.txmode.offloads |= DEV_TX_OFFLOAD_SCTP_CKSUM;
-        }
+        conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_TSO;
     }
 
     /* Limit configured rss hash functions to only those supported
@@ -1097,7 +1127,6 @@  dpdk_eth_dev_init(struct netdev_dpdk *dev)
     struct rte_ether_addr eth_addr;
     int diag;
     int n_rxq, n_txq;
-    uint32_t tx_tso_offload_capa = DPDK_TX_TSO_OFFLOAD_FLAGS;
     uint32_t rx_chksm_offload_capa = DEV_RX_OFFLOAD_UDP_CKSUM |
                                      DEV_RX_OFFLOAD_TCP_CKSUM |
                                      DEV_RX_OFFLOAD_IPV4_CKSUM;
@@ -1133,18 +1162,28 @@  dpdk_eth_dev_init(struct netdev_dpdk *dev)
         dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD;
     }
 
+    if (info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_CKSUM) {
+        dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD;
+    } else {
+        dev->hw_ol_features &= ~NETDEV_TX_TCP_CKSUM_OFFLOAD;
+    }
+
+    if (info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM) {
+        dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD;
+    } else {
+        dev->hw_ol_features &= ~NETDEV_TX_UDP_CKSUM_OFFLOAD;
+    }
+
+    if (info.tx_offload_capa & DEV_TX_OFFLOAD_SCTP_CKSUM) {
+        dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD;
+    } else {
+        dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD;
+    }
+
     dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
     if (userspace_tso_enabled()) {
-        if ((info.tx_offload_capa & tx_tso_offload_capa)
-            == tx_tso_offload_capa) {
+        if (info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO) {
             dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
-            if (info.tx_offload_capa & DEV_TX_OFFLOAD_SCTP_CKSUM) {
-                dev->hw_ol_features |= NETDEV_TX_SCTP_CHECKSUM_OFFLOAD;
-            } else {
-                VLOG_WARN("%s: Tx SCTP checksum offload is not supported, "
-                          "SCTP packets sent to this device will be dropped",
-                          netdev_get_name(&dev->up));
-            }
         } else {
             VLOG_WARN("%s: Tx TSO offload is not supported.",
                       netdev_get_name(&dev->up));
@@ -1727,6 +1766,9 @@  netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args)
         smap_add(args, FIELD, dev->hw_ol_features & FLAG ? "true" : "false");
         HWOL_SMAP_ADD("rx_csum_offload", NETDEV_RX_CHECKSUM_OFFLOAD);
         HWOL_SMAP_ADD("tx_ip_csum_offload", NETDEV_TX_IPV4_CKSUM_OFFLOAD);
+        HWOL_SMAP_ADD("tx_tcp_csum_offload", NETDEV_TX_TCP_CKSUM_OFFLOAD);
+        HWOL_SMAP_ADD("tx_udp_csum_offload", NETDEV_TX_UDP_CKSUM_OFFLOAD);
+        HWOL_SMAP_ADD("tx_sctp_csum_offload", NETDEV_TX_SCTP_CKSUM_OFFLOAD);
         HWOL_SMAP_ADD("tx_tso_offload", NETDEV_TX_TSO_OFFLOAD);
 #undef HWOL_SMAP_ADD
         smap_add(args, "lsc_interrupt_mode",
@@ -2174,6 +2216,7 @@  netdev_dpdk_prep_ol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
 
     mbuf->l2_len = (char *)dp_packet_l3(pkt) - (char *)dp_packet_eth(pkt);
     mbuf->l3_len = (char *)dp_packet_l4(pkt) - (char *)dp_packet_l3(pkt);
+    mbuf->l4_len = 0;
     mbuf->outer_l2_len = 0;
     mbuf->outer_l3_len = 0;
 
@@ -2395,6 +2438,26 @@  netdev_dpdk_vhost_update_rx_counters(struct netdev_dpdk *dev,
     }
 }
 
+/* Translate TX mbuf offloading flags to RX flags.
+ *
+ * The supported vhost-user library version uses TX flags when
+ * receiving mbufs. A future version fixes that issue and
+ * provides RX flags. */
+static void
+netdev_dpdk_vhost_rxq_offload(struct rte_mbuf **pkts, int pkt_cnt)
+{
+
+/* FIXME: This is only needed until DPDK 21.11 is released. */
+    for (int i = 0; i < pkt_cnt; i++) {
+        struct rte_mbuf *pkt = pkts[i];
+
+        if (pkt->ol_flags & PKT_TX_L4_MASK) {
+            pkt->ol_flags &= ~PKT_TX_L4_MASK;
+            pkt->ol_flags |= PKT_RX_L4_CKSUM_NONE;
+        }
+    }
+}
+
 /*
  * The receive path for the vhost port is the TX path out from guest.
  */
@@ -2431,6 +2494,8 @@  netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
         }
     }
 
+    netdev_dpdk_vhost_rxq_offload((struct rte_mbuf **) batch->packets, nb_rx);
+
     if (policer) {
         qos_drops = nb_rx;
         nb_rx = ingress_policer_run(policer,
@@ -5001,21 +5066,7 @@  netdev_dpdk_reconfigure(struct netdev *netdev)
     }
 
     err = dpdk_eth_dev_init(dev);
-    if (dev->hw_ol_features & NETDEV_TX_IPV4_CKSUM_OFFLOAD) {
-        netdev->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM;
-    } else {
-        netdev->ol_flags &= ~NETDEV_OFFLOAD_TX_IPV4_CSUM;
-    }
-
-    if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
-        netdev->ol_flags |= NETDEV_OFFLOAD_TX_TCP_TSO;
-        netdev->ol_flags |= NETDEV_OFFLOAD_TX_TCP_CSUM;
-        netdev->ol_flags |= NETDEV_OFFLOAD_TX_UDP_CSUM;
-        netdev->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM;
-        if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) {
-            netdev->ol_flags |= NETDEV_OFFLOAD_TX_SCTP_CSUM;
-        }
-    }
+    netdev_dpdk_update_netdev_flags(dev);
 
     /* If both requested and actual hwaddr were previously
      * unset (initialized to 0), then first device init above
@@ -5154,19 +5205,20 @@  netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
             goto unlock;
         }
 
+        vhost_unsup_flags = 1ULL << VIRTIO_NET_F_HOST_ECN
+                            | 1ULL << VIRTIO_NET_F_HOST_UFO;
+
+        dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD;
+        dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD;
+        dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD;
+        dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD;
         if (userspace_tso_enabled()) {
-            netdev->ol_flags |= NETDEV_OFFLOAD_TX_TCP_TSO;
-            netdev->ol_flags |= NETDEV_OFFLOAD_TX_TCP_CSUM;
-            netdev->ol_flags |= NETDEV_OFFLOAD_TX_UDP_CSUM;
-            netdev->ol_flags |= NETDEV_OFFLOAD_TX_SCTP_CSUM;
-            netdev->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM;
-            vhost_unsup_flags = 1ULL << VIRTIO_NET_F_HOST_ECN
-                                | 1ULL << VIRTIO_NET_F_HOST_UFO;
+            dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
+            VLOG_DBG("%s: TSO enabled on vhost port",
+                     netdev_get_name(&dev->up));
         } else {
-            /* This disables checksum offloading and all the features
-             * that depends on it (TSO, UFO, ECN) according to virtio
-             * specification. */
-            vhost_unsup_flags = 1ULL << VIRTIO_NET_F_CSUM;
+            vhost_unsup_flags = 1ULL << VIRTIO_NET_F_HOST_TSO4
+                                | 1ULL << VIRTIO_NET_F_HOST_TSO6;
         }
 
         err = rte_vhost_driver_disable_features(dev->vhost_id,
@@ -5185,6 +5237,7 @@  netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
         }
     }
 
+    netdev_dpdk_update_netdev_flags(dev);
     err = dpdk_vhost_reconfigure_helper(dev);
 
 unlock:
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 82f9a0758..48a3cf7d7 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -927,14 +927,6 @@  netdev_linux_common_construct(struct netdev *netdev_)
     netnsid_unset(&netdev->netnsid);
     ovs_mutex_init(&netdev->mutex);
 
-    if (userspace_tso_enabled()) {
-        netdev_->ol_flags |= NETDEV_OFFLOAD_TX_TCP_TSO;
-        netdev_->ol_flags |= NETDEV_OFFLOAD_TX_TCP_CSUM;
-        netdev_->ol_flags |= NETDEV_OFFLOAD_TX_UDP_CSUM;
-        netdev_->ol_flags |= NETDEV_OFFLOAD_TX_SCTP_CSUM;
-        netdev_->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM;
-    }
-
     return 0;
 }
 
@@ -948,6 +940,15 @@  netdev_linux_construct(struct netdev *netdev_)
         return error;
     }
 
+    /* The socket interface doesn't offer the option to enable only
+     * csum offloading without TSO. */
+    if (userspace_tso_enabled()) {
+        netdev_->ol_flags |= NETDEV_OFFLOAD_TX_TCP_TSO;
+        netdev_->ol_flags |= NETDEV_OFFLOAD_TX_TCP_CSUM;
+        netdev_->ol_flags |= NETDEV_OFFLOAD_TX_UDP_CSUM;
+        netdev_->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM;
+    }
+
     error = get_flags(&netdev->up, &netdev->ifi_flags);
     if (error == ENODEV) {
         if (netdev->up.netdev_class != &netdev_internal_class) {
@@ -976,6 +977,7 @@  netdev_linux_construct_tap(struct netdev *netdev_)
     struct netdev_linux *netdev = netdev_linux_cast(netdev_);
     static const char tap_dev[] = "/dev/net/tun";
     const char *name = netdev_->name;
+    unsigned long oflags;
     struct ifreq ifr;
 
     int error = netdev_linux_common_construct(netdev_);
@@ -993,11 +995,7 @@  netdev_linux_construct_tap(struct netdev *netdev_)
 
     /* Create tap device. */
     get_flags(&netdev->up, &netdev->ifi_flags);
-    ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
-    if (userspace_tso_enabled()) {
-        ifr.ifr_flags |= IFF_VNET_HDR;
-    }
-
+    ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
     ovs_strzcpy(ifr.ifr_name, name, sizeof ifr.ifr_name);
     if (ioctl(netdev->tap_fd, TUNSETIFF, &ifr) == -1) {
         VLOG_WARN("%s: creating tap device failed: %s", name,
@@ -1019,21 +1017,22 @@  netdev_linux_construct_tap(struct netdev *netdev_)
         goto error_close;
     }
 
+    oflags = TUN_F_CSUM;
     if (userspace_tso_enabled()) {
-        /* Old kernels don't support TUNSETOFFLOAD. If TUNSETOFFLOAD is
-         * available, it will return EINVAL when a flag is unknown.
-         * Therefore, try enabling offload with no flags to check
-         * if TUNSETOFFLOAD support is available or not. */
-        if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, 0) == 0 || errno != EINVAL) {
-            unsigned long oflags = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6;
-
-            if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, oflags) == -1) {
-                VLOG_WARN("%s: enabling tap offloading failed: %s", name,
-                          ovs_strerror(errno));
-                error = errno;
-                goto error_close;
-            }
+        oflags |= (TUN_F_TSO4 | TUN_F_TSO6);
+    }
+
+    if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, oflags) == 0) {
+        netdev_->ol_flags |= (NETDEV_OFFLOAD_TX_IPV4_CSUM
+                              | NETDEV_OFFLOAD_TX_TCP_CSUM
+                              | NETDEV_OFFLOAD_TX_UDP_CSUM);
+
+        if (userspace_tso_enabled()) {
+            netdev_->ol_flags |= NETDEV_OFFLOAD_TX_TCP_TSO;
         }
+    } else {
+       VLOG_WARN("%s: Disabling hardware offloading: %s", name,
+                 ovs_strerror(errno));
     }
 
     netdev->present = true;
@@ -1333,18 +1332,21 @@  netdev_linux_batch_rxq_recv_sock(struct netdev_rxq_linux *rx, int mtu,
             pkt = buffers[i];
          }
 
-        if (virtio_net_hdr_size && netdev_linux_parse_vnet_hdr(pkt)) {
-            struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up);
-            struct netdev_linux *netdev = netdev_linux_cast(netdev_);
+        if (virtio_net_hdr_size) {
+            int ret = netdev_linux_parse_vnet_hdr(pkt);
+            if (OVS_UNLIKELY(ret)) {
+                struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up);
+                struct netdev_linux *netdev = netdev_linux_cast(netdev_);
 
-            /* Unexpected error situation: the virtio header is not present
-             * or corrupted. Drop the packet but continue in case next ones
-             * are correct. */
-            dp_packet_delete(pkt);
-            netdev->rx_dropped += 1;
-            VLOG_WARN_RL(&rl, "%s: Dropped packet: Invalid virtio net header",
-                         netdev_get_name(netdev_));
-            continue;
+                /* Unexpected error situation: the virtio header is not present
+                 * or corrupted or contains unsupported features. Drop the packet
+                 * but continue in case next ones are correct. */
+                dp_packet_delete(pkt);
+                netdev->rx_dropped += 1;
+                VLOG_WARN_RL(&rl, "%s: Dropped packet: %s",
+                             netdev_get_name(netdev_), ovs_strerror(ret));
+                continue;
+            }
         }
 
         for (cmsg = CMSG_FIRSTHDR(&mmsgs[i].msg_hdr); cmsg;
@@ -1392,7 +1394,6 @@  static int
 netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu,
                                 struct dp_packet_batch *batch)
 {
-    int virtio_net_hdr_size;
     ssize_t retval;
     size_t std_len;
     int iovlen;
@@ -1402,16 +1403,14 @@  netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu,
         /* Use the buffer from the allocated packet below to receive MTU
          * sized packets and an aux_buf for extra TSO data. */
         iovlen = IOV_TSO_SIZE;
-        virtio_net_hdr_size = sizeof(struct virtio_net_hdr);
     } else {
         /* Use only the buffer from the allocated packet. */
         iovlen = IOV_STD_SIZE;
-        virtio_net_hdr_size = 0;
     }
 
     /* The length here needs to be accounted in the same way when the
      * aux_buf is allocated so that it can be prepended to TSO buffer. */
-    std_len = virtio_net_hdr_size + VLAN_ETH_HEADER_LEN + mtu;
+    std_len = sizeof(struct virtio_net_hdr) + VLAN_ETH_HEADER_LEN + mtu;
     for (i = 0; i < NETDEV_MAX_BURST; i++) {
         struct dp_packet *buffer;
         struct dp_packet *pkt;
@@ -1451,7 +1450,7 @@  netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu,
             pkt = buffer;
         }
 
-        if (virtio_net_hdr_size && netdev_linux_parse_vnet_hdr(pkt)) {
+        if (netdev_linux_parse_vnet_hdr(pkt)) {
             struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up);
             struct netdev_linux *netdev = netdev_linux_cast(netdev_);
 
@@ -1600,7 +1599,7 @@  netdev_linux_sock_batch_send(int sock, int ifindex, bool tso, int mtu,
  * on other interface types because we attach a socket filter to the rx
  * socket. */
 static int
-netdev_linux_tap_batch_send(struct netdev *netdev_, bool tso, int mtu,
+netdev_linux_tap_batch_send(struct netdev *netdev_, int mtu,
                             struct dp_packet_batch *batch)
 {
     struct netdev_linux *netdev = netdev_linux_cast(netdev_);
@@ -1621,10 +1620,7 @@  netdev_linux_tap_batch_send(struct netdev *netdev_, bool tso, int mtu,
         ssize_t retval;
         int error;
 
-        if (tso) {
-            netdev_linux_prepend_vnet_hdr(packet, mtu);
-        }
-
+        netdev_linux_prepend_vnet_hdr(packet, mtu);
         size = dp_packet_size(packet);
         do {
             retval = write(netdev->tap_fd, dp_packet_data(packet), size);
@@ -1754,7 +1750,7 @@  netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
 
         error = netdev_linux_sock_batch_send(sock, ifindex, tso, mtu, batch);
     } else {
-        error = netdev_linux_tap_batch_send(netdev_, tso, mtu, batch);
+        error = netdev_linux_tap_batch_send(netdev_, mtu, batch);
     }
     if (error) {
         if (error == ENOBUFS) {
@@ -6568,102 +6564,64 @@  af_packet_sock(void)
     return sock;
 }
 
-static int
-netdev_linux_parse_l2(struct dp_packet *b, uint16_t *l4proto)
-{
-    struct eth_header *eth_hdr;
-    ovs_be16 eth_type;
-    int l2_len;
-
-    eth_hdr = dp_packet_at(b, 0, ETH_HEADER_LEN);
-    if (!eth_hdr) {
-        return -EINVAL;
-    }
-
-    l2_len = ETH_HEADER_LEN;
-    eth_type = eth_hdr->eth_type;
-    if (eth_type_vlan(eth_type)) {
-        struct vlan_header *vlan = dp_packet_at(b, l2_len, VLAN_HEADER_LEN);
-
-        if (!vlan) {
-            return -EINVAL;
-        }
-
-        eth_type = vlan->vlan_next_type;
-        l2_len += VLAN_HEADER_LEN;
-    }
-
-    if (eth_type == htons(ETH_TYPE_IP)) {
-        struct ip_header *ip_hdr = dp_packet_at(b, l2_len, IP_HEADER_LEN);
-
-        if (!ip_hdr) {
-            return -EINVAL;
-        }
-
-        *l4proto = ip_hdr->ip_proto;
-        dp_packet_ol_set_tx_ipv4(b);
-    } else if (eth_type == htons(ETH_TYPE_IPV6)) {
-        struct ovs_16aligned_ip6_hdr *nh6;
-
-        nh6 = dp_packet_at(b, l2_len, IPV6_HEADER_LEN);
-        if (!nh6) {
-            return -EINVAL;
-        }
-
-        *l4proto = nh6->ip6_ctlun.ip6_un1.ip6_un1_nxt;
-        dp_packet_ol_set_tx_ipv6(b);
-    }
-
-    return 0;
-}
-
+/* Initializes packet 'b' with features enabled in the prepended
+ * struct virtio_net_hdr.  Returns 0 if successful, otherwise a
+ * positive errno value. */
 static int
 netdev_linux_parse_vnet_hdr(struct dp_packet *b)
 {
     struct virtio_net_hdr *vnet = dp_packet_pull(b, sizeof *vnet);
-    uint16_t l4proto = 0;
 
     if (OVS_UNLIKELY(!vnet)) {
-        return -EINVAL;
+        return EINVAL;
     }
 
     if (vnet->flags == 0 && vnet->gso_type == VIRTIO_NET_HDR_GSO_NONE) {
         return 0;
     }
 
-    if (netdev_linux_parse_l2(b, &l4proto)) {
-        return -EINVAL;
-    }
-
     if (vnet->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) {
-        if (l4proto == IPPROTO_TCP) {
-            dp_packet_ol_set_tx_tcp_csum(b);
-        } else if (l4proto == IPPROTO_UDP) {
-            dp_packet_ol_set_tx_udp_csum(b);
-        } else if (l4proto == IPPROTO_SCTP) {
-            dp_packet_ol_set_tx_sctp_csum(b);
-        }
-    }
-
-    if (l4proto && vnet->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
-        uint8_t allowed_mask = VIRTIO_NET_HDR_GSO_TCPV4
-                                | VIRTIO_NET_HDR_GSO_TCPV6
-                                | VIRTIO_NET_HDR_GSO_UDP;
-        uint8_t type = vnet->gso_type & allowed_mask;
-
-        if (type == VIRTIO_NET_HDR_GSO_TCPV4
-            || type == VIRTIO_NET_HDR_GSO_TCPV6) {
-            dp_packet_ol_set_tcp_seg(b);
-        }
-    }
-
-    return 0;
+        /* The packet has offloaded checksum. However, there is no
+         * additional information like the protocol used, so it would
+         * require to parse the packet here. The checksum starting point
+         * and offset are going to be verified when the packet headers
+         * are parsed during miniflow extraction. */
+        b->csum_start = vnet->csum_start;
+        b->csum_offset = vnet->csum_offset;
+    } else {
+        b->csum_start = 0;
+        b->csum_offset = 0;
+    }
+
+    int ret = 0;
+    switch (vnet->gso_type) {
+    case VIRTIO_NET_HDR_GSO_TCPV4:
+    case VIRTIO_NET_HDR_GSO_TCPV6:
+        /* FIXME: The packet has offloaded TCP segmentation. The gso_size
+         * is given and needs to be respected. */
+        dp_packet_ol_set_tcp_seg(b);
+        break;
+    case VIRTIO_NET_HDR_GSO_UDP:
+        /* UFO is not supported. */
+        VLOG_WARN_RL(&rl, "Received an unsupported packet with UFO enabled.");
+        ret = ENOTSUP;
+        break;
+    case VIRTIO_NET_HDR_GSO_NONE:
+        break;
+    default:
+        ret = ENOTSUP;
+        VLOG_WARN_RL(&rl, "Received an unsupported packet with GSO type: 0x%x",
+                     vnet->gso_type);
+    }
+
+    return ret;
 }
 
 static void
 netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu)
 {
-    struct virtio_net_hdr *vnet = dp_packet_push_zeros(b, sizeof *vnet);
+    struct virtio_net_hdr v;
+    struct virtio_net_hdr *vnet = &v;
 
     if (dp_packet_ol_tcp_seg(b)) {
         uint16_t hdr_len = ((char *)dp_packet_l4(b) - (char *)dp_packet_eth(b))
@@ -6673,30 +6631,91 @@  netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu)
         vnet->gso_size = (OVS_FORCE __virtio16)(mtu - hdr_len);
         if (dp_packet_ol_tx_ipv4(b)) {
             vnet->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
-        } else {
+        } else if (dp_packet_ol_tx_ipv6(b)) {
             vnet->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
         }
 
     } else {
-        vnet->flags = VIRTIO_NET_HDR_GSO_NONE;
-    }
-
-    if (dp_packet_ol_tx_l4_csum(b)) {
-        vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
-        vnet->csum_start = (OVS_FORCE __virtio16)((char *)dp_packet_l4(b)
-                                                  - (char *)dp_packet_eth(b));
-
+        vnet->hdr_len = 0;
+        vnet->gso_size = 0;
+        vnet->gso_type = VIRTIO_NET_HDR_GSO_NONE;
+    }
+
+    if (dp_packet_ol_l4_csum_good(b)) {
+        /* The packet already has a good checksum.
+         * No need to validate again. */
+        vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16)0;
+        vnet->flags = VIRTIO_NET_HDR_F_DATA_VALID;
+    } else if (dp_packet_ol_tx_l4_csum(b)) {
+        /* The csum calculation is offloaded. */
         if (dp_packet_ol_tx_tcp_csum(b)) {
+            /* Virtual I/O Device (VIRTIO) Version 1.1
+             * 5.1.6.2 Packet Transmission
+             If the driver negotiated VIRTIO_NET_F_CSUM, it can skip
+             checksumming the packet:
+               - flags has the VIRTIO_NET_HDR_F_NEEDS_CSUM set,
+               - csum_start is set to the offset within the packet
+                 to begin checksumming, and
+               - csum_offset indicates how many bytes after the
+                 csum_start the new (16 bit ones’ complement) checksum
+                 is placed by the device.
+               The TCP checksum field in the packet is set to the sum of
+               the TCP pseudo header, so that replacing it by the ones’
+               complement checksum of the TCP header and body will give
+               the correct result. */
+
+            struct tcp_header *tcp_hdr = dp_packet_l4(b);
+            ovs_be16 csum = 0;
+            if (dp_packet_ol_tx_ipv4(b)) {
+                const struct ip_header *ip_hdr = dp_packet_l3(b);
+                csum = ~csum_finish(packet_csum_pseudoheader(ip_hdr));
+            } else if (dp_packet_ol_tx_ipv6(b)) {
+                const struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(b);
+                csum = ~csum_finish(packet_csum_pseudoheader6(ip6_hdr));
+            }
+
+            tcp_hdr->tcp_csum = csum;
+            vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+            vnet->csum_start = (OVS_FORCE __virtio16)b->l4_ofs;
             vnet->csum_offset = (OVS_FORCE __virtio16) __builtin_offsetof(
                                     struct tcp_header, tcp_csum);
         } else if (dp_packet_ol_tx_udp_csum(b)) {
+            struct udp_header *udp_hdr = dp_packet_l4(b);
+            ovs_be16 csum = 0;
+
+            if (dp_packet_ol_tx_ipv4(b)) {
+                const struct ip_header *ip_hdr = dp_packet_l3(b);
+                csum = ~csum_finish(packet_csum_pseudoheader(ip_hdr));
+            } else if (dp_packet_ol_tx_ipv6(b)) {
+                const struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(b);
+                csum = ~csum_finish(packet_csum_pseudoheader6(ip6_hdr));
+            }
+
+            udp_hdr->udp_csum = csum;
+            vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+            vnet->csum_start = (OVS_FORCE __virtio16)b->l4_ofs;
             vnet->csum_offset = (OVS_FORCE __virtio16) __builtin_offsetof(
                                     struct udp_header, udp_csum);
         } else if (dp_packet_ol_tx_sctp_csum(b)) {
-            vnet->csum_offset = (OVS_FORCE __virtio16) __builtin_offsetof(
-                                    struct sctp_header, sctp_csum);
+            /* The Linux kernel networking stack only supports csum_start
+             * and csum_offset when SCTP GSO is enabled.  See kernel's
+             * skb_csum_hwoffload_help(). Currently there is no SCTP
+             * segmentation offload support in OVS. */
+            vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16)0;
+            vnet->flags = 0;
         } else {
-            VLOG_WARN_RL(&rl, "Unsupported L4 protocol");
+            /* This should only happen when DP_PACKET_OL_TX_L4_MASK includes
+             * a new flag that is not covered by the checks above. */
+            VLOG_WARN_RL(&rl, "Unsupported L4 checksum offload. "
+                         "Flags: %"PRIu64, *dp_packet_ol_flags_ptr(b));
+            vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16)0;
+            vnet->flags = 0;
         }
+    } else {
+        /* Packet L4 csum is unknown. */
+        vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16)0;
+        vnet->flags = 0;
     }
+
+    dp_packet_push(b, vnet, sizeof *vnet);
 }
diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
index 1e2ae5ea6..b5271f3a3 100644
--- a/lib/netdev-native-tnl.c
+++ b/lib/netdev-native-tnl.c
@@ -224,28 +224,6 @@  udp_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
     return udp + 1;
 }
 
-static void
-netdev_tnl_calc_udp_csum(struct udp_header *udp, struct dp_packet *packet,
-                         int ip_tot_size)
-{
-    uint32_t csum;
-
-    if (netdev_tnl_is_header_ipv6(dp_packet_data(packet))) {
-        csum = packet_csum_pseudoheader6(netdev_tnl_ipv6_hdr(
-                                         dp_packet_data(packet)));
-    } else {
-        csum = packet_csum_pseudoheader(netdev_tnl_ip_hdr(
-                                        dp_packet_data(packet)));
-    }
-
-    csum = csum_continue(csum, udp, ip_tot_size);
-    udp->udp_csum = csum_finish(csum);
-
-    if (!udp->udp_csum) {
-        udp->udp_csum = htons(0xffff);
-    }
-}
-
 void
 netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,
                            struct dp_packet *packet,
@@ -260,9 +238,9 @@  netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,
     udp->udp_src = netdev_tnl_get_src_port(packet);
     udp->udp_len = htons(ip_tot_size);
 
-    if (udp->udp_csum) {
-        netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size);
-    }
+    /* Postpone checksum to the egress netdev. */
+    dp_packet_ol_set_tx_udp_csum(packet);
+    dp_packet_ol_reset_l4_csum_good(packet);
 }
 
 static void *
@@ -806,7 +784,9 @@  netdev_gtpu_push_header(const struct netdev *netdev,
                                     data->header_len, &ip_tot_size);
     udp->udp_src = netdev_tnl_get_src_port(packet);
     udp->udp_len = htons(ip_tot_size);
-    netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size);
+    /* Postpone checksum to the egress netdev. */
+    dp_packet_ol_set_tx_udp_csum(packet);
+    dp_packet_ol_reset_l4_csum_good(packet);
 
     gtpuh = ALIGNED_CAST(struct gtpuhdr *, udp + 1);
 
diff --git a/lib/netdev.c b/lib/netdev.c
index d82c48e93..fffd72f5f 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -804,36 +804,15 @@  netdev_send_prepare_packet(const uint64_t netdev_flags,
      * netdev to decide what would be the best to do.
      * Provide a software fallback in case the device doesn't support IP csum
      * offloading. Note: Encapsulated packet must have the inner IP header
+     * csum already calculated.
+     * Packet with L4 csum offloading enabled was received with verified csum.
+     * Leave the L4 csum offloading enabled even with good checksum for the
+     * netdev to decide what would be the best to do.
+     * Netdev that requires pseudo header csum needs to calculate that.
+     * Provide a software fallback in case the netdev doesn't support L4 csum
+     * offloading. Note: Encapsulated packet must have the inner L4 header
      * csum already calculated. */
     dp_packet_ol_send_prepare(packet, netdev_flags);
-
-    if (dp_packet_ol_tx_l4_csum(packet)) {
-        if (dp_packet_ol_tx_tcp_csum(packet)) {
-            if (!(netdev_flags & NETDEV_OFFLOAD_TX_TCP_CSUM)) {
-                /* Fall back to TCP csum in software. */
-                VLOG_ERR_BUF(errormsg, "No TCP checksum support");
-                return false;
-            }
-        } else if (dp_packet_ol_tx_udp_csum(packet)) {
-            if (!(netdev_flags & NETDEV_OFFLOAD_TX_UDP_CSUM)) {
-                /* Fall back to UDP csum in software. */
-                VLOG_ERR_BUF(errormsg, "No UDP checksum support");
-                return false;
-            }
-        } else if (dp_packet_ol_tx_sctp_csum(packet)) {
-            if (!(netdev_flags & NETDEV_OFFLOAD_TX_SCTP_CSUM)) {
-                /* Fall back to SCTP csum in software. */
-                VLOG_ERR_BUF(errormsg, "No SCTP checksum support");
-                return false;
-            }
-        } else {
-            uint64_t ol_flags = *dp_packet_ol_flags_ptr(packet);
-            VLOG_ERR_BUF(errormsg, "No L4 checksum support: "
-                         "offload mask: %"PRIu64, ol_flags);
-            return false;
-        }
-    }
-
     return true;
 }
 
@@ -966,11 +945,10 @@  netdev_push_header(const struct netdev *netdev,
     size_t i, size = dp_packet_batch_size(batch);
 
     DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) {
-        if (OVS_UNLIKELY(dp_packet_ol_tcp_seg(packet)
-                         || dp_packet_ol_tx_l4_csum(packet))) {
+        if (OVS_UNLIKELY(dp_packet_ol_tcp_seg(packet))) {
             COVERAGE_INC(netdev_push_header_drops);
             dp_packet_delete(packet);
-            VLOG_WARN_RL(&rl, "%s: Tunneling packets with HW offload flags is "
+            VLOG_WARN_RL(&rl, "%s: Tunneling packets with TSO offloading is "
                          "not supported: packet dropped",
                          netdev_get_name(netdev));
         } else {
diff --git a/lib/packets.c b/lib/packets.c
index c5ad57543..8ad362d1e 100644
--- a/lib/packets.c
+++ b/lib/packets.c
@@ -1085,16 +1085,24 @@  packet_set_ipv4_addr(struct dp_packet *packet,
     pkt_metadata_init_conn(&packet->md);
 
     if (nh->ip_proto == IPPROTO_TCP && l4_size >= TCP_HEADER_LEN) {
-        struct tcp_header *th = dp_packet_l4(packet);
+        if (dp_packet_ol_tx_tcp_csum(packet)) {
+            dp_packet_ol_reset_l4_csum_good(packet);
+        } else {
+            struct tcp_header *th = dp_packet_l4(packet);
 
-        th->tcp_csum = recalc_csum32(th->tcp_csum, old_addr, new_addr);
+            th->tcp_csum = recalc_csum32(th->tcp_csum, old_addr, new_addr);
+        }
     } else if (nh->ip_proto == IPPROTO_UDP && l4_size >= UDP_HEADER_LEN ) {
-        struct udp_header *uh = dp_packet_l4(packet);
+        if (dp_packet_ol_tx_udp_csum(packet)) {
+            dp_packet_ol_reset_l4_csum_good(packet);
+        } else {
+            struct udp_header *uh = dp_packet_l4(packet);
 
-        if (uh->udp_csum) {
-            uh->udp_csum = recalc_csum32(uh->udp_csum, old_addr, new_addr);
-            if (!uh->udp_csum) {
-                uh->udp_csum = htons(0xffff);
+            if (uh->udp_csum) {
+                uh->udp_csum = recalc_csum32(uh->udp_csum, old_addr, new_addr);
+                if (!uh->udp_csum) {
+                    uh->udp_csum = htons(0xffff);
+                }
             }
         }
     }
@@ -1198,16 +1206,24 @@  packet_update_csum128(struct dp_packet *packet, uint8_t proto,
     size_t l4_size = dp_packet_l4_size(packet);
 
     if (proto == IPPROTO_TCP && l4_size >= TCP_HEADER_LEN) {
-        struct tcp_header *th = dp_packet_l4(packet);
+        if (dp_packet_ol_tx_tcp_csum(packet)) {
+            dp_packet_ol_reset_l4_csum_good(packet);
+        } else {
+            struct tcp_header *th = dp_packet_l4(packet);
 
-        th->tcp_csum = recalc_csum128(th->tcp_csum, addr, new_addr);
+            th->tcp_csum = recalc_csum128(th->tcp_csum, addr, new_addr);
+        }
     } else if (proto == IPPROTO_UDP && l4_size >= UDP_HEADER_LEN) {
-        struct udp_header *uh = dp_packet_l4(packet);
+        if (dp_packet_ol_tx_udp_csum(packet)) {
+            dp_packet_ol_reset_l4_csum_good(packet);
+        } else {
+            struct udp_header *uh = dp_packet_l4(packet);
 
-        if (uh->udp_csum) {
-            uh->udp_csum = recalc_csum128(uh->udp_csum, addr, new_addr);
-            if (!uh->udp_csum) {
-                uh->udp_csum = htons(0xffff);
+            if (uh->udp_csum) {
+                uh->udp_csum = recalc_csum128(uh->udp_csum, addr, new_addr);
+                if (!uh->udp_csum) {
+                    uh->udp_csum = htons(0xffff);
+                }
             }
         }
     } else if (proto == IPPROTO_ICMPV6 &&
@@ -1325,7 +1341,10 @@  static void
 packet_set_port(ovs_be16 *port, ovs_be16 new_port, ovs_be16 *csum)
 {
     if (*port != new_port) {
-        *csum = recalc_csum16(*csum, *port, new_port);
+        if (csum) {
+            *csum = recalc_csum16(*csum, *port, new_port);
+        }
+
         *port = new_port;
     }
 }
@@ -1337,9 +1356,16 @@  void
 packet_set_tcp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst)
 {
     struct tcp_header *th = dp_packet_l4(packet);
+    ovs_be16 *csum = NULL;
+
+    if (dp_packet_ol_tx_tcp_csum(packet)) {
+        dp_packet_ol_reset_l4_csum_good(packet);
+    } else {
+        csum = &th->tcp_csum;
+    }
 
-    packet_set_port(&th->tcp_src, src, &th->tcp_csum);
-    packet_set_port(&th->tcp_dst, dst, &th->tcp_csum);
+    packet_set_port(&th->tcp_src, src, csum);
+    packet_set_port(&th->tcp_dst, dst, csum);
     pkt_metadata_init_conn(&packet->md);
 }
 
@@ -1351,17 +1377,21 @@  packet_set_udp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst)
 {
     struct udp_header *uh = dp_packet_l4(packet);
 
-    if (uh->udp_csum) {
-        packet_set_port(&uh->udp_src, src, &uh->udp_csum);
-        packet_set_port(&uh->udp_dst, dst, &uh->udp_csum);
+    if (dp_packet_ol_tx_udp_csum(packet)) {
+        dp_packet_ol_reset_l4_csum_good(packet);
+        packet_set_port(&uh->udp_src, src, NULL);
+        packet_set_port(&uh->udp_dst, dst, NULL);
+    } else {
+        ovs_be16 *csum = uh->udp_csum ? &uh->udp_csum : NULL;
+
+        packet_set_port(&uh->udp_src, src, csum);
+        packet_set_port(&uh->udp_dst, dst, csum);
 
-        if (!uh->udp_csum) {
+        if (csum && !uh->udp_csum) {
             uh->udp_csum = htons(0xffff);
         }
-    } else {
-        uh->udp_src = src;
-        uh->udp_dst = dst;
     }
+
     pkt_metadata_init_conn(&packet->md);
 }
 
@@ -1372,18 +1402,27 @@  void
 packet_set_sctp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst)
 {
     struct sctp_header *sh = dp_packet_l4(packet);
-    ovs_be32 old_csum, old_correct_csum, new_csum;
-    uint16_t tp_len = dp_packet_l4_size(packet);
 
-    old_csum = get_16aligned_be32(&sh->sctp_csum);
-    put_16aligned_be32(&sh->sctp_csum, 0);
-    old_correct_csum = crc32c((void *)sh, tp_len);
+    if (dp_packet_ol_tx_sctp_csum(packet)) {
+        dp_packet_ol_reset_l4_csum_good(packet);
+        sh->sctp_src = src;
+        sh->sctp_dst = dst;
+    } else {
+        ovs_be32 old_csum, old_correct_csum, new_csum;
+        uint16_t tp_len = dp_packet_l4_size(packet);
+
+        old_csum = get_16aligned_be32(&sh->sctp_csum);
+        put_16aligned_be32(&sh->sctp_csum, 0);
+        old_correct_csum = crc32c((void *)sh, tp_len);
 
-    sh->sctp_src = src;
-    sh->sctp_dst = dst;
+        sh->sctp_src = src;
+        sh->sctp_dst = dst;
+
+        new_csum = crc32c((void *)sh, tp_len);
+        put_16aligned_be32(&sh->sctp_csum, old_csum ^ old_correct_csum
+                           ^ new_csum);
+    }
 
-    new_csum = crc32c((void *)sh, tp_len);
-    put_16aligned_be32(&sh->sctp_csum, old_csum ^ old_correct_csum ^ new_csum);
     pkt_metadata_init_conn(&packet->md);
 }
 
@@ -1907,3 +1946,72 @@  IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6)
         }
     }
 }
+
+/* Set TCP checksum field in packet 'p' with complete checksum.
+ * The packet must have the L3 and L4 offsets. */
+void
+packet_tcp_complete_csum(struct dp_packet *p)
+{
+    struct tcp_header *tcp = dp_packet_l4(p);
+
+    tcp->tcp_csum = 0;
+    if (dp_packet_ol_tx_ipv4(p)) {
+        struct ip_header *ip = dp_packet_l3(p);
+
+        tcp->tcp_csum = csum_finish(csum_continue(packet_csum_pseudoheader(ip),
+                                                  tcp, dp_packet_l4_size(p)));
+    } else if (dp_packet_ol_tx_ipv6(p)) {
+        struct ovs_16aligned_ip6_hdr *ip6 = dp_packet_l3(p);
+
+        tcp->tcp_csum = packet_csum_upperlayer6(ip6, tcp, ip6->ip6_nxt,
+                                                dp_packet_l4_size(p));
+    } else {
+        OVS_NOT_REACHED();
+    }
+}
+
+/* Set UDP checksum field in packet 'p' with complete checksum.
+ * The packet must have the L3 and L4 offsets. */
+void
+packet_udp_complete_csum(struct dp_packet *p)
+{
+    struct udp_header *udp = dp_packet_l4(p);
+
+    /* Skip csum calculation if the udp_csum is zero. */
+    if (!udp->udp_csum) {
+        return;
+    }
+
+    udp->udp_csum = 0;
+    if (dp_packet_ol_tx_ipv4(p)) {
+        struct ip_header *ip = dp_packet_l3(p);
+
+        udp->udp_csum = csum_finish(csum_continue(packet_csum_pseudoheader(ip),
+                                                  udp, dp_packet_l4_size(p)));
+    } else if (dp_packet_ol_tx_ipv6(p)) {
+        struct ovs_16aligned_ip6_hdr *ip6 = dp_packet_l3(p);
+
+        udp->udp_csum = packet_csum_upperlayer6(ip6, udp, ip6->ip6_nxt,
+                                                dp_packet_l4_size(p));
+    } else {
+        OVS_NOT_REACHED();
+    }
+
+    if (!udp->udp_csum) {
+        udp->udp_csum = htons(0xffff);
+    }
+}
+
+/* Set SCTP checksum field in packet 'p' with complete checksum.
+ * The packet must have the L3 and L4 offsets. */
+void
+packet_sctp_complete_csum(struct dp_packet *p)
+{
+    struct sctp_header *sh = dp_packet_l4(p);
+    uint16_t tp_len = dp_packet_l4_size(p);
+    ovs_be32 csum;
+
+    put_16aligned_be32(&sh->sctp_csum, 0);
+    csum = crc32c((void *)sh, tp_len);
+    put_16aligned_be32(&sh->sctp_csum, csum);
+}
diff --git a/lib/packets.h b/lib/packets.h
index e8bdf08a0..49d23781a 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -1641,6 +1641,9 @@  void packet_put_ra_prefix_opt(struct dp_packet *,
                               const ovs_be128 router_prefix);
 uint32_t packet_csum_pseudoheader(const struct ip_header *);
 void IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6);
+void packet_tcp_complete_csum(struct dp_packet *);
+void packet_udp_complete_csum(struct dp_packet *);
+void packet_sctp_complete_csum(struct dp_packet *);
 
 #define DNS_HEADER_LEN 12
 struct dns_header {