diff mbox series

[ovs-dev,v5,4/4] userspace: Enable L4 csum offloading by default.

Message ID 20220713181400.3326739-5-mkp@redhat.com
State Superseded
Headers show
Series Enhance support for checksum offloading | expand

Checks

Context Check Description
ovsrobot/apply-robot warning apply and check: warning
ovsrobot/intel-ovs-compilation fail test: fail

Commit Message

Mike Pattrick July 13, 2022, 6:14 p.m. UTC
From: Flavio Leitner <fbl@sysclose.org>

The netdev receiving packets is supposed to provide flags indicating
whether the L4 csum was verified and whether it is OK or BAD;
otherwise the stack will verify it in software when appropriate.

If the packet comes with good checksum, then postpone the
checksum calculation to the egress device if needed.

When encapsulating a packet with that flag, set the checksum
of the inner L4 header since that is not yet supported.

Calculate the L4 csum when the packet is going to be sent over
a device that doesn't support the feature.

Linux tap devices allow enabling L3 and L4 offload, so this
patch enables the feature. However, the Linux socket interface
remains disabled because the API doesn't allow enabling
those two features without enabling TSO too.

Signed-off-by: Flavio Leitner <fbl@sysclose.org>
Co-authored-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Mike Pattrick <mkp@redhat.com>

---
v5:
 - Added David's proposed new_device change
 - Corrected checksumming in header push
 - Added update_netdev_flags where needed
---
 lib/conntrack.c         |  15 +--
 lib/dp-packet.c         |  24 ++++
 lib/dp-packet.h         |  78 ++++++++++++-
 lib/flow.c              |  23 ++++
 lib/netdev-dpdk.c       | 177 +++++++++++++++++++---------
 lib/netdev-linux.c      | 252 ++++++++++++++++++++++++++--------------
 lib/netdev-native-tnl.c |  32 +----
 lib/netdev.c            |  52 ++-------
 lib/packets.c           | 175 ++++++++++++++++++++++------
 lib/packets.h           |   3 +
 10 files changed, 572 insertions(+), 259 deletions(-)

Comments

0-day Robot July 13, 2022, 6:22 p.m. UTC | #1
Bleep bloop.  Greetings Mike Pattrick, I am a robot and I have tried out your patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


Patch skipped due to previous failure.

Please check this out.  If you feel there has been an error, please email aconole@redhat.com

Thanks,
0-day Robot
diff mbox series

Patch

diff --git a/lib/conntrack.c b/lib/conntrack.c
index d5793856d..b2733e55c 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -2106,13 +2106,12 @@  conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type,
     }
 
     if (ok) {
-        bool hwol_bad_l4_csum = dp_packet_l4_checksum_bad(pkt);
-        if (!hwol_bad_l4_csum) {
-            bool  hwol_good_l4_csum = dp_packet_l4_checksum_good(pkt)
-                                      || dp_packet_hwol_tx_l4_checksum(pkt);
+        if (!dp_packet_l4_checksum_bad(pkt)) {
             /* Validate the checksum only when hwol is not supported. */
             if (extract_l4(&ctx->key, l4, dp_packet_l4_size(pkt),
-                           &ctx->icmp_related, l3, !hwol_good_l4_csum,
+                           &ctx->icmp_related, l3,
+                           !dp_packet_l4_checksum_good(pkt) &&
+                           !dp_packet_hwol_tx_l4_checksum(pkt),
                            NULL)) {
                 ctx->hash = conn_key_hash(&ctx->key, ct->hash_basis);
                 return true;
@@ -3424,8 +3423,10 @@  handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
             adj_seqnum(&th->tcp_seq, ec->seq_skew);
     }
 
-    th->tcp_csum = 0;
-    if (!dp_packet_hwol_tx_l4_checksum(pkt)) {
+    if (dp_packet_hwol_tx_l4_checksum(pkt)) {
+        dp_packet_ol_reset_l4_csum_good(pkt);
+    } else {
+        th->tcp_csum = 0;
         if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) {
             th->tcp_csum = packet_csum_upperlayer6(nh6, th, ctx->key.nw_proto,
                                dp_packet_l4_size(pkt));
diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index 7771d0d62..33bdee510 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -38,6 +38,9 @@  dp_packet_init__(struct dp_packet *b, size_t allocated, enum dp_packet_source so
     dp_packet_init_specific(b);
     /* By default assume the packet type to be Ethernet. */
     b->packet_type = htonl(PT_ETH);
+    /* Reset csum start and offset. */
+    b->csum_start = 0;
+    b->csum_offset = 0;
 }
 
 static void
@@ -520,4 +523,25 @@  dp_packet_ol_send_prepare(struct dp_packet *p, const uint64_t flags)
         }
         dp_packet_hwol_reset_tx_ip_csum(p);
     }
+
+    if (dp_packet_l4_checksum_good(p) || !dp_packet_hwol_tx_l4_checksum(p)) {
+        return;
+    }
+
+    if (dp_packet_hwol_l4_is_tcp(p)
+        && !(flags & NETDEV_TX_OFFLOAD_TCP_CKSUM)) {
+        packet_tcp_complete_csum(p);
+        dp_packet_ol_set_l4_csum_good(p);
+        dp_packet_ol_reset_tx_l4_csum(p);
+    } else if (dp_packet_hwol_l4_is_udp(p)
+        && !(flags & NETDEV_TX_OFFLOAD_UDP_CKSUM)) {
+        packet_udp_complete_csum(p);
+        dp_packet_ol_set_l4_csum_good(p);
+        dp_packet_ol_reset_tx_l4_csum(p);
+    } else if (!(flags & NETDEV_TX_OFFLOAD_SCTP_CKSUM)
+        && dp_packet_hwol_l4_is_sctp(p)) {
+        packet_sctp_complete_csum(p);
+        dp_packet_ol_set_l4_csum_good(p);
+        dp_packet_ol_reset_tx_l4_csum(p);
+    }
 }
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 48f5e82a7..4c6e7e853 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -140,6 +140,8 @@  struct dp_packet {
                                       or UINT16_MAX. */
     uint32_t cutlen;               /* length in bytes to cut from the end. */
     ovs_be32 packet_type;          /* Packet type as defined in OpenFlow */
+    uint16_t csum_start;           /* Position to start checksumming from. */
+    uint16_t csum_offset;          /* Offset to place checksum. */
     union {
         struct pkt_metadata md;
         uint64_t data[DP_PACKET_CONTEXT_SIZE / 8];
@@ -991,6 +993,13 @@  dp_packet_hwol_is_ipv4(const struct dp_packet *b)
     return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_IPV4);
 }
 
+/* Returns 'true' if packet 'p' is marked as IPv6. */
+static inline bool
+dp_packet_ol_tx_ipv6(const struct dp_packet *p)
+{
+    return !!(*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_TX_IPV6);
+}
+
 /* Returns 'true' if packet 'b' is marked for TCP checksum offloading. */
 static inline bool
 dp_packet_hwol_l4_is_tcp(const struct dp_packet *b)
@@ -1015,18 +1024,26 @@  dp_packet_hwol_l4_is_sctp(struct dp_packet *b)
             DP_PACKET_OL_TX_SCTP_CKSUM;
 }
 
-/* Mark packet 'b' for IPv4 checksum offloading. */
 static inline void
-dp_packet_hwol_set_tx_ipv4(struct dp_packet *b)
+dp_packet_ol_reset_tx_l4_csum(struct dp_packet *p)
+{
+    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_L4_MASK;
+}
+
+/* Mark packet 'p' as IPv4. */
+static inline void
+dp_packet_hwol_set_tx_ipv4(struct dp_packet *p)
 {
-    *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_IPV4;
+    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_IPV6;
+    *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_TX_IPV4;
 }
 
-/* Mark packet 'b' for IPv6 checksum offloading. */
+/* Mark packet 'a' as IPv6. */
 static inline void
-dp_packet_hwol_set_tx_ipv6(struct dp_packet *b)
+dp_packet_hwol_set_tx_ipv6(struct dp_packet *a)
 {
-    *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_IPV6;
+    *dp_packet_ol_flags_ptr(a) &= ~DP_PACKET_OL_TX_IPV4;
+    *dp_packet_ol_flags_ptr(a) |= DP_PACKET_OL_TX_IPV6;
 }
 
 /* Returns 'true' if packet 'p' is marked for IPv4 checksum offloading. */
@@ -1125,6 +1142,8 @@  dp_packet_ip_set_header_csum(struct dp_packet *p)
     ip->ip_csum = csum(ip, sizeof *ip);
 }
 
+/* Returns 'true' if the packet 'p' has good integrity and the
+ * checksum in it is correct. */
 static inline bool
 dp_packet_l4_checksum_good(const struct dp_packet *p)
 {
@@ -1139,6 +1158,53 @@  dp_packet_l4_checksum_bad(const struct dp_packet *p)
             DP_PACKET_OL_RX_L4_CKSUM_BAD;
 }
 
+/* Returns 'true' if the packet has good integrity though the
+ * checksum in the packet 'p' is not complete. */
+static inline bool
+dp_packet_ol_l4_csum_partial(const struct dp_packet *p)
+{
+    return (*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_RX_L4_CKSUM_MASK) ==
+            DP_PACKET_OL_RX_L4_CKSUM_MASK;
+}
+
+/* Marks packet 'p' with good integrity though the checksum in the
+ * packet is not complete. */
+static inline void
+dp_packet_ol_set_l4_csum_partial(const struct dp_packet *p)
+{
+    *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RX_L4_CKSUM_MASK;
+}
+
+/* Marks packet 'p' with good L4 checksum. */
+static inline void
+dp_packet_ol_set_l4_csum_good(const struct dp_packet *p)
+{
+    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_L4_CKSUM_BAD;
+    *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RX_L4_CKSUM_GOOD;
+}
+
+/* Marks packet 'p' with good L4 checksum as modified. */
+static inline void
+dp_packet_ol_reset_l4_csum_good(const struct dp_packet *p)
+{
+    if (!dp_packet_ol_l4_csum_partial(p)) {
+        *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_L4_CKSUM_GOOD;
+    }
+}
+
+/* Marks packet 'p' with good integrity if the 'start' and 'offset'
+ * match the 'csum_start' and 'csum_offset' in packet 'p'.
+ * The 'start' is the offset from the beginning of the packet headers.
+ * The 'offset' is the offset from start to place the checksum. */
+static inline void
+dp_packet_ol_vnet_csum_check(const struct dp_packet *p, uint16_t start,
+                             uint16_t offset)
+{
+    if (p->csum_start == start && p->csum_offset == offset) {
+        dp_packet_ol_set_l4_csum_partial(p);
+    }
+}
+
 static inline void ALWAYS_INLINE
 dp_packet_update_rss_hash_ipv4_tcp_udp(struct dp_packet *packet)
 {
diff --git a/lib/flow.c b/lib/flow.c
index 0ac274dc3..adda681e7 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -1027,6 +1027,13 @@  miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
                     } else if (dl_type == htons(ETH_TYPE_IPV6)) {
                         dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
                     }
+                    dp_packet_ol_vnet_csum_check(packet, packet->l4_ofs,
+                                                 offsetof(struct tcp_header,
+                                                          tcp_csum));
+                    if (dp_packet_l4_checksum_good(packet)
+                        || dp_packet_ol_l4_csum_partial(packet)) {
+                        dp_packet_hwol_set_csum_tcp(packet);
+                    }
                 }
             }
         } else if (OVS_LIKELY(nw_proto == IPPROTO_UDP)) {
@@ -1042,6 +1049,13 @@  miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
                 } else if (dl_type == htons(ETH_TYPE_IPV6)) {
                     dp_packet_update_rss_hash_ipv6_tcp_udp(packet);
                 }
+                dp_packet_ol_vnet_csum_check(packet, packet->l4_ofs,
+                                             offsetof(struct udp_header,
+                                                      udp_csum));
+                if (dp_packet_l4_checksum_good(packet)
+                    || dp_packet_ol_l4_csum_partial(packet)) {
+                    dp_packet_hwol_set_csum_udp(packet);
+                }
             }
         } else if (OVS_LIKELY(nw_proto == IPPROTO_SCTP)) {
             if (OVS_LIKELY(size >= SCTP_HEADER_LEN)) {
@@ -1051,6 +1065,13 @@  miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
                 miniflow_push_be16(mf, tp_dst, sctp->sctp_dst);
                 miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
                 miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
+                dp_packet_ol_vnet_csum_check(packet, packet->l4_ofs,
+                                             offsetof(struct sctp_header,
+                                                      sctp_csum));
+                if (dp_packet_l4_checksum_good(packet)
+                    || dp_packet_ol_l4_csum_partial(packet)) {
+                    dp_packet_hwol_set_csum_sctp(packet);
+                }
             }
         } else if (OVS_LIKELY(nw_proto == IPPROTO_ICMP)) {
             if (OVS_LIKELY(size >= ICMP_HEADER_LEN)) {
@@ -3170,6 +3191,7 @@  flow_compose_l4_csum(struct dp_packet *p, const struct flow *flow,
             tcp->tcp_csum = 0;
             tcp->tcp_csum = csum_finish(csum_continue(pseudo_hdr_csum,
                                                       tcp, l4_len));
+            dp_packet_ol_set_l4_csum_good(p);
         } else if (flow->nw_proto == IPPROTO_UDP) {
             struct udp_header *udp = dp_packet_l4(p);
 
@@ -3179,6 +3201,7 @@  flow_compose_l4_csum(struct dp_packet *p, const struct flow *flow,
             if (!udp->udp_csum) {
                 udp->udp_csum = htons(0xffff);
             }
+            dp_packet_ol_set_l4_csum_good(p);
         } else if (flow->nw_proto == IPPROTO_ICMP) {
             struct icmp_header *icmp = dp_packet_l4(p);
 
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 811c62a87..a69bab829 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -145,17 +145,6 @@  typedef uint16_t dpdk_port_t;
 
 #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
 
-/* List of required flags advertised by the hardware that will be used
- * if TSO is enabled. Ideally this should include
- * RTE_ETH_TX_OFFLOAD_SCTP_CKSUM. However, very few drivers support that
- * at the moment and SCTP is not a widely used protocol like TCP and UDP,
- * so it's optional. */
-#define DPDK_TX_TSO_OFFLOAD_FLAGS (RTE_ETH_TX_OFFLOAD_TCP_TSO        \
-                                   | RTE_ETH_TX_OFFLOAD_TCP_CKSUM    \
-                                   | RTE_ETH_TX_OFFLOAD_UDP_CKSUM    \
-                                   | RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)
-
-
 static const struct rte_eth_conf port_conf = {
     .rxmode = {
         .split_hdr_size = 0,
@@ -398,8 +387,10 @@  enum dpdk_hw_ol_features {
     NETDEV_RX_HW_CRC_STRIP = 1 << 1,
     NETDEV_RX_HW_SCATTER = 1 << 2,
     NETDEV_TX_IPV4_CKSUM_OFFLOAD = 1 << 3,
-    NETDEV_TX_TSO_OFFLOAD = 1 << 4,
-    NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 5,
+    NETDEV_TX_TCP_CKSUM_OFFLOAD = 1 << 4,
+    NETDEV_TX_UDP_CKSUM_OFFLOAD = 1 << 5,
+    NETDEV_TX_SCTP_CKSUM_OFFLOAD = 1 << 6,
+    NETDEV_TX_TSO_OFFLOAD = 1 << 7,
 };
 
 /*
@@ -953,6 +944,35 @@  dpdk_watchdog(void *dummy OVS_UNUSED)
     return NULL;
 }
 
+static void
+netdev_dpdk_update_netdev_flag(struct netdev_dpdk *dev,
+                               enum dpdk_hw_ol_features hw_ol_features,
+                               enum netdev_ol_flags flag)
+{
+    struct netdev *netdev = &dev->up;
+
+    if (dev->hw_ol_features & hw_ol_features) {
+        netdev->ol_flags |= flag;
+    } else {
+        netdev->ol_flags &= ~flag;
+    }
+}
+
+static void
+netdev_dpdk_update_netdev_flags(struct netdev_dpdk *dev)
+{
+    netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_IPV4_CKSUM_OFFLOAD,
+                                   NETDEV_TX_OFFLOAD_IPV4_CKSUM);
+    netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_TCP_CKSUM_OFFLOAD,
+                                   NETDEV_TX_OFFLOAD_TCP_CKSUM);
+    netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_UDP_CKSUM_OFFLOAD,
+                                   NETDEV_TX_OFFLOAD_UDP_CKSUM);
+    netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_SCTP_CKSUM_OFFLOAD,
+                                   NETDEV_TX_OFFLOAD_SCTP_CKSUM);
+    netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_TSO_OFFLOAD,
+                                   NETDEV_TX_OFFLOAD_TCP_TSO);
+}
+
 static int
 dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
 {
@@ -989,11 +1009,20 @@  dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
         conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM;
     }
 
+    if (dev->hw_ol_features & NETDEV_TX_TCP_CKSUM_OFFLOAD) {
+        conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
+    }
+
+    if (dev->hw_ol_features & NETDEV_TX_UDP_CKSUM_OFFLOAD) {
+        conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_UDP_CKSUM;
+    }
+
+    if (dev->hw_ol_features & NETDEV_TX_SCTP_CKSUM_OFFLOAD) {
+        conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_SCTP_CKSUM;
+    }
+
     if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
-        conf.txmode.offloads |= DPDK_TX_TSO_OFFLOAD_FLAGS;
-        if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) {
-            conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_SCTP_CKSUM;
-        }
+        conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO;
     }
 
     /* Limit configured rss hash functions to only those supported
@@ -1099,7 +1128,6 @@  dpdk_eth_dev_init(struct netdev_dpdk *dev)
     struct rte_ether_addr eth_addr;
     int diag;
     int n_rxq, n_txq;
-    uint32_t tx_tso_offload_capa = DPDK_TX_TSO_OFFLOAD_FLAGS;
     uint32_t rx_chksm_offload_capa = RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
                                      RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
                                      RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
@@ -1135,18 +1163,28 @@  dpdk_eth_dev_init(struct netdev_dpdk *dev)
         dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD;
     }
 
+    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_CKSUM) {
+        dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD;
+    } else {
+        dev->hw_ol_features &= ~NETDEV_TX_TCP_CKSUM_OFFLOAD;
+    }
+
+    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_CKSUM) {
+        dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD;
+    } else {
+        dev->hw_ol_features &= ~NETDEV_TX_UDP_CKSUM_OFFLOAD;
+    }
+
+    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) {
+        dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD;
+    } else {
+        dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD;
+    }
+
     dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
     if (userspace_tso_enabled()) {
-        if ((info.tx_offload_capa & tx_tso_offload_capa)
-            == tx_tso_offload_capa) {
+        if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO) {
             dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
-            if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) {
-                dev->hw_ol_features |= NETDEV_TX_SCTP_CHECKSUM_OFFLOAD;
-            } else {
-                VLOG_WARN("%s: Tx SCTP checksum offload is not supported, "
-                          "SCTP packets sent to this device will be dropped",
-                          netdev_get_name(&dev->up));
-            }
         } else {
             VLOG_WARN("%s: Tx TSO offload is not supported.",
                       netdev_get_name(&dev->up));
@@ -1708,6 +1746,9 @@  netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args)
         smap_add(args, FIELD, dev->hw_ol_features & FLAG ? "true" : "false");
         HWOL_SMAP_ADD("rx_csum_offload", NETDEV_RX_CHECKSUM_OFFLOAD);
         HWOL_SMAP_ADD("tx_ip_csum_offload", NETDEV_TX_IPV4_CKSUM_OFFLOAD);
+        HWOL_SMAP_ADD("tx_tcp_csum_offload", NETDEV_TX_TCP_CKSUM_OFFLOAD);
+        HWOL_SMAP_ADD("tx_udp_csum_offload", NETDEV_TX_UDP_CKSUM_OFFLOAD);
+        HWOL_SMAP_ADD("tx_sctp_csum_offload", NETDEV_TX_SCTP_CKSUM_OFFLOAD);
         HWOL_SMAP_ADD("tx_tso_offload", NETDEV_TX_TSO_OFFLOAD);
 #undef HWOL_SMAP_ADD
         smap_add(args, "lsc_interrupt_mode",
@@ -2154,6 +2195,7 @@  netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
 
     mbuf->l2_len = (char *) dp_packet_l3(pkt) - (char *) dp_packet_eth(pkt);
     mbuf->l3_len = (char *) dp_packet_l4(pkt) - (char *) dp_packet_l3(pkt);
+    mbuf->l4_len = 0;
     mbuf->outer_l2_len = 0;
     mbuf->outer_l3_len = 0;
 
@@ -3938,6 +3980,37 @@  new_device(int vid)
                 dev->vhost_reconfigured = true;
             }
 
+            uint64_t features;
+            if (rte_vhost_get_negotiated_features(vid, &features)) {
+                VLOG_INFO("Error checking guest features for "
+                          "vHost Device '%s'", dev->vhost_id);
+            } else {
+                if (features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
+                    dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD;
+                    dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD;
+                    dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD;
+                }
+
+                if (userspace_tso_enabled()) {
+                    if (features & (1ULL << VIRTIO_NET_F_GUEST_TSO4)
+                        && features & (1ULL << VIRTIO_NET_F_GUEST_TSO6)) {
+
+                        dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
+                        VLOG_DBG("%s: TSO enabled on vhost port",
+                                 netdev_get_name(&dev->up));
+                    } else {
+                        VLOG_WARN("%s: Tx TSO offload is not supported.",
+                                  netdev_get_name(&dev->up));
+                    }
+                }
+            }
+
+            /* There is no support in virtio net to offload IPv4 csum,
+             * but the vhost library handles IPv4 csum offloading fine. */
+            dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD;
+
+            netdev_dpdk_update_netdev_flags(dev);
+
             ovsrcu_index_set(&dev->vid, vid);
             exists = true;
 
@@ -4001,6 +4074,14 @@  destroy_device(int vid)
                    dev->up.n_rxq * sizeof *dev->vhost_rxq_enabled);
             netdev_dpdk_txq_map_clear(dev);
 
+            /* Clear offload capabilities before next new_device. */
+            dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD;
+            dev->hw_ol_features &= ~NETDEV_TX_TCP_CKSUM_OFFLOAD;
+            dev->hw_ol_features &= ~NETDEV_TX_UDP_CKSUM_OFFLOAD;
+            dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD;
+            dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
+            netdev_dpdk_update_netdev_flags(dev);
+
             netdev_change_seq_changed(&dev->up);
             ovs_mutex_unlock(&dev->mutex);
             exists = true;
@@ -4938,22 +5019,7 @@  netdev_dpdk_reconfigure(struct netdev *netdev)
     }
 
     err = dpdk_eth_dev_init(dev);
-
-    if (dev->hw_ol_features & NETDEV_TX_IPV4_CKSUM_OFFLOAD) {
-        netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM;
-    } else {
-        netdev->ol_flags &= ~NETDEV_TX_OFFLOAD_IPV4_CKSUM;
-    }
-
-    if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
-        netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO;
-        netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM;
-        netdev->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM;
-        netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM;
-        if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) {
-            netdev->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM;
-        }
-    }
+    netdev_dpdk_update_netdev_flags(dev);
 
     /* If both requested and actual hwaddr were previously
      * unset (initialized to 0), then first device init above
@@ -4995,11 +5061,6 @@  dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev)
         dev->tx_q[0].map = 0;
     }
 
-    if (userspace_tso_enabled()) {
-        dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
-        VLOG_DBG("%s: TSO enabled on vhost port", netdev_get_name(&dev->up));
-    }
-
     netdev_dpdk_remap_txqs(dev);
 
     if (netdev_dpdk_get_vid(dev) >= 0) {
@@ -5020,6 +5081,8 @@  dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev)
         }
     }
 
+    netdev_dpdk_update_netdev_flags(dev);
+
     return 0;
 }
 
@@ -5042,7 +5105,7 @@  netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
     int err;
     uint64_t vhost_flags = 0;
-    uint64_t vhost_unsup_flags;
+    uint64_t vhost_unsup_flags = 0;
 
     ovs_mutex_lock(&dev->mutex);
 
@@ -5094,19 +5157,17 @@  netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
             goto unlock;
         }
 
+        netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM;
+        netdev->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM;
+        netdev->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM;
+        netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM;
+
         if (userspace_tso_enabled()) {
             netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO;
-            netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM;
-            netdev->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM;
-            netdev->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM;
-            netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM;
             vhost_unsup_flags = 1ULL << VIRTIO_NET_F_HOST_ECN
                                 | 1ULL << VIRTIO_NET_F_HOST_UFO;
-        } else {
-            /* This disables checksum offloading and all the features
-             * that depends on it (TSO, UFO, ECN) according to virtio
-             * specification. */
-            vhost_unsup_flags = 1ULL << VIRTIO_NET_F_CSUM;
+            VLOG_DBG("%s: TSO enabled on vhost port",
+                     netdev_get_name(&dev->up));
         }
 
         err = rte_vhost_driver_disable_features(dev->vhost_id,
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 2766b3f2b..c5d22bda6 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -927,14 +927,6 @@  netdev_linux_common_construct(struct netdev *netdev_)
     netnsid_unset(&netdev->netnsid);
     ovs_mutex_init(&netdev->mutex);
 
-    if (userspace_tso_enabled()) {
-        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO;
-        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM;
-        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM;
-        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM;
-        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM;
-    }
-
     return 0;
 }
 
@@ -948,6 +940,16 @@  netdev_linux_construct(struct netdev *netdev_)
         return error;
     }
 
+    /* The socket interface doesn't offer the option to enable only
+     * csum offloading without TSO. */
+    if (userspace_tso_enabled()) {
+        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO;
+        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM;
+        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM;
+        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM;
+        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM;
+    }
+
     error = get_flags(&netdev->up, &netdev->ifi_flags);
     if (error == ENODEV) {
         if (netdev->up.netdev_class != &netdev_internal_class) {
@@ -976,6 +978,7 @@  netdev_linux_construct_tap(struct netdev *netdev_)
     struct netdev_linux *netdev = netdev_linux_cast(netdev_);
     static const char tap_dev[] = "/dev/net/tun";
     const char *name = netdev_->name;
+    unsigned long oflags;
     struct ifreq ifr;
 
     int error = netdev_linux_common_construct(netdev_);
@@ -993,10 +996,7 @@  netdev_linux_construct_tap(struct netdev *netdev_)
 
     /* Create tap device. */
     get_flags(&netdev->up, &netdev->ifi_flags);
-    ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
-    if (userspace_tso_enabled()) {
-        ifr.ifr_flags |= IFF_VNET_HDR;
-    }
+    ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
 
     ovs_strzcpy(ifr.ifr_name, name, sizeof ifr.ifr_name);
     if (ioctl(netdev->tap_fd, TUNSETIFF, &ifr) == -1) {
@@ -1019,21 +1019,22 @@  netdev_linux_construct_tap(struct netdev *netdev_)
         goto error_close;
     }
 
+    oflags = TUN_F_CSUM;
     if (userspace_tso_enabled()) {
-        /* Old kernels don't support TUNSETOFFLOAD. If TUNSETOFFLOAD is
-         * available, it will return EINVAL when a flag is unknown.
-         * Therefore, try enabling offload with no flags to check
-         * if TUNSETOFFLOAD support is available or not. */
-        if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, 0) == 0 || errno != EINVAL) {
-            unsigned long oflags = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6;
-
-            if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, oflags) == -1) {
-                VLOG_WARN("%s: enabling tap offloading failed: %s", name,
-                          ovs_strerror(errno));
-                error = errno;
-                goto error_close;
-            }
-        }
+        oflags |= (TUN_F_TSO4 | TUN_F_TSO6);
+    }
+
+    if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, oflags) == 0) {
+        netdev_->ol_flags |= (NETDEV_TX_OFFLOAD_IPV4_CKSUM
+                              | NETDEV_TX_OFFLOAD_TCP_CKSUM
+                              | NETDEV_TX_OFFLOAD_UDP_CKSUM);
+
+        if (userspace_tso_enabled()) {
+            netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO;
+         }
+    } else {
+       VLOG_WARN("%s: Disabling hardware offloading: %s", name,
+                 ovs_strerror(errno));
     }
 
     netdev->present = true;
@@ -1333,18 +1334,22 @@  netdev_linux_batch_rxq_recv_sock(struct netdev_rxq_linux *rx, int mtu,
             pkt = buffers[i];
          }
 
-        if (virtio_net_hdr_size && netdev_linux_parse_vnet_hdr(pkt)) {
-            struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up);
-            struct netdev_linux *netdev = netdev_linux_cast(netdev_);
+        if (virtio_net_hdr_size) {
+            int ret = netdev_linux_parse_vnet_hdr(pkt);
+            if (OVS_UNLIKELY(ret)) {
+                struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up);
+                struct netdev_linux *netdev = netdev_linux_cast(netdev_);
 
-            /* Unexpected error situation: the virtio header is not present
-             * or corrupted. Drop the packet but continue in case next ones
-             * are correct. */
-            dp_packet_delete(pkt);
-            netdev->rx_dropped += 1;
-            VLOG_WARN_RL(&rl, "%s: Dropped packet: Invalid virtio net header",
-                         netdev_get_name(netdev_));
-            continue;
+                /* Unexpected error situation: the virtio header is not
+                 * present or corrupted or contains unsupported features.
+                 * Drop the packet but continue in case next ones are
+                 * correct. */
+                dp_packet_delete(pkt);
+                netdev->rx_dropped += 1;
+                VLOG_WARN_RL(&rl, "%s: Dropped packet: %s",
+                             netdev_get_name(netdev_), ovs_strerror(ret));
+                continue;
+            }
         }
 
         for (cmsg = CMSG_FIRSTHDR(&mmsgs[i].msg_hdr); cmsg;
@@ -1392,7 +1397,6 @@  static int
 netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu,
                                 struct dp_packet_batch *batch)
 {
-    int virtio_net_hdr_size;
     ssize_t retval;
     size_t std_len;
     int iovlen;
@@ -1402,16 +1406,14 @@  netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu,
         /* Use the buffer from the allocated packet below to receive MTU
          * sized packets and an aux_buf for extra TSO data. */
         iovlen = IOV_TSO_SIZE;
-        virtio_net_hdr_size = sizeof(struct virtio_net_hdr);
     } else {
         /* Use only the buffer from the allocated packet. */
         iovlen = IOV_STD_SIZE;
-        virtio_net_hdr_size = 0;
     }
 
     /* The length here needs to be accounted in the same way when the
      * aux_buf is allocated so that it can be prepended to TSO buffer. */
-    std_len = virtio_net_hdr_size + VLAN_ETH_HEADER_LEN + mtu;
+    std_len = sizeof(struct virtio_net_hdr) + VLAN_ETH_HEADER_LEN + mtu;
     for (i = 0; i < NETDEV_MAX_BURST; i++) {
         struct dp_packet *buffer;
         struct dp_packet *pkt;
@@ -1451,7 +1453,7 @@  netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu,
             pkt = buffer;
         }
 
-        if (virtio_net_hdr_size && netdev_linux_parse_vnet_hdr(pkt)) {
+        if (netdev_linux_parse_vnet_hdr(pkt)) {
             struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up);
             struct netdev_linux *netdev = netdev_linux_cast(netdev_);
 
@@ -1600,7 +1602,7 @@  netdev_linux_sock_batch_send(int sock, int ifindex, bool tso, int mtu,
  * on other interface types because we attach a socket filter to the rx
  * socket. */
 static int
-netdev_linux_tap_batch_send(struct netdev *netdev_, bool tso, int mtu,
+netdev_linux_tap_batch_send(struct netdev *netdev_, int mtu,
                             struct dp_packet_batch *batch)
 {
     struct netdev_linux *netdev = netdev_linux_cast(netdev_);
@@ -1621,9 +1623,7 @@  netdev_linux_tap_batch_send(struct netdev *netdev_, bool tso, int mtu,
         ssize_t retval;
         int error;
 
-        if (tso) {
-            netdev_linux_prepend_vnet_hdr(packet, mtu);
-        }
+        netdev_linux_prepend_vnet_hdr(packet, mtu);
 
         size = dp_packet_size(packet);
         do {
@@ -1754,7 +1754,7 @@  netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
 
         error = netdev_linux_sock_batch_send(sock, ifindex, tso, mtu, batch);
     } else {
-        error = netdev_linux_tap_batch_send(netdev_, tso, mtu, batch);
+        error = netdev_linux_tap_batch_send(netdev_, mtu, batch);
     }
     if (error) {
         if (error == ENOBUFS) {
@@ -6634,53 +6634,73 @@  netdev_linux_parse_l2(struct dp_packet *b, uint16_t *l4proto)
     return 0;
 }
 
+/* Initializes packet 'b' with features enabled in the prepended
+ * struct virtio_net_hdr.  Returns 0 if successful, otherwise a
+ * positive errno value. */
 static int
 netdev_linux_parse_vnet_hdr(struct dp_packet *b)
 {
     struct virtio_net_hdr *vnet = dp_packet_pull(b, sizeof *vnet);
-    uint16_t l4proto = 0;
 
     if (OVS_UNLIKELY(!vnet)) {
-        return -EINVAL;
+        return EINVAL;
     }
 
     if (vnet->flags == 0 && vnet->gso_type == VIRTIO_NET_HDR_GSO_NONE) {
         return 0;
     }
 
-    if (netdev_linux_parse_l2(b, &l4proto)) {
-        return -EINVAL;
-    }
-
     if (vnet->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) {
-        if (l4proto == IPPROTO_TCP) {
-            dp_packet_hwol_set_csum_tcp(b);
-        } else if (l4proto == IPPROTO_UDP) {
-            dp_packet_hwol_set_csum_udp(b);
-        } else if (l4proto == IPPROTO_SCTP) {
-            dp_packet_hwol_set_csum_sctp(b);
-        }
-    }
+        uint16_t l4proto = 0;
 
-    if (l4proto && vnet->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
-        uint8_t allowed_mask = VIRTIO_NET_HDR_GSO_TCPV4
-                                | VIRTIO_NET_HDR_GSO_TCPV6
-                                | VIRTIO_NET_HDR_GSO_UDP;
-        uint8_t type = vnet->gso_type & allowed_mask;
-
-        if (type == VIRTIO_NET_HDR_GSO_TCPV4
-            || type == VIRTIO_NET_HDR_GSO_TCPV6) {
-            dp_packet_hwol_set_tcp_seg(b);
+        if (netdev_linux_parse_l2(b, &l4proto)) {
+            return EINVAL;
         }
-    }
 
-    return 0;
+        if (l4proto == IPPROTO_UDP) {
+            dp_packet_hwol_set_csum_udp(b);
+        }
+        /* The packet has an offloaded checksum.  However, there is no
+         * additional information such as the protocol used, so obtaining
+         * it would require parsing the packet here.  The checksum starting
+         * point and offset are going to be verified when the packet headers
+         * are parsed during miniflow extraction. */
+        b->csum_start = (OVS_FORCE uint16_t) vnet->csum_start;
+        b->csum_offset = (OVS_FORCE uint16_t) vnet->csum_offset;
+    } else {
+        b->csum_start = 0;
+        b->csum_offset = 0;
+    }
+
+    int ret = 0;
+    switch (vnet->gso_type) {
+    case VIRTIO_NET_HDR_GSO_TCPV4:
+    case VIRTIO_NET_HDR_GSO_TCPV6:
+        /* FIXME: The packet has offloaded TCP segmentation. The gso_size
+         * is given and needs to be respected. */
+        dp_packet_hwol_set_tcp_seg(b);
+        break;
+    case VIRTIO_NET_HDR_GSO_UDP:
+        /* UFO is not supported. */
+        VLOG_WARN_RL(&rl, "Received an unsupported packet with UFO enabled.");
+        ret = ENOTSUP;
+        break;
+    case VIRTIO_NET_HDR_GSO_NONE:
+        break;
+    default:
+        ret = ENOTSUP;
+        VLOG_WARN_RL(&rl, "Received an unsupported packet with GSO type: 0x%x",
+                     vnet->gso_type);
+    }
+
+    return ret;
 }
 
 static void
 netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu)
 {
-    struct virtio_net_hdr *vnet = dp_packet_push_zeros(b, sizeof *vnet);
+    struct virtio_net_hdr v;
+    struct virtio_net_hdr *vnet = &v;
 
     if (dp_packet_hwol_is_tso(b)) {
         uint16_t hdr_len = ((char *)dp_packet_l4(b) - (char *)dp_packet_eth(b))
@@ -6690,30 +6710,92 @@  netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu)
         vnet->gso_size = (OVS_FORCE __virtio16)(mtu - hdr_len);
         if (dp_packet_hwol_is_ipv4(b)) {
             vnet->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
-        } else {
+        } else if (dp_packet_ol_tx_ipv6(b)) {
             vnet->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
         }
 
     } else {
-        vnet->flags = VIRTIO_NET_HDR_GSO_NONE;
-    }
-
-    if (dp_packet_hwol_l4_mask(b)) {
-        vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
-        vnet->csum_start = (OVS_FORCE __virtio16)((char *)dp_packet_l4(b)
-                                                  - (char *)dp_packet_eth(b));
-
+        vnet->hdr_len = 0;
+        vnet->gso_size = 0;
+        vnet->gso_type = VIRTIO_NET_HDR_GSO_NONE;
+    }
+
+    if (dp_packet_l4_checksum_good(b)) {
+        /* The packet already has a good L4 checksum.
+         * No need to validate again. */
+        vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0;
+        vnet->flags = VIRTIO_NET_HDR_F_DATA_VALID;
+    } else if (dp_packet_hwol_tx_l4_checksum(b)) {
+        /* The csum calculation is offloaded. */
         if (dp_packet_hwol_l4_is_tcp(b)) {
+            /* Virtual I/O Device (VIRTIO) Version 1.1
+             * 5.1.6.2 Packet Transmission
+             * If the driver negotiated VIRTIO_NET_F_CSUM, it can skip
+             * checksumming the packet:
+             *   - flags has the VIRTIO_NET_HDR_F_NEEDS_CSUM set,
+             *   - csum_start is set to the offset within the packet
+             *     to begin checksumming, and
+             *   - csum_offset indicates how many bytes after the
+             *     csum_start the new (16 bit ones complement) checksum
+             *     is placed by the device.
+             *   The TCP checksum field in the packet is set to the sum of
+             *   the TCP pseudo header, so that replacing it by the ones
+             *   complement checksum of the TCP header and body will give
+             *   the correct result. */
+
+            struct tcp_header *tcp_hdr = dp_packet_l4(b);
+            ovs_be16 csum = 0;
+            if (dp_packet_hwol_is_ipv4(b)) {
+                const struct ip_header *ip_hdr = dp_packet_l3(b);
+                csum = ~csum_finish(packet_csum_pseudoheader(ip_hdr));
+            } else if (dp_packet_ol_tx_ipv6(b)) {
+                const struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(b);
+                csum = ~csum_finish(packet_csum_pseudoheader6(ip6_hdr));
+            }
+
+            tcp_hdr->tcp_csum = csum;
+            vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+            vnet->csum_start = (OVS_FORCE __virtio16) b->l4_ofs;
             vnet->csum_offset = (OVS_FORCE __virtio16) __builtin_offsetof(
                                     struct tcp_header, tcp_csum);
         } else if (dp_packet_hwol_l4_is_udp(b)) {
+            struct udp_header *udp_hdr = dp_packet_l4(b);
+            ovs_be16 csum = 0;
+
+            if (dp_packet_hwol_is_ipv4(b)) {
+                const struct ip_header *ip_hdr = dp_packet_l3(b);
+                csum = ~csum_finish(packet_csum_pseudoheader(ip_hdr));
+            } else if (dp_packet_ol_tx_ipv6(b)) {
+                const struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(b);
+                csum = ~csum_finish(packet_csum_pseudoheader6(ip6_hdr));
+            }
+
+            udp_hdr->udp_csum = csum;
+            vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+            vnet->csum_start = (OVS_FORCE __virtio16) b->l4_ofs;
             vnet->csum_offset = (OVS_FORCE __virtio16) __builtin_offsetof(
                                     struct udp_header, udp_csum);
         } else if (dp_packet_hwol_l4_is_sctp(b)) {
-            vnet->csum_offset = (OVS_FORCE __virtio16) __builtin_offsetof(
-                                    struct sctp_header, sctp_csum);
+            /* The Linux kernel networking stack only supports csum_start
+             * and csum_offset when SCTP GSO is enabled.  See kernel's
+             * skb_csum_hwoffload_help(). Currently there is no SCTP
+             * segmentation offload support in OVS. */
+            vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0;
+            vnet->flags = 0;
         } else {
-            VLOG_WARN_RL(&rl, "Unsupported L4 protocol");
+            /* This should only happen when DP_PACKET_OL_TX_L4_MASK includes
+             * a new flag that is not covered by the checks above. */
+            VLOG_WARN_RL(&rl, "Unsupported L4 checksum offload. "
+                         "Flags: %"PRIu64,
+                         (uint64_t)*dp_packet_ol_flags_ptr(b));
+            vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0;
+            vnet->flags = 0;
         }
+    } else {
+        /* Packet L4 csum is unknown. */
+        vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0;
+        vnet->flags = 0;
     }
+
+    dp_packet_push(b, vnet, sizeof *vnet);
 }
diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
index 754e2d78d..dc054336a 100644
--- a/lib/netdev-native-tnl.c
+++ b/lib/netdev-native-tnl.c
@@ -224,28 +224,6 @@  udp_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
     return udp + 1;
 }
 
-static void
-netdev_tnl_calc_udp_csum(struct udp_header *udp, struct dp_packet *packet,
-                         int ip_tot_size)
-{
-    uint32_t csum;
-
-    if (netdev_tnl_is_header_ipv6(dp_packet_data(packet))) {
-        csum = packet_csum_pseudoheader6(netdev_tnl_ipv6_hdr(
-                                         dp_packet_data(packet)));
-    } else {
-        csum = packet_csum_pseudoheader(netdev_tnl_ip_hdr(
-                                        dp_packet_data(packet)));
-    }
-
-    csum = csum_continue(csum, udp, ip_tot_size);
-    udp->udp_csum = csum_finish(csum);
-
-    if (!udp->udp_csum) {
-        udp->udp_csum = htons(0xffff);
-    }
-}
-
 void
 netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,
                            struct dp_packet *packet,
@@ -260,9 +238,9 @@  netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,
     udp->udp_src = netdev_tnl_get_src_port(packet);
     udp->udp_len = htons(ip_tot_size);
 
-    if (udp->udp_csum) {
-        netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size);
-    }
+    /* Postpone checksum to the egress netdev. */
+    dp_packet_hwol_set_csum_udp(packet);
+    dp_packet_ol_reset_l4_csum_good(packet);
 }
 
 static void *
@@ -806,7 +784,9 @@  netdev_gtpu_push_header(const struct netdev *netdev,
                                     data->header_len, &ip_tot_size);
     udp->udp_src = netdev_tnl_get_src_port(packet);
     udp->udp_len = htons(ip_tot_size);
-    netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size);
+    /* Postpone checksum to the egress netdev. */
+    dp_packet_hwol_set_csum_udp(packet);
+    dp_packet_ol_reset_l4_csum_good(packet);
 
     gtpuh = ALIGNED_CAST(struct gtpuhdr *, udp + 1);
 
diff --git a/lib/netdev.c b/lib/netdev.c
index be546a291..14f56ccfe 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -792,8 +792,6 @@  static bool
 netdev_send_prepare_packet(const uint64_t netdev_flags,
                            struct dp_packet *packet, char **errormsg)
 {
-    uint64_t l4_mask;
-
     if (dp_packet_hwol_is_tso(packet)
         && !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) {
             /* Fall back to GSO in software. */
@@ -806,36 +804,16 @@  netdev_send_prepare_packet(const uint64_t netdev_flags,
      * netdev to decide what would be the best to do.
      * Provide a software fallback in case the device doesn't support IP csum
      * offloading. Note: Encapsulated packet must have the inner IP header
+     * csum already calculated.
+     * Packet with L4 csum offloading enabled was received with verified csum.
+     * Leave the L4 csum offloading enabled even with good checksum for the
+     * netdev to decide what would be the best to do.
+     * Netdev that requires pseudo header csum needs to calculate that.
+     * Provide a software fallback in case the netdev doesn't support L4 csum
+     * offloading. Note: Encapsulated packet must have the inner L4 header
      * csum already calculated. */
     dp_packet_ol_send_prepare(packet, netdev_flags);
 
-    l4_mask = dp_packet_hwol_l4_mask(packet);
-    if (l4_mask) {
-        if (dp_packet_hwol_l4_is_tcp(packet)) {
-            if (!(netdev_flags & NETDEV_TX_OFFLOAD_TCP_CKSUM)) {
-                /* Fall back to TCP csum in software. */
-                VLOG_ERR_BUF(errormsg, "No TCP checksum support");
-                return false;
-            }
-        } else if (dp_packet_hwol_l4_is_udp(packet)) {
-            if (!(netdev_flags & NETDEV_TX_OFFLOAD_UDP_CKSUM)) {
-                /* Fall back to UDP csum in software. */
-                VLOG_ERR_BUF(errormsg, "No UDP checksum support");
-                return false;
-            }
-        } else if (dp_packet_hwol_l4_is_sctp(packet)) {
-            if (!(netdev_flags & NETDEV_TX_OFFLOAD_SCTP_CKSUM)) {
-                /* Fall back to SCTP csum in software. */
-                VLOG_ERR_BUF(errormsg, "No SCTP checksum support");
-                return false;
-            }
-        } else {
-            VLOG_ERR_BUF(errormsg, "No L4 checksum support: mask: %"PRIu64,
-                         l4_mask);
-            return false;
-        }
-    }
-
     return true;
 }
 
@@ -968,28 +946,18 @@  netdev_push_header(const struct netdev *netdev,
     size_t i, size = dp_packet_batch_size(batch);
 
     DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) {
-        if (OVS_UNLIKELY(dp_packet_hwol_is_tso(packet)
-                         || dp_packet_hwol_l4_mask(packet))) {
+        if (OVS_UNLIKELY(dp_packet_hwol_is_tso(packet))) {
             COVERAGE_INC(netdev_push_header_drops);
             dp_packet_delete(packet);
-            VLOG_WARN_RL(&rl, "%s: Tunneling packets with HW offload flags is "
+            VLOG_WARN_RL(&rl, "%s: Tunneling packets with TSO offloading is "
                          "not supported: packet dropped",
                          netdev_get_name(netdev));
         } else {
             /* The packet is going to be encapsulated and there is
              * no support yet for inner network header csum offloading. */
-            if (dp_packet_hwol_tx_ip_csum(packet)
-                && !dp_packet_ip_checksum_good(packet)) {
-                dp_packet_ip_set_header_csum(packet);
-            }
+            dp_packet_ol_send_prepare(packet, 0);
 
             netdev->netdev_class->push_header(netdev, packet, data);
-            if (dp_packet_hwol_tx_ip_csum(packet)) {
-                dp_packet_ol_reset_ip_csum_good(packet);
-            } else if (dp_packet_hwol_is_ipv4(packet)) {
-                dp_packet_ip_set_header_csum(packet);
-                dp_packet_ol_set_ip_csum_good(packet);
-            }
 
             pkt_metadata_init(&packet->md, data->out_port);
             dp_packet_batch_refill(batch, packet, i);
diff --git a/lib/packets.c b/lib/packets.c
index 8334dc523..42ac7ce04 100644
--- a/lib/packets.c
+++ b/lib/packets.c
@@ -1131,16 +1131,22 @@  packet_set_ipv4_addr(struct dp_packet *packet,
     pkt_metadata_init_conn(&packet->md);
 
     if (nh->ip_proto == IPPROTO_TCP && l4_size >= TCP_HEADER_LEN) {
-        struct tcp_header *th = dp_packet_l4(packet);
-
-        th->tcp_csum = recalc_csum32(th->tcp_csum, old_addr, new_addr);
+        if (dp_packet_hwol_l4_is_tcp(packet)) {
+            dp_packet_ol_reset_l4_csum_good(packet);
+        } else {
+            struct tcp_header *th = dp_packet_l4(packet);
+            th->tcp_csum = recalc_csum32(th->tcp_csum, old_addr, new_addr);
+        }
     } else if (nh->ip_proto == IPPROTO_UDP && l4_size >= UDP_HEADER_LEN ) {
-        struct udp_header *uh = dp_packet_l4(packet);
-
-        if (uh->udp_csum) {
-            uh->udp_csum = recalc_csum32(uh->udp_csum, old_addr, new_addr);
-            if (!uh->udp_csum) {
-                uh->udp_csum = htons(0xffff);
+        if (dp_packet_hwol_l4_is_udp(packet)) {
+            dp_packet_ol_reset_l4_csum_good(packet);
+        } else {
+            struct udp_header *uh = dp_packet_l4(packet);
+            if (uh->udp_csum) {
+                uh->udp_csum = recalc_csum32(uh->udp_csum, old_addr, new_addr);
+                if (!uh->udp_csum) {
+                    uh->udp_csum = htons(0xffff);
+                }
             }
         }
     }
@@ -1244,16 +1250,24 @@  packet_update_csum128(struct dp_packet *packet, uint8_t proto,
     size_t l4_size = dp_packet_l4_size(packet);
 
     if (proto == IPPROTO_TCP && l4_size >= TCP_HEADER_LEN) {
-        struct tcp_header *th = dp_packet_l4(packet);
+        if (dp_packet_hwol_l4_is_tcp(packet)) {
+            dp_packet_ol_reset_l4_csum_good(packet);
+        } else {
+            struct tcp_header *th = dp_packet_l4(packet);
 
-        th->tcp_csum = recalc_csum128(th->tcp_csum, addr, new_addr);
+            th->tcp_csum = recalc_csum128(th->tcp_csum, addr, new_addr);
+        }
     } else if (proto == IPPROTO_UDP && l4_size >= UDP_HEADER_LEN) {
-        struct udp_header *uh = dp_packet_l4(packet);
+        if (dp_packet_hwol_l4_is_udp(packet)) {
+            dp_packet_ol_reset_l4_csum_good(packet);
+        } else {
+            struct udp_header *uh = dp_packet_l4(packet);
 
-        if (uh->udp_csum) {
-            uh->udp_csum = recalc_csum128(uh->udp_csum, addr, new_addr);
-            if (!uh->udp_csum) {
-                uh->udp_csum = htons(0xffff);
+            if (uh->udp_csum) {
+                uh->udp_csum = recalc_csum128(uh->udp_csum, addr, new_addr);
+                if (!uh->udp_csum) {
+                    uh->udp_csum = htons(0xffff);
+                }
             }
         }
     } else if (proto == IPPROTO_ICMPV6 &&
@@ -1371,7 +1385,9 @@  static void
 packet_set_port(ovs_be16 *port, ovs_be16 new_port, ovs_be16 *csum)
 {
     if (*port != new_port) {
-        *csum = recalc_csum16(*csum, *port, new_port);
+        if (csum) {
+            *csum = recalc_csum16(*csum, *port, new_port);
+        }
         *port = new_port;
     }
 }
@@ -1383,9 +1399,16 @@  void
 packet_set_tcp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst)
 {
     struct tcp_header *th = dp_packet_l4(packet);
+    ovs_be16 *csum = NULL;
+
+    if (dp_packet_hwol_l4_is_tcp(packet)) {
+        dp_packet_ol_reset_l4_csum_good(packet);
+    } else {
+        csum = &th->tcp_csum;
+    }
 
-    packet_set_port(&th->tcp_src, src, &th->tcp_csum);
-    packet_set_port(&th->tcp_dst, dst, &th->tcp_csum);
+    packet_set_port(&th->tcp_src, src, csum);
+    packet_set_port(&th->tcp_dst, dst, csum);
     pkt_metadata_init_conn(&packet->md);
 }
 
@@ -1397,17 +1420,21 @@  packet_set_udp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst)
 {
     struct udp_header *uh = dp_packet_l4(packet);
 
-    if (uh->udp_csum) {
-        packet_set_port(&uh->udp_src, src, &uh->udp_csum);
-        packet_set_port(&uh->udp_dst, dst, &uh->udp_csum);
+    if (dp_packet_hwol_l4_is_udp(packet)) {
+        dp_packet_ol_reset_l4_csum_good(packet);
+        packet_set_port(&uh->udp_src, src, NULL);
+        packet_set_port(&uh->udp_dst, dst, NULL);
+    } else {
+        ovs_be16 *csum = uh->udp_csum ? &uh->udp_csum : NULL;
+
+        packet_set_port(&uh->udp_src, src, csum);
+        packet_set_port(&uh->udp_dst, dst, csum);
 
-        if (!uh->udp_csum) {
+        if (csum && !uh->udp_csum) {
             uh->udp_csum = htons(0xffff);
         }
-    } else {
-        uh->udp_src = src;
-        uh->udp_dst = dst;
     }
+
     pkt_metadata_init_conn(&packet->md);
 }
 
@@ -1418,18 +1445,27 @@  void
 packet_set_sctp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst)
 {
     struct sctp_header *sh = dp_packet_l4(packet);
-    ovs_be32 old_csum, old_correct_csum, new_csum;
-    uint16_t tp_len = dp_packet_l4_size(packet);
 
-    old_csum = get_16aligned_be32(&sh->sctp_csum);
-    put_16aligned_be32(&sh->sctp_csum, 0);
-    old_correct_csum = crc32c((void *)sh, tp_len);
+    if (dp_packet_hwol_l4_is_sctp(packet)) {
+        dp_packet_ol_reset_l4_csum_good(packet);
+        sh->sctp_src = src;
+        sh->sctp_dst = dst;
+    } else {
+        ovs_be32 old_csum, old_correct_csum, new_csum;
+        uint16_t tp_len = dp_packet_l4_size(packet);
 
-    sh->sctp_src = src;
-    sh->sctp_dst = dst;
+        old_csum = get_16aligned_be32(&sh->sctp_csum);
+        put_16aligned_be32(&sh->sctp_csum, 0);
+        old_correct_csum = crc32c((void *) sh, tp_len);
+
+        sh->sctp_src = src;
+        sh->sctp_dst = dst;
+
+        new_csum = crc32c((void *) sh, tp_len);
+        put_16aligned_be32(&sh->sctp_csum, old_csum ^ old_correct_csum
+                           ^ new_csum);
+    }
 
-    new_csum = crc32c((void *)sh, tp_len);
-    put_16aligned_be32(&sh->sctp_csum, old_csum ^ old_correct_csum ^ new_csum);
     pkt_metadata_init_conn(&packet->md);
 }
 
@@ -1953,3 +1989,72 @@  IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6)
         }
     }
 }
+
+/* Set TCP checksum field in packet 'p' with complete checksum.
+ * The packet must have the L3 and L4 offsets. */
+void
+packet_tcp_complete_csum(struct dp_packet *p)
+{
+    struct tcp_header *tcp = dp_packet_l4(p);
+
+    tcp->tcp_csum = 0;
+    if (dp_packet_hwol_is_ipv4(p)) {
+        struct ip_header *ip = dp_packet_l3(p);
+
+        tcp->tcp_csum = csum_finish(csum_continue(packet_csum_pseudoheader(ip),
+                                                  tcp, dp_packet_l4_size(p)));
+    } else if (dp_packet_ol_tx_ipv6(p)) {
+        struct ovs_16aligned_ip6_hdr *ip6 = dp_packet_l3(p);
+
+        tcp->tcp_csum = packet_csum_upperlayer6(ip6, tcp, ip6->ip6_nxt,
+                                                dp_packet_l4_size(p));
+    } else {
+        OVS_NOT_REACHED();
+    }
+}
+
+/* Set UDP checksum field in packet 'p' with complete checksum.
+ * The packet must have the L3 and L4 offsets. */
+void
+packet_udp_complete_csum(struct dp_packet *p)
+{
+    struct udp_header *udp = dp_packet_l4(p);
+
+    /* Skip csum calculation if the udp_csum is zero. */
+    if (!udp->udp_csum) {
+        return;
+    }
+
+    udp->udp_csum = 0;
+    if (dp_packet_hwol_is_ipv4(p)) {
+        struct ip_header *ip = dp_packet_l3(p);
+
+        udp->udp_csum = csum_finish(csum_continue(packet_csum_pseudoheader(ip),
+                                                  udp, dp_packet_l4_size(p)));
+    } else if (dp_packet_ol_tx_ipv6(p)) {
+        struct ovs_16aligned_ip6_hdr *ip6 = dp_packet_l3(p);
+
+        udp->udp_csum = packet_csum_upperlayer6(ip6, udp, ip6->ip6_nxt,
+                                                dp_packet_l4_size(p));
+    } else {
+        OVS_NOT_REACHED();
+    }
+
+    if (!udp->udp_csum) {
+        udp->udp_csum = htons(0xffff);
+    }
+}
+
+/* Set SCTP checksum field in packet 'p' with complete checksum.
+ * The packet must have the L3 and L4 offsets. */
+void
+packet_sctp_complete_csum(struct dp_packet *p)
+{
+    struct sctp_header *sh = dp_packet_l4(p);
+    uint16_t tp_len = dp_packet_l4_size(p);
+    ovs_be32 csum;
+
+    put_16aligned_be32(&sh->sctp_csum, 0);
+    csum = crc32c((void *) sh, tp_len);
+    put_16aligned_be32(&sh->sctp_csum, csum);
+}
diff --git a/lib/packets.h b/lib/packets.h
index 5bdf6e4bb..28950b8b1 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -1643,6 +1643,9 @@  void packet_put_ra_prefix_opt(struct dp_packet *,
                               const ovs_be128 router_prefix);
 uint32_t packet_csum_pseudoheader(const struct ip_header *);
 void IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6);
+void packet_tcp_complete_csum(struct dp_packet *);
+void packet_udp_complete_csum(struct dp_packet *);
+void packet_sctp_complete_csum(struct dp_packet *);
 
 #define DNS_HEADER_LEN 12
 struct dns_header {