diff mbox series

[ovs-dev,v15,3/4] userspace: Enable IP checksum offloading by default.

Message ID 20230614190327.55525-4-mkp@redhat.com
State Accepted, archived
Commit 5d11c47d3ebe0acd48280239ba9a22bc21ee6273
Headers show
Series Enhanced checksum support | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/intel-ovs-compilation fail test: fail

Commit Message

Mike Pattrick June 14, 2023, 7:03 p.m. UTC
The netdev receiving packets is supposed to provide the flags
indicating if the IP checksum was verified and it is GOOD or BAD,
otherwise the stack will check when appropriate by software.

If the packet comes with good checksum, then postpone the
checksum calculation to the egress device if needed.

When encapsulate a packet with that flag, set the checksum
of the inner IP header since that is not yet supported.

Calculate the IP checksum when the packet is going to be sent over
a device that doesn't support the feature.

Linux devices don't support IP checksum offload alone, so the
support is not enabled.

Signed-off-by: Flavio Leitner <fbl@sysclose.org>
Co-authored-by: Flavio Leitner <fbl@sysclose.org>
Signed-off-by: Mike Pattrick <mkp@redhat.com>
---
 Since v9:
  - Removed duplicative field tx_ip_csum_offload from netdev-dpdk.c
  - Left rx_csum_offload field as it is not duplicative
  - Moved system-userspace-offload.at tests to dpif-netdev.at
  - Various visual changes
  - Extended miniflow_extract changes into avx512 code
 Since v10:
  - avx512 checksum length corrected
 Since v11:
  - If hw-offload and userspace-tso is enabled, don't allow dpdk to
  offload RAW_ENCAP
 Since v13:
  - Removed dpdk hw-offload related code
 Since v14:
  - Moved some netdev_dummy_send code out of mutex
---
 lib/conntrack.c                  | 19 ++++----
 lib/dp-packet.c                  | 15 ++++++
 lib/dp-packet.h                  | 62 +++++++++++++++++++++++--
 lib/dpif-netdev-extract-avx512.c |  5 ++
 lib/dpif-netdev.c                |  2 +
 lib/flow.c                       | 15 ++++--
 lib/ipf.c                        | 11 +++--
 lib/netdev-dpdk.c                | 71 +++++++++++++++++++----------
 lib/netdev-dummy.c               | 22 +++++++++
 lib/netdev-native-tnl.c          | 21 ++++++---
 lib/netdev.c                     | 16 +++++++
 lib/odp-execute-avx512.c         | 20 +++++---
 lib/odp-execute.c                | 21 +++++++--
 lib/packets.c                    | 34 +++++++++++---
 tests/dpif-netdev.at             | 78 ++++++++++++++++++++++++++++++++
 15 files changed, 345 insertions(+), 67 deletions(-)
diff mbox series

Patch

diff --git a/lib/conntrack.c b/lib/conntrack.c
index ce8a63de5..78c3e578c 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -2043,16 +2043,15 @@  conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type,
     ctx->key.dl_type = dl_type;
 
     if (ctx->key.dl_type == htons(ETH_TYPE_IP)) {
-        bool hwol_bad_l3_csum = dp_packet_ip_checksum_bad(pkt);
-        if (hwol_bad_l3_csum) {
+        if (dp_packet_ip_checksum_bad(pkt)) {
             ok = false;
             COVERAGE_INC(conntrack_l3csum_err);
         } else {
-            bool hwol_good_l3_csum = dp_packet_ip_checksum_valid(pkt)
-                                     || dp_packet_hwol_is_ipv4(pkt);
-            /* Validate the checksum only when hwol is not supported. */
+            /* Validate the checksum only when hwol is not supported and the
+             * packet's checksum status is not known. */
             ok = extract_l3_ipv4(&ctx->key, l3, dp_packet_l3_size(pkt), NULL,
-                                 !hwol_good_l3_csum);
+                                 !dp_packet_hwol_is_ipv4(pkt) &&
+                                 !dp_packet_ip_checksum_good(pkt));
         }
     } else if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) {
         ok = extract_l3_ipv6(&ctx->key, l3, dp_packet_l3_size(pkt), NULL);
@@ -2063,8 +2062,8 @@  conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type,
     if (ok) {
         bool hwol_bad_l4_csum = dp_packet_l4_checksum_bad(pkt);
         if (!hwol_bad_l4_csum) {
-            bool  hwol_good_l4_csum = dp_packet_l4_checksum_valid(pkt)
-                                      || dp_packet_hwol_tx_l4_checksum(pkt);
+            bool hwol_good_l4_csum = dp_packet_l4_checksum_good(pkt)
+                                     || dp_packet_hwol_tx_l4_checksum(pkt);
             /* Validate the checksum only when hwol is not supported. */
             if (extract_l4(&ctx->key, l4, dp_packet_l4_size(pkt),
                            &ctx->icmp_related, l3, !hwol_good_l4_csum,
@@ -3373,7 +3372,9 @@  handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
                 }
                 if (seq_skew) {
                     ip_len = ntohs(l3_hdr->ip_tot_len) + seq_skew;
-                    if (!dp_packet_hwol_is_ipv4(pkt)) {
+                    if (dp_packet_hwol_tx_ip_csum(pkt)) {
+                        dp_packet_ol_reset_ip_csum_good(pkt);
+                    } else {
                         l3_hdr->ip_csum = recalc_csum16(l3_hdr->ip_csum,
                                                         l3_hdr->ip_tot_len,
                                                         htons(ip_len));
diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index ae8ab5800..61c36de44 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -21,6 +21,7 @@ 
 #include "dp-packet.h"
 #include "netdev-afxdp.h"
 #include "netdev-dpdk.h"
+#include "netdev-provider.h"
 #include "openvswitch/dynamic-string.h"
 #include "util.h"
 
@@ -530,3 +531,17 @@  dp_packet_compare_offsets(struct dp_packet *b1, struct dp_packet *b2,
     }
     return true;
 }
+
+/* Checks if the packet 'p' is compatible with netdev_ol_flags 'flags'
+ * and if not, updates the packet with the software fall back. */
+void
+dp_packet_ol_send_prepare(struct dp_packet *p, uint64_t flags)
+{
+    if (dp_packet_ip_checksum_good(p) || !dp_packet_hwol_tx_ip_csum(p)) {
+        dp_packet_hwol_reset_tx_ip_csum(p);
+    } else if (!(flags & NETDEV_TX_OFFLOAD_IPV4_CKSUM)) {
+        dp_packet_ip_set_header_csum(p);
+        dp_packet_ol_set_ip_csum_good(p);
+        dp_packet_hwol_reset_tx_ip_csum(p);
+    }
+}
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index b3e6a5d10..af0a2b7f0 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -25,6 +25,7 @@ 
 #include <rte_mbuf.h>
 #endif
 
+#include "csum.h"
 #include "netdev-afxdp.h"
 #include "netdev-dpdk.h"
 #include "openvswitch/list.h"
@@ -83,6 +84,8 @@  enum dp_packet_offload_mask {
     DEF_OL_FLAG(DP_PACKET_OL_TX_UDP_CKSUM, RTE_MBUF_F_TX_UDP_CKSUM, 0x400),
     /* Offload SCTP checksum. */
     DEF_OL_FLAG(DP_PACKET_OL_TX_SCTP_CKSUM, RTE_MBUF_F_TX_SCTP_CKSUM, 0x800),
+    /* Offload IP checksum. */
+    DEF_OL_FLAG(DP_PACKET_OL_TX_IP_CKSUM, RTE_MBUF_F_TX_IP_CKSUM, 0x1000),
     /* Adding new field requires adding to DP_PACKET_OL_SUPPORTED_MASK. */
 };
 
@@ -97,7 +100,8 @@  enum dp_packet_offload_mask {
                                      DP_PACKET_OL_TX_IPV6          | \
                                      DP_PACKET_OL_TX_TCP_CKSUM     | \
                                      DP_PACKET_OL_TX_UDP_CKSUM     | \
-                                     DP_PACKET_OL_TX_SCTP_CKSUM)
+                                     DP_PACKET_OL_TX_SCTP_CKSUM    | \
+                                     DP_PACKET_OL_TX_IP_CKSUM)
 
 #define DP_PACKET_OL_TX_L4_MASK (DP_PACKET_OL_TX_TCP_CKSUM | \
                                  DP_PACKET_OL_TX_UDP_CKSUM | \
@@ -239,6 +243,7 @@  static inline bool dp_packet_equal(const struct dp_packet *,
 bool dp_packet_compare_offsets(struct dp_packet *good,
                                struct dp_packet *test,
                                struct ds *err_str);
+void dp_packet_ol_send_prepare(struct dp_packet *, uint64_t);
 
 
 /* Frees memory that 'b' points to, as well as 'b' itself. */
@@ -1030,6 +1035,26 @@  dp_packet_hwol_set_tx_ipv6(struct dp_packet *b)
     *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_IPV6;
 }
 
+/* Returns 'true' if packet 'p' is marked for IPv4 checksum offloading. */
+static inline bool
+dp_packet_hwol_tx_ip_csum(const struct dp_packet *p)
+{
+    return !!(*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_TX_IP_CKSUM);
+}
+
+/* Marks packet 'p' for IPv4 checksum offloading. */
+static inline void
+dp_packet_hwol_set_tx_ip_csum(struct dp_packet *p)
+{
+    *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_TX_IP_CKSUM;
+}
+
+static inline void
+dp_packet_hwol_reset_tx_ip_csum(struct dp_packet *p)
+{
+    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_IP_CKSUM;
+}
+
 /* Mark packet 'b' for TCP checksum offloading.  It implies that either
  * the packet 'b' is marked for IPv4 or IPv6 checksum offloading. */
 static inline void
@@ -1063,13 +1088,31 @@  dp_packet_hwol_set_tcp_seg(struct dp_packet *b)
     *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TCP_SEG;
 }
 
+/* Returns 'true' if the IP header has good integrity and the
+ * checksum in it is complete. */
 static inline bool
-dp_packet_ip_checksum_valid(const struct dp_packet *p)
+dp_packet_ip_checksum_good(const struct dp_packet *p)
 {
     return (*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_RX_IP_CKSUM_MASK) ==
             DP_PACKET_OL_RX_IP_CKSUM_GOOD;
 }
 
+/* Marks packet 'p' with good IPv4 checksum. */
+static inline void
+dp_packet_ol_set_ip_csum_good(struct dp_packet *p)
+{
+    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_IP_CKSUM_BAD;
+    *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RX_IP_CKSUM_GOOD;
+}
+
+/* Resets IP good checksum flag in packet 'p'. */
+static inline void
+dp_packet_ol_reset_ip_csum_good(struct dp_packet *p)
+{
+    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_IP_CKSUM_GOOD;
+}
+
+/* Marks packet 'p' with bad IPv4 checksum. */
 static inline bool
 dp_packet_ip_checksum_bad(const struct dp_packet *p)
 {
@@ -1077,8 +1120,21 @@  dp_packet_ip_checksum_bad(const struct dp_packet *p)
             DP_PACKET_OL_RX_IP_CKSUM_BAD;
 }
 
+/* Calculate and set the IPv4 header checksum in packet 'p'. */
+static inline void
+dp_packet_ip_set_header_csum(struct dp_packet *p)
+{
+    struct ip_header *ip = dp_packet_l3(p);
+
+    ovs_assert(ip);
+    ip->ip_csum = 0;
+    ip->ip_csum = csum(ip, sizeof *ip);
+}
+
+/* Returns 'true' if the packet 'p' has good integrity and the
+ * checksum in it is correct. */
 static inline bool
-dp_packet_l4_checksum_valid(const struct dp_packet *p)
+dp_packet_l4_checksum_good(const struct dp_packet *p)
 {
     return (*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_RX_L4_CKSUM_MASK) ==
             DP_PACKET_OL_RX_L4_CKSUM_GOOD;
diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index 968845f2d..66884eaf0 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -698,6 +698,7 @@  mfex_ipv6_set_l2_pad_size(struct dp_packet *pkt,
         return -1;
     }
     dp_packet_set_l2_pad_size(pkt, len_from_ipv6 - (p_len + IPV6_HEADER_LEN));
+    dp_packet_hwol_set_tx_ipv6(pkt);
     return 0;
 }
 
@@ -728,6 +729,10 @@  mfex_ipv4_set_l2_pad_size(struct dp_packet *pkt, struct ip_header *nh,
         return -1;
     }
     dp_packet_set_l2_pad_size(pkt, len_from_ipv4 - ip_tot_len);
+    dp_packet_hwol_set_tx_ipv4(pkt);
+    if (dp_packet_ip_checksum_good(pkt)) {
+        dp_packet_hwol_set_tx_ip_csum(pkt);
+    }
     return 0;
 }
 
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 70b953ae6..abe63412e 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -7913,6 +7913,8 @@  dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet_,
         ds_destroy(&ds);
     }
 
+    dp_packet_ol_send_prepare(packet_, 0);
+
     return dp->upcall_cb(packet_, flow, ufid, pmd->core_id, type, userdata,
                          actions, wc, put_actions, dp->upcall_aux);
 }
diff --git a/lib/flow.c b/lib/flow.c
index 9501a259e..9397c9925 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -935,6 +935,10 @@  miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
         nw_proto = nh->ip_proto;
         nw_frag = ipv4_get_nw_frag(nh);
         data_pull(&data, &size, ip_len);
+        dp_packet_hwol_set_tx_ipv4(packet);
+        if (dp_packet_ip_checksum_good(packet)) {
+            dp_packet_hwol_set_tx_ip_csum(packet);
+        }
     } else if (dl_type == htons(ETH_TYPE_IPV6)) {
         const struct ovs_16aligned_ip6_hdr *nh = data;
         ovs_be32 tc_flow;
@@ -948,6 +952,7 @@  miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
         }
         data_pull(&data, &size, sizeof *nh);
 
+        dp_packet_hwol_set_tx_ipv6(packet);
         plen = ntohs(nh->ip6_plen);
         dp_packet_set_l2_pad_size(packet, size - plen);
         size = plen;   /* Never pull padding. */
@@ -3247,9 +3252,12 @@  packet_expand(struct dp_packet *p, const struct flow *flow, size_t size)
             struct ip_header *ip = dp_packet_l3(p);
 
             ip->ip_tot_len = htons(p->l4_ofs - p->l3_ofs + l4_len);
-            ip->ip_csum = 0;
-            ip->ip_csum = csum(ip, sizeof *ip);
-
+            if (dp_packet_hwol_tx_ip_csum(p)) {
+                dp_packet_ol_reset_ip_csum_good(p);
+            } else {
+                dp_packet_ip_set_header_csum(p);
+                dp_packet_ol_set_ip_csum_good(p);
+            }
             pseudo_hdr_csum = packet_csum_pseudoheader(ip);
         } else { /* ETH_TYPE_IPV6 */
             struct ovs_16aligned_ip6_hdr *nh = dp_packet_l3(p);
@@ -3339,6 +3347,7 @@  flow_compose(struct dp_packet *p, const struct flow *flow,
         /* Checksum has already been zeroed by put_zeros call. */
         ip->ip_csum = csum(ip, sizeof *ip);
 
+        dp_packet_ol_set_ip_csum_good(p);
         pseudo_hdr_csum = packet_csum_pseudoheader(ip);
         flow_compose_l4_csum(p, flow, pseudo_hdr_csum);
     } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
diff --git a/lib/ipf.c b/lib/ipf.c
index affd440f6..7d74e2c13 100644
--- a/lib/ipf.c
+++ b/lib/ipf.c
@@ -433,7 +433,9 @@  ipf_reassemble_v4_frags(struct ipf_list *ipf_list)
     len += rest_len;
     l3 = dp_packet_l3(pkt);
     ovs_be16 new_ip_frag_off = l3->ip_frag_off & ~htons(IP_MORE_FRAGMENTS);
-    if (!dp_packet_hwol_is_ipv4(pkt)) {
+    if (dp_packet_hwol_tx_ip_csum(pkt)) {
+        dp_packet_ol_reset_ip_csum_good(pkt);
+    } else {
         l3->ip_csum = recalc_csum16(l3->ip_csum, l3->ip_frag_off,
                                     new_ip_frag_off);
         l3->ip_csum = recalc_csum16(l3->ip_csum, l3->ip_tot_len, htons(len));
@@ -608,8 +610,7 @@  ipf_is_valid_v4_frag(struct ipf *ipf, struct dp_packet *pkt)
         goto invalid_pkt;
     }
 
-    if (OVS_UNLIKELY(!dp_packet_ip_checksum_valid(pkt)
-                     && !dp_packet_hwol_is_ipv4(pkt)
+    if (OVS_UNLIKELY(!dp_packet_ip_checksum_good(pkt)
                      && csum(l3, ip_hdr_len) != 0)) {
         COVERAGE_INC(ipf_l3csum_err);
         goto invalid_pkt;
@@ -1186,7 +1187,9 @@  ipf_post_execute_reass_pkts(struct ipf *ipf,
                     } else {
                         struct ip_header *l3_frag = dp_packet_l3(frag_i->pkt);
                         struct ip_header *l3_reass = dp_packet_l3(pkt);
-                        if (!dp_packet_hwol_is_ipv4(frag_i->pkt)) {
+                        if (dp_packet_hwol_tx_ip_csum(frag_i->pkt)) {
+                            dp_packet_ol_reset_ip_csum_good(frag_i->pkt);
+                        } else {
                             ovs_be32 reass_ip =
                                 get_16aligned_be32(&l3_reass->ip_src);
                             ovs_be32 frag_ip =
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 87b3f0297..cac46eac7 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -411,8 +411,9 @@  enum dpdk_hw_ol_features {
     NETDEV_RX_CHECKSUM_OFFLOAD = 1 << 0,
     NETDEV_RX_HW_CRC_STRIP = 1 << 1,
     NETDEV_RX_HW_SCATTER = 1 << 2,
-    NETDEV_TX_TSO_OFFLOAD = 1 << 3,
-    NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 4,
+    NETDEV_TX_IPV4_CKSUM_OFFLOAD = 1 << 3,
+    NETDEV_TX_TSO_OFFLOAD = 1 << 4,
+    NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 5,
 };
 
 /*
@@ -1039,6 +1040,10 @@  dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
         conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_KEEP_CRC;
     }
 
+    if (dev->hw_ol_features & NETDEV_TX_IPV4_CKSUM_OFFLOAD) {
+        conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM;
+    }
+
     if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
         conf.txmode.offloads |= DPDK_TX_TSO_OFFLOAD_FLAGS;
         if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) {
@@ -1179,6 +1184,12 @@  dpdk_eth_dev_init(struct netdev_dpdk *dev)
         dev->hw_ol_features &= ~NETDEV_RX_HW_SCATTER;
     }
 
+    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM) {
+        dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD;
+    } else {
+        dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD;
+    }
+
     dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
     if (userspace_tso_enabled()) {
         if ((info.tx_offload_capa & tx_tso_offload_capa)
@@ -2227,13 +2238,16 @@  netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
 {
     struct dp_packet *pkt = CONTAINER_OF(mbuf, struct dp_packet, mbuf);
 
-    if (mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
-        mbuf->l2_len = (char *)dp_packet_l3(pkt) - (char *)dp_packet_eth(pkt);
-        mbuf->l3_len = (char *)dp_packet_l4(pkt) - (char *)dp_packet_l3(pkt);
-        mbuf->outer_l2_len = 0;
-        mbuf->outer_l3_len = 0;
+    if (!(mbuf->ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_L4_MASK
+                            | RTE_MBUF_F_TX_TCP_SEG))) {
+        return true;
     }
 
+    mbuf->l2_len = (char *) dp_packet_l3(pkt) - (char *) dp_packet_eth(pkt);
+    mbuf->l3_len = (char *) dp_packet_l4(pkt) - (char *) dp_packet_l3(pkt);
+    mbuf->outer_l2_len = 0;
+    mbuf->outer_l3_len = 0;
+
     if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
         struct tcp_header *th = dp_packet_l4(pkt);
 
@@ -2292,13 +2306,11 @@  netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
     uint32_t nb_tx = 0;
     uint16_t nb_tx_prep = cnt;
 
-    if (userspace_tso_enabled()) {
-        nb_tx_prep = rte_eth_tx_prepare(dev->port_id, qid, pkts, cnt);
-        if (nb_tx_prep != cnt) {
-            VLOG_WARN_RL(&rl, "%s: Output batch contains invalid packets. "
-                         "Only %u/%u are valid: %s", dev->up.name, nb_tx_prep,
-                         cnt, rte_strerror(rte_errno));
-        }
+    nb_tx_prep = rte_eth_tx_prepare(dev->port_id, qid, pkts, cnt);
+    if (nb_tx_prep != cnt) {
+        VLOG_WARN_RL(&rl, "%s: Output batch contains invalid packets. "
+                     "Only %u/%u are valid: %s", netdev_get_name(&dev->up),
+                     nb_tx_prep, cnt, rte_strerror(rte_errno));
     }
 
     while (nb_tx != nb_tx_prep) {
@@ -2637,11 +2649,19 @@  dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, struct dp_packet *pkt_orig)
     memcpy(&pkt_dest->l2_pad_size, &pkt_orig->l2_pad_size,
            sizeof(struct dp_packet) - offsetof(struct dp_packet, l2_pad_size));
 
-    if (mbuf_dest->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
-        mbuf_dest->l2_len = (char *)dp_packet_l3(pkt_dest)
-                                - (char *)dp_packet_eth(pkt_dest);
-        mbuf_dest->l3_len = (char *)dp_packet_l4(pkt_dest)
+    if (dp_packet_l3(pkt_dest)) {
+        if (dp_packet_eth(pkt_dest)) {
+            mbuf_dest->l2_len = (char *) dp_packet_l3(pkt_dest)
+                                - (char *) dp_packet_eth(pkt_dest);
+        } else {
+            mbuf_dest->l2_len = 0;
+        }
+        if (dp_packet_l4(pkt_dest)) {
+            mbuf_dest->l3_len = (char *) dp_packet_l4(pkt_dest)
                                 - (char *) dp_packet_l3(pkt_dest);
+        } else {
+            mbuf_dest->l3_len = 0;
+        }
     }
 
     return pkt_dest;
@@ -2699,11 +2719,9 @@  netdev_dpdk_common_send(struct netdev *netdev, struct dp_packet_batch *batch,
     pkt_cnt = cnt;
 
     /* Prepare each mbuf for hardware offloading. */
-    if (userspace_tso_enabled()) {
-        cnt = netdev_dpdk_prep_hwol_batch(dev, pkts, pkt_cnt);
-        stats->tx_invalid_hwol_drops += pkt_cnt - cnt;
-        pkt_cnt = cnt;
-    }
+    cnt = netdev_dpdk_prep_hwol_batch(dev, pkts, pkt_cnt);
+    stats->tx_invalid_hwol_drops += pkt_cnt - cnt;
+    pkt_cnt = cnt;
 
     /* Apply Quality of Service policy. */
     cnt = netdev_dpdk_qos_run(dev, pkts, pkt_cnt, true);
@@ -5260,6 +5278,13 @@  netdev_dpdk_reconfigure(struct netdev *netdev)
     }
 
     err = dpdk_eth_dev_init(dev);
+
+    if (dev->hw_ol_features & NETDEV_TX_IPV4_CKSUM_OFFLOAD) {
+        netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM;
+    } else {
+        netdev->ol_flags &= ~NETDEV_TX_OFFLOAD_IPV4_CKSUM;
+    }
+
     if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
         netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO;
         netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM;
diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
index 7467e9fbc..1a54add87 100644
--- a/lib/netdev-dummy.c
+++ b/lib/netdev-dummy.c
@@ -147,6 +147,11 @@  struct netdev_dummy {
     int requested_n_txq OVS_GUARDED;
     int requested_n_rxq OVS_GUARDED;
     int requested_numa_id OVS_GUARDED;
+
+    /* Enable netdev IP csum offload. */
+    bool ol_ip_csum OVS_GUARDED;
+    /* Flag RX packet with good csum. */
+    bool ol_ip_csum_set_good OVS_GUARDED;
 };
 
 /* Max 'recv_queue_len' in struct netdev_dummy. */
@@ -914,6 +919,13 @@  netdev_dummy_set_config(struct netdev *netdev_, const struct smap *args,
         }
     }
 
+    netdev->ol_ip_csum_set_good = smap_get_bool(args, "ol_ip_csum_set_good",
+                                                false);
+    netdev->ol_ip_csum = smap_get_bool(args, "ol_ip_csum", false);
+    if (netdev->ol_ip_csum) {
+        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM;
+    }
+
     netdev_change_seq_changed(netdev_);
 
     /* 'dummy-pmd' specific config. */
@@ -1092,6 +1104,10 @@  netdev_dummy_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch,
     netdev->rxq_stats[rxq_->queue_id].bytes += dp_packet_size(packet);
     netdev->custom_stats[0].value++;
     netdev->custom_stats[1].value++;
+    if (netdev->ol_ip_csum_set_good) {
+        /* The netdev hardware sets the flag when the packet has good csum. */
+        dp_packet_ol_set_ip_csum_good(packet);
+    }
     ovs_mutex_unlock(&netdev->mutex);
 
     dp_packet_batch_init_packet(batch, packet);
@@ -1173,6 +1189,12 @@  netdev_dummy_send(struct netdev *netdev, int qid,
             }
         }
 
+        if (dp_packet_hwol_tx_ip_csum(packet) &&
+            !dp_packet_ip_checksum_good(packet)) {
+            dp_packet_ip_set_header_csum(packet);
+            dp_packet_ol_set_ip_csum_good(packet);
+        }
+
         ovs_mutex_lock(&dev->mutex);
         dev->stats.tx_packets++;
         dev->txq_stats[qid].packets++;
diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
index c2c6ca559..72d244598 100644
--- a/lib/netdev-native-tnl.c
+++ b/lib/netdev-native-tnl.c
@@ -88,7 +88,10 @@  netdev_tnl_ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
 
         ovs_be32 ip_src, ip_dst;
 
-        if (OVS_UNLIKELY(!dp_packet_ip_checksum_valid(packet))) {
+        /* A packet coming from a network device might have the
+         * csum already checked. In this case, skip the check. */
+        if (OVS_UNLIKELY(!dp_packet_ip_checksum_good(packet))
+            && !dp_packet_hwol_tx_ip_csum(packet)) {
             if (csum(ip, IP_IHL(ip->ip_ihl_ver) * 4)) {
                 VLOG_WARN_RL(&err_rl, "ip packet has invalid checksum");
                 return NULL;
@@ -142,7 +145,8 @@  netdev_tnl_ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
  *
  * This function sets the IP header's ip_tot_len field (which should be zeroed
  * as part of 'header') and puts its value into '*ip_tot_size' as well.  Also
- * updates IP header checksum, as well as the l3 and l4 offsets in 'packet'.
+ * updates IP header checksum if not offloaded, as well as the l3 and l4
+ * offsets in the 'packet'.
  *
  * Return pointer to the L4 header added to 'packet'. */
 void *
@@ -168,11 +172,16 @@  netdev_tnl_push_ip_header(struct dp_packet *packet, const void *header,
         ip6->ip6_plen = htons(*ip_tot_size);
         packet_set_ipv6_flow_label(&ip6->ip6_flow, ipv6_label);
         packet->l4_ofs = dp_packet_size(packet) - *ip_tot_size;
+        dp_packet_hwol_set_tx_ipv6(packet);
+        dp_packet_ol_reset_ip_csum_good(packet);
         return ip6 + 1;
     } else {
         ip = netdev_tnl_ip_hdr(eth);
         ip->ip_tot_len = htons(*ip_tot_size);
-        ip->ip_csum = recalc_csum16(ip->ip_csum, 0, ip->ip_tot_len);
+        /* Postpone checksum to when the packet is pushed to the port. */
+        dp_packet_hwol_set_tx_ipv4(packet);
+        dp_packet_hwol_set_tx_ip_csum(packet);
+        dp_packet_ol_reset_ip_csum_good(packet);
         *ip_tot_size -= IP_HEADER_LEN;
         packet->l4_ofs = dp_packet_size(packet) - *ip_tot_size;
         return ip + 1;
@@ -191,7 +200,7 @@  udp_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
     }
 
     if (udp->udp_csum) {
-        if (OVS_UNLIKELY(!dp_packet_l4_checksum_valid(packet))) {
+        if (OVS_UNLIKELY(!dp_packet_l4_checksum_good(packet))) {
             uint32_t csum;
             if (netdev_tnl_is_header_ipv6(dp_packet_data(packet))) {
                 csum = packet_csum_pseudoheader6(dp_packet_l3(packet));
@@ -299,8 +308,8 @@  netdev_tnl_ip_build_header(struct ovs_action_push_tnl *data,
         ip->ip_frag_off = (params->flow->tunnel.flags & FLOW_TNL_F_DONT_FRAGMENT) ?
                           htons(IP_DF) : 0;
 
-        /* Checksum has already been zeroed by eth_build_header. */
-        ip->ip_csum = csum(ip, sizeof *ip);
+        /* The checksum will be calculated when the headers are pushed
+         * to the packet if offloading is not enabled. */
 
         data->header_len += IP_HEADER_LEN;
         return ip + 1;
diff --git a/lib/netdev.c b/lib/netdev.c
index 79b17d006..b86afbf36 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -808,6 +808,14 @@  netdev_send_prepare_packet(const uint64_t netdev_flags,
             return false;
     }
 
+    /* Packet with IP csum offloading enabled was received with verified csum.
+     * Leave the IP csum offloading enabled even with good checksum to the
+     * netdev to decide what would be the best to do.
+     * Provide a software fallback in case the device doesn't support IP csum
+     * offloading. Note: Encapsulated packet must have the inner IP header
+     * csum already calculated. */
+    dp_packet_ol_send_prepare(packet, netdev_flags);
+
     l4_mask = dp_packet_hwol_l4_mask(packet);
     if (l4_mask) {
         if (dp_packet_hwol_l4_is_tcp(packet)) {
@@ -975,7 +983,15 @@  netdev_push_header(const struct netdev *netdev,
                          "not supported: packet dropped",
                          netdev_get_name(netdev));
         } else {
+            /* The packet is going to be encapsulated and there is
+             * no support yet for inner network header csum offloading. */
+            if (dp_packet_hwol_tx_ip_csum(packet)
+                && !dp_packet_ip_checksum_good(packet)) {
+                dp_packet_ip_set_header_csum(packet);
+            }
+
             netdev->netdev_class->push_header(netdev, packet, data);
+
             pkt_metadata_init(&packet->md, data->out_port);
             dp_packet_batch_refill(batch, packet, i);
         }
diff --git a/lib/odp-execute-avx512.c b/lib/odp-execute-avx512.c
index c28461ec1..9597f3554 100644
--- a/lib/odp-execute-avx512.c
+++ b/lib/odp-execute-avx512.c
@@ -450,7 +450,6 @@  action_avx512_ipv4_set_addrs(struct dp_packet_batch *batch,
 
     DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
         struct ip_header *nh = dp_packet_l3(packet);
-        ovs_be16 old_csum = ~nh->ip_csum;
 
         /* Load the 20 bytes of the IPv4 header. Without options, which is the
          * most common case it's 20 bytes, but can be up to 60 bytes. */
@@ -463,13 +462,20 @@  action_avx512_ipv4_set_addrs(struct dp_packet_batch *batch,
          * (v_pkt_masked). */
         __m256i v_new_hdr = _mm256_or_si256(v_key_shuf, v_pkt_masked);
 
-        /* Update the IP checksum based on updated IP values. */
-        uint16_t delta = avx512_ipv4_hdr_csum_delta(v_packet, v_new_hdr);
-        uint32_t new_csum = old_csum + delta;
-        delta = csum_finish(new_csum);
+        if (dp_packet_hwol_tx_ip_csum(packet)) {
+            dp_packet_ol_reset_ip_csum_good(packet);
+        } else {
+            ovs_be16 old_csum = ~nh->ip_csum;
 
-        /* Insert new checksum. */
-        v_new_hdr = _mm256_insert_epi16(v_new_hdr, delta, 5);
+            /* Update the IP checksum based on updated IP values. */
+            uint16_t delta = avx512_ipv4_hdr_csum_delta(v_packet, v_new_hdr);
+            uint32_t new_csum = old_csum + delta;
+
+            delta = csum_finish(new_csum);
+
+            /* Insert new checksum. */
+            v_new_hdr = _mm256_insert_epi16(v_new_hdr, delta, 5);
+        }
 
         /* If ip_src or ip_dst has been modified, L4 checksum needs to
          * be updated too. */
diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index 5cf6fbec0..37f0f717a 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -169,9 +169,14 @@  odp_set_ipv4(struct dp_packet *packet, const struct ovs_key_ipv4 *key,
         new_tos = key->ipv4_tos | (nh->ip_tos & ~mask->ipv4_tos);
 
         if (nh->ip_tos != new_tos) {
-            nh->ip_csum = recalc_csum16(nh->ip_csum,
-                                        htons((uint16_t) nh->ip_tos),
-                                        htons((uint16_t) new_tos));
+            if (dp_packet_hwol_tx_ip_csum(packet)) {
+                dp_packet_ol_reset_ip_csum_good(packet);
+            } else {
+                nh->ip_csum = recalc_csum16(nh->ip_csum,
+                                            htons((uint16_t) nh->ip_tos),
+                                            htons((uint16_t) new_tos));
+            }
+
             nh->ip_tos = new_tos;
         }
     }
@@ -180,8 +185,14 @@  odp_set_ipv4(struct dp_packet *packet, const struct ovs_key_ipv4 *key,
         new_ttl = key->ipv4_ttl | (nh->ip_ttl & ~mask->ipv4_ttl);
 
         if (OVS_LIKELY(nh->ip_ttl != new_ttl)) {
-            nh->ip_csum = recalc_csum16(nh->ip_csum, htons(nh->ip_ttl << 8),
-                                        htons(new_ttl << 8));
+            if (dp_packet_hwol_tx_ip_csum(packet)) {
+                dp_packet_ol_reset_ip_csum_good(packet);
+            } else {
+                nh->ip_csum = recalc_csum16(nh->ip_csum,
+                                            htons(nh->ip_ttl << 8),
+                                            htons(new_ttl << 8));
+            }
+
             nh->ip_ttl = new_ttl;
         }
     }
diff --git a/lib/packets.c b/lib/packets.c
index 7e5a52fd4..a4ccc21f8 100644
--- a/lib/packets.c
+++ b/lib/packets.c
@@ -1144,7 +1144,12 @@  packet_set_ipv4_addr(struct dp_packet *packet,
             }
         }
     }
-    nh->ip_csum = recalc_csum32(nh->ip_csum, old_addr, new_addr);
+
+    if (dp_packet_hwol_tx_ip_csum(packet)) {
+        dp_packet_ol_reset_ip_csum_good(packet);
+    } else {
+        nh->ip_csum = recalc_csum32(nh->ip_csum, old_addr, new_addr);
+    }
     put_16aligned_be32(addr, new_addr);
 }
 
@@ -1311,16 +1316,26 @@  packet_set_ipv4(struct dp_packet *packet, ovs_be32 src, ovs_be32 dst,
     if (nh->ip_tos != tos) {
         uint8_t *field = &nh->ip_tos;
 
-        nh->ip_csum = recalc_csum16(nh->ip_csum, htons((uint16_t) *field),
-                                    htons((uint16_t) tos));
+        if (dp_packet_hwol_tx_ip_csum(packet)) {
+            dp_packet_ol_reset_ip_csum_good(packet);
+        } else {
+            nh->ip_csum = recalc_csum16(nh->ip_csum, htons((uint16_t) *field),
+                                        htons((uint16_t) tos));
+        }
+
         *field = tos;
     }
 
     if (nh->ip_ttl != ttl) {
         uint8_t *field = &nh->ip_ttl;
 
-        nh->ip_csum = recalc_csum16(nh->ip_csum, htons(*field << 8),
-                                    htons(ttl << 8));
+        if (dp_packet_hwol_tx_ip_csum(packet)) {
+            dp_packet_ol_reset_ip_csum_good(packet);
+        } else {
+            nh->ip_csum = recalc_csum16(nh->ip_csum, htons(*field << 8),
+                                        htons(ttl << 8));
+        }
+
         *field = ttl;
     }
 }
@@ -1931,8 +1946,13 @@  IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6)
 
         tos |= IP_ECN_CE;
         if (nh->ip_tos != tos) {
-            nh->ip_csum = recalc_csum16(nh->ip_csum, htons(nh->ip_tos),
-                                        htons((uint16_t) tos));
+            if (dp_packet_hwol_tx_ip_csum(pkt)) {
+                dp_packet_ol_reset_ip_csum_good(pkt);
+            } else {
+                nh->ip_csum = recalc_csum16(nh->ip_csum, htons(nh->ip_tos),
+                                            htons((uint16_t) tos));
+            }
+
             nh->ip_tos = tos;
         }
     }
diff --git a/tests/dpif-netdev.at b/tests/dpif-netdev.at
index 60d789beb..67adf27fb 100644
--- a/tests/dpif-netdev.at
+++ b/tests/dpif-netdev.at
@@ -734,3 +734,81 @@  AT_CHECK([test `ovs-vsctl get Interface p2 statistics:tx_q0_packets` -gt 0 -a dn
 
 OVS_VSWITCHD_STOP
 AT_CLEANUP
+
+AT_SETUP([userspace offload - ip csum offload])
+OVS_VSWITCHD_START(
+  [add-br br1 -- set bridge br1 datapath-type=dummy -- \
+   add-port br1 p1 -- \
+       set Interface p1 type=dummy -- \
+   add-port br1 p2 -- \
+       set Interface p2 type=dummy --])
+
+# Modify the ip_dst addr to force changing the IP csum.
+AT_CHECK([ovs-ofctl add-flow br1 in_port=p1,actions=mod_nw_dst:192.168.1.1,output:p2])
+
+# Check if no offload remains ok.
+AT_CHECK([ovs-vsctl set Interface p2 options:tx_pcap=p2.pcap])
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=false])
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=false])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
+0a8f394fe0738abf7e2f058408004500003433e0400040068f8fc0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
+])
+
+# Checksum should change to 0x990 with ip_dst changed to 192.168.1.1
+# by the datapath while processing the packet.
+AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
+AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
+0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
+])
+
+# Check if packets entering the datapath with csum offloading
+# enabled gets the csum updated properly by egress handling
+# in the datapath and not by the netdev.
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=false])
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=true])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
+0a8f394fe0738abf7e2f058408004500003433e0400040068f8fc0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
+])
+AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
+AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
+0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
+])
+
+# Check if packets entering the datapath with csum offloading
+# enabled gets the csum updated properly by netdev and not
+# by the datapath.
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=true])
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=true])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
+0a8f394fe0738abf7e2f058408004500003433e0400040068f8fc0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
+])
+AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
+AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
+0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
+])
+
+# Push a packet with bad csum and offloading disabled to check
+# if the datapath updates the csum, but does not fix the issue.
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=false])
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=false])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
+0a8f394fe0738abf7e2f058408004500003433e0400040068f03c0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
+])
+AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
+AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
+0a8f394fe0738abf7e2f058408004500003433e0400040060904c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
+])
+
+# Push a packet with bad csum and offloading enabled to check
+# if the driver updates and fixes the csum.
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=true])
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=true])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
+0a8f394fe0738abf7e2f058408004500003433e0400040068f03c0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
+])
+AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
+AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
+0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
+])
+OVS_VSWITCHD_STOP
+AT_CLEANUP