diff mbox series

[ovs-dev,v6,3/4] userspace: Enable IP checksum offloading by default.

Message ID 20220713184532.3342903-4-mkp@redhat.com
State Superseded
Headers show
Series Enhance support for checksum offloading | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test fail github build: failed
ovsrobot/intel-ovs-compilation fail test: fail

Commit Message

Mike Pattrick July 13, 2022, 6:45 p.m. UTC
From: Flavio Leitner <fbl@sysclose.org>

The netdev receiving packets is supposed to provide the flags
indicating if the IP csum was verified and it is GOOD or BAD,
otherwise the stack will check when appropriate by software.

If the packet comes with good checksum, then postpone the
checksum calculation to the egress device if needed.

When encapsulate a packet with that flag, set the checksum
of the inner IP header since that is not yet supported.

Calculate the IP csum when the packet is going to be sent over
a device that doesn't support the feature.

Linux devices don't support IP csum offload alone, so the
support is not enabled.

Signed-off-by: Flavio Leitner <fbl@sysclose.org>
Co-authored-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Mike Pattrick <mkp@redhat.com>
---
v5:
 - Refactor was mostly removed, except for valid->good
 - Reset unsupported offload flags in send_prepare
 - Moved send_prepare from process_upcall to netdev_upcall

v6:
 - Re-added tests that were incorrectly excluded from v5
---
 lib/conntrack.c                     | 17 ++++---
 lib/dp-packet.c                     | 15 ++++++
 lib/dp-packet.h                     | 60 ++++++++++++++++++++--
 lib/dpif-netdev.c                   |  4 ++
 lib/flow.c                          | 15 ++++--
 lib/ipf.c                           | 11 ++--
 lib/netdev-dpdk.c                   | 70 ++++++++++++++-----------
 lib/netdev-dummy.c                  | 23 +++++++++
 lib/netdev-native-tnl.c             | 21 +++++---
 lib/netdev.c                        | 22 ++++++++
 lib/odp-execute.c                   | 21 ++++++--
 lib/packets.c                       | 34 ++++++++++---
 ofproto/ofproto-dpif-upcall.c       |  2 +-
 tests/automake.mk                   |  1 +
 tests/system-userspace-offload.at   | 79 +++++++++++++++++++++++++++++
 tests/system-userspace-testsuite.at |  1 +
 16 files changed, 328 insertions(+), 68 deletions(-)
 create mode 100644 tests/system-userspace-offload.at

Comments

David Marchand Oct. 17, 2022, 11:31 a.m. UTC | #1
On Wed, Jul 13, 2022 at 8:45 PM Mike Pattrick <mkp@redhat.com> wrote:
>
> From: Flavio Leitner <fbl@sysclose.org>
>
> The netdev receiving packets is supposed to provide the flags
> indicating if the IP csum was verified and it is GOOD or BAD,
> otherwise the stack will check when appropriate by software.
>
> If the packet comes with good checksum, then postpone the
> checksum calculation to the egress device if needed.
>
> When encapsulate a packet with that flag, set the checksum
> of the inner IP header since that is not yet supported.
>
> Calculate the IP csum when the packet is going to be sent over
> a device that doesn't support the feature.
>
> Linux devices don't support IP csum offload alone, so the
> support is not enabled.
>
> Signed-off-by: Flavio Leitner <fbl@sysclose.org>
> Co-authored-by: Mike Pattrick <mkp@redhat.com>
> Signed-off-by: Mike Pattrick <mkp@redhat.com>
> ---
> v5:
>  - Refactor was mostly removed, except for valid->good
>  - Reset unsupported offload flags in send_prepare
>  - Moved send_prepare from process_upcall to netdev_upcall
>
> v6:
>  - Re-added tests that were incorrectly excluded from v5

[snip]

> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 081900576..811c62a87 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -397,8 +397,9 @@ enum dpdk_hw_ol_features {
>      NETDEV_RX_CHECKSUM_OFFLOAD = 1 << 0,
>      NETDEV_RX_HW_CRC_STRIP = 1 << 1,
>      NETDEV_RX_HW_SCATTER = 1 << 2,
> -    NETDEV_TX_TSO_OFFLOAD = 1 << 3,
> -    NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 4,
> +    NETDEV_TX_IPV4_CKSUM_OFFLOAD = 1 << 3,
> +    NETDEV_TX_TSO_OFFLOAD = 1 << 4,
> +    NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 5,
>  };
>
>  /*
> @@ -984,6 +985,10 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
>          conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_KEEP_CRC;
>      }
>
> +    if (dev->hw_ol_features & NETDEV_TX_IPV4_CKSUM_OFFLOAD) {
> +        conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM;
> +    }
> +
>      if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
>          conf.txmode.offloads |= DPDK_TX_TSO_OFFLOAD_FLAGS;
>          if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) {
> @@ -1124,6 +1129,12 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>          dev->hw_ol_features &= ~NETDEV_RX_HW_SCATTER;
>      }
>
> +    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM) {
> +        dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD;
> +    } else {
> +        dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD;
> +    }
> +
>      dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
>      if (userspace_tso_enabled()) {
>          if ((info.tx_offload_capa & tx_tso_offload_capa)
> @@ -1693,16 +1704,12 @@ netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args)
>                          dev->requested_txq_size);
>          smap_add_format(args, "configured_txq_descriptors", "%d",
>                          dev->txq_size);
> -        if (dev->hw_ol_features & NETDEV_RX_CHECKSUM_OFFLOAD) {
> -            smap_add(args, "rx_csum_offload", "true");
> -        } else {
> -            smap_add(args, "rx_csum_offload", "false");
> -        }
> -        if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
> -            smap_add(args, "tx_tso_offload", "true");
> -        } else {
> -            smap_add(args, "tx_tso_offload", "false");
> -        }
> +#define HWOL_SMAP_ADD(FIELD, FLAG) \
> +        smap_add(args, FIELD, dev->hw_ol_features & FLAG ? "true" : "false");
> +        HWOL_SMAP_ADD("rx_csum_offload", NETDEV_RX_CHECKSUM_OFFLOAD);
> +        HWOL_SMAP_ADD("tx_ip_csum_offload", NETDEV_TX_IPV4_CKSUM_OFFLOAD);
> +        HWOL_SMAP_ADD("tx_tso_offload", NETDEV_TX_TSO_OFFLOAD);
> +#undef HWOL_SMAP_ADD
>          smap_add(args, "lsc_interrupt_mode",
>                   dev->lsc_interrupt_mode ? "true" : "false");
>
> @@ -2145,12 +2152,10 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
>  {
>      struct dp_packet *pkt = CONTAINER_OF(mbuf, struct dp_packet, mbuf);
>
> -    if (mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
> -        mbuf->l2_len = (char *)dp_packet_l3(pkt) - (char *)dp_packet_eth(pkt);
> -        mbuf->l3_len = (char *)dp_packet_l4(pkt) - (char *)dp_packet_l3(pkt);
> -        mbuf->outer_l2_len = 0;
> -        mbuf->outer_l3_len = 0;
> -    }
> +    mbuf->l2_len = (char *) dp_packet_l3(pkt) - (char *) dp_packet_eth(pkt);
> +    mbuf->l3_len = (char *) dp_packet_l4(pkt) - (char *) dp_packet_l3(pkt);
> +    mbuf->outer_l2_len = 0;
> +    mbuf->outer_l3_len = 0;
>
>      if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
>          struct tcp_header *th = dp_packet_l4(pkt);
> @@ -2210,13 +2215,11 @@ netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
>      uint32_t nb_tx = 0;
>      uint16_t nb_tx_prep = cnt;
>
> -    if (userspace_tso_enabled()) {
> -        nb_tx_prep = rte_eth_tx_prepare(dev->port_id, qid, pkts, cnt);
> -        if (nb_tx_prep != cnt) {
> -            VLOG_WARN_RL(&rl, "%s: Output batch contains invalid packets. "
> -                         "Only %u/%u are valid: %s", dev->up.name, nb_tx_prep,
> -                         cnt, rte_strerror(rte_errno));
> -        }
> +    nb_tx_prep = rte_eth_tx_prepare(dev->port_id, qid, pkts, cnt);
> +    if (nb_tx_prep != cnt) {
> +        VLOG_WARN_RL(&rl, "%s: Output batch contains invalid packets. "
> +                     "Only %u/%u are valid: %s", dev->up.name, nb_tx_prep,
> +                     cnt, rte_strerror(rte_errno));
>      }
>
>      while (nb_tx != nb_tx_prep) {
> @@ -2656,12 +2659,10 @@ dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, struct dp_packet *pkt_orig)
>      memcpy(&pkt_dest->l2_pad_size, &pkt_orig->l2_pad_size,
>             sizeof(struct dp_packet) - offsetof(struct dp_packet, l2_pad_size));
>
> -    if (mbuf_dest->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
> -        mbuf_dest->l2_len = (char *)dp_packet_l3(pkt_dest)
> -                                - (char *)dp_packet_eth(pkt_dest);
> -        mbuf_dest->l3_len = (char *)dp_packet_l4(pkt_dest)
> -                                - (char *) dp_packet_l3(pkt_dest);
> -    }
> +    mbuf_dest->l2_len = (char *) dp_packet_l3(pkt_dest)
> +                            - (char *) dp_packet_eth(pkt_dest);
> +    mbuf_dest->l3_len = (char *) dp_packet_l4(pkt_dest)
> +                            - (char *) dp_packet_l3(pkt_dest);
>
>      return pkt_dest;
>  }

Stopping at this patch, there was one issue in the code, that left me
perplex for while :-).

Running with userspace-tso-enable="false" and sending 2 packets for
which a OF rule changes the dest ip address:
(gdb) b rte_eth_tx_burst
Breakpoint 3 at 0x936600: rte_eth_tx_burst. (38 locations)
(gdb) commands
Type commands for breakpoint(s) 3, one per line.
End with a line saying just "end".
>p tx_pkts[0].ol_flags
>p tx_pkts[0].l2_len
>x/hx (uint16_t *)(tx_pkts[0].buf_addr + tx_pkts[0].data_off + 24)
>continue
>end

- First packet:

Thread 15 "pmd-c05/id:11" hit Breakpoint 3, rte_eth_tx_burst
(nb_pkts=<optimized out>, tx_pkts=0x7f09b4005970, queue_id=3,
port_id=3) at /root/ovs/dpdk-dir/v21.11/install/include/rte_ethdev.h:5680
5680        nb_pkts = p->tx_pkt_burst(qd, tx_pkts, nb_pkts);
$9 = 36028797018964098
$10 = 0
0x1590f15158:    0x82ca

Which translates to:
RTE_MBUF_F_RX_RSS_HASH
RTE_MBUF_F_RX_IP_CKSUM_GOOD
RTE_MBUF_F_TX_IPV4

The packet checksum in memory is correct, because it got resolved in
sw, during the "upcall" handling.
OVS does not ask for hw ip csum, which is expected too.

All is good, the packet is received:
07:14:12.982762 0c:42:a1:00:a8:7c > 0c:42:a1:00:a8:7d, ethertype IPv4
(0x0800), length 60: (tos 0x0, ttl 64, id 1, offset 0, flags [none],
proto unknown (254), length 20)
...
    0x0010:  0014 0001 0000 40fe *ca82* c0a8 1701 c0a8  ......@.........


- Second packet:

Thread 15 "pmd-c05/id:11" hit Breakpoint 3, rte_eth_tx_burst
(nb_pkts=<optimized out>, tx_pkts=0x7f09b4005970, queue_id=3,
port_id=3) at /root/ovs/dpdk-dir/v21.11/install/include/rte_ethdev.h:5680
5680        nb_pkts = p->tx_pkt_burst(qd, tx_pkts, nb_pkts);
$11 = 54043195528445954
$12 = 0
0x1590f15d18:    0x97ca

Which translates to:
RTE_MBUF_F_RX_RSS_HASH
RTE_MBUF_F_TX_IP_CKSUM
RTE_MBUF_F_TX_IPV4

The packet checksum is invalid in memory.
OVS asks for TX ip checksum.
But l2_len is empty.

This is because of a check in netdev_dpdk_common_send()
(userspace_tso_enabled() == false), and as a consequence
netdev_dpdk_prep_hwol_batch() is not called.

But.. testing with a mlx5 hw/driver, the received packet has a valid csum:
07:14:12.984818 0c:42:a1:00:a8:7c > 0c:42:a1:00:a8:7d, ethertype IPv4
(0x0800), length 60: (tos 0x0, ttl 64, id 1, offset 0, flags [none],
proto unknown (254), length 20)
...
    0x0010:  0014 0001 0000 40fe *ca82* c0a8 1701 c0a8  ......@.........


My assumption is that the mlx5 hw does not need l2_len to determine
where to find the ip header.

This might be different with other nics and the mbuf API wrt IP
checksum is still explicit:
https://git.dpdk.org/dpdk/tree/lib/mbuf/rte_mbuf_core.h?h=v21.11#n411

/**
 * Offload the IP checksum in the hardware. The flag RTE_MBUF_F_TX_IPV4 should
 * also be set by the application, although a PMD will only check
 * RTE_MBUF_F_TX_IP_CKSUM.
 *  - fill the mbuf offload information: l2_len, l3_len
 */
#define RTE_MBUF_F_TX_IP_CKSUM      (1ULL << 54)

So netdev_dpdk_prep_hwol_batch should be called regardless of
userspace_tso_enabled().

Diffing v4 with v6, it seems this is a rebase issue:

@@ -2770,9 +2770,11 @@ netdev_dpdk_common_send(struct netdev *netdev,
struct dp_packet_batch *batch,
     pkt_cnt = cnt;

     /* Prepare each mbuf for hardware offloading. */
-    cnt = netdev_dpdk_prep_ol_batch(dev, pkts, pkt_cnt);
-    stats->tx_invalid_ol_drops += pkt_cnt - cnt;
-    pkt_cnt = cnt;
+    if (userspace_tso_enabled()) {
+        cnt = netdev_dpdk_prep_hwol_batch(dev, pkts, pkt_cnt);
+        stats->tx_invalid_hwol_drops += pkt_cnt - cnt;
+        pkt_cnt = cnt;
+    }

     /* Apply Quality of Service policy. */
     cnt = netdev_dpdk_qos_run(dev, pkts, pkt_cnt, true);


The rest lgtm.
With this fixed, you can add my:
Reviewed-by: David Marchand <david.marchand@redhat.com>
diff mbox series

Patch

diff --git a/lib/conntrack.c b/lib/conntrack.c
index faa2d6ab7..d5793856d 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -2089,16 +2089,15 @@  conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type,
     ctx->key.dl_type = dl_type;
 
     if (ctx->key.dl_type == htons(ETH_TYPE_IP)) {
-        bool hwol_bad_l3_csum = dp_packet_ip_checksum_bad(pkt);
-        if (hwol_bad_l3_csum) {
+        if (dp_packet_ip_checksum_bad(pkt)) {
             ok = false;
             COVERAGE_INC(conntrack_l3csum_err);
         } else {
-            bool hwol_good_l3_csum = dp_packet_ip_checksum_valid(pkt)
-                                     || dp_packet_hwol_is_ipv4(pkt);
-            /* Validate the checksum only when hwol is not supported. */
+            /* Validate the checksum only when hwol is not supported and the
+             * packets checksum status is not known. */
             ok = extract_l3_ipv4(&ctx->key, l3, dp_packet_l3_size(pkt), NULL,
-                                 !hwol_good_l3_csum);
+                                 !dp_packet_hwol_is_ipv4(pkt) &&
+                                 !dp_packet_ip_checksum_good(pkt));
         }
     } else if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) {
         ok = extract_l3_ipv6(&ctx->key, l3, dp_packet_l3_size(pkt), NULL);
@@ -2109,7 +2108,7 @@  conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type,
     if (ok) {
         bool hwol_bad_l4_csum = dp_packet_l4_checksum_bad(pkt);
         if (!hwol_bad_l4_csum) {
-            bool  hwol_good_l4_csum = dp_packet_l4_checksum_valid(pkt)
+            bool  hwol_good_l4_csum = dp_packet_l4_checksum_good(pkt)
                                       || dp_packet_hwol_tx_l4_checksum(pkt);
             /* Validate the checksum only when hwol is not supported. */
             if (extract_l4(&ctx->key, l4, dp_packet_l4_size(pkt),
@@ -3402,7 +3401,9 @@  handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
                 }
                 if (seq_skew) {
                     ip_len = ntohs(l3_hdr->ip_tot_len) + seq_skew;
-                    if (!dp_packet_hwol_is_ipv4(pkt)) {
+                    if (dp_packet_hwol_tx_ip_csum(pkt)) {
+                        dp_packet_ol_reset_ip_csum_good(pkt);
+                    } else {
                         l3_hdr->ip_csum = recalc_csum16(l3_hdr->ip_csum,
                                                         l3_hdr->ip_tot_len,
                                                         htons(ip_len));
diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index 35c72542a..7771d0d62 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -21,6 +21,7 @@ 
 #include "dp-packet.h"
 #include "netdev-afxdp.h"
 #include "netdev-dpdk.h"
+#include "netdev-provider.h"
 #include "openvswitch/dynamic-string.h"
 #include "util.h"
 
@@ -506,3 +507,17 @@  dp_packet_resize_l2(struct dp_packet *b, int increment)
     dp_packet_adjust_layer_offset(&b->l2_5_ofs, increment);
     return dp_packet_data(b);
 }
+
+/* Checks if the packet 'p' is compatible with netdev_ol_flags 'flags'
+ * and if not, update the packet with the software fall back. */
+void
+dp_packet_ol_send_prepare(struct dp_packet *p, const uint64_t flags)
+{
+    if (!(flags & NETDEV_TX_OFFLOAD_IPV4_CKSUM)) {
+        if (!dp_packet_ip_checksum_good(p) && dp_packet_hwol_tx_ip_csum(p)) {
+            dp_packet_ip_set_header_csum(p);
+            dp_packet_ol_set_ip_csum_good(p);
+        }
+        dp_packet_hwol_reset_tx_ip_csum(p);
+    }
+}
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index eea5a9215..48f5e82a7 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -25,6 +25,7 @@ 
 #include <rte_mbuf.h>
 #endif
 
+#include "csum.h"
 #include "netdev-afxdp.h"
 #include "netdev-dpdk.h"
 #include "openvswitch/list.h"
@@ -83,6 +84,8 @@  enum dp_packet_offload_mask {
     DEF_OL_FLAG(DP_PACKET_OL_TX_UDP_CKSUM, RTE_MBUF_F_TX_UDP_CKSUM, 0x400),
     /* Offload SCTP checksum. */
     DEF_OL_FLAG(DP_PACKET_OL_TX_SCTP_CKSUM, RTE_MBUF_F_TX_SCTP_CKSUM, 0x800),
+    /* Offload IP checksum. */
+    DEF_OL_FLAG(DP_PACKET_OL_TX_IP_CKSUM, RTE_MBUF_F_TX_IP_CKSUM, 0x1000),
     /* Adding new field requires adding to DP_PACKET_OL_SUPPORTED_MASK. */
 };
 
@@ -97,7 +100,8 @@  enum dp_packet_offload_mask {
                                      DP_PACKET_OL_TX_IPV6          | \
                                      DP_PACKET_OL_TX_TCP_CKSUM     | \
                                      DP_PACKET_OL_TX_UDP_CKSUM     | \
-                                     DP_PACKET_OL_TX_SCTP_CKSUM)
+                                     DP_PACKET_OL_TX_SCTP_CKSUM    | \
+                                     DP_PACKET_OL_TX_IP_CKSUM)
 
 #define DP_PACKET_OL_TX_L4_MASK (DP_PACKET_OL_TX_TCP_CKSUM | \
                                  DP_PACKET_OL_TX_UDP_CKSUM | \
@@ -235,6 +239,7 @@  void *dp_packet_steal_data(struct dp_packet *);
 
 static inline bool dp_packet_equal(const struct dp_packet *,
                                    const struct dp_packet *);
+void dp_packet_ol_send_prepare(struct dp_packet *, const uint64_t);
 
 
 /* Frees memory that 'b' points to, as well as 'b' itself. */
@@ -1024,6 +1029,26 @@  dp_packet_hwol_set_tx_ipv6(struct dp_packet *b)
     *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_IPV6;
 }
 
+/* Returns 'true' if packet 'p' is marked for IPv4 checksum offloading. */
+static inline bool
+dp_packet_hwol_tx_ip_csum(const struct dp_packet *p)
+{
+    return !!(*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_TX_IP_CKSUM);
+}
+
+/* Marks packet 'p' for IPv4 checksum offloading. */
+static inline void
+dp_packet_hwol_set_tx_ip_csum(struct dp_packet *p)
+{
+    *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_TX_IP_CKSUM;
+}
+
+static inline void
+dp_packet_hwol_reset_tx_ip_csum(struct dp_packet *p)
+{
+    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_IP_CKSUM;
+}
+
 /* Mark packet 'b' for TCP checksum offloading.  It implies that either
  * the packet 'b' is marked for IPv4 or IPv6 checksum offloading. */
 static inline void
@@ -1057,13 +1082,31 @@  dp_packet_hwol_set_tcp_seg(struct dp_packet *b)
     *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TCP_SEG;
 }
 
+/* Returns 'true' is the IP has good integrity and the
+ * checksum in it is complete. */
 static inline bool
-dp_packet_ip_checksum_valid(const struct dp_packet *p)
+dp_packet_ip_checksum_good(const struct dp_packet *p)
 {
     return (*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_RX_IP_CKSUM_MASK) ==
             DP_PACKET_OL_RX_IP_CKSUM_GOOD;
 }
 
+/* Marks packet 'p' with good IPv4 checksum. */
+static inline void
+dp_packet_ol_set_ip_csum_good(const struct dp_packet *p)
+{
+    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_IP_CKSUM_BAD;
+    *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RX_IP_CKSUM_GOOD;
+}
+
+/* Resets IP good checksum flag in packet 'p'. */
+static inline void
+dp_packet_ol_reset_ip_csum_good(const struct dp_packet *p)
+{
+    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_IP_CKSUM_GOOD;
+}
+
+/* Marks packet 'p' with bad IPv4 checksum. */
 static inline bool
 dp_packet_ip_checksum_bad(const struct dp_packet *p)
 {
@@ -1071,8 +1114,19 @@  dp_packet_ip_checksum_bad(const struct dp_packet *p)
             DP_PACKET_OL_RX_IP_CKSUM_BAD;
 }
 
+/* Calculate and set the IPv4 header checksum in packet 'p'. */
+static inline void
+dp_packet_ip_set_header_csum(struct dp_packet *p)
+{
+    struct ip_header *ip = dp_packet_l3(p);
+
+    ovs_assert(ip);
+    ip->ip_csum = 0;
+    ip->ip_csum = csum(ip, sizeof *ip);
+}
+
 static inline bool
-dp_packet_l4_checksum_valid(const struct dp_packet *p)
+dp_packet_l4_checksum_good(const struct dp_packet *p)
 {
     return (*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_RX_L4_CKSUM_MASK) ==
             DP_PACKET_OL_RX_L4_CKSUM_GOOD;
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 018052d08..20652cdca 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -7868,6 +7868,10 @@  dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet_,
         ds_destroy(&ds);
     }
 
+    /* The packet is going to be encapsulated and sent to
+     * the controller. */
+    dp_packet_ol_send_prepare(packet_, 0);
+
     return dp->upcall_cb(packet_, flow, ufid, pmd->core_id, type, userdata,
                          actions, wc, put_actions, dp->upcall_aux);
 }
diff --git a/lib/flow.c b/lib/flow.c
index b1e1fb34d..0ac274dc3 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -907,6 +907,10 @@  miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
         nw_proto = nh->ip_proto;
         nw_frag = ipv4_get_nw_frag(nh);
         data_pull(&data, &size, ip_len);
+        dp_packet_hwol_set_tx_ipv4(packet);
+        if (dp_packet_ip_checksum_good(packet)) {
+            dp_packet_hwol_set_tx_ip_csum(packet);
+        }
     } else if (dl_type == htons(ETH_TYPE_IPV6)) {
         const struct ovs_16aligned_ip6_hdr *nh = data;
         ovs_be32 tc_flow;
@@ -920,6 +924,7 @@  miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
         }
         data_pull(&data, &size, sizeof *nh);
 
+        dp_packet_hwol_set_tx_ipv6(packet);
         plen = ntohs(nh->ip6_plen);
         dp_packet_set_l2_pad_size(packet, size - plen);
         size = plen;   /* Never pull padding. */
@@ -3221,9 +3226,12 @@  packet_expand(struct dp_packet *p, const struct flow *flow, size_t size)
             struct ip_header *ip = dp_packet_l3(p);
 
             ip->ip_tot_len = htons(p->l4_ofs - p->l3_ofs + l4_len);
-            ip->ip_csum = 0;
-            ip->ip_csum = csum(ip, sizeof *ip);
-
+            if (dp_packet_hwol_tx_ip_csum(p)) {
+                dp_packet_ol_reset_ip_csum_good(p);
+            } else {
+                dp_packet_ip_set_header_csum(p);
+                dp_packet_ol_set_ip_csum_good(p);
+            }
             pseudo_hdr_csum = packet_csum_pseudoheader(ip);
         } else { /* ETH_TYPE_IPV6 */
             struct ovs_16aligned_ip6_hdr *nh = dp_packet_l3(p);
@@ -3313,6 +3321,7 @@  flow_compose(struct dp_packet *p, const struct flow *flow,
         /* Checksum has already been zeroed by put_zeros call. */
         ip->ip_csum = csum(ip, sizeof *ip);
 
+        dp_packet_ol_set_ip_csum_good(p);
         pseudo_hdr_csum = packet_csum_pseudoheader(ip);
         flow_compose_l4_csum(p, flow, pseudo_hdr_csum);
     } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
diff --git a/lib/ipf.c b/lib/ipf.c
index d45266374..18c98576a 100644
--- a/lib/ipf.c
+++ b/lib/ipf.c
@@ -433,7 +433,9 @@  ipf_reassemble_v4_frags(struct ipf_list *ipf_list)
     len += rest_len;
     l3 = dp_packet_l3(pkt);
     ovs_be16 new_ip_frag_off = l3->ip_frag_off & ~htons(IP_MORE_FRAGMENTS);
-    if (!dp_packet_hwol_is_ipv4(pkt)) {
+    if (dp_packet_hwol_tx_ip_csum(pkt)) {
+        dp_packet_ol_reset_ip_csum_good(pkt);
+    } else {
         l3->ip_csum = recalc_csum16(l3->ip_csum, l3->ip_frag_off,
                                     new_ip_frag_off);
         l3->ip_csum = recalc_csum16(l3->ip_csum, l3->ip_tot_len, htons(len));
@@ -608,8 +610,7 @@  ipf_is_valid_v4_frag(struct ipf *ipf, struct dp_packet *pkt)
         goto invalid_pkt;
     }
 
-    if (OVS_UNLIKELY(!dp_packet_ip_checksum_valid(pkt)
-                     && !dp_packet_hwol_is_ipv4(pkt)
+    if (OVS_UNLIKELY(!dp_packet_ip_checksum_good(pkt)
                      && csum(l3, ip_hdr_len) != 0)) {
         COVERAGE_INC(ipf_l3csum_err);
         goto invalid_pkt;
@@ -1185,7 +1186,9 @@  ipf_post_execute_reass_pkts(struct ipf *ipf,
                     } else {
                         struct ip_header *l3_frag = dp_packet_l3(frag_i->pkt);
                         struct ip_header *l3_reass = dp_packet_l3(pkt);
-                        if (!dp_packet_hwol_is_ipv4(frag_i->pkt)) {
+                        if (dp_packet_hwol_tx_ip_csum(frag_i->pkt)) {
+                            dp_packet_ol_reset_ip_csum_good(frag_i->pkt);
+                        } else {
                             ovs_be32 reass_ip =
                                 get_16aligned_be32(&l3_reass->ip_src);
                             ovs_be32 frag_ip =
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 081900576..811c62a87 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -397,8 +397,9 @@  enum dpdk_hw_ol_features {
     NETDEV_RX_CHECKSUM_OFFLOAD = 1 << 0,
     NETDEV_RX_HW_CRC_STRIP = 1 << 1,
     NETDEV_RX_HW_SCATTER = 1 << 2,
-    NETDEV_TX_TSO_OFFLOAD = 1 << 3,
-    NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 4,
+    NETDEV_TX_IPV4_CKSUM_OFFLOAD = 1 << 3,
+    NETDEV_TX_TSO_OFFLOAD = 1 << 4,
+    NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 5,
 };
 
 /*
@@ -984,6 +985,10 @@  dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
         conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_KEEP_CRC;
     }
 
+    if (dev->hw_ol_features & NETDEV_TX_IPV4_CKSUM_OFFLOAD) {
+        conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM;
+    }
+
     if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
         conf.txmode.offloads |= DPDK_TX_TSO_OFFLOAD_FLAGS;
         if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) {
@@ -1124,6 +1129,12 @@  dpdk_eth_dev_init(struct netdev_dpdk *dev)
         dev->hw_ol_features &= ~NETDEV_RX_HW_SCATTER;
     }
 
+    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_IPV4_CKSUM) {
+        dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD;
+    } else {
+        dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD;
+    }
+
     dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
     if (userspace_tso_enabled()) {
         if ((info.tx_offload_capa & tx_tso_offload_capa)
@@ -1693,16 +1704,12 @@  netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args)
                         dev->requested_txq_size);
         smap_add_format(args, "configured_txq_descriptors", "%d",
                         dev->txq_size);
-        if (dev->hw_ol_features & NETDEV_RX_CHECKSUM_OFFLOAD) {
-            smap_add(args, "rx_csum_offload", "true");
-        } else {
-            smap_add(args, "rx_csum_offload", "false");
-        }
-        if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
-            smap_add(args, "tx_tso_offload", "true");
-        } else {
-            smap_add(args, "tx_tso_offload", "false");
-        }
+#define HWOL_SMAP_ADD(FIELD, FLAG) \
+        smap_add(args, FIELD, dev->hw_ol_features & FLAG ? "true" : "false");
+        HWOL_SMAP_ADD("rx_csum_offload", NETDEV_RX_CHECKSUM_OFFLOAD);
+        HWOL_SMAP_ADD("tx_ip_csum_offload", NETDEV_TX_IPV4_CKSUM_OFFLOAD);
+        HWOL_SMAP_ADD("tx_tso_offload", NETDEV_TX_TSO_OFFLOAD);
+#undef HWOL_SMAP_ADD
         smap_add(args, "lsc_interrupt_mode",
                  dev->lsc_interrupt_mode ? "true" : "false");
 
@@ -2145,12 +2152,10 @@  netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
 {
     struct dp_packet *pkt = CONTAINER_OF(mbuf, struct dp_packet, mbuf);
 
-    if (mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
-        mbuf->l2_len = (char *)dp_packet_l3(pkt) - (char *)dp_packet_eth(pkt);
-        mbuf->l3_len = (char *)dp_packet_l4(pkt) - (char *)dp_packet_l3(pkt);
-        mbuf->outer_l2_len = 0;
-        mbuf->outer_l3_len = 0;
-    }
+    mbuf->l2_len = (char *) dp_packet_l3(pkt) - (char *) dp_packet_eth(pkt);
+    mbuf->l3_len = (char *) dp_packet_l4(pkt) - (char *) dp_packet_l3(pkt);
+    mbuf->outer_l2_len = 0;
+    mbuf->outer_l3_len = 0;
 
     if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
         struct tcp_header *th = dp_packet_l4(pkt);
@@ -2210,13 +2215,11 @@  netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
     uint32_t nb_tx = 0;
     uint16_t nb_tx_prep = cnt;
 
-    if (userspace_tso_enabled()) {
-        nb_tx_prep = rte_eth_tx_prepare(dev->port_id, qid, pkts, cnt);
-        if (nb_tx_prep != cnt) {
-            VLOG_WARN_RL(&rl, "%s: Output batch contains invalid packets. "
-                         "Only %u/%u are valid: %s", dev->up.name, nb_tx_prep,
-                         cnt, rte_strerror(rte_errno));
-        }
+    nb_tx_prep = rte_eth_tx_prepare(dev->port_id, qid, pkts, cnt);
+    if (nb_tx_prep != cnt) {
+        VLOG_WARN_RL(&rl, "%s: Output batch contains invalid packets. "
+                     "Only %u/%u are valid: %s", dev->up.name, nb_tx_prep,
+                     cnt, rte_strerror(rte_errno));
     }
 
     while (nb_tx != nb_tx_prep) {
@@ -2656,12 +2659,10 @@  dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, struct dp_packet *pkt_orig)
     memcpy(&pkt_dest->l2_pad_size, &pkt_orig->l2_pad_size,
            sizeof(struct dp_packet) - offsetof(struct dp_packet, l2_pad_size));
 
-    if (mbuf_dest->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
-        mbuf_dest->l2_len = (char *)dp_packet_l3(pkt_dest)
-                                - (char *)dp_packet_eth(pkt_dest);
-        mbuf_dest->l3_len = (char *)dp_packet_l4(pkt_dest)
-                                - (char *) dp_packet_l3(pkt_dest);
-    }
+    mbuf_dest->l2_len = (char *) dp_packet_l3(pkt_dest)
+                            - (char *) dp_packet_eth(pkt_dest);
+    mbuf_dest->l3_len = (char *) dp_packet_l4(pkt_dest)
+                            - (char *) dp_packet_l3(pkt_dest);
 
     return pkt_dest;
 }
@@ -4937,6 +4938,13 @@  netdev_dpdk_reconfigure(struct netdev *netdev)
     }
 
     err = dpdk_eth_dev_init(dev);
+
+    if (dev->hw_ol_features & NETDEV_TX_IPV4_CKSUM_OFFLOAD) {
+        netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM;
+    } else {
+        netdev->ol_flags &= ~NETDEV_TX_OFFLOAD_IPV4_CKSUM;
+    }
+
     if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
         netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO;
         netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM;
diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
index 72cb95471..db07ee46c 100644
--- a/lib/netdev-dummy.c
+++ b/lib/netdev-dummy.c
@@ -148,6 +148,11 @@  struct netdev_dummy {
     int requested_n_txq OVS_GUARDED;
     int requested_n_rxq OVS_GUARDED;
     int requested_numa_id OVS_GUARDED;
+
+    /* Enable netdev IP csum offload. */
+    bool ol_ip_csum OVS_GUARDED;
+    /* Flag RX packet with good csum. */
+    bool ol_ip_csum_set_good OVS_GUARDED;
 };
 
 /* Max 'recv_queue_len' in struct netdev_dummy. */
@@ -910,6 +915,13 @@  netdev_dummy_set_config(struct netdev *netdev_, const struct smap *args,
         }
     }
 
+    netdev->ol_ip_csum_set_good = smap_get_bool(args, "ol_ip_csum_set_good",
+                                                false);
+    netdev->ol_ip_csum = smap_get_bool(args, "ol_ip_csum", false);
+    if (netdev->ol_ip_csum) {
+        netdev_->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM;
+    }
+
     netdev_change_seq_changed(netdev_);
 
     /* 'dummy-pmd' specific config. */
@@ -1088,6 +1100,10 @@  netdev_dummy_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch,
     netdev->rxq_stats[rxq_->queue_id].bytes += dp_packet_size(packet);
     netdev->custom_stats[0].value++;
     netdev->custom_stats[1].value++;
+    if (netdev->ol_ip_csum_set_good) {
+        /* The netdev hardware sets the flag when the packet has good csum. */
+        dp_packet_ol_set_ip_csum_good(packet);
+    }
     ovs_mutex_unlock(&netdev->mutex);
 
     dp_packet_batch_init_packet(batch, packet);
@@ -1170,6 +1186,13 @@  netdev_dummy_send(struct netdev *netdev, int qid,
         }
 
         ovs_mutex_lock(&dev->mutex);
+        if (dp_packet_hwol_tx_ip_csum(packet)) {
+            if (!dp_packet_ip_checksum_good(packet)) {
+                dp_packet_ip_set_header_csum(packet);
+                dp_packet_ol_set_ip_csum_good(packet);
+            }
+        }
+
         dev->stats.tx_packets++;
         dev->txq_stats[qid].packets++;
         dev->stats.tx_bytes += size;
diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
index b89dfdd52..754e2d78d 100644
--- a/lib/netdev-native-tnl.c
+++ b/lib/netdev-native-tnl.c
@@ -88,7 +88,10 @@  netdev_tnl_ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
 
         ovs_be32 ip_src, ip_dst;
 
-        if (OVS_UNLIKELY(!dp_packet_ip_checksum_valid(packet))) {
+        /* A packet coming from a network device might have the
+         * csum already checked. In this case, skip the check. */
+        if (OVS_UNLIKELY(!dp_packet_ip_checksum_good(packet))
+            && !dp_packet_hwol_tx_ip_csum(packet)) {
             if (csum(ip, IP_IHL(ip->ip_ihl_ver) * 4)) {
                 VLOG_WARN_RL(&err_rl, "ip packet has invalid checksum");
                 return NULL;
@@ -142,7 +145,8 @@  netdev_tnl_ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
  *
  * This function sets the IP header's ip_tot_len field (which should be zeroed
  * as part of 'header') and puts its value into '*ip_tot_size' as well.  Also
- * updates IP header checksum, as well as the l3 and l4 offsets in 'packet'.
+ * updates IP header checksum if not offloaded, as well as the l3 and l4
+ * offsets in 'packet'.
  *
  * Return pointer to the L4 header added to 'packet'. */
 void *
@@ -167,11 +171,16 @@  netdev_tnl_push_ip_header(struct dp_packet *packet,
         *ip_tot_size -= IPV6_HEADER_LEN;
         ip6->ip6_plen = htons(*ip_tot_size);
         packet->l4_ofs = dp_packet_size(packet) - *ip_tot_size;
+        dp_packet_hwol_set_tx_ipv6(packet);
+        dp_packet_ol_reset_ip_csum_good(packet);
         return ip6 + 1;
     } else {
         ip = netdev_tnl_ip_hdr(eth);
         ip->ip_tot_len = htons(*ip_tot_size);
-        ip->ip_csum = recalc_csum16(ip->ip_csum, 0, ip->ip_tot_len);
+        /* Postpone checksum to when the packet is pushed to the port. */
+        dp_packet_hwol_set_tx_ipv4(packet);
+        dp_packet_hwol_set_tx_ip_csum(packet);
+        dp_packet_ol_reset_ip_csum_good(packet);
         *ip_tot_size -= IP_HEADER_LEN;
         packet->l4_ofs = dp_packet_size(packet) - *ip_tot_size;
         return ip + 1;
@@ -190,7 +199,7 @@  udp_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
     }
 
     if (udp->udp_csum) {
-        if (OVS_UNLIKELY(!dp_packet_l4_checksum_valid(packet))) {
+        if (OVS_UNLIKELY(!dp_packet_l4_checksum_good(packet))) {
             uint32_t csum;
             if (netdev_tnl_is_header_ipv6(dp_packet_data(packet))) {
                 csum = packet_csum_pseudoheader6(dp_packet_l3(packet));
@@ -297,8 +306,8 @@  netdev_tnl_ip_build_header(struct ovs_action_push_tnl *data,
         ip->ip_frag_off = (params->flow->tunnel.flags & FLOW_TNL_F_DONT_FRAGMENT) ?
                           htons(IP_DF) : 0;
 
-        /* Checksum has already been zeroed by eth_build_header. */
-        ip->ip_csum = csum(ip, sizeof *ip);
+        /* The checksum will be calculated when the headers are pushed
+         * to the packet if offloading is not enabled. */
 
         data->header_len += IP_HEADER_LEN;
         return ip + 1;
diff --git a/lib/netdev.c b/lib/netdev.c
index 32ec45bbe..be546a291 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -801,6 +801,14 @@  netdev_send_prepare_packet(const uint64_t netdev_flags,
             return false;
     }
 
+    /* Packet with IP csum offloading enabled was received with verified csum.
+     * Leave the IP csum offloading enabled even with good checksum to the
+     * netdev to decide what would be the best to do.
+     * Provide a software fallback in case the device doesn't support IP csum
+     * offloading. Note: Encapsulated packet must have the inner IP header
+     * csum already calculated. */
+    dp_packet_ol_send_prepare(packet, netdev_flags);
+
     l4_mask = dp_packet_hwol_l4_mask(packet);
     if (l4_mask) {
         if (dp_packet_hwol_l4_is_tcp(packet)) {
@@ -968,7 +976,21 @@  netdev_push_header(const struct netdev *netdev,
                          "not supported: packet dropped",
                          netdev_get_name(netdev));
         } else {
+            /* The packet is going to be encapsulated and there is
+             * no support yet for inner network header csum offloading. */
+            if (dp_packet_hwol_tx_ip_csum(packet)
+                && !dp_packet_ip_checksum_good(packet)) {
+                dp_packet_ip_set_header_csum(packet);
+            }
+
             netdev->netdev_class->push_header(netdev, packet, data);
+            if (dp_packet_hwol_tx_ip_csum(packet)) {
+                dp_packet_ol_reset_ip_csum_good(packet);
+            } else if (dp_packet_hwol_is_ipv4(packet)) {
+                dp_packet_ip_set_header_csum(packet);
+                dp_packet_ol_set_ip_csum_good(packet);
+            }
+
             pkt_metadata_init(&packet->md, data->out_port);
             dp_packet_batch_refill(batch, packet, i);
         }
diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index 7da56793d..9b25db0ea 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -167,9 +167,14 @@  odp_set_ipv4(struct dp_packet *packet, const struct ovs_key_ipv4 *key,
         new_tos = key->ipv4_tos | (nh->ip_tos & ~mask->ipv4_tos);
 
         if (nh->ip_tos != new_tos) {
-            nh->ip_csum = recalc_csum16(nh->ip_csum,
-                                        htons((uint16_t) nh->ip_tos),
-                                        htons((uint16_t) new_tos));
+            if (dp_packet_hwol_tx_ip_csum(packet)) {
+                dp_packet_ol_reset_ip_csum_good(packet);
+            } else {
+                nh->ip_csum = recalc_csum16(nh->ip_csum,
+                                            htons((uint16_t) nh->ip_tos),
+                                            htons((uint16_t) new_tos));
+            }
+
             nh->ip_tos = new_tos;
         }
     }
@@ -178,8 +183,14 @@  odp_set_ipv4(struct dp_packet *packet, const struct ovs_key_ipv4 *key,
         new_ttl = key->ipv4_ttl | (nh->ip_ttl & ~mask->ipv4_ttl);
 
         if (OVS_LIKELY(nh->ip_ttl != new_ttl)) {
-            nh->ip_csum = recalc_csum16(nh->ip_csum, htons(nh->ip_ttl << 8),
-                                        htons(new_ttl << 8));
+            if (dp_packet_hwol_tx_ip_csum(packet)) {
+                dp_packet_ol_reset_ip_csum_good(packet);
+            } else {
+                nh->ip_csum = recalc_csum16(nh->ip_csum,
+                                            htons(nh->ip_ttl << 8),
+                                            htons(new_ttl << 8));
+            }
+
             nh->ip_ttl = new_ttl;
         }
     }
diff --git a/lib/packets.c b/lib/packets.c
index 874066e3c..8334dc523 100644
--- a/lib/packets.c
+++ b/lib/packets.c
@@ -1144,7 +1144,12 @@  packet_set_ipv4_addr(struct dp_packet *packet,
             }
         }
     }
-    nh->ip_csum = recalc_csum32(nh->ip_csum, old_addr, new_addr);
+
+    if (dp_packet_hwol_tx_ip_csum(packet)) {
+        dp_packet_ol_reset_ip_csum_good(packet);
+    } else {
+        nh->ip_csum = recalc_csum32(nh->ip_csum, old_addr, new_addr);
+    }
     put_16aligned_be32(addr, new_addr);
 }
 
@@ -1309,16 +1314,26 @@  packet_set_ipv4(struct dp_packet *packet, ovs_be32 src, ovs_be32 dst,
     if (nh->ip_tos != tos) {
         uint8_t *field = &nh->ip_tos;
 
-        nh->ip_csum = recalc_csum16(nh->ip_csum, htons((uint16_t) *field),
-                                    htons((uint16_t) tos));
+        if (dp_packet_hwol_tx_ip_csum(packet)) {
+            dp_packet_ol_reset_ip_csum_good(packet);
+        } else {
+            nh->ip_csum = recalc_csum16(nh->ip_csum, htons((uint16_t) *field),
+                                        htons((uint16_t) tos));
+        }
+
         *field = tos;
     }
 
     if (nh->ip_ttl != ttl) {
         uint8_t *field = &nh->ip_ttl;
 
-        nh->ip_csum = recalc_csum16(nh->ip_csum, htons(*field << 8),
-                                    htons(ttl << 8));
+        if (dp_packet_hwol_tx_ip_csum(packet)) {
+            dp_packet_ol_reset_ip_csum_good(packet);
+        } else {
+            nh->ip_csum = recalc_csum16(nh->ip_csum, htons(*field << 8),
+                                        htons(ttl << 8));
+        }
+
         *field = ttl;
     }
 }
@@ -1927,8 +1942,13 @@  IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6)
 
         tos |= IP_ECN_CE;
         if (nh->ip_tos != tos) {
-            nh->ip_csum = recalc_csum16(nh->ip_csum, htons(nh->ip_tos),
-                                        htons((uint16_t) tos));
+            if (dp_packet_hwol_tx_ip_csum(pkt)) {
+                dp_packet_ol_reset_ip_csum_good(pkt);
+            } else {
+                nh->ip_csum = recalc_csum16(nh->ip_csum, htons(nh->ip_tos),
+                                            htons((uint16_t) tos));
+            }
+
             nh->ip_tos = tos;
         }
     }
diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
index 57f94df54..8de8a4c2e 100644
--- a/ofproto/ofproto-dpif-upcall.c
+++ b/ofproto/ofproto-dpif-upcall.c
@@ -215,7 +215,7 @@  struct upcall {
     enum odp_key_fitness fitness;  /* Fitness of 'flow' relative to ODP key. */
     const ovs_u128 *ufid;          /* Unique identifier for 'flow'. */
     unsigned pmd_id;               /* Datapath poll mode driver id. */
-    const struct dp_packet *packet;   /* Packet associated with this upcall. */
+    const struct dp_packet *packet;  /* Packet associated with this upcall. */
     ofp_port_t ofp_in_port;        /* OpenFlow in port, or OFPP_NONE. */
     uint16_t mru;                  /* If !0, Maximum receive unit of
                                       fragmented IP packet */
diff --git a/tests/automake.mk b/tests/automake.mk
index b29cb783e..1b6296411 100644
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -161,6 +161,7 @@  SYSTEM_KMOD_TESTSUITE_AT = \
 SYSTEM_USERSPACE_TESTSUITE_AT = \
 	tests/system-userspace-testsuite.at \
 	tests/system-userspace-macros.at \
+	tests/system-userspace-offload.at \
 	tests/system-userspace-packet-type-aware.at \
 	tests/system-route.at
 
diff --git a/tests/system-userspace-offload.at b/tests/system-userspace-offload.at
new file mode 100644
index 000000000..4d7f3ef89
--- /dev/null
+++ b/tests/system-userspace-offload.at
@@ -0,0 +1,79 @@ 
+AT_BANNER([userspace offload])
+
+AT_SETUP([userspace offload - ip csum offload])
+OVS_VSWITCHD_START(
+  [add-br br1 -- set bridge br1 datapath-type=dummy -- \
+   add-port br1 p1 -- \
+       set Interface p1 type=dummy -- \
+   add-port br1 p2 -- \
+       set Interface p2 type=dummy --])
+
+# Modify the ip_dst addr to force changing the IP csum.
+AT_CHECK([ovs-ofctl add-flow br1 in_port=p1,actions=mod_nw_dst:192.168.1.1,output:p2])
+
+# Check if no offload remains ok.
+AT_CHECK([ovs-vsctl set Interface p2 options:tx_pcap=p2.pcap])
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=false])
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=false])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
+0a8f394fe0738abf7e2f058408004500003433e0400040068f8fc0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
+])
+
+# Checksum should change to 0x990 with ip_dst changed to 192.168.1.1
+# by the datapath while processing the packet.
+AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
+AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
+0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
+])
+
+# Check if packets entering the datapath with csum offloading
+# enabled gets the csum updated properly by egress handling
+# in the datapath and not by the netdev.
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=false])
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=true])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
+0a8f394fe0738abf7e2f058408004500003433e0400040068f8fc0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
+])
+AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
+AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
+0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
+])
+
+# Check if packets entering the datapath with csum offloading
+# enabled gets the csum updated properly by netdev and not
+# by the datapath.
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=true])
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=true])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
+0a8f394fe0738abf7e2f058408004500003433e0400040068f8fc0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
+])
+AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
+AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
+0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
+])
+
+# Push a packet with bad csum and offloading disabled to check
+# if the datapath updates the csum, but does not fix the issue.
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=false])
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=false])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
+0a8f394fe0738abf7e2f058408004500003433e0400040068f03c0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
+])
+AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
+AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
+0a8f394fe0738abf7e2f058408004500003433e0400040060904c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
+])
+
+# Push a packet with bad csum and offloading enabled to check
+# if the driver updates and fixes the csum.
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=true])
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=true])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
+0a8f394fe0738abf7e2f058408004500003433e0400040068f03c0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
+])
+AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
+AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
+0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
+])
+OVS_VSWITCHD_STOP
+AT_CLEANUP
diff --git a/tests/system-userspace-testsuite.at b/tests/system-userspace-testsuite.at
index 2e9659a67..1021b4ad4 100644
--- a/tests/system-userspace-testsuite.at
+++ b/tests/system-userspace-testsuite.at
@@ -25,5 +25,6 @@  m4_include([tests/system-common-macros.at])
 m4_include([tests/system-traffic.at])
 m4_include([tests/system-layer3-tunnels.at])
 m4_include([tests/system-interface.at])
+m4_include([tests/system-userspace-offload.at])
 m4_include([tests/system-userspace-packet-type-aware.at])
 m4_include([tests/system-route.at])