diff mbox series

[ovs-dev,[PATCH,RFC] 13/17] Enable IP checksum offloading by default.

Message ID 20211207165156.705727-14-fbl@sysclose.org
State RFC
Headers show
Series [ovs-dev,[PATCH,RFC] 13/17] Enable IP checksum offloading by default. | expand

Commit Message

Flavio Leitner Dec. 7, 2021, 4:51 p.m. UTC
The netdev receiving packets is supposed to provide the flags
indicating if the IP csum was verified and it is OK or BAD,
otherwise the stack will check when appropriate by software.

If the packet comes with good checksum, then postpone the
checksum calculation to the egress device if needed.

When encapsulating a packet with that flag, set the checksum of
the inner IP header in software, since offloading the inner
header checksum is not yet supported.

Calculate the IP csum when the packet is going to be sent over
a device that doesn't support the feature.

Linux devices don't support IP csum offload alone, so the
support is not enabled.

Signed-off-by: Flavio Leitner <fbl@sysclose.org>
---
 lib/conntrack.c                     | 12 ++---
 lib/dp-packet.c                     | 12 +++++
 lib/dp-packet.h                     | 63 ++++++++++++++++++++---
 lib/dpif.h                          |  2 +-
 lib/flow.c                          | 16 ++++--
 lib/ipf.c                           |  9 ++--
 lib/netdev-dpdk.c                   | 78 ++++++++++++++--------------
 lib/netdev-dummy.c                  | 21 ++++++++
 lib/netdev-native-tnl.c             | 19 +++++--
 lib/netdev.c                        | 22 ++++++++
 lib/odp-execute.c                   | 21 ++++++--
 lib/packets.c                       | 34 ++++++++++---
 ofproto/ofproto-dpif-upcall.c       | 14 +++--
 tests/automake.mk                   |  1 +
 tests/system-userspace-offload.at   | 79 +++++++++++++++++++++++++++++
 tests/system-userspace-testsuite.at |  1 +
 16 files changed, 322 insertions(+), 82 deletions(-)
 create mode 100644 tests/system-userspace-offload.at

Comments

Mike Pattrick Jan. 24, 2022, 7:21 p.m. UTC | #1
On Tue, Dec 7, 2021 at 11:54 AM Flavio Leitner <fbl@sysclose.org> wrote:
>
> The netdev receiving packets is supposed to provide the flags
> indicating if the IP csum was verified and it is OK or BAD,
> otherwise the stack will check when appropriate by software.
>
> If the packet comes with good checksum, then postpone the
> checksum calculation to the egress device if needed.
>
> When encapsulating a packet with that flag, set the checksum of
> the inner IP header in software, since offloading the inner
> header checksum is not yet supported.
>
> Calculate the IP csum when the packet is going to be sent over
> a device that doesn't support the feature.
>
> Linux devices don't support IP csum offload alone, so the
> support is not enabled.
>
> Signed-off-by: Flavio Leitner <fbl@sysclose.org>
> ---
>  lib/conntrack.c                     | 12 ++---
>  lib/dp-packet.c                     | 12 +++++
>  lib/dp-packet.h                     | 63 ++++++++++++++++++++---
>  lib/dpif.h                          |  2 +-
>  lib/flow.c                          | 16 ++++--
>  lib/ipf.c                           |  9 ++--
>  lib/netdev-dpdk.c                   | 78 ++++++++++++++--------------
>  lib/netdev-dummy.c                  | 21 ++++++++
>  lib/netdev-native-tnl.c             | 19 +++++--
>  lib/netdev.c                        | 22 ++++++++
>  lib/odp-execute.c                   | 21 ++++++--
>  lib/packets.c                       | 34 ++++++++++---
>  ofproto/ofproto-dpif-upcall.c       | 14 +++--
>  tests/automake.mk                   |  1 +
>  tests/system-userspace-offload.at   | 79 +++++++++++++++++++++++++++++
>  tests/system-userspace-testsuite.at |  1 +
>  16 files changed, 322 insertions(+), 82 deletions(-)
>  create mode 100644 tests/system-userspace-offload.at
>
> diff --git a/lib/conntrack.c b/lib/conntrack.c
> index 2392a2ea4..5b4ca4dfc 100644
> --- a/lib/conntrack.c
> +++ b/lib/conntrack.c
> @@ -2089,16 +2089,12 @@ conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type,
>      ctx->key.dl_type = dl_type;
>
>      if (ctx->key.dl_type == htons(ETH_TYPE_IP)) {
> -        bool hwol_bad_l3_csum = dp_packet_ol_ip_csum_bad(pkt);
> -        if (hwol_bad_l3_csum) {
> +        if (dp_packet_ol_ip_csum_bad(pkt)) {
>              ok = false;
>              COVERAGE_INC(conntrack_l3csum_err);
>          } else {
> -            bool hwol_good_l3_csum = dp_packet_ol_ip_csum_good(pkt)
> -                                     || dp_packet_ol_tx_ipv4(pkt);
> -            /* Validate the checksum only when hwol is not supported. */
>              ok = extract_l3_ipv4(&ctx->key, l3, dp_packet_l3_size(pkt), NULL,
> -                                 !hwol_good_l3_csum);
> +                                 !dp_packet_ol_ip_csum_good(pkt));
>          }
>      } else if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) {
>          ok = extract_l3_ipv6(&ctx->key, l3, dp_packet_l3_size(pkt), NULL);
> @@ -3402,7 +3398,9 @@ handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
>                  }
>                  if (seq_skew) {
>                      ip_len = ntohs(l3_hdr->ip_tot_len) + seq_skew;
> -                    if (!dp_packet_ol_tx_ipv4(pkt)) {
> +                    if (dp_packet_ol_tx_ip_csum(pkt)) {
> +                        dp_packet_ol_reset_ip_csum_good(pkt);
> +                    } else {
>                          l3_hdr->ip_csum = recalc_csum16(l3_hdr->ip_csum,
>                                                          l3_hdr->ip_tot_len,
>                                                          htons(ip_len));

This is more of a general comment for the whole patch series, but I
see that a lot of the diffs use the motif:

if (dp_packet_ol_tx_ip_csum(pkt)) {
    dp_packet_ol_reset_ip_csum_good(pkt);
} else {
    recalc_csumXX()
}

Would it make sense to instead simply flag the checksum as tainted in
the non-offload case as well, and then make only one call to csum() on
packet egress?

-M

> diff --git a/lib/dp-packet.c b/lib/dp-packet.c
> index a4ca5a052..369f3561e 100644
> --- a/lib/dp-packet.c
> +++ b/lib/dp-packet.c
> @@ -21,6 +21,7 @@
>  #include "dp-packet.h"
>  #include "netdev-afxdp.h"
>  #include "netdev-dpdk.h"
> +#include "netdev-provider.h"
>  #include "openvswitch/dynamic-string.h"
>  #include "util.h"
>
> @@ -506,3 +507,14 @@ dp_packet_resize_l2(struct dp_packet *p, int increment)
>      dp_packet_adjust_layer_offset(&p->l2_5_ofs, increment);
>      return dp_packet_data(p);
>  }
> +
> +/* Checks if the packet 'p' is compatible with netdev_ol_flags 'flags'
> + * and, if not, updates the packet using the software fallback. */
> +void
> +dp_packet_ol_send_prepare(struct dp_packet *p, const uint64_t flags) {
> +    if (!dp_packet_ol_ip_csum_good(p) && dp_packet_ol_tx_ip_csum(p)
> +        && !(flags & NETDEV_OFFLOAD_TX_IPV4_CSUM)) {
> +        dp_packet_ip_set_header_csum(p);
> +        dp_packet_ol_set_ip_csum_good(p);
> +    }
> +}
> diff --git a/lib/dp-packet.h b/lib/dp-packet.h
> index ac160985d..278be172e 100644
> --- a/lib/dp-packet.h
> +++ b/lib/dp-packet.h
> @@ -25,6 +25,7 @@
>  #include <rte_mbuf.h>
>  #endif
>
> +#include "csum.h"
>  #include "netdev-afxdp.h"
>  #include "netdev-dpdk.h"
>  #include "openvswitch/list.h"
> @@ -75,12 +76,14 @@ enum dp_packet_offload_mask {
>      DEF_OL_FLAG(DP_PACKET_OL_TX_IPV4, PKT_TX_IPV4, 0x80),
>      /* Offloaded packet is IPv6. */
>      DEF_OL_FLAG(DP_PACKET_OL_TX_IPV6, PKT_TX_IPV6, 0x100),
> +    /* Offload IP checksum. */
> +    DEF_OL_FLAG(DP_PACKET_OL_TX_IP_CSUM, PKT_TX_IP_CKSUM, 0x200),
>      /* Offload TCP checksum. */
> -    DEF_OL_FLAG(DP_PACKET_OL_TX_TCP_CSUM, PKT_TX_TCP_CKSUM, 0x200),
> +    DEF_OL_FLAG(DP_PACKET_OL_TX_TCP_CSUM, PKT_TX_TCP_CKSUM, 0x400),
>      /* Offload UDP checksum. */
> -    DEF_OL_FLAG(DP_PACKET_OL_TX_UDP_CSUM, PKT_TX_UDP_CKSUM, 0x400),
> +    DEF_OL_FLAG(DP_PACKET_OL_TX_UDP_CSUM, PKT_TX_UDP_CKSUM, 0x800),
>      /* Offload SCTP checksum. */
> -    DEF_OL_FLAG(DP_PACKET_OL_TX_SCTP_CSUM, PKT_TX_SCTP_CKSUM, 0x800),
> +    DEF_OL_FLAG(DP_PACKET_OL_TX_SCTP_CSUM, PKT_TX_SCTP_CKSUM, 0x1000),
>      /* Adding new field requires adding to DP_PACKET_OL_SUPPORTED_MASK. */
>  };
>
> @@ -93,6 +96,7 @@ enum dp_packet_offload_mask {
>                                       DP_PACKET_OL_TX_TCP_SEG       | \
>                                       DP_PACKET_OL_TX_IPV4          | \
>                                       DP_PACKET_OL_TX_IPV6          | \
> +                                     DP_PACKET_OL_TX_IP_CSUM      | \
>                                       DP_PACKET_OL_TX_TCP_CSUM     | \
>                                       DP_PACKET_OL_TX_UDP_CSUM     | \
>                                       DP_PACKET_OL_TX_SCTP_CSUM)
> @@ -233,7 +237,7 @@ void *dp_packet_steal_data(struct dp_packet *);
>
>  static inline bool dp_packet_equal(const struct dp_packet *,
>                                     const struct dp_packet *);
> -
> +void dp_packet_ol_send_prepare(struct dp_packet *, const uint64_t);
>
>  /* Frees memory that 'p' points to, as well as 'p' itself. */
>  static inline void
> @@ -970,7 +974,7 @@ dp_packet_ol_tcp_seg(const struct dp_packet *p)
>      return !!(*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_TX_TCP_SEG);
>  }
>
> -/* Returns 'true' if packet 'p' is marked for IPv4 checksum offloading. */
> +/* Returns 'true' if packet 'p' is marked as IPv4. */
>  static inline bool
>  dp_packet_ol_tx_ipv4(const struct dp_packet *p)
>  {
> @@ -1001,20 +1005,36 @@ dp_packet_ol_tx_sctp_csum(struct dp_packet *p)
>              DP_PACKET_OL_TX_SCTP_CSUM;
>  }
>
> -/* Mark packet 'p' for IPv4 checksum offloading. */
> +/* Marks packet 'p' as IPv4. */
>  static inline void
>  dp_packet_ol_set_tx_ipv4(struct dp_packet *p)
>  {
> +    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_IPV6;
>      *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_TX_IPV4;
>  }
>
> -/* Mark packet 'p' for IPv6 checksum offloading. */
> +/* Marks packet 'p' as IPv6. */
>  static inline void
>  dp_packet_ol_set_tx_ipv6(struct dp_packet *p)
>  {
> +    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_IPV4;
>      *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_TX_IPV6;
>  }
>
> +/* Returns 'true' if packet 'p' is marked for IPv4 checksum offloading. */
> +static inline bool
> +dp_packet_ol_tx_ip_csum(const struct dp_packet *p)
> +{
> +    return !!(*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_TX_IP_CSUM);
> +}
> +
> +/* Marks packet 'p' for IPv4 checksum offloading. */
> +static inline void
> +dp_packet_ol_set_tx_ip_csum(struct dp_packet *p)
> +{
> +    *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_TX_IP_CSUM;
> +}
> +
>  /* Mark packet 'p' for TCP checksum offloading.  It implies that either
>   * the packet 'p' is marked for IPv4 or IPv6 checksum offloading. */
>  static inline void
> @@ -1048,6 +1068,8 @@ dp_packet_ol_set_tcp_seg(struct dp_packet *p)
>      *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_TX_TCP_SEG;
>  }
>
> +/* Returns 'true' if the IP header has good integrity and the
> + * checksum in it is complete. */
>  static inline bool
>  dp_packet_ol_ip_csum_good(const struct dp_packet *p)
>  {
> @@ -1055,6 +1077,22 @@ dp_packet_ol_ip_csum_good(const struct dp_packet *p)
>              DP_PACKET_OL_RX_IP_CSUM_GOOD;
>  }
>
> +/* Marks packet 'p' with good IPv4 checksum. */
> +static inline void
> +dp_packet_ol_set_ip_csum_good(const struct dp_packet *p)
> +{
> +    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_IP_CSUM_BAD;
> +    *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RX_IP_CSUM_GOOD;
> +}
> +
> +/* Resets IP good checksum flag in packet 'p'. */
> +static inline void
> +dp_packet_ol_reset_ip_csum_good(const struct dp_packet *p)
> +{
> +    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_IP_CSUM_GOOD;
> +}
> +
> +/* Returns 'true' if packet 'p' is marked with a bad IPv4 checksum. */
>  static inline bool
>  dp_packet_ol_ip_csum_bad(const struct dp_packet *p)
>  {
> @@ -1062,6 +1100,17 @@ dp_packet_ol_ip_csum_bad(const struct dp_packet *p)
>              DP_PACKET_OL_RX_IP_CSUM_BAD;
>  }
>
> +/* Calculate and set the IPv4 header checksum in packet 'p'. */
> +static inline void
> +dp_packet_ip_set_header_csum(struct dp_packet *p)
> +{
> +    struct ip_header *ip = dp_packet_l3(p);
> +
> +    ovs_assert(ip);
> +    ip->ip_csum = 0;
> +    ip->ip_csum = csum(ip, sizeof *ip);
> +}
> +
>  static inline bool
>  dp_packet_ol_l4_csum_good(const struct dp_packet *p)
>  {
> diff --git a/lib/dpif.h b/lib/dpif.h
> index 8febfb9f6..9ed7a00fd 100644
> --- a/lib/dpif.h
> +++ b/lib/dpif.h
> @@ -860,7 +860,7 @@ void dpif_register_dp_purge_cb(struct dpif *, dp_purge_callback *, void *aux);
>   *
>   * Returns 0 if successful, ENOSPC if the flow limit has been reached and no
>   * flow should be installed, or some otherwise a positive errno value. */
> -typedef int upcall_callback(const struct dp_packet *packet,
> +typedef int upcall_callback(struct dp_packet *packet,
>                              const struct flow *flow,
>                              ovs_u128 *ufid,
>                              unsigned pmd_id,
> diff --git a/lib/flow.c b/lib/flow.c
> index a021bc0eb..bad1e1a17 100644
> --- a/lib/flow.c
> +++ b/lib/flow.c
> @@ -907,6 +907,10 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
>          nw_proto = nh->ip_proto;
>          nw_frag = ipv4_get_nw_frag(nh);
>          data_pull(&data, &size, ip_len);
> +        dp_packet_ol_set_tx_ipv4(packet);
> +        if (dp_packet_ol_ip_csum_good(packet)) {
> +            dp_packet_ol_set_tx_ip_csum(packet);
> +        }
>      } else if (dl_type == htons(ETH_TYPE_IPV6)) {
>          const struct ovs_16aligned_ip6_hdr *nh = data;
>          ovs_be32 tc_flow;
> @@ -920,6 +924,7 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
>          }
>          data_pull(&data, &size, sizeof *nh);
>
> +        dp_packet_ol_set_tx_ipv6(packet);
>          plen = ntohs(nh->ip6_plen);
>          dp_packet_set_l2_pad_size(packet, size - plen);
>          size = plen;   /* Never pull padding. */
> @@ -3197,9 +3202,12 @@ packet_expand(struct dp_packet *p, const struct flow *flow, size_t size)
>              struct ip_header *ip = dp_packet_l3(p);
>
>              ip->ip_tot_len = htons(p->l4_ofs - p->l3_ofs + l4_len);
> -            ip->ip_csum = 0;
> -            ip->ip_csum = csum(ip, sizeof *ip);
> -
> +            if (dp_packet_ol_tx_ip_csum(p)) {
> +                dp_packet_ol_reset_ip_csum_good(p);
> +            } else {
> +                dp_packet_ip_set_header_csum(p);
> +                dp_packet_ol_set_ip_csum_good(p);
> +            }
>              pseudo_hdr_csum = packet_csum_pseudoheader(ip);
>          } else { /* ETH_TYPE_IPV6 */
>              struct ovs_16aligned_ip6_hdr *nh = dp_packet_l3(p);
> @@ -3288,7 +3296,7 @@ flow_compose(struct dp_packet *p, const struct flow *flow,
>          ip->ip_tot_len = htons(p->l4_ofs - p->l3_ofs + l4_len);
>          /* Checksum has already been zeroed by put_zeros call. */
>          ip->ip_csum = csum(ip, sizeof *ip);
> -
> +        dp_packet_ol_set_ip_csum_good(p);
>          pseudo_hdr_csum = packet_csum_pseudoheader(ip);
>          flow_compose_l4_csum(p, flow, pseudo_hdr_csum);
>      } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
> diff --git a/lib/ipf.c b/lib/ipf.c
> index e78559491..d9b51bf06 100644
> --- a/lib/ipf.c
> +++ b/lib/ipf.c
> @@ -433,7 +433,9 @@ ipf_reassemble_v4_frags(struct ipf_list *ipf_list)
>      len += rest_len;
>      l3 = dp_packet_l3(pkt);
>      ovs_be16 new_ip_frag_off = l3->ip_frag_off & ~htons(IP_MORE_FRAGMENTS);
> -    if (!dp_packet_ol_tx_ipv4(pkt)) {
> +    if (dp_packet_ol_tx_ip_csum(pkt)) {
> +        dp_packet_ol_reset_ip_csum_good(pkt);
> +    } else {
>          l3->ip_csum = recalc_csum16(l3->ip_csum, l3->ip_frag_off,
>                                      new_ip_frag_off);
>          l3->ip_csum = recalc_csum16(l3->ip_csum, l3->ip_tot_len, htons(len));
> @@ -609,7 +611,6 @@ ipf_is_valid_v4_frag(struct ipf *ipf, struct dp_packet *pkt)
>      }
>
>      if (OVS_UNLIKELY(!dp_packet_ol_ip_csum_good(pkt)
> -                     && !dp_packet_ol_tx_ipv4(pkt)
>                       && csum(l3, ip_hdr_len) != 0)) {
>          COVERAGE_INC(ipf_l3csum_err);
>          goto invalid_pkt;
> @@ -1185,7 +1186,9 @@ ipf_post_execute_reass_pkts(struct ipf *ipf,
>                      } else {
>                          struct ip_header *l3_frag = dp_packet_l3(frag_i->pkt);
>                          struct ip_header *l3_reass = dp_packet_l3(pkt);
> -                        if (!dp_packet_ol_tx_ipv4(frag_i->pkt)) {
> +                        if (dp_packet_ol_tx_ip_csum(frag_i->pkt)) {
> +                            dp_packet_ol_reset_ip_csum_good(frag_i->pkt);
> +                        } else {
>                              ovs_be32 reass_ip =
>                                  get_16aligned_be32(&l3_reass->ip_src);
>                              ovs_be32 frag_ip =
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index c8e29ec40..435b17c8f 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -396,8 +396,9 @@ enum dpdk_hw_ol_features {
>      NETDEV_RX_CHECKSUM_OFFLOAD = 1 << 0,
>      NETDEV_RX_HW_CRC_STRIP = 1 << 1,
>      NETDEV_RX_HW_SCATTER = 1 << 2,
> -    NETDEV_TX_TSO_OFFLOAD = 1 << 3,
> -    NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 4,
> +    NETDEV_TX_IPV4_CKSUM_OFFLOAD = 1 << 3,
> +    NETDEV_TX_TSO_OFFLOAD = 1 << 4,
> +    NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 5,
>  };
>
>  /*
> @@ -982,6 +983,10 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
>          conf.rxmode.offloads |= DEV_RX_OFFLOAD_KEEP_CRC;
>      }
>
> +    if (dev->hw_ol_features & NETDEV_TX_IPV4_CKSUM_OFFLOAD) {
> +        conf.txmode.offloads |= DEV_TX_OFFLOAD_IPV4_CKSUM;
> +    }
> +
>      if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
>          conf.txmode.offloads |= DPDK_TX_TSO_OFFLOAD_FLAGS;
>          if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) {
> @@ -1122,6 +1127,12 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>          dev->hw_ol_features &= ~NETDEV_RX_HW_SCATTER;
>      }
>
> +    if (info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM) {
> +        dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD;
> +    } else {
> +        dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD;
> +    }
> +
>      dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
>      if (userspace_tso_enabled()) {
>          if ((info.tx_offload_capa & tx_tso_offload_capa)
> @@ -1712,16 +1723,12 @@ netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args)
>                          dev->requested_txq_size);
>          smap_add_format(args, "configured_txq_descriptors", "%d",
>                          dev->txq_size);
> -        if (dev->hw_ol_features & NETDEV_RX_CHECKSUM_OFFLOAD) {
> -            smap_add(args, "rx_csum_offload", "true");
> -        } else {
> -            smap_add(args, "rx_csum_offload", "false");
> -        }
> -        if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
> -            smap_add(args, "tx_tso_offload", "true");
> -        } else {
> -            smap_add(args, "tx_tso_offload", "false");
> -        }
> +#define HWOL_SMAP_ADD(FIELD, FLAG) \
> +        smap_add(args, FIELD, dev->hw_ol_features & FLAG ? "true" : "false");
> +        HWOL_SMAP_ADD("rx_csum_offload", NETDEV_RX_CHECKSUM_OFFLOAD);
> +        HWOL_SMAP_ADD("tx_ip_csum_offload", NETDEV_TX_IPV4_CKSUM_OFFLOAD);
> +        HWOL_SMAP_ADD("tx_tso_offload", NETDEV_TX_TSO_OFFLOAD);
> +#undef HWOL_SMAP_ADD
>          smap_add(args, "lsc_interrupt_mode",
>                   dev->lsc_interrupt_mode ? "true" : "false");
>
> @@ -2165,12 +2172,10 @@ netdev_dpdk_prep_ol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
>  {
>      struct dp_packet *pkt = CONTAINER_OF(mbuf, struct dp_packet, mbuf);
>
> -    if (mbuf->ol_flags & PKT_TX_L4_MASK) {
> -        mbuf->l2_len = (char *)dp_packet_l3(pkt) - (char *)dp_packet_eth(pkt);
> -        mbuf->l3_len = (char *)dp_packet_l4(pkt) - (char *)dp_packet_l3(pkt);
> -        mbuf->outer_l2_len = 0;
> -        mbuf->outer_l3_len = 0;
> -    }
> +    mbuf->l2_len = (char *)dp_packet_l3(pkt) - (char *)dp_packet_eth(pkt);
> +    mbuf->l3_len = (char *)dp_packet_l4(pkt) - (char *)dp_packet_l3(pkt);
> +    mbuf->outer_l2_len = 0;
> +    mbuf->outer_l3_len = 0;
>
>      if (mbuf->ol_flags & PKT_TX_TCP_SEG) {
>          struct tcp_header *th = dp_packet_l4(pkt);
> @@ -2230,13 +2235,11 @@ netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
>      uint32_t nb_tx = 0;
>      uint16_t nb_tx_prep = cnt;
>
> -    if (userspace_tso_enabled()) {
> -        nb_tx_prep = rte_eth_tx_prepare(dev->port_id, qid, pkts, cnt);
> -        if (nb_tx_prep != cnt) {
> -            VLOG_WARN_RL(&rl, "%s: Output batch contains invalid packets. "
> -                         "Only %u/%u are valid: %s", dev->up.name, nb_tx_prep,
> -                         cnt, rte_strerror(rte_errno));
> -        }
> +    nb_tx_prep = rte_eth_tx_prepare(dev->port_id, qid, pkts, cnt);
> +    if (nb_tx_prep != cnt) {
> +        VLOG_WARN_RL(&rl, "%s: Output batch contains invalid packets. "
> +                     "Only %u/%u are valid: %s", dev->up.name, nb_tx_prep,
> +                     cnt, rte_strerror(rte_errno));
>      }
>
>      while (nb_tx != nb_tx_prep) {
> @@ -2611,10 +2614,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
>      }
>
>      sw_stats_add.tx_invalid_ol_drops = cnt;
> -    if (userspace_tso_enabled()) {
> -        cnt = netdev_dpdk_prep_ol_batch(dev, cur_pkts, cnt);
> -    }
> -
> +    cnt = netdev_dpdk_prep_ol_batch(dev, cur_pkts, cnt);
>      sw_stats_add.tx_invalid_ol_drops -= cnt;
>      sw_stats_add.tx_mtu_exceeded_drops = cnt;
>      cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt);
> @@ -2760,12 +2760,10 @@ dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, struct dp_packet *pkt_orig)
>      memcpy(&pkt_dest->l2_pad_size, &pkt_orig->l2_pad_size,
>             sizeof(struct dp_packet) - offsetof(struct dp_packet, l2_pad_size));
>
> -    if (mbuf_dest->ol_flags & PKT_TX_L4_MASK) {
> -        mbuf_dest->l2_len = (char *)dp_packet_l3(pkt_dest)
> -                                - (char *)dp_packet_eth(pkt_dest);
> -        mbuf_dest->l3_len = (char *)dp_packet_l4(pkt_dest)
> -                                - (char *) dp_packet_l3(pkt_dest);
> -    }
> +    mbuf_dest->l2_len = (char *)dp_packet_l3(pkt_dest)
> +                            - (char *)dp_packet_eth(pkt_dest);
> +    mbuf_dest->l3_len = (char *)dp_packet_l4(pkt_dest)
> +                            - (char *) dp_packet_l3(pkt_dest);
>
>      return pkt_dest;
>  }
> @@ -2886,9 +2884,7 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
>          struct rte_mbuf **pkts = (struct rte_mbuf **) batch->packets;
>
>          hwol_drops = batch_cnt;
> -        if (userspace_tso_enabled()) {
> -            batch_cnt = netdev_dpdk_prep_ol_batch(dev, pkts, batch_cnt);
> -        }
> +        batch_cnt = netdev_dpdk_prep_ol_batch(dev, pkts, batch_cnt);
>          hwol_drops -= batch_cnt;
>          mtu_drops = batch_cnt;
>          batch_cnt = netdev_dpdk_filter_packet_len(dev, pkts, batch_cnt);
> @@ -5005,6 +5001,12 @@ netdev_dpdk_reconfigure(struct netdev *netdev)
>      }
>
>      err = dpdk_eth_dev_init(dev);
> +    if (dev->hw_ol_features & NETDEV_TX_IPV4_CKSUM_OFFLOAD) {
> +        netdev->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM;
> +    } else {
> +        netdev->ol_flags &= ~NETDEV_OFFLOAD_TX_IPV4_CSUM;
> +    }
> +
>      if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
>          netdev->ol_flags |= NETDEV_OFFLOAD_TX_TCP_TSO;
>          netdev->ol_flags |= NETDEV_OFFLOAD_TX_TCP_CSUM;
> diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
> index 1f386b81b..24280d1a7 100644
> --- a/lib/netdev-dummy.c
> +++ b/lib/netdev-dummy.c
> @@ -141,6 +141,9 @@ struct netdev_dummy {
>      int requested_n_txq OVS_GUARDED;
>      int requested_n_rxq OVS_GUARDED;
>      int requested_numa_id OVS_GUARDED;
> +
> +    bool ol_ip_csum OVS_GUARDED;            /* Enable netdev IP csum offload. */
> +    bool ol_ip_csum_set_good OVS_GUARDED;   /* Flag RX packet with good csum. */
>  };
>
>  /* Max 'recv_queue_len' in struct netdev_dummy. */
> @@ -898,6 +901,13 @@ netdev_dummy_set_config(struct netdev *netdev_, const struct smap *args,
>          }
>      }
>
> +    netdev->ol_ip_csum_set_good = smap_get_bool(args, "ol_ip_csum_set_good",
> +                                                false);
> +    netdev->ol_ip_csum = smap_get_bool(args, "ol_ip_csum", true);
> +    if (netdev->ol_ip_csum) {
> +        netdev_->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM;
> +    }
> +
>      netdev_change_seq_changed(netdev_);
>
>      /* 'dummy-pmd' specific config. */
> @@ -1052,6 +1062,10 @@ netdev_dummy_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch,
>      netdev->stats.rx_bytes += dp_packet_size(packet);
>      netdev->custom_stats[0].value++;
>      netdev->custom_stats[1].value++;
> +    if (netdev->ol_ip_csum_set_good) {
> +        /* The netdev hardware sets the flag when the packet has good csum. */
> +        dp_packet_ol_set_ip_csum_good(packet);
> +    }
>      ovs_mutex_unlock(&netdev->mutex);
>
>      dp_packet_batch_init_packet(batch, packet);
> @@ -1134,6 +1148,13 @@ netdev_dummy_send(struct netdev *netdev, int qid OVS_UNUSED,
>          }
>
>          ovs_mutex_lock(&dev->mutex);
> +        if (dp_packet_ol_tx_ip_csum(packet)) {
> +            if (!dp_packet_ol_ip_csum_good(packet)) {
> +                dp_packet_ip_set_header_csum(packet);
> +                dp_packet_ol_set_ip_csum_good(packet);
> +            }
> +        }
> +
>          dev->stats.tx_packets++;
>          dev->stats.tx_bytes += size;
>
> diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
> index 48f13b4bd..1e2ae5ea6 100644
> --- a/lib/netdev-native-tnl.c
> +++ b/lib/netdev-native-tnl.c
> @@ -88,7 +88,10 @@ netdev_tnl_ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
>
>          ovs_be32 ip_src, ip_dst;
>
> -        if (OVS_UNLIKELY(!dp_packet_ol_ip_csum_good(packet))) {
> +        /* A packet coming from a network device might have the
> +         * csum already checked. In this case, skip the check. */
> +        if (!dp_packet_ol_ip_csum_good(packet)
> +            && !dp_packet_ol_tx_ip_csum(packet)) {
>              if (csum(ip, IP_IHL(ip->ip_ihl_ver) * 4)) {
>                  VLOG_WARN_RL(&err_rl, "ip packet has invalid checksum");
>                  return NULL;
> @@ -142,7 +145,8 @@ netdev_tnl_ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
>   *
>   * This function sets the IP header's ip_tot_len field (which should be zeroed
>   * as part of 'header') and puts its value into '*ip_tot_size' as well.  Also
> - * updates IP header checksum, as well as the l3 and l4 offsets in 'packet'.
> + * updates IP header checksum if not offloaded, as well as the l3 and l4
> + * offsets in 'packet'.
>   *
>   * Return pointer to the L4 header added to 'packet'. */
>  void *
> @@ -167,11 +171,16 @@ netdev_tnl_push_ip_header(struct dp_packet *packet,
>          *ip_tot_size -= IPV6_HEADER_LEN;
>          ip6->ip6_plen = htons(*ip_tot_size);
>          packet->l4_ofs = dp_packet_size(packet) - *ip_tot_size;
> +        dp_packet_ol_set_tx_ipv6(packet);
> +        dp_packet_ol_reset_ip_csum_good(packet);
>          return ip6 + 1;
>      } else {
>          ip = netdev_tnl_ip_hdr(eth);
>          ip->ip_tot_len = htons(*ip_tot_size);
> -        ip->ip_csum = recalc_csum16(ip->ip_csum, 0, ip->ip_tot_len);
> +        /* Postpone checksum to when the packet is pushed to the port. */
> +        dp_packet_ol_set_tx_ipv4(packet);
> +        dp_packet_ol_set_tx_ip_csum(packet);
> +        dp_packet_ol_reset_ip_csum_good(packet);
>          *ip_tot_size -= IP_HEADER_LEN;
>          packet->l4_ofs = dp_packet_size(packet) - *ip_tot_size;
>          return ip + 1;
> @@ -297,8 +306,8 @@ netdev_tnl_ip_build_header(struct ovs_action_push_tnl *data,
>          ip->ip_frag_off = (params->flow->tunnel.flags & FLOW_TNL_F_DONT_FRAGMENT) ?
>                            htons(IP_DF) : 0;
>
> -        /* Checksum has already been zeroed by eth_build_header. */
> -        ip->ip_csum = csum(ip, sizeof *ip);
> +        /* The checksum will be calculated when the headers are pushed
> +         * to the packet if offloading is not enabled. */
>
>          data->header_len += IP_HEADER_LEN;
>          return ip + 1;
> diff --git a/lib/netdev.c b/lib/netdev.c
> index 5bde9c1c9..d82c48e93 100644
> --- a/lib/netdev.c
> +++ b/lib/netdev.c
> @@ -799,6 +799,14 @@ netdev_send_prepare_packet(const uint64_t netdev_flags,
>              return false;
>      }
>
> +    /* A packet with IP csum offloading enabled was received with a
> +     * verified csum.  Leave IP csum offloading enabled even when the
> +     * checksum is good, so the netdev can decide what is best to do.
> +     * Provide a software fallback in case the device doesn't support IP
> +     * csum offloading.  Note: an encapsulated packet must have the inner
> +     * IP header csum already calculated. */
> +    dp_packet_ol_send_prepare(packet, netdev_flags);
> +
>      if (dp_packet_ol_tx_l4_csum(packet)) {
>          if (dp_packet_ol_tx_tcp_csum(packet)) {
>              if (!(netdev_flags & NETDEV_OFFLOAD_TX_TCP_CSUM)) {
> @@ -966,7 +974,21 @@ netdev_push_header(const struct netdev *netdev,
>                           "not supported: packet dropped",
>                           netdev_get_name(netdev));
>          } else {
> +            /* The packet is going to be encapsulated and there is
> +             * no support yet for inner network header csum offloading. */
> +            if (dp_packet_ol_tx_ip_csum(packet)
> +                && !dp_packet_ol_ip_csum_good(packet)) {
> +                dp_packet_ip_set_header_csum(packet);
> +            }
> +
>              netdev->netdev_class->push_header(netdev, packet, data);
> +            if (dp_packet_ol_tx_ip_csum(packet)) {
> +                dp_packet_ol_reset_ip_csum_good(packet);
> +            } else if (dp_packet_ol_tx_ipv4(packet)) {
> +                dp_packet_ip_set_header_csum(packet);
> +                dp_packet_ol_set_ip_csum_good(packet);
> +            }
> +
>              pkt_metadata_init(&packet->md, data->out_port);
>              dp_packet_batch_refill(batch, packet, i);
>          }
> diff --git a/lib/odp-execute.c b/lib/odp-execute.c
> index 6eeda2a61..c46905bcd 100644
> --- a/lib/odp-execute.c
> +++ b/lib/odp-execute.c
> @@ -167,9 +167,14 @@ odp_set_ipv4(struct dp_packet *packet, const struct ovs_key_ipv4 *key,
>          new_tos = key->ipv4_tos | (nh->ip_tos & ~mask->ipv4_tos);
>
>          if (nh->ip_tos != new_tos) {
> -            nh->ip_csum = recalc_csum16(nh->ip_csum,
> -                                        htons((uint16_t) nh->ip_tos),
> -                                        htons((uint16_t) new_tos));
> +            if (dp_packet_ol_tx_ip_csum(packet)) {
> +                dp_packet_ol_reset_ip_csum_good(packet);
> +            } else {
> +                nh->ip_csum = recalc_csum16(nh->ip_csum,
> +                                            htons((uint16_t) nh->ip_tos),
> +                                            htons((uint16_t) new_tos));
> +            }
> +
>              nh->ip_tos = new_tos;
>          }
>      }
> @@ -178,8 +183,14 @@ odp_set_ipv4(struct dp_packet *packet, const struct ovs_key_ipv4 *key,
>          new_ttl = key->ipv4_ttl | (nh->ip_ttl & ~mask->ipv4_ttl);
>
>          if (OVS_LIKELY(nh->ip_ttl != new_ttl)) {
> -            nh->ip_csum = recalc_csum16(nh->ip_csum, htons(nh->ip_ttl << 8),
> -                                        htons(new_ttl << 8));
> +            if (dp_packet_ol_tx_ip_csum(packet)) {
> +                dp_packet_ol_reset_ip_csum_good(packet);
> +            } else {
> +                nh->ip_csum = recalc_csum16(nh->ip_csum,
> +                                            htons(nh->ip_ttl << 8),
> +                                            htons(new_ttl << 8));
> +            }
> +
>              nh->ip_ttl = new_ttl;
>          }
>      }
> diff --git a/lib/packets.c b/lib/packets.c
> index 8f0bd6811..c5ad57543 100644
> --- a/lib/packets.c
> +++ b/lib/packets.c
> @@ -1098,7 +1098,12 @@ packet_set_ipv4_addr(struct dp_packet *packet,
>              }
>          }
>      }
> -    nh->ip_csum = recalc_csum32(nh->ip_csum, old_addr, new_addr);
> +
> +    if (dp_packet_ol_tx_ip_csum(packet)) {
> +        dp_packet_ol_reset_ip_csum_good(packet);
> +    } else {
> +        nh->ip_csum = recalc_csum32(nh->ip_csum, old_addr, new_addr);
> +    }
>      put_16aligned_be32(addr, new_addr);
>  }
>
> @@ -1263,16 +1268,26 @@ packet_set_ipv4(struct dp_packet *packet, ovs_be32 src, ovs_be32 dst,
>      if (nh->ip_tos != tos) {
>          uint8_t *field = &nh->ip_tos;
>
> -        nh->ip_csum = recalc_csum16(nh->ip_csum, htons((uint16_t) *field),
> -                                    htons((uint16_t) tos));
> +        if (dp_packet_ol_tx_ip_csum(packet)) {
> +            dp_packet_ol_reset_ip_csum_good(packet);
> +        } else {
> +            nh->ip_csum = recalc_csum16(nh->ip_csum, htons((uint16_t) *field),
> +                                        htons((uint16_t) tos));
> +        }
> +
>          *field = tos;
>      }
>
>      if (nh->ip_ttl != ttl) {
>          uint8_t *field = &nh->ip_ttl;
>
> -        nh->ip_csum = recalc_csum16(nh->ip_csum, htons(*field << 8),
> -                                    htons(ttl << 8));
> +        if (dp_packet_ol_tx_ip_csum(packet)) {
> +            dp_packet_ol_reset_ip_csum_good(packet);
> +        } else {
> +            nh->ip_csum = recalc_csum16(nh->ip_csum, htons(*field << 8),
> +                                        htons(ttl << 8));
> +        }
> +
>          *field = ttl;
>      }
>  }
> @@ -1881,8 +1896,13 @@ IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6)
>
>          tos |= IP_ECN_CE;
>          if (nh->ip_tos != tos) {
> -            nh->ip_csum = recalc_csum16(nh->ip_csum, htons(nh->ip_tos),
> -                                        htons((uint16_t) tos));
> +            if (dp_packet_ol_tx_ip_csum(pkt)) {
> +                dp_packet_ol_reset_ip_csum_good(pkt);
> +            } else {
> +                nh->ip_csum = recalc_csum16(nh->ip_csum, htons(nh->ip_tos),
> +                                            htons((uint16_t) tos));
> +            }
> +
>              nh->ip_tos = tos;
>          }
>      }
> diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
> index 1c9c720f0..a00d274ec 100644
> --- a/ofproto/ofproto-dpif-upcall.c
> +++ b/ofproto/ofproto-dpif-upcall.c
> @@ -215,7 +215,7 @@ struct upcall {
>      enum odp_key_fitness fitness;  /* Fitness of 'flow' relative to ODP key. */
>      const ovs_u128 *ufid;          /* Unique identifier for 'flow'. */
>      unsigned pmd_id;               /* Datapath poll mode driver id. */
> -    const struct dp_packet *packet;   /* Packet associated with this upcall. */
> +    struct dp_packet *packet;      /* Packet associated with this upcall. */
>      ofp_port_t ofp_in_port;        /* OpenFlow in port, or OFPP_NONE. */
>      uint16_t mru;                  /* If !0, Maximum receive unit of
>                                        fragmented IP packet */
> @@ -395,7 +395,7 @@ static void delete_op_init(struct udpif *udpif, struct ukey_op *op,
>                             struct udpif_key *ukey);
>
>  static int upcall_receive(struct upcall *, const struct dpif_backer *,
> -                          const struct dp_packet *packet, enum dpif_upcall_type,
> +                          struct dp_packet *packet, enum dpif_upcall_type,
>                            const struct nlattr *userdata, const struct flow *,
>                            const unsigned int mru,
>                            const ovs_u128 *ufid, const unsigned pmd_id);
> @@ -1140,7 +1140,7 @@ compose_slow_path(struct udpif *udpif, struct xlate_out *xout,
>   * since the 'upcall->put_actions' remains uninitialized. */
>  static int
>  upcall_receive(struct upcall *upcall, const struct dpif_backer *backer,
> -               const struct dp_packet *packet, enum dpif_upcall_type type,
> +               struct dp_packet *packet, enum dpif_upcall_type type,
>                 const struct nlattr *userdata, const struct flow *flow,
>                 const unsigned int mru,
>                 const ovs_u128 *ufid, const unsigned pmd_id)
> @@ -1336,7 +1336,7 @@ should_install_flow(struct udpif *udpif, struct upcall *upcall)
>  }
>
>  static int
> -upcall_cb(const struct dp_packet *packet, const struct flow *flow, ovs_u128 *ufid,
> +upcall_cb(struct dp_packet *packet, const struct flow *flow, ovs_u128 *ufid,
>            unsigned pmd_id, enum dpif_upcall_type type,
>            const struct nlattr *userdata, struct ofpbuf *actions,
>            struct flow_wildcards *wc, struct ofpbuf *put_actions, void *aux)
> @@ -1446,7 +1446,7 @@ static int
>  process_upcall(struct udpif *udpif, struct upcall *upcall,
>                 struct ofpbuf *odp_actions, struct flow_wildcards *wc)
>  {
> -    const struct dp_packet *packet = upcall->packet;
> +    struct dp_packet *packet = upcall->packet;
>      const struct flow *flow = upcall->flow;
>      size_t actions_len = 0;
>
> @@ -1524,6 +1524,10 @@ process_upcall(struct udpif *udpif, struct upcall *upcall,
>                  break;
>              }
>
> +            /* The packet is going to be encapsulated and sent to
> +             * the controller. */
> +            dp_packet_ol_send_prepare(packet, 0);
> +
>              const struct frozen_state *state = &recirc_node->state;
>
>              struct ofproto_async_msg *am = xmalloc(sizeof *am);
> diff --git a/tests/automake.mk b/tests/automake.mk
> index 43731d097..1f49b1f24 100644
> --- a/tests/automake.mk
> +++ b/tests/automake.mk
> @@ -161,6 +161,7 @@ SYSTEM_KMOD_TESTSUITE_AT = \
>  SYSTEM_USERSPACE_TESTSUITE_AT = \
>         tests/system-userspace-testsuite.at \
>         tests/system-userspace-macros.at \
> +       tests/system-userspace-offload.at \
>         tests/system-userspace-packet-type-aware.at \
>         tests/system-route.at
>
> diff --git a/tests/system-userspace-offload.at b/tests/system-userspace-offload.at
> new file mode 100644
> index 000000000..4d7f3ef89
> --- /dev/null
> +++ b/tests/system-userspace-offload.at
> @@ -0,0 +1,79 @@
> +AT_BANNER([userspace offload])
> +
> +AT_SETUP([userspace offload - ip csum offload])
> +OVS_VSWITCHD_START(
> +  [add-br br1 -- set bridge br1 datapath-type=dummy -- \
> +   add-port br1 p1 -- \
> +       set Interface p1 type=dummy -- \
> +   add-port br1 p2 -- \
> +       set Interface p2 type=dummy --])
> +
> +# Modify the ip_dst addr to force changing the IP csum.
> +AT_CHECK([ovs-ofctl add-flow br1 in_port=p1,actions=mod_nw_dst:192.168.1.1,output:p2])
> +
> +# Check if no offload remains ok.
> +AT_CHECK([ovs-vsctl set Interface p2 options:tx_pcap=p2.pcap])
> +AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=false])
> +AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=false])
> +AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
> +0a8f394fe0738abf7e2f058408004500003433e0400040068f8fc0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
> +])
> +
> +# Checksum should change to 0x990 with ip_dst changed to 192.168.1.1
> +# by the datapath while processing the packet.
> +AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
> +AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
> +0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
> +])
> +
> +# Check if packets entering the datapath with csum offloading
> +# enabled get the csum updated properly by egress handling
> +# in the datapath and not by the netdev.
> +AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=false])
> +AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=true])
> +AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
> +0a8f394fe0738abf7e2f058408004500003433e0400040068f8fc0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
> +])
> +AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
> +AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
> +0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
> +])
> +
> +# Check if packets entering the datapath with csum offloading
> +# enabled get the csum updated properly by the netdev and not
> +# by the datapath.
> +AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=true])
> +AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=true])
> +AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
> +0a8f394fe0738abf7e2f058408004500003433e0400040068f8fc0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
> +])
> +AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
> +AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
> +0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
> +])
> +
> +# Push a packet with bad csum and offloading disabled to check
> +# if the datapath updates the csum, but does not fix the issue.
> +AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=false])
> +AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=false])
> +AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
> +0a8f394fe0738abf7e2f058408004500003433e0400040068f03c0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
> +])
> +AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
> +AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
> +0a8f394fe0738abf7e2f058408004500003433e0400040060904c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
> +])
> +
> +# Push a packet with bad csum and offloading enabled to check
> +# if the driver updates and fixes the csum.
> +AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=true])
> +AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=true])
> +AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
> +0a8f394fe0738abf7e2f058408004500003433e0400040068f03c0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
> +])
> +AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
> +AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
> +0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
> +])
> +OVS_VSWITCHD_STOP
> +AT_CLEANUP
> diff --git a/tests/system-userspace-testsuite.at b/tests/system-userspace-testsuite.at
> index 2e9659a67..1021b4ad4 100644
> --- a/tests/system-userspace-testsuite.at
> +++ b/tests/system-userspace-testsuite.at
> @@ -25,5 +25,6 @@ m4_include([tests/system-common-macros.at])
>  m4_include([tests/system-traffic.at])
>  m4_include([tests/system-layer3-tunnels.at])
>  m4_include([tests/system-interface.at])
> +m4_include([tests/system-userspace-offload.at])
>  m4_include([tests/system-userspace-packet-type-aware.at])
>  m4_include([tests/system-route.at])
> --
> 2.31.1
>
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Flavio Leitner March 10, 2022, 12:27 p.m. UTC | #2
On Mon, Jan 24, 2022 at 02:21:35PM -0500, Mike Pattrick wrote:
> On Tue, Dec 7, 2021 at 11:54 AM Flavio Leitner <fbl@sysclose.org> wrote:
> >
> > The netdev receiving packets is supposed to provide the flags
> > indicating if the IP csum was verified and it is OK or BAD,
> > otherwise the stack will check when appropriate by software.
> >
> > If the packet comes with good checksum, then postpone the
> > checksum calculation to the egress device if needed.
> >
> > > When encapsulating a packet with that flag, set the checksum
> > > of the inner IP header since that is not yet supported.
> >
> > Calculate the IP csum when the packet is going to be sent over
> > a device that doesn't support the feature.
> >
> > Linux devices don't support IP csum offload alone, so the
> > support is not enabled.
> >
> > Signed-off-by: Flavio Leitner <fbl@sysclose.org>
> > ---
> >  lib/conntrack.c                     | 12 ++---
> >  lib/dp-packet.c                     | 12 +++++
> >  lib/dp-packet.h                     | 63 ++++++++++++++++++++---
> >  lib/dpif.h                          |  2 +-
> >  lib/flow.c                          | 16 ++++--
> >  lib/ipf.c                           |  9 ++--
> >  lib/netdev-dpdk.c                   | 78 ++++++++++++++--------------
> >  lib/netdev-dummy.c                  | 21 ++++++++
> >  lib/netdev-native-tnl.c             | 19 +++++--
> >  lib/netdev.c                        | 22 ++++++++
> >  lib/odp-execute.c                   | 21 ++++++--
> >  lib/packets.c                       | 34 ++++++++++---
> >  ofproto/ofproto-dpif-upcall.c       | 14 +++--
> >  tests/automake.mk                   |  1 +
> >  tests/system-userspace-offload.at   | 79 +++++++++++++++++++++++++++++
> >  tests/system-userspace-testsuite.at |  1 +
> >  16 files changed, 322 insertions(+), 82 deletions(-)
> >  create mode 100644 tests/system-userspace-offload.at
> >
> > diff --git a/lib/conntrack.c b/lib/conntrack.c
> > index 2392a2ea4..5b4ca4dfc 100644
> > --- a/lib/conntrack.c
> > +++ b/lib/conntrack.c
> > @@ -2089,16 +2089,12 @@ conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type,
> >      ctx->key.dl_type = dl_type;
> >
> >      if (ctx->key.dl_type == htons(ETH_TYPE_IP)) {
> > -        bool hwol_bad_l3_csum = dp_packet_ol_ip_csum_bad(pkt);
> > -        if (hwol_bad_l3_csum) {
> > +        if (dp_packet_ol_ip_csum_bad(pkt)) {
> >              ok = false;
> >              COVERAGE_INC(conntrack_l3csum_err);
> >          } else {
> > -            bool hwol_good_l3_csum = dp_packet_ol_ip_csum_good(pkt)
> > -                                     || dp_packet_ol_tx_ipv4(pkt);
> > -            /* Validate the checksum only when hwol is not supported. */
> >              ok = extract_l3_ipv4(&ctx->key, l3, dp_packet_l3_size(pkt), NULL,
> > -                                 !hwol_good_l3_csum);
> > +                                 !dp_packet_ol_ip_csum_good(pkt));
> >          }
> >      } else if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) {
> >          ok = extract_l3_ipv6(&ctx->key, l3, dp_packet_l3_size(pkt), NULL);
> > @@ -3402,7 +3398,9 @@ handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
> >                  }
> >                  if (seq_skew) {
> >                      ip_len = ntohs(l3_hdr->ip_tot_len) + seq_skew;
> > -                    if (!dp_packet_ol_tx_ipv4(pkt)) {
> > +                    if (dp_packet_ol_tx_ip_csum(pkt)) {
> > +                        dp_packet_ol_reset_ip_csum_good(pkt);
> > +                    } else {
> >                          l3_hdr->ip_csum = recalc_csum16(l3_hdr->ip_csum,
> >                                                          l3_hdr->ip_tot_len,
> >                                                          htons(ip_len));
> 
> This is more of a general comment for the whole patch series, but I
> see that a lot of the diffs use the motif:
> 
> if (dp_packet_ol_tx_ip_csum(pkt)) {
>     dp_packet_ol_reset_ip_csum_good(pkt);
> } else {
>     recalc_csumXX()
> }
> 
> Would it make sense instead to simply flag for non-offload tainted
> checksum, and then only one call to csum() on packet egress?

That's a good point. I see that in most cases it recalculates
only what has changed, so it is supposed to be faster because
1) the cache is hot and 2) there are fewer operations. If we leave it
to the egress port, then we need to calculate the full checksum and
the headers may not be in the cache.

Therefore, to avoid regressions in the non-offloaded case,
I kept the original behavior. Maybe my assumption doesn't
make sense.

fbl


> 
> -M
> 
> > diff --git a/lib/dp-packet.c b/lib/dp-packet.c
> > index a4ca5a052..369f3561e 100644
> > --- a/lib/dp-packet.c
> > +++ b/lib/dp-packet.c
> > @@ -21,6 +21,7 @@
> >  #include "dp-packet.h"
> >  #include "netdev-afxdp.h"
> >  #include "netdev-dpdk.h"
> > +#include "netdev-provider.h"
> >  #include "openvswitch/dynamic-string.h"
> >  #include "util.h"
> >
> > @@ -506,3 +507,14 @@ dp_packet_resize_l2(struct dp_packet *p, int increment)
> >      dp_packet_adjust_layer_offset(&p->l2_5_ofs, increment);
> >      return dp_packet_data(p);
> >  }
> > +
> > > +/* Checks if the packet 'p' is compatible with netdev_ol_flags 'flags'
> > > + * and, if not, updates the packet with the software fallback. */
> > +void
> > +dp_packet_ol_send_prepare(struct dp_packet *p, const uint64_t flags) {
> > +    if (!dp_packet_ol_ip_csum_good(p) && dp_packet_ol_tx_ip_csum(p)
> > +        && !(flags & NETDEV_OFFLOAD_TX_IPV4_CSUM)) {
> > +        dp_packet_ip_set_header_csum(p);
> > +        dp_packet_ol_set_ip_csum_good(p);
> > +    }
> > +}
> > diff --git a/lib/dp-packet.h b/lib/dp-packet.h
> > index ac160985d..278be172e 100644
> > --- a/lib/dp-packet.h
> > +++ b/lib/dp-packet.h
> > @@ -25,6 +25,7 @@
> >  #include <rte_mbuf.h>
> >  #endif
> >
> > +#include "csum.h"
> >  #include "netdev-afxdp.h"
> >  #include "netdev-dpdk.h"
> >  #include "openvswitch/list.h"
> > @@ -75,12 +76,14 @@ enum dp_packet_offload_mask {
> >      DEF_OL_FLAG(DP_PACKET_OL_TX_IPV4, PKT_TX_IPV4, 0x80),
> >      /* Offloaded packet is IPv6. */
> >      DEF_OL_FLAG(DP_PACKET_OL_TX_IPV6, PKT_TX_IPV6, 0x100),
> > +    /* Offload IP checksum. */
> > +    DEF_OL_FLAG(DP_PACKET_OL_TX_IP_CSUM, PKT_TX_IP_CKSUM, 0x200),
> >      /* Offload TCP checksum. */
> > -    DEF_OL_FLAG(DP_PACKET_OL_TX_TCP_CSUM, PKT_TX_TCP_CKSUM, 0x200),
> > +    DEF_OL_FLAG(DP_PACKET_OL_TX_TCP_CSUM, PKT_TX_TCP_CKSUM, 0x400),
> >      /* Offload UDP checksum. */
> > -    DEF_OL_FLAG(DP_PACKET_OL_TX_UDP_CSUM, PKT_TX_UDP_CKSUM, 0x400),
> > +    DEF_OL_FLAG(DP_PACKET_OL_TX_UDP_CSUM, PKT_TX_UDP_CKSUM, 0x800),
> >      /* Offload SCTP checksum. */
> > -    DEF_OL_FLAG(DP_PACKET_OL_TX_SCTP_CSUM, PKT_TX_SCTP_CKSUM, 0x800),
> > +    DEF_OL_FLAG(DP_PACKET_OL_TX_SCTP_CSUM, PKT_TX_SCTP_CKSUM, 0x1000),
> >      /* Adding new field requires adding to DP_PACKET_OL_SUPPORTED_MASK. */
> >  };
> >
> > @@ -93,6 +96,7 @@ enum dp_packet_offload_mask {
> >                                       DP_PACKET_OL_TX_TCP_SEG       | \
> >                                       DP_PACKET_OL_TX_IPV4          | \
> >                                       DP_PACKET_OL_TX_IPV6          | \
> > +                                     DP_PACKET_OL_TX_IP_CSUM      | \
> >                                       DP_PACKET_OL_TX_TCP_CSUM     | \
> >                                       DP_PACKET_OL_TX_UDP_CSUM     | \
> >                                       DP_PACKET_OL_TX_SCTP_CSUM)
> > @@ -233,7 +237,7 @@ void *dp_packet_steal_data(struct dp_packet *);
> >
> >  static inline bool dp_packet_equal(const struct dp_packet *,
> >                                     const struct dp_packet *);
> > -
> > +void dp_packet_ol_send_prepare(struct dp_packet *, const uint64_t);
> >
> >  /* Frees memory that 'p' points to, as well as 'p' itself. */
> >  static inline void
> > @@ -970,7 +974,7 @@ dp_packet_ol_tcp_seg(const struct dp_packet *p)
> >      return !!(*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_TX_TCP_SEG);
> >  }
> >
> > -/* Returns 'true' if packet 'p' is marked for IPv4 checksum offloading. */
> > +/* Returns 'true' if packet 'p' is marked as IPv4. */
> >  static inline bool
> >  dp_packet_ol_tx_ipv4(const struct dp_packet *p)
> >  {
> > @@ -1001,20 +1005,36 @@ dp_packet_ol_tx_sctp_csum(struct dp_packet *p)
> >              DP_PACKET_OL_TX_SCTP_CSUM;
> >  }
> >
> > -/* Mark packet 'p' for IPv4 checksum offloading. */
> > +/* Marks packet 'p' as IPv4. */
> >  static inline void
> >  dp_packet_ol_set_tx_ipv4(struct dp_packet *p)
> >  {
> > +    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_IPV6;
> >      *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_TX_IPV4;
> >  }
> >
> > -/* Mark packet 'p' for IPv6 checksum offloading. */
> > +/* Marks packet 'p' as IPv6. */
> >  static inline void
> >  dp_packet_ol_set_tx_ipv6(struct dp_packet *p)
> >  {
> > +    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_IPV4;
> >      *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_TX_IPV6;
> >  }
> >
> > +/* Returns 'true' if packet 'p' is marked for IPv4 checksum offloading. */
> > +static inline bool
> > +dp_packet_ol_tx_ip_csum(const struct dp_packet *p)
> > +{
> > +    return !!(*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_TX_IP_CSUM);
> > +}
> > +
> > +/* Marks packet 'p' for IPv4 checksum offloading. */
> > +static inline void
> > +dp_packet_ol_set_tx_ip_csum(struct dp_packet *p)
> > +{
> > +    *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_TX_IP_CSUM;
> > +}
> > +
> >  /* Mark packet 'p' for TCP checksum offloading.  It implies that either
> >   * the packet 'p' is marked for IPv4 or IPv6 checksum offloading. */
> >  static inline void
> > @@ -1048,6 +1068,8 @@ dp_packet_ol_set_tcp_seg(struct dp_packet *p)
> >      *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_TX_TCP_SEG;
> >  }
> >
> > > +/* Returns 'true' if the IP header has good integrity and the
> > > + * checksum in it is complete. */
> >  static inline bool
> >  dp_packet_ol_ip_csum_good(const struct dp_packet *p)
> >  {
> > @@ -1055,6 +1077,22 @@ dp_packet_ol_ip_csum_good(const struct dp_packet *p)
> >              DP_PACKET_OL_RX_IP_CSUM_GOOD;
> >  }
> >
> > +/* Marks packet 'p' with good IPv4 checksum. */
> > +static inline void
> > +dp_packet_ol_set_ip_csum_good(const struct dp_packet *p)
> > +{
> > +    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_IP_CSUM_BAD;
> > +    *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RX_IP_CSUM_GOOD;
> > +}
> > +
> > +/* Resets IP good checksum flag in packet 'p'. */
> > +static inline void
> > +dp_packet_ol_reset_ip_csum_good(const struct dp_packet *p)
> > +{
> > +    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_IP_CSUM_GOOD;
> > +}
> > +
> > > +/* Returns 'true' if packet 'p' is marked with a bad IPv4 checksum. */
> >  static inline bool
> >  dp_packet_ol_ip_csum_bad(const struct dp_packet *p)
> >  {
> > @@ -1062,6 +1100,17 @@ dp_packet_ol_ip_csum_bad(const struct dp_packet *p)
> >              DP_PACKET_OL_RX_IP_CSUM_BAD;
> >  }
> >
> > +/* Calculate and set the IPv4 header checksum in packet 'p'. */
> > +static inline void
> > +dp_packet_ip_set_header_csum(struct dp_packet *p)
> > +{
> > +    struct ip_header *ip = dp_packet_l3(p);
> > +
> > +    ovs_assert(ip);
> > +    ip->ip_csum = 0;
> > +    ip->ip_csum = csum(ip, sizeof *ip);
> > +}
> > +
> >  static inline bool
> >  dp_packet_ol_l4_csum_good(const struct dp_packet *p)
> >  {
> > diff --git a/lib/dpif.h b/lib/dpif.h
> > index 8febfb9f6..9ed7a00fd 100644
> > --- a/lib/dpif.h
> > +++ b/lib/dpif.h
> > @@ -860,7 +860,7 @@ void dpif_register_dp_purge_cb(struct dpif *, dp_purge_callback *, void *aux);
> >   *
> >   * Returns 0 if successful, ENOSPC if the flow limit has been reached and no
> >   * flow should be installed, or some otherwise a positive errno value. */
> > -typedef int upcall_callback(const struct dp_packet *packet,
> > +typedef int upcall_callback(struct dp_packet *packet,
> >                              const struct flow *flow,
> >                              ovs_u128 *ufid,
> >                              unsigned pmd_id,
> > diff --git a/lib/flow.c b/lib/flow.c
> > index a021bc0eb..bad1e1a17 100644
> > --- a/lib/flow.c
> > +++ b/lib/flow.c
> > @@ -907,6 +907,10 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
> >          nw_proto = nh->ip_proto;
> >          nw_frag = ipv4_get_nw_frag(nh);
> >          data_pull(&data, &size, ip_len);
> > +        dp_packet_ol_set_tx_ipv4(packet);
> > +        if (dp_packet_ol_ip_csum_good(packet)) {
> > +            dp_packet_ol_set_tx_ip_csum(packet);
> > +        }
> >      } else if (dl_type == htons(ETH_TYPE_IPV6)) {
> >          const struct ovs_16aligned_ip6_hdr *nh = data;
> >          ovs_be32 tc_flow;
> > @@ -920,6 +924,7 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
> >          }
> >          data_pull(&data, &size, sizeof *nh);
> >
> > +        dp_packet_ol_set_tx_ipv6(packet);
> >          plen = ntohs(nh->ip6_plen);
> >          dp_packet_set_l2_pad_size(packet, size - plen);
> >          size = plen;   /* Never pull padding. */
> > @@ -3197,9 +3202,12 @@ packet_expand(struct dp_packet *p, const struct flow *flow, size_t size)
> >              struct ip_header *ip = dp_packet_l3(p);
> >
> >              ip->ip_tot_len = htons(p->l4_ofs - p->l3_ofs + l4_len);
> > -            ip->ip_csum = 0;
> > -            ip->ip_csum = csum(ip, sizeof *ip);
> > -
> > +            if (dp_packet_ol_tx_ip_csum(p)) {
> > +                dp_packet_ol_reset_ip_csum_good(p);
> > +            } else {
> > +                dp_packet_ip_set_header_csum(p);
> > +                dp_packet_ol_set_ip_csum_good(p);
> > +            }
> >              pseudo_hdr_csum = packet_csum_pseudoheader(ip);
> >          } else { /* ETH_TYPE_IPV6 */
> >              struct ovs_16aligned_ip6_hdr *nh = dp_packet_l3(p);
> > @@ -3288,7 +3296,7 @@ flow_compose(struct dp_packet *p, const struct flow *flow,
> >          ip->ip_tot_len = htons(p->l4_ofs - p->l3_ofs + l4_len);
> >          /* Checksum has already been zeroed by put_zeros call. */
> >          ip->ip_csum = csum(ip, sizeof *ip);
> > -
> > +        dp_packet_ol_set_ip_csum_good(p);
> >          pseudo_hdr_csum = packet_csum_pseudoheader(ip);
> >          flow_compose_l4_csum(p, flow, pseudo_hdr_csum);
> >      } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
> > diff --git a/lib/ipf.c b/lib/ipf.c
> > index e78559491..d9b51bf06 100644
> > --- a/lib/ipf.c
> > +++ b/lib/ipf.c
> > @@ -433,7 +433,9 @@ ipf_reassemble_v4_frags(struct ipf_list *ipf_list)
> >      len += rest_len;
> >      l3 = dp_packet_l3(pkt);
> >      ovs_be16 new_ip_frag_off = l3->ip_frag_off & ~htons(IP_MORE_FRAGMENTS);
> > -    if (!dp_packet_ol_tx_ipv4(pkt)) {
> > +    if (dp_packet_ol_tx_ip_csum(pkt)) {
> > +        dp_packet_ol_reset_ip_csum_good(pkt);
> > +    } else {
> >          l3->ip_csum = recalc_csum16(l3->ip_csum, l3->ip_frag_off,
> >                                      new_ip_frag_off);
> >          l3->ip_csum = recalc_csum16(l3->ip_csum, l3->ip_tot_len, htons(len));
> > @@ -609,7 +611,6 @@ ipf_is_valid_v4_frag(struct ipf *ipf, struct dp_packet *pkt)
> >      }
> >
> >      if (OVS_UNLIKELY(!dp_packet_ol_ip_csum_good(pkt)
> > -                     && !dp_packet_ol_tx_ipv4(pkt)
> >                       && csum(l3, ip_hdr_len) != 0)) {
> >          COVERAGE_INC(ipf_l3csum_err);
> >          goto invalid_pkt;
> > @@ -1185,7 +1186,9 @@ ipf_post_execute_reass_pkts(struct ipf *ipf,
> >                      } else {
> >                          struct ip_header *l3_frag = dp_packet_l3(frag_i->pkt);
> >                          struct ip_header *l3_reass = dp_packet_l3(pkt);
> > -                        if (!dp_packet_ol_tx_ipv4(frag_i->pkt)) {
> > +                        if (dp_packet_ol_tx_ip_csum(frag_i->pkt)) {
> > +                            dp_packet_ol_reset_ip_csum_good(frag_i->pkt);
> > +                        } else {
> >                              ovs_be32 reass_ip =
> >                                  get_16aligned_be32(&l3_reass->ip_src);
> >                              ovs_be32 frag_ip =
> > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> > index c8e29ec40..435b17c8f 100644
> > --- a/lib/netdev-dpdk.c
> > +++ b/lib/netdev-dpdk.c
> > @@ -396,8 +396,9 @@ enum dpdk_hw_ol_features {
> >      NETDEV_RX_CHECKSUM_OFFLOAD = 1 << 0,
> >      NETDEV_RX_HW_CRC_STRIP = 1 << 1,
> >      NETDEV_RX_HW_SCATTER = 1 << 2,
> > -    NETDEV_TX_TSO_OFFLOAD = 1 << 3,
> > -    NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 4,
> > +    NETDEV_TX_IPV4_CKSUM_OFFLOAD = 1 << 3,
> > +    NETDEV_TX_TSO_OFFLOAD = 1 << 4,
> > +    NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 5,
> >  };
> >
> >  /*
> > @@ -982,6 +983,10 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
> >          conf.rxmode.offloads |= DEV_RX_OFFLOAD_KEEP_CRC;
> >      }
> >
> > +    if (dev->hw_ol_features & NETDEV_TX_IPV4_CKSUM_OFFLOAD) {
> > +        conf.txmode.offloads |= DEV_TX_OFFLOAD_IPV4_CKSUM;
> > +    }
> > +
> >      if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
> >          conf.txmode.offloads |= DPDK_TX_TSO_OFFLOAD_FLAGS;
> >          if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) {
> > @@ -1122,6 +1127,12 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
> >          dev->hw_ol_features &= ~NETDEV_RX_HW_SCATTER;
> >      }
> >
> > +    if (info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM) {
> > +        dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD;
> > +    } else {
> > +        dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD;
> > +    }
> > +
> >      dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
> >      if (userspace_tso_enabled()) {
> >          if ((info.tx_offload_capa & tx_tso_offload_capa)
> > @@ -1712,16 +1723,12 @@ netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args)
> >                          dev->requested_txq_size);
> >          smap_add_format(args, "configured_txq_descriptors", "%d",
> >                          dev->txq_size);
> > -        if (dev->hw_ol_features & NETDEV_RX_CHECKSUM_OFFLOAD) {
> > -            smap_add(args, "rx_csum_offload", "true");
> > -        } else {
> > -            smap_add(args, "rx_csum_offload", "false");
> > -        }
> > -        if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
> > -            smap_add(args, "tx_tso_offload", "true");
> > -        } else {
> > -            smap_add(args, "tx_tso_offload", "false");
> > -        }
> > +#define HWOL_SMAP_ADD(FIELD, FLAG) \
> > +        smap_add(args, FIELD, dev->hw_ol_features & FLAG ? "true" : "false");
> > +        HWOL_SMAP_ADD("rx_csum_offload", NETDEV_RX_CHECKSUM_OFFLOAD);
> > +        HWOL_SMAP_ADD("tx_ip_csum_offload", NETDEV_TX_IPV4_CKSUM_OFFLOAD);
> > +        HWOL_SMAP_ADD("tx_tso_offload", NETDEV_TX_TSO_OFFLOAD);
> > +#undef HWOL_SMAP_ADD
> >          smap_add(args, "lsc_interrupt_mode",
> >                   dev->lsc_interrupt_mode ? "true" : "false");
> >
> > @@ -2165,12 +2172,10 @@ netdev_dpdk_prep_ol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
> >  {
> >      struct dp_packet *pkt = CONTAINER_OF(mbuf, struct dp_packet, mbuf);
> >
> > -    if (mbuf->ol_flags & PKT_TX_L4_MASK) {
> > -        mbuf->l2_len = (char *)dp_packet_l3(pkt) - (char *)dp_packet_eth(pkt);
> > -        mbuf->l3_len = (char *)dp_packet_l4(pkt) - (char *)dp_packet_l3(pkt);
> > -        mbuf->outer_l2_len = 0;
> > -        mbuf->outer_l3_len = 0;
> > -    }
> > +    mbuf->l2_len = (char *)dp_packet_l3(pkt) - (char *)dp_packet_eth(pkt);
> > +    mbuf->l3_len = (char *)dp_packet_l4(pkt) - (char *)dp_packet_l3(pkt);
> > +    mbuf->outer_l2_len = 0;
> > +    mbuf->outer_l3_len = 0;
> >
> >      if (mbuf->ol_flags & PKT_TX_TCP_SEG) {
> >          struct tcp_header *th = dp_packet_l4(pkt);
> > @@ -2230,13 +2235,11 @@ netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
> >      uint32_t nb_tx = 0;
> >      uint16_t nb_tx_prep = cnt;
> >
> > -    if (userspace_tso_enabled()) {
> > -        nb_tx_prep = rte_eth_tx_prepare(dev->port_id, qid, pkts, cnt);
> > -        if (nb_tx_prep != cnt) {
> > -            VLOG_WARN_RL(&rl, "%s: Output batch contains invalid packets. "
> > -                         "Only %u/%u are valid: %s", dev->up.name, nb_tx_prep,
> > -                         cnt, rte_strerror(rte_errno));
> > -        }
> > +    nb_tx_prep = rte_eth_tx_prepare(dev->port_id, qid, pkts, cnt);
> > +    if (nb_tx_prep != cnt) {
> > +        VLOG_WARN_RL(&rl, "%s: Output batch contains invalid packets. "
> > +                     "Only %u/%u are valid: %s", dev->up.name, nb_tx_prep,
> > +                     cnt, rte_strerror(rte_errno));
> >      }
> >
> >      while (nb_tx != nb_tx_prep) {
> > @@ -2611,10 +2614,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
> >      }
> >
> >      sw_stats_add.tx_invalid_ol_drops = cnt;
> > -    if (userspace_tso_enabled()) {
> > -        cnt = netdev_dpdk_prep_ol_batch(dev, cur_pkts, cnt);
> > -    }
> > -
> > +    cnt = netdev_dpdk_prep_ol_batch(dev, cur_pkts, cnt);
> >      sw_stats_add.tx_invalid_ol_drops -= cnt;
> >      sw_stats_add.tx_mtu_exceeded_drops = cnt;
> >      cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt);
> > @@ -2760,12 +2760,10 @@ dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, struct dp_packet *pkt_orig)
> >      memcpy(&pkt_dest->l2_pad_size, &pkt_orig->l2_pad_size,
> >             sizeof(struct dp_packet) - offsetof(struct dp_packet, l2_pad_size));
> >
> > -    if (mbuf_dest->ol_flags & PKT_TX_L4_MASK) {
> > -        mbuf_dest->l2_len = (char *)dp_packet_l3(pkt_dest)
> > -                                - (char *)dp_packet_eth(pkt_dest);
> > -        mbuf_dest->l3_len = (char *)dp_packet_l4(pkt_dest)
> > -                                - (char *) dp_packet_l3(pkt_dest);
> > -    }
> > +    mbuf_dest->l2_len = (char *)dp_packet_l3(pkt_dest)
> > +                            - (char *)dp_packet_eth(pkt_dest);
> > +    mbuf_dest->l3_len = (char *)dp_packet_l4(pkt_dest)
> > +                            - (char *) dp_packet_l3(pkt_dest);
> >
> >      return pkt_dest;
> >  }
> > @@ -2886,9 +2884,7 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
> >          struct rte_mbuf **pkts = (struct rte_mbuf **) batch->packets;
> >
> >          hwol_drops = batch_cnt;
> > -        if (userspace_tso_enabled()) {
> > -            batch_cnt = netdev_dpdk_prep_ol_batch(dev, pkts, batch_cnt);
> > -        }
> > +        batch_cnt = netdev_dpdk_prep_ol_batch(dev, pkts, batch_cnt);
> >          hwol_drops -= batch_cnt;
> >          mtu_drops = batch_cnt;
> >          batch_cnt = netdev_dpdk_filter_packet_len(dev, pkts, batch_cnt);
> > @@ -5005,6 +5001,12 @@ netdev_dpdk_reconfigure(struct netdev *netdev)
> >      }
> >
> >      err = dpdk_eth_dev_init(dev);
> > +    if (dev->hw_ol_features & NETDEV_TX_IPV4_CKSUM_OFFLOAD) {
> > +        netdev->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM;
> > +    } else {
> > +        netdev->ol_flags &= ~NETDEV_OFFLOAD_TX_IPV4_CSUM;
> > +    }
> > +
> >      if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
> >          netdev->ol_flags |= NETDEV_OFFLOAD_TX_TCP_TSO;
> >          netdev->ol_flags |= NETDEV_OFFLOAD_TX_TCP_CSUM;
> > diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
> > index 1f386b81b..24280d1a7 100644
> > --- a/lib/netdev-dummy.c
> > +++ b/lib/netdev-dummy.c
> > @@ -141,6 +141,9 @@ struct netdev_dummy {
> >      int requested_n_txq OVS_GUARDED;
> >      int requested_n_rxq OVS_GUARDED;
> >      int requested_numa_id OVS_GUARDED;
> > +
> > +    bool ol_ip_csum OVS_GUARDED;            /* Enable netdev IP csum offload. */
> > +    bool ol_ip_csum_set_good OVS_GUARDED;   /* Flag RX packet with good csum. */
> >  };
> >
> >  /* Max 'recv_queue_len' in struct netdev_dummy. */
> > @@ -898,6 +901,13 @@ netdev_dummy_set_config(struct netdev *netdev_, const struct smap *args,
> >          }
> >      }
> >
> > +    netdev->ol_ip_csum_set_good = smap_get_bool(args, "ol_ip_csum_set_good",
> > +                                                false);
> > +    netdev->ol_ip_csum = smap_get_bool(args, "ol_ip_csum", true);
> > +    if (netdev->ol_ip_csum) {
> > +        netdev_->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM;
> > +    }
> > +
> >      netdev_change_seq_changed(netdev_);
> >
> >      /* 'dummy-pmd' specific config. */
> > @@ -1052,6 +1062,10 @@ netdev_dummy_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch,
> >      netdev->stats.rx_bytes += dp_packet_size(packet);
> >      netdev->custom_stats[0].value++;
> >      netdev->custom_stats[1].value++;
> > +    if (netdev->ol_ip_csum_set_good) {
> > +        /* The netdev hardware sets the flag when the packet has good csum. */
> > +        dp_packet_ol_set_ip_csum_good(packet);
> > +    }
> >      ovs_mutex_unlock(&netdev->mutex);
> >
> >      dp_packet_batch_init_packet(batch, packet);
> > @@ -1134,6 +1148,13 @@ netdev_dummy_send(struct netdev *netdev, int qid OVS_UNUSED,
> >          }
> >
> >          ovs_mutex_lock(&dev->mutex);
> > +        if (dp_packet_ol_tx_ip_csum(packet)) {
> > +            if (!dp_packet_ol_ip_csum_good(packet)) {
> > +                dp_packet_ip_set_header_csum(packet);
> > +                dp_packet_ol_set_ip_csum_good(packet);
> > +            }
> > +        }
> > +
> >          dev->stats.tx_packets++;
> >          dev->stats.tx_bytes += size;
> >
> > diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
> > index 48f13b4bd..1e2ae5ea6 100644
> > --- a/lib/netdev-native-tnl.c
> > +++ b/lib/netdev-native-tnl.c
> > @@ -88,7 +88,10 @@ netdev_tnl_ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
> >
> >          ovs_be32 ip_src, ip_dst;
> >
> > -        if (OVS_UNLIKELY(!dp_packet_ol_ip_csum_good(packet))) {
> > +        /* A packet coming from a network device might have the
> > +         * csum already checked. In this case, skip the check. */
> > +        if (!dp_packet_ol_ip_csum_good(packet)
> > +            && !dp_packet_ol_tx_ip_csum(packet)) {
> >              if (csum(ip, IP_IHL(ip->ip_ihl_ver) * 4)) {
> >                  VLOG_WARN_RL(&err_rl, "ip packet has invalid checksum");
> >                  return NULL;
> > @@ -142,7 +145,8 @@ netdev_tnl_ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
> >   *
> >   * This function sets the IP header's ip_tot_len field (which should be zeroed
> >   * as part of 'header') and puts its value into '*ip_tot_size' as well.  Also
> > - * updates IP header checksum, as well as the l3 and l4 offsets in 'packet'.
> > + * updates IP header checksum if not offloaded, as well as the l3 and l4
> > + * offsets in 'packet'.
> >   *
> >   * Return pointer to the L4 header added to 'packet'. */
> >  void *
> > @@ -167,11 +171,16 @@ netdev_tnl_push_ip_header(struct dp_packet *packet,
> >          *ip_tot_size -= IPV6_HEADER_LEN;
> >          ip6->ip6_plen = htons(*ip_tot_size);
> >          packet->l4_ofs = dp_packet_size(packet) - *ip_tot_size;
> > +        dp_packet_ol_set_tx_ipv6(packet);
> > +        dp_packet_ol_reset_ip_csum_good(packet);
> >          return ip6 + 1;
> >      } else {
> >          ip = netdev_tnl_ip_hdr(eth);
> >          ip->ip_tot_len = htons(*ip_tot_size);
> > -        ip->ip_csum = recalc_csum16(ip->ip_csum, 0, ip->ip_tot_len);
> > +        /* Postpone checksum to when the packet is pushed to the port. */
> > +        dp_packet_ol_set_tx_ipv4(packet);
> > +        dp_packet_ol_set_tx_ip_csum(packet);
> > +        dp_packet_ol_reset_ip_csum_good(packet);
> >          *ip_tot_size -= IP_HEADER_LEN;
> >          packet->l4_ofs = dp_packet_size(packet) - *ip_tot_size;
> >          return ip + 1;
> > @@ -297,8 +306,8 @@ netdev_tnl_ip_build_header(struct ovs_action_push_tnl *data,
> >          ip->ip_frag_off = (params->flow->tunnel.flags & FLOW_TNL_F_DONT_FRAGMENT) ?
> >                            htons(IP_DF) : 0;
> >
> > -        /* Checksum has already been zeroed by eth_build_header. */
> > -        ip->ip_csum = csum(ip, sizeof *ip);
> > +        /* The checksum will be calculated when the headers are pushed
> > +         * to the packet if offloading is not enabled. */
> >
> >          data->header_len += IP_HEADER_LEN;
> >          return ip + 1;
> > diff --git a/lib/netdev.c b/lib/netdev.c
> > index 5bde9c1c9..d82c48e93 100644
> > --- a/lib/netdev.c
> > +++ b/lib/netdev.c
> > @@ -799,6 +799,14 @@ netdev_send_prepare_packet(const uint64_t netdev_flags,
> >              return false;
> >      }
> >
> > +    /* Packet with IP csum offloading enabled was received with verified csum.
> > +     * Leave the IP csum offloading enabled even with good checksum to the
> > +     * netdev to decide what would be the best to do.
> > +     * Provide a software fallback in case the device doesn't support IP csum
> > +     * offloading. Note: Encapsulated packet must have the inner IP header
> > +     * csum already calculated. */
> > +    dp_packet_ol_send_prepare(packet, netdev_flags);
> > +
> >      if (dp_packet_ol_tx_l4_csum(packet)) {
> >          if (dp_packet_ol_tx_tcp_csum(packet)) {
> >              if (!(netdev_flags & NETDEV_OFFLOAD_TX_TCP_CSUM)) {
> > @@ -966,7 +974,21 @@ netdev_push_header(const struct netdev *netdev,
> >                           "not supported: packet dropped",
> >                           netdev_get_name(netdev));
> >          } else {
> > +            /* The packet is going to be encapsulated and there is
> > +             * no support yet for inner network header csum offloading. */
> > +            if (dp_packet_ol_tx_ip_csum(packet)
> > +                && !dp_packet_ol_ip_csum_good(packet)) {
> > +                dp_packet_ip_set_header_csum(packet);
> > +            }
> > +
> >              netdev->netdev_class->push_header(netdev, packet, data);
> > +            if (dp_packet_ol_tx_ip_csum(packet)) {
> > +                dp_packet_ol_reset_ip_csum_good(packet);
> > +            } else if (dp_packet_ol_tx_ipv4(packet)) {
> > +                dp_packet_ip_set_header_csum(packet);
> > +                dp_packet_ol_set_ip_csum_good(packet);
> > +            }
> > +
> >              pkt_metadata_init(&packet->md, data->out_port);
> >              dp_packet_batch_refill(batch, packet, i);
> >          }
> > diff --git a/lib/odp-execute.c b/lib/odp-execute.c
> > index 6eeda2a61..c46905bcd 100644
> > --- a/lib/odp-execute.c
> > +++ b/lib/odp-execute.c
> > @@ -167,9 +167,14 @@ odp_set_ipv4(struct dp_packet *packet, const struct ovs_key_ipv4 *key,
> >          new_tos = key->ipv4_tos | (nh->ip_tos & ~mask->ipv4_tos);
> >
> >          if (nh->ip_tos != new_tos) {
> > -            nh->ip_csum = recalc_csum16(nh->ip_csum,
> > -                                        htons((uint16_t) nh->ip_tos),
> > -                                        htons((uint16_t) new_tos));
> > +            if (dp_packet_ol_tx_ip_csum(packet)) {
> > +                dp_packet_ol_reset_ip_csum_good(packet);
> > +            } else {
> > +                nh->ip_csum = recalc_csum16(nh->ip_csum,
> > +                                            htons((uint16_t) nh->ip_tos),
> > +                                            htons((uint16_t) new_tos));
> > +            }
> > +
> >              nh->ip_tos = new_tos;
> >          }
> >      }
> > @@ -178,8 +183,14 @@ odp_set_ipv4(struct dp_packet *packet, const struct ovs_key_ipv4 *key,
> >          new_ttl = key->ipv4_ttl | (nh->ip_ttl & ~mask->ipv4_ttl);
> >
> >          if (OVS_LIKELY(nh->ip_ttl != new_ttl)) {
> > -            nh->ip_csum = recalc_csum16(nh->ip_csum, htons(nh->ip_ttl << 8),
> > -                                        htons(new_ttl << 8));
> > +            if (dp_packet_ol_tx_ip_csum(packet)) {
> > +                dp_packet_ol_reset_ip_csum_good(packet);
> > +            } else {
> > +                nh->ip_csum = recalc_csum16(nh->ip_csum,
> > +                                            htons(nh->ip_ttl << 8),
> > +                                            htons(new_ttl << 8));
> > +            }
> > +
> >              nh->ip_ttl = new_ttl;
> >          }
> >      }
> > diff --git a/lib/packets.c b/lib/packets.c
> > index 8f0bd6811..c5ad57543 100644
> > --- a/lib/packets.c
> > +++ b/lib/packets.c
> > @@ -1098,7 +1098,12 @@ packet_set_ipv4_addr(struct dp_packet *packet,
> >              }
> >          }
> >      }
> > -    nh->ip_csum = recalc_csum32(nh->ip_csum, old_addr, new_addr);
> > +
> > +    if (dp_packet_ol_tx_ip_csum(packet)) {
> > +        dp_packet_ol_reset_ip_csum_good(packet);
> > +    } else {
> > +        nh->ip_csum = recalc_csum32(nh->ip_csum, old_addr, new_addr);
> > +    }
> >      put_16aligned_be32(addr, new_addr);
> >  }
> >
> > @@ -1263,16 +1268,26 @@ packet_set_ipv4(struct dp_packet *packet, ovs_be32 src, ovs_be32 dst,
> >      if (nh->ip_tos != tos) {
> >          uint8_t *field = &nh->ip_tos;
> >
> > -        nh->ip_csum = recalc_csum16(nh->ip_csum, htons((uint16_t) *field),
> > -                                    htons((uint16_t) tos));
> > +        if (dp_packet_ol_tx_ip_csum(packet)) {
> > +            dp_packet_ol_reset_ip_csum_good(packet);
> > +        } else {
> > +            nh->ip_csum = recalc_csum16(nh->ip_csum, htons((uint16_t) *field),
> > +                                        htons((uint16_t) tos));
> > +        }
> > +
> >          *field = tos;
> >      }
> >
> >      if (nh->ip_ttl != ttl) {
> >          uint8_t *field = &nh->ip_ttl;
> >
> > -        nh->ip_csum = recalc_csum16(nh->ip_csum, htons(*field << 8),
> > -                                    htons(ttl << 8));
> > +        if (dp_packet_ol_tx_ip_csum(packet)) {
> > +            dp_packet_ol_reset_ip_csum_good(packet);
> > +        } else {
> > +            nh->ip_csum = recalc_csum16(nh->ip_csum, htons(*field << 8),
> > +                                        htons(ttl << 8));
> > +        }
> > +
> >          *field = ttl;
> >      }
> >  }
> > @@ -1881,8 +1896,13 @@ IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6)
> >
> >          tos |= IP_ECN_CE;
> >          if (nh->ip_tos != tos) {
> > -            nh->ip_csum = recalc_csum16(nh->ip_csum, htons(nh->ip_tos),
> > -                                        htons((uint16_t) tos));
> > +            if (dp_packet_ol_tx_ip_csum(pkt)) {
> > +                dp_packet_ol_reset_ip_csum_good(pkt);
> > +            } else {
> > +                nh->ip_csum = recalc_csum16(nh->ip_csum, htons(nh->ip_tos),
> > +                                            htons((uint16_t) tos));
> > +            }
> > +
> >              nh->ip_tos = tos;
> >          }
> >      }
> > diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
> > index 1c9c720f0..a00d274ec 100644
> > --- a/ofproto/ofproto-dpif-upcall.c
> > +++ b/ofproto/ofproto-dpif-upcall.c
> > @@ -215,7 +215,7 @@ struct upcall {
> >      enum odp_key_fitness fitness;  /* Fitness of 'flow' relative to ODP key. */
> >      const ovs_u128 *ufid;          /* Unique identifier for 'flow'. */
> >      unsigned pmd_id;               /* Datapath poll mode driver id. */
> > -    const struct dp_packet *packet;   /* Packet associated with this upcall. */
> > +    struct dp_packet *packet;      /* Packet associated with this upcall. */
> >      ofp_port_t ofp_in_port;        /* OpenFlow in port, or OFPP_NONE. */
> >      uint16_t mru;                  /* If !0, Maximum receive unit of
> >                                        fragmented IP packet */
> > @@ -395,7 +395,7 @@ static void delete_op_init(struct udpif *udpif, struct ukey_op *op,
> >                             struct udpif_key *ukey);
> >
> >  static int upcall_receive(struct upcall *, const struct dpif_backer *,
> > -                          const struct dp_packet *packet, enum dpif_upcall_type,
> > +                          struct dp_packet *packet, enum dpif_upcall_type,
> >                            const struct nlattr *userdata, const struct flow *,
> >                            const unsigned int mru,
> >                            const ovs_u128 *ufid, const unsigned pmd_id);
> > @@ -1140,7 +1140,7 @@ compose_slow_path(struct udpif *udpif, struct xlate_out *xout,
> >   * since the 'upcall->put_actions' remains uninitialized. */
> >  static int
> >  upcall_receive(struct upcall *upcall, const struct dpif_backer *backer,
> > -               const struct dp_packet *packet, enum dpif_upcall_type type,
> > +               struct dp_packet *packet, enum dpif_upcall_type type,
> >                 const struct nlattr *userdata, const struct flow *flow,
> >                 const unsigned int mru,
> >                 const ovs_u128 *ufid, const unsigned pmd_id)
> > @@ -1336,7 +1336,7 @@ should_install_flow(struct udpif *udpif, struct upcall *upcall)
> >  }
> >
> >  static int
> > -upcall_cb(const struct dp_packet *packet, const struct flow *flow, ovs_u128 *ufid,
> > +upcall_cb(struct dp_packet *packet, const struct flow *flow, ovs_u128 *ufid,
> >            unsigned pmd_id, enum dpif_upcall_type type,
> >            const struct nlattr *userdata, struct ofpbuf *actions,
> >            struct flow_wildcards *wc, struct ofpbuf *put_actions, void *aux)
> > @@ -1446,7 +1446,7 @@ static int
> >  process_upcall(struct udpif *udpif, struct upcall *upcall,
> >                 struct ofpbuf *odp_actions, struct flow_wildcards *wc)
> >  {
> > -    const struct dp_packet *packet = upcall->packet;
> > +    struct dp_packet *packet = upcall->packet;
> >      const struct flow *flow = upcall->flow;
> >      size_t actions_len = 0;
> >
> > @@ -1524,6 +1524,10 @@ process_upcall(struct udpif *udpif, struct upcall *upcall,
> >                  break;
> >              }
> >
> > +            /* The packet is going to be encapsulated and sent to
> > +             * the controller. */
> > +            dp_packet_ol_send_prepare(packet, 0);
> > +
> >              const struct frozen_state *state = &recirc_node->state;
> >
> >              struct ofproto_async_msg *am = xmalloc(sizeof *am);
> > diff --git a/tests/automake.mk b/tests/automake.mk
> > index 43731d097..1f49b1f24 100644
> > --- a/tests/automake.mk
> > +++ b/tests/automake.mk
> > @@ -161,6 +161,7 @@ SYSTEM_KMOD_TESTSUITE_AT = \
> >  SYSTEM_USERSPACE_TESTSUITE_AT = \
> >         tests/system-userspace-testsuite.at \
> >         tests/system-userspace-macros.at \
> > +       tests/system-userspace-offload.at \
> >         tests/system-userspace-packet-type-aware.at \
> >         tests/system-route.at
> >
> > diff --git a/tests/system-userspace-offload.at b/tests/system-userspace-offload.at
> > new file mode 100644
> > index 000000000..4d7f3ef89
> > --- /dev/null
> > +++ b/tests/system-userspace-offload.at
> > @@ -0,0 +1,79 @@
> > +AT_BANNER([userspace offload])
> > +
> > +AT_SETUP([userspace offload - ip csum offload])
> > +OVS_VSWITCHD_START(
> > +  [add-br br1 -- set bridge br1 datapath-type=dummy -- \
> > +   add-port br1 p1 -- \
> > +       set Interface p1 type=dummy -- \
> > +   add-port br1 p2 -- \
> > +       set Interface p2 type=dummy --])
> > +
> > +# Modify the ip_dst addr to force changing the IP csum.
> > +AT_CHECK([ovs-ofctl add-flow br1 in_port=p1,actions=mod_nw_dst:192.168.1.1,output:p2])
> > +
> > +# Check if no offload remains ok.
> > +AT_CHECK([ovs-vsctl set Interface p2 options:tx_pcap=p2.pcap])
> > +AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=false])
> > +AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=false])
> > +AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
> > +0a8f394fe0738abf7e2f058408004500003433e0400040068f8fc0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
> > +])
> > +
> > +# Checksum should change to 0x990 with ip_dst changed to 192.168.1.1
> > +# by the datapath while processing the packet.
> > +AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
> > +AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
> > +0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
> > +])
> > +
> > +# Check if packets entering the datapath with csum offloading
> > +# enabled gets the csum updated properly by egress handling
> > +# in the datapath and not by the netdev.
> > +AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=false])
> > +AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=true])
> > +AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
> > +0a8f394fe0738abf7e2f058408004500003433e0400040068f8fc0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
> > +])
> > +AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
> > +AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
> > +0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
> > +])
> > +
> > +# Check if packets entering the datapath with csum offloading
> > +# enabled gets the csum updated properly by netdev and not
> > +# by the datapath.
> > +AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=true])
> > +AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=true])
> > +AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
> > +0a8f394fe0738abf7e2f058408004500003433e0400040068f8fc0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
> > +])
> > +AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
> > +AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
> > +0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
> > +])
> > +
> > +# Push a packet with bad csum and offloading disabled to check
> > +# if the datapath updates the csum, but does not fix the issue.
> > +AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=false])
> > +AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=false])
> > +AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
> > +0a8f394fe0738abf7e2f058408004500003433e0400040068f03c0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
> > +])
> > +AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
> > +AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
> > +0a8f394fe0738abf7e2f058408004500003433e0400040060904c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
> > +])
> > +
> > +# Push a packet with bad csum and offloading enabled to check
> > +# if the driver updates and fixes the csum.
> > +AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=true])
> > +AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=true])
> > +AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
> > +0a8f394fe0738abf7e2f058408004500003433e0400040068f03c0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
> > +])
> > +AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
> > +AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
> > +0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
> > +])
> > +OVS_VSWITCHD_STOP
> > +AT_CLEANUP
> > diff --git a/tests/system-userspace-testsuite.at b/tests/system-userspace-testsuite.at
> > index 2e9659a67..1021b4ad4 100644
> > --- a/tests/system-userspace-testsuite.at
> > +++ b/tests/system-userspace-testsuite.at
> > @@ -25,5 +25,6 @@ m4_include([tests/system-common-macros.at])
> >  m4_include([tests/system-traffic.at])
> >  m4_include([tests/system-layer3-tunnels.at])
> >  m4_include([tests/system-interface.at])
> > +m4_include([tests/system-userspace-offload.at])
> >  m4_include([tests/system-userspace-packet-type-aware.at])
> >  m4_include([tests/system-route.at])
> > --
> > 2.31.1
> >
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >
> 
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
diff mbox series

Patch

diff --git a/lib/conntrack.c b/lib/conntrack.c
index 2392a2ea4..5b4ca4dfc 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -2089,16 +2089,12 @@  conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type,
     ctx->key.dl_type = dl_type;
 
     if (ctx->key.dl_type == htons(ETH_TYPE_IP)) {
-        bool hwol_bad_l3_csum = dp_packet_ol_ip_csum_bad(pkt);
-        if (hwol_bad_l3_csum) {
+        if (dp_packet_ol_ip_csum_bad(pkt)) {
             ok = false;
             COVERAGE_INC(conntrack_l3csum_err);
         } else {
-            bool hwol_good_l3_csum = dp_packet_ol_ip_csum_good(pkt)
-                                     || dp_packet_ol_tx_ipv4(pkt);
-            /* Validate the checksum only when hwol is not supported. */
             ok = extract_l3_ipv4(&ctx->key, l3, dp_packet_l3_size(pkt), NULL,
-                                 !hwol_good_l3_csum);
+                                 !dp_packet_ol_ip_csum_good(pkt));
         }
     } else if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) {
         ok = extract_l3_ipv6(&ctx->key, l3, dp_packet_l3_size(pkt), NULL);
@@ -3402,7 +3398,9 @@  handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
                 }
                 if (seq_skew) {
                     ip_len = ntohs(l3_hdr->ip_tot_len) + seq_skew;
-                    if (!dp_packet_ol_tx_ipv4(pkt)) {
+                    if (dp_packet_ol_tx_ip_csum(pkt)) {
+                        dp_packet_ol_reset_ip_csum_good(pkt);
+                    } else {
                         l3_hdr->ip_csum = recalc_csum16(l3_hdr->ip_csum,
                                                         l3_hdr->ip_tot_len,
                                                         htons(ip_len));
diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index a4ca5a052..369f3561e 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -21,6 +21,7 @@ 
 #include "dp-packet.h"
 #include "netdev-afxdp.h"
 #include "netdev-dpdk.h"
+#include "netdev-provider.h"
 #include "openvswitch/dynamic-string.h"
 #include "util.h"
 
@@ -506,3 +507,14 @@  dp_packet_resize_l2(struct dp_packet *p, int increment)
     dp_packet_adjust_layer_offset(&p->l2_5_ofs, increment);
     return dp_packet_data(p);
 }
+
+/* Checks if the packet 'p' is compatible with netdev_ol_flags 'flags'
+ * and if not, updates the packet with the software fallback. */
+void
+dp_packet_ol_send_prepare(struct dp_packet *p, const uint64_t flags) {
+    if (!dp_packet_ol_ip_csum_good(p) && dp_packet_ol_tx_ip_csum(p)
+        && !(flags & NETDEV_OFFLOAD_TX_IPV4_CSUM)) {
+        dp_packet_ip_set_header_csum(p);
+        dp_packet_ol_set_ip_csum_good(p);
+    }
+}
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index ac160985d..278be172e 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -25,6 +25,7 @@ 
 #include <rte_mbuf.h>
 #endif
 
+#include "csum.h"
 #include "netdev-afxdp.h"
 #include "netdev-dpdk.h"
 #include "openvswitch/list.h"
@@ -75,12 +76,14 @@  enum dp_packet_offload_mask {
     DEF_OL_FLAG(DP_PACKET_OL_TX_IPV4, PKT_TX_IPV4, 0x80),
     /* Offloaded packet is IPv6. */
     DEF_OL_FLAG(DP_PACKET_OL_TX_IPV6, PKT_TX_IPV6, 0x100),
+    /* Offload IP checksum. */
+    DEF_OL_FLAG(DP_PACKET_OL_TX_IP_CSUM, PKT_TX_IP_CKSUM, 0x200),
     /* Offload TCP checksum. */
-    DEF_OL_FLAG(DP_PACKET_OL_TX_TCP_CSUM, PKT_TX_TCP_CKSUM, 0x200),
+    DEF_OL_FLAG(DP_PACKET_OL_TX_TCP_CSUM, PKT_TX_TCP_CKSUM, 0x400),
     /* Offload UDP checksum. */
-    DEF_OL_FLAG(DP_PACKET_OL_TX_UDP_CSUM, PKT_TX_UDP_CKSUM, 0x400),
+    DEF_OL_FLAG(DP_PACKET_OL_TX_UDP_CSUM, PKT_TX_UDP_CKSUM, 0x800),
     /* Offload SCTP checksum. */
-    DEF_OL_FLAG(DP_PACKET_OL_TX_SCTP_CSUM, PKT_TX_SCTP_CKSUM, 0x800),
+    DEF_OL_FLAG(DP_PACKET_OL_TX_SCTP_CSUM, PKT_TX_SCTP_CKSUM, 0x1000),
     /* Adding new field requires adding to DP_PACKET_OL_SUPPORTED_MASK. */
 };
 
@@ -93,6 +96,7 @@  enum dp_packet_offload_mask {
                                      DP_PACKET_OL_TX_TCP_SEG       | \
                                      DP_PACKET_OL_TX_IPV4          | \
                                      DP_PACKET_OL_TX_IPV6          | \
+                                     DP_PACKET_OL_TX_IP_CSUM      | \
                                      DP_PACKET_OL_TX_TCP_CSUM     | \
                                      DP_PACKET_OL_TX_UDP_CSUM     | \
                                      DP_PACKET_OL_TX_SCTP_CSUM)
@@ -233,7 +237,7 @@  void *dp_packet_steal_data(struct dp_packet *);
 
 static inline bool dp_packet_equal(const struct dp_packet *,
                                    const struct dp_packet *);
-
+void dp_packet_ol_send_prepare(struct dp_packet *, const uint64_t);
 
 /* Frees memory that 'p' points to, as well as 'p' itself. */
 static inline void
@@ -970,7 +974,7 @@  dp_packet_ol_tcp_seg(const struct dp_packet *p)
     return !!(*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_TX_TCP_SEG);
 }
 
-/* Returns 'true' if packet 'p' is marked for IPv4 checksum offloading. */
+/* Returns 'true' if packet 'p' is marked as IPv4. */
 static inline bool
 dp_packet_ol_tx_ipv4(const struct dp_packet *p)
 {
@@ -1001,20 +1005,36 @@  dp_packet_ol_tx_sctp_csum(struct dp_packet *p)
             DP_PACKET_OL_TX_SCTP_CSUM;
 }
 
-/* Mark packet 'p' for IPv4 checksum offloading. */
+/* Marks packet 'p' as IPv4. */
 static inline void
 dp_packet_ol_set_tx_ipv4(struct dp_packet *p)
 {
+    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_IPV6;
     *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_TX_IPV4;
 }
 
-/* Mark packet 'p' for IPv6 checksum offloading. */
+/* Marks packet 'p' as IPv6. */
 static inline void
 dp_packet_ol_set_tx_ipv6(struct dp_packet *p)
 {
+    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_IPV4;
     *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_TX_IPV6;
 }
 
+/* Returns 'true' if packet 'p' is marked for IPv4 checksum offloading. */
+static inline bool
+dp_packet_ol_tx_ip_csum(const struct dp_packet *p)
+{
+    return !!(*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_TX_IP_CSUM);
+}
+
+/* Marks packet 'p' for IPv4 checksum offloading. */
+static inline void
+dp_packet_ol_set_tx_ip_csum(struct dp_packet *p)
+{
+    *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_TX_IP_CSUM;
+}
+
 /* Mark packet 'p' for TCP checksum offloading.  It implies that either
  * the packet 'p' is marked for IPv4 or IPv6 checksum offloading. */
 static inline void
@@ -1048,6 +1068,8 @@  dp_packet_ol_set_tcp_seg(struct dp_packet *p)
     *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_TX_TCP_SEG;
 }
 
+/* Returns 'true' if the IP header has good integrity and the
+ * checksum in it is complete. */
 static inline bool
 dp_packet_ol_ip_csum_good(const struct dp_packet *p)
 {
@@ -1055,6 +1077,22 @@  dp_packet_ol_ip_csum_good(const struct dp_packet *p)
             DP_PACKET_OL_RX_IP_CSUM_GOOD;
 }
 
+/* Marks packet 'p' with good IPv4 checksum. */
+static inline void
+dp_packet_ol_set_ip_csum_good(const struct dp_packet *p)
+{
+    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_IP_CSUM_BAD;
+    *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RX_IP_CSUM_GOOD;
+}
+
+/* Resets IP good checksum flag in packet 'p'. */
+static inline void
+dp_packet_ol_reset_ip_csum_good(const struct dp_packet *p)
+{
+    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_IP_CSUM_GOOD;
+}
+
+/* Returns 'true' if packet 'p' is marked with bad IPv4 checksum. */
 static inline bool
 dp_packet_ol_ip_csum_bad(const struct dp_packet *p)
 {
@@ -1062,6 +1100,17 @@  dp_packet_ol_ip_csum_bad(const struct dp_packet *p)
             DP_PACKET_OL_RX_IP_CSUM_BAD;
 }
 
+/* Calculate and set the IPv4 header checksum in packet 'p'. */
+static inline void
+dp_packet_ip_set_header_csum(struct dp_packet *p)
+{
+    struct ip_header *ip = dp_packet_l3(p);
+
+    ovs_assert(ip);
+    ip->ip_csum = 0;
+    ip->ip_csum = csum(ip, sizeof *ip);
+}
+
 static inline bool
 dp_packet_ol_l4_csum_good(const struct dp_packet *p)
 {
diff --git a/lib/dpif.h b/lib/dpif.h
index 8febfb9f6..9ed7a00fd 100644
--- a/lib/dpif.h
+++ b/lib/dpif.h
@@ -860,7 +860,7 @@  void dpif_register_dp_purge_cb(struct dpif *, dp_purge_callback *, void *aux);
  *
  * Returns 0 if successful, ENOSPC if the flow limit has been reached and no
  * flow should be installed, or some otherwise a positive errno value. */
-typedef int upcall_callback(const struct dp_packet *packet,
+typedef int upcall_callback(struct dp_packet *packet,
                             const struct flow *flow,
                             ovs_u128 *ufid,
                             unsigned pmd_id,
diff --git a/lib/flow.c b/lib/flow.c
index a021bc0eb..bad1e1a17 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -907,6 +907,10 @@  miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
         nw_proto = nh->ip_proto;
         nw_frag = ipv4_get_nw_frag(nh);
         data_pull(&data, &size, ip_len);
+        dp_packet_ol_set_tx_ipv4(packet);
+        if (dp_packet_ol_ip_csum_good(packet)) {
+            dp_packet_ol_set_tx_ip_csum(packet);
+        }
     } else if (dl_type == htons(ETH_TYPE_IPV6)) {
         const struct ovs_16aligned_ip6_hdr *nh = data;
         ovs_be32 tc_flow;
@@ -920,6 +924,7 @@  miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
         }
         data_pull(&data, &size, sizeof *nh);
 
+        dp_packet_ol_set_tx_ipv6(packet);
         plen = ntohs(nh->ip6_plen);
         dp_packet_set_l2_pad_size(packet, size - plen);
         size = plen;   /* Never pull padding. */
@@ -3197,9 +3202,12 @@  packet_expand(struct dp_packet *p, const struct flow *flow, size_t size)
             struct ip_header *ip = dp_packet_l3(p);
 
             ip->ip_tot_len = htons(p->l4_ofs - p->l3_ofs + l4_len);
-            ip->ip_csum = 0;
-            ip->ip_csum = csum(ip, sizeof *ip);
-
+            if (dp_packet_ol_tx_ip_csum(p)) {
+                dp_packet_ol_reset_ip_csum_good(p);
+            } else {
+                dp_packet_ip_set_header_csum(p);
+                dp_packet_ol_set_ip_csum_good(p);
+            }
             pseudo_hdr_csum = packet_csum_pseudoheader(ip);
         } else { /* ETH_TYPE_IPV6 */
             struct ovs_16aligned_ip6_hdr *nh = dp_packet_l3(p);
@@ -3288,7 +3296,7 @@  flow_compose(struct dp_packet *p, const struct flow *flow,
         ip->ip_tot_len = htons(p->l4_ofs - p->l3_ofs + l4_len);
         /* Checksum has already been zeroed by put_zeros call. */
         ip->ip_csum = csum(ip, sizeof *ip);
-
+        dp_packet_ol_set_ip_csum_good(p);
         pseudo_hdr_csum = packet_csum_pseudoheader(ip);
         flow_compose_l4_csum(p, flow, pseudo_hdr_csum);
     } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
diff --git a/lib/ipf.c b/lib/ipf.c
index e78559491..d9b51bf06 100644
--- a/lib/ipf.c
+++ b/lib/ipf.c
@@ -433,7 +433,9 @@  ipf_reassemble_v4_frags(struct ipf_list *ipf_list)
     len += rest_len;
     l3 = dp_packet_l3(pkt);
     ovs_be16 new_ip_frag_off = l3->ip_frag_off & ~htons(IP_MORE_FRAGMENTS);
-    if (!dp_packet_ol_tx_ipv4(pkt)) {
+    if (dp_packet_ol_tx_ip_csum(pkt)) {
+        dp_packet_ol_reset_ip_csum_good(pkt);
+    } else {
         l3->ip_csum = recalc_csum16(l3->ip_csum, l3->ip_frag_off,
                                     new_ip_frag_off);
         l3->ip_csum = recalc_csum16(l3->ip_csum, l3->ip_tot_len, htons(len));
@@ -609,7 +611,6 @@  ipf_is_valid_v4_frag(struct ipf *ipf, struct dp_packet *pkt)
     }
 
     if (OVS_UNLIKELY(!dp_packet_ol_ip_csum_good(pkt)
-                     && !dp_packet_ol_tx_ipv4(pkt)
                      && csum(l3, ip_hdr_len) != 0)) {
         COVERAGE_INC(ipf_l3csum_err);
         goto invalid_pkt;
@@ -1185,7 +1186,9 @@  ipf_post_execute_reass_pkts(struct ipf *ipf,
                     } else {
                         struct ip_header *l3_frag = dp_packet_l3(frag_i->pkt);
                         struct ip_header *l3_reass = dp_packet_l3(pkt);
-                        if (!dp_packet_ol_tx_ipv4(frag_i->pkt)) {
+                        if (dp_packet_ol_tx_ip_csum(frag_i->pkt)) {
+                            dp_packet_ol_reset_ip_csum_good(frag_i->pkt);
+                        } else {
                             ovs_be32 reass_ip =
                                 get_16aligned_be32(&l3_reass->ip_src);
                             ovs_be32 frag_ip =
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index c8e29ec40..435b17c8f 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -396,8 +396,9 @@  enum dpdk_hw_ol_features {
     NETDEV_RX_CHECKSUM_OFFLOAD = 1 << 0,
     NETDEV_RX_HW_CRC_STRIP = 1 << 1,
     NETDEV_RX_HW_SCATTER = 1 << 2,
-    NETDEV_TX_TSO_OFFLOAD = 1 << 3,
-    NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 4,
+    NETDEV_TX_IPV4_CKSUM_OFFLOAD = 1 << 3,
+    NETDEV_TX_TSO_OFFLOAD = 1 << 4,
+    NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 5,
 };
 
 /*
@@ -982,6 +983,10 @@  dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
         conf.rxmode.offloads |= DEV_RX_OFFLOAD_KEEP_CRC;
     }
 
+    if (dev->hw_ol_features & NETDEV_TX_IPV4_CKSUM_OFFLOAD) {
+        conf.txmode.offloads |= DEV_TX_OFFLOAD_IPV4_CKSUM;
+    }
+
     if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
         conf.txmode.offloads |= DPDK_TX_TSO_OFFLOAD_FLAGS;
         if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) {
@@ -1122,6 +1127,12 @@  dpdk_eth_dev_init(struct netdev_dpdk *dev)
         dev->hw_ol_features &= ~NETDEV_RX_HW_SCATTER;
     }
 
+    if (info.tx_offload_capa & DEV_TX_OFFLOAD_IPV4_CKSUM) {
+        dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD;
+    } else {
+        dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD;
+    }
+
     dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
     if (userspace_tso_enabled()) {
         if ((info.tx_offload_capa & tx_tso_offload_capa)
@@ -1712,16 +1723,12 @@  netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args)
                         dev->requested_txq_size);
         smap_add_format(args, "configured_txq_descriptors", "%d",
                         dev->txq_size);
-        if (dev->hw_ol_features & NETDEV_RX_CHECKSUM_OFFLOAD) {
-            smap_add(args, "rx_csum_offload", "true");
-        } else {
-            smap_add(args, "rx_csum_offload", "false");
-        }
-        if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
-            smap_add(args, "tx_tso_offload", "true");
-        } else {
-            smap_add(args, "tx_tso_offload", "false");
-        }
+#define HWOL_SMAP_ADD(FIELD, FLAG) \
+        smap_add(args, FIELD, dev->hw_ol_features & FLAG ? "true" : "false");
+        HWOL_SMAP_ADD("rx_csum_offload", NETDEV_RX_CHECKSUM_OFFLOAD);
+        HWOL_SMAP_ADD("tx_ip_csum_offload", NETDEV_TX_IPV4_CKSUM_OFFLOAD);
+        HWOL_SMAP_ADD("tx_tso_offload", NETDEV_TX_TSO_OFFLOAD);
+#undef HWOL_SMAP_ADD
         smap_add(args, "lsc_interrupt_mode",
                  dev->lsc_interrupt_mode ? "true" : "false");
 
@@ -2165,12 +2172,10 @@  netdev_dpdk_prep_ol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
 {
     struct dp_packet *pkt = CONTAINER_OF(mbuf, struct dp_packet, mbuf);
 
-    if (mbuf->ol_flags & PKT_TX_L4_MASK) {
-        mbuf->l2_len = (char *)dp_packet_l3(pkt) - (char *)dp_packet_eth(pkt);
-        mbuf->l3_len = (char *)dp_packet_l4(pkt) - (char *)dp_packet_l3(pkt);
-        mbuf->outer_l2_len = 0;
-        mbuf->outer_l3_len = 0;
-    }
+    mbuf->l2_len = (char *)dp_packet_l3(pkt) - (char *)dp_packet_eth(pkt);
+    mbuf->l3_len = (char *)dp_packet_l4(pkt) - (char *)dp_packet_l3(pkt);
+    mbuf->outer_l2_len = 0;
+    mbuf->outer_l3_len = 0;
 
     if (mbuf->ol_flags & PKT_TX_TCP_SEG) {
         struct tcp_header *th = dp_packet_l4(pkt);
@@ -2230,13 +2235,11 @@  netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
     uint32_t nb_tx = 0;
     uint16_t nb_tx_prep = cnt;
 
-    if (userspace_tso_enabled()) {
-        nb_tx_prep = rte_eth_tx_prepare(dev->port_id, qid, pkts, cnt);
-        if (nb_tx_prep != cnt) {
-            VLOG_WARN_RL(&rl, "%s: Output batch contains invalid packets. "
-                         "Only %u/%u are valid: %s", dev->up.name, nb_tx_prep,
-                         cnt, rte_strerror(rte_errno));
-        }
+    nb_tx_prep = rte_eth_tx_prepare(dev->port_id, qid, pkts, cnt);
+    if (nb_tx_prep != cnt) {
+        VLOG_WARN_RL(&rl, "%s: Output batch contains invalid packets. "
+                     "Only %u/%u are valid: %s", dev->up.name, nb_tx_prep,
+                     cnt, rte_strerror(rte_errno));
     }
 
     while (nb_tx != nb_tx_prep) {
@@ -2611,10 +2614,7 @@  __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
     }
 
     sw_stats_add.tx_invalid_ol_drops = cnt;
-    if (userspace_tso_enabled()) {
-        cnt = netdev_dpdk_prep_ol_batch(dev, cur_pkts, cnt);
-    }
-
+    cnt = netdev_dpdk_prep_ol_batch(dev, cur_pkts, cnt);
     sw_stats_add.tx_invalid_ol_drops -= cnt;
     sw_stats_add.tx_mtu_exceeded_drops = cnt;
     cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt);
@@ -2760,12 +2760,10 @@  dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, struct dp_packet *pkt_orig)
     memcpy(&pkt_dest->l2_pad_size, &pkt_orig->l2_pad_size,
            sizeof(struct dp_packet) - offsetof(struct dp_packet, l2_pad_size));
 
-    if (mbuf_dest->ol_flags & PKT_TX_L4_MASK) {
-        mbuf_dest->l2_len = (char *)dp_packet_l3(pkt_dest)
-                                - (char *)dp_packet_eth(pkt_dest);
-        mbuf_dest->l3_len = (char *)dp_packet_l4(pkt_dest)
-                                - (char *) dp_packet_l3(pkt_dest);
-    }
+    mbuf_dest->l2_len = (char *)dp_packet_l3(pkt_dest)
+                            - (char *)dp_packet_eth(pkt_dest);
+    mbuf_dest->l3_len = (char *)dp_packet_l4(pkt_dest)
+                            - (char *) dp_packet_l3(pkt_dest);
 
     return pkt_dest;
 }
@@ -2886,9 +2884,7 @@  netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
         struct rte_mbuf **pkts = (struct rte_mbuf **) batch->packets;
 
         hwol_drops = batch_cnt;
-        if (userspace_tso_enabled()) {
-            batch_cnt = netdev_dpdk_prep_ol_batch(dev, pkts, batch_cnt);
-        }
+        batch_cnt = netdev_dpdk_prep_ol_batch(dev, pkts, batch_cnt);
         hwol_drops -= batch_cnt;
         mtu_drops = batch_cnt;
         batch_cnt = netdev_dpdk_filter_packet_len(dev, pkts, batch_cnt);
@@ -5005,6 +5001,12 @@  netdev_dpdk_reconfigure(struct netdev *netdev)
     }
 
     err = dpdk_eth_dev_init(dev);
+    if (dev->hw_ol_features & NETDEV_TX_IPV4_CKSUM_OFFLOAD) {
+        netdev->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM;
+    } else {
+        netdev->ol_flags &= ~NETDEV_OFFLOAD_TX_IPV4_CSUM;
+    }
+
     if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
         netdev->ol_flags |= NETDEV_OFFLOAD_TX_TCP_TSO;
         netdev->ol_flags |= NETDEV_OFFLOAD_TX_TCP_CSUM;
diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
index 1f386b81b..24280d1a7 100644
--- a/lib/netdev-dummy.c
+++ b/lib/netdev-dummy.c
@@ -141,6 +141,9 @@  struct netdev_dummy {
     int requested_n_txq OVS_GUARDED;
     int requested_n_rxq OVS_GUARDED;
     int requested_numa_id OVS_GUARDED;
+
+    bool ol_ip_csum OVS_GUARDED;            /* Enable netdev IP csum offload. */
+    bool ol_ip_csum_set_good OVS_GUARDED;   /* Flag RX packet with good csum. */
 };
 
 /* Max 'recv_queue_len' in struct netdev_dummy. */
@@ -898,6 +901,13 @@  netdev_dummy_set_config(struct netdev *netdev_, const struct smap *args,
         }
     }
 
+    netdev->ol_ip_csum_set_good = smap_get_bool(args, "ol_ip_csum_set_good",
+                                                false);
+    netdev->ol_ip_csum = smap_get_bool(args, "ol_ip_csum", true);
+    if (netdev->ol_ip_csum) {
+        netdev_->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM;
+    }
+
     netdev_change_seq_changed(netdev_);
 
     /* 'dummy-pmd' specific config. */
@@ -1052,6 +1062,10 @@  netdev_dummy_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch,
     netdev->stats.rx_bytes += dp_packet_size(packet);
     netdev->custom_stats[0].value++;
     netdev->custom_stats[1].value++;
+    if (netdev->ol_ip_csum_set_good) {
+        /* The netdev hardware sets the flag when the packet has good csum. */
+        dp_packet_ol_set_ip_csum_good(packet);
+    }
     ovs_mutex_unlock(&netdev->mutex);
 
     dp_packet_batch_init_packet(batch, packet);
@@ -1134,6 +1148,13 @@  netdev_dummy_send(struct netdev *netdev, int qid OVS_UNUSED,
         }
 
         ovs_mutex_lock(&dev->mutex);
+        if (dp_packet_ol_tx_ip_csum(packet)) {
+            if (!dp_packet_ol_ip_csum_good(packet)) {
+                dp_packet_ip_set_header_csum(packet);
+                dp_packet_ol_set_ip_csum_good(packet);
+            }
+        }
+
         dev->stats.tx_packets++;
         dev->stats.tx_bytes += size;
 
diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
index 48f13b4bd..1e2ae5ea6 100644
--- a/lib/netdev-native-tnl.c
+++ b/lib/netdev-native-tnl.c
@@ -88,7 +88,10 @@  netdev_tnl_ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
 
         ovs_be32 ip_src, ip_dst;
 
-        if (OVS_UNLIKELY(!dp_packet_ol_ip_csum_good(packet))) {
+        /* A packet coming from a network device might have the
+         * csum already checked. In this case, skip the check. */
+        if (!dp_packet_ol_ip_csum_good(packet)
+            && !dp_packet_ol_tx_ip_csum(packet)) {
             if (csum(ip, IP_IHL(ip->ip_ihl_ver) * 4)) {
                 VLOG_WARN_RL(&err_rl, "ip packet has invalid checksum");
                 return NULL;
@@ -142,7 +145,8 @@  netdev_tnl_ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
  *
  * This function sets the IP header's ip_tot_len field (which should be zeroed
  * as part of 'header') and puts its value into '*ip_tot_size' as well.  Also
- * updates IP header checksum, as well as the l3 and l4 offsets in 'packet'.
+ * updates IP header checksum if not offloaded, as well as the l3 and l4
+ * offsets in 'packet'.
  *
  * Return pointer to the L4 header added to 'packet'. */
 void *
@@ -167,11 +171,16 @@  netdev_tnl_push_ip_header(struct dp_packet *packet,
         *ip_tot_size -= IPV6_HEADER_LEN;
         ip6->ip6_plen = htons(*ip_tot_size);
         packet->l4_ofs = dp_packet_size(packet) - *ip_tot_size;
+        dp_packet_ol_set_tx_ipv6(packet);
+        dp_packet_ol_reset_ip_csum_good(packet);
         return ip6 + 1;
     } else {
         ip = netdev_tnl_ip_hdr(eth);
         ip->ip_tot_len = htons(*ip_tot_size);
-        ip->ip_csum = recalc_csum16(ip->ip_csum, 0, ip->ip_tot_len);
+        /* Postpone checksum to when the packet is pushed to the port. */
+        dp_packet_ol_set_tx_ipv4(packet);
+        dp_packet_ol_set_tx_ip_csum(packet);
+        dp_packet_ol_reset_ip_csum_good(packet);
         *ip_tot_size -= IP_HEADER_LEN;
         packet->l4_ofs = dp_packet_size(packet) - *ip_tot_size;
         return ip + 1;
@@ -297,8 +306,8 @@  netdev_tnl_ip_build_header(struct ovs_action_push_tnl *data,
         ip->ip_frag_off = (params->flow->tunnel.flags & FLOW_TNL_F_DONT_FRAGMENT) ?
                           htons(IP_DF) : 0;
 
-        /* Checksum has already been zeroed by eth_build_header. */
-        ip->ip_csum = csum(ip, sizeof *ip);
+        /* The checksum will be calculated when the headers are pushed
+         * to the packet if offloading is not enabled. */
 
         data->header_len += IP_HEADER_LEN;
         return ip + 1;
diff --git a/lib/netdev.c b/lib/netdev.c
index 5bde9c1c9..d82c48e93 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -799,6 +799,14 @@  netdev_send_prepare_packet(const uint64_t netdev_flags,
             return false;
     }
 
+    /* Packet with IP csum offloading enabled was received with verified csum.
+     * Leave the IP csum offloading enabled even with a good checksum, so
+     * that the netdev can decide what is best to do.
+     * Provide a software fallback in case the device doesn't support IP csum
+     * offloading. Note: Encapsulated packet must have the inner IP header
+     * csum already calculated. */
+    dp_packet_ol_send_prepare(packet, netdev_flags);
+
     if (dp_packet_ol_tx_l4_csum(packet)) {
         if (dp_packet_ol_tx_tcp_csum(packet)) {
             if (!(netdev_flags & NETDEV_OFFLOAD_TX_TCP_CSUM)) {
@@ -966,7 +974,21 @@  netdev_push_header(const struct netdev *netdev,
                          "not supported: packet dropped",
                          netdev_get_name(netdev));
         } else {
+            /* The packet is going to be encapsulated and there is
+             * no support yet for inner network header csum offloading. */
+            if (dp_packet_ol_tx_ip_csum(packet)
+                && !dp_packet_ol_ip_csum_good(packet)) {
+                dp_packet_ip_set_header_csum(packet);
+            }
+
             netdev->netdev_class->push_header(netdev, packet, data);
+            if (dp_packet_ol_tx_ip_csum(packet)) {
+                dp_packet_ol_reset_ip_csum_good(packet);
+            } else if (dp_packet_ol_tx_ipv4(packet)) {
+                dp_packet_ip_set_header_csum(packet);
+                dp_packet_ol_set_ip_csum_good(packet);
+            }
+
             pkt_metadata_init(&packet->md, data->out_port);
             dp_packet_batch_refill(batch, packet, i);
         }
diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index 6eeda2a61..c46905bcd 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -167,9 +167,14 @@  odp_set_ipv4(struct dp_packet *packet, const struct ovs_key_ipv4 *key,
         new_tos = key->ipv4_tos | (nh->ip_tos & ~mask->ipv4_tos);
 
         if (nh->ip_tos != new_tos) {
-            nh->ip_csum = recalc_csum16(nh->ip_csum,
-                                        htons((uint16_t) nh->ip_tos),
-                                        htons((uint16_t) new_tos));
+            if (dp_packet_ol_tx_ip_csum(packet)) {
+                dp_packet_ol_reset_ip_csum_good(packet);
+            } else {
+                nh->ip_csum = recalc_csum16(nh->ip_csum,
+                                            htons((uint16_t) nh->ip_tos),
+                                            htons((uint16_t) new_tos));
+            }
+
             nh->ip_tos = new_tos;
         }
     }
@@ -178,8 +183,14 @@  odp_set_ipv4(struct dp_packet *packet, const struct ovs_key_ipv4 *key,
         new_ttl = key->ipv4_ttl | (nh->ip_ttl & ~mask->ipv4_ttl);
 
         if (OVS_LIKELY(nh->ip_ttl != new_ttl)) {
-            nh->ip_csum = recalc_csum16(nh->ip_csum, htons(nh->ip_ttl << 8),
-                                        htons(new_ttl << 8));
+            if (dp_packet_ol_tx_ip_csum(packet)) {
+                dp_packet_ol_reset_ip_csum_good(packet);
+            } else {
+                nh->ip_csum = recalc_csum16(nh->ip_csum,
+                                            htons(nh->ip_ttl << 8),
+                                            htons(new_ttl << 8));
+            }
+
             nh->ip_ttl = new_ttl;
         }
     }
diff --git a/lib/packets.c b/lib/packets.c
index 8f0bd6811..c5ad57543 100644
--- a/lib/packets.c
+++ b/lib/packets.c
@@ -1098,7 +1098,12 @@  packet_set_ipv4_addr(struct dp_packet *packet,
             }
         }
     }
-    nh->ip_csum = recalc_csum32(nh->ip_csum, old_addr, new_addr);
+
+    if (dp_packet_ol_tx_ip_csum(packet)) {
+        dp_packet_ol_reset_ip_csum_good(packet);
+    } else {
+        nh->ip_csum = recalc_csum32(nh->ip_csum, old_addr, new_addr);
+    }
     put_16aligned_be32(addr, new_addr);
 }
 
@@ -1263,16 +1268,26 @@  packet_set_ipv4(struct dp_packet *packet, ovs_be32 src, ovs_be32 dst,
     if (nh->ip_tos != tos) {
         uint8_t *field = &nh->ip_tos;
 
-        nh->ip_csum = recalc_csum16(nh->ip_csum, htons((uint16_t) *field),
-                                    htons((uint16_t) tos));
+        if (dp_packet_ol_tx_ip_csum(packet)) {
+            dp_packet_ol_reset_ip_csum_good(packet);
+        } else {
+            nh->ip_csum = recalc_csum16(nh->ip_csum, htons((uint16_t) *field),
+                                        htons((uint16_t) tos));
+        }
+
         *field = tos;
     }
 
     if (nh->ip_ttl != ttl) {
         uint8_t *field = &nh->ip_ttl;
 
-        nh->ip_csum = recalc_csum16(nh->ip_csum, htons(*field << 8),
-                                    htons(ttl << 8));
+        if (dp_packet_ol_tx_ip_csum(packet)) {
+            dp_packet_ol_reset_ip_csum_good(packet);
+        } else {
+            nh->ip_csum = recalc_csum16(nh->ip_csum, htons(*field << 8),
+                                        htons(ttl << 8));
+        }
+
         *field = ttl;
     }
 }
@@ -1881,8 +1896,13 @@  IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6)
 
         tos |= IP_ECN_CE;
         if (nh->ip_tos != tos) {
-            nh->ip_csum = recalc_csum16(nh->ip_csum, htons(nh->ip_tos),
-                                        htons((uint16_t) tos));
+            if (dp_packet_ol_tx_ip_csum(pkt)) {
+                dp_packet_ol_reset_ip_csum_good(pkt);
+            } else {
+                nh->ip_csum = recalc_csum16(nh->ip_csum, htons(nh->ip_tos),
+                                            htons((uint16_t) tos));
+            }
+
             nh->ip_tos = tos;
         }
     }
diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
index 1c9c720f0..a00d274ec 100644
--- a/ofproto/ofproto-dpif-upcall.c
+++ b/ofproto/ofproto-dpif-upcall.c
@@ -215,7 +215,7 @@  struct upcall {
     enum odp_key_fitness fitness;  /* Fitness of 'flow' relative to ODP key. */
     const ovs_u128 *ufid;          /* Unique identifier for 'flow'. */
     unsigned pmd_id;               /* Datapath poll mode driver id. */
-    const struct dp_packet *packet;   /* Packet associated with this upcall. */
+    struct dp_packet *packet;      /* Packet associated with this upcall. */
     ofp_port_t ofp_in_port;        /* OpenFlow in port, or OFPP_NONE. */
     uint16_t mru;                  /* If !0, Maximum receive unit of
                                       fragmented IP packet */
@@ -395,7 +395,7 @@  static void delete_op_init(struct udpif *udpif, struct ukey_op *op,
                            struct udpif_key *ukey);
 
 static int upcall_receive(struct upcall *, const struct dpif_backer *,
-                          const struct dp_packet *packet, enum dpif_upcall_type,
+                          struct dp_packet *packet, enum dpif_upcall_type,
                           const struct nlattr *userdata, const struct flow *,
                           const unsigned int mru,
                           const ovs_u128 *ufid, const unsigned pmd_id);
@@ -1140,7 +1140,7 @@  compose_slow_path(struct udpif *udpif, struct xlate_out *xout,
  * since the 'upcall->put_actions' remains uninitialized. */
 static int
 upcall_receive(struct upcall *upcall, const struct dpif_backer *backer,
-               const struct dp_packet *packet, enum dpif_upcall_type type,
+               struct dp_packet *packet, enum dpif_upcall_type type,
                const struct nlattr *userdata, const struct flow *flow,
                const unsigned int mru,
                const ovs_u128 *ufid, const unsigned pmd_id)
@@ -1336,7 +1336,7 @@  should_install_flow(struct udpif *udpif, struct upcall *upcall)
 }
 
 static int
-upcall_cb(const struct dp_packet *packet, const struct flow *flow, ovs_u128 *ufid,
+upcall_cb(struct dp_packet *packet, const struct flow *flow, ovs_u128 *ufid,
           unsigned pmd_id, enum dpif_upcall_type type,
           const struct nlattr *userdata, struct ofpbuf *actions,
           struct flow_wildcards *wc, struct ofpbuf *put_actions, void *aux)
@@ -1446,7 +1446,7 @@  static int
 process_upcall(struct udpif *udpif, struct upcall *upcall,
                struct ofpbuf *odp_actions, struct flow_wildcards *wc)
 {
-    const struct dp_packet *packet = upcall->packet;
+    struct dp_packet *packet = upcall->packet;
     const struct flow *flow = upcall->flow;
     size_t actions_len = 0;
 
@@ -1524,6 +1524,10 @@  process_upcall(struct udpif *udpif, struct upcall *upcall,
                 break;
             }
 
+            /* The packet is going to be encapsulated and sent to
+             * the controller. */
+            dp_packet_ol_send_prepare(packet, 0);
+
             const struct frozen_state *state = &recirc_node->state;
 
             struct ofproto_async_msg *am = xmalloc(sizeof *am);
diff --git a/tests/automake.mk b/tests/automake.mk
index 43731d097..1f49b1f24 100644
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -161,6 +161,7 @@  SYSTEM_KMOD_TESTSUITE_AT = \
 SYSTEM_USERSPACE_TESTSUITE_AT = \
 	tests/system-userspace-testsuite.at \
 	tests/system-userspace-macros.at \
+	tests/system-userspace-offload.at \
 	tests/system-userspace-packet-type-aware.at \
 	tests/system-route.at
 
diff --git a/tests/system-userspace-offload.at b/tests/system-userspace-offload.at
new file mode 100644
index 000000000..4d7f3ef89
--- /dev/null
+++ b/tests/system-userspace-offload.at
@@ -0,0 +1,79 @@ 
+AT_BANNER([userspace offload])
+
+AT_SETUP([userspace offload - ip csum offload])
+OVS_VSWITCHD_START(
+  [add-br br1 -- set bridge br1 datapath-type=dummy -- \
+   add-port br1 p1 -- \
+       set Interface p1 type=dummy -- \
+   add-port br1 p2 -- \
+       set Interface p2 type=dummy --])
+
+# Modify the ip_dst addr to force changing the IP csum.
+AT_CHECK([ovs-ofctl add-flow br1 in_port=p1,actions=mod_nw_dst:192.168.1.1,output:p2])
+
+# Check that the csum is still correct with offloading disabled.
+AT_CHECK([ovs-vsctl set Interface p2 options:tx_pcap=p2.pcap])
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=false])
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=false])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
+0a8f394fe0738abf7e2f058408004500003433e0400040068f8fc0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
+])
+
+# Checksum should change to 0x990 with ip_dst changed to 192.168.1.1
+# by the datapath while processing the packet.
+AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
+AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
+0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
+])
+
+# Check if packets entering the datapath with csum offloading
+# enabled get the csum updated properly by egress handling
+# in the datapath and not by the netdev.
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=false])
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=true])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
+0a8f394fe0738abf7e2f058408004500003433e0400040068f8fc0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
+])
+AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
+AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
+0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
+])
+
+# Check if packets entering the datapath with csum offloading
+# enabled get the csum updated properly by netdev and not
+# by the datapath.
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=true])
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=true])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
+0a8f394fe0738abf7e2f058408004500003433e0400040068f8fc0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
+])
+AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
+AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
+0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
+])
+
+# Push a packet with bad csum and offloading disabled to check
+# if the datapath updates the csum, but does not fix the issue.
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=false])
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=false])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
+0a8f394fe0738abf7e2f058408004500003433e0400040068f03c0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
+])
+AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
+AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
+0a8f394fe0738abf7e2f058408004500003433e0400040060904c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
+])
+
+# Push a packet with bad csum and offloading enabled to check
+# if the driver updates and fixes the csum.
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum=true])
+AT_CHECK([ovs-vsctl set Interface p1 options:ol_ip_csum_set_good=true])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
+0a8f394fe0738abf7e2f058408004500003433e0400040068f03c0a87b02c0a87b01d4781451a962ad5417ed297b801000e547fd00000101080a2524d2345c7fe1c4
+])
+AT_CHECK([ovs-pcap p2.pcap > p2.pcap.txt 2>&1])
+AT_CHECK([tail -n 1 p2.pcap.txt], [0], [dnl
+0a8f394fe0738abf7e2f058408004500003433e0400040060990c0a87b02c0a80101d4781451a962ad5417ed297b801000e5c1fd00000101080a2524d2345c7fe1c4
+])
+OVS_VSWITCHD_STOP
+AT_CLEANUP
diff --git a/tests/system-userspace-testsuite.at b/tests/system-userspace-testsuite.at
index 2e9659a67..1021b4ad4 100644
--- a/tests/system-userspace-testsuite.at
+++ b/tests/system-userspace-testsuite.at
@@ -25,5 +25,6 @@  m4_include([tests/system-common-macros.at])
 m4_include([tests/system-traffic.at])
 m4_include([tests/system-layer3-tunnels.at])
 m4_include([tests/system-interface.at])
+m4_include([tests/system-userspace-offload.at])
 m4_include([tests/system-userspace-packet-type-aware.at])
 m4_include([tests/system-route.at])