diff mbox series

[ovs-dev,v4,11/14] userspace: Enable L4 csum offloading by default.

Message ID 20220701035834.1851648-11-mkp@redhat.com
State Superseded
Headers show
Series [ovs-dev,v4,01/14] dp-packet: Rename flags with CKSUM to CSUM. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test fail github build: failed
ovsrobot/intel-ovs-compilation success test: success

Commit Message

Mike Pattrick July 1, 2022, 3:58 a.m. UTC
From: Flavio Leitner <fbl@sysclose.org>

The netdev receiving packets is supposed to provide the flags
indicating if the L4 csum was verified and it is OK or BAD,
otherwise the stack will check when appropriate by software.

If the packet comes with good checksum, then postpone the
checksum calculation to the egress device if needed.

When encapsulate a packet with that flag, set the checksum
of the inner L4 header since that is not yet supported.

Calculate the L4 csum when the packet is going to be sent over
a device that doesn't support the feature.

Linux tap devices allows enabling L3 and L4 offload, so this
patch enables the feature. However, Linux socket interface
remains disabled because the API doesn't allow enabling
those those features without enabling TSO too.

Signed-off-by: Flavio Leitner <fbl@sysclose.org>
Co-authored-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Mike Pattrick <mkp@redhat.com>
---
 lib/conntrack.c         |  16 +--
 lib/dp-packet.c         |  23 +++-
 lib/dp-packet.h         |  97 ++++++++++++---
 lib/flow.c              |  23 ++++
 lib/netdev-dpdk.c       | 136 ++++++++++++---------
 lib/netdev-linux.c      | 253 ++++++++++++++++++++++++++--------------
 lib/netdev-native-tnl.c |  32 +----
 lib/netdev.c            |  39 ++-----
 lib/packets.c           | 175 +++++++++++++++++++++------
 lib/packets.h           |   3 +
 10 files changed, 542 insertions(+), 255 deletions(-)

Comments

David Marchand July 6, 2022, 8:59 p.m. UTC | #1
I did not finish reviewing.

On Fri, Jul 1, 2022 at 5:58 AM Mike Pattrick <mkp@redhat.com> wrote:
>
> From: Flavio Leitner <fbl@sysclose.org>
>
> The netdev receiving packets is supposed to provide the flags
> indicating if the L4 csum was verified and it is OK or BAD,
> otherwise the stack will check when appropriate by software.
>
> If the packet comes with good checksum, then postpone the
> checksum calculation to the egress device if needed.
>
> When encapsulate a packet with that flag, set the checksum
> of the inner L4 header since that is not yet supported.
>
> Calculate the L4 csum when the packet is going to be sent over
> a device that doesn't support the feature.
>
> Linux tap devices allows enabling L3 and L4 offload, so this
> patch enables the feature. However, Linux socket interface
> remains disabled because the API doesn't allow enabling
> those those features without enabling TSO too.

those two*

>
> Signed-off-by: Flavio Leitner <fbl@sysclose.org>
> Co-authored-by: Mike Pattrick <mkp@redhat.com>
> Signed-off-by: Mike Pattrick <mkp@redhat.com>
> ---
>  lib/conntrack.c         |  16 +--
>  lib/dp-packet.c         |  23 +++-
>  lib/dp-packet.h         |  97 ++++++++++++---
>  lib/flow.c              |  23 ++++
>  lib/netdev-dpdk.c       | 136 ++++++++++++---------
>  lib/netdev-linux.c      | 253 ++++++++++++++++++++++++++--------------
>  lib/netdev-native-tnl.c |  32 +----
>  lib/netdev.c            |  39 ++-----
>  lib/packets.c           | 175 +++++++++++++++++++++------
>  lib/packets.h           |   3 +
>  10 files changed, 542 insertions(+), 255 deletions(-)
>
> diff --git a/lib/conntrack.c b/lib/conntrack.c
> index 11768da00..d7072e1e9 100644
> --- a/lib/conntrack.c
> +++ b/lib/conntrack.c
> @@ -2105,13 +2105,13 @@ conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type,
>      }
>
>      if (ok) {
> -        bool hwol_bad_l4_csum = dp_packet_ol_l4_checksum_bad(pkt);
> -        if (!hwol_bad_l4_csum) {
> -            bool  hwol_good_l4_csum = dp_packet_ol_l4_checksum_good(pkt)
> -                                      || dp_packet_ol_tx_l4_checksum(pkt);
> +        if (!dp_packet_ol_l4_checksum_bad(pkt)) {
> +
>              /* Validate the checksum only when hwol is not supported. */
>              if (extract_l4(&ctx->key, l4, dp_packet_l4_size(pkt),
> -                           &ctx->icmp_related, l3, !hwol_good_l4_csum,
> +                           &ctx->icmp_related, l3,
> +                           !dp_packet_ol_l4_checksum_good(pkt) &&
> +                           !dp_packet_ol_tx_l4_checksum(pkt),

Do we need to check for dp_packet_ol_tx_l4_checksum?


>                             NULL)) {
>                  ctx->hash = conn_key_hash(&ctx->key, ct->hash_basis);
>                  return true;
> @@ -3423,8 +3423,10 @@ handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
>              adj_seqnum(&th->tcp_seq, ec->seq_skew);
>      }
>
> -    th->tcp_csum = 0;
> -    if (!dp_packet_ol_tx_l4_checksum(pkt)) {
> +    if (dp_packet_ol_tx_tcp_csum(pkt)) {
> +        dp_packet_ol_reset_l4_csum_good(pkt);
> +    } else {
> +        th->tcp_csum = 0;
>          if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) {
>              th->tcp_csum = packet_csum_upperlayer6(nh6, th, ctx->key.nw_proto,
>                                 dp_packet_l4_size(pkt));

[snip]

> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 1eb2954ab..bfeb75add 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -145,17 +145,6 @@ typedef uint16_t dpdk_port_t;
>
>  #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
>
> -/* List of required flags advertised by the hardware that will be used
> - * if TSO is enabled. Ideally this should include
> - * RTE_ETH_TX_OFFLOAD_SCTP_CKSUM. However, very few drivers support that
> - * at the moment and SCTP is not a widely used protocol like TCP and UDP,
> - * so it's optional. */
> -#define DPDK_TX_TSO_OFFLOAD_FLAGS (RTE_ETH_TX_OFFLOAD_TCP_TSO        \
> -                                   | RTE_ETH_TX_OFFLOAD_TCP_CKSUM    \
> -                                   | RTE_ETH_TX_OFFLOAD_UDP_CKSUM    \
> -                                   | RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)
> -
> -
>  static const struct rte_eth_conf port_conf = {
>      .rxmode = {
>          .split_hdr_size = 0,
> @@ -398,8 +387,10 @@ enum dpdk_hw_ol_features {
>      NETDEV_RX_HW_CRC_STRIP = 1 << 1,
>      NETDEV_RX_HW_SCATTER = 1 << 2,
>      NETDEV_TX_IPV4_CKSUM_OFFLOAD = 1 << 3,
> -    NETDEV_TX_TSO_OFFLOAD = 1 << 4,
> -    NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 5,
> +    NETDEV_TX_TCP_CKSUM_OFFLOAD = 1 << 4,
> +    NETDEV_TX_UDP_CKSUM_OFFLOAD = 1 << 5,
> +    NETDEV_TX_SCTP_CKSUM_OFFLOAD = 1 << 6,
> +    NETDEV_TX_TSO_OFFLOAD = 1 << 7,
>  };
>
>  /*
> @@ -953,6 +944,35 @@ dpdk_watchdog(void *dummy OVS_UNUSED)
>      return NULL;
>  }
>
> +static void
> +netdev_dpdk_update_netdev_flag(struct netdev_dpdk *dev,
> +                               enum dpdk_hw_ol_features hw_ol_features,
> +                               enum netdev_ol_flags flag)
> +{
> +    struct netdev *netdev = &dev->up;
> +
> +    if (dev->hw_ol_features & hw_ol_features) {
> +        netdev->ol_flags |= flag;
> +    } else {
> +        netdev->ol_flags &= ~flag;
> +    }
> +}
> +
> +static void
> +netdev_dpdk_update_netdev_flags(struct netdev_dpdk *dev)
> +{
> +    netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_IPV4_CKSUM_OFFLOAD,
> +                                   NETDEV_OFFLOAD_TX_IPV4_CSUM);
> +    netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_TCP_CKSUM_OFFLOAD,
> +                                   NETDEV_OFFLOAD_TX_TCP_CSUM);
> +    netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_UDP_CKSUM_OFFLOAD,
> +                                   NETDEV_OFFLOAD_TX_UDP_CSUM);
> +    netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_SCTP_CKSUM_OFFLOAD,
> +                                   NETDEV_OFFLOAD_TX_SCTP_CSUM);
> +    netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_TSO_OFFLOAD,
> +                                   NETDEV_OFFLOAD_TX_TCP_TSO);
> +}
> +
>  static int
>  dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
>  {
> @@ -989,11 +1009,20 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
>          conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM;
>      }
>
> +    if (dev->hw_ol_features & NETDEV_TX_TCP_CKSUM_OFFLOAD) {
> +        conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
> +    }
> +
> +    if (dev->hw_ol_features & NETDEV_TX_UDP_CKSUM_OFFLOAD) {
> +        conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_UDP_CKSUM;
> +    }
> +
> +    if (dev->hw_ol_features & NETDEV_TX_SCTP_CKSUM_OFFLOAD) {
> +        conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_SCTP_CKSUM;
> +    }
> +
>      if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
> -        conf.txmode.offloads |= DPDK_TX_TSO_OFFLOAD_FLAGS;
> -        if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) {
> -            conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_SCTP_CKSUM;
> -        }
> +        conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO;
>      }
>
>      /* Limit configured rss hash functions to only those supported
> @@ -1099,7 +1128,6 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>      struct rte_ether_addr eth_addr;
>      int diag;
>      int n_rxq, n_txq;
> -    uint32_t tx_tso_offload_capa = DPDK_TX_TSO_OFFLOAD_FLAGS;
>      uint32_t rx_chksm_offload_capa = RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
>                                       RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
>                                       RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
> @@ -1135,18 +1163,28 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
>          dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD;
>      }
>
> +    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_CKSUM) {
> +        dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD;
> +    } else {
> +        dev->hw_ol_features &= ~NETDEV_TX_TCP_CKSUM_OFFLOAD;
> +    }
> +
> +    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_CKSUM) {
> +        dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD;
> +    } else {
> +        dev->hw_ol_features &= ~NETDEV_TX_UDP_CKSUM_OFFLOAD;
> +    }
> +
> +    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) {
> +        dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD;
> +    } else {
> +        dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD;
> +    }
> +
>      dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
>      if (userspace_tso_enabled()) {
> -        if ((info.tx_offload_capa & tx_tso_offload_capa)
> -            == tx_tso_offload_capa) {
> +        if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO) {
>              dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
> -            if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) {
> -                dev->hw_ol_features |= NETDEV_TX_SCTP_CHECKSUM_OFFLOAD;
> -            } else {
> -                VLOG_WARN("%s: Tx SCTP checksum offload is not supported, "
> -                          "SCTP packets sent to this device will be dropped",
> -                          netdev_get_name(&dev->up));
> -            }
>          } else {
>              VLOG_WARN("%s: Tx TSO offload is not supported.",
>                        netdev_get_name(&dev->up));
> @@ -1708,6 +1746,9 @@ netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args)
>          smap_add(args, FIELD, dev->hw_ol_features & FLAG ? "true" : "false");
>          HWOL_SMAP_ADD("rx_csum_offload", NETDEV_RX_CHECKSUM_OFFLOAD);
>          HWOL_SMAP_ADD("tx_ip_csum_offload", NETDEV_TX_IPV4_CKSUM_OFFLOAD);
> +        HWOL_SMAP_ADD("tx_tcp_csum_offload", NETDEV_TX_TCP_CKSUM_OFFLOAD);
> +        HWOL_SMAP_ADD("tx_udp_csum_offload", NETDEV_TX_UDP_CKSUM_OFFLOAD);
> +        HWOL_SMAP_ADD("tx_sctp_csum_offload", NETDEV_TX_SCTP_CKSUM_OFFLOAD);
>          HWOL_SMAP_ADD("tx_tso_offload", NETDEV_TX_TSO_OFFLOAD);
>  #undef HWOL_SMAP_ADD
>          smap_add(args, "lsc_interrupt_mode",
> @@ -2154,6 +2195,7 @@ netdev_dpdk_prep_ol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
>
>      mbuf->l2_len = (char *) dp_packet_l3(pkt) - (char *) dp_packet_eth(pkt);
>      mbuf->l3_len = (char *) dp_packet_l4(pkt) - (char *) dp_packet_l3(pkt);
> +    mbuf->l4_len = 0;
>      mbuf->outer_l2_len = 0;
>      mbuf->outer_l3_len = 0;
>
> @@ -4935,21 +4977,7 @@ netdev_dpdk_reconfigure(struct netdev *netdev)
>      }
>
>      err = dpdk_eth_dev_init(dev);
> -    if (dev->hw_ol_features & NETDEV_TX_IPV4_CKSUM_OFFLOAD) {
> -        netdev->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM;
> -    } else {
> -        netdev->ol_flags &= ~NETDEV_OFFLOAD_TX_IPV4_CSUM;
> -    }
> -
> -    if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
> -        netdev->ol_flags |= NETDEV_OFFLOAD_TX_TCP_TSO;
> -        netdev->ol_flags |= NETDEV_OFFLOAD_TX_TCP_CSUM;
> -        netdev->ol_flags |= NETDEV_OFFLOAD_TX_UDP_CSUM;
> -        netdev->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM;
> -        if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) {
> -            netdev->ol_flags |= NETDEV_OFFLOAD_TX_SCTP_CSUM;
> -        }
> -    }
> +    netdev_dpdk_update_netdev_flags(dev);
>
>      /* If both requested and actual hwaddr were previously
>       * unset (initialized to 0), then first device init above
> @@ -5024,6 +5052,7 @@ netdev_dpdk_vhost_reconfigure(struct netdev *netdev)
>      int err;
>
>      ovs_mutex_lock(&dev->mutex);
> +    netdev_dpdk_update_netdev_flags(dev);
>      err = dpdk_vhost_reconfigure_helper(dev);
>      ovs_mutex_unlock(&dev->mutex);
>
> @@ -5088,19 +5117,22 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
>              goto unlock;
>          }
>
> +        vhost_unsup_flags = 1ULL << VIRTIO_NET_F_HOST_ECN
> +                            | 1ULL << VIRTIO_NET_F_HOST_UFO;
> +
> +        dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD;
> +        dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD;
> +        dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD;
> +        dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD;

It is a bit late here, so my brain might not be working well.

I think we have problems with Tx vs Rx features.

TX checksum offloads, from a vhost port point of view, should be
conditioned on what the guest announces it supports.
Afaiu, the guest accepting to receive partial tcp checksums and other
is mapped to the VIRTIO_NET_F_GUEST_CSUM (1) feature.

> +
>          if (userspace_tso_enabled()) {
> -            netdev->ol_flags |= NETDEV_OFFLOAD_TX_TCP_TSO;
> -            netdev->ol_flags |= NETDEV_OFFLOAD_TX_TCP_CSUM;
> -            netdev->ol_flags |= NETDEV_OFFLOAD_TX_UDP_CSUM;
> -            netdev->ol_flags |= NETDEV_OFFLOAD_TX_SCTP_CSUM;
> -            netdev->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM;
> -            vhost_unsup_flags = 1ULL << VIRTIO_NET_F_HOST_ECN
> -                                | 1ULL << VIRTIO_NET_F_HOST_UFO;
> +            dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
> +            VLOG_DBG("%s: TSO enabled on vhost port",
> +                     netdev_get_name(&dev->up));
>          } else {
> -            /* This disables checksum offloading and all the features
> -             * that depends on it (TSO, UFO, ECN) according to virtio
> -             * specification. */
>              vhost_unsup_flags = 1ULL << VIRTIO_NET_F_CSUM;

On the contrary, with this patch, OVS should always advertise
VIRTIO_NET_F_CSUM, since it can handle partial checksums and may
offload it to some hw nic on tx.
This line above should be removed.

But it is dead code, as vhost_unsup_flags content is reset on the next
line, below.
(intention might have been to vhost_unsup_flags |= ?).


> +            vhost_unsup_flags = 1ULL << VIRTIO_NET_F_HOST_TSO4
> +                                | 1ULL << VIRTIO_NET_F_HOST_TSO6;

This seems problematic too.

HOST_TSO4 indicates to the guest that the host side may receive TSO frames.
From a vhost port point of view, it means that the port can receive TSO
frames, i.e. that OVS supports LRO, but we don't express such a feature
in the netdev layer.


>
>          err = rte_vhost_driver_disable_features(dev->vhost_id,


I also see a runtime issue when upgrading with the last patch, I'll
send a comment on it.
David Marchand July 11, 2022, 9:02 p.m. UTC | #2
Hello,

Maxime, Ilya, your opinion will probably help.

On Wed, Jul 6, 2022 at 10:59 PM David Marchand
<david.marchand@redhat.com> wrote:
> > @@ -5088,19 +5117,22 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
> >              goto unlock;
> >          }
> >
> > +        vhost_unsup_flags = 1ULL << VIRTIO_NET_F_HOST_ECN
> > +                            | 1ULL << VIRTIO_NET_F_HOST_UFO;
> > +
> > +        dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD;
> > +        dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD;
> > +        dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD;
> > +        dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD;
>
> I think we have problems with Tx vs Rx features.
>
> TX checksum offloads, from a vhost port point of view, should be
> conditionned to what the guest announces it supports.
> Afaiu, the guest accepting to receive partial tcp cheksums and other
> is mapped to the VIRTIO_NET_F_GUEST_CSUM (1) feature.

On this topic, let me add details on how I think we need to fix this.

- First of all, exposing NETDEV_TX_IPV4_CKSUM_OFFLOAD is ok.

virtio protocol does not support offloading ipv4 checksum.
But, on the other hand, the DPDK API requests that TSO hw support
comes with ipv4 hw checksum.

For this reason, the vhost library has been handling the ipv4 checksum
in sw: https://git.dpdk.org/dpdk-stable/tree/lib/vhost/virtio_net.c?h=v21.11.1#n441
And OVS can rely on this.


- As for the rest of the capabilities, OVS must be able to support
different scenarios.

The virtio driver rx capabilities may change as a guest driver can be
reinitialised or changed to a different driver that supports none of
those offloads (like net/virtio + testpmd io mode).
As a result, the vhost netdev checksum tx capabilities might change.

So I think we need to update the netdev hw ol features (and rely on
OVS sw fallback code) with a diff like:

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 3e7b86009..1c69150ea 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -4002,6 +4002,37 @@ new_device(int vid)
                 dev->vhost_reconfigured = true;
             }

+            uint64_t features;
+
+            if (rte_vhost_get_negotiated_features(vid, &features)) {
+                VLOG_INFO("Error checking guest features for "
+                          "vHost Device '%s'", dev->vhost_id);
+            } else {
+                if (features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
+                    dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD;
+                    dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD;
+                    dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD;
+                }
+
+                if (userspace_tso_enabled()) {
+                    if (features & (1ULL << VIRTIO_NET_F_GUEST_TSO4)
+                        && features & (1ULL << VIRTIO_NET_F_GUEST_TSO6)) {
+                        dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
+                        VLOG_DBG("%s: TSO enabled on vhost port",
+                             netdev_get_name(&dev->up));
+                    } else {
+                        VLOG_WARN("%s: Tx TSO offload is not supported.",
+                                  netdev_get_name(&dev->up));
+                    }
+                }
+            }
+
+            /* There is no support in virtio net to offload IPv4 csum,
+             * but the vhost library handles IPv4 csum offloading fine. */
+            dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD;
+
+            netdev_dpdk_update_netdev_flags(dev);
+
             ovsrcu_index_set(&dev->vid, vid);
             exists = true;

@@ -4065,6 +4096,14 @@ destroy_device(int vid)
                    dev->up.n_rxq * sizeof *dev->vhost_rxq_enabled);
             netdev_dpdk_txq_map_clear(dev);

+            /* Clear offload capabilities before next new_device. */
+            dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD;
+            dev->hw_ol_features &= ~NETDEV_TX_TCP_CKSUM_OFFLOAD;
+            dev->hw_ol_features &= ~NETDEV_TX_UDP_CKSUM_OFFLOAD;
+            dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD;
+            dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
+            netdev_dpdk_update_netdev_flags(dev);
+
             netdev_change_seq_changed(&dev->up);
             ovs_mutex_unlock(&dev->mutex);
             exists = true;
@@ -5045,11 +5084,6 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev)
         dev->tx_q[0].map = 0;
     }

-    if (userspace_tso_enabled()) {
-        dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
-        VLOG_DBG("%s: TSO enabled on vhost port", netdev_get_name(&dev->up));
-    }
-
     netdev_dpdk_remap_txqs(dev);

     err = netdev_dpdk_mempool_configure(dev);
Maxime Coquelin July 12, 2022, 7:43 a.m. UTC | #3
On 7/11/22 23:02, David Marchand wrote:
> Hello,
> 
> Maxime, Ilya, your opinion will probably help.
> 
> On Wed, Jul 6, 2022 at 10:59 PM David Marchand
> <david.marchand@redhat.com> wrote:
>>> @@ -5088,19 +5117,22 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
>>>               goto unlock;
>>>           }
>>>
>>> +        vhost_unsup_flags = 1ULL << VIRTIO_NET_F_HOST_ECN
>>> +                            | 1ULL << VIRTIO_NET_F_HOST_UFO;
>>> +
>>> +        dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD;
>>> +        dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD;
>>> +        dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD;
>>> +        dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD;
>>
>> I think we have problems with Tx vs Rx features.
>>
>> TX checksum offloads, from a vhost port point of view, should be
>> conditionned to what the guest announces it supports.
>> Afaiu, the guest accepting to receive partial tcp cheksums and other
>> is mapped to the VIRTIO_NET_F_GUEST_CSUM (1) feature.
> 
> On this topic, let me add details on how I think we need to fix this.
> 
> - First of all, exposing NETDEV_TX_IPV4_CKSUM_OFFLOAD is ok.
> 
> virtio protocol does not support offloadnig ipv4 checksum.
> But, on the other hand, the DPDK API requests that TSO hw support
> comes with ipv4 hw checksum.
> 
> For this reason, the vhost library has been handling the ipv4 checksum
> in sw: https://git.dpdk.org/dpdk-stable/tree/lib/vhost/virtio_net.c?h=v21.11.1#n441
> And OVS can rely on this.
> 
> 
> - As for the rest of the capabilities, OVS must be able to support
> different scenarii.
> 
> The virtio driver rx capabilities may change as a guest driver can be
> reinitialised or changed to a different driver that supports none of
> those offloads (like net/virtio + testpmd io mode).
> As a result, the vhost netdev checksum tx capabilities might change.
> 
> So I think we need to update the netdev hw ol features (and rely on
> OVS sw fallback code) with a diff like:
> 
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 3e7b86009..1c69150ea 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -4002,6 +4002,37 @@ new_device(int vid)
>                   dev->vhost_reconfigured = true;
>               }
> 
> +            uint64_t features;
> +
> +            if (rte_vhost_get_negotiated_features(vid, &features)) {
> +                VLOG_INFO("Error checking guest features for "
> +                          "vHost Device '%s'", dev->vhost_id);
> +            } else {
> +                if (features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
> +                    dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD;
> +                    dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD;
> +                    dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD;
> +                }
> +
> +                if (userspace_tso_enabled()) {
> +                    if (features & (1ULL << VIRTIO_NET_F_GUEST_TSO4)
> +                        && features & (1ULL << VIRTIO_NET_F_GUEST_TSO6)) {
> +                        dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
> +                        VLOG_DBG("%s: TSO enabled on vhost port",
> +                             netdev_get_name(&dev->up));
> +                    } else {
> +                        VLOG_WARN("%s: Tx TSO offload is not supported.",
> +                                  netdev_get_name(&dev->up));
> +                    }
> +                }
> +            }
> +
> +            /* There is no support in virtio net to offload IPv4 csum,
> +             * but the vhost library handles IPv4 csum offloading fine. */
> +            dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD;
> +
> +            netdev_dpdk_update_netdev_flags(dev);
> +
>               ovsrcu_index_set(&dev->vid, vid);
>               exists = true;
> 
> @@ -4065,6 +4096,14 @@ destroy_device(int vid)
>                      dev->up.n_rxq * sizeof *dev->vhost_rxq_enabled);
>               netdev_dpdk_txq_map_clear(dev);
> 
> +            /* Clear offload capabilities before next new_device. */
> +            dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD;
> +            dev->hw_ol_features &= ~NETDEV_TX_TCP_CKSUM_OFFLOAD;
> +            dev->hw_ol_features &= ~NETDEV_TX_UDP_CKSUM_OFFLOAD;
> +            dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD;
> +            dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
> +            netdev_dpdk_update_netdev_flags(dev);
> +
>               netdev_change_seq_changed(&dev->up);
>               ovs_mutex_unlock(&dev->mutex);
>               exists = true;
> @@ -5045,11 +5084,6 @@ dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev)
>           dev->tx_q[0].map = 0;
>       }
> 
> -    if (userspace_tso_enabled()) {
> -        dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
> -        VLOG_DBG("%s: TSO enabled on vhost port", netdev_get_name(&dev->up));
> -    }
> -
>       netdev_dpdk_remap_txqs(dev);
> 
>       err = netdev_dpdk_mempool_configure(dev);
> 
> 

The proposed change looks good to me, we are now doing the same in Vhost
PMD.

Regards,
Maxime
David Marchand July 12, 2022, 9:44 a.m. UTC | #4
Just a note for completeness, though this issue is handled with the
snippet I proposed in this same thread.

On Fri, Jul 1, 2022 at 5:58 AM Mike Pattrick <mkp@redhat.com> wrote:
> @@ -5024,6 +5052,7 @@ netdev_dpdk_vhost_reconfigure(struct netdev *netdev)
>      int err;
>
>      ovs_mutex_lock(&dev->mutex);
> +    netdev_dpdk_update_netdev_flags(dev);
>      err = dpdk_vhost_reconfigure_helper(dev);

Here...

>      ovs_mutex_unlock(&dev->mutex);
>
> @@ -5088,19 +5117,22 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
>              goto unlock;
>          }
>
> +        vhost_unsup_flags = 1ULL << VIRTIO_NET_F_HOST_ECN
> +                            | 1ULL << VIRTIO_NET_F_HOST_UFO;
> +
> +        dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD;
> +        dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD;
> +        dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD;
> +        dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD;

... and here, we were missing a call to
netdev_dpdk_update_netdev_flags(), *after* touching hw_ol_features.

This explains why I was always seeing the flags as off for vhost ports:
# ovs-appctl dpif-netdev/offload-show | grep vhost0
vhost0: ip_csum: off, tcp_csum: off, udp_csum: off, sctp_csum: off, tso: off

And this is noticeable in a inter vm setup, testing TSO, where
segmentation was done in OVS.

Before I would reach 6Gb/s in inter vm.
After setting flags correctly I get 18Gb/s.
David Marchand July 12, 2022, 3:08 p.m. UTC | #5
On Fri, Jul 1, 2022 at 5:58 AM Mike Pattrick <mkp@redhat.com> wrote:
> diff --git a/lib/netdev.c b/lib/netdev.c
> index b222a5e64..ad9081f28 100644
> --- a/lib/netdev.c
> +++ b/lib/netdev.c

[snip]

> @@ -966,11 +946,10 @@ netdev_push_header(const struct netdev *netdev,
>      size_t i, size = dp_packet_batch_size(batch);
>
>      DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) {
> -        if (OVS_UNLIKELY(dp_packet_ol_tcp_seg(packet)
> -                         || dp_packet_ol_l4_mask(packet))) {
> +        if (OVS_UNLIKELY(dp_packet_ol_tcp_seg(packet))) {
>              COVERAGE_INC(netdev_push_header_drops);
>              dp_packet_delete(packet);
> -            VLOG_WARN_RL(&rl, "%s: Tunneling packets with HW offload flags is "
> +            VLOG_WARN_RL(&rl, "%s: Tunneling packets with TSO offloading is "
>                           "not supported: packet dropped",
>                           netdev_get_name(netdev));
>          } else {

After this block we handle ipv4 checksum "resolution" (from patch 10).

Since this patch now accepts l4 offloads + encapsulation, I think we
need to resolve l4 checksum offloads, here, by calling
dp_packet_ol_send_prepare(packet, 0);.
Or am I missing something else in the code?
diff mbox series

Patch

diff --git a/lib/conntrack.c b/lib/conntrack.c
index 11768da00..d7072e1e9 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -2105,13 +2105,13 @@  conn_key_extract(struct conntrack *ct, struct dp_packet *pkt, ovs_be16 dl_type,
     }
 
     if (ok) {
-        bool hwol_bad_l4_csum = dp_packet_ol_l4_checksum_bad(pkt);
-        if (!hwol_bad_l4_csum) {
-            bool  hwol_good_l4_csum = dp_packet_ol_l4_checksum_good(pkt)
-                                      || dp_packet_ol_tx_l4_checksum(pkt);
+        if (!dp_packet_ol_l4_checksum_bad(pkt)) {
+
             /* Validate the checksum only when hwol is not supported. */
             if (extract_l4(&ctx->key, l4, dp_packet_l4_size(pkt),
-                           &ctx->icmp_related, l3, !hwol_good_l4_csum,
+                           &ctx->icmp_related, l3,
+                           !dp_packet_ol_l4_checksum_good(pkt) &&
+                           !dp_packet_ol_tx_l4_checksum(pkt),
                            NULL)) {
                 ctx->hash = conn_key_hash(&ctx->key, ct->hash_basis);
                 return true;
@@ -3423,8 +3423,10 @@  handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
             adj_seqnum(&th->tcp_seq, ec->seq_skew);
     }
 
-    th->tcp_csum = 0;
-    if (!dp_packet_ol_tx_l4_checksum(pkt)) {
+    if (dp_packet_ol_tx_tcp_csum(pkt)) {
+        dp_packet_ol_reset_l4_csum_good(pkt);
+    } else {
+        th->tcp_csum = 0;
         if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) {
             th->tcp_csum = packet_csum_upperlayer6(nh6, th, ctx->key.nw_proto,
                                dp_packet_l4_size(pkt));
diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index 94d3b1277..460a9eb66 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -39,6 +39,9 @@  dp_packet_init__(struct dp_packet *p, size_t allocated,
     dp_packet_init_specific(p);
     /* By default assume the packet type to be Ethernet. */
     p->packet_type = htonl(PT_ETH);
+    /* Reset csum start and offset. */
+    p->csum_start = 0;
+    p->csum_offset = 0;
 }
 
 static void
@@ -189,7 +192,7 @@  dp_packet_clone_with_headroom(const struct dp_packet *p, size_t headroom)
                                                     dp_packet_size(p),
                                                     headroom);
     /* Copy the following fields into the returned buffer: l2_pad_size,
-     * l2_5_ofs, l3_ofs, l4_ofs, cutlen, packet_type and md. */
+     * l2_5_ofs, l3_ofs, ..., cutlen, packet_type and md. */
     memcpy(&new_buffer->l2_pad_size, &p->l2_pad_size,
             sizeof(struct dp_packet) -
             offsetof(struct dp_packet, l2_pad_size));
@@ -518,4 +521,22 @@  dp_packet_ol_send_prepare(struct dp_packet *p, const uint64_t flags) {
         dp_packet_ip_set_header_csum(p);
         dp_packet_ol_set_ip_csum_good(p);
     }
+
+    if (dp_packet_ol_l4_checksum_good(p) || !dp_packet_ol_tx_l4_checksum(p)) {
+        return;
+    }
+
+    if (dp_packet_ol_tx_tcp_csum(p)
+        && !(flags & NETDEV_OFFLOAD_TX_TCP_CSUM)) {
+        packet_tcp_complete_csum(p);
+        dp_packet_ol_set_l4_csum_good(p);
+    } else if (dp_packet_ol_tx_udp_csum(p)
+        && !(flags & NETDEV_OFFLOAD_TX_UDP_CSUM)) {
+        packet_udp_complete_csum(p);
+        dp_packet_ol_set_l4_csum_good(p);
+    } else if (!(flags & NETDEV_OFFLOAD_TX_SCTP_CSUM)
+        && dp_packet_ol_tx_sctp_csum(p)) {
+        packet_sctp_complete_csum(p);
+        dp_packet_ol_set_l4_csum_good(p);
+    }
 }
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 94aaa40a3..f8ad3079a 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -89,23 +89,24 @@  enum dp_packet_offload_mask {
     /* Adding new field requires adding to DP_PACKET_OL_SUPPORTED_MASK. */
 };
 
-#define DP_PACKET_OL_SUPPORTED_MASK (DP_PACKET_OL_RSS_HASH        | \
-                                     DP_PACKET_OL_FLOW_MARK       | \
-                                     DP_PACKET_OL_RX_L4_CSUM_BAD  | \
-                                     DP_PACKET_OL_RX_IP_CSUM_BAD  | \
-                                     DP_PACKET_OL_RX_L4_CSUM_GOOD | \
-                                     DP_PACKET_OL_RX_IP_CSUM_GOOD | \
-                                     DP_PACKET_OL_TX_TCP_SEG      | \
-                                     DP_PACKET_OL_TX_IPV4         | \
-                                     DP_PACKET_OL_TX_IPV6         | \
-                                     DP_PACKET_OL_TX_IP_CSUM      | \
-                                     DP_PACKET_OL_TX_TCP_CSUM     | \
-                                     DP_PACKET_OL_TX_UDP_CSUM     | \
+#define DP_PACKET_OL_SUPPORTED_MASK (DP_PACKET_OL_RSS_HASH         | \
+                                     DP_PACKET_OL_FLOW_MARK        | \
+                                     DP_PACKET_OL_RX_L4_CSUM_BAD   | \
+                                     DP_PACKET_OL_RX_IP_CSUM_BAD   | \
+                                     DP_PACKET_OL_RX_L4_CSUM_GOOD  | \
+                                     DP_PACKET_OL_RX_IP_CSUM_GOOD  | \
+                                     DP_PACKET_OL_TX_TCP_SEG       | \
+                                     DP_PACKET_OL_TX_IPV4          | \
+                                     DP_PACKET_OL_TX_IPV6          | \
+                                     DP_PACKET_OL_TX_IP_CSUM       | \
+                                     DP_PACKET_OL_TX_TCP_CSUM      | \
+                                     DP_PACKET_OL_TX_UDP_CSUM      | \
                                      DP_PACKET_OL_TX_SCTP_CSUM)
 
 #define DP_PACKET_OL_TX_L4_MASK (DP_PACKET_OL_TX_TCP_CSUM | \
                                  DP_PACKET_OL_TX_UDP_CSUM | \
                                  DP_PACKET_OL_TX_SCTP_CSUM)
+
 #define DP_PACKET_OL_RX_IP_CSUM_MASK (DP_PACKET_OL_RX_IP_CSUM_GOOD | \
                                        DP_PACKET_OL_RX_IP_CSUM_BAD)
 #define DP_PACKET_OL_RX_L4_CSUM_MASK (DP_PACKET_OL_RX_L4_CSUM_GOOD | \
@@ -140,6 +141,8 @@  struct dp_packet {
                                       or UINT16_MAX. */
     uint32_t cutlen;               /* length in bytes to cut from the end. */
     ovs_be32 packet_type;          /* Packet type as defined in OpenFlow */
+    uint16_t csum_start;           /* Position to start checksumming from. */
+    uint16_t csum_offset;          /* Offset to place checksum. */
     union {
         struct pkt_metadata md;
         uint64_t data[DP_PACKET_CONTEXT_SIZE / 8];
@@ -991,6 +994,13 @@  dp_packet_ol_tx_ipv4(const struct dp_packet *a)
     return !!(*dp_packet_ol_flags_ptr(a) & DP_PACKET_OL_TX_IPV4);
 }
 
+/* Returns 'true' if packet 'p' is marked as IPv6. */
+static inline bool
+dp_packet_ol_tx_ipv6(const struct dp_packet *p)
+{
+    return !!(*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_TX_IPV6);
+}
+
 /* Returns 'true' if packet 'a' is marked for TCP checksum offloading. */
 static inline bool
 dp_packet_ol_tx_tcp_csum(const struct dp_packet *a)
@@ -1015,18 +1025,20 @@  dp_packet_ol_tx_sctp_csum(struct dp_packet *a)
             DP_PACKET_OL_TX_SCTP_CSUM;
 }
 
-/* Mark packet 'a' as IPv4. */
+/* Mark packet 'p' as IPv4. */
 static inline void
-dp_packet_ol_set_tx_ipv4(struct dp_packet *a)
+dp_packet_ol_set_tx_ipv4(struct dp_packet *p)
 {
-    *dp_packet_ol_flags_ptr(a) |= DP_PACKET_OL_TX_IPV4;
+    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_IPV6;
+    *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_TX_IPV4;
 }
 
-/* Mark packet 'a' as IPv6. */
+/* Mark packet 'p' as IPv6. */
 static inline void
-dp_packet_ol_set_tx_ipv6(struct dp_packet *a)
+dp_packet_ol_set_tx_ipv6(struct dp_packet *p)
 {
-    *dp_packet_ol_flags_ptr(a) |= DP_PACKET_OL_TX_IPV6;
+    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_TX_IPV4;
+    *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_TX_IPV6;
 }
 
 /* Returns 'true' if packet 'p' is marked for IPv4 checksum offloading. */
@@ -1119,6 +1131,8 @@  dp_packet_ip_set_header_csum(struct dp_packet *p)
     ip->ip_csum = csum(ip, sizeof *ip);
 }
 
+/* Returns 'true' if the packet 'p' has good integrity and the
+ * checksum in it is correct. */
 static inline bool
 dp_packet_ol_l4_checksum_good(const struct dp_packet *p)
 {
@@ -1133,6 +1147,53 @@  dp_packet_ol_l4_checksum_bad(const struct dp_packet *p)
             DP_PACKET_OL_RX_L4_CSUM_BAD;
 }
 
+/* Returns 'true' if the packet has good integrity though the
+ * checksum in the packet 'p' is not complete. */
+static inline bool
+dp_packet_ol_l4_csum_partial(const struct dp_packet *p)
+{
+    return (*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_RX_L4_CSUM_MASK) ==
+            DP_PACKET_OL_RX_L4_CSUM_MASK;
+}
+
+/* Marks packet 'p' with good integrity though the checksum in the
+ * packet is not complete. */
+static inline void
+dp_packet_ol_set_l4_csum_partial(const struct dp_packet *p)
+{
+    *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RX_L4_CSUM_MASK;
+}
+
+/* Marks packet 'p' with good L4 checksum. */
+static inline void
+dp_packet_ol_set_l4_csum_good(const struct dp_packet *p)
+{
+    *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_L4_CSUM_BAD;
+    *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RX_L4_CSUM_GOOD;
+}
+
+/* Marks packet 'p' with good L4 checksum as modified. */
+static inline void
+dp_packet_ol_reset_l4_csum_good(const struct dp_packet *p)
+{
+    if (!dp_packet_ol_l4_csum_partial(p)) {
+        *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_RX_L4_CSUM_GOOD;
+    }
+}
+
+/* Marks packet 'p' with good integrity if the 'start' and 'offset'
+ * matches with the 'csum_start' and 'csum_offset' in packet 'p'.
+ * The 'start' is the offset from the begin of the packet headers.
+ * The 'offset' is the offset from start to place the checksum. */
+static inline void
+dp_packet_ol_vnet_csum_check(const struct dp_packet *p, uint16_t start,
+                             uint16_t offset)
+{
+    if (p->csum_start == start && p->csum_offset == offset) {
+        dp_packet_ol_set_l4_csum_partial(p);
+    }
+}
+
 static inline void ALWAYS_INLINE
 dp_packet_update_rss_hash_ipv4_tcp_udp(struct dp_packet *packet)
 {
diff --git a/lib/flow.c b/lib/flow.c
index 7efb2dd00..fc96f699e 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -1025,6 +1025,13 @@  miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
                     if (dl_type == htons(ETH_TYPE_IP)) {
                         dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
                     }
+                    dp_packet_ol_vnet_csum_check(packet, packet->l4_ofs,
+                                                 offsetof(struct tcp_header,
+                                                          tcp_csum));
+                    if (dp_packet_ol_l4_checksum_good(packet)
+                        || dp_packet_ol_l4_csum_partial(packet)) {
+                        dp_packet_ol_set_tx_tcp_csum(packet);
+                    }
                 }
             }
         } else if (OVS_LIKELY(nw_proto == IPPROTO_UDP)) {
@@ -1038,6 +1045,13 @@  miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
                 if (dl_type == htons(ETH_TYPE_IP)) {
                     dp_packet_update_rss_hash_ipv4_tcp_udp(packet);
                 }
+                dp_packet_ol_vnet_csum_check(packet, packet->l4_ofs,
+                                             offsetof(struct udp_header,
+                                                      udp_csum));
+                if (dp_packet_ol_l4_checksum_good(packet)
+                    || dp_packet_ol_l4_csum_partial(packet)) {
+                    dp_packet_ol_set_tx_udp_csum(packet);
+                }
             }
         } else if (OVS_LIKELY(nw_proto == IPPROTO_SCTP)) {
             if (OVS_LIKELY(size >= SCTP_HEADER_LEN)) {
@@ -1047,6 +1061,13 @@  miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
                 miniflow_push_be16(mf, tp_dst, sctp->sctp_dst);
                 miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
                 miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
+                dp_packet_ol_vnet_csum_check(packet, packet->l4_ofs,
+                                             offsetof(struct sctp_header,
+                                                      sctp_csum));
+                if (dp_packet_ol_l4_checksum_good(packet)
+                    || dp_packet_ol_l4_csum_partial(packet)) {
+                    dp_packet_ol_set_tx_sctp_csum(packet);
+                }
             }
         } else if (OVS_LIKELY(nw_proto == IPPROTO_ICMP)) {
             if (OVS_LIKELY(size >= ICMP_HEADER_LEN)) {
@@ -3166,6 +3187,7 @@  flow_compose_l4_csum(struct dp_packet *p, const struct flow *flow,
             tcp->tcp_csum = 0;
             tcp->tcp_csum = csum_finish(csum_continue(pseudo_hdr_csum,
                                                       tcp, l4_len));
+            dp_packet_ol_set_l4_csum_good(p);
         } else if (flow->nw_proto == IPPROTO_UDP) {
             struct udp_header *udp = dp_packet_l4(p);
 
@@ -3175,6 +3197,7 @@  flow_compose_l4_csum(struct dp_packet *p, const struct flow *flow,
             if (!udp->udp_csum) {
                 udp->udp_csum = htons(0xffff);
             }
+            dp_packet_ol_set_l4_csum_good(p);
         } else if (flow->nw_proto == IPPROTO_ICMP) {
             struct icmp_header *icmp = dp_packet_l4(p);
 
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 1eb2954ab..bfeb75add 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -145,17 +145,6 @@  typedef uint16_t dpdk_port_t;
 
 #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
 
-/* List of required flags advertised by the hardware that will be used
- * if TSO is enabled. Ideally this should include
- * RTE_ETH_TX_OFFLOAD_SCTP_CKSUM. However, very few drivers support that
- * at the moment and SCTP is not a widely used protocol like TCP and UDP,
- * so it's optional. */
-#define DPDK_TX_TSO_OFFLOAD_FLAGS (RTE_ETH_TX_OFFLOAD_TCP_TSO        \
-                                   | RTE_ETH_TX_OFFLOAD_TCP_CKSUM    \
-                                   | RTE_ETH_TX_OFFLOAD_UDP_CKSUM    \
-                                   | RTE_ETH_TX_OFFLOAD_IPV4_CKSUM)
-
-
 static const struct rte_eth_conf port_conf = {
     .rxmode = {
         .split_hdr_size = 0,
@@ -398,8 +387,10 @@  enum dpdk_hw_ol_features {
     NETDEV_RX_HW_CRC_STRIP = 1 << 1,
     NETDEV_RX_HW_SCATTER = 1 << 2,
     NETDEV_TX_IPV4_CKSUM_OFFLOAD = 1 << 3,
-    NETDEV_TX_TSO_OFFLOAD = 1 << 4,
-    NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 5,
+    NETDEV_TX_TCP_CKSUM_OFFLOAD = 1 << 4,
+    NETDEV_TX_UDP_CKSUM_OFFLOAD = 1 << 5,
+    NETDEV_TX_SCTP_CKSUM_OFFLOAD = 1 << 6,
+    NETDEV_TX_TSO_OFFLOAD = 1 << 7,
 };
 
 /*
@@ -953,6 +944,35 @@  dpdk_watchdog(void *dummy OVS_UNUSED)
     return NULL;
 }
 
+static void
+netdev_dpdk_update_netdev_flag(struct netdev_dpdk *dev,
+                               enum dpdk_hw_ol_features hw_ol_features,
+                               enum netdev_ol_flags flag)
+{
+    struct netdev *netdev = &dev->up;
+
+    if (dev->hw_ol_features & hw_ol_features) {
+        netdev->ol_flags |= flag;
+    } else {
+        netdev->ol_flags &= ~flag;
+    }
+}
+
+static void
+netdev_dpdk_update_netdev_flags(struct netdev_dpdk *dev)
+{
+    netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_IPV4_CKSUM_OFFLOAD,
+                                   NETDEV_OFFLOAD_TX_IPV4_CSUM);
+    netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_TCP_CKSUM_OFFLOAD,
+                                   NETDEV_OFFLOAD_TX_TCP_CSUM);
+    netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_UDP_CKSUM_OFFLOAD,
+                                   NETDEV_OFFLOAD_TX_UDP_CSUM);
+    netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_SCTP_CKSUM_OFFLOAD,
+                                   NETDEV_OFFLOAD_TX_SCTP_CSUM);
+    netdev_dpdk_update_netdev_flag(dev, NETDEV_TX_TSO_OFFLOAD,
+                                   NETDEV_OFFLOAD_TX_TCP_TSO);
+}
+
 static int
 dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
 {
@@ -989,11 +1009,20 @@  dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
         conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM;
     }
 
+    if (dev->hw_ol_features & NETDEV_TX_TCP_CKSUM_OFFLOAD) {
+        conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
+    }
+
+    if (dev->hw_ol_features & NETDEV_TX_UDP_CKSUM_OFFLOAD) {
+        conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_UDP_CKSUM;
+    }
+
+    if (dev->hw_ol_features & NETDEV_TX_SCTP_CKSUM_OFFLOAD) {
+        conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_SCTP_CKSUM;
+    }
+
     if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
-        conf.txmode.offloads |= DPDK_TX_TSO_OFFLOAD_FLAGS;
-        if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) {
-            conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_SCTP_CKSUM;
-        }
+        conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_TCP_TSO;
     }
 
     /* Limit configured rss hash functions to only those supported
@@ -1099,7 +1128,6 @@  dpdk_eth_dev_init(struct netdev_dpdk *dev)
     struct rte_ether_addr eth_addr;
     int diag;
     int n_rxq, n_txq;
-    uint32_t tx_tso_offload_capa = DPDK_TX_TSO_OFFLOAD_FLAGS;
     uint32_t rx_chksm_offload_capa = RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
                                      RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
                                      RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
@@ -1135,18 +1163,28 @@  dpdk_eth_dev_init(struct netdev_dpdk *dev)
         dev->hw_ol_features &= ~NETDEV_TX_IPV4_CKSUM_OFFLOAD;
     }
 
+    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_CKSUM) {
+        dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD;
+    } else {
+        dev->hw_ol_features &= ~NETDEV_TX_TCP_CKSUM_OFFLOAD;
+    }
+
+    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_UDP_CKSUM) {
+        dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD;
+    } else {
+        dev->hw_ol_features &= ~NETDEV_TX_UDP_CKSUM_OFFLOAD;
+    }
+
+    if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) {
+        dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD;
+    } else {
+        dev->hw_ol_features &= ~NETDEV_TX_SCTP_CKSUM_OFFLOAD;
+    }
+
     dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
     if (userspace_tso_enabled()) {
-        if ((info.tx_offload_capa & tx_tso_offload_capa)
-            == tx_tso_offload_capa) {
+        if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO) {
             dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
-            if (info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_SCTP_CKSUM) {
-                dev->hw_ol_features |= NETDEV_TX_SCTP_CHECKSUM_OFFLOAD;
-            } else {
-                VLOG_WARN("%s: Tx SCTP checksum offload is not supported, "
-                          "SCTP packets sent to this device will be dropped",
-                          netdev_get_name(&dev->up));
-            }
         } else {
             VLOG_WARN("%s: Tx TSO offload is not supported.",
                       netdev_get_name(&dev->up));
@@ -1708,6 +1746,9 @@  netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args)
         smap_add(args, FIELD, dev->hw_ol_features & FLAG ? "true" : "false");
         HWOL_SMAP_ADD("rx_csum_offload", NETDEV_RX_CHECKSUM_OFFLOAD);
         HWOL_SMAP_ADD("tx_ip_csum_offload", NETDEV_TX_IPV4_CKSUM_OFFLOAD);
+        HWOL_SMAP_ADD("tx_tcp_csum_offload", NETDEV_TX_TCP_CKSUM_OFFLOAD);
+        HWOL_SMAP_ADD("tx_udp_csum_offload", NETDEV_TX_UDP_CKSUM_OFFLOAD);
+        HWOL_SMAP_ADD("tx_sctp_csum_offload", NETDEV_TX_SCTP_CKSUM_OFFLOAD);
         HWOL_SMAP_ADD("tx_tso_offload", NETDEV_TX_TSO_OFFLOAD);
 #undef HWOL_SMAP_ADD
         smap_add(args, "lsc_interrupt_mode",
@@ -2154,6 +2195,7 @@  netdev_dpdk_prep_ol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
 
     mbuf->l2_len = (char *) dp_packet_l3(pkt) - (char *) dp_packet_eth(pkt);
     mbuf->l3_len = (char *) dp_packet_l4(pkt) - (char *) dp_packet_l3(pkt);
+    mbuf->l4_len = 0;
     mbuf->outer_l2_len = 0;
     mbuf->outer_l3_len = 0;
 
@@ -4935,21 +4977,7 @@  netdev_dpdk_reconfigure(struct netdev *netdev)
     }
 
     err = dpdk_eth_dev_init(dev);
-    if (dev->hw_ol_features & NETDEV_TX_IPV4_CKSUM_OFFLOAD) {
-        netdev->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM;
-    } else {
-        netdev->ol_flags &= ~NETDEV_OFFLOAD_TX_IPV4_CSUM;
-    }
-
-    if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
-        netdev->ol_flags |= NETDEV_OFFLOAD_TX_TCP_TSO;
-        netdev->ol_flags |= NETDEV_OFFLOAD_TX_TCP_CSUM;
-        netdev->ol_flags |= NETDEV_OFFLOAD_TX_UDP_CSUM;
-        netdev->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM;
-        if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) {
-            netdev->ol_flags |= NETDEV_OFFLOAD_TX_SCTP_CSUM;
-        }
-    }
+    netdev_dpdk_update_netdev_flags(dev);
 
     /* If both requested and actual hwaddr were previously
      * unset (initialized to 0), then first device init above
@@ -5024,6 +5052,7 @@  netdev_dpdk_vhost_reconfigure(struct netdev *netdev)
     int err;
 
     ovs_mutex_lock(&dev->mutex);
+    netdev_dpdk_update_netdev_flags(dev);
     err = dpdk_vhost_reconfigure_helper(dev);
     ovs_mutex_unlock(&dev->mutex);
 
@@ -5088,19 +5117,22 @@  netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
             goto unlock;
         }
 
+        vhost_unsup_flags = 1ULL << VIRTIO_NET_F_HOST_ECN
+                            | 1ULL << VIRTIO_NET_F_HOST_UFO;
+
+        dev->hw_ol_features |= NETDEV_TX_IPV4_CKSUM_OFFLOAD;
+        dev->hw_ol_features |= NETDEV_TX_TCP_CKSUM_OFFLOAD;
+        dev->hw_ol_features |= NETDEV_TX_UDP_CKSUM_OFFLOAD;
+        dev->hw_ol_features |= NETDEV_TX_SCTP_CKSUM_OFFLOAD;
+
         if (userspace_tso_enabled()) {
-            netdev->ol_flags |= NETDEV_OFFLOAD_TX_TCP_TSO;
-            netdev->ol_flags |= NETDEV_OFFLOAD_TX_TCP_CSUM;
-            netdev->ol_flags |= NETDEV_OFFLOAD_TX_UDP_CSUM;
-            netdev->ol_flags |= NETDEV_OFFLOAD_TX_SCTP_CSUM;
-            netdev->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM;
-            vhost_unsup_flags = 1ULL << VIRTIO_NET_F_HOST_ECN
-                                | 1ULL << VIRTIO_NET_F_HOST_UFO;
+            dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
+            VLOG_DBG("%s: TSO enabled on vhost port",
+                     netdev_get_name(&dev->up));
         } else {
-            /* This disables checksum offloading and all the features
-             * that depends on it (TSO, UFO, ECN) according to virtio
-             * specification. */
-            vhost_unsup_flags = 1ULL << VIRTIO_NET_F_CSUM;
+            /* Checksum offload is advertised unconditionally above,
+             * so only disable TSO here. */
+            vhost_unsup_flags |= 1ULL << VIRTIO_NET_F_HOST_TSO4
         }
 
         err = rte_vhost_driver_disable_features(dev->vhost_id,
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 6c230d938..1991ba1e9 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -927,14 +927,6 @@  netdev_linux_common_construct(struct netdev *netdev_)
     netnsid_unset(&netdev->netnsid);
     ovs_mutex_init(&netdev->mutex);
 
-    if (userspace_tso_enabled()) {
-        netdev_->ol_flags |= NETDEV_OFFLOAD_TX_TCP_TSO;
-        netdev_->ol_flags |= NETDEV_OFFLOAD_TX_TCP_CSUM;
-        netdev_->ol_flags |= NETDEV_OFFLOAD_TX_UDP_CSUM;
-        netdev_->ol_flags |= NETDEV_OFFLOAD_TX_SCTP_CSUM;
-        netdev_->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM;
-    }
-
     return 0;
 }
 
@@ -948,6 +940,16 @@  netdev_linux_construct(struct netdev *netdev_)
         return error;
     }
 
+    /* The socket interface doesn't offer the option to enable only
+     * csum offloading without TSO. */
+    if (userspace_tso_enabled()) {
+        netdev_->ol_flags |= NETDEV_OFFLOAD_TX_TCP_TSO;
+        netdev_->ol_flags |= NETDEV_OFFLOAD_TX_TCP_CSUM;
+        netdev_->ol_flags |= NETDEV_OFFLOAD_TX_UDP_CSUM;
+        netdev_->ol_flags |= NETDEV_OFFLOAD_TX_SCTP_CSUM;
+        netdev_->ol_flags |= NETDEV_OFFLOAD_TX_IPV4_CSUM;
+    }
+
     error = get_flags(&netdev->up, &netdev->ifi_flags);
     if (error == ENODEV) {
         if (netdev->up.netdev_class != &netdev_internal_class) {
@@ -976,6 +978,7 @@  netdev_linux_construct_tap(struct netdev *netdev_)
     struct netdev_linux *netdev = netdev_linux_cast(netdev_);
     static const char tap_dev[] = "/dev/net/tun";
     const char *name = netdev_->name;
+    unsigned long oflags;
     struct ifreq ifr;
 
     int error = netdev_linux_common_construct(netdev_);
@@ -993,10 +996,7 @@  netdev_linux_construct_tap(struct netdev *netdev_)
 
     /* Create tap device. */
     get_flags(&netdev->up, &netdev->ifi_flags);
-    ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
-    if (userspace_tso_enabled()) {
-        ifr.ifr_flags |= IFF_VNET_HDR;
-    }
+    ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
 
     ovs_strzcpy(ifr.ifr_name, name, sizeof ifr.ifr_name);
     if (ioctl(netdev->tap_fd, TUNSETIFF, &ifr) == -1) {
@@ -1019,21 +1019,22 @@  netdev_linux_construct_tap(struct netdev *netdev_)
         goto error_close;
     }
 
+    oflags = TUN_F_CSUM;
     if (userspace_tso_enabled()) {
-        /* Old kernels don't support TUNSETOFFLOAD. If TUNSETOFFLOAD is
-         * available, it will return EINVAL when a flag is unknown.
-         * Therefore, try enabling offload with no flags to check
-         * if TUNSETOFFLOAD support is available or not. */
-        if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, 0) == 0 || errno != EINVAL) {
-            unsigned long oflags = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6;
-
-            if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, oflags) == -1) {
-                VLOG_WARN("%s: enabling tap offloading failed: %s", name,
-                          ovs_strerror(errno));
-                error = errno;
-                goto error_close;
-            }
-        }
+        oflags |= (TUN_F_TSO4 | TUN_F_TSO6);
+    }
+
+    if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, oflags) == 0) {
+        netdev_->ol_flags |= (NETDEV_OFFLOAD_TX_IPV4_CSUM
+                              | NETDEV_OFFLOAD_TX_TCP_CSUM
+                              | NETDEV_OFFLOAD_TX_UDP_CSUM);
+
+        if (userspace_tso_enabled()) {
+            netdev_->ol_flags |= NETDEV_OFFLOAD_TX_TCP_TSO;
+        }
+    } else {
+        VLOG_WARN("%s: Disabling hardware offloading: %s", name,
+                  ovs_strerror(errno));
     }
 
     netdev->present = true;
@@ -1333,18 +1334,22 @@  netdev_linux_batch_rxq_recv_sock(struct netdev_rxq_linux *rx, int mtu,
             pkt = buffers[i];
          }
 
-        if (virtio_net_hdr_size && netdev_linux_parse_vnet_hdr(pkt)) {
-            struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up);
-            struct netdev_linux *netdev = netdev_linux_cast(netdev_);
+        if (virtio_net_hdr_size) {
+            int ret = netdev_linux_parse_vnet_hdr(pkt);
+            if (OVS_UNLIKELY(ret)) {
+                struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up);
+                struct netdev_linux *netdev = netdev_linux_cast(netdev_);
 
-            /* Unexpected error situation: the virtio header is not present
-             * or corrupted. Drop the packet but continue in case next ones
-             * are correct. */
-            dp_packet_delete(pkt);
-            netdev->rx_dropped += 1;
-            VLOG_WARN_RL(&rl, "%s: Dropped packet: Invalid virtio net header",
-                         netdev_get_name(netdev_));
-            continue;
+                /* Unexpected error situation: the virtio header is not
+                 * present or corrupted or contains unsupported features.
+                 * Drop the packet but continue in case next ones are
+                 * correct. */
+                dp_packet_delete(pkt);
+                netdev->rx_dropped += 1;
+                VLOG_WARN_RL(&rl, "%s: Dropped packet: %s",
+                             netdev_get_name(netdev_), ovs_strerror(ret));
+                continue;
+            }
         }
 
         for (cmsg = CMSG_FIRSTHDR(&mmsgs[i].msg_hdr); cmsg;
@@ -1392,7 +1397,6 @@  static int
 netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu,
                                 struct dp_packet_batch *batch)
 {
-    int virtio_net_hdr_size;
     ssize_t retval;
     size_t std_len;
     int iovlen;
@@ -1402,16 +1406,14 @@  netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu,
         /* Use the buffer from the allocated packet below to receive MTU
          * sized packets and an aux_buf for extra TSO data. */
         iovlen = IOV_TSO_SIZE;
-        virtio_net_hdr_size = sizeof(struct virtio_net_hdr);
     } else {
         /* Use only the buffer from the allocated packet. */
         iovlen = IOV_STD_SIZE;
-        virtio_net_hdr_size = 0;
     }
 
     /* The length here needs to be accounted in the same way when the
      * aux_buf is allocated so that it can be prepended to TSO buffer. */
-    std_len = virtio_net_hdr_size + VLAN_ETH_HEADER_LEN + mtu;
+    std_len = sizeof(struct virtio_net_hdr) + VLAN_ETH_HEADER_LEN + mtu;
     for (i = 0; i < NETDEV_MAX_BURST; i++) {
         struct dp_packet *buffer;
         struct dp_packet *pkt;
@@ -1451,7 +1453,7 @@  netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu,
             pkt = buffer;
         }
 
-        if (virtio_net_hdr_size && netdev_linux_parse_vnet_hdr(pkt)) {
+        if (netdev_linux_parse_vnet_hdr(pkt)) {
             struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up);
             struct netdev_linux *netdev = netdev_linux_cast(netdev_);
 
@@ -1600,7 +1602,7 @@  netdev_linux_sock_batch_send(int sock, int ifindex, bool tso, int mtu,
  * on other interface types because we attach a socket filter to the rx
  * socket. */
 static int
-netdev_linux_tap_batch_send(struct netdev *netdev_, bool tso, int mtu,
+netdev_linux_tap_batch_send(struct netdev *netdev_, int mtu,
                             struct dp_packet_batch *batch)
 {
     struct netdev_linux *netdev = netdev_linux_cast(netdev_);
@@ -1621,9 +1623,7 @@  netdev_linux_tap_batch_send(struct netdev *netdev_, bool tso, int mtu,
         ssize_t retval;
         int error;
 
-        if (tso) {
-            netdev_linux_prepend_vnet_hdr(packet, mtu);
-        }
+        netdev_linux_prepend_vnet_hdr(packet, mtu);
 
         size = dp_packet_size(packet);
         do {
@@ -1754,7 +1754,7 @@  netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
 
         error = netdev_linux_sock_batch_send(sock, ifindex, tso, mtu, batch);
     } else {
-        error = netdev_linux_tap_batch_send(netdev_, tso, mtu, batch);
+        error = netdev_linux_tap_batch_send(netdev_, mtu, batch);
     }
     if (error) {
         if (error == ENOBUFS) {
@@ -6628,59 +6628,78 @@  netdev_linux_parse_l2(struct dp_packet *p, uint16_t *l4proto)
         }
 
         *l4proto = nh6->ip6_ctlun.ip6_un1.ip6_un1_nxt;
-        dp_packet_ol_set_tx_ipv6(p);
     }
 
     return 0;
 }
 
+/* Initializes packet 'p' with features enabled in the prepended
+ * struct virtio_net_hdr.  Returns 0 if successful, otherwise a
+ * positive errno value. */
 static int
 netdev_linux_parse_vnet_hdr(struct dp_packet *p)
 {
     struct virtio_net_hdr *vnet = dp_packet_pull(p, sizeof *vnet);
-    uint16_t l4proto = 0;
 
     if (OVS_UNLIKELY(!vnet)) {
-        return -EINVAL;
+        return EINVAL;
     }
 
     if (vnet->flags == 0 && vnet->gso_type == VIRTIO_NET_HDR_GSO_NONE) {
         return 0;
     }
 
-    if (netdev_linux_parse_l2(p, &l4proto)) {
-        return -EINVAL;
-    }
-
     if (vnet->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) {
-        if (l4proto == IPPROTO_TCP) {
-            dp_packet_ol_set_tx_tcp_csum(p);
-        } else if (l4proto == IPPROTO_UDP) {
-            dp_packet_ol_set_tx_udp_csum(p);
-        } else if (l4proto == IPPROTO_SCTP) {
-            dp_packet_ol_set_tx_sctp_csum(p);
-        }
-    }
+        uint16_t l4proto = 0;
 
-    if (l4proto && vnet->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
-        uint8_t allowed_mask = VIRTIO_NET_HDR_GSO_TCPV4
-                                | VIRTIO_NET_HDR_GSO_TCPV6
-                                | VIRTIO_NET_HDR_GSO_UDP;
-        uint8_t type = vnet->gso_type & allowed_mask;
-
-        if (type == VIRTIO_NET_HDR_GSO_TCPV4
-            || type == VIRTIO_NET_HDR_GSO_TCPV6) {
-            dp_packet_ol_set_tcp_seg(p);
+        if (netdev_linux_parse_l2(p, &l4proto)) {
+            return EINVAL;
         }
-    }
 
-    return 0;
+        if (l4proto == IPPROTO_UDP) {
+            dp_packet_ol_set_tx_udp_csum(p);
+        }
+        /* The packet has offloaded checksum. However, there is no
+         * additional information like the protocol used, so it would
+         * require to parse the packet here. The checksum starting point
+         * and offset are going to be verified when the packet headers
+         * are parsed during miniflow extraction. */
+        p->csum_start = vnet->csum_start;
+        p->csum_offset = vnet->csum_offset;
+    } else {
+        p->csum_start = 0;
+        p->csum_offset = 0;
+    }
+
+    int ret = 0;
+    switch (vnet->gso_type) {
+    case VIRTIO_NET_HDR_GSO_TCPV4:
+    case VIRTIO_NET_HDR_GSO_TCPV6:
+        /* FIXME: The packet has offloaded TCP segmentation. The gso_size
+         * is given and needs to be respected. */
+        dp_packet_ol_set_tcp_seg(p);
+        break;
+    case VIRTIO_NET_HDR_GSO_UDP:
+        /* UFO is not supported. */
+        VLOG_WARN_RL(&rl, "Received an unsupported packet with UFO enabled.");
+        ret = ENOTSUP;
+        break;
+    case VIRTIO_NET_HDR_GSO_NONE:
+        break;
+    default:
+        ret = ENOTSUP;
+        VLOG_WARN_RL(&rl, "Received an unsupported packet with GSO type: 0x%x",
+                     vnet->gso_type);
+    }
+
+    return ret;
 }
 
 static void
 netdev_linux_prepend_vnet_hdr(struct dp_packet *p, int mtu)
 {
-    struct virtio_net_hdr *vnet = dp_packet_push_zeros(p, sizeof *vnet);
+    struct virtio_net_hdr v;
+    struct virtio_net_hdr *vnet = &v;
 
     if (dp_packet_ol_tcp_seg(p)) {
         uint16_t hdr_len = ((char *) dp_packet_l4(p)
@@ -6690,30 +6709,92 @@  netdev_linux_prepend_vnet_hdr(struct dp_packet *p, int mtu)
         vnet->gso_size = (OVS_FORCE __virtio16)(mtu - hdr_len);
         if (dp_packet_ol_tx_ipv4(p)) {
             vnet->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
-        } else {
+        } else if (dp_packet_ol_tx_ipv6(p)) {
             vnet->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
         }
 
     } else {
-        vnet->flags = VIRTIO_NET_HDR_GSO_NONE;
-    }
-
-    if (dp_packet_ol_l4_mask(p)) {
-        vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
-        vnet->csum_start = (OVS_FORCE __virtio16)((char *) dp_packet_l4(p)
-                                                  - (char *) dp_packet_eth(p));
-
+        vnet->hdr_len = 0;
+        vnet->gso_size = 0;
+        vnet->gso_type = VIRTIO_NET_HDR_GSO_NONE;
+    }
+
+    if (dp_packet_ol_l4_checksum_good(p)) {
+        /* The packet already has a good checksum.
+         * No need to validate it again. */
+        vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0;
+        vnet->flags = VIRTIO_NET_HDR_F_DATA_VALID;
+    } else if (dp_packet_ol_tx_l4_checksum(p)) {
+        /* The csum calculation is offloaded. */
         if (dp_packet_ol_tx_tcp_csum(p)) {
+            /* Virtual I/O Device (VIRTIO) Version 1.1
+             * 5.1.6.2 Packet Transmission
+             If the driver negotiated VIRTIO_NET_F_CSUM, it can skip
+             checksumming the packet:
+               - flags has the VIRTIO_NET_HDR_F_NEEDS_CSUM set,
+               - csum_start is set to the offset within the packet
+                 to begin checksumming, and
+               - csum_offset indicates how many bytes after the
+                 csum_start the new (16 bit ones complement) checksum
+                 is placed by the device.
+               The TCP checksum field in the packet is set to the sum of
+               the TCP pseudo header, so that replacing it by the ones
+               complement checksum of the TCP header and body will give
+               the correct result. */
+
+            struct tcp_header *tcp_hdr = dp_packet_l4(p);
+            ovs_be16 csum = 0;
+            if (dp_packet_ol_tx_ipv4(p)) {
+                const struct ip_header *ip_hdr = dp_packet_l3(p);
+                csum = ~csum_finish(packet_csum_pseudoheader(ip_hdr));
+            } else if (dp_packet_ol_tx_ipv6(p)) {
+                const struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(p);
+                csum = ~csum_finish(packet_csum_pseudoheader6(ip6_hdr));
+            }
+
+            tcp_hdr->tcp_csum = csum;
+            vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+            vnet->csum_start = (OVS_FORCE __virtio16) p->l4_ofs;
             vnet->csum_offset = (OVS_FORCE __virtio16) __builtin_offsetof(
                                     struct tcp_header, tcp_csum);
         } else if (dp_packet_ol_tx_udp_csum(p)) {
+            struct udp_header *udp_hdr = dp_packet_l4(p);
+            ovs_be16 csum = 0;
+
+            if (dp_packet_ol_tx_ipv4(p)) {
+                const struct ip_header *ip_hdr = dp_packet_l3(p);
+                csum = ~csum_finish(packet_csum_pseudoheader(ip_hdr));
+            } else if (dp_packet_ol_tx_ipv6(p)) {
+                const struct ovs_16aligned_ip6_hdr *ip6_hdr = dp_packet_l3(p);
+                csum = ~csum_finish(packet_csum_pseudoheader6(ip6_hdr));
+            }
+
+            udp_hdr->udp_csum = csum;
+            vnet->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+            vnet->csum_start = (OVS_FORCE __virtio16) p->l4_ofs;
             vnet->csum_offset = (OVS_FORCE __virtio16) __builtin_offsetof(
                                     struct udp_header, udp_csum);
         } else if (dp_packet_ol_tx_sctp_csum(p)) {
-            vnet->csum_offset = (OVS_FORCE __virtio16) __builtin_offsetof(
-                                    struct sctp_header, sctp_csum);
+            /* The Linux kernel networking stack only supports csum_start
+             * and csum_offset when SCTP GSO is enabled.  See kernel's
+             * skb_csum_hwoffload_help(). Currently there is no SCTP
+             * segmentation offload support in OVS. */
+            vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0;
+            vnet->flags = 0;
         } else {
-            VLOG_WARN_RL(&rl, "Unsupported L4 protocol");
+            /* This should only happen when DP_PACKET_OL_TX_L4_MASK includes
+             * a new flag that is not covered by the checks above. */
+            VLOG_WARN_RL(&rl, "Unsupported L4 checksum offload. "
+                         "Flags: %"PRIu64,
+                         (uint64_t)*dp_packet_ol_flags_ptr(p));
+            vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0;
+            vnet->flags = 0;
         }
+    } else {
+        /* Packet L4 csum is unknown. */
+        vnet->csum_start = vnet->csum_offset = (OVS_FORCE __virtio16) 0;
+        vnet->flags = 0;
     }
+
+    dp_packet_push(p, vnet, sizeof *vnet);
 }
diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
index d9e2d7e5d..8d3c8f158 100644
--- a/lib/netdev-native-tnl.c
+++ b/lib/netdev-native-tnl.c
@@ -224,28 +224,6 @@  udp_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
     return udp + 1;
 }
 
-static void
-netdev_tnl_calc_udp_csum(struct udp_header *udp, struct dp_packet *packet,
-                         int ip_tot_size)
-{
-    uint32_t csum;
-
-    if (netdev_tnl_is_header_ipv6(dp_packet_data(packet))) {
-        csum = packet_csum_pseudoheader6(netdev_tnl_ipv6_hdr(
-                                         dp_packet_data(packet)));
-    } else {
-        csum = packet_csum_pseudoheader(netdev_tnl_ip_hdr(
-                                        dp_packet_data(packet)));
-    }
-
-    csum = csum_continue(csum, udp, ip_tot_size);
-    udp->udp_csum = csum_finish(csum);
-
-    if (!udp->udp_csum) {
-        udp->udp_csum = htons(0xffff);
-    }
-}
-
 void
 netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,
                            struct dp_packet *packet,
@@ -260,9 +238,9 @@  netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,
     udp->udp_src = netdev_tnl_get_src_port(packet);
     udp->udp_len = htons(ip_tot_size);
 
-    if (udp->udp_csum) {
-        netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size);
-    }
+    /* Postpone checksum to the egress netdev. */
+    dp_packet_ol_set_tx_udp_csum(packet);
+    dp_packet_ol_reset_l4_csum_good(packet);
 }
 
 static void *
@@ -806,7 +784,9 @@  netdev_gtpu_push_header(const struct netdev *netdev,
                                     data->header_len, &ip_tot_size);
     udp->udp_src = netdev_tnl_get_src_port(packet);
     udp->udp_len = htons(ip_tot_size);
-    netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size);
+    /* Postpone checksum to the egress netdev. */
+    dp_packet_ol_set_tx_udp_csum(packet);
+    dp_packet_ol_reset_l4_csum_good(packet);
 
     gtpuh = ALIGNED_CAST(struct gtpuhdr *, udp + 1);
 
diff --git a/lib/netdev.c b/lib/netdev.c
index b222a5e64..ad9081f28 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -804,36 +804,16 @@  netdev_send_prepare_packet(const uint64_t netdev_flags,
      * netdev to decide what would be the best to do.
      * Provide a software fallback in case the device doesn't support IP csum
      * offloading. Note: Encapsulated packet must have the inner IP header
+     * csum already calculated.
+     * Packet with L4 csum offloading enabled was received with verified csum.
+     * Leave the L4 csum offloading enabled even with good checksum for the
+     * netdev to decide what would be the best to do.
+     * Netdev that requires pseudo header csum needs to calculate that.
+     * Provide a software fallback in case the netdev doesn't support L4 csum
+     * offloading. Note: Encapsulated packet must have the inner L4 header
      * csum already calculated. */
     dp_packet_ol_send_prepare(packet, netdev_flags);
 
-    if (dp_packet_ol_l4_mask(packet)) {
-        if (dp_packet_ol_tx_tcp_csum(packet)) {
-            if (!(netdev_flags & NETDEV_OFFLOAD_TX_TCP_CSUM)) {
-                /* Fall back to TCP csum in software. */
-                VLOG_ERR_BUF(errormsg, "No TCP checksum support");
-                return false;
-            }
-        } else if (dp_packet_ol_tx_udp_csum(packet)) {
-            if (!(netdev_flags & NETDEV_OFFLOAD_TX_UDP_CSUM)) {
-                /* Fall back to UDP csum in software. */
-                VLOG_ERR_BUF(errormsg, "No UDP checksum support");
-                return false;
-            }
-        } else if (dp_packet_ol_tx_sctp_csum(packet)) {
-            if (!(netdev_flags & NETDEV_OFFLOAD_TX_SCTP_CSUM)) {
-                /* Fall back to SCTP csum in software. */
-                VLOG_ERR_BUF(errormsg, "No SCTP checksum support");
-                return false;
-            }
-        } else {
-            uint64_t ol_flags = *dp_packet_ol_flags_ptr(packet);
-            VLOG_ERR_BUF(errormsg, "No L4 checksum support: "
-                         "offload mask: %"PRIu64, ol_flags);
-            return false;
-        }
-    }
-
     return true;
 }
 
@@ -966,11 +946,10 @@  netdev_push_header(const struct netdev *netdev,
     size_t i, size = dp_packet_batch_size(batch);
 
     DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) {
-        if (OVS_UNLIKELY(dp_packet_ol_tcp_seg(packet)
-                         || dp_packet_ol_l4_mask(packet))) {
+        if (OVS_UNLIKELY(dp_packet_ol_tcp_seg(packet))) {
             COVERAGE_INC(netdev_push_header_drops);
             dp_packet_delete(packet);
-            VLOG_WARN_RL(&rl, "%s: Tunneling packets with HW offload flags is "
+            VLOG_WARN_RL(&rl, "%s: Tunneling packets with TSO offloading is "
                          "not supported: packet dropped",
                          netdev_get_name(netdev));
         } else {
diff --git a/lib/packets.c b/lib/packets.c
index ab6e4f766..b8a54026b 100644
--- a/lib/packets.c
+++ b/lib/packets.c
@@ -1132,16 +1132,22 @@  packet_set_ipv4_addr(struct dp_packet *packet,
     pkt_metadata_init_conn(&packet->md);
 
     if (nh->ip_proto == IPPROTO_TCP && l4_size >= TCP_HEADER_LEN) {
-        struct tcp_header *th = dp_packet_l4(packet);
-
-        th->tcp_csum = recalc_csum32(th->tcp_csum, old_addr, new_addr);
+        if (dp_packet_ol_tx_tcp_csum(packet)) {
+            dp_packet_ol_reset_l4_csum_good(packet);
+        } else {
+            struct tcp_header *th = dp_packet_l4(packet);
+            th->tcp_csum = recalc_csum32(th->tcp_csum, old_addr, new_addr);
+        }
     } else if (nh->ip_proto == IPPROTO_UDP && l4_size >= UDP_HEADER_LEN ) {
-        struct udp_header *uh = dp_packet_l4(packet);
-
-        if (uh->udp_csum) {
-            uh->udp_csum = recalc_csum32(uh->udp_csum, old_addr, new_addr);
-            if (!uh->udp_csum) {
-                uh->udp_csum = htons(0xffff);
+        if (dp_packet_ol_tx_udp_csum(packet)) {
+            dp_packet_ol_reset_l4_csum_good(packet);
+        } else {
+            struct udp_header *uh = dp_packet_l4(packet);
+            if (uh->udp_csum) {
+                uh->udp_csum = recalc_csum32(uh->udp_csum, old_addr, new_addr);
+                if (!uh->udp_csum) {
+                    uh->udp_csum = htons(0xffff);
+                }
             }
         }
     }
@@ -1245,16 +1251,24 @@  packet_update_csum128(struct dp_packet *packet, uint8_t proto,
     size_t l4_size = dp_packet_l4_size(packet);
 
     if (proto == IPPROTO_TCP && l4_size >= TCP_HEADER_LEN) {
-        struct tcp_header *th = dp_packet_l4(packet);
+        if (dp_packet_ol_tx_tcp_csum(packet)) {
+            dp_packet_ol_reset_l4_csum_good(packet);
+        } else {
+            struct tcp_header *th = dp_packet_l4(packet);
 
-        th->tcp_csum = recalc_csum128(th->tcp_csum, addr, new_addr);
+            th->tcp_csum = recalc_csum128(th->tcp_csum, addr, new_addr);
+        }
     } else if (proto == IPPROTO_UDP && l4_size >= UDP_HEADER_LEN) {
-        struct udp_header *uh = dp_packet_l4(packet);
+        if (dp_packet_ol_tx_udp_csum(packet)) {
+            dp_packet_ol_reset_l4_csum_good(packet);
+        } else {
+            struct udp_header *uh = dp_packet_l4(packet);
 
-        if (uh->udp_csum) {
-            uh->udp_csum = recalc_csum128(uh->udp_csum, addr, new_addr);
-            if (!uh->udp_csum) {
-                uh->udp_csum = htons(0xffff);
+            if (uh->udp_csum) {
+                uh->udp_csum = recalc_csum128(uh->udp_csum, addr, new_addr);
+                if (!uh->udp_csum) {
+                    uh->udp_csum = htons(0xffff);
+                }
             }
         }
     } else if (proto == IPPROTO_ICMPV6 &&
@@ -1372,7 +1386,9 @@  static void
 packet_set_port(ovs_be16 *port, ovs_be16 new_port, ovs_be16 *csum)
 {
     if (*port != new_port) {
-        *csum = recalc_csum16(*csum, *port, new_port);
+        if (csum) {
+            *csum = recalc_csum16(*csum, *port, new_port);
+        }
         *port = new_port;
     }
 }
@@ -1384,9 +1400,16 @@  void
 packet_set_tcp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst)
 {
     struct tcp_header *th = dp_packet_l4(packet);
+    ovs_be16 *csum = NULL;
+
+    if (dp_packet_ol_tx_tcp_csum(packet)) {
+        dp_packet_ol_reset_l4_csum_good(packet);
+    } else {
+        csum = &th->tcp_csum;
+    }
 
-    packet_set_port(&th->tcp_src, src, &th->tcp_csum);
-    packet_set_port(&th->tcp_dst, dst, &th->tcp_csum);
+    packet_set_port(&th->tcp_src, src, csum);
+    packet_set_port(&th->tcp_dst, dst, csum);
     pkt_metadata_init_conn(&packet->md);
 }
 
@@ -1398,17 +1421,21 @@  packet_set_udp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst)
 {
     struct udp_header *uh = dp_packet_l4(packet);
 
-    if (uh->udp_csum) {
-        packet_set_port(&uh->udp_src, src, &uh->udp_csum);
-        packet_set_port(&uh->udp_dst, dst, &uh->udp_csum);
+    if (dp_packet_ol_tx_udp_csum(packet)) {
+        dp_packet_ol_reset_l4_csum_good(packet);
+        packet_set_port(&uh->udp_src, src, NULL);
+        packet_set_port(&uh->udp_dst, dst, NULL);
+    } else {
+        ovs_be16 *csum = uh->udp_csum ? &uh->udp_csum : NULL;
+
+        packet_set_port(&uh->udp_src, src, csum);
+        packet_set_port(&uh->udp_dst, dst, csum);
 
-        if (!uh->udp_csum) {
+        if (csum && !uh->udp_csum) {
             uh->udp_csum = htons(0xffff);
         }
-    } else {
-        uh->udp_src = src;
-        uh->udp_dst = dst;
     }
+
     pkt_metadata_init_conn(&packet->md);
 }
 
@@ -1419,18 +1446,27 @@  void
 packet_set_sctp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst)
 {
     struct sctp_header *sh = dp_packet_l4(packet);
-    ovs_be32 old_csum, old_correct_csum, new_csum;
-    uint16_t tp_len = dp_packet_l4_size(packet);
 
-    old_csum = get_16aligned_be32(&sh->sctp_csum);
-    put_16aligned_be32(&sh->sctp_csum, 0);
-    old_correct_csum = crc32c((void *)sh, tp_len);
+    if (dp_packet_ol_tx_sctp_csum(packet)) {
+        dp_packet_ol_reset_l4_csum_good(packet);
+        sh->sctp_src = src;
+        sh->sctp_dst = dst;
+    } else {
+        ovs_be32 old_csum, old_correct_csum, new_csum;
+        uint16_t tp_len = dp_packet_l4_size(packet);
 
-    sh->sctp_src = src;
-    sh->sctp_dst = dst;
+        old_csum = get_16aligned_be32(&sh->sctp_csum);
+        put_16aligned_be32(&sh->sctp_csum, 0);
+        old_correct_csum = crc32c((void *) sh, tp_len);
+
+        sh->sctp_src = src;
+        sh->sctp_dst = dst;
+
+        new_csum = crc32c((void *) sh, tp_len);
+        put_16aligned_be32(&sh->sctp_csum, old_csum ^ old_correct_csum
+                           ^ new_csum);
+    }
 
-    new_csum = crc32c((void *)sh, tp_len);
-    put_16aligned_be32(&sh->sctp_csum, old_csum ^ old_correct_csum ^ new_csum);
     pkt_metadata_init_conn(&packet->md);
 }
 
@@ -1954,3 +1990,72 @@  IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6)
         }
     }
 }
+
+/* Set TCP checksum field in packet 'p' with complete checksum.
+ * The packet must have the L3 and L4 offsets. */
+void
+packet_tcp_complete_csum(struct dp_packet *p)
+{
+    struct tcp_header *tcp = dp_packet_l4(p);
+
+    tcp->tcp_csum = 0;
+    if (dp_packet_ol_tx_ipv4(p)) {
+        struct ip_header *ip = dp_packet_l3(p);
+
+        tcp->tcp_csum = csum_finish(csum_continue(packet_csum_pseudoheader(ip),
+                                                  tcp, dp_packet_l4_size(p)));
+    } else if (dp_packet_ol_tx_ipv6(p)) {
+        struct ovs_16aligned_ip6_hdr *ip6 = dp_packet_l3(p);
+
+        tcp->tcp_csum = packet_csum_upperlayer6(ip6, tcp, ip6->ip6_nxt,
+                                                dp_packet_l4_size(p));
+    } else {
+        OVS_NOT_REACHED();
+    }
+}
+
+/* Set UDP checksum field in packet 'p' with complete checksum.
+ * The packet must have the L3 and L4 offsets. */
+void
+packet_udp_complete_csum(struct dp_packet *p)
+{
+    struct udp_header *udp = dp_packet_l4(p);
+
+    /* Skip csum calculation if the udp_csum is zero. */
+    if (!udp->udp_csum) {
+        return;
+    }
+
+    udp->udp_csum = 0;
+    if (dp_packet_ol_tx_ipv4(p)) {
+        struct ip_header *ip = dp_packet_l3(p);
+
+        udp->udp_csum = csum_finish(csum_continue(packet_csum_pseudoheader(ip),
+                                                  udp, dp_packet_l4_size(p)));
+    } else if (dp_packet_ol_tx_ipv6(p)) {
+        struct ovs_16aligned_ip6_hdr *ip6 = dp_packet_l3(p);
+
+        udp->udp_csum = packet_csum_upperlayer6(ip6, udp, ip6->ip6_nxt,
+                                                dp_packet_l4_size(p));
+    } else {
+        OVS_NOT_REACHED();
+    }
+
+    if (!udp->udp_csum) {
+        udp->udp_csum = htons(0xffff);
+    }
+}
+
+/* Set SCTP checksum field in packet 'p' with complete checksum.
+ * The packet must have the L3 and L4 offsets. */
+void
+packet_sctp_complete_csum(struct dp_packet *p)
+{
+    struct sctp_header *sh = dp_packet_l4(p);
+    uint16_t tp_len = dp_packet_l4_size(p);
+    ovs_be32 csum;
+
+    put_16aligned_be32(&sh->sctp_csum, 0);
+    csum = crc32c((void *) sh, tp_len);
+    put_16aligned_be32(&sh->sctp_csum, csum);
+}
diff --git a/lib/packets.h b/lib/packets.h
index 5bdf6e4bb..28950b8b1 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -1643,6 +1643,9 @@  void packet_put_ra_prefix_opt(struct dp_packet *,
                               const ovs_be128 router_prefix);
 uint32_t packet_csum_pseudoheader(const struct ip_header *);
 void IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6);
+void packet_tcp_complete_csum(struct dp_packet *);
+void packet_udp_complete_csum(struct dp_packet *);
+void packet_sctp_complete_csum(struct dp_packet *);
 
 #define DNS_HEADER_LEN 12
 struct dns_header {