diff mbox series

[ovs-dev,v6,2/3] userspace: Respect tso/gso segment size.

Message ID 20231031195138.2178690-2-mkp@redhat.com
State Changes Requested
Delegated to: Ilya Maximets
Headers show
Series [ovs-dev,v6,1/3] netdev-linux: Use ethtool to detect offload support. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/intel-ovs-compilation success test: success

Commit Message

Mike Pattrick Oct. 31, 2023, 7:51 p.m. UTC
From: Flavio Leitner <fbl@sysclose.org>

Currently OVS will calculate the segment size based on the
MTU of the egress port. That usually happens to be correct
when the ports share the same MTU, but that is not always true.

Therefore, if the segment size is provided, then use that and
make sure the over sized packets are dropped.

Signed-off-by: Flavio Leitner <fbl@sysclose.org>
Co-authored-by: Mike Pattrick <mkp@redhat.com>
Signed-off-by: Mike Pattrick <mkp@redhat.com>
---
 lib/dp-packet.c    |  3 ++
 lib/dp-packet.h    | 26 ++++++++++++++++
 lib/netdev-dpdk.c  | 12 +++++++-
 lib/netdev-linux.c | 76 ++++++++++++++++++++++++++++++++--------------
 4 files changed, 94 insertions(+), 23 deletions(-)

Comments

Simon Horman Nov. 14, 2023, 1:10 p.m. UTC | #1
On Tue, Oct 31, 2023 at 03:51:37PM -0400, Mike Pattrick wrote:
> From: Flavio Leitner <fbl@sysclose.org>
> 
> Currently OVS will calculate the segment size based on the
> MTU of the egress port. That usually happens to be correct
> when the ports share the same MTU, but that is not always true.
> 
> Therefore, if the segment size is provided, then use that and
> make sure the over sized packets are dropped.
> 
> Signed-off-by: Flavio Leitner <fbl@sysclose.org>
> Co-authored-by: Mike Pattrick <mkp@redhat.com>
> Signed-off-by: Mike Pattrick <mkp@redhat.com>

...

> diff --git a/lib/dp-packet.h b/lib/dp-packet.h
> index 70ddf8aa4..6a924f3ff 100644
> --- a/lib/dp-packet.h
> +++ b/lib/dp-packet.h
> @@ -126,6 +126,7 @@ struct dp_packet {
>      uint32_t ol_flags;          /* Offloading flags. */
>      uint32_t rss_hash;          /* Packet hash. */
>      uint32_t flow_mark;         /* Packet flow mark. */
> +    uint16_t tso_segsz;         /* TCP TSO segment size. */

Hi Mike,

I don't think there is any need to respin just because of this,
but in my view "TCP TSO" is a tautology. This is because, AFAIK,
the T in TSO stands for TCP.

The above notwithstanding this patch looks good to me.

Acked-by: Simon Horman <horms@ovn.org>

...
Ilya Maximets Nov. 16, 2023, 10:49 p.m. UTC | #2
On 10/31/23 20:51, Mike Pattrick wrote:
> From: Flavio Leitner <fbl@sysclose.org>
> 
> Currently OVS will calculate the segment size based on the
> MTU of the egress port. That usually happens to be correct
> when the ports share the same MTU, but that is not always true.
> 
> Therefore, if the segment size is provided, then use that and
> make sure the over sized packets are dropped.
> 
> Signed-off-by: Flavio Leitner <fbl@sysclose.org>
> Co-authored-by: Mike Pattrick <mkp@redhat.com>
> Signed-off-by: Mike Pattrick <mkp@redhat.com>
> ---
>  lib/dp-packet.c    |  3 ++
>  lib/dp-packet.h    | 26 ++++++++++++++++
>  lib/netdev-dpdk.c  | 12 +++++++-
>  lib/netdev-linux.c | 76 ++++++++++++++++++++++++++++++++--------------
>  4 files changed, 94 insertions(+), 23 deletions(-)
> 
> diff --git a/lib/dp-packet.c b/lib/dp-packet.c
> index ed004c3b9..920402369 100644
> --- a/lib/dp-packet.c
> +++ b/lib/dp-packet.c
> @@ -34,6 +34,7 @@ dp_packet_init__(struct dp_packet *b, size_t allocated, enum dp_packet_source so
>      pkt_metadata_init(&b->md, 0);
>      dp_packet_reset_cutlen(b);
>      dp_packet_reset_offload(b);
> +    dp_packet_set_tso_segsz(b, 0);
>      /* Initialize implementation-specific fields of dp_packet. */
>      dp_packet_init_specific(b);
>      /* By default assume the packet type to be Ethernet. */
> @@ -203,6 +204,8 @@ dp_packet_clone_with_headroom(const struct dp_packet *buffer, size_t headroom)
>      *dp_packet_ol_flags_ptr(new_buffer) = *dp_packet_ol_flags_ptr(buffer);
>      *dp_packet_ol_flags_ptr(new_buffer) &= DP_PACKET_OL_SUPPORTED_MASK;
>  
> +    dp_packet_set_tso_segsz(new_buffer, dp_packet_get_tso_segsz(buffer));
> +
>      if (dp_packet_rss_valid(buffer)) {
>          dp_packet_set_rss_hash(new_buffer, dp_packet_get_rss_hash(buffer));
>      }
> diff --git a/lib/dp-packet.h b/lib/dp-packet.h
> index 70ddf8aa4..6a924f3ff 100644
> --- a/lib/dp-packet.h
> +++ b/lib/dp-packet.h
> @@ -126,6 +126,7 @@ struct dp_packet {
>      uint32_t ol_flags;          /* Offloading flags. */
>      uint32_t rss_hash;          /* Packet hash. */
>      uint32_t flow_mark;         /* Packet flow mark. */
> +    uint16_t tso_segsz;         /* TCP TSO segment size. */
>  #endif
>      enum dp_packet_source source;  /* Source of memory allocated as 'base'. */
>  
> @@ -166,6 +167,9 @@ static inline void dp_packet_set_size(struct dp_packet *, uint32_t);
>  static inline uint16_t dp_packet_get_allocated(const struct dp_packet *);
>  static inline void dp_packet_set_allocated(struct dp_packet *, uint16_t);
>  
> +static inline uint16_t dp_packet_get_tso_segsz(const struct dp_packet *);
> +static inline void dp_packet_set_tso_segsz(struct dp_packet *, uint16_t);
> +
>  void *dp_packet_resize_l2(struct dp_packet *, int increment);
>  void *dp_packet_resize_l2_5(struct dp_packet *, int increment);
>  static inline void *dp_packet_eth(const struct dp_packet *);
> @@ -644,6 +648,17 @@ dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
>      b->mbuf.buf_len = s;
>  }
>  
> +static inline uint16_t
> +dp_packet_get_tso_segsz(const struct dp_packet *p)
> +{
> +    return p->mbuf.tso_segsz;
> +}
> +
> +static inline void
> +dp_packet_set_tso_segsz(struct dp_packet *p, uint16_t s)
> +{
> +    p->mbuf.tso_segsz = s;
> +}
>  #else /* DPDK_NETDEV */
>  
>  static inline void
> @@ -700,6 +715,17 @@ dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
>      b->allocated_ = s;
>  }
>  
> +static inline uint16_t
> +dp_packet_get_tso_segsz(const struct dp_packet *p)
> +{
> +    return p->tso_segsz;
> +}
> +
> +static inline void
> +dp_packet_set_tso_segsz(struct dp_packet *p, uint16_t s)
> +{
> +    p->tso_segsz = s;
> +}
>  #endif /* DPDK_NETDEV */
>  
>  static inline void
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 55700250d..9f20cc689 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -2453,6 +2453,7 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
>  
>      if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
>          struct tcp_header *th = dp_packet_l4(pkt);
> +        int hdr_len;
>  
>          if (!th) {
>              VLOG_WARN_RL(&rl, "%s: TCP Segmentation without L4 header"
> @@ -2462,7 +2463,15 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
>  
>          mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4;
>          mbuf->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM;
> +        hdr_len = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len;
>          mbuf->tso_segsz = dev->mtu - mbuf->l3_len - mbuf->l4_len;
> +        if (OVS_UNLIKELY((hdr_len + mbuf->tso_segsz) > dev->max_packet_len)) {
> +            VLOG_WARN_RL(&rl, "%s: Oversized TSO packet. "
> +                         "hdr: %"PRIu32", gso: %"PRIu32", max len: %"PRIu32"",
> +                         dev->up.name, hdr_len, mbuf->tso_segsz,
> +                         dev->max_packet_len);
> +            return false;
> +        }
>  
>          if (mbuf->ol_flags & RTE_MBUF_F_TX_IPV4) {
>              mbuf->ol_flags |= RTE_MBUF_F_TX_IP_CKSUM;
> @@ -2737,7 +2746,8 @@ netdev_dpdk_filter_packet_len(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
>      int cnt = 0;
>      struct rte_mbuf *pkt;
>  
> -    /* Filter oversized packets, unless are marked for TSO. */
> +    /* Filter oversized packets. The TSO packets are filtered out
> +     * during the offloading preparation for performance reasons. */
>      for (i = 0; i < pkt_cnt; i++) {
>          pkt = pkts[i];
>          if (OVS_UNLIKELY((pkt->pkt_len > dev->max_packet_len)
> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> index a46f5127f..4d91391fb 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -539,7 +539,7 @@ static atomic_count miimon_cnt = ATOMIC_COUNT_INIT(0);
>  static bool tap_supports_vnet_hdr = true;
>  
>  static int netdev_linux_parse_vnet_hdr(struct dp_packet *b);
> -static void netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu);
> +static int netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu);
>  static int netdev_linux_do_ethtool(const char *name, struct ethtool_cmd *,
>                                     int cmd, const char *cmd_name);
>  static int get_flags(const struct netdev *, unsigned int *flags);
> @@ -1593,9 +1593,10 @@ netdev_linux_rxq_drain(struct netdev_rxq *rxq_)
>  }
>  
>  static int
> -netdev_linux_sock_batch_send(int sock, int ifindex, bool tso, int mtu,
> -                             struct dp_packet_batch *batch)
> +netdev_linux_sock_batch_send(struct netdev *netdev_, int sock, int ifindex,
> +                             bool tso, int mtu, struct dp_packet_batch *batch)
>  {
> +    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
>      const size_t size = dp_packet_batch_size(batch);
>      /* We don't bother setting most fields in sockaddr_ll because the
>       * kernel ignores them for SOCK_RAW. */
> @@ -1604,26 +1605,35 @@ netdev_linux_sock_batch_send(int sock, int ifindex, bool tso, int mtu,
>  
>      struct mmsghdr *mmsg = xmalloc(sizeof(*mmsg) * size);
>      struct iovec *iov = xmalloc(sizeof(*iov) * size);
> -
>      struct dp_packet *packet;
> +    int cnt = 0;
> +
>      DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
>          if (tso) {
> -            netdev_linux_prepend_vnet_hdr(packet, mtu);
> -        }
> +            int ret = netdev_linux_prepend_vnet_hdr(packet, mtu);
> +
> +            if (OVS_UNLIKELY(ret)) {
> +                netdev->tx_dropped += 1;
> +                VLOG_WARN_RL(&rl, "%s: Packet dropped. %s",

Nit: maybe make this error message the same as for tap?
It's missing some context.

> +                             netdev_get_name(netdev_), ovs_strerror(ret));
> +                continue;
> +            }
> +         }
>  
> -        iov[i].iov_base = dp_packet_data(packet);
> -        iov[i].iov_len = dp_packet_size(packet);
> -        mmsg[i].msg_hdr = (struct msghdr) { .msg_name = &sll,
> -                                            .msg_namelen = sizeof sll,
> -                                            .msg_iov = &iov[i],
> -                                            .msg_iovlen = 1 };
> +        iov[cnt].iov_base = dp_packet_data(packet);
> +        iov[cnt].iov_len = dp_packet_size(packet);
> +        mmsg[cnt].msg_hdr = (struct msghdr) { .msg_name = &sll,
> +                                              .msg_namelen = sizeof sll,
> +                                              .msg_iov = &iov[cnt],
> +                                              .msg_iovlen = 1 };
> +        cnt++;
>      }
>  
>      int error = 0;
> -    for (uint32_t ofs = 0; ofs < size; ) {
> +    for (uint32_t ofs = 0; ofs < cnt;) {
>          ssize_t retval;
>          do {
> -            retval = sendmmsg(sock, mmsg + ofs, size - ofs, 0);
> +            retval = sendmmsg(sock, mmsg + ofs, cnt - ofs, 0);
>              error = retval < 0 ? errno : 0;
>          } while (error == EINTR);
>          if (error) {
> @@ -1665,7 +1675,14 @@ netdev_linux_tap_batch_send(struct netdev *netdev_, int mtu,
>          int error;
>  
>          if (OVS_LIKELY(tap_supports_vnet_hdr)) {
> -            netdev_linux_prepend_vnet_hdr(packet, mtu);
> +            error = netdev_linux_prepend_vnet_hdr(packet, mtu);
> +            if (OVS_UNLIKELY(error)) {
> +                netdev->tx_dropped++;
> +                VLOG_WARN_RL(&rl, "%s: Prepend vnet hdr failed, packet "
> +                             "dropped. %s", netdev_get_name(netdev_),
> +                             ovs_strerror(error));
> +                continue;
> +            }
>          }
>  
>          size = dp_packet_size(packet);
> @@ -1795,7 +1812,8 @@ netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
>              goto free_batch;
>          }
>  
> -        error = netdev_linux_sock_batch_send(sock, ifindex, tso, mtu, batch);
> +        error = netdev_linux_sock_batch_send(netdev_, sock, ifindex, tso, mtu,
> +                                             batch);
>      } else {
>          error = netdev_linux_tap_batch_send(netdev_, mtu, batch);
>      }
> @@ -7089,8 +7107,7 @@ netdev_linux_parse_vnet_hdr(struct dp_packet *b)
>      switch (vnet->gso_type) {
>      case VIRTIO_NET_HDR_GSO_TCPV4:
>      case VIRTIO_NET_HDR_GSO_TCPV6:
> -        /* FIXME: The packet has offloaded TCP segmentation. The gso_size
> -         * is given and needs to be respected. */
> +        dp_packet_set_tso_segsz(b, (OVS_FORCE uint16_t) vnet->gso_size);
>          dp_packet_hwol_set_tcp_seg(b);
>          break;
>  
> @@ -7112,18 +7129,32 @@ netdev_linux_parse_vnet_hdr(struct dp_packet *b)
>      return ret;
>  }
>  
> -static void
> +/* Prepends struct virtio_net_hdr to packet 'b'.
> + * Returns 0 if successful, otherwise a positive errno value.
> + * Returns EMSGSIZE if the packet 'b' cannot be sent over MTU 'mtu'. */
> +static int
>  netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu)
>  {
>      struct virtio_net_hdr v;
>      struct virtio_net_hdr *vnet = &v;
>  
>      if (dp_packet_hwol_is_tso(b)) {
> -        uint16_t hdr_len = ((char *)dp_packet_l4(b) - (char *)dp_packet_eth(b))
> -                            + TCP_HEADER_LEN;
> +        uint16_t tso_segsz = dp_packet_get_tso_segsz(b);
> +        struct tcp_header *tcp = dp_packet_l4(b);
> +        int tcp_hdr_len = TCP_OFFSET(tcp->tcp_ctl) * 4;
> +        int hdr_len = ((char *) dp_packet_l4(b) - (char *) dp_packet_eth(b))
> +                      + tcp_hdr_len;
> +        int max_packet_len = mtu + ETH_HEADER_LEN + VLAN_HEADER_LEN;
> +
> +        if (OVS_UNLIKELY((hdr_len + tso_segsz) > max_packet_len)) {
> +            VLOG_WARN_RL(&rl, "Oversized TSO packet. hdr_len: %"PRIu32", "
> +                         "gso: %"PRIu16", max length: %"PRIu32".", hdr_len,
> +                         tso_segsz, max_packet_len);
> +            return EMSGSIZE;
> +        }
>  
>          vnet->hdr_len = (OVS_FORCE __virtio16)hdr_len;
> -        vnet->gso_size = (OVS_FORCE __virtio16)(mtu - hdr_len);
> +        vnet->gso_size = (OVS_FORCE __virtio16)(tso_segsz);
>          if (dp_packet_hwol_is_ipv4(b)) {
>              vnet->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
>          } else if (dp_packet_hwol_tx_ipv6(b)) {
> @@ -7213,4 +7244,5 @@ netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu)
>      }
>  
>      dp_packet_push(b, vnet, sizeof *vnet);
> +    return 0;
>  }
diff mbox series

Patch

diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index ed004c3b9..920402369 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -34,6 +34,7 @@  dp_packet_init__(struct dp_packet *b, size_t allocated, enum dp_packet_source so
     pkt_metadata_init(&b->md, 0);
     dp_packet_reset_cutlen(b);
     dp_packet_reset_offload(b);
+    dp_packet_set_tso_segsz(b, 0);
     /* Initialize implementation-specific fields of dp_packet. */
     dp_packet_init_specific(b);
     /* By default assume the packet type to be Ethernet. */
@@ -203,6 +204,8 @@  dp_packet_clone_with_headroom(const struct dp_packet *buffer, size_t headroom)
     *dp_packet_ol_flags_ptr(new_buffer) = *dp_packet_ol_flags_ptr(buffer);
     *dp_packet_ol_flags_ptr(new_buffer) &= DP_PACKET_OL_SUPPORTED_MASK;
 
+    dp_packet_set_tso_segsz(new_buffer, dp_packet_get_tso_segsz(buffer));
+
     if (dp_packet_rss_valid(buffer)) {
         dp_packet_set_rss_hash(new_buffer, dp_packet_get_rss_hash(buffer));
     }
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 70ddf8aa4..6a924f3ff 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -126,6 +126,7 @@  struct dp_packet {
     uint32_t ol_flags;          /* Offloading flags. */
     uint32_t rss_hash;          /* Packet hash. */
     uint32_t flow_mark;         /* Packet flow mark. */
+    uint16_t tso_segsz;         /* TCP TSO segment size. */
 #endif
     enum dp_packet_source source;  /* Source of memory allocated as 'base'. */
 
@@ -166,6 +167,9 @@  static inline void dp_packet_set_size(struct dp_packet *, uint32_t);
 static inline uint16_t dp_packet_get_allocated(const struct dp_packet *);
 static inline void dp_packet_set_allocated(struct dp_packet *, uint16_t);
 
+static inline uint16_t dp_packet_get_tso_segsz(const struct dp_packet *);
+static inline void dp_packet_set_tso_segsz(struct dp_packet *, uint16_t);
+
 void *dp_packet_resize_l2(struct dp_packet *, int increment);
 void *dp_packet_resize_l2_5(struct dp_packet *, int increment);
 static inline void *dp_packet_eth(const struct dp_packet *);
@@ -644,6 +648,17 @@  dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
     b->mbuf.buf_len = s;
 }
 
+static inline uint16_t
+dp_packet_get_tso_segsz(const struct dp_packet *p)
+{
+    return p->mbuf.tso_segsz;
+}
+
+static inline void
+dp_packet_set_tso_segsz(struct dp_packet *p, uint16_t s)
+{
+    p->mbuf.tso_segsz = s;
+}
 #else /* DPDK_NETDEV */
 
 static inline void
@@ -700,6 +715,17 @@  dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
     b->allocated_ = s;
 }
 
+static inline uint16_t
+dp_packet_get_tso_segsz(const struct dp_packet *p)
+{
+    return p->tso_segsz;
+}
+
+static inline void
+dp_packet_set_tso_segsz(struct dp_packet *p, uint16_t s)
+{
+    p->tso_segsz = s;
+}
 #endif /* DPDK_NETDEV */
 
 static inline void
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 55700250d..9f20cc689 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -2453,6 +2453,7 @@  netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
 
     if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
         struct tcp_header *th = dp_packet_l4(pkt);
+        int hdr_len;
 
         if (!th) {
             VLOG_WARN_RL(&rl, "%s: TCP Segmentation without L4 header"
@@ -2462,7 +2463,15 @@  netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
 
         mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4;
         mbuf->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM;
+        hdr_len = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len;
         mbuf->tso_segsz = dev->mtu - mbuf->l3_len - mbuf->l4_len;
+        if (OVS_UNLIKELY((hdr_len + mbuf->tso_segsz) > dev->max_packet_len)) {
+            VLOG_WARN_RL(&rl, "%s: Oversized TSO packet. "
+                         "hdr: %"PRIu32", gso: %"PRIu32", max len: %"PRIu32"",
+                         dev->up.name, hdr_len, mbuf->tso_segsz,
+                         dev->max_packet_len);
+            return false;
+        }
 
         if (mbuf->ol_flags & RTE_MBUF_F_TX_IPV4) {
             mbuf->ol_flags |= RTE_MBUF_F_TX_IP_CKSUM;
@@ -2737,7 +2746,8 @@  netdev_dpdk_filter_packet_len(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
     int cnt = 0;
     struct rte_mbuf *pkt;
 
-    /* Filter oversized packets, unless are marked for TSO. */
+    /* Filter oversized packets. The TSO packets are filtered out
+     * during the offloading preparation for performance reasons. */
     for (i = 0; i < pkt_cnt; i++) {
         pkt = pkts[i];
         if (OVS_UNLIKELY((pkt->pkt_len > dev->max_packet_len)
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index a46f5127f..4d91391fb 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -539,7 +539,7 @@  static atomic_count miimon_cnt = ATOMIC_COUNT_INIT(0);
 static bool tap_supports_vnet_hdr = true;
 
 static int netdev_linux_parse_vnet_hdr(struct dp_packet *b);
-static void netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu);
+static int netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu);
 static int netdev_linux_do_ethtool(const char *name, struct ethtool_cmd *,
                                    int cmd, const char *cmd_name);
 static int get_flags(const struct netdev *, unsigned int *flags);
@@ -1593,9 +1593,10 @@  netdev_linux_rxq_drain(struct netdev_rxq *rxq_)
 }
 
 static int
-netdev_linux_sock_batch_send(int sock, int ifindex, bool tso, int mtu,
-                             struct dp_packet_batch *batch)
+netdev_linux_sock_batch_send(struct netdev *netdev_, int sock, int ifindex,
+                             bool tso, int mtu, struct dp_packet_batch *batch)
 {
+    struct netdev_linux *netdev = netdev_linux_cast(netdev_);
     const size_t size = dp_packet_batch_size(batch);
     /* We don't bother setting most fields in sockaddr_ll because the
      * kernel ignores them for SOCK_RAW. */
@@ -1604,26 +1605,35 @@  netdev_linux_sock_batch_send(int sock, int ifindex, bool tso, int mtu,
 
     struct mmsghdr *mmsg = xmalloc(sizeof(*mmsg) * size);
     struct iovec *iov = xmalloc(sizeof(*iov) * size);
-
     struct dp_packet *packet;
+    int cnt = 0;
+
     DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
         if (tso) {
-            netdev_linux_prepend_vnet_hdr(packet, mtu);
-        }
+            int ret = netdev_linux_prepend_vnet_hdr(packet, mtu);
+
+            if (OVS_UNLIKELY(ret)) {
+                netdev->tx_dropped += 1;
+                VLOG_WARN_RL(&rl, "%s: Packet dropped. %s",
+                             netdev_get_name(netdev_), ovs_strerror(ret));
+                continue;
+            }
+         }
 
-        iov[i].iov_base = dp_packet_data(packet);
-        iov[i].iov_len = dp_packet_size(packet);
-        mmsg[i].msg_hdr = (struct msghdr) { .msg_name = &sll,
-                                            .msg_namelen = sizeof sll,
-                                            .msg_iov = &iov[i],
-                                            .msg_iovlen = 1 };
+        iov[cnt].iov_base = dp_packet_data(packet);
+        iov[cnt].iov_len = dp_packet_size(packet);
+        mmsg[cnt].msg_hdr = (struct msghdr) { .msg_name = &sll,
+                                              .msg_namelen = sizeof sll,
+                                              .msg_iov = &iov[cnt],
+                                              .msg_iovlen = 1 };
+        cnt++;
     }
 
     int error = 0;
-    for (uint32_t ofs = 0; ofs < size; ) {
+    for (uint32_t ofs = 0; ofs < cnt;) {
         ssize_t retval;
         do {
-            retval = sendmmsg(sock, mmsg + ofs, size - ofs, 0);
+            retval = sendmmsg(sock, mmsg + ofs, cnt - ofs, 0);
             error = retval < 0 ? errno : 0;
         } while (error == EINTR);
         if (error) {
@@ -1665,7 +1675,14 @@  netdev_linux_tap_batch_send(struct netdev *netdev_, int mtu,
         int error;
 
         if (OVS_LIKELY(tap_supports_vnet_hdr)) {
-            netdev_linux_prepend_vnet_hdr(packet, mtu);
+            error = netdev_linux_prepend_vnet_hdr(packet, mtu);
+            if (OVS_UNLIKELY(error)) {
+                netdev->tx_dropped++;
+                VLOG_WARN_RL(&rl, "%s: Prepend vnet hdr failed, packet "
+                             "dropped. %s", netdev_get_name(netdev_),
+                             ovs_strerror(error));
+                continue;
+            }
         }
 
         size = dp_packet_size(packet);
@@ -1795,7 +1812,8 @@  netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
             goto free_batch;
         }
 
-        error = netdev_linux_sock_batch_send(sock, ifindex, tso, mtu, batch);
+        error = netdev_linux_sock_batch_send(netdev_, sock, ifindex, tso, mtu,
+                                             batch);
     } else {
         error = netdev_linux_tap_batch_send(netdev_, mtu, batch);
     }
@@ -7089,8 +7107,7 @@  netdev_linux_parse_vnet_hdr(struct dp_packet *b)
     switch (vnet->gso_type) {
     case VIRTIO_NET_HDR_GSO_TCPV4:
     case VIRTIO_NET_HDR_GSO_TCPV6:
-        /* FIXME: The packet has offloaded TCP segmentation. The gso_size
-         * is given and needs to be respected. */
+        dp_packet_set_tso_segsz(b, (OVS_FORCE uint16_t) vnet->gso_size);
         dp_packet_hwol_set_tcp_seg(b);
         break;
 
@@ -7112,18 +7129,32 @@  netdev_linux_parse_vnet_hdr(struct dp_packet *b)
     return ret;
 }
 
-static void
+/* Prepends struct virtio_net_hdr to packet 'b'.
+ * Returns 0 if successful, otherwise a positive errno value.
+ * Returns EMSGSIZE if the packet 'b' cannot be sent over MTU 'mtu'. */
+static int
 netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu)
 {
     struct virtio_net_hdr v;
     struct virtio_net_hdr *vnet = &v;
 
     if (dp_packet_hwol_is_tso(b)) {
-        uint16_t hdr_len = ((char *)dp_packet_l4(b) - (char *)dp_packet_eth(b))
-                            + TCP_HEADER_LEN;
+        uint16_t tso_segsz = dp_packet_get_tso_segsz(b);
+        struct tcp_header *tcp = dp_packet_l4(b);
+        int tcp_hdr_len = TCP_OFFSET(tcp->tcp_ctl) * 4;
+        int hdr_len = ((char *) dp_packet_l4(b) - (char *) dp_packet_eth(b))
+                      + tcp_hdr_len;
+        int max_packet_len = mtu + ETH_HEADER_LEN + VLAN_HEADER_LEN;
+
+        if (OVS_UNLIKELY((hdr_len + tso_segsz) > max_packet_len)) {
+            VLOG_WARN_RL(&rl, "Oversized TSO packet. hdr_len: %"PRIu32", "
+                         "gso: %"PRIu16", max length: %"PRIu32".", hdr_len,
+                         tso_segsz, max_packet_len);
+            return EMSGSIZE;
+        }
 
         vnet->hdr_len = (OVS_FORCE __virtio16)hdr_len;
-        vnet->gso_size = (OVS_FORCE __virtio16)(mtu - hdr_len);
+        vnet->gso_size = (OVS_FORCE __virtio16)(tso_segsz);
         if (dp_packet_hwol_is_ipv4(b)) {
             vnet->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
         } else if (dp_packet_hwol_tx_ipv6(b)) {
@@ -7213,4 +7244,5 @@  netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu)
     }
 
     dp_packet_push(b, vnet, sizeof *vnet);
+    return 0;
 }