@@ -345,6 +345,8 @@ lib_libopenvswitch_la_SOURCES = \
lib/unixctl.h \
lib/userspace-tso.c \
lib/userspace-tso.h \
+ lib/userspace-tso-segsz.c \
+ lib/userspace-tso-segsz.h \
lib/util.c \
lib/util.h \
lib/uuid.c \
@@ -502,7 +502,16 @@ dp_packet_resize_l2_5(struct dp_packet *b, int increment)
void *
dp_packet_resize_l2(struct dp_packet *b, int increment)
{
+ int outer_l2_len = dp_packet_hwol_get_outer_l2_len(b);
+
dp_packet_resize_l2_5(b, increment);
dp_packet_adjust_layer_offset(&b->l2_5_ofs, increment);
+ if (outer_l2_len) {
+ dp_packet_hwol_set_outer_l2_len(b, outer_l2_len + increment);
+ } else {
+ int l2_len = dp_packet_hwol_get_l2_len(b);
+
+ dp_packet_hwol_set_l2_len(b, l2_len + increment);
+ }
return dp_packet_data(b);
}
@@ -81,6 +81,14 @@ enum dp_packet_offload_mask {
DEF_OL_FLAG(DP_PACKET_OL_TX_UDP_CKSUM, PKT_TX_UDP_CKSUM, 0x400),
/* Offload SCTP checksum. */
DEF_OL_FLAG(DP_PACKET_OL_TX_SCTP_CKSUM, PKT_TX_SCTP_CKSUM, 0x800),
+ /* VXLAN TCP Segmentation Offload. */
+ DEF_OL_FLAG(DP_PACKET_OL_TX_TUNNEL_VXLAN, PKT_TX_TUNNEL_VXLAN, 0x1000),
+ /* UDP Segmentation Offload. */
+ DEF_OL_FLAG(DP_PACKET_OL_TX_UDP_SEG, PKT_TX_UDP_SEG, 0x2000),
+ /* Outer L3 Type IPV4 For Tunnel Offload. */
+ DEF_OL_FLAG(DP_PACKET_OL_TX_OUTER_IPV4, PKT_TX_OUTER_IPV4, 0x4000),
+ /* Outer L3 Type IPV6 For Tunnel Offload. */
+ DEF_OL_FLAG(DP_PACKET_OL_TX_OUTER_IPV6, PKT_TX_OUTER_IPV6, 0x8000),
/* Adding new field requires adding to DP_PACKET_OL_SUPPORTED_MASK. */
};
@@ -95,7 +103,8 @@ enum dp_packet_offload_mask {
DP_PACKET_OL_TX_IPV6 | \
DP_PACKET_OL_TX_TCP_CKSUM | \
DP_PACKET_OL_TX_UDP_CKSUM | \
- DP_PACKET_OL_TX_SCTP_CKSUM)
+ DP_PACKET_OL_TX_SCTP_CKSUM | \
+ DP_PACKET_OL_TX_UDP_SEG)
#define DP_PACKET_OL_TX_L4_MASK (DP_PACKET_OL_TX_TCP_CKSUM | \
DP_PACKET_OL_TX_UDP_CKSUM | \
@@ -954,6 +963,13 @@ dp_packet_hwol_is_tso(const struct dp_packet *b)
return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_TCP_SEG);
}
+/* Returns 'true' if packet 'b' is marked for UDP fragmentation offloading. */
+static inline bool
+dp_packet_hwol_is_ufo(const struct dp_packet *b)
+{
+ return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_UDP_SEG);
+}
+
/* Returns 'true' if packet 'b' is marked for IPv4 checksum offloading. */
static inline bool
dp_packet_hwol_is_ipv4(const struct dp_packet *b)
@@ -992,6 +1008,13 @@ dp_packet_hwol_set_tx_ipv4(struct dp_packet *b)
*dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_IPV4;
}
+/* Reset packet 'b' for IPv4 checksum offloading. */
+static inline void
+dp_packet_hwol_reset_tx_ipv4(struct dp_packet *b)
+{
+ *dp_packet_ol_flags_ptr(b) &= ~DP_PACKET_OL_TX_IPV4;
+}
+
/* Mark packet 'b' for IPv6 checksum offloading. */
static inline void
dp_packet_hwol_set_tx_ipv6(struct dp_packet *b)
@@ -999,6 +1022,27 @@ dp_packet_hwol_set_tx_ipv6(struct dp_packet *b)
*dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_IPV6;
}
+/* Reset packet 'b' for IPv6 checksum offloading. */
+static inline void
+dp_packet_hwol_reset_tx_ipv6(struct dp_packet *b)
+{
+ *dp_packet_ol_flags_ptr(b) &= ~DP_PACKET_OL_TX_IPV6;
+}
+
+/* Mark packet 'b' for Outer IPv4 checksum offloading. */
+static inline void
+dp_packet_hwol_set_tx_outer_ipv4(struct dp_packet *b)
+{
+ *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_OUTER_IPV4;
+}
+
+/* Mark packet 'b' for Outer IPv6 checksum offloading. */
+static inline void
+dp_packet_hwol_set_tx_outer_ipv6(struct dp_packet *b)
+{
+ *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_OUTER_IPV6;
+}
+
/* Mark packet 'b' for TCP checksum offloading. It implies that either
* the packet 'b' is marked for IPv4 or IPv6 checksum offloading. */
static inline void
@@ -1007,6 +1051,14 @@ dp_packet_hwol_set_csum_tcp(struct dp_packet *b)
*dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TCP_CKSUM;
}
+/* Reset TCP checksum offloading flag for packet 'b'.
+ */
+static inline void
+dp_packet_hwol_reset_csum_tcp(struct dp_packet *b)
+{
+ *dp_packet_ol_flags_ptr(b) &= ~DP_PACKET_OL_TX_TCP_CKSUM;
+}
+
/* Mark packet 'b' for UDP checksum offloading. It implies that either
* the packet 'b' is marked for IPv4 or IPv6 checksum offloading. */
static inline void
@@ -1015,6 +1067,15 @@ dp_packet_hwol_set_csum_udp(struct dp_packet *b)
*dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_UDP_CKSUM;
}
+/* Reset UDP checksum offloading flag for packet 'b'.
+ */
+static inline void
+dp_packet_hwol_reset_csum_udp(struct dp_packet *b)
+{
+ *dp_packet_ol_flags_ptr(b) &= ~DP_PACKET_OL_TX_UDP_CKSUM;
+}
+
+
/* Mark packet 'b' for SCTP checksum offloading. It implies that either
* the packet 'b' is marked for IPv4 or IPv6 checksum offloading. */
static inline void
@@ -1032,6 +1093,181 @@ dp_packet_hwol_set_tcp_seg(struct dp_packet *b)
*dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TCP_SEG;
}
+/* Mark packet 'b' for UDP segmentation offloading. It implies that
+ * either the packet 'b' is marked for IPv4 or IPv6 checksum offloading
+ * and also for UDP checksum offloading. */
+static inline void
+dp_packet_hwol_set_udp_seg(struct dp_packet *b)
+{
+ *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_UDP_SEG;
+}
+
+#ifdef DPDK_NETDEV
+/* Set l2_len for the packet 'b' */
+static inline void
+dp_packet_hwol_set_l2_len(struct dp_packet *b, int l2_len)
+{
+ b->mbuf.l2_len = l2_len;
+}
+
+/* Set l3_len for the packet 'b' */
+static inline void
+dp_packet_hwol_set_l3_len(struct dp_packet *b, int l3_len)
+{
+ b->mbuf.l3_len = l3_len;
+}
+
+/* Set l4_len for the packet 'b' */
+static inline void
+dp_packet_hwol_set_l4_len(struct dp_packet *b, int l4_len)
+{
+ b->mbuf.l4_len = l4_len;
+}
+
+/* Set outer_l2_len for the packet 'b' */
+static inline void
+dp_packet_hwol_set_outer_l2_len(struct dp_packet *b, int outer_l2_len)
+{
+ b->mbuf.outer_l2_len = outer_l2_len;
+}
+
+/* Set outer_l3_len for the packet 'b' */
+static inline void
+dp_packet_hwol_set_outer_l3_len(struct dp_packet *b, int outer_l3_len)
+{
+ b->mbuf.outer_l3_len = outer_l3_len;
+}
+
+/* Get l2_len for the packet 'b' */
+static inline int
+dp_packet_hwol_get_l2_len(struct dp_packet *b)
+{
+ return b->mbuf.l2_len;
+}
+
+/* Get l3_len for the packet 'b' */
+static inline int
+dp_packet_hwol_get_l3_len(struct dp_packet *b)
+{
+ return b->mbuf.l3_len;
+}
+
+/* Get l4_len for the packet 'b' */
+static inline int
+dp_packet_hwol_get_l4_len(struct dp_packet *b)
+{
+ return b->mbuf.l4_len;
+}
+
+/* Get outer_l2_len for the packet 'b' */
+static inline int
+dp_packet_hwol_get_outer_l2_len(struct dp_packet *b)
+{
+ return b->mbuf.outer_l2_len;
+}
+
+
+/* Get outer_l3_len for the packet 'b' */
+static inline int
+dp_packet_hwol_get_outer_l3_len(struct dp_packet *b)
+{
+ return b->mbuf.outer_l3_len;
+}
+
+#else
+/* Set l2_len for the packet 'b' */
+static inline void
+dp_packet_hwol_set_l2_len(struct dp_packet *b OVS_UNUSED,
+ int l2_len OVS_UNUSED)
+{
+}
+
+/* Set l3_len for the packet 'b' */
+static inline void
+dp_packet_hwol_set_l3_len(struct dp_packet *b OVS_UNUSED,
+ int l3_len OVS_UNUSED)
+{
+}
+
+/* Set l4_len for the packet 'b' */
+static inline void
+dp_packet_hwol_set_l4_len(struct dp_packet *b OVS_UNUSED,
+ int l4_len OVS_UNUSED)
+{
+}
+
+/* Set outer_l2_len for the packet 'b' */
+static inline void
+dp_packet_hwol_set_outer_l2_len(struct dp_packet *b OVS_UNUSED,
+ int outer_l2_len OVS_UNUSED)
+{
+}
+
+/* Set outer_l3_len for the packet 'b' */
+static inline void
+dp_packet_hwol_set_outer_l3_len(struct dp_packet *b OVS_UNUSED,
+ int outer_l3_len OVS_UNUSED)
+{
+}
+
+/* Get l2_len for the packet 'b' */
+static inline int
+dp_packet_hwol_get_l2_len(struct dp_packet *b)
+{
+ return ((char *)dp_packet_l3(b) - (char *)dp_packet_eth(b));
+}
+
+/* Get l3_len for the packet 'b' */
+static inline int
+dp_packet_hwol_get_l3_len(struct dp_packet *b)
+{
+ return ((char *)dp_packet_l4(b) - (char *)dp_packet_l3(b));
+}
+
+/* Get l4_len for the packet 'b' */
+static inline int
+dp_packet_hwol_get_l4_len(struct dp_packet *b OVS_UNUSED)
+{
+ return 0;
+}
+
+
+/* Get outer_l2_len for the packet 'b' */
+static inline int
+dp_packet_hwol_get_outer_l2_len(struct dp_packet *b)
+{
+ return ((char *)dp_packet_l3(b) - (char *)dp_packet_eth(b));
+}
+
+/* Get outer_l3_len for the packet 'b' */
+static inline int
+dp_packet_hwol_get_outer_l3_len(struct dp_packet *b)
+{
+ return ((char *)dp_packet_l4(b) - (char *)dp_packet_l3(b));
+}
+
+#endif /* DPDK_NETDEV */
+
+/* Mark packet 'b' for VXLAN TCP segmentation offloading. */
+static inline void
+dp_packet_hwol_set_vxlan_tcp_seg(struct dp_packet *b)
+{
+ *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TUNNEL_VXLAN;
+ /* Set outer_l2_len and outer_l3_len */
+ dp_packet_hwol_set_outer_l2_len(b, (char *) dp_packet_l3(b)
+ - (char *) dp_packet_eth(b));
+ dp_packet_hwol_set_outer_l3_len(b, (char *) dp_packet_l4(b)
+ - (char *) dp_packet_l3(b));
+}
+
+/* Check if it is a VXLAN packet */
+static inline bool
+dp_packet_hwol_is_vxlan_tcp_seg(struct dp_packet *b)
+{
+ return (*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_TUNNEL_VXLAN);
+}
+
+
static inline bool
dp_packet_ip_checksum_valid(const struct dp_packet *p)
{
@@ -38,6 +38,7 @@
#include <rte_errno.h>
#include <rte_ethdev.h>
#include <rte_flow.h>
+#include <rte_ip.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_meter.h>
@@ -72,6 +73,7 @@
#include "unaligned.h"
#include "unixctl.h"
#include "userspace-tso.h"
+#include "userspace-tso-segsz.h"
#include "util.h"
#include "uuid.h"
@@ -87,6 +89,7 @@ COVERAGE_DEFINE(vhost_notification);
#define OVS_CACHE_LINE_SIZE CACHE_LINE_SIZE
#define OVS_VPORT_DPDK "ovs_dpdk"
+#define DPDK_RTE_HDR_OFFSET 1
/*
* need to reserve tons of extra space in the mbufs so we can align the
@@ -96,6 +99,8 @@ COVERAGE_DEFINE(vhost_notification);
*/
#define ETHER_HDR_MAX_LEN (RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN \
+ (2 * VLAN_HEADER_LEN))
+#define ETHER_VLAN_HDR_MAX_LEN (RTE_ETHER_HDR_LEN + \
+                                (2 * VLAN_HEADER_LEN))
#define MTU_TO_FRAME_LEN(mtu) ((mtu) + RTE_ETHER_HDR_LEN + \
RTE_ETHER_CRC_LEN)
#define MTU_TO_MAX_FRAME_LEN(mtu) ((mtu) + ETHER_HDR_MAX_LEN)
@@ -404,6 +409,7 @@ enum dpdk_hw_ol_features {
NETDEV_RX_HW_SCATTER = 1 << 2,
NETDEV_TX_TSO_OFFLOAD = 1 << 3,
NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 4,
+ NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD = 1 << 5,
};
/*
@@ -998,6 +1004,11 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
conf.txmode.offloads |= DPDK_TX_TSO_OFFLOAD_FLAGS;
+ /* Enable VXLAN TSO support if available */
+ if (dev->hw_ol_features & NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD) {
+ conf.txmode.offloads |= DEV_TX_OFFLOAD_VXLAN_TNL_TSO;
+ conf.txmode.offloads |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
+ }
if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) {
conf.txmode.offloads |= DEV_TX_OFFLOAD_SCTP_CKSUM;
}
@@ -1136,6 +1147,10 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
if ((info.tx_offload_capa & tx_tso_offload_capa)
== tx_tso_offload_capa) {
dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
+ /* Enable VXLAN TSO support if available */
+ if (info.tx_offload_capa & DEV_TX_OFFLOAD_VXLAN_TNL_TSO) {
+ dev->hw_ol_features |= NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD;
+ }
if (info.tx_offload_capa & DEV_TX_OFFLOAD_SCTP_CKSUM) {
dev->hw_ol_features |= NETDEV_TX_SCTP_CHECKSUM_OFFLOAD;
} else {
@@ -2173,37 +2188,267 @@ netdev_dpdk_rxq_dealloc(struct netdev_rxq *rxq)
rte_free(rx);
}
+/* Returns true if the mbuf received from 'src_port_id' originates from a
+ * local vhost port and 'dev' is also an attached vhost device, i.e. the
+ * packet is forwarded between two local ports on this host.  UINT16_MAX
+ * marks a non-DPDK source port, which is treated as local. */
+static inline bool
+is_local_to_local(uint16_t src_port_id, struct netdev_dpdk *dev)
+{
+    struct netdev_dpdk *src_dev;
+
+    if (netdev_dpdk_get_vid(dev) < 0) {
+        /* Destination is not an attached vhost device. */
+        return false;
+    }
+
+    if (src_port_id == UINT16_MAX) {
+        /* Marked as coming from a non-DPDK (local) port. */
+        return true;
+    }
+
+    /* Otherwise local only if the source is an attached vhost device. */
+    src_dev = netdev_dpdk_lookup_by_port_id(src_port_id);
+    return src_dev && netdev_dpdk_get_vid(src_dev) >= 0;
+}
+
+#define UDP_VXLAN_ETH_HDR_SIZE 30
+
/* Prepare the packet for HWOL.
* Return True if the packet is OK to continue. */
static bool
netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
{
struct dp_packet *pkt = CONTAINER_OF(mbuf, struct dp_packet, mbuf);
+ uint16_t l4_proto = 0;
+ uint8_t *l3_hdr_ptr = NULL;
+ struct rte_ether_hdr *eth_hdr =
+ rte_pktmbuf_mtod(mbuf, struct rte_ether_hdr *);
+ struct rte_ipv4_hdr *ip_hdr;
+ struct rte_ipv6_hdr *ip6_hdr;
+ const uint16_t tso_segsz = get_userspace_tso_segsz();
+
+ /* Return directly if source and destination of mbuf are local ports
+ * because mbuf has already set ol_flags and l*_len correctly.
+ */
+ if (is_local_to_local(mbuf->port, dev)) {
+ if (mbuf->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG)) {
+ mbuf->tso_segsz = tso_segsz - mbuf->l3_len - mbuf->l4_len;
+ }
+ return true;
+ }
+
+ if (mbuf->ol_flags & PKT_TX_TUNNEL_VXLAN) {
+ /* Handle VXLAN TSO */
+ struct rte_udp_hdr *udp_hdr = NULL;
+
+ /* Correct l2_len for VxLAN packet */
+ mbuf->l2_len += sizeof(struct udp_header)
+ + sizeof(struct vxlanhdr);
+
+ /* Small packets whose size is less than or equal to the MTU don't need
+ * VXLAN TSO. In addition, if the hardware can't support VXLAN TSO, it
+ * can't be handled either. So PKT_TX_TUNNEL_VXLAN must be cleared, and
+ * outer_l2_len and outer_l3_len must be zeroed.
+ */
+ if (!(mbuf->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG))
+ && (mbuf->pkt_len <= tso_segsz + mbuf->outer_l2_len
+ + mbuf->outer_l3_len + mbuf->l2_len)) {
+ mbuf->ol_flags &= ~PKT_TX_TUNNEL_VXLAN;
+ if ((mbuf->ol_flags & PKT_TX_IPV4) &&
+ (mbuf->outer_l3_len == IPV6_HEADER_LEN)) {
+ dp_packet_hwol_reset_tx_ipv4(pkt);
+ dp_packet_hwol_set_tx_ipv6(pkt);
+ } else if ((mbuf->ol_flags & PKT_TX_IPV6) &&
+ (mbuf->outer_l3_len == IP_HEADER_LEN)) {
+ dp_packet_hwol_reset_tx_ipv6(pkt);
+ dp_packet_hwol_set_tx_ipv4(pkt);
+ }
+ mbuf->l2_len = mbuf->outer_l2_len;
+ mbuf->l3_len = mbuf->outer_l3_len;
+ mbuf->l4_len = sizeof(struct rte_udp_hdr);
+ mbuf->outer_l2_len = 0;
+ mbuf->outer_l3_len = 0;
+ return true;
+ }
+
+ /* Handle outer packet */
+ if (mbuf->outer_l3_len == IP_HEADER_LEN) {
+ ip_hdr = (struct rte_ipv4_hdr *)((char *) eth_hdr
+ + mbuf->outer_l2_len);
+ /* outer IP checksum offload */
+ ip_hdr->hdr_checksum = 0;
+ mbuf->ol_flags |= PKT_TX_OUTER_IP_CKSUM;
+ mbuf->ol_flags |= PKT_TX_OUTER_IPV4;
+
+ udp_hdr = (struct rte_udp_hdr *)(ip_hdr + DPDK_RTE_HDR_OFFSET);
+ } else if (mbuf->outer_l3_len == IPV6_HEADER_LEN) {
+ ip6_hdr = (struct rte_ipv6_hdr *)((char *) eth_hdr
+ + mbuf->outer_l2_len);
+ /* no IP checksum for outer IPv6 */
+ mbuf->ol_flags |= PKT_TX_OUTER_IPV6;
- if (mbuf->ol_flags & PKT_TX_L4_MASK) {
+ udp_hdr = (struct rte_udp_hdr *)(ip6_hdr + DPDK_RTE_HDR_OFFSET);
+
+ }
+
+ /* Handle inner packet */
+ if (udp_hdr != NULL) {
+ if (mbuf->ol_flags & PKT_TX_IPV4) {
+ ip_hdr = (struct rte_ipv4_hdr *)
+ ((uint8_t *)udp_hdr + mbuf->l2_len);
+ l4_proto = ip_hdr->next_proto_id;
+ l3_hdr_ptr = (uint8_t *)ip_hdr;
+
+ /* inner IP checksum offload */
+ ip_hdr->hdr_checksum = 0;
+ mbuf->ol_flags |= PKT_TX_IP_CKSUM;
+ } else if (mbuf->ol_flags & PKT_TX_IPV6) {
+ ip6_hdr = (struct rte_ipv6_hdr *)
+ ((uint8_t *)udp_hdr + mbuf->l2_len);
+ l4_proto = ip6_hdr->proto;
+ l3_hdr_ptr = (uint8_t *)ip6_hdr;
+ }
+ }
+
+ /* In case of MTU > tso_segsz, if PKT_TX_TCP_SEG or PKT_TX_UDP_SEG wasn't
+ * set by the client/server, here is a place we can mark it.
+ */
+ if ((mbuf->pkt_len > tso_segsz + mbuf->outer_l2_len
+ + mbuf->outer_l3_len + mbuf->l2_len)
+ && (!(mbuf->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG)))) {
+ if (l4_proto == IPPROTO_UDP) {
+ mbuf->ol_flags |= PKT_TX_UDP_SEG;
+ } else if (l4_proto == IPPROTO_TCP) {
+ mbuf->ol_flags |= PKT_TX_TCP_SEG;
+ }
+ }
+ } else if (mbuf->ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6)) {
+ /* Handle VLAN TSO */
mbuf->l2_len = (char *)dp_packet_l3(pkt) - (char *)dp_packet_eth(pkt);
mbuf->l3_len = (char *)dp_packet_l4(pkt) - (char *)dp_packet_l3(pkt);
mbuf->outer_l2_len = 0;
mbuf->outer_l3_len = 0;
+
+ if (mbuf->ol_flags & PKT_TX_IPV4) {
+ ip_hdr = (struct rte_ipv4_hdr *)((char *)eth_hdr + mbuf->l2_len);
+ l4_proto = ip_hdr->next_proto_id;
+ l3_hdr_ptr = (uint8_t *)ip_hdr;
+
+ /* IP checksum offload */
+ ip_hdr->hdr_checksum = 0;
+ mbuf->ol_flags |= PKT_TX_IP_CKSUM;
+ } else if (mbuf->ol_flags & PKT_TX_IPV6) {
+ ip6_hdr = (struct rte_ipv6_hdr *)((char *)eth_hdr + mbuf->l2_len);
+ l4_proto = ip6_hdr->proto;
+ l3_hdr_ptr = (uint8_t *)ip6_hdr;
+ }
+
+ /* In some cases, PKT_TX_TCP_SEG or PKT_TX_UDP_SEG wasn't set; here is
+ * a place we can mark it.
+ */
+ if ((mbuf->pkt_len > (tso_segsz + mbuf->l2_len))
+ && (!(mbuf->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_UDP_SEG)))) {
+ if (l4_proto == IPPROTO_UDP) {
+ mbuf->ol_flags |= PKT_TX_UDP_SEG;
+ } else if (l4_proto == IPPROTO_TCP) {
+ mbuf->ol_flags |= PKT_TX_TCP_SEG;
+ }
+ }
}
- if (mbuf->ol_flags & PKT_TX_TCP_SEG) {
- struct tcp_header *th = dp_packet_l4(pkt);
+ /* It is possible that l4_len isn't set for vhostuserclient */
+ if ((l3_hdr_ptr != NULL) && (l4_proto == IPPROTO_TCP)
+ && (mbuf->l4_len < 20)) {
+ struct rte_tcp_hdr *tcp_hdr = (struct rte_tcp_hdr *)
+ (l3_hdr_ptr + mbuf->l3_len);
- if (!th) {
- VLOG_WARN_RL(&rl, "%s: TCP Segmentation without L4 header"
+ mbuf->l4_len = (tcp_hdr->data_off & 0xf0) >> 2;
+ }
+
+ if ((l4_proto != IPPROTO_UDP) && (l4_proto != IPPROTO_TCP)) {
+ return true;
+ }
+
+ if ((mbuf->ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM) {
+ if (l4_proto != IPPROTO_UDP) {
+ VLOG_WARN_RL(&rl, "%s: UDP packet without L4 header"
" pkt len: %"PRIu32"", dev->up.name, mbuf->pkt_len);
return false;
}
+ } else if (mbuf->ol_flags & PKT_TX_TCP_SEG ||
+ mbuf->ol_flags & PKT_TX_TCP_CKSUM) {
+ if (l4_proto != IPPROTO_TCP) {
+ VLOG_WARN_RL(&rl, "%s: TCP Segmentation without L4 header"
+ " pkt len: %"PRIu32" l4_proto = %d",
+ dev->up.name, mbuf->pkt_len, l4_proto);
+ return false;
+ }
+
+ if (mbuf->pkt_len > tso_segsz + mbuf->outer_l2_len + mbuf->outer_l3_len
+ + mbuf->l2_len) {
+ dp_packet_hwol_set_tcp_seg(pkt);
+ }
- mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4;
mbuf->ol_flags |= PKT_TX_TCP_CKSUM;
- mbuf->tso_segsz = dev->mtu - mbuf->l3_len - mbuf->l4_len;
+ if (mbuf->ol_flags & PKT_TX_TCP_SEG) {
+ mbuf->tso_segsz = tso_segsz - mbuf->l3_len - mbuf->l4_len;
+ } else {
+ mbuf->tso_segsz = 0;
+ }
- if (mbuf->ol_flags & PKT_TX_IPV4) {
- mbuf->ol_flags |= PKT_TX_IP_CKSUM;
+ if (!(dev->up.ol_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) {
+ /* PKT_TX_TCP_CKSUM must be cleared because the
+ * TCP checksum can only be calculated by software if the NIC
+ * cannot support it.
+ */
+ mbuf->ol_flags &= ~PKT_TX_TCP_CKSUM;
}
}
+
+ if (l4_proto == IPPROTO_UDP) {
+ /* In case of pkt_len < dev->mtu, it can still be handled correctly */
+ if (mbuf->pkt_len < dev->mtu + ETHER_VLAN_HDR_MAX_LEN) {
+ mbuf->ol_flags &= ~PKT_TX_UDP_SEG;
+ if (mbuf->ol_flags & PKT_TX_TUNNEL_VXLAN) {
+ /* Treat it as a normal UDP packet and stop inner cksum offload */
+ mbuf->ol_flags &= ~PKT_TX_TUNNEL_VXLAN;
+ mbuf->ol_flags &= ~PKT_TX_OUTER_IP_CKSUM;
+ if (mbuf->ol_flags & PKT_TX_OUTER_IPV4) {
+ mbuf->ol_flags &= ~PKT_TX_OUTER_IPV4;
+ if (mbuf->ol_flags & PKT_TX_IPV6) {
+ mbuf->ol_flags &= ~PKT_TX_IPV6;
+ }
+ if ((mbuf->ol_flags & PKT_TX_IPV4) == 0) {
+ mbuf->ol_flags |= PKT_TX_IPV4;
+ }
+ mbuf->ol_flags |= PKT_TX_IP_CKSUM;
+ } else if (mbuf->ol_flags & PKT_TX_OUTER_IPV6) {
+ mbuf->ol_flags &= ~PKT_TX_OUTER_IPV6;
+ if (mbuf->ol_flags & PKT_TX_IPV4) {
+ mbuf->ol_flags &= ~PKT_TX_IPV4;
+ mbuf->ol_flags &= ~PKT_TX_IP_CKSUM;
+ }
+ if ((mbuf->ol_flags & PKT_TX_IPV6) == 0) {
+ mbuf->ol_flags |= PKT_TX_IPV6;
+ }
+ /* For outer IPv6, outer udp checksum is incorrect */
+ mbuf->ol_flags |= PKT_TX_UDP_CKSUM;
+ }
+ mbuf->l2_len = mbuf->outer_l2_len;
+ mbuf->l3_len = mbuf->outer_l3_len;
+ mbuf->outer_l2_len = 0;
+ mbuf->outer_l3_len = 0;
+ }
+ return true;
+ }
+
+ /* Can't handle bigger UDP packet, so return false */
+ VLOG_WARN_RL(&rl, "%s: too big UDP packet"
+ ", pkt len: %"PRIu32"", dev->up.name, mbuf->pkt_len);
+ return false;
+ }
+
return true;
}
@@ -2781,17 +3026,26 @@ dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, struct dp_packet *pkt_orig)
mbuf_dest->packet_type = pkt_orig->mbuf.packet_type;
mbuf_dest->ol_flags |= (pkt_orig->mbuf.ol_flags &
~(EXT_ATTACHED_MBUF | IND_ATTACHED_MBUF));
+ mbuf_dest->l2_len = pkt_orig->mbuf.l2_len;
+ mbuf_dest->l3_len = pkt_orig->mbuf.l3_len;
+ mbuf_dest->l4_len = pkt_orig->mbuf.l4_len;
+ mbuf_dest->outer_l2_len = pkt_orig->mbuf.outer_l2_len;
+ mbuf_dest->outer_l3_len = pkt_orig->mbuf.outer_l3_len;
memcpy(&pkt_dest->l2_pad_size, &pkt_orig->l2_pad_size,
sizeof(struct dp_packet) - offsetof(struct dp_packet, l2_pad_size));
- if (mbuf_dest->ol_flags & PKT_TX_L4_MASK) {
+ if ((mbuf_dest->outer_l2_len == 0) &&
+ (mbuf_dest->ol_flags & PKT_TX_L4_MASK)) {
mbuf_dest->l2_len = (char *)dp_packet_l3(pkt_dest)
- (char *)dp_packet_eth(pkt_dest);
mbuf_dest->l3_len = (char *)dp_packet_l4(pkt_dest)
- (char *) dp_packet_l3(pkt_dest);
}
+ /* Mark it as non-DPDK port */
+ mbuf_dest->port = UINT16_MAX;
+
return pkt_dest;
}
@@ -2850,6 +3104,11 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
if (dev->type == DPDK_DEV_VHOST) {
__netdev_dpdk_vhost_send(netdev, qid, pkts, txcnt);
} else {
+ if (userspace_tso_enabled()) {
+ txcnt = netdev_dpdk_prep_hwol_batch(dev,
+ (struct rte_mbuf **)pkts,
+ txcnt);
+ }
tx_failure += netdev_dpdk_eth_tx_burst(dev, qid,
(struct rte_mbuf **)pkts,
txcnt);
@@ -2872,7 +3131,6 @@ netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
struct dp_packet_batch *batch,
bool concurrent_txq OVS_UNUSED)
{
-
if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
dpdk_do_tx_copy(netdev, qid, batch);
dp_packet_delete_batch(batch, true);
@@ -5033,6 +5291,10 @@ netdev_dpdk_reconfigure(struct netdev *netdev)
netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM;
netdev->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM;
netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM;
+ /* Enable VXLAN TSO support if available */
+ if (dev->hw_ol_features & NETDEV_TX_VXLAN_TNL_TSO_OFFLOAD) {
+ netdev->ol_flags |= NETDEV_TX_OFFLOAD_VXLAN_TSO;
+ }
if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) {
netdev->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM;
}
@@ -50,6 +50,7 @@
#include <unistd.h>
#include "coverage.h"
+#include "csum.h"
#include "dp-packet.h"
#include "dpif-netlink.h"
#include "dpif-netdev.h"
@@ -79,6 +80,7 @@
#include "unaligned.h"
#include "openvswitch/vlog.h"
#include "userspace-tso.h"
+#include "userspace-tso-segsz.h"
#include "util.h"
VLOG_DEFINE_THIS_MODULE(netdev_linux);
@@ -6508,6 +6510,8 @@ netdev_linux_parse_l2(struct dp_packet *b, uint16_t *l4proto)
struct eth_header *eth_hdr;
ovs_be16 eth_type;
int l2_len;
+ int l3_len = 0;
+ int l4_len = 0;
eth_hdr = dp_packet_at(b, 0, ETH_HEADER_LEN);
if (!eth_hdr) {
@@ -6527,6 +6531,8 @@ netdev_linux_parse_l2(struct dp_packet *b, uint16_t *l4proto)
l2_len += VLAN_HEADER_LEN;
}
+ dp_packet_hwol_set_l2_len(b, l2_len);
+
if (eth_type == htons(ETH_TYPE_IP)) {
struct ip_header *ip_hdr = dp_packet_at(b, l2_len, IP_HEADER_LEN);
@@ -6534,6 +6540,7 @@ netdev_linux_parse_l2(struct dp_packet *b, uint16_t *l4proto)
return -EINVAL;
}
+ l3_len = IP_HEADER_LEN;
*l4proto = ip_hdr->ip_proto;
dp_packet_hwol_set_tx_ipv4(b);
} else if (eth_type == htons(ETH_TYPE_IPV6)) {
@@ -6544,10 +6551,35 @@ netdev_linux_parse_l2(struct dp_packet *b, uint16_t *l4proto)
return -EINVAL;
}
+ l3_len = IPV6_HEADER_LEN;
*l4proto = nh6->ip6_ctlun.ip6_un1.ip6_un1_nxt;
dp_packet_hwol_set_tx_ipv6(b);
}
+ dp_packet_hwol_set_l3_len(b, l3_len);
+
+ if (*l4proto == IPPROTO_TCP) {
+ struct tcp_header *tcp_hdr = dp_packet_at(b, l2_len + l3_len,
+ sizeof(struct tcp_header));
+
+ if (!tcp_hdr) {
+ return -EINVAL;
+ }
+
+ l4_len = TCP_OFFSET(tcp_hdr->tcp_ctl) * 4;
+ dp_packet_hwol_set_l4_len(b, l4_len);
+ } else if (*l4proto == IPPROTO_UDP) {
+ struct udp_header *udp_hdr = dp_packet_at(b, l2_len + l3_len,
+ sizeof(struct udp_header));
+
+ if (!udp_hdr) {
+ return -EINVAL;
+ }
+
+ l4_len = sizeof(struct udp_header);
+ dp_packet_hwol_set_l4_len(b, l4_len);
+ }
+
return 0;
}
@@ -6561,10 +6593,6 @@ netdev_linux_parse_vnet_hdr(struct dp_packet *b)
return -EINVAL;
}
- if (vnet->flags == 0 && vnet->gso_type == VIRTIO_NET_HDR_GSO_NONE) {
- return 0;
- }
-
if (netdev_linux_parse_l2(b, &l4proto)) {
return -EINVAL;
}
@@ -6595,22 +6623,130 @@ netdev_linux_parse_vnet_hdr(struct dp_packet *b)
}
static void
-netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu)
+netdev_linux_set_ol_flags_and_cksum(struct dp_packet *b, int mtu)
+{
+ struct eth_header *eth_hdr;
+ struct ip_header *ip_hdr = NULL;
+ struct ovs_16aligned_ip6_hdr *nh6 = NULL;
+ uint16_t l4proto = 0;
+ ovs_be16 eth_type;
+ int l2_len;
+ int l3_len = 0;
+ int l4_len = 0;
+
+ eth_hdr = dp_packet_at(b, 0, ETH_HEADER_LEN);
+ if (!eth_hdr) {
+ return;
+ }
+
+ l2_len = ETH_HEADER_LEN;
+ eth_type = eth_hdr->eth_type;
+ if (eth_type_vlan(eth_type)) {
+ struct vlan_header *vlan = dp_packet_at(b, l2_len, VLAN_HEADER_LEN);
+
+ if (!vlan) {
+ return;
+ }
+
+ eth_type = vlan->vlan_next_type;
+ l2_len += VLAN_HEADER_LEN;
+ }
+
+ if (eth_type == htons(ETH_TYPE_IP)) {
+ ip_hdr = dp_packet_at(b, l2_len, IP_HEADER_LEN);
+
+ if (!ip_hdr) {
+ return;
+ }
+
+ dp_packet_set_l3(b, ip_hdr);
+ ip_hdr->ip_csum = 0;
+ ip_hdr->ip_csum = csum(ip_hdr, sizeof *ip_hdr);
+ l4proto = ip_hdr->ip_proto;
+ dp_packet_hwol_set_tx_ipv4(b);
+ l3_len = IP_HEADER_LEN;
+ } else if (eth_type == htons(ETH_TYPE_IPV6)) {
+ nh6 = dp_packet_at(b, l2_len, IPV6_HEADER_LEN);
+ if (!nh6) {
+ return;
+ }
+
+ dp_packet_set_l3(b, nh6);
+ l4proto = nh6->ip6_ctlun.ip6_un1.ip6_un1_nxt;
+ dp_packet_hwol_set_tx_ipv6(b);
+ l3_len = IPV6_HEADER_LEN;
+ }
+
+ if (l4proto == IPPROTO_TCP) {
+ /* Note: need set tcp pseudo checksum */
+ struct tcp_header *tcp_hdr = dp_packet_at(b, l2_len + l3_len,
+ sizeof(struct tcp_header));
+
+ if (!tcp_hdr) {
+ return;
+ }
+ l4_len = TCP_OFFSET(tcp_hdr->tcp_ctl) * 4;
+ dp_packet_hwol_set_l4_len(b, l4_len);
+ dp_packet_set_l4(b, tcp_hdr);
+
+ if (l3_len == IP_HEADER_LEN) {
+ tcp_hdr->tcp_csum = csum_finish(packet_csum_pseudoheader(ip_hdr));
+ } else {
+ tcp_hdr->tcp_csum = csum_finish(packet_csum_pseudoheader6(nh6));
+ }
+ if (dp_packet_size(b) > mtu + l2_len) {
+ dp_packet_hwol_set_tcp_seg(b);
+ }
+ dp_packet_hwol_set_csum_tcp(b);
+ } else if (l4proto == IPPROTO_UDP) {
+ struct udp_header *udp_hdr = dp_packet_at(b, l2_len + l3_len,
+ sizeof(struct udp_header));
+
+ if (!udp_hdr) {
+ return;
+ }
+ l4_len = sizeof(struct udp_header);
+ dp_packet_hwol_set_l4_len(b, l4_len);
+ dp_packet_set_l4(b, udp_hdr);
+ if (dp_packet_size(b) > mtu + l2_len) {
+ dp_packet_hwol_set_udp_seg(b);
+ }
+ dp_packet_hwol_set_csum_udp(b);
+ }
+}
+
+static void
+netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu OVS_UNUSED)
{
- struct virtio_net_hdr *vnet = dp_packet_push_zeros(b, sizeof *vnet);
+ struct virtio_net_hdr *vnet;
+ uint16_t tso_segsz = get_userspace_tso_segsz();
+ uint16_t l4proto;
+
+ netdev_linux_parse_l2(b, &l4proto);
+
+ /* ol_flags aren't set correctly for received packets which come from
+ * a physical port, so they have to be set again so that the
+ * vnet_hdr can be prepended correctly. Note: tso_segsz rather than
+ * mtu is used here because tso_segsz may be less than mtu.
+ */
+ if ((dp_packet_size(b) > tso_segsz + dp_packet_hwol_get_l2_len(b))
+ && !dp_packet_hwol_l4_mask(b)) {
+ netdev_linux_set_ol_flags_and_cksum(b, tso_segsz);
+ }
+
+ vnet = dp_packet_push_zeros(b, sizeof *vnet);
if (dp_packet_hwol_is_tso(b)) {
uint16_t hdr_len = ((char *)dp_packet_l4(b) - (char *)dp_packet_eth(b))
- + TCP_HEADER_LEN;
+ + dp_packet_hwol_get_l4_len(b);
vnet->hdr_len = (OVS_FORCE __virtio16)hdr_len;
- vnet->gso_size = (OVS_FORCE __virtio16)(mtu - hdr_len);
+ vnet->gso_size = (OVS_FORCE __virtio16)(tso_segsz - hdr_len);
if (dp_packet_hwol_is_ipv4(b)) {
vnet->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
} else {
vnet->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
}
-
} else {
vnet->flags = VIRTIO_NET_HDR_GSO_NONE;
}
@@ -43,6 +43,7 @@ enum netdev_ol_flags {
NETDEV_TX_OFFLOAD_UDP_CKSUM = 1 << 2,
NETDEV_TX_OFFLOAD_SCTP_CKSUM = 1 << 3,
NETDEV_TX_OFFLOAD_TCP_TSO = 1 << 4,
+ NETDEV_TX_OFFLOAD_VXLAN_TSO = 1 << 5,
};
/* A network device (e.g. an Ethernet device).
@@ -33,6 +33,7 @@
#include "cmap.h"
#include "coverage.h"
+#include "csum.h"
#include "dpif.h"
#include "dp-packet.h"
#include "openvswitch/dynamic-string.h"
@@ -55,6 +56,7 @@
#include "svec.h"
#include "openvswitch/vlog.h"
#include "flow.h"
+#include "userspace-tso.h"
#include "util.h"
#ifdef __linux__
#include "tc.h"
@@ -785,6 +787,64 @@ netdev_get_pt_mode(const struct netdev *netdev)
: NETDEV_PT_LEGACY_L2);
}
+static inline void
+calculate_tcpudp_checksum(struct dp_packet *p)
+{
+ uint32_t pseudo_hdr_csum = 0;
+ bool is_ipv6 = false;
+ struct ovs_16aligned_ip6_hdr *ip6 = NULL;
+ size_t len_l2 = (char *) dp_packet_l3(p) - (char *) dp_packet_eth(p);
+ size_t len_l3 = (char *) dp_packet_l4(p) - (char *) dp_packet_l3(p);
+ size_t l4_len = (char *) dp_packet_tail(p) - (char *) dp_packet_l4(p);
+ uint16_t l4_proto = 0;
+
+ /* It is possible l2_len and l3_len aren't set here, so set them if not. */
+ if (dp_packet_hwol_get_l2_len(p) != len_l2) {
+ dp_packet_hwol_set_l2_len(p, len_l2);
+ dp_packet_hwol_set_l3_len(p, len_l3);
+ }
+
+ if (len_l3 == sizeof(struct ovs_16aligned_ip6_hdr)) {
+ ip6 = dp_packet_l3(p);
+ l4_proto = ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt;
+ is_ipv6 = true;
+ } else {
+ struct ip_header *ip = dp_packet_l3(p);
+
+ l4_proto = ip->ip_proto;
+ ip->ip_csum = 0;
+ ip->ip_csum = csum(ip, sizeof *ip);
+ pseudo_hdr_csum = packet_csum_pseudoheader(ip);
+ }
+
+ if (l4_proto == IPPROTO_TCP) {
+ struct tcp_header *tcp = dp_packet_l4(p);
+
+ tcp->tcp_csum = 0;
+ if (is_ipv6) {
+ tcp->tcp_csum = packet_csum_upperlayer6(ip6, tcp, l4_proto,
+ l4_len);
+ } else {
+ tcp->tcp_csum = csum_finish(csum_continue(pseudo_hdr_csum,
+ tcp, l4_len));
+ }
+ } else if (l4_proto == IPPROTO_UDP) {
+ struct udp_header *udp = dp_packet_l4(p);
+
+ udp->udp_csum = 0;
+ if (is_ipv6) {
+ udp->udp_csum = packet_csum_upperlayer6(ip6, udp, l4_proto,
+ l4_len);
+ } else {
+ udp->udp_csum = csum_finish(csum_continue(pseudo_hdr_csum,
+ udp, l4_len));
+ }
+ if (!udp->udp_csum) {
+ udp->udp_csum = htons(0xffff);
+ }
+ }
+}
+
/* Check if a 'packet' is compatible with 'netdev_flags'.
* If a packet is incompatible, return 'false' with the 'errormsg'
* pointing to a reason. */
@@ -794,6 +854,14 @@ netdev_send_prepare_packet(const uint64_t netdev_flags,
{
uint64_t l4_mask;
+ if (dp_packet_hwol_is_vxlan_tcp_seg(packet)
+ && (dp_packet_hwol_is_tso(packet) || dp_packet_hwol_l4_mask(packet))
+ && !(netdev_flags & NETDEV_TX_OFFLOAD_VXLAN_TSO)) {
+ /* Fall back to GSO in software. */
+ VLOG_ERR_BUF(errormsg, "No VXLAN TSO support");
+ return false;
+ }
+
if (dp_packet_hwol_is_tso(packet)
&& !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) {
/* Fall back to GSO in software. */
@@ -803,6 +871,33 @@ netdev_send_prepare_packet(const uint64_t netdev_flags,
l4_mask = dp_packet_hwol_l4_mask(packet);
if (l4_mask) {
+ /* Calculate checksum for VLAN TSO case when no hardware offload
+ * feature is available. Note: for VXLAN TSO case, checksum has
+ * been calculated before here, so it won't be done here again
+ * because checksum flags in packet->m.ol_flags have been cleaned.
+ */
+ if (dp_packet_hwol_l4_is_tcp(packet)
+ && !dp_packet_hwol_is_vxlan_tcp_seg(packet)
+ && !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_CKSUM)) {
+ dp_packet_hwol_reset_csum_tcp(packet);
+ /* Only calculate TCP checksum for non-TSO packet.
+ */
+ if (!dp_packet_hwol_is_tso(packet)) {
+ calculate_tcpudp_checksum(packet);
+ }
+ return true;
+ } else if (dp_packet_hwol_l4_is_udp(packet)
+ && !dp_packet_hwol_is_vxlan_tcp_seg(packet)
+ && !(netdev_flags & NETDEV_TX_OFFLOAD_UDP_CKSUM)) {
+ dp_packet_hwol_reset_csum_udp(packet);
+ /* Only calculate UDP checksum for non-UFO packet.
+ */
+ if (!dp_packet_hwol_is_ufo(packet)) {
+ calculate_tcpudp_checksum(packet);
+ }
+ return true;
+ }
+
if (dp_packet_hwol_l4_is_tcp(packet)) {
if (!(netdev_flags & NETDEV_TX_OFFLOAD_TCP_CKSUM)) {
/* Fall back to TCP csum in software. */
@@ -960,15 +1055,61 @@ netdev_push_header(const struct netdev *netdev,
size_t i, size = dp_packet_batch_size(batch);
DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) {
- if (OVS_UNLIKELY(dp_packet_hwol_is_tso(packet)
- || dp_packet_hwol_l4_mask(packet))) {
+ if (OVS_UNLIKELY((dp_packet_hwol_is_tso(packet)
+ || dp_packet_hwol_l4_mask(packet))
+ && (data->tnl_type != OVS_VPORT_TYPE_VXLAN))) {
COVERAGE_INC(netdev_push_header_drops);
dp_packet_delete(packet);
- VLOG_WARN_RL(&rl, "%s: Tunneling packets with HW offload flags is "
- "not supported: packet dropped",
+ VLOG_WARN_RL(&rl,
+ "%s: non-VxLAN Tunneling packets with HW offload "
+ "flags is not supported: packet dropped",
netdev_get_name(netdev));
} else {
+ size_t len_l2 = (char *) dp_packet_l3(packet)
+ - (char *) dp_packet_eth(packet);
+ size_t len_l3 = (char *) dp_packet_l4(packet)
+ - (char *) dp_packet_l3(packet);
+ if (data->tnl_type == OVS_VPORT_TYPE_VXLAN) {
+ /* VXLAN offload can't support udp checksum offload
+ * for inner udp packet, so udp checksum must be set
+ * before push header in order that outer checksum can
+ * be set correctly.
+ */
+ if (dp_packet_hwol_l4_is_udp(packet)) {
+ dp_packet_hwol_reset_csum_udp(packet);
+ /* Only calculate UDP checksum for non-UFO packet.
+ */
+ if (!dp_packet_hwol_is_ufo(packet)) {
+ calculate_tcpudp_checksum(packet);
+ }
+ } else if (dp_packet_hwol_l4_is_tcp(packet)) {
+ dp_packet_hwol_reset_csum_tcp(packet);
+ /* Only calculate TCP checksum for non-TSO packet.
+ */
+ if (!dp_packet_hwol_is_tso(packet)) {
+ calculate_tcpudp_checksum(packet);
+ }
+ }
+ }
+            /* It is possible l2_len and l3_len aren't set here, so set
+             * them if not.
+             */
+ if (dp_packet_hwol_get_l2_len(packet) != len_l2) {
+ dp_packet_hwol_set_l2_len(packet, len_l2);
+ dp_packet_hwol_set_l3_len(packet, len_l3);
+ }
+
netdev->netdev_class->push_header(netdev, packet, data);
+ if (userspace_tso_enabled()
+ && (data->tnl_type == OVS_VPORT_TYPE_VXLAN)) {
+ /* Just identify it as a vxlan packet, here netdev is
+ * vxlan_sys_*, netdev->ol_flags can't indicate if final
+ * physical output port can support VXLAN TSO, in
+ * netdev_send_prepare_packet will drop it if final
+ * physical output port can't support VXLAN TSO.
+ */
+ dp_packet_hwol_set_vxlan_tcp_seg(packet);
+ }
pkt_metadata_init(&packet->md, data->out_port);
dp_packet_batch_refill(batch, packet, i);
}
new file mode 100644
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2020 Inspur, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include "smap.h"
+#include "ovs-thread.h"
+#include "openvswitch/vlog.h"
+#include "dpdk.h"
+#include "userspace-tso-segsz.h"
+#include "vswitch-idl.h"
+
+VLOG_DEFINE_THIS_MODULE(userspace_tso_segsz);
+
+#define DEFAULT_TSO_SEGSZ 1500
+#define MAX_TSO_SEGSZ 9000
+static uint16_t userspace_tso_segsz = DEFAULT_TSO_SEGSZ;
+
+/* Reads "userspace-tso-segsz" from 'ovs_other_config' exactly once and
+ * caches the result.  A segment size must be positive, so values outside
+ * (0, MAX_TSO_SEGSZ] are rejected and the default is used instead. */
+void
+userspace_tso_segsz_init(const struct smap *ovs_other_config)
+{
+    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
+
+    if (ovsthread_once_start(&once)) {
+        int tso_segsz;
+
+        tso_segsz = smap_get_int(ovs_other_config, "userspace-tso-segsz",
+                                 DEFAULT_TSO_SEGSZ);
+        if (tso_segsz <= 0 || tso_segsz > MAX_TSO_SEGSZ) {
+            VLOG_WARN("Invalid userspace-tso-segsz %d, using default %d",
+                      tso_segsz, DEFAULT_TSO_SEGSZ);
+            tso_segsz = DEFAULT_TSO_SEGSZ;
+        }
+        userspace_tso_segsz = tso_segsz;
+        VLOG_INFO("Userspace TSO segsz set to %u", userspace_tso_segsz);
+        ovsthread_once_done(&once);
+    }
+}
+
+/* Returns the TSO segment size cached by userspace_tso_segsz_init(). */
+uint16_t
+get_userspace_tso_segsz(void)
+{
+    return userspace_tso_segsz;
+}
new file mode 100644
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2020 Inspur, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef USERSPACE_TSO_SEGSZ_H
+#define USERSPACE_TSO_SEGSZ_H 1
+
+#include <stdint.h>
+
+struct smap;
+
+/* One-time initialization; reads "userspace-tso-segsz" from
+ * 'ovs_other_config'. */
+void userspace_tso_segsz_init(const struct smap *ovs_other_config);
+
+/* Returns the configured TSO segment size. */
+uint16_t get_userspace_tso_segsz(void);
+
+#endif /* userspace-tso-segsz.h */
@@ -66,6 +66,7 @@
#include "timeval.h"
#include "tnl-ports.h"
#include "userspace-tso.h"
+#include "userspace-tso-segsz.h"
#include "util.h"
#include "unixctl.h"
#include "lib/vswitch-idl.h"
@@ -3292,6 +3293,7 @@ bridge_run(void)
netdev_set_flow_api_enabled(&cfg->other_config);
dpdk_init(&cfg->other_config);
userspace_tso_init(&cfg->other_config);
+ userspace_tso_segsz_init(&cfg->other_config);
}
/* Initialize the ofproto library. This only needs to run once, but