@@ -34,6 +34,7 @@ dp_packet_init__(struct dp_packet *b, size_t allocated, enum dp_packet_source so
pkt_metadata_init(&b->md, 0);
dp_packet_reset_cutlen(b);
dp_packet_reset_offload(b);
+ dp_packet_set_tso_segsz(b, 0);
/* Initialize implementation-specific fields of dp_packet. */
dp_packet_init_specific(b);
/* By default assume the packet type to be Ethernet. */
@@ -199,6 +200,8 @@ dp_packet_clone_with_headroom(const struct dp_packet *buffer, size_t headroom)
*dp_packet_ol_flags_ptr(new_buffer) = *dp_packet_ol_flags_ptr(buffer);
*dp_packet_ol_flags_ptr(new_buffer) &= DP_PACKET_OL_SUPPORTED_MASK;
+ dp_packet_set_tso_segsz(new_buffer, dp_packet_get_tso_segsz(buffer));
+
if (dp_packet_rss_valid(buffer)) {
dp_packet_set_rss_hash(new_buffer, dp_packet_get_rss_hash(buffer));
}
@@ -126,6 +126,7 @@ struct dp_packet {
uint32_t ol_flags; /* Offloading flags. */
uint32_t rss_hash; /* Packet hash. */
uint32_t flow_mark; /* Packet flow mark. */
+ uint16_t tso_segsz; /* TCP TSO segment size. */
#endif
enum dp_packet_source source; /* Source of memory allocated as 'base'. */
@@ -166,6 +167,9 @@ static inline void dp_packet_set_size(struct dp_packet *, uint32_t);
static inline uint16_t dp_packet_get_allocated(const struct dp_packet *);
static inline void dp_packet_set_allocated(struct dp_packet *, uint16_t);
+static inline uint16_t dp_packet_get_tso_segsz(const struct dp_packet *);
+static inline void dp_packet_set_tso_segsz(struct dp_packet *, uint16_t);
+
void *dp_packet_resize_l2(struct dp_packet *, int increment);
void *dp_packet_resize_l2_5(struct dp_packet *, int increment);
static inline void *dp_packet_eth(const struct dp_packet *);
@@ -644,6 +648,17 @@ dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
b->mbuf.buf_len = s;
}
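+/* Returns the TCP segment size to be used when segmenting packet 'p'
+ * (TSO), or 0 if no segment size has been set. */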
+static inline uint16_t
+dp_packet_get_tso_segsz(const struct dp_packet *p)
+{
+ return p->mbuf.tso_segsz;
+}
+
+static inline void
+dp_packet_set_tso_segsz(struct dp_packet *p, uint16_t s)
+{
+ p->mbuf.tso_segsz = s;
+}
#else /* DPDK_NETDEV */
static inline void
@@ -700,6 +715,17 @@ dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
b->allocated_ = s;
}
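+/* Returns the stored TSO segment size of packet 'p', or 0 if it has not
+ * been set. */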
+static inline uint16_t
+dp_packet_get_tso_segsz(const struct dp_packet *p)
+{
+ return p->tso_segsz;
+}
+
+static inline void
+dp_packet_set_tso_segsz(struct dp_packet *p, uint16_t s)
+{
+ p->tso_segsz = s;
+}
#endif /* DPDK_NETDEV */
static inline void
@@ -2302,6 +2302,7 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
if (mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
struct tcp_header *th = dp_packet_l4(pkt);
+ int hdr_len;
if (!th) {
VLOG_WARN_RL(&rl, "%s: TCP Segmentation without L4 header"
@@ -2311,7 +2312,15 @@ netdev_dpdk_prep_hwol_packet(struct netdev_dpdk *dev, struct rte_mbuf *mbuf)
mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4;
mbuf->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM;
+ hdr_len = mbuf->l2_len + mbuf->l3_len + mbuf->l4_len;
mbuf->tso_segsz = dev->mtu - mbuf->l3_len - mbuf->l4_len;
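+        /* Each segment produced by the NIC will carry the full L2/L3/L4
+         * headers plus up to 'tso_segsz' bytes of payload, so it must
+         * still fit within the port's maximum packet length. */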
+ if (OVS_UNLIKELY((hdr_len + mbuf->tso_segsz) > dev->max_packet_len)) {
+ VLOG_WARN_RL(&rl, "%s: Oversized TSO packet. "
+ "hdr: %"PRIu32", gso: %"PRIu32", max len: %"PRIu32"",
+ dev->up.name, hdr_len, mbuf->tso_segsz,
+ dev->max_packet_len);
+ return false;
+ }
if (mbuf->ol_flags & RTE_MBUF_F_TX_IPV4) {
mbuf->ol_flags |= RTE_MBUF_F_TX_IP_CKSUM;
@@ -2586,7 +2595,8 @@ netdev_dpdk_filter_packet_len(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
int cnt = 0;
struct rte_mbuf *pkt;
- /* Filter oversized packets, unless are marked for TSO. */
+    /* Filter oversized packets. Oversized TSO packets are dropped during
+     * the offload preparation instead, for performance reasons. */
for (i = 0; i < pkt_cnt; i++) {
pkt = pkts[i];
if (OVS_UNLIKELY((pkt->pkt_len > dev->max_packet_len)
@@ -536,7 +536,7 @@ static atomic_count miimon_cnt = ATOMIC_COUNT_INIT(0);
static bool tap_supports_vnet_hdr = true;
static int netdev_linux_parse_vnet_hdr(struct dp_packet *b);
-static void netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu);
+static int netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu);
static int netdev_linux_do_ethtool(const char *name, struct ethtool_cmd *,
int cmd, const char *cmd_name);
static int get_flags(const struct netdev *, unsigned int *flags);
@@ -1595,9 +1595,10 @@ netdev_linux_rxq_drain(struct netdev_rxq *rxq_)
}
static int
-netdev_linux_sock_batch_send(int sock, int ifindex, bool tso, int mtu,
- struct dp_packet_batch *batch)
+netdev_linux_sock_batch_send(struct netdev *netdev_, int sock, int ifindex,
+ bool tso, int mtu, struct dp_packet_batch *batch)
{
+ struct netdev_linux *netdev = netdev_linux_cast(netdev_);
const size_t size = dp_packet_batch_size(batch);
/* We don't bother setting most fields in sockaddr_ll because the
* kernel ignores them for SOCK_RAW. */
@@ -1606,26 +1607,35 @@ netdev_linux_sock_batch_send(int sock, int ifindex, bool tso, int mtu,
struct mmsghdr *mmsg = xmalloc(sizeof(*mmsg) * size);
struct iovec *iov = xmalloc(sizeof(*iov) * size);
-
struct dp_packet *packet;
+ int cnt = 0;
+
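+    /* When TSO is in use, packets for which the vnet header cannot be
+     * built are dropped and accounted in 'tx_dropped'; only the remaining
+     * 'cnt' packets are handed to sendmmsg(). */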
DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
if (tso) {
- netdev_linux_prepend_vnet_hdr(packet, mtu);
- }
+ int ret = netdev_linux_prepend_vnet_hdr(packet, mtu);
+
+ if (OVS_UNLIKELY(ret)) {
+ netdev->tx_dropped += 1;
+ VLOG_WARN_RL(&rl, "%s: Packet dropped. %s",
+ netdev_get_name(netdev_), ovs_strerror(ret));
+ continue;
+ }
+ }
- iov[i].iov_base = dp_packet_data(packet);
- iov[i].iov_len = dp_packet_size(packet);
- mmsg[i].msg_hdr = (struct msghdr) { .msg_name = &sll,
- .msg_namelen = sizeof sll,
- .msg_iov = &iov[i],
- .msg_iovlen = 1 };
+ iov[cnt].iov_base = dp_packet_data(packet);
+ iov[cnt].iov_len = dp_packet_size(packet);
+ mmsg[cnt].msg_hdr = (struct msghdr) { .msg_name = &sll,
+ .msg_namelen = sizeof sll,
+ .msg_iov = &iov[cnt],
+ .msg_iovlen = 1 };
+ cnt++;
}
int error = 0;
- for (uint32_t ofs = 0; ofs < size; ) {
+ for (uint32_t ofs = 0; ofs < cnt;) {
ssize_t retval;
do {
- retval = sendmmsg(sock, mmsg + ofs, size - ofs, 0);
+ retval = sendmmsg(sock, mmsg + ofs, cnt - ofs, 0);
error = retval < 0 ? errno : 0;
} while (error == EINTR);
if (error) {
@@ -1667,7 +1677,14 @@ netdev_linux_tap_batch_send(struct netdev *netdev_, int mtu,
int error;
if (OVS_LIKELY(tap_supports_vnet_hdr)) {
- netdev_linux_prepend_vnet_hdr(packet, mtu);
+ error = netdev_linux_prepend_vnet_hdr(packet, mtu);
+ if (OVS_UNLIKELY(error)) {
+ netdev->tx_dropped++;
+ VLOG_WARN_RL(&rl, "%s: Prepend vnet hdr failed, packet "
+ "dropped. %s", netdev_get_name(netdev_),
+ ovs_strerror(error));
+ continue;
+ }
}
size = dp_packet_size(packet);
@@ -1797,7 +1814,8 @@ netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
goto free_batch;
}
- error = netdev_linux_sock_batch_send(sock, ifindex, tso, mtu, batch);
+ error = netdev_linux_sock_batch_send(netdev_, sock, ifindex, tso, mtu,
+ batch);
} else {
error = netdev_linux_tap_batch_send(netdev_, mtu, batch);
}
@@ -6922,8 +6940,7 @@ netdev_linux_parse_vnet_hdr(struct dp_packet *b)
switch (vnet->gso_type) {
case VIRTIO_NET_HDR_GSO_TCPV4:
case VIRTIO_NET_HDR_GSO_TCPV6:
- /* FIXME: The packet has offloaded TCP segmentation. The gso_size
- * is given and needs to be respected. */
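+        /* Remember the segment size requested by the sender so that it can
+         * be used when this packet is later segmented for transmission. */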
+ dp_packet_set_tso_segsz(b, (OVS_FORCE uint16_t) vnet->gso_size);
dp_packet_hwol_set_tcp_seg(b);
break;
@@ -6945,18 +6962,32 @@ netdev_linux_parse_vnet_hdr(struct dp_packet *b)
return ret;
}
-static void
+/* Prepends struct virtio_net_hdr to packet 'b'.
+ * Returns 0 if successful, otherwise a positive errno value.
+ * Returns EMSGSIZE if the packet 'b' cannot be sent over MTU 'mtu'. */
+static int
netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu)
{
struct virtio_net_hdr v;
struct virtio_net_hdr *vnet = &v;
if (dp_packet_hwol_is_tso(b)) {
- uint16_t hdr_len = ((char *)dp_packet_l4(b) - (char *)dp_packet_eth(b))
- + TCP_HEADER_LEN;
+ uint16_t tso_segsz = dp_packet_get_tso_segsz(b);
+ struct tcp_header *tcp = dp_packet_l4(b);
+ int tcp_hdr_len = TCP_OFFSET(tcp->tcp_ctl) * 4;
+ int hdr_len = ((char *) dp_packet_l4(b) - (char *) dp_packet_eth(b))
+ + tcp_hdr_len;
+ int max_packet_len = mtu + ETH_HEADER_LEN + VLAN_HEADER_LEN;
+
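+        /* Every segment generated from this packet carries 'hdr_len' bytes
+         * of headers plus up to 'tso_segsz' bytes of payload and must not
+         * exceed the MTU-derived maximum packet length. */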
+ if (OVS_UNLIKELY((hdr_len + tso_segsz) > max_packet_len)) {
+ VLOG_WARN_RL(&rl, "Oversized TSO packet. hdr_len: %"PRIu32", "
+ "gso: %"PRIu16", max length: %"PRIu32".", hdr_len,
+ tso_segsz, max_packet_len);
+ return EMSGSIZE;
+ }
vnet->hdr_len = (OVS_FORCE __virtio16)hdr_len;
- vnet->gso_size = (OVS_FORCE __virtio16)(mtu - hdr_len);
+ vnet->gso_size = (OVS_FORCE __virtio16)(tso_segsz);
if (dp_packet_hwol_is_ipv4(b)) {
vnet->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
} else if (dp_packet_hwol_tx_ipv6(b)) {
@@ -7046,4 +7077,5 @@ netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu)
}
dp_packet_push(b, vnet, sizeof *vnet);
+ return 0;
}