@@ -122,6 +122,8 @@ static inline void dp_packet_set_size(struct dp_packet *, uint32_t);
static inline uint16_t dp_packet_get_allocated(const struct dp_packet *);
static inline void dp_packet_set_allocated(struct dp_packet *, uint16_t);
+static inline bool dp_packet_is_tso(struct dp_packet *b);
+
void *dp_packet_resize_l2(struct dp_packet *, int increment);
void *dp_packet_resize_l2_5(struct dp_packet *, int increment);
static inline void *dp_packet_eth(const struct dp_packet *);
@@ -797,6 +799,14 @@ dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
b->mbuf.buf_len = s;
}
+static inline bool
+dp_packet_is_tso(struct dp_packet *b)
+{
+    /* NOTE(review): this matches plain L4 checksum offload requests
+     * (PKT_TX_L4_MASK) as well, not only PKT_TX_TCP_SEG — confirm that is
+     * the intended definition of "is TSO" for the drop paths that call
+     * this. */
+    return (b->mbuf.ol_flags & (PKT_TX_TCP_SEG | PKT_TX_L4_MASK)) != 0;
+}
+
static inline void
dp_packet_copy_mbuf_flags(struct dp_packet *dst, const struct dp_packet *src)
{
@@ -1007,6 +1017,12 @@ dp_packet_get_allocated(const struct dp_packet *b)
return b->allocated_;
}
+/* Stub used when this header's DPDK section is compiled out: without an
+ * rte_mbuf there are no offload flags, so no packet is ever marked for
+ * TSO. */
+static inline bool
+dp_packet_is_tso(struct dp_packet *b OVS_UNUSED)
+{
+    return false;
+}
+
static inline void
dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
{
@@ -700,11 +700,20 @@ netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED,
}
DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
+        size_t size = dp_packet_size(packet);
+
+        /* The BSD netdev cannot segment packets, so a packet marked for
+         * TSO is dropped here with a rate-limited warning rather than sent
+         * malformed. */
+        if (dp_packet_is_tso(packet)) {
+            VLOG_WARN_RL(&rl, "%s: No TSO support on port, TSO packet of size "
+                         "%" PRIuSIZE " dropped", name, size);
+
+            continue;
+        }
+
/* We need the whole data to send the packet on the device */
dp_packet_linearize(packet);
const void *data = dp_packet_data(packet);
-        size_t size = dp_packet_size(packet);
while (!error) {
ssize_t retval;
@@ -1384,14 +1384,18 @@ netdev_dpdk_vhost_construct(struct netdev *netdev)
goto out;
}
+    /* Keep the TSO/csum virtio features negotiable when multi-segment
+     * mbufs are enabled; otherwise disable them as before. */
+    if (!dpdk_multi_segment_mbufs) {
+        err = rte_vhost_driver_disable_features(dev->vhost_id,
+                                        1ULL << VIRTIO_NET_F_HOST_TSO4
+                                        | 1ULL << VIRTIO_NET_F_HOST_TSO6
+                                        | 1ULL << VIRTIO_NET_F_CSUM);
+        if (err) {
+            /* This is the server-side construct path, not the vhost-user
+             * *client* reconfigure path; keep the original message and the
+             * local 'name' used elsewhere in this function. */
+            VLOG_ERR("rte_vhost_driver_disable_features failed for vhost user "
+                     "port: %s\n", name);
+            goto out;
+        }
-    err = rte_vhost_driver_disable_features(dev->vhost_id,
-                                            1ULL << VIRTIO_NET_F_HOST_TSO4
-                                            | 1ULL << VIRTIO_NET_F_HOST_TSO6
-                                            | 1ULL << VIRTIO_NET_F_CSUM);
-    if (err) {
-        VLOG_ERR("rte_vhost_driver_disable_features failed for vhost user "
-                 "port: %s\n", name);
-        goto out;
}
err = rte_vhost_driver_start(dev->vhost_id);
@@ -2104,6 +2106,44 @@ netdev_dpdk_rxq_dealloc(struct netdev_rxq *rxq)
rte_free(rx);
}
+/* Fills in the mbuf's l2/l3/l4 length fields from the packet's header
+ * offsets and resets any RX RSS flag so the mbuf can be reused on egress.
+ * When PKT_TX_TCP_SEG is set, additionally requests TCP and IP checksum
+ * offload (which TSO requires) and sets the TSO segment size from the
+ * device MTU.  Safe to call on non-TSO packets: only the length fields and
+ * the RSS flag are touched in that case. */
+static void
+netdev_dpdk_prep_tso_packet(struct rte_mbuf *mbuf, int mtu)
+{
+    struct dp_packet *pkt;
+    struct tcp_header *th;
+
+    pkt = CONTAINER_OF(mbuf, struct dp_packet, mbuf);
+    mbuf->l2_len = (char *) dp_packet_l3(pkt) - (char *) dp_packet_eth(pkt);
+    mbuf->l3_len = (char *) dp_packet_l4(pkt) - (char *) dp_packet_l3(pkt);
+    th = dp_packet_l4(pkt);
+    /* There's no layer 4 in the packet. */
+    if (!th) {
+        return;
+    }
+    mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4;
+    mbuf->outer_l2_len = 0;
+    mbuf->outer_l3_len = 0;
+
+    /* Reset packet RX RSS flag to reuse in egress. */
+    dp_packet_mbuf_rss_flag_reset(pkt);
+
+    if (!(mbuf->ol_flags & PKT_TX_TCP_SEG)) {
+        return;
+    }
+
+    /* PKT_TX_TCP_SEG is already set (checked above); TSO additionally
+     * requires L3/L4 checksum offload.
+     * NOTE(review): PKT_TX_IP_CKSUM is only meaningful for IPv4 mbufs;
+     * IPv6 TSO packets may instead need PKT_TX_IPV6 — confirm against the
+     * DPDK mbuf API before merging. */
+    mbuf->ol_flags |= PKT_TX_TCP_CKSUM;
+    mbuf->ol_flags |= PKT_TX_IP_CKSUM;
+
+    /* Set the size of each TCP segment, based on the MTU of the device. */
+    mbuf->tso_segsz = mtu - mbuf->l3_len - mbuf->l4_len;
+}
+
/* Tries to transmit 'pkts' to txq 'qid' of device 'dev'. Takes ownership of
* 'pkts', even in case of failure.
* In case multi-segment mbufs / TSO is being used, it also prepares. In such
@@ -2413,13 +2453,29 @@ netdev_dpdk_filter_packet_len(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
int cnt = 0;
struct rte_mbuf *pkt;
+    /* Drop oversized packets unless they are marked for TSO, in which case
+     * they are expected to be segmented down to size before reaching the
+     * wire. */
for (i = 0; i < pkt_cnt; i++) {
pkt = pkts[i];
+
if (OVS_UNLIKELY(pkt->pkt_len > dev->max_packet_len)) {
-            VLOG_WARN_RL(&rl, "%s: Too big size %" PRIu32 " max_packet_len %d",
-                         dev->up.name, pkt->pkt_len, dev->max_packet_len);
-            rte_pktmbuf_free(pkt);
-            continue;
+            if (!(pkt->ol_flags & PKT_TX_TCP_SEG)) {
+                VLOG_WARN_RL(&rl, "%s: Too big size %" PRIu32 " "
+                             "max_packet_len %d",
+                             dev->up.name, pkt->pkt_len, dev->max_packet_len);
+                rte_pktmbuf_free(pkt);
+                continue;
+            }
}
+
+        /* Prepare the mbuf offload fields for packets bound for a NIC.  A
+         * packet sent to a vhost port does not traverse a NIC — it only
+         * travels between VMs on this host — so it needs no prepping. */
+        if (dev->type != DPDK_DEV_VHOST) {
+            netdev_dpdk_prep_tso_packet(pkt, dev->mtu);
+        }
if (OVS_UNLIKELY(i != cnt)) {
@@ -2519,6 +2575,7 @@ dpdk_copy_dp_packet_to_mbuf(struct dp_packet *packet, struct rte_mbuf **head,
struct rte_mempool *mp)
{
struct rte_mbuf *mbuf, *fmbuf;
+    struct dp_packet *pkt = NULL;
uint16_t max_data_len;
uint32_t nb_segs = 0;
uint32_t size = 0;
@@ -2559,6 +2616,12 @@ dpdk_copy_dp_packet_to_mbuf(struct dp_packet *packet, struct rte_mbuf **head,
fmbuf->nb_segs = nb_segs;
fmbuf->pkt_len = size;
+    /* Carry the source packet's L2/L3/L4 header offsets over to the newly
+     * built mbuf chain, so that later offload prepping (which uses
+     * dp_packet_l3()/dp_packet_l4() on the copy) can locate the headers. */
+    pkt = CONTAINER_OF(fmbuf, struct dp_packet, mbuf);
+    pkt->l2_pad_size = packet->l2_pad_size;
+    pkt->l2_5_ofs = packet->l2_5_ofs;
+    pkt->l3_ofs = packet->l3_ofs;
+    pkt->l4_ofs = packet->l4_ofs;
+
dp_packet_mbuf_write(fmbuf, 0, size, dp_packet_data(packet));
return 0;
@@ -2593,14 +2656,17 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
for (i = 0; i < cnt; i++) {
struct dp_packet *packet = batch->packets[i];
+        struct rte_mbuf *pkt = &packet->mbuf;
uint32_t size = dp_packet_size(packet);
int err = 0;
if (OVS_UNLIKELY(size > dev->max_packet_len)) {
-            VLOG_WARN_RL(&rl, "Too big size %u max_packet_len %d",
-                         size, dev->max_packet_len);
-            dropped++;
-            continue;
+            /* Oversized packets are only let through when marked for TSO;
+             * everything else is dropped with a rate-limited warning. */
+            if (!(pkt->ol_flags & PKT_TX_TCP_SEG)) {
+                VLOG_WARN_RL(&rl, "Too big size %u max_packet_len %d",
+                             size, dev->max_packet_len);
+                dropped++;
+                continue;
+            }
}
err = dpdk_copy_dp_packet_to_mbuf(packet, &pkts[txcnt],
@@ -2616,6 +2682,12 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
}
dp_packet_copy_mbuf_flags((struct dp_packet *)pkts[txcnt], packet);
+    if (dev->type != DPDK_DEV_VHOST) {
+        /* The copied packet originated outside DPDK, so its mbuf length
+         * fields are unset; refresh them before sending out a non-vhost
+         * (NIC) port, even when TSO is not requested — the helper only
+         * applies TSO/csum flags when PKT_TX_TCP_SEG is set. */
+        netdev_dpdk_prep_tso_packet(pkts[txcnt], dev->mtu);
+    }
+
txcnt++;
}
@@ -4466,14 +4538,16 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
goto unlock;
}
-    err = rte_vhost_driver_disable_features(dev->vhost_id,
-                                            1ULL << VIRTIO_NET_F_HOST_TSO4
-                                            | 1ULL << VIRTIO_NET_F_HOST_TSO6
-                                            | 1ULL << VIRTIO_NET_F_CSUM);
-    if (err) {
-        VLOG_ERR("rte_vhost_driver_disable_features failed for vhost user "
-                 "client port: %s\n", dev->up.name);
-        goto unlock;
+    /* Only disable the host TSO/csum virtio features when multi-segment
+     * mbufs are off; with multi-segment mbufs enabled these features stay
+     * negotiable with the guest. */
+    if (!dpdk_multi_segment_mbufs) {
+        err = rte_vhost_driver_disable_features(dev->vhost_id,
+                                        1ULL << VIRTIO_NET_F_HOST_TSO4
+                                        | 1ULL << VIRTIO_NET_F_HOST_TSO6
+                                        | 1ULL << VIRTIO_NET_F_CSUM);
+        if (err) {
+            VLOG_ERR("rte_vhost_driver_disable_features failed for vhost "
+                     "user client port: %s\n", dev->up.name);
+            goto unlock;
+        }
}
err = rte_vhost_driver_start(dev->vhost_id);
@@ -1108,11 +1108,20 @@ netdev_dummy_send(struct netdev *netdev, int qid OVS_UNUSED,
struct dp_packet *packet;
DP_PACKET_BATCH_FOR_EACH(i, packet, batch) {
+        size_t size = dp_packet_size(packet);
+
+        /* The dummy netdev cannot segment packets, so TSO packets are
+         * dropped.  NOTE(review): unlike the other netdevs this warning is
+         * not rate-limited — confirm that is acceptable for a test
+         * device. */
+        if (dp_packet_is_tso(packet)) {
+            VLOG_WARN("%s: No TSO support on port, TSO packet of size "
+                      "%" PRIuSIZE " dropped", netdev_get_name(netdev), size);
+
+            continue;
+        }
+
/* We need the whole data to send the packet on the device */
dp_packet_linearize(packet);
const void *buffer = dp_packet_data(packet);
-        size_t size = dp_packet_size(packet);
if (packet->packet_type != htonl(PT_ETH)) {
error = EPFNOSUPPORT;
@@ -1316,6 +1316,14 @@ netdev_linux_sock_batch_send(int sock, int ifindex,
struct dp_packet *packet;
DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
+        /* TSO not supported in Linux netdev; drop TSO packets.  Unlike the
+         * bsd/dummy variants of this change, no local 'size' variable is
+         * declared in this loop, so fetch the size directly (dp_packet_size()
+         * returns uint32_t, hence PRIu32). */
+        if (dp_packet_is_tso(packet)) {
+            VLOG_WARN_RL(&rl, "%d: No TSO support on port, TSO packet of "
+                         "size %" PRIu32 " dropped", sock,
+                         dp_packet_size(packet));
+            continue;
+        }
+
/* We need the whole data to send the packet on the device */
dp_packet_linearize(packet);
@@ -1372,6 +1379,14 @@ netdev_linux_tap_batch_send(struct netdev *netdev_,
ssize_t retval;
int error;
+    /* TSO not supported on tap devices; drop TSO packets.
+     * NOTE(review): 'size' used below is assumed to be declared earlier in
+     * this loop body, outside this hunk — confirm it is in scope and is a
+     * size_t (to match PRIuSIZE). */
+    if (dp_packet_is_tso(packet)) {
+        VLOG_WARN_RL(&rl, "%s: No TSO support on port, TSO packet of size "
+                     "%" PRIuSIZE " dropped", netdev_get_name(netdev_),
+                     size);
+        continue;
+    }
+
/* We need the whole data to send the packet on the device */
dp_packet_linearize(packet);