@@ -56,7 +56,6 @@ dp_packet_copy_mbuf_flags(struct dp_packet *dst, const struct dp_packet *src)
struct rte_mbuf *buf_dst = &(dst->mbuf);
struct rte_mbuf buf_src = src->mbuf;
- buf_dst->nb_segs = buf_src.nb_segs;
buf_dst->ol_flags = buf_src.ol_flags;
buf_dst->packet_type = buf_src.packet_type;
buf_dst->tx_offload = buf_src.tx_offload;
@@ -184,6 +183,7 @@ dp_packet_clone_with_headroom(const struct dp_packet *b, size_t headroom) {
/* copy multi-seg data */
if (b->source == DPBUF_DPDK && b->mbuf.nb_segs > 1) {
void *dst = NULL;
+ struct rte_mbuf *new_mbuf = NULL;
struct rte_mbuf *mbuf = CONST_CAST(struct rte_mbuf *, &b->mbuf);
new_buffer = dp_packet_new_with_headroom(pkt_len, headroom);
@@ -193,6 +193,9 @@ dp_packet_clone_with_headroom(const struct dp_packet *b, size_t headroom) {
if (!rte_pktmbuf_read(mbuf, 0, pkt_len, dst)) {
return NULL;
}
+
+ new_mbuf = CONST_CAST(struct rte_mbuf *, &new_buffer->mbuf);
+ new_mbuf->nb_segs = 1;
} else {
new_buffer = dp_packet_clone_data_with_headroom(dp_packet_data(b),
dp_packet_size(b),
@@ -28,6 +28,8 @@
#include <rte_bus_pci.h>
#include <rte_config.h>
+#include "rte_ip.h"
+#include "rte_tcp.h"
#include <rte_cycles.h>
#include <rte_errno.h>
#include <rte_eth_ring.h>
@@ -1375,16 +1377,6 @@ netdev_dpdk_vhost_construct(struct netdev *netdev)
goto out;
}
- err = rte_vhost_driver_disable_features(dev->vhost_id,
- 1ULL << VIRTIO_NET_F_HOST_TSO4
- | 1ULL << VIRTIO_NET_F_HOST_TSO6
- | 1ULL << VIRTIO_NET_F_CSUM);
- if (err) {
- VLOG_ERR("rte_vhost_driver_disable_features failed for vhost user "
- "port: %s\n", name);
- goto out;
- }
-
err = rte_vhost_driver_start(dev->vhost_id);
if (err) {
VLOG_ERR("rte_vhost_driver_start failed for vhost user "
@@ -2019,6 +2011,57 @@ netdev_dpdk_rxq_dealloc(struct netdev_rxq *rxq)
rte_free(rx);
}
+/* Fills in the Tx offload metadata of 'mbuf' before it is handed to a device
+ * whose MTU is 'mtu'.
+ *
+ * The l2_len, l3_len and l4_len members are always derived from the layer
+ * offsets of the dp_packet that embeds 'mbuf'; l4_len is read from the TCP
+ * data-offset field, so it is only meaningful for TCP packets.  If a layer
+ * pointer is unavailable the function returns early and leaves the remaining
+ * members untouched.
+ *
+ * Only when PKT_TX_TCP_SEG is set in ol_flags does the function go on to
+ * request TCP checksum offload (PKT_TX_TCP_CKSUM), set tso_segsz and store
+ * the pseudo-header checksum in the TCP header.  Packets flagged PKT_TX_IPV4
+ * additionally get PKT_TX_IP_CKSUM and a zeroed IP header checksum; any
+ * other packet is handled as IPv6.  PKT_TX_IPV4 / PKT_TX_IPV6 are only read
+ * here, never set. */
+static void
+netdev_dpdk_prep_tso_packet(struct rte_mbuf *mbuf, int mtu)
+{
+    struct dp_packet *pkt = CONTAINER_OF(mbuf, struct dp_packet, mbuf);
+    char *l2 = (char *) dp_packet_eth(pkt);
+    char *l3 = (char *) dp_packet_l3(pkt);
+    struct tcp_header *th = dp_packet_l4(pkt);
+    struct tcp_hdr *m_tcp_hdr;
+    char *m_l3_hdr;
+
+    /* Validate each layer pointer before subtracting: arithmetic on a NULL
+     * pointer is undefined and would store a garbage header length. */
+    if (!l2 || !l3) {
+        return;
+    }
+    mbuf->l2_len = l3 - l2;
+
+    /* There's no layer 4 in the packet. */
+    if (!th) {
+        return;
+    }
+    mbuf->l3_len = (char *) th - l3;
+    mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4;
+    mbuf->outer_l2_len = 0;
+    mbuf->outer_l3_len = 0;
+
+    if (!(mbuf->ol_flags & PKT_TX_TCP_SEG)) {
+        return;
+    }
+
+    m_l3_hdr = rte_pktmbuf_mtod(mbuf, char *) + mbuf->l2_len;
+    m_tcp_hdr = (struct tcp_hdr *) (m_l3_hdr + mbuf->l3_len);
+
+    mbuf->ol_flags |= PKT_TX_TCP_CKSUM;
+
+    /* Set the size of each TCP segment, based on the MTU of the device. */
+    mbuf->tso_segsz = mtu - mbuf->l3_len - mbuf->l4_len;
+
+    if (mbuf->ol_flags & PKT_TX_IPV4) {
+        /* IPv4 packet: IP checksum offload is an IPv4-only feature, so it
+         * is requested here rather than unconditionally. */
+        struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *) m_l3_hdr;
+
+        mbuf->ol_flags |= PKT_TX_IP_CKSUM;
+        ipv4_hdr->hdr_checksum = 0;
+        m_tcp_hdr->cksum = rte_ipv4_phdr_cksum(ipv4_hdr, mbuf->ol_flags);
+    } else {
+        /* IPv6 packet: there is no IP header checksum to offload. */
+        struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *) m_l3_hdr;
+
+        m_tcp_hdr->cksum = rte_ipv6_phdr_cksum(ipv6_hdr, mbuf->ol_flags);
+    }
+}
+
/* Tries to transmit 'pkts' to txq 'qid' of device 'dev'. Takes ownership of
* 'pkts', even in case of failure.
*
@@ -2300,13 +2343,29 @@ netdev_dpdk_filter_packet_len(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
int cnt = 0;
struct rte_mbuf *pkt;
+ /* Filter oversized packets, unless they are marked for TSO. */
for (i = 0; i < pkt_cnt; i++) {
pkt = pkts[i];
+
if (OVS_UNLIKELY(pkt->pkt_len > dev->max_packet_len)) {
- VLOG_WARN_RL(&rl, "%s: Too big size %" PRIu32 " max_packet_len %d",
- dev->up.name, pkt->pkt_len, dev->max_packet_len);
- rte_pktmbuf_free(pkt);
- continue;
+ if (!(pkt->ol_flags & PKT_TX_TCP_SEG)) {
+ VLOG_WARN_RL(&rl, "%s: Too big size %" PRIu32 " "
+ "max_packet_len %d",
+ dev->up.name, pkt->pkt_len, dev->max_packet_len);
+ rte_pktmbuf_free(pkt);
+ continue;
+ } else {
+ if (dev->type != DPDK_DEV_VHOST) {
+ netdev_dpdk_prep_tso_packet(pkt, dev->mtu);
+ }
+
+ /* Else the frames will not actually traverse the NIC, but
+ * rather travel between VMs on the same host. */
+ }
+ } else {
+ if (dev->type != DPDK_DEV_VHOST) {
+ netdev_dpdk_prep_tso_packet(pkt, dev->mtu);
+ }
}
if (OVS_UNLIKELY(i != cnt)) {
@@ -2430,6 +2489,12 @@ dpdk_copy_dp_packet_to_mbuf(struct dp_packet *packet, struct rte_mbuf **head,
fmbuf->nb_segs = nb_segs;
fmbuf->pkt_len = size;
+ struct dp_packet *pkt = CONTAINER_OF(fmbuf, struct dp_packet, mbuf);
+ pkt->l2_pad_size = packet->l2_pad_size;
+ pkt->l2_5_ofs = packet->l2_5_ofs;
+ pkt->l3_ofs = packet->l3_ofs;
+ pkt->l4_ofs = packet->l4_ofs;
+
dp_packet_mbuf_write(fmbuf, 0, size, dp_packet_data(packet));
return 0;
@@ -2464,14 +2529,17 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
for (i = 0; i < cnt; i++) {
struct dp_packet *packet = batch->packets[i];
+ struct rte_mbuf *pkt = &batch->packets[i]->mbuf;
uint32_t size = dp_packet_size(packet);
int err = 0;
if (OVS_UNLIKELY(size > dev->max_packet_len)) {
- VLOG_WARN_RL(&rl, "Too big size %u max_packet_len %d",
- size, dev->max_packet_len);
- dropped++;
- continue;
+ if (!(pkt->ol_flags & PKT_TX_TCP_SEG)) {
+ VLOG_WARN_RL(&rl, "Too big size %u max_packet_len %d",
+ size, dev->max_packet_len);
+ dropped++;
+ continue;
+ }
}
err = dpdk_copy_dp_packet_to_mbuf(packet, &pkts[txcnt],
@@ -2487,6 +2555,12 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
}
dp_packet_copy_mbuf_flags((struct dp_packet *)pkts[txcnt], packet);
+ if (dev->type != DPDK_DEV_VHOST) {
+ /* If packet is non-DPDK, at the very least, we need to update the
+ * mbuf length members, even if TSO is not to be performed. */
+ netdev_dpdk_prep_tso_packet(pkts[txcnt], dev->mtu);
+ }
+
txcnt++;
}
@@ -4137,16 +4211,6 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
goto unlock;
}
- err = rte_vhost_driver_disable_features(dev->vhost_id,
- 1ULL << VIRTIO_NET_F_HOST_TSO4
- | 1ULL << VIRTIO_NET_F_HOST_TSO6
- | 1ULL << VIRTIO_NET_F_CSUM);
- if (err) {
- VLOG_ERR("rte_vhost_driver_disable_features failed for vhost user "
- "client port: %s\n", dev->up.name);
- goto unlock;
- }
-
err = rte_vhost_driver_start(dev->vhost_id);
if (err) {
VLOG_ERR("rte_vhost_driver_start failed for vhost user "