[ovs-dev,2/3] vhost TSO enabled.
diff mbox series

Message ID 20190923141510.10303-2-fbl@sysclose.org
State New
Headers show
Series
  • [ovs-dev,1/3] support for large mpool.
Related show

Commit Message

Flavio Leitner Sept. 23, 2019, 2:15 p.m. UTC
Signed-off-by: Flavio Leitner <fbl@sysclose.org>
---
 lib/dp-packet.h   | 16 ++++++++
 lib/netdev-dpdk.c | 93 ++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 103 insertions(+), 6 deletions(-)

Patch
diff mbox series

diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 14f0897fa..2b12604b9 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -109,6 +109,8 @@  static inline void dp_packet_set_size(struct dp_packet *, uint32_t);
 static inline uint16_t dp_packet_get_allocated(const struct dp_packet *);
 static inline void dp_packet_set_allocated(struct dp_packet *, uint16_t);
 
+static inline bool dp_packet_is_tso(struct dp_packet *b);
+
 void *dp_packet_resize_l2(struct dp_packet *, int increment);
 void *dp_packet_resize_l2_5(struct dp_packet *, int increment);
 static inline void *dp_packet_eth(const struct dp_packet *);
@@ -514,6 +516,14 @@  dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
     b->mbuf.buf_len = s;
 }
 
+static inline bool
+dp_packet_is_tso(struct dp_packet *b)
+{
+    return (b->mbuf.ol_flags & (PKT_TX_TCP_SEG | PKT_TX_L4_MASK))
+           ? true
+           : false;
+}
+
 /* Returns the RSS hash of the packet 'p'.  Note that the returned value is
  * correct only if 'dp_packet_rss_valid(p)' returns true */
 static inline uint32_t
@@ -643,6 +653,12 @@  dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
     b->allocated_ = s;
 }
 
+static inline bool
+dp_packet_is_tso(struct dp_packet *b OVS_UNUSED)
+{
+    return false;
+}
+
 /* Returns the RSS hash of the packet 'p'.  Note that the returned value is
  * correct only if 'dp_packet_rss_valid(p)' returns true */
 static inline uint32_t
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 7eb294366..cfbd9a9e5 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -371,7 +371,8 @@  struct ingress_policer {
 enum dpdk_hw_ol_features {
     NETDEV_RX_CHECKSUM_OFFLOAD = 1 << 0,
     NETDEV_RX_HW_CRC_STRIP = 1 << 1,
-    NETDEV_RX_HW_SCATTER = 1 << 2
+    NETDEV_RX_HW_SCATTER = 1 << 2,
+    NETDEV_TX_TSO_OFFLOAD = 1 << 3,
 };
 
 /*
@@ -640,7 +641,7 @@  dpdk_calculate_mbufs(struct netdev_dpdk *dev, int mtu, bool per_port_mp)
     return n_mbufs;
 }
 
-static struct dpdk_mp *
+static struct rte_mempool *
 dpdk_mp64k_create(uint32_t socket_id)
 {
     struct rte_mempool *mp;
@@ -990,6 +991,12 @@  dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
         conf.rxmode.offloads |= DEV_RX_OFFLOAD_KEEP_CRC;
     }
 
+    if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
+        conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_TSO;
+        conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_CKSUM;
+        conf.txmode.offloads |= DEV_TX_OFFLOAD_IPV4_CKSUM;
+    }
+
     /* Limit configured rss hash functions to only those supported
      * by the eth device. */
     conf.rx_adv_conf.rss_conf.rss_hf &= info.flow_type_rss_offloads;
@@ -1091,6 +1098,9 @@  dpdk_eth_dev_init(struct netdev_dpdk *dev)
     uint32_t rx_chksm_offload_capa = DEV_RX_OFFLOAD_UDP_CKSUM |
                                      DEV_RX_OFFLOAD_TCP_CKSUM |
                                      DEV_RX_OFFLOAD_IPV4_CKSUM;
+    uint32_t tx_tso_offload_capa = DEV_TX_OFFLOAD_TCP_TSO |
+                                   DEV_TX_OFFLOAD_TCP_CKSUM |
+                                   DEV_TX_OFFLOAD_IPV4_CKSUM;
 
     rte_eth_dev_info_get(dev->port_id, &info);
 
@@ -1117,6 +1127,14 @@  dpdk_eth_dev_init(struct netdev_dpdk *dev)
         dev->hw_ol_features &= ~NETDEV_RX_HW_SCATTER;
     }
 
+    if (info.tx_offload_capa & tx_tso_offload_capa) {
+        dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
+    } else {
+        dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD;
+        VLOG_WARN("Tx TSO offload is not supported on port "
+                  DPDK_PORT_ID_FMT, dev->port_id);
+    }
+
     n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq);
     n_txq = MIN(info.max_tx_queues, dev->up.n_txq);
 
@@ -1361,6 +1379,7 @@  netdev_dpdk_vhost_construct(struct netdev *netdev)
         goto out;
     }
 
+#if 0
     err = rte_vhost_driver_disable_features(dev->vhost_id,
                                 1ULL << VIRTIO_NET_F_HOST_TSO4
                                 | 1ULL << VIRTIO_NET_F_HOST_TSO6
@@ -1370,6 +1389,7 @@  netdev_dpdk_vhost_construct(struct netdev *netdev)
                  "port: %s\n", name);
         goto out;
     }
+#endif
 
     err = rte_vhost_driver_start(dev->vhost_id);
     if (err) {
@@ -1702,6 +1722,11 @@  netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args)
         } else {
             smap_add(args, "rx_csum_offload", "false");
         }
+        if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
+            smap_add(args, "tx_tso_offload", "true");
+        } else {
+            smap_add(args, "tx_tso_offload", "false");
+        }
         smap_add(args, "lsc_interrupt_mode",
                  dev->lsc_interrupt_mode ? "true" : "false");
     }
@@ -2081,6 +2106,41 @@  netdev_dpdk_rxq_dealloc(struct netdev_rxq *rxq)
     rte_free(rx);
 }
 
+/* Should only be called if PKT_TX_TCP_SEG is set in ol_flags.
+ * Furthermore, it also sets the PKT_TX_TCP_CKSUM and PKT_TX_IP_CKSUM flags,
+ * and PKT_TX_IPV4 and PKT_TX_IPV6 in case the packet is IPv4 or IPv6,
+ * respectively. */
+static void
+netdev_dpdk_prep_tso_packet(struct rte_mbuf *mbuf, int mtu)
+{
+    struct dp_packet *pkt;
+    struct tcp_header *th;
+
+    pkt = CONTAINER_OF(mbuf, struct dp_packet, mbuf);
+    mbuf->l2_len = (char *) dp_packet_l3(pkt) - (char *) dp_packet_eth(pkt);
+    mbuf->l3_len = (char *) dp_packet_l4(pkt) - (char *) dp_packet_l3(pkt);
+    th = dp_packet_l4(pkt);
+    /* There's no layer 4 in the packet. */
+    if (!th) {
+        return;
+    }
+    mbuf->l4_len = TCP_OFFSET(th->tcp_ctl) * 4;
+    mbuf->outer_l2_len = 0;
+    mbuf->outer_l3_len = 0;
+
+    if (!(mbuf->ol_flags & PKT_TX_TCP_SEG)) {
+        return;
+    }
+
+    /* Prepare packet for egress. */
+    mbuf->ol_flags |= PKT_TX_TCP_SEG;
+    mbuf->ol_flags |= PKT_TX_TCP_CKSUM;
+    mbuf->ol_flags |= PKT_TX_IP_CKSUM;
+
+    /* Set the size of each TCP segment, based on the MTU of the device. */
+    mbuf->tso_segsz = mtu - mbuf->l3_len - mbuf->l4_len;
+}
+
 /* Tries to transmit 'pkts' to txq 'qid' of device 'dev'.  Takes ownership of
  * 'pkts', even in case of failure.
  *
@@ -2374,13 +2434,29 @@  netdev_dpdk_filter_packet_len(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
     int cnt = 0;
     struct rte_mbuf *pkt;
 
+    /* Filter oversized packets, unless are marked for TSO. */
     for (i = 0; i < pkt_cnt; i++) {
         pkt = pkts[i];
         if (OVS_UNLIKELY(pkt->pkt_len > dev->max_packet_len)) {
-            VLOG_WARN_RL(&rl, "%s: Too big size %" PRIu32 " max_packet_len %d",
-                         dev->up.name, pkt->pkt_len, dev->max_packet_len);
-            rte_pktmbuf_free(pkt);
-            continue;
+            if (!(pkt->ol_flags & PKT_TX_TCP_SEG)) {
+                VLOG_WARN_RL(&rl, "%s: Too big size %" PRIu32 " "
+                             "max_packet_len %d",
+                             dev->up.name, pkt->pkt_len, dev->max_packet_len);
+                rte_pktmbuf_free(pkt);
+                continue;
+            } else {
+                /* 'If' the 'pkt' is intended for a VM, prepare it for sending,
+                 * 'else' the 'pkt' will not actually traverse the NIC, but
+                 * rather travel between VMs on the same host. */
+                if (dev->type != DPDK_DEV_VHOST) {
+                    netdev_dpdk_prep_tso_packet(pkt, dev->mtu);
+                }
+            }
+        } else {
+            if (dev->type != DPDK_DEV_VHOST) {
+                netdev_dpdk_prep_tso_packet(pkt, dev->mtu);
+            }
+
         }
 
         if (OVS_UNLIKELY(i != cnt)) {
@@ -4277,6 +4353,9 @@  dpdk_vhost_reconfigure_helper(struct netdev_dpdk *dev)
         dev->tx_q[0].map = 0;
     }
 
+    dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
+    VLOG_DBG("%s: TSO enabled on vhost port", dev->up.name);
+
     netdev_dpdk_remap_txqs(dev);
 
     err = netdev_dpdk_mempool_configure(dev);
@@ -4370,6 +4449,7 @@  netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
             goto unlock;
         }
 
+#if 0
         err = rte_vhost_driver_disable_features(dev->vhost_id,
                                     1ULL << VIRTIO_NET_F_HOST_TSO4
                                     | 1ULL << VIRTIO_NET_F_HOST_TSO6
@@ -4379,6 +4459,7 @@  netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
                      "client port: %s\n", dev->up.name);
             goto unlock;
         }
+#endif
 
         err = rte_vhost_driver_start(dev->vhost_id);
         if (err) {