diff mbox

[ovs-dev,RFC,v6] Update relevant artifacts to add support for DPDK 17.05.

Message ID 1497272871-20306-1-git-send-email-michalx.weglicki@intel.com
State Superseded
Headers show

Commit Message

Weglicki, MichalX June 12, 2017, 1:07 p.m. UTC
Patch is marked as RFC because of DPDK 17.05 possible crash:
http://dpdk.org/dev/patchwork/patch/25012/

Upgrading to DPDK 17.05 adds new significant features
relevant to OVS, including, but not limited to:
- vhost pmd,
- tun/tap PMD,
- VFIO hotplug support,
- Generic flow API.

Following changes are applied:
- netdev-dpdk: Changes required by DPDK API modifications.
- doc: Because of DPDK API changes, backward compatibility
  with previous DPDK releases will be broken, thus all
  relevant documentation entries are updated.
- .travis: DPDK version change from 16.11.1 to 17.05.
- rhel/openvswitch-fedora.spec.in: DPDK version change
  from 16.11 to 17.05.

v1->v2: Patch rework based on minor review comments.
v2->v3: VHOST user client reconfiguration corrected.
v3->v4: Patch is rebased against OVS master, minor
        rework based on review comments.
v4->v5: Some error log messages are corrected, NEWS file
        is updated accordingly.
v5->v6: Funcions call order in vhost construct has been
        corrected.

Signed-off-by: Michal Weglicki <michalx.weglicki@intel.com>
---
 .travis/linux-build.sh                   |   2 +-
 Documentation/intro/install/dpdk.rst     |   8 +-
 Documentation/topics/dpdk/vhost-user.rst |   8 +-
 NEWS                                     |   1 +
 lib/netdev-dpdk.c                        | 144 +++++++++++++++++++------------
 rhel/openvswitch-fedora.spec.in          |   2 +-
 tests/dpdk/ring_client.c                 |   6 +-
 7 files changed, 105 insertions(+), 66 deletions(-)
diff mbox

Patch

diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh
index 8750d68..ec15fd8 100755
--- a/.travis/linux-build.sh
+++ b/.travis/linux-build.sh
@@ -80,7 +80,7 @@  fi
 
 if [ "$DPDK" ]; then
     if [ -z "$DPDK_VER" ]; then
-        DPDK_VER="16.11.1"
+        DPDK_VER="17.05"
     fi
     install_dpdk $DPDK_VER
     if [ "$CC" = "clang" ]; then
diff --git a/Documentation/intro/install/dpdk.rst b/Documentation/intro/install/dpdk.rst
index e83f852..536450b 100644
--- a/Documentation/intro/install/dpdk.rst
+++ b/Documentation/intro/install/dpdk.rst
@@ -40,7 +40,7 @@  Build requirements
 In addition to the requirements described in :doc:`general`, building Open
 vSwitch with DPDK will require the following:
 
-- DPDK 16.11
+- DPDK 17.05
 
 - A `DPDK supported NIC`_
 
@@ -69,9 +69,9 @@  Install DPDK
 #. Download the `DPDK sources`_, extract the file and set ``DPDK_DIR``::
 
        $ cd /usr/src/
-       $ wget http://fast.dpdk.org/rel/dpdk-16.11.1.tar.xz
-       $ tar xf dpdk-16.11.1.tar.xz
-       $ export DPDK_DIR=/usr/src/dpdk-stable-16.11.1
+       $ wget http://fast.dpdk.org/rel/dpdk-17.05.tar.xz
+       $ tar xf dpdk-17.05.tar.xz
+       $ export DPDK_DIR=/usr/src/dpdk-17.05
        $ cd $DPDK_DIR
 
 #. (Optional) Configure DPDK as a shared library
diff --git a/Documentation/topics/dpdk/vhost-user.rst b/Documentation/topics/dpdk/vhost-user.rst
index 3b11c4d..61387ae 100644
--- a/Documentation/topics/dpdk/vhost-user.rst
+++ b/Documentation/topics/dpdk/vhost-user.rst
@@ -292,9 +292,9 @@  To begin, instantiate a guest as described in :ref:`dpdk-vhost-user` or
 DPDK sources to VM and build DPDK::
 
     $ cd /root/dpdk/
-    $ wget http://fast.dpdk.org/rel/dpdk-16.11.1.tar.xz
-    $ tar xf dpdk-16.11.1.tar.xz
-    $ export DPDK_DIR=/root/dpdk/dpdk-stable-16.11.1
+    $ wget http://fast.dpdk.org/rel/dpdk-17.05.tar.xz
+    $ tar xf dpdk-17.05.tar.xz
+    $ export DPDK_DIR=/root/dpdk/dpdk-17.05
     $ export DPDK_TARGET=x86_64-native-linuxapp-gcc
     $ export DPDK_BUILD=$DPDK_DIR/$DPDK_TARGET
     $ cd $DPDK_DIR
@@ -378,7 +378,7 @@  Sample XML
         </disk>
         <disk type='dir' device='disk'>
           <driver name='qemu' type='fat'/>
-          <source dir='/usr/src/dpdk-stable-16.11.1'/>
+          <source dir='/usr/src/dpdk-17.05'/>
           <target dev='vdb' bus='virtio'/>
           <readonly/>
         </disk>
diff --git a/NEWS b/NEWS
index b526646..a55700d 100644
--- a/NEWS
+++ b/NEWS
@@ -18,6 +18,7 @@  Post-v2.7.0
        still can be configured via extra arguments for DPDK EAL.
      * dpdkvhostuser ports are marked as deprecated.  They will be removed
        in an upcoming release.
+     * Support for DPDK v17.05.
    - IPFIX now provides additional counters:
      * Total counters since metering process startup.
      * Per-flow TCP flag counters.
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 810800e..7e11420 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -22,6 +22,9 @@ 
 #include <stdlib.h>
 #include <errno.h>
 #include <unistd.h>
+#include <linux/virtio_net.h>
+#include <sys/socket.h>
+#include <linux/if.h>
 
 #include <rte_config.h>
 #include <rte_cycles.h>
@@ -31,7 +34,7 @@ 
 #include <rte_malloc.h>
 #include <rte_mbuf.h>
 #include <rte_meter.h>
-#include <rte_virtio_net.h>
+#include <rte_vhost.h>
 
 #include "dirs.h"
 #include "dp-packet.h"
@@ -56,6 +59,8 @@ 
 #include "timeval.h"
 #include "unixctl.h"
 
+enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
+
 VLOG_DEFINE_THIS_MODULE(netdev_dpdk);
 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
 
@@ -170,6 +175,21 @@  static const struct rte_eth_conf port_conf = {
     },
 };
 
+/*
+ * These callbacks allow virtio-net devices to be added to vhost ports when
+ * configuration has been fully completed.
+ */
+static int new_device(int vid);
+static void destroy_device(int vid);
+static int vring_state_changed(int vid, uint16_t queue_id, int enable);
+static const struct vhost_device_ops virtio_net_device_ops =
+{
+    .new_device =  new_device,
+    .destroy_device = destroy_device,
+    .vring_state_changed = vring_state_changed,
+    .features_changed = NULL
+};
+
 enum { DPDK_RING_SIZE = 256 };
 BUILD_ASSERT_DECL(IS_POW2(DPDK_RING_SIZE));
 enum { DRAIN_TSC = 200000ULL };
@@ -411,8 +431,8 @@  struct netdev_rxq_dpdk {
     dpdk_port_t port_id;
 };
 
-static int netdev_dpdk_class_init(void);
-static int netdev_dpdk_vhost_class_init(void);
+static void netdev_dpdk_destruct(struct netdev *netdev);
+static void netdev_dpdk_vhost_destruct(struct netdev *netdev);
 
 int netdev_dpdk_get_vid(const struct netdev_dpdk *dev);
 
@@ -422,8 +442,8 @@  netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev);
 static bool
 is_dpdk_class(const struct netdev_class *class)
 {
-    return class->init == netdev_dpdk_class_init
-           || class->init == netdev_dpdk_vhost_class_init;
+    return class->destruct == netdev_dpdk_destruct
+           || class->destruct == netdev_dpdk_vhost_destruct;
 }
 
 /* DPDK NIC drivers allocate RX buffers at a particular granularity, typically
@@ -958,13 +978,45 @@  netdev_dpdk_vhost_construct(struct netdev *netdev)
     if (err) {
         VLOG_ERR("vhost-user socket device setup failure for socket %s\n",
                  dev->vhost_id);
+        goto out;
     } else {
         fatal_signal_add_file_to_unlink(dev->vhost_id);
         VLOG_INFO("Socket %s created for vhost-user port %s\n",
                   dev->vhost_id, name);
     }
+
+    err = rte_vhost_driver_callback_register(dev->vhost_id,
+                                                &virtio_net_device_ops);
+    if (err) {
+        VLOG_ERR("rte_vhost_driver_callback_register failed for vhost user "
+                 "port: %s\n", name);
+        goto out;
+    }
+
+    err = rte_vhost_driver_disable_features(dev->vhost_id,
+                                1ULL << VIRTIO_NET_F_HOST_TSO4
+                                | 1ULL << VIRTIO_NET_F_HOST_TSO6
+                                | 1ULL << VIRTIO_NET_F_CSUM);
+    if (err) {
+        VLOG_ERR("rte_vhost_driver_disable_features failed for vhost user "
+                 "port: %s\n", name);
+        goto out;
+    }
+
+    err = rte_vhost_driver_start(dev->vhost_id);
+    if (err) {
+        VLOG_ERR("rte_vhost_driver_start failed for vhost user "
+                 "port: %s\n", name);
+    }
+
     err = vhost_common_construct(netdev);
+    if (err) {
+        VLOG_ERR("vhost_common_construct failed for vhost user "
+                 "port: %s\n", name);
+        goto out;
+    }
 
+out:
     ovs_mutex_unlock(&dpdk_mutex);
     VLOG_WARN_ONCE("dpdkvhostuser ports are considered deprecated;  "
                    "please migrate to dpdkvhostuserclient ports.");
@@ -978,6 +1030,10 @@  netdev_dpdk_vhost_client_construct(struct netdev *netdev)
 
     ovs_mutex_lock(&dpdk_mutex);
     err = vhost_common_construct(netdev);
+    if (err) {
+        VLOG_ERR("vhost_common_construct failed for vhost user client"
+                 "port: %s\n", netdev->name);
+    }
     ovs_mutex_unlock(&dpdk_mutex);
     return err;
 }
@@ -2478,12 +2534,9 @@  static void
 set_irq_status(int vid)
 {
     uint32_t i;
-    uint64_t idx;
 
-    for (i = 0; i < rte_vhost_get_queue_num(vid); i++) {
-        idx = i * VIRTIO_QNUM;
-        rte_vhost_enable_guest_notification(vid, idx + VIRTIO_RXQ, 0);
-        rte_vhost_enable_guest_notification(vid, idx + VIRTIO_TXQ, 0);
+    for (i = 0; i < rte_vhost_get_vring_num(vid); i++) {
+        rte_vhost_enable_guest_notification(vid, i, 0);
     }
 }
 
@@ -2546,7 +2599,7 @@  new_device(int vid)
     LIST_FOR_EACH(dev, list_node, &dpdk_list) {
         ovs_mutex_lock(&dev->mutex);
         if (strncmp(ifname, dev->vhost_id, IF_NAME_SZ) == 0) {
-            uint32_t qp_num = rte_vhost_get_queue_num(vid);
+            uint32_t qp_num = rte_vhost_get_vring_num(vid)/VIRTIO_QNUM;
 
             /* Get NUMA information */
             newnode = rte_vhost_get_numa_node(vid);
@@ -2713,27 +2766,6 @@  netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev)
     return ovsrcu_get(struct ingress_policer *, &dev->ingress_policer);
 }
 
-/*
- * These callbacks allow virtio-net devices to be added to vhost ports when
- * configuration has been fully complete.
- */
-static const struct virtio_net_device_ops virtio_net_device_ops =
-{
-    .new_device =  new_device,
-    .destroy_device = destroy_device,
-    .vring_state_changed = vring_state_changed
-};
-
-static void *
-start_vhost_loop(void *dummy OVS_UNUSED)
-{
-     pthread_detach(pthread_self());
-     /* Put the vhost thread into quiescent state. */
-     ovsrcu_quiesce_start();
-     rte_vhost_driver_session_start();
-     return NULL;
-}
-
 static int
 netdev_dpdk_class_init(void)
 {
@@ -2753,25 +2785,6 @@  netdev_dpdk_class_init(void)
     return 0;
 }
 
-static int
-netdev_dpdk_vhost_class_init(void)
-{
-    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
-
-    /* This function can be called for different classes.  The initialization
-     * needs to be done only once */
-    if (ovsthread_once_start(&once)) {
-        rte_vhost_driver_callback_register(&virtio_net_device_ops);
-        rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4
-                                  | 1ULL << VIRTIO_NET_F_HOST_TSO6
-                                  | 1ULL << VIRTIO_NET_F_CSUM);
-        ovs_thread_create("vhost_thread", start_vhost_loop, NULL);
-
-        ovsthread_once_done(&once);
-    }
-
-    return 0;
-}
 
 /* Client Rings */
 
@@ -3226,6 +3239,31 @@  netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
                       "using client socket '%s'",
                       dev->up.name, dev->vhost_id);
         }
+
+        err = rte_vhost_driver_callback_register(dev->vhost_id,
+                                                 &virtio_net_device_ops);
+        if (err) {
+            VLOG_ERR("rte_vhost_driver_callback_register failed for "
+                     "vhost user port: %s\n", dev->up.name);
+            goto unlock;
+        }
+
+        err = rte_vhost_driver_disable_features(dev->vhost_id,
+                                    1ULL << VIRTIO_NET_F_HOST_TSO4
+                                    | 1ULL << VIRTIO_NET_F_HOST_TSO6
+                                    | 1ULL << VIRTIO_NET_F_CSUM);
+        if (err) {
+            VLOG_ERR("rte_vhost_driver_disable_features failed for vhost user "
+                     "port: %s\n", dev->up.name);
+            goto unlock;
+        }
+
+        err = rte_vhost_driver_start(dev->vhost_id);
+        if (err) {
+            VLOG_ERR("rte_vhost_driver_start failed for vhost user "
+                     "port: %s\n", dev->up.name);
+            goto unlock;
+        }
     }
 
     err = dpdk_vhost_reconfigure_helper(dev);
@@ -3344,7 +3382,7 @@  static const struct netdev_class dpdk_ring_class =
 static const struct netdev_class dpdk_vhost_class =
     NETDEV_DPDK_CLASS(
         "dpdkvhostuser",
-        netdev_dpdk_vhost_class_init,
+        NULL,
         netdev_dpdk_vhost_construct,
         netdev_dpdk_vhost_destruct,
         NULL,
@@ -3359,7 +3397,7 @@  static const struct netdev_class dpdk_vhost_class =
 static const struct netdev_class dpdk_vhost_client_class =
     NETDEV_DPDK_CLASS(
         "dpdkvhostuserclient",
-        netdev_dpdk_vhost_class_init,
+        NULL,
         netdev_dpdk_vhost_client_construct,
         netdev_dpdk_vhost_destruct,
         netdev_dpdk_vhost_client_set_config,
diff --git a/rhel/openvswitch-fedora.spec.in b/rhel/openvswitch-fedora.spec.in
index 9fc5f27..82d8748 100644
--- a/rhel/openvswitch-fedora.spec.in
+++ b/rhel/openvswitch-fedora.spec.in
@@ -84,7 +84,7 @@  BuildRequires: libcap-ng libcap-ng-devel
 %endif
 %if %{with dpdk}
 BuildRequires: libpcap-devel numactl-devel
-BuildRequires: dpdk-devel >= 16.11
+BuildRequires: dpdk-devel >= 17.05
 Provides: %{name}-dpdk = %{version}-%{release}
 %endif
 
diff --git a/tests/dpdk/ring_client.c b/tests/dpdk/ring_client.c
index b153713..8cc3fb5 100644
--- a/tests/dpdk/ring_client.c
+++ b/tests/dpdk/ring_client.c
@@ -185,15 +185,15 @@  main(int argc, char *argv[])
         /* Try dequeuing max possible packets first, if that fails, get the
          * most we can. Loop body should only execute once, maximum.
          */
-        while (unlikely(rte_ring_dequeue_bulk(rx_ring, pkts, rx_pkts) != 0) &&
-            rx_pkts > 0) {
+        while (unlikely(rte_ring_dequeue_bulk(rx_ring, pkts,
+                        rx_pkts, NULL) != 0) && rx_pkts > 0) {
             rx_pkts = (uint16_t)RTE_MIN(rte_ring_count(rx_ring), PKT_READ_SIZE);
         }
 
         if (rx_pkts > 0) {
             /* blocking enqueue */
             do {
-                rslt = rte_ring_enqueue_bulk(tx_ring, pkts, rx_pkts);
+                rslt = rte_ring_enqueue_bulk(tx_ring, pkts, rx_pkts, NULL);
             } while (rslt == -ENOBUFS);
         }
     }