diff mbox

[ovs-dev,v3,3/5] netdev-dpdk: Add vHost User PMD

Message ID 1469722875-8848-4-git-send-email-ciara.loftus@intel.com
State Superseded
Delegated to: Daniele Di Proietto
Headers show

Commit Message

Ciara Loftus July 28, 2016, 4:21 p.m. UTC
DPDK 16.04 introduces the vHost PMD which allows 'dpdkvhostuser' ports
to be controlled by the librte_ether API, like physical 'dpdk' ports and
IVSHM 'dpdkr' ports. This commit integrates this PMD into OVS and
removes direct calls to the librte_vhost DPDK library.

This commit removes extended statistics support for vHost User ports
until such a time that this becomes available in the vHost PMD in a
DPDK release supported by OVS.

Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
---
 INSTALL.DPDK.md   |  10 +
 NEWS              |   2 +
 lib/netdev-dpdk.c | 857 ++++++++++++++++++------------------------------------
 3 files changed, 300 insertions(+), 569 deletions(-)

Comments

Ilya Maximets July 29, 2016, 1:31 p.m. UTC | #1
Not a complete review. Just a few comments on the design.

And what about performance? Is there any difference compared to the
current version of the code? I guess this may be slower than direct
access to the vhost library.

Comments inline.

Best regards, Ilya Maximets.

On 28.07.2016 19:21, Ciara Loftus wrote:
> DPDK 16.04 introduces the vHost PMD which allows 'dpdkvhostuser' ports
> to be controlled by the librte_ether API, like physical 'dpdk' ports and
> IVSHM 'dpdkr' ports. This commit integrates this PMD into OVS and
> removes direct calls to the librte_vhost DPDK library.
> 
> This commit removes extended statistics support for vHost User ports
> until such a time that this becomes available in the vHost PMD in a
> DPDK release supported by OVS.
> 
> Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> ---
>  INSTALL.DPDK.md   |  10 +
>  NEWS              |   2 +
>  lib/netdev-dpdk.c | 857 ++++++++++++++++++------------------------------------
>  3 files changed, 300 insertions(+), 569 deletions(-)
> 
> diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md
> index 7609aa7..4feb7be 100644
> --- a/INSTALL.DPDK.md
> +++ b/INSTALL.DPDK.md
> @@ -604,6 +604,16 @@ can be found in [Vhost Walkthrough].
>  
>      http://dpdk.org/doc/guides/rel_notes/release_16_04.html
>  
> +  - dpdk, dpdkr and dpdkvhostuser ports are 'eth' type ports in the context of
> +    DPDK as they are all managed by the rte_ether API. This means that they
> +    adhere to the DPDK configuration option CONFIG_RTE_MAX_ETHPORTS which by
> +    default is set to 32. This means by default the combined total number of
> +    dpdk, dpdkr and dpdkvhostuser ports allowable in OVS with DPDK is 32. This
> +    value can be changed if desired by modifying the configuration file in
> +    DPDK, or by overriding the default value on the command line when building
> +    DPDK. eg.
> +
> +        `make install CONFIG_RTE_MAX_ETHPORTS=64`
>  
>  Bug Reporting:
>  --------------
> diff --git a/NEWS b/NEWS
> index dc3dedb..6510dde 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -64,6 +64,8 @@ Post-v2.5.0
>       * Basic connection tracking for the userspace datapath (no ALG,
>         fragmentation or NAT support yet)
>       * Remove dpdkvhostcuse port type.
> +     * vHost PMD integration brings vhost-user ports under control of the
> +       rte_ether DPDK API.
>     - Increase number of registers to 16.
>     - ovs-benchmark: This utility has been removed due to lack of use and
>       bitrot.
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index d6959fe..d6ceeec 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -30,7 +30,6 @@
>  #include <sys/types.h>
>  #include <sys/stat.h>
>  #include <getopt.h>
> -#include <numaif.h>
>  
>  #include "dirs.h"
>  #include "dp-packet.h"
> @@ -56,9 +55,9 @@
>  #include "unixctl.h"
>  
>  #include "rte_config.h"
> +#include "rte_eth_vhost.h"
>  #include "rte_mbuf.h"
>  #include "rte_meter.h"
> -#include "rte_virtio_net.h"
>  
>  VLOG_DEFINE_THIS_MODULE(dpdk);
>  static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
> @@ -141,6 +140,9 @@ static char *vhost_sock_dir = NULL;   /* Location of vhost-user sockets */
>  
>  #define VHOST_ENQ_RETRY_NUM 8
>  
> +/* Array that tracks the used & unused vHost user driver IDs */
> +static unsigned int vhost_drv_ids[RTE_MAX_ETHPORTS];
> +
>  static const struct rte_eth_conf port_conf = {
>      .rxmode = {
>          .mq_mode = ETH_MQ_RX_RSS,
> @@ -346,12 +348,15 @@ struct netdev_dpdk {
>      struct rte_eth_link link;
>      int link_reset_cnt;
>  
> -    /* virtio-net structure for vhost device */
> -    OVSRCU_TYPE(struct virtio_net *) virtio_dev;
> +    /* Number of virtqueue pairs reported by the guest */
> +    uint32_t vhost_qp_nb;
>  
>      /* Identifier used to distinguish vhost devices from each other */
>      char vhost_id[PATH_MAX];
>  
> +    /* ID of vhost user port given to the PMD driver */
> +    unsigned int vhost_pmd_id;
> +
>      /* In dpdk_list. */
>      struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);
>  
> @@ -382,16 +387,23 @@ struct netdev_rxq_dpdk {
>  static bool dpdk_thread_is_pmd(void);
>  
>  static int netdev_dpdk_construct(struct netdev *);
> -
> -struct virtio_net * netdev_dpdk_get_virtio(const struct netdev_dpdk *dev);
> +static int netdev_dpdk_vhost_construct(struct netdev *);
>  
>  struct ingress_policer *
>  netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev);
>  
> +static void link_status_changed_callback(uint8_t port_id,
> +        enum rte_eth_event_type type, void *param);
> +static void vring_state_changed_callback(uint8_t port_id,
> +        enum rte_eth_event_type type, void *param);
> +static void netdev_dpdk_remap_txqs(struct netdev_dpdk *dev);
> +static void netdev_dpdk_txq_map_clear(struct netdev_dpdk *dev);
> +
>  static bool
> -is_dpdk_class(const struct netdev_class *class)
> +is_dpdk_eth_class(const struct netdev_class *class)
>  {
> -    return class->construct == netdev_dpdk_construct;
> +    return ((class->construct == netdev_dpdk_construct) ||
> +            (class->construct == netdev_dpdk_vhost_construct));
>  }
>  
>  /* DPDK NIC drivers allocate RX buffers at a particular granularity, typically
> @@ -616,8 +628,13 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int n_rxq, int n_txq)
>              continue;
>          }
>  
> -        dev->up.n_rxq = n_rxq;
> -        dev->up.n_txq = n_txq;
> +        /* Only set n_*xq for physical devices. vHost User devices will set
> +         * this value correctly using info from the virtio backend.
> +         */
> +        if (dev->type == DPDK_DEV_ETH) {
> +            dev->up.n_rxq = n_rxq;
> +            dev->up.n_txq = n_txq;
> +        }
>  
>          return 0;
>      }
> @@ -641,8 +658,14 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex)
>  
>      rte_eth_dev_info_get(dev->port_id, &info);
>  
> -    n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq);
> -    n_txq = MIN(info.max_tx_queues, dev->up.n_txq);
> +    if (dev->type == DPDK_DEV_VHOST) {
> +        /* We don't know how many queues QEMU will use so set up the max */
> +        n_rxq = MIN(OVS_VHOST_MAX_QUEUE_NUM, RTE_MAX_QUEUES_PER_PORT);
> +        n_txq = MIN(OVS_VHOST_MAX_QUEUE_NUM, RTE_MAX_QUEUES_PER_PORT);
> +    } else {
> +        n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq);
> +        n_txq = MIN(info.max_tx_queues, dev->up.n_txq);
> +    }
>  
>      diag = dpdk_eth_dev_queue_setup(dev, n_rxq, n_txq);
>      if (diag) {
> @@ -709,6 +732,85 @@ netdev_dpdk_alloc_txq(struct netdev_dpdk *dev, unsigned int n_txqs)
>      }
>  }
>  
> +void
> +link_status_changed_callback(uint8_t port_id,
> +                             enum rte_eth_event_type type OVS_UNUSED,
> +                             void *param OVS_UNUSED)
> +{
> +    struct netdev_dpdk *dev;
> +    int socket_id = -1;
> +
> +    ovs_mutex_lock(&dpdk_mutex);
> +    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
> +        if (port_id == dev->port_id) {
> +            ovs_mutex_lock(&dev->mutex);
> +            check_link_status(dev);
> +            if (dev->link.link_status == ETH_LINK_UP) {
> +                /* new device */
> +                /* Get NUMA information */
> +                socket_id = rte_eth_dev_socket_id(dev->port_id);
> +                if (socket_id != -1 && socket_id != dev->socket_id) {
> +                    dev->requested_socket_id = socket_id;
> +                }
> +                netdev_request_reconfigure(&dev->up);
> +                netdev_change_seq_changed(&dev->up);
> +                VLOG_INFO("vHost Device '%s' has been added on numa node %i",
> +                          dev->vhost_id, socket_id);
> +            } else {
> +                /* destroy device */
> +                /* Clear tx/rx queue settings. */
> +                netdev_dpdk_txq_map_clear(dev);

Why was the request for a smaller number of queues removed from here?
There may be no 'state changed' calls in some cases (e.g. a QEMU crash).

> +                netdev_request_reconfigure(&dev->up);
> +                netdev_change_seq_changed(&dev->up);
> +                VLOG_INFO("vHost Device '%s' has been removed", dev->vhost_id);
> +            }
> +            ovs_mutex_unlock(&dev->mutex);
> +            break;
> +        }
> +    }
> +
> +    ovs_mutex_unlock(&dpdk_mutex);
> +
> +    return;
> +}
> +
> +void
> +vring_state_changed_callback(uint8_t port_id,
> +                             enum rte_eth_event_type type OVS_UNUSED,
> +                             void *param OVS_UNUSED)
> +{
> +    struct netdev_dpdk *dev;
> +    struct rte_eth_vhost_queue_event event;
> +    int err = 0;
> +
> +    err = rte_eth_vhost_get_queue_event(port_id, &event);
> +    if (err || event.rx) {
> +        return;
> +    }
> +
> +    ovs_mutex_lock(&dpdk_mutex);
> +    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
> +        if (port_id == dev->port_id) {
> +            ovs_mutex_lock(&dev->mutex);
> +            if (event.enable) {
> +                dev->tx_q[event.queue_id].map = event.queue_id;
> +                dev->vhost_qp_nb++;
> +            } else {
> +                dev->tx_q[event.queue_id].map = OVS_VHOST_QUEUE_DISABLED;
> +                dev->vhost_qp_nb--;
> +            }
> +            dev->requested_n_rxq = dev->vhost_qp_nb;
> +            dev->requested_n_txq = dev->vhost_qp_nb;
> +            netdev_request_reconfigure(&dev->up);

Do we really need to reconfigure here? Is there any way to reconfigure
only when the link status changes?
In the current implementation, a buggy or malicious guest may perform a DoS
attack on the vSwitch just by executing the script below:

while true;
do
	ethtool -L eth0 combined 4
	ethtool -L eth0 combined 1
done

Another thing: the guest/QEMU may disable/enable queues in random order.
Also, there are no constraints in the virtio standard that prevent
QEMU or the guest from sending 'disable' commands for already-disabled queues.
I already mentioned this in a reply to one of the previous versions of
this patch.

Also, executing the following commands in a row in a VM with 4 queues:
	ethtool -L eth0 combined 2
	ethtool -L eth0 combined 1
will disable queues #2 and #3 twice. This will lead to a wrong (even
negative --> huge positive) value of 'vhost_qp_nb'.
This is true at least for recent versions of QEMU.

Last question about this function:
Why is there no call to 'txq_remap()'?

> +            ovs_mutex_unlock(&dev->mutex);
> +            break;
> +        }
> +    }
> +    ovs_mutex_unlock(&dpdk_mutex);
> +
> +    return;
> +}
> +
>  static int
>  netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,
>                   enum dpdk_dev_type type)
> @@ -718,6 +820,7 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,
>      int sid;
>      int err = 0;
>      uint32_t buf_size;
> +    unsigned int nr_q = 0;
>  
>      ovs_mutex_init(&dev->mutex);
>      ovs_mutex_lock(&dev->mutex);
> @@ -727,11 +830,7 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,
>      /* If the 'sid' is negative, it means that the kernel fails
>       * to obtain the pci numa info.  In that situation, always
>       * use 'SOCKET0'. */
> -    if (type == DPDK_DEV_ETH) {
> -        sid = rte_eth_dev_socket_id(port_no);
> -    } else {
> -        sid = rte_lcore_to_socket_id(rte_get_master_lcore());
> -    }
> +    sid = rte_eth_dev_socket_id(port_no);
>  
>      dev->socket_id = sid < 0 ? SOCKET0 : sid;
>      dev->requested_socket_id = dev->socket_id;
> @@ -761,17 +860,21 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,
>      netdev->n_txq = NR_QUEUE;
>      dev->requested_n_rxq = netdev->n_rxq;
>      dev->requested_n_txq = netdev->n_txq;
> +    dev->vhost_qp_nb = 0;
>  
> -    if (type == DPDK_DEV_ETH) {
> -        err = dpdk_eth_dev_init(dev);
> -        if (err) {
> -            goto unlock;
> -        }
> -        netdev_dpdk_alloc_txq(dev, netdev->n_txq);
> -    } else {
> -        netdev_dpdk_alloc_txq(dev, OVS_VHOST_MAX_QUEUE_NUM);
> -        /* Enable DPDK_DEV_VHOST device and set promiscuous mode flag. */
> -        dev->flags = NETDEV_UP | NETDEV_PROMISC;
> +    err = dpdk_eth_dev_init(dev);
> +    if (err) {
> +        goto unlock;
> +    }
> +    nr_q = (type == DPDK_DEV_ETH ?
> +            1 : MIN(OVS_VHOST_MAX_QUEUE_NUM, RTE_MAX_QUEUES_PER_PORT));
> +    netdev_dpdk_alloc_txq(dev, nr_q);
> +
> +    if (type == DPDK_DEV_VHOST) {
> +        rte_eth_dev_callback_register(port_no, RTE_ETH_EVENT_QUEUE_STATE,
> +                                      vring_state_changed_callback, NULL);
> +        rte_eth_dev_callback_register(port_no, RTE_ETH_EVENT_INTR_LSC,
> +                                      link_status_changed_callback, NULL);
>      }
>  
>      ovs_list_push_back(&dpdk_list, &dev->list_node);
> @@ -802,17 +905,48 @@ dpdk_dev_parse_name(const char dev_name[], const char prefix[],
>      }
>  }
>  
> +/* When attaching a vhost device to DPDK, a unique name of the format
> + * 'eth_vhostX' is expected, where X is a unique identifier.
> + * get_vhost_drv_id returns a valid X value to provide to DPDK.
> + */
> +static int
> +get_vhost_drv_id(void)
> +{
> +    int i = 0;
> +
> +    for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
> +        if (vhost_drv_ids[i] == 0) {
> +            return i;
> +        }
> +    }
> +
> +    return -1;
> +}
> +
> +static void
> +set_vhost_drv_id(int id, int val)
> +{
> +    vhost_drv_ids[id] = val;
> +}
> +
>  static int
>  netdev_dpdk_vhost_construct(struct netdev *netdev)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>      const char *name = netdev->name;
>      int err;
> +    uint8_t port_no = 0;
> +    char *devargs;
> +    int driver_id = 0;
> +
> +    if (rte_eal_init_ret) {
> +        return rte_eal_init_ret;
> +    }
>  
>      /* 'name' is appended to 'vhost_sock_dir' and used to create a socket in
>       * the file system. '/' or '\' would traverse directories, so they're not
>       * acceptable in 'name'. */
> -    if (strchr(name, '/') || strchr(name, '\\')) {
> +    if (strchr(name, '/') || strchr(name, '\\') || strchr(name, ',')) {
>          VLOG_ERR("\"%s\" is not a valid name for a vhost-user port. "
>                   "A valid name must not include '/' or '\\'",
>                   name);
> @@ -829,18 +963,32 @@ netdev_dpdk_vhost_construct(struct netdev *netdev)
>       */
>      snprintf(dev->vhost_id, sizeof(dev->vhost_id), "%s/%s",
>               vhost_sock_dir, name);
> +    driver_id = get_vhost_drv_id();
> +    if (driver_id == -1) {
> +        VLOG_ERR("Unable to create vhost-user device %s - too many vhost-user"
> +                 "devices registered with PMD", dev->vhost_id);
> +        err = ENODEV;
> +        goto out;
> +    } else {
> +        devargs = xasprintf("eth_vhost%u,iface=%s,queues=%i",
> +                 driver_id, dev->vhost_id,
> +                 MIN(OVS_VHOST_MAX_QUEUE_NUM, RTE_MAX_QUEUES_PER_PORT));
> +        err = rte_eth_dev_attach(devargs, &port_no);
> +    }
>  
> -    err = rte_vhost_driver_register(dev->vhost_id);
>      if (err) {
> -        VLOG_ERR("vhost-user socket device setup failure for socket %s\n",
> +        VLOG_ERR("Failed to attach vhost-user device %s to DPDK",
>                   dev->vhost_id);
>      } else {
>          fatal_signal_add_file_to_unlink(dev->vhost_id);
>          VLOG_INFO("Socket %s created for vhost-user port %s\n",
>                    dev->vhost_id, name);
> -        err = netdev_dpdk_init(netdev, -1, DPDK_DEV_VHOST);
> +        dev->vhost_pmd_id = driver_id;
> +        set_vhost_drv_id(driver_id, 1);
> +        err = netdev_dpdk_init(netdev, port_no, DPDK_DEV_VHOST);
>      }
>  
> +out:
>      ovs_mutex_unlock(&dpdk_mutex);
>      return err;
>  }
> @@ -868,20 +1016,28 @@ netdev_dpdk_construct(struct netdev *netdev)
>  }
>  
>  static void
> -netdev_dpdk_destruct(struct netdev *netdev)
> +dpdk_destruct_helper(struct netdev_dpdk *dev)
>  {
> -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> -
> -    ovs_mutex_lock(&dev->mutex);
>      rte_eth_dev_stop(dev->port_id);
>      free(ovsrcu_get_protected(struct ingress_policer *,
>                                &dev->ingress_policer));
> -    ovs_mutex_unlock(&dev->mutex);
>  
> -    ovs_mutex_lock(&dpdk_mutex);
>      rte_free(dev->tx_q);
>      ovs_list_remove(&dev->list_node);
>      dpdk_mp_put(dev->dpdk_mp);
> +}
> +
> +static void
> +netdev_dpdk_destruct(struct netdev *netdev)
> +{
> +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> +
> +    ovs_mutex_lock(&dpdk_mutex);
> +    ovs_mutex_lock(&dev->mutex);
> +
> +    dpdk_destruct_helper(dev);
> +
> +    ovs_mutex_unlock(&dev->mutex);
>      ovs_mutex_unlock(&dpdk_mutex);
>  }
>  
> @@ -890,30 +1046,19 @@ netdev_dpdk_vhost_destruct(struct netdev *netdev)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>  
> -    /* Guest becomes an orphan if still attached. */
> -    if (netdev_dpdk_get_virtio(dev) != NULL) {
> -        VLOG_ERR("Removing port '%s' while vhost device still attached.",
> -                 netdev->name);
> -        VLOG_ERR("To restore connectivity after re-adding of port, VM on socket"
> -                 " '%s' must be restarted.",
> -                 dev->vhost_id);
> -    }
> +    ovs_mutex_lock(&dpdk_mutex);
> +    ovs_mutex_lock(&dev->mutex);
>  
> -    if (rte_vhost_driver_unregister(dev->vhost_id)) {
> -        VLOG_ERR("Unable to remove vhost-user socket %s", dev->vhost_id);
> +    if (rte_eth_dev_detach(dev->port_id, dev->vhost_id)) {
> +        VLOG_ERR("Error removing vhost device %s", dev->vhost_id);
>      } else {
>          fatal_signal_remove_file_to_unlink(dev->vhost_id);
>      }
> +    set_vhost_drv_id(dev->vhost_pmd_id, 0);
>  
> -    ovs_mutex_lock(&dev->mutex);
> -    free(ovsrcu_get_protected(struct ingress_policer *,
> -                              &dev->ingress_policer));
> -    ovs_mutex_unlock(&dev->mutex);
> +    dpdk_destruct_helper(dev);
>  
> -    ovs_mutex_lock(&dpdk_mutex);
> -    rte_free(dev->tx_q);
> -    ovs_list_remove(&dev->list_node);
> -    dpdk_mp_put(dev->dpdk_mp);
> +    ovs_mutex_unlock(&dev->mutex);
>      ovs_mutex_unlock(&dpdk_mutex);
>  }
>  
> @@ -1105,117 +1250,6 @@ ingress_policer_run(struct ingress_policer *policer, struct rte_mbuf **pkts,
>      return cnt;
>  }
>  
> -static bool
> -is_vhost_running(struct virtio_net *virtio_dev)
> -{
> -    return (virtio_dev != NULL && (virtio_dev->flags & VIRTIO_DEV_RUNNING));
> -}
> -
> -static inline void
> -netdev_dpdk_vhost_update_rx_size_counters(struct netdev_stats *stats,
> -                                          unsigned int packet_size)
> -{
> -    /* Hard-coded search for the size bucket. */
> -    if (packet_size < 256) {
> -        if (packet_size >= 128) {
> -            stats->rx_128_to_255_packets++;
> -        } else if (packet_size <= 64) {
> -            stats->rx_1_to_64_packets++;
> -        } else {
> -            stats->rx_65_to_127_packets++;
> -        }
> -    } else {
> -        if (packet_size >= 1523) {
> -            stats->rx_1523_to_max_packets++;
> -        } else if (packet_size >= 1024) {
> -            stats->rx_1024_to_1522_packets++;
> -        } else if (packet_size < 512) {
> -            stats->rx_256_to_511_packets++;
> -        } else {
> -            stats->rx_512_to_1023_packets++;
> -        }
> -    }
> -}
> -
> -static inline void
> -netdev_dpdk_vhost_update_rx_counters(struct netdev_stats *stats,
> -                                     struct dp_packet **packets, int count,
> -                                     int dropped)
> -{
> -    int i;
> -    unsigned int packet_size;
> -    struct dp_packet *packet;
> -
> -    stats->rx_packets += count;
> -    stats->rx_dropped += dropped;
> -    for (i = 0; i < count; i++) {
> -        packet = packets[i];
> -        packet_size = dp_packet_size(packet);
> -
> -        if (OVS_UNLIKELY(packet_size < ETH_HEADER_LEN)) {
> -            /* This only protects the following multicast counting from
> -             * too short packets, but it does not stop the packet from
> -             * further processing. */
> -            stats->rx_errors++;
> -            stats->rx_length_errors++;
> -            continue;
> -        }
> -
> -        netdev_dpdk_vhost_update_rx_size_counters(stats, packet_size);
> -
> -        struct eth_header *eh = (struct eth_header *) dp_packet_data(packet);
> -        if (OVS_UNLIKELY(eth_addr_is_multicast(eh->eth_dst))) {
> -            stats->multicast++;
> -        }
> -
> -        stats->rx_bytes += packet_size;
> -    }
> -}
> -
> -/*
> - * The receive path for the vhost port is the TX path out from guest.
> - */
> -static int
> -netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
> -                           struct dp_packet_batch *batch)
> -{
> -    struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
> -    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
> -    int qid = rxq->queue_id;
> -    struct ingress_policer *policer = netdev_dpdk_get_ingress_policer(dev);
> -    uint16_t nb_rx = 0;
> -    uint16_t dropped = 0;
> -
> -    if (OVS_UNLIKELY(!is_vhost_running(virtio_dev)
> -                     || !(dev->flags & NETDEV_UP))) {
> -        return EAGAIN;
> -    }
> -
> -    nb_rx = rte_vhost_dequeue_burst(virtio_dev, qid * VIRTIO_QNUM + VIRTIO_TXQ,
> -                                    dev->dpdk_mp->mp,
> -                                    (struct rte_mbuf **) batch->packets,
> -                                    NETDEV_MAX_BURST);
> -    if (!nb_rx) {
> -        return EAGAIN;
> -    }
> -
> -    if (policer) {
> -        dropped = nb_rx;
> -        nb_rx = ingress_policer_run(policer,
> -                                    (struct rte_mbuf **) batch->packets,
> -                                    nb_rx);
> -        dropped -= nb_rx;
> -    }
> -
> -    rte_spinlock_lock(&dev->stats_lock);
> -    netdev_dpdk_vhost_update_rx_counters(&dev->stats, batch->packets,
> -                                         nb_rx, dropped);
> -    rte_spinlock_unlock(&dev->stats_lock);
> -
> -    batch->count = (int) nb_rx;
> -    return 0;
> -}
> -
>  static int
>  netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch *batch)
>  {
> @@ -1269,85 +1303,6 @@ netdev_dpdk_qos_run__(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
>      return cnt;
>  }
>  
> -static inline void
> -netdev_dpdk_vhost_update_tx_counters(struct netdev_stats *stats,
> -                                     struct dp_packet **packets,
> -                                     int attempted,
> -                                     int dropped)
> -{
> -    int i;
> -    int sent = attempted - dropped;
> -
> -    stats->tx_packets += sent;
> -    stats->tx_dropped += dropped;
> -
> -    for (i = 0; i < sent; i++) {
> -        stats->tx_bytes += dp_packet_size(packets[i]);
> -    }
> -}
> -
> -static void
> -__netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
> -                         struct dp_packet **pkts, int cnt,
> -                         bool may_steal)
> -{
> -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> -    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
> -    struct rte_mbuf **cur_pkts = (struct rte_mbuf **) pkts;
> -    unsigned int total_pkts = cnt;
> -    unsigned int qos_pkts = cnt;
> -    int retries = 0;
> -
> -    qid = dev->tx_q[qid % netdev->n_txq].map;
> -
> -    if (OVS_UNLIKELY(!is_vhost_running(virtio_dev) || qid < 0
> -                     || !(dev->flags & NETDEV_UP))) {
> -        rte_spinlock_lock(&dev->stats_lock);
> -        dev->stats.tx_dropped+= cnt;
> -        rte_spinlock_unlock(&dev->stats_lock);
> -        goto out;
> -    }
> -
> -    rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
> -
> -    /* Check has QoS has been configured for the netdev */
> -    cnt = netdev_dpdk_qos_run__(dev, cur_pkts, cnt);
> -    qos_pkts -= cnt;
> -
> -    do {
> -        int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;
> -        unsigned int tx_pkts;
> -
> -        tx_pkts = rte_vhost_enqueue_burst(virtio_dev, vhost_qid,
> -                                          cur_pkts, cnt);
> -        if (OVS_LIKELY(tx_pkts)) {
> -            /* Packets have been sent.*/
> -            cnt -= tx_pkts;
> -            /* Prepare for possible retry.*/
> -            cur_pkts = &cur_pkts[tx_pkts];
> -        } else {
> -            /* No packets sent - do not retry.*/
> -            break;
> -        }
> -    } while (cnt && (retries++ < VHOST_ENQ_RETRY_NUM));
> -
> -    rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
> -
> -    rte_spinlock_lock(&dev->stats_lock);
> -    cnt += qos_pkts;
> -    netdev_dpdk_vhost_update_tx_counters(&dev->stats, pkts, total_pkts, cnt);
> -    rte_spinlock_unlock(&dev->stats_lock);
> -
> -out:
> -    if (may_steal) {
> -        int i;
> -
> -        for (i = 0; i < total_pkts; i++) {
> -            dp_packet_delete(pkts[i]);
> -        }
> -    }
> -}
> -
>  /* Tx function. Transmit packets indefinitely */
>  static void
>  dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
> @@ -1402,18 +1357,13 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
>          newcnt++;
>      }
>  
> -    if (dev->type == DPDK_DEV_VHOST) {
> -        __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) mbufs,
> -                                 newcnt, true);
> -    } else {
> -        unsigned int qos_pkts = newcnt;
> +    unsigned int qos_pkts = newcnt;
>  
> -        /* Check if QoS has been configured for this netdev. */
> -        newcnt = netdev_dpdk_qos_run__(dev, mbufs, newcnt);
> +    /* Check if QoS has been configured for this netdev. */
> +    newcnt = netdev_dpdk_qos_run__(dev, mbufs, newcnt);
>  
> -        dropped += qos_pkts - newcnt;
> -        netdev_dpdk_eth_tx_burst(dev, qid, mbufs, newcnt);
> -    }
> +    dropped += qos_pkts - newcnt;
> +    netdev_dpdk_eth_tx_burst(dev, qid, mbufs, newcnt);
>  
>      if (OVS_UNLIKELY(dropped)) {
>          rte_spinlock_lock(&dev->stats_lock);
> @@ -1426,33 +1376,10 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
>      }
>  }
>  
> -static int
> -netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
> -                       struct dp_packet_batch *batch,
> -                       bool may_steal, bool concurrent_txq OVS_UNUSED)
> -{
> -
> -    if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
> -        dpdk_do_tx_copy(netdev, qid, batch);
> -        dp_packet_delete_batch(batch, may_steal);
> -    } else {
> -        dp_packet_batch_apply_cutlen(batch);
> -        __netdev_dpdk_vhost_send(netdev, qid, batch->packets, batch->count,
> -                                 may_steal);
> -    }
> -    return 0;
> -}
> -
>  static inline void
>  netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
> -                   struct dp_packet_batch *batch, bool may_steal,
> -                   bool concurrent_txq)
> +                   struct dp_packet_batch *batch, bool may_steal)
>  {
> -    if (OVS_UNLIKELY(concurrent_txq)) {
> -        qid = qid % dev->up.n_txq;
> -        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
> -    }
> -
>      if (OVS_UNLIKELY(!may_steal ||
>                       batch->packets[0]->source != DPBUF_DPDK)) {
>          struct netdev *netdev = &dev->up;
> @@ -1512,20 +1439,50 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
>              rte_spinlock_unlock(&dev->stats_lock);
>          }
>      }
> +}
> +
> +static int
> +netdev_dpdk_eth_send(struct netdev *netdev, int qid,
> +                     struct dp_packet_batch *batch, bool may_steal,
> +                     bool concurrent_txq)
> +{
> +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> +
> +    if (OVS_UNLIKELY(concurrent_txq)) {
> +        qid = qid % dev->up.n_txq;
> +        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
> +    }
> +
> +    netdev_dpdk_send__(dev, qid, batch, may_steal);
>  
>      if (OVS_UNLIKELY(concurrent_txq)) {
>          rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
>      }
> +
> +    return 0;
>  }
>  
>  static int
> -netdev_dpdk_eth_send(struct netdev *netdev, int qid,
> -                     struct dp_packet_batch *batch, bool may_steal,
> -                     bool concurrent_txq)
> +netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
> +                       struct dp_packet_batch *batch, bool may_steal,
> +                       bool concurrent_txq OVS_UNUSED)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>  
> -    netdev_dpdk_send__(dev, qid, batch, may_steal, concurrent_txq);
> +    qid = dev->tx_q[qid % netdev->n_txq].map;
> +    if (qid == -1) {
> +        rte_spinlock_lock(&dev->stats_lock);
> +        dev->stats.tx_dropped+= batch->count;
> +        rte_spinlock_unlock(&dev->stats_lock);
> +        if (may_steal) {
> +            dp_packet_delete_batch(batch, may_steal);
> +        }
> +    } else {
> +        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
> +        netdev_dpdk_send__(dev, qid, batch, may_steal);
> +        rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
> +    }
> +
>      return 0;
>  }
>  
> @@ -1622,41 +1579,6 @@ out:
>  static int
>  netdev_dpdk_get_carrier(const struct netdev *netdev, bool *carrier);
>  
> -static int
> -netdev_dpdk_vhost_get_stats(const struct netdev *netdev,
> -                            struct netdev_stats *stats)
> -{
> -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> -
> -    ovs_mutex_lock(&dev->mutex);
> -
> -    rte_spinlock_lock(&dev->stats_lock);
> -    /* Supported Stats */
> -    stats->rx_packets += dev->stats.rx_packets;
> -    stats->tx_packets += dev->stats.tx_packets;
> -    stats->rx_dropped = dev->stats.rx_dropped;
> -    stats->tx_dropped += dev->stats.tx_dropped;
> -    stats->multicast = dev->stats.multicast;
> -    stats->rx_bytes = dev->stats.rx_bytes;
> -    stats->tx_bytes = dev->stats.tx_bytes;
> -    stats->rx_errors = dev->stats.rx_errors;
> -    stats->rx_length_errors = dev->stats.rx_length_errors;
> -
> -    stats->rx_1_to_64_packets = dev->stats.rx_1_to_64_packets;
> -    stats->rx_65_to_127_packets = dev->stats.rx_65_to_127_packets;
> -    stats->rx_128_to_255_packets = dev->stats.rx_128_to_255_packets;
> -    stats->rx_256_to_511_packets = dev->stats.rx_256_to_511_packets;
> -    stats->rx_512_to_1023_packets = dev->stats.rx_512_to_1023_packets;
> -    stats->rx_1024_to_1522_packets = dev->stats.rx_1024_to_1522_packets;
> -    stats->rx_1523_to_max_packets = dev->stats.rx_1523_to_max_packets;
> -
> -    rte_spinlock_unlock(&dev->stats_lock);
> -
> -    ovs_mutex_unlock(&dev->mutex);
> -
> -    return 0;
> -}
> -
>  static void
>  netdev_dpdk_convert_xstats(struct netdev_stats *stats,
>                             const struct rte_eth_xstats *xstats,
> @@ -1737,28 +1659,40 @@ netdev_dpdk_get_stats(const struct netdev *netdev, struct netdev_stats *stats)
>          return EPROTO;
>      }
>  
> -    rte_xstats_len = rte_eth_xstats_get(dev->port_id, NULL, 0);
> -    if (rte_xstats_len > 0) {
> -        rte_xstats = dpdk_rte_mzalloc(sizeof(*rte_xstats) * rte_xstats_len);
> -        memset(rte_xstats, 0xff, sizeof(*rte_xstats) * rte_xstats_len);
> -        rte_xstats_ret = rte_eth_xstats_get(dev->port_id, rte_xstats,
> -                                            rte_xstats_len);
> -        if (rte_xstats_ret > 0 && rte_xstats_ret <= rte_xstats_len) {
> -            netdev_dpdk_convert_xstats(stats, rte_xstats, rte_xstats_ret);
> +    /* Extended statistics are not yet available for vHost User PMD */
> +    if (dev->type == DPDK_DEV_ETH) {
> +        rte_xstats_len = rte_eth_xstats_get(dev->port_id, NULL, 0);
> +        if (rte_xstats_len > 0) {
> +            rte_xstats = dpdk_rte_mzalloc(sizeof(*rte_xstats)
> +                                          * rte_xstats_len);
> +            memset(rte_xstats, 0xff, sizeof(*rte_xstats) * rte_xstats_len);
> +            rte_xstats_ret = rte_eth_xstats_get(dev->port_id, rte_xstats,
> +                                                rte_xstats_len);
> +            if (rte_xstats_ret > 0 && rte_xstats_ret <= rte_xstats_len) {
> +                netdev_dpdk_convert_xstats(stats, rte_xstats, rte_xstats_ret);
> +            }
> +            rte_free(rte_xstats);
> +        } else {
> +            VLOG_WARN("Can't get XSTATS counters for port: %i.", dev->port_id);
>          }
> -        rte_free(rte_xstats);
> -    } else {
> -        VLOG_WARN("Can't get XSTATS counters for port: %i.", dev->port_id);
>      }
>  
>      stats->rx_packets = rte_stats.ipackets;
>      stats->tx_packets = rte_stats.opackets;
>      stats->rx_bytes = rte_stats.ibytes;
>      stats->tx_bytes = rte_stats.obytes;
> -    /* DPDK counts imissed as errors, but count them here as dropped instead */
> -    stats->rx_errors = rte_stats.ierrors - rte_stats.imissed;
> -    stats->tx_errors = rte_stats.oerrors;
> -    stats->multicast = rte_stats.imcasts;
> +
> +    if (dev->type == DPDK_DEV_ETH) {
> +        /* DPDK counts imissed as errors, but count them here as dropped
> +         * instead */
> +        stats->rx_errors = rte_stats.ierrors - rte_stats.imissed;
> +        stats->tx_errors = rte_stats.oerrors;
> +        stats->multicast = rte_stats.imcasts;
> +    } else {
> +        stats->rx_errors = UINT64_MAX;
> +        stats->tx_errors = UINT64_MAX;
> +        stats->multicast = UINT64_MAX;
> +    }
>  
>      rte_spinlock_lock(&dev->stats_lock);
>      stats->tx_dropped = dev->stats.tx_dropped;
> @@ -1921,25 +1855,6 @@ netdev_dpdk_get_carrier(const struct netdev *netdev, bool *carrier)
>      return 0;
>  }
>  
> -static int
> -netdev_dpdk_vhost_get_carrier(const struct netdev *netdev, bool *carrier)
> -{
> -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> -    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
> -
> -    ovs_mutex_lock(&dev->mutex);
> -
> -    if (is_vhost_running(virtio_dev)) {
> -        *carrier = 1;
> -    } else {
> -        *carrier = 0;
> -    }
> -
> -    ovs_mutex_unlock(&dev->mutex);
> -
> -    return 0;
> -}
> -
>  static long long int
>  netdev_dpdk_get_carrier_resets(const struct netdev *netdev)
>  {
> @@ -1995,13 +1910,10 @@ netdev_dpdk_update_flags__(struct netdev_dpdk *dev,
>              rte_eth_dev_stop(dev->port_id);
>          }
>      } else {
> -        /* If DPDK_DEV_VHOST device's NETDEV_UP flag was changed and vhost is
> -         * running then change netdev's change_seq to trigger link state
> -         * update. */
> -        struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
> +        /* If DPDK_DEV_VHOST device's NETDEV_UP flag was changed then change
> +         * netdev's change_seq to trigger link state update. */
>  
> -        if ((NETDEV_UP & ((*old_flagsp ^ on) | (*old_flagsp ^ off)))
> -            && is_vhost_running(virtio_dev)) {
> +        if ((NETDEV_UP & ((*old_flagsp ^ on) | (*old_flagsp ^ off)))) {
>              netdev_change_seq_changed(&dev->up);
>  
>              /* Clear statistics if device is getting up. */
> @@ -2096,7 +2008,7 @@ netdev_dpdk_set_admin_state(struct unixctl_conn *conn, int argc,
>  
>      if (argc > 2) {
>          struct netdev *netdev = netdev_from_name(argv[1]);
> -        if (netdev && is_dpdk_class(netdev->netdev_class)) {
> +        if (netdev && is_dpdk_eth_class(netdev->netdev_class)) {
>              struct netdev_dpdk *dpdk_dev = netdev_dpdk_cast(netdev);
>  
>              ovs_mutex_lock(&dpdk_dev->mutex);
> @@ -2124,22 +2036,6 @@ netdev_dpdk_set_admin_state(struct unixctl_conn *conn, int argc,
>  }
>  
>  /*
> - * Set virtqueue flags so that we do not receive interrupts.
> - */
> -static void
> -set_irq_status(struct virtio_net *virtio_dev)
> -{
> -    uint32_t i;
> -    uint64_t idx;
> -
> -    for (i = 0; i < virtio_dev->virt_qp_nb; i++) {
> -        idx = i * VIRTIO_QNUM;
> -        rte_vhost_enable_guest_notification(virtio_dev, idx + VIRTIO_RXQ, 0);
> -        rte_vhost_enable_guest_notification(virtio_dev, idx + VIRTIO_TXQ, 0);
> -    }
> -}
> -
> -/*
>   * Fixes mapping for vhost-user tx queues. Must be called after each
>   * enabling/disabling of queues and n_txq modifications.
>   */
> @@ -2180,62 +2076,6 @@ netdev_dpdk_remap_txqs(struct netdev_dpdk *dev)
>      rte_free(enabled_queues);
>  }
>  
> -/*
> - * A new virtio-net device is added to a vhost port.
> - */
> -static int
> -new_device(struct virtio_net *virtio_dev)
> -{
> -    struct netdev_dpdk *dev;
> -    bool exists = false;
> -    int newnode = 0;
> -    long err = 0;
> -
> -    ovs_mutex_lock(&dpdk_mutex);
> -    /* Add device to the vhost port with the same name as that passed down. */
> -    LIST_FOR_EACH(dev, list_node, &dpdk_list) {
> -        if (strncmp(virtio_dev->ifname, dev->vhost_id, IF_NAME_SZ) == 0) {
> -            uint32_t qp_num = virtio_dev->virt_qp_nb;
> -
> -            ovs_mutex_lock(&dev->mutex);
> -            /* Get NUMA information */
> -            err = get_mempolicy(&newnode, NULL, 0, virtio_dev,
> -                                MPOL_F_NODE | MPOL_F_ADDR);
> -            if (err) {
> -                VLOG_INFO("Error getting NUMA info for vHost Device '%s'",
> -                        virtio_dev->ifname);
> -                newnode = dev->socket_id;
> -            }
> -
> -            dev->requested_socket_id = newnode;
> -            dev->requested_n_rxq = qp_num;
> -            dev->requested_n_txq = qp_num;
> -            netdev_request_reconfigure(&dev->up);
> -
> -            ovsrcu_set(&dev->virtio_dev, virtio_dev);
> -            exists = true;
> -
> -            /* Disable notifications. */
> -            set_irq_status(virtio_dev);
> -            netdev_change_seq_changed(&dev->up);
> -            ovs_mutex_unlock(&dev->mutex);
> -            break;
> -        }
> -    }
> -    ovs_mutex_unlock(&dpdk_mutex);
> -
> -    if (!exists) {
> -        VLOG_INFO("vHost Device '%s' %"PRIu64" can't be added - name not "
> -                  "found", virtio_dev->ifname, virtio_dev->device_fh);
> -
> -        return -1;
> -    }
> -
> -    VLOG_INFO("vHost Device '%s' %"PRIu64" has been added on numa node %i",
> -              virtio_dev->ifname, virtio_dev->device_fh, newnode);
> -    return 0;
> -}
> -
>  /* Clears mapping for all available queues of vhost interface. */
>  static void
>  netdev_dpdk_txq_map_clear(struct netdev_dpdk *dev)
> @@ -2248,144 +2088,18 @@ netdev_dpdk_txq_map_clear(struct netdev_dpdk *dev)
>      }
>  }
>  
> -/*
> - * Remove a virtio-net device from the specific vhost port.  Use dev->remove
> - * flag to stop any more packets from being sent or received to/from a VM and
> - * ensure all currently queued packets have been sent/received before removing
> - *  the device.
> - */
> -static void
> -destroy_device(volatile struct virtio_net *virtio_dev)
> -{
> -    struct netdev_dpdk *dev;
> -    bool exists = false;
> -
> -    ovs_mutex_lock(&dpdk_mutex);
> -    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
> -        if (netdev_dpdk_get_virtio(dev) == virtio_dev) {
> -
> -            ovs_mutex_lock(&dev->mutex);
> -            virtio_dev->flags &= ~VIRTIO_DEV_RUNNING;
> -            ovsrcu_set(&dev->virtio_dev, NULL);
> -            /* Clear tx/rx queue settings. */
> -            netdev_dpdk_txq_map_clear(dev);
> -            dev->requested_n_rxq = NR_QUEUE;
> -            dev->requested_n_txq = NR_QUEUE;
> -            netdev_request_reconfigure(&dev->up);
> -
> -            netdev_change_seq_changed(&dev->up);
> -            ovs_mutex_unlock(&dev->mutex);
> -            exists = true;
> -            break;
> -        }
> -    }
> -
> -    ovs_mutex_unlock(&dpdk_mutex);
> -
> -    if (exists == true) {
> -        /*
> -         * Wait for other threads to quiesce after setting the 'virtio_dev'
> -         * to NULL, before returning.
> -         */
> -        ovsrcu_synchronize();
> -        /*
> -         * As call to ovsrcu_synchronize() will end the quiescent state,
> -         * put thread back into quiescent state before returning.
> -         */
> -        ovsrcu_quiesce_start();
> -        VLOG_INFO("vHost Device '%s' %"PRIu64" has been removed",
> -                  virtio_dev->ifname, virtio_dev->device_fh);
> -    } else {
> -        VLOG_INFO("vHost Device '%s' %"PRIu64" not found", virtio_dev->ifname,
> -                  virtio_dev->device_fh);
> -    }
> -}
> -
> -static int
> -vring_state_changed(struct virtio_net *virtio_dev, uint16_t queue_id,
> -                    int enable)
> -{
> -    struct netdev_dpdk *dev;
> -    bool exists = false;
> -    int qid = queue_id / VIRTIO_QNUM;
> -
> -    if (queue_id % VIRTIO_QNUM == VIRTIO_TXQ) {
> -        return 0;
> -    }
> -
> -    ovs_mutex_lock(&dpdk_mutex);
> -    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
> -        if (strncmp(virtio_dev->ifname, dev->vhost_id, IF_NAME_SZ) == 0) {
> -            ovs_mutex_lock(&dev->mutex);
> -            if (enable) {
> -                dev->tx_q[qid].map = qid;
> -            } else {
> -                dev->tx_q[qid].map = OVS_VHOST_QUEUE_DISABLED;
> -            }
> -            netdev_dpdk_remap_txqs(dev);
> -            exists = true;
> -            ovs_mutex_unlock(&dev->mutex);
> -            break;
> -        }
> -    }
> -    ovs_mutex_unlock(&dpdk_mutex);
> -
> -    if (exists) {
> -        VLOG_INFO("State of queue %d ( tx_qid %d ) of vhost device '%s' %"
> -                  PRIu64" changed to \'%s\'", queue_id, qid,
> -                  virtio_dev->ifname, virtio_dev->device_fh,
> -                  (enable == 1) ? "enabled" : "disabled");
> -    } else {
> -        VLOG_INFO("vHost Device '%s' %"PRIu64" not found", virtio_dev->ifname,
> -                  virtio_dev->device_fh);
> -        return -1;
> -    }
> -
> -    return 0;
> -}
> -
> -struct virtio_net *
> -netdev_dpdk_get_virtio(const struct netdev_dpdk *dev)
> -{
> -    return ovsrcu_get(struct virtio_net *, &dev->virtio_dev);
> -}
> -
>  struct ingress_policer *
>  netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev)
>  {
>      return ovsrcu_get(struct ingress_policer *, &dev->ingress_policer);
>  }
>  
> -/*
> - * These callbacks allow virtio-net devices to be added to vhost ports when
> - * configuration has been fully complete.
> - */
> -static const struct virtio_net_device_ops virtio_net_device_ops =
> -{
> -    .new_device =  new_device,
> -    .destroy_device = destroy_device,
> -    .vring_state_changed = vring_state_changed
> -};
> -
> -static void *
> -start_vhost_loop(void *dummy OVS_UNUSED)
> -{
> -     pthread_detach(pthread_self());
> -     /* Put the vhost thread into quiescent state. */
> -     ovsrcu_quiesce_start();
> -     rte_vhost_driver_session_start();
> -     return NULL;
> -}
> -
>  static int
>  dpdk_vhost_class_init(void)
>  {
> -    rte_vhost_driver_callback_register(&virtio_net_device_ops);
> -    rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4
> -                            | 1ULL << VIRTIO_NET_F_HOST_TSO6
> -                            | 1ULL << VIRTIO_NET_F_CSUM);
> -
> -    ovs_thread_create("vhost_thread", start_vhost_loop, NULL);
> +    rte_eth_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4
> +                                | 1ULL << VIRTIO_NET_F_HOST_TSO6
> +                                | 1ULL << VIRTIO_NET_F_CSUM);
>      return 0;
>  }
>  
> @@ -2498,7 +2212,17 @@ netdev_dpdk_ring_send(struct netdev *netdev, int qid,
>          dp_packet_rss_invalidate(batch->packets[i]);
>      }
>  
> -    netdev_dpdk_send__(dev, qid, batch, may_steal, concurrent_txq);
> +    if (OVS_UNLIKELY(concurrent_txq)) {
> +        qid = qid % dev->up.n_txq;
> +        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
> +    }
> +
> +    netdev_dpdk_send__(dev, qid, batch, may_steal);
> +
> +    if (OVS_UNLIKELY(concurrent_txq)) {
> +        rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
> +    }
> +
>      return 0;
>  }
>  
> @@ -2787,7 +2511,6 @@ static int
>  netdev_dpdk_vhost_reconfigure(struct netdev *netdev)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> -    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
>      int err = 0;
>  
>      ovs_mutex_lock(&dpdk_mutex);
> @@ -2813,10 +2536,6 @@ netdev_dpdk_vhost_reconfigure(struct netdev *netdev)
>          }
>      }
>  
> -    if (virtio_dev) {
> -        virtio_dev->flags |= VIRTIO_DEV_RUNNING;
> -    }
> -
>      ovs_mutex_unlock(&dev->mutex);
>      ovs_mutex_unlock(&dpdk_mutex);
>  
> @@ -3307,12 +3026,12 @@ static const struct netdev_class OVS_UNUSED dpdk_vhost_class =
>          NULL,
>          NULL,
>          netdev_dpdk_vhost_send,
> -        netdev_dpdk_vhost_get_carrier,
> -        netdev_dpdk_vhost_get_stats,
> +        netdev_dpdk_get_carrier,
> +        netdev_dpdk_get_stats,
>          NULL,
>          NULL,
>          netdev_dpdk_vhost_reconfigure,
> -        netdev_dpdk_vhost_rxq_recv);
> +        netdev_dpdk_rxq_recv);
>  
>  void
>  netdev_dpdk_register(void)
>
Ciara Loftus July 29, 2016, 1:59 p.m. UTC | #2
> 

> Not the complete review. Just few comments to design.


Hi Ilya,

Thanks for the feedback.

> 

> And what about performance? Is there any difference in comparison to

> current version of code? I guess, this may be slower than direct

> access to vhost library.


I had the same concern. I've measured the difference and it's negligible.

> 

> Comments inline.

> 

> Best regards, Ilya Maximets.

> 

> On 28.07.2016 19:21, Ciara Loftus wrote:

> > DPDK 16.04 introduces the vHost PMD which allows 'dpdkvhostuser' ports

> > to be controlled by the librte_ether API, like physical 'dpdk' ports and

> > IVSHM 'dpdkr' ports. This commit integrates this PMD into OVS and

> > removes direct calls to the librte_vhost DPDK library.

> >

> > This commit removes extended statistics support for vHost User ports

> > until such a time that this becomes available in the vHost PMD in a

> > DPDK release supported by OVS.

> >

> > Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>

> > ---

> >  INSTALL.DPDK.md   |  10 +

> >  NEWS              |   2 +

> >  lib/netdev-dpdk.c | 857 ++++++++++++++++++---------------------------------

> ---

> >  3 files changed, 300 insertions(+), 569 deletions(-)

> >

> > diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md

> > index 7609aa7..4feb7be 100644

> > --- a/INSTALL.DPDK.md

> > +++ b/INSTALL.DPDK.md

> > @@ -604,6 +604,16 @@ can be found in [Vhost Walkthrough].

> >

> >      http://dpdk.org/doc/guides/rel_notes/release_16_04.html

> >

> > +  - dpdk, dpdkr and dpdkvhostuser ports are 'eth' type ports in the context

> of

> > +    DPDK as they are all managed by the rte_ether API. This means that

> they

> > +    adhere to the DPDK configuration option CONFIG_RTE_MAX_ETHPORTS

> which by

> > +    default is set to 32. This means by default the combined total number of

> > +    dpdk, dpdkr and dpdkvhostuser ports allowable in OVS with DPDK is 32.

> This

> > +    value can be changed if desired by modifying the configuration file in

> > +    DPDK, or by overriding the default value on the command line when

> building

> > +    DPDK. eg.

> > +

> > +        `make install CONFIG_RTE_MAX_ETHPORTS=64`

> >

> >  Bug Reporting:

> >  --------------

> > diff --git a/NEWS b/NEWS

> > index dc3dedb..6510dde 100644

> > --- a/NEWS

> > +++ b/NEWS

> > @@ -64,6 +64,8 @@ Post-v2.5.0

> >       * Basic connection tracking for the userspace datapath (no ALG,

> >         fragmentation or NAT support yet)

> >       * Remove dpdkvhostcuse port type.

> > +     * vHost PMD integration brings vhost-user ports under control of the

> > +       rte_ether DPDK API.

> >     - Increase number of registers to 16.

> >     - ovs-benchmark: This utility has been removed due to lack of use and

> >       bitrot.

> > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c

> > index d6959fe..d6ceeec 100644

> > --- a/lib/netdev-dpdk.c

> > +++ b/lib/netdev-dpdk.c

> > @@ -30,7 +30,6 @@

> >  #include <sys/types.h>

> >  #include <sys/stat.h>

> >  #include <getopt.h>

> > -#include <numaif.h>

> >

> >  #include "dirs.h"

> >  #include "dp-packet.h"

> > @@ -56,9 +55,9 @@

> >  #include "unixctl.h"

> >

> >  #include "rte_config.h"

> > +#include "rte_eth_vhost.h"

> >  #include "rte_mbuf.h"

> >  #include "rte_meter.h"

> > -#include "rte_virtio_net.h"

> >

> >  VLOG_DEFINE_THIS_MODULE(dpdk);

> >  static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);

> > @@ -141,6 +140,9 @@ static char *vhost_sock_dir = NULL;   /* Location of

> vhost-user sockets */

> >

> >  #define VHOST_ENQ_RETRY_NUM 8

> >

> > +/* Array that tracks the used & unused vHost user driver IDs */

> > +static unsigned int vhost_drv_ids[RTE_MAX_ETHPORTS];

> > +

> >  static const struct rte_eth_conf port_conf = {

> >      .rxmode = {

> >          .mq_mode = ETH_MQ_RX_RSS,

> > @@ -346,12 +348,15 @@ struct netdev_dpdk {

> >      struct rte_eth_link link;

> >      int link_reset_cnt;

> >

> > -    /* virtio-net structure for vhost device */

> > -    OVSRCU_TYPE(struct virtio_net *) virtio_dev;

> > +    /* Number of virtqueue pairs reported by the guest */

> > +    uint32_t vhost_qp_nb;

> >

> >      /* Identifier used to distinguish vhost devices from each other */

> >      char vhost_id[PATH_MAX];

> >

> > +    /* ID of vhost user port given to the PMD driver */

> > +    unsigned int vhost_pmd_id;

> > +

> >      /* In dpdk_list. */

> >      struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);

> >

> > @@ -382,16 +387,23 @@ struct netdev_rxq_dpdk {

> >  static bool dpdk_thread_is_pmd(void);

> >

> >  static int netdev_dpdk_construct(struct netdev *);

> > -

> > -struct virtio_net * netdev_dpdk_get_virtio(const struct netdev_dpdk

> *dev);

> > +static int netdev_dpdk_vhost_construct(struct netdev *);

> >

> >  struct ingress_policer *

> >  netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev);

> >

> > +static void link_status_changed_callback(uint8_t port_id,

> > +        enum rte_eth_event_type type, void *param);

> > +static void vring_state_changed_callback(uint8_t port_id,

> > +        enum rte_eth_event_type type, void *param);

> > +static void netdev_dpdk_remap_txqs(struct netdev_dpdk *dev);

> > +static void netdev_dpdk_txq_map_clear(struct netdev_dpdk *dev);

> > +

> >  static bool

> > -is_dpdk_class(const struct netdev_class *class)

> > +is_dpdk_eth_class(const struct netdev_class *class)

> >  {

> > -    return class->construct == netdev_dpdk_construct;

> > +    return ((class->construct == netdev_dpdk_construct) ||

> > +            (class->construct == netdev_dpdk_vhost_construct));

> >  }

> >

> >  /* DPDK NIC drivers allocate RX buffers at a particular granularity, typically

> > @@ -616,8 +628,13 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk

> *dev, int n_rxq, int n_txq)

> >              continue;

> >          }

> >

> > -        dev->up.n_rxq = n_rxq;

> > -        dev->up.n_txq = n_txq;

> > +        /* Only set n_*xq for physical devices. vHost User devices will set

> > +         * this value correctly using info from the virtio backend.

> > +         */

> > +        if (dev->type == DPDK_DEV_ETH) {

> > +            dev->up.n_rxq = n_rxq;

> > +            dev->up.n_txq = n_txq;

> > +        }

> >

> >          return 0;

> >      }

> > @@ -641,8 +658,14 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)

> OVS_REQUIRES(dpdk_mutex)

> >

> >      rte_eth_dev_info_get(dev->port_id, &info);

> >

> > -    n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq);

> > -    n_txq = MIN(info.max_tx_queues, dev->up.n_txq);

> > +    if (dev->type == DPDK_DEV_VHOST) {

> > +        /* We don't know how many queues QEMU will use so set up the

> max */

> > +        n_rxq = MIN(OVS_VHOST_MAX_QUEUE_NUM,

> RTE_MAX_QUEUES_PER_PORT);

> > +        n_txq = MIN(OVS_VHOST_MAX_QUEUE_NUM,

> RTE_MAX_QUEUES_PER_PORT);

> > +    } else {

> > +        n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq);

> > +        n_txq = MIN(info.max_tx_queues, dev->up.n_txq);

> > +    }

> >

> >      diag = dpdk_eth_dev_queue_setup(dev, n_rxq, n_txq);

> >      if (diag) {

> > @@ -709,6 +732,85 @@ netdev_dpdk_alloc_txq(struct netdev_dpdk *dev,

> unsigned int n_txqs)

> >      }

> >  }

> >

> > +void

> > +link_status_changed_callback(uint8_t port_id,

> > +                             enum rte_eth_event_type type OVS_UNUSED,

> > +                             void *param OVS_UNUSED)

> > +{

> > +    struct netdev_dpdk *dev;

> > +    int socket_id = -1;

> > +

> > +    ovs_mutex_lock(&dpdk_mutex);

> > +    LIST_FOR_EACH (dev, list_node, &dpdk_list) {

> > +        if (port_id == dev->port_id) {

> > +            ovs_mutex_lock(&dev->mutex);

> > +            check_link_status(dev);

> > +            if (dev->link.link_status == ETH_LINK_UP) {

> > +                /* new device */

> > +                /* Get NUMA information */

> > +                socket_id = rte_eth_dev_socket_id(dev->port_id);

> > +                if (socket_id != -1 && socket_id != dev->socket_id) {

> > +                    dev->requested_socket_id = socket_id;

> > +                }

> > +                netdev_request_reconfigure(&dev->up);

> > +                netdev_change_seq_changed(&dev->up);

> > +                VLOG_INFO("vHost Device '%s' has been added on numa node

> %i",

> > +                          dev->vhost_id, socket_id);

> > +            } else {

> > +                /* destroy device */

> > +                /* Clear tx/rx queue settings. */

> > +                netdev_dpdk_txq_map_clear(dev);

> 

> Why requesting of less number of queues removed from here?


If you are talking about destroy_device(), the difference between this and master is that I've removed:
            dev->requested_n_rxq = NR_QUEUE;
            dev->requested_n_txq = NR_QUEUE;
This is because all queue management must now belong in the vring_state_changed callback. I explain why below.

> There may be no 'state changed' calls in some cases (e.g. QEMU crash).

> 

> > +                netdev_request_reconfigure(&dev->up);

> > +                netdev_change_seq_changed(&dev->up);

> > +                VLOG_INFO("vHost Device '%s' has been removed", dev-

> >vhost_id);

> > +            }

> > +            ovs_mutex_unlock(&dev->mutex);

> > +            break;

> > +        }

> > +    }

> > +

> > +    ovs_mutex_unlock(&dpdk_mutex);

> > +

> > +    return;

> > +}

> > +

> > +void

> > +vring_state_changed_callback(uint8_t port_id,

> > +                             enum rte_eth_event_type type OVS_UNUSED,

> > +                             void *param OVS_UNUSED)

> > +{

> > +    struct netdev_dpdk *dev;

> > +    struct rte_eth_vhost_queue_event event;

> > +    int err = 0;

> > +

> > +    err = rte_eth_vhost_get_queue_event(port_id, &event);

> > +    if (err || event.rx) {

> > +        return;

> > +    }

> > +

> > +    ovs_mutex_lock(&dpdk_mutex);

> > +    LIST_FOR_EACH (dev, list_node, &dpdk_list) {

> > +        if (port_id == dev->port_id) {

> > +            ovs_mutex_lock(&dev->mutex);

> > +            if (event.enable) {

> > +                dev->tx_q[event.queue_id].map = event.queue_id;

> > +                dev->vhost_qp_nb++;

> > +            } else {

> > +                dev->tx_q[event.queue_id].map =

> OVS_VHOST_QUEUE_DISABLED;

> > +                dev->vhost_qp_nb--;

> > +            }

> > +            dev->requested_n_rxq = dev->vhost_qp_nb;

> > +            dev->requested_n_txq = dev->vhost_qp_nb;

> > +            netdev_request_reconfigure(&dev->up);

> 

> Do we really need to reconfigure here. Is there any way to keep

> reconfiguration only if link status changed?


It needs to be here, unfortunately. In reconfigure we set n_rxq and n_txq from the requested_* values. With the PMD, the requested_* values may change outside of link status changes.
Previously we could get by with reconfiguring only during a link status change, as we had full information available to us, i.e. virtio_net->virt_qp_nb. We don't have that any more, so we now need to count the queues in OVS every time we get a vring state change.

> In current implementation buggy or malicious guest may perform DOS

> attack on the vSwitch just by executing the below script:

> 

> while true;

> do

> 	ethtool -l eth0 combined 4

> 	ethtool -l eth0 combined 1

> done

> 

> Another thing: Guest/QEMU may disable/enable queues in random order,

> also, there is no any constraints in virtio standard that may force

> QEMU or guest to send 'disable' commands on disabled queues.

> I already told about this in reply to one the previous versions of

> this patch.


This callback does not exactly mirror the vring_state_changed callback. DPDK controls what messages are sent.
DPDK does not send a valid 'disable' message to OVS for a queue that is already disabled. The callback is triggered, but the information given to OVS appears as an error when we call rte_eth_vhost_get_queue_event(port_id, &event).
So the above case should not happen.

> 

> Also, by executing following commands in a row in a VM with 4 queues:

> 	ethtool -l eth0 combined 2

> 	ethtool -l eth0 combined 1

> Queues #2 and #3 will be disabled twice. This will lead to wrong (even

> negative --> huge positive) value of 'vhost_qp_nb'.

> This is true at least for the new versions of QEMU.


See above.

> 

> Last question to this function:

> Why there is no call to 'txq_remap()' ?


Remap is called as part of reconfigure.

> 

> > +            ovs_mutex_unlock(&dev->mutex);

> > +            break;

> > +        }

> > +    }

> > +    ovs_mutex_unlock(&dpdk_mutex);

> > +

> > +    return;

> > +}

> > +

> >  static int

> >  netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,

> >                   enum dpdk_dev_type type)

> > @@ -718,6 +820,7 @@ netdev_dpdk_init(struct netdev *netdev, unsigned

> int port_no,

> >      int sid;

> >      int err = 0;

> >      uint32_t buf_size;

> > +    unsigned int nr_q = 0;

> >

> >      ovs_mutex_init(&dev->mutex);

> >      ovs_mutex_lock(&dev->mutex);

> > @@ -727,11 +830,7 @@ netdev_dpdk_init(struct netdev *netdev,

> unsigned int port_no,

> >      /* If the 'sid' is negative, it means that the kernel fails

> >       * to obtain the pci numa info.  In that situation, always

> >       * use 'SOCKET0'. */

> > -    if (type == DPDK_DEV_ETH) {

> > -        sid = rte_eth_dev_socket_id(port_no);

> > -    } else {

> > -        sid = rte_lcore_to_socket_id(rte_get_master_lcore());

> > -    }

> > +    sid = rte_eth_dev_socket_id(port_no);

> >

> >      dev->socket_id = sid < 0 ? SOCKET0 : sid;

> >      dev->requested_socket_id = dev->socket_id;

> > @@ -761,17 +860,21 @@ netdev_dpdk_init(struct netdev *netdev,

> unsigned int port_no,

> >      netdev->n_txq = NR_QUEUE;

> >      dev->requested_n_rxq = netdev->n_rxq;

> >      dev->requested_n_txq = netdev->n_txq;

> > +    dev->vhost_qp_nb = 0;

> >

> > -    if (type == DPDK_DEV_ETH) {

> > -        err = dpdk_eth_dev_init(dev);

> > -        if (err) {

> > -            goto unlock;

> > -        }

> > -        netdev_dpdk_alloc_txq(dev, netdev->n_txq);

> > -    } else {

> > -        netdev_dpdk_alloc_txq(dev, OVS_VHOST_MAX_QUEUE_NUM);

> > -        /* Enable DPDK_DEV_VHOST device and set promiscuous mode flag.

> */

> > -        dev->flags = NETDEV_UP | NETDEV_PROMISC;

> > +    err = dpdk_eth_dev_init(dev);

> > +    if (err) {

> > +        goto unlock;

> > +    }

> > +    nr_q = (type == DPDK_DEV_ETH ?

> > +            1 : MIN(OVS_VHOST_MAX_QUEUE_NUM,

> RTE_MAX_QUEUES_PER_PORT));

> > +    netdev_dpdk_alloc_txq(dev, nr_q);

> > +

> > +    if (type == DPDK_DEV_VHOST) {

> > +        rte_eth_dev_callback_register(port_no,

> RTE_ETH_EVENT_QUEUE_STATE,

> > +                                      vring_state_changed_callback, NULL);

> > +        rte_eth_dev_callback_register(port_no, RTE_ETH_EVENT_INTR_LSC,

> > +                                      link_status_changed_callback, NULL);

> >      }

> >

> >      ovs_list_push_back(&dpdk_list, &dev->list_node);

> > @@ -802,17 +905,48 @@ dpdk_dev_parse_name(const char dev_name[],

> const char prefix[],

> >      }

> >  }

> >

> > +/* When attaching a vhost device to DPDK, a unique name of the format

> > + * 'eth_vhostX' is expected, where X is a unique identifier.

> > + * get_vhost_drv_id returns a valid X value to provide to DPDK.

> > + */

> > +static int

> > +get_vhost_drv_id(void)

> > +{

> > +    int i = 0;

> > +

> > +    for (i = 0; i < RTE_MAX_ETHPORTS; i++) {

> > +        if (vhost_drv_ids[i] == 0) {

> > +            return i;

> > +        }

> > +    }

> > +

> > +    return -1;

> > +}

> > +

> > +static void

> > +set_vhost_drv_id(int id, int val)

> > +{

> > +    vhost_drv_ids[id] = val;

> > +}

> > +

> >  static int

> >  netdev_dpdk_vhost_construct(struct netdev *netdev)

> >  {

> >      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> >      const char *name = netdev->name;

> >      int err;

> > +    uint8_t port_no = 0;

> > +    char *devargs;

> > +    int driver_id = 0;

> > +

> > +    if (rte_eal_init_ret) {

> > +        return rte_eal_init_ret;

> > +    }

> >

> >      /* 'name' is appended to 'vhost_sock_dir' and used to create a socket in

> >       * the file system. '/' or '\' would traverse directories, so they're not

> >       * acceptable in 'name'. */

> > -    if (strchr(name, '/') || strchr(name, '\\')) {

> > +    if (strchr(name, '/') || strchr(name, '\\') || strchr(name, ',')) {

> >          VLOG_ERR("\"%s\" is not a valid name for a vhost-user port. "

> >                   "A valid name must not include '/' or '\\'",

> >                   name);

> > @@ -829,18 +963,32 @@ netdev_dpdk_vhost_construct(struct netdev

> *netdev)

> >       */

> >      snprintf(dev->vhost_id, sizeof(dev->vhost_id), "%s/%s",

> >               vhost_sock_dir, name);

> > +    driver_id = get_vhost_drv_id();

> > +    if (driver_id == -1) {

> > +        VLOG_ERR("Unable to create vhost-user device %s - too many vhost-

> user"

> > +                 "devices registered with PMD", dev->vhost_id);

> > +        err = ENODEV;

> > +        goto out;

> > +    } else {

> > +        devargs = xasprintf("eth_vhost%u,iface=%s,queues=%i",

> > +                 driver_id, dev->vhost_id,

> > +                 MIN(OVS_VHOST_MAX_QUEUE_NUM,

> RTE_MAX_QUEUES_PER_PORT));

> > +        err = rte_eth_dev_attach(devargs, &port_no);

> > +    }

> >

> > -    err = rte_vhost_driver_register(dev->vhost_id);

> >      if (err) {

> > -        VLOG_ERR("vhost-user socket device setup failure for socket %s\n",

> > +        VLOG_ERR("Failed to attach vhost-user device %s to DPDK",

> >                   dev->vhost_id);

> >      } else {

> >          fatal_signal_add_file_to_unlink(dev->vhost_id);

> >          VLOG_INFO("Socket %s created for vhost-user port %s\n",

> >                    dev->vhost_id, name);

> > -        err = netdev_dpdk_init(netdev, -1, DPDK_DEV_VHOST);

> > +        dev->vhost_pmd_id = driver_id;

> > +        set_vhost_drv_id(driver_id, 1);

> > +        err = netdev_dpdk_init(netdev, port_no, DPDK_DEV_VHOST);

> >      }

> >

> > +out:

> >      ovs_mutex_unlock(&dpdk_mutex);

> >      return err;

> >  }

> > @@ -868,20 +1016,28 @@ netdev_dpdk_construct(struct netdev *netdev)

> >  }

> >

> >  static void

> > -netdev_dpdk_destruct(struct netdev *netdev)

> > +dpdk_destruct_helper(struct netdev_dpdk *dev)

> >  {

> > -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> > -

> > -    ovs_mutex_lock(&dev->mutex);

> >      rte_eth_dev_stop(dev->port_id);

> >      free(ovsrcu_get_protected(struct ingress_policer *,

> >                                &dev->ingress_policer));

> > -    ovs_mutex_unlock(&dev->mutex);

> >

> > -    ovs_mutex_lock(&dpdk_mutex);

> >      rte_free(dev->tx_q);

> >      ovs_list_remove(&dev->list_node);

> >      dpdk_mp_put(dev->dpdk_mp);

> > +}

> > +

> > +static void

> > +netdev_dpdk_destruct(struct netdev *netdev)

> > +{

> > +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> > +

> > +    ovs_mutex_lock(&dpdk_mutex);

> > +    ovs_mutex_lock(&dev->mutex);

> > +

> > +    dpdk_destruct_helper(dev);

> > +

> > +    ovs_mutex_unlock(&dev->mutex);

> >      ovs_mutex_unlock(&dpdk_mutex);

> >  }

> >

> > @@ -890,30 +1046,19 @@ netdev_dpdk_vhost_destruct(struct netdev

> *netdev)

> >  {

> >      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> >

> > -    /* Guest becomes an orphan if still attached. */

> > -    if (netdev_dpdk_get_virtio(dev) != NULL) {

> > -        VLOG_ERR("Removing port '%s' while vhost device still attached.",

> > -                 netdev->name);

> > -        VLOG_ERR("To restore connectivity after re-adding of port, VM on

> socket"

> > -                 " '%s' must be restarted.",

> > -                 dev->vhost_id);

> > -    }

> > +    ovs_mutex_lock(&dpdk_mutex);

> > +    ovs_mutex_lock(&dev->mutex);

> >

> > -    if (rte_vhost_driver_unregister(dev->vhost_id)) {

> > -        VLOG_ERR("Unable to remove vhost-user socket %s", dev-

> >vhost_id);

> > +    if (rte_eth_dev_detach(dev->port_id, dev->vhost_id)) {

> > +        VLOG_ERR("Error removing vhost device %s", dev->vhost_id);

> >      } else {

> >          fatal_signal_remove_file_to_unlink(dev->vhost_id);

> >      }

> > +    set_vhost_drv_id(dev->vhost_pmd_id, 0);

> >

> > -    ovs_mutex_lock(&dev->mutex);

> > -    free(ovsrcu_get_protected(struct ingress_policer *,

> > -                              &dev->ingress_policer));

> > -    ovs_mutex_unlock(&dev->mutex);

> > +    dpdk_destruct_helper(dev);

> >

> > -    ovs_mutex_lock(&dpdk_mutex);

> > -    rte_free(dev->tx_q);

> > -    ovs_list_remove(&dev->list_node);

> > -    dpdk_mp_put(dev->dpdk_mp);

> > +    ovs_mutex_unlock(&dev->mutex);

> >      ovs_mutex_unlock(&dpdk_mutex);

> >  }

> >

> > @@ -1105,117 +1250,6 @@ ingress_policer_run(struct ingress_policer

> *policer, struct rte_mbuf **pkts,

> >      return cnt;

> >  }

> >

> > -static bool

> > -is_vhost_running(struct virtio_net *virtio_dev)

> > -{

> > -    return (virtio_dev != NULL && (virtio_dev->flags &

> VIRTIO_DEV_RUNNING));

> > -}

> > -

> > -static inline void

> > -netdev_dpdk_vhost_update_rx_size_counters(struct netdev_stats

> *stats,

> > -                                          unsigned int packet_size)

> > -{

> > -    /* Hard-coded search for the size bucket. */

> > -    if (packet_size < 256) {

> > -        if (packet_size >= 128) {

> > -            stats->rx_128_to_255_packets++;

> > -        } else if (packet_size <= 64) {

> > -            stats->rx_1_to_64_packets++;

> > -        } else {

> > -            stats->rx_65_to_127_packets++;

> > -        }

> > -    } else {

> > -        if (packet_size >= 1523) {

> > -            stats->rx_1523_to_max_packets++;

> > -        } else if (packet_size >= 1024) {

> > -            stats->rx_1024_to_1522_packets++;

> > -        } else if (packet_size < 512) {

> > -            stats->rx_256_to_511_packets++;

> > -        } else {

> > -            stats->rx_512_to_1023_packets++;

> > -        }

> > -    }

> > -}

> > -

> > -static inline void

> > -netdev_dpdk_vhost_update_rx_counters(struct netdev_stats *stats,

> > -                                     struct dp_packet **packets, int count,

> > -                                     int dropped)

> > -{

> > -    int i;

> > -    unsigned int packet_size;

> > -    struct dp_packet *packet;

> > -

> > -    stats->rx_packets += count;

> > -    stats->rx_dropped += dropped;

> > -    for (i = 0; i < count; i++) {

> > -        packet = packets[i];

> > -        packet_size = dp_packet_size(packet);

> > -

> > -        if (OVS_UNLIKELY(packet_size < ETH_HEADER_LEN)) {

> > -            /* This only protects the following multicast counting from

> > -             * too short packets, but it does not stop the packet from

> > -             * further processing. */

> > -            stats->rx_errors++;

> > -            stats->rx_length_errors++;

> > -            continue;

> > -        }

> > -

> > -        netdev_dpdk_vhost_update_rx_size_counters(stats, packet_size);

> > -

> > -        struct eth_header *eh = (struct eth_header *)

> dp_packet_data(packet);

> > -        if (OVS_UNLIKELY(eth_addr_is_multicast(eh->eth_dst))) {

> > -            stats->multicast++;

> > -        }

> > -

> > -        stats->rx_bytes += packet_size;

> > -    }

> > -}

> > -

> > -/*

> > - * The receive path for the vhost port is the TX path out from guest.

> > - */

> > -static int

> > -netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,

> > -                           struct dp_packet_batch *batch)

> > -{

> > -    struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);

> > -    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);

> > -    int qid = rxq->queue_id;

> > -    struct ingress_policer *policer =

> netdev_dpdk_get_ingress_policer(dev);

> > -    uint16_t nb_rx = 0;

> > -    uint16_t dropped = 0;

> > -

> > -    if (OVS_UNLIKELY(!is_vhost_running(virtio_dev)

> > -                     || !(dev->flags & NETDEV_UP))) {

> > -        return EAGAIN;

> > -    }

> > -

> > -    nb_rx = rte_vhost_dequeue_burst(virtio_dev, qid * VIRTIO_QNUM +

> VIRTIO_TXQ,

> > -                                    dev->dpdk_mp->mp,

> > -                                    (struct rte_mbuf **) batch->packets,

> > -                                    NETDEV_MAX_BURST);

> > -    if (!nb_rx) {

> > -        return EAGAIN;

> > -    }

> > -

> > -    if (policer) {

> > -        dropped = nb_rx;

> > -        nb_rx = ingress_policer_run(policer,

> > -                                    (struct rte_mbuf **) batch->packets,

> > -                                    nb_rx);

> > -        dropped -= nb_rx;

> > -    }

> > -

> > -    rte_spinlock_lock(&dev->stats_lock);

> > -    netdev_dpdk_vhost_update_rx_counters(&dev->stats, batch-

> >packets,

> > -                                         nb_rx, dropped);

> > -    rte_spinlock_unlock(&dev->stats_lock);

> > -

> > -    batch->count = (int) nb_rx;

> > -    return 0;

> > -}

> > -

> >  static int

> >  netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch

> *batch)

> >  {

> > @@ -1269,85 +1303,6 @@ netdev_dpdk_qos_run__(struct netdev_dpdk

> *dev, struct rte_mbuf **pkts,

> >      return cnt;

> >  }

> >

> > -static inline void

> > -netdev_dpdk_vhost_update_tx_counters(struct netdev_stats *stats,

> > -                                     struct dp_packet **packets,

> > -                                     int attempted,

> > -                                     int dropped)

> > -{

> > -    int i;

> > -    int sent = attempted - dropped;

> > -

> > -    stats->tx_packets += sent;

> > -    stats->tx_dropped += dropped;

> > -

> > -    for (i = 0; i < sent; i++) {

> > -        stats->tx_bytes += dp_packet_size(packets[i]);

> > -    }

> > -}

> > -

> > -static void

> > -__netdev_dpdk_vhost_send(struct netdev *netdev, int qid,

> > -                         struct dp_packet **pkts, int cnt,

> > -                         bool may_steal)

> > -{

> > -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> > -    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);

> > -    struct rte_mbuf **cur_pkts = (struct rte_mbuf **) pkts;

> > -    unsigned int total_pkts = cnt;

> > -    unsigned int qos_pkts = cnt;

> > -    int retries = 0;

> > -

> > -    qid = dev->tx_q[qid % netdev->n_txq].map;

> > -

> > -    if (OVS_UNLIKELY(!is_vhost_running(virtio_dev) || qid < 0

> > -                     || !(dev->flags & NETDEV_UP))) {

> > -        rte_spinlock_lock(&dev->stats_lock);

> > -        dev->stats.tx_dropped+= cnt;

> > -        rte_spinlock_unlock(&dev->stats_lock);

> > -        goto out;

> > -    }

> > -

> > -    rte_spinlock_lock(&dev->tx_q[qid].tx_lock);

> > -

> > -    /* Check has QoS has been configured for the netdev */

> > -    cnt = netdev_dpdk_qos_run__(dev, cur_pkts, cnt);

> > -    qos_pkts -= cnt;

> > -

> > -    do {

> > -        int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;

> > -        unsigned int tx_pkts;

> > -

> > -        tx_pkts = rte_vhost_enqueue_burst(virtio_dev, vhost_qid,

> > -                                          cur_pkts, cnt);

> > -        if (OVS_LIKELY(tx_pkts)) {

> > -            /* Packets have been sent.*/

> > -            cnt -= tx_pkts;

> > -            /* Prepare for possible retry.*/

> > -            cur_pkts = &cur_pkts[tx_pkts];

> > -        } else {

> > -            /* No packets sent - do not retry.*/

> > -            break;

> > -        }

> > -    } while (cnt && (retries++ < VHOST_ENQ_RETRY_NUM));

> > -

> > -    rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);

> > -

> > -    rte_spinlock_lock(&dev->stats_lock);

> > -    cnt += qos_pkts;

> > -    netdev_dpdk_vhost_update_tx_counters(&dev->stats, pkts,

> total_pkts, cnt);

> > -    rte_spinlock_unlock(&dev->stats_lock);

> > -

> > -out:

> > -    if (may_steal) {

> > -        int i;

> > -

> > -        for (i = 0; i < total_pkts; i++) {

> > -            dp_packet_delete(pkts[i]);

> > -        }

> > -    }

> > -}

> > -

> >  /* Tx function. Transmit packets indefinitely */

> >  static void

> >  dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch

> *batch)

> > @@ -1402,18 +1357,13 @@ dpdk_do_tx_copy(struct netdev *netdev, int

> qid, struct dp_packet_batch *batch)

> >          newcnt++;

> >      }

> >

> > -    if (dev->type == DPDK_DEV_VHOST) {

> > -        __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **)

> mbufs,

> > -                                 newcnt, true);

> > -    } else {

> > -        unsigned int qos_pkts = newcnt;

> > +    unsigned int qos_pkts = newcnt;

> >

> > -        /* Check if QoS has been configured for this netdev. */

> > -        newcnt = netdev_dpdk_qos_run__(dev, mbufs, newcnt);

> > +    /* Check if QoS has been configured for this netdev. */

> > +    newcnt = netdev_dpdk_qos_run__(dev, mbufs, newcnt);

> >

> > -        dropped += qos_pkts - newcnt;

> > -        netdev_dpdk_eth_tx_burst(dev, qid, mbufs, newcnt);

> > -    }

> > +    dropped += qos_pkts - newcnt;

> > +    netdev_dpdk_eth_tx_burst(dev, qid, mbufs, newcnt);

> >

> >      if (OVS_UNLIKELY(dropped)) {

> >          rte_spinlock_lock(&dev->stats_lock);

> > @@ -1426,33 +1376,10 @@ dpdk_do_tx_copy(struct netdev *netdev, int

> qid, struct dp_packet_batch *batch)

> >      }

> >  }

> >

> > -static int

> > -netdev_dpdk_vhost_send(struct netdev *netdev, int qid,

> > -                       struct dp_packet_batch *batch,

> > -                       bool may_steal, bool concurrent_txq OVS_UNUSED)

> > -{

> > -

> > -    if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {

> > -        dpdk_do_tx_copy(netdev, qid, batch);

> > -        dp_packet_delete_batch(batch, may_steal);

> > -    } else {

> > -        dp_packet_batch_apply_cutlen(batch);

> > -        __netdev_dpdk_vhost_send(netdev, qid, batch->packets, batch-

> >count,

> > -                                 may_steal);

> > -    }

> > -    return 0;

> > -}

> > -

> >  static inline void

> >  netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,

> > -                   struct dp_packet_batch *batch, bool may_steal,

> > -                   bool concurrent_txq)

> > +                   struct dp_packet_batch *batch, bool may_steal)

> >  {

> > -    if (OVS_UNLIKELY(concurrent_txq)) {

> > -        qid = qid % dev->up.n_txq;

> > -        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);

> > -    }

> > -

> >      if (OVS_UNLIKELY(!may_steal ||

> >                       batch->packets[0]->source != DPBUF_DPDK)) {

> >          struct netdev *netdev = &dev->up;

> > @@ -1512,20 +1439,50 @@ netdev_dpdk_send__(struct netdev_dpdk

> *dev, int qid,

> >              rte_spinlock_unlock(&dev->stats_lock);

> >          }

> >      }

> > +}

> > +

> > +static int

> > +netdev_dpdk_eth_send(struct netdev *netdev, int qid,

> > +                     struct dp_packet_batch *batch, bool may_steal,

> > +                     bool concurrent_txq)

> > +{

> > +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> > +

> > +    if (OVS_UNLIKELY(concurrent_txq)) {

> > +        qid = qid % dev->up.n_txq;

> > +        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);

> > +    }

> > +

> > +    netdev_dpdk_send__(dev, qid, batch, may_steal);

> >

> >      if (OVS_UNLIKELY(concurrent_txq)) {

> >          rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);

> >      }

> > +

> > +    return 0;

> >  }

> >

> >  static int

> > -netdev_dpdk_eth_send(struct netdev *netdev, int qid,

> > -                     struct dp_packet_batch *batch, bool may_steal,

> > -                     bool concurrent_txq)

> > +netdev_dpdk_vhost_send(struct netdev *netdev, int qid,

> > +                       struct dp_packet_batch *batch, bool may_steal,

> > +                       bool concurrent_txq OVS_UNUSED)

> >  {

> >      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> >

> > -    netdev_dpdk_send__(dev, qid, batch, may_steal, concurrent_txq);

> > +    qid = dev->tx_q[qid % netdev->n_txq].map;

> > +    if (qid == -1) {

> > +        rte_spinlock_lock(&dev->stats_lock);

> > +        dev->stats.tx_dropped+= batch->count;

> > +        rte_spinlock_unlock(&dev->stats_lock);

> > +        if (may_steal) {

> > +            dp_packet_delete_batch(batch, may_steal);

> > +        }

> > +    } else {

> > +        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);

> > +        netdev_dpdk_send__(dev, qid, batch, may_steal);

> > +        rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);

> > +    }

> > +

> >      return 0;

> >  }

> >

> > @@ -1622,41 +1579,6 @@ out:

> >  static int

> >  netdev_dpdk_get_carrier(const struct netdev *netdev, bool *carrier);

> >

> > -static int

> > -netdev_dpdk_vhost_get_stats(const struct netdev *netdev,

> > -                            struct netdev_stats *stats)

> > -{

> > -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> > -

> > -    ovs_mutex_lock(&dev->mutex);

> > -

> > -    rte_spinlock_lock(&dev->stats_lock);

> > -    /* Supported Stats */

> > -    stats->rx_packets += dev->stats.rx_packets;

> > -    stats->tx_packets += dev->stats.tx_packets;

> > -    stats->rx_dropped = dev->stats.rx_dropped;

> > -    stats->tx_dropped += dev->stats.tx_dropped;

> > -    stats->multicast = dev->stats.multicast;

> > -    stats->rx_bytes = dev->stats.rx_bytes;

> > -    stats->tx_bytes = dev->stats.tx_bytes;

> > -    stats->rx_errors = dev->stats.rx_errors;

> > -    stats->rx_length_errors = dev->stats.rx_length_errors;

> > -

> > -    stats->rx_1_to_64_packets = dev->stats.rx_1_to_64_packets;

> > -    stats->rx_65_to_127_packets = dev->stats.rx_65_to_127_packets;

> > -    stats->rx_128_to_255_packets = dev->stats.rx_128_to_255_packets;

> > -    stats->rx_256_to_511_packets = dev->stats.rx_256_to_511_packets;

> > -    stats->rx_512_to_1023_packets = dev->stats.rx_512_to_1023_packets;

> > -    stats->rx_1024_to_1522_packets = dev-

> >stats.rx_1024_to_1522_packets;

> > -    stats->rx_1523_to_max_packets = dev-

> >stats.rx_1523_to_max_packets;

> > -

> > -    rte_spinlock_unlock(&dev->stats_lock);

> > -

> > -    ovs_mutex_unlock(&dev->mutex);

> > -

> > -    return 0;

> > -}

> > -

> >  static void

> >  netdev_dpdk_convert_xstats(struct netdev_stats *stats,

> >                             const struct rte_eth_xstats *xstats,

> > @@ -1737,28 +1659,40 @@ netdev_dpdk_get_stats(const struct netdev

> *netdev, struct netdev_stats *stats)

> >          return EPROTO;

> >      }

> >

> > -    rte_xstats_len = rte_eth_xstats_get(dev->port_id, NULL, 0);

> > -    if (rte_xstats_len > 0) {

> > -        rte_xstats = dpdk_rte_mzalloc(sizeof(*rte_xstats) * rte_xstats_len);

> > -        memset(rte_xstats, 0xff, sizeof(*rte_xstats) * rte_xstats_len);

> > -        rte_xstats_ret = rte_eth_xstats_get(dev->port_id, rte_xstats,

> > -                                            rte_xstats_len);

> > -        if (rte_xstats_ret > 0 && rte_xstats_ret <= rte_xstats_len) {

> > -            netdev_dpdk_convert_xstats(stats, rte_xstats, rte_xstats_ret);

> > +    /* Extended statistics are not yet available for vHost User PMD */

> > +    if (dev->type == DPDK_DEV_ETH) {

> > +        rte_xstats_len = rte_eth_xstats_get(dev->port_id, NULL, 0);

> > +        if (rte_xstats_len > 0) {

> > +            rte_xstats = dpdk_rte_mzalloc(sizeof(*rte_xstats)

> > +                                          * rte_xstats_len);

> > +            memset(rte_xstats, 0xff, sizeof(*rte_xstats) * rte_xstats_len);

> > +            rte_xstats_ret = rte_eth_xstats_get(dev->port_id, rte_xstats,

> > +                                                rte_xstats_len);

> > +            if (rte_xstats_ret > 0 && rte_xstats_ret <= rte_xstats_len) {

> > +                netdev_dpdk_convert_xstats(stats, rte_xstats, rte_xstats_ret);

> > +            }

> > +            rte_free(rte_xstats);

> > +        } else {

> > +            VLOG_WARN("Can't get XSTATS counters for port: %i.", dev-

> >port_id);

> >          }

> > -        rte_free(rte_xstats);

> > -    } else {

> > -        VLOG_WARN("Can't get XSTATS counters for port: %i.", dev-

> >port_id);

> >      }

> >

> >      stats->rx_packets = rte_stats.ipackets;

> >      stats->tx_packets = rte_stats.opackets;

> >      stats->rx_bytes = rte_stats.ibytes;

> >      stats->tx_bytes = rte_stats.obytes;

> > -    /* DPDK counts imissed as errors, but count them here as dropped

> instead */

> > -    stats->rx_errors = rte_stats.ierrors - rte_stats.imissed;

> > -    stats->tx_errors = rte_stats.oerrors;

> > -    stats->multicast = rte_stats.imcasts;

> > +

> > +    if (dev->type == DPDK_DEV_ETH) {

> > +        /* DPDK counts imissed as errors, but count them here as dropped

> > +         * instead */

> > +        stats->rx_errors = rte_stats.ierrors - rte_stats.imissed;

> > +        stats->tx_errors = rte_stats.oerrors;

> > +        stats->multicast = rte_stats.imcasts;

> > +    } else {

> > +        stats->rx_errors = UINT64_MAX;

> > +        stats->tx_errors = UINT64_MAX;

> > +        stats->multicast = UINT64_MAX;

> > +    }

> >

> >      rte_spinlock_lock(&dev->stats_lock);

> >      stats->tx_dropped = dev->stats.tx_dropped;

> > @@ -1921,25 +1855,6 @@ netdev_dpdk_get_carrier(const struct netdev

> *netdev, bool *carrier)

> >      return 0;

> >  }

> >

> > -static int

> > -netdev_dpdk_vhost_get_carrier(const struct netdev *netdev, bool

> *carrier)

> > -{

> > -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> > -    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);

> > -

> > -    ovs_mutex_lock(&dev->mutex);

> > -

> > -    if (is_vhost_running(virtio_dev)) {

> > -        *carrier = 1;

> > -    } else {

> > -        *carrier = 0;

> > -    }

> > -

> > -    ovs_mutex_unlock(&dev->mutex);

> > -

> > -    return 0;

> > -}

> > -

> >  static long long int

> >  netdev_dpdk_get_carrier_resets(const struct netdev *netdev)

> >  {

> > @@ -1995,13 +1910,10 @@ netdev_dpdk_update_flags__(struct

> netdev_dpdk *dev,

> >              rte_eth_dev_stop(dev->port_id);

> >          }

> >      } else {

> > -        /* If DPDK_DEV_VHOST device's NETDEV_UP flag was changed and

> vhost is

> > -         * running then change netdev's change_seq to trigger link state

> > -         * update. */

> > -        struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);

> > +        /* If DPDK_DEV_VHOST device's NETDEV_UP flag was changed then

> change

> > +         * netdev's change_seq to trigger link state update. */

> >

> > -        if ((NETDEV_UP & ((*old_flagsp ^ on) | (*old_flagsp ^ off)))

> > -            && is_vhost_running(virtio_dev)) {

> > +        if ((NETDEV_UP & ((*old_flagsp ^ on) | (*old_flagsp ^ off)))) {

> >              netdev_change_seq_changed(&dev->up);

> >

> >              /* Clear statistics if device is getting up. */

> > @@ -2096,7 +2008,7 @@ netdev_dpdk_set_admin_state(struct

> unixctl_conn *conn, int argc,

> >

> >      if (argc > 2) {

> >          struct netdev *netdev = netdev_from_name(argv[1]);

> > -        if (netdev && is_dpdk_class(netdev->netdev_class)) {

> > +        if (netdev && is_dpdk_eth_class(netdev->netdev_class)) {

> >              struct netdev_dpdk *dpdk_dev = netdev_dpdk_cast(netdev);

> >

> >              ovs_mutex_lock(&dpdk_dev->mutex);

> > @@ -2124,22 +2036,6 @@ netdev_dpdk_set_admin_state(struct

> unixctl_conn *conn, int argc,

> >  }

> >

> >  /*

> > - * Set virtqueue flags so that we do not receive interrupts.

> > - */

> > -static void

> > -set_irq_status(struct virtio_net *virtio_dev)

> > -{

> > -    uint32_t i;

> > -    uint64_t idx;

> > -

> > -    for (i = 0; i < virtio_dev->virt_qp_nb; i++) {

> > -        idx = i * VIRTIO_QNUM;

> > -        rte_vhost_enable_guest_notification(virtio_dev, idx + VIRTIO_RXQ,

> 0);

> > -        rte_vhost_enable_guest_notification(virtio_dev, idx + VIRTIO_TXQ,

> 0);

> > -    }

> > -}

> > -

> > -/*

> >   * Fixes mapping for vhost-user tx queues. Must be called after each

> >   * enabling/disabling of queues and n_txq modifications.

> >   */

> > @@ -2180,62 +2076,6 @@ netdev_dpdk_remap_txqs(struct netdev_dpdk

> *dev)

> >      rte_free(enabled_queues);

> >  }

> >

> > -/*

> > - * A new virtio-net device is added to a vhost port.

> > - */

> > -static int

> > -new_device(struct virtio_net *virtio_dev)

> > -{

> > -    struct netdev_dpdk *dev;

> > -    bool exists = false;

> > -    int newnode = 0;

> > -    long err = 0;

> > -

> > -    ovs_mutex_lock(&dpdk_mutex);

> > -    /* Add device to the vhost port with the same name as that passed

> down. */

> > -    LIST_FOR_EACH(dev, list_node, &dpdk_list) {

> > -        if (strncmp(virtio_dev->ifname, dev->vhost_id, IF_NAME_SZ) == 0) {

> > -            uint32_t qp_num = virtio_dev->virt_qp_nb;

> > -

> > -            ovs_mutex_lock(&dev->mutex);

> > -            /* Get NUMA information */

> > -            err = get_mempolicy(&newnode, NULL, 0, virtio_dev,

> > -                                MPOL_F_NODE | MPOL_F_ADDR);

> > -            if (err) {

> > -                VLOG_INFO("Error getting NUMA info for vHost Device '%s'",

> > -                        virtio_dev->ifname);

> > -                newnode = dev->socket_id;

> > -            }

> > -

> > -            dev->requested_socket_id = newnode;

> > -            dev->requested_n_rxq = qp_num;

> > -            dev->requested_n_txq = qp_num;

> > -            netdev_request_reconfigure(&dev->up);

> > -

> > -            ovsrcu_set(&dev->virtio_dev, virtio_dev);

> > -            exists = true;

> > -

> > -            /* Disable notifications. */

> > -            set_irq_status(virtio_dev);

> > -            netdev_change_seq_changed(&dev->up);

> > -            ovs_mutex_unlock(&dev->mutex);

> > -            break;

> > -        }

> > -    }

> > -    ovs_mutex_unlock(&dpdk_mutex);

> > -

> > -    if (!exists) {

> > -        VLOG_INFO("vHost Device '%s' %"PRIu64" can't be added - name not "

> > -                  "found", virtio_dev->ifname, virtio_dev->device_fh);

> > -

> > -        return -1;

> > -    }

> > -

> > -    VLOG_INFO("vHost Device '%s' %"PRIu64" has been added on numa

> node %i",

> > -              virtio_dev->ifname, virtio_dev->device_fh, newnode);

> > -    return 0;

> > -}

> > -

> >  /* Clears mapping for all available queues of vhost interface. */

> >  static void

> >  netdev_dpdk_txq_map_clear(struct netdev_dpdk *dev)

> > @@ -2248,144 +2088,18 @@ netdev_dpdk_txq_map_clear(struct

> netdev_dpdk *dev)

> >      }

> >  }

> >

> > -/*

> > - * Remove a virtio-net device from the specific vhost port.  Use dev-

> >remove

> > - * flag to stop any more packets from being sent or received to/from a VM

> and

> > - * ensure all currently queued packets have been sent/received before

> removing

> > - *  the device.

> > - */

> > -static void

> > -destroy_device(volatile struct virtio_net *virtio_dev)

> > -{

> > -    struct netdev_dpdk *dev;

> > -    bool exists = false;

> > -

> > -    ovs_mutex_lock(&dpdk_mutex);

> > -    LIST_FOR_EACH (dev, list_node, &dpdk_list) {

> > -        if (netdev_dpdk_get_virtio(dev) == virtio_dev) {

> > -

> > -            ovs_mutex_lock(&dev->mutex);

> > -            virtio_dev->flags &= ~VIRTIO_DEV_RUNNING;

> > -            ovsrcu_set(&dev->virtio_dev, NULL);

> > -            /* Clear tx/rx queue settings. */

> > -            netdev_dpdk_txq_map_clear(dev);

> > -            dev->requested_n_rxq = NR_QUEUE;

> > -            dev->requested_n_txq = NR_QUEUE;

> > -            netdev_request_reconfigure(&dev->up);

> > -

> > -            netdev_change_seq_changed(&dev->up);

> > -            ovs_mutex_unlock(&dev->mutex);

> > -            exists = true;

> > -            break;

> > -        }

> > -    }

> > -

> > -    ovs_mutex_unlock(&dpdk_mutex);

> > -

> > -    if (exists == true) {

> > -        /*

> > -         * Wait for other threads to quiesce after setting the 'virtio_dev'

> > -         * to NULL, before returning.

> > -         */

> > -        ovsrcu_synchronize();

> > -        /*

> > -         * As call to ovsrcu_synchronize() will end the quiescent state,

> > -         * put thread back into quiescent state before returning.

> > -         */

> > -        ovsrcu_quiesce_start();

> > -        VLOG_INFO("vHost Device '%s' %"PRIu64" has been removed",

> > -                  virtio_dev->ifname, virtio_dev->device_fh);

> > -    } else {

> > -        VLOG_INFO("vHost Device '%s' %"PRIu64" not found", virtio_dev-

> >ifname,

> > -                  virtio_dev->device_fh);

> > -    }

> > -}

> > -

> > -static int

> > -vring_state_changed(struct virtio_net *virtio_dev, uint16_t queue_id,

> > -                    int enable)

> > -{

> > -    struct netdev_dpdk *dev;

> > -    bool exists = false;

> > -    int qid = queue_id / VIRTIO_QNUM;

> > -

> > -    if (queue_id % VIRTIO_QNUM == VIRTIO_TXQ) {

> > -        return 0;

> > -    }

> > -

> > -    ovs_mutex_lock(&dpdk_mutex);

> > -    LIST_FOR_EACH (dev, list_node, &dpdk_list) {

> > -        if (strncmp(virtio_dev->ifname, dev->vhost_id, IF_NAME_SZ) == 0) {

> > -            ovs_mutex_lock(&dev->mutex);

> > -            if (enable) {

> > -                dev->tx_q[qid].map = qid;

> > -            } else {

> > -                dev->tx_q[qid].map = OVS_VHOST_QUEUE_DISABLED;

> > -            }

> > -            netdev_dpdk_remap_txqs(dev);

> > -            exists = true;

> > -            ovs_mutex_unlock(&dev->mutex);

> > -            break;

> > -        }

> > -    }

> > -    ovs_mutex_unlock(&dpdk_mutex);

> > -

> > -    if (exists) {

> > -        VLOG_INFO("State of queue %d ( tx_qid %d ) of vhost device '%s' %"

> > -                  PRIu64" changed to \'%s\'", queue_id, qid,

> > -                  virtio_dev->ifname, virtio_dev->device_fh,

> > -                  (enable == 1) ? "enabled" : "disabled");

> > -    } else {

> > -        VLOG_INFO("vHost Device '%s' %"PRIu64" not found", virtio_dev-

> >ifname,

> > -                  virtio_dev->device_fh);

> > -        return -1;

> > -    }

> > -

> > -    return 0;

> > -}

> > -

> > -struct virtio_net *

> > -netdev_dpdk_get_virtio(const struct netdev_dpdk *dev)

> > -{

> > -    return ovsrcu_get(struct virtio_net *, &dev->virtio_dev);

> > -}

> > -

> >  struct ingress_policer *

> >  netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev)

> >  {

> >      return ovsrcu_get(struct ingress_policer *, &dev->ingress_policer);

> >  }

> >

> > -/*

> > - * These callbacks allow virtio-net devices to be added to vhost ports when

> > - * configuration has been fully complete.

> > - */

> > -static const struct virtio_net_device_ops virtio_net_device_ops =

> > -{

> > -    .new_device =  new_device,

> > -    .destroy_device = destroy_device,

> > -    .vring_state_changed = vring_state_changed

> > -};

> > -

> > -static void *

> > -start_vhost_loop(void *dummy OVS_UNUSED)

> > -{

> > -     pthread_detach(pthread_self());

> > -     /* Put the vhost thread into quiescent state. */

> > -     ovsrcu_quiesce_start();

> > -     rte_vhost_driver_session_start();

> > -     return NULL;

> > -}

> > -

> >  static int

> >  dpdk_vhost_class_init(void)

> >  {

> > -    rte_vhost_driver_callback_register(&virtio_net_device_ops);

> > -    rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4

> > -                            | 1ULL << VIRTIO_NET_F_HOST_TSO6

> > -                            | 1ULL << VIRTIO_NET_F_CSUM);

> > -

> > -    ovs_thread_create("vhost_thread", start_vhost_loop, NULL);

> > +    rte_eth_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4

> > +                                | 1ULL << VIRTIO_NET_F_HOST_TSO6

> > +                                | 1ULL << VIRTIO_NET_F_CSUM);

> >      return 0;

> >  }

> >

> > @@ -2498,7 +2212,17 @@ netdev_dpdk_ring_send(struct netdev

> *netdev, int qid,

> >          dp_packet_rss_invalidate(batch->packets[i]);

> >      }

> >

> > -    netdev_dpdk_send__(dev, qid, batch, may_steal, concurrent_txq);

> > +    if (OVS_UNLIKELY(concurrent_txq)) {

> > +        qid = qid % dev->up.n_txq;

> > +        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);

> > +    }

> > +

> > +    netdev_dpdk_send__(dev, qid, batch, may_steal);

> > +

> > +    if (OVS_UNLIKELY(concurrent_txq)) {

> > +        rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);

> > +    }

> > +

> >      return 0;

> >  }

> >

> > @@ -2787,7 +2511,6 @@ static int

> >  netdev_dpdk_vhost_reconfigure(struct netdev *netdev)

> >  {

> >      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> > -    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);

> >      int err = 0;

> >

> >      ovs_mutex_lock(&dpdk_mutex);

> > @@ -2813,10 +2536,6 @@ netdev_dpdk_vhost_reconfigure(struct netdev

> *netdev)

> >          }

> >      }

> >

> > -    if (virtio_dev) {

> > -        virtio_dev->flags |= VIRTIO_DEV_RUNNING;

> > -    }

> > -

> >      ovs_mutex_unlock(&dev->mutex);

> >      ovs_mutex_unlock(&dpdk_mutex);

> >

> > @@ -3307,12 +3026,12 @@ static const struct netdev_class OVS_UNUSED

> dpdk_vhost_class =

> >          NULL,

> >          NULL,

> >          netdev_dpdk_vhost_send,

> > -        netdev_dpdk_vhost_get_carrier,

> > -        netdev_dpdk_vhost_get_stats,

> > +        netdev_dpdk_get_carrier,

> > +        netdev_dpdk_get_stats,

> >          NULL,

> >          NULL,

> >          netdev_dpdk_vhost_reconfigure,

> > -        netdev_dpdk_vhost_rxq_recv);

> > +        netdev_dpdk_rxq_recv);

> >

> >  void

> >  netdev_dpdk_register(void)

> >
diff mbox

Patch

diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md
index 7609aa7..4feb7be 100644
--- a/INSTALL.DPDK.md
+++ b/INSTALL.DPDK.md
@@ -604,6 +604,16 @@  can be found in [Vhost Walkthrough].
 
     http://dpdk.org/doc/guides/rel_notes/release_16_04.html
 
+  - dpdk, dpdkr and dpdkvhostuser ports are 'eth' type ports in the context of
+    DPDK, as they are all managed by the rte_ether API. They are therefore
+    subject to the DPDK configuration option CONFIG_RTE_MAX_ETHPORTS, which
+    defaults to 32. As a result, the combined total number of dpdk, dpdkr and
+    dpdkvhostuser ports allowed in OVS with DPDK is 32 by default. This value
+    can be changed, if desired, by modifying the configuration file in DPDK,
+    or by overriding the default value on the command line when building
+    DPDK, e.g.:
+
+        `make install CONFIG_RTE_MAX_ETHPORTS=64`
 
 Bug Reporting:
 --------------
diff --git a/NEWS b/NEWS
index dc3dedb..6510dde 100644
--- a/NEWS
+++ b/NEWS
@@ -64,6 +64,8 @@  Post-v2.5.0
      * Basic connection tracking for the userspace datapath (no ALG,
        fragmentation or NAT support yet)
      * Remove dpdkvhostcuse port type.
+     * vHost PMD integration brings vhost-user ports under control of the
+       rte_ether DPDK API.
    - Increase number of registers to 16.
    - ovs-benchmark: This utility has been removed due to lack of use and
      bitrot.
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index d6959fe..d6ceeec 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -30,7 +30,6 @@ 
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <getopt.h>
-#include <numaif.h>
 
 #include "dirs.h"
 #include "dp-packet.h"
@@ -56,9 +55,9 @@ 
 #include "unixctl.h"
 
 #include "rte_config.h"
+#include "rte_eth_vhost.h"
 #include "rte_mbuf.h"
 #include "rte_meter.h"
-#include "rte_virtio_net.h"
 
 VLOG_DEFINE_THIS_MODULE(dpdk);
 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
@@ -141,6 +140,9 @@  static char *vhost_sock_dir = NULL;   /* Location of vhost-user sockets */
 
 #define VHOST_ENQ_RETRY_NUM 8
 
+/* Tracks vHost User driver IDs: a nonzero entry is in use, 0 is free. */
+static unsigned int vhost_drv_ids[RTE_MAX_ETHPORTS];
+
 static const struct rte_eth_conf port_conf = {
     .rxmode = {
         .mq_mode = ETH_MQ_RX_RSS,
@@ -346,12 +348,15 @@  struct netdev_dpdk {
     struct rte_eth_link link;
     int link_reset_cnt;
 
-    /* virtio-net structure for vhost device */
-    OVSRCU_TYPE(struct virtio_net *) virtio_dev;
+    /* Number of virtqueue pairs reported by the guest */
+    uint32_t vhost_qp_nb;
 
     /* Identifier used to distinguish vhost devices from each other */
     char vhost_id[PATH_MAX];
 
+    /* ID of vhost user port given to the PMD driver */
+    unsigned int vhost_pmd_id;
+
     /* In dpdk_list. */
     struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);
 
@@ -382,16 +387,23 @@  struct netdev_rxq_dpdk {
 static bool dpdk_thread_is_pmd(void);
 
 static int netdev_dpdk_construct(struct netdev *);
-
-struct virtio_net * netdev_dpdk_get_virtio(const struct netdev_dpdk *dev);
+static int netdev_dpdk_vhost_construct(struct netdev *);
 
 struct ingress_policer *
 netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev);
 
+static void link_status_changed_callback(uint8_t port_id,
+        enum rte_eth_event_type type, void *param);
+static void vring_state_changed_callback(uint8_t port_id,
+        enum rte_eth_event_type type, void *param);
+static void netdev_dpdk_remap_txqs(struct netdev_dpdk *dev);
+static void netdev_dpdk_txq_map_clear(struct netdev_dpdk *dev);
+
 static bool
-is_dpdk_class(const struct netdev_class *class)
+is_dpdk_eth_class(const struct netdev_class *class)
 {
-    return class->construct == netdev_dpdk_construct;
+    return ((class->construct == netdev_dpdk_construct) ||
+            (class->construct == netdev_dpdk_vhost_construct));
 }
 
 /* DPDK NIC drivers allocate RX buffers at a particular granularity, typically
@@ -616,8 +628,13 @@  dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int n_rxq, int n_txq)
             continue;
         }
 
-        dev->up.n_rxq = n_rxq;
-        dev->up.n_txq = n_txq;
+        /* Only set n_*xq for physical devices. vHost User devices will set
+         * this value correctly using info from the virtio backend.
+         */
+        if (dev->type == DPDK_DEV_ETH) {
+            dev->up.n_rxq = n_rxq;
+            dev->up.n_txq = n_txq;
+        }
 
         return 0;
     }
@@ -641,8 +658,14 @@  dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex)
 
     rte_eth_dev_info_get(dev->port_id, &info);
 
-    n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq);
-    n_txq = MIN(info.max_tx_queues, dev->up.n_txq);
+    if (dev->type == DPDK_DEV_VHOST) {
+        /* We don't know how many queues QEMU will use so set up the max */
+        n_rxq = MIN(OVS_VHOST_MAX_QUEUE_NUM, RTE_MAX_QUEUES_PER_PORT);
+        n_txq = MIN(OVS_VHOST_MAX_QUEUE_NUM, RTE_MAX_QUEUES_PER_PORT);
+    } else {
+        n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq);
+        n_txq = MIN(info.max_tx_queues, dev->up.n_txq);
+    }
 
     diag = dpdk_eth_dev_queue_setup(dev, n_rxq, n_txq);
     if (diag) {
@@ -709,6 +732,85 @@  netdev_dpdk_alloc_txq(struct netdev_dpdk *dev, unsigned int n_txqs)
     }
 }
 
+void
+link_status_changed_callback(uint8_t port_id,
+                             enum rte_eth_event_type type OVS_UNUSED,
+                             void *param OVS_UNUSED)
+{
+    struct netdev_dpdk *dev;
+    int socket_id = -1;
+
+    ovs_mutex_lock(&dpdk_mutex);
+    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
+        if (port_id == dev->port_id) {
+            ovs_mutex_lock(&dev->mutex);
+            check_link_status(dev);
+            if (dev->link.link_status == ETH_LINK_UP) {
+                /* new device */
+                /* Get NUMA information */
+                socket_id = rte_eth_dev_socket_id(dev->port_id);
+                if (socket_id != -1 && socket_id != dev->socket_id) {
+                    dev->requested_socket_id = socket_id;
+                }
+                netdev_request_reconfigure(&dev->up);
+                netdev_change_seq_changed(&dev->up);
+                VLOG_INFO("vHost Device '%s' has been added on numa node %i",
+                          dev->vhost_id, socket_id);
+            } else {
+                /* destroy device */
+                /* Clear tx/rx queue settings. */
+                netdev_dpdk_txq_map_clear(dev);
+                netdev_request_reconfigure(&dev->up);
+                netdev_change_seq_changed(&dev->up);
+                VLOG_INFO("vHost Device '%s' has been removed", dev->vhost_id);
+            }
+            ovs_mutex_unlock(&dev->mutex);
+            break;
+        }
+    }
+
+    ovs_mutex_unlock(&dpdk_mutex);
+
+    return;
+}
+
+void
+vring_state_changed_callback(uint8_t port_id,
+                             enum rte_eth_event_type type OVS_UNUSED,
+                             void *param OVS_UNUSED)
+{
+    struct netdev_dpdk *dev;
+    struct rte_eth_vhost_queue_event event;
+    int err = 0;
+
+    err = rte_eth_vhost_get_queue_event(port_id, &event);
+    if (err || event.rx) {
+        return;
+    }
+
+    ovs_mutex_lock(&dpdk_mutex);
+    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
+        if (port_id == dev->port_id) {
+            ovs_mutex_lock(&dev->mutex);
+            if (event.enable) {
+                dev->tx_q[event.queue_id].map = event.queue_id;
+                dev->vhost_qp_nb++;
+            } else {
+                dev->tx_q[event.queue_id].map = OVS_VHOST_QUEUE_DISABLED;
+                dev->vhost_qp_nb--;
+            }
+            dev->requested_n_rxq = dev->vhost_qp_nb;
+            dev->requested_n_txq = dev->vhost_qp_nb;
+            netdev_request_reconfigure(&dev->up);
+            ovs_mutex_unlock(&dev->mutex);
+            break;
+        }
+    }
+    ovs_mutex_unlock(&dpdk_mutex);
+
+    return;
+}
+
 static int
 netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,
                  enum dpdk_dev_type type)
@@ -718,6 +820,7 @@  netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,
     int sid;
     int err = 0;
     uint32_t buf_size;
+    unsigned int nr_q = 0;
 
     ovs_mutex_init(&dev->mutex);
     ovs_mutex_lock(&dev->mutex);
@@ -727,11 +830,7 @@  netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,
     /* If the 'sid' is negative, it means that the kernel fails
      * to obtain the pci numa info.  In that situation, always
      * use 'SOCKET0'. */
-    if (type == DPDK_DEV_ETH) {
-        sid = rte_eth_dev_socket_id(port_no);
-    } else {
-        sid = rte_lcore_to_socket_id(rte_get_master_lcore());
-    }
+    sid = rte_eth_dev_socket_id(port_no);
 
     dev->socket_id = sid < 0 ? SOCKET0 : sid;
     dev->requested_socket_id = dev->socket_id;
@@ -761,17 +860,21 @@  netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,
     netdev->n_txq = NR_QUEUE;
     dev->requested_n_rxq = netdev->n_rxq;
     dev->requested_n_txq = netdev->n_txq;
+    dev->vhost_qp_nb = 0;
 
-    if (type == DPDK_DEV_ETH) {
-        err = dpdk_eth_dev_init(dev);
-        if (err) {
-            goto unlock;
-        }
-        netdev_dpdk_alloc_txq(dev, netdev->n_txq);
-    } else {
-        netdev_dpdk_alloc_txq(dev, OVS_VHOST_MAX_QUEUE_NUM);
-        /* Enable DPDK_DEV_VHOST device and set promiscuous mode flag. */
-        dev->flags = NETDEV_UP | NETDEV_PROMISC;
+    err = dpdk_eth_dev_init(dev);
+    if (err) {
+        goto unlock;
+    }
+    nr_q = (type == DPDK_DEV_ETH ?
+            1 : MIN(OVS_VHOST_MAX_QUEUE_NUM, RTE_MAX_QUEUES_PER_PORT));
+    netdev_dpdk_alloc_txq(dev, nr_q);
+
+    if (type == DPDK_DEV_VHOST) {
+        rte_eth_dev_callback_register(port_no, RTE_ETH_EVENT_QUEUE_STATE,
+                                      vring_state_changed_callback, NULL);
+        rte_eth_dev_callback_register(port_no, RTE_ETH_EVENT_INTR_LSC,
+                                      link_status_changed_callback, NULL);
     }
 
     ovs_list_push_back(&dpdk_list, &dev->list_node);
@@ -802,17 +905,48 @@  dpdk_dev_parse_name(const char dev_name[], const char prefix[],
     }
 }
 
+/* When attaching a vhost device to DPDK, a unique name of the format
+ * 'eth_vhostX' is expected, where X is a unique identifier.
+ * get_vhost_drv_id returns a valid X value to provide to DPDK.
+ */
+static int
+get_vhost_drv_id(void)
+{
+    int i = 0;
+
+    for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
+        if (vhost_drv_ids[i] == 0) {
+            return i;
+        }
+    }
+
+    return -1;
+}
+
+static void
+set_vhost_drv_id(int id, int val)
+{
+    vhost_drv_ids[id] = val;
+}
+
 static int
 netdev_dpdk_vhost_construct(struct netdev *netdev)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
     const char *name = netdev->name;
     int err;
+    uint8_t port_no = 0;
+    char *devargs;
+    int driver_id = 0;
+
+    if (rte_eal_init_ret) {
+        return rte_eal_init_ret;
+    }
 
     /* 'name' is appended to 'vhost_sock_dir' and used to create a socket in
      * the file system. '/' or '\' would traverse directories, so they're not
      * acceptable in 'name'. */
-    if (strchr(name, '/') || strchr(name, '\\')) {
+    if (strchr(name, '/') || strchr(name, '\\') || strchr(name, ',')) {
         VLOG_ERR("\"%s\" is not a valid name for a vhost-user port. "
                  "A valid name must not include '/' or '\\'",
                  name);
@@ -829,18 +963,32 @@  netdev_dpdk_vhost_construct(struct netdev *netdev)
      */
     snprintf(dev->vhost_id, sizeof(dev->vhost_id), "%s/%s",
              vhost_sock_dir, name);
+    driver_id = get_vhost_drv_id();
+    if (driver_id == -1) {
+        VLOG_ERR("Unable to create vhost-user device %s - too many vhost-user"
+                 "devices registered with PMD", dev->vhost_id);
+        err = ENODEV;
+        goto out;
+    } else {
+        devargs = xasprintf("eth_vhost%u,iface=%s,queues=%i",
+                 driver_id, dev->vhost_id,
+                 MIN(OVS_VHOST_MAX_QUEUE_NUM, RTE_MAX_QUEUES_PER_PORT));
+        err = rte_eth_dev_attach(devargs, &port_no);
+    }
 
-    err = rte_vhost_driver_register(dev->vhost_id);
     if (err) {
-        VLOG_ERR("vhost-user socket device setup failure for socket %s\n",
+        VLOG_ERR("Failed to attach vhost-user device %s to DPDK",
                  dev->vhost_id);
     } else {
         fatal_signal_add_file_to_unlink(dev->vhost_id);
         VLOG_INFO("Socket %s created for vhost-user port %s\n",
                   dev->vhost_id, name);
-        err = netdev_dpdk_init(netdev, -1, DPDK_DEV_VHOST);
+        dev->vhost_pmd_id = driver_id;
+        set_vhost_drv_id(driver_id, 1);
+        err = netdev_dpdk_init(netdev, port_no, DPDK_DEV_VHOST);
     }
 
+out:
     ovs_mutex_unlock(&dpdk_mutex);
     return err;
 }
@@ -868,20 +1016,28 @@  netdev_dpdk_construct(struct netdev *netdev)
 }
 
 static void
-netdev_dpdk_destruct(struct netdev *netdev)
+dpdk_destruct_helper(struct netdev_dpdk *dev)
 {
-    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
-
-    ovs_mutex_lock(&dev->mutex);
     rte_eth_dev_stop(dev->port_id);
     free(ovsrcu_get_protected(struct ingress_policer *,
                               &dev->ingress_policer));
-    ovs_mutex_unlock(&dev->mutex);
 
-    ovs_mutex_lock(&dpdk_mutex);
     rte_free(dev->tx_q);
     ovs_list_remove(&dev->list_node);
     dpdk_mp_put(dev->dpdk_mp);
+}
+
+static void
+netdev_dpdk_destruct(struct netdev *netdev)
+{
+    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+
+    ovs_mutex_lock(&dpdk_mutex);
+    ovs_mutex_lock(&dev->mutex);
+
+    dpdk_destruct_helper(dev);
+
+    ovs_mutex_unlock(&dev->mutex);
     ovs_mutex_unlock(&dpdk_mutex);
 }
 
@@ -890,30 +1046,19 @@  netdev_dpdk_vhost_destruct(struct netdev *netdev)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
 
-    /* Guest becomes an orphan if still attached. */
-    if (netdev_dpdk_get_virtio(dev) != NULL) {
-        VLOG_ERR("Removing port '%s' while vhost device still attached.",
-                 netdev->name);
-        VLOG_ERR("To restore connectivity after re-adding of port, VM on socket"
-                 " '%s' must be restarted.",
-                 dev->vhost_id);
-    }
+    ovs_mutex_lock(&dpdk_mutex);
+    ovs_mutex_lock(&dev->mutex);
 
-    if (rte_vhost_driver_unregister(dev->vhost_id)) {
-        VLOG_ERR("Unable to remove vhost-user socket %s", dev->vhost_id);
+    if (rte_eth_dev_detach(dev->port_id, dev->vhost_id)) {
+        VLOG_ERR("Error removing vhost device %s", dev->vhost_id);
     } else {
         fatal_signal_remove_file_to_unlink(dev->vhost_id);
     }
+    set_vhost_drv_id(dev->vhost_pmd_id, 0);
 
-    ovs_mutex_lock(&dev->mutex);
-    free(ovsrcu_get_protected(struct ingress_policer *,
-                              &dev->ingress_policer));
-    ovs_mutex_unlock(&dev->mutex);
+    dpdk_destruct_helper(dev);
 
-    ovs_mutex_lock(&dpdk_mutex);
-    rte_free(dev->tx_q);
-    ovs_list_remove(&dev->list_node);
-    dpdk_mp_put(dev->dpdk_mp);
+    ovs_mutex_unlock(&dev->mutex);
     ovs_mutex_unlock(&dpdk_mutex);
 }
 
@@ -1105,117 +1250,6 @@  ingress_policer_run(struct ingress_policer *policer, struct rte_mbuf **pkts,
     return cnt;
 }
 
-static bool
-is_vhost_running(struct virtio_net *virtio_dev)
-{
-    return (virtio_dev != NULL && (virtio_dev->flags & VIRTIO_DEV_RUNNING));
-}
-
-static inline void
-netdev_dpdk_vhost_update_rx_size_counters(struct netdev_stats *stats,
-                                          unsigned int packet_size)
-{
-    /* Hard-coded search for the size bucket. */
-    if (packet_size < 256) {
-        if (packet_size >= 128) {
-            stats->rx_128_to_255_packets++;
-        } else if (packet_size <= 64) {
-            stats->rx_1_to_64_packets++;
-        } else {
-            stats->rx_65_to_127_packets++;
-        }
-    } else {
-        if (packet_size >= 1523) {
-            stats->rx_1523_to_max_packets++;
-        } else if (packet_size >= 1024) {
-            stats->rx_1024_to_1522_packets++;
-        } else if (packet_size < 512) {
-            stats->rx_256_to_511_packets++;
-        } else {
-            stats->rx_512_to_1023_packets++;
-        }
-    }
-}
-
-static inline void
-netdev_dpdk_vhost_update_rx_counters(struct netdev_stats *stats,
-                                     struct dp_packet **packets, int count,
-                                     int dropped)
-{
-    int i;
-    unsigned int packet_size;
-    struct dp_packet *packet;
-
-    stats->rx_packets += count;
-    stats->rx_dropped += dropped;
-    for (i = 0; i < count; i++) {
-        packet = packets[i];
-        packet_size = dp_packet_size(packet);
-
-        if (OVS_UNLIKELY(packet_size < ETH_HEADER_LEN)) {
-            /* This only protects the following multicast counting from
-             * too short packets, but it does not stop the packet from
-             * further processing. */
-            stats->rx_errors++;
-            stats->rx_length_errors++;
-            continue;
-        }
-
-        netdev_dpdk_vhost_update_rx_size_counters(stats, packet_size);
-
-        struct eth_header *eh = (struct eth_header *) dp_packet_data(packet);
-        if (OVS_UNLIKELY(eth_addr_is_multicast(eh->eth_dst))) {
-            stats->multicast++;
-        }
-
-        stats->rx_bytes += packet_size;
-    }
-}
-
-/*
- * The receive path for the vhost port is the TX path out from guest.
- */
-static int
-netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
-                           struct dp_packet_batch *batch)
-{
-    struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
-    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
-    int qid = rxq->queue_id;
-    struct ingress_policer *policer = netdev_dpdk_get_ingress_policer(dev);
-    uint16_t nb_rx = 0;
-    uint16_t dropped = 0;
-
-    if (OVS_UNLIKELY(!is_vhost_running(virtio_dev)
-                     || !(dev->flags & NETDEV_UP))) {
-        return EAGAIN;
-    }
-
-    nb_rx = rte_vhost_dequeue_burst(virtio_dev, qid * VIRTIO_QNUM + VIRTIO_TXQ,
-                                    dev->dpdk_mp->mp,
-                                    (struct rte_mbuf **) batch->packets,
-                                    NETDEV_MAX_BURST);
-    if (!nb_rx) {
-        return EAGAIN;
-    }
-
-    if (policer) {
-        dropped = nb_rx;
-        nb_rx = ingress_policer_run(policer,
-                                    (struct rte_mbuf **) batch->packets,
-                                    nb_rx);
-        dropped -= nb_rx;
-    }
-
-    rte_spinlock_lock(&dev->stats_lock);
-    netdev_dpdk_vhost_update_rx_counters(&dev->stats, batch->packets,
-                                         nb_rx, dropped);
-    rte_spinlock_unlock(&dev->stats_lock);
-
-    batch->count = (int) nb_rx;
-    return 0;
-}
-
 static int
 netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch *batch)
 {
@@ -1269,85 +1303,6 @@  netdev_dpdk_qos_run__(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
     return cnt;
 }
 
-static inline void
-netdev_dpdk_vhost_update_tx_counters(struct netdev_stats *stats,
-                                     struct dp_packet **packets,
-                                     int attempted,
-                                     int dropped)
-{
-    int i;
-    int sent = attempted - dropped;
-
-    stats->tx_packets += sent;
-    stats->tx_dropped += dropped;
-
-    for (i = 0; i < sent; i++) {
-        stats->tx_bytes += dp_packet_size(packets[i]);
-    }
-}
-
-static void
-__netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
-                         struct dp_packet **pkts, int cnt,
-                         bool may_steal)
-{
-    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
-    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
-    struct rte_mbuf **cur_pkts = (struct rte_mbuf **) pkts;
-    unsigned int total_pkts = cnt;
-    unsigned int qos_pkts = cnt;
-    int retries = 0;
-
-    qid = dev->tx_q[qid % netdev->n_txq].map;
-
-    if (OVS_UNLIKELY(!is_vhost_running(virtio_dev) || qid < 0
-                     || !(dev->flags & NETDEV_UP))) {
-        rte_spinlock_lock(&dev->stats_lock);
-        dev->stats.tx_dropped+= cnt;
-        rte_spinlock_unlock(&dev->stats_lock);
-        goto out;
-    }
-
-    rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
-
-    /* Check has QoS has been configured for the netdev */
-    cnt = netdev_dpdk_qos_run__(dev, cur_pkts, cnt);
-    qos_pkts -= cnt;
-
-    do {
-        int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;
-        unsigned int tx_pkts;
-
-        tx_pkts = rte_vhost_enqueue_burst(virtio_dev, vhost_qid,
-                                          cur_pkts, cnt);
-        if (OVS_LIKELY(tx_pkts)) {
-            /* Packets have been sent.*/
-            cnt -= tx_pkts;
-            /* Prepare for possible retry.*/
-            cur_pkts = &cur_pkts[tx_pkts];
-        } else {
-            /* No packets sent - do not retry.*/
-            break;
-        }
-    } while (cnt && (retries++ < VHOST_ENQ_RETRY_NUM));
-
-    rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
-
-    rte_spinlock_lock(&dev->stats_lock);
-    cnt += qos_pkts;
-    netdev_dpdk_vhost_update_tx_counters(&dev->stats, pkts, total_pkts, cnt);
-    rte_spinlock_unlock(&dev->stats_lock);
-
-out:
-    if (may_steal) {
-        int i;
-
-        for (i = 0; i < total_pkts; i++) {
-            dp_packet_delete(pkts[i]);
-        }
-    }
-}
-
 /* Tx function. Transmit packets indefinitely */
 static void
 dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
@@ -1402,18 +1357,13 @@  dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
         newcnt++;
     }
 
-    if (dev->type == DPDK_DEV_VHOST) {
-        __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) mbufs,
-                                 newcnt, true);
-    } else {
-        unsigned int qos_pkts = newcnt;
+    unsigned int qos_pkts = newcnt;
 
-        /* Check if QoS has been configured for this netdev. */
-        newcnt = netdev_dpdk_qos_run__(dev, mbufs, newcnt);
+    /* Check if QoS has been configured for this netdev. */
+    newcnt = netdev_dpdk_qos_run__(dev, mbufs, newcnt);
 
-        dropped += qos_pkts - newcnt;
-        netdev_dpdk_eth_tx_burst(dev, qid, mbufs, newcnt);
-    }
+    dropped += qos_pkts - newcnt;
+    netdev_dpdk_eth_tx_burst(dev, qid, mbufs, newcnt);
 
     if (OVS_UNLIKELY(dropped)) {
         rte_spinlock_lock(&dev->stats_lock);
@@ -1426,33 +1376,10 @@  dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
     }
 }
 
-static int
-netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
-                       struct dp_packet_batch *batch,
-                       bool may_steal, bool concurrent_txq OVS_UNUSED)
-{
-
-    if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
-        dpdk_do_tx_copy(netdev, qid, batch);
-        dp_packet_delete_batch(batch, may_steal);
-    } else {
-        dp_packet_batch_apply_cutlen(batch);
-        __netdev_dpdk_vhost_send(netdev, qid, batch->packets, batch->count,
-                                 may_steal);
-    }
-    return 0;
-}
-
 static inline void
 netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
-                   struct dp_packet_batch *batch, bool may_steal,
-                   bool concurrent_txq)
+                   struct dp_packet_batch *batch, bool may_steal)
 {
-    if (OVS_UNLIKELY(concurrent_txq)) {
-        qid = qid % dev->up.n_txq;
-        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
-    }
-
     if (OVS_UNLIKELY(!may_steal ||
                      batch->packets[0]->source != DPBUF_DPDK)) {
         struct netdev *netdev = &dev->up;
@@ -1512,20 +1439,50 @@  netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
             rte_spinlock_unlock(&dev->stats_lock);
         }
     }
+}
+
+static int
+netdev_dpdk_eth_send(struct netdev *netdev, int qid,
+                     struct dp_packet_batch *batch, bool may_steal,
+                     bool concurrent_txq)
+{
+    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+
+    if (OVS_UNLIKELY(concurrent_txq)) {
+        qid = qid % dev->up.n_txq;
+        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
+    }
+
+    netdev_dpdk_send__(dev, qid, batch, may_steal);
 
     if (OVS_UNLIKELY(concurrent_txq)) {
         rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
     }
+
+    return 0;
 }
 
 static int
-netdev_dpdk_eth_send(struct netdev *netdev, int qid,
-                     struct dp_packet_batch *batch, bool may_steal,
-                     bool concurrent_txq)
+netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
+                       struct dp_packet_batch *batch, bool may_steal,
+                       bool concurrent_txq OVS_UNUSED)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
 
-    netdev_dpdk_send__(dev, qid, batch, may_steal, concurrent_txq);
+    qid = dev->tx_q[qid % netdev->n_txq].map;
+    if (qid == -1) {
+        rte_spinlock_lock(&dev->stats_lock);
+        dev->stats.tx_dropped+= batch->count;
+        rte_spinlock_unlock(&dev->stats_lock);
+        if (may_steal) {
+            dp_packet_delete_batch(batch, may_steal);
+        }
+    } else {
+        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
+        netdev_dpdk_send__(dev, qid, batch, may_steal);
+        rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
+    }
+
     return 0;
 }
 
@@ -1622,41 +1579,6 @@  out:
 static int
 netdev_dpdk_get_carrier(const struct netdev *netdev, bool *carrier);
 
-static int
-netdev_dpdk_vhost_get_stats(const struct netdev *netdev,
-                            struct netdev_stats *stats)
-{
-    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
-
-    ovs_mutex_lock(&dev->mutex);
-
-    rte_spinlock_lock(&dev->stats_lock);
-    /* Supported Stats */
-    stats->rx_packets += dev->stats.rx_packets;
-    stats->tx_packets += dev->stats.tx_packets;
-    stats->rx_dropped = dev->stats.rx_dropped;
-    stats->tx_dropped += dev->stats.tx_dropped;
-    stats->multicast = dev->stats.multicast;
-    stats->rx_bytes = dev->stats.rx_bytes;
-    stats->tx_bytes = dev->stats.tx_bytes;
-    stats->rx_errors = dev->stats.rx_errors;
-    stats->rx_length_errors = dev->stats.rx_length_errors;
-
-    stats->rx_1_to_64_packets = dev->stats.rx_1_to_64_packets;
-    stats->rx_65_to_127_packets = dev->stats.rx_65_to_127_packets;
-    stats->rx_128_to_255_packets = dev->stats.rx_128_to_255_packets;
-    stats->rx_256_to_511_packets = dev->stats.rx_256_to_511_packets;
-    stats->rx_512_to_1023_packets = dev->stats.rx_512_to_1023_packets;
-    stats->rx_1024_to_1522_packets = dev->stats.rx_1024_to_1522_packets;
-    stats->rx_1523_to_max_packets = dev->stats.rx_1523_to_max_packets;
-
-    rte_spinlock_unlock(&dev->stats_lock);
-
-    ovs_mutex_unlock(&dev->mutex);
-
-    return 0;
-}
-
 static void
 netdev_dpdk_convert_xstats(struct netdev_stats *stats,
                            const struct rte_eth_xstats *xstats,
@@ -1737,28 +1659,40 @@  netdev_dpdk_get_stats(const struct netdev *netdev, struct netdev_stats *stats)
         return EPROTO;
     }
 
-    rte_xstats_len = rte_eth_xstats_get(dev->port_id, NULL, 0);
-    if (rte_xstats_len > 0) {
-        rte_xstats = dpdk_rte_mzalloc(sizeof(*rte_xstats) * rte_xstats_len);
-        memset(rte_xstats, 0xff, sizeof(*rte_xstats) * rte_xstats_len);
-        rte_xstats_ret = rte_eth_xstats_get(dev->port_id, rte_xstats,
-                                            rte_xstats_len);
-        if (rte_xstats_ret > 0 && rte_xstats_ret <= rte_xstats_len) {
-            netdev_dpdk_convert_xstats(stats, rte_xstats, rte_xstats_ret);
+    /* Extended statistics are not yet available for vHost User PMD */
+    if (dev->type == DPDK_DEV_ETH) {
+        rte_xstats_len = rte_eth_xstats_get(dev->port_id, NULL, 0);
+        if (rte_xstats_len > 0) {
+            rte_xstats = dpdk_rte_mzalloc(sizeof(*rte_xstats)
+                                          * rte_xstats_len);
+            memset(rte_xstats, 0xff, sizeof(*rte_xstats) * rte_xstats_len);
+            rte_xstats_ret = rte_eth_xstats_get(dev->port_id, rte_xstats,
+                                                rte_xstats_len);
+            if (rte_xstats_ret > 0 && rte_xstats_ret <= rte_xstats_len) {
+                netdev_dpdk_convert_xstats(stats, rte_xstats, rte_xstats_ret);
+            }
+            rte_free(rte_xstats);
+        } else {
+            VLOG_WARN("Can't get XSTATS counters for port: %i.", dev->port_id);
         }
-        rte_free(rte_xstats);
-    } else {
-        VLOG_WARN("Can't get XSTATS counters for port: %i.", dev->port_id);
     }
 
     stats->rx_packets = rte_stats.ipackets;
     stats->tx_packets = rte_stats.opackets;
     stats->rx_bytes = rte_stats.ibytes;
     stats->tx_bytes = rte_stats.obytes;
-    /* DPDK counts imissed as errors, but count them here as dropped instead */
-    stats->rx_errors = rte_stats.ierrors - rte_stats.imissed;
-    stats->tx_errors = rte_stats.oerrors;
-    stats->multicast = rte_stats.imcasts;
+
+    if (dev->type == DPDK_DEV_ETH) {
+        /* DPDK counts imissed as errors, but count them here as dropped
+         * instead */
+        stats->rx_errors = rte_stats.ierrors - rte_stats.imissed;
+        stats->tx_errors = rte_stats.oerrors;
+        stats->multicast = rte_stats.imcasts;
+    } else {
+        stats->rx_errors = UINT64_MAX;
+        stats->tx_errors = UINT64_MAX;
+        stats->multicast = UINT64_MAX;
+    }
 
     rte_spinlock_lock(&dev->stats_lock);
     stats->tx_dropped = dev->stats.tx_dropped;
@@ -1921,25 +1855,6 @@  netdev_dpdk_get_carrier(const struct netdev *netdev, bool *carrier)
     return 0;
 }
 
-static int
-netdev_dpdk_vhost_get_carrier(const struct netdev *netdev, bool *carrier)
-{
-    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
-    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
-
-    ovs_mutex_lock(&dev->mutex);
-
-    if (is_vhost_running(virtio_dev)) {
-        *carrier = 1;
-    } else {
-        *carrier = 0;
-    }
-
-    ovs_mutex_unlock(&dev->mutex);
-
-    return 0;
-}
-
 static long long int
 netdev_dpdk_get_carrier_resets(const struct netdev *netdev)
 {
@@ -1995,13 +1910,10 @@  netdev_dpdk_update_flags__(struct netdev_dpdk *dev,
             rte_eth_dev_stop(dev->port_id);
         }
     } else {
-        /* If DPDK_DEV_VHOST device's NETDEV_UP flag was changed and vhost is
-         * running then change netdev's change_seq to trigger link state
-         * update. */
-        struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
+        /* If DPDK_DEV_VHOST device's NETDEV_UP flag was changed then change
+         * netdev's change_seq to trigger link state update. */
 
-        if ((NETDEV_UP & ((*old_flagsp ^ on) | (*old_flagsp ^ off)))
-            && is_vhost_running(virtio_dev)) {
+        if ((NETDEV_UP & ((*old_flagsp ^ on) | (*old_flagsp ^ off)))) {
             netdev_change_seq_changed(&dev->up);
 
             /* Clear statistics if device is getting up. */
@@ -2096,7 +2008,7 @@  netdev_dpdk_set_admin_state(struct unixctl_conn *conn, int argc,
 
     if (argc > 2) {
         struct netdev *netdev = netdev_from_name(argv[1]);
-        if (netdev && is_dpdk_class(netdev->netdev_class)) {
+        if (netdev && is_dpdk_eth_class(netdev->netdev_class)) {
             struct netdev_dpdk *dpdk_dev = netdev_dpdk_cast(netdev);
 
             ovs_mutex_lock(&dpdk_dev->mutex);
@@ -2124,22 +2036,6 @@  netdev_dpdk_set_admin_state(struct unixctl_conn *conn, int argc,
 }
 
 /*
- * Set virtqueue flags so that we do not receive interrupts.
- */
-static void
-set_irq_status(struct virtio_net *virtio_dev)
-{
-    uint32_t i;
-    uint64_t idx;
-
-    for (i = 0; i < virtio_dev->virt_qp_nb; i++) {
-        idx = i * VIRTIO_QNUM;
-        rte_vhost_enable_guest_notification(virtio_dev, idx + VIRTIO_RXQ, 0);
-        rte_vhost_enable_guest_notification(virtio_dev, idx + VIRTIO_TXQ, 0);
-    }
-}
-
-/*
  * Fixes mapping for vhost-user tx queues. Must be called after each
  * enabling/disabling of queues and n_txq modifications.
  */
@@ -2180,62 +2076,6 @@  netdev_dpdk_remap_txqs(struct netdev_dpdk *dev)
     rte_free(enabled_queues);
 }
 
-/*
- * A new virtio-net device is added to a vhost port.
- */
-static int
-new_device(struct virtio_net *virtio_dev)
-{
-    struct netdev_dpdk *dev;
-    bool exists = false;
-    int newnode = 0;
-    long err = 0;
-
-    ovs_mutex_lock(&dpdk_mutex);
-    /* Add device to the vhost port with the same name as that passed down. */
-    LIST_FOR_EACH(dev, list_node, &dpdk_list) {
-        if (strncmp(virtio_dev->ifname, dev->vhost_id, IF_NAME_SZ) == 0) {
-            uint32_t qp_num = virtio_dev->virt_qp_nb;
-
-            ovs_mutex_lock(&dev->mutex);
-            /* Get NUMA information */
-            err = get_mempolicy(&newnode, NULL, 0, virtio_dev,
-                                MPOL_F_NODE | MPOL_F_ADDR);
-            if (err) {
-                VLOG_INFO("Error getting NUMA info for vHost Device '%s'",
-                        virtio_dev->ifname);
-                newnode = dev->socket_id;
-            }
-
-            dev->requested_socket_id = newnode;
-            dev->requested_n_rxq = qp_num;
-            dev->requested_n_txq = qp_num;
-            netdev_request_reconfigure(&dev->up);
-
-            ovsrcu_set(&dev->virtio_dev, virtio_dev);
-            exists = true;
-
-            /* Disable notifications. */
-            set_irq_status(virtio_dev);
-            netdev_change_seq_changed(&dev->up);
-            ovs_mutex_unlock(&dev->mutex);
-            break;
-        }
-    }
-    ovs_mutex_unlock(&dpdk_mutex);
-
-    if (!exists) {
-        VLOG_INFO("vHost Device '%s' %"PRIu64" can't be added - name not "
-                  "found", virtio_dev->ifname, virtio_dev->device_fh);
-
-        return -1;
-    }
-
-    VLOG_INFO("vHost Device '%s' %"PRIu64" has been added on numa node %i",
-              virtio_dev->ifname, virtio_dev->device_fh, newnode);
-    return 0;
-}
-
 /* Clears mapping for all available queues of vhost interface. */
 static void
 netdev_dpdk_txq_map_clear(struct netdev_dpdk *dev)
@@ -2248,144 +2088,18 @@  netdev_dpdk_txq_map_clear(struct netdev_dpdk *dev)
     }
 }
 
-/*
- * Remove a virtio-net device from the specific vhost port.  Use dev->remove
- * flag to stop any more packets from being sent or received to/from a VM and
- * ensure all currently queued packets have been sent/received before removing
- *  the device.
- */
-static void
-destroy_device(volatile struct virtio_net *virtio_dev)
-{
-    struct netdev_dpdk *dev;
-    bool exists = false;
-
-    ovs_mutex_lock(&dpdk_mutex);
-    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
-        if (netdev_dpdk_get_virtio(dev) == virtio_dev) {
-
-            ovs_mutex_lock(&dev->mutex);
-            virtio_dev->flags &= ~VIRTIO_DEV_RUNNING;
-            ovsrcu_set(&dev->virtio_dev, NULL);
-            /* Clear tx/rx queue settings. */
-            netdev_dpdk_txq_map_clear(dev);
-            dev->requested_n_rxq = NR_QUEUE;
-            dev->requested_n_txq = NR_QUEUE;
-            netdev_request_reconfigure(&dev->up);
-
-            netdev_change_seq_changed(&dev->up);
-            ovs_mutex_unlock(&dev->mutex);
-            exists = true;
-            break;
-        }
-    }
-
-    ovs_mutex_unlock(&dpdk_mutex);
-
-    if (exists == true) {
-        /*
-         * Wait for other threads to quiesce after setting the 'virtio_dev'
-         * to NULL, before returning.
-         */
-        ovsrcu_synchronize();
-        /*
-         * As call to ovsrcu_synchronize() will end the quiescent state,
-         * put thread back into quiescent state before returning.
-         */
-        ovsrcu_quiesce_start();
-        VLOG_INFO("vHost Device '%s' %"PRIu64" has been removed",
-                  virtio_dev->ifname, virtio_dev->device_fh);
-    } else {
-        VLOG_INFO("vHost Device '%s' %"PRIu64" not found", virtio_dev->ifname,
-                  virtio_dev->device_fh);
-    }
-}
-
-static int
-vring_state_changed(struct virtio_net *virtio_dev, uint16_t queue_id,
-                    int enable)
-{
-    struct netdev_dpdk *dev;
-    bool exists = false;
-    int qid = queue_id / VIRTIO_QNUM;
-
-    if (queue_id % VIRTIO_QNUM == VIRTIO_TXQ) {
-        return 0;
-    }
-
-    ovs_mutex_lock(&dpdk_mutex);
-    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
-        if (strncmp(virtio_dev->ifname, dev->vhost_id, IF_NAME_SZ) == 0) {
-            ovs_mutex_lock(&dev->mutex);
-            if (enable) {
-                dev->tx_q[qid].map = qid;
-            } else {
-                dev->tx_q[qid].map = OVS_VHOST_QUEUE_DISABLED;
-            }
-            netdev_dpdk_remap_txqs(dev);
-            exists = true;
-            ovs_mutex_unlock(&dev->mutex);
-            break;
-        }
-    }
-    ovs_mutex_unlock(&dpdk_mutex);
-
-    if (exists) {
-        VLOG_INFO("State of queue %d ( tx_qid %d ) of vhost device '%s' %"
-                  PRIu64" changed to \'%s\'", queue_id, qid,
-                  virtio_dev->ifname, virtio_dev->device_fh,
-                  (enable == 1) ? "enabled" : "disabled");
-    } else {
-        VLOG_INFO("vHost Device '%s' %"PRIu64" not found", virtio_dev->ifname,
-                  virtio_dev->device_fh);
-        return -1;
-    }
-
-    return 0;
-}
-
-struct virtio_net *
-netdev_dpdk_get_virtio(const struct netdev_dpdk *dev)
-{
-    return ovsrcu_get(struct virtio_net *, &dev->virtio_dev);
-}
-
 struct ingress_policer *
 netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev)
 {
     return ovsrcu_get(struct ingress_policer *, &dev->ingress_policer);
 }
 
-/*
- * These callbacks allow virtio-net devices to be added to vhost ports when
- * configuration has been fully complete.
- */
-static const struct virtio_net_device_ops virtio_net_device_ops =
-{
-    .new_device =  new_device,
-    .destroy_device = destroy_device,
-    .vring_state_changed = vring_state_changed
-};
-
-static void *
-start_vhost_loop(void *dummy OVS_UNUSED)
-{
-     pthread_detach(pthread_self());
-     /* Put the vhost thread into quiescent state. */
-     ovsrcu_quiesce_start();
-     rte_vhost_driver_session_start();
-     return NULL;
-}
-
 static int
 dpdk_vhost_class_init(void)
 {
-    rte_vhost_driver_callback_register(&virtio_net_device_ops);
-    rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4
-                            | 1ULL << VIRTIO_NET_F_HOST_TSO6
-                            | 1ULL << VIRTIO_NET_F_CSUM);
-
-    ovs_thread_create("vhost_thread", start_vhost_loop, NULL);
+    rte_eth_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4
+                                | 1ULL << VIRTIO_NET_F_HOST_TSO6
+                                | 1ULL << VIRTIO_NET_F_CSUM);
     return 0;
 }
 
@@ -2498,7 +2212,17 @@  netdev_dpdk_ring_send(struct netdev *netdev, int qid,
         dp_packet_rss_invalidate(batch->packets[i]);
     }
 
-    netdev_dpdk_send__(dev, qid, batch, may_steal, concurrent_txq);
+    if (OVS_UNLIKELY(concurrent_txq)) {
+        qid = qid % dev->up.n_txq;
+        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
+    }
+
+    netdev_dpdk_send__(dev, qid, batch, may_steal);
+
+    if (OVS_UNLIKELY(concurrent_txq)) {
+        rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
+    }
+
     return 0;
 }
 
@@ -2787,7 +2511,6 @@  static int
 netdev_dpdk_vhost_reconfigure(struct netdev *netdev)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
-    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
     int err = 0;
 
     ovs_mutex_lock(&dpdk_mutex);
@@ -2813,10 +2536,6 @@  netdev_dpdk_vhost_reconfigure(struct netdev *netdev)
         }
     }
 
-    if (virtio_dev) {
-        virtio_dev->flags |= VIRTIO_DEV_RUNNING;
-    }
-
     ovs_mutex_unlock(&dev->mutex);
     ovs_mutex_unlock(&dpdk_mutex);
 
@@ -3307,12 +3026,12 @@  static const struct netdev_class OVS_UNUSED dpdk_vhost_class =
         NULL,
         NULL,
         netdev_dpdk_vhost_send,
-        netdev_dpdk_vhost_get_carrier,
-        netdev_dpdk_vhost_get_stats,
+        netdev_dpdk_get_carrier,
+        netdev_dpdk_get_stats,
         NULL,
         NULL,
         netdev_dpdk_vhost_reconfigure,
-        netdev_dpdk_vhost_rxq_recv);
+        netdev_dpdk_rxq_recv);
 
 void
 netdev_dpdk_register(void)