diff mbox

[ovs-dev,v4,3/5] netdev-dpdk: Add vHost User PMD

Message ID 1469798683-1758-4-git-send-email-ciara.loftus@intel.com
State Rejected
Delegated to: Daniele Di Proietto
Headers show

Commit Message

Ciara Loftus July 29, 2016, 1:24 p.m. UTC
DPDK 16.04 introduces the vHost PMD which allows 'dpdkvhostuser' ports
to be controlled by the librte_ether API, like physical 'dpdk' ports and
IVSHM 'dpdkr' ports. This commit integrates this PMD into OVS and
removes direct calls to the librte_vhost DPDK library.

This commit removes extended statistics support for vHost User ports
until such a time that this becomes available in the vHost PMD in a
DPDK release supported by OVS.

Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
---
 INSTALL.DPDK.md   |  10 +
 NEWS              |   2 +
 lib/netdev-dpdk.c | 857 ++++++++++++++++++------------------------------------
 3 files changed, 300 insertions(+), 569 deletions(-)

Comments

Tetsuya Mukawa Aug. 2, 2016, 5:41 a.m. UTC | #1
> I've added vHost maintainers to CC-list to hear their opinion about
> new API to get number of queues from the vHost PMD.
> Maybe we can expose 'rte_vhost_get_queue_num()' somehow or make
> 'dev_info->nb_rx_queues' usable?
>

I appreciate the great investigation.
So far, I am not sure what a good way to get the value would be, but I agree
it's nice to have such functionality.

Currently the vhost library has such a function.
So, one possible solution may be to prepare a function that converts a portid
to a vid, then use the vid to call the vhost library function directly.

Thanks,
Tetsuya

> NACK for now.
>
> Best regards, Ilya Maximets.
>
> On 29.07.2016 16:24, Ciara Loftus wrote:
> > DPDK 16.04 introduces the vHost PMD which allows 'dpdkvhostuser' ports
> > to be controlled by the librte_ether API, like physical 'dpdk' ports and
> > IVSHM 'dpdkr' ports. This commit integrates this PMD into OVS and
> > removes direct calls to the librte_vhost DPDK library.
> >
> > This commit removes extended statistics support for vHost User ports
> > until such a time that this becomes available in the vHost PMD in a
> > DPDK release supported by OVS.
> >
> > Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
> > ---
> >  INSTALL.DPDK.md   |  10 +
> >  NEWS              |   2 +
> >  lib/netdev-dpdk.c | 857
++++++++++++++++++------------------------------------
> >  3 files changed, 300 insertions(+), 569 deletions(-)
> >
> > diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md
> > index 7609aa7..4feb7be 100644
> > --- a/INSTALL.DPDK.md
> > +++ b/INSTALL.DPDK.md
> > @@ -604,6 +604,16 @@ can be found in [Vhost Walkthrough].
> >
> >      http://dpdk.org/doc/guides/rel_notes/release_16_04.html
> >
> > +  - dpdk, dpdkr and dpdkvhostuser ports are 'eth' type ports in the
context of
> > +    DPDK as they are all managed by the rte_ether API. This means that
they
> > +    adhere to the DPDK configuration option CONFIG_RTE_MAX_ETHPORTS
which by
> > +    default is set to 32. This means by default the combined total
number of
> > +    dpdk, dpdkr and dpdkvhostuser ports allowable in OVS with DPDK is
32. This
> > +    value can be changed if desired by modifying the configuration
file in
> > +    DPDK, or by overriding the default value on the command line when
building
> > +    DPDK. eg.
> > +
> > +        `make install CONFIG_RTE_MAX_ETHPORTS=64`
> >
> >  Bug Reporting:
> >  --------------
> > diff --git a/NEWS b/NEWS
> > index dc3dedb..6510dde 100644
> > --- a/NEWS
> > +++ b/NEWS
> > @@ -64,6 +64,8 @@ Post-v2.5.0
> >       * Basic connection tracking for the userspace datapath (no ALG,
> >         fragmentation or NAT support yet)
> >       * Remove dpdkvhostcuse port type.
> > +     * vHost PMD integration brings vhost-user ports under control of
the
> > +       rte_ether DPDK API.
> >     - Increase number of registers to 16.
> >     - ovs-benchmark: This utility has been removed due to lack of use
and
> >       bitrot.
> > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> > index d6959fe..d6ceeec 100644
> > --- a/lib/netdev-dpdk.c
> > +++ b/lib/netdev-dpdk.c
> > @@ -30,7 +30,6 @@
> >  #include <sys/types.h>
> >  #include <sys/stat.h>
> >  #include <getopt.h>
> > -#include <numaif.h>
> >
> >  #include "dirs.h"
> >  #include "dp-packet.h"
> > @@ -56,9 +55,9 @@
> >  #include "unixctl.h"
> >
> >  #include "rte_config.h"
> > +#include "rte_eth_vhost.h"
> >  #include "rte_mbuf.h"
> >  #include "rte_meter.h"
> > -#include "rte_virtio_net.h"
> >
> >  VLOG_DEFINE_THIS_MODULE(dpdk);
> >  static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
> > @@ -141,6 +140,9 @@ static char *vhost_sock_dir = NULL;   /* Location
of vhost-user sockets */
> >
> >  #define VHOST_ENQ_RETRY_NUM 8
> >
> > +/* Array that tracks the used & unused vHost user driver IDs */
> > +static unsigned int vhost_drv_ids[RTE_MAX_ETHPORTS];
> > +
> >  static const struct rte_eth_conf port_conf = {
> >      .rxmode = {
> >          .mq_mode = ETH_MQ_RX_RSS,
> > @@ -346,12 +348,15 @@ struct netdev_dpdk {
> >      struct rte_eth_link link;
> >      int link_reset_cnt;
> >
> > -    /* virtio-net structure for vhost device */
> > -    OVSRCU_TYPE(struct virtio_net *) virtio_dev;
> > +    /* Number of virtqueue pairs reported by the guest */
> > +    uint32_t vhost_qp_nb;
> >
> >      /* Identifier used to distinguish vhost devices from each other */
> >      char vhost_id[PATH_MAX];
> >
> > +    /* ID of vhost user port given to the PMD driver */
> > +    unsigned int vhost_pmd_id;
> > +
> >      /* In dpdk_list. */
> >      struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);
> >
> > @@ -382,16 +387,23 @@ struct netdev_rxq_dpdk {
> >  static bool dpdk_thread_is_pmd(void);
> >
> >  static int netdev_dpdk_construct(struct netdev *);
> > -
> > -struct virtio_net * netdev_dpdk_get_virtio(const struct netdev_dpdk
*dev);
> > +static int netdev_dpdk_vhost_construct(struct netdev *);
> >
> >  struct ingress_policer *
> >  netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev);
> >
> > +static void link_status_changed_callback(uint8_t port_id,
> > +        enum rte_eth_event_type type, void *param);
> > +static void vring_state_changed_callback(uint8_t port_id,
> > +        enum rte_eth_event_type type, void *param);
> > +static void netdev_dpdk_remap_txqs(struct netdev_dpdk *dev);
> > +static void netdev_dpdk_txq_map_clear(struct netdev_dpdk *dev);
> > +
> >  static bool
> > -is_dpdk_class(const struct netdev_class *class)
> > +is_dpdk_eth_class(const struct netdev_class *class)
> >  {
> > -    return class->construct == netdev_dpdk_construct;
> > +    return ((class->construct == netdev_dpdk_construct) ||
> > +            (class->construct == netdev_dpdk_vhost_construct));
> >  }
> >
> >  /* DPDK NIC drivers allocate RX buffers at a particular granularity,
typically
> > @@ -616,8 +628,13 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev,
int n_rxq, int n_txq)
> >              continue;
> >          }
> >
> > -        dev->up.n_rxq = n_rxq;
> > -        dev->up.n_txq = n_txq;
> > +        /* Only set n_*xq for physical devices. vHost User devices
will set
> > +         * this value correctly using info from the virtio backend.
> > +         */
> > +        if (dev->type == DPDK_DEV_ETH) {
> > +            dev->up.n_rxq = n_rxq;
> > +            dev->up.n_txq = n_txq;
> > +        }
> >
> >          return 0;
> >      }
> > @@ -641,8 +658,14 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
OVS_REQUIRES(dpdk_mutex)
> >
> >      rte_eth_dev_info_get(dev->port_id, &info);
> >
> > -    n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq);
> > -    n_txq = MIN(info.max_tx_queues, dev->up.n_txq);
> > +    if (dev->type == DPDK_DEV_VHOST) {
> > +        /* We don't know how many queues QEMU will use so set up the
max */
> > +        n_rxq = MIN(OVS_VHOST_MAX_QUEUE_NUM, RTE_MAX_QUEUES_PER_PORT);
> > +        n_txq = MIN(OVS_VHOST_MAX_QUEUE_NUM, RTE_MAX_QUEUES_PER_PORT);
> > +    } else {
> > +        n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq);
> > +        n_txq = MIN(info.max_tx_queues, dev->up.n_txq);
> > +    }
> >
> >      diag = dpdk_eth_dev_queue_setup(dev, n_rxq, n_txq);
> >      if (diag) {
> > @@ -709,6 +732,85 @@ netdev_dpdk_alloc_txq(struct netdev_dpdk *dev,
unsigned int n_txqs)
> >      }
> >  }
> >
> > +void
> > +link_status_changed_callback(uint8_t port_id,
> > +                             enum rte_eth_event_type type OVS_UNUSED,
> > +                             void *param OVS_UNUSED)
> > +{
> > +    struct netdev_dpdk *dev;
> > +    int socket_id = -1;
> > +
> > +    ovs_mutex_lock(&dpdk_mutex);
> > +    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
> > +        if (port_id == dev->port_id) {
> > +            ovs_mutex_lock(&dev->mutex);
> > +            check_link_status(dev);
> > +            if (dev->link.link_status == ETH_LINK_UP) {
> > +                /* new device */
> > +                /* Get NUMA information */
> > +                socket_id = rte_eth_dev_socket_id(dev->port_id);
> > +                if (socket_id != -1 && socket_id != dev->socket_id) {
> > +                    dev->requested_socket_id = socket_id;
> > +                }
> > +                netdev_request_reconfigure(&dev->up);
> > +                netdev_change_seq_changed(&dev->up);
> > +                VLOG_INFO("vHost Device '%s' has been added on numa
node %i",
> > +                          dev->vhost_id, socket_id);
> > +            } else {
> > +                /* destroy device */
> > +                /* Clear tx/rx queue settings. */
> > +                netdev_dpdk_txq_map_clear(dev);
> > +                netdev_request_reconfigure(&dev->up);
> > +                netdev_change_seq_changed(&dev->up);
> > +                VLOG_INFO("vHost Device '%s' has been removed",
dev->vhost_id);
> > +            }
> > +            ovs_mutex_unlock(&dev->mutex);
> > +            break;
> > +        }
> > +    }
> > +
> > +    ovs_mutex_unlock(&dpdk_mutex);
> > +
> > +    return;
> > +}
> > +
> > +void
> > +vring_state_changed_callback(uint8_t port_id,
> > +                             enum rte_eth_event_type type OVS_UNUSED,
> > +                             void *param OVS_UNUSED)
> > +{
> > +    struct netdev_dpdk *dev;
> > +    struct rte_eth_vhost_queue_event event;
> > +    int err = 0;
> > +
> > +    err = rte_eth_vhost_get_queue_event(port_id, &event);
> > +    if (err || event.rx) {
> > +        return;
> > +    }
> > +
> > +    ovs_mutex_lock(&dpdk_mutex);
> > +    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
> > +        if (port_id == dev->port_id) {
> > +            ovs_mutex_lock(&dev->mutex);
> > +            if (event.enable) {
> > +                dev->tx_q[event.queue_id].map = event.queue_id;
> > +                dev->vhost_qp_nb++;
> > +            } else {
> > +                dev->tx_q[event.queue_id].map =
OVS_VHOST_QUEUE_DISABLED;
> > +                dev->vhost_qp_nb--;
> > +            }
> > +            dev->requested_n_rxq = dev->vhost_qp_nb;
> > +            dev->requested_n_txq = dev->vhost_qp_nb;
> > +            netdev_request_reconfigure(&dev->up);
> > +            ovs_mutex_unlock(&dev->mutex);
> > +            break;
> > +        }
> > +    }
> > +    ovs_mutex_unlock(&dpdk_mutex);
> > +
> > +    return;
> > +}
> > +
> >  static int
> >  netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,
> >                   enum dpdk_dev_type type)
> > @@ -718,6 +820,7 @@ netdev_dpdk_init(struct netdev *netdev, unsigned
int port_no,
> >      int sid;
> >      int err = 0;
> >      uint32_t buf_size;
> > +    unsigned int nr_q = 0;
> >
> >      ovs_mutex_init(&dev->mutex);
> >      ovs_mutex_lock(&dev->mutex);
> > @@ -727,11 +830,7 @@ netdev_dpdk_init(struct netdev *netdev, unsigned
int port_no,
> >      /* If the 'sid' is negative, it means that the kernel fails
> >       * to obtain the pci numa info.  In that situation, always
> >       * use 'SOCKET0'. */
> > -    if (type == DPDK_DEV_ETH) {
> > -        sid = rte_eth_dev_socket_id(port_no);
> > -    } else {
> > -        sid = rte_lcore_to_socket_id(rte_get_master_lcore());
> > -    }
> > +    sid = rte_eth_dev_socket_id(port_no);
> >
> >      dev->socket_id = sid < 0 ? SOCKET0 : sid;
> >      dev->requested_socket_id = dev->socket_id;
> > @@ -761,17 +860,21 @@ netdev_dpdk_init(struct netdev *netdev, unsigned
int port_no,
> >      netdev->n_txq = NR_QUEUE;
> >      dev->requested_n_rxq = netdev->n_rxq;
> >      dev->requested_n_txq = netdev->n_txq;
> > +    dev->vhost_qp_nb = 0;
> >
> > -    if (type == DPDK_DEV_ETH) {
> > -        err = dpdk_eth_dev_init(dev);
> > -        if (err) {
> > -            goto unlock;
> > -        }
> > -        netdev_dpdk_alloc_txq(dev, netdev->n_txq);
> > -    } else {
> > -        netdev_dpdk_alloc_txq(dev, OVS_VHOST_MAX_QUEUE_NUM);
> > -        /* Enable DPDK_DEV_VHOST device and set promiscuous mode flag.
*/
> > -        dev->flags = NETDEV_UP | NETDEV_PROMISC;
> > +    err = dpdk_eth_dev_init(dev);
> > +    if (err) {
> > +        goto unlock;
> > +    }
> > +    nr_q = (type == DPDK_DEV_ETH ?
> > +            1 : MIN(OVS_VHOST_MAX_QUEUE_NUM, RTE_MAX_QUEUES_PER_PORT));
> > +    netdev_dpdk_alloc_txq(dev, nr_q);
> > +
> > +    if (type == DPDK_DEV_VHOST) {
> > +        rte_eth_dev_callback_register(port_no,
RTE_ETH_EVENT_QUEUE_STATE,
> > +                                      vring_state_changed_callback,
NULL);
> > +        rte_eth_dev_callback_register(port_no, RTE_ETH_EVENT_INTR_LSC,
> > +                                      link_status_changed_callback,
NULL);
> >      }
> >
> >      ovs_list_push_back(&dpdk_list, &dev->list_node);
> > @@ -802,17 +905,48 @@ dpdk_dev_parse_name(const char dev_name[], const
char prefix[],
> >      }
> >  }
> >
> > +/* When attaching a vhost device to DPDK, a unique name of the format
> > + * 'eth_vhostX' is expected, where X is a unique identifier.
> > + * get_vhost_drv_id returns a valid X value to provide to DPDK.
> > + */
> > +static int
> > +get_vhost_drv_id(void)
> > +{
> > +    int i = 0;
> > +
> > +    for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
> > +        if (vhost_drv_ids[i] == 0) {
> > +            return i;
> > +        }
> > +    }
> > +
> > +    return -1;
> > +}
> > +
> > +static void
> > +set_vhost_drv_id(int id, int val)
> > +{
> > +    vhost_drv_ids[id] = val;
> > +}
> > +
> >  static int
> >  netdev_dpdk_vhost_construct(struct netdev *netdev)
> >  {
> >      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> >      const char *name = netdev->name;
> >      int err;
> > +    uint8_t port_no = 0;
> > +    char *devargs;
> > +    int driver_id = 0;
> > +
> > +    if (rte_eal_init_ret) {
> > +        return rte_eal_init_ret;
> > +    }
> >
> >      /* 'name' is appended to 'vhost_sock_dir' and used to create a
socket in
> >       * the file system. '/' or '\' would traverse directories, so
they're not
> >       * acceptable in 'name'. */
> > -    if (strchr(name, '/') || strchr(name, '\\')) {
> > +    if (strchr(name, '/') || strchr(name, '\\') || strchr(name, ',')) {
> >          VLOG_ERR("\"%s\" is not a valid name for a vhost-user port. "
> >                   "A valid name must not include '/' or '\\'",
> >                   name);
> > @@ -829,18 +963,32 @@ netdev_dpdk_vhost_construct(struct netdev *netdev)
> >       */
> >      snprintf(dev->vhost_id, sizeof(dev->vhost_id), "%s/%s",
> >               vhost_sock_dir, name);
> > +    driver_id = get_vhost_drv_id();
> > +    if (driver_id == -1) {
> > +        VLOG_ERR("Unable to create vhost-user device %s - too many
vhost-user"
> > +                 "devices registered with PMD", dev->vhost_id);
> > +        err = ENODEV;
> > +        goto out;
> > +    } else {
> > +        devargs = xasprintf("eth_vhost%u,iface=%s,queues=%i",
> > +                 driver_id, dev->vhost_id,
> > +                 MIN(OVS_VHOST_MAX_QUEUE_NUM,
RTE_MAX_QUEUES_PER_PORT));
> > +        err = rte_eth_dev_attach(devargs, &port_no);
> > +    }
> >
> > -    err = rte_vhost_driver_register(dev->vhost_id);
> >      if (err) {
> > -        VLOG_ERR("vhost-user socket device setup failure for socket
%s\n",
> > +        VLOG_ERR("Failed to attach vhost-user device %s to DPDK",
> >                   dev->vhost_id);
> >      } else {
> >          fatal_signal_add_file_to_unlink(dev->vhost_id);
> >          VLOG_INFO("Socket %s created for vhost-user port %s\n",
> >                    dev->vhost_id, name);
> > -        err = netdev_dpdk_init(netdev, -1, DPDK_DEV_VHOST);
> > +        dev->vhost_pmd_id = driver_id;
> > +        set_vhost_drv_id(driver_id, 1);
> > +        err = netdev_dpdk_init(netdev, port_no, DPDK_DEV_VHOST);
> >      }
> >
> > +out:
> >      ovs_mutex_unlock(&dpdk_mutex);
> >      return err;
> >  }
> > @@ -868,20 +1016,28 @@ netdev_dpdk_construct(struct netdev *netdev)
> >  }
> >
> >  static void
> > -netdev_dpdk_destruct(struct netdev *netdev)
> > +dpdk_destruct_helper(struct netdev_dpdk *dev)
> >  {
> > -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> > -
> > -    ovs_mutex_lock(&dev->mutex);
> >      rte_eth_dev_stop(dev->port_id);
> >      free(ovsrcu_get_protected(struct ingress_policer *,
> >                                &dev->ingress_policer));
> > -    ovs_mutex_unlock(&dev->mutex);
> >
> > -    ovs_mutex_lock(&dpdk_mutex);
> >      rte_free(dev->tx_q);
> >      ovs_list_remove(&dev->list_node);
> >      dpdk_mp_put(dev->dpdk_mp);
> > +}
> > +
> > +static void
> > +netdev_dpdk_destruct(struct netdev *netdev)
> > +{
> > +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> > +
> > +    ovs_mutex_lock(&dpdk_mutex);
> > +    ovs_mutex_lock(&dev->mutex);
> > +
> > +    dpdk_destruct_helper(dev);
> > +
> > +    ovs_mutex_unlock(&dev->mutex);
> >      ovs_mutex_unlock(&dpdk_mutex);
> >  }
> >
> > @@ -890,30 +1046,19 @@ netdev_dpdk_vhost_destruct(struct netdev *netdev)
> >  {
> >      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> >
> > -    /* Guest becomes an orphan if still attached. */
> > -    if (netdev_dpdk_get_virtio(dev) != NULL) {
> > -        VLOG_ERR("Removing port '%s' while vhost device still
attached.",
> > -                 netdev->name);
> > -        VLOG_ERR("To restore connectivity after re-adding of port, VM
on socket"
> > -                 " '%s' must be restarted.",
> > -                 dev->vhost_id);
> > -    }
> > +    ovs_mutex_lock(&dpdk_mutex);
> > +    ovs_mutex_lock(&dev->mutex);
> >
> > -    if (rte_vhost_driver_unregister(dev->vhost_id)) {
> > -        VLOG_ERR("Unable to remove vhost-user socket %s",
dev->vhost_id);
> > +    if (rte_eth_dev_detach(dev->port_id, dev->vhost_id)) {
> > +        VLOG_ERR("Error removing vhost device %s", dev->vhost_id);
> >      } else {
> >          fatal_signal_remove_file_to_unlink(dev->vhost_id);
> >      }
> > +    set_vhost_drv_id(dev->vhost_pmd_id, 0);
> >
> > -    ovs_mutex_lock(&dev->mutex);
> > -    free(ovsrcu_get_protected(struct ingress_policer *,
> > -                              &dev->ingress_policer));
> > -    ovs_mutex_unlock(&dev->mutex);
> > +    dpdk_destruct_helper(dev);
> >
> > -    ovs_mutex_lock(&dpdk_mutex);
> > -    rte_free(dev->tx_q);
> > -    ovs_list_remove(&dev->list_node);
> > -    dpdk_mp_put(dev->dpdk_mp);
> > +    ovs_mutex_unlock(&dev->mutex);
> >      ovs_mutex_unlock(&dpdk_mutex);
> >  }
> >
> > @@ -1105,117 +1250,6 @@ ingress_policer_run(struct ingress_policer
*policer, struct rte_mbuf **pkts,
> >      return cnt;
> >  }
> >
> > -static bool
> > -is_vhost_running(struct virtio_net *virtio_dev)
> > -{
> > -    return (virtio_dev != NULL && (virtio_dev->flags &
VIRTIO_DEV_RUNNING));
> > -}
> > -
> > -static inline void
> > -netdev_dpdk_vhost_update_rx_size_counters(struct netdev_stats *stats,
> > -                                          unsigned int packet_size)
> > -{
> > -    /* Hard-coded search for the size bucket. */
> > -    if (packet_size < 256) {
> > -        if (packet_size >= 128) {
> > -            stats->rx_128_to_255_packets++;
> > -        } else if (packet_size <= 64) {
> > -            stats->rx_1_to_64_packets++;
> > -        } else {
> > -            stats->rx_65_to_127_packets++;
> > -        }
> > -    } else {
> > -        if (packet_size >= 1523) {
> > -            stats->rx_1523_to_max_packets++;
> > -        } else if (packet_size >= 1024) {
> > -            stats->rx_1024_to_1522_packets++;
> > -        } else if (packet_size < 512) {
> > -            stats->rx_256_to_511_packets++;
> > -        } else {
> > -            stats->rx_512_to_1023_packets++;
> > -        }
> > -    }
> > -}
> > -
> > -static inline void
> > -netdev_dpdk_vhost_update_rx_counters(struct netdev_stats *stats,
> > -                                     struct dp_packet **packets, int
count,
> > -                                     int dropped)
> > -{
> > -    int i;
> > -    unsigned int packet_size;
> > -    struct dp_packet *packet;
> > -
> > -    stats->rx_packets += count;
> > -    stats->rx_dropped += dropped;
> > -    for (i = 0; i < count; i++) {
> > -        packet = packets[i];
> > -        packet_size = dp_packet_size(packet);
> > -
> > -        if (OVS_UNLIKELY(packet_size < ETH_HEADER_LEN)) {
> > -            /* This only protects the following multicast counting from
> > -             * too short packets, but it does not stop the packet from
> > -             * further processing. */
> > -            stats->rx_errors++;
> > -            stats->rx_length_errors++;
> > -            continue;
> > -        }
> > -
> > -        netdev_dpdk_vhost_update_rx_size_counters(stats, packet_size);
> > -
> > -        struct eth_header *eh = (struct eth_header *)
dp_packet_data(packet);
> > -        if (OVS_UNLIKELY(eth_addr_is_multicast(eh->eth_dst))) {
> > -            stats->multicast++;
> > -        }
> > -
> > -        stats->rx_bytes += packet_size;
> > -    }
> > -}
> > -
> > -/*
> > - * The receive path for the vhost port is the TX path out from guest.
> > - */
> > -static int
> > -netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
> > -                           struct dp_packet_batch *batch)
> > -{
> > -    struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
> > -    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
> > -    int qid = rxq->queue_id;
> > -    struct ingress_policer *policer =
netdev_dpdk_get_ingress_policer(dev);
> > -    uint16_t nb_rx = 0;
> > -    uint16_t dropped = 0;
> > -
> > -    if (OVS_UNLIKELY(!is_vhost_running(virtio_dev)
> > -                     || !(dev->flags & NETDEV_UP))) {
> > -        return EAGAIN;
> > -    }
> > -
> > -    nb_rx = rte_vhost_dequeue_burst(virtio_dev, qid * VIRTIO_QNUM +
VIRTIO_TXQ,
> > -                                    dev->dpdk_mp->mp,
> > -                                    (struct rte_mbuf **)
batch->packets,
> > -                                    NETDEV_MAX_BURST);
> > -    if (!nb_rx) {
> > -        return EAGAIN;
> > -    }
> > -
> > -    if (policer) {
> > -        dropped = nb_rx;
> > -        nb_rx = ingress_policer_run(policer,
> > -                                    (struct rte_mbuf **)
batch->packets,
> > -                                    nb_rx);
> > -        dropped -= nb_rx;
> > -    }
> > -
> > -    rte_spinlock_lock(&dev->stats_lock);
> > -    netdev_dpdk_vhost_update_rx_counters(&dev->stats, batch->packets,
> > -                                         nb_rx, dropped);
> > -    rte_spinlock_unlock(&dev->stats_lock);
> > -
> > -    batch->count = (int) nb_rx;
> > -    return 0;
> > -}
> > -
> >  static int
> >  netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch
*batch)
> >  {
> > @@ -1269,85 +1303,6 @@ netdev_dpdk_qos_run__(struct netdev_dpdk *dev,
struct rte_mbuf **pkts,
> >      return cnt;
> >  }
> >
> > -static inline void
> > -netdev_dpdk_vhost_update_tx_counters(struct netdev_stats *stats,
> > -                                     struct dp_packet **packets,
> > -                                     int attempted,
> > -                                     int dropped)
> > -{
> > -    int i;
> > -    int sent = attempted - dropped;
> > -
> > -    stats->tx_packets += sent;
> > -    stats->tx_dropped += dropped;
> > -
> > -    for (i = 0; i < sent; i++) {
> > -        stats->tx_bytes += dp_packet_size(packets[i]);
> > -    }
> > -}
> > -
> > -static void
> > -__netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
> > -                         struct dp_packet **pkts, int cnt,
> > -                         bool may_steal)
> > -{
> > -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> > -    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
> > -    struct rte_mbuf **cur_pkts = (struct rte_mbuf **) pkts;
> > -    unsigned int total_pkts = cnt;
> > -    unsigned int qos_pkts = cnt;
> > -    int retries = 0;
> > -
> > -    qid = dev->tx_q[qid % netdev->n_txq].map;
> > -
> > -    if (OVS_UNLIKELY(!is_vhost_running(virtio_dev) || qid < 0
> > -                     || !(dev->flags & NETDEV_UP))) {
> > -        rte_spinlock_lock(&dev->stats_lock);
> > -        dev->stats.tx_dropped+= cnt;
> > -        rte_spinlock_unlock(&dev->stats_lock);
> > -        goto out;
> > -    }
> > -
> > -    rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
> > -
> > -    /* Check has QoS has been configured for the netdev */
> > -    cnt = netdev_dpdk_qos_run__(dev, cur_pkts, cnt);
> > -    qos_pkts -= cnt;
> > -
> > -    do {
> > -        int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;
> > -        unsigned int tx_pkts;
> > -
> > -        tx_pkts = rte_vhost_enqueue_burst(virtio_dev, vhost_qid,
> > -                                          cur_pkts, cnt);
> > -        if (OVS_LIKELY(tx_pkts)) {
> > -            /* Packets have been sent.*/
> > -            cnt -= tx_pkts;
> > -            /* Prepare for possible retry.*/
> > -            cur_pkts = &cur_pkts[tx_pkts];
> > -        } else {
> > -            /* No packets sent - do not retry.*/
> > -            break;
> > -        }
> > -    } while (cnt && (retries++ < VHOST_ENQ_RETRY_NUM));
> > -
> > -    rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
> > -
> > -    rte_spinlock_lock(&dev->stats_lock);
> > -    cnt += qos_pkts;
> > -    netdev_dpdk_vhost_update_tx_counters(&dev->stats, pkts,
total_pkts, cnt);
> > -    rte_spinlock_unlock(&dev->stats_lock);
> > -
> > -out:
> > -    if (may_steal) {
> > -        int i;
> > -
> > -        for (i = 0; i < total_pkts; i++) {
> > -            dp_packet_delete(pkts[i]);
> > -        }
> > -    }
> > -}
> > -
> >  /* Tx function. Transmit packets indefinitely */
> >  static void
> >  dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch
*batch)
> > @@ -1402,18 +1357,13 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,
struct dp_packet_batch *batch)
> >          newcnt++;
> >      }
> >
> > -    if (dev->type == DPDK_DEV_VHOST) {
> > -        __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **)
mbufs,
> > -                                 newcnt, true);
> > -    } else {
> > -        unsigned int qos_pkts = newcnt;
> > +    unsigned int qos_pkts = newcnt;
> >
> > -        /* Check if QoS has been configured for this netdev. */
> > -        newcnt = netdev_dpdk_qos_run__(dev, mbufs, newcnt);
> > +    /* Check if QoS has been configured for this netdev. */
> > +    newcnt = netdev_dpdk_qos_run__(dev, mbufs, newcnt);
> >
> > -        dropped += qos_pkts - newcnt;
> > -        netdev_dpdk_eth_tx_burst(dev, qid, mbufs, newcnt);
> > -    }
> > +    dropped += qos_pkts - newcnt;
> > +    netdev_dpdk_eth_tx_burst(dev, qid, mbufs, newcnt);
> >
> >      if (OVS_UNLIKELY(dropped)) {
> >          rte_spinlock_lock(&dev->stats_lock);
> > @@ -1426,33 +1376,10 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,
struct dp_packet_batch *batch)
> >      }
> >  }
> >
> > -static int
> > -netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
> > -                       struct dp_packet_batch *batch,
> > -                       bool may_steal, bool concurrent_txq OVS_UNUSED)
> > -{
> > -
> > -    if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
> > -        dpdk_do_tx_copy(netdev, qid, batch);
> > -        dp_packet_delete_batch(batch, may_steal);
> > -    } else {
> > -        dp_packet_batch_apply_cutlen(batch);
> > -        __netdev_dpdk_vhost_send(netdev, qid, batch->packets,
batch->count,
> > -                                 may_steal);
> > -    }
> > -    return 0;
> > -}
> > -
> >  static inline void
> >  netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
> > -                   struct dp_packet_batch *batch, bool may_steal,
> > -                   bool concurrent_txq)
> > +                   struct dp_packet_batch *batch, bool may_steal)
> >  {
> > -    if (OVS_UNLIKELY(concurrent_txq)) {
> > -        qid = qid % dev->up.n_txq;
> > -        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
> > -    }
> > -
> >      if (OVS_UNLIKELY(!may_steal ||
> >                       batch->packets[0]->source != DPBUF_DPDK)) {
> >          struct netdev *netdev = &dev->up;
> > @@ -1512,20 +1439,50 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int
qid,
> >              rte_spinlock_unlock(&dev->stats_lock);
> >          }
> >      }
> > +}
> > +
> > +static int
> > +netdev_dpdk_eth_send(struct netdev *netdev, int qid,
> > +                     struct dp_packet_batch *batch, bool may_steal,
> > +                     bool concurrent_txq)
> > +{
> > +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> > +
> > +    if (OVS_UNLIKELY(concurrent_txq)) {
> > +        qid = qid % dev->up.n_txq;
> > +        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
> > +    }
> > +
> > +    netdev_dpdk_send__(dev, qid, batch, may_steal);
> >
> >      if (OVS_UNLIKELY(concurrent_txq)) {
> >          rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
> >      }
> > +
> > +    return 0;
> >  }
> >
> >  static int
> > -netdev_dpdk_eth_send(struct netdev *netdev, int qid,
> > -                     struct dp_packet_batch *batch, bool may_steal,
> > -                     bool concurrent_txq)
> > +netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
> > +                       struct dp_packet_batch *batch, bool may_steal,
> > +                       bool concurrent_txq OVS_UNUSED)
> >  {
> >      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> >
> > -    netdev_dpdk_send__(dev, qid, batch, may_steal, concurrent_txq);
> > +    qid = dev->tx_q[qid % netdev->n_txq].map;
> > +    if (qid == -1) {
> > +        rte_spinlock_lock(&dev->stats_lock);
> > +        dev->stats.tx_dropped+= batch->count;
> > +        rte_spinlock_unlock(&dev->stats_lock);
> > +        if (may_steal) {
> > +            dp_packet_delete_batch(batch, may_steal);
> > +        }
> > +    } else {
> > +        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
> > +        netdev_dpdk_send__(dev, qid, batch, may_steal);
> > +        rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
> > +    }
> > +
> >      return 0;
> >  }
> >
> > @@ -1622,41 +1579,6 @@ out:
> >  static int
> >  netdev_dpdk_get_carrier(const struct netdev *netdev, bool *carrier);
> >
> > -static int
> > -netdev_dpdk_vhost_get_stats(const struct netdev *netdev,
> > -                            struct netdev_stats *stats)
> > -{
> > -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> > -
> > -    ovs_mutex_lock(&dev->mutex);
> > -
> > -    rte_spinlock_lock(&dev->stats_lock);
> > -    /* Supported Stats */
> > -    stats->rx_packets += dev->stats.rx_packets;
> > -    stats->tx_packets += dev->stats.tx_packets;
> > -    stats->rx_dropped = dev->stats.rx_dropped;
> > -    stats->tx_dropped += dev->stats.tx_dropped;
> > -    stats->multicast = dev->stats.multicast;
> > -    stats->rx_bytes = dev->stats.rx_bytes;
> > -    stats->tx_bytes = dev->stats.tx_bytes;
> > -    stats->rx_errors = dev->stats.rx_errors;
> > -    stats->rx_length_errors = dev->stats.rx_length_errors;
> > -
> > -    stats->rx_1_to_64_packets = dev->stats.rx_1_to_64_packets;
> > -    stats->rx_65_to_127_packets = dev->stats.rx_65_to_127_packets;
> > -    stats->rx_128_to_255_packets = dev->stats.rx_128_to_255_packets;
> > -    stats->rx_256_to_511_packets = dev->stats.rx_256_to_511_packets;
> > -    stats->rx_512_to_1023_packets = dev->stats.rx_512_to_1023_packets;
> > -    stats->rx_1024_to_1522_packets =
dev->stats.rx_1024_to_1522_packets;
> > -    stats->rx_1523_to_max_packets = dev->stats.rx_1523_to_max_packets;
> > -
> > -    rte_spinlock_unlock(&dev->stats_lock);
> > -
> > -    ovs_mutex_unlock(&dev->mutex);
> > -
> > -    return 0;
> > -}
> > -
> >  static void
> >  netdev_dpdk_convert_xstats(struct netdev_stats *stats,
> >                             const struct rte_eth_xstats *xstats,
> > @@ -1737,28 +1659,40 @@ netdev_dpdk_get_stats(const struct netdev
*netdev, struct netdev_stats *stats)
> >          return EPROTO;
> >      }
> >
> > -    rte_xstats_len = rte_eth_xstats_get(dev->port_id, NULL, 0);
> > -    if (rte_xstats_len > 0) {
> > -        rte_xstats = dpdk_rte_mzalloc(sizeof(*rte_xstats) *
rte_xstats_len);
> > -        memset(rte_xstats, 0xff, sizeof(*rte_xstats) * rte_xstats_len);
> > -        rte_xstats_ret = rte_eth_xstats_get(dev->port_id, rte_xstats,
> > -                                            rte_xstats_len);
> > -        if (rte_xstats_ret > 0 && rte_xstats_ret <= rte_xstats_len) {
> > -            netdev_dpdk_convert_xstats(stats, rte_xstats,
rte_xstats_ret);
> > +    /* Extended statistics are not yet available for vHost User PMD */
> > +    if (dev->type == DPDK_DEV_ETH) {
> > +        rte_xstats_len = rte_eth_xstats_get(dev->port_id, NULL, 0);
> > +        if (rte_xstats_len > 0) {
> > +            rte_xstats = dpdk_rte_mzalloc(sizeof(*rte_xstats)
> > +                                          * rte_xstats_len);
> > +            memset(rte_xstats, 0xff, sizeof(*rte_xstats) *
rte_xstats_len);
> > +            rte_xstats_ret = rte_eth_xstats_get(dev->port_id,
rte_xstats,
> > +                                                rte_xstats_len);
> > +            if (rte_xstats_ret > 0 && rte_xstats_ret <=
rte_xstats_len) {
> > +                netdev_dpdk_convert_xstats(stats, rte_xstats,
rte_xstats_ret);
> > +            }
> > +            rte_free(rte_xstats);
> > +        } else {
> > +            VLOG_WARN("Can't get XSTATS counters for port: %i.",
dev->port_id);
> >          }
> > -        rte_free(rte_xstats);
> > -    } else {
> > -        VLOG_WARN("Can't get XSTATS counters for port: %i.",
dev->port_id);
> >      }
> >
> >      stats->rx_packets = rte_stats.ipackets;
> >      stats->tx_packets = rte_stats.opackets;
> >      stats->rx_bytes = rte_stats.ibytes;
> >      stats->tx_bytes = rte_stats.obytes;
> > -    /* DPDK counts imissed as errors, but count them here as dropped
instead */
> > -    stats->rx_errors = rte_stats.ierrors - rte_stats.imissed;
> > -    stats->tx_errors = rte_stats.oerrors;
> > -    stats->multicast = rte_stats.imcasts;
> > +
> > +    if (dev->type == DPDK_DEV_ETH) {
> > +        /* DPDK counts imissed as errors, but count them here as
dropped
> > +         * instead */
> > +        stats->rx_errors = rte_stats.ierrors - rte_stats.imissed;
> > +        stats->tx_errors = rte_stats.oerrors;
> > +        stats->multicast = rte_stats.imcasts;
> > +    } else {
> > +        stats->rx_errors = UINT64_MAX;
> > +        stats->tx_errors = UINT64_MAX;
> > +        stats->multicast = UINT64_MAX;
> > +    }
> >
> >      rte_spinlock_lock(&dev->stats_lock);
> >      stats->tx_dropped = dev->stats.tx_dropped;
> > @@ -1921,25 +1855,6 @@ netdev_dpdk_get_carrier(const struct netdev
*netdev, bool *carrier)
> >      return 0;
> >  }
> >
> > -static int
> > -netdev_dpdk_vhost_get_carrier(const struct netdev *netdev, bool
*carrier)
> > -{
> > -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> > -    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
> > -
> > -    ovs_mutex_lock(&dev->mutex);
> > -
> > -    if (is_vhost_running(virtio_dev)) {
> > -        *carrier = 1;
> > -    } else {
> > -        *carrier = 0;
> > -    }
> > -
> > -    ovs_mutex_unlock(&dev->mutex);
> > -
> > -    return 0;
> > -}
> > -
> >  static long long int
> >  netdev_dpdk_get_carrier_resets(const struct netdev *netdev)
> >  {
> > @@ -1995,13 +1910,10 @@ netdev_dpdk_update_flags__(struct netdev_dpdk
*dev,
> >              rte_eth_dev_stop(dev->port_id);
> >          }
> >      } else {
> > -        /* If DPDK_DEV_VHOST device's NETDEV_UP flag was changed and
vhost is
> > -         * running then change netdev's change_seq to trigger link
state
> > -         * update. */
> > -        struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
> > +        /* If DPDK_DEV_VHOST device's NETDEV_UP flag was changed then
change
> > +         * netdev's change_seq to trigger link state update. */
> >
> > -        if ((NETDEV_UP & ((*old_flagsp ^ on) | (*old_flagsp ^ off)))
> > -            && is_vhost_running(virtio_dev)) {
> > +        if ((NETDEV_UP & ((*old_flagsp ^ on) | (*old_flagsp ^ off)))) {
> >              netdev_change_seq_changed(&dev->up);
> >
> >              /* Clear statistics if device is getting up. */
> > @@ -2096,7 +2008,7 @@ netdev_dpdk_set_admin_state(struct unixctl_conn
*conn, int argc,
> >
> >      if (argc > 2) {
> >          struct netdev *netdev = netdev_from_name(argv[1]);
> > -        if (netdev && is_dpdk_class(netdev->netdev_class)) {
> > +        if (netdev && is_dpdk_eth_class(netdev->netdev_class)) {
> >              struct netdev_dpdk *dpdk_dev = netdev_dpdk_cast(netdev);
> >
> >              ovs_mutex_lock(&dpdk_dev->mutex);
> > @@ -2124,22 +2036,6 @@ netdev_dpdk_set_admin_state(struct unixctl_conn
*conn, int argc,
> >  }
> >
> >  /*
> > - * Set virtqueue flags so that we do not receive interrupts.
> > - */
> > -static void
> > -set_irq_status(struct virtio_net *virtio_dev)
> > -{
> > -    uint32_t i;
> > -    uint64_t idx;
> > -
> > -    for (i = 0; i < virtio_dev->virt_qp_nb; i++) {
> > -        idx = i * VIRTIO_QNUM;
> > -        rte_vhost_enable_guest_notification(virtio_dev, idx +
VIRTIO_RXQ, 0);
> > -        rte_vhost_enable_guest_notification(virtio_dev, idx +
VIRTIO_TXQ, 0);
> > -    }
> > -}
> > -
> > -/*
> >   * Fixes mapping for vhost-user tx queues. Must be called after each
> >   * enabling/disabling of queues and n_txq modifications.
> >   */
> > @@ -2180,62 +2076,6 @@ netdev_dpdk_remap_txqs(struct netdev_dpdk *dev)
> >      rte_free(enabled_queues);
> >  }
> >
> > -/*
> > - * A new virtio-net device is added to a vhost port.
> > - */
> > -static int
> > -new_device(struct virtio_net *virtio_dev)
> > -{
> > -    struct netdev_dpdk *dev;
> > -    bool exists = false;
> > -    int newnode = 0;
> > -    long err = 0;
> > -
> > -    ovs_mutex_lock(&dpdk_mutex);
> > -    /* Add device to the vhost port with the same name as that passed
down. */
> > -    LIST_FOR_EACH(dev, list_node, &dpdk_list) {
> > -        if (strncmp(virtio_dev->ifname, dev->vhost_id, IF_NAME_SZ) ==
0) {
> > -            uint32_t qp_num = virtio_dev->virt_qp_nb;
> > -
> > -            ovs_mutex_lock(&dev->mutex);
> > -            /* Get NUMA information */
> > -            err = get_mempolicy(&newnode, NULL, 0, virtio_dev,
> > -                                MPOL_F_NODE | MPOL_F_ADDR);
> > -            if (err) {
> > -                VLOG_INFO("Error getting NUMA info for vHost Device
'%s'",
> > -                        virtio_dev->ifname);
> > -                newnode = dev->socket_id;
> > -            }
> > -
> > -            dev->requested_socket_id = newnode;
> > -            dev->requested_n_rxq = qp_num;
> > -            dev->requested_n_txq = qp_num;
> > -            netdev_request_reconfigure(&dev->up);
> > -
> > -            ovsrcu_set(&dev->virtio_dev, virtio_dev);
> > -            exists = true;
> > -
> > -            /* Disable notifications. */
> > -            set_irq_status(virtio_dev);
> > -            netdev_change_seq_changed(&dev->up);
> > -            ovs_mutex_unlock(&dev->mutex);
> > -            break;
> > -        }
> > -    }
> > -    ovs_mutex_unlock(&dpdk_mutex);
> > -
> > -    if (!exists) {
> > -        VLOG_INFO("vHost Device '%s' %"PRIu64" can't be added - name
not "
> > -                  "found", virtio_dev->ifname, virtio_dev->device_fh);
> > -
> > -        return -1;
> > -    }
> > -
> > -    VLOG_INFO("vHost Device '%s' %"PRIu64" has been added on numa node
%i",
> > -              virtio_dev->ifname, virtio_dev->device_fh, newnode);
> > -    return 0;
> > -}
> > -
> >  /* Clears mapping for all available queues of vhost interface. */
> >  static void
> >  netdev_dpdk_txq_map_clear(struct netdev_dpdk *dev)
> > @@ -2248,144 +2088,18 @@ netdev_dpdk_txq_map_clear(struct netdev_dpdk
*dev)
> >      }
> >  }
> >
> > -/*
> > - * Remove a virtio-net device from the specific vhost port.  Use
dev->remove
> > - * flag to stop any more packets from being sent or received to/from a
VM and
> > - * ensure all currently queued packets have been sent/received before
removing
> > - *  the device.
> > - */
> > -static void
> > -destroy_device(volatile struct virtio_net *virtio_dev)
> > -{
> > -    struct netdev_dpdk *dev;
> > -    bool exists = false;
> > -
> > -    ovs_mutex_lock(&dpdk_mutex);
> > -    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
> > -        if (netdev_dpdk_get_virtio(dev) == virtio_dev) {
> > -
> > -            ovs_mutex_lock(&dev->mutex);
> > -            virtio_dev->flags &= ~VIRTIO_DEV_RUNNING;
> > -            ovsrcu_set(&dev->virtio_dev, NULL);
> > -            /* Clear tx/rx queue settings. */
> > -            netdev_dpdk_txq_map_clear(dev);
> > -            dev->requested_n_rxq = NR_QUEUE;
> > -            dev->requested_n_txq = NR_QUEUE;
> > -            netdev_request_reconfigure(&dev->up);
> > -
> > -            netdev_change_seq_changed(&dev->up);
> > -            ovs_mutex_unlock(&dev->mutex);
> > -            exists = true;
> > -            break;
> > -        }
> > -    }
> > -
> > -    ovs_mutex_unlock(&dpdk_mutex);
> > -
> > -    if (exists == true) {
> > -        /*
> > -         * Wait for other threads to quiesce after setting the
'virtio_dev'
> > -         * to NULL, before returning.
> > -         */
> > -        ovsrcu_synchronize();
> > -        /*
> > -         * As call to ovsrcu_synchronize() will end the quiescent
state,
> > -         * put thread back into quiescent state before returning.
> > -         */
> > -        ovsrcu_quiesce_start();
> > -        VLOG_INFO("vHost Device '%s' %"PRIu64" has been removed",
> > -                  virtio_dev->ifname, virtio_dev->device_fh);
> > -    } else {
> > -        VLOG_INFO("vHost Device '%s' %"PRIu64" not found",
virtio_dev->ifname,
> > -                  virtio_dev->device_fh);
> > -    }
> > -}
> > -
> > -static int
> > -vring_state_changed(struct virtio_net *virtio_dev, uint16_t queue_id,
> > -                    int enable)
> > -{
> > -    struct netdev_dpdk *dev;
> > -    bool exists = false;
> > -    int qid = queue_id / VIRTIO_QNUM;
> > -
> > -    if (queue_id % VIRTIO_QNUM == VIRTIO_TXQ) {
> > -        return 0;
> > -    }
> > -
> > -    ovs_mutex_lock(&dpdk_mutex);
> > -    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
> > -        if (strncmp(virtio_dev->ifname, dev->vhost_id, IF_NAME_SZ) ==
0) {
> > -            ovs_mutex_lock(&dev->mutex);
> > -            if (enable) {
> > -                dev->tx_q[qid].map = qid;
> > -            } else {
> > -                dev->tx_q[qid].map = OVS_VHOST_QUEUE_DISABLED;
> > -            }
> > -            netdev_dpdk_remap_txqs(dev);
> > -            exists = true;
> > -            ovs_mutex_unlock(&dev->mutex);
> > -            break;
> > -        }
> > -    }
> > -    ovs_mutex_unlock(&dpdk_mutex);
> > -
> > -    if (exists) {
> > -        VLOG_INFO("State of queue %d ( tx_qid %d ) of vhost device
'%s' %"
> > -                  PRIu64" changed to \'%s\'", queue_id, qid,
> > -                  virtio_dev->ifname, virtio_dev->device_fh,
> > -                  (enable == 1) ? "enabled" : "disabled");
> > -    } else {
> > -        VLOG_INFO("vHost Device '%s' %"PRIu64" not found",
virtio_dev->ifname,
> > -                  virtio_dev->device_fh);
> > -        return -1;
> > -    }
> > -
> > -    return 0;
> > -}
> > -
> > -struct virtio_net *
> > -netdev_dpdk_get_virtio(const struct netdev_dpdk *dev)
> > -{
> > -    return ovsrcu_get(struct virtio_net *, &dev->virtio_dev);
> > -}
> > -
> >  struct ingress_policer *
> >  netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev)
> >  {
> >      return ovsrcu_get(struct ingress_policer *, &dev->ingress_policer);
> >  }
> >
> > -/*
> > - * These callbacks allow virtio-net devices to be added to vhost ports
when
> > - * configuration has been fully complete.
> > - */
> > -static const struct virtio_net_device_ops virtio_net_device_ops =
> > -{
> > -    .new_device =  new_device,
> > -    .destroy_device = destroy_device,
> > -    .vring_state_changed = vring_state_changed
> > -};
> > -
> > -static void *
> > -start_vhost_loop(void *dummy OVS_UNUSED)
> > -{
> > -     pthread_detach(pthread_self());
> > -     /* Put the vhost thread into quiescent state. */
> > -     ovsrcu_quiesce_start();
> > -     rte_vhost_driver_session_start();
> > -     return NULL;
> > -}
> > -
> >  static int
> >  dpdk_vhost_class_init(void)
> >  {
> > -    rte_vhost_driver_callback_register(&virtio_net_device_ops);
> > -    rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4
> > -                            | 1ULL << VIRTIO_NET_F_HOST_TSO6
> > -                            | 1ULL << VIRTIO_NET_F_CSUM);
> > -
> > -    ovs_thread_create("vhost_thread", start_vhost_loop, NULL);
> > +    rte_eth_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4
> > +                                | 1ULL << VIRTIO_NET_F_HOST_TSO6
> > +                                | 1ULL << VIRTIO_NET_F_CSUM);
> >      return 0;
> >  }
> >
> > @@ -2498,7 +2212,17 @@ netdev_dpdk_ring_send(struct netdev *netdev, int
qid,
> >          dp_packet_rss_invalidate(batch->packets[i]);
> >      }
> >
> > -    netdev_dpdk_send__(dev, qid, batch, may_steal, concurrent_txq);
> > +    if (OVS_UNLIKELY(concurrent_txq)) {
> > +        qid = qid % dev->up.n_txq;
> > +        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
> > +    }
> > +
> > +    netdev_dpdk_send__(dev, qid, batch, may_steal);
> > +
> > +    if (OVS_UNLIKELY(concurrent_txq)) {
> > +        rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
> > +    }
> > +
> >      return 0;
> >  }
> >
> > @@ -2787,7 +2511,6 @@ static int
> >  netdev_dpdk_vhost_reconfigure(struct netdev *netdev)
> >  {
> >      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> > -    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
> >      int err = 0;
> >
> >      ovs_mutex_lock(&dpdk_mutex);
> > @@ -2813,10 +2536,6 @@ netdev_dpdk_vhost_reconfigure(struct netdev
*netdev)
> >          }
> >      }
> >
> > -    if (virtio_dev) {
> > -        virtio_dev->flags |= VIRTIO_DEV_RUNNING;
> > -    }
> > -
> >      ovs_mutex_unlock(&dev->mutex);
> >      ovs_mutex_unlock(&dpdk_mutex);
> >
> > @@ -3307,12 +3026,12 @@ static const struct netdev_class OVS_UNUSED
dpdk_vhost_class =
> >          NULL,
> >          NULL,
> >          netdev_dpdk_vhost_send,
> > -        netdev_dpdk_vhost_get_carrier,
> > -        netdev_dpdk_vhost_get_stats,
> > +        netdev_dpdk_get_carrier,
> > +        netdev_dpdk_get_stats,
> >          NULL,
> >          NULL,
> >          netdev_dpdk_vhost_reconfigure,
> > -        netdev_dpdk_vhost_rxq_recv);
> > +        netdev_dpdk_rxq_recv);
> >
> >  void
> >  netdev_dpdk_register(void)
> >
Ciara Loftus Aug. 3, 2016, 9:21 a.m. UTC | #2
> 

> I've applied this patch and performed the following test:

> 

> OVS with 2 VMs connected via vhost-user ports.

> Each vhost-user port has 4 queues.

> 

> VM1 executes ping on LOCAL port.

> In normal situation ping results are following:

> 

> 	100 packets transmitted, 100 received, 0% packet loss, time 99144ms

> 	rtt min/avg/max/mdev = 0.231/0.459/0.888/0.111 ms

> 

> After that VM2 starts execution of this script:

> 

> 	while true;

> 	do

> 		ethtool -L eth0 combined 4;

> 		ethtool -L eth0 combined 1;

> 	done

> 

> Now results of ping between VM1 and LOCAL port are:

> 

> 	100 packets transmitted, 100 received, 0% packet loss, time 99116ms

> 	rtt min/avg/max/mdev = 5.466/150.327/356.201/85.208 ms

> 

> Minimal time increased from 0.231 to 5.466 ms.

> Average time increased from 0.459 to 150.327 ms (~300 times)!

> 

> This happens because of constant reconfiguration requests from

> the 'vring_state_changed_callback()'.

> 

> As Ciara said, "Previously we could work with only reconfiguring during

> link status change as we had full information available to us

> ie. virtio_net->virt_qp_nb. We don't have that any more, so we need to

> count the queues in OVS now every time we get a vring_change."

> 

> The test above shows that it is unacceptable for OVS to perform

> reconfiguration each time a vring state changes, because this gives

> a guest user the ability to break normal networking on all ports

> connected to the same instance of Open vSwitch.


Hi Ilya,

Another thought on this. With the current master branch, isn't the above possible too with a script like this:

while true;
do
    echo "0000:00:03.0" > /sys/bus/pci/drivers/virtio-pci/bind
    echo "0000:00:03.0" > /sys/bus/pci/drivers/virtio-pci/unbind
done

The bind/unbind operations invoke new_device()/destroy_device(), which in turn request a reconfigure() each time.

Thanks,
Ciara

> 

> If this vulnerability is unavoidable with current version of vHost PMD,

> I'm suggesting to postpone its integration until there is a

> method or special API to get number of queues from the inside of

> 'link_status_changed_callback()'.

> 

> I've added vHost maintainers to CC-list to hear their opinion about

> new API to get number of queues from the vHost PMD.

> Maybe we can expose 'rte_vhost_get_queue_num()' somehow or make

> 'dev_info->nb_rx_queues' usable?

> 

> NACK for now.

> 

> Best regards, Ilya Maximets.

> 

> On 29.07.2016 16:24, Ciara Loftus wrote:

> > DPDK 16.04 introduces the vHost PMD which allows 'dpdkvhostuser' ports

> > to be controlled by the librte_ether API, like physical 'dpdk' ports and

> > IVSHM 'dpdkr' ports. This commit integrates this PMD into OVS and

> > removes direct calls to the librte_vhost DPDK library.

> >

> > This commit removes extended statistics support for vHost User ports

> > until such a time that this becomes available in the vHost PMD in a

> > DPDK release supported by OVS.

> >

> > Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>

> > ---

> >  INSTALL.DPDK.md   |  10 +

> >  NEWS              |   2 +

> >  lib/netdev-dpdk.c | 857 ++++++++++++++++++---------------------------------

> ---

> >  3 files changed, 300 insertions(+), 569 deletions(-)

> >

> > diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md

> > index 7609aa7..4feb7be 100644

> > --- a/INSTALL.DPDK.md

> > +++ b/INSTALL.DPDK.md

> > @@ -604,6 +604,16 @@ can be found in [Vhost Walkthrough].

> >

> >      http://dpdk.org/doc/guides/rel_notes/release_16_04.html

> >

> > +  - dpdk, dpdkr and dpdkvhostuser ports are 'eth' type ports in the context

> of

> > +    DPDK as they are all managed by the rte_ether API. This means that

> they

> > +    adhere to the DPDK configuration option CONFIG_RTE_MAX_ETHPORTS

> which by

> > +    default is set to 32. This means by default the combined total number of

> > +    dpdk, dpdkr and dpdkvhostuser ports allowable in OVS with DPDK is 32.

> This

> > +    value can be changed if desired by modifying the configuration file in

> > +    DPDK, or by overriding the default value on the command line when

> building

> > +    DPDK. eg.

> > +

> > +        `make install CONFIG_RTE_MAX_ETHPORTS=64`

> >

> >  Bug Reporting:

> >  --------------

> > diff --git a/NEWS b/NEWS

> > index dc3dedb..6510dde 100644

> > --- a/NEWS

> > +++ b/NEWS

> > @@ -64,6 +64,8 @@ Post-v2.5.0

> >       * Basic connection tracking for the userspace datapath (no ALG,

> >         fragmentation or NAT support yet)

> >       * Remove dpdkvhostcuse port type.

> > +     * vHost PMD integration brings vhost-user ports under control of the

> > +       rte_ether DPDK API.

> >     - Increase number of registers to 16.

> >     - ovs-benchmark: This utility has been removed due to lack of use and

> >       bitrot.

> > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c

> > index d6959fe..d6ceeec 100644

> > --- a/lib/netdev-dpdk.c

> > +++ b/lib/netdev-dpdk.c

> > @@ -30,7 +30,6 @@

> >  #include <sys/types.h>

> >  #include <sys/stat.h>

> >  #include <getopt.h>

> > -#include <numaif.h>

> >

> >  #include "dirs.h"

> >  #include "dp-packet.h"

> > @@ -56,9 +55,9 @@

> >  #include "unixctl.h"

> >

> >  #include "rte_config.h"

> > +#include "rte_eth_vhost.h"

> >  #include "rte_mbuf.h"

> >  #include "rte_meter.h"

> > -#include "rte_virtio_net.h"

> >

> >  VLOG_DEFINE_THIS_MODULE(dpdk);

> >  static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);

> > @@ -141,6 +140,9 @@ static char *vhost_sock_dir = NULL;   /* Location of

> vhost-user sockets */

> >

> >  #define VHOST_ENQ_RETRY_NUM 8

> >

> > +/* Array that tracks the used & unused vHost user driver IDs */

> > +static unsigned int vhost_drv_ids[RTE_MAX_ETHPORTS];

> > +

> >  static const struct rte_eth_conf port_conf = {

> >      .rxmode = {

> >          .mq_mode = ETH_MQ_RX_RSS,

> > @@ -346,12 +348,15 @@ struct netdev_dpdk {

> >      struct rte_eth_link link;

> >      int link_reset_cnt;

> >

> > -    /* virtio-net structure for vhost device */

> > -    OVSRCU_TYPE(struct virtio_net *) virtio_dev;

> > +    /* Number of virtqueue pairs reported by the guest */

> > +    uint32_t vhost_qp_nb;

> >

> >      /* Identifier used to distinguish vhost devices from each other */

> >      char vhost_id[PATH_MAX];

> >

> > +    /* ID of vhost user port given to the PMD driver */

> > +    unsigned int vhost_pmd_id;

> > +

> >      /* In dpdk_list. */

> >      struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);

> >

> > @@ -382,16 +387,23 @@ struct netdev_rxq_dpdk {

> >  static bool dpdk_thread_is_pmd(void);

> >

> >  static int netdev_dpdk_construct(struct netdev *);

> > -

> > -struct virtio_net * netdev_dpdk_get_virtio(const struct netdev_dpdk

> *dev);

> > +static int netdev_dpdk_vhost_construct(struct netdev *);

> >

> >  struct ingress_policer *

> >  netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev);

> >

> > +static void link_status_changed_callback(uint8_t port_id,

> > +        enum rte_eth_event_type type, void *param);

> > +static void vring_state_changed_callback(uint8_t port_id,

> > +        enum rte_eth_event_type type, void *param);

> > +static void netdev_dpdk_remap_txqs(struct netdev_dpdk *dev);

> > +static void netdev_dpdk_txq_map_clear(struct netdev_dpdk *dev);

> > +

> >  static bool

> > -is_dpdk_class(const struct netdev_class *class)

> > +is_dpdk_eth_class(const struct netdev_class *class)

> >  {

> > -    return class->construct == netdev_dpdk_construct;

> > +    return ((class->construct == netdev_dpdk_construct) ||

> > +            (class->construct == netdev_dpdk_vhost_construct));

> >  }

> >

> >  /* DPDK NIC drivers allocate RX buffers at a particular granularity, typically

> > @@ -616,8 +628,13 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk

> *dev, int n_rxq, int n_txq)

> >              continue;

> >          }

> >

> > -        dev->up.n_rxq = n_rxq;

> > -        dev->up.n_txq = n_txq;

> > +        /* Only set n_*xq for physical devices. vHost User devices will set

> > +         * this value correctly using info from the virtio backend.

> > +         */

> > +        if (dev->type == DPDK_DEV_ETH) {

> > +            dev->up.n_rxq = n_rxq;

> > +            dev->up.n_txq = n_txq;

> > +        }

> >

> >          return 0;

> >      }

> > @@ -641,8 +658,14 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)

> OVS_REQUIRES(dpdk_mutex)

> >

> >      rte_eth_dev_info_get(dev->port_id, &info);

> >

> > -    n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq);

> > -    n_txq = MIN(info.max_tx_queues, dev->up.n_txq);

> > +    if (dev->type == DPDK_DEV_VHOST) {

> > +        /* We don't know how many queues QEMU will use so set up the

> max */

> > +        n_rxq = MIN(OVS_VHOST_MAX_QUEUE_NUM,

> RTE_MAX_QUEUES_PER_PORT);

> > +        n_txq = MIN(OVS_VHOST_MAX_QUEUE_NUM,

> RTE_MAX_QUEUES_PER_PORT);

> > +    } else {

> > +        n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq);

> > +        n_txq = MIN(info.max_tx_queues, dev->up.n_txq);

> > +    }

> >

> >      diag = dpdk_eth_dev_queue_setup(dev, n_rxq, n_txq);

> >      if (diag) {

> > @@ -709,6 +732,85 @@ netdev_dpdk_alloc_txq(struct netdev_dpdk *dev,

> unsigned int n_txqs)

> >      }

> >  }

> >

> > +void

> > +link_status_changed_callback(uint8_t port_id,

> > +                             enum rte_eth_event_type type OVS_UNUSED,

> > +                             void *param OVS_UNUSED)

> > +{

> > +    struct netdev_dpdk *dev;

> > +    int socket_id = -1;

> > +

> > +    ovs_mutex_lock(&dpdk_mutex);

> > +    LIST_FOR_EACH (dev, list_node, &dpdk_list) {

> > +        if (port_id == dev->port_id) {

> > +            ovs_mutex_lock(&dev->mutex);

> > +            check_link_status(dev);

> > +            if (dev->link.link_status == ETH_LINK_UP) {

> > +                /* new device */

> > +                /* Get NUMA information */

> > +                socket_id = rte_eth_dev_socket_id(dev->port_id);

> > +                if (socket_id != -1 && socket_id != dev->socket_id) {

> > +                    dev->requested_socket_id = socket_id;

> > +                }

> > +                netdev_request_reconfigure(&dev->up);

> > +                netdev_change_seq_changed(&dev->up);

> > +                VLOG_INFO("vHost Device '%s' has been added on numa node

> %i",

> > +                          dev->vhost_id, socket_id);

> > +            } else {

> > +                /* destroy device */

> > +                /* Clear tx/rx queue settings. */

> > +                netdev_dpdk_txq_map_clear(dev);

> > +                netdev_request_reconfigure(&dev->up);

> > +                netdev_change_seq_changed(&dev->up);

> > +                VLOG_INFO("vHost Device '%s' has been removed", dev-

> >vhost_id);

> > +            }

> > +            ovs_mutex_unlock(&dev->mutex);

> > +            break;

> > +        }

> > +    }

> > +

> > +    ovs_mutex_unlock(&dpdk_mutex);

> > +

> > +    return;

> > +}

> > +

> > +void

> > +vring_state_changed_callback(uint8_t port_id,

> > +                             enum rte_eth_event_type type OVS_UNUSED,

> > +                             void *param OVS_UNUSED)

> > +{

> > +    struct netdev_dpdk *dev;

> > +    struct rte_eth_vhost_queue_event event;

> > +    int err = 0;

> > +

> > +    err = rte_eth_vhost_get_queue_event(port_id, &event);

> > +    if (err || event.rx) {

> > +        return;

> > +    }

> > +

> > +    ovs_mutex_lock(&dpdk_mutex);

> > +    LIST_FOR_EACH (dev, list_node, &dpdk_list) {

> > +        if (port_id == dev->port_id) {

> > +            ovs_mutex_lock(&dev->mutex);

> > +            if (event.enable) {

> > +                dev->tx_q[event.queue_id].map = event.queue_id;

> > +                dev->vhost_qp_nb++;

> > +            } else {

> > +                dev->tx_q[event.queue_id].map =

> OVS_VHOST_QUEUE_DISABLED;

> > +                dev->vhost_qp_nb--;

> > +            }

> > +            dev->requested_n_rxq = dev->vhost_qp_nb;

> > +            dev->requested_n_txq = dev->vhost_qp_nb;

> > +            netdev_request_reconfigure(&dev->up);

> > +            ovs_mutex_unlock(&dev->mutex);

> > +            break;

> > +        }

> > +    }

> > +    ovs_mutex_unlock(&dpdk_mutex);

> > +

> > +    return;

> > +}

> > +

> >  static int

> >  netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,

> >                   enum dpdk_dev_type type)

> > @@ -718,6 +820,7 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,

> >      int sid;

> >      int err = 0;

> >      uint32_t buf_size;

> > +    unsigned int nr_q = 0;

> >

> >      ovs_mutex_init(&dev->mutex);

> >      ovs_mutex_lock(&dev->mutex);

> > @@ -727,11 +830,7 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,

> >      /* If the 'sid' is negative, it means that the kernel fails

> >       * to obtain the pci numa info.  In that situation, always

> >       * use 'SOCKET0'. */

> > -    if (type == DPDK_DEV_ETH) {

> > -        sid = rte_eth_dev_socket_id(port_no);

> > -    } else {

> > -        sid = rte_lcore_to_socket_id(rte_get_master_lcore());

> > -    }

> > +    sid = rte_eth_dev_socket_id(port_no);

> >

> >      dev->socket_id = sid < 0 ? SOCKET0 : sid;

> >      dev->requested_socket_id = dev->socket_id;

> > @@ -761,17 +860,21 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,

> >      netdev->n_txq = NR_QUEUE;

> >      dev->requested_n_rxq = netdev->n_rxq;

> >      dev->requested_n_txq = netdev->n_txq;

> > +    dev->vhost_qp_nb = 0;

> >

> > -    if (type == DPDK_DEV_ETH) {

> > -        err = dpdk_eth_dev_init(dev);

> > -        if (err) {

> > -            goto unlock;

> > -        }

> > -        netdev_dpdk_alloc_txq(dev, netdev->n_txq);

> > -    } else {

> > -        netdev_dpdk_alloc_txq(dev, OVS_VHOST_MAX_QUEUE_NUM);

> > -        /* Enable DPDK_DEV_VHOST device and set promiscuous mode flag. */

> > -        dev->flags = NETDEV_UP | NETDEV_PROMISC;

> > +    err = dpdk_eth_dev_init(dev);

> > +    if (err) {

> > +        goto unlock;

> > +    }

> > +    nr_q = (type == DPDK_DEV_ETH ?

> > +            1 : MIN(OVS_VHOST_MAX_QUEUE_NUM, RTE_MAX_QUEUES_PER_PORT));

> > +    netdev_dpdk_alloc_txq(dev, nr_q);

> > +

> > +    if (type == DPDK_DEV_VHOST) {

> > +        rte_eth_dev_callback_register(port_no, RTE_ETH_EVENT_QUEUE_STATE,

> > +                                      vring_state_changed_callback, NULL);

> > +        rte_eth_dev_callback_register(port_no, RTE_ETH_EVENT_INTR_LSC,

> > +                                      link_status_changed_callback, NULL);

> >      }

> >

> >      ovs_list_push_back(&dpdk_list, &dev->list_node);

> > @@ -802,17 +905,48 @@ dpdk_dev_parse_name(const char dev_name[], const char prefix[],

> >      }

> >  }

> >

> > +/* When attaching a vhost device to DPDK, a unique name of the format

> > + * 'eth_vhostX' is expected, where X is a unique identifier.

> > + * get_vhost_drv_id returns a valid X value to provide to DPDK.

> > + */

> > +static int

> > +get_vhost_drv_id(void)

> > +{

> > +    int i = 0;

> > +

> > +    for (i = 0; i < RTE_MAX_ETHPORTS; i++) {

> > +        if (vhost_drv_ids[i] == 0) {

> > +            return i;

> > +        }

> > +    }

> > +

> > +    return -1;

> > +}

> > +

> > +static void

> > +set_vhost_drv_id(int id, int val)

> > +{

> > +    vhost_drv_ids[id] = val;

> > +}

> > +

> >  static int

> >  netdev_dpdk_vhost_construct(struct netdev *netdev)

> >  {

> >      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> >      const char *name = netdev->name;

> >      int err;

> > +    uint8_t port_no = 0;

> > +    char *devargs;

> > +    int driver_id = 0;

> > +

> > +    if (rte_eal_init_ret) {

> > +        return rte_eal_init_ret;

> > +    }

> >

> >      /* 'name' is appended to 'vhost_sock_dir' and used to create a socket in

> >       * the file system. '/' or '\' would traverse directories, so they're not

> >       * acceptable in 'name'. */

> > -    if (strchr(name, '/') || strchr(name, '\\')) {

> > +    if (strchr(name, '/') || strchr(name, '\\') || strchr(name, ',')) {

> >          VLOG_ERR("\"%s\" is not a valid name for a vhost-user port. "

> >                   "A valid name must not include '/' or '\\'",

> >                   name);

> > @@ -829,18 +963,32 @@ netdev_dpdk_vhost_construct(struct netdev *netdev)

> >       */

> >      snprintf(dev->vhost_id, sizeof(dev->vhost_id), "%s/%s",

> >               vhost_sock_dir, name);

> > +    driver_id = get_vhost_drv_id();

> > +    if (driver_id == -1) {

> > +        VLOG_ERR("Unable to create vhost-user device %s - too many vhost-user"

> > +                 "devices registered with PMD", dev->vhost_id);

> > +        err = ENODEV;

> > +        goto out;

> > +    } else {

> > +        devargs = xasprintf("eth_vhost%u,iface=%s,queues=%i",

> > +                 driver_id, dev->vhost_id,

> > +                 MIN(OVS_VHOST_MAX_QUEUE_NUM, RTE_MAX_QUEUES_PER_PORT));

> > +        err = rte_eth_dev_attach(devargs, &port_no);

> > +    }

> >

> > -    err = rte_vhost_driver_register(dev->vhost_id);

> >      if (err) {

> > -        VLOG_ERR("vhost-user socket device setup failure for socket %s\n",

> > +        VLOG_ERR("Failed to attach vhost-user device %s to DPDK",

> >                   dev->vhost_id);

> >      } else {

> >          fatal_signal_add_file_to_unlink(dev->vhost_id);

> >          VLOG_INFO("Socket %s created for vhost-user port %s\n",

> >                    dev->vhost_id, name);

> > -        err = netdev_dpdk_init(netdev, -1, DPDK_DEV_VHOST);

> > +        dev->vhost_pmd_id = driver_id;

> > +        set_vhost_drv_id(driver_id, 1);

> > +        err = netdev_dpdk_init(netdev, port_no, DPDK_DEV_VHOST);

> >      }

> >

> > +out:

> >      ovs_mutex_unlock(&dpdk_mutex);

> >      return err;

> >  }

> > @@ -868,20 +1016,28 @@ netdev_dpdk_construct(struct netdev *netdev)

> >  }

> >

> >  static void

> > -netdev_dpdk_destruct(struct netdev *netdev)

> > +dpdk_destruct_helper(struct netdev_dpdk *dev)

> >  {

> > -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> > -

> > -    ovs_mutex_lock(&dev->mutex);

> >      rte_eth_dev_stop(dev->port_id);

> >      free(ovsrcu_get_protected(struct ingress_policer *,

> >                                &dev->ingress_policer));

> > -    ovs_mutex_unlock(&dev->mutex);

> >

> > -    ovs_mutex_lock(&dpdk_mutex);

> >      rte_free(dev->tx_q);

> >      ovs_list_remove(&dev->list_node);

> >      dpdk_mp_put(dev->dpdk_mp);

> > +}

> > +

> > +static void

> > +netdev_dpdk_destruct(struct netdev *netdev)

> > +{

> > +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> > +

> > +    ovs_mutex_lock(&dpdk_mutex);

> > +    ovs_mutex_lock(&dev->mutex);

> > +

> > +    dpdk_destruct_helper(dev);

> > +

> > +    ovs_mutex_unlock(&dev->mutex);

> >      ovs_mutex_unlock(&dpdk_mutex);

> >  }

> >

> > @@ -890,30 +1046,19 @@ netdev_dpdk_vhost_destruct(struct netdev *netdev)

> >  {

> >      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> >

> > -    /* Guest becomes an orphan if still attached. */

> > -    if (netdev_dpdk_get_virtio(dev) != NULL) {

> > -        VLOG_ERR("Removing port '%s' while vhost device still attached.",

> > -                 netdev->name);

> > -        VLOG_ERR("To restore connectivity after re-adding of port, VM on socket"

> > -                 " '%s' must be restarted.",

> > -                 dev->vhost_id);

> > -    }

> > +    ovs_mutex_lock(&dpdk_mutex);

> > +    ovs_mutex_lock(&dev->mutex);

> >

> > -    if (rte_vhost_driver_unregister(dev->vhost_id)) {

> > -        VLOG_ERR("Unable to remove vhost-user socket %s", dev->vhost_id);

> > +    if (rte_eth_dev_detach(dev->port_id, dev->vhost_id)) {

> > +        VLOG_ERR("Error removing vhost device %s", dev->vhost_id);

> >      } else {

> >          fatal_signal_remove_file_to_unlink(dev->vhost_id);

> >      }

> > +    set_vhost_drv_id(dev->vhost_pmd_id, 0);

> >

> > -    ovs_mutex_lock(&dev->mutex);

> > -    free(ovsrcu_get_protected(struct ingress_policer *,

> > -                              &dev->ingress_policer));

> > -    ovs_mutex_unlock(&dev->mutex);

> > +    dpdk_destruct_helper(dev);

> >

> > -    ovs_mutex_lock(&dpdk_mutex);

> > -    rte_free(dev->tx_q);

> > -    ovs_list_remove(&dev->list_node);

> > -    dpdk_mp_put(dev->dpdk_mp);

> > +    ovs_mutex_unlock(&dev->mutex);

> >      ovs_mutex_unlock(&dpdk_mutex);

> >  }

> >

> > @@ -1105,117 +1250,6 @@ ingress_policer_run(struct ingress_policer *policer, struct rte_mbuf **pkts,

> >      return cnt;

> >  }

> >

> > -static bool

> > -is_vhost_running(struct virtio_net *virtio_dev)

> > -{

> > -    return (virtio_dev != NULL && (virtio_dev->flags & VIRTIO_DEV_RUNNING));

> > -}

> > -

> > -static inline void

> > -netdev_dpdk_vhost_update_rx_size_counters(struct netdev_stats *stats,

> > -                                          unsigned int packet_size)

> > -{

> > -    /* Hard-coded search for the size bucket. */

> > -    if (packet_size < 256) {

> > -        if (packet_size >= 128) {

> > -            stats->rx_128_to_255_packets++;

> > -        } else if (packet_size <= 64) {

> > -            stats->rx_1_to_64_packets++;

> > -        } else {

> > -            stats->rx_65_to_127_packets++;

> > -        }

> > -    } else {

> > -        if (packet_size >= 1523) {

> > -            stats->rx_1523_to_max_packets++;

> > -        } else if (packet_size >= 1024) {

> > -            stats->rx_1024_to_1522_packets++;

> > -        } else if (packet_size < 512) {

> > -            stats->rx_256_to_511_packets++;

> > -        } else {

> > -            stats->rx_512_to_1023_packets++;

> > -        }

> > -    }

> > -}

> > -

> > -static inline void

> > -netdev_dpdk_vhost_update_rx_counters(struct netdev_stats *stats,

> > -                                     struct dp_packet **packets, int count,

> > -                                     int dropped)

> > -{

> > -    int i;

> > -    unsigned int packet_size;

> > -    struct dp_packet *packet;

> > -

> > -    stats->rx_packets += count;

> > -    stats->rx_dropped += dropped;

> > -    for (i = 0; i < count; i++) {

> > -        packet = packets[i];

> > -        packet_size = dp_packet_size(packet);

> > -

> > -        if (OVS_UNLIKELY(packet_size < ETH_HEADER_LEN)) {

> > -            /* This only protects the following multicast counting from

> > -             * too short packets, but it does not stop the packet from

> > -             * further processing. */

> > -            stats->rx_errors++;

> > -            stats->rx_length_errors++;

> > -            continue;

> > -        }

> > -

> > -        netdev_dpdk_vhost_update_rx_size_counters(stats, packet_size);

> > -

> > -        struct eth_header *eh = (struct eth_header *) dp_packet_data(packet);

> > -        if (OVS_UNLIKELY(eth_addr_is_multicast(eh->eth_dst))) {

> > -            stats->multicast++;

> > -        }

> > -

> > -        stats->rx_bytes += packet_size;

> > -    }

> > -}

> > -

> > -/*

> > - * The receive path for the vhost port is the TX path out from guest.

> > - */

> > -static int

> > -netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,

> > -                           struct dp_packet_batch *batch)

> > -{

> > -    struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);

> > -    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);

> > -    int qid = rxq->queue_id;

> > -    struct ingress_policer *policer = netdev_dpdk_get_ingress_policer(dev);

> > -    uint16_t nb_rx = 0;

> > -    uint16_t dropped = 0;

> > -

> > -    if (OVS_UNLIKELY(!is_vhost_running(virtio_dev)

> > -                     || !(dev->flags & NETDEV_UP))) {

> > -        return EAGAIN;

> > -    }

> > -

> > -    nb_rx = rte_vhost_dequeue_burst(virtio_dev, qid * VIRTIO_QNUM + VIRTIO_TXQ,

> > -                                    dev->dpdk_mp->mp,

> > -                                    (struct rte_mbuf **) batch->packets,

> > -                                    NETDEV_MAX_BURST);

> > -    if (!nb_rx) {

> > -        return EAGAIN;

> > -    }

> > -

> > -    if (policer) {

> > -        dropped = nb_rx;

> > -        nb_rx = ingress_policer_run(policer,

> > -                                    (struct rte_mbuf **) batch->packets,

> > -                                    nb_rx);

> > -        dropped -= nb_rx;

> > -    }

> > -

> > -    rte_spinlock_lock(&dev->stats_lock);

> > -    netdev_dpdk_vhost_update_rx_counters(&dev->stats, batch->packets,

> > -                                         nb_rx, dropped);

> > -    rte_spinlock_unlock(&dev->stats_lock);

> > -

> > -    batch->count = (int) nb_rx;

> > -    return 0;

> > -}

> > -

> >  static int

> >  netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch *batch)

> >  {

> > @@ -1269,85 +1303,6 @@ netdev_dpdk_qos_run__(struct netdev_dpdk *dev, struct rte_mbuf **pkts,

> >      return cnt;

> >  }

> >

> > -static inline void

> > -netdev_dpdk_vhost_update_tx_counters(struct netdev_stats *stats,

> > -                                     struct dp_packet **packets,

> > -                                     int attempted,

> > -                                     int dropped)

> > -{

> > -    int i;

> > -    int sent = attempted - dropped;

> > -

> > -    stats->tx_packets += sent;

> > -    stats->tx_dropped += dropped;

> > -

> > -    for (i = 0; i < sent; i++) {

> > -        stats->tx_bytes += dp_packet_size(packets[i]);

> > -    }

> > -}

> > -

> > -static void

> > -__netdev_dpdk_vhost_send(struct netdev *netdev, int qid,

> > -                         struct dp_packet **pkts, int cnt,

> > -                         bool may_steal)

> > -{

> > -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> > -    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);

> > -    struct rte_mbuf **cur_pkts = (struct rte_mbuf **) pkts;

> > -    unsigned int total_pkts = cnt;

> > -    unsigned int qos_pkts = cnt;

> > -    int retries = 0;

> > -

> > -    qid = dev->tx_q[qid % netdev->n_txq].map;

> > -

> > -    if (OVS_UNLIKELY(!is_vhost_running(virtio_dev) || qid < 0

> > -                     || !(dev->flags & NETDEV_UP))) {

> > -        rte_spinlock_lock(&dev->stats_lock);

> > -        dev->stats.tx_dropped+= cnt;

> > -        rte_spinlock_unlock(&dev->stats_lock);

> > -        goto out;

> > -    }

> > -

> > -    rte_spinlock_lock(&dev->tx_q[qid].tx_lock);

> > -

> > -    /* Check has QoS has been configured for the netdev */

> > -    cnt = netdev_dpdk_qos_run__(dev, cur_pkts, cnt);

> > -    qos_pkts -= cnt;

> > -

> > -    do {

> > -        int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;

> > -        unsigned int tx_pkts;

> > -

> > -        tx_pkts = rte_vhost_enqueue_burst(virtio_dev, vhost_qid,

> > -                                          cur_pkts, cnt);

> > -        if (OVS_LIKELY(tx_pkts)) {

> > -            /* Packets have been sent.*/

> > -            cnt -= tx_pkts;

> > -            /* Prepare for possible retry.*/

> > -            cur_pkts = &cur_pkts[tx_pkts];

> > -        } else {

> > -            /* No packets sent - do not retry.*/

> > -            break;

> > -        }

> > -    } while (cnt && (retries++ < VHOST_ENQ_RETRY_NUM));

> > -

> > -    rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);

> > -

> > -    rte_spinlock_lock(&dev->stats_lock);

> > -    cnt += qos_pkts;

> > -    netdev_dpdk_vhost_update_tx_counters(&dev->stats, pkts, total_pkts, cnt);

> > -    rte_spinlock_unlock(&dev->stats_lock);

> > -

> > -out:

> > -    if (may_steal) {

> > -        int i;

> > -

> > -        for (i = 0; i < total_pkts; i++) {

> > -            dp_packet_delete(pkts[i]);

> > -        }

> > -    }

> > -}

> > -

> >  /* Tx function. Transmit packets indefinitely */

> >  static void

> >  dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)

> > @@ -1402,18 +1357,13 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)

> >          newcnt++;

> >      }

> >

> > -    if (dev->type == DPDK_DEV_VHOST) {

> > -        __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) mbufs,

> > -                                 newcnt, true);

> > -    } else {

> > -        unsigned int qos_pkts = newcnt;

> > +    unsigned int qos_pkts = newcnt;

> >

> > -        /* Check if QoS has been configured for this netdev. */

> > -        newcnt = netdev_dpdk_qos_run__(dev, mbufs, newcnt);

> > +    /* Check if QoS has been configured for this netdev. */

> > +    newcnt = netdev_dpdk_qos_run__(dev, mbufs, newcnt);

> >

> > -        dropped += qos_pkts - newcnt;

> > -        netdev_dpdk_eth_tx_burst(dev, qid, mbufs, newcnt);

> > -    }

> > +    dropped += qos_pkts - newcnt;

> > +    netdev_dpdk_eth_tx_burst(dev, qid, mbufs, newcnt);

> >

> >      if (OVS_UNLIKELY(dropped)) {

> >          rte_spinlock_lock(&dev->stats_lock);

> > @@ -1426,33 +1376,10 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)

> >      }

> >  }

> >

> > -static int

> > -netdev_dpdk_vhost_send(struct netdev *netdev, int qid,

> > -                       struct dp_packet_batch *batch,

> > -                       bool may_steal, bool concurrent_txq OVS_UNUSED)

> > -{

> > -

> > -    if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {

> > -        dpdk_do_tx_copy(netdev, qid, batch);

> > -        dp_packet_delete_batch(batch, may_steal);

> > -    } else {

> > -        dp_packet_batch_apply_cutlen(batch);

> > -        __netdev_dpdk_vhost_send(netdev, qid, batch->packets, batch->count,

> > -                                 may_steal);

> > -    }

> > -    return 0;

> > -}

> > -

> >  static inline void

> >  netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,

> > -                   struct dp_packet_batch *batch, bool may_steal,

> > -                   bool concurrent_txq)

> > +                   struct dp_packet_batch *batch, bool may_steal)

> >  {

> > -    if (OVS_UNLIKELY(concurrent_txq)) {

> > -        qid = qid % dev->up.n_txq;

> > -        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);

> > -    }

> > -

> >      if (OVS_UNLIKELY(!may_steal ||

> >                       batch->packets[0]->source != DPBUF_DPDK)) {

> >          struct netdev *netdev = &dev->up;

> > @@ -1512,20 +1439,50 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,

> >              rte_spinlock_unlock(&dev->stats_lock);

> >          }

> >      }

> > +}

> > +

> > +static int

> > +netdev_dpdk_eth_send(struct netdev *netdev, int qid,

> > +                     struct dp_packet_batch *batch, bool may_steal,

> > +                     bool concurrent_txq)

> > +{

> > +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> > +

> > +    if (OVS_UNLIKELY(concurrent_txq)) {

> > +        qid = qid % dev->up.n_txq;

> > +        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);

> > +    }

> > +

> > +    netdev_dpdk_send__(dev, qid, batch, may_steal);

> >

> >      if (OVS_UNLIKELY(concurrent_txq)) {

> >          rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);

> >      }

> > +

> > +    return 0;

> >  }

> >

> >  static int

> > -netdev_dpdk_eth_send(struct netdev *netdev, int qid,

> > -                     struct dp_packet_batch *batch, bool may_steal,

> > -                     bool concurrent_txq)

> > +netdev_dpdk_vhost_send(struct netdev *netdev, int qid,

> > +                       struct dp_packet_batch *batch, bool may_steal,

> > +                       bool concurrent_txq OVS_UNUSED)

> >  {

> >      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> >

> > -    netdev_dpdk_send__(dev, qid, batch, may_steal, concurrent_txq);

> > +    qid = dev->tx_q[qid % netdev->n_txq].map;

> > +    if (qid == -1) {

> > +        rte_spinlock_lock(&dev->stats_lock);

> > +        dev->stats.tx_dropped+= batch->count;

> > +        rte_spinlock_unlock(&dev->stats_lock);

> > +        if (may_steal) {

> > +            dp_packet_delete_batch(batch, may_steal);

> > +        }

> > +    } else {

> > +        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);

> > +        netdev_dpdk_send__(dev, qid, batch, may_steal);

> > +        rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);

> > +    }

> > +

> >      return 0;

> >  }

> >

> > @@ -1622,41 +1579,6 @@ out:

> >  static int

> >  netdev_dpdk_get_carrier(const struct netdev *netdev, bool *carrier);

> >

> > -static int

> > -netdev_dpdk_vhost_get_stats(const struct netdev *netdev,

> > -                            struct netdev_stats *stats)

> > -{

> > -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> > -

> > -    ovs_mutex_lock(&dev->mutex);

> > -

> > -    rte_spinlock_lock(&dev->stats_lock);

> > -    /* Supported Stats */

> > -    stats->rx_packets += dev->stats.rx_packets;

> > -    stats->tx_packets += dev->stats.tx_packets;

> > -    stats->rx_dropped = dev->stats.rx_dropped;

> > -    stats->tx_dropped += dev->stats.tx_dropped;

> > -    stats->multicast = dev->stats.multicast;

> > -    stats->rx_bytes = dev->stats.rx_bytes;

> > -    stats->tx_bytes = dev->stats.tx_bytes;

> > -    stats->rx_errors = dev->stats.rx_errors;

> > -    stats->rx_length_errors = dev->stats.rx_length_errors;

> > -

> > -    stats->rx_1_to_64_packets = dev->stats.rx_1_to_64_packets;

> > -    stats->rx_65_to_127_packets = dev->stats.rx_65_to_127_packets;

> > -    stats->rx_128_to_255_packets = dev->stats.rx_128_to_255_packets;

> > -    stats->rx_256_to_511_packets = dev->stats.rx_256_to_511_packets;

> > -    stats->rx_512_to_1023_packets = dev->stats.rx_512_to_1023_packets;

> > -    stats->rx_1024_to_1522_packets = dev->stats.rx_1024_to_1522_packets;

> > -    stats->rx_1523_to_max_packets = dev->stats.rx_1523_to_max_packets;

> > -

> > -    rte_spinlock_unlock(&dev->stats_lock);

> > -

> > -    ovs_mutex_unlock(&dev->mutex);

> > -

> > -    return 0;

> > -}

> > -

> >  static void

> >  netdev_dpdk_convert_xstats(struct netdev_stats *stats,

> >                             const struct rte_eth_xstats *xstats,

> > @@ -1737,28 +1659,40 @@ netdev_dpdk_get_stats(const struct netdev *netdev, struct netdev_stats *stats)

> >          return EPROTO;

> >      }

> >

> > -    rte_xstats_len = rte_eth_xstats_get(dev->port_id, NULL, 0);

> > -    if (rte_xstats_len > 0) {

> > -        rte_xstats = dpdk_rte_mzalloc(sizeof(*rte_xstats) * rte_xstats_len);

> > -        memset(rte_xstats, 0xff, sizeof(*rte_xstats) * rte_xstats_len);

> > -        rte_xstats_ret = rte_eth_xstats_get(dev->port_id, rte_xstats,

> > -                                            rte_xstats_len);

> > -        if (rte_xstats_ret > 0 && rte_xstats_ret <= rte_xstats_len) {

> > -            netdev_dpdk_convert_xstats(stats, rte_xstats, rte_xstats_ret);

> > +    /* Extended statistics are not yet available for vHost User PMD */

> > +    if (dev->type == DPDK_DEV_ETH) {

> > +        rte_xstats_len = rte_eth_xstats_get(dev->port_id, NULL, 0);

> > +        if (rte_xstats_len > 0) {

> > +            rte_xstats = dpdk_rte_mzalloc(sizeof(*rte_xstats)

> > +                                          * rte_xstats_len);

> > +            memset(rte_xstats, 0xff, sizeof(*rte_xstats) * rte_xstats_len);

> > +            rte_xstats_ret = rte_eth_xstats_get(dev->port_id, rte_xstats,

> > +                                                rte_xstats_len);

> > +            if (rte_xstats_ret > 0 && rte_xstats_ret <= rte_xstats_len) {

> > +                netdev_dpdk_convert_xstats(stats, rte_xstats, rte_xstats_ret);

> > +            }

> > +            rte_free(rte_xstats);

> > +        } else {

> > +            VLOG_WARN("Can't get XSTATS counters for port: %i.", dev->port_id);

> >          }

> > -        rte_free(rte_xstats);

> > -    } else {

> > -        VLOG_WARN("Can't get XSTATS counters for port: %i.", dev->port_id);

> >      }

> >

> >      stats->rx_packets = rte_stats.ipackets;

> >      stats->tx_packets = rte_stats.opackets;

> >      stats->rx_bytes = rte_stats.ibytes;

> >      stats->tx_bytes = rte_stats.obytes;

> > -    /* DPDK counts imissed as errors, but count them here as dropped instead */

> > -    stats->rx_errors = rte_stats.ierrors - rte_stats.imissed;

> > -    stats->tx_errors = rte_stats.oerrors;

> > -    stats->multicast = rte_stats.imcasts;

> > +

> > +    if (dev->type == DPDK_DEV_ETH) {

> > +        /* DPDK counts imissed as errors, but count them here as dropped

> > +         * instead */

> > +        stats->rx_errors = rte_stats.ierrors - rte_stats.imissed;

> > +        stats->tx_errors = rte_stats.oerrors;

> > +        stats->multicast = rte_stats.imcasts;

> > +    } else {

> > +        stats->rx_errors = UINT64_MAX;

> > +        stats->tx_errors = UINT64_MAX;

> > +        stats->multicast = UINT64_MAX;

> > +    }

> >

> >      rte_spinlock_lock(&dev->stats_lock);

> >      stats->tx_dropped = dev->stats.tx_dropped;

> > @@ -1921,25 +1855,6 @@ netdev_dpdk_get_carrier(const struct netdev *netdev, bool *carrier)

> >      return 0;

> >  }

> >

> > -static int

> > -netdev_dpdk_vhost_get_carrier(const struct netdev *netdev, bool *carrier)

> > -{

> > -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> > -    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);

> > -

> > -    ovs_mutex_lock(&dev->mutex);

> > -

> > -    if (is_vhost_running(virtio_dev)) {

> > -        *carrier = 1;

> > -    } else {

> > -        *carrier = 0;

> > -    }

> > -

> > -    ovs_mutex_unlock(&dev->mutex);

> > -

> > -    return 0;

> > -}

> > -

> >  static long long int

> >  netdev_dpdk_get_carrier_resets(const struct netdev *netdev)

> >  {

> > @@ -1995,13 +1910,10 @@ netdev_dpdk_update_flags__(struct netdev_dpdk *dev,

> >              rte_eth_dev_stop(dev->port_id);

> >          }

> >      } else {

> > -        /* If DPDK_DEV_VHOST device's NETDEV_UP flag was changed and vhost is

> > -         * running then change netdev's change_seq to trigger link state

> > -         * update. */

> > -        struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);

> > +        /* If DPDK_DEV_VHOST device's NETDEV_UP flag was changed then change

> > +         * netdev's change_seq to trigger link state update. */

> >

> > -        if ((NETDEV_UP & ((*old_flagsp ^ on) | (*old_flagsp ^ off)))

> > -            && is_vhost_running(virtio_dev)) {

> > +        if ((NETDEV_UP & ((*old_flagsp ^ on) | (*old_flagsp ^ off)))) {

> >              netdev_change_seq_changed(&dev->up);

> >

> >              /* Clear statistics if device is getting up. */

> > @@ -2096,7 +2008,7 @@ netdev_dpdk_set_admin_state(struct unixctl_conn *conn, int argc,

> >

> >      if (argc > 2) {

> >          struct netdev *netdev = netdev_from_name(argv[1]);

> > -        if (netdev && is_dpdk_class(netdev->netdev_class)) {

> > +        if (netdev && is_dpdk_eth_class(netdev->netdev_class)) {

> >              struct netdev_dpdk *dpdk_dev = netdev_dpdk_cast(netdev);

> >

> >              ovs_mutex_lock(&dpdk_dev->mutex);

> > @@ -2124,22 +2036,6 @@ netdev_dpdk_set_admin_state(struct unixctl_conn *conn, int argc,

> >  }

> >

> >  /*

> > - * Set virtqueue flags so that we do not receive interrupts.

> > - */

> > -static void

> > -set_irq_status(struct virtio_net *virtio_dev)

> > -{

> > -    uint32_t i;

> > -    uint64_t idx;

> > -

> > -    for (i = 0; i < virtio_dev->virt_qp_nb; i++) {

> > -        idx = i * VIRTIO_QNUM;

> > -        rte_vhost_enable_guest_notification(virtio_dev, idx + VIRTIO_RXQ, 0);

> > -        rte_vhost_enable_guest_notification(virtio_dev, idx + VIRTIO_TXQ, 0);

> > -    }

> > -}

> > -

> > -/*

> >   * Fixes mapping for vhost-user tx queues. Must be called after each

> >   * enabling/disabling of queues and n_txq modifications.

> >   */

> > @@ -2180,62 +2076,6 @@ netdev_dpdk_remap_txqs(struct netdev_dpdk *dev)

> >      rte_free(enabled_queues);

> >  }

> >

> > -/*

> > - * A new virtio-net device is added to a vhost port.

> > - */

> > -static int

> > -new_device(struct virtio_net *virtio_dev)

> > -{

> > -    struct netdev_dpdk *dev;

> > -    bool exists = false;

> > -    int newnode = 0;

> > -    long err = 0;

> > -

> > -    ovs_mutex_lock(&dpdk_mutex);

> > -    /* Add device to the vhost port with the same name as that passed down. */

> > -    LIST_FOR_EACH(dev, list_node, &dpdk_list) {

> > -        if (strncmp(virtio_dev->ifname, dev->vhost_id, IF_NAME_SZ) == 0) {

> > -            uint32_t qp_num = virtio_dev->virt_qp_nb;

> > -

> > -            ovs_mutex_lock(&dev->mutex);

> > -            /* Get NUMA information */

> > -            err = get_mempolicy(&newnode, NULL, 0, virtio_dev,

> > -                                MPOL_F_NODE | MPOL_F_ADDR);

> > -            if (err) {

> > -                VLOG_INFO("Error getting NUMA info for vHost Device '%s'",

> > -                        virtio_dev->ifname);

> > -                newnode = dev->socket_id;

> > -            }

> > -

> > -            dev->requested_socket_id = newnode;

> > -            dev->requested_n_rxq = qp_num;

> > -            dev->requested_n_txq = qp_num;

> > -            netdev_request_reconfigure(&dev->up);

> > -

> > -            ovsrcu_set(&dev->virtio_dev, virtio_dev);

> > -            exists = true;

> > -

> > -            /* Disable notifications. */

> > -            set_irq_status(virtio_dev);

> > -            netdev_change_seq_changed(&dev->up);

> > -            ovs_mutex_unlock(&dev->mutex);

> > -            break;

> > -        }

> > -    }

> > -    ovs_mutex_unlock(&dpdk_mutex);

> > -

> > -    if (!exists) {

> > -        VLOG_INFO("vHost Device '%s' %"PRIu64" can't be added - name not "

> > -                  "found", virtio_dev->ifname, virtio_dev->device_fh);

> > -

> > -        return -1;

> > -    }

> > -

> > -    VLOG_INFO("vHost Device '%s' %"PRIu64" has been added on numa node %i",

> > -              virtio_dev->ifname, virtio_dev->device_fh, newnode);

> > -    return 0;

> > -}

> > -

> >  /* Clears mapping for all available queues of vhost interface. */

> >  static void

> >  netdev_dpdk_txq_map_clear(struct netdev_dpdk *dev)

> > @@ -2248,144 +2088,18 @@ netdev_dpdk_txq_map_clear(struct netdev_dpdk *dev)

> >      }

> >  }

> >

> > -/*

> > - * Remove a virtio-net device from the specific vhost port.  Use dev->remove

> > - * flag to stop any more packets from being sent or received to/from a VM and

> > - * ensure all currently queued packets have been sent/received before removing

> > - *  the device.

> > - */

> > -static void

> > -destroy_device(volatile struct virtio_net *virtio_dev)

> > -{

> > -    struct netdev_dpdk *dev;

> > -    bool exists = false;

> > -

> > -    ovs_mutex_lock(&dpdk_mutex);

> > -    LIST_FOR_EACH (dev, list_node, &dpdk_list) {

> > -        if (netdev_dpdk_get_virtio(dev) == virtio_dev) {

> > -

> > -            ovs_mutex_lock(&dev->mutex);

> > -            virtio_dev->flags &= ~VIRTIO_DEV_RUNNING;

> > -            ovsrcu_set(&dev->virtio_dev, NULL);

> > -            /* Clear tx/rx queue settings. */

> > -            netdev_dpdk_txq_map_clear(dev);

> > -            dev->requested_n_rxq = NR_QUEUE;

> > -            dev->requested_n_txq = NR_QUEUE;

> > -            netdev_request_reconfigure(&dev->up);

> > -

> > -            netdev_change_seq_changed(&dev->up);

> > -            ovs_mutex_unlock(&dev->mutex);

> > -            exists = true;

> > -            break;

> > -        }

> > -    }

> > -

> > -    ovs_mutex_unlock(&dpdk_mutex);

> > -

> > -    if (exists == true) {

> > -        /*

> > -         * Wait for other threads to quiesce after setting the 'virtio_dev'

> > -         * to NULL, before returning.

> > -         */

> > -        ovsrcu_synchronize();

> > -        /*

> > -         * As call to ovsrcu_synchronize() will end the quiescent state,

> > -         * put thread back into quiescent state before returning.

> > -         */

> > -        ovsrcu_quiesce_start();

> > -        VLOG_INFO("vHost Device '%s' %"PRIu64" has been removed",

> > -                  virtio_dev->ifname, virtio_dev->device_fh);

> > -    } else {

> > -        VLOG_INFO("vHost Device '%s' %"PRIu64" not found", virtio_dev-

> >ifname,

> > -                  virtio_dev->device_fh);

> > -    }

> > -}

> > -

> > -static int

> > -vring_state_changed(struct virtio_net *virtio_dev, uint16_t queue_id,

> > -                    int enable)

> > -{

> > -    struct netdev_dpdk *dev;

> > -    bool exists = false;

> > -    int qid = queue_id / VIRTIO_QNUM;

> > -

> > -    if (queue_id % VIRTIO_QNUM == VIRTIO_TXQ) {

> > -        return 0;

> > -    }

> > -

> > -    ovs_mutex_lock(&dpdk_mutex);

> > -    LIST_FOR_EACH (dev, list_node, &dpdk_list) {

> > -        if (strncmp(virtio_dev->ifname, dev->vhost_id, IF_NAME_SZ) == 0) {

> > -            ovs_mutex_lock(&dev->mutex);

> > -            if (enable) {

> > -                dev->tx_q[qid].map = qid;

> > -            } else {

> > -                dev->tx_q[qid].map = OVS_VHOST_QUEUE_DISABLED;

> > -            }

> > -            netdev_dpdk_remap_txqs(dev);

> > -            exists = true;

> > -            ovs_mutex_unlock(&dev->mutex);

> > -            break;

> > -        }

> > -    }

> > -    ovs_mutex_unlock(&dpdk_mutex);

> > -

> > -    if (exists) {

> > -        VLOG_INFO("State of queue %d ( tx_qid %d ) of vhost device '%s' %"

> > -                  PRIu64" changed to \'%s\'", queue_id, qid,

> > -                  virtio_dev->ifname, virtio_dev->device_fh,

> > -                  (enable == 1) ? "enabled" : "disabled");

> > -    } else {

> > -        VLOG_INFO("vHost Device '%s' %"PRIu64" not found", virtio_dev-

> >ifname,

> > -                  virtio_dev->device_fh);

> > -        return -1;

> > -    }

> > -

> > -    return 0;

> > -}

> > -

> > -struct virtio_net *

> > -netdev_dpdk_get_virtio(const struct netdev_dpdk *dev)

> > -{

> > -    return ovsrcu_get(struct virtio_net *, &dev->virtio_dev);

> > -}

> > -

> >  struct ingress_policer *

> >  netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev)

> >  {

> >      return ovsrcu_get(struct ingress_policer *, &dev->ingress_policer);

> >  }

> >

> > -/*

> > - * These callbacks allow virtio-net devices to be added to vhost ports when

> > - * configuration has been fully complete.

> > - */

> > -static const struct virtio_net_device_ops virtio_net_device_ops =

> > -{

> > -    .new_device =  new_device,

> > -    .destroy_device = destroy_device,

> > -    .vring_state_changed = vring_state_changed

> > -};

> > -

> > -static void *

> > -start_vhost_loop(void *dummy OVS_UNUSED)

> > -{

> > -     pthread_detach(pthread_self());

> > -     /* Put the vhost thread into quiescent state. */

> > -     ovsrcu_quiesce_start();

> > -     rte_vhost_driver_session_start();

> > -     return NULL;

> > -}

> > -

> >  static int

> >  dpdk_vhost_class_init(void)

> >  {

> > -    rte_vhost_driver_callback_register(&virtio_net_device_ops);

> > -    rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4

> > -                            | 1ULL << VIRTIO_NET_F_HOST_TSO6

> > -                            | 1ULL << VIRTIO_NET_F_CSUM);

> > -

> > -    ovs_thread_create("vhost_thread", start_vhost_loop, NULL);

> > +    rte_eth_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4

> > +                                | 1ULL << VIRTIO_NET_F_HOST_TSO6

> > +                                | 1ULL << VIRTIO_NET_F_CSUM);

> >      return 0;

> >  }

> >

> > @@ -2498,7 +2212,17 @@ netdev_dpdk_ring_send(struct netdev

> *netdev, int qid,

> >          dp_packet_rss_invalidate(batch->packets[i]);

> >      }

> >

> > -    netdev_dpdk_send__(dev, qid, batch, may_steal, concurrent_txq);

> > +    if (OVS_UNLIKELY(concurrent_txq)) {

> > +        qid = qid % dev->up.n_txq;

> > +        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);

> > +    }

> > +

> > +    netdev_dpdk_send__(dev, qid, batch, may_steal);

> > +

> > +    if (OVS_UNLIKELY(concurrent_txq)) {

> > +        rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);

> > +    }

> > +

> >      return 0;

> >  }

> >

> > @@ -2787,7 +2511,6 @@ static int

> >  netdev_dpdk_vhost_reconfigure(struct netdev *netdev)

> >  {

> >      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> > -    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);

> >      int err = 0;

> >

> >      ovs_mutex_lock(&dpdk_mutex);

> > @@ -2813,10 +2536,6 @@ netdev_dpdk_vhost_reconfigure(struct netdev

> *netdev)

> >          }

> >      }

> >

> > -    if (virtio_dev) {

> > -        virtio_dev->flags |= VIRTIO_DEV_RUNNING;

> > -    }

> > -

> >      ovs_mutex_unlock(&dev->mutex);

> >      ovs_mutex_unlock(&dpdk_mutex);

> >

> > @@ -3307,12 +3026,12 @@ static const struct netdev_class OVS_UNUSED

> dpdk_vhost_class =

> >          NULL,

> >          NULL,

> >          netdev_dpdk_vhost_send,

> > -        netdev_dpdk_vhost_get_carrier,

> > -        netdev_dpdk_vhost_get_stats,

> > +        netdev_dpdk_get_carrier,

> > +        netdev_dpdk_get_stats,

> >          NULL,

> >          NULL,

> >          netdev_dpdk_vhost_reconfigure,

> > -        netdev_dpdk_vhost_rxq_recv);

> > +        netdev_dpdk_rxq_recv);

> >

> >  void

> >  netdev_dpdk_register(void)

> >
Ciara Loftus Sept. 13, 2016, 3:37 p.m. UTC | #3
> 

> > I've added vHost maintainers to CC-list to hear their opinion about

> > new API to get number of queues from the vHost PMD.

> > Maybe we can expose 'rte_vhost_get_queue_num()' somehow or make

> > 'dev_info->nb_rx_queues' usable?

> >

> I appreciate great investigation.

> So far, I am not sure what is good way to get the value , but I agree it's nice to

> have such functionality.

> Currently vhost library has such a function.

> So, one of possible solution may prepare a function to convert portid to vid,

> then use vid to call vhost library function directly.


Hi,

I submitted a patch that adds a function returning the vid for a given port_id, as suggested above:

http://dpdk.org/ml/archives/dev/2016-September/046631.html

Any feedback would be much appreciated. I hope to get this into DPDK 16.11 so that we can integrate the vHost PMD into OVS once OVS supports DPDK 16.11.

Thanks,
Ciara

> Thanks,

> Tetsuya

> > NACK for now.

> >

> > Best regards, Ilya Maximets.

> >

> > On 29.07.2016 16:24, Ciara Loftus wrote:

> > > DPDK 16.04 introduces the vHost PMD which allows 'dpdkvhostuser' ports

> > > to be controlled by the librte_ether API, like physical 'dpdk' ports and

> > > IVSHM 'dpdkr' ports. This commit integrates this PMD into OVS and

> > > removes direct calls to the librte_vhost DPDK library.

> > >

> > > This commit removes extended statistics support for vHost User ports

> > > until such a time that this becomes available in the vHost PMD in a

> > > DPDK release supported by OVS.

> > >

> > > Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>

> > > ---

> > >  INSTALL.DPDK.md   |  10 +

> > >  NEWS              |   2 +

> > >  lib/netdev-dpdk.c | 857 ++++++++++++++++++------------------------------

> ------

> > >  3 files changed, 300 insertions(+), 569 deletions(-)

> > >

> > > diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md

> > > index 7609aa7..4feb7be 100644

> > > --- a/INSTALL.DPDK.md

> > > +++ b/INSTALL.DPDK.md

> > > @@ -604,6 +604,16 @@ can be found in [Vhost Walkthrough].

> > >

> > >      http://dpdk.org/doc/guides/rel_notes/release_16_04.html

> > >

> > > +  - dpdk, dpdkr and dpdkvhostuser ports are 'eth' type ports in the

> context of

> > > +    DPDK as they are all managed by the rte_ether API. This means that

> they

> > > +    adhere to the DPDK configuration option

> CONFIG_RTE_MAX_ETHPORTS which by

> > > +    default is set to 32. This means by default the combined total number

> of

> > > +    dpdk, dpdkr and dpdkvhostuser ports allowable in OVS with DPDK is

> 32. This

> > > +    value can be changed if desired by modifying the configuration file in

> > > +    DPDK, or by overriding the default value on the command line when

> building

> > > +    DPDK. eg.

> > > +

> > > +        `make install CONFIG_RTE_MAX_ETHPORTS=64`

> > >

> > >  Bug Reporting:

> > >  --------------

> > > diff --git a/NEWS b/NEWS

> > > index dc3dedb..6510dde 100644

> > > --- a/NEWS

> > > +++ b/NEWS

> > > @@ -64,6 +64,8 @@ Post-v2.5.0

> > >       * Basic connection tracking for the userspace datapath (no ALG,

> > >         fragmentation or NAT support yet)

> > >       * Remove dpdkvhostcuse port type.

> > > +     * vHost PMD integration brings vhost-user ports under control of the

> > > +       rte_ether DPDK API.

> > >     - Increase number of registers to 16.

> > >     - ovs-benchmark: This utility has been removed due to lack of use and

> > >       bitrot.

> > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c

> > > index d6959fe..d6ceeec 100644

> > > --- a/lib/netdev-dpdk.c

> > > +++ b/lib/netdev-dpdk.c

> > > @@ -30,7 +30,6 @@

> > >  #include <sys/types.h>

> > >  #include <sys/stat.h>

> > >  #include <getopt.h>

> > > -#include <numaif.h>

> > >

> > >  #include "dirs.h"

> > >  #include "dp-packet.h"

> > > @@ -56,9 +55,9 @@

> > >  #include "unixctl.h"

> > >

> > >  #include "rte_config.h"

> > > +#include "rte_eth_vhost.h"

> > >  #include "rte_mbuf.h"

> > >  #include "rte_meter.h"

> > > -#include "rte_virtio_net.h"

> > >

> > >  VLOG_DEFINE_THIS_MODULE(dpdk);

> > >  static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);

> > > @@ -141,6 +140,9 @@ static char *vhost_sock_dir = NULL;   /* Location

> of vhost-user sockets */

> > >

> > >  #define VHOST_ENQ_RETRY_NUM 8

> > >

> > > +/* Array that tracks the used & unused vHost user driver IDs */

> > > +static unsigned int vhost_drv_ids[RTE_MAX_ETHPORTS];

> > > +

> > >  static const struct rte_eth_conf port_conf = {

> > >      .rxmode = {

> > >          .mq_mode = ETH_MQ_RX_RSS,

> > > @@ -346,12 +348,15 @@ struct netdev_dpdk {

> > >      struct rte_eth_link link;

> > >      int link_reset_cnt;

> > >

> > > -    /* virtio-net structure for vhost device */

> > > -    OVSRCU_TYPE(struct virtio_net *) virtio_dev;

> > > +    /* Number of virtqueue pairs reported by the guest */

> > > +    uint32_t vhost_qp_nb;

> > >

> > >      /* Identifier used to distinguish vhost devices from each other */

> > >      char vhost_id[PATH_MAX];

> > >

> > > +    /* ID of vhost user port given to the PMD driver */

> > > +    unsigned int vhost_pmd_id;

> > > +

> > >      /* In dpdk_list. */

> > >      struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);

> > >

> > > @@ -382,16 +387,23 @@ struct netdev_rxq_dpdk {

> > >  static bool dpdk_thread_is_pmd(void);

> > >

> > >  static int netdev_dpdk_construct(struct netdev *);

> > > -

> > > -struct virtio_net * netdev_dpdk_get_virtio(const struct netdev_dpdk

> *dev);

> > > +static int netdev_dpdk_vhost_construct(struct netdev *);

> > >

> > >  struct ingress_policer *

> > >  netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev);

> > >

> > > +static void link_status_changed_callback(uint8_t port_id,

> > > +        enum rte_eth_event_type type, void *param);

> > > +static void vring_state_changed_callback(uint8_t port_id,

> > > +        enum rte_eth_event_type type, void *param);

> > > +static void netdev_dpdk_remap_txqs(struct netdev_dpdk *dev);

> > > +static void netdev_dpdk_txq_map_clear(struct netdev_dpdk *dev);

> > > +

> > >  static bool

> > > -is_dpdk_class(const struct netdev_class *class)

> > > +is_dpdk_eth_class(const struct netdev_class *class)

> > >  {

> > > -    return class->construct == netdev_dpdk_construct;

> > > +    return ((class->construct == netdev_dpdk_construct) ||

> > > +            (class->construct == netdev_dpdk_vhost_construct));

> > >  }

> > >

> > >  /* DPDK NIC drivers allocate RX buffers at a particular granularity,

> typically

> > > @@ -616,8 +628,13 @@ dpdk_eth_dev_queue_setup(struct

> netdev_dpdk *dev, int n_rxq, int n_txq)

> > >              continue;

> > >          }

> > >

> > > -        dev->up.n_rxq = n_rxq;

> > > -        dev->up.n_txq = n_txq;

> > > +        /* Only set n_*xq for physical devices. vHost User devices will set

> > > +         * this value correctly using info from the virtio backend.

> > > +         */

> > > +        if (dev->type == DPDK_DEV_ETH) {

> > > +            dev->up.n_rxq = n_rxq;

> > > +            dev->up.n_txq = n_txq;

> > > +        }

> > >

> > >          return 0;

> > >      }

> > > @@ -641,8 +658,14 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)

> OVS_REQUIRES(dpdk_mutex)

> > >

> > >      rte_eth_dev_info_get(dev->port_id, &info);

> > >

> > > -    n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq);

> > > -    n_txq = MIN(info.max_tx_queues, dev->up.n_txq);

> > > +    if (dev->type == DPDK_DEV_VHOST) {

> > > +        /* We don't know how many queues QEMU will use so set up the

> max */

> > > +        n_rxq = MIN(OVS_VHOST_MAX_QUEUE_NUM,

> RTE_MAX_QUEUES_PER_PORT);

> > > +        n_txq = MIN(OVS_VHOST_MAX_QUEUE_NUM,

> RTE_MAX_QUEUES_PER_PORT);

> > > +    } else {

> > > +        n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq);

> > > +        n_txq = MIN(info.max_tx_queues, dev->up.n_txq);

> > > +    }

> > >

> > >      diag = dpdk_eth_dev_queue_setup(dev, n_rxq, n_txq);

> > >      if (diag) {

> > > @@ -709,6 +732,85 @@ netdev_dpdk_alloc_txq(struct netdev_dpdk

> *dev, unsigned int n_txqs)

> > >      }

> > >  }

> > >

> > > +void

> > > +link_status_changed_callback(uint8_t port_id,

> > > +                             enum rte_eth_event_type type OVS_UNUSED,

> > > +                             void *param OVS_UNUSED)

> > > +{

> > > +    struct netdev_dpdk *dev;

> > > +    int socket_id = -1;

> > > +

> > > +    ovs_mutex_lock(&dpdk_mutex);

> > > +    LIST_FOR_EACH (dev, list_node, &dpdk_list) {

> > > +        if (port_id == dev->port_id) {

> > > +            ovs_mutex_lock(&dev->mutex);

> > > +            check_link_status(dev);

> > > +            if (dev->link.link_status == ETH_LINK_UP) {

> > > +                /* new device */

> > > +                /* Get NUMA information */

> > > +                socket_id = rte_eth_dev_socket_id(dev->port_id);

> > > +                if (socket_id != -1 && socket_id != dev->socket_id) {

> > > +                    dev->requested_socket_id = socket_id;

> > > +                }

> > > +                netdev_request_reconfigure(&dev->up);

> > > +                netdev_change_seq_changed(&dev->up);

> > > +                VLOG_INFO("vHost Device '%s' has been added on numa node

> %i",

> > > +                          dev->vhost_id, socket_id);

> > > +            } else {

> > > +                /* destroy device */

> > > +                /* Clear tx/rx queue settings. */

> > > +                netdev_dpdk_txq_map_clear(dev);

> > > +                netdev_request_reconfigure(&dev->up);

> > > +                netdev_change_seq_changed(&dev->up);

> > > +                VLOG_INFO("vHost Device '%s' has been removed", dev-

> >vhost_id);

> > > +            }

> > > +            ovs_mutex_unlock(&dev->mutex);

> > > +            break;

> > > +        }

> > > +    }

> > > +

> > > +    ovs_mutex_unlock(&dpdk_mutex);

> > > +

> > > +    return;

> > > +}

> > > +

> > > +void

> > > +vring_state_changed_callback(uint8_t port_id,

> > > +                             enum rte_eth_event_type type OVS_UNUSED,

> > > +                             void *param OVS_UNUSED)

> > > +{

> > > +    struct netdev_dpdk *dev;

> > > +    struct rte_eth_vhost_queue_event event;

> > > +    int err = 0;

> > > +

> > > +    err = rte_eth_vhost_get_queue_event(port_id, &event);

> > > +    if (err || event.rx) {

> > > +        return;

> > > +    }

> > > +

> > > +    ovs_mutex_lock(&dpdk_mutex);

> > > +    LIST_FOR_EACH (dev, list_node, &dpdk_list) {

> > > +        if (port_id == dev->port_id) {

> > > +            ovs_mutex_lock(&dev->mutex);

> > > +            if (event.enable) {

> > > +                dev->tx_q[event.queue_id].map = event.queue_id;

> > > +                dev->vhost_qp_nb++;

> > > +            } else {

> > > +                dev->tx_q[event.queue_id].map =

> OVS_VHOST_QUEUE_DISABLED;

> > > +                dev->vhost_qp_nb--;

> > > +            }

> > > +            dev->requested_n_rxq = dev->vhost_qp_nb;

> > > +            dev->requested_n_txq = dev->vhost_qp_nb;

> > > +            netdev_request_reconfigure(&dev->up);

> > > +            ovs_mutex_unlock(&dev->mutex);

> > > +            break;

> > > +        }

> > > +    }

> > > +    ovs_mutex_unlock(&dpdk_mutex);

> > > +

> > > +    return;

> > > +}

> > > +

> > >  static int

> > >  netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,

> > >                   enum dpdk_dev_type type)

> > > @@ -718,6 +820,7 @@ netdev_dpdk_init(struct netdev *netdev,

> unsigned int port_no,

> > >      int sid;

> > >      int err = 0;

> > >      uint32_t buf_size;

> > > +    unsigned int nr_q = 0;

> > >

> > >      ovs_mutex_init(&dev->mutex);

> > >      ovs_mutex_lock(&dev->mutex);

> > > @@ -727,11 +830,7 @@ netdev_dpdk_init(struct netdev *netdev,

> unsigned int port_no,

> > >      /* If the 'sid' is negative, it means that the kernel fails

> > >       * to obtain the pci numa info.  In that situation, always

> > >       * use 'SOCKET0'. */

> > > -    if (type == DPDK_DEV_ETH) {

> > > -        sid = rte_eth_dev_socket_id(port_no);

> > > -    } else {

> > > -        sid = rte_lcore_to_socket_id(rte_get_master_lcore());

> > > -    }

> > > +    sid = rte_eth_dev_socket_id(port_no);

> > >

> > >      dev->socket_id = sid < 0 ? SOCKET0 : sid;

> > >      dev->requested_socket_id = dev->socket_id;

> > > @@ -761,17 +860,21 @@ netdev_dpdk_init(struct netdev *netdev,

> unsigned int port_no,

> > >      netdev->n_txq = NR_QUEUE;

> > >      dev->requested_n_rxq = netdev->n_rxq;

> > >      dev->requested_n_txq = netdev->n_txq;

> > > +    dev->vhost_qp_nb = 0;

> > >

> > > -    if (type == DPDK_DEV_ETH) {

> > > -        err = dpdk_eth_dev_init(dev);

> > > -        if (err) {

> > > -            goto unlock;

> > > -        }

> > > -        netdev_dpdk_alloc_txq(dev, netdev->n_txq);

> > > -    } else {

> > > -        netdev_dpdk_alloc_txq(dev, OVS_VHOST_MAX_QUEUE_NUM);

> > > -        /* Enable DPDK_DEV_VHOST device and set promiscuous mode flag.

> */

> > > -        dev->flags = NETDEV_UP | NETDEV_PROMISC;

> > > +    err = dpdk_eth_dev_init(dev);

> > > +    if (err) {

> > > +        goto unlock;

> > > +    }

> > > +    nr_q = (type == DPDK_DEV_ETH ?

> > > +            1 : MIN(OVS_VHOST_MAX_QUEUE_NUM,

> RTE_MAX_QUEUES_PER_PORT));

> > > +    netdev_dpdk_alloc_txq(dev, nr_q);

> > > +

> > > +    if (type == DPDK_DEV_VHOST) {

> > > +        rte_eth_dev_callback_register(port_no,

> RTE_ETH_EVENT_QUEUE_STATE,

> > > +                                      vring_state_changed_callback, NULL);

> > > +        rte_eth_dev_callback_register(port_no,

> RTE_ETH_EVENT_INTR_LSC,

> > > +                                      link_status_changed_callback, NULL);

> > >      }

> > >

> > >      ovs_list_push_back(&dpdk_list, &dev->list_node);

> > > @@ -802,17 +905,48 @@ dpdk_dev_parse_name(const char

> dev_name[], const char prefix[],

> > >      }

> > >  }

> > >

> > > +/* When attaching a vhost device to DPDK, a unique name of the format

> > > + * 'eth_vhostX' is expected, where X is a unique identifier.

> > > + * get_vhost_drv_id returns a valid X value to provide to DPDK.

> > > + */

> > > +static int

> > > +get_vhost_drv_id(void)

> > > +{

> > > +    int i = 0;

> > > +

> > > +    for (i = 0; i < RTE_MAX_ETHPORTS; i++) {

> > > +        if (vhost_drv_ids[i] == 0) {

> > > +            return i;

> > > +        }

> > > +    }

> > > +

> > > +    return -1;

> > > +}

> > > +

> > > +static void

> > > +set_vhost_drv_id(int id, int val)

> > > +{

> > > +    vhost_drv_ids[id] = val;

> > > +}

> > > +

> > >  static int

> > >  netdev_dpdk_vhost_construct(struct netdev *netdev)

> > >  {

> > >      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> > >      const char *name = netdev->name;

> > >      int err;

> > > +    uint8_t port_no = 0;

> > > +    char *devargs;

> > > +    int driver_id = 0;

> > > +

> > > +    if (rte_eal_init_ret) {

> > > +        return rte_eal_init_ret;

> > > +    }

> > >

> > >      /* 'name' is appended to 'vhost_sock_dir' and used to create a socket

> in

> > >       * the file system. '/' or '\' would traverse directories, so they're not

> > >       * acceptable in 'name'. */

> > > -    if (strchr(name, '/') || strchr(name, '\\')) {

> > > +    if (strchr(name, '/') || strchr(name, '\\') || strchr(name, ',')) {

> > >          VLOG_ERR("\"%s\" is not a valid name for a vhost-user port. "

> > >                   "A valid name must not include '/' or '\\'",

> > >                   name);

> > > @@ -829,18 +963,32 @@ netdev_dpdk_vhost_construct(struct netdev

> *netdev)

> > >       */

> > >      snprintf(dev->vhost_id, sizeof(dev->vhost_id), "%s/%s",

> > >               vhost_sock_dir, name);

> > > +    driver_id = get_vhost_drv_id();

> > > +    if (driver_id == -1) {

> > > +        VLOG_ERR("Unable to create vhost-user device %s - too many

> vhost-user"

> > > +                 "devices registered with PMD", dev->vhost_id);

> > > +        err = ENODEV;

> > > +        goto out;

> > > +    } else {

> > > +        devargs = xasprintf("eth_vhost%u,iface=%s,queues=%i",

> > > +                 driver_id, dev->vhost_id,

> > > +                 MIN(OVS_VHOST_MAX_QUEUE_NUM,

> RTE_MAX_QUEUES_PER_PORT));

> > > +        err = rte_eth_dev_attach(devargs, &port_no);

> > > +    }

> > >

> > > -    err = rte_vhost_driver_register(dev->vhost_id);

> > >      if (err) {

> > > -        VLOG_ERR("vhost-user socket device setup failure for socket %s\n",

> > > +        VLOG_ERR("Failed to attach vhost-user device %s to DPDK",

> > >                   dev->vhost_id);

> > >      } else {

> > >          fatal_signal_add_file_to_unlink(dev->vhost_id);

> > >          VLOG_INFO("Socket %s created for vhost-user port %s\n",

> > >                    dev->vhost_id, name);

> > > -        err = netdev_dpdk_init(netdev, -1, DPDK_DEV_VHOST);

> > > +        dev->vhost_pmd_id = driver_id;

> > > +        set_vhost_drv_id(driver_id, 1);

> > > +        err = netdev_dpdk_init(netdev, port_no, DPDK_DEV_VHOST);

> > >      }

> > >

> > > +out:

> > >      ovs_mutex_unlock(&dpdk_mutex);

> > >      return err;

> > >  }

> > > @@ -868,20 +1016,28 @@ netdev_dpdk_construct(struct netdev

> *netdev)

> > >  }

> > >

> > >  static void

> > > -netdev_dpdk_destruct(struct netdev *netdev)

> > > +dpdk_destruct_helper(struct netdev_dpdk *dev)

> > >  {

> > > -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> > > -

> > > -    ovs_mutex_lock(&dev->mutex);

> > >      rte_eth_dev_stop(dev->port_id);

> > >      free(ovsrcu_get_protected(struct ingress_policer *,

> > >                                &dev->ingress_policer));

> > > -    ovs_mutex_unlock(&dev->mutex);

> > >

> > > -    ovs_mutex_lock(&dpdk_mutex);

> > >      rte_free(dev->tx_q);

> > >      ovs_list_remove(&dev->list_node);

> > >      dpdk_mp_put(dev->dpdk_mp);

> > > +}

> > > +

> > > +static void

> > > +netdev_dpdk_destruct(struct netdev *netdev)

> > > +{

> > > +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> > > +

> > > +    ovs_mutex_lock(&dpdk_mutex);

> > > +    ovs_mutex_lock(&dev->mutex);

> > > +

> > > +    dpdk_destruct_helper(dev);

> > > +

> > > +    ovs_mutex_unlock(&dev->mutex);

> > >      ovs_mutex_unlock(&dpdk_mutex);

> > >  }

> > >

> > > @@ -890,30 +1046,19 @@ netdev_dpdk_vhost_destruct(struct netdev

> *netdev)

> > >  {

> > >      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> > >

> > > -    /* Guest becomes an orphan if still attached. */

> > > -    if (netdev_dpdk_get_virtio(dev) != NULL) {

> > > -        VLOG_ERR("Removing port '%s' while vhost device still attached.",

> > > -                 netdev->name);

> > > -        VLOG_ERR("To restore connectivity after re-adding of port, VM on

> socket"

> > > -                 " '%s' must be restarted.",

> > > -                 dev->vhost_id);

> > > -    }

> > > +    ovs_mutex_lock(&dpdk_mutex);

> > > +    ovs_mutex_lock(&dev->mutex);

> > >

> > > -    if (rte_vhost_driver_unregister(dev->vhost_id)) {

> > > -        VLOG_ERR("Unable to remove vhost-user socket %s", dev-

> >vhost_id);

> > > +    if (rte_eth_dev_detach(dev->port_id, dev->vhost_id)) {

> > > +        VLOG_ERR("Error removing vhost device %s", dev->vhost_id);

> > >      } else {

> > >          fatal_signal_remove_file_to_unlink(dev->vhost_id);

> > >      }

> > > +    set_vhost_drv_id(dev->vhost_pmd_id, 0);

> > >

> > > -    ovs_mutex_lock(&dev->mutex);

> > > -    free(ovsrcu_get_protected(struct ingress_policer *,

> > > -                              &dev->ingress_policer));

> > > -    ovs_mutex_unlock(&dev->mutex);

> > > +    dpdk_destruct_helper(dev);

> > >

> > > -    ovs_mutex_lock(&dpdk_mutex);

> > > -    rte_free(dev->tx_q);

> > > -    ovs_list_remove(&dev->list_node);

> > > -    dpdk_mp_put(dev->dpdk_mp);

> > > +    ovs_mutex_unlock(&dev->mutex);

> > >      ovs_mutex_unlock(&dpdk_mutex);

> > >  }

> > >

> > > @@ -1105,117 +1250,6 @@ ingress_policer_run(struct ingress_policer

> *policer, struct rte_mbuf **pkts,

> > >      return cnt;

> > >  }

> > >

> > > -static bool

> > > -is_vhost_running(struct virtio_net *virtio_dev)

> > > -{

> > > -    return (virtio_dev != NULL && (virtio_dev->flags &

> VIRTIO_DEV_RUNNING));

> > > -}

> > > -

> > > -static inline void

> > > -netdev_dpdk_vhost_update_rx_size_counters(struct netdev_stats

> *stats,

> > > -                                          unsigned int packet_size)

> > > -{

> > > -    /* Hard-coded search for the size bucket. */

> > > -    if (packet_size < 256) {

> > > -        if (packet_size >= 128) {

> > > -            stats->rx_128_to_255_packets++;

> > > -        } else if (packet_size <= 64) {

> > > -            stats->rx_1_to_64_packets++;

> > > -        } else {

> > > -            stats->rx_65_to_127_packets++;

> > > -        }

> > > -    } else {

> > > -        if (packet_size >= 1523) {

> > > -            stats->rx_1523_to_max_packets++;

> > > -        } else if (packet_size >= 1024) {

> > > -            stats->rx_1024_to_1522_packets++;

> > > -        } else if (packet_size < 512) {

> > > -            stats->rx_256_to_511_packets++;

> > > -        } else {

> > > -            stats->rx_512_to_1023_packets++;

> > > -        }

> > > -    }

> > > -}

> > > -

> > > -static inline void

> > > -netdev_dpdk_vhost_update_rx_counters(struct netdev_stats *stats,

> > > -                                     struct dp_packet **packets, int count,

> > > -                                     int dropped)

> > > -{

> > > -    int i;

> > > -    unsigned int packet_size;

> > > -    struct dp_packet *packet;

> > > -

> > > -    stats->rx_packets += count;

> > > -    stats->rx_dropped += dropped;

> > > -    for (i = 0; i < count; i++) {

> > > -        packet = packets[i];

> > > -        packet_size = dp_packet_size(packet);

> > > -

> > > -        if (OVS_UNLIKELY(packet_size < ETH_HEADER_LEN)) {

> > > -            /* This only protects the following multicast counting from

> > > -             * too short packets, but it does not stop the packet from

> > > -             * further processing. */

> > > -            stats->rx_errors++;

> > > -            stats->rx_length_errors++;

> > > -            continue;

> > > -        }

> > > -

> > > -        netdev_dpdk_vhost_update_rx_size_counters(stats, packet_size);

> > > -

> > > -        struct eth_header *eh = (struct eth_header *)

> dp_packet_data(packet);

> > > -        if (OVS_UNLIKELY(eth_addr_is_multicast(eh->eth_dst))) {

> > > -            stats->multicast++;

> > > -        }

> > > -

> > > -        stats->rx_bytes += packet_size;

> > > -    }

> > > -}

> > > -

> > > -/*

> > > - * The receive path for the vhost port is the TX path out from guest.

> > > - */

> > > -static int

> > > -netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,

> > > -                           struct dp_packet_batch *batch)

> > > -{

> > > -    struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);

> > > -    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);

> > > -    int qid = rxq->queue_id;

> > > -    struct ingress_policer *policer =

> netdev_dpdk_get_ingress_policer(dev);

> > > -    uint16_t nb_rx = 0;

> > > -    uint16_t dropped = 0;

> > > -

> > > -    if (OVS_UNLIKELY(!is_vhost_running(virtio_dev)

> > > -                     || !(dev->flags & NETDEV_UP))) {

> > > -        return EAGAIN;

> > > -    }

> > > -

> > > -    nb_rx = rte_vhost_dequeue_burst(virtio_dev, qid * VIRTIO_QNUM +

> VIRTIO_TXQ,

> > > -                                    dev->dpdk_mp->mp,

> > > -                                    (struct rte_mbuf **) batch->packets,

> > > -                                    NETDEV_MAX_BURST);

> > > -    if (!nb_rx) {

> > > -        return EAGAIN;

> > > -    }

> > > -

> > > -    if (policer) {

> > > -        dropped = nb_rx;

> > > -        nb_rx = ingress_policer_run(policer,

> > > -                                    (struct rte_mbuf **) batch->packets,

> > > -                                    nb_rx);

> > > -        dropped -= nb_rx;

> > > -    }

> > > -

> > > -    rte_spinlock_lock(&dev->stats_lock);

> > > -    netdev_dpdk_vhost_update_rx_counters(&dev->stats, batch-

> >packets,

> > > -                                         nb_rx, dropped);

> > > -    rte_spinlock_unlock(&dev->stats_lock);

> > > -

> > > -    batch->count = (int) nb_rx;

> > > -    return 0;

> > > -}

> > > -

> > >  static int

> > >  netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch

> *batch)

> > >  {

> > > @@ -1269,85 +1303,6 @@ netdev_dpdk_qos_run__(struct netdev_dpdk

> *dev, struct rte_mbuf **pkts,

> > >      return cnt;

> > >  }

> > >

> > > -static inline void

> > > -netdev_dpdk_vhost_update_tx_counters(struct netdev_stats *stats,

> > > -                                     struct dp_packet **packets,

> > > -                                     int attempted,

> > > -                                     int dropped)

> > > -{

> > > -    int i;

> > > -    int sent = attempted - dropped;

> > > -

> > > -    stats->tx_packets += sent;

> > > -    stats->tx_dropped += dropped;

> > > -

> > > -    for (i = 0; i < sent; i++) {

> > > -        stats->tx_bytes += dp_packet_size(packets[i]);

> > > -    }

> > > -}

> > > -

> > > -static void

> > > -__netdev_dpdk_vhost_send(struct netdev *netdev, int qid,

> > > -                         struct dp_packet **pkts, int cnt,

> > > -                         bool may_steal)

> > > -{

> > > -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> > > -    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);

> > > -    struct rte_mbuf **cur_pkts = (struct rte_mbuf **) pkts;

> > > -    unsigned int total_pkts = cnt;

> > > -    unsigned int qos_pkts = cnt;

> > > -    int retries = 0;

> > > -

> > > -    qid = dev->tx_q[qid % netdev->n_txq].map;

> > > -

> > > -    if (OVS_UNLIKELY(!is_vhost_running(virtio_dev) || qid < 0

> > > -                     || !(dev->flags & NETDEV_UP))) {

> > > -        rte_spinlock_lock(&dev->stats_lock);

> > > -        dev->stats.tx_dropped+= cnt;

> > > -        rte_spinlock_unlock(&dev->stats_lock);

> > > -        goto out;

> > > -    }

> > > -

> > > -    rte_spinlock_lock(&dev->tx_q[qid].tx_lock);

> > > -

> > > -    /* Check has QoS has been configured for the netdev */

> > > -    cnt = netdev_dpdk_qos_run__(dev, cur_pkts, cnt);

> > > -    qos_pkts -= cnt;

> > > -

> > > -    do {

> > > -        int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;

> > > -        unsigned int tx_pkts;

> > > -

> > > -        tx_pkts = rte_vhost_enqueue_burst(virtio_dev, vhost_qid,

> > > -                                          cur_pkts, cnt);

> > > -        if (OVS_LIKELY(tx_pkts)) {

> > > -            /* Packets have been sent.*/

> > > -            cnt -= tx_pkts;

> > > -            /* Prepare for possible retry.*/

> > > -            cur_pkts = &cur_pkts[tx_pkts];

> > > -        } else {

> > > -            /* No packets sent - do not retry.*/

> > > -            break;

> > > -        }

> > > -    } while (cnt && (retries++ < VHOST_ENQ_RETRY_NUM));

> > > -

> > > -    rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);

> > > -

> > > -    rte_spinlock_lock(&dev->stats_lock);

> > > -    cnt += qos_pkts;

> > > -    netdev_dpdk_vhost_update_tx_counters(&dev->stats, pkts,

> total_pkts, cnt);

> > > -    rte_spinlock_unlock(&dev->stats_lock);

> > > -

> > > -out:

> > > -    if (may_steal) {

> > > -        int i;

> > > -

> > > -        for (i = 0; i < total_pkts; i++) {

> > > -            dp_packet_delete(pkts[i]);

> > > -        }

> > > -    }

> > > -}

> > > -

> > >  /* Tx function. Transmit packets indefinitely */

> > >  static void

> > >  dpdk_do_tx_copy(struct netdev *netdev, int qid, struct

> dp_packet_batch *batch)

> > > @@ -1402,18 +1357,13 @@ dpdk_do_tx_copy(struct netdev *netdev, int

> qid, struct dp_packet_batch *batch)

> > >          newcnt++;

> > >      }

> > >

> > > -    if (dev->type == DPDK_DEV_VHOST) {

> > > -        __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **)

> mbufs,

> > > -                                 newcnt, true);

> > > -    } else {

> > > -        unsigned int qos_pkts = newcnt;

> > > +    unsigned int qos_pkts = newcnt;

> > >

> > > -        /* Check if QoS has been configured for this netdev. */

> > > -        newcnt = netdev_dpdk_qos_run__(dev, mbufs, newcnt);

> > > +    /* Check if QoS has been configured for this netdev. */

> > > +    newcnt = netdev_dpdk_qos_run__(dev, mbufs, newcnt);

> > >

> > > -        dropped += qos_pkts - newcnt;

> > > -        netdev_dpdk_eth_tx_burst(dev, qid, mbufs, newcnt);

> > > -    }

> > > +    dropped += qos_pkts - newcnt;

> > > +    netdev_dpdk_eth_tx_burst(dev, qid, mbufs, newcnt);

> > >

> > >      if (OVS_UNLIKELY(dropped)) {

> > >          rte_spinlock_lock(&dev->stats_lock);

> > > @@ -1426,33 +1376,10 @@ dpdk_do_tx_copy(struct netdev *netdev, int

> qid, struct dp_packet_batch *batch)

> > >      }

> > >  }

> > >

> > > -static int

> > > -netdev_dpdk_vhost_send(struct netdev *netdev, int qid,

> > > -                       struct dp_packet_batch *batch,

> > > -                       bool may_steal, bool concurrent_txq OVS_UNUSED)

> > > -{

> > > -

> > > -    if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {

> > > -        dpdk_do_tx_copy(netdev, qid, batch);

> > > -        dp_packet_delete_batch(batch, may_steal);

> > > -    } else {

> > > -        dp_packet_batch_apply_cutlen(batch);

> > > -        __netdev_dpdk_vhost_send(netdev, qid, batch->packets, batch-

> >count,

> > > -                                 may_steal);

> > > -    }

> > > -    return 0;

> > > -}

> > > -

> > >  static inline void

> > >  netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,

> > > -                   struct dp_packet_batch *batch, bool may_steal,

> > > -                   bool concurrent_txq)

> > > +                   struct dp_packet_batch *batch, bool may_steal)

> > >  {

> > > -    if (OVS_UNLIKELY(concurrent_txq)) {

> > > -        qid = qid % dev->up.n_txq;

> > > -        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);

> > > -    }

> > > -

> > >      if (OVS_UNLIKELY(!may_steal ||

> > >                       batch->packets[0]->source != DPBUF_DPDK)) {

> > >          struct netdev *netdev = &dev->up;

> > > @@ -1512,20 +1439,50 @@ netdev_dpdk_send__(struct netdev_dpdk

> *dev, int qid,

> > >              rte_spinlock_unlock(&dev->stats_lock);

> > >          }

> > >      }

> > > +}

> > > +

> > > +static int

> > > +netdev_dpdk_eth_send(struct netdev *netdev, int qid,

> > > +                     struct dp_packet_batch *batch, bool may_steal,

> > > +                     bool concurrent_txq)

> > > +{

> > > +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> > > +

> > > +    if (OVS_UNLIKELY(concurrent_txq)) {

> > > +        qid = qid % dev->up.n_txq;

> > > +        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);

> > > +    }

> > > +

> > > +    netdev_dpdk_send__(dev, qid, batch, may_steal);

> > >

> > >      if (OVS_UNLIKELY(concurrent_txq)) {

> > >          rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);

> > >      }

> > > +

> > > +    return 0;

> > >  }

> > >

> > >  static int

> > > -netdev_dpdk_eth_send(struct netdev *netdev, int qid,

> > > -                     struct dp_packet_batch *batch, bool may_steal,

> > > -                     bool concurrent_txq)

> > > +netdev_dpdk_vhost_send(struct netdev *netdev, int qid,

> > > +                       struct dp_packet_batch *batch, bool may_steal,

> > > +                       bool concurrent_txq OVS_UNUSED)

> > >  {

> > >      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> > >

> > > -    netdev_dpdk_send__(dev, qid, batch, may_steal, concurrent_txq);

> > > +    qid = dev->tx_q[qid % netdev->n_txq].map;

> > > +    if (qid == -1) {

> > > +        rte_spinlock_lock(&dev->stats_lock);

> > > +        dev->stats.tx_dropped+= batch->count;

> > > +        rte_spinlock_unlock(&dev->stats_lock);

> > > +        if (may_steal) {

> > > +            dp_packet_delete_batch(batch, may_steal);

> > > +        }

> > > +    } else {

> > > +        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);

> > > +        netdev_dpdk_send__(dev, qid, batch, may_steal);

> > > +        rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);

> > > +    }

> > > +

> > >      return 0;

> > >  }

> > >

> > > @@ -1622,41 +1579,6 @@ out:

> > >  static int

> > >  netdev_dpdk_get_carrier(const struct netdev *netdev, bool *carrier);

> > >

> > > -static int

> > > -netdev_dpdk_vhost_get_stats(const struct netdev *netdev,

> > > -                            struct netdev_stats *stats)

> > > -{

> > > -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> > > -

> > > -    ovs_mutex_lock(&dev->mutex);

> > > -

> > > -    rte_spinlock_lock(&dev->stats_lock);

> > > -    /* Supported Stats */

> > > -    stats->rx_packets += dev->stats.rx_packets;

> > > -    stats->tx_packets += dev->stats.tx_packets;

> > > -    stats->rx_dropped = dev->stats.rx_dropped;

> > > -    stats->tx_dropped += dev->stats.tx_dropped;

> > > -    stats->multicast = dev->stats.multicast;

> > > -    stats->rx_bytes = dev->stats.rx_bytes;

> > > -    stats->tx_bytes = dev->stats.tx_bytes;

> > > -    stats->rx_errors = dev->stats.rx_errors;

> > > -    stats->rx_length_errors = dev->stats.rx_length_errors;

> > > -

> > > -    stats->rx_1_to_64_packets = dev->stats.rx_1_to_64_packets;

> > > -    stats->rx_65_to_127_packets = dev->stats.rx_65_to_127_packets;

> > > -    stats->rx_128_to_255_packets = dev->stats.rx_128_to_255_packets;

> > > -    stats->rx_256_to_511_packets = dev->stats.rx_256_to_511_packets;

> > > -    stats->rx_512_to_1023_packets = dev-

> >stats.rx_512_to_1023_packets;

> > > -    stats->rx_1024_to_1522_packets = dev-

> >stats.rx_1024_to_1522_packets;

> > > -    stats->rx_1523_to_max_packets = dev-

> >stats.rx_1523_to_max_packets;

> > > -

> > > -    rte_spinlock_unlock(&dev->stats_lock);

> > > -

> > > -    ovs_mutex_unlock(&dev->mutex);

> > > -

> > > -    return 0;

> > > -}

> > > -

> > >  static void

> > >  netdev_dpdk_convert_xstats(struct netdev_stats *stats,

> > >                             const struct rte_eth_xstats *xstats,

> > > @@ -1737,28 +1659,40 @@ netdev_dpdk_get_stats(const struct netdev

> *netdev, struct netdev_stats *stats)

> > >          return EPROTO;

> > >      }

> > >

> > > -    rte_xstats_len = rte_eth_xstats_get(dev->port_id, NULL, 0);

> > > -    if (rte_xstats_len > 0) {

> > > -        rte_xstats = dpdk_rte_mzalloc(sizeof(*rte_xstats) * rte_xstats_len);

> > > -        memset(rte_xstats, 0xff, sizeof(*rte_xstats) * rte_xstats_len);

> > > -        rte_xstats_ret = rte_eth_xstats_get(dev->port_id, rte_xstats,

> > > -                                            rte_xstats_len);

> > > -        if (rte_xstats_ret > 0 && rte_xstats_ret <= rte_xstats_len) {

> > > -            netdev_dpdk_convert_xstats(stats, rte_xstats, rte_xstats_ret);

> > > +    /* Extended statistics are not yet available for vHost User PMD */

> > > +    if (dev->type == DPDK_DEV_ETH) {

> > > +        rte_xstats_len = rte_eth_xstats_get(dev->port_id, NULL, 0);

> > > +        if (rte_xstats_len > 0) {

> > > +            rte_xstats = dpdk_rte_mzalloc(sizeof(*rte_xstats)

> > > +                                          * rte_xstats_len);

> > > +            memset(rte_xstats, 0xff, sizeof(*rte_xstats) * rte_xstats_len);

> > > +            rte_xstats_ret = rte_eth_xstats_get(dev->port_id, rte_xstats,

> > > +                                                rte_xstats_len);

> > > +            if (rte_xstats_ret > 0 && rte_xstats_ret <= rte_xstats_len) {

> > > +                netdev_dpdk_convert_xstats(stats, rte_xstats, rte_xstats_ret);

> > > +            }

> > > +            rte_free(rte_xstats);

> > > +        } else {

> > > +            VLOG_WARN("Can't get XSTATS counters for port: %i.", dev-

> >port_id);

> > >          }

> > > -        rte_free(rte_xstats);

> > > -    } else {

> > > -        VLOG_WARN("Can't get XSTATS counters for port: %i.", dev-

> >port_id);

> > >      }

> > >

> > >      stats->rx_packets = rte_stats.ipackets;

> > >      stats->tx_packets = rte_stats.opackets;

> > >      stats->rx_bytes = rte_stats.ibytes;

> > >      stats->tx_bytes = rte_stats.obytes;

> > > -    /* DPDK counts imissed as errors, but count them here as dropped

> instead */

> > > -    stats->rx_errors = rte_stats.ierrors - rte_stats.imissed;

> > > -    stats->tx_errors = rte_stats.oerrors;

> > > -    stats->multicast = rte_stats.imcasts;

> > > +

> > > +    if (dev->type == DPDK_DEV_ETH) {

> > > +        /* DPDK counts imissed as errors, but count them here as dropped

> > > +         * instead */

> > > +        stats->rx_errors = rte_stats.ierrors - rte_stats.imissed;

> > > +        stats->tx_errors = rte_stats.oerrors;

> > > +        stats->multicast = rte_stats.imcasts;

> > > +    } else {

> > > +        stats->rx_errors = UINT64_MAX;

> > > +        stats->tx_errors = UINT64_MAX;

> > > +        stats->multicast = UINT64_MAX;

> > > +    }

> > >

> > >      rte_spinlock_lock(&dev->stats_lock);

> > >      stats->tx_dropped = dev->stats.tx_dropped;

> > > @@ -1921,25 +1855,6 @@ netdev_dpdk_get_carrier(const struct netdev

> *netdev, bool *carrier)

> > >      return 0;

> > >  }

> > >

> > > -static int

> > > -netdev_dpdk_vhost_get_carrier(const struct netdev *netdev, bool

> *carrier)

> > > -{

> > > -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> > > -    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);

> > > -

> > > -    ovs_mutex_lock(&dev->mutex);

> > > -

> > > -    if (is_vhost_running(virtio_dev)) {

> > > -        *carrier = 1;

> > > -    } else {

> > > -        *carrier = 0;

> > > -    }

> > > -

> > > -    ovs_mutex_unlock(&dev->mutex);

> > > -

> > > -    return 0;

> > > -}

> > > -

> > >  static long long int

> > >  netdev_dpdk_get_carrier_resets(const struct netdev *netdev)

> > >  {

> > > @@ -1995,13 +1910,10 @@ netdev_dpdk_update_flags__(struct

> netdev_dpdk *dev,

> > >              rte_eth_dev_stop(dev->port_id);

> > >          }

> > >      } else {

> > > -        /* If DPDK_DEV_VHOST device's NETDEV_UP flag was changed and

> vhost is

> > > -         * running then change netdev's change_seq to trigger link state

> > > -         * update. */

> > > -        struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);

> > > +        /* If DPDK_DEV_VHOST device's NETDEV_UP flag was changed then

> change

> > > +         * netdev's change_seq to trigger link state update. */

> > >

> > > -        if ((NETDEV_UP & ((*old_flagsp ^ on) | (*old_flagsp ^ off)))

> > > -            && is_vhost_running(virtio_dev)) {

> > > +        if ((NETDEV_UP & ((*old_flagsp ^ on) | (*old_flagsp ^ off)))) {

> > >              netdev_change_seq_changed(&dev->up);

> > >

> > >              /* Clear statistics if device is getting up. */

> > > @@ -2096,7 +2008,7 @@ netdev_dpdk_set_admin_state(struct

> unixctl_conn *conn, int argc,

> > >

> > >      if (argc > 2) {

> > >          struct netdev *netdev = netdev_from_name(argv[1]);

> > > -        if (netdev && is_dpdk_class(netdev->netdev_class)) {

> > > +        if (netdev && is_dpdk_eth_class(netdev->netdev_class)) {

> > >              struct netdev_dpdk *dpdk_dev = netdev_dpdk_cast(netdev);

> > >

> > >              ovs_mutex_lock(&dpdk_dev->mutex);

> > > @@ -2124,22 +2036,6 @@ netdev_dpdk_set_admin_state(struct

> unixctl_conn *conn, int argc,

> > >  }

> > >

> > >  /*

> > > - * Set virtqueue flags so that we do not receive interrupts.

> > > - */

> > > -static void

> > > -set_irq_status(struct virtio_net *virtio_dev)

> > > -{

> > > -    uint32_t i;

> > > -    uint64_t idx;

> > > -

> > > -    for (i = 0; i < virtio_dev->virt_qp_nb; i++) {

> > > -        idx = i * VIRTIO_QNUM;

> > > -        rte_vhost_enable_guest_notification(virtio_dev, idx + VIRTIO_RXQ,

> 0);

> > > -        rte_vhost_enable_guest_notification(virtio_dev, idx + VIRTIO_TXQ,

> 0);

> > > -    }

> > > -}

> > > -

> > > -/*

> > >   * Fixes mapping for vhost-user tx queues. Must be called after each

> > >   * enabling/disabling of queues and n_txq modifications.

> > >   */

> > > @@ -2180,62 +2076,6 @@ netdev_dpdk_remap_txqs(struct

> netdev_dpdk *dev)

> > >      rte_free(enabled_queues);

> > >  }

> > >

> > > -/*

> > > - * A new virtio-net device is added to a vhost port.

> > > - */

> > > -static int

> > > -new_device(struct virtio_net *virtio_dev)

> > > -{

> > > -    struct netdev_dpdk *dev;

> > > -    bool exists = false;

> > > -    int newnode = 0;

> > > -    long err = 0;

> > > -

> > > -    ovs_mutex_lock(&dpdk_mutex);

> > > -    /* Add device to the vhost port with the same name as that passed

> down. */

> > > -    LIST_FOR_EACH(dev, list_node, &dpdk_list) {

> > > -        if (strncmp(virtio_dev->ifname, dev->vhost_id, IF_NAME_SZ) == 0) {

> > > -            uint32_t qp_num = virtio_dev->virt_qp_nb;

> > > -

> > > -            ovs_mutex_lock(&dev->mutex);

> > > -            /* Get NUMA information */

> > > -            err = get_mempolicy(&newnode, NULL, 0, virtio_dev,

> > > -                                MPOL_F_NODE | MPOL_F_ADDR);

> > > -            if (err) {

> > > -                VLOG_INFO("Error getting NUMA info for vHost Device '%s'",

> > > -                        virtio_dev->ifname);

> > > -                newnode = dev->socket_id;

> > > -            }

> > > -

> > > -            dev->requested_socket_id = newnode;

> > > -            dev->requested_n_rxq = qp_num;

> > > -            dev->requested_n_txq = qp_num;

> > > -            netdev_request_reconfigure(&dev->up);

> > > -

> > > -            ovsrcu_set(&dev->virtio_dev, virtio_dev);

> > > -            exists = true;

> > > -

> > > -            /* Disable notifications. */

> > > -            set_irq_status(virtio_dev);

> > > -            netdev_change_seq_changed(&dev->up);

> > > -            ovs_mutex_unlock(&dev->mutex);

> > > -            break;

> > > -        }

> > > -    }

> > > -    ovs_mutex_unlock(&dpdk_mutex);

> > > -

> > > -    if (!exists) {

> > > -        VLOG_INFO("vHost Device '%s' %"PRIu64" can't be added - name

> not "

> > > -                  "found", virtio_dev->ifname, virtio_dev->device_fh);

> > > -

> > > -        return -1;

> > > -    }

> > > -

> > > -    VLOG_INFO("vHost Device '%s' %"PRIu64" has been added on numa

> node %i",

> > > -              virtio_dev->ifname, virtio_dev->device_fh, newnode);

> > > -    return 0;

> > > -}

> > > -

> > >  /* Clears mapping for all available queues of vhost interface. */

> > >  static void

> > >  netdev_dpdk_txq_map_clear(struct netdev_dpdk *dev)

> > > @@ -2248,144 +2088,18 @@ netdev_dpdk_txq_map_clear(struct

> netdev_dpdk *dev)

> > >      }

> > >  }

> > >

> > > -/*

> > > - * Remove a virtio-net device from the specific vhost port.  Use dev-

> >remove

> > > - * flag to stop any more packets from being sent or received to/from a

> VM and

> > > - * ensure all currently queued packets have been sent/received before

> removing

> > > - *  the device.

> > > - */

> > > -static void

> > > -destroy_device(volatile struct virtio_net *virtio_dev)

> > > -{

> > > -    struct netdev_dpdk *dev;

> > > -    bool exists = false;

> > > -

> > > -    ovs_mutex_lock(&dpdk_mutex);

> > > -    LIST_FOR_EACH (dev, list_node, &dpdk_list) {

> > > -        if (netdev_dpdk_get_virtio(dev) == virtio_dev) {

> > > -

> > > -            ovs_mutex_lock(&dev->mutex);

> > > -            virtio_dev->flags &= ~VIRTIO_DEV_RUNNING;

> > > -            ovsrcu_set(&dev->virtio_dev, NULL);

> > > -            /* Clear tx/rx queue settings. */

> > > -            netdev_dpdk_txq_map_clear(dev);

> > > -            dev->requested_n_rxq = NR_QUEUE;

> > > -            dev->requested_n_txq = NR_QUEUE;

> > > -            netdev_request_reconfigure(&dev->up);

> > > -

> > > -            netdev_change_seq_changed(&dev->up);

> > > -            ovs_mutex_unlock(&dev->mutex);

> > > -            exists = true;

> > > -            break;

> > > -        }

> > > -    }

> > > -

> > > -    ovs_mutex_unlock(&dpdk_mutex);

> > > -

> > > -    if (exists == true) {

> > > -        /*

> > > -         * Wait for other threads to quiesce after setting the 'virtio_dev'

> > > -         * to NULL, before returning.

> > > -         */

> > > -        ovsrcu_synchronize();

> > > -        /*

> > > -         * As call to ovsrcu_synchronize() will end the quiescent state,

> > > -         * put thread back into quiescent state before returning.

> > > -         */

> > > -        ovsrcu_quiesce_start();

> > > -        VLOG_INFO("vHost Device '%s' %"PRIu64" has been removed",

> > > -                  virtio_dev->ifname, virtio_dev->device_fh);

> > > -    } else {

> > > -        VLOG_INFO("vHost Device '%s' %"PRIu64" not found", virtio_dev-

> >ifname,

> > > -                  virtio_dev->device_fh);

> > > -    }

> > > -}

> > > -

> > > -static int

> > > -vring_state_changed(struct virtio_net *virtio_dev, uint16_t queue_id,

> > > -                    int enable)

> > > -{

> > > -    struct netdev_dpdk *dev;

> > > -    bool exists = false;

> > > -    int qid = queue_id / VIRTIO_QNUM;

> > > -

> > > -    if (queue_id % VIRTIO_QNUM == VIRTIO_TXQ) {

> > > -        return 0;

> > > -    }

> > > -

> > > -    ovs_mutex_lock(&dpdk_mutex);

> > > -    LIST_FOR_EACH (dev, list_node, &dpdk_list) {

> > > -        if (strncmp(virtio_dev->ifname, dev->vhost_id, IF_NAME_SZ) == 0) {

> > > -            ovs_mutex_lock(&dev->mutex);

> > > -            if (enable) {

> > > -                dev->tx_q[qid].map = qid;

> > > -            } else {

> > > -                dev->tx_q[qid].map = OVS_VHOST_QUEUE_DISABLED;

> > > -            }

> > > -            netdev_dpdk_remap_txqs(dev);

> > > -            exists = true;

> > > -            ovs_mutex_unlock(&dev->mutex);

> > > -            break;

> > > -        }

> > > -    }

> > > -    ovs_mutex_unlock(&dpdk_mutex);

> > > -

> > > -    if (exists) {

> > > -        VLOG_INFO("State of queue %d ( tx_qid %d ) of vhost device '%s'

> %"

> > > -                  PRIu64" changed to \'%s\'", queue_id, qid,

> > > -                  virtio_dev->ifname, virtio_dev->device_fh,

> > > -                  (enable == 1) ? "enabled" : "disabled");

> > > -    } else {

> > > -        VLOG_INFO("vHost Device '%s' %"PRIu64" not found", virtio_dev-

> >ifname,

> > > -                  virtio_dev->device_fh);

> > > -        return -1;

> > > -    }

> > > -

> > > -    return 0;

> > > -}

> > > -

> > > -struct virtio_net *

> > > -netdev_dpdk_get_virtio(const struct netdev_dpdk *dev)

> > > -{

> > > -    return ovsrcu_get(struct virtio_net *, &dev->virtio_dev);

> > > -}

> > > -

> > >  struct ingress_policer *

> > >  netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev)

> > >  {

> > >      return ovsrcu_get(struct ingress_policer *, &dev->ingress_policer);

> > >  }

> > >

> > > -/*

> > > - * These callbacks allow virtio-net devices to be added to vhost ports

> when

> > > - * configuration has been fully complete.

> > > - */

> > > -static const struct virtio_net_device_ops virtio_net_device_ops =

> > > -{

> > > -    .new_device =  new_device,

> > > -    .destroy_device = destroy_device,

> > > -    .vring_state_changed = vring_state_changed

> > > -};

> > > -

> > > -static void *

> > > -start_vhost_loop(void *dummy OVS_UNUSED)

> > > -{

> > > -     pthread_detach(pthread_self());

> > > -     /* Put the vhost thread into quiescent state. */

> > > -     ovsrcu_quiesce_start();

> > > -     rte_vhost_driver_session_start();

> > > -     return NULL;

> > > -}

> > > -

> > >  static int

> > >  dpdk_vhost_class_init(void)

> > >  {

> > > -    rte_vhost_driver_callback_register(&virtio_net_device_ops);

> > > -    rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4

> > > -                            | 1ULL << VIRTIO_NET_F_HOST_TSO6

> > > -                            | 1ULL << VIRTIO_NET_F_CSUM);

> > > -

> > > -    ovs_thread_create("vhost_thread", start_vhost_loop, NULL);

> > > +    rte_eth_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4

> > > +                                | 1ULL << VIRTIO_NET_F_HOST_TSO6

> > > +                                | 1ULL << VIRTIO_NET_F_CSUM);

> > >      return 0;

> > >  }

> > >

> > > @@ -2498,7 +2212,17 @@ netdev_dpdk_ring_send(struct netdev

> *netdev, int qid,

> > >          dp_packet_rss_invalidate(batch->packets[i]);

> > >      }

> > >

> > > -    netdev_dpdk_send__(dev, qid, batch, may_steal, concurrent_txq);

> > > +    if (OVS_UNLIKELY(concurrent_txq)) {

> > > +        qid = qid % dev->up.n_txq;

> > > +        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);

> > > +    }

> > > +

> > > +    netdev_dpdk_send__(dev, qid, batch, may_steal);

> > > +

> > > +    if (OVS_UNLIKELY(concurrent_txq)) {

> > > +        rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);

> > > +    }

> > > +

> > >      return 0;

> > >  }

> > >

> > > @@ -2787,7 +2511,6 @@ static int

> > >  netdev_dpdk_vhost_reconfigure(struct netdev *netdev)

> > >  {

> > >      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);

> > > -    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);

> > >      int err = 0;

> > >

> > >      ovs_mutex_lock(&dpdk_mutex);

> > > @@ -2813,10 +2536,6 @@ netdev_dpdk_vhost_reconfigure(struct

> netdev *netdev)

> > >          }

> > >      }

> > >

> > > -    if (virtio_dev) {

> > > -        virtio_dev->flags |= VIRTIO_DEV_RUNNING;

> > > -    }

> > > -

> > >      ovs_mutex_unlock(&dev->mutex);

> > >      ovs_mutex_unlock(&dpdk_mutex);

> > >

> > > @@ -3307,12 +3026,12 @@ static const struct netdev_class OVS_UNUSED

> dpdk_vhost_class =

> > >          NULL,

> > >          NULL,

> > >          netdev_dpdk_vhost_send,

> > > -        netdev_dpdk_vhost_get_carrier,

> > > -        netdev_dpdk_vhost_get_stats,

> > > +        netdev_dpdk_get_carrier,

> > > +        netdev_dpdk_get_stats,

> > >          NULL,

> > >          NULL,

> > >          netdev_dpdk_vhost_reconfigure,

> > > -        netdev_dpdk_vhost_rxq_recv);

> > > +        netdev_dpdk_rxq_recv);

> > >

> > >  void

> > >  netdev_dpdk_register(void)

> > >
diff mbox

Patch

diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md
index 7609aa7..4feb7be 100644
--- a/INSTALL.DPDK.md
+++ b/INSTALL.DPDK.md
@@ -604,6 +604,16 @@  can be found in [Vhost Walkthrough].
 
     http://dpdk.org/doc/guides/rel_notes/release_16_04.html
 
+  - dpdk, dpdkr and dpdkvhostuser ports are all 'eth' type ports in the
+    context of DPDK, as they are all managed by the rte_ether API. They are
+    therefore subject to the DPDK configuration option
+    CONFIG_RTE_MAX_ETHPORTS, which is set to 32 by default. As a result, the
+    combined total number of dpdk, dpdkr and dpdkvhostuser ports allowed in
+    OVS with DPDK is 32 by default. This value can be changed, if desired, by
+    modifying the DPDK configuration file or by overriding the default value
+    on the command line when building DPDK, e.g.:
+
+        `make install CONFIG_RTE_MAX_ETHPORTS=64`
 
 Bug Reporting:
 --------------
diff --git a/NEWS b/NEWS
index dc3dedb..6510dde 100644
--- a/NEWS
+++ b/NEWS
@@ -64,6 +64,8 @@  Post-v2.5.0
      * Basic connection tracking for the userspace datapath (no ALG,
        fragmentation or NAT support yet)
      * Remove dpdkvhostcuse port type.
+     * vHost PMD integration brings vhost-user ports under control of the
+       rte_ether DPDK API.
    - Increase number of registers to 16.
    - ovs-benchmark: This utility has been removed due to lack of use and
      bitrot.
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index d6959fe..d6ceeec 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -30,7 +30,6 @@ 
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <getopt.h>
-#include <numaif.h>
 
 #include "dirs.h"
 #include "dp-packet.h"
@@ -56,9 +55,9 @@ 
 #include "unixctl.h"
 
 #include "rte_config.h"
+#include "rte_eth_vhost.h"
 #include "rte_mbuf.h"
 #include "rte_meter.h"
-#include "rte_virtio_net.h"
 
 VLOG_DEFINE_THIS_MODULE(dpdk);
 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
@@ -141,6 +140,9 @@  static char *vhost_sock_dir = NULL;   /* Location of vhost-user sockets */
 
 #define VHOST_ENQ_RETRY_NUM 8
 
+/* Array that tracks the used and unused vHost User driver IDs. */
+static unsigned int vhost_drv_ids[RTE_MAX_ETHPORTS];
+
 static const struct rte_eth_conf port_conf = {
     .rxmode = {
         .mq_mode = ETH_MQ_RX_RSS,
@@ -346,12 +348,15 @@  struct netdev_dpdk {
     struct rte_eth_link link;
     int link_reset_cnt;
 
-    /* virtio-net structure for vhost device */
-    OVSRCU_TYPE(struct virtio_net *) virtio_dev;
+    /* Number of virtqueue pairs reported by the guest */
+    uint32_t vhost_qp_nb;
 
     /* Identifier used to distinguish vhost devices from each other */
     char vhost_id[PATH_MAX];
 
+    /* ID of vhost user port given to the PMD driver */
+    unsigned int vhost_pmd_id;
+
     /* In dpdk_list. */
     struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);
 
@@ -382,16 +387,23 @@  struct netdev_rxq_dpdk {
 static bool dpdk_thread_is_pmd(void);
 
 static int netdev_dpdk_construct(struct netdev *);
-
-struct virtio_net * netdev_dpdk_get_virtio(const struct netdev_dpdk *dev);
+static int netdev_dpdk_vhost_construct(struct netdev *);
 
 struct ingress_policer *
 netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev);
 
+static void link_status_changed_callback(uint8_t port_id,
+        enum rte_eth_event_type type, void *param);
+static void vring_state_changed_callback(uint8_t port_id,
+        enum rte_eth_event_type type, void *param);
+static void netdev_dpdk_remap_txqs(struct netdev_dpdk *dev);
+static void netdev_dpdk_txq_map_clear(struct netdev_dpdk *dev);
+
 static bool
-is_dpdk_class(const struct netdev_class *class)
+is_dpdk_eth_class(const struct netdev_class *class)
 {
-    return class->construct == netdev_dpdk_construct;
+    return ((class->construct == netdev_dpdk_construct) ||
+            (class->construct == netdev_dpdk_vhost_construct));
 }
 
 /* DPDK NIC drivers allocate RX buffers at a particular granularity, typically
@@ -616,8 +628,13 @@  dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int n_rxq, int n_txq)
             continue;
         }
 
-        dev->up.n_rxq = n_rxq;
-        dev->up.n_txq = n_txq;
+        /* Only set n_*xq for physical devices. vHost User devices will set
+         * this value correctly using info from the virtio backend.
+         */
+        if (dev->type == DPDK_DEV_ETH) {
+            dev->up.n_rxq = n_rxq;
+            dev->up.n_txq = n_txq;
+        }
 
         return 0;
     }
@@ -641,8 +658,14 @@  dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex)
 
     rte_eth_dev_info_get(dev->port_id, &info);
 
-    n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq);
-    n_txq = MIN(info.max_tx_queues, dev->up.n_txq);
+    if (dev->type == DPDK_DEV_VHOST) {
+        /* We don't know how many queues QEMU will use so set up the max */
+        n_rxq = MIN(OVS_VHOST_MAX_QUEUE_NUM, RTE_MAX_QUEUES_PER_PORT);
+        n_txq = MIN(OVS_VHOST_MAX_QUEUE_NUM, RTE_MAX_QUEUES_PER_PORT);
+    } else {
+        n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq);
+        n_txq = MIN(info.max_tx_queues, dev->up.n_txq);
+    }
 
     diag = dpdk_eth_dev_queue_setup(dev, n_rxq, n_txq);
     if (diag) {
@@ -709,6 +732,85 @@  netdev_dpdk_alloc_txq(struct netdev_dpdk *dev, unsigned int n_txqs)
     }
 }
 
+void
+link_status_changed_callback(uint8_t port_id,
+                             enum rte_eth_event_type type OVS_UNUSED,
+                             void *param OVS_UNUSED)
+{
+    struct netdev_dpdk *dev;
+    int socket_id = -1;
+
+    ovs_mutex_lock(&dpdk_mutex);
+    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
+        if (port_id == dev->port_id) {
+            ovs_mutex_lock(&dev->mutex);
+            check_link_status(dev);
+            if (dev->link.link_status == ETH_LINK_UP) {
+                /* new device */
+                /* Get NUMA information */
+                socket_id = rte_eth_dev_socket_id(dev->port_id);
+                if (socket_id != -1 && socket_id != dev->socket_id) {
+                    dev->requested_socket_id = socket_id;
+                }
+                netdev_request_reconfigure(&dev->up);
+                netdev_change_seq_changed(&dev->up);
+                VLOG_INFO("vHost Device '%s' has been added on numa node %i",
+                          dev->vhost_id, socket_id);
+            } else {
+                /* destroy device */
+                /* Clear tx/rx queue settings. */
+                netdev_dpdk_txq_map_clear(dev);
+                netdev_request_reconfigure(&dev->up);
+                netdev_change_seq_changed(&dev->up);
+                VLOG_INFO("vHost Device '%s' has been removed", dev->vhost_id);
+            }
+            ovs_mutex_unlock(&dev->mutex);
+            break;
+        }
+    }
+
+    ovs_mutex_unlock(&dpdk_mutex);
+
+    return;
+}
+
+void
+vring_state_changed_callback(uint8_t port_id,
+                             enum rte_eth_event_type type OVS_UNUSED,
+                             void *param OVS_UNUSED)
+{
+    struct netdev_dpdk *dev;
+    struct rte_eth_vhost_queue_event event;
+    int err = 0;
+
+    err = rte_eth_vhost_get_queue_event(port_id, &event);
+    if (err || event.rx) {
+        return;
+    }
+
+    ovs_mutex_lock(&dpdk_mutex);
+    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
+        if (port_id == dev->port_id) {
+            ovs_mutex_lock(&dev->mutex);
+            if (event.enable) {
+                dev->tx_q[event.queue_id].map = event.queue_id;
+                dev->vhost_qp_nb++;
+            } else {
+                dev->tx_q[event.queue_id].map = OVS_VHOST_QUEUE_DISABLED;
+                dev->vhost_qp_nb--;
+            }
+            dev->requested_n_rxq = dev->vhost_qp_nb;
+            dev->requested_n_txq = dev->vhost_qp_nb;
+            netdev_request_reconfigure(&dev->up);
+            ovs_mutex_unlock(&dev->mutex);
+            break;
+        }
+    }
+    ovs_mutex_unlock(&dpdk_mutex);
+
+    return;
+}
+
 static int
 netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,
                  enum dpdk_dev_type type)
@@ -718,6 +820,7 @@  netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,
     int sid;
     int err = 0;
     uint32_t buf_size;
+    unsigned int nr_q = 0;
 
     ovs_mutex_init(&dev->mutex);
     ovs_mutex_lock(&dev->mutex);
@@ -727,11 +830,7 @@  netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,
     /* If the 'sid' is negative, it means that the kernel fails
      * to obtain the pci numa info.  In that situation, always
      * use 'SOCKET0'. */
-    if (type == DPDK_DEV_ETH) {
-        sid = rte_eth_dev_socket_id(port_no);
-    } else {
-        sid = rte_lcore_to_socket_id(rte_get_master_lcore());
-    }
+    sid = rte_eth_dev_socket_id(port_no);
 
     dev->socket_id = sid < 0 ? SOCKET0 : sid;
     dev->requested_socket_id = dev->socket_id;
@@ -761,17 +860,21 @@  netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,
     netdev->n_txq = NR_QUEUE;
     dev->requested_n_rxq = netdev->n_rxq;
     dev->requested_n_txq = netdev->n_txq;
+    dev->vhost_qp_nb = 0;
 
-    if (type == DPDK_DEV_ETH) {
-        err = dpdk_eth_dev_init(dev);
-        if (err) {
-            goto unlock;
-        }
-        netdev_dpdk_alloc_txq(dev, netdev->n_txq);
-    } else {
-        netdev_dpdk_alloc_txq(dev, OVS_VHOST_MAX_QUEUE_NUM);
-        /* Enable DPDK_DEV_VHOST device and set promiscuous mode flag. */
-        dev->flags = NETDEV_UP | NETDEV_PROMISC;
+    err = dpdk_eth_dev_init(dev);
+    if (err) {
+        goto unlock;
+    }
+    nr_q = (type == DPDK_DEV_ETH ?
+            1 : MIN(OVS_VHOST_MAX_QUEUE_NUM, RTE_MAX_QUEUES_PER_PORT));
+    netdev_dpdk_alloc_txq(dev, nr_q);
+
+    if (type == DPDK_DEV_VHOST) {
+        rte_eth_dev_callback_register(port_no, RTE_ETH_EVENT_QUEUE_STATE,
+                                      vring_state_changed_callback, NULL);
+        rte_eth_dev_callback_register(port_no, RTE_ETH_EVENT_INTR_LSC,
+                                      link_status_changed_callback, NULL);
     }
 
     ovs_list_push_back(&dpdk_list, &dev->list_node);
@@ -802,17 +905,48 @@  dpdk_dev_parse_name(const char dev_name[], const char prefix[],
     }
 }
 
+/* DPDK expects each attached vhost device to carry a unique name of the
+ * form 'eth_vhostX'.  Returns the lowest X whose slot in 'vhost_drv_ids'
+ * is still zero (unclaimed), or -1 if all RTE_MAX_ETHPORTS slots are taken.
+ */
+static int
+get_vhost_drv_id(void)
+{
+    int id = 0;
+
+    while (id < RTE_MAX_ETHPORTS) {
+        if (!vhost_drv_ids[id]) {
+            return id;
+        }
+        id++;
+    }
+    return -1;
+}
+
+static void
+set_vhost_drv_id(int id, int val)
+{
+    vhost_drv_ids[id] = val; /* Callers pass 1 to claim, 0 to release. */
+}
+
 static int
 netdev_dpdk_vhost_construct(struct netdev *netdev)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
     const char *name = netdev->name;
     int err;
+    uint8_t port_no = 0;
+    char *devargs;
+    int driver_id = 0;
+
+    if (rte_eal_init_ret) {
+        return rte_eal_init_ret;
+    }
 
     /* 'name' is appended to 'vhost_sock_dir' and used to create a socket in
      * the file system. '/' or '\' would traverse directories, so they're not
      * acceptable in 'name'. */
-    if (strchr(name, '/') || strchr(name, '\\')) {
+    if (strchr(name, '/') || strchr(name, '\\') || strchr(name, ',')) {
         VLOG_ERR("\"%s\" is not a valid name for a vhost-user port. "
                  "A valid name must not include '/' or '\\'",
                  name);
@@ -829,18 +963,32 @@  netdev_dpdk_vhost_construct(struct netdev *netdev)
      */
     snprintf(dev->vhost_id, sizeof(dev->vhost_id), "%s/%s",
              vhost_sock_dir, name);
+    driver_id = get_vhost_drv_id();
+    if (driver_id == -1) {
+        VLOG_ERR("Unable to create vhost-user device %s - too many vhost-user "
+                 "devices registered with PMD", dev->vhost_id);
+        err = ENODEV;
+        goto out;
+    } else {
+        devargs = xasprintf("eth_vhost%u,iface=%s,queues=%i",
+                 driver_id, dev->vhost_id,
+                 MIN(OVS_VHOST_MAX_QUEUE_NUM, RTE_MAX_QUEUES_PER_PORT));
+        err = rte_eth_dev_attach(devargs, &port_no);
+    }
 
-    err = rte_vhost_driver_register(dev->vhost_id);
     if (err) {
-        VLOG_ERR("vhost-user socket device setup failure for socket %s\n",
+        VLOG_ERR("Failed to attach vhost-user device %s to DPDK",
                  dev->vhost_id);
     } else {
         fatal_signal_add_file_to_unlink(dev->vhost_id);
         VLOG_INFO("Socket %s created for vhost-user port %s\n",
                   dev->vhost_id, name);
-        err = netdev_dpdk_init(netdev, -1, DPDK_DEV_VHOST);
+        dev->vhost_pmd_id = driver_id;
+        set_vhost_drv_id(driver_id, 1);
+        err = netdev_dpdk_init(netdev, port_no, DPDK_DEV_VHOST);
     }
 
+out:
     ovs_mutex_unlock(&dpdk_mutex);
     return err;
 }
@@ -868,20 +1016,28 @@  netdev_dpdk_construct(struct netdev *netdev)
 }
 
 static void
-netdev_dpdk_destruct(struct netdev *netdev)
+dpdk_destruct_helper(struct netdev_dpdk *dev)
 {
-    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
-
-    ovs_mutex_lock(&dev->mutex);
     rte_eth_dev_stop(dev->port_id);
     free(ovsrcu_get_protected(struct ingress_policer *,
                               &dev->ingress_policer));
-    ovs_mutex_unlock(&dev->mutex);
 
-    ovs_mutex_lock(&dpdk_mutex);
     rte_free(dev->tx_q);
     ovs_list_remove(&dev->list_node);
     dpdk_mp_put(dev->dpdk_mp);
+}
+
+static void
+netdev_dpdk_destruct(struct netdev *netdev)
+{
+    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+
+    ovs_mutex_lock(&dpdk_mutex);
+    ovs_mutex_lock(&dev->mutex);
+
+    dpdk_destruct_helper(dev);
+
+    ovs_mutex_unlock(&dev->mutex);
     ovs_mutex_unlock(&dpdk_mutex);
 }
 
@@ -890,30 +1046,19 @@  netdev_dpdk_vhost_destruct(struct netdev *netdev)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
 
-    /* Guest becomes an orphan if still attached. */
-    if (netdev_dpdk_get_virtio(dev) != NULL) {
-        VLOG_ERR("Removing port '%s' while vhost device still attached.",
-                 netdev->name);
-        VLOG_ERR("To restore connectivity after re-adding of port, VM on socket"
-                 " '%s' must be restarted.",
-                 dev->vhost_id);
-    }
+    ovs_mutex_lock(&dpdk_mutex);
+    ovs_mutex_lock(&dev->mutex);
 
-    if (rte_vhost_driver_unregister(dev->vhost_id)) {
-        VLOG_ERR("Unable to remove vhost-user socket %s", dev->vhost_id);
+    if (rte_eth_dev_detach(dev->port_id, dev->vhost_id)) {
+        VLOG_ERR("Error removing vhost device %s", dev->vhost_id);
     } else {
         fatal_signal_remove_file_to_unlink(dev->vhost_id);
     }
+    set_vhost_drv_id(dev->vhost_pmd_id, 0);
 
-    ovs_mutex_lock(&dev->mutex);
-    free(ovsrcu_get_protected(struct ingress_policer *,
-                              &dev->ingress_policer));
-    ovs_mutex_unlock(&dev->mutex);
+    dpdk_destruct_helper(dev);
 
-    ovs_mutex_lock(&dpdk_mutex);
-    rte_free(dev->tx_q);
-    ovs_list_remove(&dev->list_node);
-    dpdk_mp_put(dev->dpdk_mp);
+    ovs_mutex_unlock(&dev->mutex);
     ovs_mutex_unlock(&dpdk_mutex);
 }
 
@@ -1105,117 +1250,6 @@  ingress_policer_run(struct ingress_policer *policer, struct rte_mbuf **pkts,
     return cnt;
 }
 
-static bool
-is_vhost_running(struct virtio_net *virtio_dev)
-{
-    return (virtio_dev != NULL && (virtio_dev->flags & VIRTIO_DEV_RUNNING));
-}
-
-static inline void
-netdev_dpdk_vhost_update_rx_size_counters(struct netdev_stats *stats,
-                                          unsigned int packet_size)
-{
-    /* Hard-coded search for the size bucket. */
-    if (packet_size < 256) {
-        if (packet_size >= 128) {
-            stats->rx_128_to_255_packets++;
-        } else if (packet_size <= 64) {
-            stats->rx_1_to_64_packets++;
-        } else {
-            stats->rx_65_to_127_packets++;
-        }
-    } else {
-        if (packet_size >= 1523) {
-            stats->rx_1523_to_max_packets++;
-        } else if (packet_size >= 1024) {
-            stats->rx_1024_to_1522_packets++;
-        } else if (packet_size < 512) {
-            stats->rx_256_to_511_packets++;
-        } else {
-            stats->rx_512_to_1023_packets++;
-        }
-    }
-}
-
-static inline void
-netdev_dpdk_vhost_update_rx_counters(struct netdev_stats *stats,
-                                     struct dp_packet **packets, int count,
-                                     int dropped)
-{
-    int i;
-    unsigned int packet_size;
-    struct dp_packet *packet;
-
-    stats->rx_packets += count;
-    stats->rx_dropped += dropped;
-    for (i = 0; i < count; i++) {
-        packet = packets[i];
-        packet_size = dp_packet_size(packet);
-
-        if (OVS_UNLIKELY(packet_size < ETH_HEADER_LEN)) {
-            /* This only protects the following multicast counting from
-             * too short packets, but it does not stop the packet from
-             * further processing. */
-            stats->rx_errors++;
-            stats->rx_length_errors++;
-            continue;
-        }
-
-        netdev_dpdk_vhost_update_rx_size_counters(stats, packet_size);
-
-        struct eth_header *eh = (struct eth_header *) dp_packet_data(packet);
-        if (OVS_UNLIKELY(eth_addr_is_multicast(eh->eth_dst))) {
-            stats->multicast++;
-        }
-
-        stats->rx_bytes += packet_size;
-    }
-}
-
-/*
- * The receive path for the vhost port is the TX path out from guest.
- */
-static int
-netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
-                           struct dp_packet_batch *batch)
-{
-    struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
-    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
-    int qid = rxq->queue_id;
-    struct ingress_policer *policer = netdev_dpdk_get_ingress_policer(dev);
-    uint16_t nb_rx = 0;
-    uint16_t dropped = 0;
-
-    if (OVS_UNLIKELY(!is_vhost_running(virtio_dev)
-                     || !(dev->flags & NETDEV_UP))) {
-        return EAGAIN;
-    }
-
-    nb_rx = rte_vhost_dequeue_burst(virtio_dev, qid * VIRTIO_QNUM + VIRTIO_TXQ,
-                                    dev->dpdk_mp->mp,
-                                    (struct rte_mbuf **) batch->packets,
-                                    NETDEV_MAX_BURST);
-    if (!nb_rx) {
-        return EAGAIN;
-    }
-
-    if (policer) {
-        dropped = nb_rx;
-        nb_rx = ingress_policer_run(policer,
-                                    (struct rte_mbuf **) batch->packets,
-                                    nb_rx);
-        dropped -= nb_rx;
-    }
-
-    rte_spinlock_lock(&dev->stats_lock);
-    netdev_dpdk_vhost_update_rx_counters(&dev->stats, batch->packets,
-                                         nb_rx, dropped);
-    rte_spinlock_unlock(&dev->stats_lock);
-
-    batch->count = (int) nb_rx;
-    return 0;
-}
-
 static int
 netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct dp_packet_batch *batch)
 {
@@ -1269,85 +1303,6 @@  netdev_dpdk_qos_run__(struct netdev_dpdk *dev, struct rte_mbuf **pkts,
     return cnt;
 }
 
-static inline void
-netdev_dpdk_vhost_update_tx_counters(struct netdev_stats *stats,
-                                     struct dp_packet **packets,
-                                     int attempted,
-                                     int dropped)
-{
-    int i;
-    int sent = attempted - dropped;
-
-    stats->tx_packets += sent;
-    stats->tx_dropped += dropped;
-
-    for (i = 0; i < sent; i++) {
-        stats->tx_bytes += dp_packet_size(packets[i]);
-    }
-}
-
-static void
-__netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
-                         struct dp_packet **pkts, int cnt,
-                         bool may_steal)
-{
-    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
-    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
-    struct rte_mbuf **cur_pkts = (struct rte_mbuf **) pkts;
-    unsigned int total_pkts = cnt;
-    unsigned int qos_pkts = cnt;
-    int retries = 0;
-
-    qid = dev->tx_q[qid % netdev->n_txq].map;
-
-    if (OVS_UNLIKELY(!is_vhost_running(virtio_dev) || qid < 0
-                     || !(dev->flags & NETDEV_UP))) {
-        rte_spinlock_lock(&dev->stats_lock);
-        dev->stats.tx_dropped+= cnt;
-        rte_spinlock_unlock(&dev->stats_lock);
-        goto out;
-    }
-
-    rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
-
-    /* Check has QoS has been configured for the netdev */
-    cnt = netdev_dpdk_qos_run__(dev, cur_pkts, cnt);
-    qos_pkts -= cnt;
-
-    do {
-        int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;
-        unsigned int tx_pkts;
-
-        tx_pkts = rte_vhost_enqueue_burst(virtio_dev, vhost_qid,
-                                          cur_pkts, cnt);
-        if (OVS_LIKELY(tx_pkts)) {
-            /* Packets have been sent.*/
-            cnt -= tx_pkts;
-            /* Prepare for possible retry.*/
-            cur_pkts = &cur_pkts[tx_pkts];
-        } else {
-            /* No packets sent - do not retry.*/
-            break;
-        }
-    } while (cnt && (retries++ < VHOST_ENQ_RETRY_NUM));
-
-    rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
-
-    rte_spinlock_lock(&dev->stats_lock);
-    cnt += qos_pkts;
-    netdev_dpdk_vhost_update_tx_counters(&dev->stats, pkts, total_pkts, cnt);
-    rte_spinlock_unlock(&dev->stats_lock);
-
-out:
-    if (may_steal) {
-        int i;
-
-        for (i = 0; i < total_pkts; i++) {
-            dp_packet_delete(pkts[i]);
-        }
-    }
-}
-
 /* Tx function. Transmit packets indefinitely */
 static void
 dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
@@ -1402,18 +1357,13 @@  dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
         newcnt++;
     }
 
-    if (dev->type == DPDK_DEV_VHOST) {
-        __netdev_dpdk_vhost_send(netdev, qid, (struct dp_packet **) mbufs,
-                                 newcnt, true);
-    } else {
-        unsigned int qos_pkts = newcnt;
+    unsigned int qos_pkts = newcnt;
 
-        /* Check if QoS has been configured for this netdev. */
-        newcnt = netdev_dpdk_qos_run__(dev, mbufs, newcnt);
+    /* Check if QoS has been configured for this netdev. */
+    newcnt = netdev_dpdk_qos_run__(dev, mbufs, newcnt);
 
-        dropped += qos_pkts - newcnt;
-        netdev_dpdk_eth_tx_burst(dev, qid, mbufs, newcnt);
-    }
+    dropped += qos_pkts - newcnt;
+    netdev_dpdk_eth_tx_burst(dev, qid, mbufs, newcnt);
 
     if (OVS_UNLIKELY(dropped)) {
         rte_spinlock_lock(&dev->stats_lock);
@@ -1426,33 +1376,10 @@  dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
     }
 }
 
-static int
-netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
-                       struct dp_packet_batch *batch,
-                       bool may_steal, bool concurrent_txq OVS_UNUSED)
-{
-
-    if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
-        dpdk_do_tx_copy(netdev, qid, batch);
-        dp_packet_delete_batch(batch, may_steal);
-    } else {
-        dp_packet_batch_apply_cutlen(batch);
-        __netdev_dpdk_vhost_send(netdev, qid, batch->packets, batch->count,
-                                 may_steal);
-    }
-    return 0;
-}
-
 static inline void
 netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
-                   struct dp_packet_batch *batch, bool may_steal,
-                   bool concurrent_txq)
+                   struct dp_packet_batch *batch, bool may_steal)
 {
-    if (OVS_UNLIKELY(concurrent_txq)) {
-        qid = qid % dev->up.n_txq;
-        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
-    }
-
     if (OVS_UNLIKELY(!may_steal ||
                      batch->packets[0]->source != DPBUF_DPDK)) {
         struct netdev *netdev = &dev->up;
@@ -1512,20 +1439,50 @@  netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
             rte_spinlock_unlock(&dev->stats_lock);
         }
     }
+}
+
+static int
+netdev_dpdk_eth_send(struct netdev *netdev, int qid,
+                     struct dp_packet_batch *batch, bool may_steal,
+                     bool concurrent_txq)
+{
+    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+
+    if (OVS_UNLIKELY(concurrent_txq)) {
+        qid = qid % dev->up.n_txq;
+        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
+    }
+
+    netdev_dpdk_send__(dev, qid, batch, may_steal);
 
     if (OVS_UNLIKELY(concurrent_txq)) {
         rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
     }
+
+    return 0;
 }
 
 static int
-netdev_dpdk_eth_send(struct netdev *netdev, int qid,
-                     struct dp_packet_batch *batch, bool may_steal,
-                     bool concurrent_txq)
+netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
+                       struct dp_packet_batch *batch, bool may_steal,
+                       bool concurrent_txq OVS_UNUSED)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
 
-    netdev_dpdk_send__(dev, qid, batch, may_steal, concurrent_txq);
+    qid = dev->tx_q[qid % netdev->n_txq].map;
+    if (qid < 0) {
+        /* A negative map is never a valid 'tx_q' index: drop the batch. */
+        rte_spinlock_lock(&dev->stats_lock);
+        dev->stats.tx_dropped += batch->count;
+        rte_spinlock_unlock(&dev->stats_lock);
+        if (may_steal) {
+            dp_packet_delete_batch(batch, may_steal);
+        }
+    } else {
+        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
+        netdev_dpdk_send__(dev, qid, batch, may_steal);
+        rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
+    }
     return 0;
 }
 
@@ -1622,41 +1579,6 @@  out:
 static int
 netdev_dpdk_get_carrier(const struct netdev *netdev, bool *carrier);
 
-static int
-netdev_dpdk_vhost_get_stats(const struct netdev *netdev,
-                            struct netdev_stats *stats)
-{
-    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
-
-    ovs_mutex_lock(&dev->mutex);
-
-    rte_spinlock_lock(&dev->stats_lock);
-    /* Supported Stats */
-    stats->rx_packets += dev->stats.rx_packets;
-    stats->tx_packets += dev->stats.tx_packets;
-    stats->rx_dropped = dev->stats.rx_dropped;
-    stats->tx_dropped += dev->stats.tx_dropped;
-    stats->multicast = dev->stats.multicast;
-    stats->rx_bytes = dev->stats.rx_bytes;
-    stats->tx_bytes = dev->stats.tx_bytes;
-    stats->rx_errors = dev->stats.rx_errors;
-    stats->rx_length_errors = dev->stats.rx_length_errors;
-
-    stats->rx_1_to_64_packets = dev->stats.rx_1_to_64_packets;
-    stats->rx_65_to_127_packets = dev->stats.rx_65_to_127_packets;
-    stats->rx_128_to_255_packets = dev->stats.rx_128_to_255_packets;
-    stats->rx_256_to_511_packets = dev->stats.rx_256_to_511_packets;
-    stats->rx_512_to_1023_packets = dev->stats.rx_512_to_1023_packets;
-    stats->rx_1024_to_1522_packets = dev->stats.rx_1024_to_1522_packets;
-    stats->rx_1523_to_max_packets = dev->stats.rx_1523_to_max_packets;
-
-    rte_spinlock_unlock(&dev->stats_lock);
-
-    ovs_mutex_unlock(&dev->mutex);
-
-    return 0;
-}
-
 static void
 netdev_dpdk_convert_xstats(struct netdev_stats *stats,
                            const struct rte_eth_xstats *xstats,
@@ -1737,28 +1659,40 @@  netdev_dpdk_get_stats(const struct netdev *netdev, struct netdev_stats *stats)
         return EPROTO;
     }
 
-    rte_xstats_len = rte_eth_xstats_get(dev->port_id, NULL, 0);
-    if (rte_xstats_len > 0) {
-        rte_xstats = dpdk_rte_mzalloc(sizeof(*rte_xstats) * rte_xstats_len);
-        memset(rte_xstats, 0xff, sizeof(*rte_xstats) * rte_xstats_len);
-        rte_xstats_ret = rte_eth_xstats_get(dev->port_id, rte_xstats,
-                                            rte_xstats_len);
-        if (rte_xstats_ret > 0 && rte_xstats_ret <= rte_xstats_len) {
-            netdev_dpdk_convert_xstats(stats, rte_xstats, rte_xstats_ret);
+    /* Extended statistics are not yet available for vHost User PMD */
+    if (dev->type == DPDK_DEV_ETH) {
+        rte_xstats_len = rte_eth_xstats_get(dev->port_id, NULL, 0);
+        if (rte_xstats_len > 0) {
+            rte_xstats = dpdk_rte_mzalloc(sizeof(*rte_xstats)
+                                          * rte_xstats_len);
+            memset(rte_xstats, 0xff, sizeof(*rte_xstats) * rte_xstats_len);
+            rte_xstats_ret = rte_eth_xstats_get(dev->port_id, rte_xstats,
+                                                rte_xstats_len);
+            if (rte_xstats_ret > 0 && rte_xstats_ret <= rte_xstats_len) {
+                netdev_dpdk_convert_xstats(stats, rte_xstats, rte_xstats_ret);
+            }
+            rte_free(rte_xstats);
+        } else {
+            VLOG_WARN("Can't get XSTATS counters for port: %i.", dev->port_id);
         }
-        rte_free(rte_xstats);
-    } else {
-        VLOG_WARN("Can't get XSTATS counters for port: %i.", dev->port_id);
     }
 
     stats->rx_packets = rte_stats.ipackets;
     stats->tx_packets = rte_stats.opackets;
     stats->rx_bytes = rte_stats.ibytes;
     stats->tx_bytes = rte_stats.obytes;
-    /* DPDK counts imissed as errors, but count them here as dropped instead */
-    stats->rx_errors = rte_stats.ierrors - rte_stats.imissed;
-    stats->tx_errors = rte_stats.oerrors;
-    stats->multicast = rte_stats.imcasts;
+
+    if (dev->type == DPDK_DEV_ETH) {
+        /* DPDK counts imissed as errors, but count them here as dropped
+         * instead */
+        stats->rx_errors = rte_stats.ierrors - rte_stats.imissed;
+        stats->tx_errors = rte_stats.oerrors;
+        stats->multicast = rte_stats.imcasts;
+    } else {
+        stats->rx_errors = UINT64_MAX;
+        stats->tx_errors = UINT64_MAX;
+        stats->multicast = UINT64_MAX;
+    }
 
     rte_spinlock_lock(&dev->stats_lock);
     stats->tx_dropped = dev->stats.tx_dropped;
@@ -1921,25 +1855,6 @@  netdev_dpdk_get_carrier(const struct netdev *netdev, bool *carrier)
     return 0;
 }
 
-static int
-netdev_dpdk_vhost_get_carrier(const struct netdev *netdev, bool *carrier)
-{
-    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
-    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
-
-    ovs_mutex_lock(&dev->mutex);
-
-    if (is_vhost_running(virtio_dev)) {
-        *carrier = 1;
-    } else {
-        *carrier = 0;
-    }
-
-    ovs_mutex_unlock(&dev->mutex);
-
-    return 0;
-}
-
 static long long int
 netdev_dpdk_get_carrier_resets(const struct netdev *netdev)
 {
@@ -1995,13 +1910,10 @@  netdev_dpdk_update_flags__(struct netdev_dpdk *dev,
             rte_eth_dev_stop(dev->port_id);
         }
     } else {
-        /* If DPDK_DEV_VHOST device's NETDEV_UP flag was changed and vhost is
-         * running then change netdev's change_seq to trigger link state
-         * update. */
-        struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
+        /* If DPDK_DEV_VHOST device's NETDEV_UP flag was changed then change
+         * netdev's change_seq to trigger link state update. */
 
-        if ((NETDEV_UP & ((*old_flagsp ^ on) | (*old_flagsp ^ off)))
-            && is_vhost_running(virtio_dev)) {
+        if ((NETDEV_UP & ((*old_flagsp ^ on) | (*old_flagsp ^ off)))) {
             netdev_change_seq_changed(&dev->up);
 
             /* Clear statistics if device is getting up. */
@@ -2096,7 +2008,7 @@  netdev_dpdk_set_admin_state(struct unixctl_conn *conn, int argc,
 
     if (argc > 2) {
         struct netdev *netdev = netdev_from_name(argv[1]);
-        if (netdev && is_dpdk_class(netdev->netdev_class)) {
+        if (netdev && is_dpdk_eth_class(netdev->netdev_class)) {
             struct netdev_dpdk *dpdk_dev = netdev_dpdk_cast(netdev);
 
             ovs_mutex_lock(&dpdk_dev->mutex);
@@ -2124,22 +2036,6 @@  netdev_dpdk_set_admin_state(struct unixctl_conn *conn, int argc,
 }
 
 /*
- * Set virtqueue flags so that we do not receive interrupts.
- */
-static void
-set_irq_status(struct virtio_net *virtio_dev)
-{
-    uint32_t i;
-    uint64_t idx;
-
-    for (i = 0; i < virtio_dev->virt_qp_nb; i++) {
-        idx = i * VIRTIO_QNUM;
-        rte_vhost_enable_guest_notification(virtio_dev, idx + VIRTIO_RXQ, 0);
-        rte_vhost_enable_guest_notification(virtio_dev, idx + VIRTIO_TXQ, 0);
-    }
-}
-
-/*
  * Fixes mapping for vhost-user tx queues. Must be called after each
  * enabling/disabling of queues and n_txq modifications.
  */
@@ -2180,62 +2076,6 @@  netdev_dpdk_remap_txqs(struct netdev_dpdk *dev)
     rte_free(enabled_queues);
 }
 
-/*
- * A new virtio-net device is added to a vhost port.
- */
-static int
-new_device(struct virtio_net *virtio_dev)
-{
-    struct netdev_dpdk *dev;
-    bool exists = false;
-    int newnode = 0;
-    long err = 0;
-
-    ovs_mutex_lock(&dpdk_mutex);
-    /* Add device to the vhost port with the same name as that passed down. */
-    LIST_FOR_EACH(dev, list_node, &dpdk_list) {
-        if (strncmp(virtio_dev->ifname, dev->vhost_id, IF_NAME_SZ) == 0) {
-            uint32_t qp_num = virtio_dev->virt_qp_nb;
-
-            ovs_mutex_lock(&dev->mutex);
-            /* Get NUMA information */
-            err = get_mempolicy(&newnode, NULL, 0, virtio_dev,
-                                MPOL_F_NODE | MPOL_F_ADDR);
-            if (err) {
-                VLOG_INFO("Error getting NUMA info for vHost Device '%s'",
-                        virtio_dev->ifname);
-                newnode = dev->socket_id;
-            }
-
-            dev->requested_socket_id = newnode;
-            dev->requested_n_rxq = qp_num;
-            dev->requested_n_txq = qp_num;
-            netdev_request_reconfigure(&dev->up);
-
-            ovsrcu_set(&dev->virtio_dev, virtio_dev);
-            exists = true;
-
-            /* Disable notifications. */
-            set_irq_status(virtio_dev);
-            netdev_change_seq_changed(&dev->up);
-            ovs_mutex_unlock(&dev->mutex);
-            break;
-        }
-    }
-    ovs_mutex_unlock(&dpdk_mutex);
-
-    if (!exists) {
-        VLOG_INFO("vHost Device '%s' %"PRIu64" can't be added - name not "
-                  "found", virtio_dev->ifname, virtio_dev->device_fh);
-
-        return -1;
-    }
-
-    VLOG_INFO("vHost Device '%s' %"PRIu64" has been added on numa node %i",
-              virtio_dev->ifname, virtio_dev->device_fh, newnode);
-    return 0;
-}
-
 /* Clears mapping for all available queues of vhost interface. */
 static void
 netdev_dpdk_txq_map_clear(struct netdev_dpdk *dev)
@@ -2248,144 +2088,18 @@  netdev_dpdk_txq_map_clear(struct netdev_dpdk *dev)
     }
 }
 
-/*
- * Remove a virtio-net device from the specific vhost port.  Use dev->remove
- * flag to stop any more packets from being sent or received to/from a VM and
- * ensure all currently queued packets have been sent/received before removing
- *  the device.
- */
-static void
-destroy_device(volatile struct virtio_net *virtio_dev)
-{
-    struct netdev_dpdk *dev;
-    bool exists = false;
-
-    ovs_mutex_lock(&dpdk_mutex);
-    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
-        if (netdev_dpdk_get_virtio(dev) == virtio_dev) {
-
-            ovs_mutex_lock(&dev->mutex);
-            virtio_dev->flags &= ~VIRTIO_DEV_RUNNING;
-            ovsrcu_set(&dev->virtio_dev, NULL);
-            /* Clear tx/rx queue settings. */
-            netdev_dpdk_txq_map_clear(dev);
-            dev->requested_n_rxq = NR_QUEUE;
-            dev->requested_n_txq = NR_QUEUE;
-            netdev_request_reconfigure(&dev->up);
-
-            netdev_change_seq_changed(&dev->up);
-            ovs_mutex_unlock(&dev->mutex);
-            exists = true;
-            break;
-        }
-    }
-
-    ovs_mutex_unlock(&dpdk_mutex);
-
-    if (exists == true) {
-        /*
-         * Wait for other threads to quiesce after setting the 'virtio_dev'
-         * to NULL, before returning.
-         */
-        ovsrcu_synchronize();
-        /*
-         * As call to ovsrcu_synchronize() will end the quiescent state,
-         * put thread back into quiescent state before returning.
-         */
-        ovsrcu_quiesce_start();
-        VLOG_INFO("vHost Device '%s' %"PRIu64" has been removed",
-                  virtio_dev->ifname, virtio_dev->device_fh);
-    } else {
-        VLOG_INFO("vHost Device '%s' %"PRIu64" not found", virtio_dev->ifname,
-                  virtio_dev->device_fh);
-    }
-}
-
-static int
-vring_state_changed(struct virtio_net *virtio_dev, uint16_t queue_id,
-                    int enable)
-{
-    struct netdev_dpdk *dev;
-    bool exists = false;
-    int qid = queue_id / VIRTIO_QNUM;
-
-    if (queue_id % VIRTIO_QNUM == VIRTIO_TXQ) {
-        return 0;
-    }
-
-    ovs_mutex_lock(&dpdk_mutex);
-    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
-        if (strncmp(virtio_dev->ifname, dev->vhost_id, IF_NAME_SZ) == 0) {
-            ovs_mutex_lock(&dev->mutex);
-            if (enable) {
-                dev->tx_q[qid].map = qid;
-            } else {
-                dev->tx_q[qid].map = OVS_VHOST_QUEUE_DISABLED;
-            }
-            netdev_dpdk_remap_txqs(dev);
-            exists = true;
-            ovs_mutex_unlock(&dev->mutex);
-            break;
-        }
-    }
-    ovs_mutex_unlock(&dpdk_mutex);
-
-    if (exists) {
-        VLOG_INFO("State of queue %d ( tx_qid %d ) of vhost device '%s' %"
-                  PRIu64" changed to \'%s\'", queue_id, qid,
-                  virtio_dev->ifname, virtio_dev->device_fh,
-                  (enable == 1) ? "enabled" : "disabled");
-    } else {
-        VLOG_INFO("vHost Device '%s' %"PRIu64" not found", virtio_dev->ifname,
-                  virtio_dev->device_fh);
-        return -1;
-    }
-
-    return 0;
-}
-
-struct virtio_net *
-netdev_dpdk_get_virtio(const struct netdev_dpdk *dev)
-{
-    return ovsrcu_get(struct virtio_net *, &dev->virtio_dev);
-}
-
 struct ingress_policer *
 netdev_dpdk_get_ingress_policer(const struct netdev_dpdk *dev)
 {
     return ovsrcu_get(struct ingress_policer *, &dev->ingress_policer);
 }
 
-/*
- * These callbacks allow virtio-net devices to be added to vhost ports when
- * configuration has been fully complete.
- */
-static const struct virtio_net_device_ops virtio_net_device_ops =
-{
-    .new_device =  new_device,
-    .destroy_device = destroy_device,
-    .vring_state_changed = vring_state_changed
-};
-
-static void *
-start_vhost_loop(void *dummy OVS_UNUSED)
-{
-     pthread_detach(pthread_self());
-     /* Put the vhost thread into quiescent state. */
-     ovsrcu_quiesce_start();
-     rte_vhost_driver_session_start();
-     return NULL;
-}
-
 static int
 dpdk_vhost_class_init(void)
 {
-    rte_vhost_driver_callback_register(&virtio_net_device_ops);
-    rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4
-                            | 1ULL << VIRTIO_NET_F_HOST_TSO6
-                            | 1ULL << VIRTIO_NET_F_CSUM);
-
-    ovs_thread_create("vhost_thread", start_vhost_loop, NULL);
+    rte_eth_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4
+                                | 1ULL << VIRTIO_NET_F_HOST_TSO6
+                                | 1ULL << VIRTIO_NET_F_CSUM);
     return 0;
 }
 
@@ -2498,7 +2212,17 @@  netdev_dpdk_ring_send(struct netdev *netdev, int qid,
         dp_packet_rss_invalidate(batch->packets[i]);
     }
 
-    netdev_dpdk_send__(dev, qid, batch, may_steal, concurrent_txq);
+    if (OVS_UNLIKELY(concurrent_txq)) {
+        qid = qid % dev->up.n_txq;
+        rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
+    }
+
+    netdev_dpdk_send__(dev, qid, batch, may_steal);
+
+    if (OVS_UNLIKELY(concurrent_txq)) {
+        rte_spinlock_unlock(&dev->tx_q[qid].tx_lock);
+    }
+
     return 0;
 }
 
@@ -2787,7 +2511,6 @@  static int
 netdev_dpdk_vhost_reconfigure(struct netdev *netdev)
 {
     struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
-    struct virtio_net *virtio_dev = netdev_dpdk_get_virtio(dev);
     int err = 0;
 
     ovs_mutex_lock(&dpdk_mutex);
@@ -2813,10 +2536,6 @@  netdev_dpdk_vhost_reconfigure(struct netdev *netdev)
         }
     }
 
-    if (virtio_dev) {
-        virtio_dev->flags |= VIRTIO_DEV_RUNNING;
-    }
-
     ovs_mutex_unlock(&dev->mutex);
     ovs_mutex_unlock(&dpdk_mutex);
 
@@ -3307,12 +3026,12 @@  static const struct netdev_class OVS_UNUSED dpdk_vhost_class =
         NULL,
         NULL,
         netdev_dpdk_vhost_send,
-        netdev_dpdk_vhost_get_carrier,
-        netdev_dpdk_vhost_get_stats,
+        netdev_dpdk_get_carrier,
+        netdev_dpdk_get_stats,
         NULL,
         NULL,
         netdev_dpdk_vhost_reconfigure,
-        netdev_dpdk_vhost_rxq_recv);
+        netdev_dpdk_rxq_recv);
 
 void
 netdev_dpdk_register(void)