@@ -570,6 +570,49 @@ For users wanting to do packet forwarding using kernel stack below are the steps
where `-L`: Changes the numbers of channels of the specified network device
and `combined`: Changes the number of multi-purpose channels.
+ 4. OVS vHost client-mode & vHost reconnect (OPTIONAL)
+
+ By default, OVS DPDK acts as the vHost socket server for dpdkvhostuser
+ ports and QEMU acts as the vHost client. This means OVS creates and
+ manages the vHost socket and QEMU is the client which connects to the
+ vHost server (OVS). In QEMU v2.7 the option is available for QEMU to act
+ as the vHost server meaning the roles can be reversed and OVS can become
+    the vHost client. To enable client mode for a given dpdkvhostuser port,
+ one must specify a valid 'vhost-server-path' like so:
+
+ ```
+ ovs-vsctl set Interface dpdkvhostuser0 options:vhost-server-path=/path/to/socket
+ ```
+
+ Setting this value automatically switches the port to client mode (from
+ OVS' perspective). 'vhost-server-path' reflects the full path of the
+ socket that has been or will be created by QEMU for the given vHost User
+ port. Once a path is specified, the port will remain in 'client' mode
+    for the remainder of its lifetime, i.e. it cannot be reverted back to
+ server mode.
+
+ One must append ',server' to the 'chardev' arguments on the QEMU command
+ line, to instruct QEMU to use vHost server mode for a given interface,
+ like so:
+
+    ```
+    -chardev socket,id=char0,path=/path/to/socket,server
+    ```
+
+ If the corresponding dpdkvhostuser port has not yet been configured in
+ OVS with vhost-server-path=/path/to/socket, QEMU will print a log
+ similar to the following:
+
+ `QEMU waiting for connection on: disconnected:unix:/path/to/socket,server`
+
+    QEMU will wait until the port is created successfully in OVS to boot the
+ VM.
+
+ One benefit of using this mode is the ability for vHost ports to
+ 'reconnect' in event of the switch crashing or being brought down. Once
+ it is brought back up, the vHost ports will reconnect automatically and
+ normal service will resume.
+
- VM Configuration with libvirt
* change the user/group, access control policty and restart libvirtd.
@@ -72,6 +72,7 @@ Post-v2.5.0
* Optional support for DPDK pdump enabled.
* Jumbo frame support
* Remove dpdkvhostcuse port type.
+ * OVS client mode for vHost and vHost reconnect (Requires QEMU 2.7)
- Increase number of registers to 16.
- ovs-benchmark: This utility has been removed due to lack of use and
bitrot.
@@ -356,10 +356,9 @@ struct netdev_dpdk {
/* True if vHost device is 'up' and has been reconfigured at least once */
bool vhost_reconfigured;
- /* Identifier used to distinguish vhost devices from each other. It does
- * not change during the lifetime of a struct netdev_dpdk. It can be read
- * without holding any mutex. */
- const char vhost_id[PATH_MAX];
+ /* Identifiers used to distinguish vhost devices from each other. */
+ char vhost_server_id[PATH_MAX];
+ char vhost_client_id[PATH_MAX];
/* In dpdk_list. */
struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);
@@ -378,6 +377,9 @@ struct netdev_dpdk {
/* Socket ID detected when vHost device is brought up */
int requested_socket_id;
+ /* Denotes whether vHost port is client/server mode */
+ uint64_t vhost_driver_flags;
+
/* Ingress Policer */
OVSRCU_TYPE(struct ingress_policer *) ingress_policer;
uint32_t policer_rate;
@@ -812,6 +814,8 @@ netdev_dpdk_init(struct netdev *netdev, unsigned int port_no,
dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
ovsrcu_index_init(&dev->vid, -1);
dev->vhost_reconfigured = false;
+ /* initialise vHost port in server mode */
+ dev->vhost_driver_flags &= ~RTE_VHOST_USER_CLIENT;
err = netdev_dpdk_mempool_configure(dev);
if (err) {
@@ -874,13 +878,22 @@ dpdk_dev_parse_name(const char dev_name[], const char prefix[],
}
}
+/* Returns a pointer to the relevant vHost socket ID depending on the mode in
+ * use */
+static char *
+get_vhost_id(struct netdev_dpdk *dev)
+ OVS_REQUIRES(dev->mutex)
+{
+ return dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT ?
+ dev->vhost_client_id : dev->vhost_server_id;
+}
+
static int
netdev_dpdk_vhost_construct(struct netdev *netdev)
{
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
const char *name = netdev->name;
int err;
- uint64_t flags = 0;
/* 'name' is appended to 'vhost_sock_dir' and used to create a socket in
* the file system. '/' or '\' would traverse directories, so they're not
@@ -898,19 +911,24 @@ netdev_dpdk_vhost_construct(struct netdev *netdev)
ovs_mutex_lock(&dpdk_mutex);
/* Take the name of the vhost-user port and append it to the location where
- * the socket is to be created, then register the socket.
+ * the socket is to be created, then register the socket. Sockets are
+ * registered initially in 'server' mode.
*/
- snprintf(CONST_CAST(char *, dev->vhost_id), sizeof dev->vhost_id, "%s/%s",
+ snprintf(dev->vhost_server_id, sizeof dev->vhost_server_id, "%s/%s",
vhost_sock_dir, name);
- err = rte_vhost_driver_register(dev->vhost_id, flags);
+ err = rte_vhost_driver_register(dev->vhost_server_id,
+ dev->vhost_driver_flags);
if (err) {
VLOG_ERR("vhost-user socket device setup failure for socket %s\n",
- dev->vhost_id);
+ get_vhost_id(dev));
} else {
- fatal_signal_add_file_to_unlink(dev->vhost_id);
- VLOG_INFO("Socket %s created for vhost-user port %s\n",
- dev->vhost_id, name);
+ if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)) {
+ /* OVS server mode - add this socket to list for deletion */
+ fatal_signal_add_file_to_unlink(get_vhost_id(dev));
+ VLOG_INFO("Socket %s created for vhost-user port %s\n",
+ get_vhost_id(dev), name);
+ }
err = netdev_dpdk_init(netdev, -1, DPDK_DEV_VHOST);
}
@@ -964,28 +982,29 @@ netdev_dpdk_destruct(struct netdev *netdev)
  * try to acquire 'dpdk_mutex' and possibly 'dev->mutex'. To avoid a
  * deadlock, none of the mutexes must be held while calling this function. */
 static int
-dpdk_vhost_driver_unregister(struct netdev_dpdk *dev)
+dpdk_vhost_driver_unregister(char *vhost_id)
     OVS_EXCLUDED(dpdk_mutex)
-    OVS_EXCLUDED(dev->mutex)
 {
-    return rte_vhost_driver_unregister(dev->vhost_id);
+    return rte_vhost_driver_unregister(vhost_id);
 }
static void
netdev_dpdk_vhost_destruct(struct netdev *netdev)
{
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+ char *vhost_id;
ovs_mutex_lock(&dpdk_mutex);
ovs_mutex_lock(&dev->mutex);
/* Guest becomes an orphan if still attached. */
- if (netdev_dpdk_get_vid(dev) >= 0) {
+ if (netdev_dpdk_get_vid(dev) >= 0
+ && !(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)) {
VLOG_ERR("Removing port '%s' while vhost device still attached.",
netdev->name);
VLOG_ERR("To restore connectivity after re-adding of port, VM on socket"
" '%s' must be restarted.",
- dev->vhost_id);
+ get_vhost_id(dev));
}
free(ovsrcu_get_protected(struct ingress_policer *,
@@ -995,14 +1015,18 @@ netdev_dpdk_vhost_destruct(struct netdev *netdev)
ovs_list_remove(&dev->list_node);
dpdk_mp_put(dev->dpdk_mp);
+ vhost_id = xstrdup(get_vhost_id(dev));
+
ovs_mutex_unlock(&dev->mutex);
ovs_mutex_unlock(&dpdk_mutex);
- if (dpdk_vhost_driver_unregister(dev)) {
- VLOG_ERR("Unable to remove vhost-user socket %s", dev->vhost_id);
- } else {
- fatal_signal_remove_file_to_unlink(dev->vhost_id);
+ if (dpdk_vhost_driver_unregister(vhost_id)) {
+ VLOG_ERR("Unable to remove vhost-user socket %s", vhost_id);
+ } else if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)) {
+ /* OVS server mode - remove this socket from list for deletion */
+ fatal_signal_remove_file_to_unlink(vhost_id);
}
+ free(vhost_id);
}
static void
@@ -1064,6 +1088,31 @@ netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args)
 }
 
+/* Handles database configuration changes for a vhost-user port.  Stores a
+ * newly supplied 'vhost-server-path' (the client mode socket path) and
+ * requests a reconfigure, which performs the actual switch to client mode. */
 static int
+netdev_dpdk_vhost_set_config(struct netdev *netdev, const struct smap *args)
+{
+    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+    const char *path;
+
+    /* 'vhost_driver_flags' and 'vhost_client_id' are also touched by the
+     * reconfigure path, so serialize access with 'dev->mutex'. */
+    ovs_mutex_lock(&dev->mutex);
+    if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)) {
+        path = smap_get(args, "vhost-server-path");
+        if (path && strcmp(path, dev->vhost_client_id)) {
+            /* Bounded copy: 'path' comes from the database and may exceed
+             * the PATH_MAX-sized destination buffer. */
+            snprintf(dev->vhost_client_id, sizeof dev->vhost_client_id,
+                     "%s", path);
+            netdev_request_reconfigure(netdev);
+        }
+    }
+    ovs_mutex_unlock(&dev->mutex);
+
+    return 0;
+}
+
+static int
netdev_dpdk_get_numa_id(const struct netdev *netdev)
{
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
@@ -2264,7 +2305,7 @@ netdev_dpdk_remap_txqs(struct netdev_dpdk *dev)
}
}
- VLOG_DBG("TX queue mapping for %s\n", dev->vhost_id);
+ VLOG_DBG("TX queue mapping for %s\n", get_vhost_id(dev));
for (i = 0; i < total_txqs; i++) {
VLOG_DBG("%2d --> %2d", i, dev->tx_q[i].map);
}
@@ -2288,10 +2329,10 @@ new_device(int vid)
ovs_mutex_lock(&dpdk_mutex);
/* Add device to the vhost port with the same name as that passed down. */
LIST_FOR_EACH(dev, list_node, &dpdk_list) {
- if (strncmp(ifname, dev->vhost_id, IF_NAME_SZ) == 0) {
+ ovs_mutex_lock(&dev->mutex);
+ if (strncmp(ifname, get_vhost_id(dev), IF_NAME_SZ) == 0) {
uint32_t qp_num = rte_vhost_get_queue_num(vid);
- ovs_mutex_lock(&dev->mutex);
/* Get NUMA information */
newnode = rte_vhost_get_numa_node(vid);
if (newnode == -1) {
@@ -2321,6 +2362,7 @@ new_device(int vid)
ovs_mutex_unlock(&dev->mutex);
break;
}
+ ovs_mutex_unlock(&dev->mutex);
}
ovs_mutex_unlock(&dpdk_mutex);
@@ -2414,8 +2456,8 @@ vring_state_changed(int vid, uint16_t queue_id, int enable)
ovs_mutex_lock(&dpdk_mutex);
LIST_FOR_EACH (dev, list_node, &dpdk_list) {
- if (strncmp(ifname, dev->vhost_id, IF_NAME_SZ) == 0) {
- ovs_mutex_lock(&dev->mutex);
+ ovs_mutex_lock(&dev->mutex);
+ if (strncmp(ifname, get_vhost_id(dev), IF_NAME_SZ) == 0) {
if (enable) {
dev->tx_q[qid].map = qid;
} else {
@@ -2426,6 +2468,7 @@ vring_state_changed(int vid, uint16_t queue_id, int enable)
ovs_mutex_unlock(&dev->mutex);
break;
}
+ ovs_mutex_unlock(&dev->mutex);
}
ovs_mutex_unlock(&dpdk_mutex);
@@ -2911,6 +2954,7 @@ static int
netdev_dpdk_vhost_reconfigure(struct netdev *netdev)
{
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+ int err = 0;
ovs_mutex_lock(&dpdk_mutex);
ovs_mutex_lock(&dev->mutex);
@@ -2936,6 +2980,45 @@ netdev_dpdk_vhost_reconfigure(struct netdev *netdev)
dev->vhost_reconfigured = true;
}
+ /* Configure vHost client mode if requested and if the following criteria
+ * are met:
+ * 1. Device is currently in 'server' mode.
+ * 2. Device is currently not active.
+ * 3. A path has been specified.
+ */
+ if (!(dev->vhost_driver_flags & RTE_VHOST_USER_CLIENT)
+ && !(netdev_dpdk_get_vid(dev) >= 0)
+ && strlen(dev->vhost_client_id)) {
+ /* Unregister server-mode device */
+ char *vhost_id = xstrdup(get_vhost_id(dev));
+
+ ovs_mutex_unlock(&dev->mutex);
+ ovs_mutex_unlock(&dpdk_mutex);
+ err = dpdk_vhost_driver_unregister(vhost_id);
+ free(vhost_id);
+ ovs_mutex_lock(&dpdk_mutex);
+ ovs_mutex_lock(&dev->mutex);
+ if (err) {
+ VLOG_ERR("Unable to remove vhost-user socket %s",
+ get_vhost_id(dev));
+ } else {
+ fatal_signal_remove_file_to_unlink(get_vhost_id(dev));
+ /* Register client-mode device */
+ err = rte_vhost_driver_register(dev->vhost_client_id,
+ RTE_VHOST_USER_CLIENT);
+ if (err) {
+ VLOG_ERR("vhost-user device setup failure for device %s\n",
+ dev->vhost_client_id);
+ } else {
+ /* Configuration successful */
+ dev->vhost_driver_flags |= RTE_VHOST_USER_CLIENT;
+ VLOG_INFO("vHost User device '%s' changed to 'client' mode, "
+ "using client socket '%s'",
+ dev->up.name, get_vhost_id(dev));
+ }
+ }
+ }
+
ovs_mutex_unlock(&dev->mutex);
ovs_mutex_unlock(&dpdk_mutex);
@@ -3439,7 +3522,7 @@ static const struct netdev_class OVS_UNUSED dpdk_vhost_class =
"dpdkvhostuser",
netdev_dpdk_vhost_construct,
netdev_dpdk_vhost_destruct,
- NULL,
+ netdev_dpdk_vhost_set_config,
NULL,
netdev_dpdk_vhost_send,
netdev_dpdk_vhost_get_carrier,
@@ -2366,6 +2366,17 @@
</ul>
</p>
</column>
+
+ <column name="options" key="vhost-server-path"
+ type='{"type": "string"}'>
+ <p>
+ When specified, switches the given port permanently to 'client'
+ mode. The value specifies the path to the socket associated with a
+ vHost User client mode device that has been or will be created by
+ QEMU.
+ Only supported by DPDK vHost interfaces.
+ </p>
+ </column>
</group>
<group title="MTU">