diff mbox series

[2/2] um: drivers: Add virtio vhost-user driver

Message ID 20190731202942.29630-2-johannes@sipsolutions.net
State Superseded
Headers show
Series [1/2] um: irq: Fix LAST_IRQ usage | expand

Commit Message

Johannes Berg July 31, 2019, 8:29 p.m. UTC
From: Erel Geron <erelx.geron@intel.com>

This module allows virtio devices to be used over a vhost-user socket.
To instantiate a device, pass the (list of) device socket(s) to the
kernel or module command line, such as

	virtio_uml.device=/var/uml.socket:1

where the "1" is the device ID, from include/uapi/linux/virtio_ids.h,
i.e. network in this case.

Signed-off-by: Erel Geron <erelx.geron@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 arch/um/drivers/Kconfig      |   7 +
 arch/um/drivers/Makefile     |   1 +
 arch/um/drivers/vhost_user.h | 102 ++++
 arch/um/drivers/virtio_uml.c | 905 +++++++++++++++++++++++++++++++++++
 arch/um/include/asm/irq.h    |   5 +-
 arch/um/include/shared/os.h  |   7 +
 arch/um/kernel/ksyms.c       |   4 +
 arch/um/kernel/mem.c         |   1 +
 arch/um/kernel/physmem.c     |   1 +
 arch/um/kernel/um_arch.c     |   1 +
 arch/um/os-Linux/file.c      |  64 +++
 11 files changed, 1096 insertions(+), 2 deletions(-)
 create mode 100644 arch/um/drivers/vhost_user.h
 create mode 100644 arch/um/drivers/virtio_uml.c

Comments

Anton Ivanov July 31, 2019, 8:50 p.m. UTC | #1
On 31/07/2019 21:29, Johannes Berg wrote:
> From: Erel Geron <erelx.geron@intel.com>
> 
> This module allows virtio devices to be used over a vhost-user socket.
> To instantiate a device, pass the (list of) device socket(s) to the
> kernel or module command line, such as
> 
> 	virtio_uml.device=/var/uml.socket:1
> 
> where the "1" is the device ID, from include/uapi/linux/virtio_ids.h,
> i.e. network in this case.
> 
> Signed-off-by: Erel Geron <erelx.geron@intel.com>
> Signed-off-by: Johannes Berg <johannes.berg@intel.com>
> ---
>   arch/um/drivers/Kconfig      |   7 +
>   arch/um/drivers/Makefile     |   1 +
>   arch/um/drivers/vhost_user.h | 102 ++++
>   arch/um/drivers/virtio_uml.c | 905 +++++++++++++++++++++++++++++++++++
>   arch/um/include/asm/irq.h    |   5 +-
>   arch/um/include/shared/os.h  |   7 +
>   arch/um/kernel/ksyms.c       |   4 +
>   arch/um/kernel/mem.c         |   1 +
>   arch/um/kernel/physmem.c     |   1 +
>   arch/um/kernel/um_arch.c     |   1 +
>   arch/um/os-Linux/file.c      |  64 +++
>   11 files changed, 1096 insertions(+), 2 deletions(-)
>   create mode 100644 arch/um/drivers/vhost_user.h
>   create mode 100644 arch/um/drivers/virtio_uml.c
> 
> diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
> index 2638e46f50cc..fea5a0d522dc 100644
> --- a/arch/um/drivers/Kconfig
> +++ b/arch/um/drivers/Kconfig
> @@ -335,3 +335,10 @@ config UML_NET_SLIRP
>   	  Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp"
>   
>   endmenu
> +
> +config VIRTIO_UML
> +	tristate "UML driver for virtio devices"
> +	select VIRTIO
> +	help
> +	  This driver provides support for virtio based paravirtual device
> +	  drivers over vhost-user sockets.
> diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile
> index 693319839f69..3edf0a7cfd25 100644
> --- a/arch/um/drivers/Makefile
> +++ b/arch/um/drivers/Makefile
> @@ -61,6 +61,7 @@ obj-$(CONFIG_XTERM_CHAN) += xterm.o xterm_kern.o
>   obj-$(CONFIG_UML_WATCHDOG) += harddog.o
>   obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o
>   obj-$(CONFIG_UML_RANDOM) += random.o
> +obj-$(CONFIG_VIRTIO_UML) += virtio_uml.o
>   
>   # pcap_user.o must be added explicitly.
>   USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o
> diff --git a/arch/um/drivers/vhost_user.h b/arch/um/drivers/vhost_user.h
> new file mode 100644
> index 000000000000..2a9829b0782b
> --- /dev/null
> +++ b/arch/um/drivers/vhost_user.h
> @@ -0,0 +1,102 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/* Vhost-user protocol */
> +
> +#ifndef __VHOST_USER_H__
> +#define __VHOST_USER_H__
> +
> +/* Message flags */
> +#define VHOST_USER_FLAG_REPLY		BIT(2)
> +/* Feature bits */
> +#define VHOST_USER_F_PROTOCOL_FEATURES	30
> +/* Protocol feature bits */
> +#define VHOST_USER_PROTOCOL_F_CONFIG	9
> +/* Vring state index masks */
> +#define VHOST_USER_VRING_INDEX_MASK	0xff
> +#define VHOST_USER_VRING_POLL_MASK	BIT(8)
> +
> +/* Supported version */
> +#define VHOST_USER_VERSION		1
> +/* Supported transport features */
> +#define VHOST_USER_SUPPORTED_F		BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)
> +/* Supported protocol features */
> +#define VHOST_USER_SUPPORTED_PROTOCOL_F	BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)
> +
> +enum vhost_user_request {
> +	VHOST_USER_GET_FEATURES = 1,
> +	VHOST_USER_SET_FEATURES = 2,
> +	VHOST_USER_SET_OWNER = 3,
> +	VHOST_USER_RESET_OWNER = 4,
> +	VHOST_USER_SET_MEM_TABLE = 5,
> +	VHOST_USER_SET_LOG_BASE = 6,
> +	VHOST_USER_SET_LOG_FD = 7,
> +	VHOST_USER_SET_VRING_NUM = 8,
> +	VHOST_USER_SET_VRING_ADDR = 9,
> +	VHOST_USER_SET_VRING_BASE = 10,
> +	VHOST_USER_GET_VRING_BASE = 11,
> +	VHOST_USER_SET_VRING_KICK = 12,
> +	VHOST_USER_SET_VRING_CALL = 13,
> +	VHOST_USER_SET_VRING_ERR = 14,
> +	VHOST_USER_GET_PROTOCOL_FEATURES = 15,
> +	VHOST_USER_SET_PROTOCOL_FEATURES = 16,
> +	VHOST_USER_GET_QUEUE_NUM = 17,
> +	VHOST_USER_SET_VRING_ENABLE = 18,
> +	VHOST_USER_SEND_RARP = 19,
> +	VHOST_USER_NET_SEND_MTU = 20,
> +	VHOST_USER_SET_SLAVE_REQ_FD = 21,
> +	VHOST_USER_IOTLB_MSG = 22,
> +	VHOST_USER_SET_VRING_ENDIAN = 23,
> +	VHOST_USER_GET_CONFIG = 24,
> +	VHOST_USER_SET_CONFIG = 25,
> +};
> +
> +struct vhost_user_header {
> +	u32 request; /* Use enum vhost_user_request */
> +	u32 flags;
> +	u32 size;
> +} __packed;
> +
> +struct vhost_user_config {
> +	u32 offset;
> +	u32 size;
> +	u32 flags;
> +	u8 payload[0]; /* Variable length */
> +} __packed;
> +
> +struct vhost_user_vring_state {
> +	u32 index;
> +	u32 num;
> +} __packed;
> +
> +struct vhost_user_vring_addr {
> +	u32 index;
> +	u32 flags;
> +	u64 desc, used, avail, log;
> +} __packed;
> +
> +struct vhost_user_mem_region {
> +	u64 guest_addr;
> +	u64 size;
> +	u64 user_addr;
> +	u64 mmap_offset;
> +} __packed;
> +
> +struct vhost_user_mem_regions {
> +	u32 num;
> +	u32 padding;
> +	struct vhost_user_mem_region regions[2]; /* Currently supporting 2 */
> +} __packed;
> +
> +union vhost_user_payload {
> +	u64 integer;
> +	struct vhost_user_config config;
> +	struct vhost_user_vring_state vring_state;
> +	struct vhost_user_vring_addr vring_addr;
> +	struct vhost_user_mem_regions mem_regions;
> +};
> +
> +struct vhost_user_msg {
> +	struct vhost_user_header header;
> +	union vhost_user_payload payload;
> +} __packed;
> +
> +#endif
> diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c
> new file mode 100644
> index 000000000000..30eeb907f42a
> --- /dev/null
> +++ b/arch/um/drivers/virtio_uml.c
> @@ -0,0 +1,905 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Virtio vhost-user driver
> + *
> + * Copyright(c) 2019 Intel Corporation
> + *
> + * This module allows virtio devices to be used over a vhost-user socket.
> + *
> + * Guest devices can be instantiated by kernel module or command line
> + * parameters. One device will be created for each parameter. Syntax:
> + *
> + *		[virtio_uml.]device=<socket>:<virtio_id>[:<platform_id>]
> + * where:
> + *		<socket>	:= vhost-user socket path to connect
> + *		<virtio_id>	:= virtio device id (as in virtio_ids.h)
> + *		<platform_id>	:= (optional) platform device id
> + *
> + * example:
> + *		virtio_uml.device=/var/uml.socket:1
> + *
> + * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd.
> + */
> +#include <linux/module.h>
> +#include <linux/platform_device.h>
> +#include <linux/slab.h>
> +#include <linux/virtio.h>
> +#include <linux/virtio_config.h>
> +#include <linux/virtio_ring.h>
> +#include <irq_kern.h>
> +#include <init.h>
> +#include <os.h>
> +#include "vhost_user.h"
> +
> +/* Workaround due to a conflict between irq_user.h and irqreturn.h */
> +#ifdef IRQ_NONE
> +#undef IRQ_NONE
> +#endif
> +
> +#define MAX_SUPPORTED_QUEUE_SIZE	256
> +
> +#define to_virtio_uml_device(_vdev) \
> +	container_of(_vdev, struct virtio_uml_device, vdev)
> +
> +struct virtio_uml_device {
> +	struct virtio_device vdev;
> +	struct platform_device *pdev;
> +
> +	int sock;
> +	u64 features;
> +	u64 protocol_features;
> +	u8 status;
> +};
> +
> +struct virtio_uml_vq_info {
> +	int kick_fd, call_fd;
> +	char name[32];
> +};
> +
> +extern unsigned long long physmem_size, highmem;
> +
> +
> +/* Vhost-user protocol */
> +
> +static int vhost_user_send(struct virtio_uml_device *vu_dev,
> +			   struct vhost_user_msg *msg,
> +			   int *fds, size_t num_fds)
> +{
> +	size_t size = sizeof(msg->header) + msg->header.size;
> +	int rc;
> +
> +	msg->header.flags |= VHOST_USER_VERSION;
> +	do {
> +		rc = os_sendmsg_fds(vu_dev->sock, (void *) msg, size, fds,
> +				num_fds);
> +	} while (rc == -EINTR);
> +	if (rc < 0)
> +		return rc;
> +	if (rc != size)
> +		return -ECOMM;
> +	return 0;
> +}
> +
> +static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev,
> +				      u32 request)
> +{
> +	struct vhost_user_msg msg = {
> +		.header.request = request,
> +	};
> +	return vhost_user_send(vu_dev, &msg, NULL, 0);
> +}
> +
> +static int vhost_user_send_u64(struct virtio_uml_device *vu_dev,
> +			       u32 request, u64 value)
> +{
> +	struct vhost_user_msg msg = {
> +		.header.request = request,
> +		.header.size = sizeof(msg.payload.integer),
> +		.payload.integer = value,
> +	};
> +
> +	return vhost_user_send(vu_dev, &msg, NULL, 0);
> +}
> +
> +static int vhost_user_recv_header(struct virtio_uml_device *vu_dev,
> +				  struct vhost_user_msg *msg)
> +{
> +	size_t size = sizeof(msg->header);
> +	int rc;
> +
> +	do {
> +		rc = os_read_file(vu_dev->sock, (void *) msg, size);
> +	} while (rc == -EINTR);
> +	if (rc != size)
> +		return -EPROTO;
> +	if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION))
> +		return -EPROTO;
> +	return 0;
> +}
> +
> +static int vhost_user_recv(struct virtio_uml_device *vu_dev,
> +			   struct vhost_user_msg *msg,
> +			   size_t max_payload_size)
> +{
> +	size_t size;
> +	int rc = vhost_user_recv_header(vu_dev, msg);
> +
> +	if (rc)
> +		return rc;
> +	size = msg->header.size;
> +	if (size > max_payload_size)
> +		return -EPROTO;
> +	do {
> +		rc = os_read_file(vu_dev->sock, (void *) &msg->payload, size);
> +	} while (rc == -EINTR);
> +	if (rc != size)
> +		return -EPROTO;
> +	return 0;
> +}
> +
> +static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev,
> +			       u64 *value)
> +{
> +	struct vhost_user_msg msg;
> +	int rc = vhost_user_recv(vu_dev, &msg, sizeof(msg.payload.integer));
> +
> +	if (rc)
> +		return rc;
> +	if (msg.header.size != sizeof(msg.payload.integer))
> +		return -EPROTO;
> +	*value = msg.payload.integer;
> +	return 0;
> +}
> +
> +static int vhost_user_set_owner(struct virtio_uml_device *vu_dev)
> +{
> +	return vhost_user_send_no_payload(vu_dev, VHOST_USER_SET_OWNER);
> +}
> +
> +static int vhost_user_get_features(struct virtio_uml_device *vu_dev,
> +				   u64 *features)
> +{
> +	int rc = vhost_user_send_no_payload(vu_dev, VHOST_USER_GET_FEATURES);
> +
> +	if (rc)
> +		return rc;
> +	return vhost_user_recv_u64(vu_dev, features);
> +}
> +
> +static int vhost_user_set_features(struct virtio_uml_device *vu_dev,
> +				   u64 features)
> +{
> +	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_FEATURES, features);
> +}
> +
> +static int vhost_user_get_protocol_features(struct virtio_uml_device *vu_dev,
> +					    u64 *protocol_features)
> +{
> +	int rc = vhost_user_send_no_payload(vu_dev,
> +			VHOST_USER_GET_PROTOCOL_FEATURES);
> +
> +	if (rc)
> +		return rc;
> +	return vhost_user_recv_u64(vu_dev, protocol_features);
> +}
> +
> +static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev,
> +					    u64 protocol_features)
> +{
> +	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_PROTOCOL_FEATURES,
> +			protocol_features);
> +}
> +
> +static int vhost_user_init(struct virtio_uml_device *vu_dev)
> +{
> +	int rc = vhost_user_set_owner(vu_dev);
> +
> +	if (rc)
> +		return rc;
> +	rc = vhost_user_get_features(vu_dev, &vu_dev->features);
> +	if (rc)
> +		return rc;
> +
> +	if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) {
> +		rc = vhost_user_get_protocol_features(vu_dev,
> +				&vu_dev->protocol_features);
> +		if (rc)
> +			return rc;
> +		vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F;
> +		rc = vhost_user_set_protocol_features(vu_dev,
> +				vu_dev->protocol_features);
> +	}
> +	return rc;
> +}
> +
> +static int vhost_user_get_config(struct virtio_uml_device *vu_dev,
> +				 u32 offset, void *buf, u32 len)
> +{
> +	u32 cfg_size = offset + len;
> +	struct vhost_user_msg *msg;
> +	size_t payload_size = sizeof(msg->payload.config) + cfg_size;
> +	size_t msg_size = sizeof(msg->header) + payload_size;
> +	int rc;
> +
> +	if (!(vu_dev->protocol_features &
> +			BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
> +		return -EOPNOTSUPP;
> +
> +	msg = kzalloc(msg_size, GFP_KERNEL);
> +	if (!msg)
> +		return -ENOMEM;
> +	msg->header.request = VHOST_USER_GET_CONFIG;
> +	msg->header.size = payload_size;
> +	msg->payload.config.offset = 0;
> +	msg->payload.config.size = cfg_size;
> +
> +	rc = vhost_user_send(vu_dev, msg, NULL, 0);
> +	if (rc)
> +		goto error_get;
> +	rc = vhost_user_recv(vu_dev, msg, msg_size);
> +	if (rc)
> +		goto error_get;
> +	if (msg->header.size != payload_size ||
> +			msg->payload.config.size != cfg_size) {
> +		rc = -EPROTO;
> +		goto error_get;
> +	}
> +	memcpy(buf, msg->payload.config.payload + offset, len);
> +
> +error_get:
> +	kfree(msg);
> +	return rc;
> +}
> +
> +static int vhost_user_set_config(struct virtio_uml_device *vu_dev,
> +				 u32 offset, const void *buf, u32 len)
> +{
> +	struct vhost_user_msg *msg;
> +	size_t payload_size = sizeof(msg->payload.config) + len;
> +	size_t msg_size = sizeof(msg->header) + payload_size;
> +	int rc;
> +
> +	if (!(vu_dev->protocol_features &
> +			BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
> +		return -EOPNOTSUPP;
> +
> +	msg = kzalloc(msg_size, GFP_KERNEL);
> +	if (!msg)
> +		return -ENOMEM;
> +	msg->header.request = VHOST_USER_SET_CONFIG;
> +	msg->header.size = payload_size;
> +	msg->payload.config.offset = offset;
> +	msg->payload.config.size = len;
> +	memcpy(msg->payload.config.payload, buf, len);
> +
> +	rc = vhost_user_send(vu_dev, msg, NULL, 0);
> +
> +	kfree(msg);
> +	return rc;
> +}
> +
> +static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out,
> +				      struct vhost_user_mem_region *region_out)
> +{
> +	unsigned long long mem_offset;
> +	int rc = phys_mapping(addr, &mem_offset);
> +
> +	if (WARN_ON(rc < 0))
> +		return -EFAULT;
> +	*fd_out = rc;
> +	region_out->guest_addr = addr;
> +	region_out->user_addr = addr;
> +	region_out->size = size;
> +	region_out->mmap_offset = mem_offset;
> +
> +	rc = phys_mapping(addr + size - 1, &mem_offset);
> +	if (WARN_ON(rc != *fd_out))
> +		return -EFAULT;
> +	return 0;
> +}
> +
> +static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev)
> +{
> +	struct vhost_user_msg msg = {
> +		.header.request = VHOST_USER_SET_MEM_TABLE,
> +		.header.size = sizeof(msg.payload.mem_regions),
> +		.payload.mem_regions.num = 1,
> +	};
> +	int fds[2];
> +	int rc = vhost_user_init_mem_region(0, physmem_size, &fds[0],
> +			&msg.payload.mem_regions.regions[0]);
> +
> +	if (rc < 0)
> +		return rc;
> +	if (highmem) {
> +		msg.payload.mem_regions.num++;
> +		rc = vhost_user_init_mem_region(__pa(end_iomem), highmem,
> +				&fds[1], &msg.payload.mem_regions.regions[1]);
> +		if (rc < 0)
> +			return rc;
> +	}
> +
> +	return vhost_user_send(vu_dev, &msg, fds, msg.payload.mem_regions.num);
> +}
> +
> +static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev,
> +				      u32 request, u32 index, u32 num)
> +{
> +	struct vhost_user_msg msg = {
> +		.header.request = request,
> +		.header.size = sizeof(msg.payload.vring_state),
> +		.payload.vring_state.index = index,
> +		.payload.vring_state.num = num,
> +	};
> +
> +	return vhost_user_send(vu_dev, &msg, NULL, 0);
> +}
> +
> +static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev,
> +				    u32 index, u32 num)
> +{
> +	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_NUM,
> +			index, num);
> +}
> +
> +static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev,
> +				     u32 index, u32 offset)
> +{
> +	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_BASE,
> +			index, offset);
> +}
> +
> +static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev,
> +				     u32 index, u64 desc, u64 used, u64 avail,
> +				     u64 log)
> +{
> +	struct vhost_user_msg msg = {
> +		.header.request = VHOST_USER_SET_VRING_ADDR,
> +		.header.size = sizeof(msg.payload.vring_addr),
> +		.payload.vring_addr.index = index,
> +		.payload.vring_addr.desc = desc,
> +		.payload.vring_addr.used = used,
> +		.payload.vring_addr.avail = avail,
> +		.payload.vring_addr.log = log,
> +	};
> +
> +	return vhost_user_send(vu_dev, &msg, NULL, 0);
> +}
> +
> +static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev,
> +				   u32 request, int index, int fd)
> +{
> +	struct vhost_user_msg msg = {
> +		.header.request = request,
> +		.header.size = sizeof(msg.payload.integer),
> +		.payload.integer = index,
> +	};
> +
> +	if (index & ~VHOST_USER_VRING_INDEX_MASK)
> +		return -EINVAL;
> +	if (fd < 0) {
> +		msg.payload.integer |= VHOST_USER_VRING_POLL_MASK;
> +		return vhost_user_send(vu_dev, &msg, NULL, 0);
> +	}
> +	return vhost_user_send(vu_dev, &msg, &fd, 1);
> +}
> +
> +static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev,
> +				     int index, int fd)
> +{
> +	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL,
> +			index, fd);
> +}
> +
> +static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev,
> +				     int index, int fd)
> +{
> +	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK,
> +			index, fd);
> +}
> +
> +static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev,
> +				       u32 index, bool enable)
> +{
> +	if (!(vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)))
> +		return 0;
> +
> +	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_ENABLE,
> +			index, enable);
> +}
> +
> +
> +/* Virtio interface */
> +
> +static bool vu_notify(struct virtqueue *vq)
> +{
> +	struct virtio_uml_vq_info *info = vq->priv;
> +	int rc;
> +
> +	do {
> +		rc = os_eventfd_write(info->kick_fd, 1);
> +	} while (rc == -EINTR);
> +	WARN_ON(rc);
> +	return !rc;
> +}
> +
> +static irqreturn_t vu_interrupt(int irq, void *opaque)
> +{
> +	struct virtqueue *vq = opaque;
> +	struct virtio_uml_vq_info *info = vq->priv;
> +	unsigned long long value;
> +	int rc;
> +	irqreturn_t ret = IRQ_NONE;
> +
> +	do {
> +		rc = os_eventfd_read(info->call_fd, &value);
> +		if (!rc)
> +			ret |= vring_interrupt(irq, vq);
> +	} while (!rc || rc == -EINTR);
> +	WARN_ON(rc != -EAGAIN);
> +	return ret;
> +}
> +
> +
> +static void vu_get(struct virtio_device *vdev, unsigned offset,
> +		   void *buf, unsigned len)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	WARN_ON(vhost_user_get_config(vu_dev, offset, buf, len));
> +}
> +
> +static void vu_set(struct virtio_device *vdev, unsigned offset,
> +		   const void *buf, unsigned len)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	WARN_ON(vhost_user_set_config(vu_dev, offset, buf, len));
> +}
> +
> +static u8 vu_get_status(struct virtio_device *vdev)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	return vu_dev->status;
> +}
> +
> +static void vu_set_status(struct virtio_device *vdev, u8 status)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	vu_dev->status = status;
> +}
> +
> +static void vu_reset(struct virtio_device *vdev)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	vu_dev->status = 0;
> +}
> +
> +static void vu_del_vq(struct virtqueue *vq)
> +{
> +	struct virtio_uml_vq_info *info = vq->priv;
> +
> +	um_free_irq(VIRTIO_IRQ, vq);
> +
> +	os_close_file(info->call_fd);
> +	os_close_file(info->kick_fd);
> +
> +	vring_del_virtqueue(vq);
> +	kfree(info);
> +}
> +
> +static void vu_del_vqs(struct virtio_device *vdev)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +	struct virtqueue *vq, *n;
> +	u64 features;
> +
> +	/* Note: reverse order as a workaround to a decoding bug in snabb */
> +	list_for_each_entry_reverse(vq, &vdev->vqs, list)
> +		WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, false));
> +
> +	/* Ensure previous messages have been processed */
> +	WARN_ON(vhost_user_get_features(vu_dev, &features));
> +
> +	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
> +		vu_del_vq(vq);
> +}
> +
> +static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
> +				     unsigned index, vq_callback_t *callback,
> +				     const char *name, bool ctx)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +	struct platform_device *pdev = vu_dev->pdev;
> +	struct virtio_uml_vq_info *info;
> +	struct virtqueue *vq;
> +	int num = MAX_SUPPORTED_QUEUE_SIZE;
> +	int rc;
> +	int call_fds[2];
> +
> +	info = kzalloc(sizeof(*info), GFP_KERNEL);
> +	if (!info) {
> +		rc = -ENOMEM;
> +		goto error_kzalloc;
> +	}
> +	snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name,
> +			pdev->id, name);
> +
> +	rc = os_eventfd(0, 0);
> +	if (rc < 0)
> +		goto error_kick;
> +	info->kick_fd = rc;
> +
> +	/* Use a pipe for call fd, since SIGIO is not supported for eventfd */
> +	rc = os_pipe(call_fds, true, true);
> +	if (rc < 0)
> +		goto error_call;
> +	info->call_fd = call_fds[0];
> +
> +	vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true,
> +			ctx, vu_notify, callback, info->name);
> +	if (!vq) {
> +		rc = -ENOMEM;
> +		goto error_create;
> +	}
> +	vq->priv = info;
> +	num = virtqueue_get_vring_size(vq);
> +
> +	rc = um_request_irq(VIRTIO_IRQ, info->call_fd, IRQ_READ, vu_interrupt,
> +			IRQF_SHARED, info->name, vq);
> +	if (rc)
> +		goto error_irq;
> +
> +	rc = vhost_user_set_vring_num(vu_dev, index, num);
> +	if (rc)
> +		goto error_setup;
> +
> +	rc = vhost_user_set_vring_base(vu_dev, index, 0);
> +	if (rc)
> +		goto error_setup;
> +
> +	rc = vhost_user_set_vring_call(vu_dev, index, call_fds[1]);
> +	if (rc)
> +		goto error_setup;
> +
> +	rc = vhost_user_set_vring_addr(vu_dev, index,
> +			virtqueue_get_desc_addr(vq),
> +			virtqueue_get_used_addr(vq),
> +			virtqueue_get_avail_addr(vq),
> +			(u64) -1);
> +	if (rc)
> +		goto error_setup;
> +
> +	/* Close unused write end of call fds */
> +	os_close_file(call_fds[1]);
> +
> +	return vq;
> +
> +error_setup:
> +	um_free_irq(VIRTIO_IRQ, vq);
> +error_irq:
> +	vring_del_virtqueue(vq);
> +error_create:
> +	os_close_file(call_fds[1]);
> +	os_close_file(info->call_fd);
> +error_call:
> +	os_close_file(info->kick_fd);
> +error_kick:
> +	kfree(info);
> +error_kzalloc:
> +	return ERR_PTR(rc);
> +}
> +
> +static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
> +		       struct virtqueue *vqs[], vq_callback_t *callbacks[],
> +		       const char * const names[], const bool *ctx,
> +		       struct irq_affinity *desc)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +	int i, queue_idx, rc;
> +	struct virtqueue *vq;
> +
> +	rc = vhost_user_set_mem_table(vu_dev);
> +	if (rc)
> +		return rc;
> +
> +	for (i = 0; i < nvqs; ++i) {
> +		if (!names[i]) {
> +			vqs[i] = NULL;
> +			continue;
> +		}
> +
> +		vqs[i] = vu_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
> +				ctx ? ctx[i] : false);
> +		if (IS_ERR(vqs[i])) {
> +			rc = PTR_ERR(vqs[i]);
> +			goto error_setup;
> +		}
> +	}
> +
> +	list_for_each_entry(vq, &vdev->vqs, list) {
> +		struct virtio_uml_vq_info *info = vq->priv;
> +
> +		rc = vhost_user_set_vring_kick(vu_dev, vq->index,
> +				info->kick_fd);
> +		if (rc)
> +			goto error_setup;
> +
> +		rc = vhost_user_set_vring_enable(vu_dev, vq->index, true);
> +		if (rc)
> +			goto error_setup;
> +	}
> +
> +	return 0;
> +
> +error_setup:
> +	vu_del_vqs(vdev);
> +	return rc;
> +}
> +
> +static u64 vu_get_features(struct virtio_device *vdev)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	return vu_dev->features;
> +}
> +
> +static int vu_finalize_features(struct virtio_device *vdev)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +	u64 supported = vdev->features & VHOST_USER_SUPPORTED_F;
> +
> +	vring_transport_features(vdev);
> +	vu_dev->features = vdev->features | supported;
> +
> +	return vhost_user_set_features(vu_dev, vu_dev->features);
> +}
> +
> +static const char *vu_bus_name(struct virtio_device *vdev)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	return vu_dev->pdev->name;
> +}
> +
> +static const struct virtio_config_ops virtio_uml_config_ops = {
> +	.get = vu_get,
> +	.set = vu_set,
> +	.get_status = vu_get_status,
> +	.set_status = vu_set_status,
> +	.reset = vu_reset,
> +	.find_vqs = vu_find_vqs,
> +	.del_vqs = vu_del_vqs,
> +	.get_features = vu_get_features,
> +	.finalize_features = vu_finalize_features,
> +	.bus_name = vu_bus_name,
> +};
> +
> +
> +static void virtio_uml_release_dev(struct device *d)
> +{
> +	struct virtio_device *vdev =
> +			container_of(d, struct virtio_device, dev);
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	os_close_file(vu_dev->sock);
> +}
> +
> +/* Platform device */
> +
> +struct virtio_uml_platform_data {
> +	u32 virtio_device_id;
> +	const char *socket_path;
> +};
> +
> +static int virtio_uml_probe(struct platform_device *pdev)
> +{
> +	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
> +	struct virtio_uml_device *vu_dev;
> +	int rc;
> +
> +	if (!pdata)
> +		return -EINVAL;
> +
> +	vu_dev = devm_kzalloc(&pdev->dev, sizeof(*vu_dev), GFP_KERNEL);
> +	if (!vu_dev)
> +		return -ENOMEM;
> +
> +	vu_dev->vdev.dev.parent = &pdev->dev;
> +	vu_dev->vdev.dev.release = virtio_uml_release_dev;
> +	vu_dev->vdev.config = &virtio_uml_config_ops;
> +	vu_dev->vdev.id.device = pdata->virtio_device_id;
> +	vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID;
> +	vu_dev->pdev = pdev;
> +
> +	do {
> +		rc = os_connect_socket(pdata->socket_path);
> +	} while (rc == -EINTR);
> +	if (rc < 0)
> +		return rc;
> +	vu_dev->sock = rc;
> +
> +	rc = vhost_user_init(vu_dev);
> +	if (rc)
> +		goto error_init;
> +
> +	platform_set_drvdata(pdev, vu_dev);
> +
> +	rc = register_virtio_device(&vu_dev->vdev);
> +	if (rc)
> +		put_device(&vu_dev->vdev.dev);
> +	return rc;
> +
> +error_init:
> +	os_close_file(vu_dev->sock);
> +	return rc;
> +}
> +
> +static int virtio_uml_remove(struct platform_device *pdev)
> +{
> +	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
> +
> +	unregister_virtio_device(&vu_dev->vdev);
> +	return 0;
> +}
> +
> +/* Command line device list */
> +
> +static void vu_cmdline_release_dev(struct device *d)
> +{
> +}
> +
> +static struct device vu_cmdline_parent = {
> +	.init_name = "virtio-uml-cmdline",
> +	.release = vu_cmdline_release_dev,
> +};
> +
> +static bool vu_cmdline_parent_registered;
> +static int vu_cmdline_id;
> +
> +static int vu_cmdline_set(const char *device, const struct kernel_param *kp)
> +{
> +	const char *ids = strchr(device, ':');
> +	unsigned int virtio_device_id;
> +	int processed, consumed, err;
> +	char *socket_path;
> +	struct virtio_uml_platform_data *pdata;
> +	struct platform_device *pdev;
> +
> +	if (!ids || ids == device)
> +		return -EINVAL;
> +
> +	processed = sscanf(ids, ":%u%n:%d%n",
> +			&virtio_device_id, &consumed,
> +			&vu_cmdline_id, &consumed);
> +
> +	if (processed < 1 || ids[consumed])
> +		return -EINVAL;
> +
> +	if (!vu_cmdline_parent_registered) {
> +		err = device_register(&vu_cmdline_parent);
> +		if (err) {
> +			pr_err("Failed to register parent device!\n");
> +			put_device(&vu_cmdline_parent);
> +			return err;
> +		}
> +		vu_cmdline_parent_registered = true;
> +	}
> +
> +	socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL);
> +	if (!socket_path)
> +		return -ENOMEM;
> +
> +	pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
> +	if (!pdata) {
> +		err = -ENOMEM;
> +		goto error_kzalloc;
> +	}
> +
> +	pdata->virtio_device_id = (u32) virtio_device_id;
> +	pdata->socket_path = socket_path;
> +
> +	pr_info("Registering device virtio-uml.%d id=%d at %s\n",
> +			vu_cmdline_id, virtio_device_id, socket_path);
> +
> +	pdev = platform_device_register_data(&vu_cmdline_parent, "virtio-uml",
> +			vu_cmdline_id++, pdata, sizeof(*pdata));
> +	err = PTR_ERR_OR_ZERO(pdev);
> +	if (err)
> +		goto error_register;
> +	return 0;
> +
> +error_register:
> +	kfree(pdata);
> +error_kzalloc:
> +	kfree(socket_path);
> +	return err;
> +}
> +
> +static int vu_cmdline_get_device(struct device *dev, void *data)
> +{
> +	struct platform_device *pdev = to_platform_device(dev);
> +	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
> +	char *buffer = data;
> +	unsigned int len = strlen(buffer);
> +
> +	snprintf(buffer + len, PAGE_SIZE - len, "%s:%d:%d\n",
> +			pdata->socket_path, pdata->virtio_device_id, pdev->id);
> +	return 0;
> +}
> +
> +static int vu_cmdline_get(char *buffer, const struct kernel_param *kp)
> +{
> +	buffer[0] = '\0';
> +	if (vu_cmdline_parent_registered)
> +		device_for_each_child(&vu_cmdline_parent, buffer,
> +				vu_cmdline_get_device);
> +	return strlen(buffer) + 1;
> +}
> +
> +static const struct kernel_param_ops vu_cmdline_param_ops = {
> +	.set = vu_cmdline_set,
> +	.get = vu_cmdline_get,
> +};
> +
> +device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR);
> +
> +
> +static int vu_unregister_cmdline_device(struct device *dev, void *data)
> +{
> +	struct platform_device *pdev = to_platform_device(dev);
> +	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
> +
> +	platform_device_unregister(pdev);
> +	kfree(pdata->socket_path);
> +	kfree(pdata);
> +	return 0;
> +}
> +
> +static void vu_unregister_cmdline_devices(void)
> +{
> +	if (vu_cmdline_parent_registered) {
> +		device_for_each_child(&vu_cmdline_parent, NULL,
> +				vu_unregister_cmdline_device);
> +		device_unregister(&vu_cmdline_parent);
> +		vu_cmdline_parent_registered = false;
> +	}
> +}
> +
> +/* Platform driver */
> +
> +static const struct of_device_id virtio_uml_match[] = {
> +	{ .compatible = "virtio,uml", },
> +	{ }
> +};
> +MODULE_DEVICE_TABLE(of, virtio_uml_match);
> +
> +static struct platform_driver virtio_uml_driver = {
> +	.probe = virtio_uml_probe,
> +	.remove = virtio_uml_remove,
> +	.driver = {
> +		.name = "virtio-uml",
> +		.of_match_table = virtio_uml_match,
> +	},
> +};
> +
> +static int __init virtio_uml_init(void)
> +{
> +	return platform_driver_register(&virtio_uml_driver);
> +}
> +
> +static void __exit virtio_uml_exit(void)
> +{
> +	platform_driver_unregister(&virtio_uml_driver);
> +	vu_unregister_cmdline_devices();
> +}
> +
> +module_init(virtio_uml_init);
> +module_exit(virtio_uml_exit);
> +__uml_exitcall(virtio_uml_exit);
> +
> +MODULE_DESCRIPTION("UML driver for vhost-user virtio devices");
> +MODULE_LICENSE("GPL");
> diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h
> index ce7a78c3bcf2..42c6205e2dc4 100644
> --- a/arch/um/include/asm/irq.h
> +++ b/arch/um/include/asm/irq.h
> @@ -17,17 +17,18 @@
>   #define TELNETD_IRQ 		12
>   #define XTERM_IRQ 		13
>   #define RANDOM_IRQ 		14
> +#define VIRTIO_IRQ		15
>   
>   #ifdef CONFIG_UML_NET_VECTOR
>   
> -#define VECTOR_BASE_IRQ		15
> +#define VECTOR_BASE_IRQ		(VIRTIO_IRQ + 1)
>   #define VECTOR_IRQ_SPACE	8
>   
>   #define LAST_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ - 1)
>   
>   #else
>   
> -#define LAST_IRQ RANDOM_IRQ
> +#define LAST_IRQ VIRTIO_IRQ
>   
>   #endif
>   
> diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
> index 4a62ac4251a5..da7ebbe4ebb3 100644
> --- a/arch/um/include/shared/os.h
> +++ b/arch/um/include/shared/os.h
> @@ -36,6 +36,8 @@
>   #define OS_LIB_PATH	"/usr/lib/"
>   #endif
>   
> +#define OS_SENDMSG_MAX_FDS 8
> +
>   /*
>    * types taken from stat_file() in hostfs_user.c
>    * (if they are wrong here, they are wrong there...).
> @@ -176,6 +178,11 @@ extern unsigned os_major(unsigned long long dev);
>   extern unsigned os_minor(unsigned long long dev);
>   extern unsigned long long os_makedev(unsigned major, unsigned minor);
>   extern int os_falloc_punch(int fd, unsigned long long offset, int count);
> +extern int os_eventfd(unsigned int initval, int flags);
> +extern int os_eventfd_read(int fd, unsigned long long *value);
> +extern int os_eventfd_write(int fd, unsigned long long value);
> +extern int os_sendmsg_fds(int fd, const void *buf, unsigned int len,
> +			  const int *fds, unsigned int fds_num);
>   
>   /* start_up.c */
>   extern void os_early_checks(void);
> diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
> index 232b22307fdd..f3766889226b 100644
> --- a/arch/um/kernel/ksyms.c
> +++ b/arch/um/kernel/ksyms.c
> @@ -38,6 +38,10 @@ EXPORT_SYMBOL(run_helper);
>   EXPORT_SYMBOL(os_major);
>   EXPORT_SYMBOL(os_minor);
>   EXPORT_SYMBOL(os_makedev);
> +EXPORT_SYMBOL(os_eventfd);
> +EXPORT_SYMBOL(os_eventfd_read);
> +EXPORT_SYMBOL(os_eventfd_write);
> +EXPORT_SYMBOL(os_sendmsg_fds);
>   
>   EXPORT_SYMBOL(add_sigio_fd);
>   EXPORT_SYMBOL(ignore_sigio_fd);
> diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
> index de58e976b9bc..f256be1d77bd 100644
> --- a/arch/um/kernel/mem.c
> +++ b/arch/um/kernel/mem.c
> @@ -31,6 +31,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD];
>   
>   /* Initialized at boot time, and readonly after that */
>   unsigned long long highmem;
> +EXPORT_SYMBOL(highmem);
>   int kmalloc_ok = 0;
>   
>   /* Used during early boot */
> diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c
> index 5bf56af4d5b9..870c80c88a97 100644
> --- a/arch/um/kernel/physmem.c
> +++ b/arch/um/kernel/physmem.c
> @@ -143,6 +143,7 @@ int phys_mapping(unsigned long phys, unsigned long long *offset_out)
>   
>   	return fd;
>   }
> +EXPORT_SYMBOL(phys_mapping);
>   
>   static int __init uml_mem_setup(char *line, int *add)
>   {
> diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
> index a818ccef30ca..9de212bf4320 100644
> --- a/arch/um/kernel/um_arch.c
> +++ b/arch/um/kernel/um_arch.c
> @@ -113,6 +113,7 @@ static int have_root __initdata = 0;
>   
>   /* Set in uml_mem_setup and modified in linux_main */
>   long long physmem_size = 32 * 1024 * 1024;
> +EXPORT_SYMBOL(physmem_size);
>   
>   static const char *usage_string =
>   "User Mode Linux v%s\n"
> diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
> index f25b110d4e70..0c4e0298398d 100644
> --- a/arch/um/os-Linux/file.c
> +++ b/arch/um/os-Linux/file.c
> @@ -15,6 +15,7 @@
>   #include <sys/sysmacros.h>
>   #include <sys/un.h>
>   #include <sys/types.h>
> +#include <sys/eventfd.h>
>   #include <os.h>
>   
>   static void copy_stat(struct uml_stat *dst, const struct stat64 *src)
> @@ -620,3 +621,66 @@ int os_falloc_punch(int fd, unsigned long long offset, int len)
>   	return n;
>   }
>   
> +int os_eventfd(unsigned int initval, int flags)
> +{
> +	int fd = eventfd(initval, flags);
> +
> +	if (fd < 0)
> +		return -errno;
> +	return fd;
> +}
> +
> +int os_eventfd_read(int fd, unsigned long long *value)
> +{
> +	eventfd_t val;
> +	int err = eventfd_read(fd, &val);
> +
> +	if (err < 0)
> +		return -errno;
> +	*value = val;
> +	return 0;
> +}
> +
> +int os_eventfd_write(int fd, unsigned long long value)
> +{
> +	int err = eventfd_write(fd, value);
> +
> +	if (err < 0)
> +		return -errno;
> +	return 0;
> +}
> +
> +int os_sendmsg_fds(int fd, const void *buf, unsigned int len, const int *fds,
> +		   unsigned int fds_num)
> +{
> +	struct iovec iov = {
> +		.iov_base = (void *) buf,
> +		.iov_len = len,
> +	};
> +	union {
> +		char control[CMSG_SPACE(sizeof(*fds) * OS_SENDMSG_MAX_FDS)];
> +		struct cmsghdr align;
> +	} u;
> +	unsigned int fds_size = sizeof(*fds) * fds_num;
> +	struct msghdr msg = {
> +		.msg_iov = &iov,
> +		.msg_iovlen = 1,
> +		.msg_control = u.control,
> +		.msg_controllen = CMSG_SPACE(fds_size),
> +	};
> +	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
> +	int err;
> +
> +	if (fds_num > OS_SENDMSG_MAX_FDS)
> +		return -EINVAL;
> +	memset(u.control, 0, sizeof(u.control));
> +	cmsg->cmsg_level = SOL_SOCKET;
> +	cmsg->cmsg_type = SCM_RIGHTS;
> +	cmsg->cmsg_len = CMSG_LEN(fds_size);
> +	memcpy(CMSG_DATA(cmsg), fds, fds_size);
> +	err = sendmsg(fd, &msg, 0);
> +
> +	if (err < 0)
> +		return -errno;
> +	return err;
> +}
> 

Cool. I can have a look at it early next week.

Brgds,
Johannes Berg July 31, 2019, 9:05 p.m. UTC | #2
On Wed, 2019-07-31 at 21:50 +0100, Anton Ivanov wrote:

> Cool. I can have a look at it early next week.

Sounds good :-)

I'll be on vacation next week and the week after, but Erel can answer
questions better anyway since he wrote it :-)

Oh, not sure if you know, but qemu has a bunch of vhost-user device
implementations:
https://github.com/qemu/qemu/tree/master/contrib

I think Erel said he tested snabb (formerly snabbswitch?) and the qemu
vhost-user-blk, maybe vhost-user-input? Drivers are in the kernel
already, of course.

johannes
Anton Ivanov July 31, 2019, 9:23 p.m. UTC | #3
On 31/07/2019 22:05, Johannes Berg wrote:
> On Wed, 2019-07-31 at 21:50 +0100, Anton Ivanov wrote:
> 
>> Cool. I can have a look at it early next week.
> 
> Sounds good :-)
> 
> I'll be on vacation next week and the week after, but Erel can answer
> questions better anyway since he wrote it :-)
> 
> Oh, not sure if you know, but qemu has a bunch of vhost-user device
> implementations:
> https://github.com/qemu/qemu/tree/master/contrib

I am familiar. I was looking at it as one of the "common IO" options 
early on, but decided to go for sendmmsg/recvmmsg vector IO instead.

> 
> I think Erel said he tested snabb (formerly snabbswitch?) and the qemu
> vhost-user-blk, maybe vhost-user-input? Drivers are in the kernel
> already, of course.

Cool.

> 
> johannes
> 
> 
> _______________________________________________
> linux-um mailing list
> linux-um@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-um
>
Johannes Berg July 31, 2019, 9:50 p.m. UTC | #4
On Wed, 2019-07-31 at 22:23 +0100, Anton Ivanov wrote:
> 
> I am familiar. I was looking at it as one of the "common IO" options 
> early on, but decided to go for sendmmsg/recvmmsg vector IO instead.

Sure, the vector IO is nice for netdevs :-) But I'm looking to add some
other peripherals, some of which I won't upstream the drivers for
because it makes no sense if you don't have the device implementation,
so vhost-user made a lot of sense here, since I can use a common
mechanism for all of the stuff I want to do, and virtio_console is
probably one of the things - and already exists.

johannes
Anton Ivanov Aug. 8, 2019, 7:18 a.m. UTC | #5
On 31/07/2019 21:29, Johannes Berg wrote:
> From: Erel Geron <erelx.geron@intel.com>
> 
> This module allows virtio devices to be used over a vhost-user socket.
> To instantiate a device, pass the (list of) device socket(s) to the
> kernel or module command line, such as
> 
> 	virtio_uml.device=/var/uml.socket:1
> 
> where the "1" is the device ID, from include/uapi/linux/virtio_ids.h,
> i.e. network in this case.
> 
> Signed-off-by: Erel Geron <erelx.geron@intel.com>
> Signed-off-by: Johannes Berg <johannes.berg@intel.com>
> ---
>   arch/um/drivers/Kconfig      |   7 +
>   arch/um/drivers/Makefile     |   1 +
>   arch/um/drivers/vhost_user.h | 102 ++++
>   arch/um/drivers/virtio_uml.c | 905 +++++++++++++++++++++++++++++++++++
>   arch/um/include/asm/irq.h    |   5 +-
>   arch/um/include/shared/os.h  |   7 +
>   arch/um/kernel/ksyms.c       |   4 +
>   arch/um/kernel/mem.c         |   1 +
>   arch/um/kernel/physmem.c     |   1 +
>   arch/um/kernel/um_arch.c     |   1 +
>   arch/um/os-Linux/file.c      |  64 +++
>   11 files changed, 1096 insertions(+), 2 deletions(-)
>   create mode 100644 arch/um/drivers/vhost_user.h
>   create mode 100644 arch/um/drivers/virtio_uml.c
> 
> diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
> index 2638e46f50cc..fea5a0d522dc 100644
> --- a/arch/um/drivers/Kconfig
> +++ b/arch/um/drivers/Kconfig
> @@ -335,3 +335,10 @@ config UML_NET_SLIRP
>   	  Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp"
>   
>   endmenu
> +
> +config VIRTIO_UML
> +	tristate "UML driver for virtio devices"
> +	select VIRTIO
> +	help
> +	  This driver provides support for virtio based paravirtual device
> +	  drivers over vhost-user sockets.
> diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile
> index 693319839f69..3edf0a7cfd25 100644
> --- a/arch/um/drivers/Makefile
> +++ b/arch/um/drivers/Makefile
> @@ -61,6 +61,7 @@ obj-$(CONFIG_XTERM_CHAN) += xterm.o xterm_kern.o
>   obj-$(CONFIG_UML_WATCHDOG) += harddog.o
>   obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o
>   obj-$(CONFIG_UML_RANDOM) += random.o
> +obj-$(CONFIG_VIRTIO_UML) += virtio_uml.o
>   
>   # pcap_user.o must be added explicitly.
>   USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o
> diff --git a/arch/um/drivers/vhost_user.h b/arch/um/drivers/vhost_user.h
> new file mode 100644
> index 000000000000..2a9829b0782b
> --- /dev/null
> +++ b/arch/um/drivers/vhost_user.h
> @@ -0,0 +1,102 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/* Vhost-user protocol */
> +
> +#ifndef __VHOST_USER_H__
> +#define __VHOST_USER_H__
> +
> +/* Message flags */
> +#define VHOST_USER_FLAG_REPLY		BIT(2)
> +/* Feature bits */
> +#define VHOST_USER_F_PROTOCOL_FEATURES	30
> +/* Protocol feature bits */
> +#define VHOST_USER_PROTOCOL_F_CONFIG	9
> +/* Vring state index masks */
> +#define VHOST_USER_VRING_INDEX_MASK	0xff
> +#define VHOST_USER_VRING_POLL_MASK	BIT(8)
> +
> +/* Supported version */
> +#define VHOST_USER_VERSION		1
> +/* Supported transport features */
> +#define VHOST_USER_SUPPORTED_F		BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)
> +/* Supported protocol features */
> +#define VHOST_USER_SUPPORTED_PROTOCOL_F	BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)
> +
> +enum vhost_user_request {
> +	VHOST_USER_GET_FEATURES = 1,
> +	VHOST_USER_SET_FEATURES = 2,
> +	VHOST_USER_SET_OWNER = 3,
> +	VHOST_USER_RESET_OWNER = 4,
> +	VHOST_USER_SET_MEM_TABLE = 5,
> +	VHOST_USER_SET_LOG_BASE = 6,
> +	VHOST_USER_SET_LOG_FD = 7,
> +	VHOST_USER_SET_VRING_NUM = 8,
> +	VHOST_USER_SET_VRING_ADDR = 9,
> +	VHOST_USER_SET_VRING_BASE = 10,
> +	VHOST_USER_GET_VRING_BASE = 11,
> +	VHOST_USER_SET_VRING_KICK = 12,
> +	VHOST_USER_SET_VRING_CALL = 13,
> +	VHOST_USER_SET_VRING_ERR = 14,
> +	VHOST_USER_GET_PROTOCOL_FEATURES = 15,
> +	VHOST_USER_SET_PROTOCOL_FEATURES = 16,
> +	VHOST_USER_GET_QUEUE_NUM = 17,
> +	VHOST_USER_SET_VRING_ENABLE = 18,
> +	VHOST_USER_SEND_RARP = 19,
> +	VHOST_USER_NET_SEND_MTU = 20,
> +	VHOST_USER_SET_SLAVE_REQ_FD = 21,
> +	VHOST_USER_IOTLB_MSG = 22,
> +	VHOST_USER_SET_VRING_ENDIAN = 23,
> +	VHOST_USER_GET_CONFIG = 24,
> +	VHOST_USER_SET_CONFIG = 25,
> +};
> +
> +struct vhost_user_header {
> +	u32 request; /* Use enum vhost_user_request */
> +	u32 flags;
> +	u32 size;
> +} __packed;
> +
> +struct vhost_user_config {
> +	u32 offset;
> +	u32 size;
> +	u32 flags;
> +	u8 payload[0]; /* Variable length */
> +} __packed;
> +
> +struct vhost_user_vring_state {
> +	u32 index;
> +	u32 num;
> +} __packed;
> +
> +struct vhost_user_vring_addr {
> +	u32 index;
> +	u32 flags;
> +	u64 desc, used, avail, log;
> +} __packed;
> +
> +struct vhost_user_mem_region {
> +	u64 guest_addr;
> +	u64 size;
> +	u64 user_addr;
> +	u64 mmap_offset;
> +} __packed;
> +
> +struct vhost_user_mem_regions {
> +	u32 num;
> +	u32 padding;
> +	struct vhost_user_mem_region regions[2]; /* Currently supporting 2 */
> +} __packed;
> +
> +union vhost_user_payload {
> +	u64 integer;
> +	struct vhost_user_config config;
> +	struct vhost_user_vring_state vring_state;
> +	struct vhost_user_vring_addr vring_addr;
> +	struct vhost_user_mem_regions mem_regions;
> +};
> +
> +struct vhost_user_msg {
> +	struct vhost_user_header header;
> +	union vhost_user_payload payload;
> +} __packed;
> +
> +#endif
> diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c
> new file mode 100644
> index 000000000000..30eeb907f42a
> --- /dev/null
> +++ b/arch/um/drivers/virtio_uml.c
> @@ -0,0 +1,905 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Virtio vhost-user driver
> + *
> + * Copyright(c) 2019 Intel Corporation
> + *
> + * This module allows virtio devices to be used over a vhost-user socket.
> + *
> + * Guest devices can be instantiated by kernel module or command line
> + * parameters. One device will be created for each parameter. Syntax:
> + *
> + *		[virtio_uml.]device=<socket>:<virtio_id>[:<platform_id>]
> + * where:
> + *		<socket>	:= vhost-user socket path to connect
> + *		<virtio_id>	:= virtio device id (as in virtio_ids.h)
> + *		<platform_id>	:= (optional) platform device id
> + *
> + * example:
> + *		virtio_uml.device=/var/uml.socket:1
> + *
> + * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd.
> + */
> +#include <linux/module.h>
> +#include <linux/platform_device.h>
> +#include <linux/slab.h>
> +#include <linux/virtio.h>
> +#include <linux/virtio_config.h>
> +#include <linux/virtio_ring.h>
> +#include <irq_kern.h>
> +#include <init.h>
> +#include <os.h>
> +#include "vhost_user.h"
> +
> +/* Workaround due to a conflict between irq_user.h and irqreturn.h */
> +#ifdef IRQ_NONE
> +#undef IRQ_NONE
> +#endif
> +
> +#define MAX_SUPPORTED_QUEUE_SIZE	256
> +
> +#define to_virtio_uml_device(_vdev) \
> +	container_of(_vdev, struct virtio_uml_device, vdev)
> +
> +struct virtio_uml_device {
> +	struct virtio_device vdev;
> +	struct platform_device *pdev;
> +
> +	int sock;
> +	u64 features;
> +	u64 protocol_features;
> +	u8 status;
> +};
> +
> +struct virtio_uml_vq_info {
> +	int kick_fd, call_fd;
> +	char name[32];
> +};
> +
> +extern unsigned long long physmem_size, highmem;
> +
> +
> +/* Vhost-user protocol */
> +
> +static int vhost_user_send(struct virtio_uml_device *vu_dev,
> +			   struct vhost_user_msg *msg,
> +			   int *fds, size_t num_fds)
> +{
> +	size_t size = sizeof(msg->header) + msg->header.size;
> +	int rc;
> +
> +	msg->header.flags |= VHOST_USER_VERSION;
> +	do {
> +		rc = os_sendmsg_fds(vu_dev->sock, (void *) msg, size, fds,
> +				num_fds);
> +	} while (rc == -EINTR);
> +	if (rc < 0)
> +		return rc;
> +	if (rc != size)
> +		return -ECOMM;
> +	return 0;
> +}
> +
> +static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev,
> +				      u32 request)
> +{
> +	struct vhost_user_msg msg = {
> +		.header.request = request,
> +	};
> +	return vhost_user_send(vu_dev, &msg, NULL, 0);
> +}
> +
> +static int vhost_user_send_u64(struct virtio_uml_device *vu_dev,
> +			       u32 request, u64 value)
> +{
> +	struct vhost_user_msg msg = {
> +		.header.request = request,
> +		.header.size = sizeof(msg.payload.integer),
> +		.payload.integer = value,
> +	};
> +
> +	return vhost_user_send(vu_dev, &msg, NULL, 0);
> +}
> +
> +static int vhost_user_recv_header(struct virtio_uml_device *vu_dev,
> +				  struct vhost_user_msg *msg)
> +{
> +	size_t size = sizeof(msg->header);
> +	int rc;
> +
> +	do {
> +		rc = os_read_file(vu_dev->sock, (void *) msg, size);
> +	} while (rc == -EINTR);
> +	if (rc != size)
> +		return -EPROTO;
> +	if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION))
> +		return -EPROTO;
> +	return 0;
> +}
> +
> +static int vhost_user_recv(struct virtio_uml_device *vu_dev,
> +			   struct vhost_user_msg *msg,
> +			   size_t max_payload_size)
> +{
> +	size_t size;
> +	int rc = vhost_user_recv_header(vu_dev, msg);
> +
> +	if (rc)
> +		return rc;
> +	size = msg->header.size;
> +	if (size > max_payload_size)
> +		return -EPROTO;
> +	do {
> +		rc = os_read_file(vu_dev->sock, (void *) &msg->payload, size);
> +	} while (rc == -EINTR);
> +	if (rc != size)
> +		return -EPROTO;
> +	return 0;
> +}
> +
> +static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev,
> +			       u64 *value)
> +{
> +	struct vhost_user_msg msg;
> +	int rc = vhost_user_recv(vu_dev, &msg, sizeof(msg.payload.integer));
> +
> +	if (rc)
> +		return rc;
> +	if (msg.header.size != sizeof(msg.payload.integer))
> +		return -EPROTO;
> +	*value = msg.payload.integer;
> +	return 0;
> +}
> +
> +static int vhost_user_set_owner(struct virtio_uml_device *vu_dev)
> +{
> +	return vhost_user_send_no_payload(vu_dev, VHOST_USER_SET_OWNER);
> +}
> +
> +static int vhost_user_get_features(struct virtio_uml_device *vu_dev,
> +				   u64 *features)
> +{
> +	int rc = vhost_user_send_no_payload(vu_dev, VHOST_USER_GET_FEATURES);
> +
> +	if (rc)
> +		return rc;
> +	return vhost_user_recv_u64(vu_dev, features);
> +}
> +
> +static int vhost_user_set_features(struct virtio_uml_device *vu_dev,
> +				   u64 features)
> +{
> +	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_FEATURES, features);
> +}
> +
> +static int vhost_user_get_protocol_features(struct virtio_uml_device *vu_dev,
> +					    u64 *protocol_features)
> +{
> +	int rc = vhost_user_send_no_payload(vu_dev,
> +			VHOST_USER_GET_PROTOCOL_FEATURES);
> +
> +	if (rc)
> +		return rc;
> +	return vhost_user_recv_u64(vu_dev, protocol_features);
> +}
> +
> +static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev,
> +					    u64 protocol_features)
> +{
> +	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_PROTOCOL_FEATURES,
> +			protocol_features);
> +}
> +
> +static int vhost_user_init(struct virtio_uml_device *vu_dev)
> +{
> +	int rc = vhost_user_set_owner(vu_dev);
> +
> +	if (rc)
> +		return rc;
> +	rc = vhost_user_get_features(vu_dev, &vu_dev->features);
> +	if (rc)
> +		return rc;
> +
> +	if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) {
> +		rc = vhost_user_get_protocol_features(vu_dev,
> +				&vu_dev->protocol_features);
> +		if (rc)
> +			return rc;
> +		vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F;
> +		rc = vhost_user_set_protocol_features(vu_dev,
> +				vu_dev->protocol_features);
> +	}
> +	return rc;
> +}
> +
> +static int vhost_user_get_config(struct virtio_uml_device *vu_dev,
> +				 u32 offset, void *buf, u32 len)
> +{
> +	u32 cfg_size = offset + len;
> +	struct vhost_user_msg *msg;
> +	size_t payload_size = sizeof(msg->payload.config) + cfg_size;
> +	size_t msg_size = sizeof(msg->header) + payload_size;
> +	int rc;
> +
> +	if (!(vu_dev->protocol_features &
> +			BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
> +		return -EOPNOTSUPP;
> +
> +	msg = kzalloc(msg_size, GFP_KERNEL);
> +	if (!msg)
> +		return -ENOMEM;
> +	msg->header.request = VHOST_USER_GET_CONFIG;
> +	msg->header.size = payload_size;
> +	msg->payload.config.offset = 0;
> +	msg->payload.config.size = cfg_size;
> +
> +	rc = vhost_user_send(vu_dev, msg, NULL, 0);
> +	if (rc)
> +		goto error_get;
> +	rc = vhost_user_recv(vu_dev, msg, msg_size);
> +	if (rc)
> +		goto error_get;
> +	if (msg->header.size != payload_size ||
> +			msg->payload.config.size != cfg_size) {
> +		rc = -EPROTO;
> +		goto error_get;
> +	}
> +	memcpy(buf, msg->payload.config.payload + offset, len);
> +
> +error_get:
> +	kfree(msg);
> +	return rc;
> +}
> +
> +static int vhost_user_set_config(struct virtio_uml_device *vu_dev,
> +				 u32 offset, const void *buf, u32 len)
> +{
> +	struct vhost_user_msg *msg;
> +	size_t payload_size = sizeof(msg->payload.config) + len;
> +	size_t msg_size = sizeof(msg->header) + payload_size;
> +	int rc;
> +
> +	if (!(vu_dev->protocol_features &
> +			BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
> +		return -EOPNOTSUPP;
> +
> +	msg = kzalloc(msg_size, GFP_KERNEL);
> +	if (!msg)
> +		return -ENOMEM;
> +	msg->header.request = VHOST_USER_SET_CONFIG;
> +	msg->header.size = payload_size;
> +	msg->payload.config.offset = offset;
> +	msg->payload.config.size = len;
> +	memcpy(msg->payload.config.payload, buf, len);
> +
> +	rc = vhost_user_send(vu_dev, msg, NULL, 0);
> +
> +	kfree(msg);
> +	return rc;
> +}
> +
> +static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out,
> +				      struct vhost_user_mem_region *region_out)
> +{
> +	unsigned long long mem_offset;
> +	int rc = phys_mapping(addr, &mem_offset);
> +
> +	if (WARN_ON(rc < 0))
> +		return -EFAULT;
> +	*fd_out = rc;
> +	region_out->guest_addr = addr;
> +	region_out->user_addr = addr;
> +	region_out->size = size;
> +	region_out->mmap_offset = mem_offset;
> +
> +	rc = phys_mapping(addr + size - 1, &mem_offset);
> +	if (WARN_ON(rc != *fd_out))
> +		return -EFAULT;
> +	return 0;
> +}
> +
> +static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev)
> +{
> +	struct vhost_user_msg msg = {
> +		.header.request = VHOST_USER_SET_MEM_TABLE,
> +		.header.size = sizeof(msg.payload.mem_regions),
> +		.payload.mem_regions.num = 1,
> +	};
> +	int fds[2];
> +	int rc = vhost_user_init_mem_region(0, physmem_size, &fds[0],
> +			&msg.payload.mem_regions.regions[0]);
> +
> +	if (rc < 0)
> +		return rc;
> +	if (highmem) {
> +		msg.payload.mem_regions.num++;
> +		rc = vhost_user_init_mem_region(__pa(end_iomem), highmem,
> +				&fds[1], &msg.payload.mem_regions.regions[1]);
> +		if (rc < 0)
> +			return rc;
> +	}
> +
> +	return vhost_user_send(vu_dev, &msg, fds, msg.payload.mem_regions.num);
> +}
> +
> +static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev,
> +				      u32 request, u32 index, u32 num)
> +{
> +	struct vhost_user_msg msg = {
> +		.header.request = request,
> +		.header.size = sizeof(msg.payload.vring_state),
> +		.payload.vring_state.index = index,
> +		.payload.vring_state.num = num,
> +	};
> +
> +	return vhost_user_send(vu_dev, &msg, NULL, 0);
> +}
> +
> +static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev,
> +				    u32 index, u32 num)
> +{
> +	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_NUM,
> +			index, num);
> +}
> +
> +static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev,
> +				     u32 index, u32 offset)
> +{
> +	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_BASE,
> +			index, offset);
> +}
> +
> +static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev,
> +				     u32 index, u64 desc, u64 used, u64 avail,
> +				     u64 log)
> +{
> +	struct vhost_user_msg msg = {
> +		.header.request = VHOST_USER_SET_VRING_ADDR,
> +		.header.size = sizeof(msg.payload.vring_addr),
> +		.payload.vring_addr.index = index,
> +		.payload.vring_addr.desc = desc,
> +		.payload.vring_addr.used = used,
> +		.payload.vring_addr.avail = avail,
> +		.payload.vring_addr.log = log,
> +	};
> +
> +	return vhost_user_send(vu_dev, &msg, NULL, 0);
> +}
> +
> +static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev,
> +				   u32 request, int index, int fd)
> +{
> +	struct vhost_user_msg msg = {
> +		.header.request = request,
> +		.header.size = sizeof(msg.payload.integer),
> +		.payload.integer = index,
> +	};
> +
> +	if (index & ~VHOST_USER_VRING_INDEX_MASK)
> +		return -EINVAL;
> +	if (fd < 0) {
> +		msg.payload.integer |= VHOST_USER_VRING_POLL_MASK;
> +		return vhost_user_send(vu_dev, &msg, NULL, 0);
> +	}
> +	return vhost_user_send(vu_dev, &msg, &fd, 1);
> +}
> +
> +static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev,
> +				     int index, int fd)
> +{
> +	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL,
> +			index, fd);
> +}
> +
> +static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev,
> +				     int index, int fd)
> +{
> +	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK,
> +			index, fd);
> +}
> +
> +static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev,
> +				       u32 index, bool enable)
> +{
> +	if (!(vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)))
> +		return 0;
> +
> +	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_ENABLE,
> +			index, enable);
> +}
> +
> +
> +/* Virtio interface */
> +
> +static bool vu_notify(struct virtqueue *vq)
> +{
> +	struct virtio_uml_vq_info *info = vq->priv;
> +	int rc;
> +
> +	do {
> +		rc = os_eventfd_write(info->kick_fd, 1);
> +	} while (rc == -EINTR);
> +	WARN_ON(rc);
> +	return !rc;
> +}
> +
> +static irqreturn_t vu_interrupt(int irq, void *opaque)
> +{
> +	struct virtqueue *vq = opaque;
> +	struct virtio_uml_vq_info *info = vq->priv;
> +	unsigned long long value;
> +	int rc;
> +	irqreturn_t ret = IRQ_NONE;
> +
> +	do {
> +		rc = os_eventfd_read(info->call_fd, &value);
> +		if (!rc)
> +			ret |= vring_interrupt(irq, vq);
> +	} while (!rc || rc == -EINTR);
> +	WARN_ON(rc != -EAGAIN);
> +	return ret;
> +}
> +
> +
> +static void vu_get(struct virtio_device *vdev, unsigned offset,
> +		   void *buf, unsigned len)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	WARN_ON(vhost_user_get_config(vu_dev, offset, buf, len));
> +}
> +
> +static void vu_set(struct virtio_device *vdev, unsigned offset,
> +		   const void *buf, unsigned len)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	WARN_ON(vhost_user_set_config(vu_dev, offset, buf, len));
> +}
> +
> +static u8 vu_get_status(struct virtio_device *vdev)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	return vu_dev->status;
> +}
> +
> +static void vu_set_status(struct virtio_device *vdev, u8 status)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	vu_dev->status = status;
> +}
> +
> +static void vu_reset(struct virtio_device *vdev)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	vu_dev->status = 0;
> +}
> +
> +static void vu_del_vq(struct virtqueue *vq)
> +{
> +	struct virtio_uml_vq_info *info = vq->priv;
> +
> +	um_free_irq(VIRTIO_IRQ, vq);
> +
> +	os_close_file(info->call_fd);
> +	os_close_file(info->kick_fd);
> +
> +	vring_del_virtqueue(vq);
> +	kfree(info);
> +}
> +
> +static void vu_del_vqs(struct virtio_device *vdev)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +	struct virtqueue *vq, *n;
> +	u64 features;
> +
> +	/* Note: reverse order as a workaround to a decoding bug in snabb */
> +	list_for_each_entry_reverse(vq, &vdev->vqs, list)
> +		WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, false));
> +
> +	/* Ensure previous messages have been processed */
> +	WARN_ON(vhost_user_get_features(vu_dev, &features));
> +
> +	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
> +		vu_del_vq(vq);
> +}
> +
> +static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
> +				     unsigned index, vq_callback_t *callback,
> +				     const char *name, bool ctx)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +	struct platform_device *pdev = vu_dev->pdev;
> +	struct virtio_uml_vq_info *info;
> +	struct virtqueue *vq;
> +	int num = MAX_SUPPORTED_QUEUE_SIZE;
> +	int rc;
> +	int call_fds[2];
> +
> +	info = kzalloc(sizeof(*info), GFP_KERNEL);
> +	if (!info) {
> +		rc = -ENOMEM;
> +		goto error_kzalloc;
> +	}
> +	snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name,
> +			pdev->id, name);
> +
> +	rc = os_eventfd(0, 0);
> +	if (rc < 0)
> +		goto error_kick;
> +	info->kick_fd = rc;
> +
> +	/* Use a pipe for call fd, since SIGIO is not supported for eventfd */
> +	rc = os_pipe(call_fds, true, true);
> +	if (rc < 0)
> +		goto error_call;
> +	info->call_fd = call_fds[0];
> +
> +	vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true,
> +			ctx, vu_notify, callback, info->name);
> +	if (!vq) {
> +		rc = -ENOMEM;
> +		goto error_create;
> +	}
> +	vq->priv = info;
> +	num = virtqueue_get_vring_size(vq);
> +
> +	rc = um_request_irq(VIRTIO_IRQ, info->call_fd, IRQ_READ, vu_interrupt,
> +			IRQF_SHARED, info->name, vq);
> +	if (rc)
> +		goto error_irq;
> +
> +	rc = vhost_user_set_vring_num(vu_dev, index, num);
> +	if (rc)
> +		goto error_setup;
> +
> +	rc = vhost_user_set_vring_base(vu_dev, index, 0);
> +	if (rc)
> +		goto error_setup;
> +
> +	rc = vhost_user_set_vring_call(vu_dev, index, call_fds[1]);
> +	if (rc)
> +		goto error_setup;
> +
> +	rc = vhost_user_set_vring_addr(vu_dev, index,
> +			virtqueue_get_desc_addr(vq),
> +			virtqueue_get_used_addr(vq),
> +			virtqueue_get_avail_addr(vq),
> +			(u64) -1);
> +	if (rc)
> +		goto error_setup;
> +
> +	/* Close unused write end of call fds */
> +	os_close_file(call_fds[1]);
> +
> +	return vq;
> +
> +error_setup:
> +	um_free_irq(VIRTIO_IRQ, vq);
> +error_irq:
> +	vring_del_virtqueue(vq);
> +error_create:
> +	os_close_file(call_fds[1]);
> +	os_close_file(info->call_fd);
> +error_call:
> +	os_close_file(info->kick_fd);
> +error_kick:
> +	kfree(info);
> +error_kzalloc:
> +	return ERR_PTR(rc);
> +}
> +
> +static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
> +		       struct virtqueue *vqs[], vq_callback_t *callbacks[],
> +		       const char * const names[], const bool *ctx,
> +		       struct irq_affinity *desc)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +	int i, queue_idx, rc;
> +	struct virtqueue *vq;
> +
> +	rc = vhost_user_set_mem_table(vu_dev);
> +	if (rc)
> +		return rc;
> +
> +	for (i = 0; i < nvqs; ++i) {
> +		if (!names[i]) {
> +			vqs[i] = NULL;
> +			continue;
> +		}
> +
> +		vqs[i] = vu_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
> +				ctx ? ctx[i] : false);
> +		if (IS_ERR(vqs[i])) {
> +			rc = PTR_ERR(vqs[i]);
> +			goto error_setup;
> +		}
> +	}
> +
> +	list_for_each_entry(vq, &vdev->vqs, list) {
> +		struct virtio_uml_vq_info *info = vq->priv;
> +
> +		rc = vhost_user_set_vring_kick(vu_dev, vq->index,
> +				info->kick_fd);
> +		if (rc)
> +			goto error_setup;
> +
> +		rc = vhost_user_set_vring_enable(vu_dev, vq->index, true);
> +		if (rc)
> +			goto error_setup;
> +	}
> +
> +	return 0;
> +
> +error_setup:
> +	vu_del_vqs(vdev);
> +	return rc;
> +}
> +
> +static u64 vu_get_features(struct virtio_device *vdev)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	return vu_dev->features;
> +}
> +
> +static int vu_finalize_features(struct virtio_device *vdev)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +	u64 supported = vdev->features & VHOST_USER_SUPPORTED_F;
> +
> +	vring_transport_features(vdev);
> +	vu_dev->features = vdev->features | supported;
> +
> +	return vhost_user_set_features(vu_dev, vu_dev->features);
> +}
> +
> +static const char *vu_bus_name(struct virtio_device *vdev)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	return vu_dev->pdev->name;
> +}
> +
> +static const struct virtio_config_ops virtio_uml_config_ops = {
> +	.get = vu_get,
> +	.set = vu_set,
> +	.get_status = vu_get_status,
> +	.set_status = vu_set_status,
> +	.reset = vu_reset,
> +	.find_vqs = vu_find_vqs,
> +	.del_vqs = vu_del_vqs,
> +	.get_features = vu_get_features,
> +	.finalize_features = vu_finalize_features,
> +	.bus_name = vu_bus_name,
> +};
> +
> +
> +static void virtio_uml_release_dev(struct device *d)
> +{
> +	struct virtio_device *vdev =
> +			container_of(d, struct virtio_device, dev);
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	os_close_file(vu_dev->sock);
> +}
> +
> +/* Platform device */
> +
> +struct virtio_uml_platform_data {
> +	u32 virtio_device_id;
> +	const char *socket_path;
> +};
> +
> +static int virtio_uml_probe(struct platform_device *pdev)
> +{
> +	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
> +	struct virtio_uml_device *vu_dev;
> +	int rc;
> +
> +	if (!pdata)
> +		return -EINVAL;
> +
> +	vu_dev = devm_kzalloc(&pdev->dev, sizeof(*vu_dev), GFP_KERNEL);
> +	if (!vu_dev)
> +		return -ENOMEM;
> +
> +	vu_dev->vdev.dev.parent = &pdev->dev;
> +	vu_dev->vdev.dev.release = virtio_uml_release_dev;
> +	vu_dev->vdev.config = &virtio_uml_config_ops;
> +	vu_dev->vdev.id.device = pdata->virtio_device_id;
> +	vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID;
> +	vu_dev->pdev = pdev;
> +
> +	do {
> +		rc = os_connect_socket(pdata->socket_path);
> +	} while (rc == -EINTR);
> +	if (rc < 0)
> +		return rc;
> +	vu_dev->sock = rc;
> +
> +	rc = vhost_user_init(vu_dev);
> +	if (rc)
> +		goto error_init;
> +
> +	platform_set_drvdata(pdev, vu_dev);
> +
> +	rc = register_virtio_device(&vu_dev->vdev);
> +	if (rc)
> +		put_device(&vu_dev->vdev.dev);
> +	return rc;
> +
> +error_init:
> +	os_close_file(vu_dev->sock);
> +	return rc;
> +}
> +
> +static int virtio_uml_remove(struct platform_device *pdev)
> +{
> +	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
> +
> +	unregister_virtio_device(&vu_dev->vdev);
> +	return 0;
> +}
> +
> +/* Command line device list */
> +
> +static void vu_cmdline_release_dev(struct device *d)
> +{
> +}
> +
> +static struct device vu_cmdline_parent = {
> +	.init_name = "virtio-uml-cmdline",
> +	.release = vu_cmdline_release_dev,
> +};
> +
> +static bool vu_cmdline_parent_registered;
> +static int vu_cmdline_id;
> +
> +static int vu_cmdline_set(const char *device, const struct kernel_param *kp)
> +{
> +	const char *ids = strchr(device, ':');
> +	unsigned int virtio_device_id;
> +	int processed, consumed, err;
> +	char *socket_path;
> +	struct virtio_uml_platform_data *pdata;
> +	struct platform_device *pdev;
> +
> +	if (!ids || ids == device)
> +		return -EINVAL;
> +
> +	processed = sscanf(ids, ":%u%n:%d%n",
> +			&virtio_device_id, &consumed,
> +			&vu_cmdline_id, &consumed);
> +
> +	if (processed < 1 || ids[consumed])
> +		return -EINVAL;
> +
> +	if (!vu_cmdline_parent_registered) {
> +		err = device_register(&vu_cmdline_parent);
> +		if (err) {
> +			pr_err("Failed to register parent device!\n");
> +			put_device(&vu_cmdline_parent);
> +			return err;
> +		}
> +		vu_cmdline_parent_registered = true;
> +	}
> +
> +	socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL);
> +	if (!socket_path)
> +		return -ENOMEM;
> +
> +	pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
> +	if (!pdata) {
> +		err = -ENOMEM;
> +		goto error_kzalloc;
> +	}
> +
> +	pdata->virtio_device_id = (u32) virtio_device_id;
> +	pdata->socket_path = socket_path;
> +
> +	pr_info("Registering device virtio-uml.%d id=%d at %s\n",
> +			vu_cmdline_id, virtio_device_id, socket_path);
> +
> +	pdev = platform_device_register_data(&vu_cmdline_parent, "virtio-uml",
> +			vu_cmdline_id++, pdata, sizeof(*pdata));
> +	err = PTR_ERR_OR_ZERO(pdev);
> +	if (err)
> +		goto error_register;
> +	return 0;
> +
> +error_register:
> +	kfree(pdata);
> +error_kzalloc:
> +	kfree(socket_path);
> +	return err;
> +}
> +
> +static int vu_cmdline_get_device(struct device *dev, void *data)
> +{
> +	struct platform_device *pdev = to_platform_device(dev);
> +	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
> +	char *buffer = data;
> +	unsigned int len = strlen(buffer);
> +
> +	snprintf(buffer + len, PAGE_SIZE - len, "%s:%d:%d\n",
> +			pdata->socket_path, pdata->virtio_device_id, pdev->id);
> +	return 0;
> +}
> +
> +static int vu_cmdline_get(char *buffer, const struct kernel_param *kp)
> +{
> +	buffer[0] = '\0';
> +	if (vu_cmdline_parent_registered)
> +		device_for_each_child(&vu_cmdline_parent, buffer,
> +				vu_cmdline_get_device);
> +	return strlen(buffer) + 1;
> +}
> +
> +static const struct kernel_param_ops vu_cmdline_param_ops = {
> +	.set = vu_cmdline_set,
> +	.get = vu_cmdline_get,
> +};
> +
> +device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR);
> +
> +
> +static int vu_unregister_cmdline_device(struct device *dev, void *data)
> +{
> +	struct platform_device *pdev = to_platform_device(dev);
> +	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
> +
> +	platform_device_unregister(pdev);
> +	kfree(pdata->socket_path);
> +	kfree(pdata);
> +	return 0;
> +}
> +
> +static void vu_unregister_cmdline_devices(void)
> +{
> +	if (vu_cmdline_parent_registered) {
> +		device_for_each_child(&vu_cmdline_parent, NULL,
> +				vu_unregister_cmdline_device);
> +		device_unregister(&vu_cmdline_parent);
> +		vu_cmdline_parent_registered = false;
> +	}
> +}
> +
> +/* Platform driver */
> +
> +static const struct of_device_id virtio_uml_match[] = {
> +	{ .compatible = "virtio,uml", },
> +	{ }
> +};
> +MODULE_DEVICE_TABLE(of, virtio_uml_match);
> +
> +static struct platform_driver virtio_uml_driver = {
> +	.probe = virtio_uml_probe,
> +	.remove = virtio_uml_remove,
> +	.driver = {
> +		.name = "virtio-uml",
> +		.of_match_table = virtio_uml_match,
> +	},
> +};
> +
> +static int __init virtio_uml_init(void)
> +{
> +	return platform_driver_register(&virtio_uml_driver);
> +}
> +
> +static void __exit virtio_uml_exit(void)
> +{
> +	platform_driver_unregister(&virtio_uml_driver);
> +	vu_unregister_cmdline_devices();
> +}
> +
> +module_init(virtio_uml_init);
> +module_exit(virtio_uml_exit);
> +__uml_exitcall(virtio_uml_exit);
> +
> +MODULE_DESCRIPTION("UML driver for vhost-user virtio devices");
> +MODULE_LICENSE("GPL");
> diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h
> index ce7a78c3bcf2..42c6205e2dc4 100644
> --- a/arch/um/include/asm/irq.h
> +++ b/arch/um/include/asm/irq.h
> @@ -17,17 +17,18 @@
>   #define TELNETD_IRQ 		12
>   #define XTERM_IRQ 		13
>   #define RANDOM_IRQ 		14
> +#define VIRTIO_IRQ		15
>   
>   #ifdef CONFIG_UML_NET_VECTOR
>   
> -#define VECTOR_BASE_IRQ		15
> +#define VECTOR_BASE_IRQ		(VIRTIO_IRQ + 1)
>   #define VECTOR_IRQ_SPACE	8
>   
>   #define LAST_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ - 1)
>   
>   #else
>   
> -#define LAST_IRQ RANDOM_IRQ
> +#define LAST_IRQ VIRTIO_IRQ
>   
>   #endif
>   
> diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
> index 4a62ac4251a5..da7ebbe4ebb3 100644
> --- a/arch/um/include/shared/os.h
> +++ b/arch/um/include/shared/os.h
> @@ -36,6 +36,8 @@
>   #define OS_LIB_PATH	"/usr/lib/"
>   #endif
>   
> +#define OS_SENDMSG_MAX_FDS 8
> +
>   /*
>    * types taken from stat_file() in hostfs_user.c
>    * (if they are wrong here, they are wrong there...).
> @@ -176,6 +178,11 @@ extern unsigned os_major(unsigned long long dev);
>   extern unsigned os_minor(unsigned long long dev);
>   extern unsigned long long os_makedev(unsigned major, unsigned minor);
>   extern int os_falloc_punch(int fd, unsigned long long offset, int count);
> +extern int os_eventfd(unsigned int initval, int flags);
> +extern int os_eventfd_read(int fd, unsigned long long *value);
> +extern int os_eventfd_write(int fd, unsigned long long value);
> +extern int os_sendmsg_fds(int fd, const void *buf, unsigned int len,
> +			  const int *fds, unsigned int fds_num);
>   
>   /* start_up.c */
>   extern void os_early_checks(void);
> diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
> index 232b22307fdd..f3766889226b 100644
> --- a/arch/um/kernel/ksyms.c
> +++ b/arch/um/kernel/ksyms.c
> @@ -38,6 +38,10 @@ EXPORT_SYMBOL(run_helper);
>   EXPORT_SYMBOL(os_major);
>   EXPORT_SYMBOL(os_minor);
>   EXPORT_SYMBOL(os_makedev);
> +EXPORT_SYMBOL(os_eventfd);
> +EXPORT_SYMBOL(os_eventfd_read);
> +EXPORT_SYMBOL(os_eventfd_write);
> +EXPORT_SYMBOL(os_sendmsg_fds);
>   
>   EXPORT_SYMBOL(add_sigio_fd);
>   EXPORT_SYMBOL(ignore_sigio_fd);
> diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
> index de58e976b9bc..f256be1d77bd 100644
> --- a/arch/um/kernel/mem.c
> +++ b/arch/um/kernel/mem.c
> @@ -31,6 +31,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD];
>   
>   /* Initialized at boot time, and readonly after that */
>   unsigned long long highmem;
> +EXPORT_SYMBOL(highmem);
>   int kmalloc_ok = 0;
>   
>   /* Used during early boot */
> diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c
> index 5bf56af4d5b9..870c80c88a97 100644
> --- a/arch/um/kernel/physmem.c
> +++ b/arch/um/kernel/physmem.c
> @@ -143,6 +143,7 @@ int phys_mapping(unsigned long phys, unsigned long long *offset_out)
>   
>   	return fd;
>   }
> +EXPORT_SYMBOL(phys_mapping);
>   
>   static int __init uml_mem_setup(char *line, int *add)
>   {
> diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
> index a818ccef30ca..9de212bf4320 100644
> --- a/arch/um/kernel/um_arch.c
> +++ b/arch/um/kernel/um_arch.c
> @@ -113,6 +113,7 @@ static int have_root __initdata = 0;
>   
>   /* Set in uml_mem_setup and modified in linux_main */
>   long long physmem_size = 32 * 1024 * 1024;
> +EXPORT_SYMBOL(physmem_size);
>   
>   static const char *usage_string =
>   "User Mode Linux v%s\n"
> diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
> index f25b110d4e70..0c4e0298398d 100644
> --- a/arch/um/os-Linux/file.c
> +++ b/arch/um/os-Linux/file.c
> @@ -15,6 +15,7 @@
>   #include <sys/sysmacros.h>
>   #include <sys/un.h>
>   #include <sys/types.h>
> +#include <sys/eventfd.h>
>   #include <os.h>
>   
>   static void copy_stat(struct uml_stat *dst, const struct stat64 *src)
> @@ -620,3 +621,66 @@ int os_falloc_punch(int fd, unsigned long long offset, int len)
>   	return n;
>   }
>   
> +int os_eventfd(unsigned int initval, int flags)
> +{
> +	int fd = eventfd(initval, flags);
> +
> +	if (fd < 0)
> +		return -errno;
> +	return fd;
> +}
> +
> +int os_eventfd_read(int fd, unsigned long long *value)
> +{
> +	eventfd_t val;
> +	int err = eventfd_read(fd, &val);
> +
> +	if (err < 0)
> +		return -errno;
> +	*value = val;
> +	return 0;
> +}
> +
> +int os_eventfd_write(int fd, unsigned long long value)
> +{
> +	int err = eventfd_write(fd, value);
> +
> +	if (err < 0)
> +		return -errno;
> +	return 0;
> +}
> +
> +int os_sendmsg_fds(int fd, const void *buf, unsigned int len, const int *fds,
> +		   unsigned int fds_num)
> +{
> +	struct iovec iov = {
> +		.iov_base = (void *) buf,
> +		.iov_len = len,
> +	};
> +	union {
> +		char control[CMSG_SPACE(sizeof(*fds) * OS_SENDMSG_MAX_FDS)];
> +		struct cmsghdr align;
> +	} u;
> +	unsigned int fds_size = sizeof(*fds) * fds_num;
> +	struct msghdr msg = {
> +		.msg_iov = &iov,
> +		.msg_iovlen = 1,
> +		.msg_control = u.control,
> +		.msg_controllen = CMSG_SPACE(fds_size),
> +	};
> +	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
> +	int err;
> +
> +	if (fds_num > OS_SENDMSG_MAX_FDS)
> +		return -EINVAL;
> +	memset(u.control, 0, sizeof(u.control));
> +	cmsg->cmsg_level = SOL_SOCKET;
> +	cmsg->cmsg_type = SCM_RIGHTS;
> +	cmsg->cmsg_len = CMSG_LEN(fds_size);
> +	memcpy(CMSG_DATA(cmsg), fds, fds_size);
> +	err = sendmsg(fd, &msg, 0);
> +
> +	if (err < 0)
> +		return -errno;
> +	return err;
> +}
> 


Patch looks OK on first read, builds OK, I will test it vs Netsys:BESS 
DPDK virtio-user driver for a couple of days (that will also show if we 
need the "RFC" extra portion).

Brgds,
Anton Ivanov Aug. 14, 2019, 5:39 a.m. UTC | #6
On 31/07/2019 21:29, Johannes Berg wrote:
> From: Erel Geron <erelx.geron@intel.com>
> 
> This module allows virtio devices to be used over a vhost-user socket.
> To instantiate a device, pass the (list of) device socket(s) to the
> kernel or module command line, such as
> 
> 	virtio_uml.device=/var/uml.socket:1
> 
> where the "1" is the device ID, from include/uapi/linux/virtio_ids.h,
> i.e. network in this case.
> 
> Signed-off-by: Erel Geron <erelx.geron@intel.com>
> Signed-off-by: Johannes Berg <johannes.berg@intel.com>
> ---
>   arch/um/drivers/Kconfig      |   7 +
>   arch/um/drivers/Makefile     |   1 +
>   arch/um/drivers/vhost_user.h | 102 ++++
>   arch/um/drivers/virtio_uml.c | 905 +++++++++++++++++++++++++++++++++++
>   arch/um/include/asm/irq.h    |   5 +-
>   arch/um/include/shared/os.h  |   7 +
>   arch/um/kernel/ksyms.c       |   4 +
>   arch/um/kernel/mem.c         |   1 +
>   arch/um/kernel/physmem.c     |   1 +
>   arch/um/kernel/um_arch.c     |   1 +
>   arch/um/os-Linux/file.c      |  64 +++
>   11 files changed, 1096 insertions(+), 2 deletions(-)
>   create mode 100644 arch/um/drivers/vhost_user.h
>   create mode 100644 arch/um/drivers/virtio_uml.c
> 
> diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
> index 2638e46f50cc..fea5a0d522dc 100644
> --- a/arch/um/drivers/Kconfig
> +++ b/arch/um/drivers/Kconfig
> @@ -335,3 +335,10 @@ config UML_NET_SLIRP
>   	  Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp"
>   
>   endmenu
> +
> +config VIRTIO_UML
> +	tristate "UML driver for virtio devices"
> +	select VIRTIO
> +	help
> +	  This driver provides support for virtio based paravirtual device
> +	  drivers over vhost-user sockets.
> diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile
> index 693319839f69..3edf0a7cfd25 100644
> --- a/arch/um/drivers/Makefile
> +++ b/arch/um/drivers/Makefile
> @@ -61,6 +61,7 @@ obj-$(CONFIG_XTERM_CHAN) += xterm.o xterm_kern.o
>   obj-$(CONFIG_UML_WATCHDOG) += harddog.o
>   obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o
>   obj-$(CONFIG_UML_RANDOM) += random.o
> +obj-$(CONFIG_VIRTIO_UML) += virtio_uml.o
>   
>   # pcap_user.o must be added explicitly.
>   USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o
> diff --git a/arch/um/drivers/vhost_user.h b/arch/um/drivers/vhost_user.h
> new file mode 100644
> index 000000000000..2a9829b0782b
> --- /dev/null
> +++ b/arch/um/drivers/vhost_user.h
> @@ -0,0 +1,102 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/* Vhost-user protocol */
> +
> +#ifndef __VHOST_USER_H__
> +#define __VHOST_USER_H__
> +
> +/* Message flags */
> +#define VHOST_USER_FLAG_REPLY		BIT(2)
> +/* Feature bits */
> +#define VHOST_USER_F_PROTOCOL_FEATURES	30
> +/* Protocol feature bits */
> +#define VHOST_USER_PROTOCOL_F_CONFIG	9
> +/* Vring state index masks */
> +#define VHOST_USER_VRING_INDEX_MASK	0xff
> +#define VHOST_USER_VRING_POLL_MASK	BIT(8)
> +
> +/* Supported version */
> +#define VHOST_USER_VERSION		1
> +/* Supported transport features */
> +#define VHOST_USER_SUPPORTED_F		BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)
> +/* Supported protocol features */
> +#define VHOST_USER_SUPPORTED_PROTOCOL_F	BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)
> +
> +enum vhost_user_request {
> +	VHOST_USER_GET_FEATURES = 1,
> +	VHOST_USER_SET_FEATURES = 2,
> +	VHOST_USER_SET_OWNER = 3,
> +	VHOST_USER_RESET_OWNER = 4,
> +	VHOST_USER_SET_MEM_TABLE = 5,
> +	VHOST_USER_SET_LOG_BASE = 6,
> +	VHOST_USER_SET_LOG_FD = 7,
> +	VHOST_USER_SET_VRING_NUM = 8,
> +	VHOST_USER_SET_VRING_ADDR = 9,
> +	VHOST_USER_SET_VRING_BASE = 10,
> +	VHOST_USER_GET_VRING_BASE = 11,
> +	VHOST_USER_SET_VRING_KICK = 12,
> +	VHOST_USER_SET_VRING_CALL = 13,
> +	VHOST_USER_SET_VRING_ERR = 14,
> +	VHOST_USER_GET_PROTOCOL_FEATURES = 15,
> +	VHOST_USER_SET_PROTOCOL_FEATURES = 16,
> +	VHOST_USER_GET_QUEUE_NUM = 17,
> +	VHOST_USER_SET_VRING_ENABLE = 18,
> +	VHOST_USER_SEND_RARP = 19,
> +	VHOST_USER_NET_SEND_MTU = 20,
> +	VHOST_USER_SET_SLAVE_REQ_FD = 21,
> +	VHOST_USER_IOTLB_MSG = 22,
> +	VHOST_USER_SET_VRING_ENDIAN = 23,
> +	VHOST_USER_GET_CONFIG = 24,
> +	VHOST_USER_SET_CONFIG = 25,
> +};
> +
> +struct vhost_user_header {
> +	u32 request; /* Use enum vhost_user_request */
> +	u32 flags;
> +	u32 size;
> +} __packed;
> +
> +struct vhost_user_config {
> +	u32 offset;
> +	u32 size;
> +	u32 flags;
> +	u8 payload[0]; /* Variable length */
> +} __packed;
> +
> +struct vhost_user_vring_state {
> +	u32 index;
> +	u32 num;
> +} __packed;
> +
> +struct vhost_user_vring_addr {
> +	u32 index;
> +	u32 flags;
> +	u64 desc, used, avail, log;
> +} __packed;
> +
> +struct vhost_user_mem_region {
> +	u64 guest_addr;
> +	u64 size;
> +	u64 user_addr;
> +	u64 mmap_offset;
> +} __packed;
> +
> +struct vhost_user_mem_regions {
> +	u32 num;
> +	u32 padding;
> +	struct vhost_user_mem_region regions[2]; /* Currently supporting 2 */
> +} __packed;
> +
> +union vhost_user_payload {
> +	u64 integer;
> +	struct vhost_user_config config;
> +	struct vhost_user_vring_state vring_state;
> +	struct vhost_user_vring_addr vring_addr;
> +	struct vhost_user_mem_regions mem_regions;
> +};
> +
> +struct vhost_user_msg {
> +	struct vhost_user_header header;
> +	union vhost_user_payload payload;
> +} __packed;
> +
> +#endif
> diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c
> new file mode 100644
> index 000000000000..30eeb907f42a
> --- /dev/null
> +++ b/arch/um/drivers/virtio_uml.c
> @@ -0,0 +1,905 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Virtio vhost-user driver
> + *
> + * Copyright(c) 2019 Intel Corporation
> + *
> + * This module allows virtio devices to be used over a vhost-user socket.
> + *
> + * Guest devices can be instantiated by kernel module or command line
> + * parameters. One device will be created for each parameter. Syntax:
> + *
> + *		[virtio_uml.]device=<socket>:<virtio_id>[:<platform_id>]
> + * where:
> + *		<socket>	:= vhost-user socket path to connect
> + *		<virtio_id>	:= virtio device id (as in virtio_ids.h)
> + *		<platform_id>	:= (optional) platform device id
> + *
> + * example:
> + *		virtio_uml.device=/var/uml.socket:1
> + *
> + * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd.
> + */
> +#include <linux/module.h>
> +#include <linux/platform_device.h>
> +#include <linux/slab.h>
> +#include <linux/virtio.h>
> +#include <linux/virtio_config.h>
> +#include <linux/virtio_ring.h>
> +#include <irq_kern.h>
> +#include <init.h>
> +#include <os.h>
> +#include "vhost_user.h"
> +
> +/* Workaround due to a conflict between irq_user.h and irqreturn.h */
> +#ifdef IRQ_NONE
> +#undef IRQ_NONE
> +#endif
> +
> +#define MAX_SUPPORTED_QUEUE_SIZE	256
> +
> +#define to_virtio_uml_device(_vdev) \
> +	container_of(_vdev, struct virtio_uml_device, vdev)
> +
> +struct virtio_uml_device {
> +	struct virtio_device vdev;
> +	struct platform_device *pdev;
> +
> +	int sock;
> +	u64 features;
> +	u64 protocol_features;
> +	u8 status;
> +};
> +
> +struct virtio_uml_vq_info {
> +	int kick_fd, call_fd;
> +	char name[32];
> +};
> +
> +extern unsigned long long physmem_size, highmem;
> +
> +
> +/* Vhost-user protocol */
> +
> +static int vhost_user_send(struct virtio_uml_device *vu_dev,
> +			   struct vhost_user_msg *msg,
> +			   int *fds, size_t num_fds)
> +{
> +	size_t size = sizeof(msg->header) + msg->header.size;
> +	int rc;
> +
> +	msg->header.flags |= VHOST_USER_VERSION;
> +	do {
> +		rc = os_sendmsg_fds(vu_dev->sock, (void *) msg, size, fds,
> +				num_fds);
> +	} while (rc == -EINTR);
> +	if (rc < 0)
> +		return rc;
> +	if (rc != size)
> +		return -ECOMM;
> +	return 0;
> +}
> +
> +static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev,
> +				      u32 request)
> +{
> +	struct vhost_user_msg msg = {
> +		.header.request = request,
> +	};
> +	return vhost_user_send(vu_dev, &msg, NULL, 0);
> +}
> +
> +static int vhost_user_send_u64(struct virtio_uml_device *vu_dev,
> +			       u32 request, u64 value)
> +{
> +	struct vhost_user_msg msg = {
> +		.header.request = request,
> +		.header.size = sizeof(msg.payload.integer),
> +		.payload.integer = value,
> +	};
> +
> +	return vhost_user_send(vu_dev, &msg, NULL, 0);
> +}
> +
> +static int vhost_user_recv_header(struct virtio_uml_device *vu_dev,
> +				  struct vhost_user_msg *msg)
> +{
> +	size_t size = sizeof(msg->header);
> +	int rc;
> +
> +	do {
> +		rc = os_read_file(vu_dev->sock, (void *) msg, size);
> +	} while (rc == -EINTR);
> +	if (rc != size)
> +		return -EPROTO;
> +	if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION))
> +		return -EPROTO;
> +	return 0;
> +}
> +
> +static int vhost_user_recv(struct virtio_uml_device *vu_dev,
> +			   struct vhost_user_msg *msg,
> +			   size_t max_payload_size)
> +{
> +	size_t size;
> +	int rc = vhost_user_recv_header(vu_dev, msg);
> +
> +	if (rc)
> +		return rc;
> +	size = msg->header.size;
> +	if (size > max_payload_size)
> +		return -EPROTO;
> +	do {
> +		rc = os_read_file(vu_dev->sock, (void *) &msg->payload, size);
> +	} while (rc == -EINTR);
> +	if (rc != size)
> +		return -EPROTO;
> +	return 0;
> +}
> +
> +static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev,
> +			       u64 *value)
> +{
> +	struct vhost_user_msg msg;
> +	int rc = vhost_user_recv(vu_dev, &msg, sizeof(msg.payload.integer));
> +
> +	if (rc)
> +		return rc;
> +	if (msg.header.size != sizeof(msg.payload.integer))
> +		return -EPROTO;
> +	*value = msg.payload.integer;
> +	return 0;
> +}
> +
> +static int vhost_user_set_owner(struct virtio_uml_device *vu_dev)
> +{
> +	return vhost_user_send_no_payload(vu_dev, VHOST_USER_SET_OWNER);
> +}
> +
> +static int vhost_user_get_features(struct virtio_uml_device *vu_dev,
> +				   u64 *features)
> +{
> +	int rc = vhost_user_send_no_payload(vu_dev, VHOST_USER_GET_FEATURES);
> +
> +	if (rc)
> +		return rc;
> +	return vhost_user_recv_u64(vu_dev, features);
> +}
> +
> +static int vhost_user_set_features(struct virtio_uml_device *vu_dev,
> +				   u64 features)
> +{
> +	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_FEATURES, features);
> +}
> +
> +static int vhost_user_get_protocol_features(struct virtio_uml_device *vu_dev,
> +					    u64 *protocol_features)
> +{
> +	int rc = vhost_user_send_no_payload(vu_dev,
> +			VHOST_USER_GET_PROTOCOL_FEATURES);
> +
> +	if (rc)
> +		return rc;
> +	return vhost_user_recv_u64(vu_dev, protocol_features);
> +}
> +
> +static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev,
> +					    u64 protocol_features)
> +{
> +	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_PROTOCOL_FEATURES,
> +			protocol_features);
> +}
> +
> +static int vhost_user_init(struct virtio_uml_device *vu_dev)
> +{
> +	int rc = vhost_user_set_owner(vu_dev);
> +
> +	if (rc)
> +		return rc;
> +	rc = vhost_user_get_features(vu_dev, &vu_dev->features);
> +	if (rc)
> +		return rc;
> +
> +	if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) {
> +		rc = vhost_user_get_protocol_features(vu_dev,
> +				&vu_dev->protocol_features);
> +		if (rc)
> +			return rc;
> +		vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F;
> +		rc = vhost_user_set_protocol_features(vu_dev,
> +				vu_dev->protocol_features);
> +	}
> +	return rc;
> +}
> +
> +static int vhost_user_get_config(struct virtio_uml_device *vu_dev,
> +				 u32 offset, void *buf, u32 len)
> +{
> +	u32 cfg_size = offset + len;
> +	struct vhost_user_msg *msg;
> +	size_t payload_size = sizeof(msg->payload.config) + cfg_size;
> +	size_t msg_size = sizeof(msg->header) + payload_size;
> +	int rc;
> +
> +	if (!(vu_dev->protocol_features &
> +			BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
> +		return -EOPNOTSUPP;
> +
> +	msg = kzalloc(msg_size, GFP_KERNEL);
> +	if (!msg)
> +		return -ENOMEM;
> +	msg->header.request = VHOST_USER_GET_CONFIG;
> +	msg->header.size = payload_size;
> +	msg->payload.config.offset = 0;
> +	msg->payload.config.size = cfg_size;
> +
> +	rc = vhost_user_send(vu_dev, msg, NULL, 0);
> +	if (rc)
> +		goto error_get;
> +	rc = vhost_user_recv(vu_dev, msg, msg_size);
> +	if (rc)
> +		goto error_get;
> +	if (msg->header.size != payload_size ||
> +			msg->payload.config.size != cfg_size) {
> +		rc = -EPROTO;
> +		goto error_get;
> +	}
> +	memcpy(buf, msg->payload.config.payload + offset, len);
> +
> +error_get:
> +	kfree(msg);
> +	return rc;
> +}
> +
> +static int vhost_user_set_config(struct virtio_uml_device *vu_dev,
> +				 u32 offset, const void *buf, u32 len)
> +{
> +	struct vhost_user_msg *msg;
> +	size_t payload_size = sizeof(msg->payload.config) + len;
> +	size_t msg_size = sizeof(msg->header) + payload_size;
> +	int rc;
> +
> +	if (!(vu_dev->protocol_features &
> +			BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
> +		return -EOPNOTSUPP;
> +
> +	msg = kzalloc(msg_size, GFP_KERNEL);
> +	if (!msg)
> +		return -ENOMEM;
> +	msg->header.request = VHOST_USER_SET_CONFIG;
> +	msg->header.size = payload_size;
> +	msg->payload.config.offset = offset;
> +	msg->payload.config.size = len;
> +	memcpy(msg->payload.config.payload, buf, len);
> +
> +	rc = vhost_user_send(vu_dev, msg, NULL, 0);
> +
> +	kfree(msg);
> +	return rc;
> +}
> +
> +static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out,
> +				      struct vhost_user_mem_region *region_out)
> +{
> +	unsigned long long mem_offset;
> +	int rc = phys_mapping(addr, &mem_offset);
> +
> +	if (WARN_ON(rc < 0))
> +		return -EFAULT;
> +	*fd_out = rc;
> +	region_out->guest_addr = addr;
> +	region_out->user_addr = addr;
> +	region_out->size = size;
> +	region_out->mmap_offset = mem_offset;
> +
> +	rc = phys_mapping(addr + size - 1, &mem_offset);
> +	if (WARN_ON(rc != *fd_out))
> +		return -EFAULT;
> +	return 0;
> +}
> +
> +static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev)
> +{
> +	struct vhost_user_msg msg = {
> +		.header.request = VHOST_USER_SET_MEM_TABLE,
> +		.header.size = sizeof(msg.payload.mem_regions),
> +		.payload.mem_regions.num = 1,
> +	};
> +	int fds[2];
> +	int rc = vhost_user_init_mem_region(0, physmem_size, &fds[0],
> +			&msg.payload.mem_regions.regions[0]);
> +
> +	if (rc < 0)
> +		return rc;
> +	if (highmem) {
> +		msg.payload.mem_regions.num++;
> +		rc = vhost_user_init_mem_region(__pa(end_iomem), highmem,
> +				&fds[1], &msg.payload.mem_regions.regions[1]);
> +		if (rc < 0)
> +			return rc;
> +	}
> +
> +	return vhost_user_send(vu_dev, &msg, fds, msg.payload.mem_regions.num);
> +}
> +
> +static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev,
> +				      u32 request, u32 index, u32 num)
> +{
> +	struct vhost_user_msg msg = {
> +		.header.request = request,
> +		.header.size = sizeof(msg.payload.vring_state),
> +		.payload.vring_state.index = index,
> +		.payload.vring_state.num = num,
> +	};
> +
> +	return vhost_user_send(vu_dev, &msg, NULL, 0);
> +}
> +
> +static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev,
> +				    u32 index, u32 num)
> +{
> +	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_NUM,
> +			index, num);
> +}
> +
> +static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev,
> +				     u32 index, u32 offset)
> +{
> +	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_BASE,
> +			index, offset);
> +}
> +
> +static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev,
> +				     u32 index, u64 desc, u64 used, u64 avail,
> +				     u64 log)
> +{
> +	struct vhost_user_msg msg = {
> +		.header.request = VHOST_USER_SET_VRING_ADDR,
> +		.header.size = sizeof(msg.payload.vring_addr),
> +		.payload.vring_addr.index = index,
> +		.payload.vring_addr.desc = desc,
> +		.payload.vring_addr.used = used,
> +		.payload.vring_addr.avail = avail,
> +		.payload.vring_addr.log = log,
> +	};
> +
> +	return vhost_user_send(vu_dev, &msg, NULL, 0);
> +}
> +
> +static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev,
> +				   u32 request, int index, int fd)
> +{
> +	struct vhost_user_msg msg = {
> +		.header.request = request,
> +		.header.size = sizeof(msg.payload.integer),
> +		.payload.integer = index,
> +	};
> +
> +	if (index & ~VHOST_USER_VRING_INDEX_MASK)
> +		return -EINVAL;
> +	if (fd < 0) {
> +		msg.payload.integer |= VHOST_USER_VRING_POLL_MASK;
> +		return vhost_user_send(vu_dev, &msg, NULL, 0);
> +	}
> +	return vhost_user_send(vu_dev, &msg, &fd, 1);
> +}
> +
> +static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev,
> +				     int index, int fd)
> +{
> +	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL,
> +			index, fd);
> +}
> +
> +static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev,
> +				     int index, int fd)
> +{
> +	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK,
> +			index, fd);
> +}
> +
> +static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev,
> +				       u32 index, bool enable)
> +{
> +	if (!(vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)))
> +		return 0;
> +
> +	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_ENABLE,
> +			index, enable);
> +}
> +
> +
> +/* Virtio interface */
> +
> +static bool vu_notify(struct virtqueue *vq)
> +{
> +	struct virtio_uml_vq_info *info = vq->priv;
> +	int rc;
> +
> +	do {
> +		rc = os_eventfd_write(info->kick_fd, 1);
> +	} while (rc == -EINTR);
> +	WARN_ON(rc);
> +	return !rc;
> +}
> +
> +static irqreturn_t vu_interrupt(int irq, void *opaque)
> +{
> +	struct virtqueue *vq = opaque;
> +	struct virtio_uml_vq_info *info = vq->priv;
> +	unsigned long long value;
> +	int rc;
> +	irqreturn_t ret = IRQ_NONE;
> +
> +	do {
> +		rc = os_eventfd_read(info->call_fd, &value);
> +		if (!rc)
> +			ret |= vring_interrupt(irq, vq);
> +	} while (!rc || rc == -EINTR);
> +	WARN_ON(rc != -EAGAIN);
> +	return ret;
> +}
> +
> +
> +static void vu_get(struct virtio_device *vdev, unsigned offset,
> +		   void *buf, unsigned len)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	WARN_ON(vhost_user_get_config(vu_dev, offset, buf, len));
> +}
> +
> +static void vu_set(struct virtio_device *vdev, unsigned offset,
> +		   const void *buf, unsigned len)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	WARN_ON(vhost_user_set_config(vu_dev, offset, buf, len));
> +}
> +
> +static u8 vu_get_status(struct virtio_device *vdev)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	return vu_dev->status;
> +}
> +
> +static void vu_set_status(struct virtio_device *vdev, u8 status)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	vu_dev->status = status;
> +}
> +
> +static void vu_reset(struct virtio_device *vdev)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	vu_dev->status = 0;
> +}
> +
> +static void vu_del_vq(struct virtqueue *vq)
> +{
> +	struct virtio_uml_vq_info *info = vq->priv;
> +
> +	um_free_irq(VIRTIO_IRQ, vq);
> +
> +	os_close_file(info->call_fd);
> +	os_close_file(info->kick_fd);
> +
> +	vring_del_virtqueue(vq);
> +	kfree(info);
> +}
> +
> +static void vu_del_vqs(struct virtio_device *vdev)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +	struct virtqueue *vq, *n;
> +	u64 features;
> +
> +	/* Note: reverse order as a workaround to a decoding bug in snabb */
> +	list_for_each_entry_reverse(vq, &vdev->vqs, list)
> +		WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, false));
> +
> +	/* Ensure previous messages have been processed */
> +	WARN_ON(vhost_user_get_features(vu_dev, &features));
> +
> +	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
> +		vu_del_vq(vq);
> +}
> +
> +static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
> +				     unsigned index, vq_callback_t *callback,
> +				     const char *name, bool ctx)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +	struct platform_device *pdev = vu_dev->pdev;
> +	struct virtio_uml_vq_info *info;
> +	struct virtqueue *vq;
> +	int num = MAX_SUPPORTED_QUEUE_SIZE;
> +	int rc;
> +	int call_fds[2];
> +
> +	info = kzalloc(sizeof(*info), GFP_KERNEL);
> +	if (!info) {
> +		rc = -ENOMEM;
> +		goto error_kzalloc;
> +	}
> +	snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name,
> +			pdev->id, name);
> +
> +	rc = os_eventfd(0, 0);
> +	if (rc < 0)
> +		goto error_kick;
> +	info->kick_fd = rc;
> +
> +	/* Use a pipe for call fd, since SIGIO is not supported for eventfd */
> +	rc = os_pipe(call_fds, true, true);
> +	if (rc < 0)
> +		goto error_call;
> +	info->call_fd = call_fds[0];
> +
> +	vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true,
> +			ctx, vu_notify, callback, info->name);
> +	if (!vq) {
> +		rc = -ENOMEM;
> +		goto error_create;
> +	}
> +	vq->priv = info;
> +	num = virtqueue_get_vring_size(vq);
> +
> +	rc = um_request_irq(VIRTIO_IRQ, info->call_fd, IRQ_READ, vu_interrupt,
> +			IRQF_SHARED, info->name, vq);
> +	if (rc)
> +		goto error_irq;
> +
> +	rc = vhost_user_set_vring_num(vu_dev, index, num);
> +	if (rc)
> +		goto error_setup;
> +
> +	rc = vhost_user_set_vring_base(vu_dev, index, 0);
> +	if (rc)
> +		goto error_setup;
> +
> +	rc = vhost_user_set_vring_call(vu_dev, index, call_fds[1]);
> +	if (rc)
> +		goto error_setup;
> +
> +	rc = vhost_user_set_vring_addr(vu_dev, index,
> +			virtqueue_get_desc_addr(vq),
> +			virtqueue_get_used_addr(vq),
> +			virtqueue_get_avail_addr(vq),
> +			(u64) -1);
> +	if (rc)
> +		goto error_setup;
> +
> +	/* Close unused write end of call fds */
> +	os_close_file(call_fds[1]);
> +
> +	return vq;
> +
> +error_setup:
> +	um_free_irq(VIRTIO_IRQ, vq);
> +error_irq:
> +	vring_del_virtqueue(vq);
> +error_create:
> +	os_close_file(call_fds[1]);
> +	os_close_file(info->call_fd);
> +error_call:
> +	os_close_file(info->kick_fd);
> +error_kick:
> +	kfree(info);
> +error_kzalloc:
> +	return ERR_PTR(rc);
> +}
> +
> +static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
> +		       struct virtqueue *vqs[], vq_callback_t *callbacks[],
> +		       const char * const names[], const bool *ctx,
> +		       struct irq_affinity *desc)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +	int i, queue_idx, rc;
> +	struct virtqueue *vq;
> +
> +	rc = vhost_user_set_mem_table(vu_dev);
> +	if (rc)
> +		return rc;
> +
> +	for (i = 0; i < nvqs; ++i) {
> +		if (!names[i]) {
> +			vqs[i] = NULL;
> +			continue;
> +		}
> +
> +		vqs[i] = vu_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
> +				ctx ? ctx[i] : false);
> +		if (IS_ERR(vqs[i])) {
> +			rc = PTR_ERR(vqs[i]);
> +			goto error_setup;
> +		}
> +	}
> +
> +	list_for_each_entry(vq, &vdev->vqs, list) {
> +		struct virtio_uml_vq_info *info = vq->priv;
> +
> +		rc = vhost_user_set_vring_kick(vu_dev, vq->index,
> +				info->kick_fd);
> +		if (rc)
> +			goto error_setup;
> +
> +		rc = vhost_user_set_vring_enable(vu_dev, vq->index, true);
> +		if (rc)
> +			goto error_setup;
> +	}
> +
> +	return 0;
> +
> +error_setup:
> +	vu_del_vqs(vdev);
> +	return rc;
> +}
> +
> +static u64 vu_get_features(struct virtio_device *vdev)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	return vu_dev->features;
> +}
> +
> +static int vu_finalize_features(struct virtio_device *vdev)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +	u64 supported = vdev->features & VHOST_USER_SUPPORTED_F;
> +
> +	vring_transport_features(vdev);
> +	vu_dev->features = vdev->features | supported;
> +
> +	return vhost_user_set_features(vu_dev, vu_dev->features);
> +}
> +
> +static const char *vu_bus_name(struct virtio_device *vdev)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	return vu_dev->pdev->name;
> +}
> +
> +static const struct virtio_config_ops virtio_uml_config_ops = {
> +	.get = vu_get,
> +	.set = vu_set,
> +	.get_status = vu_get_status,
> +	.set_status = vu_set_status,
> +	.reset = vu_reset,
> +	.find_vqs = vu_find_vqs,
> +	.del_vqs = vu_del_vqs,
> +	.get_features = vu_get_features,
> +	.finalize_features = vu_finalize_features,
> +	.bus_name = vu_bus_name,
> +};
> +
> +
> +static void virtio_uml_release_dev(struct device *d)
> +{
> +	struct virtio_device *vdev =
> +			container_of(d, struct virtio_device, dev);
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	os_close_file(vu_dev->sock);
> +}
> +
> +/* Platform device */
> +
> +struct virtio_uml_platform_data {
> +	u32 virtio_device_id;
> +	const char *socket_path;
> +};
> +
> +static int virtio_uml_probe(struct platform_device *pdev)
> +{
> +	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
> +	struct virtio_uml_device *vu_dev;
> +	int rc;
> +
> +	if (!pdata)
> +		return -EINVAL;
> +
> +	vu_dev = devm_kzalloc(&pdev->dev, sizeof(*vu_dev), GFP_KERNEL);
> +	if (!vu_dev)
> +		return -ENOMEM;
> +
> +	vu_dev->vdev.dev.parent = &pdev->dev;
> +	vu_dev->vdev.dev.release = virtio_uml_release_dev;
> +	vu_dev->vdev.config = &virtio_uml_config_ops;
> +	vu_dev->vdev.id.device = pdata->virtio_device_id;
> +	vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID;
> +	vu_dev->pdev = pdev;
> +
> +	do {
> +		rc = os_connect_socket(pdata->socket_path);
> +	} while (rc == -EINTR);
> +	if (rc < 0)
> +		return rc;
> +	vu_dev->sock = rc;
> +
> +	rc = vhost_user_init(vu_dev);
> +	if (rc)
> +		goto error_init;
> +
> +	platform_set_drvdata(pdev, vu_dev);
> +
> +	rc = register_virtio_device(&vu_dev->vdev);
> +	if (rc)
> +		put_device(&vu_dev->vdev.dev);
> +	return rc;
> +
> +error_init:
> +	os_close_file(vu_dev->sock);
> +	return rc;
> +}
> +
> +static int virtio_uml_remove(struct platform_device *pdev)
> +{
> +	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
> +
> +	unregister_virtio_device(&vu_dev->vdev);
> +	return 0;
> +}
> +
> +/* Command line device list */
> +
> +static void vu_cmdline_release_dev(struct device *d)
> +{
> +}
> +
> +static struct device vu_cmdline_parent = {
> +	.init_name = "virtio-uml-cmdline",
> +	.release = vu_cmdline_release_dev,
> +};
> +
> +static bool vu_cmdline_parent_registered;
> +static int vu_cmdline_id;
> +
> +static int vu_cmdline_set(const char *device, const struct kernel_param *kp)
> +{
> +	const char *ids = strchr(device, ':');
> +	unsigned int virtio_device_id;
> +	int processed, consumed, err;
> +	char *socket_path;
> +	struct virtio_uml_platform_data *pdata;
> +	struct platform_device *pdev;
> +
> +	if (!ids || ids == device)
> +		return -EINVAL;
> +
> +	processed = sscanf(ids, ":%u%n:%d%n",
> +			&virtio_device_id, &consumed,
> +			&vu_cmdline_id, &consumed);
> +
> +	if (processed < 1 || ids[consumed])
> +		return -EINVAL;
> +
> +	if (!vu_cmdline_parent_registered) {
> +		err = device_register(&vu_cmdline_parent);
> +		if (err) {
> +			pr_err("Failed to register parent device!\n");
> +			put_device(&vu_cmdline_parent);
> +			return err;
> +		}
> +		vu_cmdline_parent_registered = true;
> +	}
> +
> +	socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL);
> +	if (!socket_path)
> +		return -ENOMEM;
> +
> +	pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
> +	if (!pdata) {
> +		err = -ENOMEM;
> +		goto error_kzalloc;
> +	}
> +
> +	pdata->virtio_device_id = (u32) virtio_device_id;
> +	pdata->socket_path = socket_path;
> +
> +	pr_info("Registering device virtio-uml.%d id=%d at %s\n",
> +			vu_cmdline_id, virtio_device_id, socket_path);
> +
> +	pdev = platform_device_register_data(&vu_cmdline_parent, "virtio-uml",
> +			vu_cmdline_id++, pdata, sizeof(*pdata));
> +	err = PTR_ERR_OR_ZERO(pdev);
> +	if (err)
> +		goto error_register;
> +	return 0;
> +
> +error_register:
> +	kfree(pdata);
> +error_kzalloc:
> +	kfree(socket_path);
> +	return err;
> +}
> +
> +static int vu_cmdline_get_device(struct device *dev, void *data)
> +{
> +	struct platform_device *pdev = to_platform_device(dev);
> +	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
> +	char *buffer = data;
> +	unsigned int len = strlen(buffer);
> +
> +	snprintf(buffer + len, PAGE_SIZE - len, "%s:%d:%d\n",
> +			pdata->socket_path, pdata->virtio_device_id, pdev->id);
> +	return 0;
> +}
> +
> +static int vu_cmdline_get(char *buffer, const struct kernel_param *kp)
> +{
> +	buffer[0] = '\0';
> +	if (vu_cmdline_parent_registered)
> +		device_for_each_child(&vu_cmdline_parent, buffer,
> +				vu_cmdline_get_device);
> +	return strlen(buffer) + 1;
> +}
> +
> +static const struct kernel_param_ops vu_cmdline_param_ops = {
> +	.set = vu_cmdline_set,
> +	.get = vu_cmdline_get,
> +};
> +
> +device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR);
> +
> +
> +static int vu_unregister_cmdline_device(struct device *dev, void *data)
> +{
> +	struct platform_device *pdev = to_platform_device(dev);
> +	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
> +
> +	platform_device_unregister(pdev);
> +	kfree(pdata->socket_path);
> +	kfree(pdata);
> +	return 0;
> +}
> +
> +static void vu_unregister_cmdline_devices(void)
> +{
> +	if (vu_cmdline_parent_registered) {
> +		device_for_each_child(&vu_cmdline_parent, NULL,
> +				vu_unregister_cmdline_device);
> +		device_unregister(&vu_cmdline_parent);
> +		vu_cmdline_parent_registered = false;
> +	}
> +}
> +
> +/* Platform driver */
> +
> +static const struct of_device_id virtio_uml_match[] = {
> +	{ .compatible = "virtio,uml", },
> +	{ }
> +};
> +MODULE_DEVICE_TABLE(of, virtio_uml_match);
> +
> +static struct platform_driver virtio_uml_driver = {
> +	.probe = virtio_uml_probe,
> +	.remove = virtio_uml_remove,
> +	.driver = {
> +		.name = "virtio-uml",
> +		.of_match_table = virtio_uml_match,
> +	},
> +};
> +
> +static int __init virtio_uml_init(void)
> +{
> +	return platform_driver_register(&virtio_uml_driver);
> +}
> +
> +static void __exit virtio_uml_exit(void)
> +{
> +	platform_driver_unregister(&virtio_uml_driver);
> +	vu_unregister_cmdline_devices();
> +}
> +
> +module_init(virtio_uml_init);
> +module_exit(virtio_uml_exit);
> +__uml_exitcall(virtio_uml_exit);
> +
> +MODULE_DESCRIPTION("UML driver for vhost-user virtio devices");
> +MODULE_LICENSE("GPL");
> diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h
> index ce7a78c3bcf2..42c6205e2dc4 100644
> --- a/arch/um/include/asm/irq.h
> +++ b/arch/um/include/asm/irq.h
> @@ -17,17 +17,18 @@
>   #define TELNETD_IRQ 		12
>   #define XTERM_IRQ 		13
>   #define RANDOM_IRQ 		14
> +#define VIRTIO_IRQ		15
>   
>   #ifdef CONFIG_UML_NET_VECTOR
>   
> -#define VECTOR_BASE_IRQ		15
> +#define VECTOR_BASE_IRQ		(VIRTIO_IRQ + 1)
>   #define VECTOR_IRQ_SPACE	8
>   
>   #define LAST_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ - 1)
>   
>   #else
>   
> -#define LAST_IRQ RANDOM_IRQ
> +#define LAST_IRQ VIRTIO_IRQ
>   
>   #endif
>   
> diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
> index 4a62ac4251a5..da7ebbe4ebb3 100644
> --- a/arch/um/include/shared/os.h
> +++ b/arch/um/include/shared/os.h
> @@ -36,6 +36,8 @@
>   #define OS_LIB_PATH	"/usr/lib/"
>   #endif
>   
> +#define OS_SENDMSG_MAX_FDS 8
> +
>   /*
>    * types taken from stat_file() in hostfs_user.c
>    * (if they are wrong here, they are wrong there...).
> @@ -176,6 +178,11 @@ extern unsigned os_major(unsigned long long dev);
>   extern unsigned os_minor(unsigned long long dev);
>   extern unsigned long long os_makedev(unsigned major, unsigned minor);
>   extern int os_falloc_punch(int fd, unsigned long long offset, int count);
> +extern int os_eventfd(unsigned int initval, int flags);
> +extern int os_eventfd_read(int fd, unsigned long long *value);
> +extern int os_eventfd_write(int fd, unsigned long long value);
> +extern int os_sendmsg_fds(int fd, const void *buf, unsigned int len,
> +			  const int *fds, unsigned int fds_num);
>   
>   /* start_up.c */
>   extern void os_early_checks(void);
> diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
> index 232b22307fdd..f3766889226b 100644
> --- a/arch/um/kernel/ksyms.c
> +++ b/arch/um/kernel/ksyms.c
> @@ -38,6 +38,10 @@ EXPORT_SYMBOL(run_helper);
>   EXPORT_SYMBOL(os_major);
>   EXPORT_SYMBOL(os_minor);
>   EXPORT_SYMBOL(os_makedev);
> +EXPORT_SYMBOL(os_eventfd);
> +EXPORT_SYMBOL(os_eventfd_read);
> +EXPORT_SYMBOL(os_eventfd_write);
> +EXPORT_SYMBOL(os_sendmsg_fds);
>   
>   EXPORT_SYMBOL(add_sigio_fd);
>   EXPORT_SYMBOL(ignore_sigio_fd);
> diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
> index de58e976b9bc..f256be1d77bd 100644
> --- a/arch/um/kernel/mem.c
> +++ b/arch/um/kernel/mem.c
> @@ -31,6 +31,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD];
>   
>   /* Initialized at boot time, and readonly after that */
>   unsigned long long highmem;
> +EXPORT_SYMBOL(highmem);
>   int kmalloc_ok = 0;
>   
>   /* Used during early boot */
> diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c
> index 5bf56af4d5b9..870c80c88a97 100644
> --- a/arch/um/kernel/physmem.c
> +++ b/arch/um/kernel/physmem.c
> @@ -143,6 +143,7 @@ int phys_mapping(unsigned long phys, unsigned long long *offset_out)
>   
>   	return fd;
>   }
> +EXPORT_SYMBOL(phys_mapping);
>   
>   static int __init uml_mem_setup(char *line, int *add)
>   {
> diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
> index a818ccef30ca..9de212bf4320 100644
> --- a/arch/um/kernel/um_arch.c
> +++ b/arch/um/kernel/um_arch.c
> @@ -113,6 +113,7 @@ static int have_root __initdata = 0;
>   
>   /* Set in uml_mem_setup and modified in linux_main */
>   long long physmem_size = 32 * 1024 * 1024;
> +EXPORT_SYMBOL(physmem_size);
>   
>   static const char *usage_string =
>   "User Mode Linux v%s\n"
> diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
> index f25b110d4e70..0c4e0298398d 100644
> --- a/arch/um/os-Linux/file.c
> +++ b/arch/um/os-Linux/file.c
> @@ -15,6 +15,7 @@
>   #include <sys/sysmacros.h>
>   #include <sys/un.h>
>   #include <sys/types.h>
> +#include <sys/eventfd.h>
>   #include <os.h>
>   
>   static void copy_stat(struct uml_stat *dst, const struct stat64 *src)
> @@ -620,3 +621,66 @@ int os_falloc_punch(int fd, unsigned long long offset, int len)
>   	return n;
>   }
>   
> +int os_eventfd(unsigned int initval, int flags)
> +{
> +	int fd = eventfd(initval, flags);
> +
> +	if (fd < 0)
> +		return -errno;
> +	return fd;
> +}
> +
> +int os_eventfd_read(int fd, unsigned long long *value)
> +{
> +	eventfd_t val;
> +	int err = eventfd_read(fd, &val);
> +
> +	if (err < 0)
> +		return -errno;
> +	*value = val;
> +	return 0;
> +}
> +
> +int os_eventfd_write(int fd, unsigned long long value)
> +{
> +	int err = eventfd_write(fd, value);
> +
> +	if (err < 0)
> +		return -errno;
> +	return 0;
> +}
> +
> +int os_sendmsg_fds(int fd, const void *buf, unsigned int len, const int *fds,
> +		   unsigned int fds_num)
> +{
> +	struct iovec iov = {
> +		.iov_base = (void *) buf,
> +		.iov_len = len,
> +	};
> +	union {
> +		char control[CMSG_SPACE(sizeof(*fds) * OS_SENDMSG_MAX_FDS)];
> +		struct cmsghdr align;
> +	} u;
> +	unsigned int fds_size = sizeof(*fds) * fds_num;
> +	struct msghdr msg = {
> +		.msg_iov = &iov,
> +		.msg_iovlen = 1,
> +		.msg_control = u.control,
> +		.msg_controllen = CMSG_SPACE(fds_size),
> +	};
> +	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
> +	int err;
> +
> +	if (fds_num > OS_SENDMSG_MAX_FDS)
> +		return -EINVAL;
> +	memset(u.control, 0, sizeof(u.control));
> +	cmsg->cmsg_level = SOL_SOCKET;
> +	cmsg->cmsg_type = SCM_RIGHTS;
> +	cmsg->cmsg_len = CMSG_LEN(fds_size);
> +	memcpy(CMSG_DATA(cmsg), fds, fds_size);
> +	err = sendmsg(fd, &msg, 0);
> +
> +	if (err < 0)
> +		return -errno;
> +	return err;
> +}
> 

Can you send me your config?

It goes into an always busy loop early in boot with mine and the boot 
never completes. I will debug it in the meantime, but having the config 
with which it works will be helpful.

For the record - I have vector enabled and I tried virtio and virtio-net 
compiled in.
Anton Ivanov Aug. 14, 2019, 5:49 a.m. UTC | #7
On 14/08/2019 06:39, Anton Ivanov wrote:
> 
> 
> On 31/07/2019 21:29, Johannes Berg wrote:
>> From: Erel Geron <erelx.geron@intel.com>
>>
>> This module allows virtio devices to be used over a vhost-user socket.
>> To instantiate a device, pass the (list of) device socket(s) to the
>> kernel or module command line, such as
>>
>>     virtio_uml.device=/var/uml.socket:1
>>
>> where the "1" is the device ID, from include/uapi/linux/virtio_ids.h,
>> i.e. network in this case.
>>
>> Signed-off-by: Erel Geron <erelx.geron@intel.com>
>> Signed-off-by: Johannes Berg <johannes.berg@intel.com>
>> ---
>>   arch/um/drivers/Kconfig      |   7 +
>>   arch/um/drivers/Makefile     |   1 +
>>   arch/um/drivers/vhost_user.h | 102 ++++
>>   arch/um/drivers/virtio_uml.c | 905 +++++++++++++++++++++++++++++++++++
>>   arch/um/include/asm/irq.h    |   5 +-
>>   arch/um/include/shared/os.h  |   7 +
>>   arch/um/kernel/ksyms.c       |   4 +
>>   arch/um/kernel/mem.c         |   1 +
>>   arch/um/kernel/physmem.c     |   1 +
>>   arch/um/kernel/um_arch.c     |   1 +
>>   arch/um/os-Linux/file.c      |  64 +++
>>   11 files changed, 1096 insertions(+), 2 deletions(-)
>>   create mode 100644 arch/um/drivers/vhost_user.h
>>   create mode 100644 arch/um/drivers/virtio_uml.c
>>
>> diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
>> index 2638e46f50cc..fea5a0d522dc 100644
>> --- a/arch/um/drivers/Kconfig
>> +++ b/arch/um/drivers/Kconfig
>> @@ -335,3 +335,10 @@ config UML_NET_SLIRP
>>         Startup example: 
>> "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp"
>>   endmenu
>> +
>> +config VIRTIO_UML
>> +    tristate "UML driver for virtio devices"
>> +    select VIRTIO
>> +    help
>> +      This driver provides support for virtio based paravirtual device
>> +      drivers over vhost-user sockets.
>> diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile
>> index 693319839f69..3edf0a7cfd25 100644
>> --- a/arch/um/drivers/Makefile
>> +++ b/arch/um/drivers/Makefile
>> @@ -61,6 +61,7 @@ obj-$(CONFIG_XTERM_CHAN) += xterm.o xterm_kern.o
>>   obj-$(CONFIG_UML_WATCHDOG) += harddog.o
>>   obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o
>>   obj-$(CONFIG_UML_RANDOM) += random.o
>> +obj-$(CONFIG_VIRTIO_UML) += virtio_uml.o
>>   # pcap_user.o must be added explicitly.
>>   USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o 
>> pcap_user.o vde_user.o vector_user.o
>> diff --git a/arch/um/drivers/vhost_user.h b/arch/um/drivers/vhost_user.h
>> new file mode 100644
>> index 000000000000..2a9829b0782b
>> --- /dev/null
>> +++ b/arch/um/drivers/vhost_user.h
>> @@ -0,0 +1,102 @@
>> +// SPDX-License-Identifier: GPL-2.0-or-later
>> +/* Vhost-user protocol */
>> +
>> +#ifndef __VHOST_USER_H__
>> +#define __VHOST_USER_H__
>> +
>> +/* Message flags */
>> +#define VHOST_USER_FLAG_REPLY        BIT(2)
>> +/* Feature bits */
>> +#define VHOST_USER_F_PROTOCOL_FEATURES    30
>> +/* Protocol feature bits */
>> +#define VHOST_USER_PROTOCOL_F_CONFIG    9
>> +/* Vring state index masks */
>> +#define VHOST_USER_VRING_INDEX_MASK    0xff
>> +#define VHOST_USER_VRING_POLL_MASK    BIT(8)
>> +
>> +/* Supported version */
>> +#define VHOST_USER_VERSION        1
>> +/* Supported transport features */
>> +#define VHOST_USER_SUPPORTED_F        
>> BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)
>> +/* Supported protocol features */
>> +#define VHOST_USER_SUPPORTED_PROTOCOL_F    
>> BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)
>> +
>> +enum vhost_user_request {
>> +    VHOST_USER_GET_FEATURES = 1,
>> +    VHOST_USER_SET_FEATURES = 2,
>> +    VHOST_USER_SET_OWNER = 3,
>> +    VHOST_USER_RESET_OWNER = 4,
>> +    VHOST_USER_SET_MEM_TABLE = 5,
>> +    VHOST_USER_SET_LOG_BASE = 6,
>> +    VHOST_USER_SET_LOG_FD = 7,
>> +    VHOST_USER_SET_VRING_NUM = 8,
>> +    VHOST_USER_SET_VRING_ADDR = 9,
>> +    VHOST_USER_SET_VRING_BASE = 10,
>> +    VHOST_USER_GET_VRING_BASE = 11,
>> +    VHOST_USER_SET_VRING_KICK = 12,
>> +    VHOST_USER_SET_VRING_CALL = 13,
>> +    VHOST_USER_SET_VRING_ERR = 14,
>> +    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
>> +    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
>> +    VHOST_USER_GET_QUEUE_NUM = 17,
>> +    VHOST_USER_SET_VRING_ENABLE = 18,
>> +    VHOST_USER_SEND_RARP = 19,
>> +    VHOST_USER_NET_SEND_MTU = 20,
>> +    VHOST_USER_SET_SLAVE_REQ_FD = 21,
>> +    VHOST_USER_IOTLB_MSG = 22,
>> +    VHOST_USER_SET_VRING_ENDIAN = 23,
>> +    VHOST_USER_GET_CONFIG = 24,
>> +    VHOST_USER_SET_CONFIG = 25,
>> +};
>> +
>> +struct vhost_user_header {
>> +    u32 request; /* Use enum vhost_user_request */
>> +    u32 flags;
>> +    u32 size;
>> +} __packed;
>> +
>> +struct vhost_user_config {
>> +    u32 offset;
>> +    u32 size;
>> +    u32 flags;
>> +    u8 payload[0]; /* Variable length */
>> +} __packed;
>> +
>> +struct vhost_user_vring_state {
>> +    u32 index;
>> +    u32 num;
>> +} __packed;
>> +
>> +struct vhost_user_vring_addr {
>> +    u32 index;
>> +    u32 flags;
>> +    u64 desc, used, avail, log;
>> +} __packed;
>> +
>> +struct vhost_user_mem_region {
>> +    u64 guest_addr;
>> +    u64 size;
>> +    u64 user_addr;
>> +    u64 mmap_offset;
>> +} __packed;
>> +
>> +struct vhost_user_mem_regions {
>> +    u32 num;
>> +    u32 padding;
>> +    struct vhost_user_mem_region regions[2]; /* Currently supporting 
>> 2 */
>> +} __packed;
>> +
>> +union vhost_user_payload {
>> +    u64 integer;
>> +    struct vhost_user_config config;
>> +    struct vhost_user_vring_state vring_state;
>> +    struct vhost_user_vring_addr vring_addr;
>> +    struct vhost_user_mem_regions mem_regions;
>> +};
>> +
>> +struct vhost_user_msg {
>> +    struct vhost_user_header header;
>> +    union vhost_user_payload payload;
>> +} __packed;
>> +
>> +#endif
>> diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c
>> new file mode 100644
>> index 000000000000..30eeb907f42a
>> --- /dev/null
>> +++ b/arch/um/drivers/virtio_uml.c
>> @@ -0,0 +1,905 @@
>> +// SPDX-License-Identifier: GPL-2.0-or-later
>> +/*
>> + * Virtio vhost-user driver
>> + *
>> + * Copyright(c) 2019 Intel Corporation
>> + *
>> + * This module allows virtio devices to be used over a vhost-user 
>> socket.
>> + *
>> + * Guest devices can be instantiated by kernel module or command line
>> + * parameters. One device will be created for each parameter. Syntax:
>> + *
>> + *        [virtio_uml.]device=<socket>:<virtio_id>[:<platform_id>]
>> + * where:
>> + *        <socket>    := vhost-user socket path to connect
>> + *        <virtio_id>    := virtio device id (as in virtio_ids.h)
>> + *        <platform_id>    := (optional) platform device id
>> + *
>> + * example:
>> + *        virtio_uml.device=/var/uml.socket:1
>> + *
>> + * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, 
>> ARM Ltd.
>> + */
>> +#include <linux/module.h>
>> +#include <linux/platform_device.h>
>> +#include <linux/slab.h>
>> +#include <linux/virtio.h>
>> +#include <linux/virtio_config.h>
>> +#include <linux/virtio_ring.h>
>> +#include <irq_kern.h>
>> +#include <init.h>
>> +#include <os.h>
>> +#include "vhost_user.h"
>> +
>> +/* Workaround due to a conflict between irq_user.h and irqreturn.h */
>> +#ifdef IRQ_NONE
>> +#undef IRQ_NONE
>> +#endif
>> +
>> +#define MAX_SUPPORTED_QUEUE_SIZE    256
>> +
>> +#define to_virtio_uml_device(_vdev) \
>> +    container_of(_vdev, struct virtio_uml_device, vdev)
>> +
>> +struct virtio_uml_device {
>> +    struct virtio_device vdev;
>> +    struct platform_device *pdev;
>> +
>> +    int sock;
>> +    u64 features;
>> +    u64 protocol_features;
>> +    u8 status;
>> +};
>> +
>> +struct virtio_uml_vq_info {
>> +    int kick_fd, call_fd;
>> +    char name[32];
>> +};
>> +
>> +extern unsigned long long physmem_size, highmem;
>> +
>> +
>> +/* Vhost-user protocol */
>> +
>> +static int vhost_user_send(struct virtio_uml_device *vu_dev,
>> +               struct vhost_user_msg *msg,
>> +               int *fds, size_t num_fds)
>> +{
>> +    size_t size = sizeof(msg->header) + msg->header.size;
>> +    int rc;
>> +
>> +    msg->header.flags |= VHOST_USER_VERSION;
>> +    do {
>> +        rc = os_sendmsg_fds(vu_dev->sock, (void *) msg, size, fds,
>> +                num_fds);
>> +    } while (rc == -EINTR);
>> +    if (rc < 0)
>> +        return rc;
>> +    if (rc != size)
>> +        return -ECOMM;
>> +    return 0;
>> +}
>> +
>> +static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev,
>> +                      u32 request)
>> +{
>> +    struct vhost_user_msg msg = {
>> +        .header.request = request,
>> +    };
>> +    return vhost_user_send(vu_dev, &msg, NULL, 0);
>> +}
>> +
>> +static int vhost_user_send_u64(struct virtio_uml_device *vu_dev,
>> +                   u32 request, u64 value)
>> +{
>> +    struct vhost_user_msg msg = {
>> +        .header.request = request,
>> +        .header.size = sizeof(msg.payload.integer),
>> +        .payload.integer = value,
>> +    };
>> +
>> +    return vhost_user_send(vu_dev, &msg, NULL, 0);
>> +}
>> +
>> +static int vhost_user_recv_header(struct virtio_uml_device *vu_dev,
>> +                  struct vhost_user_msg *msg)
>> +{
>> +    size_t size = sizeof(msg->header);
>> +    int rc;
>> +
>> +    do {
>> +        rc = os_read_file(vu_dev->sock, (void *) msg, size);
>> +    } while (rc == -EINTR);
>> +    if (rc != size)
>> +        return -EPROTO;
>> +    if (msg->header.flags != (VHOST_USER_FLAG_REPLY | 
>> VHOST_USER_VERSION))
>> +        return -EPROTO;
>> +    return 0;
>> +}
>> +
>> +static int vhost_user_recv(struct virtio_uml_device *vu_dev,
>> +               struct vhost_user_msg *msg,
>> +               size_t max_payload_size)
>> +{
>> +    size_t size;
>> +    int rc = vhost_user_recv_header(vu_dev, msg);
>> +
>> +    if (rc)
>> +        return rc;
>> +    size = msg->header.size;
>> +    if (size > max_payload_size)
>> +        return -EPROTO;
>> +    do {
>> +        rc = os_read_file(vu_dev->sock, (void *) &msg->payload, size);
>> +    } while (rc == -EINTR);
>> +    if (rc != size)
>> +        return -EPROTO;
>> +    return 0;
>> +}
>> +
>> +static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev,
>> +                   u64 *value)
>> +{
>> +    struct vhost_user_msg msg;
>> +    int rc = vhost_user_recv(vu_dev, &msg, sizeof(msg.payload.integer));
>> +
>> +    if (rc)
>> +        return rc;
>> +    if (msg.header.size != sizeof(msg.payload.integer))
>> +        return -EPROTO;
>> +    *value = msg.payload.integer;
>> +    return 0;
>> +}
>> +
>> +static int vhost_user_set_owner(struct virtio_uml_device *vu_dev)
>> +{
>> +    return vhost_user_send_no_payload(vu_dev, VHOST_USER_SET_OWNER);
>> +}
>> +
>> +static int vhost_user_get_features(struct virtio_uml_device *vu_dev,
>> +                   u64 *features)
>> +{
>> +    int rc = vhost_user_send_no_payload(vu_dev, 
>> VHOST_USER_GET_FEATURES);
>> +
>> +    if (rc)
>> +        return rc;
>> +    return vhost_user_recv_u64(vu_dev, features);
>> +}
>> +
>> +static int vhost_user_set_features(struct virtio_uml_device *vu_dev,
>> +                   u64 features)
>> +{
>> +    return vhost_user_send_u64(vu_dev, VHOST_USER_SET_FEATURES, 
>> features);
>> +}
>> +
>> +static int vhost_user_get_protocol_features(struct virtio_uml_device 
>> *vu_dev,
>> +                        u64 *protocol_features)
>> +{
>> +    int rc = vhost_user_send_no_payload(vu_dev,
>> +            VHOST_USER_GET_PROTOCOL_FEATURES);
>> +
>> +    if (rc)
>> +        return rc;
>> +    return vhost_user_recv_u64(vu_dev, protocol_features);
>> +}
>> +
>> +static int vhost_user_set_protocol_features(struct virtio_uml_device 
>> *vu_dev,
>> +                        u64 protocol_features)
>> +{
>> +    return vhost_user_send_u64(vu_dev, VHOST_USER_SET_PROTOCOL_FEATURES,
>> +            protocol_features);
>> +}
>> +
>> +static int vhost_user_init(struct virtio_uml_device *vu_dev)
>> +{
>> +    int rc = vhost_user_set_owner(vu_dev);
>> +
>> +    if (rc)
>> +        return rc;
>> +    rc = vhost_user_get_features(vu_dev, &vu_dev->features);
>> +    if (rc)
>> +        return rc;
>> +
>> +    if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) {
>> +        rc = vhost_user_get_protocol_features(vu_dev,
>> +                &vu_dev->protocol_features);
>> +        if (rc)
>> +            return rc;
>> +        vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F;
>> +        rc = vhost_user_set_protocol_features(vu_dev,
>> +                vu_dev->protocol_features);
>> +    }
>> +    return rc;
>> +}
>> +
>> +static int vhost_user_get_config(struct virtio_uml_device *vu_dev,
>> +                 u32 offset, void *buf, u32 len)
>> +{
>> +    u32 cfg_size = offset + len;
>> +    struct vhost_user_msg *msg;
>> +    size_t payload_size = sizeof(msg->payload.config) + cfg_size;
>> +    size_t msg_size = sizeof(msg->header) + payload_size;
>> +    int rc;
>> +
>> +    if (!(vu_dev->protocol_features &
>> +            BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
>> +        return -EOPNOTSUPP;
>> +
>> +    msg = kzalloc(msg_size, GFP_KERNEL);
>> +    if (!msg)
>> +        return -ENOMEM;
>> +    msg->header.request = VHOST_USER_GET_CONFIG;
>> +    msg->header.size = payload_size;
>> +    msg->payload.config.offset = 0;
>> +    msg->payload.config.size = cfg_size;
>> +
>> +    rc = vhost_user_send(vu_dev, msg, NULL, 0);
>> +    if (rc)
>> +        goto error_get;
>> +    rc = vhost_user_recv(vu_dev, msg, msg_size);
>> +    if (rc)
>> +        goto error_get;
>> +    if (msg->header.size != payload_size ||
>> +            msg->payload.config.size != cfg_size) {
>> +        rc = -EPROTO;
>> +        goto error_get;
>> +    }
>> +    memcpy(buf, msg->payload.config.payload + offset, len);
>> +
>> +error_get:
>> +    kfree(msg);
>> +    return rc;
>> +}
>> +
>> +static int vhost_user_set_config(struct virtio_uml_device *vu_dev,
>> +                 u32 offset, const void *buf, u32 len)
>> +{
>> +    struct vhost_user_msg *msg;
>> +    size_t payload_size = sizeof(msg->payload.config) + len;
>> +    size_t msg_size = sizeof(msg->header) + payload_size;
>> +    int rc;
>> +
>> +    if (!(vu_dev->protocol_features &
>> +            BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
>> +        return -EOPNOTSUPP;
>> +
>> +    msg = kzalloc(msg_size, GFP_KERNEL);
>> +    if (!msg)
>> +        return -ENOMEM;
>> +    msg->header.request = VHOST_USER_SET_CONFIG;
>> +    msg->header.size = payload_size;
>> +    msg->payload.config.offset = offset;
>> +    msg->payload.config.size = len;
>> +    memcpy(msg->payload.config.payload, buf, len);
>> +
>> +    rc = vhost_user_send(vu_dev, msg, NULL, 0);
>> +
>> +    kfree(msg);
>> +    return rc;
>> +}
>> +
>> +static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out,
>> +                      struct vhost_user_mem_region *region_out)
>> +{
>> +    unsigned long long mem_offset;
>> +    int rc = phys_mapping(addr, &mem_offset);
>> +
>> +    if (WARN_ON(rc < 0))
>> +        return -EFAULT;
>> +    *fd_out = rc;
>> +    region_out->guest_addr = addr;
>> +    region_out->user_addr = addr;
>> +    region_out->size = size;
>> +    region_out->mmap_offset = mem_offset;
>> +
>> +    rc = phys_mapping(addr + size - 1, &mem_offset);
>> +    if (WARN_ON(rc != *fd_out))
>> +        return -EFAULT;
>> +    return 0;
>> +}
>> +
>> +static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev)
>> +{
>> +    struct vhost_user_msg msg = {
>> +        .header.request = VHOST_USER_SET_MEM_TABLE,
>> +        .header.size = sizeof(msg.payload.mem_regions),
>> +        .payload.mem_regions.num = 1,
>> +    };
>> +    int fds[2];
>> +    int rc = vhost_user_init_mem_region(0, physmem_size, &fds[0],
>> +            &msg.payload.mem_regions.regions[0]);
>> +
>> +    if (rc < 0)
>> +        return rc;
>> +    if (highmem) {
>> +        msg.payload.mem_regions.num++;
>> +        rc = vhost_user_init_mem_region(__pa(end_iomem), highmem,
>> +                &fds[1], &msg.payload.mem_regions.regions[1]);
>> +        if (rc < 0)
>> +            return rc;
>> +    }
>> +
>> +    return vhost_user_send(vu_dev, &msg, fds, 
>> msg.payload.mem_regions.num);
>> +}
>> +
>> +static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev,
>> +                      u32 request, u32 index, u32 num)
>> +{
>> +    struct vhost_user_msg msg = {
>> +        .header.request = request,
>> +        .header.size = sizeof(msg.payload.vring_state),
>> +        .payload.vring_state.index = index,
>> +        .payload.vring_state.num = num,
>> +    };
>> +
>> +    return vhost_user_send(vu_dev, &msg, NULL, 0);
>> +}
>> +
>> +static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev,
>> +                    u32 index, u32 num)
>> +{
>> +    return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_NUM,
>> +            index, num);
>> +}
>> +
>> +static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev,
>> +                     u32 index, u32 offset)
>> +{
>> +    return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_BASE,
>> +            index, offset);
>> +}
>> +
>> +static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev,
>> +                     u32 index, u64 desc, u64 used, u64 avail,
>> +                     u64 log)
>> +{
>> +    struct vhost_user_msg msg = {
>> +        .header.request = VHOST_USER_SET_VRING_ADDR,
>> +        .header.size = sizeof(msg.payload.vring_addr),
>> +        .payload.vring_addr.index = index,
>> +        .payload.vring_addr.desc = desc,
>> +        .payload.vring_addr.used = used,
>> +        .payload.vring_addr.avail = avail,
>> +        .payload.vring_addr.log = log,
>> +    };
>> +
>> +    return vhost_user_send(vu_dev, &msg, NULL, 0);
>> +}
>> +
>> +static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev,
>> +                   u32 request, int index, int fd)
>> +{
>> +    struct vhost_user_msg msg = {
>> +        .header.request = request,
>> +        .header.size = sizeof(msg.payload.integer),
>> +        .payload.integer = index,
>> +    };
>> +
>> +    if (index & ~VHOST_USER_VRING_INDEX_MASK)
>> +        return -EINVAL;
>> +    if (fd < 0) {
>> +        msg.payload.integer |= VHOST_USER_VRING_POLL_MASK;
>> +        return vhost_user_send(vu_dev, &msg, NULL, 0);
>> +    }
>> +    return vhost_user_send(vu_dev, &msg, &fd, 1);
>> +}
>> +
>> +static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev,
>> +                     int index, int fd)
>> +{
>> +    return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL,
>> +            index, fd);
>> +}
>> +
>> +static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev,
>> +                     int index, int fd)
>> +{
>> +    return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK,
>> +            index, fd);
>> +}
>> +
>> +static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev,
>> +                       u32 index, bool enable)
>> +{
>> +    if (!(vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)))
>> +        return 0;
>> +
>> +    return vhost_user_set_vring_state(vu_dev, 
>> VHOST_USER_SET_VRING_ENABLE,
>> +            index, enable);
>> +}
>> +
>> +
>> +/* Virtio interface */
>> +
>> +static bool vu_notify(struct virtqueue *vq)
>> +{
>> +    struct virtio_uml_vq_info *info = vq->priv;
>> +    int rc;
>> +
>> +    do {
>> +        rc = os_eventfd_write(info->kick_fd, 1);
>> +    } while (rc == -EINTR);
>> +    WARN_ON(rc);
>> +    return !rc;
>> +}
>> +
>> +static irqreturn_t vu_interrupt(int irq, void *opaque)
>> +{
>> +    struct virtqueue *vq = opaque;
>> +    struct virtio_uml_vq_info *info = vq->priv;
>> +    unsigned long long value;
>> +    int rc;
>> +    irqreturn_t ret = IRQ_NONE;
>> +
>> +    do {
>> +        rc = os_eventfd_read(info->call_fd, &value);
>> +        if (!rc)
>> +            ret |= vring_interrupt(irq, vq);
>> +    } while (!rc || rc == -EINTR);
>> +    WARN_ON(rc != -EAGAIN);
>> +    return ret;
>> +}
>> +
>> +
>> +static void vu_get(struct virtio_device *vdev, unsigned offset,
>> +           void *buf, unsigned len)
>> +{
>> +    struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
>> +
>> +    WARN_ON(vhost_user_get_config(vu_dev, offset, buf, len));
>> +}
>> +
>> +static void vu_set(struct virtio_device *vdev, unsigned offset,
>> +           const void *buf, unsigned len)
>> +{
>> +    struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
>> +
>> +    WARN_ON(vhost_user_set_config(vu_dev, offset, buf, len));
>> +}
>> +
>> +static u8 vu_get_status(struct virtio_device *vdev)
>> +{
>> +    struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
>> +
>> +    return vu_dev->status;
>> +}
>> +
>> +static void vu_set_status(struct virtio_device *vdev, u8 status)
>> +{
>> +    struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
>> +
>> +    vu_dev->status = status;
>> +}
>> +
>> +static void vu_reset(struct virtio_device *vdev)
>> +{
>> +    struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
>> +
>> +    vu_dev->status = 0;
>> +}
>> +
>> +static void vu_del_vq(struct virtqueue *vq)
>> +{
>> +    struct virtio_uml_vq_info *info = vq->priv;
>> +
>> +    um_free_irq(VIRTIO_IRQ, vq);
>> +
>> +    os_close_file(info->call_fd);
>> +    os_close_file(info->kick_fd);
>> +
>> +    vring_del_virtqueue(vq);
>> +    kfree(info);
>> +}
>> +
>> +static void vu_del_vqs(struct virtio_device *vdev)
>> +{
>> +    struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
>> +    struct virtqueue *vq, *n;
>> +    u64 features;
>> +
>> +    /* Note: reverse order as a workaround to a decoding bug in snabb */
>> +    list_for_each_entry_reverse(vq, &vdev->vqs, list)
>> +        WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, false));
>> +
>> +    /* Ensure previous messages have been processed */
>> +    WARN_ON(vhost_user_get_features(vu_dev, &features));
>> +
>> +    list_for_each_entry_safe(vq, n, &vdev->vqs, list)
>> +        vu_del_vq(vq);
>> +}
>> +
>> +static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
>> +                     unsigned index, vq_callback_t *callback,
>> +                     const char *name, bool ctx)
>> +{
>> +    struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
>> +    struct platform_device *pdev = vu_dev->pdev;
>> +    struct virtio_uml_vq_info *info;
>> +    struct virtqueue *vq;
>> +    int num = MAX_SUPPORTED_QUEUE_SIZE;
>> +    int rc;
>> +    int call_fds[2];
>> +
>> +    info = kzalloc(sizeof(*info), GFP_KERNEL);
>> +    if (!info) {
>> +        rc = -ENOMEM;
>> +        goto error_kzalloc;
>> +    }
>> +    snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name,
>> +            pdev->id, name);
>> +
>> +    rc = os_eventfd(0, 0);
>> +    if (rc < 0)
>> +        goto error_kick;
>> +    info->kick_fd = rc;
>> +
>> +    /* Use a pipe for call fd, since SIGIO is not supported for 
>> eventfd */
>> +    rc = os_pipe(call_fds, true, true);
>> +    if (rc < 0)
>> +        goto error_call;
>> +    info->call_fd = call_fds[0];
>> +
>> +    vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true,
>> +            ctx, vu_notify, callback, info->name);
>> +    if (!vq) {
>> +        rc = -ENOMEM;
>> +        goto error_create;
>> +    }
>> +    vq->priv = info;
>> +    num = virtqueue_get_vring_size(vq);
>> +
>> +    rc = um_request_irq(VIRTIO_IRQ, info->call_fd, IRQ_READ, 
>> vu_interrupt,
>> +            IRQF_SHARED, info->name, vq);
>> +    if (rc)
>> +        goto error_irq;
>> +
>> +    rc = vhost_user_set_vring_num(vu_dev, index, num);
>> +    if (rc)
>> +        goto error_setup;
>> +
>> +    rc = vhost_user_set_vring_base(vu_dev, index, 0);
>> +    if (rc)
>> +        goto error_setup;
>> +
>> +    rc = vhost_user_set_vring_call(vu_dev, index, call_fds[1]);
>> +    if (rc)
>> +        goto error_setup;
>> +
>> +    rc = vhost_user_set_vring_addr(vu_dev, index,
>> +            virtqueue_get_desc_addr(vq),
>> +            virtqueue_get_used_addr(vq),
>> +            virtqueue_get_avail_addr(vq),
>> +            (u64) -1);
>> +    if (rc)
>> +        goto error_setup;
>> +
>> +    /* Close unused write end of call fds */
>> +    os_close_file(call_fds[1]);
>> +
>> +    return vq;
>> +
>> +error_setup:
>> +    um_free_irq(VIRTIO_IRQ, vq);
>> +error_irq:
>> +    vring_del_virtqueue(vq);
>> +error_create:
>> +    os_close_file(call_fds[1]);
>> +    os_close_file(info->call_fd);
>> +error_call:
>> +    os_close_file(info->kick_fd);
>> +error_kick:
>> +    kfree(info);
>> +error_kzalloc:
>> +    return ERR_PTR(rc);
>> +}
>> +
>> +static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
>> +               struct virtqueue *vqs[], vq_callback_t *callbacks[],
>> +               const char * const names[], const bool *ctx,
>> +               struct irq_affinity *desc)
>> +{
>> +    struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
>> +    int i, queue_idx, rc;
>> +    struct virtqueue *vq;
>> +
>> +    rc = vhost_user_set_mem_table(vu_dev);
>> +    if (rc)
>> +        return rc;
>> +
>> +    for (i = 0; i < nvqs; ++i) {
>> +        if (!names[i]) {
>> +            vqs[i] = NULL;
>> +            continue;
>> +        }
>> +
>> +        vqs[i] = vu_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
>> +                ctx ? ctx[i] : false);
>> +        if (IS_ERR(vqs[i])) {
>> +            rc = PTR_ERR(vqs[i]);
>> +            goto error_setup;
>> +        }
>> +    }
>> +
>> +    list_for_each_entry(vq, &vdev->vqs, list) {
>> +        struct virtio_uml_vq_info *info = vq->priv;
>> +
>> +        rc = vhost_user_set_vring_kick(vu_dev, vq->index,
>> +                info->kick_fd);
>> +        if (rc)
>> +            goto error_setup;
>> +
>> +        rc = vhost_user_set_vring_enable(vu_dev, vq->index, true);
>> +        if (rc)
>> +            goto error_setup;
>> +    }
>> +
>> +    return 0;
>> +
>> +error_setup:
>> +    vu_del_vqs(vdev);
>> +    return rc;
>> +}
>> +
>> +static u64 vu_get_features(struct virtio_device *vdev)
>> +{
>> +    struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
>> +
>> +    return vu_dev->features;
>> +}
>> +
>> +static int vu_finalize_features(struct virtio_device *vdev)
>> +{
>> +    struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
>> +    u64 supported = vdev->features & VHOST_USER_SUPPORTED_F;
>> +
>> +    vring_transport_features(vdev);
>> +    vu_dev->features = vdev->features | supported;
>> +
>> +    return vhost_user_set_features(vu_dev, vu_dev->features);
>> +}
>> +
>> +static const char *vu_bus_name(struct virtio_device *vdev)
>> +{
>> +    struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
>> +
>> +    return vu_dev->pdev->name;
>> +}
>> +
>> +static const struct virtio_config_ops virtio_uml_config_ops = {
>> +    .get = vu_get,
>> +    .set = vu_set,
>> +    .get_status = vu_get_status,
>> +    .set_status = vu_set_status,
>> +    .reset = vu_reset,
>> +    .find_vqs = vu_find_vqs,
>> +    .del_vqs = vu_del_vqs,
>> +    .get_features = vu_get_features,
>> +    .finalize_features = vu_finalize_features,
>> +    .bus_name = vu_bus_name,
>> +};
>> +
>> +
>> +static void virtio_uml_release_dev(struct device *d)
>> +{
>> +    struct virtio_device *vdev =
>> +            container_of(d, struct virtio_device, dev);
>> +    struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
>> +
>> +    os_close_file(vu_dev->sock);
>> +}
>> +
>> +/* Platform device */
>> +
>> +struct virtio_uml_platform_data {
>> +    u32 virtio_device_id;
>> +    const char *socket_path;
>> +};
>> +
>> +static int virtio_uml_probe(struct platform_device *pdev)
>> +{
>> +    struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
>> +    struct virtio_uml_device *vu_dev;
>> +    int rc;
>> +
>> +    if (!pdata)
>> +        return -EINVAL;
>> +
>> +    vu_dev = devm_kzalloc(&pdev->dev, sizeof(*vu_dev), GFP_KERNEL);
>> +    if (!vu_dev)
>> +        return -ENOMEM;
>> +
>> +    vu_dev->vdev.dev.parent = &pdev->dev;
>> +    vu_dev->vdev.dev.release = virtio_uml_release_dev;
>> +    vu_dev->vdev.config = &virtio_uml_config_ops;
>> +    vu_dev->vdev.id.device = pdata->virtio_device_id;
>> +    vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID;
>> +    vu_dev->pdev = pdev;
>> +
>> +    do {
>> +        rc = os_connect_socket(pdata->socket_path);
>> +    } while (rc == -EINTR);
>> +    if (rc < 0)
>> +        return rc;
>> +    vu_dev->sock = rc;
>> +
>> +    rc = vhost_user_init(vu_dev);
>> +    if (rc)
>> +        goto error_init;
>> +
>> +    platform_set_drvdata(pdev, vu_dev);
>> +
>> +    rc = register_virtio_device(&vu_dev->vdev);
>> +    if (rc)
>> +        put_device(&vu_dev->vdev.dev);
>> +    return rc;
>> +
>> +error_init:
>> +    os_close_file(vu_dev->sock);
>> +    return rc;
>> +}
>> +
>> +static int virtio_uml_remove(struct platform_device *pdev)
>> +{
>> +    struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
>> +
>> +    unregister_virtio_device(&vu_dev->vdev);
>> +    return 0;
>> +}
>> +
>> +/* Command line device list */
>> +
>> +static void vu_cmdline_release_dev(struct device *d)
>> +{
>> +}
>> +
>> +static struct device vu_cmdline_parent = {
>> +    .init_name = "virtio-uml-cmdline",
>> +    .release = vu_cmdline_release_dev,
>> +};
>> +
>> +static bool vu_cmdline_parent_registered;
>> +static int vu_cmdline_id;
>> +
>> +static int vu_cmdline_set(const char *device, const struct 
>> kernel_param *kp)
>> +{
>> +    const char *ids = strchr(device, ':');
>> +    unsigned int virtio_device_id;
>> +    int processed, consumed, err;
>> +    char *socket_path;
>> +    struct virtio_uml_platform_data *pdata;
>> +    struct platform_device *pdev;
>> +
>> +    if (!ids || ids == device)
>> +        return -EINVAL;
>> +
>> +    processed = sscanf(ids, ":%u%n:%d%n",
>> +            &virtio_device_id, &consumed,
>> +            &vu_cmdline_id, &consumed);
>> +
>> +    if (processed < 1 || ids[consumed])
>> +        return -EINVAL;
>> +
>> +    if (!vu_cmdline_parent_registered) {
>> +        err = device_register(&vu_cmdline_parent);
>> +        if (err) {
>> +            pr_err("Failed to register parent device!\n");
>> +            put_device(&vu_cmdline_parent);
>> +            return err;
>> +        }
>> +        vu_cmdline_parent_registered = true;
>> +    }
>> +
>> +    socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL);
>> +    if (!socket_path)
>> +        return -ENOMEM;
>> +
>> +    pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
>> +    if (!pdata) {
>> +        err = -ENOMEM;
>> +        goto error_kzalloc;
>> +    }
>> +
>> +    pdata->virtio_device_id = (u32) virtio_device_id;
>> +    pdata->socket_path = socket_path;
>> +
>> +    pr_info("Registering device virtio-uml.%d id=%d at %s\n",
>> +            vu_cmdline_id, virtio_device_id, socket_path);
>> +
>> +    pdev = platform_device_register_data(&vu_cmdline_parent, 
>> "virtio-uml",
>> +            vu_cmdline_id++, pdata, sizeof(*pdata));
>> +    err = PTR_ERR_OR_ZERO(pdev);
>> +    if (err)
>> +        goto error_register;
>> +    return 0;
>> +
>> +error_register:
>> +    kfree(pdata);
>> +error_kzalloc:
>> +    kfree(socket_path);
>> +    return err;
>> +}
>> +
>> +static int vu_cmdline_get_device(struct device *dev, void *data)
>> +{
>> +    struct platform_device *pdev = to_platform_device(dev);
>> +    struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
>> +    char *buffer = data;
>> +    unsigned int len = strlen(buffer);
>> +
>> +    snprintf(buffer + len, PAGE_SIZE - len, "%s:%d:%d\n",
>> +            pdata->socket_path, pdata->virtio_device_id, pdev->id);
>> +    return 0;
>> +}
>> +
>> +static int vu_cmdline_get(char *buffer, const struct kernel_param *kp)
>> +{
>> +    buffer[0] = '\0';
>> +    if (vu_cmdline_parent_registered)
>> +        device_for_each_child(&vu_cmdline_parent, buffer,
>> +                vu_cmdline_get_device);
>> +    return strlen(buffer) + 1;
>> +}
>> +
>> +static const struct kernel_param_ops vu_cmdline_param_ops = {
>> +    .set = vu_cmdline_set,
>> +    .get = vu_cmdline_get,
>> +};
>> +
>> +device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR);
>> +
>> +
>> +static int vu_unregister_cmdline_device(struct device *dev, void *data)
>> +{
>> +    struct platform_device *pdev = to_platform_device(dev);
>> +    struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
>> +
>> +    platform_device_unregister(pdev);
>> +    kfree(pdata->socket_path);
>> +    kfree(pdata);
>> +    return 0;
>> +}
>> +
>> +static void vu_unregister_cmdline_devices(void)
>> +{
>> +    if (vu_cmdline_parent_registered) {
>> +        device_for_each_child(&vu_cmdline_parent, NULL,
>> +                vu_unregister_cmdline_device);
>> +        device_unregister(&vu_cmdline_parent);
>> +        vu_cmdline_parent_registered = false;
>> +    }
>> +}
>> +
>> +/* Platform driver */
>> +
>> +static const struct of_device_id virtio_uml_match[] = {
>> +    { .compatible = "virtio,uml", },
>> +    { }
>> +};
>> +MODULE_DEVICE_TABLE(of, virtio_uml_match);
>> +
>> +static struct platform_driver virtio_uml_driver = {
>> +    .probe = virtio_uml_probe,
>> +    .remove = virtio_uml_remove,
>> +    .driver = {
>> +        .name = "virtio-uml",
>> +        .of_match_table = virtio_uml_match,
>> +    },
>> +};
>> +
>> +static int __init virtio_uml_init(void)
>> +{
>> +    return platform_driver_register(&virtio_uml_driver);
>> +}
>> +
>> +static void __exit virtio_uml_exit(void)
>> +{
>> +    platform_driver_unregister(&virtio_uml_driver);
>> +    vu_unregister_cmdline_devices();
>> +}
>> +
>> +module_init(virtio_uml_init);
>> +module_exit(virtio_uml_exit);
>> +__uml_exitcall(virtio_uml_exit);
>> +
>> +MODULE_DESCRIPTION("UML driver for vhost-user virtio devices");
>> +MODULE_LICENSE("GPL");
>> diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h
>> index ce7a78c3bcf2..42c6205e2dc4 100644
>> --- a/arch/um/include/asm/irq.h
>> +++ b/arch/um/include/asm/irq.h
>> @@ -17,17 +17,18 @@
>>   #define TELNETD_IRQ         12
>>   #define XTERM_IRQ         13
>>   #define RANDOM_IRQ         14
>> +#define VIRTIO_IRQ        15
>>   #ifdef CONFIG_UML_NET_VECTOR
>> -#define VECTOR_BASE_IRQ        15
>> +#define VECTOR_BASE_IRQ        (VIRTIO_IRQ + 1)
>>   #define VECTOR_IRQ_SPACE    8
>>   #define LAST_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ - 1)
>>   #else
>> -#define LAST_IRQ RANDOM_IRQ
>> +#define LAST_IRQ VIRTIO_IRQ
>>   #endif
>> diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
>> index 4a62ac4251a5..da7ebbe4ebb3 100644
>> --- a/arch/um/include/shared/os.h
>> +++ b/arch/um/include/shared/os.h
>> @@ -36,6 +36,8 @@
>>   #define OS_LIB_PATH    "/usr/lib/"
>>   #endif
>> +#define OS_SENDMSG_MAX_FDS 8
>> +
>>   /*
>>    * types taken from stat_file() in hostfs_user.c
>>    * (if they are wrong here, they are wrong there...).
>> @@ -176,6 +178,11 @@ extern unsigned os_major(unsigned long long dev);
>>   extern unsigned os_minor(unsigned long long dev);
>>   extern unsigned long long os_makedev(unsigned major, unsigned minor);
>>   extern int os_falloc_punch(int fd, unsigned long long offset, int 
>> count);
>> +extern int os_eventfd(unsigned int initval, int flags);
>> +extern int os_eventfd_read(int fd, unsigned long long *value);
>> +extern int os_eventfd_write(int fd, unsigned long long value);
>> +extern int os_sendmsg_fds(int fd, const void *buf, unsigned int len,
>> +              const int *fds, unsigned int fds_num);
>>   /* start_up.c */
>>   extern void os_early_checks(void);
>> diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
>> index 232b22307fdd..f3766889226b 100644
>> --- a/arch/um/kernel/ksyms.c
>> +++ b/arch/um/kernel/ksyms.c
>> @@ -38,6 +38,10 @@ EXPORT_SYMBOL(run_helper);
>>   EXPORT_SYMBOL(os_major);
>>   EXPORT_SYMBOL(os_minor);
>>   EXPORT_SYMBOL(os_makedev);
>> +EXPORT_SYMBOL(os_eventfd);
>> +EXPORT_SYMBOL(os_eventfd_read);
>> +EXPORT_SYMBOL(os_eventfd_write);
>> +EXPORT_SYMBOL(os_sendmsg_fds);
>>   EXPORT_SYMBOL(add_sigio_fd);
>>   EXPORT_SYMBOL(ignore_sigio_fd);
>> diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
>> index de58e976b9bc..f256be1d77bd 100644
>> --- a/arch/um/kernel/mem.c
>> +++ b/arch/um/kernel/mem.c
>> @@ -31,6 +31,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD];
>>   /* Initialized at boot time, and readonly after that */
>>   unsigned long long highmem;
>> +EXPORT_SYMBOL(highmem);
>>   int kmalloc_ok = 0;
>>   /* Used during early boot */
>> diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c
>> index 5bf56af4d5b9..870c80c88a97 100644
>> --- a/arch/um/kernel/physmem.c
>> +++ b/arch/um/kernel/physmem.c
>> @@ -143,6 +143,7 @@ int phys_mapping(unsigned long phys, unsigned long 
>> long *offset_out)
>>       return fd;
>>   }
>> +EXPORT_SYMBOL(phys_mapping);
>>   static int __init uml_mem_setup(char *line, int *add)
>>   {
>> diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
>> index a818ccef30ca..9de212bf4320 100644
>> --- a/arch/um/kernel/um_arch.c
>> +++ b/arch/um/kernel/um_arch.c
>> @@ -113,6 +113,7 @@ static int have_root __initdata = 0;
>>   /* Set in uml_mem_setup and modified in linux_main */
>>   long long physmem_size = 32 * 1024 * 1024;
>> +EXPORT_SYMBOL(physmem_size);
>>   static const char *usage_string =
>>   "User Mode Linux v%s\n"
>> diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
>> index f25b110d4e70..0c4e0298398d 100644
>> --- a/arch/um/os-Linux/file.c
>> +++ b/arch/um/os-Linux/file.c
>> @@ -15,6 +15,7 @@
>>   #include <sys/sysmacros.h>
>>   #include <sys/un.h>
>>   #include <sys/types.h>
>> +#include <sys/eventfd.h>
>>   #include <os.h>
>>   static void copy_stat(struct uml_stat *dst, const struct stat64 *src)
>> @@ -620,3 +621,66 @@ int os_falloc_punch(int fd, unsigned long long 
>> offset, int len)
>>       return n;
>>   }
>> +int os_eventfd(unsigned int initval, int flags)
>> +{
>> +    int fd = eventfd(initval, flags);
>> +
>> +    if (fd < 0)
>> +        return -errno;
>> +    return fd;
>> +}
>> +
>> +int os_eventfd_read(int fd, unsigned long long *value)
>> +{
>> +    eventfd_t val;
>> +    int err = eventfd_read(fd, &val);
>> +
>> +    if (err < 0)
>> +        return -errno;
>> +    *value = val;
>> +    return 0;
>> +}
>> +
>> +int os_eventfd_write(int fd, unsigned long long value)
>> +{
>> +    int err = eventfd_write(fd, value);
>> +
>> +    if (err < 0)
>> +        return -errno;
>> +    return 0;
>> +}
>> +
>> +int os_sendmsg_fds(int fd, const void *buf, unsigned int len, const 
>> int *fds,
>> +           unsigned int fds_num)
>> +{
>> +    struct iovec iov = {
>> +        .iov_base = (void *) buf,
>> +        .iov_len = len,
>> +    };
>> +    union {
>> +        char control[CMSG_SPACE(sizeof(*fds) * OS_SENDMSG_MAX_FDS)];
>> +        struct cmsghdr align;
>> +    } u;
>> +    unsigned int fds_size = sizeof(*fds) * fds_num;
>> +    struct msghdr msg = {
>> +        .msg_iov = &iov,
>> +        .msg_iovlen = 1,
>> +        .msg_control = u.control,
>> +        .msg_controllen = CMSG_SPACE(fds_size),
>> +    };
>> +    struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
>> +    int err;
>> +
>> +    if (fds_num > OS_SENDMSG_MAX_FDS)
>> +        return -EINVAL;
>> +    memset(u.control, 0, sizeof(u.control));
>> +    cmsg->cmsg_level = SOL_SOCKET;
>> +    cmsg->cmsg_type = SCM_RIGHTS;
>> +    cmsg->cmsg_len = CMSG_LEN(fds_size);
>> +    memcpy(CMSG_DATA(cmsg), fds, fds_size);
>> +    err = sendmsg(fd, &msg, 0);
>> +
>> +    if (err < 0)
>> +        return -errno;
>> +    return err;
>> +}
>>
> 
> Can you send me your config?
> 
> It goes into an always busy loop early in boot with mine and the boot 
> never completes. I will debug it in the meantime, but having the config 
> with which it works will be helpful.
> 
> For the record - I have vector enabled and I tried virtio and virtio-net 
> compiled in.
> 

It blows up as a module too:
[  197.850000] Registering device virtio-uml.0 id=1 at 
/tmp/uml-test-vhost.sock
root@stretch-opx:/lib/modules/5.3.0-rc3-00291-g1d45d9d8ec47/kernel# [ 
197.900000] ------------[ cut here ]------------
[  197.900000] WARNING: CPU: 0 PID: 1038 at 
arch/um/drivers/virtio_uml.c:449 vu_get+0xfb/0x130 [virtio_uml]
[  197.900000] Modules linked in: virtio_net(+) virtio_uml virtio_ring 
virtio
[  197.900000] CPU: 0 PID: 1038 Comm: systemd-udevd Not tainted 
5.3.0-rc3-00291-g1d45d9d8ec47 #35
[  197.900000] Stack:
[  197.900000]  d47f38d0 60749c68 d47f3918 00000000
[  197.900000]  00000009 00000000 d47f38e0 60749cbc
[  197.900000]  d47f3950 60052a5e 601e5830 d47d3780
[  197.900000] Call Trace:
[  197.900000]  [<e1b63b7b>] ? vu_get+0xfb/0x130 [virtio_uml]
[  197.900000]  [<600967ba>] ? printk+0x0/0x94
[  197.900000]  [<6002c759>] show_stack+0x129/0x190
[  197.900000]  [<60749c68>] ? dump_stack_print_info+0xe8/0x100
[  197.900000]  [<60749cbc>] dump_stack+0x2a/0x2e
[  197.900000]  [<60052a5e>] __warn+0x13e/0x170
[  197.900000]  [<601e5830>] ? kernfs_next_descendant_post+0x0/0xd0
[  197.900000]  [<601e66c3>] ? kernfs_activate+0x123/0x150
[  197.900000]  [<60052c06>] warn_slowpath_null+0x46/0x4b
[  197.900000]  [<e1b63b7b>] vu_get+0xfb/0x130 [virtio_uml]
[  197.900000]  [<e1abe1d0>] ? 
virtio_check_driver_offered_feature+0x0/0xe0 [virtio]
[  197.900000]  [<60076cc0>] ? blocking_notifier_call_chain+0x0/0x20
[  197.900000]  [<e1c25f86>] virtnet_validate+0x1c6/0x253 [virtio_net]
[  197.900000]  [<60076cc0>] ? blocking_notifier_call_chain+0x0/0x20
[  197.900000]  [<e1abe65d>] virtio_dev_probe+0x1dd/0x2b0 [virtio]
[  197.900000]  [<604b6745>] really_probe+0x245/0x4e0
[  197.900000]  [<60076cc0>] ? blocking_notifier_call_chain+0x0/0x20
[  197.900000]  [<604b6c42>] driver_probe_device+0x112/0x140
[  197.900000]  [<604b6f2f>] device_driver_attach+0x4f/0x80
[  197.900000]  [<604b7105>] __driver_attach+0x1a5/0x1c0
[  197.900000]  [<604b6f60>] ? __driver_attach+0x0/0x1c0
[  197.900000]  [<604b6f60>] ? __driver_attach+0x0/0x1c0
[  197.900000]  [<6074b660>] ? klist_next+0x0/0x150
[  197.900000]  [<604b3c06>] bus_for_each_dev+0x76/0xc0
[  197.900000]  [<604b5fe3>] driver_attach+0x23/0x30
[  197.900000]  [<604b548f>] bus_add_driver+0x18f/0x300
[  197.900000]  [<60053690>] ? __cpuhp_setup_state+0x0/0x20
[  197.900000]  [<604b790f>] driver_register+0xdf/0x140
[  197.900000]  [<e1abe3c2>] register_virtio_driver+0x82/0x90 [virtio]
[  197.900000]  [<e1c2b09b>] virtio_net_driver_init+0x9b/0xd0 [virtio_net]
[  197.900000]  [<e1c2b000>] ? virtio_net_driver_init+0x0/0xd0 [virtio_net]
[  197.900000]  [<6002935a>] do_one_initcall+0x8a/0x1e0
[  197.900000]  [<60047570>] ? set_signals+0x0/0x50
[  197.900000]  [<60047598>] ? set_signals+0x28/0x50
[  197.900000]  [<6013c328>] ? kfree+0xf8/0x130
[  197.900000]  [<6013b295>] ? kmem_cache_alloc+0xa5/0x110
[  197.900000]  [<600b84d0>] ? module_put+0x0/0x50
[  197.900000]  [<600bd274>] do_init_module+0x8a/0x296
[  197.900000]  [<60122dd9>] ? __vfree+0x79/0x80
[  197.900000]  [<600bbb12>] load_module+0x1fa2/0x2750
[  197.900000]  [<6014a8d0>] ? kernel_read+0x0/0x50
[  197.900000]  [<60754b20>] ? strlen+0x0/0x30
[  197.900000]  [<601e96d0>] ? sysfs_create_bin_file+0x0/0xa0
[  197.900000]  [<600bc621>] sys_finit_module+0x111/0x160
[  197.900000]  [<60793fc0>] ? ptrace+0x0/0xa0
[  197.900000]  [<6002ee58>] handle_syscall+0x98/0xc0
[  197.900000]  [<60761e80>] ? __waitpid+0x0/0xa0
[  197.900000]  [<6004a8a6>] userspace+0x4c6/0x5b0
[  197.900000]  [<6002eeb0>] ? copy_chunk_to_user+0x0/0x30
[  197.900000]  [<6002b0e4>] fork_handler+0x94/0xa0
[  197.900000]
[  197.900000] ---[ end trace e3d2f999bd5bee3c ]---
[  197.900000] ------------[ cut here ]------------
[  197.900000] WARNING: CPU: 0 PID: 1038 at 
arch/um/drivers/virtio_uml.c:449 vu_get+0xfb/0x130 [virtio_uml]
[  197.900000] Modules linked in: virtio_net(+) virtio_uml virtio_ring 
virtio
[  197.900000] CPU: 0 PID: 1038 Comm: systemd-udevd Tainted: G        W 
        5.3.0-rc3-00291-g1d45d9d8ec47 #35
[  197.900000] Stack:
[  197.900000]  d47f3860 60749c68 d47f38a8 00000000
[  197.900000]  00000009 00000000 d47f3870 60749cbc
[  197.900000]  d47f38e0 60052a5e 00000000 00000000
[  197.900000] Call Trace:
[  197.900000]  [<e1b63b7b>] ? vu_get+0xfb/0x130 [virtio_uml]
[  197.900000]  [<600967ba>] ? printk+0x0/0x94
[  197.900000]  [<6002c759>] show_stack+0x129/0x190
[  197.900000]  [<60749c68>] ? dump_stack_print_info+0xe8/0x100
[  197.900000]  [<60749cbc>] dump_stack+0x2a/0x2e
[  197.900000]  [<60052a5e>] __warn+0x13e/0x170
[  197.900000]  [<60052c06>] warn_slowpath_null+0x46/0x4b
[  197.900000]  [<e1b63b7b>] vu_get+0xfb/0x130 [virtio_uml]
[  197.900000]  [<60076cc0>] ? blocking_notifier_call_chain+0x0/0x20
[  197.900000]  [<e1c2240f>] virtnet_probe+0x5f/0x1210 [virtio_net]
[  197.900000]  [<60076cc0>] ? blocking_notifier_call_chain+0x0/0x20
[  197.900000]  [<e1abe674>] virtio_dev_probe+0x1f4/0x2b0 [virtio]
[  197.900000]  [<604b6745>] really_probe+0x245/0x4e0
[  197.900000]  [<60076cc0>] ? blocking_notifier_call_chain+0x0/0x20
[  197.900000]  [<604b6c42>] driver_probe_device+0x112/0x140
[  197.900000]  [<604b6f2f>] device_driver_attach+0x4f/0x80
[  197.900000]  [<604b7105>] __driver_attach+0x1a5/0x1c0
[  197.900000]  [<604b6f60>] ? __driver_attach+0x0/0x1c0
[  197.900000]  [<604b6f60>] ? __driver_attach+0x0/0x1c0
[  197.900000]  [<6074b660>] ? klist_next+0x0/0x150
[  197.900000]  [<604b3c06>] bus_for_each_dev+0x76/0xc0
[  197.900000]  [<604b5fe3>] driver_attach+0x23/0x30
[  197.900000]  [<604b548f>] bus_add_driver+0x18f/0x300
[  197.900000]  [<60053690>] ? __cpuhp_setup_state+0x0/0x20
[  197.900000]  [<604b790f>] driver_register+0xdf/0x140
[  197.900000]  [<e1abe3c2>] register_virtio_driver+0x82/0x90 [virtio]
[  197.900000]  [<e1c2b09b>] virtio_net_driver_init+0x9b/0xd0 [virtio_net]
[  197.900000]  [<e1c2b000>] ? virtio_net_driver_init+0x0/0xd0 [virtio_net]
[  197.900000]  [<6002935a>] do_one_initcall+0x8a/0x1e0
[  197.900000]  [<60047570>] ? set_signals+0x0/0x50
[  197.900000]  [<60047598>] ? set_signals+0x28/0x50
[  197.900000]  [<6013c328>] ? kfree+0xf8/0x130
[  197.900000]  [<6013b295>] ? kmem_cache_alloc+0xa5/0x110
[  197.900000]  [<600b84d0>] ? module_put+0x0/0x50
[  197.900000]  [<600bd274>] do_init_module+0x8a/0x296
[  197.900000]  [<60122dd9>] ? __vfree+0x79/0x80
[  197.900000]  [<600bbb12>] load_module+0x1fa2/0x2750
[  197.900000]  [<6014a8d0>] ? kernel_read+0x0/0x50
[  197.900000]  [<60754b20>] ? strlen+0x0/0x30
[  197.900000]  [<601e96d0>] ? sysfs_create_bin_file+0x0/0xa0
[  197.900000]  [<600bc621>] sys_finit_module+0x111/0x160
[  197.900000]  [<60793fc0>] ? ptrace+0x0/0xa0
[  197.900000]  [<6002ee58>] handle_syscall+0x98/0xc0
[  197.900000]  [<60761e80>] ? __waitpid+0x0/0xa0
[  197.900000]  [<6004a8a6>] userspace+0x4c6/0x5b0
[  197.900000]  [<6002eeb0>] ? copy_chunk_to_user+0x0/0x30
[  197.900000]  [<6002b0e4>] fork_handler+0x94/0xa0
[  197.900000]
[  197.900000] ---[ end trace e3d2f999bd5bee3d ]---
[  197.910000] ------------[ cut here ]------------
[  197.910000] WARNING: CPU: 0 PID: 1038 at net/core/dev.c:1564 
dev_disable_lro+0x187/0x1e0
[  197.910000] netdevice: eth0: failed to disable LRO!
[  197.910000] Modules linked in: virtio_net(+) virtio_uml virtio_ring 
virtio
[  197.910000] CPU: 0 PID: 1038 Comm: systemd-udevd Tainted: G        W 
        5.3.0-rc3-00291-g1d45d9d8ec47 #35
[  197.910000] Stack:
[  197.910000]  d47f3650 60749c68 d47f3698 d47f36e0
[  197.910000]  00000009 00000000 d47f3660 60749cbc
[  197.910000]  d47f36d0 60052a5e 00000000 60549c77
[  197.910000] Call Trace:
[  197.910000]  [<60549c77>] ? dev_disable_lro+0x187/0x1e0
[  197.910000]  [<600967ba>] ? printk+0x0/0x94
[  197.910000]  [<6002c759>] show_stack+0x129/0x190
[  197.910000]  [<60749c68>] ? dump_stack_print_info+0xe8/0x100
[  197.910000]  [<60749cbc>] dump_stack+0x2a/0x2e
[  197.910000]  [<60052a5e>] __warn+0x13e/0x170
[  197.910000]  [<60549c77>] ? dev_disable_lro+0x187/0x1e0
[  197.910000]  [<600967ba>] ? printk+0x0/0x94
[  197.910000]  [<607d29f0>] ? _raw_spin_lock_irqsave+0x0/0x40
[  197.910000]  [<60052a90>] ? warn_slowpath_fmt+0x0/0xa0
[  197.910000]  [<6055d920>] ? rtnl_is_locked+0x0/0x20
[  197.910000]  [<60052b27>] warn_slowpath_fmt+0x97/0xa0
[  197.910000]  [<60047598>] ? set_signals+0x28/0x50
[  197.910000]  [<6055d920>] ? rtnl_is_locked+0x0/0x20
[  197.910000]  [<60549ab8>] ? netdev_update_features+0x18/0x50
[  197.910000]  [<60559023>] ? neigh_parms_alloc+0x173/0x190
[  197.910000]  [<60549c77>] dev_disable_lro+0x187/0x1e0
[  197.910000]  [<606968e0>] inetdev_init+0x110/0x200
[  197.920000]  [<6055d920>] ? rtnl_is_locked+0x0/0x20
[  197.920000]  [<60696a66>] inetdev_event+0x96/0x690
[  197.920000]  [<607d2ae0>] ? _raw_spin_unlock_irqrestore+0x0/0x30
[  197.920000]  [<60076720>] notifier_call_chain+0x40/0x80
[  197.920000]  [<6053e370>] ? call_netdevice_notifiers_info+0x0/0x90
[  197.920000]  [<60047570>] ? set_signals+0x0/0x50
[  197.920000]  [<60076828>] raw_notifier_call_chain+0x18/0x20
[  197.920000]  [<6053e3ec>] call_netdevice_notifiers_info+0x7c/0x90
[  197.920000]  [<6054a57f>] register_netdevice+0x57f/0x670
[  197.920000]  [<6013c230>] ? kfree+0x0/0x130
[  197.920000]  [<6054a69e>] register_netdev+0x2e/0x50
[  197.920000]  [<e1c230b7>] virtnet_probe+0xd07/0x1210 [virtio_net]
[  197.920000]  [<60076cc0>] ? blocking_notifier_call_chain+0x0/0x20
[  197.920000]  [<e1abe674>] virtio_dev_probe+0x1f4/0x2b0 [virtio]
[  197.920000]  [<604b6745>] really_probe+0x245/0x4e0
[  197.920000]  [<60076cc0>] ? blocking_notifier_call_chain+0x0/0x20
[  197.920000]  [<604b6c42>] driver_probe_device+0x112/0x140
[  197.920000]  [<604b6f2f>] device_driver_attach+0x4f/0x80
[  197.920000]  [<604b7105>] __driver_attach+0x1a5/0x1c0
[  197.920000]  [<604b6f60>] ? __driver_attach+0x0/0x1c0
[  197.920000]  [<604b6f60>] ? __driver_attach+0x0/0x1c0
[  197.920000]  [<6074b660>] ? klist_next+0x0/0x150
[  197.920000]  [<604b3c06>] bus_for_each_dev+0x76/0xc0
[  197.920000]  [<604b5fe3>] driver_attach+0x23/0x30
[  197.920000]  [<604b548f>] bus_add_driver+0x18f/0x300
[  197.920000]  [<60053690>] ? __cpuhp_setup_state+0x0/0x20
[  197.920000]  [<604b790f>] driver_register+0xdf/0x140
[  197.920000]  [<e1abe3c2>] register_virtio_driver+0x82/0x90 [virtio]
[  197.920000]  [<e1c2b09b>] virtio_net_driver_init+0x9b/0xd0 [virtio_net]
[  197.920000]  [<e1c2b000>] ? virtio_net_driver_init+0x0/0xd0 [virtio_net]
[  197.920000]  [<6002935a>] do_one_initcall+0x8a/0x1e0
[  197.920000]  [<60047570>] ? set_signals+0x0/0x50
[  197.920000]  [<60047598>] ? set_signals+0x28/0x50
[  197.920000]  [<6013c328>] ? kfree+0xf8/0x130
[  197.920000]  [<6013b295>] ? kmem_cache_alloc+0xa5/0x110
[  197.920000]  [<600b84d0>] ? module_put+0x0/0x50
[  197.920000]  [<600bd274>] do_init_module+0x8a/0x296
[  197.920000]  [<60122dd9>] ? __vfree+0x79/0x80
[  197.920000]  [<600bbb12>] load_module+0x1fa2/0x2750
[  197.920000]  [<6014a8d0>] ? kernel_read+0x0/0x50
[  197.920000]  [<60754b20>] ? strlen+0x0/0x30
[  197.920000]  [<601e96d0>] ? sysfs_create_bin_file+0x0/0xa0
[  197.920000]  [<600bc621>] sys_finit_module+0x111/0x160
[  197.920000]  [<60793fc0>] ? ptrace+0x0/0xa0
[  197.920000]  [<6002ee58>] handle_syscall+0x98/0xc0
[  197.920000]  [<60761e80>] ? __waitpid+0x0/0xa0
[  197.920000]  [<6004a8a6>] userspace+0x4c6/0x5b0
[  197.920000]  [<6002eeb0>] ? copy_chunk_to_user+0x0/0x30
[  197.920000]  [<6002b0e4>] fork_handler+0x94/0xa0
[  197.920000]
[  197.920000] ---[ end trace e3d2f999bd5bee3e ]---
[  197.920000] ------------[ cut here ]------------
[  197.920000] WARNING: CPU: 0 PID: 1038 at net/core/dev.c:1564 
dev_disable_lro+0x187/0x1e0
[  197.920000] netdevice: eth0: failed to disable LRO!
[  197.920000] Modules linked in: virtio_net(+) virtio_uml virtio_ring 
virtio
[  197.920000] CPU: 0 PID: 1038 Comm: systemd-udevd Tainted: G        W 
        5.3.0-rc3-00291-g1d45d9d8ec47 #35
[  197.920000] Stack:
[  197.920000]  d47f3610 60749c68 d47f3658 d47f36a0
[  197.920000]  00000009 00000000 d47f3620 60749cbc
[  197.920000]  d47f3690 60052a5e d19dad00 60549c77
[  197.920000] Call Trace:
[  197.920000]  [<60549c77>] ? dev_disable_lro+0x187/0x1e0
[  197.920000]  [<600967ba>] ? printk+0x0/0x94
[  197.920000]  [<6002c759>] show_stack+0x129/0x190
[  197.920000]  [<60749c68>] ? dump_stack_print_info+0xe8/0x100
[  197.920000]  [<60749cbc>] dump_stack+0x2a/0x2e
[  197.920000]  [<60052a5e>] __warn+0x13e/0x170
[  197.920000]  [<60549c77>] ? dev_disable_lro+0x187/0x1e0
[  197.920000]  [<600967ba>] ? printk+0x0/0x94
[  197.920000]  [<60052a90>] ? warn_slowpath_fmt+0x0/0xa0
[  197.920000]  [<60052b27>] warn_slowpath_fmt+0x97/0xa0
[  197.920000]  [<60047598>] ? set_signals+0x28/0x50
[  197.920000]  [<60549ab8>] ? netdev_update_features+0x18/0x50
[  197.920000]  [<60559023>] ? neigh_parms_alloc+0x173/0x190
[  197.920000]  [<60549c77>] dev_disable_lro+0x187/0x1e0
[  197.920000]  [<6013b1f0>] ? kmem_cache_alloc+0x0/0x110
[  197.920000]  [<6013b1f0>] ? kmem_cache_alloc+0x0/0x110
[  197.920000]  [<606d78aa>] ipv6_add_dev+0x1ba/0x670
[  197.920000]  [<606de0b8>] addrconf_notify+0x68/0xcd0
[  197.920000]  [<607d2b35>] ? _raw_spin_unlock_bh+0x25/0x30
[  197.920000]  [<606bce4c>] ? clusterip_netdev_event+0x10c/0x120
[  197.920000]  [<60076720>] notifier_call_chain+0x40/0x80
[  197.920000]  [<6053e370>] ? call_netdevice_notifiers_info+0x0/0x90
[  197.920000]  [<60047570>] ? set_signals+0x0/0x50
[  197.920000]  [<60076828>] raw_notifier_call_chain+0x18/0x20
[  197.920000]  [<6053e3ec>] call_netdevice_notifiers_info+0x7c/0x90
[  197.920000]  [<6054a57f>] register_netdevice+0x57f/0x670
[  197.920000]  [<6013c230>] ? kfree+0x0/0x130
[  197.920000]  [<6054a69e>] register_netdev+0x2e/0x50
[  197.920000]  [<e1c230b7>] virtnet_probe+0xd07/0x1210 [virtio_net]
[  197.920000]  [<60076cc0>] ? blocking_notifier_call_chain+0x0/0x20
[  197.920000]  [<e1abe674>] virtio_dev_probe+0x1f4/0x2b0 [virtio]
[  197.920000]  [<604b6745>] really_probe+0x245/0x4e0
[  197.920000]  [<60076cc0>] ? blocking_notifier_call_chain+0x0/0x20
[  197.920000]  [<604b6c42>] driver_probe_device+0x112/0x140
[  197.920000]  [<604b6f2f>] device_driver_attach+0x4f/0x80
[  197.920000]  [<604b7105>] __driver_attach+0x1a5/0x1c0
[  197.920000]  [<604b6f60>] ? __driver_attach+0x0/0x1c0
[  197.920000]  [<604b6f60>] ? __driver_attach+0x0/0x1c0
[  197.920000]  [<6074b660>] ? klist_next+0x0/0x150
[  197.920000]  [<604b3c06>] bus_for_each_dev+0x76/0xc0
[  197.920000]  [<604b5fe3>] driver_attach+0x23/0x30
[  197.920000]  [<604b548f>] bus_add_driver+0x18f/0x300
[  197.920000]  [<60053690>] ? __cpuhp_setup_state+0x0/0x20
[  197.920000]  [<604b790f>] driver_register+0xdf/0x140
[  197.920000]  [<e1abe3c2>] register_virtio_driver+0x82/0x90 [virtio]
[  197.920000]  [<e1c2b09b>] virtio_net_driver_init+0x9b/0xd0 [virtio_net]
[  197.920000]  [<e1c2b000>] ? virtio_net_driver_init+0x0/0xd0 [virtio_net]
[  197.930000]  [<6002935a>] do_one_initcall+0x8a/0x1e0
[  197.930000]  [<60047570>] ? set_signals+0x0/0x50
[  197.930000]  [<60047598>] ? set_signals+0x28/0x50
[  197.930000]  [<6013c328>] ? kfree+0xf8/0x130
[  197.930000]  [<6013b295>] ? kmem_cache_alloc+0xa5/0x110
[  197.930000]  [<600b84d0>] ? module_put+0x0/0x50
[  197.930000]  [<600bd274>] do_init_module+0x8a/0x296
[  197.930000]  [<60122dd9>] ? __vfree+0x79/0x80
[  197.930000]  [<600bbb12>] load_module+0x1fa2/0x2750
[  197.930000]  [<6014a8d0>] ? kernel_read+0x0/0x50
[  197.930000]  [<60754b20>] ? strlen+0x0/0x30
[  197.930000]  [<601e96d0>] ? sysfs_create_bin_file+0x0/0xa0
[  197.930000]  [<600bc621>] sys_finit_module+0x111/0x160
[  197.930000]  [<60793fc0>] ? ptrace+0x0/0xa0
[  197.930000]  [<6002ee58>] handle_syscall+0x98/0xc0
[  197.930000]  [<60761e80>] ? __waitpid+0x0/0xa0
[  197.930000]  [<6004a8a6>] userspace+0x4c6/0x5b0
[  197.930000]  [<6002eeb0>] ? copy_chunk_to_user+0x0/0x30
[  197.930000]  [<6002b0e4>] fork_handler+0x94/0xa0
[  197.930000]
[  197.930000] ---[ end trace e3d2f999bd5bee3f ]---
Anton Ivanov Aug. 14, 2019, 10:34 a.m. UTC | #8
On 14/08/2019 06:49, Anton Ivanov wrote:
>
>
> On 14/08/2019 06:39, Anton Ivanov wrote:
>>
>>
>> On 31/07/2019 21:29, Johannes Berg wrote:
>>> From: Erel Geron <erelx.geron@intel.com>
>>>
>>> This module allows virtio devices to be used over a vhost-user socket.
>>> To instantiate a device, pass the (list of) device socket(s) to the
>>> kernel or module command line, such as
>>>
>>>     virtio_uml.device=/var/uml.socket:1
>>>
>>> where the "1" is the device ID, from include/uapi/linux/virtio_ids.h,
>>> i.e. network in this case.
>>>
>>> Signed-off-by: Erel Geron <erelx.geron@intel.com>
>>> Signed-off-by: Johannes Berg <johannes.berg@intel.com>
>>> ---
>>>   arch/um/drivers/Kconfig      |   7 +
>>>   arch/um/drivers/Makefile     |   1 +
>>>   arch/um/drivers/vhost_user.h | 102 ++++
>>>   arch/um/drivers/virtio_uml.c | 905 
>>> +++++++++++++++++++++++++++++++++++
>>>   arch/um/include/asm/irq.h    |   5 +-
>>>   arch/um/include/shared/os.h  |   7 +
>>>   arch/um/kernel/ksyms.c       |   4 +
>>>   arch/um/kernel/mem.c         |   1 +
>>>   arch/um/kernel/physmem.c     |   1 +
>>>   arch/um/kernel/um_arch.c     |   1 +
>>>   arch/um/os-Linux/file.c      |  64 +++
>>>   11 files changed, 1096 insertions(+), 2 deletions(-)
>>>   create mode 100644 arch/um/drivers/vhost_user.h
>>>   create mode 100644 arch/um/drivers/virtio_uml.c
>>>
>>> diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
>>> index 2638e46f50cc..fea5a0d522dc 100644
>>> --- a/arch/um/drivers/Kconfig
>>> +++ b/arch/um/drivers/Kconfig
>>> @@ -335,3 +335,10 @@ config UML_NET_SLIRP
>>>         Startup example: 
>>> "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp"
>>>   endmenu
>>> +
>>> +config VIRTIO_UML
>>> +    tristate "UML driver for virtio devices"
>>> +    select VIRTIO
>>> +    help
>>> +      This driver provides support for virtio based paravirtual device
>>> +      drivers over vhost-user sockets.
>>> diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile
>>> index 693319839f69..3edf0a7cfd25 100644
>>> --- a/arch/um/drivers/Makefile
>>> +++ b/arch/um/drivers/Makefile
>>> @@ -61,6 +61,7 @@ obj-$(CONFIG_XTERM_CHAN) += xterm.o xterm_kern.o
>>>   obj-$(CONFIG_UML_WATCHDOG) += harddog.o
>>>   obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o
>>>   obj-$(CONFIG_UML_RANDOM) += random.o
>>> +obj-$(CONFIG_VIRTIO_UML) += virtio_uml.o
>>>   # pcap_user.o must be added explicitly.
>>>   USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o 
>>> pcap_user.o vde_user.o vector_user.o
>>> diff --git a/arch/um/drivers/vhost_user.h 
>>> b/arch/um/drivers/vhost_user.h
>>> new file mode 100644
>>> index 000000000000..2a9829b0782b
>>> --- /dev/null
>>> +++ b/arch/um/drivers/vhost_user.h
>>> @@ -0,0 +1,102 @@
>>> +// SPDX-License-Identifier: GPL-2.0-or-later
>>> +/* Vhost-user protocol */
>>> +
>>> +#ifndef __VHOST_USER_H__
>>> +#define __VHOST_USER_H__
>>> +
>>> +/* Message flags */
>>> +#define VHOST_USER_FLAG_REPLY        BIT(2)
>>> +/* Feature bits */
>>> +#define VHOST_USER_F_PROTOCOL_FEATURES    30
>>> +/* Protocol feature bits */
>>> +#define VHOST_USER_PROTOCOL_F_CONFIG    9
>>> +/* Vring state index masks */
>>> +#define VHOST_USER_VRING_INDEX_MASK    0xff
>>> +#define VHOST_USER_VRING_POLL_MASK    BIT(8)
>>> +
>>> +/* Supported version */
>>> +#define VHOST_USER_VERSION        1
>>> +/* Supported transport features */
>>> +#define VHOST_USER_SUPPORTED_F BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)
>>> +/* Supported protocol features */
>>> +#define VHOST_USER_SUPPORTED_PROTOCOL_F 
>>> BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)
>>> +
>>> +enum vhost_user_request {
>>> +    VHOST_USER_GET_FEATURES = 1,
>>> +    VHOST_USER_SET_FEATURES = 2,
>>> +    VHOST_USER_SET_OWNER = 3,
>>> +    VHOST_USER_RESET_OWNER = 4,
>>> +    VHOST_USER_SET_MEM_TABLE = 5,
>>> +    VHOST_USER_SET_LOG_BASE = 6,
>>> +    VHOST_USER_SET_LOG_FD = 7,
>>> +    VHOST_USER_SET_VRING_NUM = 8,
>>> +    VHOST_USER_SET_VRING_ADDR = 9,
>>> +    VHOST_USER_SET_VRING_BASE = 10,
>>> +    VHOST_USER_GET_VRING_BASE = 11,
>>> +    VHOST_USER_SET_VRING_KICK = 12,
>>> +    VHOST_USER_SET_VRING_CALL = 13,
>>> +    VHOST_USER_SET_VRING_ERR = 14,
>>> +    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
>>> +    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
>>> +    VHOST_USER_GET_QUEUE_NUM = 17,
>>> +    VHOST_USER_SET_VRING_ENABLE = 18,
>>> +    VHOST_USER_SEND_RARP = 19,
>>> +    VHOST_USER_NET_SEND_MTU = 20,
>>> +    VHOST_USER_SET_SLAVE_REQ_FD = 21,
>>> +    VHOST_USER_IOTLB_MSG = 22,
>>> +    VHOST_USER_SET_VRING_ENDIAN = 23,
>>> +    VHOST_USER_GET_CONFIG = 24,
>>> +    VHOST_USER_SET_CONFIG = 25,
>>> +};
>>> +
>>> +struct vhost_user_header {
>>> +    u32 request; /* Use enum vhost_user_request */
>>> +    u32 flags;
>>> +    u32 size;
>>> +} __packed;
>>> +
>>> +struct vhost_user_config {
>>> +    u32 offset;
>>> +    u32 size;
>>> +    u32 flags;
>>> +    u8 payload[0]; /* Variable length */
>>> +} __packed;
>>> +
>>> +struct vhost_user_vring_state {
>>> +    u32 index;
>>> +    u32 num;
>>> +} __packed;
>>> +
>>> +struct vhost_user_vring_addr {
>>> +    u32 index;
>>> +    u32 flags;
>>> +    u64 desc, used, avail, log;
>>> +} __packed;
>>> +
>>> +struct vhost_user_mem_region {
>>> +    u64 guest_addr;
>>> +    u64 size;
>>> +    u64 user_addr;
>>> +    u64 mmap_offset;
>>> +} __packed;
>>> +
>>> +struct vhost_user_mem_regions {
>>> +    u32 num;
>>> +    u32 padding;
>>> +    struct vhost_user_mem_region regions[2]; /* Currently 
>>> supporting 2 */
>>> +} __packed;
>>> +
>>> +union vhost_user_payload {
>>> +    u64 integer;
>>> +    struct vhost_user_config config;
>>> +    struct vhost_user_vring_state vring_state;
>>> +    struct vhost_user_vring_addr vring_addr;
>>> +    struct vhost_user_mem_regions mem_regions;
>>> +};
>>> +
>>> +struct vhost_user_msg {
>>> +    struct vhost_user_header header;
>>> +    union vhost_user_payload payload;
>>> +} __packed;
>>> +
>>> +#endif
>>> diff --git a/arch/um/drivers/virtio_uml.c 
>>> b/arch/um/drivers/virtio_uml.c
>>> new file mode 100644
>>> index 000000000000..30eeb907f42a
>>> --- /dev/null
>>> +++ b/arch/um/drivers/virtio_uml.c
>>> @@ -0,0 +1,905 @@
>>> +// SPDX-License-Identifier: GPL-2.0-or-later
>>> +/*
>>> + * Virtio vhost-user driver
>>> + *
>>> + * Copyright(c) 2019 Intel Corporation
>>> + *
>>> + * This module allows virtio devices to be used over a vhost-user 
>>> socket.
>>> + *
>>> + * Guest devices can be instantiated by kernel module or command line
>>> + * parameters. One device will be created for each parameter. Syntax:
>>> + *
>>> + * [virtio_uml.]device=<socket>:<virtio_id>[:<platform_id>]
>>> + * where:
>>> + *        <socket>    := vhost-user socket path to connect
>>> + *        <virtio_id>    := virtio device id (as in virtio_ids.h)
>>> + *        <platform_id>    := (optional) platform device id
>>> + *
>>> + * example:
>>> + *        virtio_uml.device=/var/uml.socket:1
>>> + *
>>> + * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, 
>>> ARM Ltd.
>>> + */
>>> +#include <linux/module.h>
>>> +#include <linux/platform_device.h>
>>> +#include <linux/slab.h>
>>> +#include <linux/virtio.h>
>>> +#include <linux/virtio_config.h>
>>> +#include <linux/virtio_ring.h>
>>> +#include <irq_kern.h>
>>> +#include <init.h>
>>> +#include <os.h>
>>> +#include "vhost_user.h"
>>> +
>>> +/* Workaround due to a conflict between irq_user.h and irqreturn.h */
>>> +#ifdef IRQ_NONE
>>> +#undef IRQ_NONE
>>> +#endif
>>> +
>>> +#define MAX_SUPPORTED_QUEUE_SIZE    256
>>> +
>>> +#define to_virtio_uml_device(_vdev) \
>>> +    container_of(_vdev, struct virtio_uml_device, vdev)
>>> +
>>> +struct virtio_uml_device {
>>> +    struct virtio_device vdev;
>>> +    struct platform_device *pdev;
>>> +
>>> +    int sock;
>>> +    u64 features;
>>> +    u64 protocol_features;
>>> +    u8 status;
>>> +};
>>> +
>>> +struct virtio_uml_vq_info {
>>> +    int kick_fd, call_fd;
>>> +    char name[32];
>>> +};
>>> +
>>> +extern unsigned long long physmem_size, highmem;
>>> +
>>> +
>>> +/* Vhost-user protocol */
>>> +
>>> +static int vhost_user_send(struct virtio_uml_device *vu_dev,
>>> +               struct vhost_user_msg *msg,
>>> +               int *fds, size_t num_fds)
>>> +{
>>> +    size_t size = sizeof(msg->header) + msg->header.size;
>>> +    int rc;
>>> +
>>> +    msg->header.flags |= VHOST_USER_VERSION;
>>> +    do {
>>> +        rc = os_sendmsg_fds(vu_dev->sock, (void *) msg, size, fds,
>>> +                num_fds);
>>> +    } while (rc == -EINTR);
>>> +    if (rc < 0)
>>> +        return rc;
>>> +    if (rc != size)
>>> +        return -ECOMM;
>>> +    return 0;
>>> +}
>>> +
>>> +static int vhost_user_send_no_payload(struct virtio_uml_device 
>>> *vu_dev,
>>> +                      u32 request)
>>> +{
>>> +    struct vhost_user_msg msg = {
>>> +        .header.request = request,
>>> +    };
>>> +    return vhost_user_send(vu_dev, &msg, NULL, 0);
>>> +}
>>> +
>>> +static int vhost_user_send_u64(struct virtio_uml_device *vu_dev,
>>> +                   u32 request, u64 value)
>>> +{
>>> +    struct vhost_user_msg msg = {
>>> +        .header.request = request,
>>> +        .header.size = sizeof(msg.payload.integer),
>>> +        .payload.integer = value,
>>> +    };
>>> +
>>> +    return vhost_user_send(vu_dev, &msg, NULL, 0);
>>> +}
>>> +
>>> +static int vhost_user_recv_header(struct virtio_uml_device *vu_dev,
>>> +                  struct vhost_user_msg *msg)
>>> +{
>>> +    size_t size = sizeof(msg->header);
>>> +    int rc;
>>> +
>>> +    do {
>>> +        rc = os_read_file(vu_dev->sock, (void *) msg, size);
>>> +    } while (rc == -EINTR);
>>> +    if (rc != size)
>>> +        return -EPROTO;
>>> +    if (msg->header.flags != (VHOST_USER_FLAG_REPLY | 
>>> VHOST_USER_VERSION))
>>> +        return -EPROTO;
>>> +    return 0;
>>> +}
>>> +
>>> +static int vhost_user_recv(struct virtio_uml_device *vu_dev,
>>> +               struct vhost_user_msg *msg,
>>> +               size_t max_payload_size)
>>> +{
>>> +    size_t size;
>>> +    int rc = vhost_user_recv_header(vu_dev, msg);
>>> +
>>> +    if (rc)
>>> +        return rc;
>>> +    size = msg->header.size;
>>> +    if (size > max_payload_size)
>>> +        return -EPROTO;
>>> +    do {
>>> +        rc = os_read_file(vu_dev->sock, (void *) &msg->payload, size);
>>> +    } while (rc == -EINTR);
>>> +    if (rc != size)
>>> +        return -EPROTO;
>>> +    return 0;
>>> +}
>>> +
>>> +static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev,
>>> +                   u64 *value)
>>> +{
>>> +    struct vhost_user_msg msg;
>>> +    int rc = vhost_user_recv(vu_dev, &msg, 
>>> sizeof(msg.payload.integer));
>>> +
>>> +    if (rc)
>>> +        return rc;
>>> +    if (msg.header.size != sizeof(msg.payload.integer))
>>> +        return -EPROTO;
>>> +    *value = msg.payload.integer;
>>> +    return 0;
>>> +}
>>> +
>>> +static int vhost_user_set_owner(struct virtio_uml_device *vu_dev)
>>> +{
>>> +    return vhost_user_send_no_payload(vu_dev, VHOST_USER_SET_OWNER);
>>> +}
>>> +
>>> +static int vhost_user_get_features(struct virtio_uml_device *vu_dev,
>>> +                   u64 *features)
>>> +{
>>> +    int rc = vhost_user_send_no_payload(vu_dev, 
>>> VHOST_USER_GET_FEATURES);
>>> +
>>> +    if (rc)
>>> +        return rc;
>>> +    return vhost_user_recv_u64(vu_dev, features);
>>> +}
>>> +
>>> +static int vhost_user_set_features(struct virtio_uml_device *vu_dev,
>>> +                   u64 features)
>>> +{
>>> +    return vhost_user_send_u64(vu_dev, VHOST_USER_SET_FEATURES, 
>>> features);
>>> +}
>>> +
>>> +static int vhost_user_get_protocol_features(struct 
>>> virtio_uml_device *vu_dev,
>>> +                        u64 *protocol_features)
>>> +{
>>> +    int rc = vhost_user_send_no_payload(vu_dev,
>>> +            VHOST_USER_GET_PROTOCOL_FEATURES);
>>> +
>>> +    if (rc)
>>> +        return rc;
>>> +    return vhost_user_recv_u64(vu_dev, protocol_features);
>>> +}
>>> +
>>> +static int vhost_user_set_protocol_features(struct 
>>> virtio_uml_device *vu_dev,
>>> +                        u64 protocol_features)
>>> +{
>>> +    return vhost_user_send_u64(vu_dev, 
>>> VHOST_USER_SET_PROTOCOL_FEATURES,
>>> +            protocol_features);
>>> +}
>>> +
>>> +static int vhost_user_init(struct virtio_uml_device *vu_dev)
>>> +{
>>> +    int rc = vhost_user_set_owner(vu_dev);
>>> +
>>> +    if (rc)
>>> +        return rc;
>>> +    rc = vhost_user_get_features(vu_dev, &vu_dev->features);
>>> +    if (rc)
>>> +        return rc;
>>> +
>>> +    if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) {
>>> +        rc = vhost_user_get_protocol_features(vu_dev,
>>> +                &vu_dev->protocol_features);
>>> +        if (rc)
>>> +            return rc;
>>> +        vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F;
>>> +        rc = vhost_user_set_protocol_features(vu_dev,
>>> +                vu_dev->protocol_features);
>>> +    }
>>> +    return rc;
>>> +}
>>> +
>>> +static int vhost_user_get_config(struct virtio_uml_device *vu_dev,
>>> +                 u32 offset, void *buf, u32 len)
>>> +{
>>> +    u32 cfg_size = offset + len;
>>> +    struct vhost_user_msg *msg;
>>> +    size_t payload_size = sizeof(msg->payload.config) + cfg_size;
>>> +    size_t msg_size = sizeof(msg->header) + payload_size;
>>> +    int rc;
>>> +
>>> +    if (!(vu_dev->protocol_features &
>>> +            BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
>>> +        return -EOPNOTSUPP;
>>> +
>>> +    msg = kzalloc(msg_size, GFP_KERNEL);
>>> +    if (!msg)
>>> +        return -ENOMEM;
>>> +    msg->header.request = VHOST_USER_GET_CONFIG;
>>> +    msg->header.size = payload_size;
>>> +    msg->payload.config.offset = 0;
>>> +    msg->payload.config.size = cfg_size;
>>> +
>>> +    rc = vhost_user_send(vu_dev, msg, NULL, 0);
>>> +    if (rc)
>>> +        goto error_get;
>>> +    rc = vhost_user_recv(vu_dev, msg, msg_size);
>>> +    if (rc)
>>> +        goto error_get;
>>> +    if (msg->header.size != payload_size ||
>>> +            msg->payload.config.size != cfg_size) {
>>> +        rc = -EPROTO;
>>> +        goto error_get;
>>> +    }
>>> +    memcpy(buf, msg->payload.config.payload + offset, len);
>>> +
>>> +error_get:
>>> +    kfree(msg);
>>> +    return rc;
>>> +}
>>> +
>>> +static int vhost_user_set_config(struct virtio_uml_device *vu_dev,
>>> +                 u32 offset, const void *buf, u32 len)
>>> +{
>>> +    struct vhost_user_msg *msg;
>>> +    size_t payload_size = sizeof(msg->payload.config) + len;
>>> +    size_t msg_size = sizeof(msg->header) + payload_size;
>>> +    int rc;
>>> +
>>> +    if (!(vu_dev->protocol_features &
>>> +            BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
>>> +        return -EOPNOTSUPP;
>>> +
>>> +    msg = kzalloc(msg_size, GFP_KERNEL);
>>> +    if (!msg)
>>> +        return -ENOMEM;
>>> +    msg->header.request = VHOST_USER_SET_CONFIG;
>>> +    msg->header.size = payload_size;
>>> +    msg->payload.config.offset = offset;
>>> +    msg->payload.config.size = len;
>>> +    memcpy(msg->payload.config.payload, buf, len);
>>> +
>>> +    rc = vhost_user_send(vu_dev, msg, NULL, 0);
>>> +
>>> +    kfree(msg);
>>> +    return rc;
>>> +}
>>> +
>>> +static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out,
>>> +                      struct vhost_user_mem_region *region_out)
>>> +{
>>> +    unsigned long long mem_offset;
>>> +    int rc = phys_mapping(addr, &mem_offset);
>>> +
>>> +    if (WARN_ON(rc < 0))
>>> +        return -EFAULT;
>>> +    *fd_out = rc;
>>> +    region_out->guest_addr = addr;
>>> +    region_out->user_addr = addr;
>>> +    region_out->size = size;
>>> +    region_out->mmap_offset = mem_offset;
>>> +
>>> +    rc = phys_mapping(addr + size - 1, &mem_offset);
>>> +    if (WARN_ON(rc != *fd_out))
>>> +        return -EFAULT;
>>> +    return 0;
>>> +}
>>> +
>>> +static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev)
>>> +{
>>> +    struct vhost_user_msg msg = {
>>> +        .header.request = VHOST_USER_SET_MEM_TABLE,
>>> +        .header.size = sizeof(msg.payload.mem_regions),
>>> +        .payload.mem_regions.num = 1,
>>> +    };
>>> +    int fds[2];
>>> +    int rc = vhost_user_init_mem_region(0, physmem_size, &fds[0],
>>> +            &msg.payload.mem_regions.regions[0]);
>>> +
>>> +    if (rc < 0)
>>> +        return rc;
>>> +    if (highmem) {
>>> +        msg.payload.mem_regions.num++;
>>> +        rc = vhost_user_init_mem_region(__pa(end_iomem), highmem,
>>> +                &fds[1], &msg.payload.mem_regions.regions[1]);
>>> +        if (rc < 0)
>>> +            return rc;
>>> +    }
>>> +
>>> +    return vhost_user_send(vu_dev, &msg, fds, 
>>> msg.payload.mem_regions.num);
>>> +}
>>> +
>>> +static int vhost_user_set_vring_state(struct virtio_uml_device 
>>> *vu_dev,
>>> +                      u32 request, u32 index, u32 num)
>>> +{
>>> +    struct vhost_user_msg msg = {
>>> +        .header.request = request,
>>> +        .header.size = sizeof(msg.payload.vring_state),
>>> +        .payload.vring_state.index = index,
>>> +        .payload.vring_state.num = num,
>>> +    };
>>> +
>>> +    return vhost_user_send(vu_dev, &msg, NULL, 0);
>>> +}
>>> +
>>> +static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev,
>>> +                    u32 index, u32 num)
>>> +{
>>> +    return vhost_user_set_vring_state(vu_dev, 
>>> VHOST_USER_SET_VRING_NUM,
>>> +            index, num);
>>> +}
>>> +
>>> +static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev,
>>> +                     u32 index, u32 offset)
>>> +{
>>> +    return vhost_user_set_vring_state(vu_dev, 
>>> VHOST_USER_SET_VRING_BASE,
>>> +            index, offset);
>>> +}
>>> +
>>> +static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev,
>>> +                     u32 index, u64 desc, u64 used, u64 avail,
>>> +                     u64 log)
>>> +{
>>> +    struct vhost_user_msg msg = {
>>> +        .header.request = VHOST_USER_SET_VRING_ADDR,
>>> +        .header.size = sizeof(msg.payload.vring_addr),
>>> +        .payload.vring_addr.index = index,
>>> +        .payload.vring_addr.desc = desc,
>>> +        .payload.vring_addr.used = used,
>>> +        .payload.vring_addr.avail = avail,
>>> +        .payload.vring_addr.log = log,
>>> +    };
>>> +
>>> +    return vhost_user_send(vu_dev, &msg, NULL, 0);
>>> +}
>>> +
>>> +static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev,
>>> +                   u32 request, int index, int fd)
>>> +{
>>> +    struct vhost_user_msg msg = {
>>> +        .header.request = request,
>>> +        .header.size = sizeof(msg.payload.integer),
>>> +        .payload.integer = index,
>>> +    };
>>> +
>>> +    if (index & ~VHOST_USER_VRING_INDEX_MASK)
>>> +        return -EINVAL;
>>> +    if (fd < 0) {
>>> +        msg.payload.integer |= VHOST_USER_VRING_POLL_MASK;
>>> +        return vhost_user_send(vu_dev, &msg, NULL, 0);
>>> +    }
>>> +    return vhost_user_send(vu_dev, &msg, &fd, 1);
>>> +}
>>> +
>>> +static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev,
>>> +                     int index, int fd)
>>> +{
>>> +    return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL,
>>> +            index, fd);
>>> +}
>>> +
>>> +static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev,
>>> +                     int index, int fd)
>>> +{
>>> +    return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK,
>>> +            index, fd);
>>> +}
>>> +
>>> +static int vhost_user_set_vring_enable(struct virtio_uml_device 
>>> *vu_dev,
>>> +                       u32 index, bool enable)
>>> +{
>>> +    if (!(vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)))
>>> +        return 0;
>>> +
>>> +    return vhost_user_set_vring_state(vu_dev, 
>>> VHOST_USER_SET_VRING_ENABLE,
>>> +            index, enable);
>>> +}
>>> +
>>> +
>>> +/* Virtio interface */
>>> +
>>> +static bool vu_notify(struct virtqueue *vq)
>>> +{
>>> +    struct virtio_uml_vq_info *info = vq->priv;
>>> +    int rc;
>>> +
>>> +    do {
>>> +        rc = os_eventfd_write(info->kick_fd, 1);
>>> +    } while (rc == -EINTR);
>>> +    WARN_ON(rc);
>>> +    return !rc;
>>> +}
>>> +
>>> +static irqreturn_t vu_interrupt(int irq, void *opaque)
>>> +{
>>> +    struct virtqueue *vq = opaque;
>>> +    struct virtio_uml_vq_info *info = vq->priv;
>>> +    unsigned long long value;
>>> +    int rc;
>>> +    irqreturn_t ret = IRQ_NONE;
>>> +
>>> +    do {
>>> +        rc = os_eventfd_read(info->call_fd, &value);
>>> +        if (!rc)
>>> +            ret |= vring_interrupt(irq, vq);
>>> +    } while (!rc || rc == -EINTR);
>>> +    WARN_ON(rc != -EAGAIN);
>>> +    return ret;
>>> +}
>>> +
>>> +
>>> +static void vu_get(struct virtio_device *vdev, unsigned offset,
>>> +           void *buf, unsigned len)
>>> +{
>>> +    struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
>>> +
>>> +    WARN_ON(vhost_user_get_config(vu_dev, offset, buf, len));
>>> +}
>>> +
>>> +static void vu_set(struct virtio_device *vdev, unsigned offset,
>>> +           const void *buf, unsigned len)
>>> +{
>>> +    struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
>>> +
>>> +    WARN_ON(vhost_user_set_config(vu_dev, offset, buf, len));
>>> +}
>>> +
>>> +static u8 vu_get_status(struct virtio_device *vdev)
>>> +{
>>> +    struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
>>> +
>>> +    return vu_dev->status;
>>> +}
>>> +
>>> +static void vu_set_status(struct virtio_device *vdev, u8 status)
>>> +{
>>> +    struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
>>> +
>>> +    vu_dev->status = status;
>>> +}
>>> +
>>> +static void vu_reset(struct virtio_device *vdev)
>>> +{
>>> +    struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
>>> +
>>> +    vu_dev->status = 0;
>>> +}
>>> +
>>> +static void vu_del_vq(struct virtqueue *vq)
>>> +{
>>> +    struct virtio_uml_vq_info *info = vq->priv;
>>> +
>>> +    um_free_irq(VIRTIO_IRQ, vq);
>>> +
>>> +    os_close_file(info->call_fd);
>>> +    os_close_file(info->kick_fd);
>>> +
>>> +    vring_del_virtqueue(vq);
>>> +    kfree(info);
>>> +}
>>> +
>>> +static void vu_del_vqs(struct virtio_device *vdev)
>>> +{
>>> +    struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
>>> +    struct virtqueue *vq, *n;
>>> +    u64 features;
>>> +
>>> +    /* Note: reverse order as a workaround to a decoding bug in 
>>> snabb */
>>> +    list_for_each_entry_reverse(vq, &vdev->vqs, list)
>>> +        WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, 
>>> false));
>>> +
>>> +    /* Ensure previous messages have been processed */
>>> +    WARN_ON(vhost_user_get_features(vu_dev, &features));
>>> +
>>> +    list_for_each_entry_safe(vq, n, &vdev->vqs, list)
>>> +        vu_del_vq(vq);
>>> +}
>>> +
>>> +static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
>>> +                     unsigned index, vq_callback_t *callback,
>>> +                     const char *name, bool ctx)
>>> +{
>>> +    struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
>>> +    struct platform_device *pdev = vu_dev->pdev;
>>> +    struct virtio_uml_vq_info *info;
>>> +    struct virtqueue *vq;
>>> +    int num = MAX_SUPPORTED_QUEUE_SIZE;
>>> +    int rc;
>>> +    int call_fds[2];
>>> +
>>> +    info = kzalloc(sizeof(*info), GFP_KERNEL);
>>> +    if (!info) {
>>> +        rc = -ENOMEM;
>>> +        goto error_kzalloc;
>>> +    }
>>> +    snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name,
>>> +            pdev->id, name);
>>> +
>>> +    rc = os_eventfd(0, 0);
>>> +    if (rc < 0)
>>> +        goto error_kick;
>>> +    info->kick_fd = rc;
>>> +
>>> +    /* Use a pipe for call fd, since SIGIO is not supported for 
>>> eventfd */
>>> +    rc = os_pipe(call_fds, true, true);
>>> +    if (rc < 0)
>>> +        goto error_call;
>>> +    info->call_fd = call_fds[0];
>>> +
>>> +    vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, 
>>> true,
>>> +            ctx, vu_notify, callback, info->name);
>>> +    if (!vq) {
>>> +        rc = -ENOMEM;
>>> +        goto error_create;
>>> +    }
>>> +    vq->priv = info;
>>> +    num = virtqueue_get_vring_size(vq);
>>> +
>>> +    rc = um_request_irq(VIRTIO_IRQ, info->call_fd, IRQ_READ, 
>>> vu_interrupt,
>>> +            IRQF_SHARED, info->name, vq);
>>> +    if (rc)
>>> +        goto error_irq;
>>> +
>>> +    rc = vhost_user_set_vring_num(vu_dev, index, num);
>>> +    if (rc)
>>> +        goto error_setup;
>>> +
>>> +    rc = vhost_user_set_vring_base(vu_dev, index, 0);
>>> +    if (rc)
>>> +        goto error_setup;
>>> +
>>> +    rc = vhost_user_set_vring_call(vu_dev, index, call_fds[1]);
>>> +    if (rc)
>>> +        goto error_setup;
>>> +
>>> +    rc = vhost_user_set_vring_addr(vu_dev, index,
>>> +            virtqueue_get_desc_addr(vq),
>>> +            virtqueue_get_used_addr(vq),
>>> +            virtqueue_get_avail_addr(vq),
>>> +            (u64) -1);
>>> +    if (rc)
>>> +        goto error_setup;
>>> +
>>> +    /* Close unused write end of call fds */
>>> +    os_close_file(call_fds[1]);
>>> +
>>> +    return vq;
>>> +
>>> +error_setup:
>>> +    um_free_irq(VIRTIO_IRQ, vq);
>>> +error_irq:
>>> +    vring_del_virtqueue(vq);
>>> +error_create:
>>> +    os_close_file(call_fds[1]);
>>> +    os_close_file(info->call_fd);
>>> +error_call:
>>> +    os_close_file(info->kick_fd);
>>> +error_kick:
>>> +    kfree(info);
>>> +error_kzalloc:
>>> +    return ERR_PTR(rc);
>>> +}
>>> +
>>> +static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
>>> +               struct virtqueue *vqs[], vq_callback_t *callbacks[],
>>> +               const char * const names[], const bool *ctx,
>>> +               struct irq_affinity *desc)
>>> +{
>>> +    struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
>>> +    int i, queue_idx, rc;
>>> +    struct virtqueue *vq;
>>> +
>>> +    rc = vhost_user_set_mem_table(vu_dev);
>>> +    if (rc)
>>> +        return rc;
>>> +
>>> +    for (i = 0; i < nvqs; ++i) {
>>> +        if (!names[i]) {
>>> +            vqs[i] = NULL;
>>> +            continue;
>>> +        }
>>> +
>>> +        vqs[i] = vu_setup_vq(vdev, queue_idx++, callbacks[i], 
>>> names[i],
>>> +                ctx ? ctx[i] : false);
>>> +        if (IS_ERR(vqs[i])) {
>>> +            rc = PTR_ERR(vqs[i]);
>>> +            goto error_setup;
>>> +        }
>>> +    }
>>> +
>>> +    list_for_each_entry(vq, &vdev->vqs, list) {
>>> +        struct virtio_uml_vq_info *info = vq->priv;
>>> +
>>> +        rc = vhost_user_set_vring_kick(vu_dev, vq->index,
>>> +                info->kick_fd);
>>> +        if (rc)
>>> +            goto error_setup;
>>> +
>>> +        rc = vhost_user_set_vring_enable(vu_dev, vq->index, true);
>>> +        if (rc)
>>> +            goto error_setup;
>>> +    }
>>> +
>>> +    return 0;
>>> +
>>> +error_setup:
>>> +    vu_del_vqs(vdev);
>>> +    return rc;
>>> +}
>>> +
>>> +static u64 vu_get_features(struct virtio_device *vdev)
>>> +{
>>> +    struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
>>> +
>>> +    return vu_dev->features;
>>> +}
>>> +
>>> +static int vu_finalize_features(struct virtio_device *vdev)
>>> +{
>>> +    struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
>>> +    u64 supported = vdev->features & VHOST_USER_SUPPORTED_F;
>>> +
>>> +    vring_transport_features(vdev);
>>> +    vu_dev->features = vdev->features | supported;
>>> +
>>> +    return vhost_user_set_features(vu_dev, vu_dev->features);
>>> +}
>>> +
>>> +static const char *vu_bus_name(struct virtio_device *vdev)
>>> +{
>>> +    struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
>>> +
>>> +    return vu_dev->pdev->name;
>>> +}
>>> +
>>> +static const struct virtio_config_ops virtio_uml_config_ops = {
>>> +    .get = vu_get,
>>> +    .set = vu_set,
>>> +    .get_status = vu_get_status,
>>> +    .set_status = vu_set_status,
>>> +    .reset = vu_reset,
>>> +    .find_vqs = vu_find_vqs,
>>> +    .del_vqs = vu_del_vqs,
>>> +    .get_features = vu_get_features,
>>> +    .finalize_features = vu_finalize_features,
>>> +    .bus_name = vu_bus_name,
>>> +};
>>> +
>>> +
>>> +static void virtio_uml_release_dev(struct device *d)
>>> +{
>>> +    struct virtio_device *vdev =
>>> +            container_of(d, struct virtio_device, dev);
>>> +    struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
>>> +
>>> +    os_close_file(vu_dev->sock);
>>> +}
>>> +
>>> +/* Platform device */
>>> +
>>> +struct virtio_uml_platform_data {
>>> +    u32 virtio_device_id;
>>> +    const char *socket_path;
>>> +};
>>> +
>>> +static int virtio_uml_probe(struct platform_device *pdev)
>>> +{
>>> +    struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
>>> +    struct virtio_uml_device *vu_dev;
>>> +    int rc;
>>> +
>>> +    if (!pdata)
>>> +        return -EINVAL;
>>> +
>>> +    vu_dev = devm_kzalloc(&pdev->dev, sizeof(*vu_dev), GFP_KERNEL);
>>> +    if (!vu_dev)
>>> +        return -ENOMEM;
>>> +
>>> +    vu_dev->vdev.dev.parent = &pdev->dev;
>>> +    vu_dev->vdev.dev.release = virtio_uml_release_dev;
>>> +    vu_dev->vdev.config = &virtio_uml_config_ops;
>>> +    vu_dev->vdev.id.device = pdata->virtio_device_id;
>>> +    vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID;
>>> +    vu_dev->pdev = pdev;
>>> +
>>> +    do {
>>> +        rc = os_connect_socket(pdata->socket_path);
>>> +    } while (rc == -EINTR);
>>> +    if (rc < 0)
>>> +        return rc;
>>> +    vu_dev->sock = rc;
>>> +
>>> +    rc = vhost_user_init(vu_dev);
>>> +    if (rc)
>>> +        goto error_init;
>>> +
>>> +    platform_set_drvdata(pdev, vu_dev);
>>> +
>>> +    rc = register_virtio_device(&vu_dev->vdev);
>>> +    if (rc)
>>> +        put_device(&vu_dev->vdev.dev);
>>> +    return rc;
>>> +
>>> +error_init:
>>> +    os_close_file(vu_dev->sock);
>>> +    return rc;
>>> +}
>>> +
>>> +static int virtio_uml_remove(struct platform_device *pdev)
>>> +{
>>> +    struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
>>> +
>>> +    unregister_virtio_device(&vu_dev->vdev);
>>> +    return 0;
>>> +}
>>> +
>>> +/* Command line device list */
>>> +
>>> +static void vu_cmdline_release_dev(struct device *d)
>>> +{
>>> +}
>>> +
>>> +static struct device vu_cmdline_parent = {
>>> +    .init_name = "virtio-uml-cmdline",
>>> +    .release = vu_cmdline_release_dev,
>>> +};
>>> +
>>> +static bool vu_cmdline_parent_registered;
>>> +static int vu_cmdline_id;
>>> +
>>> +static int vu_cmdline_set(const char *device, const struct 
>>> kernel_param *kp)
>>> +{
>>> +    const char *ids = strchr(device, ':');
>>> +    unsigned int virtio_device_id;
>>> +    int processed, consumed, err;
>>> +    char *socket_path;
>>> +    struct virtio_uml_platform_data *pdata;
>>> +    struct platform_device *pdev;
>>> +
>>> +    if (!ids || ids == device)
>>> +        return -EINVAL;
>>> +
>>> +    processed = sscanf(ids, ":%u%n:%d%n",
>>> +            &virtio_device_id, &consumed,
>>> +            &vu_cmdline_id, &consumed);
>>> +
>>> +    if (processed < 1 || ids[consumed])
>>> +        return -EINVAL;
>>> +
>>> +    if (!vu_cmdline_parent_registered) {
>>> +        err = device_register(&vu_cmdline_parent);
>>> +        if (err) {
>>> +            pr_err("Failed to register parent device!\n");
>>> +            put_device(&vu_cmdline_parent);
>>> +            return err;
>>> +        }
>>> +        vu_cmdline_parent_registered = true;
>>> +    }
>>> +
>>> +    socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL);
>>> +    if (!socket_path)
>>> +        return -ENOMEM;
>>> +
>>> +    pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
>>> +    if (!pdata) {
>>> +        err = -ENOMEM;
>>> +        goto error_kzalloc;
>>> +    }
>>> +
>>> +    pdata->virtio_device_id = (u32) virtio_device_id;
>>> +    pdata->socket_path = socket_path;
>>> +
>>> +    pr_info("Registering device virtio-uml.%d id=%d at %s\n",
>>> +            vu_cmdline_id, virtio_device_id, socket_path);
>>> +
>>> +    pdev = platform_device_register_data(&vu_cmdline_parent, 
>>> "virtio-uml",
>>> +            vu_cmdline_id++, pdata, sizeof(*pdata));
>>> +    err = PTR_ERR_OR_ZERO(pdev);
>>> +    if (err)
>>> +        goto error_register;
>>> +    return 0;
>>> +
>>> +error_register:
>>> +    kfree(pdata);
>>> +error_kzalloc:
>>> +    kfree(socket_path);
>>> +    return err;
>>> +}
>>> +
>>> +static int vu_cmdline_get_device(struct device *dev, void *data)
>>> +{
>>> +    struct platform_device *pdev = to_platform_device(dev);
>>> +    struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
>>> +    char *buffer = data;
>>> +    unsigned int len = strlen(buffer);
>>> +
>>> +    snprintf(buffer + len, PAGE_SIZE - len, "%s:%d:%d\n",
>>> +            pdata->socket_path, pdata->virtio_device_id, pdev->id);
>>> +    return 0;
>>> +}
>>> +
>>> +static int vu_cmdline_get(char *buffer, const struct kernel_param *kp)
>>> +{
>>> +    buffer[0] = '\0';
>>> +    if (vu_cmdline_parent_registered)
>>> +        device_for_each_child(&vu_cmdline_parent, buffer,
>>> +                vu_cmdline_get_device);
>>> +    return strlen(buffer) + 1;
>>> +}
>>> +
>>> +static const struct kernel_param_ops vu_cmdline_param_ops = {
>>> +    .set = vu_cmdline_set,
>>> +    .get = vu_cmdline_get,
>>> +};
>>> +
>>> +device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR);
>>> +
>>> +
>>> +static int vu_unregister_cmdline_device(struct device *dev, void 
>>> *data)
>>> +{
>>> +    struct platform_device *pdev = to_platform_device(dev);
>>> +    struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
>>> +
>>> +    platform_device_unregister(pdev);
>>> +    kfree(pdata->socket_path);
>>> +    kfree(pdata);
>>> +    return 0;
>>> +}
>>> +
>>> +static void vu_unregister_cmdline_devices(void)
>>> +{
>>> +    if (vu_cmdline_parent_registered) {
>>> +        device_for_each_child(&vu_cmdline_parent, NULL,
>>> +                vu_unregister_cmdline_device);
>>> +        device_unregister(&vu_cmdline_parent);
>>> +        vu_cmdline_parent_registered = false;
>>> +    }
>>> +}
>>> +
>>> +/* Platform driver */
>>> +
>>> +static const struct of_device_id virtio_uml_match[] = {
>>> +    { .compatible = "virtio,uml", },
>>> +    { }
>>> +};
>>> +MODULE_DEVICE_TABLE(of, virtio_uml_match);
>>> +
>>> +static struct platform_driver virtio_uml_driver = {
>>> +    .probe = virtio_uml_probe,
>>> +    .remove = virtio_uml_remove,
>>> +    .driver = {
>>> +        .name = "virtio-uml",
>>> +        .of_match_table = virtio_uml_match,
>>> +    },
>>> +};
>>> +
>>> +static int __init virtio_uml_init(void)
>>> +{
>>> +    return platform_driver_register(&virtio_uml_driver);
>>> +}
>>> +
>>> +static void __exit virtio_uml_exit(void)
>>> +{
>>> +    platform_driver_unregister(&virtio_uml_driver);
>>> +    vu_unregister_cmdline_devices();
>>> +}
>>> +
>>> +module_init(virtio_uml_init);
>>> +module_exit(virtio_uml_exit);
>>> +__uml_exitcall(virtio_uml_exit);
>>> +
>>> +MODULE_DESCRIPTION("UML driver for vhost-user virtio devices");
>>> +MODULE_LICENSE("GPL");
>>> diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h
>>> index ce7a78c3bcf2..42c6205e2dc4 100644
>>> --- a/arch/um/include/asm/irq.h
>>> +++ b/arch/um/include/asm/irq.h
>>> @@ -17,17 +17,18 @@
>>>   #define TELNETD_IRQ         12
>>>   #define XTERM_IRQ         13
>>>   #define RANDOM_IRQ         14
>>> +#define VIRTIO_IRQ        15
>>>   #ifdef CONFIG_UML_NET_VECTOR
>>> -#define VECTOR_BASE_IRQ        15
>>> +#define VECTOR_BASE_IRQ        (VIRTIO_IRQ + 1)
>>>   #define VECTOR_IRQ_SPACE    8
>>>   #define LAST_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ - 1)
>>>   #else
>>> -#define LAST_IRQ RANDOM_IRQ
>>> +#define LAST_IRQ VIRTIO_IRQ
>>>   #endif
>>> diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
>>> index 4a62ac4251a5..da7ebbe4ebb3 100644
>>> --- a/arch/um/include/shared/os.h
>>> +++ b/arch/um/include/shared/os.h
>>> @@ -36,6 +36,8 @@
>>>   #define OS_LIB_PATH    "/usr/lib/"
>>>   #endif
>>> +#define OS_SENDMSG_MAX_FDS 8
>>> +
>>>   /*
>>>    * types taken from stat_file() in hostfs_user.c
>>>    * (if they are wrong here, they are wrong there...).
>>> @@ -176,6 +178,11 @@ extern unsigned os_major(unsigned long long dev);
>>>   extern unsigned os_minor(unsigned long long dev);
>>>   extern unsigned long long os_makedev(unsigned major, unsigned minor);
>>>   extern int os_falloc_punch(int fd, unsigned long long offset, int 
>>> count);
>>> +extern int os_eventfd(unsigned int initval, int flags);
>>> +extern int os_eventfd_read(int fd, unsigned long long *value);
>>> +extern int os_eventfd_write(int fd, unsigned long long value);
>>> +extern int os_sendmsg_fds(int fd, const void *buf, unsigned int len,
>>> +              const int *fds, unsigned int fds_num);
>>>   /* start_up.c */
>>>   extern void os_early_checks(void);
>>> diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
>>> index 232b22307fdd..f3766889226b 100644
>>> --- a/arch/um/kernel/ksyms.c
>>> +++ b/arch/um/kernel/ksyms.c
>>> @@ -38,6 +38,10 @@ EXPORT_SYMBOL(run_helper);
>>>   EXPORT_SYMBOL(os_major);
>>>   EXPORT_SYMBOL(os_minor);
>>>   EXPORT_SYMBOL(os_makedev);
>>> +EXPORT_SYMBOL(os_eventfd);
>>> +EXPORT_SYMBOL(os_eventfd_read);
>>> +EXPORT_SYMBOL(os_eventfd_write);
>>> +EXPORT_SYMBOL(os_sendmsg_fds);
>>>   EXPORT_SYMBOL(add_sigio_fd);
>>>   EXPORT_SYMBOL(ignore_sigio_fd);
>>> diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
>>> index de58e976b9bc..f256be1d77bd 100644
>>> --- a/arch/um/kernel/mem.c
>>> +++ b/arch/um/kernel/mem.c
>>> @@ -31,6 +31,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD];
>>>   /* Initialized at boot time, and readonly after that */
>>>   unsigned long long highmem;
>>> +EXPORT_SYMBOL(highmem);
>>>   int kmalloc_ok = 0;
>>>   /* Used during early boot */
>>> diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c
>>> index 5bf56af4d5b9..870c80c88a97 100644
>>> --- a/arch/um/kernel/physmem.c
>>> +++ b/arch/um/kernel/physmem.c
>>> @@ -143,6 +143,7 @@ int phys_mapping(unsigned long phys, unsigned 
>>> long long *offset_out)
>>>       return fd;
>>>   }
>>> +EXPORT_SYMBOL(phys_mapping);
>>>   static int __init uml_mem_setup(char *line, int *add)
>>>   {
>>> diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
>>> index a818ccef30ca..9de212bf4320 100644
>>> --- a/arch/um/kernel/um_arch.c
>>> +++ b/arch/um/kernel/um_arch.c
>>> @@ -113,6 +113,7 @@ static int have_root __initdata = 0;
>>>   /* Set in uml_mem_setup and modified in linux_main */
>>>   long long physmem_size = 32 * 1024 * 1024;
>>> +EXPORT_SYMBOL(physmem_size);
>>>   static const char *usage_string =
>>>   "User Mode Linux v%s\n"
>>> diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
>>> index f25b110d4e70..0c4e0298398d 100644
>>> --- a/arch/um/os-Linux/file.c
>>> +++ b/arch/um/os-Linux/file.c
>>> @@ -15,6 +15,7 @@
>>>   #include <sys/sysmacros.h>
>>>   #include <sys/un.h>
>>>   #include <sys/types.h>
>>> +#include <sys/eventfd.h>
>>>   #include <os.h>
>>>   static void copy_stat(struct uml_stat *dst, const struct stat64 *src)
>>> @@ -620,3 +621,66 @@ int os_falloc_punch(int fd, unsigned long long 
>>> offset, int len)
>>>       return n;
>>>   }
>>> +int os_eventfd(unsigned int initval, int flags)
>>> +{
>>> +    int fd = eventfd(initval, flags);
>>> +
>>> +    if (fd < 0)
>>> +        return -errno;
>>> +    return fd;
>>> +}
>>> +
>>> +int os_eventfd_read(int fd, unsigned long long *value)
>>> +{
>>> +    eventfd_t val;
>>> +    int err = eventfd_read(fd, &val);
>>> +
>>> +    if (err < 0)
>>> +        return -errno;
>>> +    *value = val;
>>> +    return 0;
>>> +}
>>> +
>>> +int os_eventfd_write(int fd, unsigned long long value)
>>> +{
>>> +    int err = eventfd_write(fd, value);
>>> +
>>> +    if (err < 0)
>>> +        return -errno;
>>> +    return 0;
>>> +}
>>> +
>>> +int os_sendmsg_fds(int fd, const void *buf, unsigned int len, const 
>>> int *fds,
>>> +           unsigned int fds_num)
>>> +{
>>> +    struct iovec iov = {
>>> +        .iov_base = (void *) buf,
>>> +        .iov_len = len,
>>> +    };
>>> +    union {
>>> +        char control[CMSG_SPACE(sizeof(*fds) * OS_SENDMSG_MAX_FDS)];
>>> +        struct cmsghdr align;
>>> +    } u;
>>> +    unsigned int fds_size = sizeof(*fds) * fds_num;
>>> +    struct msghdr msg = {
>>> +        .msg_iov = &iov,
>>> +        .msg_iovlen = 1,
>>> +        .msg_control = u.control,
>>> +        .msg_controllen = CMSG_SPACE(fds_size),
>>> +    };
>>> +    struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
>>> +    int err;
>>> +
>>> +    if (fds_num > OS_SENDMSG_MAX_FDS)
>>> +        return -EINVAL;
>>> +    memset(u.control, 0, sizeof(u.control));
>>> +    cmsg->cmsg_level = SOL_SOCKET;
>>> +    cmsg->cmsg_type = SCM_RIGHTS;
>>> +    cmsg->cmsg_len = CMSG_LEN(fds_size);
>>> +    memcpy(CMSG_DATA(cmsg), fds, fds_size);
>>> +    err = sendmsg(fd, &msg, 0);
>>> +
>>> +    if (err < 0)
>>> +        return -errno;
>>> +    return err;
>>> +}
>>>
>>
>> Can you send me your config?
>>
>> It goes into an always busy loop early in boot with mine and the boot 
>> never completes. I will debug it in the meantime, but having the 
>> config with which it works will be helpful.
>>
>> For the record - I have vector enabled and I tried virtio and 
>> virtio-net compiled in.
>>
>
> It blows up as a module too:
> [  197.850000] Registering device virtio-uml.0 id=1 at 
> /tmp/uml-test-vhost.sock
> root@stretch-opx:/lib/modules/5.3.0-rc3-00291-g1d45d9d8ec47/kernel# [ 
> 197.900000] ------------[ cut here ]------------
> [  197.900000] WARNING: CPU: 0 PID: 1038 at 
> arch/um/drivers/virtio_uml.c:449 vu_get+0xfb/0x130 [virtio_uml]
> [  197.900000] Modules linked in: virtio_net(+) virtio_uml virtio_ring 
> virtio
> [  197.900000] CPU: 0 PID: 1038 Comm: systemd-udevd Not tainted 
> 5.3.0-rc3-00291-g1d45d9d8ec47 #35
> [  197.900000] Stack:
> [  197.900000]  d47f38d0 60749c68 d47f3918 00000000
> [  197.900000]  00000009 00000000 d47f38e0 60749cbc
> [  197.900000]  d47f3950 60052a5e 601e5830 d47d3780
> [  197.900000] Call Trace:
> [  197.900000]  [<e1b63b7b>] ? vu_get+0xfb/0x130 [virtio_uml]
> [  197.900000]  [<600967ba>] ? printk+0x0/0x94
> [  197.900000]  [<6002c759>] show_stack+0x129/0x190
> [  197.900000]  [<60749c68>] ? dump_stack_print_info+0xe8/0x100
> [  197.900000]  [<60749cbc>] dump_stack+0x2a/0x2e
> [  197.900000]  [<60052a5e>] __warn+0x13e/0x170
> [  197.900000]  [<601e5830>] ? kernfs_next_descendant_post+0x0/0xd0
> [  197.900000]  [<601e66c3>] ? kernfs_activate+0x123/0x150
> [  197.900000]  [<60052c06>] warn_slowpath_null+0x46/0x4b
> [  197.900000]  [<e1b63b7b>] vu_get+0xfb/0x130 [virtio_uml]
> [  197.900000]  [<e1abe1d0>] ? 
> virtio_check_driver_offered_feature+0x0/0xe0 [virtio]
> [  197.900000]  [<60076cc0>] ? blocking_notifier_call_chain+0x0/0x20
> [  197.900000]  [<e1c25f86>] virtnet_validate+0x1c6/0x253 [virtio_net]
> [  197.900000]  [<60076cc0>] ? blocking_notifier_call_chain+0x0/0x20
> [  197.900000]  [<e1abe65d>] virtio_dev_probe+0x1dd/0x2b0 [virtio]
> [  197.900000]  [<604b6745>] really_probe+0x245/0x4e0
> [  197.900000]  [<60076cc0>] ? blocking_notifier_call_chain+0x0/0x20
> [  197.900000]  [<604b6c42>] driver_probe_device+0x112/0x140
> [  197.900000]  [<604b6f2f>] device_driver_attach+0x4f/0x80
> [  197.900000]  [<604b7105>] __driver_attach+0x1a5/0x1c0
> [  197.900000]  [<604b6f60>] ? __driver_attach+0x0/0x1c0
> [  197.900000]  [<604b6f60>] ? __driver_attach+0x0/0x1c0
> [  197.900000]  [<6074b660>] ? klist_next+0x0/0x150
> [  197.900000]  [<604b3c06>] bus_for_each_dev+0x76/0xc0
> [  197.900000]  [<604b5fe3>] driver_attach+0x23/0x30
> [  197.900000]  [<604b548f>] bus_add_driver+0x18f/0x300
> [  197.900000]  [<60053690>] ? __cpuhp_setup_state+0x0/0x20
> [  197.900000]  [<604b790f>] driver_register+0xdf/0x140
> [  197.900000]  [<e1abe3c2>] register_virtio_driver+0x82/0x90 [virtio]
> [  197.900000]  [<e1c2b09b>] virtio_net_driver_init+0x9b/0xd0 
> [virtio_net]
> [  197.900000]  [<e1c2b000>] ? virtio_net_driver_init+0x0/0xd0 
> [virtio_net]
> [  197.900000]  [<6002935a>] do_one_initcall+0x8a/0x1e0
> [  197.900000]  [<60047570>] ? set_signals+0x0/0x50
> [  197.900000]  [<60047598>] ? set_signals+0x28/0x50
> [  197.900000]  [<6013c328>] ? kfree+0xf8/0x130
> [  197.900000]  [<6013b295>] ? kmem_cache_alloc+0xa5/0x110
> [  197.900000]  [<600b84d0>] ? module_put+0x0/0x50
> [  197.900000]  [<600bd274>] do_init_module+0x8a/0x296
> [  197.900000]  [<60122dd9>] ? __vfree+0x79/0x80
> [  197.900000]  [<600bbb12>] load_module+0x1fa2/0x2750
> [  197.900000]  [<6014a8d0>] ? kernel_read+0x0/0x50
> [  197.900000]  [<60754b20>] ? strlen+0x0/0x30
> [  197.900000]  [<601e96d0>] ? sysfs_create_bin_file+0x0/0xa0
> [  197.900000]  [<600bc621>] sys_finit_module+0x111/0x160
> [  197.900000]  [<60793fc0>] ? ptrace+0x0/0xa0
> [  197.900000]  [<6002ee58>] handle_syscall+0x98/0xc0
> [  197.900000]  [<60761e80>] ? __waitpid+0x0/0xa0
> [  197.900000]  [<6004a8a6>] userspace+0x4c6/0x5b0
> [  197.900000]  [<6002eeb0>] ? copy_chunk_to_user+0x0/0x30
> [  197.900000]  [<6002b0e4>] fork_handler+0x94/0xa0
> [  197.900000]
> [  197.900000] ---[ end trace e3d2f999bd5bee3c ]---
> [  197.900000] ------------[ cut here ]------------
> [  197.900000] WARNING: CPU: 0 PID: 1038 at 
> arch/um/drivers/virtio_uml.c:449 vu_get+0xfb/0x130 [virtio_uml]
> [  197.900000] Modules linked in: virtio_net(+) virtio_uml virtio_ring 
> virtio
> [  197.900000] CPU: 0 PID: 1038 Comm: systemd-udevd Tainted: G        
> W        5.3.0-rc3-00291-g1d45d9d8ec47 #35
> [  197.900000] Stack:
> [  197.900000]  d47f3860 60749c68 d47f38a8 00000000
> [  197.900000]  00000009 00000000 d47f3870 60749cbc
> [  197.900000]  d47f38e0 60052a5e 00000000 00000000
> [  197.900000] Call Trace:
> [  197.900000]  [<e1b63b7b>] ? vu_get+0xfb/0x130 [virtio_uml]
> [  197.900000]  [<600967ba>] ? printk+0x0/0x94
> [  197.900000]  [<6002c759>] show_stack+0x129/0x190
> [  197.900000]  [<60749c68>] ? dump_stack_print_info+0xe8/0x100
> [  197.900000]  [<60749cbc>] dump_stack+0x2a/0x2e
> [  197.900000]  [<60052a5e>] __warn+0x13e/0x170
> [  197.900000]  [<60052c06>] warn_slowpath_null+0x46/0x4b
> [  197.900000]  [<e1b63b7b>] vu_get+0xfb/0x130 [virtio_uml]
> [  197.900000]  [<60076cc0>] ? blocking_notifier_call_chain+0x0/0x20
> [  197.900000]  [<e1c2240f>] virtnet_probe+0x5f/0x1210 [virtio_net]
> [  197.900000]  [<60076cc0>] ? blocking_notifier_call_chain+0x0/0x20
> [  197.900000]  [<e1abe674>] virtio_dev_probe+0x1f4/0x2b0 [virtio]
> [  197.900000]  [<604b6745>] really_probe+0x245/0x4e0
> [  197.900000]  [<60076cc0>] ? blocking_notifier_call_chain+0x0/0x20
> [  197.900000]  [<604b6c42>] driver_probe_device+0x112/0x140
> [  197.900000]  [<604b6f2f>] device_driver_attach+0x4f/0x80
> [  197.900000]  [<604b7105>] __driver_attach+0x1a5/0x1c0
> [  197.900000]  [<604b6f60>] ? __driver_attach+0x0/0x1c0
> [  197.900000]  [<604b6f60>] ? __driver_attach+0x0/0x1c0
> [  197.900000]  [<6074b660>] ? klist_next+0x0/0x150
> [  197.900000]  [<604b3c06>] bus_for_each_dev+0x76/0xc0
> [  197.900000]  [<604b5fe3>] driver_attach+0x23/0x30
> [  197.900000]  [<604b548f>] bus_add_driver+0x18f/0x300
> [  197.900000]  [<60053690>] ? __cpuhp_setup_state+0x0/0x20
> [  197.900000]  [<604b790f>] driver_register+0xdf/0x140
> [  197.900000]  [<e1abe3c2>] register_virtio_driver+0x82/0x90 [virtio]
> [  197.900000]  [<e1c2b09b>] virtio_net_driver_init+0x9b/0xd0 
> [virtio_net]
> [  197.900000]  [<e1c2b000>] ? virtio_net_driver_init+0x0/0xd0 
> [virtio_net]
> [  197.900000]  [<6002935a>] do_one_initcall+0x8a/0x1e0
> [  197.900000]  [<60047570>] ? set_signals+0x0/0x50
> [  197.900000]  [<60047598>] ? set_signals+0x28/0x50
> [  197.900000]  [<6013c328>] ? kfree+0xf8/0x130
> [  197.900000]  [<6013b295>] ? kmem_cache_alloc+0xa5/0x110
> [  197.900000]  [<600b84d0>] ? module_put+0x0/0x50
> [  197.900000]  [<600bd274>] do_init_module+0x8a/0x296
> [  197.900000]  [<60122dd9>] ? __vfree+0x79/0x80
> [  197.900000]  [<600bbb12>] load_module+0x1fa2/0x2750
> [  197.900000]  [<6014a8d0>] ? kernel_read+0x0/0x50
> [  197.900000]  [<60754b20>] ? strlen+0x0/0x30
> [  197.900000]  [<601e96d0>] ? sysfs_create_bin_file+0x0/0xa0
> [  197.900000]  [<600bc621>] sys_finit_module+0x111/0x160
> [  197.900000]  [<60793fc0>] ? ptrace+0x0/0xa0
> [  197.900000]  [<6002ee58>] handle_syscall+0x98/0xc0
> [  197.900000]  [<60761e80>] ? __waitpid+0x0/0xa0
> [  197.900000]  [<6004a8a6>] userspace+0x4c6/0x5b0
> [  197.900000]  [<6002eeb0>] ? copy_chunk_to_user+0x0/0x30
> [  197.900000]  [<6002b0e4>] fork_handler+0x94/0xa0
> [  197.900000]
> [  197.900000] ---[ end trace e3d2f999bd5bee3d ]---
> [  197.910000] ------------[ cut here ]------------
> [  197.910000] WARNING: CPU: 0 PID: 1038 at net/core/dev.c:1564 
> dev_disable_lro+0x187/0x1e0
> [  197.910000] netdevice: eth0: failed to disable LRO!
> [  197.910000] Modules linked in: virtio_net(+) virtio_uml virtio_ring 
> virtio
> [  197.910000] CPU: 0 PID: 1038 Comm: systemd-udevd Tainted: G        
> W        5.3.0-rc3-00291-g1d45d9d8ec47 #35
> [  197.910000] Stack:
> [  197.910000]  d47f3650 60749c68 d47f3698 d47f36e0
> [  197.910000]  00000009 00000000 d47f3660 60749cbc
> [  197.910000]  d47f36d0 60052a5e 00000000 60549c77
> [  197.910000] Call Trace:
> [  197.910000]  [<60549c77>] ? dev_disable_lro+0x187/0x1e0
> [  197.910000]  [<600967ba>] ? printk+0x0/0x94
> [  197.910000]  [<6002c759>] show_stack+0x129/0x190
> [  197.910000]  [<60749c68>] ? dump_stack_print_info+0xe8/0x100
> [  197.910000]  [<60749cbc>] dump_stack+0x2a/0x2e
> [  197.910000]  [<60052a5e>] __warn+0x13e/0x170
> [  197.910000]  [<60549c77>] ? dev_disable_lro+0x187/0x1e0
> [  197.910000]  [<600967ba>] ? printk+0x0/0x94
> [  197.910000]  [<607d29f0>] ? _raw_spin_lock_irqsave+0x0/0x40
> [  197.910000]  [<60052a90>] ? warn_slowpath_fmt+0x0/0xa0
> [  197.910000]  [<6055d920>] ? rtnl_is_locked+0x0/0x20
> [  197.910000]  [<60052b27>] warn_slowpath_fmt+0x97/0xa0
> [  197.910000]  [<60047598>] ? set_signals+0x28/0x50
> [  197.910000]  [<6055d920>] ? rtnl_is_locked+0x0/0x20
> [  197.910000]  [<60549ab8>] ? netdev_update_features+0x18/0x50
> [  197.910000]  [<60559023>] ? neigh_parms_alloc+0x173/0x190
> [  197.910000]  [<60549c77>] dev_disable_lro+0x187/0x1e0
> [  197.910000]  [<606968e0>] inetdev_init+0x110/0x200
> [  197.920000]  [<6055d920>] ? rtnl_is_locked+0x0/0x20
> [  197.920000]  [<60696a66>] inetdev_event+0x96/0x690
> [  197.920000]  [<607d2ae0>] ? _raw_spin_unlock_irqrestore+0x0/0x30
> [  197.920000]  [<60076720>] notifier_call_chain+0x40/0x80
> [  197.920000]  [<6053e370>] ? call_netdevice_notifiers_info+0x0/0x90
> [  197.920000]  [<60047570>] ? set_signals+0x0/0x50
> [  197.920000]  [<60076828>] raw_notifier_call_chain+0x18/0x20
> [  197.920000]  [<6053e3ec>] call_netdevice_notifiers_info+0x7c/0x90
> [  197.920000]  [<6054a57f>] register_netdevice+0x57f/0x670
> [  197.920000]  [<6013c230>] ? kfree+0x0/0x130
> [  197.920000]  [<6054a69e>] register_netdev+0x2e/0x50
> [  197.920000]  [<e1c230b7>] virtnet_probe+0xd07/0x1210 [virtio_net]
> [  197.920000]  [<60076cc0>] ? blocking_notifier_call_chain+0x0/0x20
> [  197.920000]  [<e1abe674>] virtio_dev_probe+0x1f4/0x2b0 [virtio]
> [  197.920000]  [<604b6745>] really_probe+0x245/0x4e0
> [  197.920000]  [<60076cc0>] ? blocking_notifier_call_chain+0x0/0x20
> [  197.920000]  [<604b6c42>] driver_probe_device+0x112/0x140
> [  197.920000]  [<604b6f2f>] device_driver_attach+0x4f/0x80
> [  197.920000]  [<604b7105>] __driver_attach+0x1a5/0x1c0
> [  197.920000]  [<604b6f60>] ? __driver_attach+0x0/0x1c0
> [  197.920000]  [<604b6f60>] ? __driver_attach+0x0/0x1c0
> [  197.920000]  [<6074b660>] ? klist_next+0x0/0x150
> [  197.920000]  [<604b3c06>] bus_for_each_dev+0x76/0xc0
> [  197.920000]  [<604b5fe3>] driver_attach+0x23/0x30
> [  197.920000]  [<604b548f>] bus_add_driver+0x18f/0x300
> [  197.920000]  [<60053690>] ? __cpuhp_setup_state+0x0/0x20
> [  197.920000]  [<604b790f>] driver_register+0xdf/0x140
> [  197.920000]  [<e1abe3c2>] register_virtio_driver+0x82/0x90 [virtio]
> [  197.920000]  [<e1c2b09b>] virtio_net_driver_init+0x9b/0xd0 
> [virtio_net]
> [  197.920000]  [<e1c2b000>] ? virtio_net_driver_init+0x0/0xd0 
> [virtio_net]
> [  197.920000]  [<6002935a>] do_one_initcall+0x8a/0x1e0
> [  197.920000]  [<60047570>] ? set_signals+0x0/0x50
> [  197.920000]  [<60047598>] ? set_signals+0x28/0x50
> [  197.920000]  [<6013c328>] ? kfree+0xf8/0x130
> [  197.920000]  [<6013b295>] ? kmem_cache_alloc+0xa5/0x110
> [  197.920000]  [<600b84d0>] ? module_put+0x0/0x50
> [  197.920000]  [<600bd274>] do_init_module+0x8a/0x296
> [  197.920000]  [<60122dd9>] ? __vfree+0x79/0x80
> [  197.920000]  [<600bbb12>] load_module+0x1fa2/0x2750
> [  197.920000]  [<6014a8d0>] ? kernel_read+0x0/0x50
> [  197.920000]  [<60754b20>] ? strlen+0x0/0x30
> [  197.920000]  [<601e96d0>] ? sysfs_create_bin_file+0x0/0xa0
> [  197.920000]  [<600bc621>] sys_finit_module+0x111/0x160
> [  197.920000]  [<60793fc0>] ? ptrace+0x0/0xa0
> [  197.920000]  [<6002ee58>] handle_syscall+0x98/0xc0
> [  197.920000]  [<60761e80>] ? __waitpid+0x0/0xa0
> [  197.920000]  [<6004a8a6>] userspace+0x4c6/0x5b0
> [  197.920000]  [<6002eeb0>] ? copy_chunk_to_user+0x0/0x30
> [  197.920000]  [<6002b0e4>] fork_handler+0x94/0xa0
> [  197.920000]
> [  197.920000] ---[ end trace e3d2f999bd5bee3e ]---
> [  197.920000] ------------[ cut here ]------------
> [  197.920000] WARNING: CPU: 0 PID: 1038 at net/core/dev.c:1564 
> dev_disable_lro+0x187/0x1e0
> [  197.920000] netdevice: eth0: failed to disable LRO!
> [  197.920000] Modules linked in: virtio_net(+) virtio_uml virtio_ring 
> virtio
> [  197.920000] CPU: 0 PID: 1038 Comm: systemd-udevd Tainted: G        
> W        5.3.0-rc3-00291-g1d45d9d8ec47 #35
> [  197.920000] Stack:
> [  197.920000]  d47f3610 60749c68 d47f3658 d47f36a0
> [  197.920000]  00000009 00000000 d47f3620 60749cbc
> [  197.920000]  d47f3690 60052a5e d19dad00 60549c77
> [  197.920000] Call Trace:
> [  197.920000]  [<60549c77>] ? dev_disable_lro+0x187/0x1e0
> [  197.920000]  [<600967ba>] ? printk+0x0/0x94
> [  197.920000]  [<6002c759>] show_stack+0x129/0x190
> [  197.920000]  [<60749c68>] ? dump_stack_print_info+0xe8/0x100
> [  197.920000]  [<60749cbc>] dump_stack+0x2a/0x2e
> [  197.920000]  [<60052a5e>] __warn+0x13e/0x170
> [  197.920000]  [<60549c77>] ? dev_disable_lro+0x187/0x1e0
> [  197.920000]  [<600967ba>] ? printk+0x0/0x94
> [  197.920000]  [<60052a90>] ? warn_slowpath_fmt+0x0/0xa0
> [  197.920000]  [<60052b27>] warn_slowpath_fmt+0x97/0xa0
> [  197.920000]  [<60047598>] ? set_signals+0x28/0x50
> [  197.920000]  [<60549ab8>] ? netdev_update_features+0x18/0x50
> [  197.920000]  [<60559023>] ? neigh_parms_alloc+0x173/0x190
> [  197.920000]  [<60549c77>] dev_disable_lro+0x187/0x1e0
> [  197.920000]  [<6013b1f0>] ? kmem_cache_alloc+0x0/0x110
> [  197.920000]  [<6013b1f0>] ? kmem_cache_alloc+0x0/0x110
> [  197.920000]  [<606d78aa>] ipv6_add_dev+0x1ba/0x670
> [  197.920000]  [<606de0b8>] addrconf_notify+0x68/0xcd0
> [  197.920000]  [<607d2b35>] ? _raw_spin_unlock_bh+0x25/0x30
> [  197.920000]  [<606bce4c>] ? clusterip_netdev_event+0x10c/0x120
> [  197.920000]  [<60076720>] notifier_call_chain+0x40/0x80
> [  197.920000]  [<6053e370>] ? call_netdevice_notifiers_info+0x0/0x90
> [  197.920000]  [<60047570>] ? set_signals+0x0/0x50
> [  197.920000]  [<60076828>] raw_notifier_call_chain+0x18/0x20
> [  197.920000]  [<6053e3ec>] call_netdevice_notifiers_info+0x7c/0x90
> [  197.920000]  [<6054a57f>] register_netdevice+0x57f/0x670
> [  197.920000]  [<6013c230>] ? kfree+0x0/0x130
> [  197.920000]  [<6054a69e>] register_netdev+0x2e/0x50
> [  197.920000]  [<e1c230b7>] virtnet_probe+0xd07/0x1210 [virtio_net]
> [  197.920000]  [<60076cc0>] ? blocking_notifier_call_chain+0x0/0x20
> [  197.920000]  [<e1abe674>] virtio_dev_probe+0x1f4/0x2b0 [virtio]
> [  197.920000]  [<604b6745>] really_probe+0x245/0x4e0
> [  197.920000]  [<60076cc0>] ? blocking_notifier_call_chain+0x0/0x20
> [  197.920000]  [<604b6c42>] driver_probe_device+0x112/0x140
> [  197.920000]  [<604b6f2f>] device_driver_attach+0x4f/0x80
> [  197.920000]  [<604b7105>] __driver_attach+0x1a5/0x1c0
> [  197.920000]  [<604b6f60>] ? __driver_attach+0x0/0x1c0
> [  197.920000]  [<604b6f60>] ? __driver_attach+0x0/0x1c0
> [  197.920000]  [<6074b660>] ? klist_next+0x0/0x150
> [  197.920000]  [<604b3c06>] bus_for_each_dev+0x76/0xc0
> [  197.920000]  [<604b5fe3>] driver_attach+0x23/0x30
> [  197.920000]  [<604b548f>] bus_add_driver+0x18f/0x300
> [  197.920000]  [<60053690>] ? __cpuhp_setup_state+0x0/0x20
> [  197.920000]  [<604b790f>] driver_register+0xdf/0x140
> [  197.920000]  [<e1abe3c2>] register_virtio_driver+0x82/0x90 [virtio]
> [  197.920000]  [<e1c2b09b>] virtio_net_driver_init+0x9b/0xd0 
> [virtio_net]
> [  197.920000]  [<e1c2b000>] ? virtio_net_driver_init+0x0/0xd0 
> [virtio_net]
> [  197.930000]  [<6002935a>] do_one_initcall+0x8a/0x1e0
> [  197.930000]  [<60047570>] ? set_signals+0x0/0x50
> [  197.930000]  [<60047598>] ? set_signals+0x28/0x50
> [  197.930000]  [<6013c328>] ? kfree+0xf8/0x130
> [  197.930000]  [<6013b295>] ? kmem_cache_alloc+0xa5/0x110
> [  197.930000]  [<600b84d0>] ? module_put+0x0/0x50
> [  197.930000]  [<600bd274>] do_init_module+0x8a/0x296
> [  197.930000]  [<60122dd9>] ? __vfree+0x79/0x80
> [  197.930000]  [<600bbb12>] load_module+0x1fa2/0x2750
> [  197.930000]  [<6014a8d0>] ? kernel_read+0x0/0x50
> [  197.930000]  [<60754b20>] ? strlen+0x0/0x30
> [  197.930000]  [<601e96d0>] ? sysfs_create_bin_file+0x0/0xa0
> [  197.930000]  [<600bc621>] sys_finit_module+0x111/0x160
> [  197.930000]  [<60793fc0>] ? ptrace+0x0/0xa0
> [  197.930000]  [<6002ee58>] handle_syscall+0x98/0xc0
> [  197.930000]  [<60761e80>] ? __waitpid+0x0/0xa0
> [  197.930000]  [<6004a8a6>] userspace+0x4c6/0x5b0
> [  197.930000]  [<6002eeb0>] ? copy_chunk_to_user+0x0/0x30
> [  197.930000]  [<6002b0e4>] fork_handler+0x94/0xa0
> [  197.930000]
> [  197.930000] ---[ end trace e3d2f999bd5bee3f ]---
>
Reading the code which leads to it, it will need some revision - the 
value and the nature of the error being fed into the WARN is completely 
lost.

This should be netdev_err() followed by an optional WARN on specific 
values as well as cleaner recovery if something has gone wrong. At the 
moment if something has gone wrong it goes belly up losing any 
information on what exactly happened in the get/set routines.
Geron, ErelX Aug. 14, 2019, 1:38 p.m. UTC | #9
Our config is based on defconfig.
Added CONFIG_VIRTIO_UML, CONFIG_VIRTIO_BLK and CONFIG_VIRTIO_NET.

Please note that we have two working examples:

One example is with qemu/contrib/vhost-user-blk:

# Prepare disk file
truncate -s 5120 my_file
# Run server
./vhost-user-blk -b my_file -s /tmp/my_blk
# Run uml
./linux ... virtio_uml.device=/tmp/my_blk:2

And another example is with snabb:

# Run server with two interconnected interfaces
# (need to add a vm2vm program, as shown at: https://github.com/snabbco/snabb/issues/1119)
./snabb vm2vm /tmp/first /tmp/second
# Run uml
./linux ... virtio_uml.device=/tmp/first:1
./linux ... virtio_uml.device=/tmp/second:1

---------------------------------------------------------------------
A member of the Intel Corporation group of companies

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.
Anton Ivanov Aug. 15, 2019, 5:03 a.m. UTC | #10
On 14/08/2019 14:38, Geron, ErelX wrote:
> Our config is based on defconfig.
> Added CONFIG_VIRTIO_UML, CONFIG_VIRTIO_BLK and CONFIG_VIRTIO_NET.
> 
> Please note that we have two working examples:
> 
> One example is with qemu/contrib/vhost-user-blk:
> 
> # Prepare disk file
> truncate -s 5120 my_file
> # Run server
> ./vhost-user-blk -b my_file -s /tmp/my_blk
> # Run uml
> ./linux ... virtio_uml.device=/tmp/my_blk:2
> 
> And another example is with snabb:
> 
> # Run server with two interconnected interfaces
> # (need to add a vm2vm program, as shown at: https://github.com/snabbco/snabb/issues/1119)
> ./snabb vm2vm /tmp/first /tmp/second
> # Run uml
> ./linux ... virtio_uml.device=/tmp/first:1
> ./linux ... virtio_uml.device=/tmp/second:1

I tried with BESS which for vhost-user is more or less DPDK "as is".

The config used was:

PMDPort(name="in0", pci="01:00.1")
PMDPort(name='u0', vdev="eth_vhost0,iface=/tmp/uml-test-vhost.sock")
PortInc(port="in0") -> PortOut(port="u0")
PortInc(port="u0") -> PortOut(port="in0")

This wires back to back a port on an e1000e and vhost-user instance.

Something in the vhost-user negotiation fails and the way the code is 
written at present the nature of the error is lost.

For example, in vu_get()

It does WARN_ON(vhost_user_get_config(vu_dev, offset, buf, len));

It should be something along the lines of

rc = vhost_user_get_config(vu_dev, offset, buf, len)

if (unlikely(rc)) {
	printk(KERN_ERR "vu get returned error %i", rc);
	WARN();
}

There are other places where IMO it will need a similar substitution.

Brgds,

A.


> 
> ---------------------------------------------------------------------
> A member of the Intel Corporation group of companies
> 
> This e-mail and any attachments may contain confidential material for
> the sole use of the intended recipient(s). Any review or distribution
> by others is strictly prohibited. If you are not the intended
> recipient, please contact the sender and delete all copies.
>
Johannes Berg Aug. 20, 2019, 7:18 a.m. UTC | #11
On Thu, 2019-08-15 at 06:03 +0100, Anton Ivanov wrote:
> 
> rc = vhost_user_get_config(vu_dev, offset, buf, len)
> 
> if (unlikely(rc)) {
> 	printk(KERN_ERR "vu get returned error %i", rc);
> 	WARN();
> }

Just for the record, IMHO that's better written as

rc = vhost_user_get_config(...);
WARN(rc, "vu_get_config returned error %d\n", rc);

since that will embed the message/error code into the warning message.

johannes
Anton Ivanov Aug. 20, 2019, 7:28 a.m. UTC | #12
On 20/08/2019 08:18, Johannes Berg wrote:
> On Thu, 2019-08-15 at 06:03 +0100, Anton Ivanov wrote:
>>
>> rc = vhost_user_get_config(vu_dev, offset, buf, len)
>>
>> if (unlikely(rc)) {
>> 	printk(KERN_ERR "vu get returned error %i", rc);
>> 	WARN();
>> }
> 
> Just for the record, IMHO that's better written as
> 
> rc = vhost_user_get_config(...);
> WARN(rc, "vu_get_config returned error %d\n", rc);
> 
> since that will embed the message/error code into the warning message.
> 
> johannes
> 
> 
> _______________________________________________
> linux-um mailing list
> linux-um@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-um
> 

Agree.

In either case, the current WARN() and WARN_ON in the patch should be 
revised to ensure that the error code is not lost.
Anton Ivanov Oct. 8, 2019, 9:26 a.m. UTC | #13
On 31/07/2019 21:29, Johannes Berg wrote:
> From: Erel Geron <erelx.geron@intel.com>
> 
> This module allows virtio devices to be used over a vhost-user socket.
> To instantiate a device, pass the (list of) device socket(s) to the
> kernel or module command line, such as
> 
> 	virtio_uml.device=/var/uml.socket:1
> 
> where the "1" is the device ID, from include/uapi/linux/virtio_ids.h,
> i.e. network in this case.
> 
> Signed-off-by: Erel Geron <erelx.geron@intel.com>
> Signed-off-by: Johannes Berg <johannes.berg@intel.com>
> ---
>   arch/um/drivers/Kconfig      |   7 +
>   arch/um/drivers/Makefile     |   1 +
>   arch/um/drivers/vhost_user.h | 102 ++++
>   arch/um/drivers/virtio_uml.c | 905 +++++++++++++++++++++++++++++++++++
>   arch/um/include/asm/irq.h    |   5 +-
>   arch/um/include/shared/os.h  |   7 +
>   arch/um/kernel/ksyms.c       |   4 +
>   arch/um/kernel/mem.c         |   1 +
>   arch/um/kernel/physmem.c     |   1 +
>   arch/um/kernel/um_arch.c     |   1 +
>   arch/um/os-Linux/file.c      |  64 +++
>   11 files changed, 1096 insertions(+), 2 deletions(-)
>   create mode 100644 arch/um/drivers/vhost_user.h
>   create mode 100644 arch/um/drivers/virtio_uml.c
> 
> diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
> index 2638e46f50cc..fea5a0d522dc 100644
> --- a/arch/um/drivers/Kconfig
> +++ b/arch/um/drivers/Kconfig
> @@ -335,3 +335,10 @@ config UML_NET_SLIRP
>   	  Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp"
>   
>   endmenu
> +
> +config VIRTIO_UML
> +	tristate "UML driver for virtio devices"
> +	select VIRTIO
> +	help
> +	  This driver provides support for virtio based paravirtual device
> +	  drivers over vhost-user sockets.
> diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile
> index 693319839f69..3edf0a7cfd25 100644
> --- a/arch/um/drivers/Makefile
> +++ b/arch/um/drivers/Makefile
> @@ -61,6 +61,7 @@ obj-$(CONFIG_XTERM_CHAN) += xterm.o xterm_kern.o
>   obj-$(CONFIG_UML_WATCHDOG) += harddog.o
>   obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o
>   obj-$(CONFIG_UML_RANDOM) += random.o
> +obj-$(CONFIG_VIRTIO_UML) += virtio_uml.o
>   
>   # pcap_user.o must be added explicitly.
>   USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o
> diff --git a/arch/um/drivers/vhost_user.h b/arch/um/drivers/vhost_user.h
> new file mode 100644
> index 000000000000..2a9829b0782b
> --- /dev/null
> +++ b/arch/um/drivers/vhost_user.h
> @@ -0,0 +1,102 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/* Vhost-user protocol */
> +
> +#ifndef __VHOST_USER_H__
> +#define __VHOST_USER_H__
> +
> +/* Message flags */
> +#define VHOST_USER_FLAG_REPLY		BIT(2)
> +/* Feature bits */
> +#define VHOST_USER_F_PROTOCOL_FEATURES	30
> +/* Protocol feature bits */
> +#define VHOST_USER_PROTOCOL_F_CONFIG	9
> +/* Vring state index masks */
> +#define VHOST_USER_VRING_INDEX_MASK	0xff
> +#define VHOST_USER_VRING_POLL_MASK	BIT(8)
> +
> +/* Supported version */
> +#define VHOST_USER_VERSION		1
> +/* Supported transport features */
> +#define VHOST_USER_SUPPORTED_F		BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)
> +/* Supported protocol features */
> +#define VHOST_USER_SUPPORTED_PROTOCOL_F	BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)
> +
> +enum vhost_user_request {
> +	VHOST_USER_GET_FEATURES = 1,
> +	VHOST_USER_SET_FEATURES = 2,
> +	VHOST_USER_SET_OWNER = 3,
> +	VHOST_USER_RESET_OWNER = 4,
> +	VHOST_USER_SET_MEM_TABLE = 5,
> +	VHOST_USER_SET_LOG_BASE = 6,
> +	VHOST_USER_SET_LOG_FD = 7,
> +	VHOST_USER_SET_VRING_NUM = 8,
> +	VHOST_USER_SET_VRING_ADDR = 9,
> +	VHOST_USER_SET_VRING_BASE = 10,
> +	VHOST_USER_GET_VRING_BASE = 11,
> +	VHOST_USER_SET_VRING_KICK = 12,
> +	VHOST_USER_SET_VRING_CALL = 13,
> +	VHOST_USER_SET_VRING_ERR = 14,
> +	VHOST_USER_GET_PROTOCOL_FEATURES = 15,
> +	VHOST_USER_SET_PROTOCOL_FEATURES = 16,
> +	VHOST_USER_GET_QUEUE_NUM = 17,
> +	VHOST_USER_SET_VRING_ENABLE = 18,
> +	VHOST_USER_SEND_RARP = 19,
> +	VHOST_USER_NET_SEND_MTU = 20,
> +	VHOST_USER_SET_SLAVE_REQ_FD = 21,
> +	VHOST_USER_IOTLB_MSG = 22,
> +	VHOST_USER_SET_VRING_ENDIAN = 23,
> +	VHOST_USER_GET_CONFIG = 24,
> +	VHOST_USER_SET_CONFIG = 25,
> +};
> +
> +struct vhost_user_header {
> +	u32 request; /* Use enum vhost_user_request */
> +	u32 flags;
> +	u32 size;
> +} __packed;
> +
> +struct vhost_user_config {
> +	u32 offset;
> +	u32 size;
> +	u32 flags;
> +	u8 payload[0]; /* Variable length */
> +} __packed;
> +
> +struct vhost_user_vring_state {
> +	u32 index;
> +	u32 num;
> +} __packed;
> +
> +struct vhost_user_vring_addr {
> +	u32 index;
> +	u32 flags;
> +	u64 desc, used, avail, log;
> +} __packed;
> +
> +struct vhost_user_mem_region {
> +	u64 guest_addr;
> +	u64 size;
> +	u64 user_addr;
> +	u64 mmap_offset;
> +} __packed;
> +
> +struct vhost_user_mem_regions {
> +	u32 num;
> +	u32 padding;
> +	struct vhost_user_mem_region regions[2]; /* Currently supporting 2 */
> +} __packed;
> +
> +union vhost_user_payload {
> +	u64 integer;
> +	struct vhost_user_config config;
> +	struct vhost_user_vring_state vring_state;
> +	struct vhost_user_vring_addr vring_addr;
> +	struct vhost_user_mem_regions mem_regions;
> +};
> +
> +struct vhost_user_msg {
> +	struct vhost_user_header header;
> +	union vhost_user_payload payload;
> +} __packed;
> +
> +#endif
> diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c
> new file mode 100644
> index 000000000000..30eeb907f42a
> --- /dev/null
> +++ b/arch/um/drivers/virtio_uml.c
> @@ -0,0 +1,905 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Virtio vhost-user driver
> + *
> + * Copyright(c) 2019 Intel Corporation
> + *
> + * This module allows virtio devices to be used over a vhost-user socket.
> + *
> + * Guest devices can be instantiated by kernel module or command line
> + * parameters. One device will be created for each parameter. Syntax:
> + *
> + *		[virtio_uml.]device=<socket>:<virtio_id>[:<platform_id>]
> + * where:
> + *		<socket>	:= vhost-user socket path to connect
> + *		<virtio_id>	:= virtio device id (as in virtio_ids.h)
> + *		<platform_id>	:= (optional) platform device id
> + *
> + * example:
> + *		virtio_uml.device=/var/uml.socket:1
> + *
> + * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd.
> + */
> +#include <linux/module.h>
> +#include <linux/platform_device.h>
> +#include <linux/slab.h>
> +#include <linux/virtio.h>
> +#include <linux/virtio_config.h>
> +#include <linux/virtio_ring.h>
> +#include <irq_kern.h>
> +#include <init.h>
> +#include <os.h>
> +#include "vhost_user.h"
> +
> +/* Workaround due to a conflict between irq_user.h and irqreturn.h */
> +#ifdef IRQ_NONE
> +#undef IRQ_NONE
> +#endif
> +
> +#define MAX_SUPPORTED_QUEUE_SIZE	256
> +
> +#define to_virtio_uml_device(_vdev) \
> +	container_of(_vdev, struct virtio_uml_device, vdev)
> +
> +struct virtio_uml_device {
> +	struct virtio_device vdev;
> +	struct platform_device *pdev;
> +
> +	int sock;
> +	u64 features;
> +	u64 protocol_features;
> +	u8 status;
> +};
> +
> +struct virtio_uml_vq_info {
> +	int kick_fd, call_fd;
> +	char name[32];
> +};
> +
> +extern unsigned long long physmem_size, highmem;
> +
> +
> +/* Vhost-user protocol */
> +
> +static int vhost_user_send(struct virtio_uml_device *vu_dev,
> +			   struct vhost_user_msg *msg,
> +			   int *fds, size_t num_fds)
> +{
> +	size_t size = sizeof(msg->header) + msg->header.size;
> +	int rc;
> +
> +	msg->header.flags |= VHOST_USER_VERSION;
> +	do {
> +		rc = os_sendmsg_fds(vu_dev->sock, (void *) msg, size, fds,
> +				num_fds);
> +	} while (rc == -EINTR);
> +	if (rc < 0)
> +		return rc;
> +	if (rc != size)
> +		return -ECOMM;
> +	return 0;
> +}
> +
> +static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev,
> +				      u32 request)
> +{
> +	struct vhost_user_msg msg = {
> +		.header.request = request,
> +	};
> +	return vhost_user_send(vu_dev, &msg, NULL, 0);
> +}
> +
> +static int vhost_user_send_u64(struct virtio_uml_device *vu_dev,
> +			       u32 request, u64 value)
> +{
> +	struct vhost_user_msg msg = {
> +		.header.request = request,
> +		.header.size = sizeof(msg.payload.integer),
> +		.payload.integer = value,
> +	};
> +
> +	return vhost_user_send(vu_dev, &msg, NULL, 0);
> +}
> +
> +static int vhost_user_recv_header(struct virtio_uml_device *vu_dev,
> +				  struct vhost_user_msg *msg)
> +{
> +	size_t size = sizeof(msg->header);
> +	int rc;
> +
> +	do {
> +		rc = os_read_file(vu_dev->sock, (void *) msg, size);
> +	} while (rc == -EINTR);
> +	if (rc != size)
> +		return -EPROTO;
> +	if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION))
> +		return -EPROTO;
> +	return 0;
> +}
> +
> +static int vhost_user_recv(struct virtio_uml_device *vu_dev,
> +			   struct vhost_user_msg *msg,
> +			   size_t max_payload_size)
> +{
> +	size_t size;
> +	int rc = vhost_user_recv_header(vu_dev, msg);
> +
> +	if (rc)
> +		return rc;
> +	size = msg->header.size;
> +	if (size > max_payload_size)
> +		return -EPROTO;
> +	do {
> +		rc = os_read_file(vu_dev->sock, (void *) &msg->payload, size);
> +	} while (rc == -EINTR);
> +	if (rc != size)
> +		return -EPROTO;
> +	return 0;
> +}
> +
> +static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev,
> +			       u64 *value)
> +{
> +	struct vhost_user_msg msg;
> +	int rc = vhost_user_recv(vu_dev, &msg, sizeof(msg.payload.integer));
> +
> +	if (rc)
> +		return rc;
> +	if (msg.header.size != sizeof(msg.payload.integer))
> +		return -EPROTO;
> +	*value = msg.payload.integer;
> +	return 0;
> +}
> +
> +static int vhost_user_set_owner(struct virtio_uml_device *vu_dev)
> +{
> +	return vhost_user_send_no_payload(vu_dev, VHOST_USER_SET_OWNER);
> +}
> +
> +static int vhost_user_get_features(struct virtio_uml_device *vu_dev,
> +				   u64 *features)
> +{
> +	int rc = vhost_user_send_no_payload(vu_dev, VHOST_USER_GET_FEATURES);
> +
> +	if (rc)
> +		return rc;
> +	return vhost_user_recv_u64(vu_dev, features);
> +}
> +
> +static int vhost_user_set_features(struct virtio_uml_device *vu_dev,
> +				   u64 features)
> +{
> +	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_FEATURES, features);
> +}
> +
> +static int vhost_user_get_protocol_features(struct virtio_uml_device *vu_dev,
> +					    u64 *protocol_features)
> +{
> +	int rc = vhost_user_send_no_payload(vu_dev,
> +			VHOST_USER_GET_PROTOCOL_FEATURES);
> +
> +	if (rc)
> +		return rc;
> +	return vhost_user_recv_u64(vu_dev, protocol_features);
> +}
> +
> +static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev,
> +					    u64 protocol_features)
> +{
> +	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_PROTOCOL_FEATURES,
> +			protocol_features);
> +}
> +
> +static int vhost_user_init(struct virtio_uml_device *vu_dev)
> +{
> +	int rc = vhost_user_set_owner(vu_dev);
> +
> +	if (rc)
> +		return rc;
> +	rc = vhost_user_get_features(vu_dev, &vu_dev->features);
> +	if (rc)
> +		return rc;
> +
> +	if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) {
> +		rc = vhost_user_get_protocol_features(vu_dev,
> +				&vu_dev->protocol_features);
> +		if (rc)
> +			return rc;
> +		vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F;
> +		rc = vhost_user_set_protocol_features(vu_dev,
> +				vu_dev->protocol_features);
> +	}
> +	return rc;
> +}
> +
> +static int vhost_user_get_config(struct virtio_uml_device *vu_dev,
> +				 u32 offset, void *buf, u32 len)
> +{
> +	u32 cfg_size = offset + len;
> +	struct vhost_user_msg *msg;
> +	size_t payload_size = sizeof(msg->payload.config) + cfg_size;
> +	size_t msg_size = sizeof(msg->header) + payload_size;
> +	int rc;
> +
> +	if (!(vu_dev->protocol_features &
> +			BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
> +		return -EOPNOTSUPP;
> +
> +	msg = kzalloc(msg_size, GFP_KERNEL);
> +	if (!msg)
> +		return -ENOMEM;
> +	msg->header.request = VHOST_USER_GET_CONFIG;
> +	msg->header.size = payload_size;
> +	msg->payload.config.offset = 0;
> +	msg->payload.config.size = cfg_size;
> +
> +	rc = vhost_user_send(vu_dev, msg, NULL, 0);
> +	if (rc)
> +		goto error_get;
> +	rc = vhost_user_recv(vu_dev, msg, msg_size);
> +	if (rc)
> +		goto error_get;
> +	if (msg->header.size != payload_size ||
> +			msg->payload.config.size != cfg_size) {
> +		rc = -EPROTO;
> +		goto error_get;
> +	}
> +	memcpy(buf, msg->payload.config.payload + offset, len);
> +
> +error_get:
> +	kfree(msg);
> +	return rc;
> +}
> +
> +static int vhost_user_set_config(struct virtio_uml_device *vu_dev,
> +				 u32 offset, const void *buf, u32 len)
> +{
> +	struct vhost_user_msg *msg;
> +	size_t payload_size = sizeof(msg->payload.config) + len;
> +	size_t msg_size = sizeof(msg->header) + payload_size;
> +	int rc;
> +
> +	if (!(vu_dev->protocol_features &
> +			BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
> +		return -EOPNOTSUPP;
> +
> +	msg = kzalloc(msg_size, GFP_KERNEL);
> +	if (!msg)
> +		return -ENOMEM;
> +	msg->header.request = VHOST_USER_SET_CONFIG;
> +	msg->header.size = payload_size;
> +	msg->payload.config.offset = offset;
> +	msg->payload.config.size = len;
> +	memcpy(msg->payload.config.payload, buf, len);
> +
> +	rc = vhost_user_send(vu_dev, msg, NULL, 0);
> +
> +	kfree(msg);
> +	return rc;
> +}
> +
> +static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out,
> +				      struct vhost_user_mem_region *region_out)
> +{
> +	unsigned long long mem_offset;
> +	int rc = phys_mapping(addr, &mem_offset);
> +
> +	if (WARN_ON(rc < 0))
> +		return -EFAULT;
> +	*fd_out = rc;
> +	region_out->guest_addr = addr;
> +	region_out->user_addr = addr;
> +	region_out->size = size;
> +	region_out->mmap_offset = mem_offset;
> +
> +	rc = phys_mapping(addr + size - 1, &mem_offset);
> +	if (WARN_ON(rc != *fd_out))
> +		return -EFAULT;
> +	return 0;
> +}
> +
> +static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev)
> +{
> +	struct vhost_user_msg msg = {
> +		.header.request = VHOST_USER_SET_MEM_TABLE,
> +		.header.size = sizeof(msg.payload.mem_regions),
> +		.payload.mem_regions.num = 1,
> +	};
> +	int fds[2];
> +	int rc = vhost_user_init_mem_region(0, physmem_size, &fds[0],
> +			&msg.payload.mem_regions.regions[0]);
> +
> +	if (rc < 0)
> +		return rc;
> +	if (highmem) {
> +		msg.payload.mem_regions.num++;
> +		rc = vhost_user_init_mem_region(__pa(end_iomem), highmem,
> +				&fds[1], &msg.payload.mem_regions.regions[1]);
> +		if (rc < 0)
> +			return rc;
> +	}
> +
> +	return vhost_user_send(vu_dev, &msg, fds, msg.payload.mem_regions.num);
> +}
> +
> +static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev,
> +				      u32 request, u32 index, u32 num)
> +{
> +	struct vhost_user_msg msg = {
> +		.header.request = request,
> +		.header.size = sizeof(msg.payload.vring_state),
> +		.payload.vring_state.index = index,
> +		.payload.vring_state.num = num,
> +	};
> +
> +	return vhost_user_send(vu_dev, &msg, NULL, 0);
> +}
> +
> +static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev,
> +				    u32 index, u32 num)
> +{
> +	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_NUM,
> +			index, num);
> +}
> +
> +static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev,
> +				     u32 index, u32 offset)
> +{
> +	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_BASE,
> +			index, offset);
> +}
> +
> +static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev,
> +				     u32 index, u64 desc, u64 used, u64 avail,
> +				     u64 log)
> +{
> +	struct vhost_user_msg msg = {
> +		.header.request = VHOST_USER_SET_VRING_ADDR,
> +		.header.size = sizeof(msg.payload.vring_addr),
> +		.payload.vring_addr.index = index,
> +		.payload.vring_addr.desc = desc,
> +		.payload.vring_addr.used = used,
> +		.payload.vring_addr.avail = avail,
> +		.payload.vring_addr.log = log,
> +	};
> +
> +	return vhost_user_send(vu_dev, &msg, NULL, 0);
> +}
> +
> +static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev,
> +				   u32 request, int index, int fd)
> +{
> +	struct vhost_user_msg msg = {
> +		.header.request = request,
> +		.header.size = sizeof(msg.payload.integer),
> +		.payload.integer = index,
> +	};
> +
> +	if (index & ~VHOST_USER_VRING_INDEX_MASK)
> +		return -EINVAL;
> +	if (fd < 0) {
> +		msg.payload.integer |= VHOST_USER_VRING_POLL_MASK;
> +		return vhost_user_send(vu_dev, &msg, NULL, 0);
> +	}
> +	return vhost_user_send(vu_dev, &msg, &fd, 1);
> +}
> +
> +static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev,
> +				     int index, int fd)
> +{
> +	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL,
> +			index, fd);
> +}
> +
> +static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev,
> +				     int index, int fd)
> +{
> +	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK,
> +			index, fd);
> +}
> +
> +static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev,
> +				       u32 index, bool enable)
> +{
> +	if (!(vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)))
> +		return 0;
> +
> +	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_ENABLE,
> +			index, enable);
> +}
> +
> +
> +/* Virtio interface */
> +
> +static bool vu_notify(struct virtqueue *vq)
> +{
> +	struct virtio_uml_vq_info *info = vq->priv;
> +	int rc;
> +
> +	do {
> +		rc = os_eventfd_write(info->kick_fd, 1);
> +	} while (rc == -EINTR);
> +	WARN_ON(rc);
> +	return !rc;
> +}
> +
> +static irqreturn_t vu_interrupt(int irq, void *opaque)
> +{
> +	struct virtqueue *vq = opaque;
> +	struct virtio_uml_vq_info *info = vq->priv;
> +	unsigned long long value;
> +	int rc;
> +	irqreturn_t ret = IRQ_NONE;
> +
> +	do {
> +		rc = os_eventfd_read(info->call_fd, &value);
> +		if (!rc)
> +			ret |= vring_interrupt(irq, vq);
> +	} while (!rc || rc == -EINTR);
> +	WARN_ON(rc != -EAGAIN);
> +	return ret;
> +}
> +
> +
> +static void vu_get(struct virtio_device *vdev, unsigned offset,
> +		   void *buf, unsigned len)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	WARN_ON(vhost_user_get_config(vu_dev, offset, buf, len));
> +}
> +
> +static void vu_set(struct virtio_device *vdev, unsigned offset,
> +		   const void *buf, unsigned len)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	WARN_ON(vhost_user_set_config(vu_dev, offset, buf, len));
> +}
> +
> +static u8 vu_get_status(struct virtio_device *vdev)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	return vu_dev->status;
> +}
> +
> +static void vu_set_status(struct virtio_device *vdev, u8 status)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	vu_dev->status = status;
> +}
> +
> +static void vu_reset(struct virtio_device *vdev)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	vu_dev->status = 0;
> +}
> +
> +static void vu_del_vq(struct virtqueue *vq)
> +{
> +	struct virtio_uml_vq_info *info = vq->priv;
> +
> +	um_free_irq(VIRTIO_IRQ, vq);
> +
> +	os_close_file(info->call_fd);
> +	os_close_file(info->kick_fd);
> +
> +	vring_del_virtqueue(vq);
> +	kfree(info);
> +}
> +
> +static void vu_del_vqs(struct virtio_device *vdev)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +	struct virtqueue *vq, *n;
> +	u64 features;
> +
> +	/* Note: reverse order as a workaround to a decoding bug in snabb */
> +	list_for_each_entry_reverse(vq, &vdev->vqs, list)
> +		WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, false));
> +
> +	/* Ensure previous messages have been processed */
> +	WARN_ON(vhost_user_get_features(vu_dev, &features));
> +
> +	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
> +		vu_del_vq(vq);
> +}
> +
> +static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
> +				     unsigned index, vq_callback_t *callback,
> +				     const char *name, bool ctx)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +	struct platform_device *pdev = vu_dev->pdev;
> +	struct virtio_uml_vq_info *info;
> +	struct virtqueue *vq;
> +	int num = MAX_SUPPORTED_QUEUE_SIZE;
> +	int rc;
> +	int call_fds[2];
> +
> +	info = kzalloc(sizeof(*info), GFP_KERNEL);
> +	if (!info) {
> +		rc = -ENOMEM;
> +		goto error_kzalloc;
> +	}
> +	snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name,
> +			pdev->id, name);
> +
> +	rc = os_eventfd(0, 0);
> +	if (rc < 0)
> +		goto error_kick;
> +	info->kick_fd = rc;
> +
> +	/* Use a pipe for call fd, since SIGIO is not supported for eventfd */
> +	rc = os_pipe(call_fds, true, true);
> +	if (rc < 0)
> +		goto error_call;
> +	info->call_fd = call_fds[0];
> +
> +	vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true,
> +			ctx, vu_notify, callback, info->name);
> +	if (!vq) {
> +		rc = -ENOMEM;
> +		goto error_create;
> +	}
> +	vq->priv = info;
> +	num = virtqueue_get_vring_size(vq);
> +
> +	rc = um_request_irq(VIRTIO_IRQ, info->call_fd, IRQ_READ, vu_interrupt,
> +			IRQF_SHARED, info->name, vq);
> +	if (rc)
> +		goto error_irq;
> +
> +	rc = vhost_user_set_vring_num(vu_dev, index, num);
> +	if (rc)
> +		goto error_setup;
> +
> +	rc = vhost_user_set_vring_base(vu_dev, index, 0);
> +	if (rc)
> +		goto error_setup;
> +
> +	rc = vhost_user_set_vring_call(vu_dev, index, call_fds[1]);
> +	if (rc)
> +		goto error_setup;
> +
> +	rc = vhost_user_set_vring_addr(vu_dev, index,
> +			virtqueue_get_desc_addr(vq),
> +			virtqueue_get_used_addr(vq),
> +			virtqueue_get_avail_addr(vq),
> +			(u64) -1);
> +	if (rc)
> +		goto error_setup;
> +
> +	/* Close unused write end of call fds */
> +	os_close_file(call_fds[1]);
> +
> +	return vq;
> +
> +error_setup:
> +	um_free_irq(VIRTIO_IRQ, vq);
> +error_irq:
> +	vring_del_virtqueue(vq);
> +error_create:
> +	os_close_file(call_fds[1]);
> +	os_close_file(info->call_fd);
> +error_call:
> +	os_close_file(info->kick_fd);
> +error_kick:
> +	kfree(info);
> +error_kzalloc:
> +	return ERR_PTR(rc);
> +}
> +
> +static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
> +		       struct virtqueue *vqs[], vq_callback_t *callbacks[],
> +		       const char * const names[], const bool *ctx,
> +		       struct irq_affinity *desc)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +	int i, queue_idx, rc;
> +	struct virtqueue *vq;
> +
> +	rc = vhost_user_set_mem_table(vu_dev);
> +	if (rc)
> +		return rc;
> +
> +	for (i = 0; i < nvqs; ++i) {
> +		if (!names[i]) {
> +			vqs[i] = NULL;
> +			continue;
> +		}
> +
> +		vqs[i] = vu_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
> +				ctx ? ctx[i] : false);
> +		if (IS_ERR(vqs[i])) {
> +			rc = PTR_ERR(vqs[i]);
> +			goto error_setup;
> +		}
> +	}
> +
> +	list_for_each_entry(vq, &vdev->vqs, list) {
> +		struct virtio_uml_vq_info *info = vq->priv;
> +
> +		rc = vhost_user_set_vring_kick(vu_dev, vq->index,
> +				info->kick_fd);
> +		if (rc)
> +			goto error_setup;
> +
> +		rc = vhost_user_set_vring_enable(vu_dev, vq->index, true);
> +		if (rc)
> +			goto error_setup;
> +	}
> +
> +	return 0;
> +
> +error_setup:
> +	vu_del_vqs(vdev);
> +	return rc;
> +}
> +
> +static u64 vu_get_features(struct virtio_device *vdev)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	return vu_dev->features;
> +}
> +
> +static int vu_finalize_features(struct virtio_device *vdev)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +	u64 supported = vdev->features & VHOST_USER_SUPPORTED_F;
> +
> +	vring_transport_features(vdev);
> +	vu_dev->features = vdev->features | supported;
> +
> +	return vhost_user_set_features(vu_dev, vu_dev->features);
> +}
> +
> +static const char *vu_bus_name(struct virtio_device *vdev)
> +{
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	return vu_dev->pdev->name;
> +}
> +
> +static const struct virtio_config_ops virtio_uml_config_ops = {
> +	.get = vu_get,
> +	.set = vu_set,
> +	.get_status = vu_get_status,
> +	.set_status = vu_set_status,
> +	.reset = vu_reset,
> +	.find_vqs = vu_find_vqs,
> +	.del_vqs = vu_del_vqs,
> +	.get_features = vu_get_features,
> +	.finalize_features = vu_finalize_features,
> +	.bus_name = vu_bus_name,
> +};
> +
> +
> +static void virtio_uml_release_dev(struct device *d)
> +{
> +	struct virtio_device *vdev =
> +			container_of(d, struct virtio_device, dev);
> +	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
> +
> +	os_close_file(vu_dev->sock);
> +}
> +
> +/* Platform device */
> +
> +struct virtio_uml_platform_data {
> +	u32 virtio_device_id;
> +	const char *socket_path;
> +};
> +
> +static int virtio_uml_probe(struct platform_device *pdev)
> +{
> +	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
> +	struct virtio_uml_device *vu_dev;
> +	int rc;
> +
> +	if (!pdata)
> +		return -EINVAL;
> +
> +	vu_dev = devm_kzalloc(&pdev->dev, sizeof(*vu_dev), GFP_KERNEL);
> +	if (!vu_dev)
> +		return -ENOMEM;
> +
> +	vu_dev->vdev.dev.parent = &pdev->dev;
> +	vu_dev->vdev.dev.release = virtio_uml_release_dev;
> +	vu_dev->vdev.config = &virtio_uml_config_ops;
> +	vu_dev->vdev.id.device = pdata->virtio_device_id;
> +	vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID;
> +	vu_dev->pdev = pdev;
> +
> +	do {
> +		rc = os_connect_socket(pdata->socket_path);
> +	} while (rc == -EINTR);
> +	if (rc < 0)
> +		return rc;
> +	vu_dev->sock = rc;
> +
> +	rc = vhost_user_init(vu_dev);
> +	if (rc)
> +		goto error_init;
> +
> +	platform_set_drvdata(pdev, vu_dev);
> +
> +	rc = register_virtio_device(&vu_dev->vdev);
> +	if (rc)
> +		put_device(&vu_dev->vdev.dev);
> +	return rc;
> +
> +error_init:
> +	os_close_file(vu_dev->sock);
> +	return rc;
> +}
> +
> +static int virtio_uml_remove(struct platform_device *pdev)
> +{
> +	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
> +
> +	unregister_virtio_device(&vu_dev->vdev);
> +	return 0;
> +}
> +
> +/* Command line device list */
> +
> +static void vu_cmdline_release_dev(struct device *d)
> +{
> +}
> +
> +static struct device vu_cmdline_parent = {
> +	.init_name = "virtio-uml-cmdline",
> +	.release = vu_cmdline_release_dev,
> +};
> +
> +static bool vu_cmdline_parent_registered;
> +static int vu_cmdline_id;
> +
> +static int vu_cmdline_set(const char *device, const struct kernel_param *kp)
> +{
> +	const char *ids = strchr(device, ':');
> +	unsigned int virtio_device_id;
> +	int processed, consumed, err;
> +	char *socket_path;
> +	struct virtio_uml_platform_data *pdata;
> +	struct platform_device *pdev;
> +
> +	if (!ids || ids == device)
> +		return -EINVAL;
> +
> +	processed = sscanf(ids, ":%u%n:%d%n",
> +			&virtio_device_id, &consumed,
> +			&vu_cmdline_id, &consumed);
> +
> +	if (processed < 1 || ids[consumed])
> +		return -EINVAL;
> +
> +	if (!vu_cmdline_parent_registered) {
> +		err = device_register(&vu_cmdline_parent);
> +		if (err) {
> +			pr_err("Failed to register parent device!\n");
> +			put_device(&vu_cmdline_parent);
> +			return err;
> +		}
> +		vu_cmdline_parent_registered = true;
> +	}
> +
> +	socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL);
> +	if (!socket_path)
> +		return -ENOMEM;
> +
> +	pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
> +	if (!pdata) {
> +		err = -ENOMEM;
> +		goto error_kzalloc;
> +	}
> +
> +	pdata->virtio_device_id = (u32) virtio_device_id;
> +	pdata->socket_path = socket_path;
> +
> +	pr_info("Registering device virtio-uml.%d id=%d at %s\n",
> +			vu_cmdline_id, virtio_device_id, socket_path);
> +
> +	pdev = platform_device_register_data(&vu_cmdline_parent, "virtio-uml",
> +			vu_cmdline_id++, pdata, sizeof(*pdata));
> +	err = PTR_ERR_OR_ZERO(pdev);
> +	if (err)
> +		goto error_register;
> +	return 0;
> +
> +error_register:
> +	kfree(pdata);
> +error_kzalloc:
> +	kfree(socket_path);
> +	return err;
> +}
> +
> +static int vu_cmdline_get_device(struct device *dev, void *data)
> +{
> +	struct platform_device *pdev = to_platform_device(dev);
> +	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
> +	char *buffer = data;
> +	unsigned int len = strlen(buffer);
> +
> +	snprintf(buffer + len, PAGE_SIZE - len, "%s:%d:%d\n",
> +			pdata->socket_path, pdata->virtio_device_id, pdev->id);
> +	return 0;
> +}
> +
> +static int vu_cmdline_get(char *buffer, const struct kernel_param *kp)
> +{
> +	buffer[0] = '\0';
> +	if (vu_cmdline_parent_registered)
> +		device_for_each_child(&vu_cmdline_parent, buffer,
> +				vu_cmdline_get_device);
> +	return strlen(buffer) + 1;
> +}
> +
> +static const struct kernel_param_ops vu_cmdline_param_ops = {
> +	.set = vu_cmdline_set,
> +	.get = vu_cmdline_get,
> +};
> +
> +device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR);
> +
> +
> +static int vu_unregister_cmdline_device(struct device *dev, void *data)
> +{
> +	struct platform_device *pdev = to_platform_device(dev);
> +	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
> +
> +	platform_device_unregister(pdev);
> +	kfree(pdata->socket_path);
> +	kfree(pdata);
> +	return 0;
> +}
> +
> +static void vu_unregister_cmdline_devices(void)
> +{
> +	if (vu_cmdline_parent_registered) {
> +		device_for_each_child(&vu_cmdline_parent, NULL,
> +				vu_unregister_cmdline_device);
> +		device_unregister(&vu_cmdline_parent);
> +		vu_cmdline_parent_registered = false;
> +	}
> +}
> +
> +/* Platform driver */
> +
> +static const struct of_device_id virtio_uml_match[] = {
> +	{ .compatible = "virtio,uml", },
> +	{ }
> +};
> +MODULE_DEVICE_TABLE(of, virtio_uml_match);
> +
> +static struct platform_driver virtio_uml_driver = {
> +	.probe = virtio_uml_probe,
> +	.remove = virtio_uml_remove,
> +	.driver = {
> +		.name = "virtio-uml",
> +		.of_match_table = virtio_uml_match,
> +	},
> +};
> +
> +static int __init virtio_uml_init(void)
> +{
> +	return platform_driver_register(&virtio_uml_driver);
> +}
> +
> +static void __exit virtio_uml_exit(void)
> +{
> +	platform_driver_unregister(&virtio_uml_driver);
> +	vu_unregister_cmdline_devices();
> +}
> +
> +module_init(virtio_uml_init);
> +module_exit(virtio_uml_exit);
> +__uml_exitcall(virtio_uml_exit);
> +
> +MODULE_DESCRIPTION("UML driver for vhost-user virtio devices");
> +MODULE_LICENSE("GPL");
> diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h
> index ce7a78c3bcf2..42c6205e2dc4 100644
> --- a/arch/um/include/asm/irq.h
> +++ b/arch/um/include/asm/irq.h
> @@ -17,17 +17,18 @@
>   #define TELNETD_IRQ 		12
>   #define XTERM_IRQ 		13
>   #define RANDOM_IRQ 		14
> +#define VIRTIO_IRQ		15
>   
>   #ifdef CONFIG_UML_NET_VECTOR
>   
> -#define VECTOR_BASE_IRQ		15
> +#define VECTOR_BASE_IRQ		(VIRTIO_IRQ + 1)
>   #define VECTOR_IRQ_SPACE	8
>   
>   #define LAST_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ - 1)
>   
>   #else
>   
> -#define LAST_IRQ RANDOM_IRQ
> +#define LAST_IRQ VIRTIO_IRQ
>   
>   #endif
>   
> diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
> index 4a62ac4251a5..da7ebbe4ebb3 100644
> --- a/arch/um/include/shared/os.h
> +++ b/arch/um/include/shared/os.h
> @@ -36,6 +36,8 @@
>   #define OS_LIB_PATH	"/usr/lib/"
>   #endif
>   
> +#define OS_SENDMSG_MAX_FDS 8
> +
>   /*
>    * types taken from stat_file() in hostfs_user.c
>    * (if they are wrong here, they are wrong there...).
> @@ -176,6 +178,11 @@ extern unsigned os_major(unsigned long long dev);
>   extern unsigned os_minor(unsigned long long dev);
>   extern unsigned long long os_makedev(unsigned major, unsigned minor);
>   extern int os_falloc_punch(int fd, unsigned long long offset, int count);
> +extern int os_eventfd(unsigned int initval, int flags);
> +extern int os_eventfd_read(int fd, unsigned long long *value);
> +extern int os_eventfd_write(int fd, unsigned long long value);
> +extern int os_sendmsg_fds(int fd, const void *buf, unsigned int len,
> +			  const int *fds, unsigned int fds_num);
>   
>   /* start_up.c */
>   extern void os_early_checks(void);
> diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
> index 232b22307fdd..f3766889226b 100644
> --- a/arch/um/kernel/ksyms.c
> +++ b/arch/um/kernel/ksyms.c
> @@ -38,6 +38,10 @@ EXPORT_SYMBOL(run_helper);
>   EXPORT_SYMBOL(os_major);
>   EXPORT_SYMBOL(os_minor);
>   EXPORT_SYMBOL(os_makedev);
> +EXPORT_SYMBOL(os_eventfd);
> +EXPORT_SYMBOL(os_eventfd_read);
> +EXPORT_SYMBOL(os_eventfd_write);
> +EXPORT_SYMBOL(os_sendmsg_fds);
>   
>   EXPORT_SYMBOL(add_sigio_fd);
>   EXPORT_SYMBOL(ignore_sigio_fd);
> diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
> index de58e976b9bc..f256be1d77bd 100644
> --- a/arch/um/kernel/mem.c
> +++ b/arch/um/kernel/mem.c
> @@ -31,6 +31,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD];
>   
>   /* Initialized at boot time, and readonly after that */
>   unsigned long long highmem;
> +EXPORT_SYMBOL(highmem);
>   int kmalloc_ok = 0;
>   
>   /* Used during early boot */
> diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c
> index 5bf56af4d5b9..870c80c88a97 100644
> --- a/arch/um/kernel/physmem.c
> +++ b/arch/um/kernel/physmem.c
> @@ -143,6 +143,7 @@ int phys_mapping(unsigned long phys, unsigned long long *offset_out)
>   
>   	return fd;
>   }
> +EXPORT_SYMBOL(phys_mapping);
>   
>   static int __init uml_mem_setup(char *line, int *add)
>   {
> diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
> index a818ccef30ca..9de212bf4320 100644
> --- a/arch/um/kernel/um_arch.c
> +++ b/arch/um/kernel/um_arch.c
> @@ -113,6 +113,7 @@ static int have_root __initdata = 0;
>   
>   /* Set in uml_mem_setup and modified in linux_main */
>   long long physmem_size = 32 * 1024 * 1024;
> +EXPORT_SYMBOL(physmem_size);
>   
>   static const char *usage_string =
>   "User Mode Linux v%s\n"
> diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
> index f25b110d4e70..0c4e0298398d 100644
> --- a/arch/um/os-Linux/file.c
> +++ b/arch/um/os-Linux/file.c
> @@ -15,6 +15,7 @@
>   #include <sys/sysmacros.h>
>   #include <sys/un.h>
>   #include <sys/types.h>
> +#include <sys/eventfd.h>
>   #include <os.h>
>   
>   static void copy_stat(struct uml_stat *dst, const struct stat64 *src)
> @@ -620,3 +621,66 @@ int os_falloc_punch(int fd, unsigned long long offset, int len)
>   	return n;
>   }
>   
> +int os_eventfd(unsigned int initval, int flags)
> +{
> +	int fd = eventfd(initval, flags);
> +
> +	if (fd < 0)
> +		return -errno;
> +	return fd;
> +}
> +
> +int os_eventfd_read(int fd, unsigned long long *value)
> +{
> +	eventfd_t val;
> +	int err = eventfd_read(fd, &val);
> +
> +	if (err < 0)
> +		return -errno;
> +	*value = val;
> +	return 0;
> +}
> +
> +int os_eventfd_write(int fd, unsigned long long value)
> +{
> +	int err = eventfd_write(fd, value);
> +
> +	if (err < 0)
> +		return -errno;
> +	return 0;
> +}
> +
> +int os_sendmsg_fds(int fd, const void *buf, unsigned int len, const int *fds,
> +		   unsigned int fds_num)
> +{
> +	struct iovec iov = {
> +		.iov_base = (void *) buf,
> +		.iov_len = len,
> +	};
> +	union {
> +		char control[CMSG_SPACE(sizeof(*fds) * OS_SENDMSG_MAX_FDS)];
> +		struct cmsghdr align;
> +	} u;
> +	unsigned int fds_size = sizeof(*fds) * fds_num;
> +	struct msghdr msg = {
> +		.msg_iov = &iov,
> +		.msg_iovlen = 1,
> +		.msg_control = u.control,
> +		.msg_controllen = CMSG_SPACE(fds_size),
> +	};
> +	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
> +	int err;
> +
> +	if (fds_num > OS_SENDMSG_MAX_FDS)
> +		return -EINVAL;
> +	memset(u.control, 0, sizeof(u.control));
> +	cmsg->cmsg_level = SOL_SOCKET;
> +	cmsg->cmsg_type = SCM_RIGHTS;
> +	cmsg->cmsg_len = CMSG_LEN(fds_size);
> +	memcpy(CMSG_DATA(cmsg), fds, fds_size);
> +	err = sendmsg(fd, &msg, 0);
> +
> +	if (err < 0)
> +		return -errno;
> +	return err;
> +}
> 


It just hit me - this violates UML security model.

In UML (and any virtualization for that matter) the guest side is never 
allowed to specify any paths, files, etc locations on the host side.

This driver accepts a path argument to a host side path being supplied 
by the guest side.

One way of getting around this will be to sort out what uml does for 
iomem and make it generic and not just memory and just boot time.

This way boot or uml_mconsole can supply a file id and a driver can 
associate with this id.
Johannes Berg Oct. 8, 2019, 11:57 a.m. UTC | #14
On Tue, 2019-10-08 at 10:26 +0100, Anton Ivanov wrote:

> It just hit me - this violates UML security model.

Hmm.

> In UML (and any virtualization for that matter) the guest side is never 
> allowed to specify any paths, files, etc locations on the host side.

Yeah, I guess that makes sense.

> This driver accepts a path argument to a host side path being supplied 
> by the guest side.

Right, sort of.

> One way of getting around this will be to sort out what uml does for 
> iomem and make it generic and not just memory and just boot time.
> 
> This way boot or uml_mconsole can supply a file id and a driver can 
> associate with this id.

Or maybe just not allow this to be a module? Then it cannot be loaded
from inside, and the command-line must be given from outside, I think?
We could change it to use __uml_setup() too.

Perhaps with kexec() it's possible to influence the next kernel's
command line though, but that might already be an issue then?

johannes
Anton Ivanov Oct. 8, 2019, 2:17 p.m. UTC | #15
On 08/10/2019 12:57, Johannes Berg wrote:
> On Tue, 2019-10-08 at 10:26 +0100, Anton Ivanov wrote:
> 
>> It just hit me - this violates UML security model.
> 
> Hmm.
> 
>> In UML (and any virtualization for that matter) the guest side is never
>> allowed to specify any paths, files, etc locations on the host side.
> 
> Yeah, I guess that makes sense.
> 
>> This driver accepts a path argument to a host side path being supplied
>> by the guest side.
> 
> Right, sort of.
> 
>> One way of getting around this will be to sort out what uml does for
>> iomem and make it generic and not just memory and just boot time.
>>
>> This way boot or uml_mconsole can supply a file id and a driver can
>> associate with this id.
> 
> Or maybe just not allow this to be a module? Then it cannot be loaded
> from inside, and the command-line must be given from outside, I think?
> We could change it to use __uml_setup() too.

That would do it.

> 
> Perhaps with kexec() it's possible to influence the next kernel's
> command line though, but that might already be an issue then?

It is an issue, if kexec works.

I do not quite see how UML memory set-up and initial setup will work if 
you try to kexec UML from UML.

In fact, you will need a specially built kernel to start with, the 
normal UML kernel build should not work.
> 
> johannes
> 
>
Johannes Berg Oct. 8, 2019, 2:49 p.m. UTC | #16
On Tue, 2019-10-08 at 15:17 +0100, Anton Ivanov wrote:

> > Or maybe just not allow this to be a module? Then it cannot be loaded
> > from inside, and the command-line must be given from outside, I think?
> > We could change it to use __uml_setup() too.
> 
> That would do it.

I think that's probably easiest then.

> > Perhaps with kexec() it's possible to influence the next kernel's
> > command line though, but that might already be an issue then?
> 
> It is an issue, if kexec works.

I was misremembering the code in main() - it does an exec() of itself
(with unchanged parameters) for a *reboot*. Somehow I associated that
with kexec().

johannes
diff mbox series

Patch

diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
index 2638e46f50cc..fea5a0d522dc 100644
--- a/arch/um/drivers/Kconfig
+++ b/arch/um/drivers/Kconfig
@@ -335,3 +335,10 @@  config UML_NET_SLIRP
 	  Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp"
 
 endmenu
+
+config VIRTIO_UML
+	tristate "UML driver for virtio devices"
+	select VIRTIO
+	help
+	  This driver provides support for virtio based paravirtual device
+	  drivers over vhost-user sockets.
diff --git a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile
index 693319839f69..3edf0a7cfd25 100644
--- a/arch/um/drivers/Makefile
+++ b/arch/um/drivers/Makefile
@@ -61,6 +61,7 @@  obj-$(CONFIG_XTERM_CHAN) += xterm.o xterm_kern.o
 obj-$(CONFIG_UML_WATCHDOG) += harddog.o
 obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o
 obj-$(CONFIG_UML_RANDOM) += random.o
+obj-$(CONFIG_VIRTIO_UML) += virtio_uml.o
 
 # pcap_user.o must be added explicitly.
 USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o
diff --git a/arch/um/drivers/vhost_user.h b/arch/um/drivers/vhost_user.h
new file mode 100644
index 000000000000..2a9829b0782b
--- /dev/null
+++ b/arch/um/drivers/vhost_user.h
@@ -0,0 +1,102 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Vhost-user protocol */
+
+#ifndef __VHOST_USER_H__
+#define __VHOST_USER_H__
+
+/* Message flags */
+#define VHOST_USER_FLAG_REPLY		BIT(2)
+/* Feature bits */
+#define VHOST_USER_F_PROTOCOL_FEATURES	30
+/* Protocol feature bits */
+#define VHOST_USER_PROTOCOL_F_CONFIG	9
+/* Vring state index masks */
+#define VHOST_USER_VRING_INDEX_MASK	0xff
+#define VHOST_USER_VRING_POLL_MASK	BIT(8)
+
+/* Supported version */
+#define VHOST_USER_VERSION		1
+/* Supported transport features */
+#define VHOST_USER_SUPPORTED_F		BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)
+/* Supported protocol features */
+#define VHOST_USER_SUPPORTED_PROTOCOL_F	BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)
+
+enum vhost_user_request {
+	VHOST_USER_GET_FEATURES = 1,
+	VHOST_USER_SET_FEATURES = 2,
+	VHOST_USER_SET_OWNER = 3,
+	VHOST_USER_RESET_OWNER = 4,
+	VHOST_USER_SET_MEM_TABLE = 5,
+	VHOST_USER_SET_LOG_BASE = 6,
+	VHOST_USER_SET_LOG_FD = 7,
+	VHOST_USER_SET_VRING_NUM = 8,
+	VHOST_USER_SET_VRING_ADDR = 9,
+	VHOST_USER_SET_VRING_BASE = 10,
+	VHOST_USER_GET_VRING_BASE = 11,
+	VHOST_USER_SET_VRING_KICK = 12,
+	VHOST_USER_SET_VRING_CALL = 13,
+	VHOST_USER_SET_VRING_ERR = 14,
+	VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+	VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+	VHOST_USER_GET_QUEUE_NUM = 17,
+	VHOST_USER_SET_VRING_ENABLE = 18,
+	VHOST_USER_SEND_RARP = 19,
+	VHOST_USER_NET_SEND_MTU = 20,
+	VHOST_USER_SET_SLAVE_REQ_FD = 21,
+	VHOST_USER_IOTLB_MSG = 22,
+	VHOST_USER_SET_VRING_ENDIAN = 23,
+	VHOST_USER_GET_CONFIG = 24,
+	VHOST_USER_SET_CONFIG = 25,
+};
+
+struct vhost_user_header {
+	u32 request; /* Use enum vhost_user_request */
+	u32 flags;
+	u32 size;
+} __packed;
+
+struct vhost_user_config {
+	u32 offset;
+	u32 size;
+	u32 flags;
+	u8 payload[0]; /* Variable length */
+} __packed;
+
+struct vhost_user_vring_state {
+	u32 index;
+	u32 num;
+} __packed;
+
+struct vhost_user_vring_addr {
+	u32 index;
+	u32 flags;
+	u64 desc, used, avail, log;
+} __packed;
+
+struct vhost_user_mem_region {
+	u64 guest_addr;
+	u64 size;
+	u64 user_addr;
+	u64 mmap_offset;
+} __packed;
+
+struct vhost_user_mem_regions {
+	u32 num;
+	u32 padding;
+	struct vhost_user_mem_region regions[2]; /* Currently supporting 2 */
+} __packed;
+
+union vhost_user_payload {
+	u64 integer;
+	struct vhost_user_config config;
+	struct vhost_user_vring_state vring_state;
+	struct vhost_user_vring_addr vring_addr;
+	struct vhost_user_mem_regions mem_regions;
+};
+
+struct vhost_user_msg {
+	struct vhost_user_header header;
+	union vhost_user_payload payload;
+} __packed;
+
+#endif
diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c
new file mode 100644
index 000000000000..30eeb907f42a
--- /dev/null
+++ b/arch/um/drivers/virtio_uml.c
@@ -0,0 +1,905 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Virtio vhost-user driver
+ *
+ * Copyright(c) 2019 Intel Corporation
+ *
+ * This module allows virtio devices to be used over a vhost-user socket.
+ *
+ * Guest devices can be instantiated by kernel module or command line
+ * parameters. One device will be created for each parameter. Syntax:
+ *
+ *		[virtio_uml.]device=<socket>:<virtio_id>[:<platform_id>]
+ * where:
+ *		<socket>	:= vhost-user socket path to connect
+ *		<virtio_id>	:= virtio device id (as in virtio_ids.h)
+ *		<platform_id>	:= (optional) platform device id
+ *
+ * example:
+ *		virtio_uml.device=/var/uml.socket:1
+ *
+ * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd.
+ */
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_ring.h>
+#include <irq_kern.h>
+#include <init.h>
+#include <os.h>
+#include "vhost_user.h"
+
+/* Workaround due to a conflict between irq_user.h and irqreturn.h */
+#ifdef IRQ_NONE
+#undef IRQ_NONE
+#endif
+
+#define MAX_SUPPORTED_QUEUE_SIZE	256
+
+#define to_virtio_uml_device(_vdev) \
+	container_of(_vdev, struct virtio_uml_device, vdev)
+
+struct virtio_uml_device {
+	struct virtio_device vdev;
+	struct platform_device *pdev;
+
+	int sock;
+	u64 features;
+	u64 protocol_features;
+	u8 status;
+};
+
+struct virtio_uml_vq_info {
+	int kick_fd, call_fd;
+	char name[32];
+};
+
+extern unsigned long long physmem_size, highmem;
+
+
+/* Vhost-user protocol */
+
+static int vhost_user_send(struct virtio_uml_device *vu_dev,
+			   struct vhost_user_msg *msg,
+			   int *fds, size_t num_fds)
+{
+	size_t size = sizeof(msg->header) + msg->header.size;
+	int rc;
+
+	msg->header.flags |= VHOST_USER_VERSION;
+	do {
+		rc = os_sendmsg_fds(vu_dev->sock, (void *) msg, size, fds,
+				num_fds);
+	} while (rc == -EINTR);
+	if (rc < 0)
+		return rc;
+	if (rc != size)
+		return -ECOMM;
+	return 0;
+}
+
+static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev,
+				      u32 request)
+{
+	struct vhost_user_msg msg = {
+		.header.request = request,
+	};
+	return vhost_user_send(vu_dev, &msg, NULL, 0);
+}
+
+static int vhost_user_send_u64(struct virtio_uml_device *vu_dev,
+			       u32 request, u64 value)
+{
+	struct vhost_user_msg msg = {
+		.header.request = request,
+		.header.size = sizeof(msg.payload.integer),
+		.payload.integer = value,
+	};
+
+	return vhost_user_send(vu_dev, &msg, NULL, 0);
+}
+
+static int vhost_user_recv_header(struct virtio_uml_device *vu_dev,
+				  struct vhost_user_msg *msg)
+{
+	size_t size = sizeof(msg->header);
+	int rc;
+
+	do {
+		rc = os_read_file(vu_dev->sock, (void *) msg, size);
+	} while (rc == -EINTR);
+	if (rc != size)
+		return -EPROTO;
+	if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION))
+		return -EPROTO;
+	return 0;
+}
+
+static int vhost_user_recv(struct virtio_uml_device *vu_dev,
+			   struct vhost_user_msg *msg,
+			   size_t max_payload_size)
+{
+	size_t size;
+	int rc = vhost_user_recv_header(vu_dev, msg);
+
+	if (rc)
+		return rc;
+	size = msg->header.size;
+	if (size > max_payload_size)
+		return -EPROTO;
+	do {
+		rc = os_read_file(vu_dev->sock, (void *) &msg->payload, size);
+	} while (rc == -EINTR);
+	if (rc != size)
+		return -EPROTO;
+	return 0;
+}
+
+static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev,
+			       u64 *value)
+{
+	struct vhost_user_msg msg;
+	int rc = vhost_user_recv(vu_dev, &msg, sizeof(msg.payload.integer));
+
+	if (rc)
+		return rc;
+	if (msg.header.size != sizeof(msg.payload.integer))
+		return -EPROTO;
+	*value = msg.payload.integer;
+	return 0;
+}
+
+static int vhost_user_set_owner(struct virtio_uml_device *vu_dev)
+{
+	return vhost_user_send_no_payload(vu_dev, VHOST_USER_SET_OWNER);
+}
+
+static int vhost_user_get_features(struct virtio_uml_device *vu_dev,
+				   u64 *features)
+{
+	int rc = vhost_user_send_no_payload(vu_dev, VHOST_USER_GET_FEATURES);
+
+	if (rc)
+		return rc;
+	return vhost_user_recv_u64(vu_dev, features);
+}
+
+static int vhost_user_set_features(struct virtio_uml_device *vu_dev,
+				   u64 features)
+{
+	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_FEATURES, features);
+}
+
+static int vhost_user_get_protocol_features(struct virtio_uml_device *vu_dev,
+					    u64 *protocol_features)
+{
+	int rc = vhost_user_send_no_payload(vu_dev,
+			VHOST_USER_GET_PROTOCOL_FEATURES);
+
+	if (rc)
+		return rc;
+	return vhost_user_recv_u64(vu_dev, protocol_features);
+}
+
+static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev,
+					    u64 protocol_features)
+{
+	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_PROTOCOL_FEATURES,
+			protocol_features);
+}
+
+static int vhost_user_init(struct virtio_uml_device *vu_dev)
+{
+	int rc = vhost_user_set_owner(vu_dev);
+
+	if (rc)
+		return rc;
+	rc = vhost_user_get_features(vu_dev, &vu_dev->features);
+	if (rc)
+		return rc;
+
+	if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) {
+		rc = vhost_user_get_protocol_features(vu_dev,
+				&vu_dev->protocol_features);
+		if (rc)
+			return rc;
+		vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F;
+		rc = vhost_user_set_protocol_features(vu_dev,
+				vu_dev->protocol_features);
+	}
+	return rc;
+}
+
+static int vhost_user_get_config(struct virtio_uml_device *vu_dev,
+				 u32 offset, void *buf, u32 len)
+{
+	u32 cfg_size = offset + len;
+	struct vhost_user_msg *msg;
+	size_t payload_size = sizeof(msg->payload.config) + cfg_size;
+	size_t msg_size = sizeof(msg->header) + payload_size;
+	int rc;
+
+	if (!(vu_dev->protocol_features &
+			BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
+		return -EOPNOTSUPP;
+
+	msg = kzalloc(msg_size, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+	msg->header.request = VHOST_USER_GET_CONFIG;
+	msg->header.size = payload_size;
+	msg->payload.config.offset = 0;
+	msg->payload.config.size = cfg_size;
+
+	rc = vhost_user_send(vu_dev, msg, NULL, 0);
+	if (rc)
+		goto error_get;
+	rc = vhost_user_recv(vu_dev, msg, msg_size);
+	if (rc)
+		goto error_get;
+	if (msg->header.size != payload_size ||
+			msg->payload.config.size != cfg_size) {
+		rc = -EPROTO;
+		goto error_get;
+	}
+	memcpy(buf, msg->payload.config.payload + offset, len);
+
+error_get:
+	kfree(msg);
+	return rc;
+}
+
+static int vhost_user_set_config(struct virtio_uml_device *vu_dev,
+				 u32 offset, const void *buf, u32 len)
+{
+	struct vhost_user_msg *msg;
+	size_t payload_size = sizeof(msg->payload.config) + len;
+	size_t msg_size = sizeof(msg->header) + payload_size;
+	int rc;
+
+	if (!(vu_dev->protocol_features &
+			BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
+		return -EOPNOTSUPP;
+
+	msg = kzalloc(msg_size, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+	msg->header.request = VHOST_USER_SET_CONFIG;
+	msg->header.size = payload_size;
+	msg->payload.config.offset = offset;
+	msg->payload.config.size = len;
+	memcpy(msg->payload.config.payload, buf, len);
+
+	rc = vhost_user_send(vu_dev, msg, NULL, 0);
+
+	kfree(msg);
+	return rc;
+}
+
+static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out,
+				      struct vhost_user_mem_region *region_out)
+{
+	unsigned long long mem_offset;
+	int rc = phys_mapping(addr, &mem_offset);
+
+	if (WARN_ON(rc < 0))
+		return -EFAULT;
+	*fd_out = rc;
+	region_out->guest_addr = addr;
+	region_out->user_addr = addr;
+	region_out->size = size;
+	region_out->mmap_offset = mem_offset;
+
+	rc = phys_mapping(addr + size - 1, &mem_offset);
+	if (WARN_ON(rc != *fd_out))
+		return -EFAULT;
+	return 0;
+}
+
+static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev)
+{
+	struct vhost_user_msg msg = {
+		.header.request = VHOST_USER_SET_MEM_TABLE,
+		.header.size = sizeof(msg.payload.mem_regions),
+		.payload.mem_regions.num = 1,
+	};
+	int fds[2];
+	int rc = vhost_user_init_mem_region(0, physmem_size, &fds[0],
+			&msg.payload.mem_regions.regions[0]);
+
+	if (rc < 0)
+		return rc;
+	if (highmem) {
+		msg.payload.mem_regions.num++;
+		rc = vhost_user_init_mem_region(__pa(end_iomem), highmem,
+				&fds[1], &msg.payload.mem_regions.regions[1]);
+		if (rc < 0)
+			return rc;
+	}
+
+	return vhost_user_send(vu_dev, &msg, fds, msg.payload.mem_regions.num);
+}
+
+static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev,
+				      u32 request, u32 index, u32 num)
+{
+	struct vhost_user_msg msg = {
+		.header.request = request,
+		.header.size = sizeof(msg.payload.vring_state),
+		.payload.vring_state.index = index,
+		.payload.vring_state.num = num,
+	};
+
+	return vhost_user_send(vu_dev, &msg, NULL, 0);
+}
+
+static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev,
+				    u32 index, u32 num)
+{
+	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_NUM,
+			index, num);
+}
+
+static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev,
+				     u32 index, u32 offset)
+{
+	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_BASE,
+			index, offset);
+}
+
+static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev,
+				     u32 index, u64 desc, u64 used, u64 avail,
+				     u64 log)
+{
+	struct vhost_user_msg msg = {
+		.header.request = VHOST_USER_SET_VRING_ADDR,
+		.header.size = sizeof(msg.payload.vring_addr),
+		.payload.vring_addr.index = index,
+		.payload.vring_addr.desc = desc,
+		.payload.vring_addr.used = used,
+		.payload.vring_addr.avail = avail,
+		.payload.vring_addr.log = log,
+	};
+
+	return vhost_user_send(vu_dev, &msg, NULL, 0);
+}
+
+static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev,
+				   u32 request, int index, int fd)
+{
+	struct vhost_user_msg msg = {
+		.header.request = request,
+		.header.size = sizeof(msg.payload.integer),
+		.payload.integer = index,
+	};
+
+	if (index & ~VHOST_USER_VRING_INDEX_MASK)
+		return -EINVAL;
+	if (fd < 0) {
+		msg.payload.integer |= VHOST_USER_VRING_POLL_MASK;
+		return vhost_user_send(vu_dev, &msg, NULL, 0);
+	}
+	return vhost_user_send(vu_dev, &msg, &fd, 1);
+}
+
+static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev,
+				     int index, int fd)
+{
+	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL,
+			index, fd);
+}
+
+static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev,
+				     int index, int fd)
+{
+	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK,
+			index, fd);
+}
+
+static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev,
+				       u32 index, bool enable)
+{
+	if (!(vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)))
+		return 0;
+
+	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_ENABLE,
+			index, enable);
+}
+
+
+/* Virtio interface */
+
+static bool vu_notify(struct virtqueue *vq)
+{
+	struct virtio_uml_vq_info *info = vq->priv;
+	int rc;
+
+	do {
+		rc = os_eventfd_write(info->kick_fd, 1);
+	} while (rc == -EINTR);
+	WARN_ON(rc);
+	return !rc;
+}
+
+static irqreturn_t vu_interrupt(int irq, void *opaque)
+{
+	struct virtqueue *vq = opaque;
+	struct virtio_uml_vq_info *info = vq->priv;
+	unsigned long long value;
+	int rc;
+	irqreturn_t ret = IRQ_NONE;
+
+	do {
+		rc = os_eventfd_read(info->call_fd, &value);
+		if (!rc)
+			ret |= vring_interrupt(irq, vq);
+	} while (!rc || rc == -EINTR);
+	WARN_ON(rc != -EAGAIN);
+	return ret;
+}
+
+
+static void vu_get(struct virtio_device *vdev, unsigned offset,
+		   void *buf, unsigned len)
+{
+	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+
+	WARN_ON(vhost_user_get_config(vu_dev, offset, buf, len));
+}
+
+static void vu_set(struct virtio_device *vdev, unsigned offset,
+		   const void *buf, unsigned len)
+{
+	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+
+	WARN_ON(vhost_user_set_config(vu_dev, offset, buf, len));
+}
+
+static u8 vu_get_status(struct virtio_device *vdev)
+{
+	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+
+	return vu_dev->status;
+}
+
+static void vu_set_status(struct virtio_device *vdev, u8 status)
+{
+	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+
+	vu_dev->status = status;
+}
+
+static void vu_reset(struct virtio_device *vdev)
+{
+	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+
+	vu_dev->status = 0;
+}
+
+static void vu_del_vq(struct virtqueue *vq)
+{
+	struct virtio_uml_vq_info *info = vq->priv;
+
+	um_free_irq(VIRTIO_IRQ, vq);
+
+	os_close_file(info->call_fd);
+	os_close_file(info->kick_fd);
+
+	vring_del_virtqueue(vq);
+	kfree(info);
+}
+
+static void vu_del_vqs(struct virtio_device *vdev)
+{
+	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+	struct virtqueue *vq, *n;
+	u64 features;
+
+	/* Note: reverse order as a workaround to a decoding bug in snabb */
+	list_for_each_entry_reverse(vq, &vdev->vqs, list)
+		WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, false));
+
+	/* Ensure previous messages have been processed */
+	WARN_ON(vhost_user_get_features(vu_dev, &features));
+
+	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
+		vu_del_vq(vq);
+}
+
+static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
+				     unsigned index, vq_callback_t *callback,
+				     const char *name, bool ctx)
+{
+	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+	struct platform_device *pdev = vu_dev->pdev;
+	struct virtio_uml_vq_info *info;
+	struct virtqueue *vq;
+	int num = MAX_SUPPORTED_QUEUE_SIZE;
+	int rc;
+	int call_fds[2];
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		rc = -ENOMEM;
+		goto error_kzalloc;
+	}
+	snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name,
+			pdev->id, name);
+
+	rc = os_eventfd(0, 0);
+	if (rc < 0)
+		goto error_kick;
+	info->kick_fd = rc;
+
+	/* Use a pipe for call fd, since SIGIO is not supported for eventfd */
+	rc = os_pipe(call_fds, true, true);
+	if (rc < 0)
+		goto error_call;
+	info->call_fd = call_fds[0];
+
+	vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true,
+			ctx, vu_notify, callback, info->name);
+	if (!vq) {
+		rc = -ENOMEM;
+		goto error_create;
+	}
+	vq->priv = info;
+	num = virtqueue_get_vring_size(vq);
+
+	rc = um_request_irq(VIRTIO_IRQ, info->call_fd, IRQ_READ, vu_interrupt,
+			IRQF_SHARED, info->name, vq);
+	if (rc)
+		goto error_irq;
+
+	rc = vhost_user_set_vring_num(vu_dev, index, num);
+	if (rc)
+		goto error_setup;
+
+	rc = vhost_user_set_vring_base(vu_dev, index, 0);
+	if (rc)
+		goto error_setup;
+
+	rc = vhost_user_set_vring_call(vu_dev, index, call_fds[1]);
+	if (rc)
+		goto error_setup;
+
+	rc = vhost_user_set_vring_addr(vu_dev, index,
+			virtqueue_get_desc_addr(vq),
+			virtqueue_get_used_addr(vq),
+			virtqueue_get_avail_addr(vq),
+			(u64) -1);
+	if (rc)
+		goto error_setup;
+
+	/* Close unused write end of call fds */
+	os_close_file(call_fds[1]);
+
+	return vq;
+
+error_setup:
+	um_free_irq(VIRTIO_IRQ, vq);
+error_irq:
+	vring_del_virtqueue(vq);
+error_create:
+	os_close_file(call_fds[1]);
+	os_close_file(info->call_fd);
+error_call:
+	os_close_file(info->kick_fd);
+error_kick:
+	kfree(info);
+error_kzalloc:
+	return ERR_PTR(rc);
+}
+
+static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
+		       struct virtqueue *vqs[], vq_callback_t *callbacks[],
+		       const char * const names[], const bool *ctx,
+		       struct irq_affinity *desc)
+{
+	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+	int i, queue_idx, rc;
+	struct virtqueue *vq;
+
+	rc = vhost_user_set_mem_table(vu_dev);
+	if (rc)
+		return rc;
+
+	for (i = 0; i < nvqs; ++i) {
+		if (!names[i]) {
+			vqs[i] = NULL;
+			continue;
+		}
+
+		vqs[i] = vu_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
+				ctx ? ctx[i] : false);
+		if (IS_ERR(vqs[i])) {
+			rc = PTR_ERR(vqs[i]);
+			goto error_setup;
+		}
+	}
+
+	list_for_each_entry(vq, &vdev->vqs, list) {
+		struct virtio_uml_vq_info *info = vq->priv;
+
+		rc = vhost_user_set_vring_kick(vu_dev, vq->index,
+				info->kick_fd);
+		if (rc)
+			goto error_setup;
+
+		rc = vhost_user_set_vring_enable(vu_dev, vq->index, true);
+		if (rc)
+			goto error_setup;
+	}
+
+	return 0;
+
+error_setup:
+	vu_del_vqs(vdev);
+	return rc;
+}
+
+static u64 vu_get_features(struct virtio_device *vdev)
+{
+	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+
+	return vu_dev->features;
+}
+
+static int vu_finalize_features(struct virtio_device *vdev)
+{
+	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+	u64 supported = vdev->features & VHOST_USER_SUPPORTED_F;
+
+	vring_transport_features(vdev);
+	vu_dev->features = vdev->features | supported;
+
+	return vhost_user_set_features(vu_dev, vu_dev->features);
+}
+
+static const char *vu_bus_name(struct virtio_device *vdev)
+{
+	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+
+	return vu_dev->pdev->name;
+}
+
+static const struct virtio_config_ops virtio_uml_config_ops = {
+	.get = vu_get,
+	.set = vu_set,
+	.get_status = vu_get_status,
+	.set_status = vu_set_status,
+	.reset = vu_reset,
+	.find_vqs = vu_find_vqs,
+	.del_vqs = vu_del_vqs,
+	.get_features = vu_get_features,
+	.finalize_features = vu_finalize_features,
+	.bus_name = vu_bus_name,
+};
+
+
+static void virtio_uml_release_dev(struct device *d)
+{
+	struct virtio_device *vdev =
+			container_of(d, struct virtio_device, dev);
+	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
+
+	os_close_file(vu_dev->sock);
+}
+
+/* Platform device */
+
+struct virtio_uml_platform_data {
+	u32 virtio_device_id;
+	const char *socket_path;
+};
+
+static int virtio_uml_probe(struct platform_device *pdev)
+{
+	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
+	struct virtio_uml_device *vu_dev;
+	int rc;
+
+	if (!pdata)
+		return -EINVAL;
+
+	vu_dev = devm_kzalloc(&pdev->dev, sizeof(*vu_dev), GFP_KERNEL);
+	if (!vu_dev)
+		return -ENOMEM;
+
+	vu_dev->vdev.dev.parent = &pdev->dev;
+	vu_dev->vdev.dev.release = virtio_uml_release_dev;
+	vu_dev->vdev.config = &virtio_uml_config_ops;
+	vu_dev->vdev.id.device = pdata->virtio_device_id;
+	vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID;
+	vu_dev->pdev = pdev;
+
+	do {
+		rc = os_connect_socket(pdata->socket_path);
+	} while (rc == -EINTR);
+	if (rc < 0)
+		return rc;
+	vu_dev->sock = rc;
+
+	rc = vhost_user_init(vu_dev);
+	if (rc)
+		goto error_init;
+
+	platform_set_drvdata(pdev, vu_dev);
+
+	rc = register_virtio_device(&vu_dev->vdev);
+	if (rc)
+		put_device(&vu_dev->vdev.dev);
+	return rc;
+
+error_init:
+	os_close_file(vu_dev->sock);
+	return rc;
+}
+
+static int virtio_uml_remove(struct platform_device *pdev)
+{
+	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
+
+	unregister_virtio_device(&vu_dev->vdev);
+	return 0;
+}
+
+/* Command line device list */
+
+static void vu_cmdline_release_dev(struct device *d)
+{
+}
+
+static struct device vu_cmdline_parent = {
+	.init_name = "virtio-uml-cmdline",
+	.release = vu_cmdline_release_dev,
+};
+
+static bool vu_cmdline_parent_registered;
+static int vu_cmdline_id;
+
+static int vu_cmdline_set(const char *device, const struct kernel_param *kp)
+{
+	const char *ids = strchr(device, ':');
+	unsigned int virtio_device_id;
+	int processed, consumed, err;
+	char *socket_path;
+	struct virtio_uml_platform_data *pdata;
+	struct platform_device *pdev;
+
+	if (!ids || ids == device)
+		return -EINVAL;
+
+	processed = sscanf(ids, ":%u%n:%d%n",
+			&virtio_device_id, &consumed,
+			&vu_cmdline_id, &consumed);
+
+	if (processed < 1 || ids[consumed])
+		return -EINVAL;
+
+	if (!vu_cmdline_parent_registered) {
+		err = device_register(&vu_cmdline_parent);
+		if (err) {
+			pr_err("Failed to register parent device!\n");
+			put_device(&vu_cmdline_parent);
+			return err;
+		}
+		vu_cmdline_parent_registered = true;
+	}
+
+	socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL);
+	if (!socket_path)
+		return -ENOMEM;
+
+	pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
+	if (!pdata) {
+		err = -ENOMEM;
+		goto error_kzalloc;
+	}
+
+	pdata->virtio_device_id = (u32) virtio_device_id;
+	pdata->socket_path = socket_path;
+
+	pr_info("Registering device virtio-uml.%d id=%d at %s\n",
+			vu_cmdline_id, virtio_device_id, socket_path);
+
+	pdev = platform_device_register_data(&vu_cmdline_parent, "virtio-uml",
+			vu_cmdline_id++, pdata, sizeof(*pdata));
+	err = PTR_ERR_OR_ZERO(pdev);
+	if (err)
+		goto error_register;
+	return 0;
+
+error_register:
+	kfree(pdata);
+error_kzalloc:
+	kfree(socket_path);
+	return err;
+}
+
+static int vu_cmdline_get_device(struct device *dev, void *data)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
+	char *buffer = data;
+	unsigned int len = strlen(buffer);
+
+	snprintf(buffer + len, PAGE_SIZE - len, "%s:%d:%d\n",
+			pdata->socket_path, pdata->virtio_device_id, pdev->id);
+	return 0;
+}
+
+static int vu_cmdline_get(char *buffer, const struct kernel_param *kp)
+{
+	buffer[0] = '\0';
+	if (vu_cmdline_parent_registered)
+		device_for_each_child(&vu_cmdline_parent, buffer,
+				vu_cmdline_get_device);
+	return strlen(buffer) + 1;
+}
+
+static const struct kernel_param_ops vu_cmdline_param_ops = {
+	.set = vu_cmdline_set,
+	.get = vu_cmdline_get,
+};
+
+device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR);
+
+
+static int vu_unregister_cmdline_device(struct device *dev, void *data)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
+
+	platform_device_unregister(pdev);
+	kfree(pdata->socket_path);
+	kfree(pdata);
+	return 0;
+}
+
+static void vu_unregister_cmdline_devices(void)
+{
+	if (vu_cmdline_parent_registered) {
+		device_for_each_child(&vu_cmdline_parent, NULL,
+				vu_unregister_cmdline_device);
+		device_unregister(&vu_cmdline_parent);
+		vu_cmdline_parent_registered = false;
+	}
+}
+
+/* Platform driver */
+
+static const struct of_device_id virtio_uml_match[] = {
+	{ .compatible = "virtio,uml", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, virtio_uml_match);
+
+static struct platform_driver virtio_uml_driver = {
+	.probe = virtio_uml_probe,
+	.remove = virtio_uml_remove,
+	.driver = {
+		.name = "virtio-uml",
+		.of_match_table = virtio_uml_match,
+	},
+};
+
+static int __init virtio_uml_init(void)
+{
+	return platform_driver_register(&virtio_uml_driver);
+}
+
+static void __exit virtio_uml_exit(void)
+{
+	platform_driver_unregister(&virtio_uml_driver);
+	vu_unregister_cmdline_devices();
+}
+
+module_init(virtio_uml_init);
+module_exit(virtio_uml_exit);
+__uml_exitcall(virtio_uml_exit);
+
+MODULE_DESCRIPTION("UML driver for vhost-user virtio devices");
+MODULE_LICENSE("GPL");
diff --git a/arch/um/include/asm/irq.h b/arch/um/include/asm/irq.h
index ce7a78c3bcf2..42c6205e2dc4 100644
--- a/arch/um/include/asm/irq.h
+++ b/arch/um/include/asm/irq.h
@@ -17,17 +17,18 @@ 
 #define TELNETD_IRQ 		12
 #define XTERM_IRQ 		13
 #define RANDOM_IRQ 		14
+#define VIRTIO_IRQ		15
 
 #ifdef CONFIG_UML_NET_VECTOR
 
-#define VECTOR_BASE_IRQ		15
+#define VECTOR_BASE_IRQ		(VIRTIO_IRQ + 1)
 #define VECTOR_IRQ_SPACE	8
 
 #define LAST_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ - 1)
 
 #else
 
-#define LAST_IRQ RANDOM_IRQ
+#define LAST_IRQ VIRTIO_IRQ
 
 #endif
 
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 4a62ac4251a5..da7ebbe4ebb3 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -36,6 +36,8 @@ 
 #define OS_LIB_PATH	"/usr/lib/"
 #endif
 
+#define OS_SENDMSG_MAX_FDS 8
+
 /*
  * types taken from stat_file() in hostfs_user.c
  * (if they are wrong here, they are wrong there...).
@@ -176,6 +178,11 @@  extern unsigned os_major(unsigned long long dev);
 extern unsigned os_minor(unsigned long long dev);
 extern unsigned long long os_makedev(unsigned major, unsigned minor);
 extern int os_falloc_punch(int fd, unsigned long long offset, int count);
+extern int os_eventfd(unsigned int initval, int flags);
+extern int os_eventfd_read(int fd, unsigned long long *value);
+extern int os_eventfd_write(int fd, unsigned long long value);
+extern int os_sendmsg_fds(int fd, const void *buf, unsigned int len,
+			  const int *fds, unsigned int fds_num);
 
 /* start_up.c */
 extern void os_early_checks(void);
diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
index 232b22307fdd..f3766889226b 100644
--- a/arch/um/kernel/ksyms.c
+++ b/arch/um/kernel/ksyms.c
@@ -38,6 +38,10 @@  EXPORT_SYMBOL(run_helper);
 EXPORT_SYMBOL(os_major);
 EXPORT_SYMBOL(os_minor);
 EXPORT_SYMBOL(os_makedev);
+EXPORT_SYMBOL(os_eventfd);
+EXPORT_SYMBOL(os_eventfd_read);
+EXPORT_SYMBOL(os_eventfd_write);
+EXPORT_SYMBOL(os_sendmsg_fds);
 
 EXPORT_SYMBOL(add_sigio_fd);
 EXPORT_SYMBOL(ignore_sigio_fd);
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index de58e976b9bc..f256be1d77bd 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -31,6 +31,7 @@  pgd_t swapper_pg_dir[PTRS_PER_PGD];
 
 /* Initialized at boot time, and readonly after that */
 unsigned long long highmem;
+EXPORT_SYMBOL(highmem);
 int kmalloc_ok = 0;
 
 /* Used during early boot */
diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c
index 5bf56af4d5b9..870c80c88a97 100644
--- a/arch/um/kernel/physmem.c
+++ b/arch/um/kernel/physmem.c
@@ -143,6 +143,7 @@  int phys_mapping(unsigned long phys, unsigned long long *offset_out)
 
 	return fd;
 }
+EXPORT_SYMBOL(phys_mapping);
 
 static int __init uml_mem_setup(char *line, int *add)
 {
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index a818ccef30ca..9de212bf4320 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -113,6 +113,7 @@  static int have_root __initdata = 0;
 
 /* Set in uml_mem_setup and modified in linux_main */
 long long physmem_size = 32 * 1024 * 1024;
+EXPORT_SYMBOL(physmem_size);
 
 static const char *usage_string =
 "User Mode Linux v%s\n"
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index f25b110d4e70..0c4e0298398d 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -15,6 +15,7 @@ 
 #include <sys/sysmacros.h>
 #include <sys/un.h>
 #include <sys/types.h>
+#include <sys/eventfd.h>
 #include <os.h>
 
 static void copy_stat(struct uml_stat *dst, const struct stat64 *src)
@@ -620,3 +621,66 @@  int os_falloc_punch(int fd, unsigned long long offset, int len)
 	return n;
 }
 
+int os_eventfd(unsigned int initval, int flags)
+{
+	int fd = eventfd(initval, flags);
+
+	if (fd < 0)
+		return -errno;
+	return fd;
+}
+
+int os_eventfd_read(int fd, unsigned long long *value)
+{
+	eventfd_t val;
+	int err = eventfd_read(fd, &val);
+
+	if (err < 0)
+		return -errno;
+	*value = val;
+	return 0;
+}
+
+int os_eventfd_write(int fd, unsigned long long value)
+{
+	int err = eventfd_write(fd, value);
+
+	if (err < 0)
+		return -errno;
+	return 0;
+}
+
+int os_sendmsg_fds(int fd, const void *buf, unsigned int len, const int *fds,
+		   unsigned int fds_num)
+{
+	struct iovec iov = {
+		.iov_base = (void *) buf,
+		.iov_len = len,
+	};
+	union {
+		char control[CMSG_SPACE(sizeof(*fds) * OS_SENDMSG_MAX_FDS)];
+		struct cmsghdr align;
+	} u;
+	unsigned int fds_size = sizeof(*fds) * fds_num;
+	struct msghdr msg = {
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+		.msg_control = u.control,
+		.msg_controllen = CMSG_SPACE(fds_size),
+	};
+	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+	int err;
+
+	if (fds_num > OS_SENDMSG_MAX_FDS)
+		return -EINVAL;
+	memset(u.control, 0, sizeof(u.control));
+	cmsg->cmsg_level = SOL_SOCKET;
+	cmsg->cmsg_type = SCM_RIGHTS;
+	cmsg->cmsg_len = CMSG_LEN(fds_size);
+	memcpy(CMSG_DATA(cmsg), fds, fds_size);
+	err = sendmsg(fd, &msg, 0);
+
+	if (err < 0)
+		return -errno;
+	return err;
+}