diff mbox

[v6,5/8] Add domain socket communication for vhost-user backend

Message ID 1389623119-15863-6-git-send-email-a.motakis@virtualopensystems.com
State New
Headers show

Commit Message

Antonios Motakis Jan. 13, 2014, 2:25 p.m. UTC
Add structures for passing vhost-user messages over a unix domain socket.
This is the equivalent of the existing vhost-kernel ioctls.

Connect to the named unix domain socket. The system call sendmsg
is used for communication. To be able to pass file descriptors
between processes - we use SCM_RIGHTS type in the message control header.

Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
---
 hw/virtio/vhost-backend.c | 306 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 301 insertions(+), 5 deletions(-)

Comments

Michael S. Tsirkin Jan. 14, 2014, 11:10 a.m. UTC | #1
On Mon, Jan 13, 2014 at 03:25:16PM +0100, Antonios Motakis wrote:
> Add structures for passing vhost-user messages over a unix domain socket.
> This is the equivalent of the existing vhost-kernel ioctls.
> 
> Connect to the named unix domain socket. The system call sendmsg
> is used for communication. To be able to pass file descriptors
> between processes - we use SCM_RIGHTS type in the message control header.
> 
> Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
> Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>

Not all comments in v5 review of this file have been
addressed.
I think if a comment was wrong it's a good idea to
add clarification in code on why it is, so future readers
aren't confused.

In particular I think always explicitly opening
domain sockets and forcing a specific client/server
model is a problem.

I think we also want the protocol documented in docs/
so future users don't have to read the code to connect
to qemu.

Can you add a unit test for this code?

We ship pxe ROM that enables virtio net,
so you can write a small stub that works as the
remote side of this protocol, poke at
guest memory to see that it has
the expected content (compare to what we get with
readl).

In theory you could also get and put some buffers
though that's not a must.


> ---
>  hw/virtio/vhost-backend.c | 306 +++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 301 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
> index 1d83e1d..460ee02 100644
> --- a/hw/virtio/vhost-backend.c
> +++ b/hw/virtio/vhost-backend.c
> @@ -11,34 +11,330 @@
>  #include "hw/virtio/vhost.h"
>  #include "hw/virtio/vhost-backend.h"
>  #include "qemu/error-report.h"
> +#include "qemu/sockets.h"
>  
>  #include <fcntl.h>
>  #include <unistd.h>
>  #include <sys/ioctl.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
> +#include <linux/vhost.h>
> +
> +#define VHOST_MEMORY_MAX_NREGIONS    8
> +#define VHOST_USER_SOCKTO            (300) /* msec */
> +
> +typedef enum VhostUserRequest {
> +    VHOST_USER_NONE = 0,
> +    VHOST_USER_GET_FEATURES = 1,
> +    VHOST_USER_SET_FEATURES = 2,
> +    VHOST_USER_SET_OWNER = 3,
> +    VHOST_USER_RESET_OWNER = 4,
> +    VHOST_USER_SET_MEM_TABLE = 5,
> +    VHOST_USER_SET_LOG_BASE = 6,
> +    VHOST_USER_SET_LOG_FD = 7,
> +    VHOST_USER_SET_VRING_NUM = 8,
> +    VHOST_USER_SET_VRING_ADDR = 9,
> +    VHOST_USER_SET_VRING_BASE = 10,
> +    VHOST_USER_GET_VRING_BASE = 11,
> +    VHOST_USER_SET_VRING_KICK = 12,
> +    VHOST_USER_SET_VRING_CALL = 13,
> +    VHOST_USER_SET_VRING_ERR = 14,
> +    VHOST_USER_NET_SET_BACKEND = 15,
> +    VHOST_USER_ECHO = 16,
> +    VHOST_USER_MAX
> +} VhostUserRequest;
> +
> +typedef struct VhostUserMemoryRegion {
> +    uint64_t guest_phys_addr;
> +    uint64_t memory_size;
> +    uint64_t userspace_addr;
> +} VhostUserMemoryRegion;
> +
> +typedef struct VhostUserMemory {
> +    uint32_t nregions;
> +    uint32_t padding;
> +    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> +} VhostUserMemory;
> +
> +typedef struct VhostUserMsg {
> +    VhostUserRequest request;
> +
> +#define VHOST_USER_VERSION_MASK     (0x3)
> +#define VHOST_USER_REPLY_MASK       (0x1<<2)
> +    uint32_t flags;
> +    uint32_t size; /* the following payload size */
> +    union {
> +        uint64_t u64;
> +        struct vhost_vring_state state;
> +        struct vhost_vring_addr addr;
> +        VhostUserMemory memory;
> +    };
> +} QEMU_PACKED VhostUserMsg;
> +
> +static VhostUserMsg m __attribute__ ((unused));
> +#define VHOST_USER_HDR_SIZE (sizeof(m.request) \
> +                            + sizeof(m.flags) \
> +                            + sizeof(m.size))
> +
> +#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE)
> +
> +/* The version of the protocol we support */
> +#define VHOST_USER_VERSION    (0x1)
> +
> +static int vhost_user_recv(int fd, VhostUserMsg *msg)
> +{
> +    ssize_t r;
> +    uint8_t *p = (uint8_t *) msg;
> +
> +    /* read the header */
> +    do {
> +        r = read(fd, p, VHOST_USER_HDR_SIZE);
> +    } while (r < 0 && errno == EINTR);
> +
> +    if (r < 0) {
> +        error_report("Failed to read msg header, reason: %s\n",
> +                     strerror(errno));
> +        goto fail;
> +    }
> +
> +    if (r != VHOST_USER_HDR_SIZE) {
> +        error_report("Failed to read msg header. Read %zu instead of %zu.\n",
> +                     r, VHOST_USER_HDR_SIZE);
> +        goto fail;
> +    }
> +
> +    /* validate received flags */
> +    if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
> +        error_report("Failed to read msg header."
> +                     " Flags 0x%x instead of 0x%x.\n",
> +                     msg->flags, VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
> +        goto fail;
> +    }
> +
> +    /* validate message size is sane */
> +    if (msg->size > VHOST_USER_PAYLOAD_SIZE) {
> +        error_report("Failed to read msg header."
> +                     " Size %d exceeds the maximum %zu.\n",
> +                     msg->size, VHOST_USER_PAYLOAD_SIZE);
> +        goto fail;
> +    }
> +
> +    p += VHOST_USER_HDR_SIZE;
> +
> +    /* read the payload */
> +    do {
> +        r = read(fd, p, msg->size);
> +    } while (r < 0 && errno == EINTR);
> +
> +    if (r < 0) {
> +        error_report("Failed to read msg payload, reason: %s\n",
> +                     strerror(errno));
> +        goto fail;
> +    }
> +
> +    if (r != msg->size) {
> +        error_report("Failed to read msg payload. Read %zu instead of %d.\n",
> +                     r, msg->size);
> +        goto fail;
> +    }
> +
> +    return 0;
> +
> +fail:
> +    return -1;
> +}
> +
> +static int vhost_user_send_fds(int fd, VhostUserMsg *msg, int *fds,
> +        size_t fd_num)
> +{
> +    int r;
> +
> +    struct msghdr msgh;
> +    struct iovec iov;
> +
> +    size_t fd_size = fd_num * sizeof(int);
> +    char control[CMSG_SPACE(fd_size)];
> +    struct cmsghdr *cmsg;
> +
> +    memset(&msgh, 0, sizeof(msgh));
> +    memset(control, 0, sizeof(control));
> +
> +    /* set the payload */
> +    iov.iov_base = msg;
> +    iov.iov_len = VHOST_USER_HDR_SIZE + msg->size;
> +
> +    msgh.msg_iov = &iov;
> +    msgh.msg_iovlen = 1;
> +
> +    if (fd_num) {
> +        msgh.msg_control = control;
> +        msgh.msg_controllen = sizeof(control);
> +
> +        cmsg = CMSG_FIRSTHDR(&msgh);
> +
> +        cmsg->cmsg_len = CMSG_LEN(fd_size);
> +        cmsg->cmsg_level = SOL_SOCKET;
> +        cmsg->cmsg_type = SCM_RIGHTS;
> +        memcpy(CMSG_DATA(cmsg), fds, fd_size);
> +    } else {
> +        msgh.msg_control = 0;
> +        msgh.msg_controllen = 0;
> +    }
> +
> +    do {
> +        r = sendmsg(fd, &msgh, 0);
> +    } while (r < 0 && errno == EINTR);
> +
> +    if (r < 0) {
> +        error_report("Failed to send msg(%d), reason: %s\n",
> +                msg->request, strerror(errno));
> +        goto fail;
> +    }
> +
> +    if (r != VHOST_USER_HDR_SIZE + msg->size) {
> +        error_report("Failed to send msg(%d). Sent %d instead of %zu.\n",
> +                msg->request, r, VHOST_USER_HDR_SIZE + msg->size);
> +        goto fail;
> +    }
> +
> +    return 0;
> +
> +fail:
> +    return -1;
> +}
> +
> +static int vhost_user_echo(struct vhost_dev *dev)
> +{
> +    VhostUserMsg msg;
> +    int fd = dev->control;
> +
> +    if (fd < 0) {
> +        return 0;
> +    }
> +
> +    /* check connection */
> +    msg.request = VHOST_USER_ECHO;
> +    msg.flags = VHOST_USER_VERSION;
> +    msg.size = 0;
> +
> +    if (vhost_user_send_fds(fd, &msg, 0, 0) < 0) {
> +        error_report("ECHO failed\n");
> +        return -1;
> +    }
> +
> +    if (vhost_user_recv(fd, &msg) < 0) {
> +        error_report("ECHO failed\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
>  
>  static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
>          void *arg)
>  {
> +    int fd = dev->control;
> +    VhostUserMsg msg;
> +    int result = 0;
> +    int fds[VHOST_MEMORY_MAX_NREGIONS];
> +    size_t fd_num = 0;
> +
>      assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
> -    error_report("vhost_user_call not implemented\n");
>  
> -    return -1;
> +    if (fd < 0) {
> +        return 0;
> +    }
> +
> +    msg.request = VHOST_USER_NONE;
> +    msg.flags = VHOST_USER_VERSION;
> +    msg.size = 0;
> +
> +    switch (request) {
> +    default:
> +        error_report("vhost-user trying to send unhandled ioctl\n");
> +        return -1;
> +        break;
> +    }
> +
> +    result = vhost_user_send_fds(fd, &msg, fds, fd_num);
> +
> +    return result;
>  }
>  
>  static int vhost_user_init(struct vhost_dev *dev, const char *devpath)
>  {
> +    int fd = -1;
> +    struct sockaddr_un un;
> +    struct timeval tv;
> +    size_t len;
> +
>      assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
> -    error_report("vhost_user_init not implemented\n");
>  
> +    /* Create the socket */
> +    fd = socket(AF_UNIX, SOCK_STREAM, 0);
> +    if (fd == -1) {
> +        error_report("socket: %s", strerror(errno));
> +        return -1;
> +    }
> +
> +    /* Set socket options */
> +    tv.tv_sec = VHOST_USER_SOCKTO / 1000;
> +    tv.tv_usec = (VHOST_USER_SOCKTO % 1000) * 1000;
> +
> +    if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(struct timeval))
> +            == -1) {
> +        error_report("setsockopt SO_SNDTIMEO: %s", strerror(errno));
> +        goto fail;
> +    }
> +
> +    if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(struct timeval))
> +            == -1) {
> +        error_report("setsockopt SO_RCVTIMEO: %s", strerror(errno));
> +        goto fail;
> +    }
> +
> +    /* Connect */
> +    un.sun_family = AF_UNIX;
> +    strcpy(un.sun_path, devpath);
> +
> +    len = sizeof(un.sun_family) + strlen(devpath);
> +
> +    if (connect(fd, (struct sockaddr *) &un, len) == -1) {
> +        error_report("connect: %s", strerror(errno));
> +        goto fail;
> +    }
> +
> +    /* Cleanup if there is previous connection left */
> +    if (dev->control >= 0) {
> +        dev->vhost_ops->vhost_backend_cleanup(dev);
> +    }
> +    dev->control = fd;
> +
> +    if (vhost_user_echo(dev) < 0) {
> +        dev->control = -1;
> +        goto fail;
> +    }
> +
> +    return fd;
> +
> +fail:
> +    close(fd);
>      return -1;
> +
>  }
>  
>  static int vhost_user_cleanup(struct vhost_dev *dev)
>  {
> +    int r = 0;
> +
>      assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
> -    error_report("vhost_user_cleanup not implemented\n");
>  
> -    return -1;
> +    if (dev->control >= 0) {
> +        r = close(dev->control);
> +    }
> +    dev->control = -1;
> +
> +    return r;
>  }
>  
>  static const VhostOps user_ops = {
> -- 
> 1.8.3.2
>
Antonios Motakis Jan. 14, 2014, 6:14 p.m. UTC | #2
On Tue, Jan 14, 2014 at 12:10 PM, Michael S. Tsirkin <mst@redhat.com> wrote:

> On Mon, Jan 13, 2014 at 03:25:16PM +0100, Antonios Motakis wrote:
> > Add structures for passing vhost-user messages over a unix domain socket.
> > This is the equivalent of the existing vhost-kernel ioctls.
> >
> > Connect to the named unix domain socket. The system call sendmsg
> > is used for communication. To be able to pass file descriptors
> > between processes - we use SCM_RIGHTS type in the message control header.
> >
> > Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
> > Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
>
> Not all comments in v5 review of this file have been
> addressed.
> I think if a comment was wrong it's a good idea to
> add clarification in code on why it is, so future readers
> aren't confused.
>
> In particular I think always explicitly opening
> domain sockets and forcing a specific client/server
> model is a problem.
>
>
I think we need to clarify how the model would work when using QEMU as the
server. As a client it is clear, because it parallels what vhost does when
used with the kernel, however as a server it is not completely clear what
the use case is and how it would work.


> I think we also want the protocol documented in docs/
> so future users don't have to read the code to connect
> to qemu.
>
> Can you add a unit test for this code?
>
> We ship pxe ROM that enables virtio net,
> so you can write a small stub that works as the
> remote side of this protocol, poke at
> guest memory to see that it has
> the expected content (compare to what we get with
> readl).
>
> In theory you could also get and put some buffers
> though that's not a must.
>
>
> > ---
> >  hw/virtio/vhost-backend.c | 306
> +++++++++++++++++++++++++++++++++++++++++++++-
> >  1 file changed, 301 insertions(+), 5 deletions(-)
> >
> > diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
> > index 1d83e1d..460ee02 100644
> > --- a/hw/virtio/vhost-backend.c
> > +++ b/hw/virtio/vhost-backend.c
> > @@ -11,34 +11,330 @@
> >  #include "hw/virtio/vhost.h"
> >  #include "hw/virtio/vhost-backend.h"
> >  #include "qemu/error-report.h"
> > +#include "qemu/sockets.h"
> >
> >  #include <fcntl.h>
> >  #include <unistd.h>
> >  #include <sys/ioctl.h>
> > +#include <sys/socket.h>
> > +#include <sys/un.h>
> > +#include <linux/vhost.h>
> > +
> > +#define VHOST_MEMORY_MAX_NREGIONS    8
> > +#define VHOST_USER_SOCKTO            (300) /* msec */
> > +
> > +typedef enum VhostUserRequest {
> > +    VHOST_USER_NONE = 0,
> > +    VHOST_USER_GET_FEATURES = 1,
> > +    VHOST_USER_SET_FEATURES = 2,
> > +    VHOST_USER_SET_OWNER = 3,
> > +    VHOST_USER_RESET_OWNER = 4,
> > +    VHOST_USER_SET_MEM_TABLE = 5,
> > +    VHOST_USER_SET_LOG_BASE = 6,
> > +    VHOST_USER_SET_LOG_FD = 7,
> > +    VHOST_USER_SET_VRING_NUM = 8,
> > +    VHOST_USER_SET_VRING_ADDR = 9,
> > +    VHOST_USER_SET_VRING_BASE = 10,
> > +    VHOST_USER_GET_VRING_BASE = 11,
> > +    VHOST_USER_SET_VRING_KICK = 12,
> > +    VHOST_USER_SET_VRING_CALL = 13,
> > +    VHOST_USER_SET_VRING_ERR = 14,
> > +    VHOST_USER_NET_SET_BACKEND = 15,
> > +    VHOST_USER_ECHO = 16,
> > +    VHOST_USER_MAX
> > +} VhostUserRequest;
> > +
> > +typedef struct VhostUserMemoryRegion {
> > +    uint64_t guest_phys_addr;
> > +    uint64_t memory_size;
> > +    uint64_t userspace_addr;
> > +} VhostUserMemoryRegion;
> > +
> > +typedef struct VhostUserMemory {
> > +    uint32_t nregions;
> > +    uint32_t padding;
> > +    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> > +} VhostUserMemory;
> > +
> > +typedef struct VhostUserMsg {
> > +    VhostUserRequest request;
> > +
> > +#define VHOST_USER_VERSION_MASK     (0x3)
> > +#define VHOST_USER_REPLY_MASK       (0x1<<2)
> > +    uint32_t flags;
> > +    uint32_t size; /* the following payload size */
> > +    union {
> > +        uint64_t u64;
> > +        struct vhost_vring_state state;
> > +        struct vhost_vring_addr addr;
> > +        VhostUserMemory memory;
> > +    };
> > +} QEMU_PACKED VhostUserMsg;
> > +
> > +static VhostUserMsg m __attribute__ ((unused));
> > +#define VHOST_USER_HDR_SIZE (sizeof(m.request) \
> > +                            + sizeof(m.flags) \
> > +                            + sizeof(m.size))
> > +
> > +#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE)
> > +
> > +/* The version of the protocol we support */
> > +#define VHOST_USER_VERSION    (0x1)
> > +
> > +static int vhost_user_recv(int fd, VhostUserMsg *msg)
> > +{
> > +    ssize_t r;
> > +    uint8_t *p = (uint8_t *) msg;
> > +
> > +    /* read the header */
> > +    do {
> > +        r = read(fd, p, VHOST_USER_HDR_SIZE);
> > +    } while (r < 0 && errno == EINTR);
> > +
> > +    if (r < 0) {
> > +        error_report("Failed to read msg header, reason: %s\n",
> > +                     strerror(errno));
> > +        goto fail;
> > +    }
> > +
> > +    if (r != VHOST_USER_HDR_SIZE) {
> > +        error_report("Failed to read msg header. Read %zu instead of
> %zu.\n",
> > +                     r, VHOST_USER_HDR_SIZE);
> > +        goto fail;
> > +    }
> > +
> > +    /* validate received flags */
> > +    if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
> > +        error_report("Failed to read msg header."
> > +                     " Flags 0x%x instead of 0x%x.\n",
> > +                     msg->flags, VHOST_USER_REPLY_MASK |
> VHOST_USER_VERSION);
> > +        goto fail;
> > +    }
> > +
> > +    /* validate message size is sane */
> > +    if (msg->size > VHOST_USER_PAYLOAD_SIZE) {
> > +        error_report("Failed to read msg header."
> > +                     " Size %d exceeds the maximum %zu.\n",
> > +                     msg->size, VHOST_USER_PAYLOAD_SIZE);
> > +        goto fail;
> > +    }
> > +
> > +    p += VHOST_USER_HDR_SIZE;
> > +
> > +    /* read the payload */
> > +    do {
> > +        r = read(fd, p, msg->size);
> > +    } while (r < 0 && errno == EINTR);
> > +
> > +    if (r < 0) {
> > +        error_report("Failed to read msg payload, reason: %s\n",
> > +                     strerror(errno));
> > +        goto fail;
> > +    }
> > +
> > +    if (r != msg->size) {
> > +        error_report("Failed to read msg payload. Read %zu instead of
> %d.\n",
> > +                     r, msg->size);
> > +        goto fail;
> > +    }
> > +
> > +    return 0;
> > +
> > +fail:
> > +    return -1;
> > +}
> > +
> > +static int vhost_user_send_fds(int fd, VhostUserMsg *msg, int *fds,
> > +        size_t fd_num)
> > +{
> > +    int r;
> > +
> > +    struct msghdr msgh;
> > +    struct iovec iov;
> > +
> > +    size_t fd_size = fd_num * sizeof(int);
> > +    char control[CMSG_SPACE(fd_size)];
> > +    struct cmsghdr *cmsg;
> > +
> > +    memset(&msgh, 0, sizeof(msgh));
> > +    memset(control, 0, sizeof(control));
> > +
> > +    /* set the payload */
> > +    iov.iov_base = msg;
> > +    iov.iov_len = VHOST_USER_HDR_SIZE + msg->size;
> > +
> > +    msgh.msg_iov = &iov;
> > +    msgh.msg_iovlen = 1;
> > +
> > +    if (fd_num) {
> > +        msgh.msg_control = control;
> > +        msgh.msg_controllen = sizeof(control);
> > +
> > +        cmsg = CMSG_FIRSTHDR(&msgh);
> > +
> > +        cmsg->cmsg_len = CMSG_LEN(fd_size);
> > +        cmsg->cmsg_level = SOL_SOCKET;
> > +        cmsg->cmsg_type = SCM_RIGHTS;
> > +        memcpy(CMSG_DATA(cmsg), fds, fd_size);
> > +    } else {
> > +        msgh.msg_control = 0;
> > +        msgh.msg_controllen = 0;
> > +    }
> > +
> > +    do {
> > +        r = sendmsg(fd, &msgh, 0);
> > +    } while (r < 0 && errno == EINTR);
> > +
> > +    if (r < 0) {
> > +        error_report("Failed to send msg(%d), reason: %s\n",
> > +                msg->request, strerror(errno));
> > +        goto fail;
> > +    }
> > +
> > +    if (r != VHOST_USER_HDR_SIZE + msg->size) {
> > +        error_report("Failed to send msg(%d). Sent %d instead of
> %zu.\n",
> > +                msg->request, r, VHOST_USER_HDR_SIZE + msg->size);
> > +        goto fail;
> > +    }
> > +
> > +    return 0;
> > +
> > +fail:
> > +    return -1;
> > +}
> > +
> > +static int vhost_user_echo(struct vhost_dev *dev)
> > +{
> > +    VhostUserMsg msg;
> > +    int fd = dev->control;
> > +
> > +    if (fd < 0) {
> > +        return 0;
> > +    }
> > +
> > +    /* check connection */
> > +    msg.request = VHOST_USER_ECHO;
> > +    msg.flags = VHOST_USER_VERSION;
> > +    msg.size = 0;
> > +
> > +    if (vhost_user_send_fds(fd, &msg, 0, 0) < 0) {
> > +        error_report("ECHO failed\n");
> > +        return -1;
> > +    }
> > +
> > +    if (vhost_user_recv(fd, &msg) < 0) {
> > +        error_report("ECHO failed\n");
> > +        return -1;
> > +    }
> > +
> > +    return 0;
> > +}
> >
> >  static int vhost_user_call(struct vhost_dev *dev, unsigned long int
> request,
> >          void *arg)
> >  {
> > +    int fd = dev->control;
> > +    VhostUserMsg msg;
> > +    int result = 0;
> > +    int fds[VHOST_MEMORY_MAX_NREGIONS];
> > +    size_t fd_num = 0;
> > +
> >      assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
> > -    error_report("vhost_user_call not implemented\n");
> >
> > -    return -1;
> > +    if (fd < 0) {
> > +        return 0;
> > +    }
> > +
> > +    msg.request = VHOST_USER_NONE;
> > +    msg.flags = VHOST_USER_VERSION;
> > +    msg.size = 0;
> > +
> > +    switch (request) {
> > +    default:
> > +        error_report("vhost-user trying to send unhandled ioctl\n");
> > +        return -1;
> > +        break;
> > +    }
> > +
> > +    result = vhost_user_send_fds(fd, &msg, fds, fd_num);
> > +
> > +    return result;
> >  }
> >
> >  static int vhost_user_init(struct vhost_dev *dev, const char *devpath)
> >  {
> > +    int fd = -1;
> > +    struct sockaddr_un un;
> > +    struct timeval tv;
> > +    size_t len;
> > +
> >      assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
> > -    error_report("vhost_user_init not implemented\n");
> >
> > +    /* Create the socket */
> > +    fd = socket(AF_UNIX, SOCK_STREAM, 0);
> > +    if (fd == -1) {
> > +        error_report("socket: %s", strerror(errno));
> > +        return -1;
> > +    }
> > +
> > +    /* Set socket options */
> > +    tv.tv_sec = VHOST_USER_SOCKTO / 1000;
> > +    tv.tv_usec = (VHOST_USER_SOCKTO % 1000) * 1000;
> > +
> > +    if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(struct
> timeval))
> > +            == -1) {
> > +        error_report("setsockopt SO_SNDTIMEO: %s", strerror(errno));
> > +        goto fail;
> > +    }
> > +
> > +    if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(struct
> timeval))
> > +            == -1) {
> > +        error_report("setsockopt SO_RCVTIMEO: %s", strerror(errno));
> > +        goto fail;
> > +    }
> > +
> > +    /* Connect */
> > +    un.sun_family = AF_UNIX;
> > +    strcpy(un.sun_path, devpath);
> > +
> > +    len = sizeof(un.sun_family) + strlen(devpath);
> > +
> > +    if (connect(fd, (struct sockaddr *) &un, len) == -1) {
> > +        error_report("connect: %s", strerror(errno));
> > +        goto fail;
> > +    }
> > +
> > +    /* Cleanup if there is previous connection left */
> > +    if (dev->control >= 0) {
> > +        dev->vhost_ops->vhost_backend_cleanup(dev);
> > +    }
> > +    dev->control = fd;
> > +
> > +    if (vhost_user_echo(dev) < 0) {
> > +        dev->control = -1;
> > +        goto fail;
> > +    }
> > +
> > +    return fd;
> > +
> > +fail:
> > +    close(fd);
> >      return -1;
> > +
> >  }
> >
> >  static int vhost_user_cleanup(struct vhost_dev *dev)
> >  {
> > +    int r = 0;
> > +
> >      assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
> > -    error_report("vhost_user_cleanup not implemented\n");
> >
> > -    return -1;
> > +    if (dev->control >= 0) {
> > +        r = close(dev->control);
> > +    }
> > +    dev->control = -1;
> > +
> > +    return r;
> >  }
> >
> >  static const VhostOps user_ops = {
> > --
> > 1.8.3.2
> >
>
Michael S. Tsirkin Jan. 15, 2014, 9:13 a.m. UTC | #3
On Tue, Jan 14, 2014 at 07:14:38PM +0100, Antonios Motakis wrote:
> 
> 
> 
> On Tue, Jan 14, 2014 at 12:10 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
> 
>     On Mon, Jan 13, 2014 at 03:25:16PM +0100, Antonios Motakis wrote:
>     > Add structures for passing vhost-user messages over a unix domain socket.
>     > This is the equivalent of the existing vhost-kernel ioctls.
>     >
>     > Connect to the named unix domain socket. The system call sendmsg
>     > is used for communication. To be able to pass file descriptors
>     > between processes - we use SCM_RIGHTS type in the message control header.
>     >
>     > Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
>     > Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
> 
>     Not all comments in v5 review of this file have been
>     addressed.
>     I think if a comment was wrong it's a good idea to
>     add clarification in code on why it is, so future readers
>     aren't confused.
> 
>     In particular I think always explicitly opening
>     domain sockets and forcing a specific client/server
>     model is a problem.
> 
> 
> 
> I think we need to clarify how the model would work when using QEMU as the
> server. As a client it is clear, because it parallels what vhost does when used
> with the kernel, however as a server it is not completely clear what the use
> case is and how it would work.
>  

You prefer connecting from qemu to vhost but the reverse mode
seems just as valid, it's an arbitrary decision.
If you don't specify nowait, server can simply wait for connection.
So pretty simple really, not much will change.

If you allow nowait, it's harder because it's not clear what to do
if there's a command that needs a response.
Maybe stop VCPU at that point, but that's not very easy.
For now I'm ok with not supporting nowait.


>     I think we also want the protocol documented in docs/
>     so future users don't have to read the code to connect
>     to qemu.
> 
>     Can you add a unit test for this code?
> 
>     We ship pxe ROM that enables virtio net,
>     so you can write a small stub that works as the
>     remote side of this protocol, poke at
>     guest memory to see that it has
>     the expected content (compare to what we get with
>     readl).
> 
>     In theory you could also get and put some buffers
>     though that's not a must.
> 
> 
>     > ---
>     >  hw/virtio/vhost-backend.c | 306
>     +++++++++++++++++++++++++++++++++++++++++++++-
>     >  1 file changed, 301 insertions(+), 5 deletions(-)
>     >
>     > diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
>     > index 1d83e1d..460ee02 100644
>     > --- a/hw/virtio/vhost-backend.c
>     > +++ b/hw/virtio/vhost-backend.c
>     > @@ -11,34 +11,330 @@
>     >  #include "hw/virtio/vhost.h"
>     >  #include "hw/virtio/vhost-backend.h"
>     >  #include "qemu/error-report.h"
>     > +#include "qemu/sockets.h"
>     >
>     >  #include <fcntl.h>
>     >  #include <unistd.h>
>     >  #include <sys/ioctl.h>
>     > +#include <sys/socket.h>
>     > +#include <sys/un.h>
>     > +#include <linux/vhost.h>
>     > +
>     > +#define VHOST_MEMORY_MAX_NREGIONS    8
>     > +#define VHOST_USER_SOCKTO            (300) /* msec */
>     > +
>     > +typedef enum VhostUserRequest {
>     > +    VHOST_USER_NONE = 0,
>     > +    VHOST_USER_GET_FEATURES = 1,
>     > +    VHOST_USER_SET_FEATURES = 2,
>     > +    VHOST_USER_SET_OWNER = 3,
>     > +    VHOST_USER_RESET_OWNER = 4,
>     > +    VHOST_USER_SET_MEM_TABLE = 5,
>     > +    VHOST_USER_SET_LOG_BASE = 6,
>     > +    VHOST_USER_SET_LOG_FD = 7,
>     > +    VHOST_USER_SET_VRING_NUM = 8,
>     > +    VHOST_USER_SET_VRING_ADDR = 9,
>     > +    VHOST_USER_SET_VRING_BASE = 10,
>     > +    VHOST_USER_GET_VRING_BASE = 11,
>     > +    VHOST_USER_SET_VRING_KICK = 12,
>     > +    VHOST_USER_SET_VRING_CALL = 13,
>     > +    VHOST_USER_SET_VRING_ERR = 14,
>     > +    VHOST_USER_NET_SET_BACKEND = 15,
>     > +    VHOST_USER_ECHO = 16,
>     > +    VHOST_USER_MAX
>     > +} VhostUserRequest;
>     > +
>     > +typedef struct VhostUserMemoryRegion {
>     > +    uint64_t guest_phys_addr;
>     > +    uint64_t memory_size;
>     > +    uint64_t userspace_addr;
>     > +} VhostUserMemoryRegion;
>     > +
>     > +typedef struct VhostUserMemory {
>     > +    uint32_t nregions;
>     > +    uint32_t padding;
>     > +    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
>     > +} VhostUserMemory;
>     > +
>     > +typedef struct VhostUserMsg {
>     > +    VhostUserRequest request;
>     > +
>     > +#define VHOST_USER_VERSION_MASK     (0x3)
>     > +#define VHOST_USER_REPLY_MASK       (0x1<<2)
>     > +    uint32_t flags;
>     > +    uint32_t size; /* the following payload size */
>     > +    union {
>     > +        uint64_t u64;
>     > +        struct vhost_vring_state state;
>     > +        struct vhost_vring_addr addr;
>     > +        VhostUserMemory memory;
>     > +    };
>     > +} QEMU_PACKED VhostUserMsg;
>     > +
>     > +static VhostUserMsg m __attribute__ ((unused));
>     > +#define VHOST_USER_HDR_SIZE (sizeof(m.request) \
>     > +                            + sizeof(m.flags) \
>     > +                            + sizeof(m.size))
>     > +
>     > +#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE)
>     > +
>     > +/* The version of the protocol we support */
>     > +#define VHOST_USER_VERSION    (0x1)
>     > +
>     > +static int vhost_user_recv(int fd, VhostUserMsg *msg)
>     > +{
>     > +    ssize_t r;
>     > +    uint8_t *p = (uint8_t *) msg;
>     > +
>     > +    /* read the header */
>     > +    do {
>     > +        r = read(fd, p, VHOST_USER_HDR_SIZE);
>     > +    } while (r < 0 && errno == EINTR);
>     > +
>     > +    if (r < 0) {
>     > +        error_report("Failed to read msg header, reason: %s\n",
>     > +                     strerror(errno));
>     > +        goto fail;
>     > +    }
>     > +
>     > +    if (r != VHOST_USER_HDR_SIZE) {
>     > +        error_report("Failed to read msg header. Read %zu instead of
>     %zu.\n",
>     > +                     r, VHOST_USER_HDR_SIZE);
>     > +        goto fail;
>     > +    }
>     > +
>     > +    /* validate received flags */
>     > +    if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
>     > +        error_report("Failed to read msg header."
>     > +                     " Flags 0x%x instead of 0x%x.\n",
>     > +                     msg->flags, VHOST_USER_REPLY_MASK |
>     VHOST_USER_VERSION);
>     > +        goto fail;
>     > +    }
>     > +
>     > +    /* validate message size is sane */
>     > +    if (msg->size > VHOST_USER_PAYLOAD_SIZE) {
>     > +        error_report("Failed to read msg header."
>     > +                     " Size %d exceeds the maximum %zu.\n",
>     > +                     msg->size, VHOST_USER_PAYLOAD_SIZE);
>     > +        goto fail;
>     > +    }
>     > +
>     > +    p += VHOST_USER_HDR_SIZE;
>     > +
>     > +    /* read the payload */
>     > +    do {
>     > +        r = read(fd, p, msg->size);
>     > +    } while (r < 0 && errno == EINTR);
>     > +
>     > +    if (r < 0) {
>     > +        error_report("Failed to read msg payload, reason: %s\n",
>     > +                     strerror(errno));
>     > +        goto fail;
>     > +    }
>     > +
>     > +    if (r != msg->size) {
>     > +        error_report("Failed to read msg payload. Read %zu instead of
>     %d.\n",
>     > +                     r, msg->size);
>     > +        goto fail;
>     > +    }
>     > +
>     > +    return 0;
>     > +
>     > +fail:
>     > +    return -1;
>     > +}
>     > +
>     > +static int vhost_user_send_fds(int fd, VhostUserMsg *msg, int *fds,
>     > +        size_t fd_num)
>     > +{
>     > +    int r;
>     > +
>     > +    struct msghdr msgh;
>     > +    struct iovec iov;
>     > +
>     > +    size_t fd_size = fd_num * sizeof(int);
>     > +    char control[CMSG_SPACE(fd_size)];
>     > +    struct cmsghdr *cmsg;
>     > +
>     > +    memset(&msgh, 0, sizeof(msgh));
>     > +    memset(control, 0, sizeof(control));
>     > +
>     > +    /* set the payload */
>     > +    iov.iov_base = msg;
>     > +    iov.iov_len = VHOST_USER_HDR_SIZE + msg->size;
>     > +
>     > +    msgh.msg_iov = &iov;
>     > +    msgh.msg_iovlen = 1;
>     > +
>     > +    if (fd_num) {
>     > +        msgh.msg_control = control;
>     > +        msgh.msg_controllen = sizeof(control);
>     > +
>     > +        cmsg = CMSG_FIRSTHDR(&msgh);
>     > +
>     > +        cmsg->cmsg_len = CMSG_LEN(fd_size);
>     > +        cmsg->cmsg_level = SOL_SOCKET;
>     > +        cmsg->cmsg_type = SCM_RIGHTS;
>     > +        memcpy(CMSG_DATA(cmsg), fds, fd_size);
>     > +    } else {
>     > +        msgh.msg_control = 0;
>     > +        msgh.msg_controllen = 0;
>     > +    }
>     > +
>     > +    do {
>     > +        r = sendmsg(fd, &msgh, 0);
>     > +    } while (r < 0 && errno == EINTR);
>     > +
>     > +    if (r < 0) {
>     > +        error_report("Failed to send msg(%d), reason: %s\n",
>     > +                msg->request, strerror(errno));
>     > +        goto fail;
>     > +    }
>     > +
>     > +    if (r != VHOST_USER_HDR_SIZE + msg->size) {
>     > +        error_report("Failed to send msg(%d). Sent %d instead of %zu.\
>     n",
>     > +                msg->request, r, VHOST_USER_HDR_SIZE + msg->size);
>     > +        goto fail;
>     > +    }
>     > +
>     > +    return 0;
>     > +
>     > +fail:
>     > +    return -1;
>     > +}
>     > +
>     > +static int vhost_user_echo(struct vhost_dev *dev)
>     > +{
>     > +    VhostUserMsg msg;
>     > +    int fd = dev->control;
>     > +
>     > +    if (fd < 0) {
>     > +        return 0;
>     > +    }
>     > +
>     > +    /* check connection */
>     > +    msg.request = VHOST_USER_ECHO;
>     > +    msg.flags = VHOST_USER_VERSION;
>     > +    msg.size = 0;
>     > +
>     > +    if (vhost_user_send_fds(fd, &msg, 0, 0) < 0) {
>     > +        error_report("ECHO failed\n");
>     > +        return -1;
>     > +    }
>     > +
>     > +    if (vhost_user_recv(fd, &msg) < 0) {
>     > +        error_report("ECHO failed\n");
>     > +        return -1;
>     > +    }
>     > +
>     > +    return 0;
>     > +}
>     >
>     >  static int vhost_user_call(struct vhost_dev *dev, unsigned long int
>     request,
>     >          void *arg)
>     >  {
>     > +    int fd = dev->control;
>     > +    VhostUserMsg msg;
>     > +    int result = 0;
>     > +    int fds[VHOST_MEMORY_MAX_NREGIONS];
>     > +    size_t fd_num = 0;
>     > +
>     >      assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
>     > -    error_report("vhost_user_call not implemented\n");
>     >
>     > -    return -1;
>     > +    if (fd < 0) {
>     > +        return 0;
>     > +    }
>     > +
>     > +    msg.request = VHOST_USER_NONE;
>     > +    msg.flags = VHOST_USER_VERSION;
>     > +    msg.size = 0;
>     > +
>     > +    switch (request) {
>     > +    default:
>     > +        error_report("vhost-user trying to send unhandled ioctl\n");
>     > +        return -1;
>     > +        break;
>     > +    }
>     > +
>     > +    result = vhost_user_send_fds(fd, &msg, fds, fd_num);
>     > +
>     > +    return result;
>     >  }
>     >
>     >  static int vhost_user_init(struct vhost_dev *dev, const char *devpath)
>     >  {
>     > +    int fd = -1;
>     > +    struct sockaddr_un un;
>     > +    struct timeval tv;
>     > +    size_t len;
>     > +
>     >      assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
>     > -    error_report("vhost_user_init not implemented\n");
>     >
>     > +    /* Create the socket */
>     > +    fd = socket(AF_UNIX, SOCK_STREAM, 0);
>     > +    if (fd == -1) {
>     > +        error_report("socket: %s", strerror(errno));
>     > +        return -1;
>     > +    }
>     > +
>     > +    /* Set socket options */
>     > +    tv.tv_sec = VHOST_USER_SOCKTO / 1000;
>     > +    tv.tv_usec = (VHOST_USER_SOCKTO % 1000) * 1000;
>     > +
>     > +    if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(struct
>     timeval))
>     > +            == -1) {
>     > +        error_report("setsockopt SO_SNDTIMEO: %s", strerror(errno));
>     > +        goto fail;
>     > +    }
>     > +
>     > +    if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(struct
>     timeval))
>     > +            == -1) {
>     > +        error_report("setsockopt SO_RCVTIMEO: %s", strerror(errno));
>     > +        goto fail;
>     > +    }
>     > +
>     > +    /* Connect */
>     > +    un.sun_family = AF_UNIX;
>     > +    strcpy(un.sun_path, devpath);
>     > +
>     > +    len = sizeof(un.sun_family) + strlen(devpath);
>     > +
>     > +    if (connect(fd, (struct sockaddr *) &un, len) == -1) {
>     > +        error_report("connect: %s", strerror(errno));
>     > +        goto fail;
>     > +    }
>     > +
>     > +    /* Cleanup if there is previous connection left */
>     > +    if (dev->control >= 0) {
>     > +        dev->vhost_ops->vhost_backend_cleanup(dev);
>     > +    }
>     > +    dev->control = fd;
>     > +
>     > +    if (vhost_user_echo(dev) < 0) {
>     > +        dev->control = -1;
>     > +        goto fail;
>     > +    }
>     > +
>     > +    return fd;
>     > +
>     > +fail:
>     > +    close(fd);
>     >      return -1;
>     > +
>     >  }
>     >
>     >  static int vhost_user_cleanup(struct vhost_dev *dev)
>     >  {
>     > +    int r = 0;
>     > +
>     >      assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
>     > -    error_report("vhost_user_cleanup not implemented\n");
>     >
>     > -    return -1;
>     > +    if (dev->control >= 0) {
>     > +        r = close(dev->control);
>     > +    }
>     > +    dev->control = -1;
>     > +
>     > +    return r;
>     >  }
>     >
>     >  static const VhostOps user_ops = {
>     > --
>     > 1.8.3.2
>     >
> 
>
diff mbox

Patch

diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
index 1d83e1d..460ee02 100644
--- a/hw/virtio/vhost-backend.c
+++ b/hw/virtio/vhost-backend.c
@@ -11,34 +11,330 @@ 
 #include "hw/virtio/vhost.h"
 #include "hw/virtio/vhost-backend.h"
 #include "qemu/error-report.h"
+#include "qemu/sockets.h"
 
 #include <fcntl.h>
 #include <unistd.h>
 #include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <linux/vhost.h>
+
+#define VHOST_MEMORY_MAX_NREGIONS    8
+#define VHOST_USER_SOCKTO            (300) /* msec */
+
+typedef enum VhostUserRequest {
+    VHOST_USER_NONE = 0,
+    VHOST_USER_GET_FEATURES = 1,
+    VHOST_USER_SET_FEATURES = 2,
+    VHOST_USER_SET_OWNER = 3,
+    VHOST_USER_RESET_OWNER = 4,
+    VHOST_USER_SET_MEM_TABLE = 5,
+    VHOST_USER_SET_LOG_BASE = 6,
+    VHOST_USER_SET_LOG_FD = 7,
+    VHOST_USER_SET_VRING_NUM = 8,
+    VHOST_USER_SET_VRING_ADDR = 9,
+    VHOST_USER_SET_VRING_BASE = 10,
+    VHOST_USER_GET_VRING_BASE = 11,
+    VHOST_USER_SET_VRING_KICK = 12,
+    VHOST_USER_SET_VRING_CALL = 13,
+    VHOST_USER_SET_VRING_ERR = 14,
+    VHOST_USER_NET_SET_BACKEND = 15,
+    VHOST_USER_ECHO = 16,
+    VHOST_USER_MAX
+} VhostUserRequest;
+
+typedef struct VhostUserMemoryRegion {
+    uint64_t guest_phys_addr;
+    uint64_t memory_size;
+    uint64_t userspace_addr;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+    uint32_t nregions;
+    uint32_t padding;
+    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserMsg {
+    VhostUserRequest request;
+
+#define VHOST_USER_VERSION_MASK     (0x3)
+#define VHOST_USER_REPLY_MASK       (0x1<<2)
+    uint32_t flags;
+    uint32_t size; /* the following payload size */
+    union {
+        uint64_t u64;
+        struct vhost_vring_state state;
+        struct vhost_vring_addr addr;
+        VhostUserMemory memory;
+    };
+} QEMU_PACKED VhostUserMsg;
+
+static VhostUserMsg m __attribute__ ((unused));
+#define VHOST_USER_HDR_SIZE (sizeof(m.request) \
+                            + sizeof(m.flags) \
+                            + sizeof(m.size))
+
+#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE)
+
+/* The version of the protocol we support */
+#define VHOST_USER_VERSION    (0x1)
+
+static int vhost_user_recv(int fd, VhostUserMsg *msg)
+{
+    ssize_t r;
+    uint8_t *p = (uint8_t *) msg;
+
+    /* read the header */
+    do {
+        r = read(fd, p, VHOST_USER_HDR_SIZE);
+    } while (r < 0 && errno == EINTR);
+
+    if (r < 0) {
+        error_report("Failed to read msg header, reason: %s\n",
+                     strerror(errno));
+        goto fail;
+    }
+
+    if (r != VHOST_USER_HDR_SIZE) {
+        error_report("Failed to read msg header. Read %zu instead of %zu.\n",
+                     r, VHOST_USER_HDR_SIZE);
+        goto fail;
+    }
+
+    /* validate received flags */
+    if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
+        error_report("Failed to read msg header."
+                     " Flags 0x%x instead of 0x%x.\n",
+                     msg->flags, VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
+        goto fail;
+    }
+
+    /* validate message size is sane */
+    if (msg->size > VHOST_USER_PAYLOAD_SIZE) {
+        error_report("Failed to read msg header."
+                     " Size %d exceeds the maximum %zu.\n",
+                     msg->size, VHOST_USER_PAYLOAD_SIZE);
+        goto fail;
+    }
+
+    p += VHOST_USER_HDR_SIZE;
+
+    /* read the payload */
+    do {
+        r = read(fd, p, msg->size);
+    } while (r < 0 && errno == EINTR);
+
+    if (r < 0) {
+        error_report("Failed to read msg payload, reason: %s\n",
+                     strerror(errno));
+        goto fail;
+    }
+
+    if (r != msg->size) {
+        error_report("Failed to read msg payload. Read %zu instead of %d.\n",
+                     r, msg->size);
+        goto fail;
+    }
+
+    return 0;
+
+fail:
+    return -1;
+}
+
+static int vhost_user_send_fds(int fd, VhostUserMsg *msg, int *fds,
+        size_t fd_num)
+{
+    int r;
+
+    struct msghdr msgh;
+    struct iovec iov;
+
+    size_t fd_size = fd_num * sizeof(int);
+    char control[CMSG_SPACE(fd_size)];
+    struct cmsghdr *cmsg;
+
+    memset(&msgh, 0, sizeof(msgh));
+    memset(control, 0, sizeof(control));
+
+    /* set the payload */
+    iov.iov_base = msg;
+    iov.iov_len = VHOST_USER_HDR_SIZE + msg->size;
+
+    msgh.msg_iov = &iov;
+    msgh.msg_iovlen = 1;
+
+    if (fd_num) {
+        msgh.msg_control = control;
+        msgh.msg_controllen = sizeof(control);
+
+        cmsg = CMSG_FIRSTHDR(&msgh);
+
+        cmsg->cmsg_len = CMSG_LEN(fd_size);
+        cmsg->cmsg_level = SOL_SOCKET;
+        cmsg->cmsg_type = SCM_RIGHTS;
+        memcpy(CMSG_DATA(cmsg), fds, fd_size);
+    } else {
+        msgh.msg_control = 0;
+        msgh.msg_controllen = 0;
+    }
+
+    do {
+        r = sendmsg(fd, &msgh, 0);
+    } while (r < 0 && errno == EINTR);
+
+    if (r < 0) {
+        error_report("Failed to send msg(%d), reason: %s\n",
+                msg->request, strerror(errno));
+        goto fail;
+    }
+
+    if (r != VHOST_USER_HDR_SIZE + msg->size) {
+        error_report("Failed to send msg(%d). Sent %d instead of %zu.\n",
+                msg->request, r, VHOST_USER_HDR_SIZE + msg->size);
+        goto fail;
+    }
+
+    return 0;
+
+fail:
+    return -1;
+}
+
+static int vhost_user_echo(struct vhost_dev *dev)
+{
+    VhostUserMsg msg;
+    int fd = dev->control;
+
+    if (fd < 0) {
+        return 0;
+    }
+
+    /* check connection */
+    msg.request = VHOST_USER_ECHO;
+    msg.flags = VHOST_USER_VERSION;
+    msg.size = 0;
+
+    if (vhost_user_send_fds(fd, &msg, 0, 0) < 0) {
+        error_report("ECHO failed\n");
+        return -1;
+    }
+
+    if (vhost_user_recv(fd, &msg) < 0) {
+        error_report("ECHO failed\n");
+        return -1;
+    }
+
+    return 0;
+}
 
 static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
         void *arg)
 {
+    int fd = dev->control;
+    VhostUserMsg msg;
+    int result = 0;
+    int fds[VHOST_MEMORY_MAX_NREGIONS];
+    size_t fd_num = 0;
+
     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
-    error_report("vhost_user_call not implemented\n");
 
-    return -1;
+    if (fd < 0) {
+        return 0;
+    }
+
+    msg.request = VHOST_USER_NONE;
+    msg.flags = VHOST_USER_VERSION;
+    msg.size = 0;
+
+    switch (request) {
+    default:
+        error_report("vhost-user trying to send unhandled ioctl\n");
+        return -1;
+        break;
+    }
+
+    result = vhost_user_send_fds(fd, &msg, fds, fd_num);
+
+    return result;
 }
 
 static int vhost_user_init(struct vhost_dev *dev, const char *devpath)
 {
+    int fd = -1;
+    struct sockaddr_un un;
+    struct timeval tv;
+    size_t len;
+
     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
-    error_report("vhost_user_init not implemented\n");
 
+    /* Create the socket */
+    fd = socket(AF_UNIX, SOCK_STREAM, 0);
+    if (fd == -1) {
+        error_report("socket: %s", strerror(errno));
+        return -1;
+    }
+
+    /* Set socket options */
+    tv.tv_sec = VHOST_USER_SOCKTO / 1000;
+    tv.tv_usec = (VHOST_USER_SOCKTO % 1000) * 1000;
+
+    if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(struct timeval))
+            == -1) {
+        error_report("setsockopt SO_SNDTIMEO: %s", strerror(errno));
+        goto fail;
+    }
+
+    if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(struct timeval))
+            == -1) {
+        error_report("setsockopt SO_RCVTIMEO: %s", strerror(errno));
+        goto fail;
+    }
+
+    /* Connect */
+    un.sun_family = AF_UNIX;
+    strcpy(un.sun_path, devpath);
+
+    len = sizeof(un.sun_family) + strlen(devpath);
+
+    if (connect(fd, (struct sockaddr *) &un, len) == -1) {
+        error_report("connect: %s", strerror(errno));
+        goto fail;
+    }
+
+    /* Cleanup if there is previous connection left */
+    if (dev->control >= 0) {
+        dev->vhost_ops->vhost_backend_cleanup(dev);
+    }
+    dev->control = fd;
+
+    if (vhost_user_echo(dev) < 0) {
+        dev->control = -1;
+        goto fail;
+    }
+
+    return fd;
+
+fail:
+    close(fd);
     return -1;
+
 }
 
 static int vhost_user_cleanup(struct vhost_dev *dev)
 {
+    int r = 0;
+
     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
-    error_report("vhost_user_cleanup not implemented\n");
 
-    return -1;
+    if (dev->control >= 0) {
+        r = close(dev->control);
+    }
+    dev->control = -1;
+
+    return r;
 }
 
 static const VhostOps user_ops = {