diff mbox

[v4,4/7] Add domain socket communication for vhost-user backend

Message ID 1387548640-7120-5-git-send-email-m.hamayun@virtualopensystems.com
State New
Headers show

Commit Message

Mian M. Hamayun Dec. 20, 2013, 2:10 p.m. UTC
From: Antonios Motakis <a.motakis@virtualopensystems.com>

Add structures for passing vhost-user messages over a unix domain socket.
This is the equivalent to the existing vhost-kernel ioctls.

Connect to the named unix domain socket. The system call sendmsg
is used for communication. To be able to pass file descriptors
between processes - we use SCM_RIGHTS type in the message control header.

Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
---
 hw/virtio/vhost-backend.c | 167 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 161 insertions(+), 6 deletions(-)

Comments

Michael S. Tsirkin Dec. 23, 2013, 4:39 p.m. UTC | #1
On Fri, Dec 20, 2013 at 03:10:37PM +0100, Mian M. Hamayun wrote:
> From: Antonios Motakis <a.motakis@virtualopensystems.com>
> 
> Add structures for passing vhost-user messages over a unix domain socket.
> This is the equivalent to the existing vhost-kernel ioctls.
> 
> Connect to the named unix domain socket. The system call sendmsg
> is used for communication. To be able to pass file descriptors
> between processes - we use SCM_RIGHTS type in the message control header.
> 
> Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
> Signed-off-by: Nikolay Nikolaev <n.nikolaev@virtualopensystems.com>
> ---
>  hw/virtio/vhost-backend.c | 167 ++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 161 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
> index 847809f..96d3bf0 100644
> --- a/hw/virtio/vhost-backend.c
> +++ b/hw/virtio/vhost-backend.c
> @@ -14,30 +14,185 @@
>  #include <fcntl.h>
>  #include <unistd.h>
>  #include <sys/ioctl.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
> +#include <linux/vhost.h>
> +
> +#define VHOST_MEMORY_MAX_NREGIONS    8
> +
> +typedef enum VhostUserRequest {
> +    VHOST_USER_NONE = 0,
> +    VHOST_USER_GET_FEATURES = 1,
> +    VHOST_USER_SET_FEATURES = 2,
> +    VHOST_USER_SET_OWNER = 3,
> +    VHOST_USER_RESET_OWNER = 4,
> +    VHOST_USER_SET_MEM_TABLE = 5,
> +    VHOST_USER_SET_LOG_BASE = 6,
> +    VHOST_USER_SET_LOG_FD = 7,
> +    VHOST_USER_SET_VRING_NUM = 8,
> +    VHOST_USER_SET_VRING_ADDR = 9,
> +    VHOST_USER_SET_VRING_BASE = 10,
> +    VHOST_USER_GET_VRING_BASE = 11,
> +    VHOST_USER_SET_VRING_KICK = 12,
> +    VHOST_USER_SET_VRING_CALL = 13,
> +    VHOST_USER_SET_VRING_ERR = 14,
> +    VHOST_USER_NET_SET_BACKEND = 15,
> +    VHOST_USER_MAX
> +} VhostUserRequest;
> +
> +typedef struct VhostUserMemoryRegion {
> +    __u64 guest_phys_addr;
> +    __u64 memory_size;
> +    __u64 userspace_addr;
> +} VhostUserMemoryRegion;
> +
> +typedef struct VhostUserMemory {
> +    __u32 nregions;

There will be padding here: not a good idea as it will
be different depending on the compiler.
Better add an explicit dummy 32 field here.

> +    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> +} VhostUserMemory;
> +
> +typedef struct VhostUserMsg {
> +    VhostUserRequest request;
> +
> +    int flags;

Same thing here.

> +    union {
> +        uint64_t    u64;
> +        int         fd;
> +        struct vhost_vring_state state;
> +        struct vhost_vring_addr addr;
> +        struct vhost_vring_file file;
> +
> +        VhostUserMemory memory;

A union of fields of different sizes, this
is likely to leak data from stack.

> +    };
> +} VhostUserMsg;
> +
> +static int vhost_user_recv(int fd, VhostUserMsg *msg)
> +{
> +    ssize_t r = read(fd, msg, sizeof(VhostUserMsg));

Can't this return EINTR?

> +
> +    return (r == sizeof(VhostUserMsg)) ? 0 : -1;

() not needed around == here.

> +}
> +
> +static int vhost_user_send_fds(int fd, const VhostUserMsg *msg, int *fds,
> +        size_t fd_num)
> +{
> +    int r;
> +
> +    struct msghdr msgh;
> +    struct iovec iov[1];
> +
> +    size_t fd_size = fd_num * sizeof(int);
> +    char control[CMSG_SPACE(fd_size)];
> +    struct cmsghdr *cmsg;
> +
> +    memset(&msgh, 0, sizeof(msgh));
> +    memset(control, 0, sizeof(control));
> +
> +    /* set the payload */
> +    iov[0].iov_base = (void *) msg;

Don't put space after ).


> +    iov[0].iov_len = sizeof(VhostUserMsg);
> +
> +    msgh.msg_iov = iov;
> +    msgh.msg_iovlen = 1;
> +
> +    if (fd_num) {
> +        msgh.msg_control = control;
> +        msgh.msg_controllen = sizeof(control);
> +
> +        cmsg = CMSG_FIRSTHDR(&msgh);
> +
> +        cmsg->cmsg_len = CMSG_LEN(fd_size);
> +        cmsg->cmsg_level = SOL_SOCKET;
> +        cmsg->cmsg_type = SCM_RIGHTS;
> +        memcpy(CMSG_DATA(cmsg), fds, fd_size);
> +    } else {
> +        msgh.msg_control = 0;
> +        msgh.msg_controllen = 0;
> +    }
> +
> +    do {
> +        r = sendmsg(fd, &msgh, 0);
> +    } while (r < 0 && errno == EINTR);

Won't this block, making guest unavailable,
if server is slow in consuming our messages?


> +
> +    if (r < 0) {
> +        fprintf(stderr, "Failed to send msg(%d), reason: %s\n",
> +                msg->request, strerror(errno));

Don't use fprintf for error messages, they might not
be seen by management.

> +    } else {
> +        r = 0;
> +    }
> +
> +    return r;
> +}
>  
>  static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
>          void *arg)
>  {
> +    int fd = dev->control;
> +    VhostUserMsg msg;
> +    int result = 0, need_reply = 0;
> +    int fds[VHOST_MEMORY_MAX_NREGIONS];
> +    size_t fd_num = 0;
> +
>      assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
> -    fprintf(stderr, "vhost_user_call not implemented\n");
>  
> -    return -1;
> +    switch (request) {
> +    default:
> +        fprintf(stderr, "vhost-user trying to send unhandled ioctl\n");
> +        return -1;
> +        break;
> +    }
> +
> +    result = vhost_user_send_fds(fd, &msg, fds, fd_num);

I don't get it.
So msg is sent without being initialized?

> +
> +    if (!result && need_reply) {
> +        result = vhost_user_recv(fd, &msg);
> +        if (!result) {
> +            switch (request) {
> +            default:
> +                break;
> +            }

What does this mean?

> +        }
> +    }
> +
> +    return result;
>  }
>  
>  static int vhost_user_init(struct vhost_dev *dev, const char *devpath)
>  {
> +    int fd = -1;
> +    struct sockaddr_un un;
> +    size_t len;
> +
>      assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
> -    fprintf(stderr, "vhost_user_init not implemented\n");
>  
> -    return -1;
> +    /* Create the socket */
> +    fd = socket(AF_UNIX, SOCK_STREAM, 0);
> +    if (fd == -1) {
> +        perror("socket");
> +        return -1;
> +    }
> +
> +    un.sun_family = AF_UNIX;
> +    strcpy(un.sun_path, devpath);
> +
> +    len = sizeof(un.sun_family) + strlen(devpath);
> +
> +    /* Connect */
> +    if (connect(fd, (struct sockaddr *) &un, len) == -1) {
> +        perror("connect");
> +        return -1;
> +    }
> +

Just connect and start using then?
This protocol does not look well thought out.
What if you want to extend some messages in the
future? Change it in some ways?

> +    dev->control = fd;
> +
> +    return fd;
>  }
>  
>  static int vhost_user_cleanup(struct vhost_dev *dev)
>  {
>      assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
> -    fprintf(stderr, "vhost_user_cleanup not implemented\n");
>  
> -    return -1;
> +    return close(dev->control);
>  }
>  
>  static const VhostOps user_ops = {
> -- 
> 1.8.3.2
diff mbox

Patch

diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
index 847809f..96d3bf0 100644
--- a/hw/virtio/vhost-backend.c
+++ b/hw/virtio/vhost-backend.c
@@ -14,30 +14,185 @@ 
 #include <fcntl.h>
 #include <unistd.h>
 #include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <linux/vhost.h>
+
+#define VHOST_MEMORY_MAX_NREGIONS    8
+
+typedef enum VhostUserRequest {
+    VHOST_USER_NONE = 0,
+    VHOST_USER_GET_FEATURES = 1,
+    VHOST_USER_SET_FEATURES = 2,
+    VHOST_USER_SET_OWNER = 3,
+    VHOST_USER_RESET_OWNER = 4,
+    VHOST_USER_SET_MEM_TABLE = 5,
+    VHOST_USER_SET_LOG_BASE = 6,
+    VHOST_USER_SET_LOG_FD = 7,
+    VHOST_USER_SET_VRING_NUM = 8,
+    VHOST_USER_SET_VRING_ADDR = 9,
+    VHOST_USER_SET_VRING_BASE = 10,
+    VHOST_USER_GET_VRING_BASE = 11,
+    VHOST_USER_SET_VRING_KICK = 12,
+    VHOST_USER_SET_VRING_CALL = 13,
+    VHOST_USER_SET_VRING_ERR = 14,
+    VHOST_USER_NET_SET_BACKEND = 15,
+    VHOST_USER_MAX
+} VhostUserRequest;
+
+typedef struct VhostUserMemoryRegion {
+    __u64 guest_phys_addr;
+    __u64 memory_size;
+    __u64 userspace_addr;
+} VhostUserMemoryRegion;
+
+typedef struct VhostUserMemory {
+    __u32 nregions;
+    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
+} VhostUserMemory;
+
+typedef struct VhostUserMsg {
+    VhostUserRequest request;
+
+    int flags;
+    union {
+        uint64_t    u64;
+        int         fd;
+        struct vhost_vring_state state;
+        struct vhost_vring_addr addr;
+        struct vhost_vring_file file;
+
+        VhostUserMemory memory;
+    };
+} VhostUserMsg;
+
+static int vhost_user_recv(int fd, VhostUserMsg *msg)
+{
+    ssize_t r = read(fd, msg, sizeof(VhostUserMsg));
+
+    return (r == sizeof(VhostUserMsg)) ? 0 : -1;
+}
+
+static int vhost_user_send_fds(int fd, const VhostUserMsg *msg, int *fds,
+        size_t fd_num)
+{
+    int r;
+
+    struct msghdr msgh;
+    struct iovec iov[1];
+
+    size_t fd_size = fd_num * sizeof(int);
+    char control[CMSG_SPACE(fd_size)];
+    struct cmsghdr *cmsg;
+
+    memset(&msgh, 0, sizeof(msgh));
+    memset(control, 0, sizeof(control));
+
+    /* set the payload */
+    iov[0].iov_base = (void *) msg;
+    iov[0].iov_len = sizeof(VhostUserMsg);
+
+    msgh.msg_iov = iov;
+    msgh.msg_iovlen = 1;
+
+    if (fd_num) {
+        msgh.msg_control = control;
+        msgh.msg_controllen = sizeof(control);
+
+        cmsg = CMSG_FIRSTHDR(&msgh);
+
+        cmsg->cmsg_len = CMSG_LEN(fd_size);
+        cmsg->cmsg_level = SOL_SOCKET;
+        cmsg->cmsg_type = SCM_RIGHTS;
+        memcpy(CMSG_DATA(cmsg), fds, fd_size);
+    } else {
+        msgh.msg_control = 0;
+        msgh.msg_controllen = 0;
+    }
+
+    do {
+        r = sendmsg(fd, &msgh, 0);
+    } while (r < 0 && errno == EINTR);
+
+    if (r < 0) {
+        fprintf(stderr, "Failed to send msg(%d), reason: %s\n",
+                msg->request, strerror(errno));
+    } else {
+        r = 0;
+    }
+
+    return r;
+}
 
 static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
         void *arg)
 {
+    int fd = dev->control;
+    VhostUserMsg msg;
+    int result = 0, need_reply = 0;
+    int fds[VHOST_MEMORY_MAX_NREGIONS];
+    size_t fd_num = 0;
+
     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
-    fprintf(stderr, "vhost_user_call not implemented\n");
 
-    return -1;
+    switch (request) {
+    default:
+        fprintf(stderr, "vhost-user trying to send unhandled ioctl\n");
+        return -1;
+        break;
+    }
+
+    result = vhost_user_send_fds(fd, &msg, fds, fd_num);
+
+    if (!result && need_reply) {
+        result = vhost_user_recv(fd, &msg);
+        if (!result) {
+            switch (request) {
+            default:
+                break;
+            }
+        }
+    }
+
+    return result;
 }
 
 static int vhost_user_init(struct vhost_dev *dev, const char *devpath)
 {
+    int fd = -1;
+    struct sockaddr_un un;
+    size_t len;
+
     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
-    fprintf(stderr, "vhost_user_init not implemented\n");
 
-    return -1;
+    /* Create the socket */
+    fd = socket(AF_UNIX, SOCK_STREAM, 0);
+    if (fd == -1) {
+        perror("socket");
+        return -1;
+    }
+
+    un.sun_family = AF_UNIX;
+    strcpy(un.sun_path, devpath);
+
+    len = sizeof(un.sun_family) + strlen(devpath);
+
+    /* Connect */
+    if (connect(fd, (struct sockaddr *) &un, len) == -1) {
+        perror("connect");
+        return -1;
+    }
+
+    dev->control = fd;
+
+    return fd;
 }
 
 static int vhost_user_cleanup(struct vhost_dev *dev)
 {
     assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
-    fprintf(stderr, "vhost_user_cleanup not implemented\n");
 
-    return -1;
+    return close(dev->control);
 }
 
 static const VhostOps user_ops = {