Patchwork sheepdog: add support for connecting to unix domain socket

login
register
mail settings
Submitter MORITA Kazutaka
Date Jan. 15, 2013, 2:12 p.m.
Message ID <1358259160-7815-1-git-send-email-morita.kazutaka@lab.ntt.co.jp>
Download mbox | patch
Permalink /patch/212168/
State New
Headers show

Comments

MORITA Kazutaka - Jan. 15, 2013, 2:12 p.m.
This patch adds support for a unix domain socket for a connection
between qemu and local sheepdog server.  You can use the unix domain
socket with the following syntax like NBD driver:

 $ qemu sheepdog:unix:<socket path>:<image name>

Signed-off-by: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
---
 block/sheepdog.c |  153 ++++++++++++++++++++---------------------------------
 qemu-options.hx  |   18 +++----
 2 files changed, 66 insertions(+), 105 deletions(-)
Stefan Hajnoczi - Jan. 16, 2013, 2:21 p.m.
On Tue, Jan 15, 2013 at 11:12:40PM +0900, MORITA Kazutaka wrote:
> +static int set_nodelay(int fd)
> +{
> +    int opt = 1;
> +    return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *)&opt, sizeof(opt));
> +}
> +

Please split this into a separate patch that moves the function to
util/osdep.c and names it socket_set_nodelay().  You can put it below
socket_set_cork().

> @@ -804,23 +781,14 @@ static int set_nodelay(int fd)
>   */
>  static int get_sheep_fd(BDRVSheepdogState *s)
>  {
> -    int ret, fd;
> +    int fd;
>  
> -    fd = connect_to_sdog(s->addr, s->port);
> +    fd = connect_to_sdog(s->host_spec);

The patch would be easier to review if you split out a separate patch to
move from addr/port to host_spec.  Then the final patch can focus just
on UNIX domain sockets without all the addr/port to host_spec changes.

>      if (fd < 0) {
>          error_report("%s", strerror(errno));

connect_to_sdog() does not set errno and it already reports errors
internally.  Can this error_report() be dropped?

>          return fd;
>      }
>  
> -    socket_set_nonblock(fd);

Where is nonblock being set now that you've removed this?

> diff --git a/qemu-options.hx b/qemu-options.hx
> index 9df0cde..cbd2d4b 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -2059,17 +2059,15 @@ devices.
>  
>  Syntax for specifying a sheepdog device
>  @table @list
> -``sheepdog:<vdiname>''
> -
> -``sheepdog:<vdiname>:<snapid>''
> -
> -``sheepdog:<vdiname>:<tag>''
> -
> -``sheepdog:<host>:<port>:<vdiname>''
> -
> -``sheepdog:<host>:<port>:<vdiname>:<snapid>''
> +using TCP:
> +@example
> +sheepdog:[<hostname>:<port>:]<vdiname>[:<snapid or tag>]
> +@end example
>  
> -``sheepdog:<host>:<port>:<vdiname>:<tag>''
> +using Unix Domain Socket:
> +@example
> +sheepdog:unix:<domain-socket>:<vdiname>[:<snapid or tag>]
> +@end example

Please document that <domain-socket> must be an absolute path.  This
will prevent confusing users who try a relative path.

Stefan
MORITA Kazutaka - Jan. 19, 2013, 3:13 p.m.
At Wed, 16 Jan 2013 15:21:09 +0100,
Stefan Hajnoczi wrote:
> 
> On Tue, Jan 15, 2013 at 11:12:40PM +0900, MORITA Kazutaka wrote:
> > +static int set_nodelay(int fd)
> > +{
> > +    int opt = 1;
> > +    return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *)&opt, sizeof(opt));
> > +}
> > +
> 
> Please split this into a separate patch that moves the function to
> util/osdep.c and names it socket_set_nodelay().  You can put it below
> socket_set_cork().

Agreed.

> 
> > @@ -804,23 +781,14 @@ static int set_nodelay(int fd)
> >   */
> >  static int get_sheep_fd(BDRVSheepdogState *s)
> >  {
> > -    int ret, fd;
> > +    int fd;
> >  
> > -    fd = connect_to_sdog(s->addr, s->port);
> > +    fd = connect_to_sdog(s->host_spec);
> 
> The patch would be easier to review if you split out a separate patch to
> move from addr/port to host_spec.  Then the final patch can focus just
> on UNIX domain sockets without all the addr/port to host_spec changes.

Agreed.

> 
> >      if (fd < 0) {
> >          error_report("%s", strerror(errno));
> 
> connect_to_sdog() does not set errno and it already reports errors
> internally.  Can this error_report() be dropped?

Yes, I'll remove it.

> 
> >          return fd;
> >      }
> >  
> > -    socket_set_nonblock(fd);
> 
> Where is nonblock being set now that you've removed this?

Oops, I thought that unix_connect and inet_connect return nonblocking
sockets, but it was wrong.

> >  
> > -``sheepdog:<host>:<port>:<vdiname>:<tag>''
> > +using Unix Domain Socket:
> > +@example
> > +sheepdog:unix:<domain-socket>:<vdiname>[:<snapid or tag>]
> > +@end example
> 
> Please document that <domain-socket> must be an absolute path.  This
> will prevent confusing users who try a relative path.

Okay.

Thanks for your comments, I'll send v2.

Kazutaka

Patch

diff --git a/block/sheepdog.c b/block/sheepdog.c
index 3e49bb8..59cbc22 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -20,8 +20,7 @@ 
 
 #define SD_PROTO_VER 0x01
 
-#define SD_DEFAULT_ADDR "localhost"
-#define SD_DEFAULT_PORT "7000"
+#define SD_DEFAULT_ADDR_AND_PORT "localhost:7000"
 
 #define SD_OP_CREATE_AND_WRITE_OBJ  0x01
 #define SD_OP_READ_OBJ       0x02
@@ -297,8 +296,11 @@  typedef struct BDRVSheepdogState {
     bool is_snapshot;
     uint32_t cache_flags;
 
-    char *addr;
-    char *port;
+    /* If it begins with  'unix:/', this is a UNIX domain socket. Otherwise,
+     * it's a string of the form <hostname>:<port>
+     */
+    char *host_spec;
+
     int fd;
 
     CoMutex lock;
@@ -354,6 +356,12 @@  static const char * sd_strerror(int err)
     return "Invalid error code";
 }
 
+static int set_nodelay(int fd)
+{
+    int opt = 1;
+    return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *)&opt, sizeof(opt));
+}
+
 /*
  * Sheepdog I/O handling:
  *
@@ -446,56 +454,34 @@  static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
     return acb;
 }
 
-static int connect_to_sdog(const char *addr, const char *port)
+static int connect_to_sdog(const char *host_spec)
 {
-    char hbuf[NI_MAXHOST], sbuf[NI_MAXSERV];
-    int fd, ret;
-    struct addrinfo hints, *res, *res0;
+    int fd;
+    const char *path;
+    Error *err = NULL;
 
-    if (!addr) {
-        addr = SD_DEFAULT_ADDR;
-        port = SD_DEFAULT_PORT;
+    if (host_spec == NULL) {
+        host_spec = SD_DEFAULT_ADDR_AND_PORT;
     }
 
-    memset(&hints, 0, sizeof(hints));
-    hints.ai_socktype = SOCK_STREAM;
-
-    ret = getaddrinfo(addr, port, &hints, &res0);
-    if (ret) {
-        error_report("unable to get address info %s, %s",
-                     addr, strerror(errno));
-        return -errno;
-    }
-
-    for (res = res0; res; res = res->ai_next) {
-        ret = getnameinfo(res->ai_addr, res->ai_addrlen, hbuf, sizeof(hbuf),
-                          sbuf, sizeof(sbuf), NI_NUMERICHOST | NI_NUMERICSERV);
-        if (ret) {
-            continue;
-        }
-
-        fd = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
-        if (fd < 0) {
-            continue;
-        }
+    if (strstart(host_spec, "unix:", &path) && path[0] == '/') {
+        fd = unix_connect(path, &err);
+    } else {
+        fd = inet_connect(host_spec, &err);
 
-    reconnect:
-        ret = connect(fd, res->ai_addr, res->ai_addrlen);
-        if (ret < 0) {
-            if (errno == EINTR) {
-                goto reconnect;
+        if (err == NULL) {
+            int ret = set_nodelay(fd);
+            if (ret < 0) {
+                error_report("%s", strerror(errno));
             }
-            close(fd);
-            break;
         }
+    }
 
-        dprintf("connected to %s:%s\n", addr, port);
-        goto success;
+    if (err != NULL) {
+        qerror_report_err(err);
+        error_free(err);
     }
-    fd = -errno;
-    error_report("failed connect to %s:%s", addr, port);
-success:
-    freeaddrinfo(res0);
+
     return fd;
 }
 
@@ -787,15 +773,6 @@  static int aio_flush_request(void *opaque)
         !QLIST_EMPTY(&s->pending_aio_head);
 }
 
-static int set_nodelay(int fd)
-{
-    int ret, opt;
-
-    opt = 1;
-    ret = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *)&opt, sizeof(opt));
-    return ret;
-}
-
 /*
  * Return a socket discriptor to read/write objects.
  *
@@ -804,23 +781,14 @@  static int set_nodelay(int fd)
  */
 static int get_sheep_fd(BDRVSheepdogState *s)
 {
-    int ret, fd;
+    int fd;
 
-    fd = connect_to_sdog(s->addr, s->port);
+    fd = connect_to_sdog(s->host_spec);
     if (fd < 0) {
         error_report("%s", strerror(errno));
         return fd;
     }
 
-    socket_set_nonblock(fd);
-
-    ret = set_nodelay(fd);
-    if (ret) {
-        error_report("%s", strerror(errno));
-        closesocket(fd);
-        return -errno;
-    }
-
     qemu_aio_set_fd_handler(fd, co_read_response, NULL, aio_flush_request, s);
     return fd;
 }
@@ -829,12 +797,10 @@  static int get_sheep_fd(BDRVSheepdogState *s)
  * Parse a filename
  *
  * filename must be one of the following formats:
- *   1. [vdiname]
- *   2. [vdiname]:[snapid]
- *   3. [vdiname]:[tag]
- *   4. [hostname]:[port]:[vdiname]
- *   5. [hostname]:[port]:[vdiname]:[snapid]
- *   6. [hostname]:[port]:[vdiname]:[tag]
+ *   - using TCP
+ *     [<hostname>:<port>:]<vdiname>[:<snapid or tag>]
+ *   - using Unix Domain Socket
+ *     unix:<domain-socket>:<vdiname>[:<snapid or tag>]
  *
  * You can boot from the snapshot images by specifying `snapid` or
  * `tag'.
@@ -860,18 +826,15 @@  static int parse_vdiname(BDRVSheepdogState *s, const char *filename,
     }
     p = q;
 
-    /* use the first two tokens as hostname and port number. */
+    /* use the first two tokens as host_spec. */
     if (nr_sep >= 2) {
-        s->addr = p;
+        s->host_spec = p;
         p = strchr(p, ':');
-        *p++ = '\0';
-
-        s->port = p;
+        p++;
         p = strchr(p, ':');
         *p++ = '\0';
     } else {
-        s->addr = NULL;
-        s->port = 0;
+        s->host_spec = NULL;
     }
 
     pstrcpy(vdi, SD_MAX_VDI_LEN, p);
@@ -887,7 +850,7 @@  static int parse_vdiname(BDRVSheepdogState *s, const char *filename,
         *snapid = CURRENT_VDI_ID; /* search current vdi */
     }
 
-    if (s->addr == NULL) {
+    if (s->host_spec == NULL) {
         g_free(q);
     }
 
@@ -903,7 +866,7 @@  static int find_vdi_name(BDRVSheepdogState *s, char *filename, uint32_t snapid,
     unsigned int wlen, rlen = 0;
     char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN];
 
-    fd = connect_to_sdog(s->addr, s->port);
+    fd = connect_to_sdog(s->host_spec);
     if (fd < 0) {
         return fd;
     }
@@ -1143,7 +1106,7 @@  static int sd_open(BlockDriverState *bs, const char *filename, int flags)
         s->is_snapshot = true;
     }
 
-    fd = connect_to_sdog(s->addr, s->port);
+    fd = connect_to_sdog(s->host_spec);
     if (fd < 0) {
         error_report("failed to connect");
         ret = fd;
@@ -1180,7 +1143,7 @@  out:
 
 static int do_sd_create(char *filename, int64_t vdi_size,
                         uint32_t base_vid, uint32_t *vdi_id, int snapshot,
-                        const char *addr, const char *port)
+                        const char *host_spec)
 {
     SheepdogVdiReq hdr;
     SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
@@ -1188,7 +1151,7 @@  static int do_sd_create(char *filename, int64_t vdi_size,
     unsigned int wlen, rlen = 0;
     char buf[SD_MAX_VDI_LEN];
 
-    fd = connect_to_sdog(addr, port);
+    fd = connect_to_sdog(host_spec);
     if (fd < 0) {
         return fd;
     }
@@ -1355,7 +1318,7 @@  static int sd_create(const char *filename, QEMUOptionParameter *options)
         bdrv_delete(bs);
     }
 
-    ret = do_sd_create(vdi, vdi_size, base_vid, &vid, 0, s->addr, s->port);
+    ret = do_sd_create(vdi, vdi_size, base_vid, &vid, 0, s->host_spec);
     if (!prealloc || ret) {
         goto out;
     }
@@ -1376,7 +1339,7 @@  static void sd_close(BlockDriverState *bs)
 
     dprintf("%s\n", s->name);
 
-    fd = connect_to_sdog(s->addr, s->port);
+    fd = connect_to_sdog(s->host_spec);
     if (fd < 0) {
         return;
     }
@@ -1399,7 +1362,7 @@  static void sd_close(BlockDriverState *bs)
 
     qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL);
     closesocket(s->fd);
-    g_free(s->addr);
+    g_free(s->host_spec);
 }
 
 static int64_t sd_getlength(BlockDriverState *bs)
@@ -1423,7 +1386,7 @@  static int sd_truncate(BlockDriverState *bs, int64_t offset)
         return -EINVAL;
     }
 
-    fd = connect_to_sdog(s->addr, s->port);
+    fd = connect_to_sdog(s->host_spec);
     if (fd < 0) {
         return fd;
     }
@@ -1500,14 +1463,14 @@  static int sd_create_branch(BDRVSheepdogState *s)
     buf = g_malloc(SD_INODE_SIZE);
 
     ret = do_sd_create(s->name, s->inode.vdi_size, s->inode.vdi_id, &vid, 1,
-                       s->addr, s->port);
+                       s->host_spec);
     if (ret) {
         goto out;
     }
 
     dprintf("%" PRIx32 " is created.\n", vid);
 
-    fd = connect_to_sdog(s->addr, s->port);
+    fd = connect_to_sdog(s->host_spec);
     if (fd < 0) {
         error_report("failed to connect");
         ret = fd;
@@ -1768,7 +1731,7 @@  static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
     datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
 
     /* refresh inode. */
-    fd = connect_to_sdog(s->addr, s->port);
+    fd = connect_to_sdog(s->host_spec);
     if (fd < 0) {
         ret = fd;
         goto cleanup;
@@ -1782,7 +1745,7 @@  static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
     }
 
     ret = do_sd_create(s->name, s->inode.vdi_size, s->inode.vdi_id, &new_vid, 1,
-                       s->addr, s->port);
+                       s->host_spec);
     if (ret < 0) {
         error_report("failed to create inode for snapshot. %s",
                      strerror(errno));
@@ -1837,7 +1800,7 @@  static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
         goto out;
     }
 
-    fd = connect_to_sdog(s->addr, s->port);
+    fd = connect_to_sdog(s->host_spec);
     if (fd < 0) {
         error_report("failed to connect");
         ret = fd;
@@ -1901,7 +1864,7 @@  static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
 
     vdi_inuse = g_malloc(max);
 
-    fd = connect_to_sdog(s->addr, s->port);
+    fd = connect_to_sdog(s->host_spec);
     if (fd < 0) {
         ret = fd;
         goto out;
@@ -1928,7 +1891,7 @@  static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
     hval = fnv_64a_buf(s->name, strlen(s->name), FNV1A_64_INIT);
     start_nr = hval & (SD_NR_VDIS - 1);
 
-    fd = connect_to_sdog(s->addr, s->port);
+    fd = connect_to_sdog(s->host_spec);
     if (fd < 0) {
         error_report("failed to connect");
         ret = fd;
@@ -1987,7 +1950,7 @@  static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
     uint32_t vdi_index;
     uint64_t offset;
 
-    fd = connect_to_sdog(s->addr, s->port);
+    fd = connect_to_sdog(s->host_spec);
     if (fd < 0) {
         return fd;
     }
diff --git a/qemu-options.hx b/qemu-options.hx
index 9df0cde..cbd2d4b 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -2059,17 +2059,15 @@  devices.
 
 Syntax for specifying a sheepdog device
 @table @list
-``sheepdog:<vdiname>''
-
-``sheepdog:<vdiname>:<snapid>''
-
-``sheepdog:<vdiname>:<tag>''
-
-``sheepdog:<host>:<port>:<vdiname>''
-
-``sheepdog:<host>:<port>:<vdiname>:<snapid>''
+using TCP:
+@example
+sheepdog:[<hostname>:<port>:]<vdiname>[:<snapid or tag>]
+@end example
 
-``sheepdog:<host>:<port>:<vdiname>:<tag>''
+using Unix Domain Socket:
+@example
+sheepdog:unix:<domain-socket>:<vdiname>[:<snapid or tag>]
+@end example
 @end table
 
 Example