Patchwork [v3] sheepdog: implement SD_OP_FLUSH_VDI operation

login
register
mail settings
Submitter namei.unix@gmail.com
Date April 3, 2012, 8:35 a.m.
Message ID <1333442100-29240-1-git-send-email-namei.unix@gmail.com>
Download mbox | patch
Permalink /patch/150338/
State New
Headers show

Comments

namei.unix@gmail.com - April 3, 2012, 8:35 a.m.
From: Liu Yuan <tailai.ly@taobao.com>

Flush operation is supposed to flush the write-back cache of
sheepdog cluster.

By issuing flush operation, we can assure the Guest of data
reaching the sheepdog cluster storage.

Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Michael Tokarev <mjt@tls.msk.ru>
Cc: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
Cc: Stefan Hajnoczi <stefanha@gmail.com>
Signed-off-by: Liu Yuan <tailai.ly@taobao.com>
---
 v2 -> v3:
 address Stefan Hajnoczi comments. Thanks !
  - use qemu_co_send/recv to send/revc msg

 v1 -> v2:
 address Michael Tokarev comments. Thanks !
  - use per-device flag
  - use bs->fd instead of 'connect_to_sdog()'

 block/sheepdog.c |  133 ++++++++++++++++++++++++++++++++++++++++++++++++------
 1 files changed, 119 insertions(+), 14 deletions(-)
Kevin Wolf - April 3, 2012, 9:12 a.m.
Am 03.04.2012 10:35, schrieb Liu Yuan:
> From: Liu Yuan <tailai.ly@taobao.com>
> 
> Flush operation is supposed to flush the write-back cache of
> sheepdog cluster.
> 
> By issuing flush operation, we can assure the Guest of data
> reaching the sheepdog cluster storage.
> 
> Cc: Kevin Wolf <kwolf@redhat.com>
> Cc: Michael Tokarev <mjt@tls.msk.ru>
> Cc: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
> Cc: Stefan Hajnoczi <stefanha@gmail.com>
> Signed-off-by: Liu Yuan <tailai.ly@taobao.com>
> ---
>  v2 -> v3:
>  address Stefan Hajnoczi comments. Thanks !
>   - use qemu_co_send/recv to send/revc msg
> 
>  v1 -> v2:
>  address Michael Tokarev comments. Thanks !
>   - use per-device flag
>   - use bs->fd instead of 'connect_to_sdog()'

This version looks okay to me now. Waiting for Kazutaka's Acked-by.

> +static int send_co_req(int sockfd, SheepdogReq *hdr, void *data,
> +                       unsigned int *wlen)
> +{
> +    int ret;
> +
> +    ret = qemu_co_send(sockfd, hdr, sizeof(*hdr));
> +    if (ret < sizeof(*hdr)) {
> +        error_report("failed to send a req, %s", strerror(errno));
> +    }
> +
> +    ret = qemu_co_send(sockfd, data, *wlen);
> +    if (ret < *wlen) {
> +        error_report("failed to send a req, %s", strerror(errno));
> +    }
> +
> +    return ret;
> +}

If sending the header fails, why do we still send the data? In fact, if
sending the data succeeds then (which is unlikely, but who knows), we'll
even return success.

This only copies the logic of the existing send_req(), so if it needs to
be fixed, another patch on top that fixes both instances would be okay.

Kevin
MORITA Kazutaka - April 3, 2012, 9:52 a.m.
At Tue,  3 Apr 2012 16:35:00 +0800,
Liu Yuan wrote:
>  
> +static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
> +{
> +    BDRVSheepdogState *s = bs->opaque;
> +    SheepdogObjReq hdr = { 0 };
> +    SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr;
> +    SheepdogInode *inode = &s->inode;
> +    int ret;
> +    unsigned int wlen = 0, rlen = 0;
> +
> +    if (!s->cache_enabled) {
> +        return 0;
> +    }
> +
> +    hdr.opcode = SD_OP_FLUSH_VDI;
> +    hdr.oid = vid_to_vdi_oid(inode->vdi_id);
> +
> +    ret = do_co_req(s->fd, (SheepdogReq *)&hdr, NULL, &wlen, &rlen);

This will mix the flush request with I/O requests on s->fd.  We can
read s->fd only in aio_read_response() and write only in
add_aio_request(), so please create another connection for flush
operations.

Thanks,

Kazutaka

Patch

diff --git a/block/sheepdog.c b/block/sheepdog.c
index 00276f6f..b2bb522 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -32,9 +32,11 @@ 
 #define SD_OP_RELEASE_VDI    0x13
 #define SD_OP_GET_VDI_INFO   0x14
 #define SD_OP_READ_VDIS      0x15
+#define SD_OP_FLUSH_VDI      0x16
 
 #define SD_FLAG_CMD_WRITE    0x01
 #define SD_FLAG_CMD_COW      0x02
+#define SD_FLAG_CMD_CACHE    0x04
 
 #define SD_RES_SUCCESS       0x00 /* Success */
 #define SD_RES_UNKNOWN       0x01 /* Unknown error */
@@ -293,6 +295,7 @@  typedef struct BDRVSheepdogState {
 
     char name[SD_MAX_VDI_LEN];
     int is_snapshot;
+    uint8_t cache_enabled;
 
     char *addr;
     char *port;
@@ -516,6 +519,23 @@  static int send_req(int sockfd, SheepdogReq *hdr, void *data,
     return ret;
 }
 
+static int send_co_req(int sockfd, SheepdogReq *hdr, void *data,
+                       unsigned int *wlen)
+{
+    int ret;
+
+    ret = qemu_co_send(sockfd, hdr, sizeof(*hdr));
+    if (ret < sizeof(*hdr)) {
+        error_report("failed to send a req, %s", strerror(errno));
+    }
+
+    ret = qemu_co_send(sockfd, data, *wlen);
+    if (ret < *wlen) {
+        error_report("failed to send a req, %s", strerror(errno));
+    }
+
+    return ret;
+}
 static int do_req(int sockfd, SheepdogReq *hdr, void *data,
                   unsigned int *wlen, unsigned int *rlen)
 {
@@ -550,6 +570,40 @@  out:
     return ret;
 }
 
+static int do_co_req(int sockfd, SheepdogReq *hdr, void *data,
+                     unsigned int *wlen, unsigned int *rlen)
+{
+    int ret;
+
+    socket_set_block(sockfd);
+    ret = send_co_req(sockfd, hdr, data, wlen);
+    if (ret < 0) {
+        goto out;
+    }
+
+    ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
+    if (ret < sizeof(*hdr)) {
+        error_report("failed to get a rsp, %s", strerror(errno));
+        goto out;
+    }
+
+    if (*rlen > hdr->data_length) {
+        *rlen = hdr->data_length;
+    }
+
+    if (*rlen) {
+        ret = qemu_co_recv(sockfd, data, *rlen);
+        if (ret < *rlen) {
+            error_report("failed to get the data, %s", strerror(errno));
+            goto out;
+        }
+    }
+    ret = 0;
+out:
+    socket_set_nonblock(sockfd);
+    return ret;
+}
+
 static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
                            struct iovec *iov, int niov, int create,
                            enum AIOCBState aiocb_type);
@@ -900,6 +954,10 @@  static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
         hdr.flags = SD_FLAG_CMD_WRITE | flags;
     }
 
+    if (s->cache_enabled) {
+        hdr.flags |= SD_FLAG_CMD_CACHE;
+    }
+
     hdr.oid = oid;
     hdr.cow_oid = old_oid;
     hdr.copies = s->inode.nr_copies;
@@ -942,7 +1000,7 @@  static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
 
 static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
                              unsigned int datalen, uint64_t offset,
-                             int write, int create)
+                             int write, int create, uint8_t cache)
 {
     SheepdogObjReq hdr;
     SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr;
@@ -965,6 +1023,11 @@  static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
         rlen = datalen;
         hdr.opcode = SD_OP_READ_OBJ;
     }
+
+    if (cache) {
+        hdr.flags |= SD_FLAG_CMD_CACHE;
+    }
+
     hdr.oid = oid;
     hdr.data_length = datalen;
     hdr.offset = offset;
@@ -986,15 +1049,18 @@  static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
 }
 
 static int read_object(int fd, char *buf, uint64_t oid, int copies,
-                       unsigned int datalen, uint64_t offset)
+                       unsigned int datalen, uint64_t offset, uint8_t cache)
 {
-    return read_write_object(fd, buf, oid, copies, datalen, offset, 0, 0);
+    return read_write_object(fd, buf, oid, copies, datalen, offset, 0, 0,
+                             cache);
 }
 
 static int write_object(int fd, char *buf, uint64_t oid, int copies,
-                        unsigned int datalen, uint64_t offset, int create)
+                        unsigned int datalen, uint64_t offset, int create,
+                        uint8_t cache)
 {
-    return read_write_object(fd, buf, oid, copies, datalen, offset, 1, create);
+    return read_write_object(fd, buf, oid, copies, datalen, offset, 1, create,
+                             cache);
 }
 
 static int sd_open(BlockDriverState *bs, const char *filename, int flags)
@@ -1011,6 +1077,10 @@  static int sd_open(BlockDriverState *bs, const char *filename, int flags)
     QLIST_INIT(&s->outstanding_aio_head);
     s->fd = -1;
 
+    if (flags & BDRV_O_CACHE_WB) {
+        s->cache_enabled = 1;
+    }
+
     memset(vdi, 0, sizeof(vdi));
     memset(tag, 0, sizeof(tag));
     if (parse_vdiname(s, filename, vdi, &snapid, tag) < 0) {
@@ -1038,7 +1108,8 @@  static int sd_open(BlockDriverState *bs, const char *filename, int flags)
     }
 
     buf = g_malloc(SD_INODE_SIZE);
-    ret = read_object(fd, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0);
+    ret = read_object(fd, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0,
+                      s->cache_enabled);
 
     closesocket(fd);
 
@@ -1305,7 +1376,7 @@  static int sd_truncate(BlockDriverState *bs, int64_t offset)
     datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
     s->inode.vdi_size = offset;
     ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
-                       s->inode.nr_copies, datalen, 0, 0);
+                       s->inode.nr_copies, datalen, 0, 0, s->cache_enabled);
     close(fd);
 
     if (ret < 0) {
@@ -1387,7 +1458,7 @@  static int sd_create_branch(BDRVSheepdogState *s)
     }
 
     ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies,
-                      SD_INODE_SIZE, 0);
+                      SD_INODE_SIZE, 0, s->cache_enabled);
 
     closesocket(fd);
 
@@ -1575,6 +1646,36 @@  static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num,
     return acb->ret;
 }
 
+static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
+{
+    BDRVSheepdogState *s = bs->opaque;
+    SheepdogObjReq hdr = { 0 };
+    SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr;
+    SheepdogInode *inode = &s->inode;
+    int ret;
+    unsigned int wlen = 0, rlen = 0;
+
+    if (!s->cache_enabled) {
+        return 0;
+    }
+
+    hdr.opcode = SD_OP_FLUSH_VDI;
+    hdr.oid = vid_to_vdi_oid(inode->vdi_id);
+
+    ret = do_co_req(s->fd, (SheepdogReq *)&hdr, NULL, &wlen, &rlen);
+    if (ret) {
+        error_report("failed to send a request to the sheep");
+        return ret;
+    }
+
+    if (rsp->result != SD_RES_SUCCESS) {
+        error_report("%s", sd_strerror(rsp->result));
+        return -EIO;
+    }
+
+    return 0;
+}
+
 static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
 {
     BDRVSheepdogState *s = bs->opaque;
@@ -1610,7 +1711,7 @@  static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
     }
 
     ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
-                       s->inode.nr_copies, datalen, 0, 0);
+                       s->inode.nr_copies, datalen, 0, 0, s->cache_enabled);
     if (ret < 0) {
         error_report("failed to write snapshot's inode.");
         ret = -EIO;
@@ -1629,7 +1730,7 @@  static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
     inode = (SheepdogInode *)g_malloc(datalen);
 
     ret = read_object(fd, (char *)inode, vid_to_vdi_oid(new_vid),
-                      s->inode.nr_copies, datalen, 0);
+                      s->inode.nr_copies, datalen, 0, s->cache_enabled);
 
     if (ret < 0) {
         error_report("failed to read new inode info. %s", strerror(errno));
@@ -1684,7 +1785,7 @@  static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
 
     buf = g_malloc(SD_INODE_SIZE);
     ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies,
-                      SD_INODE_SIZE, 0);
+                      SD_INODE_SIZE, 0, s->cache_enabled);
 
     closesocket(fd);
 
@@ -1779,7 +1880,8 @@  static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
 
         /* we don't need to read entire object */
         ret = read_object(fd, (char *)&inode, vid_to_vdi_oid(vid),
-                          0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0);
+                          0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0,
+                          s->cache_enabled);
 
         if (ret) {
             continue;
@@ -1835,10 +1937,12 @@  static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
         create = (offset == 0);
         if (load) {
             ret = read_object(fd, (char *)data, vmstate_oid,
-                              s->inode.nr_copies, data_len, offset);
+                              s->inode.nr_copies, data_len, offset,
+                              s->cache_enabled);
         } else {
             ret = write_object(fd, (char *)data, vmstate_oid,
-                               s->inode.nr_copies, data_len, offset, create);
+                               s->inode.nr_copies, data_len, offset, create,
+                               s->cache_enabled);
         }
 
         if (ret < 0) {
@@ -1904,6 +2008,7 @@  BlockDriver bdrv_sheepdog = {
 
     .bdrv_co_readv  = sd_co_readv,
     .bdrv_co_writev = sd_co_writev,
+    .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
 
     .bdrv_snapshot_create   = sd_snapshot_create,
     .bdrv_snapshot_goto     = sd_snapshot_goto,