Patchwork [for,QEMU,v3] sheepdog: add discard/trim support for sheepdog

login
register
mail settings
Submitter namei.unix@gmail.com
Date April 14, 2013, 5:16 a.m.
Message ID <1365916604-12362-1-git-send-email-namei.unix@gmail.com>
Download mbox | patch
Permalink /patch/236403/
State New
Headers show

Comments

namei.unix@gmail.com - April 14, 2013, 5:16 a.m.
From: Liu Yuan <tailai.ly@taobao.com>

The 'TRIM' command from VM that is to release underlying data storage for
better thin-provision is already supported by the Sheepdog.

This patch adds the TRIM support at QEMU part.

For older Sheepdog that doesn't support it, we return EIO to upper layer.

Cc: MORITA Kazutaka <morita.kazutaka@lab.ntt.co.jp>
Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Liu Yuan <tailai.ly@taobao.com>
---
v3:
 - fix a silly accidental deletion of 'default' in switch clause.
v2:
 - skip the object when it is not allocated

 block/sheepdog.c |   53 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 52 insertions(+), 1 deletion(-)
MORITA Kazutaka - April 15, 2013, 3:10 p.m.
At Sun, 14 Apr 2013 13:16:44 +0800,
Liu Yuan wrote:
> 
> From: Liu Yuan <tailai.ly@taobao.com>
> 
> The 'TRIM' command from VM that is to release underlying data storage for
> better thin-provision is already supported by the Sheepdog.
> 
> This patch adds the TRIM support at QEMU part.
> 
> For older Sheepdog that doesn't support it, we return EIO to upper layer.

I think we can safely return 0 without doing anything when the server
doesn't support SD_OP_DISCARD.  Actually, if the block driver doesn't
support the discard operation, bdrv_co_discard() in block.c returns 0.


> diff --git a/block/sheepdog.c b/block/sheepdog.c
> index 987018e..e852d4e 100644
> --- a/block/sheepdog.c
> +++ b/block/sheepdog.c
> @@ -34,6 +34,7 @@
>  #define SD_OP_GET_VDI_INFO   0x14
>  #define SD_OP_READ_VDIS      0x15
>  #define SD_OP_FLUSH_VDI      0x16
> +#define SD_OP_DISCARD        0x17

This is an opcode for objects, so I prefer SD_OP_DISCARD_OBJ.

>  
> +static int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
> +                         int nb_sectors)

Should add coroutine_fn.

Thanks,

Kazutaka

Patch

diff --git a/block/sheepdog.c b/block/sheepdog.c
index 987018e..e852d4e 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -34,6 +34,7 @@ 
 #define SD_OP_GET_VDI_INFO   0x14
 #define SD_OP_READ_VDIS      0x15
 #define SD_OP_FLUSH_VDI      0x16
+#define SD_OP_DISCARD        0x17
 
 #define SD_FLAG_CMD_WRITE    0x01
 #define SD_FLAG_CMD_COW      0x02
@@ -269,6 +270,7 @@  enum AIOCBState {
     AIOCB_WRITE_UDATA,
     AIOCB_READ_UDATA,
     AIOCB_FLUSH_CACHE,
+    AIOCB_DISCARD,
 };
 
 struct SheepdogAIOCB {
@@ -656,7 +658,7 @@  static void coroutine_fn aio_read_response(void *opaque)
     int ret;
     AIOReq *aio_req = NULL;
     SheepdogAIOCB *acb;
-    unsigned long idx;
+    uint64_t idx;
 
     if (QLIST_EMPTY(&s->inflight_aio_head)) {
         goto out;
@@ -727,6 +729,18 @@  static void coroutine_fn aio_read_response(void *opaque)
             rsp.result = SD_RES_SUCCESS;
         }
         break;
+    case AIOCB_DISCARD:
+        switch (rsp.result) {
+        case SD_RES_INVALID_PARMS:
+            error_report("you are running the old sheep that doesn't support "
+                         "discard command.\n");
+            break;
+        case SD_RES_SUCCESS:
+            idx = data_oid_to_idx(aio_req->oid);
+            s->inode.data_vdi_id[idx] = 0;
+            break;
+        }
+        break;
     }
 
     if (rsp.result != SD_RES_SUCCESS) {
@@ -1016,6 +1030,9 @@  static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
         wlen = datalen;
         hdr.flags = SD_FLAG_CMD_WRITE | flags;
         break;
+    case AIOCB_DISCARD:
+        hdr.opcode = SD_OP_DISCARD;
+        break;
     }
 
     if (s->cache_flags) {
@@ -1633,6 +1650,15 @@  static int coroutine_fn sd_co_rw_vector(void *p)
                 flags = SD_FLAG_CMD_COW;
             }
             break;
+        case AIOCB_DISCARD:
+            /*
+             * We discard the object only when the whole object is
+             * 1) allocated 2) trimmed. Otherwise, simply skip it.
+             */
+            if (len != SD_DATA_OBJ_SIZE || inode->data_vdi_id[idx] == 0) {
+                goto done;
+            }
+            break;
         default:
             break;
         }
@@ -2071,6 +2097,28 @@  static int sd_load_vmstate(BlockDriverState *bs, uint8_t *data,
 }
 
 
+static int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
+                         int nb_sectors)
+{
+    SheepdogAIOCB *acb;
+    QEMUIOVector dummy;
+    int ret;
+
+    acb = sd_aio_setup(bs, &dummy, sector_num, nb_sectors);
+    acb->aiocb_type = AIOCB_DISCARD;
+    acb->aio_done_func = sd_finish_aiocb;
+
+    ret = sd_co_rw_vector(acb);
+    if (ret <= 0) {
+        qemu_aio_release(acb);
+        return ret;
+    }
+
+    qemu_coroutine_yield();
+
+    return acb->ret;
+}
+
 static QEMUOptionParameter sd_create_options[] = {
     {
         .name = BLOCK_OPT_SIZE,
@@ -2103,6 +2151,7 @@  static BlockDriver bdrv_sheepdog = {
     .bdrv_co_readv  = sd_co_readv,
     .bdrv_co_writev = sd_co_writev,
     .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
+    .bdrv_co_discard = sd_co_discard,
 
     .bdrv_snapshot_create   = sd_snapshot_create,
     .bdrv_snapshot_goto     = sd_snapshot_goto,
@@ -2128,6 +2177,7 @@  static BlockDriver bdrv_sheepdog_tcp = {
     .bdrv_co_readv  = sd_co_readv,
     .bdrv_co_writev = sd_co_writev,
     .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
+    .bdrv_co_discard = sd_co_discard,
 
     .bdrv_snapshot_create   = sd_snapshot_create,
     .bdrv_snapshot_goto     = sd_snapshot_goto,
@@ -2153,6 +2203,7 @@  static BlockDriver bdrv_sheepdog_unix = {
     .bdrv_co_readv  = sd_co_readv,
     .bdrv_co_writev = sd_co_writev,
     .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
+    .bdrv_co_discard = sd_co_discard,
 
     .bdrv_snapshot_create   = sd_snapshot_create,
     .bdrv_snapshot_goto     = sd_snapshot_goto,