Patchwork rbd: add discard support

login
register
mail settings
Submitter Josh Durgin
Date May 1, 2012, 6:16 a.m.
Message ID <1335853005-8711-1-git-send-email-josh.durgin@dreamhost.com>
Download mbox | patch
Permalink /patch/156010/
State New
Headers show

Comments

Josh Durgin - May 1, 2012, 6:16 a.m.
Change the write flag to an operation type in RBDAIOCB, and make the
buffer optional since discard doesn't use it.

Discard is first included in librbd 0.1.2 (which is in Ceph 0.46).
If librbd is too old, leave out qemu_rbd_aio_discard entirely,
so the old behavior is preserved.

Signed-off-by: Josh Durgin <josh.durgin@dreamhost.com>
---
 block/rbd.c |   89 ++++++++++++++++++++++++++++++++++++++++++++++++----------
 1 files changed, 73 insertions(+), 16 deletions(-)
Stefan Hajnoczi - May 1, 2012, 8:39 a.m.
On Tue, May 1, 2012 at 7:16 AM, Josh Durgin <josh.durgin@dreamhost.com> wrote:
> Change the write flag to an operation type in RBDAIOCB, and make the
> buffer optional since discard doesn't use it.
>
> Discard is first included in librbd 0.1.2 (which is in Ceph 0.46).
> If librbd is too old, leave out qemu_rbd_aio_discard entirely,
> so the old behavior is preserved.
>
> Signed-off-by: Josh Durgin <josh.durgin@dreamhost.com>
> ---
>  block/rbd.c |   89 ++++++++++++++++++++++++++++++++++++++++++++++++----------
>  1 files changed, 73 insertions(+), 16 deletions(-)

Reviewed-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Kevin Wolf - May 2, 2012, 4:42 p.m.
Am 01.05.2012 10:39, schrieb Stefan Hajnoczi:
> On Tue, May 1, 2012 at 7:16 AM, Josh Durgin <josh.durgin@dreamhost.com> wrote:
>> Change the write flag to an operation type in RBDAIOCB, and make the
>> buffer optional since discard doesn't use it.
>>
>> Discard is first included in librbd 0.1.2 (which is in Ceph 0.46).
>> If librbd is too old, leave out qemu_rbd_aio_discard entirely,
>> so the old behavior is preserved.
>>
>> Signed-off-by: Josh Durgin <josh.durgin@dreamhost.com>
>> ---
>>  block/rbd.c |   89 ++++++++++++++++++++++++++++++++++++++++++++++++----------
>>  1 files changed, 73 insertions(+), 16 deletions(-)
> 
> Reviewed-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>

Thanks, applied to the block branch.

Kevin

Patch

diff --git a/block/rbd.c b/block/rbd.c
index 6cd8448..1280d66 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -44,6 +44,13 @@ 
  * leading "\".
  */
 
+/* rbd_aio_discard added in 0.1.2 */
+#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2)
+#define LIBRBD_SUPPORTS_DISCARD
+#else
+#undef LIBRBD_SUPPORTS_DISCARD
+#endif
+
 #define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER)
 
 #define RBD_MAX_CONF_NAME_SIZE 128
@@ -53,13 +60,19 @@ 
 #define RBD_MAX_SNAP_NAME_SIZE 128
 #define RBD_MAX_SNAPS 100
 
+typedef enum {
+    RBD_AIO_READ,
+    RBD_AIO_WRITE,
+    RBD_AIO_DISCARD
+} RBDAIOCmd;
+
 typedef struct RBDAIOCB {
     BlockDriverAIOCB common;
     QEMUBH *bh;
     int ret;
     QEMUIOVector *qiov;
     char *bounce;
-    int write;
+    RBDAIOCmd cmd;
     int64_t sector_num;
     int error;
     struct BDRVRBDState *s;
@@ -371,7 +384,8 @@  static void qemu_rbd_complete_aio(RADOSCB *rcb)
 
     r = rcb->ret;
 
-    if (acb->write) {
+    if (acb->cmd == RBD_AIO_WRITE ||
+        acb->cmd == RBD_AIO_DISCARD) {
         if (r < 0) {
             acb->ret = r;
             acb->error = 1;
@@ -605,7 +619,7 @@  static void rbd_aio_bh_cb(void *opaque)
 {
     RBDAIOCB *acb = opaque;
 
-    if (!acb->write) {
+    if (acb->cmd == RBD_AIO_READ) {
         qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
     }
     qemu_vfree(acb->bounce);
@@ -616,12 +630,25 @@  static void rbd_aio_bh_cb(void *opaque)
     qemu_aio_release(acb);
 }
 
-static BlockDriverAIOCB *rbd_aio_rw_vector(BlockDriverState *bs,
-                                           int64_t sector_num,
-                                           QEMUIOVector *qiov,
-                                           int nb_sectors,
-                                           BlockDriverCompletionFunc *cb,
-                                           void *opaque, int write)
+static int rbd_aio_discard_wrapper(rbd_image_t image,
+                                   uint64_t off,
+                                   uint64_t len,
+                                   rbd_completion_t comp)
+{
+#ifdef LIBRBD_SUPPORTS_DISCARD
+    return rbd_aio_discard(image, off, len, comp);
+#else
+    return -ENOTSUP;
+#endif
+}
+
+static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
+                                       int64_t sector_num,
+                                       QEMUIOVector *qiov,
+                                       int nb_sectors,
+                                       BlockDriverCompletionFunc *cb,
+                                       void *opaque,
+                                       RBDAIOCmd cmd)
 {
     RBDAIOCB *acb;
     RADOSCB *rcb;
@@ -633,16 +660,20 @@  static BlockDriverAIOCB *rbd_aio_rw_vector(BlockDriverState *bs,
     BDRVRBDState *s = bs->opaque;
 
     acb = qemu_aio_get(&rbd_aio_pool, bs, cb, opaque);
-    acb->write = write;
+    acb->cmd = cmd;
     acb->qiov = qiov;
-    acb->bounce = qemu_blockalign(bs, qiov->size);
+    if (cmd == RBD_AIO_DISCARD) {
+        acb->bounce = NULL;
+    } else {
+        acb->bounce = qemu_blockalign(bs, qiov->size);
+    }
     acb->ret = 0;
     acb->error = 0;
     acb->s = s;
     acb->cancelled = 0;
     acb->bh = NULL;
 
-    if (write) {
+    if (cmd == RBD_AIO_WRITE) {
         qemu_iovec_to_buffer(acb->qiov, acb->bounce);
     }
 
@@ -664,10 +695,18 @@  static BlockDriverAIOCB *rbd_aio_rw_vector(BlockDriverState *bs,
         goto failed;
     }
 
-    if (write) {
+    switch (cmd) {
+    case RBD_AIO_WRITE:
         r = rbd_aio_write(s->image, off, size, buf, c);
-    } else {
+        break;
+    case RBD_AIO_READ:
         r = rbd_aio_read(s->image, off, size, buf, c);
+        break;
+    case RBD_AIO_DISCARD:
+        r = rbd_aio_discard_wrapper(s->image, off, size, c);
+        break;
+    default:
+        r = -EINVAL;
     }
 
     if (r < 0) {
@@ -690,7 +729,8 @@  static BlockDriverAIOCB *qemu_rbd_aio_readv(BlockDriverState *bs,
                                             BlockDriverCompletionFunc *cb,
                                             void *opaque)
 {
-    return rbd_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
+    return rbd_start_aio(bs, sector_num, qiov, nb_sectors, cb, opaque,
+                         RBD_AIO_READ);
 }
 
 static BlockDriverAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs,
@@ -700,7 +740,8 @@  static BlockDriverAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs,
                                              BlockDriverCompletionFunc *cb,
                                              void *opaque)
 {
-    return rbd_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
+    return rbd_start_aio(bs, sector_num, qiov, nb_sectors, cb, opaque,
+                         RBD_AIO_WRITE);
 }
 
 static int qemu_rbd_co_flush(BlockDriverState *bs)
@@ -850,6 +891,18 @@  static int qemu_rbd_snap_list(BlockDriverState *bs,
     return snap_count;
 }
 
+#ifdef LIBRBD_SUPPORTS_DISCARD
+static BlockDriverAIOCB* qemu_rbd_aio_discard(BlockDriverState *bs,
+                                              int64_t sector_num,
+                                              int nb_sectors,
+                                              BlockDriverCompletionFunc *cb,
+                                              void *opaque)
+{
+    return rbd_start_aio(bs, sector_num, NULL, nb_sectors, cb, opaque,
+                         RBD_AIO_DISCARD);
+}
+#endif
+
 static QEMUOptionParameter qemu_rbd_create_options[] = {
     {
      .name = BLOCK_OPT_SIZE,
@@ -880,6 +933,10 @@  static BlockDriver bdrv_rbd = {
     .bdrv_aio_writev        = qemu_rbd_aio_writev,
     .bdrv_co_flush_to_disk  = qemu_rbd_co_flush,
 
+#ifdef LIBRBD_SUPPORTS_DISCARD
+    .bdrv_aio_discard       = qemu_rbd_aio_discard,
+#endif
+
     .bdrv_snapshot_create   = qemu_rbd_snap_create,
     .bdrv_snapshot_delete   = qemu_rbd_snap_remove,
     .bdrv_snapshot_list     = qemu_rbd_snap_list,