diff mbox

[v3,1/2] rbd: add an asynchronous flush

Message ID 1365064515-23222-1-git-send-email-josh.durgin@inktank.com
State New
Headers show

Commit Message

Josh Durgin April 4, 2013, 8:35 a.m. UTC
The existing bdrv_co_flush_to_disk implementation uses rbd_flush(),
which is sychronous and causes the main qemu thread to block until it
is complete. This results in unresponsiveness and extra latency for
the guest.

Fix this by using an asynchronous version of flush.  This was added to
librbd with a special #define to indicate its presence, since it will
be backported to stable versions. Thus, there is no need to check the
version of librbd.

Implement this as bdrv_aio_flush, since it matches other aio functions
in the rbd block driver, and leave out bdrv_co_flush_to_disk when the
asynchronous version is available.

Reported-by: Oliver Francke <oliver@filoo.de>
Signed-off-by: Josh Durgin <josh.durgin@inktank.com>
---

Since v2:
 * remove #ifdefs around bdrv_aio_flush and qemu_rbd_aio_flush, since
   they are dealt with at runtime in the next patch

 block/rbd.c |   30 ++++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)
diff mbox

Patch

diff --git a/block/rbd.c b/block/rbd.c
index 1a8ea6d..037d82b 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -63,7 +63,8 @@ 
 typedef enum {
     RBD_AIO_READ,
     RBD_AIO_WRITE,
-    RBD_AIO_DISCARD
+    RBD_AIO_DISCARD,
+    RBD_AIO_FLUSH
 } RBDAIOCmd;
 
 typedef struct RBDAIOCB {
@@ -379,8 +380,7 @@  static void qemu_rbd_complete_aio(RADOSCB *rcb)
 
     r = rcb->ret;
 
-    if (acb->cmd == RBD_AIO_WRITE ||
-        acb->cmd == RBD_AIO_DISCARD) {
+    if (acb->cmd != RBD_AIO_READ) {
         if (r < 0) {
             acb->ret = r;
             acb->error = 1;
@@ -659,6 +659,16 @@  static int rbd_aio_discard_wrapper(rbd_image_t image,
 #endif
 }
 
+static int rbd_aio_flush_wrapper(rbd_image_t image,
+                                 rbd_completion_t comp)
+{
+#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
+    return rbd_aio_flush(image, comp);
+#else
+    return -ENOTSUP;
+#endif
+}
+
 static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
                                        int64_t sector_num,
                                        QEMUIOVector *qiov,
@@ -679,7 +689,7 @@  static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
     acb = qemu_aio_get(&rbd_aiocb_info, bs, cb, opaque);
     acb->cmd = cmd;
     acb->qiov = qiov;
-    if (cmd == RBD_AIO_DISCARD) {
+    if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
         acb->bounce = NULL;
     } else {
         acb->bounce = qemu_blockalign(bs, qiov->size);
@@ -723,6 +733,9 @@  static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
     case RBD_AIO_DISCARD:
         r = rbd_aio_discard_wrapper(s->image, off, size, c);
         break;
+    case RBD_AIO_FLUSH:
+        r = rbd_aio_flush_wrapper(s->image, c);
+        break;
     default:
         r = -EINVAL;
     }
@@ -762,6 +775,13 @@  static BlockDriverAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs,
                          RBD_AIO_WRITE);
 }
 
+static BlockDriverAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs,
+                                            BlockDriverCompletionFunc *cb,
+                                            void *opaque)
+{
+    return rbd_start_aio(bs, 0, NULL, 0, cb, opaque, RBD_AIO_FLUSH);
+}
+
 static int qemu_rbd_co_flush(BlockDriverState *bs)
 {
 #if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 1)
@@ -949,6 +969,8 @@  static BlockDriver bdrv_rbd = {
 
     .bdrv_aio_readv         = qemu_rbd_aio_readv,
     .bdrv_aio_writev        = qemu_rbd_aio_writev,
+
+    .bdrv_aio_flush         = qemu_rbd_aio_flush,
     .bdrv_co_flush_to_disk  = qemu_rbd_co_flush,
 
 #ifdef LIBRBD_SUPPORTS_DISCARD