Patchwork [19/26] FVD: add support for aio_cancel

login
register
mail settings
Submitter Chunqiang Tang
Date Feb. 25, 2011, 10:37 p.m.
Message ID <1298673486-3573-19-git-send-email-ctang@us.ibm.com>
Download mbox | patch
Permalink /patch/84618/
State New
Headers show

Comments

Chunqiang Tang - Feb. 25, 2011, 10:37 p.m.
This patch is part of the Fast Virtual Disk (FVD) proposal.
See http://wiki.qemu.org/Features/FVD.

This patch adds support for aio_cancel to FVD. FVD faithfully cleans up
all resources upon aio_cancel.

Signed-off-by: Chunqiang Tang <ctang@us.ibm.com>
---
 block/fvd-journal-buf.c |   16 +++++++++++
 block/fvd-load.c        |   24 +++++++++++++++++
 block/fvd-misc.c        |   67 +++++++++++++++++++++++++++++++++++++++++++++++
 block/fvd-read.c        |   37 ++++++++++++++++++++++++++
 block/fvd-store.c       |   31 +++++++++++++++++++++
 block/fvd-write.c       |   23 +++++++++++++++-
 block/fvd.c             |   25 +++++++++++++++++
 7 files changed, 222 insertions(+), 1 deletions(-)

Patch

diff --git a/block/fvd-journal-buf.c b/block/fvd-journal-buf.c
index e99a585..c6b60f9 100644
--- a/block/fvd-journal-buf.c
+++ b/block/fvd-journal-buf.c
@@ -360,6 +360,22 @@  use_current_buf:
     return s->bjnl.buf;
 }
 
+static void fvd_aio_cancel_bjnl_flush(FvdAIOCB * acb)
+{
+    BlockDriverState *bs = acb->common.bs;
+    BDRVFvdState *s = bs->opaque;
+    QTAILQ_REMOVE(&s->bjnl.queued_bufs, acb, jcb.bjnl_next_queued_buf);
+    my_qemu_aio_release(acb);
+}
+
+static void fvd_aio_cancel_bjnl_buf_write(FvdAIOCB * acb)
+{
+    /* OP_BJNL_BUF_WRITE is never exposed to any external entity, so this
+     * function should never be invoked. Internal cancellation of
+     * OP_BJNL_BUF_WRITE is handled by bjnl_sync_flush(). */
+    abort();
+}
+
 static void bjnl_clean_buf_timer_cb(BlockDriverState * bs)
 {
     BDRVFvdState *s = bs->opaque;
diff --git a/block/fvd-load.c b/block/fvd-load.c
index 88e5fb4..9789cc5 100644
--- a/block/fvd-load.c
+++ b/block/fvd-load.c
@@ -188,6 +188,30 @@  static inline FvdAIOCB *init_load_acb(FvdAIOCB * parent_acb,
     return acb;
 }
 
+static void fvd_aio_cancel_wrapper(FvdAIOCB * acb)
+{
+    qemu_bh_cancel(acb->wrapper.bh);
+    qemu_bh_delete(acb->wrapper.bh);
+    my_qemu_aio_release(acb);
+}
+
+static void fvd_aio_cancel_load_compact(FvdAIOCB * acb)
+{
+    if (acb->load.children) {
+        int i;
+        for (i = 0; i < acb->load.num_children; i++) {
+            if (acb->load.children[i].hd_acb) {
+                bdrv_aio_cancel(acb->load.children[i].hd_acb);
+            }
+        }
+        my_qemu_free(acb->load.children);
+    }
+    if (acb->load.one_child.hd_acb) {
+        bdrv_aio_cancel(acb->load.one_child.hd_acb);
+    }
+    my_qemu_aio_release(acb);
+}
+
 static inline int load_create_one_child(bool count_only, bool empty,
                     QEMUIOVector * orig_qiov, int *iov_index, size_t *iov_left,
                     uint8_t **iov_buf, int64_t start_sec, int sectors_in_region,
diff --git a/block/fvd-misc.c b/block/fvd-misc.c
index f4e1038..a42bfac 100644
--- a/block/fvd-misc.c
+++ b/block/fvd-misc.c
@@ -11,6 +11,73 @@ 
  *
  */
 
+static void fvd_aio_cancel_bjnl_buf_write(FvdAIOCB * acb);
+static void fvd_aio_cancel_bjnl_flush(FvdAIOCB * acb);
+static void fvd_aio_cancel_read(FvdAIOCB * acb);
+static void fvd_aio_cancel_write(FvdAIOCB * acb);
+static void fvd_aio_cancel_copy(FvdAIOCB * acb);
+static void fvd_aio_cancel_load_compact(FvdAIOCB * acb);
+static void fvd_aio_cancel_store_compact(FvdAIOCB * acb);
+static void fvd_aio_cancel_wrapper(FvdAIOCB * acb);
+static void flush_metadata_to_disk_on_exit (BlockDriverState *bs);
+
+static void fvd_aio_cancel_flush(FvdAIOCB * acb)
+{
+    if (acb->flush.data_acb) {
+        bdrv_aio_cancel(acb->flush.data_acb);
+    }
+    if (acb->flush.metadata_acb) {
+        bdrv_aio_cancel(acb->flush.metadata_acb);
+    }
+    my_qemu_aio_release(acb);
+}
+
+static void fvd_aio_cancel(BlockDriverAIOCB * blockacb)
+{
+    FvdAIOCB *acb = container_of(blockacb, FvdAIOCB, common);
+
+    QDEBUG("CANCEL: acb%llu-%p\n", acb->uuid, acb);
+    acb->cancel_in_progress = true;
+
+    switch (acb->type) {
+    case OP_READ:
+        fvd_aio_cancel_read(acb);
+        break;
+
+    case OP_WRITE:
+        fvd_aio_cancel_write(acb);
+        break;
+
+    case OP_COPY:
+        fvd_aio_cancel_copy(acb);
+        break;
+
+    case OP_LOAD_COMPACT:
+        fvd_aio_cancel_load_compact(acb);
+        break;
+
+    case OP_STORE_COMPACT:
+        fvd_aio_cancel_store_compact(acb);
+        break;
+
+    case OP_WRAPPER:
+        fvd_aio_cancel_wrapper(acb);
+        break;
+
+    case OP_FLUSH:
+        fvd_aio_cancel_flush(acb);
+        break;
+
+    case OP_BJNL_BUF_WRITE:
+        fvd_aio_cancel_bjnl_buf_write(acb);
+        break;
+
+    case OP_BJNL_FLUSH:
+        fvd_aio_cancel_bjnl_flush(acb);
+        break;
+    }
+}
+
 static void fvd_close(BlockDriverState * bs)
 {
 }
diff --git a/block/fvd-read.c b/block/fvd-read.c
index 675af9e..b18fdf2 100644
--- a/block/fvd-read.c
+++ b/block/fvd-read.c
@@ -502,3 +502,40 @@  static inline void calc_read_region(BDRVFvdState * s, int64_t sector_num,
     *p_first_sec_in_backing = first_sec_in_backing;
     *p_last_sec_in_backing = last_sec_in_backing;
 }
+
+static void fvd_aio_cancel_read(FvdAIOCB * acb)
+{
+    if (acb->read.read_backing.hd_acb) {
+        bdrv_aio_cancel(acb->read.read_backing.hd_acb);
+    }
+    if (acb->read.read_fvd.hd_acb) {
+        bdrv_aio_cancel(acb->read.read_fvd.hd_acb);
+    }
+    if (acb->read.read_backing.iov.iov_base) {
+        my_qemu_vfree(acb->read.read_backing.iov.iov_base);
+    }
+    if (acb->read.read_fvd.iov.iov_base) {
+        my_qemu_vfree(acb->read.read_fvd.iov.iov_base);
+    }
+    my_qemu_aio_release(acb);
+}
+
+static void fvd_aio_cancel_copy(FvdAIOCB * acb)
+{
+    BlockDriverState *bs = acb->common.bs;
+    BDRVFvdState *s = bs->opaque;
+
+    if (acb->copy.hd_acb) {
+        bdrv_aio_cancel(acb->copy.hd_acb);
+    }
+    if (acb->copy_lock.next.le_prev != NULL) {
+        QLIST_REMOVE(acb, copy_lock.next);
+        restart_dependent_writes(acb);
+    }
+    my_qemu_vfree(acb->copy.buf);
+    if (acb->common.cb != prefetch_null_cb) {
+        /* This is a copy-on-read operation. */
+        s->outstanding_copy_on_read_data -= acb->nb_sectors * 512;
+    }
+    my_qemu_aio_release(acb);
+}
diff --git a/block/fvd-store.c b/block/fvd-store.c
index fe670eb..ec23fd7 100644
--- a/block/fvd-store.c
+++ b/block/fvd-store.c
@@ -477,3 +477,34 @@  static inline FvdAIOCB *init_store_acb(int soft_write,
     COPY_UUID(acb, parent_acb);
     return acb;
 }
+
+static void fvd_aio_cancel_store_compact(FvdAIOCB * acb)
+{
+    if (acb->store.children) {
+        int i;
+        for (i = 0; i < acb->store.num_children; i++) {
+            if (acb->store.children[i].hd_acb) {
+                bdrv_aio_cancel(acb->store.children[i].hd_acb);
+            }
+        }
+        my_qemu_free(acb->store.children);
+    }
+    if (acb->store.one_child.hd_acb) {
+        bdrv_aio_cancel(acb->store.one_child.hd_acb);
+    }
+    if (acb->jcb.hd_acb) {
+        bdrv_aio_cancel(acb->jcb.hd_acb);
+        BDRVFvdState *s = acb->common.bs->opaque;
+        if (!s->use_bjnl) {
+            ujnl_free_journal_sectors(acb->common.bs);
+        }
+    }
+    if (acb->jcb.iov.iov_base != NULL) {
+        my_qemu_vfree(acb->jcb.iov.iov_base);
+    }
+    if (acb->jcb.ujnl_next_wait4_recycle.le_prev) {
+        QLIST_REMOVE(acb, jcb.ujnl_next_wait4_recycle);
+    }
+
+    my_qemu_aio_release(acb);
+}
diff --git a/block/fvd-write.c b/block/fvd-write.c
index 623ec83..a74dc5d 100644
--- a/block/fvd-write.c
+++ b/block/fvd-write.c
@@ -15,7 +15,7 @@  static void write_metadata_to_journal(struct FvdAIOCB *acb, bool update_bitmap);
 static int do_aio_write(struct FvdAIOCB *acb);
 static void restart_dependent_writes(struct FvdAIOCB *acb);
 static void free_write_resource(struct FvdAIOCB *acb);
-static void ujnl_free_journal_sectors(BlockDriverState * bs);
+static void ujnl_free_journal_sectors(BlockDriverState *bs);
 static inline BlockDriverAIOCB *store_data(int soft_write,
                 FvdAIOCB * parent_acb, BlockDriverState * bs,
                 int64_t sector_num, QEMUIOVector * orig_qiov, int nb_sectors,
@@ -106,6 +106,27 @@  slow_path:
     return &acb->common;
 }
 
+static void fvd_aio_cancel_write(FvdAIOCB * acb)
+{
+    if (acb->write.hd_acb) {
+        bdrv_aio_cancel(acb->write.hd_acb);
+    }
+    if (acb->jcb.hd_acb) {
+        bdrv_aio_cancel(acb->jcb.hd_acb);
+        BDRVFvdState *s = acb->common.bs->opaque;
+        if (!s->use_bjnl) {
+            ujnl_free_journal_sectors(acb->common.bs);
+        }
+    }
+    if (acb->jcb.ujnl_next_wait4_recycle.le_prev) {
+        QLIST_REMOVE(acb, jcb.ujnl_next_wait4_recycle);
+    }
+    if (acb->write.next_dependent_write.le_prev) {
+        QLIST_REMOVE(acb, write.next_dependent_write);
+    }
+    free_write_resource(acb);
+}
+
 static void free_write_resource(FvdAIOCB * acb)
 {
     if (acb->write.next_write_lock.le_prev) {
diff --git a/block/fvd.c b/block/fvd.c
index 2402a94..c779d65 100644
--- a/block/fvd.c
+++ b/block/fvd.c
@@ -23,6 +23,16 @@ 
 
 #include "block/fvd.h"
 
+#define ENABLE_TRACE_IO
+//#define DEBUG_MEMORY_LEAK
+
+#ifndef FVD_DEBUG
+#undef DEBUG_MEMORY_LEAK
+#endif
+#ifndef ENABLE_QDEBUG
+#undef ENABLE_TRACE_IO
+#endif
+
 /* Use include to avoid exposing too many FVD symbols, and to allow inline
  * function optimization. */
 #include "block/fvd-debug.c"
@@ -41,6 +51,11 @@ 
 #include "block/fvd-prefetch.c"
 #include "block/fvd-update.c"
 
+static AIOPool fvd_aio_pool = {
+    .aiocb_size = sizeof(FvdAIOCB),
+    .cancel = fvd_aio_cancel,
+};
+
 static BlockDriver bdrv_fvd = {
     .format_name = "fvd",
     .instance_size = sizeof(BDRVFvdState),
@@ -62,6 +77,8 @@  static BlockDriver bdrv_fvd = {
 
 static void bdrv_fvd_init(void)
 {
+    /* Random numbers are used in fvd-prefetch.c. */
+    srand(time(NULL) + getpid() + getpid() * 987654 + rand());
     bdrv_register(&bdrv_fvd);
 }
 
@@ -84,3 +101,11 @@  extern QTAILQ_HEAD(, BlockDriverState) bdrv_states;
         }
     }
 }
+
+/*
+ * TODOs:
+ * - Cap the prefetch throughput at the upper limit. See Section 3.4.2 of
+ * the FVD-cow paper.  The related metadata fields are
+ * FvdHeader.prefetch_max_read_throughput and
+ * FvdHeader.prefetch_max_write_throughput.
+ */