@@ -360,6 +360,22 @@ use_current_buf:
return s->bjnl.buf;
}
+/* Cancel an OP_BJNL_FLUSH request: unlink the acb from the driver's
+ * bjnl.queued_bufs queue and release it. */
+static void fvd_aio_cancel_bjnl_flush(FvdAIOCB * acb)
+{
+    BDRVFvdState *state = acb->common.bs->opaque;
+
+    QTAILQ_REMOVE(&state->bjnl.queued_bufs, acb, jcb.bjnl_next_queued_buf);
+    my_qemu_aio_release(acb);
+}
+
+static void fvd_aio_cancel_bjnl_buf_write(FvdAIOCB * acb)
+{
+    /* OP_BJNL_BUF_WRITE is never exposed to any external entity, so this
+     * cancel handler should never be invoked; if it is reached anyway, the
+     * process is aborted. Internal cancellation of OP_BJNL_BUF_WRITE is
+     * handled by bjnl_sync_flush() instead. */
+    abort();
+}
+
static void bjnl_clean_buf_timer_cb(BlockDriverState * bs)
{
BDRVFvdState *s = bs->opaque;
@@ -188,6 +188,30 @@ static inline FvdAIOCB *init_load_acb(FvdAIOCB * parent_acb,
return acb;
}
+/* Cancel an OP_WRAPPER request: cancel and delete the bottom half stored
+ * in acb->wrapper.bh, then release the acb. */
+static void fvd_aio_cancel_wrapper(FvdAIOCB * acb)
+{
+    /* Cancel the bottom half before deleting it. */
+    qemu_bh_cancel(acb->wrapper.bh);
+    qemu_bh_delete(acb->wrapper.bh);
+
+    my_qemu_aio_release(acb);
+}
+
+/* Cancel an OP_LOAD_COMPACT request.
+ *
+ * Every non-NULL child request in acb->load.children is cancelled and the
+ * children array is freed; the embedded load.one_child request is cancelled
+ * if it is non-NULL; finally the acb itself is released. */
+static void fvd_aio_cancel_load_compact(FvdAIOCB * acb)
+{
+    if (acb->load.children != NULL) {
+        int idx = 0;
+
+        while (idx < acb->load.num_children) {
+            if (acb->load.children[idx].hd_acb != NULL) {
+                bdrv_aio_cancel(acb->load.children[idx].hd_acb);
+            }
+            idx++;
+        }
+        my_qemu_free(acb->load.children);
+    }
+
+    if (acb->load.one_child.hd_acb != NULL) {
+        bdrv_aio_cancel(acb->load.one_child.hd_acb);
+    }
+
+    my_qemu_aio_release(acb);
+}
+
static inline int load_create_one_child(bool count_only, bool empty,
QEMUIOVector * orig_qiov, int *iov_index, size_t *iov_left,
uint8_t **iov_buf, int64_t start_sec, int sectors_in_region,
@@ -11,6 +11,73 @@
*
*/
+/* Forward declarations. The fvd_aio_cancel_*() handlers declared here are
+ * invoked by fvd_aio_cancel() below, one per operation type. */
+static void fvd_aio_cancel_bjnl_buf_write(FvdAIOCB * acb);
+static void fvd_aio_cancel_bjnl_flush(FvdAIOCB * acb);
+static void fvd_aio_cancel_read(FvdAIOCB * acb);
+static void fvd_aio_cancel_write(FvdAIOCB * acb);
+static void fvd_aio_cancel_copy(FvdAIOCB * acb);
+static void fvd_aio_cancel_load_compact(FvdAIOCB * acb);
+static void fvd_aio_cancel_store_compact(FvdAIOCB * acb);
+static void fvd_aio_cancel_wrapper(FvdAIOCB * acb);
+static void flush_metadata_to_disk_on_exit (BlockDriverState *bs);
+
+/* Cancel an OP_FLUSH request: cancel the data and metadata sub-requests
+ * (whichever of flush.data_acb / flush.metadata_acb is non-NULL), then
+ * release the acb. */
+static void fvd_aio_cancel_flush(FvdAIOCB * acb)
+{
+    if (acb->flush.data_acb != NULL) {
+        bdrv_aio_cancel(acb->flush.data_acb);
+    }
+
+    if (acb->flush.metadata_acb != NULL) {
+        bdrv_aio_cancel(acb->flush.metadata_acb);
+    }
+
+    my_qemu_aio_release(acb);
+}
+
+/* Cancel callback for FVD requests (installed as fvd_aio_pool.cancel).
+ *
+ * Recovers the enclosing FvdAIOCB from the generic BlockDriverAIOCB, marks
+ * it with cancel_in_progress, and dispatches to the cancel handler that
+ * matches the request's operation type. An operation type without a handler
+ * aborts the process rather than returning silently, because returning
+ * would leave the request neither cancelled nor released. */
+static void fvd_aio_cancel(BlockDriverAIOCB * blockacb)
+{
+    FvdAIOCB *acb = container_of(blockacb, FvdAIOCB, common);
+
+    QDEBUG("CANCEL: acb%llu-%p\n", acb->uuid, acb);
+    acb->cancel_in_progress = true;
+
+    switch (acb->type) {
+    case OP_READ:
+        fvd_aio_cancel_read(acb);
+        break;
+
+    case OP_WRITE:
+        fvd_aio_cancel_write(acb);
+        break;
+
+    case OP_COPY:
+        fvd_aio_cancel_copy(acb);
+        break;
+
+    case OP_LOAD_COMPACT:
+        fvd_aio_cancel_load_compact(acb);
+        break;
+
+    case OP_STORE_COMPACT:
+        fvd_aio_cancel_store_compact(acb);
+        break;
+
+    case OP_WRAPPER:
+        fvd_aio_cancel_wrapper(acb);
+        break;
+
+    case OP_FLUSH:
+        fvd_aio_cancel_flush(acb);
+        break;
+
+    case OP_BJNL_BUF_WRITE:
+        fvd_aio_cancel_bjnl_buf_write(acb);
+        break;
+
+    case OP_BJNL_FLUSH:
+        fvd_aio_cancel_bjnl_flush(acb);
+        break;
+
+    default:
+        /* NOTE(review): assumes the cases above cover every valid operation
+         * type; confirm against the enum definition. */
+        abort();
+    }
+}
+
static void fvd_close(BlockDriverState * bs)
{
}
@@ -502,3 +502,40 @@ static inline void calc_read_region(BDRVFvdState * s, int64_t sector_num,
*p_first_sec_in_backing = first_sec_in_backing;
*p_last_sec_in_backing = last_sec_in_backing;
}
+
+/* Cancel an OP_READ request.
+ *
+ * The read_backing and read_fvd sub-requests are cancelled first (when
+ * their hd_acb is non-NULL); afterwards their iov buffers are freed (when
+ * non-NULL) and the acb is released. */
+static void fvd_aio_cancel_read(FvdAIOCB * acb)
+{
+    /* Step 1: cancel the child requests. */
+    if (acb->read.read_backing.hd_acb != NULL) {
+        bdrv_aio_cancel(acb->read.read_backing.hd_acb);
+    }
+    if (acb->read.read_fvd.hd_acb != NULL) {
+        bdrv_aio_cancel(acb->read.read_fvd.hd_acb);
+    }
+
+    /* Step 2: free the buffers attached to the child requests. */
+    if (acb->read.read_backing.iov.iov_base != NULL) {
+        my_qemu_vfree(acb->read.read_backing.iov.iov_base);
+    }
+    if (acb->read.read_fvd.iov.iov_base != NULL) {
+        my_qemu_vfree(acb->read.read_fvd.iov.iov_base);
+    }
+
+    /* Step 3: release the request itself. */
+    my_qemu_aio_release(acb);
+}
+
+/* Cancel an OP_COPY request.
+ *
+ * Cancels the child request if one is set, removes the acb from the
+ * copy_lock list (restarting dependent writes) if it is linked there, frees
+ * the copy buffer, and releases the acb. When the completion callback is
+ * not prefetch_null_cb the request is a copy-on-read operation, so its size
+ * in bytes is subtracted from outstanding_copy_on_read_data. */
+static void fvd_aio_cancel_copy(FvdAIOCB * acb)
+{
+    BDRVFvdState *state = acb->common.bs->opaque;
+
+    if (acb->copy.hd_acb != NULL) {
+        bdrv_aio_cancel(acb->copy.hd_acb);
+    }
+
+    /* A non-NULL le_prev means the acb is currently on the list. */
+    if (acb->copy_lock.next.le_prev) {
+        QLIST_REMOVE(acb, copy_lock.next);
+        restart_dependent_writes(acb);
+    }
+
+    my_qemu_vfree(acb->copy.buf);
+
+    if (acb->common.cb != prefetch_null_cb) {
+        /* This is a copy-on-read operation. */
+        state->outstanding_copy_on_read_data -= acb->nb_sectors * 512;
+    }
+
+    my_qemu_aio_release(acb);
+}
@@ -477,3 +477,34 @@ static inline FvdAIOCB *init_store_acb(int soft_write,
COPY_UUID(acb, parent_acb);
return acb;
}
+
+/* Cancel an OP_STORE_COMPACT request.
+ *
+ * Cancels every non-NULL child request in acb->store.children (then frees
+ * the array) and the embedded store.one_child request. If a journal request
+ * (jcb.hd_acb) is set, it is cancelled too, and ujnl_free_journal_sectors()
+ * is called when use_bjnl is false. The journal buffer is freed, the acb is
+ * unlinked from the ujnl_next_wait4_recycle list if it is on it, and the
+ * acb is released. */
+static void fvd_aio_cancel_store_compact(FvdAIOCB * acb)
+{
+    if (acb->store.children != NULL) {
+        int idx = 0;
+
+        while (idx < acb->store.num_children) {
+            if (acb->store.children[idx].hd_acb != NULL) {
+                bdrv_aio_cancel(acb->store.children[idx].hd_acb);
+            }
+            idx++;
+        }
+        my_qemu_free(acb->store.children);
+    }
+
+    if (acb->store.one_child.hd_acb != NULL) {
+        bdrv_aio_cancel(acb->store.one_child.hd_acb);
+    }
+
+    if (acb->jcb.hd_acb != NULL) {
+        BDRVFvdState *state;
+
+        bdrv_aio_cancel(acb->jcb.hd_acb);
+        state = acb->common.bs->opaque;
+        if (!state->use_bjnl) {
+            ujnl_free_journal_sectors(acb->common.bs);
+        }
+    }
+
+    if (acb->jcb.iov.iov_base) {
+        my_qemu_vfree(acb->jcb.iov.iov_base);
+    }
+
+    /* A non-NULL le_prev means the acb is currently on the list. */
+    if (acb->jcb.ujnl_next_wait4_recycle.le_prev != NULL) {
+        QLIST_REMOVE(acb, jcb.ujnl_next_wait4_recycle);
+    }
+
+    my_qemu_aio_release(acb);
+}
@@ -15,7 +15,7 @@ static void write_metadata_to_journal(struct FvdAIOCB *acb, bool update_bitmap);
static int do_aio_write(struct FvdAIOCB *acb);
static void restart_dependent_writes(struct FvdAIOCB *acb);
static void free_write_resource(struct FvdAIOCB *acb);
-static void ujnl_free_journal_sectors(BlockDriverState * bs);
+static void ujnl_free_journal_sectors(BlockDriverState *bs);
static inline BlockDriverAIOCB *store_data(int soft_write,
FvdAIOCB * parent_acb, BlockDriverState * bs,
int64_t sector_num, QEMUIOVector * orig_qiov, int nb_sectors,
@@ -106,6 +106,27 @@ slow_path:
return &acb->common;
}
+/* Cancel an OP_WRITE request.
+ *
+ * Cancels the data write (write.hd_acb) and the journal request
+ * (jcb.hd_acb) when set; after cancelling the journal request,
+ * ujnl_free_journal_sectors() is called when use_bjnl is false. The acb is
+ * then unlinked from the ujnl_next_wait4_recycle and next_dependent_write
+ * lists if it is on them, and handed to free_write_resource() for the
+ * remaining cleanup (presumably including release of the acb, which is not
+ * done here -- confirm against free_write_resource()). */
+static void fvd_aio_cancel_write(FvdAIOCB * acb)
+{
+    if (acb->write.hd_acb != NULL) {
+        bdrv_aio_cancel(acb->write.hd_acb);
+    }
+
+    if (acb->jcb.hd_acb != NULL) {
+        BDRVFvdState *state;
+
+        bdrv_aio_cancel(acb->jcb.hd_acb);
+        state = acb->common.bs->opaque;
+        if (!state->use_bjnl) {
+            ujnl_free_journal_sectors(acb->common.bs);
+        }
+    }
+
+    /* A non-NULL le_prev means the acb is currently on that list. */
+    if (acb->jcb.ujnl_next_wait4_recycle.le_prev != NULL) {
+        QLIST_REMOVE(acb, jcb.ujnl_next_wait4_recycle);
+    }
+    if (acb->write.next_dependent_write.le_prev != NULL) {
+        QLIST_REMOVE(acb, write.next_dependent_write);
+    }
+
+    free_write_resource(acb);
+}
+
static void free_write_resource(FvdAIOCB * acb)
{
if (acb->write.next_write_lock.le_prev) {
@@ -23,6 +23,16 @@
#include "block/fvd.h"
+/* Compile-time debugging switches. ENABLE_TRACE_IO is defined by default;
+ * DEBUG_MEMORY_LEAK stays undefined unless the line below is uncommented. */
+#define ENABLE_TRACE_IO
+//#define DEBUG_MEMORY_LEAK
+
+/* DEBUG_MEMORY_LEAK is only honored when FVD_DEBUG is defined. */
+#ifndef FVD_DEBUG
+#undef DEBUG_MEMORY_LEAK
+#endif
+/* ENABLE_TRACE_IO is only honored when ENABLE_QDEBUG is defined. */
+#ifndef ENABLE_QDEBUG
+#undef ENABLE_TRACE_IO
+#endif
+
/* Use include to avoid exposing too many FVD symbols, and to allow inline
* function optimization. */
#include "block/fvd-debug.c"
@@ -41,6 +51,11 @@
#include "block/fvd-prefetch.c"
#include "block/fvd-update.c"
+/* AIO pool descriptor for FVD requests: control blocks are sized as
+ * sizeof(FvdAIOCB), and cancellation is routed to fvd_aio_cancel(). */
+static AIOPool fvd_aio_pool = {
+    .aiocb_size = sizeof(FvdAIOCB),
+    .cancel = fvd_aio_cancel,
+};
+
static BlockDriver bdrv_fvd = {
.format_name = "fvd",
.instance_size = sizeof(BDRVFvdState),
@@ -62,6 +77,8 @@ static BlockDriver bdrv_fvd = {
static void bdrv_fvd_init(void)
{
+    /* Seed the C library PRNG; random numbers are used in fvd-prefetch.c.
+     * The seed is mixed in unsigned arithmetic: getpid() * 987654 exceeds
+     * INT_MAX for ordinary PIDs, and signed overflow is undefined behavior,
+     * whereas unsigned arithmetic wraps around. */
+    unsigned int pid = (unsigned int) getpid();
+    srand((unsigned int) time(NULL) + pid + pid * 987654u
+          + (unsigned int) rand());
bdrv_register(&bdrv_fvd);
}
@@ -84,3 +101,11 @@ extern QTAILQ_HEAD(, BlockDriverState) bdrv_states;
}
}
}
+
+/*
+ * TODOs:
+ * - Cap the prefetch throughput at the upper limit. See Section 3.4.2 of
+ * the FVD-cow paper. Related metadata are
+ * FvdHeader.prefetch_max_read_throughput and
+ * FvdHeader.prefetch_max_write_throughput.
+ */
This patch is part of the Fast Virtual Disk (FVD) proposal. See http://wiki.qemu.org/Features/FVD. This patch adds support for aio_cancel to FVD. FVD faithfully cleans up all resources upon aio_cancel. Signed-off-by: Chunqiang Tang <ctang@us.ibm.com> --- block/fvd-journal-buf.c | 16 +++++++++++ block/fvd-load.c | 24 +++++++++++++++++ block/fvd-misc.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++ block/fvd-read.c | 37 ++++++++++++++++++++++++++ block/fvd-store.c | 31 +++++++++++++++++++++ block/fvd-write.c | 23 +++++++++++++++- block/fvd.c | 25 +++++++++++++++++ 7 files changed, 222 insertions(+), 1 deletions(-)