Patchwork [RFC,v3,4/4] qcow2: Preliminary block-queue support

login
register
mail settings
Submitter Kevin Wolf
Date Nov. 30, 2010, 12:48 p.m.
Message ID <1291121332-10588-5-git-send-email-kwolf@redhat.com>
Download mbox | patch
Permalink /patch/73734/
State New
Headers show

Comments

Kevin Wolf - Nov. 30, 2010, 12:48 p.m.
This is a first hack that makes qcow2 use block-queue. It will be reworked to
pass the blkqueue context down to all functions that queue requests, instead of
using a global per-image context.
---
 block/qcow2-cluster.c  |   39 +++++++++++-----------
 block/qcow2-refcount.c |   62 ++++++++++++++++++------------------
 block/qcow2.c          |   83 +++++++++++++++++++++++++++++++++++++++++++++++-
 block/qcow2.h          |    5 +++
 cpus.c                 |    8 +++--
 qemu-common.h          |    3 ++
 qemu-tool.c            |    5 +++
 sysemu.h               |    1 -
 8 files changed, 151 insertions(+), 55 deletions(-)

Patch

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index b040208..674699d 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -67,12 +67,13 @@  int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size)
         qemu_free(new_l1_table);
         return new_l1_table_offset;
     }
-    bdrv_flush(bs->file);
+    blkqueue_barrier(s->bq_context);
 
     BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
     for(i = 0; i < s->l1_size; i++)
         new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
-    ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset, new_l1_table, new_l1_size2);
+    ret = blkqueue_pwrite(s->bq_context, new_l1_table_offset, new_l1_table, new_l1_size2);
+    blkqueue_barrier(s->bq_context);
     if (ret < 0)
         goto fail;
     for(i = 0; i < s->l1_size; i++)
@@ -82,7 +83,8 @@  int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size)
     BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
     cpu_to_be32w((uint32_t*)data, new_l1_size);
     cpu_to_be64w((uint64_t*)(data + 4), new_l1_table_offset);
-    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size), data,sizeof(data));
+    ret = blkqueue_pwrite(s->bq_context, offsetof(QCowHeader, l1_size), data, sizeof(data));
+    blkqueue_barrier(s->bq_context);
     if (ret < 0) {
         goto fail;
     }
@@ -185,8 +187,7 @@  static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
     *l2_table = s->l2_cache + (min_index << s->l2_bits);
 
     BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
-    ret = bdrv_pread(bs->file, l2_offset, *l2_table,
-        s->l2_size * sizeof(uint64_t));
+    ret = blkqueue_pread(s->bq_context, l2_offset, *l2_table, s->l2_size * sizeof(uint64_t));
     if (ret < 0) {
         qcow2_l2_cache_reset(bs);
         return ret;
@@ -216,8 +217,8 @@  static int write_l1_entry(BlockDriverState *bs, int l1_index)
     }
 
     BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
-    ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset + 8 * l1_start_index,
-        buf, sizeof(buf));
+    ret = blkqueue_pwrite(s->bq_context, s->l1_table_offset + 8 * l1_start_index, buf, sizeof(buf));
+    blkqueue_barrier(s->bq_context);
     if (ret < 0) {
         return ret;
     }
@@ -252,7 +253,7 @@  static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
     if (l2_offset < 0) {
         return l2_offset;
     }
-    bdrv_flush(bs->file);
+    blkqueue_barrier(s->bq_context);
 
     /* allocate a new entry in the l2 cache */
 
@@ -265,16 +266,15 @@  static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
     } else {
         /* if there was an old l2 table, read it from the disk */
         BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ);
-        ret = bdrv_pread(bs->file, old_l2_offset, l2_table,
-            s->l2_size * sizeof(uint64_t));
+        ret = blkqueue_pread(s->bq_context, old_l2_offset, l2_table, s->l2_size * sizeof(uint64_t));
         if (ret < 0) {
             goto fail;
         }
     }
     /* write the l2 table to the file */
     BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
-    ret = bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
-        s->l2_size * sizeof(uint64_t));
+    ret = blkqueue_pwrite(s->bq_context, l2_offset, l2_table, s->l2_size * sizeof(uint64_t));
+    blkqueue_barrier(s->bq_context);
     if (ret < 0) {
         goto fail;
     }
@@ -394,8 +394,8 @@  static int qcow_read(BlockDriverState *bs, int64_t sector_num,
             memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n);
         } else {
             BLKDBG_EVENT(bs->file, BLKDBG_READ);
-            ret = bdrv_pread(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512);
-            if (ret != n * 512)
+            ret = blkqueue_pread(s->bq_context, cluster_offset + index_in_cluster * 512, buf, n * 512);
+            if (ret < 0)
                 return -1;
             if (s->crypt_method) {
                 qcow2_encrypt_sectors(s, sector_num, buf, buf, n, 0,
@@ -647,11 +647,12 @@  uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
 
     BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
     l2_table[l2_index] = cpu_to_be64(cluster_offset);
-    if (bdrv_pwrite_sync(bs->file,
+    if (blkqueue_pwrite(s->bq_context,
                     l2_offset + l2_index * sizeof(uint64_t),
                     l2_table + l2_index,
                     sizeof(uint64_t)) < 0)
         return 0;
+    blkqueue_barrier(s->bq_context);
 
     return cluster_offset;
 }
@@ -664,6 +665,7 @@  uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
 static int write_l2_entries(BlockDriverState *bs, uint64_t *l2_table,
     uint64_t l2_offset, int l2_index, int num)
 {
+    BDRVQcowState *s = bs->opaque;
     int l2_start_index = l2_index & ~(L1_ENTRIES_PER_SECTOR - 1);
     int start_offset = (8 * l2_index) & ~511;
     int end_offset = (8 * (l2_index + num) + 511) & ~511;
@@ -671,8 +673,7 @@  static int write_l2_entries(BlockDriverState *bs, uint64_t *l2_table,
     int ret;
 
     BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
-    ret = bdrv_pwrite(bs->file, l2_offset + start_offset,
-        &l2_table[l2_start_index], len);
+    ret = blkqueue_pwrite(s->bq_context, l2_offset + start_offset, &l2_table[l2_start_index], len);
     if (ret < 0) {
         return ret;
     }
@@ -733,7 +734,7 @@  int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
      * need to be sure that the refcounts have been increased and COW was
      * handled.
      */
-    bdrv_flush(bs->file);
+    blkqueue_barrier(s->bq_context);
 
     ret = write_l2_entries(bs, l2_table, l2_offset, l2_index, m->nb_clusters);
     if (ret < 0) {
@@ -746,7 +747,7 @@  int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
      * Also flush bs->file to get the right order for L2 and refcount update.
      */
     if (j != 0) {
-        bdrv_flush(bs->file);
+        blkqueue_barrier(s->bq_context);
         for (i = 0; i < j; i++) {
             qcow2_free_any_clusters(bs,
                 be64_to_cpu(old_cluster[i]) & ~QCOW_OFLAG_COPIED, 1);
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index a10453c..ef109e9 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -44,7 +44,7 @@  static int write_refcount_block(BlockDriverState *bs)
     }
 
     BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE);
-    if (bdrv_pwrite_sync(bs->file, s->refcount_block_cache_offset,
+    if (blkqueue_pwrite(s->bq_context, s->refcount_block_cache_offset,
             s->refcount_block_cache, size) < 0)
     {
         return -EIO;
@@ -66,8 +66,7 @@  int qcow2_refcount_init(BlockDriverState *bs)
     s->refcount_table = qemu_malloc(refcount_table_size2);
     if (s->refcount_table_size > 0) {
         BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
-        ret = bdrv_pread(bs->file, s->refcount_table_offset,
-                         s->refcount_table, refcount_table_size2);
+        ret = bdrv_pread(bs->file, s->refcount_table_offset, s->refcount_table, refcount_table_size2);
         if (ret != refcount_table_size2)
             goto fail;
         for(i = 0; i < s->refcount_table_size; i++)
@@ -100,8 +99,7 @@  static int load_refcount_block(BlockDriverState *bs,
     }
 
     BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_LOAD);
-    ret = bdrv_pread(bs->file, refcount_block_offset, s->refcount_block_cache,
-                     s->cluster_size);
+    ret = blkqueue_pread(s->bq_context, refcount_block_offset, s->refcount_block_cache, s->cluster_size);
     if (ret < 0) {
         s->refcount_block_cache_offset = 0;
         return ret;
@@ -262,7 +260,7 @@  static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
             goto fail_block;
         }
 
-        bdrv_flush(bs->file);
+        blkqueue_barrier(s->bq_context);
 
         /* Initialize the new refcount block only after updating its refcount,
          * update_refcount uses the refcount cache itself */
@@ -272,8 +270,8 @@  static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
 
     /* Now the new refcount block needs to be written to disk */
     BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE);
-    ret = bdrv_pwrite_sync(bs->file, new_block, s->refcount_block_cache,
-        s->cluster_size);
+    ret = blkqueue_pwrite(s->bq_context, new_block, s->refcount_block_cache, s->cluster_size);
+    blkqueue_barrier(s->bq_context);
     if (ret < 0) {
         goto fail_block;
     }
@@ -282,9 +280,8 @@  static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
     if (refcount_table_index < s->refcount_table_size) {
         uint64_t data64 = cpu_to_be64(new_block);
         BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_HOOKUP);
-        ret = bdrv_pwrite_sync(bs->file,
-            s->refcount_table_offset + refcount_table_index * sizeof(uint64_t),
-            &data64, sizeof(data64));
+        ret = blkqueue_pwrite(s->bq_context, s->refcount_table_offset + refcount_table_index * sizeof(uint64_t), &data64, sizeof(data64));
+        blkqueue_barrier(s->bq_context);
         if (ret < 0) {
             goto fail_block;
         }
@@ -362,8 +359,8 @@  static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
 
     /* Write refcount blocks to disk */
     BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS);
-    ret = bdrv_pwrite_sync(bs->file, meta_offset, new_blocks,
-        blocks_clusters * s->cluster_size);
+    ret = blkqueue_pwrite(s->bq_context, meta_offset, new_blocks, blocks_clusters * s->cluster_size);
+    blkqueue_barrier(s->bq_context);
     qemu_free(new_blocks);
     if (ret < 0) {
         goto fail_table;
@@ -375,8 +372,8 @@  static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
     }
 
     BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE);
-    ret = bdrv_pwrite_sync(bs->file, table_offset, new_table,
-        table_size * sizeof(uint64_t));
+    ret = blkqueue_pwrite(s->bq_context, table_offset, new_table, table_size * sizeof(uint64_t));
+    blkqueue_barrier(s->bq_context);
     if (ret < 0) {
         goto fail_table;
     }
@@ -390,8 +387,8 @@  static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
     cpu_to_be64w((uint64_t*)data, table_offset);
     cpu_to_be32w((uint32_t*)(data + 8), table_clusters);
     BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE);
-    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, refcount_table_offset),
-        data, sizeof(data));
+    ret = blkqueue_pwrite(s->bq_context, offsetof(QCowHeader, refcount_table_offset), data, sizeof(data));
+    blkqueue_barrier(s->bq_context);
     if (ret < 0) {
         goto fail_table;
     }
@@ -447,9 +444,7 @@  static int write_refcount_block_entries(BlockDriverState *bs,
     size = (last_index - first_index) << REFCOUNT_SHIFT;
 
     BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
-    ret = bdrv_pwrite(bs->file,
-        refcount_block_offset + (first_index << REFCOUNT_SHIFT),
-        &s->refcount_block_cache[first_index], size);
+    ret = blkqueue_pwrite(s->bq_context, refcount_block_offset + (first_index << REFCOUNT_SHIFT), &s->refcount_block_cache[first_index], size);
     if (ret < 0) {
         return ret;
     }
@@ -577,7 +572,7 @@  static int update_cluster_refcount(BlockDriverState *bs,
         return ret;
     }
 
-    bdrv_flush(bs->file);
+    blkqueue_barrier(s->bq_context);
 
     return get_refcount(bs, cluster_index);
 }
@@ -679,7 +674,7 @@  int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
         }
     }
 
-    bdrv_flush(bs->file);
+    blkqueue_barrier(s->bq_context);
     return offset;
 }
 
@@ -772,8 +767,7 @@  int qcow2_update_snapshot_refcount(BlockDriverState *bs,
             l1_table = NULL;
         }
         l1_allocated = 1;
-        if (bdrv_pread(bs->file, l1_table_offset,
-                       l1_table, l1_size2) != l1_size2)
+        if (blkqueue_pread(s->bq_context, l1_table_offset, l1_table, l1_size2) < 0)
             goto fail;
         for(i = 0;i < l1_size; i++)
             be64_to_cpus(&l1_table[i]);
@@ -792,7 +786,7 @@  int qcow2_update_snapshot_refcount(BlockDriverState *bs,
             old_l2_offset = l2_offset;
             l2_offset &= ~QCOW_OFLAG_COPIED;
             l2_modified = 0;
-            if (bdrv_pread(bs->file, l2_offset, l2_table, l2_size) != l2_size)
+            if (blkqueue_pread(s->bq_context, l2_offset, l2_table, l2_size) < 0)
                 goto fail;
             for(j = 0; j < s->l2_size; j++) {
                 offset = be64_to_cpu(l2_table[j]);
@@ -813,7 +807,7 @@  int qcow2_update_snapshot_refcount(BlockDriverState *bs,
 
                             /* TODO Flushing once for the whole function should
                              * be enough */
-                            bdrv_flush(bs->file);
+                            blkqueue_barrier(s->bq_context);
                         }
                         /* compressed clusters are never modified */
                         refcount = 2;
@@ -839,9 +833,10 @@  int qcow2_update_snapshot_refcount(BlockDriverState *bs,
                 }
             }
             if (l2_modified) {
-                if (bdrv_pwrite_sync(bs->file,
+                if (blkqueue_pwrite(s->bq_context,
                                 l2_offset, l2_table, l2_size) < 0)
                     goto fail;
+                blkqueue_barrier(s->bq_context);
             }
 
             if (addend != 0) {
@@ -863,9 +858,10 @@  int qcow2_update_snapshot_refcount(BlockDriverState *bs,
     if (l1_modified) {
         for(i = 0; i < l1_size; i++)
             cpu_to_be64s(&l1_table[i]);
-        if (bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table,
+        if (blkqueue_pwrite(s->bq_context, l1_table_offset, l1_table,
                         l1_size2) < 0)
             goto fail;
+        blkqueue_barrier(s->bq_context);
         for(i = 0; i < l1_size; i++)
             be64_to_cpus(&l1_table[i]);
     }
@@ -956,8 +952,9 @@  static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
     l2_size = s->l2_size * sizeof(uint64_t);
     l2_table = qemu_malloc(l2_size);
 
-    if (bdrv_pread(bs->file, l2_offset, l2_table, l2_size) != l2_size)
+    if (blkqueue_pread(s->bq_context, l2_offset, l2_table, l2_size) < 0) {
         goto fail;
+    }
 
     /* Do the actual checks */
     for(i = 0; i < s->l2_size; i++) {
@@ -1051,9 +1048,10 @@  static int check_refcounts_l1(BlockDriverState *bs,
         l1_table = NULL;
     } else {
         l1_table = qemu_malloc(l1_size2);
-        if (bdrv_pread(bs->file, l1_table_offset,
-                       l1_table, l1_size2) != l1_size2)
+        if (blkqueue_pread(s->bq_context, l1_table_offset, l1_table, l1_size2) < 0) {
             goto fail;
+        }
+
         for(i = 0;i < l1_size; i++)
             be64_to_cpus(&l1_table[i]);
     }
@@ -1127,6 +1125,8 @@  int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res)
     nb_clusters = size_to_clusters(s, size);
     refcount_table = qemu_mallocz(nb_clusters * sizeof(uint16_t));
 
+    blkqueue_init_context(s->bq_context, s->bq);
+
     /* header */
     inc_refcounts(bs, res, refcount_table, nb_clusters,
         0, s->cluster_size);
diff --git a/block/qcow2.c b/block/qcow2.c
index 537c479..1ad2832 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -136,6 +136,21 @@  static int qcow_read_extensions(BlockDriverState *bs, uint64_t start_offset,
     return 0;
 }
 
+static bool qcow_blkqueue_error_cb(void *opaque, int ret)
+{
+    BlockDriverState *bs = opaque;
+    BlockErrorAction action = bdrv_get_on_error(bs, 0);
+
+    if ((action == BLOCK_ERR_STOP_ENOSPC && ret == -ENOSPC)
+        || action == BLOCK_ERR_STOP_ANY)
+    {
+        bdrv_mon_event(bs, BDRV_ACTION_STOP, 0);
+        vm_stop(0);
+        return true;
+    }
+
+    return false;
+}
 
 static int qcow_open(BlockDriverState *bs, int flags)
 {
@@ -237,6 +252,11 @@  static int qcow_open(BlockDriverState *bs, int flags)
     if (qcow2_read_snapshots(bs) < 0)
         goto fail;
 
+    /* Block queue */
+    s->bq = blkqueue_create(bs->file, qcow_blkqueue_error_cb, bs);
+    blkqueue_init_context(&s->initial_bq_context, s->bq);
+    s->bq_context = &s->initial_bq_context;
+
 #ifdef DEBUG_ALLOC
     qcow2_check_refcounts(bs);
 #endif
@@ -341,6 +361,7 @@  typedef struct QCowAIOCB {
     QEMUIOVector hd_qiov;
     QEMUBH *bh;
     QCowL2Meta l2meta;
+    BlockQueueContext bq_context;
     QLIST_ENTRY(QCowAIOCB) next_depend;
 } QCowAIOCB;
 
@@ -387,6 +408,8 @@  static void qcow_aio_read_cb(void *opaque, int ret)
     BDRVQcowState *s = bs->opaque;
     int index_in_cluster, n1;
 
+    s->bq_context = &acb->bq_context;
+
     acb->hd_aiocb = NULL;
     if (ret < 0)
         goto done;
@@ -519,6 +542,7 @@  static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque, int is_write)
 {
+    BDRVQcowState *s = bs->opaque;
     QCowAIOCB *acb;
 
     acb = qemu_aio_get(&qcow_aio_pool, bs, cb, opaque);
@@ -536,6 +560,10 @@  static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
     acb->cluster_offset = 0;
     acb->l2meta.nb_clusters = 0;
     QLIST_INIT(&acb->l2meta.dependent_requests);
+
+    /* TODO Push the context into l2meta */
+    blkqueue_init_context(&acb->bq_context, s->bq);
+
     return acb;
 }
 
@@ -582,6 +610,7 @@  static void qcow_aio_write_cb(void *opaque, int ret)
     int index_in_cluster;
     int n_end;
 
+    s->bq_context = &acb->bq_context;
     acb->hd_aiocb = NULL;
 
     if (ret >= 0) {
@@ -694,6 +723,7 @@  static void qcow_close(BlockDriverState *bs)
     qemu_free(s->cluster_cache);
     qemu_free(s->cluster_data);
     qcow2_refcount_close(bs);
+    blkqueue_destroy(s->bq);
 }
 
 /*
@@ -1150,13 +1180,64 @@  static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
 
 static int qcow_flush(BlockDriverState *bs)
 {
+    BDRVQcowState *s = bs->opaque;
+    int ret;
+
+    ret = blkqueue_flush(s->bq);
+    if (ret < 0) {
+        /*
+         * If the queue is empty, we couldn't handle the write error by
+         * stopping the guest. In this case we don't know which metadata writes
+         * have succeeded. Reopen the qcow2 layer to make sure that all caches
+         * are invalidated.
+         */
+        if (blkqueue_is_empty(s->bq)) {
+            qcow_close(bs);
+            qcow_open(bs, 0);
+        }
+
+        return ret;
+    }
+
     return bdrv_flush(bs->file);
 }
 
+typedef struct QcowFlushAIOCB {
+    BlockDriverState *bs;
+    BlockDriverCompletionFunc *cb;
+    void *opaque;
+} QcowFlushAIOCB;
+
+static void qcow_aio_flush_cb(void *opaque, int ret)
+{
+    QcowFlushAIOCB *acb = opaque;
+    BlockDriverState *bs = acb->bs;
+    BDRVQcowState *s = bs->opaque;
+
+    if (blkqueue_is_empty(s->bq)) {
+        qcow_close(bs);
+        qcow_open(bs, 0);
+    }
+
+    acb->cb(acb->opaque, ret);
+    qemu_free(acb);
+}
+
 static BlockDriverAIOCB *qcow_aio_flush(BlockDriverState *bs,
          BlockDriverCompletionFunc *cb, void *opaque)
 {
-    return bdrv_aio_flush(bs->file, cb, opaque);
+    BDRVQcowState *s = bs->opaque;
+    BlockQueueContext context;
+    QcowFlushAIOCB *acb;
+
+    blkqueue_init_context(&context, s->bq);
+
+    acb = qemu_malloc(sizeof(*acb));
+    acb->bs = bs;
+    acb->cb = cb;
+    acb->opaque = opaque;
+
+    return blkqueue_aio_flush(&context, qcow_aio_flush_cb, acb);
 }
 
 static int64_t qcow_vm_state_offset(BDRVQcowState *s)
diff --git a/block/qcow2.h b/block/qcow2.h
index 2d22e5e..2ef910c 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -26,6 +26,7 @@ 
 #define BLOCK_QCOW2_H
 
 #include "aes.h"
+#include "block-queue.h"
 
 //#define DEBUG_ALLOC
 //#define DEBUG_ALLOC2
@@ -108,6 +109,10 @@  typedef struct BDRVQcowState {
     int64_t free_cluster_index;
     int64_t free_byte_offset;
 
+    BlockQueue *bq;
+    BlockQueueContext *bq_context;
+    BlockQueueContext initial_bq_context;
+
     uint32_t crypt_method; /* current crypt method, 0 if no key yet */
     uint32_t crypt_method_header;
     AES_KEY aes_encrypt_key;
diff --git a/cpus.c b/cpus.c
index 91a0fb1..8ec0ed6 100644
--- a/cpus.c
+++ b/cpus.c
@@ -310,9 +310,10 @@  void qemu_notify_event(void)
 void qemu_mutex_lock_iothread(void) {}
 void qemu_mutex_unlock_iothread(void) {}
 
-void vm_stop(int reason)
+bool vm_stop(int reason)
 {
     do_vm_stop(reason);
+    return true;
 }
 
 #else /* CONFIG_IOTHREAD */
@@ -848,7 +849,7 @@  static void qemu_system_vmstop_request(int reason)
     qemu_notify_event();
 }
 
-void vm_stop(int reason)
+bool vm_stop(int reason)
 {
     QemuThread me;
     qemu_thread_self(&me);
@@ -863,9 +864,10 @@  void vm_stop(int reason)
             cpu_exit(cpu_single_env);
             cpu_single_env->stop = 1;
         }
-        return;
+        return true;
     }
     do_vm_stop(reason);
+    return true;
 }
 
 #endif
diff --git a/qemu-common.h b/qemu-common.h
index b3957f1..1c23b0f 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -115,6 +115,9 @@  static inline char *realpath(const char *path, char *resolved_path)
 
 #endif /* !defined(NEED_CPU_H) */
 
+/* VM state */
+bool vm_stop(int reason);
+
 /* bottom halves */
 typedef void QEMUBHFunc(void *opaque);
 
diff --git a/qemu-tool.c b/qemu-tool.c
index 392e1c9..3926435 100644
--- a/qemu-tool.c
+++ b/qemu-tool.c
@@ -111,3 +111,8 @@  int qemu_set_fd_handler2(int fd,
 {
     return 0;
 }
+
+bool vm_stop(int reason)
+{
+    return false;
+}
diff --git a/sysemu.h b/sysemu.h
index b81a70e..77788f1 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -38,7 +38,6 @@  VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb,
 void qemu_del_vm_change_state_handler(VMChangeStateEntry *e);
 
 void vm_start(void);
-void vm_stop(int reason);
 
 uint64_t ram_bytes_remaining(void);
 uint64_t ram_bytes_transferred(void);