@@ -948,7 +948,12 @@ void bdrv_drain_all(void)
qemu_co_queue_restart_all(&bs->throttled_reqs);
busy = true;
}
+
+ if (bs->drv && bs->drv->bdrv_drain) {
+ busy |= bs->drv->bdrv_drain(bs);
+ }
}
+
} while (busy);
/* If requests are still pending there is a bug somewhere */
@@ -482,6 +482,7 @@ static int qcow2_open(BlockDriverState *bs, int flags)
/* Initialise locks */
qemu_co_mutex_init(&s->lock);
+ qemu_co_rwlock_init(&s->l2meta_flush);
/* Repair image if dirty */
if (!(flags & BDRV_O_CHECK) && !bs->read_only &&
@@ -751,6 +752,50 @@ static void run_dependent_requests(BDRVQcowState *s, QCowL2Meta *m)
}
}
+typedef struct ProcessL2Meta {
+ BlockDriverState *bs;
+ QCowL2Meta *m;
+} ProcessL2Meta;
+
+static void coroutine_fn process_l2meta(void *opaque)
+{
+ ProcessL2Meta *p = opaque;
+ QCowL2Meta *m = p->m;
+ BlockDriverState *bs = p->bs;
+ BDRVQcowState *s = bs->opaque;
+ int ret;
+
+ qemu_co_mutex_lock(&s->lock);
+
+ ret = qcow2_alloc_cluster_link_l2(bs, m);
+ if (ret < 0) {
+ /* FIXME */
+ }
+
+ run_dependent_requests(s, m);
+ g_free(m);
+
+ qemu_co_mutex_unlock(&s->lock);
+ qemu_co_rwlock_unlock(&s->l2meta_flush);
+}
+
+static inline coroutine_fn void stop_l2meta(BDRVQcowState *s)
+{
+ qemu_co_rwlock_wrlock(&s->l2meta_flush);
+}
+
+static inline coroutine_fn void resume_l2meta(BDRVQcowState *s)
+{
+ qemu_co_rwlock_unlock(&s->l2meta_flush);
+}
+
+static bool qcow2_drain(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+
+ return !QLIST_EMPTY(&s->cluster_allocs);
+}
+
static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
int64_t sector_num,
int remaining_sectors,
@@ -831,16 +876,21 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
}
if (l2meta != NULL) {
- l2meta->is_written = true;
+ Coroutine *co;
+ ProcessL2Meta p = {
+ .bs = bs,
+ .m = l2meta,
+ };
- ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
- if (ret < 0) {
- goto fail;
- }
+ qemu_co_mutex_unlock(&s->lock);
+ qemu_co_rwlock_rdlock(&s->l2meta_flush);
+
+ l2meta->is_written = true;
+ co = qemu_coroutine_create(process_l2meta);
+ qemu_coroutine_enter(co, &p);
- run_dependent_requests(s, l2meta);
- g_free(l2meta);
l2meta = NULL;
+ qemu_co_mutex_lock(&s->lock);
}
remaining_sectors -= cur_nr_sectors;
@@ -868,6 +918,11 @@ fail:
static void qcow2_close(BlockDriverState *bs)
{
BDRVQcowState *s = bs->opaque;
+
+ while (qcow2_drain(bs)) {
+ qemu_aio_wait();
+ }
+
g_free(s->l1_table);
qcow2_cache_flush(bs, s->l2_table_cache);
@@ -1405,10 +1460,12 @@ static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs,
}
/* Whatever is left can use real zero clusters */
+ stop_l2meta(s);
qemu_co_mutex_lock(&s->lock);
ret = qcow2_zero_clusters(bs, sector_num << BDRV_SECTOR_BITS,
nb_sectors);
qemu_co_mutex_unlock(&s->lock);
+ resume_l2meta(s);
return ret;
}
@@ -1419,10 +1476,13 @@ static coroutine_fn int qcow2_co_discard(BlockDriverState *bs,
int ret;
BDRVQcowState *s = bs->opaque;
+ stop_l2meta(s);
qemu_co_mutex_lock(&s->lock);
ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS,
nb_sectors);
qemu_co_mutex_unlock(&s->lock);
+ resume_l2meta(s);
+
return ret;
}
@@ -1548,23 +1608,27 @@ static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
BDRVQcowState *s = bs->opaque;
int ret;
+ stop_l2meta(s);
qemu_co_mutex_lock(&s->lock);
+
ret = qcow2_cache_flush(bs, s->l2_table_cache);
if (ret < 0) {
- qemu_co_mutex_unlock(&s->lock);
- return ret;
+ goto fail;
}
if (qcow2_need_accurate_refcounts(s)) {
ret = qcow2_cache_flush(bs, s->refcount_block_cache);
if (ret < 0) {
- qemu_co_mutex_unlock(&s->lock);
- return ret;
+ goto fail;
}
}
+
+ ret = 0;
+fail:
qemu_co_mutex_unlock(&s->lock);
+ resume_l2meta(s);
- return 0;
+ return ret;
}
static int64_t qcow2_vm_state_offset(BDRVQcowState *s)
@@ -1690,6 +1754,7 @@ static BlockDriver bdrv_qcow2 = {
.bdrv_co_readv = qcow2_co_readv,
.bdrv_co_writev = qcow2_co_writev,
.bdrv_co_flush_to_os = qcow2_co_flush_to_os,
+ .bdrv_drain = qcow2_drain,
.bdrv_co_write_zeroes = qcow2_co_write_zeroes,
.bdrv_co_discard = qcow2_co_discard,
@@ -162,6 +162,14 @@ typedef struct BDRVQcowState {
CoMutex lock;
+ /*
+ * Only to be acquired while s->lock is not held.
+ *
+ * Readers: All l2meta coroutines that are in flight
+ * Writers: Anyone who requires l2meta to be flushed
+ */
+ CoRwlock l2meta_flush;
+
uint32_t crypt_method; /* current crypt method, 0 if no key yet */
uint32_t crypt_method_header;
AES_KEY aes_encrypt_key;
@@ -198,6 +198,9 @@ struct BlockDriver {
*/
int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs);
+ /** Returns true if the block device is still busy */
+ bool (*bdrv_drain)(BlockDriverState *bs);
+
const char *protocol_name;
int (*bdrv_truncate)(BlockDriverState *bs, int64_t offset);
int64_t (*bdrv_getlength)(BlockDriverState *bs);
This creates a separate coroutine for processing the COW and the L2 table
update of allocating requests. The request itself can then complete while
the second part is still being processed.

We need a qemu_aio_flush() hook in order to ensure that these coroutines
for the second part aren't still running after bdrv_drain_all (e.g. when
the VM is stopped).

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c       |  5 +++
 block/qcow2.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++++--------
 block/qcow2.h |  8 +++++
 block_int.h   |  3 ++
 4 files changed, 93 insertions(+), 12 deletions(-)