From patchwork Mon Dec 13 16:29:07 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kevin Wolf X-Patchwork-Id: 75379 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [199.232.76.165]) by ozlabs.org (Postfix) with ESMTP id 8BD49B6EE8 for ; Tue, 14 Dec 2010 03:48:26 +1100 (EST) Received: from localhost ([127.0.0.1]:60492 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1PSBSp-0000io-E2 for incoming@patchwork.ozlabs.org; Mon, 13 Dec 2010 11:41:27 -0500 Received: from [140.186.70.92] (port=44604 helo=eggs.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1PSBG5-0002VN-2H for qemu-devel@nongnu.org; Mon, 13 Dec 2010 11:28:24 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1PSBG0-0006lL-2T for qemu-devel@nongnu.org; Mon, 13 Dec 2010 11:28:16 -0500 Received: from mx1.redhat.com ([209.132.183.28]:31096) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1PSBFz-0006kx-8V for qemu-devel@nongnu.org; Mon, 13 Dec 2010 11:28:12 -0500 Received: from int-mx12.intmail.prod.int.phx2.redhat.com (int-mx12.intmail.prod.int.phx2.redhat.com [10.5.11.25]) by mx1.redhat.com (8.13.8/8.13.8) with ESMTP id oBDGS6Fp019773 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=OK); Mon, 13 Dec 2010 11:28:06 -0500 Received: from dhcp-5-188.str.redhat.com (dhcp-5-175.str.redhat.com [10.32.5.175]) by int-mx12.intmail.prod.int.phx2.redhat.com (8.14.4/8.14.4) with ESMTP id oBDGRx6J001392; Mon, 13 Dec 2010 11:28:05 -0500 From: Kevin Wolf To: qemu-devel@nongnu.org Date: Mon, 13 Dec 2010 17:29:07 +0100 Message-Id: <1292257747-10665-5-git-send-email-kwolf@redhat.com> In-Reply-To: <1292257747-10665-1-git-send-email-kwolf@redhat.com> References: <1292257747-10665-1-git-send-email-kwolf@redhat.com> X-Scanned-By: MIMEDefang 2.68 on 10.5.11.25 
X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. Cc: kwolf@redhat.com, stefanha@gmail.com Subject: [Qemu-devel] [PATCH 4/4] qcow2: Use block-queue X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org This changes qcow2 to use block-queue for metadata I/O, so that the number of bdrv_flush calls during heavy cluster allocation is greatly reduced. Most of this is a mechanical conversion of bdrv_pwrite_sync into a sequence of blkqueue_pwrite and blkqueue_barrier calls (or of bdrv_pread into blkqueue_pread). Also, most functions now take a QcowRequest structure instead of a BlockDriverState. The QcowRequest contains the current block-queue context and refers to the BlockDriverState of the image. The more interesting parts include the bdrv_(aio_)flush implementation, which has to deal with errors. 
Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 139 ++++++++++++++++++------------- block/qcow2-refcount.c | 217 ++++++++++++++++++++++++++++-------------------- block/qcow2-snapshot.c | 106 ++++++++++++++++++------ block/qcow2.c | 144 +++++++++++++++++++++++++++++--- block/qcow2.h | 33 +++++--- 5 files changed, 440 insertions(+), 199 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index b040208..1dccb79 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -28,8 +28,9 @@ #include "block_int.h" #include "block/qcow2.h" -int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size) +int qcow2_grow_l1_table(QcowRequest *req, int min_size, bool exact_size) { + BlockDriverState *bs = req->bs; BDRVQcowState *s = bs->opaque; int new_l1_size, new_l1_size2, ret, i; uint64_t *new_l1_table; @@ -62,17 +63,19 @@ int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size) /* write new table (align to cluster) */ BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE); - new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2); + new_l1_table_offset = qcow2_alloc_clusters(req, new_l1_size2); if (new_l1_table_offset < 0) { qemu_free(new_l1_table); return new_l1_table_offset; } - bdrv_flush(bs->file); + blkqueue_barrier(&req->bq_context); BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE); for(i = 0; i < s->l1_size; i++) new_l1_table[i] = cpu_to_be64(new_l1_table[i]); - ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset, new_l1_table, new_l1_size2); + ret = blkqueue_pwrite(&req->bq_context, new_l1_table_offset, new_l1_table, + new_l1_size2); + blkqueue_barrier(&req->bq_context); if (ret < 0) goto fail; for(i = 0; i < s->l1_size; i++) @@ -82,24 +85,27 @@ int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size) BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE); cpu_to_be32w((uint32_t*)data, new_l1_size); cpu_to_be64w((uint64_t*)(data + 4), new_l1_table_offset); - ret = 
bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size), data,sizeof(data)); + ret = blkqueue_pwrite(&req->bq_context, offsetof(QCowHeader, l1_size), + data, sizeof(data)); + blkqueue_barrier(&req->bq_context); if (ret < 0) { goto fail; } qemu_free(s->l1_table); - qcow2_free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t)); + qcow2_free_clusters(req, s->l1_table_offset, s->l1_size * sizeof(uint64_t)); s->l1_table_offset = new_l1_table_offset; s->l1_table = new_l1_table; s->l1_size = new_l1_size; return 0; fail: qemu_free(new_l1_table); - qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2); + qcow2_free_clusters(req, new_l1_table_offset, new_l1_size2); return ret; } -void qcow2_l2_cache_reset(BlockDriverState *bs) +void qcow2_l2_cache_reset(QcowRequest *req) { + BlockDriverState *bs = req->bs; BDRVQcowState *s = bs->opaque; memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); @@ -107,8 +113,9 @@ void qcow2_l2_cache_reset(BlockDriverState *bs) memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t)); } -static inline int l2_cache_new_entry(BlockDriverState *bs) +static inline int l2_cache_new_entry(QcowRequest *req) { + BlockDriverState *bs = req->bs; BDRVQcowState *s = bs->opaque; uint32_t min_count; int min_index, i; @@ -165,9 +172,10 @@ static uint64_t *seek_l2_table(BDRVQcowState *s, uint64_t l2_offset) * the image file failed. 
*/ -static int l2_load(BlockDriverState *bs, uint64_t l2_offset, +static int l2_load(QcowRequest *req, uint64_t l2_offset, uint64_t **l2_table) { + BlockDriverState *bs = req->bs; BDRVQcowState *s = bs->opaque; int min_index; int ret; @@ -181,14 +189,14 @@ static int l2_load(BlockDriverState *bs, uint64_t l2_offset, /* not found: load a new entry in the least used one */ - min_index = l2_cache_new_entry(bs); + min_index = l2_cache_new_entry(req); *l2_table = s->l2_cache + (min_index << s->l2_bits); BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD); - ret = bdrv_pread(bs->file, l2_offset, *l2_table, + ret = blkqueue_pread(&req->bq_context, l2_offset, *l2_table, s->l2_size * sizeof(uint64_t)); if (ret < 0) { - qcow2_l2_cache_reset(bs); + qcow2_l2_cache_reset(req); return ret; } @@ -203,8 +211,9 @@ static int l2_load(BlockDriverState *bs, uint64_t l2_offset, * and we really don't want bdrv_pread to perform a read-modify-write) */ #define L1_ENTRIES_PER_SECTOR (512 / 8) -static int write_l1_entry(BlockDriverState *bs, int l1_index) +static int write_l1_entry(QcowRequest *req, int l1_index) { + BlockDriverState *bs = req->bs; BDRVQcowState *s = bs->opaque; uint64_t buf[L1_ENTRIES_PER_SECTOR]; int l1_start_index; @@ -216,8 +225,9 @@ static int write_l1_entry(BlockDriverState *bs, int l1_index) } BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE); - ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset + 8 * l1_start_index, - buf, sizeof(buf)); + ret = blkqueue_pwrite(&req->bq_context, + s->l1_table_offset + 8 * l1_start_index, buf, sizeof(buf)); + blkqueue_barrier(&req->bq_context); if (ret < 0) { return ret; } @@ -235,8 +245,9 @@ static int write_l1_entry(BlockDriverState *bs, int l1_index) * */ -static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) +static int l2_allocate(QcowRequest *req, int l1_index, uint64_t **table) { + BlockDriverState *bs = req->bs; BDRVQcowState *s = bs->opaque; int min_index; uint64_t old_l2_offset; @@ -248,15 +259,15 @@ static int 
l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) /* allocate a new l2 entry */ - l2_offset = qcow2_alloc_clusters(bs, s->l2_size * sizeof(uint64_t)); + l2_offset = qcow2_alloc_clusters(req, s->l2_size * sizeof(uint64_t)); if (l2_offset < 0) { return l2_offset; } - bdrv_flush(bs->file); + blkqueue_barrier(&req->bq_context); /* allocate a new entry in the l2 cache */ - min_index = l2_cache_new_entry(bs); + min_index = l2_cache_new_entry(req); l2_table = s->l2_cache + (min_index << s->l2_bits); if (old_l2_offset == 0) { @@ -265,7 +276,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) } else { /* if there was an old l2 table, read it from the disk */ BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ); - ret = bdrv_pread(bs->file, old_l2_offset, l2_table, + ret = blkqueue_pread(&req->bq_context, old_l2_offset, l2_table, s->l2_size * sizeof(uint64_t)); if (ret < 0) { goto fail; @@ -273,15 +284,16 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) } /* write the l2 table to the file */ BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE); - ret = bdrv_pwrite_sync(bs->file, l2_offset, l2_table, + ret = blkqueue_pwrite(&req->bq_context, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)); + blkqueue_barrier(&req->bq_context); if (ret < 0) { goto fail; } /* update the L1 entry */ s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED; - ret = write_l1_entry(bs, l1_index); + ret = write_l1_entry(req, l1_index); if (ret < 0) { goto fail; } @@ -296,7 +308,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) fail: s->l1_table[l1_index] = old_l2_offset; - qcow2_l2_cache_reset(bs); + qcow2_l2_cache_reset(req); return ret; } @@ -352,9 +364,10 @@ void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num, } -static int qcow_read(BlockDriverState *bs, int64_t sector_num, +static int qcow_read(QcowRequest *req, int64_t sector_num, uint8_t *buf, int nb_sectors) { + BlockDriverState *bs 
= req->bs; BDRVQcowState *s = bs->opaque; int ret, index_in_cluster, n, n1; uint64_t cluster_offset; @@ -364,7 +377,7 @@ static int qcow_read(BlockDriverState *bs, int64_t sector_num, while (nb_sectors > 0) { n = nb_sectors; - ret = qcow2_get_cluster_offset(bs, sector_num << 9, &n, + ret = qcow2_get_cluster_offset(req, sector_num << 9, &n, &cluster_offset); if (ret < 0) { return ret; @@ -389,13 +402,14 @@ static int qcow_read(BlockDriverState *bs, int64_t sector_num, memset(buf, 0, 512 * n); } } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) { - if (qcow2_decompress_cluster(bs, cluster_offset) < 0) + if (qcow2_decompress_cluster(req, cluster_offset) < 0) return -1; memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n); } else { BLKDBG_EVENT(bs->file, BLKDBG_READ); - ret = bdrv_pread(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512); - if (ret != n * 512) + ret = blkqueue_pread(&req->bq_context, + cluster_offset + index_in_cluster * 512, buf, n * 512); + if (ret < 0) return -1; if (s->crypt_method) { qcow2_encrypt_sectors(s, sector_num, buf, buf, n, 0, @@ -409,9 +423,10 @@ static int qcow_read(BlockDriverState *bs, int64_t sector_num, return 0; } -static int copy_sectors(BlockDriverState *bs, uint64_t start_sect, +static int copy_sectors(QcowRequest *req, uint64_t start_sect, uint64_t cluster_offset, int n_start, int n_end) { + BlockDriverState *bs = req->bs; BDRVQcowState *s = bs->opaque; int n, ret; @@ -419,7 +434,7 @@ static int copy_sectors(BlockDriverState *bs, uint64_t start_sect, if (n <= 0) return 0; BLKDBG_EVENT(bs->file, BLKDBG_COW_READ); - ret = qcow_read(bs, start_sect + n_start, s->cluster_data, n); + ret = qcow_read(req, start_sect + n_start, s->cluster_data, n); if (ret < 0) return ret; if (s->crypt_method) { @@ -453,9 +468,10 @@ static int copy_sectors(BlockDriverState *bs, uint64_t start_sect, * */ -int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, +int qcow2_get_cluster_offset(QcowRequest *req, uint64_t 
offset, int *num, uint64_t *cluster_offset) { + BlockDriverState *bs = req->bs; BDRVQcowState *s = bs->opaque; unsigned int l1_index, l2_index; uint64_t l2_offset, *l2_table; @@ -501,7 +517,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, /* load the l2 table in memory */ l2_offset &= ~QCOW_OFLAG_COPIED; - ret = l2_load(bs, l2_offset, &l2_table); + ret = l2_load(req, l2_offset, &l2_table); if (ret < 0) { return ret; } @@ -543,11 +559,12 @@ out: * * Returns 0 on success, -errno in failure case */ -static int get_cluster_table(BlockDriverState *bs, uint64_t offset, +static int get_cluster_table(QcowRequest *req, uint64_t offset, uint64_t **new_l2_table, uint64_t *new_l2_offset, int *new_l2_index) { + BlockDriverState *bs = req->bs; BDRVQcowState *s = bs->opaque; unsigned int l1_index, l2_index; uint64_t l2_offset; @@ -558,7 +575,7 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset, l1_index = offset >> (s->l2_bits + s->cluster_bits); if (l1_index >= s->l1_size) { - ret = qcow2_grow_l1_table(bs, l1_index + 1, false); + ret = qcow2_grow_l1_table(req, l1_index + 1, false); if (ret < 0) { return ret; } @@ -570,14 +587,14 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset, if (l2_offset & QCOW_OFLAG_COPIED) { /* load the l2 table in memory */ l2_offset &= ~QCOW_OFLAG_COPIED; - ret = l2_load(bs, l2_offset, &l2_table); + ret = l2_load(req, l2_offset, &l2_table); if (ret < 0) { return ret; } } else { if (l2_offset) - qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t)); - ret = l2_allocate(bs, l1_index, &l2_table); + qcow2_free_clusters(req, l2_offset, s->l2_size * sizeof(uint64_t)); + ret = l2_allocate(req, l1_index, &l2_table); if (ret < 0) { return ret; } @@ -608,17 +625,18 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset, * */ -uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, +uint64_t qcow2_alloc_compressed_cluster_offset(QcowRequest *req, uint64_t offset, 
int compressed_size) { + BlockDriverState *bs = req->bs; BDRVQcowState *s = bs->opaque; int l2_index, ret; uint64_t l2_offset, *l2_table; int64_t cluster_offset; int nb_csectors; - ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index); + ret = get_cluster_table(req, offset, &l2_table, &l2_offset, &l2_index); if (ret < 0) { return 0; } @@ -628,9 +646,9 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, return cluster_offset & ~QCOW_OFLAG_COPIED; if (cluster_offset) - qcow2_free_any_clusters(bs, cluster_offset, 1); + qcow2_free_any_clusters(req, cluster_offset, 1); - cluster_offset = qcow2_alloc_bytes(bs, compressed_size); + cluster_offset = qcow2_alloc_bytes(req, compressed_size); if (cluster_offset < 0) { return 0; } @@ -647,11 +665,12 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED); l2_table[l2_index] = cpu_to_be64(cluster_offset); - if (bdrv_pwrite_sync(bs->file, + if (blkqueue_pwrite(&req->bq_context, l2_offset + l2_index * sizeof(uint64_t), l2_table + l2_index, sizeof(uint64_t)) < 0) return 0; + blkqueue_barrier(&req->bq_context); return cluster_offset; } @@ -661,9 +680,10 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, * read-modify-write in bdrv_pwrite */ #define L2_ENTRIES_PER_SECTOR (512 / 8) -static int write_l2_entries(BlockDriverState *bs, uint64_t *l2_table, +static int write_l2_entries(QcowRequest *req, uint64_t *l2_table, uint64_t l2_offset, int l2_index, int num) { + BlockDriverState *bs = req->bs; int l2_start_index = l2_index & ~(L1_ENTRIES_PER_SECTOR - 1); int start_offset = (8 * l2_index) & ~511; int end_offset = (8 * (l2_index + num) + 511) & ~511; @@ -671,7 +691,7 @@ static int write_l2_entries(BlockDriverState *bs, uint64_t *l2_table, int ret; BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE); - ret = bdrv_pwrite(bs->file, l2_offset + start_offset, + ret = blkqueue_pwrite(&req->bq_context, l2_offset + start_offset, 
&l2_table[l2_start_index], len); if (ret < 0) { return ret; @@ -680,8 +700,9 @@ static int write_l2_entries(BlockDriverState *bs, uint64_t *l2_table, return 0; } -int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) +int qcow2_alloc_cluster_link_l2(QcowRequest *req, QCowL2Meta *m) { + BlockDriverState *bs = req->bs; BDRVQcowState *s = bs->opaque; int i, j = 0, l2_index, ret; uint64_t *old_cluster, start_sect, l2_offset, *l2_table; @@ -695,21 +716,21 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) /* copy content of unmodified sectors */ start_sect = (m->offset & ~(s->cluster_size - 1)) >> 9; if (m->n_start) { - ret = copy_sectors(bs, start_sect, cluster_offset, 0, m->n_start); + ret = copy_sectors(req, start_sect, cluster_offset, 0, m->n_start); if (ret < 0) goto err; } if (m->nb_available & (s->cluster_sectors - 1)) { uint64_t end = m->nb_available & ~(uint64_t)(s->cluster_sectors - 1); - ret = copy_sectors(bs, start_sect + end, cluster_offset + (end << 9), + ret = copy_sectors(req, start_sect + end, cluster_offset + (end << 9), m->nb_available - end, s->cluster_sectors); if (ret < 0) goto err; } /* update L2 table */ - ret = get_cluster_table(bs, m->offset, &l2_table, &l2_offset, &l2_index); + ret = get_cluster_table(req, m->offset, &l2_table, &l2_offset, &l2_index); if (ret < 0) { goto err; } @@ -733,11 +754,11 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) * need to be sure that the refcounts have been increased and COW was * handled. */ - bdrv_flush(bs->file); + blkqueue_barrier(&req->bq_context); - ret = write_l2_entries(bs, l2_table, l2_offset, l2_index, m->nb_clusters); + ret = write_l2_entries(req, l2_table, l2_offset, l2_index, m->nb_clusters); if (ret < 0) { - qcow2_l2_cache_reset(bs); + qcow2_l2_cache_reset(req); goto err; } @@ -746,9 +767,9 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) * Also flush bs->file to get the right order for L2 and refcount update. 
*/ if (j != 0) { - bdrv_flush(bs->file); + blkqueue_barrier(&req->bq_context); for (i = 0; i < j; i++) { - qcow2_free_any_clusters(bs, + qcow2_free_any_clusters(req, be64_to_cpu(old_cluster[i]) & ~QCOW_OFLAG_COPIED, 1); } } @@ -778,9 +799,10 @@ err: * * Return 0 on success and -errno in error cases */ -int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, +int qcow2_alloc_cluster_offset(QcowRequest *req, uint64_t offset, int n_start, int n_end, int *num, QCowL2Meta *m) { + BlockDriverState *bs = req->bs; BDRVQcowState *s = bs->opaque; int l2_index, ret; uint64_t l2_offset, *l2_table; @@ -788,7 +810,7 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, unsigned int nb_clusters, i = 0; QCowL2Meta *old_alloc; - ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index); + ret = get_cluster_table(req, offset, &l2_table, &l2_offset, &l2_index); if (ret < 0) { return ret; } @@ -881,7 +903,7 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, /* allocate a new cluster */ - cluster_offset = qcow2_alloc_clusters(bs, nb_clusters * s->cluster_size); + cluster_offset = qcow2_alloc_clusters(req, nb_clusters * s->cluster_size); if (cluster_offset < 0) { QLIST_REMOVE(m, next_in_flight); return cluster_offset; @@ -928,8 +950,9 @@ static int decompress_buffer(uint8_t *out_buf, int out_buf_size, return 0; } -int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset) +int qcow2_decompress_cluster(QcowRequest *req, uint64_t cluster_offset) { + BlockDriverState *bs = req->bs; BDRVQcowState *s = bs->opaque; int ret, csize, nb_csectors, sector_offset; uint64_t coffset; diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index a10453c..39ead62 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -26,16 +26,17 @@ #include "block_int.h" #include "block/qcow2.h" -static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size); -static int QEMU_WARN_UNUSED_RESULT 
update_refcount(BlockDriverState *bs, +static int64_t alloc_clusters_noref(QcowRequest *req, int64_t size); +static int QEMU_WARN_UNUSED_RESULT update_refcount(QcowRequest *req, int64_t offset, int64_t length, int addend); static int cache_refcount_updates = 0; -static int write_refcount_block(BlockDriverState *bs) +static int write_refcount_block(QcowRequest *req) { + BlockDriverState *bs = req->bs; BDRVQcowState *s = bs->opaque; size_t size = s->cluster_size; @@ -44,8 +45,8 @@ static int write_refcount_block(BlockDriverState *bs) } BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE); - if (bdrv_pwrite_sync(bs->file, s->refcount_block_cache_offset, - s->refcount_block_cache, size) < 0) + if (blkqueue_pwrite(&req->bq_context, s->refcount_block_cache_offset, + s->refcount_block_cache, size) < 0) { return -EIO; } @@ -66,8 +67,7 @@ int qcow2_refcount_init(BlockDriverState *bs) s->refcount_table = qemu_malloc(refcount_table_size2); if (s->refcount_table_size > 0) { BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD); - ret = bdrv_pread(bs->file, s->refcount_table_offset, - s->refcount_table, refcount_table_size2); + ret = bdrv_pread(bs->file, s->refcount_table_offset, s->refcount_table, refcount_table_size2); if (ret != refcount_table_size2) goto fail; for(i = 0; i < s->refcount_table_size; i++) @@ -86,22 +86,23 @@ void qcow2_refcount_close(BlockDriverState *bs) } -static int load_refcount_block(BlockDriverState *bs, +static int load_refcount_block(QcowRequest *req, int64_t refcount_block_offset) { + BlockDriverState *bs = req->bs; BDRVQcowState *s = bs->opaque; int ret; if (cache_refcount_updates) { - ret = write_refcount_block(bs); + ret = write_refcount_block(req); if (ret < 0) { return ret; } } BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_LOAD); - ret = bdrv_pread(bs->file, refcount_block_offset, s->refcount_block_cache, - s->cluster_size); + ret = blkqueue_pread(&req->bq_context, refcount_block_offset, + s->refcount_block_cache, s->cluster_size); if (ret < 0) { 
s->refcount_block_cache_offset = 0; return ret; @@ -116,8 +117,9 @@ static int load_refcount_block(BlockDriverState *bs, * return value is the refcount of the cluster, negative values are -errno * and indicate an error. */ -static int get_refcount(BlockDriverState *bs, int64_t cluster_index) +static int get_refcount(QcowRequest *req, int64_t cluster_index) { + BlockDriverState *bs = req->bs; BDRVQcowState *s = bs->opaque; int refcount_table_index, block_index; int64_t refcount_block_offset; @@ -131,7 +133,7 @@ static int get_refcount(BlockDriverState *bs, int64_t cluster_index) return 0; if (refcount_block_offset != s->refcount_block_cache_offset) { /* better than nothing: return allocated if read error */ - ret = load_refcount_block(bs, refcount_block_offset); + ret = load_refcount_block(req, refcount_block_offset); if (ret < 0) { return ret; } @@ -176,8 +178,9 @@ static int in_same_refcount_block(BDRVQcowState *s, uint64_t offset_a, * * Returns the offset of the refcount block on success or -errno in error case */ -static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index) +static int64_t alloc_refcount_block(QcowRequest *req, int64_t cluster_index) { + BlockDriverState *bs = req->bs; BDRVQcowState *s = bs->opaque; unsigned int refcount_table_index; int ret; @@ -195,7 +198,7 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index) /* If it's already there, we're done */ if (refcount_block_offset) { if (refcount_block_offset != s->refcount_block_cache_offset) { - ret = load_refcount_block(bs, refcount_block_offset); + ret = load_refcount_block(req, refcount_block_offset); if (ret < 0) { return ret; } @@ -227,14 +230,14 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index) */ if (cache_refcount_updates) { - ret = write_refcount_block(bs); + ret = write_refcount_block(req); if (ret < 0) { return ret; } } /* Allocate the refcount block itself and mark it as used */ - int64_t new_block = 
alloc_clusters_noref(bs, s->cluster_size); + int64_t new_block = alloc_clusters_noref(req, s->cluster_size); if (new_block < 0) { return new_block; } @@ -257,12 +260,12 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index) } else { /* Described somewhere else. This can recurse at most twice before we * arrive at a block that describes itself. */ - ret = update_refcount(bs, new_block, s->cluster_size, 1); + ret = update_refcount(req, new_block, s->cluster_size, 1); if (ret < 0) { goto fail_block; } - bdrv_flush(bs->file); + blkqueue_barrier(&req->bq_context); /* Initialize the new refcount block only after updating its refcount, * update_refcount uses the refcount cache itself */ @@ -272,8 +275,9 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index) /* Now the new refcount block needs to be written to disk */ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE); - ret = bdrv_pwrite_sync(bs->file, new_block, s->refcount_block_cache, + ret = blkqueue_pwrite(&req->bq_context, new_block, s->refcount_block_cache, s->cluster_size); + blkqueue_barrier(&req->bq_context); if (ret < 0) { goto fail_block; } @@ -282,9 +286,10 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index) if (refcount_table_index < s->refcount_table_size) { uint64_t data64 = cpu_to_be64(new_block); BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_HOOKUP); - ret = bdrv_pwrite_sync(bs->file, + ret = blkqueue_pwrite(&req->bq_context, s->refcount_table_offset + refcount_table_index * sizeof(uint64_t), &data64, sizeof(data64)); + blkqueue_barrier(&req->bq_context); if (ret < 0) { goto fail_block; } @@ -362,8 +367,9 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index) /* Write refcount blocks to disk */ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS); - ret = bdrv_pwrite_sync(bs->file, meta_offset, new_blocks, + ret = blkqueue_pwrite(&req->bq_context, meta_offset, new_blocks, 
blocks_clusters * s->cluster_size); + blkqueue_barrier(&req->bq_context); qemu_free(new_blocks); if (ret < 0) { goto fail_table; @@ -375,8 +381,9 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index) } BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE); - ret = bdrv_pwrite_sync(bs->file, table_offset, new_table, + ret = blkqueue_pwrite(&req->bq_context, table_offset, new_table, table_size * sizeof(uint64_t)); + blkqueue_barrier(&req->bq_context); if (ret < 0) { goto fail_table; } @@ -390,8 +397,9 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index) cpu_to_be64w((uint64_t*)data, table_offset); cpu_to_be32w((uint32_t*)(data + 8), table_clusters); BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE); - ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, refcount_table_offset), - data, sizeof(data)); + ret = blkqueue_pwrite(&req->bq_context, + offsetof(QCowHeader, refcount_table_offset), data, sizeof(data)); + blkqueue_barrier(&req->bq_context); if (ret < 0) { goto fail_table; } @@ -407,10 +415,11 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index) /* Free old table. 
Remember, we must not change free_cluster_index */ uint64_t old_free_cluster_index = s->free_cluster_index; - qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t)); + qcow2_free_clusters(req, old_table_offset, + old_table_size * sizeof(uint64_t)); s->free_cluster_index = old_free_cluster_index; - ret = load_refcount_block(bs, new_block); + ret = load_refcount_block(req, new_block); if (ret < 0) { goto fail_block; } @@ -425,9 +434,10 @@ fail_block: } #define REFCOUNTS_PER_SECTOR (512 >> REFCOUNT_SHIFT) -static int write_refcount_block_entries(BlockDriverState *bs, +static int write_refcount_block_entries(QcowRequest *req, int64_t refcount_block_offset, int first_index, int last_index) { + BlockDriverState *bs = req->bs; BDRVQcowState *s = bs->opaque; size_t size; int ret; @@ -447,7 +457,7 @@ static int write_refcount_block_entries(BlockDriverState *bs, size = (last_index - first_index) << REFCOUNT_SHIFT; BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART); - ret = bdrv_pwrite(bs->file, + ret = blkqueue_pwrite(&req->bq_context, refcount_block_offset + (first_index << REFCOUNT_SHIFT), &s->refcount_block_cache[first_index], size); if (ret < 0) { @@ -458,9 +468,10 @@ static int write_refcount_block_entries(BlockDriverState *bs, } /* XXX: cache several refcount block clusters ? 
*/ -static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, +static int QEMU_WARN_UNUSED_RESULT update_refcount(QcowRequest *req, int64_t offset, int64_t length, int addend) { + BlockDriverState *bs = req->bs; BDRVQcowState *s = bs->opaque; int64_t start, last, cluster_offset; int64_t refcount_block_offset = 0; @@ -492,7 +503,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT); if ((old_table_index >= 0) && (table_index != old_table_index)) { - ret = write_refcount_block_entries(bs, refcount_block_offset, + ret = write_refcount_block_entries(req, refcount_block_offset, first_index, last_index); if (ret < 0) { return ret; @@ -503,7 +514,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, } /* Load the refcount block and allocate it if needed */ - new_block = alloc_refcount_block(bs, cluster_index); + new_block = alloc_refcount_block(req, cluster_index); if (new_block < 0) { ret = new_block; goto fail; @@ -538,7 +549,7 @@ fail: /* Write last changed block to disk */ if (refcount_block_offset != 0) { int wret; - wret = write_refcount_block_entries(bs, refcount_block_offset, + wret = write_refcount_block_entries(req, refcount_block_offset, first_index, last_index); if (wret < 0) { return ret < 0 ? ret : wret; @@ -551,7 +562,7 @@ fail: */ if (ret < 0) { int dummy; - dummy = update_refcount(bs, offset, cluster_offset - offset, -addend); + dummy = update_refcount(req, offset, cluster_offset - offset, -addend); (void)dummy; } @@ -565,21 +576,22 @@ fail: * If the return value is non-negative, it is the new refcount of the cluster. * If it is negative, it is -errno and indicates an error. 
*/ -static int update_cluster_refcount(BlockDriverState *bs, +static int update_cluster_refcount(QcowRequest *req, int64_t cluster_index, int addend) { + BlockDriverState *bs = req->bs; BDRVQcowState *s = bs->opaque; int ret; - ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend); + ret = update_refcount(req, cluster_index << s->cluster_bits, 1, addend); if (ret < 0) { return ret; } - bdrv_flush(bs->file); + blkqueue_barrier(&req->bq_context); - return get_refcount(bs, cluster_index); + return get_refcount(req, cluster_index); } @@ -590,8 +602,9 @@ static int update_cluster_refcount(BlockDriverState *bs, /* return < 0 if error */ -static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size) +static int64_t alloc_clusters_noref(QcowRequest *req, int64_t size) { + BlockDriverState *bs = req->bs; BDRVQcowState *s = bs->opaque; int i, nb_clusters, refcount; @@ -599,7 +612,7 @@ static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size) retry: for(i = 0; i < nb_clusters; i++) { int64_t next_cluster_index = s->free_cluster_index++; - refcount = get_refcount(bs, next_cluster_index); + refcount = get_refcount(req, next_cluster_index); if (refcount < 0) { return refcount; @@ -615,18 +628,19 @@ retry: return (s->free_cluster_index - nb_clusters) << s->cluster_bits; } -int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size) +int64_t qcow2_alloc_clusters(QcowRequest *req, int64_t size) { + BlockDriverState *bs = req->bs; int64_t offset; int ret; BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC); - offset = alloc_clusters_noref(bs, size); + offset = alloc_clusters_noref(req, size); if (offset < 0) { return offset; } - ret = update_refcount(bs, offset, size, 1); + ret = update_refcount(req, offset, size, 1); if (ret < 0) { return ret; } @@ -636,8 +650,9 @@ int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size) /* only used to allocate compressed sectors. We try to allocate contiguous sectors. 
size must be <= cluster_size */ -int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size) +int64_t qcow2_alloc_bytes(QcowRequest *req, int size) { + BlockDriverState *bs = req->bs; BDRVQcowState *s = bs->opaque; int64_t offset, cluster_offset; int free_in_cluster; @@ -645,7 +660,7 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size) BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_BYTES); assert(size > 0 && size <= s->cluster_size); if (s->free_byte_offset == 0) { - s->free_byte_offset = qcow2_alloc_clusters(bs, s->cluster_size); + s->free_byte_offset = qcow2_alloc_clusters(req, s->cluster_size); if (s->free_byte_offset < 0) { return s->free_byte_offset; } @@ -661,9 +676,9 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size) if (free_in_cluster == 0) s->free_byte_offset = 0; if ((offset & (s->cluster_size - 1)) != 0) - update_cluster_refcount(bs, offset >> s->cluster_bits, 1); + update_cluster_refcount(req, offset >> s->cluster_bits, 1); } else { - offset = qcow2_alloc_clusters(bs, s->cluster_size); + offset = qcow2_alloc_clusters(req, s->cluster_size); if (offset < 0) { return offset; } @@ -671,7 +686,7 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size) if ((cluster_offset + s->cluster_size) == offset) { /* we are lucky: contiguous data */ offset = s->free_byte_offset; - update_cluster_refcount(bs, offset >> s->cluster_bits, 1); + update_cluster_refcount(req, offset >> s->cluster_bits, 1); s->free_byte_offset += size; } else { s->free_byte_offset = offset; @@ -679,21 +694,28 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size) } } - bdrv_flush(bs->file); + blkqueue_barrier(&req->bq_context); return offset; } -void qcow2_free_clusters(BlockDriverState *bs, +void qcow2_free_clusters(QcowRequest *req, int64_t offset, int64_t size) { + BlockDriverState *bs = req->bs; + BDRVQcowState *s = bs->opaque; int ret; BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_FREE); - ret = update_refcount(bs, offset, size, -1); + ret = update_refcount(req, offset, 
size, -1); if (ret < 0) { fprintf(stderr, "qcow2_free_clusters failed: %s\n", strerror(-ret)); /* TODO Remember the clusters to free them later and avoid leaking */ } + + /* TODO The cluster may be reused as a data cluster, and data doesn't go + * through block-queue at the moment. As soon as it does, this flush can be + * dropped. */ + blkqueue_flush(s->bq); } /* @@ -703,9 +725,10 @@ void qcow2_free_clusters(BlockDriverState *bs, * */ -void qcow2_free_any_clusters(BlockDriverState *bs, +void qcow2_free_any_clusters(QcowRequest *req, uint64_t cluster_offset, int nb_clusters) { + BlockDriverState *bs = req->bs; BDRVQcowState *s = bs->opaque; /* free the cluster */ @@ -714,13 +737,13 @@ void qcow2_free_any_clusters(BlockDriverState *bs, int nb_csectors; nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1; - qcow2_free_clusters(bs, + qcow2_free_clusters(req, (cluster_offset & s->cluster_offset_mask) & ~511, nb_csectors * 512); return; } - qcow2_free_clusters(bs, cluster_offset, nb_clusters << s->cluster_bits); + qcow2_free_clusters(req, cluster_offset, nb_clusters << s->cluster_bits); return; } @@ -751,15 +774,16 @@ void qcow2_create_refcount_update(QCowCreateState *s, int64_t offset, } /* update the refcounts of snapshots and the copied flag */ -int qcow2_update_snapshot_refcount(BlockDriverState *bs, +int qcow2_update_snapshot_refcount(QcowRequest *req, int64_t l1_table_offset, int l1_size, int addend) { + BlockDriverState *bs = req->bs; BDRVQcowState *s = bs->opaque; uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated; int64_t old_offset, old_l2_offset; int l2_size, i, j, l1_modified, l2_modified, nb_csectors, refcount; - qcow2_l2_cache_reset(bs); + qcow2_l2_cache_reset(req); cache_refcount_updates = 1; l2_table = NULL; @@ -772,8 +796,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, l1_table = NULL; } l1_allocated = 1; - if (bdrv_pread(bs->file, l1_table_offset, - l1_table, l1_size2) != l1_size2) + if 
(blkqueue_pread(&req->bq_context, l1_table_offset, l1_table, l1_size2) < 0) goto fail; for(i = 0;i < l1_size; i++) be64_to_cpus(&l1_table[i]); @@ -792,7 +815,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, old_l2_offset = l2_offset; l2_offset &= ~QCOW_OFLAG_COPIED; l2_modified = 0; - if (bdrv_pread(bs->file, l2_offset, l2_table, l2_size) != l2_size) + if (blkqueue_pread(&req->bq_context, l2_offset, l2_table, l2_size) < 0) goto fail; for(j = 0; j < s->l2_size; j++) { offset = be64_to_cpu(l2_table[j]); @@ -804,7 +827,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, s->csize_mask) + 1; if (addend != 0) { int ret; - ret = update_refcount(bs, + ret = update_refcount(req, (offset & s->cluster_offset_mask) & ~511, nb_csectors * 512, addend); if (ret < 0) { @@ -813,15 +836,15 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, /* TODO Flushing once for the whole function should * be enough */ - bdrv_flush(bs->file); + blkqueue_barrier(&req->bq_context); } /* compressed clusters are never modified */ refcount = 2; } else { if (addend != 0) { - refcount = update_cluster_refcount(bs, offset >> s->cluster_bits, addend); + refcount = update_cluster_refcount(req, offset >> s->cluster_bits, addend); } else { - refcount = get_refcount(bs, offset >> s->cluster_bits); + refcount = get_refcount(req, offset >> s->cluster_bits); } if (refcount < 0) { @@ -839,15 +862,16 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, } } if (l2_modified) { - if (bdrv_pwrite_sync(bs->file, + if (blkqueue_pwrite(&req->bq_context, l2_offset, l2_table, l2_size) < 0) goto fail; + blkqueue_barrier(&req->bq_context); } if (addend != 0) { - refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend); + refcount = update_cluster_refcount(req, l2_offset >> s->cluster_bits, addend); } else { - refcount = get_refcount(bs, l2_offset >> s->cluster_bits); + refcount = get_refcount(req, l2_offset >> s->cluster_bits); } if (refcount < 0) { goto fail; @@ 
-863,9 +887,10 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, if (l1_modified) { for(i = 0; i < l1_size; i++) cpu_to_be64s(&l1_table[i]); - if (bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table, + if (blkqueue_pwrite(&req->bq_context, l1_table_offset, l1_table, l1_size2) < 0) goto fail; + blkqueue_barrier(&req->bq_context); for(i = 0; i < l1_size; i++) be64_to_cpus(&l1_table[i]); } @@ -873,14 +898,14 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, qemu_free(l1_table); qemu_free(l2_table); cache_refcount_updates = 0; - write_refcount_block(bs); + write_refcount_block(req); return 0; fail: if (l1_allocated) qemu_free(l1_table); qemu_free(l2_table); cache_refcount_updates = 0; - write_refcount_block(bs); + write_refcount_block(req); return -EIO; } @@ -899,12 +924,13 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, * * Modifies the number of errors in res. */ -static void inc_refcounts(BlockDriverState *bs, +static void inc_refcounts(QcowRequest *req, BdrvCheckResult *res, uint16_t *refcount_table, int refcount_table_size, int64_t offset, int64_t size) { + BlockDriverState *bs = req->bs; BDRVQcowState *s = bs->opaque; int64_t start, last, cluster_offset; int k; @@ -944,10 +970,11 @@ static void inc_refcounts(BlockDriverState *bs, * Returns the number of errors found by the checks or -errno if an internal * error occurred. 
*/ -static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, +static int check_refcounts_l2(QcowRequest *req, BdrvCheckResult *res, uint16_t *refcount_table, int refcount_table_size, int64_t l2_offset, int check_copied) { + BlockDriverState *bs = req->bs; BDRVQcowState *s = bs->opaque; uint64_t *l2_table, offset; int i, l2_size, nb_csectors, refcount; @@ -956,8 +983,9 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, l2_size = s->l2_size * sizeof(uint64_t); l2_table = qemu_malloc(l2_size); - if (bdrv_pread(bs->file, l2_offset, l2_table, l2_size) != l2_size) + if (blkqueue_pread(&req->bq_context, l2_offset, l2_table, l2_size) < 0) { goto fail; + } /* Do the actual checks */ for(i = 0; i < s->l2_size; i++) { @@ -977,14 +1005,14 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, nb_csectors = ((offset >> s->csize_shift) & s->csize_mask) + 1; offset &= s->cluster_offset_mask; - inc_refcounts(bs, res, refcount_table, refcount_table_size, + inc_refcounts(req, res, refcount_table, refcount_table_size, offset & ~511, nb_csectors * 512); } else { /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */ if (check_copied) { uint64_t entry = offset; offset &= ~QCOW_OFLAG_COPIED; - refcount = get_refcount(bs, offset >> s->cluster_bits); + refcount = get_refcount(req, offset >> s->cluster_bits); if (refcount < 0) { fprintf(stderr, "Can't get refcount for offset %" PRIx64 ": %s\n", entry, strerror(-refcount)); @@ -999,7 +1027,7 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, /* Mark cluster as used */ offset &= ~QCOW_OFLAG_COPIED; - inc_refcounts(bs, res, refcount_table,refcount_table_size, + inc_refcounts(req, res, refcount_table,refcount_table_size, offset, s->cluster_size); /* Correct offsets are cluster aligned */ @@ -1029,13 +1057,14 @@ fail: * Returns the number of errors found by the checks or -errno if an internal * error occurred. 
*/ -static int check_refcounts_l1(BlockDriverState *bs, +static int check_refcounts_l1(QcowRequest *req, BdrvCheckResult *res, uint16_t *refcount_table, int refcount_table_size, int64_t l1_table_offset, int l1_size, int check_copied) { + BlockDriverState *bs = req->bs; BDRVQcowState *s = bs->opaque; uint64_t *l1_table, l2_offset, l1_size2; int i, refcount, ret; @@ -1043,7 +1072,7 @@ static int check_refcounts_l1(BlockDriverState *bs, l1_size2 = l1_size * sizeof(uint64_t); /* Mark L1 table as used */ - inc_refcounts(bs, res, refcount_table, refcount_table_size, + inc_refcounts(req, res, refcount_table, refcount_table_size, l1_table_offset, l1_size2); /* Read L1 table entries from disk */ @@ -1051,9 +1080,12 @@ static int check_refcounts_l1(BlockDriverState *bs, l1_table = NULL; } else { l1_table = qemu_malloc(l1_size2); - if (bdrv_pread(bs->file, l1_table_offset, - l1_table, l1_size2) != l1_size2) + ret = blkqueue_pread(&req->bq_context, l1_table_offset, l1_table, + l1_size2); + if (ret < 0) { goto fail; + } + for(i = 0;i < l1_size; i++) be64_to_cpus(&l1_table[i]); } @@ -1064,7 +1096,7 @@ static int check_refcounts_l1(BlockDriverState *bs, if (l2_offset) { /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */ if (check_copied) { - refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED) + refcount = get_refcount(req, (l2_offset & ~QCOW_OFLAG_COPIED) >> s->cluster_bits); if (refcount < 0) { fprintf(stderr, "Can't get refcount for l2_offset %" @@ -1080,7 +1112,7 @@ static int check_refcounts_l1(BlockDriverState *bs, /* Mark L2 table as used */ l2_offset &= ~QCOW_OFLAG_COPIED; - inc_refcounts(bs, res, refcount_table, refcount_table_size, + inc_refcounts(req, res, refcount_table, refcount_table_size, l2_offset, s->cluster_size); /* L2 tables are cluster aligned */ @@ -1091,7 +1123,7 @@ static int check_refcounts_l1(BlockDriverState *bs, } /* Process and check L2 entries */ - ret = check_refcounts_l2(bs, res, refcount_table, + ret = check_refcounts_l2(req, res, 
refcount_table, refcount_table_size, l2_offset, check_copied); if (ret < 0) { goto fail; @@ -1123,16 +1155,23 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res) uint16_t *refcount_table; int ret; + QcowRequest req1 = { + .bs = bs, + }; + QcowRequest *req = &req1; + + blkqueue_init_context(&req->bq_context, s->bq); + size = bdrv_getlength(bs->file); nb_clusters = size_to_clusters(s, size); refcount_table = qemu_mallocz(nb_clusters * sizeof(uint16_t)); /* header */ - inc_refcounts(bs, res, refcount_table, nb_clusters, + inc_refcounts(req, res, refcount_table, nb_clusters, 0, s->cluster_size); /* current L1 table */ - ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters, + ret = check_refcounts_l1(req, res, refcount_table, nb_clusters, s->l1_table_offset, s->l1_size, 1); if (ret < 0) { return ret; @@ -1141,17 +1180,17 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res) /* snapshots */ for(i = 0; i < s->nb_snapshots; i++) { sn = s->snapshots + i; - ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters, + ret = check_refcounts_l1(req, res, refcount_table, nb_clusters, sn->l1_table_offset, sn->l1_size, 0); if (ret < 0) { return ret; } } - inc_refcounts(bs, res, refcount_table, nb_clusters, + inc_refcounts(req, res, refcount_table, nb_clusters, s->snapshots_offset, s->snapshots_size); /* refcount data */ - inc_refcounts(bs, res, refcount_table, nb_clusters, + inc_refcounts(req, res, refcount_table, nb_clusters, s->refcount_table_offset, s->refcount_table_size * sizeof(uint64_t)); @@ -1175,7 +1214,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res) } if (offset != 0) { - inc_refcounts(bs, res, refcount_table, nb_clusters, + inc_refcounts(req, res, refcount_table, nb_clusters, offset, s->cluster_size); if (refcount_table[cluster] != 1) { fprintf(stderr, "ERROR refcount block %d refcount=%d\n", @@ -1187,7 +1226,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res) /* 
compare ref counts */ for(i = 0; i < nb_clusters; i++) { - refcount1 = get_refcount(bs, i); + refcount1 = get_refcount(req, i); if (refcount1 < 0) { fprintf(stderr, "Can't get refcount for cluster %d: %s\n", i, strerror(-refcount1)); diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c index aacf357..dd42220 100644 --- a/block/qcow2-snapshot.c +++ b/block/qcow2-snapshot.c @@ -68,6 +68,13 @@ int qcow2_read_snapshots(BlockDriverState *bs) int i, id_str_size, name_size; int64_t offset; uint32_t extra_data_size; + int ret; + + QcowRequest req = { + .bs = bs, + }; + + blkqueue_init_context(&req.bq_context, s->bq); if (!s->nb_snapshots) { s->snapshots = NULL; @@ -79,8 +86,10 @@ int qcow2_read_snapshots(BlockDriverState *bs) s->snapshots = qemu_mallocz(s->nb_snapshots * sizeof(QCowSnapshot)); for(i = 0; i < s->nb_snapshots; i++) { offset = align_offset(offset, 8); - if (bdrv_pread(bs->file, offset, &h, sizeof(h)) != sizeof(h)) + ret = blkqueue_pread(&req.bq_context, offset, &h, sizeof(h)); + if (ret < 0) { goto fail; + } offset += sizeof(h); sn = s->snapshots + i; sn->l1_table_offset = be64_to_cpu(h.l1_table_offset); @@ -97,14 +106,18 @@ int qcow2_read_snapshots(BlockDriverState *bs) offset += extra_data_size; sn->id_str = qemu_malloc(id_str_size + 1); - if (bdrv_pread(bs->file, offset, sn->id_str, id_str_size) != id_str_size) + ret = blkqueue_pread(&req.bq_context, offset, sn->id_str, id_str_size); + if (ret < 0) { goto fail; + } offset += id_str_size; sn->id_str[id_str_size] = '\0'; sn->name = qemu_malloc(name_size + 1); - if (bdrv_pread(bs->file, offset, sn->name, name_size) != name_size) + ret = blkqueue_pread(&req.bq_context, offset, sn->name, name_size); + if (ret < 0) { goto fail; + } offset += name_size; sn->name[name_size] = '\0'; } @@ -126,6 +139,12 @@ static int qcow_write_snapshots(BlockDriverState *bs) uint32_t data32; int64_t offset, snapshots_offset; + QcowRequest req = { + .bs = bs, + }; + + blkqueue_init_context(&req.bq_context, s->bq); + /* 
compute the size of the snapshots */ offset = 0; for(i = 0; i < s->nb_snapshots; i++) { @@ -137,7 +156,7 @@ static int qcow_write_snapshots(BlockDriverState *bs) } snapshots_size = offset; - snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size); + snapshots_offset = qcow2_alloc_clusters(&req, snapshots_size); bdrv_flush(bs->file); offset = snapshots_offset; if (offset < 0) { @@ -159,29 +178,37 @@ static int qcow_write_snapshots(BlockDriverState *bs) h.id_str_size = cpu_to_be16(id_str_size); h.name_size = cpu_to_be16(name_size); offset = align_offset(offset, 8); - if (bdrv_pwrite_sync(bs->file, offset, &h, sizeof(h)) < 0) + if (blkqueue_pwrite(&req.bq_context, offset, &h, sizeof(h)) < 0) goto fail; offset += sizeof(h); - if (bdrv_pwrite_sync(bs->file, offset, sn->id_str, id_str_size) < 0) + if (blkqueue_pwrite(&req.bq_context, offset, sn->id_str, id_str_size) < 0) goto fail; offset += id_str_size; - if (bdrv_pwrite_sync(bs->file, offset, sn->name, name_size) < 0) + if (blkqueue_pwrite(&req.bq_context, offset, sn->name, name_size) < 0) goto fail; offset += name_size; } + + /* The new snapshot table must be on disk before the header is + * switched over to it, so order the header update behind it. */ + blkqueue_barrier(&req.bq_context); /* update the various header fields */ data64 = cpu_to_be64(snapshots_offset); - if (bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, snapshots_offset), + if (blkqueue_pwrite(&req.bq_context, offsetof(QCowHeader, snapshots_offset), &data64, sizeof(data64)) < 0) goto fail; data32 = cpu_to_be32(s->nb_snapshots); - if (bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots), + if (blkqueue_pwrite(&req.bq_context, offsetof(QCowHeader, nb_snapshots), &data32, sizeof(data32)) < 0) goto fail; + + /* Commit the header update before the old table's clusters are + * released by qcow2_free_clusters(). */ + blkqueue_barrier(&req.bq_context); /* free the old snapshot table */ - qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size); + qcow2_free_clusters(&req, s->snapshots_offset, s->snapshots_size); s->snapshots_offset = snapshots_offset; s->snapshots_size = snapshots_size; return 0; @@ -241,6 +260,12 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) uint64_t *l1_table = NULL; int64_t l1_table_offset; +
QcowRequest req = { + .bs = bs, + }; + + blkqueue_init_context(&req.bq_context, s->bq); + memset(sn, 0, sizeof(*sn)); if (sn_info->id_str[0] == '\0') { @@ -263,12 +288,13 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) sn->date_nsec = sn_info->date_nsec; sn->vm_clock_nsec = sn_info->vm_clock_nsec; - ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1); + ret = qcow2_update_snapshot_refcount(&req, s->l1_table_offset, + s->l1_size, 1); if (ret < 0) goto fail; /* create the L1 table of the snapshot */ - l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t)); + l1_table_offset = qcow2_alloc_clusters(&req, s->l1_size * sizeof(uint64_t)); if (l1_table_offset < 0) { goto fail; } @@ -286,7 +312,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) for(i = 0; i < s->l1_size; i++) { l1_table[i] = cpu_to_be64(s->l1_table[i]); } - if (bdrv_pwrite_sync(bs->file, sn->l1_table_offset, + if (blkqueue_pwrite(&req.bq_context, sn->l1_table_offset, l1_table, s->l1_size * sizeof(uint64_t)) < 0) goto fail; qemu_free(l1_table); @@ -318,32 +344,45 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id) BDRVQcowState *s = bs->opaque; QCowSnapshot *sn; int i, snapshot_index, l1_size2; + int ret; + + QcowRequest req = { + .bs = bs, + }; + + blkqueue_init_context(&req.bq_context, s->bq); snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id); if (snapshot_index < 0) return -ENOENT; sn = &s->snapshots[snapshot_index]; - if (qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, -1) < 0) + if (qcow2_update_snapshot_refcount(&req, s->l1_table_offset, s->l1_size, -1) < 0) goto fail; - if (qcow2_grow_l1_table(bs, sn->l1_size, true) < 0) + if (qcow2_grow_l1_table(&req, sn->l1_size, true) < 0) goto fail; s->l1_size = sn->l1_size; l1_size2 = s->l1_size * sizeof(uint64_t); /* copy the snapshot l1 table to the current l1 table */ - if (bdrv_pread(bs->file, 
sn->l1_table_offset, - s->l1_table, l1_size2) != l1_size2) + ret = blkqueue_pread(&req.bq_context, sn->l1_table_offset, + s->l1_table, l1_size2); + if (ret < 0) { goto fail; - if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, - s->l1_table, l1_size2) < 0) + } + + ret = blkqueue_pwrite(&req.bq_context, s->l1_table_offset, + s->l1_table, l1_size2); + if (ret < 0) { goto fail; + } + for(i = 0;i < s->l1_size; i++) { be64_to_cpus(&s->l1_table[i]); } - if (qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1) < 0) + if (qcow2_update_snapshot_refcount(&req, s->l1_table_offset, s->l1_size, 1) < 0) goto fail; #ifdef DEBUG_ALLOC @@ -360,19 +399,28 @@ int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id) QCowSnapshot *sn; int snapshot_index, ret; + QcowRequest req = { + .bs = bs, + }; + + blkqueue_init_context(&req.bq_context, s->bq); + snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id); if (snapshot_index < 0) return -ENOENT; sn = &s->snapshots[snapshot_index]; - ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset, sn->l1_size, -1); + ret = qcow2_update_snapshot_refcount(&req, sn->l1_table_offset, + sn->l1_size, -1); if (ret < 0) return ret; /* must update the copied flag on the current cluster offsets */ - ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0); + ret = qcow2_update_snapshot_refcount(&req, s->l1_table_offset, + s->l1_size, 0); if (ret < 0) return ret; - qcow2_free_clusters(bs, sn->l1_table_offset, sn->l1_size * sizeof(uint64_t)); + qcow2_free_clusters(&req, sn->l1_table_offset, + sn->l1_size * sizeof(uint64_t)); qemu_free(sn->id_str); qemu_free(sn->name); @@ -423,6 +471,13 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name) int i, snapshot_index, l1_size2; BDRVQcowState *s = bs->opaque; QCowSnapshot *sn; + int ret; + + QcowRequest req = { + .bs = bs, + }; + + blkqueue_init_context(&req.bq_context, s->bq); snapshot_index = find_snapshot_by_id_or_name(bs, 
snapshot_name); if (snapshot_index < 0) { @@ -439,8 +494,9 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name) s->l1_table_offset = sn->l1_table_offset; s->l1_table = qemu_mallocz(align_offset(l1_size2, 512)); - if (bdrv_pread(bs->file, sn->l1_table_offset, - s->l1_table, l1_size2) != l1_size2) { + ret = blkqueue_pread(&req.bq_context, sn->l1_table_offset, + s->l1_table, l1_size2); + if (ret < 0) { return -1; } diff --git a/block/qcow2.c b/block/qcow2.c index 537c479..e445913 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -136,6 +136,20 @@ static int qcow_read_extensions(BlockDriverState *bs, uint64_t start_offset, return 0; } +static bool qcow_blkqueue_error_cb(void *opaque, int ret) +{ + BlockDriverState *bs = opaque; + BlockErrorAction action = bdrv_get_on_error(bs, 0); + + if ((action == BLOCK_ERR_STOP_ENOSPC && ret == -ENOSPC) + || action == BLOCK_ERR_STOP_ANY) + { + bdrv_mon_event(bs, BDRV_ACTION_STOP, 0); + return vm_stop(0); + } + + return false; +} static int qcow_open(BlockDriverState *bs, int flags) { @@ -234,6 +248,11 @@ static int qcow_open(BlockDriverState *bs, int flags) goto fail; bs->backing_file[len] = '\0'; } + + /* Block queue */ + s->bq = blkqueue_create(bs->file, qcow_blkqueue_error_cb, bs); + + /* Snapshots */ if (qcow2_read_snapshots(bs) < 0) goto fail; @@ -242,7 +261,11 @@ static int qcow_open(BlockDriverState *bs, int flags) #endif return 0; - fail: +fail: + if (s->bq) { + blkqueue_destroy(s->bq); + } + qcow2_free_snapshots(bs); qcow2_refcount_close(bs); qemu_free(s->l1_table); @@ -297,13 +320,20 @@ static int qcow_set_key(BlockDriverState *bs, const char *key) static int qcow_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { + BDRVQcowState *s = bs->opaque; uint64_t cluster_offset; int ret; + QcowRequest req = { + .bs = bs, + }; + + blkqueue_init_context(&req.bq_context, s->bq); *pnum = nb_sectors; /* FIXME We can get errors here, but the bdrv_is_allocated interface can't * 
pass them on today */ - ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset); + ret = qcow2_get_cluster_offset(&req, sector_num << 9, pnum, + &cluster_offset); if (ret < 0) { *pnum = 0; } @@ -341,6 +371,7 @@ typedef struct QCowAIOCB { QEMUIOVector hd_qiov; QEMUBH *bh; QCowL2Meta l2meta; + QcowRequest req; QLIST_ENTRY(QCowAIOCB) next_depend; } QCowAIOCB; @@ -425,7 +456,7 @@ static void qcow_aio_read_cb(void *opaque, int ret) QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors); } - ret = qcow2_get_cluster_offset(bs, acb->sector_num << 9, + ret = qcow2_get_cluster_offset(&acb->req, acb->sector_num << 9, &acb->cur_nr_sectors, &acb->cluster_offset); if (ret < 0) { goto done; @@ -464,7 +495,7 @@ static void qcow_aio_read_cb(void *opaque, int ret) } } else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) { /* add AIO support for compressed blocks ? */ - if (qcow2_decompress_cluster(bs, acb->cluster_offset) < 0) + if (qcow2_decompress_cluster(&acb->req, acb->cluster_offset) < 0) goto done; qemu_iovec_from_buffer(&acb->hd_qiov, @@ -519,6 +550,7 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque, int is_write) { + BDRVQcowState *s = bs->opaque; QCowAIOCB *acb; acb = qemu_aio_get(&qcow_aio_pool, bs, cb, opaque); @@ -536,6 +568,10 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs, acb->cluster_offset = 0; acb->l2meta.nb_clusters = 0; QLIST_INIT(&acb->l2meta.dependent_requests); + + acb->req.bs = bs; + blkqueue_init_context(&acb->req.bq_context, s->bq); + return acb; } @@ -585,7 +621,7 @@ static void qcow_aio_write_cb(void *opaque, int ret) acb->hd_aiocb = NULL; if (ret >= 0) { - ret = qcow2_alloc_cluster_link_l2(bs, &acb->l2meta); + ret = qcow2_alloc_cluster_link_l2(&acb->req, &acb->l2meta); } run_dependent_requests(&acb->l2meta); @@ -609,7 +645,7 @@ static void qcow_aio_write_cb(void *opaque, int ret) n_end > QCOW_MAX_CRYPT_CLUSTERS * 
s->cluster_sectors) n_end = QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors; - ret = qcow2_alloc_cluster_offset(bs, acb->sector_num << 9, + ret = qcow2_alloc_cluster_offset(&acb->req, acb->sector_num << 9, index_in_cluster, n_end, &acb->cur_nr_sectors, &acb->l2meta); if (ret < 0) { goto done; @@ -689,6 +725,15 @@ static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs, static void qcow_close(BlockDriverState *bs) { BDRVQcowState *s = bs->opaque; + + /* qcow_flush() closes and reopens the image after a failed flush. + * Clear the pointer so that the fail path of qcow_open() cannot destroy + * the same queue a second time. */ + if (s->bq) { + blkqueue_destroy(s->bq); + s->bq = NULL; + } + qemu_free(s->l1_table); qemu_free(s->l2_cache); qemu_free(s->cluster_cache); @@ -797,11 +836,17 @@ static int qcow2_change_backing_file(BlockDriverState *bs, static int preallocate(BlockDriverState *bs) { + BDRVQcowState *s = bs->opaque; uint64_t nb_sectors; uint64_t offset; int num; int ret; QCowL2Meta meta; + QcowRequest req = { + .bs = bs, + }; + + blkqueue_init_context(&req.bq_context, s->bq); nb_sectors = bdrv_getlength(bs) >> 9; offset = 0; @@ -810,14 +855,15 @@ static int preallocate(BlockDriverState *bs) while (nb_sectors) { num = MIN(nb_sectors, INT_MAX >> 9); - ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num, &meta); + ret = qcow2_alloc_cluster_offset(&req, offset, 0, num, &num, &meta); if (ret < 0) { return ret; } - ret = qcow2_alloc_cluster_link_l2(bs, &meta); + ret = qcow2_alloc_cluster_link_l2(&req, &meta); if (ret < 0) { - qcow2_free_any_clusters(bs, meta.cluster_offset, meta.nb_clusters); + qcow2_free_any_clusters(&req, meta.cluster_offset, + meta.nb_clusters); return ret; } @@ -931,13 +977,20 @@ static int qcow_create2(const char *filename, int64_t total_size, * table) */ BlockDriver* drv = bdrv_find_format("qcow2"); + QcowRequest req; + BDRVQcowState *s; + assert(drv != NULL); ret = bdrv_open(bs, filename, BDRV_O_RDWR | BDRV_O_NO_FLUSH, drv); if (ret < 0) { goto out; } - ret = qcow2_alloc_clusters(bs, 2 * cluster_size); + s = bs->opaque; + req.bs = bs; + blkqueue_init_context(&req.bq_context, s->bq); + + ret = qcow2_alloc_clusters(&req, 2 * cluster_size); if (ret
< 0) { goto out; @@ -1045,6 +1098,11 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset) { BDRVQcowState *s = bs->opaque; int ret, new_l1_size; + QcowRequest req = { + .bs = bs, + }; + + blkqueue_init_context(&req.bq_context, s->bq); if (offset & 511) { return -EINVAL; @@ -1061,19 +1119,21 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset) } new_l1_size = size_to_l1(s, offset); - ret = qcow2_grow_l1_table(bs, new_l1_size, true); + ret = qcow2_grow_l1_table(&req, new_l1_size, true); if (ret < 0) { return ret; } /* write updated header.size */ offset = cpu_to_be64(offset); - ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size), + ret = blkqueue_pwrite(&req.bq_context, offsetof(QCowHeader, size), &offset, sizeof(uint64_t)); if (ret < 0) { return ret; } + blkqueue_barrier(&req.bq_context); + s->l1_vm_state_index = new_l1_size; return 0; } @@ -1088,6 +1148,11 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num, int ret, out_len; uint8_t *out_buf; uint64_t cluster_offset; + QcowRequest req = { + .bs = bs, + }; + + blkqueue_init_context(&req.bq_context, s->bq); if (nb_sectors == 0) { /* align end of file to a sector boundary to ease reading with @@ -1132,7 +1197,7 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num, /* could not compress: write normal cluster */ bdrv_write(bs, sector_num, buf, s->cluster_sectors); } else { - cluster_offset = qcow2_alloc_compressed_cluster_offset(bs, + cluster_offset = qcow2_alloc_compressed_cluster_offset(&req, sector_num << 9, out_len); if (!cluster_offset) return -1; @@ -1150,13 +1215,64 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num, static int qcow_flush(BlockDriverState *bs) { + BDRVQcowState *s = bs->opaque; + int ret; + + ret = blkqueue_flush(s->bq); + if (ret < 0) { + /* + * If the queue is empty, we couldn't handle the write error by + * stopping the guest. 
In this case we don't know which metadata writes + * have succeeded. Reopen the qcow2 layer to make sure that all caches + * are invalidated. + */ + if (blkqueue_is_empty(s->bq)) { + qcow_close(bs); + qcow_open(bs, 0); + } + + return ret; + } + return bdrv_flush(bs->file); } +typedef struct QcowFlushAIOCB { + BlockDriverState *bs; + BlockDriverCompletionFunc *cb; + void *opaque; +} QcowFlushAIOCB; + +static void qcow_aio_flush_cb(void *opaque, int ret) +{ + QcowFlushAIOCB *acb = opaque; + BlockDriverState *bs = acb->bs; + BDRVQcowState *s = bs->opaque; + + if (ret < 0 && blkqueue_is_empty(s->bq)) { + qcow_close(bs); + qcow_open(bs, 0); + } + + acb->cb(acb->opaque, ret); + qemu_free(acb); +} + static BlockDriverAIOCB *qcow_aio_flush(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) { - return bdrv_aio_flush(bs->file, cb, opaque); + BDRVQcowState *s = bs->opaque; + BlockQueueContext context; + QcowFlushAIOCB *acb; + + blkqueue_init_context(&context, s->bq); + + acb = qemu_malloc(sizeof(*acb)); + acb->bs = bs; + acb->cb = cb; + acb->opaque = opaque; + + return blkqueue_aio_flush(&context, qcow_aio_flush_cb, acb); } static int64_t qcow_vm_state_offset(BDRVQcowState *s) diff --git a/block/qcow2.h b/block/qcow2.h index 5217bea..589767c 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -26,6 +26,7 @@ #define BLOCK_QCOW2_H #include "aes.h" +#include "block-queue.h" //#define DEBUG_ALLOC //#define DEBUG_ALLOC2 @@ -107,6 +108,8 @@ typedef struct BDRVQcowState { int64_t free_cluster_index; int64_t free_byte_offset; + BlockQueue *bq; + uint32_t crypt_method; /* current crypt method, 0 if no key yet */ uint32_t crypt_method_header; AES_KEY aes_encrypt_key; @@ -144,6 +147,11 @@ typedef struct QCowL2Meta QLIST_ENTRY(QCowL2Meta) next_in_flight; } QCowL2Meta; +typedef struct QcowRequest { + BlockDriverState* bs; + BlockQueueContext bq_context; +} QcowRequest; + static inline int size_to_clusters(BDRVQcowState *s, int64_t size) { return (size + (s->cluster_size - 
1)) >> s->cluster_bits; @@ -172,38 +180,37 @@ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, int qcow2_refcount_init(BlockDriverState *bs); void qcow2_refcount_close(BlockDriverState *bs); -int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size); -int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size); -void qcow2_free_clusters(BlockDriverState *bs, - int64_t offset, int64_t size); -void qcow2_free_any_clusters(BlockDriverState *bs, +int64_t qcow2_alloc_clusters(QcowRequest *req, int64_t size); +int64_t qcow2_alloc_bytes(QcowRequest *req, int size); +void qcow2_free_clusters(QcowRequest *req, int64_t offset, int64_t size); +void qcow2_free_any_clusters(QcowRequest *req, uint64_t cluster_offset, int nb_clusters); void qcow2_create_refcount_update(QCowCreateState *s, int64_t offset, int64_t size); -int qcow2_update_snapshot_refcount(BlockDriverState *bs, +int qcow2_update_snapshot_refcount(QcowRequest *req, int64_t l1_table_offset, int l1_size, int addend); int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res); /* qcow2-cluster.c functions */ -int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size); -void qcow2_l2_cache_reset(BlockDriverState *bs); -int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset); +int qcow2_grow_l1_table(QcowRequest *req, int min_size, bool exact_size); +void qcow2_l2_cache_reset(QcowRequest *req); +int qcow2_decompress_cluster(QcowRequest *req, uint64_t cluster_offset); void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num, uint8_t *out_buf, const uint8_t *in_buf, int nb_sectors, int enc, const AES_KEY *key); -int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, +int qcow2_get_cluster_offset(QcowRequest *req, uint64_t offset, int *num, uint64_t *cluster_offset); -int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, +int qcow2_alloc_cluster_offset(QcowRequest *req, uint64_t offset, int n_start, int n_end, int 
*num, QCowL2Meta *m); -uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, +uint64_t qcow2_alloc_compressed_cluster_offset(QcowRequest *req, uint64_t offset, int compressed_size); -int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m); +int qcow2_alloc_cluster_link_l2(QcowRequest *req, QCowL2Meta *m); /* qcow2-snapshot.c functions */ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);