From patchwork Mon Jan 17 10:50:21 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kevin Wolf X-Patchwork-Id: 79141 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [199.232.76.165]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id 5B46CB6EED for ; Mon, 17 Jan 2011 22:08:57 +1100 (EST) Received: from localhost ([127.0.0.1]:57400 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1Pemvh-0001Pt-4d for incoming@patchwork.ozlabs.org; Mon, 17 Jan 2011 06:07:21 -0500 Received: from [140.186.70.92] (port=41329 helo=eggs.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1PemfO-0002Fd-W3 for qemu-devel@nongnu.org; Mon, 17 Jan 2011 05:50:38 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1Peme0-0004K7-GK for qemu-devel@nongnu.org; Mon, 17 Jan 2011 05:49:07 -0500 Received: from mx1.redhat.com ([209.132.183.28]:32910) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Pemdz-0004JU-Tb for qemu-devel@nongnu.org; Mon, 17 Jan 2011 05:49:04 -0500 Received: from int-mx09.intmail.prod.int.phx2.redhat.com (int-mx09.intmail.prod.int.phx2.redhat.com [10.5.11.22]) by mx1.redhat.com (8.13.8/8.13.8) with ESMTP id p0HAn38f001762 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=OK) for ; Mon, 17 Jan 2011 05:49:03 -0500 Received: from dhcp-5-188.str.redhat.com (vpn1-5-22.ams2.redhat.com [10.36.5.22]) by int-mx09.intmail.prod.int.phx2.redhat.com (8.14.4/8.14.4) with ESMTP id p0HAmqRW006823; Mon, 17 Jan 2011 05:48:58 -0500 From: Kevin Wolf To: qemu-devel@nongnu.org Date: Mon, 17 Jan 2011 11:50:21 +0100 Message-Id: <1295261422-7933-3-git-send-email-kwolf@redhat.com> In-Reply-To: <1295261422-7933-1-git-send-email-kwolf@redhat.com> References: <1295261422-7933-1-git-send-email-kwolf@redhat.com> X-Scanned-By: MIMEDefang 2.68 on 10.5.11.22 X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. Cc: kwolf@redhat.com Subject: [Qemu-devel] [PATCH v2 2/3] qcow2: Use QcowCache X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Use the new functions of qcow2-cache.c for everything that works on refcount block and L2 tables. Signed-off-by: Kevin Wolf --- block/qcow2-cluster.c | 206 ++++++++++++++-------------------------- block/qcow2-refcount.c | 249 +++++++++++++++++++----------------------------- block/qcow2.c | 43 ++++++++- block/qcow2.h | 12 ++- 4 files changed, 216 insertions(+), 294 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index c3ef550..f6dd09e 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -67,7 +67,11 @@ int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size) qemu_free(new_l1_table); return new_l1_table_offset; } - bdrv_flush(bs->file); + + ret = qcow2_cache_flush(bs, s->refcount_block_cache); + if (ret < 0) { + return ret; + } BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE); for(i = 0; i < s->l1_size; i++) @@ -98,63 +102,6 @@ int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size) return ret; } -void qcow2_l2_cache_reset(BlockDriverState *bs) -{ - BDRVQcowState *s = bs->opaque; - - memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); - memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t)); - memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t)); -} - -static inline int l2_cache_new_entry(BlockDriverState *bs) -{ - BDRVQcowState *s = bs->opaque; - uint32_t min_count; - int min_index, i; - - /* find a new entry in the least used one */ - min_index = 0; - min_count = 0xffffffff; - for(i = 0; i < L2_CACHE_SIZE; i++) { - if (s->l2_cache_counts[i] < min_count) { - min_count = s->l2_cache_counts[i]; - min_index = i; - } - } - return min_index; -} - -/* - * seek_l2_table - * - * seek l2_offset in the l2_cache table - * if not found, return NULL, - * if found, - * increments the l2 cache hit count of the entry, - * if counter overflow, divide by two all counters - * return the pointer to the l2 cache entry - * - */ - -static uint64_t *seek_l2_table(BDRVQcowState *s, uint64_t l2_offset) -{ - int i, j; - - for(i = 0; i < L2_CACHE_SIZE; i++) { - if (l2_offset == s->l2_cache_offsets[i]) { - /* increment the hit count */ - if (++s->l2_cache_counts[i] == 0xffffffff) { - for(j = 0; j < L2_CACHE_SIZE; j++) { - s->l2_cache_counts[j] >>= 1; - } - } - return s->l2_cache + (i << s->l2_bits); - } - } - return NULL; -} - /* * l2_load * @@ -169,33 +116,12 @@ static int l2_load(BlockDriverState *bs, uint64_t l2_offset, uint64_t **l2_table) { BDRVQcowState *s = bs->opaque; - int min_index; int ret; - /* seek if the table for the given offset is in the cache */ - - *l2_table = seek_l2_table(s, l2_offset); - if (*l2_table != NULL) { - return 0; - } - - /* not found: load a new entry in the least used one */ - - min_index = l2_cache_new_entry(bs); - *l2_table = s->l2_cache + (min_index << s->l2_bits); - BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD); - ret = bdrv_pread(bs->file, l2_offset, *l2_table, - s->l2_size * sizeof(uint64_t)); - if (ret < 0) { - qcow2_l2_cache_reset(bs); - return ret; - } + ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, (void**) l2_table); - s->l2_cache_offsets[min_index] = l2_offset; - s->l2_cache_counts[min_index] = 1; - - return 0; + return ret; } /* @@ -238,7 +164,6 @@ static int write_l1_entry(BlockDriverState *bs, int l1_index) static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) { BDRVQcowState *s = bs->opaque; - int min_index; uint64_t old_l2_offset; uint64_t *l2_table; int64_t l2_offset; @@ -252,29 +177,48 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) if (l2_offset < 0) { return l2_offset; } - bdrv_flush(bs->file); + + ret = qcow2_cache_flush(bs, s->refcount_block_cache); + if (ret < 0) { + goto fail; + } /* allocate a new entry in the l2 cache */ - min_index = l2_cache_new_entry(bs); - l2_table = s->l2_cache + (min_index << s->l2_bits); + ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table); + if (ret < 0) { + return ret; + } + + l2_table = *table; if (old_l2_offset == 0) { /* if there was no old l2 table, clear the new table */ memset(l2_table, 0, s->l2_size * sizeof(uint64_t)); } else { + uint64_t* old_table; + /* if there was an old l2 table, read it from the disk */ BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ); - ret = bdrv_pread(bs->file, old_l2_offset, l2_table, - s->l2_size * sizeof(uint64_t)); + ret = qcow2_cache_get(bs, s->l2_table_cache, old_l2_offset, + (void**) &old_table); + if (ret < 0) { + goto fail; + } + + memcpy(l2_table, old_table, s->cluster_size); + + ret = qcow2_cache_put(bs, s->l2_table_cache, old_table); if (ret < 0) { goto fail; } } + /* write the l2 table to the file */ BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE); - ret = bdrv_pwrite_sync(bs->file, l2_offset, l2_table, - s->l2_size * sizeof(uint64_t)); + + qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table); + ret = qcow2_cache_flush(bs, s->l2_table_cache); if (ret < 0) { goto fail; } @@ -286,17 +230,12 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) goto fail; } - /* update the l2 cache entry */ - - s->l2_cache_offsets[min_index] = l2_offset; - s->l2_cache_counts[min_index] = 1; - *table = l2_table; return 0; fail: + qcow2_cache_put(bs, s->l2_table_cache, *table); s->l1_table[l1_index] = old_l2_offset; - qcow2_l2_cache_reset(bs); return ret; } @@ -521,6 +460,8 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, &l2_table[l2_index], 0, QCOW_OFLAG_COPIED); } + qcow2_cache_put(bs, s->l2_table_cache, l2_table); + nb_available = (c * s->cluster_sectors); out: if (nb_available > nb_needed) @@ -575,6 +516,7 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset, return ret; } } else { + /* FIXME Order */ if (l2_offset) qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t)); ret = l2_allocate(bs, l1_index, &l2_table); @@ -632,6 +574,7 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, cluster_offset = qcow2_alloc_bytes(bs, compressed_size); if (cluster_offset < 0) { + qcow2_cache_put(bs, s->l2_table_cache, l2_table); return 0; } @@ -646,38 +589,14 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, /* compressed clusters never have the copied flag */ BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED); + qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table); l2_table[l2_index] = cpu_to_be64(cluster_offset); - if (bdrv_pwrite_sync(bs->file, - l2_offset + l2_index * sizeof(uint64_t), - l2_table + l2_index, - sizeof(uint64_t)) < 0) - return 0; - - return cluster_offset; -} - -/* - * Write L2 table updates to disk, writing whole sectors to avoid a - * read-modify-write in bdrv_pwrite - */ -#define L2_ENTRIES_PER_SECTOR (512 / 8) -static int write_l2_entries(BlockDriverState *bs, uint64_t *l2_table, - uint64_t l2_offset, int l2_index, int num) -{ - int l2_start_index = l2_index & ~(L1_ENTRIES_PER_SECTOR - 1); - int start_offset = (8 * l2_index) & ~511; - int end_offset = (8 * (l2_index + num) + 511) & ~511; - size_t len = end_offset - start_offset; - int ret; - - BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE); - ret = bdrv_pwrite(bs->file, l2_offset + start_offset, - &l2_table[l2_start_index], len); + ret = qcow2_cache_put(bs, s->l2_table_cache, l2_table); if (ret < 0) { - return ret; + return 0; } - return 0; + return cluster_offset; } int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) @@ -686,6 +605,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) int i, j = 0, l2_index, ret; uint64_t *old_cluster, start_sect, l2_offset, *l2_table; uint64_t cluster_offset = m->cluster_offset; + bool cow = false; if (m->nb_clusters == 0) return 0; @@ -695,6 +615,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) /* copy content of unmodified sectors */ start_sect = (m->offset & ~(s->cluster_size - 1)) >> 9; if (m->n_start) { + cow = true; ret = copy_sectors(bs, start_sect, cluster_offset, 0, m->n_start); if (ret < 0) goto err; @@ -702,17 +623,30 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) if (m->nb_available & (s->cluster_sectors - 1)) { uint64_t end = m->nb_available & ~(uint64_t)(s->cluster_sectors - 1); + cow = true; ret = copy_sectors(bs, start_sect + end, cluster_offset + (end << 9), m->nb_available - end, s->cluster_sectors); if (ret < 0) goto err; } - /* update L2 table */ + /* + * Update L2 table. + * + * Before we update the L2 table to actually point to the new cluster, we + * need to be sure that the refcounts have been increased and COW was + * handled. + */ + if (cow) { + bdrv_flush(bs->file); + } + + qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache); ret = get_cluster_table(bs, m->offset, &l2_table, &l2_offset, &l2_index); if (ret < 0) { goto err; } + qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table); for (i = 0; i < m->nb_clusters; i++) { /* if two concurrent writes happen to the same unallocated cluster @@ -728,16 +662,9 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) (i << s->cluster_bits)) | QCOW_OFLAG_COPIED); } - /* - * Before we update the L2 table to actually point to the new cluster, we - * need to be sure that the refcounts have been increased and COW was - * handled. - */ - bdrv_flush(bs->file); - ret = write_l2_entries(bs, l2_table, l2_offset, l2_index, m->nb_clusters); + ret = qcow2_cache_put(bs, s->l2_table_cache, l2_table); if (ret < 0) { - qcow2_l2_cache_reset(bs); goto err; } @@ -868,7 +795,8 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, m->depends_on = old_alloc; m->nb_clusters = 0; *num = 0; - return 0; + ret = 0; + goto fail; } } } @@ -884,7 +812,8 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, cluster_offset = qcow2_alloc_clusters(bs, nb_clusters * s->cluster_size); if (cluster_offset < 0) { QLIST_REMOVE(m, next_in_flight); - return cluster_offset; + ret = cluster_offset; + goto fail; } /* save info needed for meta data update */ @@ -893,12 +822,21 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, m->nb_clusters = nb_clusters; out: + ret = qcow2_cache_put(bs, s->l2_table_cache, l2_table); + if (ret < 0) { + return ret; + } + m->nb_available = MIN(nb_clusters << (s->cluster_bits - 9), n_end); m->cluster_offset = cluster_offset; *num = m->nb_available - n_start; return 0; + +fail: + qcow2_cache_put(bs, s->l2_table_cache, l2_table); + return ret; } static int decompress_buffer(uint8_t *out_buf, int out_buf_size, diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index a10453c..0ab77ff 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -32,27 +32,6 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, int addend); -static int cache_refcount_updates = 0; - -static int write_refcount_block(BlockDriverState *bs) -{ - BDRVQcowState *s = bs->opaque; - size_t size = s->cluster_size; - - if (s->refcount_block_cache_offset == 0) { - return 0; - } - - BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE); - if (bdrv_pwrite_sync(bs->file, s->refcount_block_cache_offset, - s->refcount_block_cache, size) < 0) - { - return -EIO; - } - - return 0; -} - /*********************************************************/ /* refcount handling */ @@ -61,7 +40,6 @@ int qcow2_refcount_init(BlockDriverState *bs) BDRVQcowState *s = bs->opaque; int ret, refcount_table_size2, i; - s->refcount_block_cache = qemu_malloc(s->cluster_size); refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t); s->refcount_table = qemu_malloc(refcount_table_size2); if (s->refcount_table_size > 0) { @@ -81,34 +59,22 @@ int qcow2_refcount_init(BlockDriverState *bs) void qcow2_refcount_close(BlockDriverState *bs) { BDRVQcowState *s = bs->opaque; - qemu_free(s->refcount_block_cache); qemu_free(s->refcount_table); } static int load_refcount_block(BlockDriverState *bs, - int64_t refcount_block_offset) + int64_t refcount_block_offset, + void **refcount_block) { BDRVQcowState *s = bs->opaque; int ret; - if (cache_refcount_updates) { - ret = write_refcount_block(bs); - if (ret < 0) { - return ret; - } - } - BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_LOAD); - ret = bdrv_pread(bs->file, refcount_block_offset, s->refcount_block_cache, - s->cluster_size); - if (ret < 0) { - s->refcount_block_cache_offset = 0; - return ret; - } + ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset, + refcount_block); - s->refcount_block_cache_offset = refcount_block_offset; - return 0; + return ret; } /* @@ -122,6 +88,8 @@ static int get_refcount(BlockDriverState *bs, int64_t cluster_index) int refcount_table_index, block_index; int64_t refcount_block_offset; int ret; + uint16_t *refcount_block; + uint16_t refcount; refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT); if (refcount_table_index >= s->refcount_table_size) @@ -129,16 +97,23 @@ static int get_refcount(BlockDriverState *bs, int64_t cluster_index) refcount_block_offset = s->refcount_table[refcount_table_index]; if (!refcount_block_offset) return 0; - if (refcount_block_offset != s->refcount_block_cache_offset) { - /* better than nothing: return allocated if read error */ - ret = load_refcount_block(bs, refcount_block_offset); - if (ret < 0) { - return ret; - } + + ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset, + (void**) &refcount_block); + if (ret < 0) { + return ret; } + block_index = cluster_index & ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1); - return be16_to_cpu(s->refcount_block_cache[block_index]); + refcount = be16_to_cpu(refcount_block[block_index]); + + ret = qcow2_cache_put(bs, s->refcount_block_cache, refcount_block); + if (ret < 0) { + return ret; + } + + return refcount; } /* @@ -174,9 +149,10 @@ static int in_same_refcount_block(BDRVQcowState *s, uint64_t offset_a, * Loads a refcount block. If it doesn't exist yet, it is allocated first * (including growing the refcount table if needed). * - * Returns the offset of the refcount block on success or -errno in error case + * Returns 0 on success or -errno in error case */ -static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index) +static int alloc_refcount_block(BlockDriverState *bs, + int64_t cluster_index, uint16_t **refcount_block) { BDRVQcowState *s = bs->opaque; unsigned int refcount_table_index; @@ -194,13 +170,8 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index) /* If it's already there, we're done */ if (refcount_block_offset) { - if (refcount_block_offset != s->refcount_block_cache_offset) { - ret = load_refcount_block(bs, refcount_block_offset); - if (ret < 0) { - return ret; - } - } - return refcount_block_offset; + return load_refcount_block(bs, refcount_block_offset, + (void**) refcount_block); } } @@ -226,12 +197,10 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index) * refcount block into the cache */ - if (cache_refcount_updates) { - ret = write_refcount_block(bs); - if (ret < 0) { - return ret; - } - } + *refcount_block = NULL; + + /* We write to the refcount table, so we might depend on L2 tables */ + qcow2_cache_flush(bs, s->l2_table_cache); /* Allocate the refcount block itself and mark it as used */ int64_t new_block = alloc_clusters_noref(bs, s->cluster_size); @@ -247,13 +216,18 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index) if (in_same_refcount_block(s, new_block, cluster_index << s->cluster_bits)) { /* Zero the new refcount block before updating it */ - memset(s->refcount_block_cache, 0, s->cluster_size); - s->refcount_block_cache_offset = new_block; + ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block, + (void**) refcount_block); + if (ret < 0) { + goto fail_block; + } + + memset(*refcount_block, 0, s->cluster_size); /* The block describes itself, need to update the cache */ int block_index = (new_block >> s->cluster_bits) & ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1); - s->refcount_block_cache[block_index] = cpu_to_be16(1); + (*refcount_block)[block_index] = cpu_to_be16(1); } else { /* Described somewhere else. This can recurse at most twice before we * arrive at a block that describes itself. */ @@ -266,14 +240,19 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index) /* Initialize the new refcount block only after updating its refcount, * update_refcount uses the refcount cache itself */ - memset(s->refcount_block_cache, 0, s->cluster_size); - s->refcount_block_cache_offset = new_block; + ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block, + (void**) refcount_block); + if (ret < 0) { + goto fail_block; + } + + memset(*refcount_block, 0, s->cluster_size); } /* Now the new refcount block needs to be written to disk */ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE); - ret = bdrv_pwrite_sync(bs->file, new_block, s->refcount_block_cache, - s->cluster_size); + qcow2_cache_entry_mark_dirty(s->refcount_block_cache, *refcount_block); + ret = qcow2_cache_flush(bs, s->refcount_block_cache); if (ret < 0) { goto fail_block; } @@ -293,6 +272,12 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index) return new_block; } + ret = qcow2_cache_put(bs, s->refcount_block_cache, *refcount_block); + if (ret < 0) { + goto fail_block; + } + *refcount_block = NULL; + /* * If we come here, we need to grow the refcount table. Again, a new * refcount table needs some space and we can't simply allocate to avoid @@ -410,9 +395,9 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index) qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t)); s->free_cluster_index = old_free_cluster_index; - ret = load_refcount_block(bs, new_block); + ret = load_refcount_block(bs, new_block, (void**) refcount_block); if (ret < 0) { - goto fail_block; + return ret; } return new_block; @@ -420,41 +405,11 @@ static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index) fail_table: qemu_free(new_table); fail_block: - s->refcount_block_cache_offset = 0; - return ret; -} - -#define REFCOUNTS_PER_SECTOR (512 >> REFCOUNT_SHIFT) -static int write_refcount_block_entries(BlockDriverState *bs, - int64_t refcount_block_offset, int first_index, int last_index) -{ - BDRVQcowState *s = bs->opaque; - size_t size; - int ret; - - if (cache_refcount_updates) { - return 0; - } - - if (first_index < 0) { - return 0; + if (*refcount_block != NULL) { + qcow2_cache_put(bs, s->refcount_block_cache, *refcount_block); + *refcount_block = NULL; } - - first_index &= ~(REFCOUNTS_PER_SECTOR - 1); - last_index = (last_index + REFCOUNTS_PER_SECTOR) - & ~(REFCOUNTS_PER_SECTOR - 1); - - size = (last_index - first_index) << REFCOUNT_SHIFT; - - BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART); - ret = bdrv_pwrite(bs->file, - refcount_block_offset + (first_index << REFCOUNT_SHIFT), - &s->refcount_block_cache[first_index], size); - if (ret < 0) { - return ret; - } - - return 0; + return ret; } /* XXX: cache several refcount block clusters ? */ @@ -463,9 +418,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, { BDRVQcowState *s = bs->opaque; int64_t start, last, cluster_offset; - int64_t refcount_block_offset = 0; - int64_t table_index = -1, old_table_index; - int first_index = -1, last_index = -1; + uint16_t *refcount_block = NULL; int ret; #ifdef DEBUG_ALLOC2 @@ -478,6 +431,11 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, return 0; } + if (addend < 0) { + qcow2_cache_set_dependency(bs, s->refcount_block_cache, + s->l2_table_cache); + } + start = offset & ~(s->cluster_size - 1); last = (offset + length - 1) & ~(s->cluster_size - 1); for(cluster_offset = start; cluster_offset <= last; @@ -485,42 +443,26 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, { int block_index, refcount; int64_t cluster_index = cluster_offset >> s->cluster_bits; - int64_t new_block; - /* Only write refcount block to disk when we are done with it */ - old_table_index = table_index; - table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT); - if ((old_table_index >= 0) && (table_index != old_table_index)) { - - ret = write_refcount_block_entries(bs, refcount_block_offset, - first_index, last_index); + /* Load the refcount block and allocate it if needed */ + if (refcount_block) { + ret = qcow2_cache_put(bs, s->refcount_block_cache, refcount_block); if (ret < 0) { - return ret; + goto fail; } - - first_index = -1; - last_index = -1; } - /* Load the refcount block and allocate it if needed */ - new_block = alloc_refcount_block(bs, cluster_index); - if (new_block < 0) { - ret = new_block; + ret = alloc_refcount_block(bs, cluster_index, &refcount_block); + if (ret < 0) { goto fail; } - refcount_block_offset = new_block; + qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refcount_block); /* we can update the count and save it */ block_index = cluster_index & ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1); - if (first_index == -1 || block_index < first_index) { - first_index = block_index; - } - if (block_index > last_index) { - last_index = block_index; - } - refcount = be16_to_cpu(s->refcount_block_cache[block_index]); + refcount = be16_to_cpu(refcount_block[block_index]); refcount += addend; if (refcount < 0 || refcount > 0xffff) { ret = -EINVAL; @@ -529,17 +471,15 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, if (refcount == 0 && cluster_index < s->free_cluster_index) { s->free_cluster_index = cluster_index; } - s->refcount_block_cache[block_index] = cpu_to_be16(refcount); + refcount_block[block_index] = cpu_to_be16(refcount); } ret = 0; fail: - /* Write last changed block to disk */ - if (refcount_block_offset != 0) { + if (refcount_block) { int wret; - wret = write_refcount_block_entries(bs, refcount_block_offset, - first_index, last_index); + wret = qcow2_cache_put(bs, s->refcount_block_cache, refcount_block); if (wret < 0) { return ret < 0 ? ret : wret; } @@ -758,9 +698,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated; int64_t old_offset, old_l2_offset; int l2_size, i, j, l1_modified, l2_modified, nb_csectors, refcount; - - qcow2_l2_cache_reset(bs); - cache_refcount_updates = 1; + int ret; l2_table = NULL; l1_table = NULL; @@ -784,7 +722,6 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, } l2_size = s->l2_size * sizeof(uint64_t); - l2_table = qemu_malloc(l2_size); l1_modified = 0; for(i = 0; i < l1_size; i++) { l2_offset = l1_table[i]; @@ -792,8 +729,13 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, old_l2_offset = l2_offset; l2_offset &= ~QCOW_OFLAG_COPIED; l2_modified = 0; - if (bdrv_pread(bs->file, l2_offset, l2_table, l2_size) != l2_size) + + ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, + (void**) &l2_table); + if (ret < 0) { goto fail; + } + for(j = 0; j < s->l2_size; j++) { offset = be64_to_cpu(l2_table[j]); if (offset != 0) { @@ -833,17 +775,24 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, offset |= QCOW_OFLAG_COPIED; } if (offset != old_offset) { + if (addend > 0) { + qcow2_cache_set_dependency(bs, s->l2_table_cache, + s->refcount_block_cache); + } l2_table[j] = cpu_to_be64(offset); l2_modified = 1; + qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table); } } } - if (l2_modified) { - if (bdrv_pwrite_sync(bs->file, - l2_offset, l2_table, l2_size) < 0) - goto fail; + + ret = qcow2_cache_put(bs, s->l2_table_cache, l2_table); + if (ret < 0) { + goto fail; } + l2_table = NULL; + if (addend != 0) { refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend); } else { @@ -871,16 +820,14 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, } if (l1_allocated) qemu_free(l1_table); - qemu_free(l2_table); - cache_refcount_updates = 0; - write_refcount_block(bs); return 0; fail: + if (l2_table) { + qcow2_cache_put(bs, s->l2_table_cache, l2_table); + } + if (l1_allocated) qemu_free(l1_table); - qemu_free(l2_table); - cache_refcount_updates = 0; - write_refcount_block(bs); return -EIO; } diff --git a/block/qcow2.c b/block/qcow2.c index b6b094c..299ef38 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -143,6 +143,7 @@ static int qcow2_open(BlockDriverState *bs, int flags) int len, i, ret = 0; QCowHeader header; uint64_t ext_end; + bool writethrough; ret = bdrv_pread(bs->file, 0, &header, sizeof(header)); if (ret < 0) { @@ -217,8 +218,13 @@ static int qcow2_open(BlockDriverState *bs, int flags) be64_to_cpus(&s->l1_table[i]); } } - /* alloc L2 cache */ - s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); + + /* alloc L2 table/refcount block cache */ + writethrough = ((flags & BDRV_O_CACHE_MASK) == 0); + s->l2_table_cache = qcow2_cache_create(bs, L2_CACHE_SIZE, writethrough); + s->refcount_block_cache = qcow2_cache_create(bs, REFCOUNT_CACHE_SIZE, + writethrough); + s->cluster_cache = qemu_malloc(s->cluster_size); /* one more sector for decompressed data alignment */ s->cluster_data = qemu_malloc(QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size @@ -270,7 +276,9 @@ static int qcow2_open(BlockDriverState *bs, int flags) qcow2_free_snapshots(bs); qcow2_refcount_close(bs); qemu_free(s->l1_table); - qemu_free(s->l2_cache); + if (s->l2_table_cache) { + qcow2_cache_destroy(bs, s->l2_table_cache); + } qemu_free(s->cluster_cache); qemu_free(s->cluster_data); return ret; @@ -719,7 +727,8 @@ static void qcow2_close(BlockDriverState *bs) { BDRVQcowState *s = bs->opaque; qemu_free(s->l1_table); - qemu_free(s->l2_cache); + qcow2_cache_destroy(bs, s->l2_table_cache); + qcow2_cache_destroy(bs, s->refcount_block_cache); qemu_free(s->cluster_cache); qemu_free(s->cluster_data); qcow2_refcount_close(bs); @@ -1179,6 +1188,19 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num, static int qcow2_flush(BlockDriverState *bs) { + BDRVQcowState *s = bs->opaque; + int ret; + + ret = qcow2_cache_flush(bs, s->l2_table_cache); + if (ret < 0) { + return ret; + } + + ret = qcow2_cache_flush(bs, s->refcount_block_cache); + if (ret < 0) { + return ret; + } + return bdrv_flush(bs->file); } @@ -1186,6 +1208,19 @@ static BlockDriverAIOCB *qcow2_aio_flush(BlockDriverState *bs, BlockDriverCompletionFunc *cb, void *opaque) { + BDRVQcowState *s = bs->opaque; + int ret; + + ret = qcow2_cache_flush(bs, s->l2_table_cache); + if (ret < 0) { + return NULL; + } + + ret = qcow2_cache_flush(bs, s->refcount_block_cache); + if (ret < 0) { + return NULL; + } + return bdrv_aio_flush(bs->file, cb, opaque); } diff --git a/block/qcow2.h b/block/qcow2.h index b67f95f..8302bbb 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -51,6 +51,9 @@ #define L2_CACHE_SIZE 16 +/* Must be at least 4 to cover all cases of refcount table growth */ +#define REFCOUNT_CACHE_SIZE 4 + typedef struct QCowHeader { uint32_t magic; uint32_t version; @@ -94,9 +97,10 @@ typedef struct BDRVQcowState { uint64_t cluster_offset_mask; uint64_t l1_table_offset; uint64_t *l1_table; - uint64_t *l2_cache; - uint64_t l2_cache_offsets[L2_CACHE_SIZE]; - uint32_t l2_cache_counts[L2_CACHE_SIZE]; + + Qcow2Cache* l2_table_cache; + Qcow2Cache* refcount_block_cache; + uint8_t *cluster_cache; uint8_t *cluster_data; uint64_t cluster_cache_offset; @@ -105,8 +109,6 @@ typedef struct BDRVQcowState { uint64_t *refcount_table; uint64_t refcount_table_offset; uint32_t refcount_table_size; - uint64_t refcount_block_cache_offset; - uint16_t *refcount_block_cache; int64_t free_cluster_index; int64_t free_byte_offset;