From patchwork Mon Jun 24 09:10:33 2013 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Stefan Hajnoczi X-Patchwork-Id: 253770 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [IPv6:2001:4830:134:3::11]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id 6FCF42C016F for ; Mon, 24 Jun 2013 19:23:52 +1000 (EST) Received: from localhost ([::1]:52155 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Ur302-0004ia-A7 for incoming@patchwork.ozlabs.org; Mon, 24 Jun 2013 05:23:50 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:47634) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Ur2oD-0002ku-5F for qemu-devel@nongnu.org; Mon, 24 Jun 2013 05:11:42 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1Ur2o5-0007hb-FG for qemu-devel@nongnu.org; Mon, 24 Jun 2013 05:11:37 -0400 Received: from mx1.redhat.com ([209.132.183.28]:17269) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1Ur2o5-0007gI-4T for qemu-devel@nongnu.org; Mon, 24 Jun 2013 05:11:29 -0400 Received: from int-mx10.intmail.prod.int.phx2.redhat.com (int-mx10.intmail.prod.int.phx2.redhat.com [10.5.11.23]) by mx1.redhat.com (8.14.4/8.14.4) with ESMTP id r5O9BS7W001790 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=OK); Mon, 24 Jun 2013 05:11:28 -0400 Received: from localhost (ovpn-112-34.ams2.redhat.com [10.36.112.34]) by int-mx10.intmail.prod.int.phx2.redhat.com (8.14.4/8.14.4) with ESMTP id r5O9BRD7016301; Mon, 24 Jun 2013 05:11:27 -0400 From: Stefan Hajnoczi To: Date: Mon, 24 Jun 2013 11:10:33 +0200 Message-Id: <1372065035-19601-22-git-send-email-stefanha@redhat.com> In-Reply-To: <1372065035-19601-1-git-send-email-stefanha@redhat.com> References: <1372065035-19601-1-git-send-email-stefanha@redhat.com> X-Scanned-By: MIMEDefang 2.68 on 10.5.11.23 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 3.x X-Received-From: 209.132.183.28 Cc: Kevin Wolf , Anthony Liguori , Stefan Hajnoczi Subject: [Qemu-devel] [PATCH 21/23] qcow2: Batch discards X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org From: Kevin Wolf This optimises the discard operation for freed clusters by batching discard requests (both snapshot deletion and bdrv_discard end up updating the refcounts cluster by cluster). Note that we don't discard asynchronously, but keep s->lock held. This is to avoid that a freed cluster is reallocated and written to while the discard is still in flight. Signed-off-by: Kevin Wolf Signed-off-by: Stefan Hajnoczi --- block/qcow2-cluster.c | 22 +++++++++++--- block/qcow2-refcount.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++-- block/qcow2.c | 1 + block/qcow2.h | 11 +++++++ 4 files changed, 109 insertions(+), 7 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 3191d6b..cca76d4 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -1377,18 +1377,25 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset, nb_clusters = size_to_clusters(s, end_offset - offset); + s->cache_discards = true; + /* Each L2 table is handled by its own loop iteration */ while (nb_clusters > 0) { ret = discard_single_l2(bs, offset, nb_clusters); if (ret < 0) { - return ret; + goto fail; } nb_clusters -= ret; offset += (ret * s->cluster_size); } - return 0; + ret = 0; +fail: + s->cache_discards = false; + qcow2_process_discards(bs, ret); + + return ret; } /* @@ -1450,15 +1457,22 @@ int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors) /* Each L2 table is handled by its own loop iteration */ nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS); + s->cache_discards = true; + while (nb_clusters > 0) { ret = zero_single_l2(bs, offset, nb_clusters); if (ret < 0) { - return ret; + goto fail; } nb_clusters -= ret; offset += (ret * s->cluster_size); } - return 0; + ret = 0; +fail: + s->cache_discards = false; + qcow2_process_discards(bs, ret); + + return ret; } diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 7488988..1244693 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -420,6 +420,74 @@ fail_block: return ret; } +void qcow2_process_discards(BlockDriverState *bs, int ret) +{ + BDRVQcowState *s = bs->opaque; + Qcow2DiscardRegion *d, *next; + + QTAILQ_FOREACH_SAFE(d, &s->discards, next, next) { + QTAILQ_REMOVE(&s->discards, d, next); + + /* Discard is optional, ignore the return value */ + if (ret >= 0) { + bdrv_discard(bs->file, + d->offset >> BDRV_SECTOR_BITS, + d->bytes >> BDRV_SECTOR_BITS); + } + + g_free(d); + } +} + +static void update_refcount_discard(BlockDriverState *bs, + uint64_t offset, uint64_t length) +{ + BDRVQcowState *s = bs->opaque; + Qcow2DiscardRegion *d, *p, *next; + + QTAILQ_FOREACH(d, &s->discards, next) { + uint64_t new_start = MIN(offset, d->offset); + uint64_t new_end = MAX(offset + length, d->offset + d->bytes); + + if (new_end - new_start <= length + d->bytes) { + /* There can't be any overlap, areas ending up here have no + * references any more and therefore shouldn't get freed another + * time. */ + assert(d->bytes + length == new_end - new_start); + d->offset = new_start; + d->bytes = new_end - new_start; + goto found; + } + } + + d = g_malloc(sizeof(*d)); + *d = (Qcow2DiscardRegion) { + .bs = bs, + .offset = offset, + .bytes = length, + }; + QTAILQ_INSERT_TAIL(&s->discards, d, next); + +found: + /* Merge discard requests if they are adjacent now */ + QTAILQ_FOREACH_SAFE(p, &s->discards, next, next) { + if (p == d + || p->offset > d->offset + d->bytes + || d->offset > p->offset + p->bytes) + { + continue; + } + + /* Still no overlap possible */ + assert(p->offset == d->offset + d->bytes + || d->offset == p->offset + p->bytes); + + QTAILQ_REMOVE(&s->discards, p, next); + d->offset = MIN(d->offset, p->offset); + d->bytes += p->bytes; + } +} + /* XXX: cache several refcount block clusters ? */ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, int64_t offset, int64_t length, int addend, enum qcow2_discard_type type) @@ -488,15 +556,18 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, s->free_cluster_index = cluster_index; } refcount_block[block_index] = cpu_to_be16(refcount); + if (refcount == 0 && s->discard_passthrough[type]) { - /* Try discarding, ignore errors */ - /* FIXME Doing this cluster by cluster will be painfully slow */ - bdrv_discard(bs->file, cluster_offset, 1); + update_refcount_discard(bs, cluster_offset, s->cluster_size); } } ret = 0; fail: + if (!s->cache_discards) { + qcow2_process_discards(bs, ret); + } + /* Write last changed block to disk */ if (refcount_block) { int wret; @@ -755,6 +826,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, l1_table = NULL; l1_size2 = l1_size * sizeof(uint64_t); + s->cache_discards = true; + /* WARNING: qcow2_snapshot_goto relies on this function not using the * l1_table_offset when it is the current s->l1_table_offset! Be careful * when changing this! */ @@ -867,6 +940,9 @@ fail: qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); } + s->cache_discards = false; + qcow2_process_discards(bs, ret); + /* Update L1 only if it isn't deleted anyway (addend = -1) */ if (ret == 0 && addend >= 0 && l1_modified) { for (i = 0; i < l1_size; i++) { diff --git a/block/qcow2.c b/block/qcow2.c index ef8a2ca..9383990 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -486,6 +486,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags) } QLIST_INIT(&s->cluster_allocs); + QTAILQ_INIT(&s->discards); /* read qcow2 extensions */ if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL)) { diff --git a/block/qcow2.h b/block/qcow2.h index 6f91b9a..3b2d5cd 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -147,6 +147,13 @@ typedef struct Qcow2Feature { char name[46]; } QEMU_PACKED Qcow2Feature; +typedef struct Qcow2DiscardRegion { + BlockDriverState *bs; + uint64_t offset; + uint64_t bytes; + QTAILQ_ENTRY(Qcow2DiscardRegion) next; +} Qcow2DiscardRegion; + typedef struct BDRVQcowState { int cluster_bits; int cluster_size; @@ -199,6 +206,8 @@ typedef struct BDRVQcowState { size_t unknown_header_fields_size; void* unknown_header_fields; QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext; + QTAILQ_HEAD (, Qcow2DiscardRegion) discards; + bool cache_discards; } BDRVQcowState; /* XXX: use std qcow open function ? */ @@ -374,6 +383,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix); +void qcow2_process_discards(BlockDriverState *bs, int ret); + /* qcow2-cluster.c functions */ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, bool exact_size);