From patchwork Fri Mar 15 14:49:33 2013 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: =?utf-8?q?Beno=C3=AEt_Canet?= X-Patchwork-Id: 228034 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id D22322C008E for ; Sat, 16 Mar 2013 02:03:01 +1100 (EST) Received: from localhost ([::1]:49525 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1UGW9s-0003AH-0H for incoming@patchwork.ozlabs.org; Fri, 15 Mar 2013 11:03:00 -0400 Received: from eggs.gnu.org ([208.118.235.92]:37196) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1UGVz4-0001mS-Qv for qemu-devel@nongnu.org; Fri, 15 Mar 2013 10:51:55 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1UGVz0-0005Bi-Tx for qemu-devel@nongnu.org; Fri, 15 Mar 2013 10:51:50 -0400 Received: from nodalink.pck.nerim.net ([62.212.105.220]:59631 helo=paradis.irqsave.net) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1UGVz0-0005BS-At for qemu-devel@nongnu.org; Fri, 15 Mar 2013 10:51:46 -0400 Received: by paradis.irqsave.net (Postfix, from userid 1002) id 72415874352; Fri, 15 Mar 2013 15:51:45 +0100 (CET) Received: from localhost.localdomain (unknown [192.168.77.1]) by paradis.irqsave.net (Postfix) with ESMTP id 1F8F387435F; Fri, 15 Mar 2013 15:48:48 +0100 (CET) From: =?UTF-8?q?Beno=C3=AEt=20Canet?= To: qemu-devel@nongnu.org Date: Fri, 15 Mar 2013 15:49:33 +0100 Message-Id: <1363358986-8360-20-git-send-email-benoit@irqsave.net> X-Mailer: git-send-email 1.7.10.4 In-Reply-To: <1363358986-8360-1-git-send-email-benoit@irqsave.net> References: <1363358986-8360-1-git-send-email-benoit@irqsave.net> X-detected-operating-system: by 
eggs.gnu.org: GNU/Linux 2.2.x-3.x [generic] X-Received-From: 62.212.105.220 Cc: kwolf@redhat.com, =?UTF-8?q?Beno=C3=AEt=20Canet?= , stefanha@redhat.com Subject: [Qemu-devel] [RFC V7 19/32] block: Add qcow2_dedup format and image creation code. X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Also modify qemu-io-test. Signed-off-by: Benoit Canet --- block/qcow2.c | 185 +++++++++++++++++++++++++++++++++++++++--- include/block/block_int.h | 1 + tests/qemu-iotests/common.rc | 3 +- 3 files changed, 175 insertions(+), 14 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index 1210780..9032dfc 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1263,7 +1263,8 @@ static int preallocate(BlockDriverState *bs) static int qcow2_create2(const char *filename, int64_t total_size, const char *backing_file, const char *backing_format, int flags, size_t cluster_size, int prealloc, - QEMUOptionParameter *options, int version) + QEMUOptionParameter *options, int version, + bool dedup, uint8_t hash_algo) { /* Calculate cluster_bits */ int cluster_bits; @@ -1291,7 +1292,8 @@ static int qcow2_create2(const char *filename, int64_t total_size, */ BlockDriverState* bs; QCowHeader header; - uint8_t* refcount_table; + uint8_t *tables; + int size; int ret; ret = bdrv_create_file(filename, options); @@ -1333,10 +1335,11 @@ static int qcow2_create2(const char *filename, int64_t total_size, goto out; } - /* Write an empty refcount table */ - refcount_table = g_malloc0(cluster_size); - ret = bdrv_pwrite(bs, cluster_size, refcount_table, cluster_size); - g_free(refcount_table); + /* Write an empty refcount table + extra space for dedup table if needed */ + size = dedup ? 
2 : 1; + tables = g_malloc0(size * cluster_size); + ret = bdrv_pwrite(bs, cluster_size, tables, size * cluster_size); + g_free(tables); if (ret < 0) { goto out; @@ -1347,7 +1350,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, /* * And now open the image and make it consistent first (i.e. increase the * refcount of the cluster that is occupied by the header and the refcount - * table) + * table and, if present, the dedup table) */ BlockDriver* drv = bdrv_find_format("qcow2"); assert(drv != NULL); @@ -1357,7 +1360,8 @@ static int qcow2_create2(const char *filename, int64_t total_size, goto out; } - ret = qcow2_alloc_clusters(bs, 2 * cluster_size); + size++; /* Add a cluster for the header */ + ret = qcow2_alloc_clusters(bs, size * cluster_size); if (ret < 0) { goto out; @@ -1367,11 +1371,31 @@ static int qcow2_create2(const char *filename, int64_t total_size, } /* Okay, now that we have a valid image, let's give it the right size */ + BDRVQcowState *s = bs->opaque; ret = bdrv_truncate(bs, total_size * BDRV_SECTOR_SIZE); if (ret < 0) { goto out; } + if (dedup) { + s->has_dedup = true; + s->dedup_table_offset = cluster_size * 2; + s->dedup_table_size = cluster_size / sizeof(uint64_t); + s->dedup_hash_algo = hash_algo; + + ret = qcow2_set_incompat_feature(bs, QCOW2_INCOMPAT_DEDUP); + if (ret < 0) { + goto out; + } + + s->dedup_status = DEDUP_STATUS_STARTED; + ret = qcow2_update_header(bs); + s->dedup_status = DEDUP_STATUS_STOPPED; + if (ret < 0) { + goto out; + } + } + /* Want a backing file? 
There you go.*/ if (backing_file) { ret = bdrv_change_backing_file(bs, backing_file, backing_format); @@ -1397,15 +1421,41 @@ out: return ret; } +static int qcow2_warn_if_version_3_is_needed(int version, + bool has_feature, + const char *feature) +{ + if (version < 3 && has_feature) { + fprintf(stderr, "%s only supported with compatibility " + "level 1.1 and above (use compat=1.1 or greater)\n", + feature); + return -EINVAL; + } + return 0; +} + +static int8_t qcow2_get_dedup_hash_algo(char *value) +{ + if (!value || !strcmp(value, "sha256")) { + return QCOW_HASH_SHA256; + } + + error_printf("Unsupported deduplication hash algorithm.\n"); + return -EINVAL; +} + static int qcow2_create(const char *filename, QEMUOptionParameter *options) { const char *backing_file = NULL; const char *backing_fmt = NULL; uint64_t sectors = 0; int flags = 0; + int ret; size_t cluster_size = DEFAULT_CLUSTER_SIZE; int prealloc = 0; int version = 2; + bool dedup = false; + int8_t hash_algo = 0; /* Read out options */ while (options && options->name) { @@ -1443,6 +1493,13 @@ static int qcow2_create(const char *filename, QEMUOptionParameter *options) } } else if (!strcmp(options->name, BLOCK_OPT_LAZY_REFCOUNTS)) { flags |= options->value.n ? 
BLOCK_FLAG_LAZY_REFCOUNTS : 0; + } else if (!strcmp(options->name, BLOCK_OPT_DEDUP)) { + hash_algo = qcow2_get_dedup_hash_algo(options->value.s); + if (hash_algo < 0) { + return hash_algo; + } + dedup = true; + version = 3; } options++; } @@ -1453,14 +1510,22 @@ static int qcow2_create(const char *filename, QEMUOptionParameter *options) return -EINVAL; } - if (version < 3 && (flags & BLOCK_FLAG_LAZY_REFCOUNTS)) { - fprintf(stderr, "Lazy refcounts only supported with compatibility " - "level 1.1 and above (use compat=1.1 or greater)\n"); - return -EINVAL; + ret = qcow2_warn_if_version_3_is_needed(version, + flags & BLOCK_FLAG_LAZY_REFCOUNTS, + "Lazy refcounts"); + if (ret < 0) { + return ret; + } + ret = qcow2_warn_if_version_3_is_needed(version, + dedup, + "Deduplication"); + if (ret < 0) { + return ret; } return qcow2_create2(filename, sectors, backing_file, backing_fmt, flags, - cluster_size, prealloc, options, version); + cluster_size, prealloc, options, version, + dedup, hash_algo); } static int qcow2_make_empty(BlockDriverState *bs) @@ -1766,6 +1831,51 @@ static QEMUOptionParameter qcow2_create_options[] = { { NULL } }; +static QEMUOptionParameter qcow2_dedup_create_options[] = { + { + .name = BLOCK_OPT_SIZE, + .type = OPT_SIZE, + .help = "Virtual disk size" + }, + { + .name = BLOCK_OPT_BACKING_FILE, + .type = OPT_STRING, + .help = "File name of a base image" + }, + { + .name = BLOCK_OPT_BACKING_FMT, + .type = OPT_STRING, + .help = "Image format of the base image" + }, + { + .name = BLOCK_OPT_ENCRYPT, + .type = OPT_FLAG, + .help = "Encrypt the image" + }, + { + .name = BLOCK_OPT_CLUSTER_SIZE, + .type = OPT_SIZE, + .help = "qcow2 cluster size", + .value = { .n = DEFAULT_DEDUP_CLUSTER_SIZE }, + }, + { + .name = BLOCK_OPT_PREALLOC, + .type = OPT_STRING, + .help = "Preallocation mode (allowed values: off, metadata)" + }, + { + .name = BLOCK_OPT_LAZY_REFCOUNTS, + .type = OPT_FLAG, + .help = "Postpone refcount updates", + }, + { + .name = BLOCK_OPT_DEDUP, + .type = 
OPT_STRING, + .help = "Deduplication", + }, + { NULL } +}; + static BlockDriver bdrv_qcow2 = { .format_name = "qcow2", .instance_size = sizeof(BDRVQcowState), @@ -1805,9 +1915,58 @@ static BlockDriver bdrv_qcow2 = { .bdrv_check = qcow2_check, }; +/* As all the defined .create_options are passed to qcow2_create() even if + * the user does not specify them, it's not possible to have a default 4KB + * cluster size for deduplication. + * For example, it's impossible to tell the difference between the 64KB cluster + * size default create option of qcow2 and a 64KB user-specified cluster size. + * So we declare the qcow2_dedup format in order to be able to define + * deduplication-specific create options. + * It will also help with qemu-iotests integration. + */ +static BlockDriver bdrv_qcow2_dedup = { + .format_name = "qcow2_dedup", + .instance_size = sizeof(BDRVQcowState), + .bdrv_probe = qcow2_probe, + .bdrv_open = qcow2_open, + .bdrv_close = qcow2_close, + .bdrv_reopen_prepare = qcow2_reopen_prepare, + .bdrv_create = qcow2_create, + .bdrv_co_is_allocated = qcow2_co_is_allocated, + .bdrv_set_key = qcow2_set_key, + .bdrv_make_empty = qcow2_make_empty, + + .bdrv_co_readv = qcow2_co_readv, + .bdrv_co_writev = qcow2_co_writev, + .bdrv_co_flush_to_os = qcow2_co_flush_to_os, + + .bdrv_co_write_zeroes = qcow2_co_write_zeroes, + .bdrv_co_discard = qcow2_co_discard, + .bdrv_truncate = qcow2_truncate, + .bdrv_write_compressed = qcow2_write_compressed, + + .bdrv_snapshot_create = qcow2_snapshot_create, + .bdrv_snapshot_goto = qcow2_snapshot_goto, + .bdrv_snapshot_delete = qcow2_snapshot_delete, + .bdrv_snapshot_list = qcow2_snapshot_list, + .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp, + .bdrv_get_info = qcow2_get_info, + + .bdrv_save_vmstate = qcow2_save_vmstate, + .bdrv_load_vmstate = qcow2_load_vmstate, + + .bdrv_change_backing_file = qcow2_change_backing_file, + + .bdrv_invalidate_cache = qcow2_invalidate_cache, + + .create_options = qcow2_dedup_create_options, + 
.bdrv_check = qcow2_check, +}; + static void bdrv_qcow2_init(void) { bdrv_register(&bdrv_qcow2); + bdrv_register(&bdrv_qcow2_dedup); } block_init(bdrv_qcow2_init); diff --git a/include/block/block_int.h b/include/block/block_int.h index eaad53e..62c72fc 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -57,6 +57,7 @@ #define BLOCK_OPT_COMPAT_LEVEL "compat" #define BLOCK_OPT_LAZY_REFCOUNTS "lazy_refcounts" #define BLOCK_OPT_ADAPTER_TYPE "adapter_type" +#define BLOCK_OPT_DEDUP "dedup" typedef struct BdrvTrackedRequest BdrvTrackedRequest; diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index e522d61..520083a 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -124,7 +124,8 @@ _make_test_img() -e "s# compat='[^']*'##g" \ -e "s# compat6=\\(on\\|off\\)##g" \ -e "s# static=\\(on\\|off\\)##g" \ - -e "s# lazy_refcounts=\\(on\\|off\\)##g" + -e "s# lazy_refcounts=\\(on\\|off\\)##g" \ + -e "s# dedup=\\('sha256'\\|'skein'\\|'sha3'\\)##g" # Start an NBD server on the image file, which is what we'll be talking to if [ $IMGPROTO = "nbd" ]; then