From patchwork Wed Feb 6 12:31:41 2013 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: =?utf-8?q?Beno=C3=AEt_Canet?= X-Patchwork-Id: 218602 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id 112D82C02EA for ; Thu, 7 Feb 2013 00:27:09 +1100 (EST) Received: from localhost ([::1]:58734 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1U351n-0005k3-6s for incoming@patchwork.ozlabs.org; Wed, 06 Feb 2013 08:27:07 -0500 Received: from eggs.gnu.org ([208.118.235.92]:55364) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1U34As-00063j-Dv for qemu-devel@nongnu.org; Wed, 06 Feb 2013 07:32:32 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1U34An-0004s5-7l for qemu-devel@nongnu.org; Wed, 06 Feb 2013 07:32:26 -0500 Received: from nodalink.pck.nerim.net ([62.212.105.220]:43571 helo=paradis.irqsave.net) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1U34Am-0004ps-HQ for qemu-devel@nongnu.org; Wed, 06 Feb 2013 07:32:21 -0500 Received: by paradis.irqsave.net (Postfix, from userid 1002) id 98525874359; Wed, 6 Feb 2013 13:32:19 +0100 (CET) Received: from localhost.localdomain (unknown [192.168.77.1]) by paradis.irqsave.net (Postfix) with ESMTP id AEA12874327; Wed, 6 Feb 2013 13:31:19 +0100 (CET) From: =?UTF-8?q?Beno=C3=AEt=20Canet?= To: qemu-devel@nongnu.org Date: Wed, 6 Feb 2013 13:31:41 +0100 Message-Id: <1360153926-9492-9-git-send-email-benoit@irqsave.net> X-Mailer: git-send-email 1.7.10.4 In-Reply-To: <1360153926-9492-1-git-send-email-benoit@irqsave.net> References: <1360153926-9492-1-git-send-email-benoit@irqsave.net> X-detected-operating-system: by eggs.gnu.org: 
GNU/Linux 2.2.x-3.x [generic] X-Received-From: 62.212.105.220 Cc: kwolf@redhat.com, =?UTF-8?q?Beno=C3=AEt=20Canet?= , stefanha@redhat.com Subject: [Qemu-devel] [RFC V6 08/33] qcow2: Add qcow2_dedup_store_new_hashes. X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Signed-off-by: Benoit Canet --- block/qcow2-dedup.c | 325 ++++++++++++++++++++++++++++++++++++++++++++++++++- block/qcow2.h | 5 + 2 files changed, 329 insertions(+), 1 deletion(-) diff --git a/block/qcow2-dedup.c b/block/qcow2-dedup.c index 5901749..a424af8 100644 --- a/block/qcow2-dedup.c +++ b/block/qcow2-dedup.c @@ -29,6 +29,12 @@ #include "qemu-common.h" #include "qcow2.h" +static int qcow2_dedup_read_write_hash(BlockDriverState *bs, + QCowHash *hash, + uint64_t *first_logical_sect, + uint64_t physical_sect, + bool write); + /* * Prepare a buffer containing all the required data required to compute cluster * sized deduplication hashes. 
@@ -291,7 +297,11 @@ static int qcow2_clear_l2_copied_flag_if_needed(BlockDriverState *bs, /* remember that we dont't need to clear QCOW_OFLAG_COPIED again */ hash_node->first_logical_sect &= first_logical_sect; - return 0; + /* clear the QCOW_FLAG_FIRST flag from disk */ + return qcow2_dedup_read_write_hash(bs, &hash_node->hash, + &hash_node->first_logical_sect, + hash_node->physical_sect, + true); } /* This function deduplicate a cluster @@ -553,3 +563,316 @@ exit: return deduped_clusters_nr * s->cluster_sectors - begining_index; } + + +/* Create a deduplication table hash block, write its offset to disk and + * reference it in the RAM deduplication table + * + * the offset is written with bdrv_pwrite_sync(); on failure the block is freed + * + * @index: index in the RAM deduplication table + * @ret: offset on success, negative value (converted to uint64_t) on error + */ +static uint64_t qcow2_create_block(BlockDriverState *bs, + int32_t index) +{ + BDRVQcowState *s = bs->opaque; + int64_t offset; + uint64_t data64; + int ret = 0; + + /* allocate a new dedup table hash block */ + offset = qcow2_alloc_clusters(bs, s->hash_block_size); + + if (offset < 0) { + return offset; + } + + ret = qcow2_cache_flush(bs, s->refcount_block_cache); + if (ret < 0) { + goto free_fail; + } + + /* write the new block offset in the dedup table L1 */ + data64 = cpu_to_be64(offset); + ret = bdrv_pwrite_sync(bs->file, + s->dedup_table_offset + + index * sizeof(uint64_t), + &data64, sizeof(data64)); + + if (ret < 0) { + goto free_fail; + } + + s->dedup_table[index] = offset; + + return offset; + +free_fail: + qcow2_free_clusters(bs, offset, s->hash_block_size); + return ret; +} + +static int qcow2_create_and_get_block(BlockDriverState *bs, + uint32_t index, + uint8_t **block) +{ + BDRVQcowState *s = bs->opaque; + int ret = 0; + int64_t offset; + + offset = qcow2_create_block(bs, index); + + if (offset < 0) { + return offset; + } + + + /* get an empty cluster from the dedup cache */ + ret = qcow2_cache_get_empty(bs, 
s->dedup_cluster_cache, + offset, + (void **) block); + + if (ret < 0) { + return ret; + } + + /* zero the whole hash block */ + memset(*block, 0, s->hash_block_size); + + return 0; +} + +static inline bool qcow2_has_dedup_block(BlockDriverState *bs, + uint32_t index) +{ + BDRVQcowState *s = bs->opaque; + return s->dedup_table[index] == 0 ? false : true; +} + +static inline void qcow2_write_hash_to_block_and_dirty(BlockDriverState *bs, + uint8_t *block, + QCowHash *hash, + int offset, + uint64_t *logical_sect) +{ + BDRVQcowState *s = bs->opaque; + uint64_t first; + first = cpu_to_be64(*logical_sect); + memcpy(block + offset, hash->data, HASH_LENGTH); + memcpy(block + offset + HASH_LENGTH, &first, 8); + qcow2_cache_entry_mark_dirty(s->dedup_cluster_cache, block); +} + +static inline uint64_t qcow2_read_hash_from_block(uint8_t *block, + QCowHash *hash, + int offset) +{ + uint64_t first; + memcpy(hash->data, block + offset, HASH_LENGTH); + memcpy(&first, block + offset + HASH_LENGTH, 8); + return be64_to_cpu(first); +} + +/* Read/write a given hash and cluster_sect from/to the dedup table + * + * This function doesn't flush the dedup cache to disk + * + * @hash: the hash to read or store + * @first_logical_sect: logical sector of the QCOW_OFLAG_COPIED cluster + * @physical_sect: sector of the cluster in QCOW2 file (in sectors) + * @write: true to write, false to read into @hash and @first_logical_sect + * @ret: 0 on success, negative on error + */ +static int qcow2_dedup_read_write_hash(BlockDriverState *bs, + QCowHash *hash, + uint64_t *first_logical_sect, + uint64_t physical_sect, + bool write) +{ + BDRVQcowState *s = bs->opaque; + uint8_t *block = NULL; + int ret = 0; + int64_t cluster_number; + uint32_t index_in_dedup_table; + int offset_in_block; + int nb_hash_in_block = s->hash_block_size / (HASH_LENGTH + 8); + + cluster_number = physical_sect / s->cluster_sectors; + index_in_dedup_table = cluster_number / nb_hash_in_block; + + if (s->dedup_table_size <= index_in_dedup_table) { + return -ENOSPC; + } + + /* if we must 
read and there is nothing to read return a null hash */ + if (!qcow2_has_dedup_block(bs, index_in_dedup_table) && !write) { + memset(hash->data, 0, HASH_LENGTH); + *first_logical_sect = 0; + return 0; + } + + if (qcow2_has_dedup_block(bs, index_in_dedup_table)) { + ret = qcow2_cache_get(bs, + s->dedup_cluster_cache, + s->dedup_table[index_in_dedup_table], + (void **) &block); + } else { + ret = qcow2_create_and_get_block(bs, + index_in_dedup_table, + &block); + } + + if (ret < 0) { + return ret; + } + + offset_in_block = (cluster_number % nb_hash_in_block) * + (HASH_LENGTH + 8); + + if (write) { + qcow2_write_hash_to_block_and_dirty(bs, + block, + hash, + offset_in_block, + first_logical_sect); + } else { + *first_logical_sect = qcow2_read_hash_from_block(block, + hash, + offset_in_block); + } + + qcow2_cache_put(bs, s->dedup_cluster_cache, (void **) &block); + + return 0; +} + +static inline bool is_hash_node_empty(QCowHashNode *hash_node) +{ + return hash_node->physical_sect & QCOW_FLAG_EMPTY; +} + +static void qcow2_remove_hash_node(BlockDriverState *bs, + QCowHashNode *hash_node) +{ + BDRVQcowState *s = bs->opaque; + g_tree_remove(s->dedup_tree_by_sect, &hash_node->physical_sect); + g_tree_remove(s->dedup_tree_by_hash, &hash_node->hash); +} + +/* This function removes a hash_node from the trees given a physical sector + * + * @physical_sect: The physical sector of the cluster corresponding to the hash + */ +static void qcow2_remove_hash_node_by_sector(BlockDriverState *bs, + uint64_t physical_sect) +{ + BDRVQcowState *s = bs->opaque; + QCowHashNode *hash_node; + + hash_node = g_tree_lookup(s->dedup_tree_by_sect, &physical_sect); + + if (!hash_node) { + return; + } + + qcow2_remove_hash_node(bs, hash_node); +} + +/* This function stores hash information to disk and RAM + * + * @hash: the QCowHash to process + * @logical_sect: the logical sector of the cluster seen by the guest + * @physical_sect: the physical sector of the stored cluster + * @ret: 0 on success, 
negative on error + */ +static int qcow2_store_hash(BlockDriverState *bs, + QCowHash *hash, + uint64_t logical_sect, + uint64_t physical_sect) +{ + BDRVQcowState *s = bs->opaque; + QCowHashNode *hash_node; + + hash_node = g_tree_lookup(s->dedup_tree_by_hash, hash); + + /* no hash node found for this hash */ + if (!hash_node) { + return 0; + } + + /* the hash node information is already complete */ + if (!is_hash_node_empty(hash_node)) { + return 0; + } + + /* Remember that this QCowHashNode represents the first occurrence of the + * cluster so we will be able to clear QCOW_OFLAG_COPIED from the L2 table + * entry when the refcount goes > 1. + */ + logical_sect = logical_sect | QCOW_FLAG_FIRST; + + /* remove stale hash node pointing to this physical sector from the trees */ + qcow2_remove_hash_node_by_sector(bs, physical_sect); + + /* fill the missing fields of the hash node */ + hash_node->physical_sect = physical_sect; + hash_node->first_logical_sect = logical_sect; + + /* insert the hash node in the second tree: it's already in the first one */ + g_tree_insert(s->dedup_tree_by_sect, &hash_node->physical_sect, hash_node); + + /* write the hash to the dedup table; the dedup cache is not flushed here */ + return qcow2_dedup_read_write_hash(bs, + hash, + &logical_sect, + physical_sect, + true); +} + +/* This function stores the hashes of the clusters which are not duplicated + * + * @ds: The deduplication state + * @count: the number of dedup hashes to process (count <= 0: all of them) + * @logical_sect: logical offset of the first cluster (in sectors) + * @physical_sect: offset of the first cluster (in sectors) + * @ret: 0 on success, negative on error + */ +int qcow2_dedup_store_new_hashes(BlockDriverState *bs, + QCowDedupState *ds, + int count, + uint64_t logical_sect, + uint64_t physical_sect) +{ + int ret = 0; + int i = 0; + BDRVQcowState *s = bs->opaque; + QCowHashElement *dedup_hash, *next_dedup_hash; + + /* round values on cluster boundaries for easier cluster deletion */ + logical_sect = logical_sect & ~(s->cluster_sectors - 1); + 
physical_sect = physical_sect & ~(s->cluster_sectors - 1); + + QTAILQ_FOREACH_SAFE(dedup_hash, &ds->undedupables, next, next_dedup_hash) { + + ret = qcow2_store_hash(bs, + &dedup_hash->hash, + logical_sect + i * s->cluster_sectors, + physical_sect + i * s->cluster_sectors); + + QTAILQ_REMOVE(&ds->undedupables, dedup_hash, next); + g_free(dedup_hash); + + if (ret < 0) { + break; + } + + i++; + + if (i == count) { + break; + } + } + + return ret; +} diff --git a/block/qcow2.h b/block/qcow2.h index 46a5800..3b076db 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -468,5 +468,10 @@ int qcow2_dedup(BlockDriverState *bs, uint64_t sector_num, uint8_t *data, int data_nr); +int qcow2_dedup_store_new_hashes(BlockDriverState *bs, + QCowDedupState *ds, + int count, + uint64_t logical_sect, + uint64_t physical_sect); #endif