From patchwork Wed Oct 17 16:00:15 2012 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: =?utf-8?q?Beno=C3=AEt_Canet?= X-Patchwork-Id: 192094 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id DDFFC2C0092 for ; Thu, 18 Oct 2012 03:33:45 +1100 (EST) Received: from localhost ([::1]:51407 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1TOW4L-0004Vw-8e for incoming@patchwork.ozlabs.org; Wed, 17 Oct 2012 12:02:05 -0400 Received: from eggs.gnu.org ([208.118.235.92]:51021) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1TOW3p-0003m6-Mj for qemu-devel@nongnu.org; Wed, 17 Oct 2012 12:01:43 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1TOW3e-00037K-QC for qemu-devel@nongnu.org; Wed, 17 Oct 2012 12:01:33 -0400 Received: from nodalink.pck.nerim.net ([62.212.105.220]:48788 helo=paradis.irqsave.net) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1TOW3e-00034h-BC for qemu-devel@nongnu.org; Wed, 17 Oct 2012 12:01:22 -0400 Received: by paradis.irqsave.net (Postfix, from userid 1002) id 5F41F8742EF; Wed, 17 Oct 2012 18:06:48 +0200 (CEST) Received: from localhost.localdomain (unknown [192.168.77.1]) by paradis.irqsave.net (Postfix) with ESMTP id 6296F8742F2; Wed, 17 Oct 2012 18:05:56 +0200 (CEST) From: =?UTF-8?q?Beno=C3=AEt=20Canet?= To: qemu-devel@nongnu.org Date: Wed, 17 Oct 2012 18:00:15 +0200 Message-Id: <1350489629-1838-7-git-send-email-benoit@irqsave.net> X-Mailer: git-send-email 1.7.10.4 In-Reply-To: <1350489629-1838-1-git-send-email-benoit@irqsave.net> References: <1350489629-1838-1-git-send-email-benoit@irqsave.net> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 62.212.105.220 Cc: kwolf@redhat.com, =?UTF-8?q?Beno=C3=AEt=20Canet?= , stefanha@redhat.com Subject: [Qemu-devel] [RFC V2 06/20] qcow2: Add qcow2_dedup and related functions. X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org --- block/qcow2-dedup.c | 332 +++++++++++++++++++++++++++++++++++++++++++++++++++ block/qcow2.h | 7 ++ 2 files changed, 339 insertions(+) diff --git a/block/qcow2-dedup.c b/block/qcow2-dedup.c index 9955843..ae45130 100644 --- a/block/qcow2-dedup.c +++ b/block/qcow2-dedup.c @@ -29,6 +29,8 @@ #include "qemu-common.h" #include "qcow2.h" +#define HASH_LENGTH 32 + /** * Read some data from the QCOW2 file * @@ -135,3 +137,333 @@ fail: qemu_vfree(*data); return ret; } + +/* + * Lookup a hash in the deduplication red black tree + * + * @hash: the hash to lookup in the red black tree + * @ret: a structure containing the offset of the cluster and the hash or NULL + */ +static QCowHashNode *qcow2_dedup_lookup_hash_in_rb_tree(BlockDriverState *bs, + uint8_t *hash) +{ + BDRVQcowState *s = bs->opaque; + struct rb_node *node = s->dedup_rb_tree.rb_node; + + while (node) { + QCowHashNode *data = container_of(node, QCowHashNode, node); + int result = memcmp(hash, data->hash, HASH_LENGTH); + + if (result < 0) { + node = node->rb_left; + } else if (result > 0) { + node = node->rb_right; + } else { + return data; + } + } + + return NULL; +} + +/* + * Insert a hash and offset couple in the red black tree + * + * @hash: the given hash + * @physical_cluster_offset: the cluster offset in the QCOW2 file + * @ret: true on successfull insertion, false on duplicate + */ +static bool qcow2_dedup_insert_hash_in_rb_tree(BlockDriverState *bs, + uint8_t *hash, + uint64_t physical_cluster_offset) +{ + BDRVQcowState *s = bs->opaque; + struct rb_node **new = &(s->dedup_rb_tree.rb_node), *parent = NULL; + QCowHashNode *data; + + /* Figure out where to put new node */ + while (*new) { + QCowHashNode *this = container_of(*new, QCowHashNode, node); + int result = memcmp(hash, this->hash, HASH_LENGTH); + + parent = *new; + if (result < 0) { + new = &((*new)->rb_left); + } else if (result > 0) { + new = &((*new)->rb_right); + } else { + return false; + } + } + + /* alloc node */ + data = g_new0(QCowHashNode, 1); + data->hash = hash; + data->offset = physical_cluster_offset; + + /* Add new node and rebalance tree. */ + rb_link_node(&data->node, parent, new); + rb_insert_color(&data->node, &s->dedup_rb_tree); + + return true; +} + +/* + * Compute the hash of a given cluster + * + * @data: a buffer containing the cluster data + * @ret: a HASH_LENGTH long dynamically allocated array containing the hash + */ +static uint8_t *qcow2_compute_cluster_hash(BlockDriverState *bs, + uint8_t *data) +{ + return NULL; +} + +/* Try to find the offset of a given cluster if it's duplicated + * Exceptionally we cast return value to int64_t to use as error code. + * + * @data: a buffer containing the cluster + * @skip_cluster_nr: the number of cluster to skip in the buffer + * @hash: if hash is provided it's used else it's computed + * @ret: the offset in sectors of the duplicated cluster or -1 + */ +static int64_t qcow2_dedup_compute_hash_lookup_offset(BlockDriverState *bs, + uint8_t *data, + int skip_cluster_nr, + uint8_t **hash) +{ + BDRVQcowState *s = bs->opaque; + QCowHashNode *hash_node; + if (!*hash) { + /* no hash has been provided compute it and store it for caller usage */ + *hash = qcow2_compute_cluster_hash(bs, + data + skip_cluster_nr * + s->cluster_size); + } + hash_node = qcow2_dedup_lookup_hash_in_rb_tree(bs, *hash); + if (hash_node) { + /* return offset of the duplicated cluster */ + return hash_node->offset; + } + /* cluster not duplicated */ + qcow2_dedup_insert_hash_in_rb_tree(bs, *hash, + (uint64_t) 1 << + (sizeof(uint64_t) * 8 - 1)); + return -1; +} + +/* + * Helper used to build a QCowHashElement + * + * @hash: the hash to use + * @ret: a newly allocated QCowHashElement pointing to the given hash + */ +static QCowHashElement *qcow2_build_dedup_hash(uint8_t *hash) +{ + QCowHashElement *dedup_hash; + dedup_hash = g_new0(QCowHashElement, 1); + dedup_hash->hash = hash; + return dedup_hash; +} + +/* + * Helper used to link a deduplicated cluster in the l2 + * + * @logical_cluster_offset: the cluster offset seen by the guest (in sectors) + * @physical_cluster_offset: the cluster offset in the QCOW2 file (in sectors) + * @ret: + */ +static int qcow2_dedup_link_l2(BlockDriverState *bs, + uint64_t logical_cluster_offset, + uint64_t physical_cluster_offset) +{ + QCowL2Meta m; + /* function correctness regarding copy on write ? */ + m.offset = logical_cluster_offset; + m.alloc_offset = physical_cluster_offset; + m.nb_clusters = 1; /* we are linking only one cluster in l2 */ + return qcow2_alloc_cluster_link_l2(bs, &m); +} + +/* This function tries to deduplicate a given cluster. + * + * @sector_num: the logical sector number we are trying to deduplicate + * @precomputed_hash: Used instead of computing the hash if provided + * @data: the buffer in which to look for a duplicated cluster + * @skip_clusters_nr: the number of cluster that must be skipped in data + * @non_duplicated_dedup_hash: returned if the cluster is not deduplicated + * @ret: ret < 0 on error, 1 on deduplication else 0 + */ +static int qcow2_dedup_cluster(BlockDriverState *bs, + uint64_t sector_num, + uint8_t **precomputed_hash, + uint8_t *data, + int skip_clusters_nr, + QCowHashElement **non_duplicated_dedup_hash) +{ + BDRVQcowState *s = bs->opaque; + int64_t physical_cluster_offset; + uint64_t logical_cluster_offset; + uint64_t existing_physical_cluster_offset; + int ret; + int pnum = s->cluster_sectors; + *non_duplicated_dedup_hash = NULL; + + /* look for physical cluster offset if the cluster is duplicated */ + physical_cluster_offset = + qcow2_dedup_compute_hash_lookup_offset(bs, + data, + skip_clusters_nr, + precomputed_hash); + + /* round the logical cluster offset to cluster boundaries */ + logical_cluster_offset = sector_num & ~(s->cluster_sectors - 1); + ret = qcow2_get_cluster_offset(bs, logical_cluster_offset << 9, + &pnum, &existing_physical_cluster_offset); + if (ret < 0) { + goto exit; + } + + if (physical_cluster_offset == -1) { + /* no duplicated cluster found, store the hash for later usage */ + *non_duplicated_dedup_hash = qcow2_build_dedup_hash(*precomputed_hash); + return 0; + } else { + /* duplicated cluster found */ + + if (ret == QCOW2_CLUSTER_NORMAL) { + /* we are replacing a cluster, decrement it's physical refcount */ + qcow2_free_clusters(bs, existing_physical_cluster_offset, + s->cluster_size); + } + + ret = qcow2_update_refcount(bs, physical_cluster_offset, + s->cluster_size, 1); + if (ret < 0) { + goto exit; + } + + ret = qcow2_dedup_link_l2(bs, logical_cluster_offset, + physical_cluster_offset); + if (ret < 0) { + goto exit; + } + + } + + ret = 1; +exit: + *precomputed_hash = NULL; + free(*precomputed_hash); + return ret; +} + +/* + * Deduplicate all the cluster that can be deduplicated. + * + * Next it compute the number of non deduplicable sectors to come while storing + * the hashes of these sectors in a linked list for later usage. + * Then it compute the first duplicated cluster hash that come after non + * deduplicable cluster, this hash will be used at next call of the function + * + * @sector_num: the first sector to deduplicate (in sectors) + * @data: the buffer containing the data to deduplicate + * @data_nr: the size of the buffer in sectors + * @skip_cluster_nr: the number of cluster to skip at the begining of data + * @next_non_dedupable_sectors_nr: result containing the number of non + * deduplicable sectors to come + * next_call_first_hash: hash saved between the function call + * + * + * + */ +int qcow2_dedup(BlockDriverState *bs, + uint64_t sector_num, + uint8_t *data, + int data_nr, + int *skip_clusters_nr, + int *next_non_dedupable_sectors_nr, + uint8_t **next_call_first_hash) +{ + BDRVQcowState *s = bs->opaque; + int ret; + int deduped_clusters_nr = 0; + int left_to_test_clusters_nr; + int begining_index; + uint8_t *hash = NULL; + QCowHashElement *non_duplicated_dedup_hash = NULL; + + /* should already be zero when entering this function */ + assert(*next_non_dedupable_sectors_nr == 0); + + begining_index = sector_num & (s->cluster_sectors - 1); + + left_to_test_clusters_nr = (data_nr / s->cluster_sectors) - + *skip_clusters_nr; + + /* Deduplicate all that can be */ + while (left_to_test_clusters_nr-- && + (ret = qcow2_dedup_cluster(bs, + sector_num, + next_call_first_hash, + data, + (*skip_clusters_nr)++, + &non_duplicated_dedup_hash)) == 1) { + deduped_clusters_nr++; + } + + if (ret < 0) { + *next_call_first_hash = NULL; + goto exit; + } + + /* We deduped everything till the end */ + if (!non_duplicated_dedup_hash) { + *next_call_first_hash = NULL; + *next_non_dedupable_sectors_nr = 0; + goto exit; + } + + /* We consumed the precomputed hash */ + *next_call_first_hash = NULL; + + /* remember that the last sector we tested in non deduplicable */ + *next_non_dedupable_sectors_nr += s->cluster_sectors; + + /* Memorize the hash of the first non duplicated cluster. + * we will store it before writing the cluster to disk. + */ + QTAILQ_INSERT_TAIL(&s->undedupable_hashes, non_duplicated_dedup_hash, next); + + /* Count how many non duplicated sector can be written and memorize the + * hashes for later. + * We make sure we pass hash == NULL to force computation of the hash. + */ + hash = NULL; + while (left_to_test_clusters_nr-- > 0 && + -1 == qcow2_dedup_compute_hash_lookup_offset(bs, + data, + (*skip_clusters_nr)++, + &hash)) { + *next_non_dedupable_sectors_nr += s->cluster_sectors; + non_duplicated_dedup_hash = qcow2_build_dedup_hash(hash); + QTAILQ_INSERT_TAIL(&s->undedupable_hashes, + non_duplicated_dedup_hash, next); + hash = NULL; + } + + /* We find a duplicated cluster before stopping iterating store it for + * next call. + */ + if (hash && non_duplicated_dedup_hash && + non_duplicated_dedup_hash->hash != hash) { + *next_call_first_hash = hash; + } + +exit: + if (!deduped_clusters_nr) { + return 0; + } + return deduped_clusters_nr * s->cluster_sectors - begining_index; +} diff --git a/block/qcow2.h b/block/qcow2.h index 77c0f2b..6292d4e 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -365,5 +365,12 @@ int qcow2_dedup_read_missing_and_concatenate(BlockDriverState *bs, int sectors_nr, uint8_t **dedup_cluster_data, int *dedup_cluster_data_nr); +int qcow2_dedup(BlockDriverState *bs, + uint64_t sector_num, + uint8_t *data, + int data_nr, + int *skip_clusters_nr, + int *next_non_dedupable_sectors_nr, + uint8_t **next_call_first_hash); #endif