Message ID | 1360153926-9492-8-git-send-email-benoit@irqsave.net |
---|---|
State | New |
Headers | show |
On Wed, Feb 06, 2013 at 01:31:40PM +0100, Benoît Canet wrote: > Signed-off-by: Benoit Canet <benoit@irqsave.net> > --- > block/qcow2-dedup.c | 436 +++++++++++++++++++++++++++++++++++++++++++++++++++ > block/qcow2.h | 5 + > 2 files changed, 441 insertions(+) > > diff --git a/block/qcow2-dedup.c b/block/qcow2-dedup.c > index 4e99eb1..5901749 100644 > --- a/block/qcow2-dedup.c > +++ b/block/qcow2-dedup.c > @@ -117,3 +117,439 @@ fail: > *data = NULL; > return ret; > } > + > +/* > + * Build a QCowHashNode structure > + * > + * @hash: the given hash > + * @physical_sect: the cluster offset in the QCOW2 file > + * @first_logical_sect: the first logical cluster offset written > + * @ret: the build QCowHashNode > + */ > +static QCowHashNode *qcow2_dedup_build_qcow_hash_node(QCowHash *hash, > + uint64_t physical_sect, > + uint64_t first_logical_sect) qcow2_hash_node_new() would be a shorter name. It uses "new" like coroutine_new(), qemu_bh_new(), etc. > +{ > + QCowHashNode *hash_node; > + > + hash_node = g_new0(QCowHashNode, 1); > + memcpy(hash_node->hash.data, hash->data, HASH_LENGTH); > + hash_node->physical_sect = physical_sect; > + hash_node->first_logical_sect = first_logical_sect; > + > + return hash_node; > +} > + > +/* > + * Compute the hash of a given cluster > + * > + * @data: a buffer containing the cluster data > + * @hash: a QCowHash where to store the computed hash > + * @ret: 0 on success, negative on error > + */ > +static int qcow2_compute_cluster_hash(BlockDriverState *bs, > + QCowHash *hash, > + uint8_t *data) > +{ > + return 0; > +} > + > +/* > + * Get a QCowHashNode corresponding to a cluster data > + * > + * @phash: if phash can be used no hash is computed > + * @data: a buffer containing the cluster > + * @nb_clusters_processed: the number of cluster to skip in the buffer > + * @err: Error code if any > + * @ret: QCowHashNode of the duplicated cluster or NULL if not found > + */ > +static QCowHashNode *qcow2_get_hash_node_for_cluster(BlockDriverState 
*bs, > + QcowPersistantHash *phash, > + uint8_t *data, > + int nb_clusters_processed, > + int *err) > +{ > + BDRVQcowState *s = bs->opaque; > + int ret = 0; > + *err = 0; > + > + /* no hash has been provided compute it and store it for later usage */ > + if (!phash->reuse) { > + ret = qcow2_compute_cluster_hash(bs, > + &phash->hash, > + data + > + nb_clusters_processed * > + s->cluster_size); > + } > + > + /* do not reuse the hash anymore if it was precomputed */ > + phash->reuse = false; > + > + if (ret < 0) { > + *err = ret; > + return NULL; > + } > + > + return g_tree_lookup(s->dedup_tree_by_hash, &phash->hash); > +} > + > +/* > + * Build a QCowHashNode from a given QCowHash and insert it into the tree > + * > + * @hash: the given QCowHash > + */ > +static void qcow2_build_and_insert_hash_node(BlockDriverState *bs, > + QCowHash *hash) > +{ > + BDRVQcowState *s = bs->opaque; > + QCowHashNode *hash_node; > + > + /* build the hash node with QCOW_FLAG_EMPTY as offsets so we will remember > + * to fill these field later with real values. > + */ > + hash_node = qcow2_dedup_build_qcow_hash_node(hash, > + QCOW_FLAG_EMPTY, > + QCOW_FLAG_EMPTY); > + g_tree_insert(s->dedup_tree_by_hash, &hash_node->hash, hash_node); > +} Interesting function, it doesn't return hash_node. Someone will have to look it up. Also, we put an incomplete node into dedup_tree_by_hash. The caller must ensure that other coroutines do not use dedup_tree_by_hash() before we've filled in real values, or the callers need to check for QCOW_FLAG_EMPTY. Seems a little risky, so why insert before completing the hash node? > + > +/* > + * Helper used to build a QCowHashElement > + * > + * @hash: the QCowHash to use > + * @ret: a newly allocated QCowHashElement containing the given hash > + */ > +static QCowHashElement *qcow2_build_dedup_hash(QCowHash *hash) I suggest _new() instead of _build_*(). 
> +{ > + QCowHashElement *dedup_hash; > + dedup_hash = g_new0(QCowHashElement, 1); > + memcpy(dedup_hash->hash.data, hash->data, HASH_LENGTH); > + return dedup_hash; > +} > + > +/* > + * Helper used to link a deduplicated cluster in the l2 > + * > + * @logical_sect: the cluster sector seen by the guest > + * @physical_sect: the cluster sector in the QCOW2 file > + * @overwrite: true if we must overwrite the L2 table entry > + * @ret: > + */ > +static int qcow2_dedup_link_l2(BlockDriverState *bs, > + uint64_t logical_sect, > + uint64_t physical_sect, > + bool overwrite) > +{ > + QCowL2Meta m = { > + .alloc_offset = physical_sect << 9, > + .offset = logical_sect << 9, > + .nb_clusters = 1, I wonder if there's any real-world performance advantage to doing nb_clusters > 0 when possible... > + .nb_available = 0, > + .cow_start = { > + .offset = 0, > + .nb_sectors = 0, > + }, > + .cow_end = { > + .offset = 0, > + .nb_sectors = 0, > + }, > + .oflag_copied = false, > + .overwrite = overwrite, > + }; > + return qcow2_alloc_cluster_link_l2(bs, &m); > +} > + > +/* Clear the QCOW_OFLAG_COPIED from the first L2 entry written for a physical > + * cluster. > + * > + * @hash_node: the duplicated hash node > + * @ret: 0 on success, negative on error > + */ > +static int qcow2_clear_l2_copied_flag_if_needed(BlockDriverState *bs, > + QCowHashNode *hash_node) > +{ > + int ret = 0; > + uint64_t first_logical_sect = hash_node->first_logical_sect; > + > + /* QCOW_OFLAG_COPIED already cleared -> do nothing */ > + if (!(first_logical_sect & QCOW_FLAG_FIRST)) { > + return 0; > + } > + > + /* note : QCOW_FLAG_FIRST == QCOW_OFLAG_COPIED */ Please use QCOW_OFLAG_COPIED instead of redefining it. 
> + first_logical_sect &= ~QCOW_FLAG_FIRST; > + > + /* overwrite first L2 entry to clear QCOW_FLAG_COPIED */ > + ret = qcow2_dedup_link_l2(bs, first_logical_sect, > + hash_node->physical_sect, > + true); > + > + if (ret < 0) { > + return ret; > + } > + > + /* remember that we dont't need to clear QCOW_OFLAG_COPIED again */ s/dont't/don't/ > + hash_node->first_logical_sect &= first_logical_sect; Just '=' would be clearer. > + > + return 0; > +} > + > +/* This function deduplicate a cluster > + * > + * @logical_sect: The logical sector of the write > + * @hash_node: The duplicated cluster hash node > + * @ret: 0 on success, negative on error > + */ > +static int qcow2_deduplicate_cluster(BlockDriverState *bs, > + uint64_t logical_sect, > + QCowHashNode *hash_node) > +{ > + BDRVQcowState *s = bs->opaque; > + int ret = 0; > + > + /* create new L2 entry */ > + ret = qcow2_dedup_link_l2(bs, logical_sect, > + hash_node->physical_sect, > + false); > + > + if (ret < 0) { > + return ret; > + } > + > + /* Increment the refcount of the cluster */ > + return update_refcount(bs, > + (hash_node->physical_sect / > + s->cluster_sectors) << s->cluster_bits, > + 1, 1); I think it's better to increment the refcount and then store the new L2 entry. This way a crash partway through leaves a leaked cluster instead of an L2 entry to an unallocated cluster (which could cause corruption further on). Cache flushes may be necessary between these steps, I haven't checked. > +} > + > +/* This function tries to deduplicate a given cluster. 
> + * > + * @sector_num: the logical sector number we are trying to deduplicate > + * @phash: Used instead of computing the hash if provided > + * @data: the buffer in which to look for a duplicated cluster > + * @nb_clusters_processed: the number of cluster that must be skipped in data > + * @ret: ret < 0 on error, 1 on deduplication else 0 > + */ > +static int qcow2_try_dedup_cluster(BlockDriverState *bs, > + QcowPersistantHash *phash, > + uint64_t sector_num, > + uint8_t *data, > + int nb_clusters_processed) > +{ > + BDRVQcowState *s = bs->opaque; > + int ret = 0; > + QCowHashNode *hash_node; > + uint64_t logical_sect; > + uint64_t existing_physical_offset; > + int pnum = s->cluster_sectors; > + > + /* search the tree for duplicated cluster */ > + hash_node = qcow2_get_hash_node_for_cluster(bs, > + phash, > + data, > + nb_clusters_processed, > + &ret); > + > + /* we won't reuse the hash on error */ > + if (ret < 0) { > + return ret; > + } > + > + /* if cluster is not duplicated store hash for later usage */ > + if (!hash_node) { > + qcow2_build_and_insert_hash_node(bs, &phash->hash); > + return 0; > + } > + > + logical_sect = sector_num & ~(s->cluster_sectors - 1); > + ret = qcow2_get_cluster_offset(bs, logical_sect << 9, > + &pnum, &existing_physical_offset); > + > + if (ret < 0) { > + return ret; > + } > + > + /* if we are rewriting the same cluster at the same place do nothing */ > + if (existing_physical_offset == hash_node->physical_sect << 9) { > + return 1; > + } > + > + /* take care of not having refcount > 1 and QCOW_OFLAG_COPIED at once */ > + ret = qcow2_clear_l2_copied_flag_if_needed(bs, hash_node); > + > + if (ret < 0) { > + return ret; > + } > + > + /* do the deduplication */ > + ret = qcow2_deduplicate_cluster(bs, logical_sect, > + hash_node); I think none of these drop the lock so we're guaranteed that metadata doesn't change beneath our feet. Good. 
> + if (ret < 0) { > + return ret; > + } > + > + return 1; > +} > + > + > +static void add_hash_to_undedupable_list(BlockDriverState *bs, > + QCowDedupState *ds) > +{ > + /* memorise hash for later storage in gtree and disk */ > + QCowHashElement *dedup_hash = qcow2_build_dedup_hash(&ds->phash.hash); > + QTAILQ_INSERT_TAIL(&ds->undedupables, dedup_hash, next); > +} > + > +static int qcow2_dedup_starting_from_begining(BlockDriverState *bs, > + QCowDedupState *ds, > + uint64_t sector_num, > + uint8_t *data, > + int left_to_process) > +{ > + BDRVQcowState *s = bs->opaque; > + int i; > + int ret = 0; > + > + for (i = 0; i < left_to_process; i++) { > + ret = qcow2_try_dedup_cluster(bs, > + &ds->phash, > + sector_num + i * s->cluster_sectors, > + data, > + ds->nb_clusters_processed + i); I don't understand how the data argument works here. The data for left_to_process clusters has been pre-read into data? Why is data not incremented (data += s->cluster_size)?
> > +/* > > + * Build a QCowHashNode from a given QCowHash and insert it into the tree > > + * > > + * @hash: the given QCowHash > > + */ > > +static void qcow2_build_and_insert_hash_node(BlockDriverState *bs, > > + QCowHash *hash) > > +{ > > + BDRVQcowState *s = bs->opaque; > > + QCowHashNode *hash_node; > > + > > + /* build the hash node with QCOW_FLAG_EMPTY as offsets so we will remember > > + * to fill these field later with real values. > > + */ > > + hash_node = qcow2_dedup_build_qcow_hash_node(hash, > > + QCOW_FLAG_EMPTY, > > + QCOW_FLAG_EMPTY); > > + g_tree_insert(s->dedup_tree_by_hash, &hash_node->hash, hash_node); > > +} > > Interesting function, it doesn't return hash_node. Someone will have to > look it up. > > Also, we put an incomplete node into dedup_tree_by_hash. The caller > must ensure that other coroutines do not use dedup_tree_by_hash() before > we've filled in real values, or the callers need to check for > QCOW_FLAG_EMPTY. Seems a little risky, so why insert before completing > the hash node? > It's done like this so the next iteration of the deduplication loop will find that a cluster is duplicated if so and stop right here. I can fill the missing information of the qcow hash node only long after that, when the new cluster is allocated.
On Wed, Feb 06, 2013 at 01:31:40PM +0100, Benoît Canet wrote: > +/* Deduplicate all the cluster that can be deduplicated. > + * > + * Next it compute the number of non deduplicable sectors to come while storing s/compute/computes/ > + * the hashes of these sectors in a linked list for later usage. > + * Then it compute the first duplicated cluster hash that come after non s/compute/computes/ s/come/comes/ > + * deduplicable cluster, this hash will be used at next call of the function > + * > + * @ds: a structure containing the state of the deduplication > + * for this write request > + * @sector_num: The logical sector > + * @data: the buffer containing the data to deduplicate > + * @data_nr: the size of the buffer in sectors > + * > + */ > +int qcow2_dedup(BlockDriverState *bs, > + QCowDedupState *ds, > + uint64_t sector_num, > + uint8_t *data, > + int data_nr) > +{ > + BDRVQcowState *s = bs->opaque; > + int ret = 0; > + int deduped_clusters_nr = 0; > + int left_to_process; > + int begining_index; > + > + begining_index = sector_num & (s->cluster_sectors - 1); s/begining/beginning/ but start_index would be shorter. > + > + left_to_process = (data_nr / s->cluster_sectors) - > + ds->nb_clusters_processed; data_nr must be a multiple of cluster_sectors?
diff --git a/block/qcow2-dedup.c b/block/qcow2-dedup.c index 4e99eb1..5901749 100644 --- a/block/qcow2-dedup.c +++ b/block/qcow2-dedup.c @@ -117,3 +117,439 @@ fail: *data = NULL; return ret; } + +/* + * Build a QCowHashNode structure + * + * @hash: the given hash + * @physical_sect: the cluster offset in the QCOW2 file + * @first_logical_sect: the first logical cluster offset written + * @ret: the build QCowHashNode + */ +static QCowHashNode *qcow2_dedup_build_qcow_hash_node(QCowHash *hash, + uint64_t physical_sect, + uint64_t first_logical_sect) +{ + QCowHashNode *hash_node; + + hash_node = g_new0(QCowHashNode, 1); + memcpy(hash_node->hash.data, hash->data, HASH_LENGTH); + hash_node->physical_sect = physical_sect; + hash_node->first_logical_sect = first_logical_sect; + + return hash_node; +} + +/* + * Compute the hash of a given cluster + * + * @data: a buffer containing the cluster data + * @hash: a QCowHash where to store the computed hash + * @ret: 0 on success, negative on error + */ +static int qcow2_compute_cluster_hash(BlockDriverState *bs, + QCowHash *hash, + uint8_t *data) +{ + return 0; +} + +/* + * Get a QCowHashNode corresponding to a cluster data + * + * @phash: if phash can be used no hash is computed + * @data: a buffer containing the cluster + * @nb_clusters_processed: the number of cluster to skip in the buffer + * @err: Error code if any + * @ret: QCowHashNode of the duplicated cluster or NULL if not found + */ +static QCowHashNode *qcow2_get_hash_node_for_cluster(BlockDriverState *bs, + QcowPersistantHash *phash, + uint8_t *data, + int nb_clusters_processed, + int *err) +{ + BDRVQcowState *s = bs->opaque; + int ret = 0; + *err = 0; + + /* no hash has been provided compute it and store it for later usage */ + if (!phash->reuse) { + ret = qcow2_compute_cluster_hash(bs, + &phash->hash, + data + + nb_clusters_processed * + s->cluster_size); + } + + /* do not reuse the hash anymore if it was precomputed */ + phash->reuse = false; + + if (ret < 0) { + 
*err = ret; + return NULL; + } + + return g_tree_lookup(s->dedup_tree_by_hash, &phash->hash); +} + +/* + * Build a QCowHashNode from a given QCowHash and insert it into the tree + * + * @hash: the given QCowHash + */ +static void qcow2_build_and_insert_hash_node(BlockDriverState *bs, + QCowHash *hash) +{ + BDRVQcowState *s = bs->opaque; + QCowHashNode *hash_node; + + /* build the hash node with QCOW_FLAG_EMPTY as offsets so we will remember + * to fill these field later with real values. + */ + hash_node = qcow2_dedup_build_qcow_hash_node(hash, + QCOW_FLAG_EMPTY, + QCOW_FLAG_EMPTY); + g_tree_insert(s->dedup_tree_by_hash, &hash_node->hash, hash_node); +} + +/* + * Helper used to build a QCowHashElement + * + * @hash: the QCowHash to use + * @ret: a newly allocated QCowHashElement containing the given hash + */ +static QCowHashElement *qcow2_build_dedup_hash(QCowHash *hash) +{ + QCowHashElement *dedup_hash; + dedup_hash = g_new0(QCowHashElement, 1); + memcpy(dedup_hash->hash.data, hash->data, HASH_LENGTH); + return dedup_hash; +} + +/* + * Helper used to link a deduplicated cluster in the l2 + * + * @logical_sect: the cluster sector seen by the guest + * @physical_sect: the cluster sector in the QCOW2 file + * @overwrite: true if we must overwrite the L2 table entry + * @ret: + */ +static int qcow2_dedup_link_l2(BlockDriverState *bs, + uint64_t logical_sect, + uint64_t physical_sect, + bool overwrite) +{ + QCowL2Meta m = { + .alloc_offset = physical_sect << 9, + .offset = logical_sect << 9, + .nb_clusters = 1, + .nb_available = 0, + .cow_start = { + .offset = 0, + .nb_sectors = 0, + }, + .cow_end = { + .offset = 0, + .nb_sectors = 0, + }, + .oflag_copied = false, + .overwrite = overwrite, + }; + return qcow2_alloc_cluster_link_l2(bs, &m); +} + +/* Clear the QCOW_OFLAG_COPIED from the first L2 entry written for a physical + * cluster. 
+ * + * @hash_node: the duplicated hash node + * @ret: 0 on success, negative on error + */ +static int qcow2_clear_l2_copied_flag_if_needed(BlockDriverState *bs, + QCowHashNode *hash_node) +{ + int ret = 0; + uint64_t first_logical_sect = hash_node->first_logical_sect; + + /* QCOW_OFLAG_COPIED already cleared -> do nothing */ + if (!(first_logical_sect & QCOW_FLAG_FIRST)) { + return 0; + } + + /* note : QCOW_FLAG_FIRST == QCOW_OFLAG_COPIED */ + first_logical_sect &= ~QCOW_FLAG_FIRST; + + /* overwrite first L2 entry to clear QCOW_FLAG_COPIED */ + ret = qcow2_dedup_link_l2(bs, first_logical_sect, + hash_node->physical_sect, + true); + + if (ret < 0) { + return ret; + } + + /* remember that we dont't need to clear QCOW_OFLAG_COPIED again */ + hash_node->first_logical_sect &= first_logical_sect; + + return 0; +} + +/* This function deduplicate a cluster + * + * @logical_sect: The logical sector of the write + * @hash_node: The duplicated cluster hash node + * @ret: 0 on success, negative on error + */ +static int qcow2_deduplicate_cluster(BlockDriverState *bs, + uint64_t logical_sect, + QCowHashNode *hash_node) +{ + BDRVQcowState *s = bs->opaque; + int ret = 0; + + /* create new L2 entry */ + ret = qcow2_dedup_link_l2(bs, logical_sect, + hash_node->physical_sect, + false); + + if (ret < 0) { + return ret; + } + + /* Increment the refcount of the cluster */ + return update_refcount(bs, + (hash_node->physical_sect / + s->cluster_sectors) << s->cluster_bits, + 1, 1); +} + +/* This function tries to deduplicate a given cluster. 
+ * + * @sector_num: the logical sector number we are trying to deduplicate + * @phash: Used instead of computing the hash if provided + * @data: the buffer in which to look for a duplicated cluster + * @nb_clusters_processed: the number of cluster that must be skipped in data + * @ret: ret < 0 on error, 1 on deduplication else 0 + */ +static int qcow2_try_dedup_cluster(BlockDriverState *bs, + QcowPersistantHash *phash, + uint64_t sector_num, + uint8_t *data, + int nb_clusters_processed) +{ + BDRVQcowState *s = bs->opaque; + int ret = 0; + QCowHashNode *hash_node; + uint64_t logical_sect; + uint64_t existing_physical_offset; + int pnum = s->cluster_sectors; + + /* search the tree for duplicated cluster */ + hash_node = qcow2_get_hash_node_for_cluster(bs, + phash, + data, + nb_clusters_processed, + &ret); + + /* we won't reuse the hash on error */ + if (ret < 0) { + return ret; + } + + /* if cluster is not duplicated store hash for later usage */ + if (!hash_node) { + qcow2_build_and_insert_hash_node(bs, &phash->hash); + return 0; + } + + logical_sect = sector_num & ~(s->cluster_sectors - 1); + ret = qcow2_get_cluster_offset(bs, logical_sect << 9, + &pnum, &existing_physical_offset); + + if (ret < 0) { + return ret; + } + + /* if we are rewriting the same cluster at the same place do nothing */ + if (existing_physical_offset == hash_node->physical_sect << 9) { + return 1; + } + + /* take care of not having refcount > 1 and QCOW_OFLAG_COPIED at once */ + ret = qcow2_clear_l2_copied_flag_if_needed(bs, hash_node); + + if (ret < 0) { + return ret; + } + + /* do the deduplication */ + ret = qcow2_deduplicate_cluster(bs, logical_sect, + hash_node); + + if (ret < 0) { + return ret; + } + + return 1; +} + + +static void add_hash_to_undedupable_list(BlockDriverState *bs, + QCowDedupState *ds) +{ + /* memorise hash for later storage in gtree and disk */ + QCowHashElement *dedup_hash = qcow2_build_dedup_hash(&ds->phash.hash); + QTAILQ_INSERT_TAIL(&ds->undedupables, dedup_hash, 
next); +} + +static int qcow2_dedup_starting_from_begining(BlockDriverState *bs, + QCowDedupState *ds, + uint64_t sector_num, + uint8_t *data, + int left_to_process) +{ + BDRVQcowState *s = bs->opaque; + int i; + int ret = 0; + + for (i = 0; i < left_to_process; i++) { + ret = qcow2_try_dedup_cluster(bs, + &ds->phash, + sector_num + i * s->cluster_sectors, + data, + ds->nb_clusters_processed + i); + + if (ret < 0) { + return ret; + } + + /* stop if a cluster has not been deduplicated */ + if (ret != 1) { + break; + } + } + + return i; +} + +static int qcow2_count_next_non_dedupable_clusters(BlockDriverState *bs, + QCowDedupState *ds, + uint8_t *data, + int left_to_process) +{ + int i; + int ret = 0; + QCowHashNode *hash_node; + + for (i = 0; i < left_to_process; i++) { + hash_node = qcow2_get_hash_node_for_cluster(bs, + &ds->phash, + data, + ds->nb_clusters_processed + i, + &ret); + + if (ret < 0) { + return ret; + } + + /* found a duplicated cluster : stop here */ + if (hash_node) { + break; + } + + qcow2_build_and_insert_hash_node(bs, &ds->phash.hash); + add_hash_to_undedupable_list(bs, ds); + } + + return i; +} + + +/* Deduplicate all the cluster that can be deduplicated. + * + * Next it compute the number of non deduplicable sectors to come while storing + * the hashes of these sectors in a linked list for later usage. 
+ * Then it compute the first duplicated cluster hash that come after non + * deduplicable cluster, this hash will be used at next call of the function + * + * @ds: a structure containing the state of the deduplication + * for this write request + * @sector_num: The logical sector + * @data: the buffer containing the data to deduplicate + * @data_nr: the size of the buffer in sectors + * + */ +int qcow2_dedup(BlockDriverState *bs, + QCowDedupState *ds, + uint64_t sector_num, + uint8_t *data, + int data_nr) +{ + BDRVQcowState *s = bs->opaque; + int ret = 0; + int deduped_clusters_nr = 0; + int left_to_process; + int begining_index; + + begining_index = sector_num & (s->cluster_sectors - 1); + + left_to_process = (data_nr / s->cluster_sectors) - + ds->nb_clusters_processed; + + /* start deduplicating all that can be cluster after cluster */ + ret = qcow2_dedup_starting_from_begining(bs, + ds, + sector_num, + data, + left_to_process); + + if (ret < 0) { + return ret; + } + + deduped_clusters_nr = ret; + + left_to_process -= ret; + ds->nb_clusters_processed += ret; + + /* We deduped everything till the end */ + if (!left_to_process) { + ds->nb_undedupable_sectors = 0; + goto exit; + } + + /* skip and account the first undedupable cluster found */ + left_to_process--; + ds->nb_clusters_processed++; + ds->nb_undedupable_sectors += s->cluster_sectors; + + add_hash_to_undedupable_list(bs, ds); + + /* Count how many non duplicated sector can be written and memorize hashes + * to write them after data has reached disk. 
+ */ + ret = qcow2_count_next_non_dedupable_clusters(bs, + ds, + data, + left_to_process); + + if (ret < 0) { + return ret; + } + + left_to_process -= ret; + ds->nb_clusters_processed += ret; + ds->nb_undedupable_sectors += ret * s->cluster_sectors; + + /* remember to reuse the last hash computed at new qcow2_dedup call */ + if (left_to_process) { + ds->phash.reuse = true; + } + +exit: + if (!deduped_clusters_nr) { + return 0; + } + + return deduped_clusters_nr * s->cluster_sectors - begining_index; +} diff --git a/block/qcow2.h b/block/qcow2.h index 95bf848..46a5800 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -463,5 +463,10 @@ int qcow2_dedup_read_missing_and_concatenate(BlockDriverState *bs, int sectors_nr, uint8_t **dedup_cluster_data, int *dedup_cluster_data_nr); +int qcow2_dedup(BlockDriverState *bs, + QCowDedupState *ds, + uint64_t sector_num, + uint8_t *data, + int data_nr); #endif
Signed-off-by: Benoit Canet <benoit@irqsave.net> --- block/qcow2-dedup.c | 436 +++++++++++++++++++++++++++++++++++++++++++++++++++ block/qcow2.h | 5 + 2 files changed, 441 insertions(+)