Patchwork [RFC,V3,06/24] qcow2: Add qcow2_dedup and related functions.

login
register
mail settings
Submitter Benoît Canet
Date Nov. 26, 2012, 1:05 p.m.
Message ID <1353935123-24199-7-git-send-email-benoit@irqsave.net>
Download mbox | patch
Permalink /patch/201670/
State New
Headers show

Comments

Benoît Canet - Nov. 26, 2012, 1:05 p.m.
Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
 block/qcow2-dedup.c |  312 +++++++++++++++++++++++++++++++++++++++++++++++++++
 block/qcow2.h       |   13 +++
 2 files changed, 325 insertions(+)
Stefan Hajnoczi - Dec. 11, 2012, 1:16 p.m.
On Mon, Nov 26, 2012 at 02:05:05PM +0100, Benoît Canet wrote:
> +/*
> + * Compute the hash of a given cluster
> + *
> + * @data: a buffer containing the cluster data
> + * @ret:  a HASH_LENGTH long dynamically allocated array containing the hash
> + */
> +static uint8_t *qcow2_compute_cluster_hash(BlockDriverState *bs,
> +                                           uint8_t *data)
> +{
> +    return NULL;
> +}
> +
> +/* Try to find the offset of a given cluster if it's duplicated
> + * Exceptionally we cast return value to int64_t to use as error code.

I don't see an int64_t return value, this comment is probably outdated.

> + *
> + * @data:            a buffer containing the cluster
> + * @skip_cluster_nr: the number of cluster to skip in the buffer
> + * @hash:            if hash is provided it's used else it's computed
> + * @ret:             QCowHashNode of the duplicated cluster or NULL

This function does several things:
1. Allocating and computing *hash if not given.
2. Returning existing dedup_tree_by_hash node or NULL if the node wasn't
   already in the tree.
3. Inserting the node into the tree if not present.

I wonder if it can be simplified or split to do less work.

> +/*
> + * Helper used to link a deduplicated cluster in the l2
> + *
> + * @logical_cluster_offset:  the cluster offset seen by the guest (in sectors)
> + * @physical_cluster_offset: the cluster offset in the QCOW2 file (in sectors)

Perhaps s/_offset/_sect/ because usually offset is in bytes.

Patch

diff --git a/block/qcow2-dedup.c b/block/qcow2-dedup.c
index e4e6108..a7c7202 100644
--- a/block/qcow2-dedup.c
+++ b/block/qcow2-dedup.c
@@ -29,6 +29,8 @@ 
 #include "qemu-common.h"
 #include "qcow2.h"
 
+#define HASH_LENGTH 32
+
 /**
  * Read some data from the QCOW2 file
  *
@@ -148,3 +150,313 @@  int qcow2_dedup_read_missing_and_concatenate(BlockDriverState *bs,
 fail:
     return ret;
 }
+
+/*
+ * Build a QCowHashNode structure
+ *
+ * @hash:             the given hash
+ * @physical_offset:  the cluster offset in the QCOW2 file
+ * @first_offset:     the first logical cluster offset written
+ * @ret:              the build QCowHashNode
+ */
+static QCowHashNode *qcow2_dedup_build_qcow_hash_node(uint8_t *hash,
+                                                      uint64_t physical_offset,
+                                                      uint64_t first_offset)
+{
+    QCowHashNode *data;
+
+    data = g_new0(QCowHashNode, 1);
+    data->hash = hash;
+    data->offset = physical_offset;
+    data->first_logical_offset = first_offset;
+
+    return data;
+}
+
+/*
+ * Compute the hash of a given cluster
+ *
+ * @data: a buffer containing the cluster data
+ * @ret:  a HASH_LENGTH long dynamically allocated array containing the hash
+ */
+static uint8_t *qcow2_compute_cluster_hash(BlockDriverState *bs,
+                                           uint8_t *data)
+{
+    return NULL;
+}
+
+/* Try to find the offset of a given cluster if it's duplicated
+ * Exceptionally we cast return value to int64_t to use as error code.
+ *
+ * @data:            a buffer containing the cluster
+ * @skip_cluster_nr: the number of cluster to skip in the buffer
+ * @hash:            if hash is provided it's used else it's computed
+ * @ret:             QCowHashNode of the duplicated cluster or NULL
+ */
+static QCowHashNode *qcow2_build_hash_lookup_offset(BlockDriverState *bs,
+                                                    uint8_t *data,
+                                                    int skip_cluster_nr,
+                                                    uint8_t **hash)
+{
+    BDRVQcowState *s = bs->opaque;
+    QCowHashNode *hash_node;
+    if (!*hash) {
+        /* no hash has been provided compute it and store it for caller usage */
+        *hash = qcow2_compute_cluster_hash(bs,
+                                           data + skip_cluster_nr *
+                                           s->cluster_size);
+    }
+    hash_node =  g_tree_lookup(s->dedup_tree_by_hash, *hash);
+    if (hash_node) {
+        return hash_node;
+    }
+
+    /* cluster not duplicated */
+    hash_node = qcow2_dedup_build_qcow_hash_node(*hash,
+                                                 QCOW_FLAG_EMPTY,
+                                                 QCOW_FLAG_EMPTY);
+    g_tree_insert(s->dedup_tree_by_hash, *hash, hash_node);
+
+    return NULL;
+}
+
+/*
+ * Helper used to build a QCowHashElement
+ *
+ * @hash: the hash to use
+ * @ret:  a newly allocated QCowHashElement pointing to the given hash
+ */
+static QCowHashElement *qcow2_build_dedup_hash(uint8_t *hash)
+{
+    QCowHashElement *dedup_hash;
+    dedup_hash = g_new0(QCowHashElement, 1);
+    dedup_hash->hash = hash;
+    return dedup_hash;
+}
+
+/*
+ * Helper used to link a deduplicated cluster in the l2
+ *
+ * @logical_cluster_offset:  the cluster offset seen by the guest (in sectors)
+ * @physical_cluster_offset: the cluster offset in the QCOW2 file (in sectors)
+ * @overwrite:               true if we are overwriting an l2 table
+ * @ret:
+ */
+static int qcow2_dedup_link_l2(BlockDriverState *bs,
+                               uint64_t logical_cluster_offset,
+                               uint64_t physical_cluster_offset,
+                               bool overwrite)
+{
+    QCowL2Meta m;
+    /* function correctness regarding copy on write ? */
+    m.offset         = logical_cluster_offset << 9;
+    m.alloc_offset   = physical_cluster_offset << 9;
+    m.nb_clusters    = 1; /* we are linking only one cluster in l2 */
+    m.cluster_offset = 0;
+    m.n_start        = 0;
+    m.nb_available   = 0;
+    m.oflag_copied   = false;
+    m.overwrite      = overwrite;
+    return qcow2_alloc_cluster_link_l2(bs, &m);
+}
+
+/* This function tries to deduplicate a given cluster.
+ *
+ * @sector_num:           the logical sector number we are trying to deduplicate
+ * @precomputed_hash:     Used instead of computing the hash if provided
+ * @data:                 the buffer in which to look for a duplicated cluster
+ * @skip_clusters_nr:     the number of cluster that must be skipped in data
+ * @non_duplicated_dedup_hash:   returned if the cluster is not deduplicated
+ * @ret:                  ret < 0 on error, 1 on deduplication else 0
+ */
+static int qcow2_dedup_cluster(BlockDriverState *bs,
+                               uint64_t sector_num,
+                               uint8_t **precomputed_hash,
+                               uint8_t *data,
+                               int skip_clusters_nr,
+                               QCowHashElement **non_duplicated_dedup_hash)
+{
+    BDRVQcowState *s = bs->opaque;
+    QCowHashNode *hash_node;
+    int64_t physical_cluster_offset;
+    uint64_t first_logical_offset;
+    uint64_t logical_cluster_offset;
+    uint64_t existing_physical_cluster_offset;
+    int ret;
+    int pnum = s->cluster_sectors;
+    *non_duplicated_dedup_hash = NULL;
+
+    /* look if the cluster is duplicated */
+    hash_node = qcow2_build_hash_lookup_offset(bs,
+                                               data,
+                                               skip_clusters_nr,
+                                               precomputed_hash);
+
+    /* round the logical cluster offset to cluster boundaries */
+    logical_cluster_offset = sector_num & ~(s->cluster_sectors - 1);
+    ret = qcow2_get_cluster_offset(bs, logical_cluster_offset << 9,
+                                   &pnum, &existing_physical_cluster_offset);
+    if (ret < 0) {
+        goto exit;
+    }
+
+    if (!hash_node) {
+        /* no duplicated cluster found, store the hash for later usage */
+        *non_duplicated_dedup_hash = qcow2_build_dedup_hash(*precomputed_hash);
+        return 0;
+    } else {
+        /* duplicated cluster found */
+        physical_cluster_offset = hash_node->offset;
+        first_logical_offset = hash_node->first_logical_offset;
+
+        if (existing_physical_cluster_offset != physical_cluster_offset << 9) {
+
+            ret = update_cluster_refcount(bs,
+                                          physical_cluster_offset /
+                                          s->cluster_sectors,
+                                          1,
+                                          false);
+            if (ret < 0) {
+                goto exit;
+            }
+
+            ret = qcow2_dedup_link_l2(bs, logical_cluster_offset,
+                                      physical_cluster_offset,
+                                      false);
+            if (ret < 0) {
+                goto exit;
+            }
+
+            /* if refcount was one remove the QCOW_FLAG_FIRST flag */
+            if (first_logical_offset & QCOW_FLAG_FIRST) {
+                first_logical_offset &= ~QCOW_FLAG_FIRST;
+                ret = qcow2_dedup_link_l2(bs, first_logical_offset,
+                                          physical_cluster_offset,
+                                          true);
+                if (ret < 0) {
+                    goto exit;
+                }
+                hash_node->first_logical_offset = first_logical_offset;
+            }
+        }
+    }
+
+    ret = 1;
+exit:
+    g_free(*precomputed_hash);
+    *precomputed_hash = NULL;
+    return ret;
+}
+
+/*
+ * Deduplicate all the cluster that can be deduplicated.
+ *
+ * Next it compute the number of non deduplicable sectors to come while storing
+ * the hashes of these sectors in a linked list for later usage.
+ * Then it compute the first duplicated cluster hash that come after non
+ * deduplicable cluster, this hash will be used at next call of the function
+ *
+ * @u:               where to store the list of undedupable hashes
+ * @sector_num:      the first sector to deduplicate (in sectors)
+ * @data:            the buffer containing the data to deduplicate
+ * @data_nr:         the size of the buffer in sectors
+ * @skip_cluster_nr: the number of cluster to skip at the begining of data
+ * @next_non_dedupable_sectors_nr: result containing the number of non
+ *                                 deduplicable sectors to come
+ * next_call_first_hash:           hash saved between the function call
+ *
+ *
+ *
+ */
+int qcow2_dedup(BlockDriverState *bs,
+                UndedupableHashes *u,
+                uint64_t sector_num,
+                uint8_t *data,
+                int data_nr,
+                int *skip_clusters_nr,
+                int *next_non_dedupable_sectors_nr,
+                uint8_t **next_call_first_hash)
+{
+    BDRVQcowState *s = bs->opaque;
+    int ret;
+    int deduped_clusters_nr = 0;
+    int left_to_test_clusters_nr;
+    int begining_index;
+    uint8_t *hash = NULL;
+    QCowHashElement *non_duplicated_dedup_hash = NULL;
+
+    /* should already be zero when entering this function */
+    assert(*next_non_dedupable_sectors_nr == 0);
+
+    begining_index = sector_num & (s->cluster_sectors - 1);
+
+    left_to_test_clusters_nr = (data_nr / s->cluster_sectors) -
+                               *skip_clusters_nr;
+
+    /* Deduplicate all that can be */
+    while (left_to_test_clusters_nr-- &&
+           (ret = qcow2_dedup_cluster(bs,
+                                      sector_num,
+                                      next_call_first_hash,
+                                      data,
+                                      (*skip_clusters_nr)++,
+                                      &non_duplicated_dedup_hash)) == 1) {
+        sector_num += s->cluster_sectors;
+        deduped_clusters_nr++;
+    }
+
+    if (ret < 0) {
+        *next_call_first_hash = NULL;
+        goto exit;
+    }
+
+    /* We deduped everything till the end */
+    if (!non_duplicated_dedup_hash) {
+        *next_call_first_hash = NULL;
+        *next_non_dedupable_sectors_nr = 0;
+        goto exit;
+    }
+
+    /* We consumed the precomputed hash */
+    *next_call_first_hash = NULL;
+
+    /* remember that the last sector we tested in non deduplicable */
+    *next_non_dedupable_sectors_nr += s->cluster_sectors;
+
+    /* Memorize the hash of the first non duplicated cluster.
+     * we will store it before writing the cluster to disk.
+     */
+    QTAILQ_INSERT_TAIL(&u->undedupable_hashes, non_duplicated_dedup_hash, next);
+
+    /* Count how many non duplicated sector can be written and memorize the
+     * hashes for later.
+     * We make sure we pass hash == NULL to force computation of the hash.
+     */
+    hash = NULL;
+    while (left_to_test_clusters_nr-- > 0 &&
+           !qcow2_build_hash_lookup_offset(bs,
+                                           data,
+                                           *skip_clusters_nr,
+                                           &hash)) {
+        *next_non_dedupable_sectors_nr += s->cluster_sectors;
+        non_duplicated_dedup_hash = qcow2_build_dedup_hash(hash);
+        QTAILQ_INSERT_TAIL(&u->undedupable_hashes,
+                           non_duplicated_dedup_hash, next);
+        hash = NULL;
+        (*skip_clusters_nr)++;
+    }
+
+    /* We find a duplicated cluster before stopping iterating store it for
+     * next call.
+     */
+    if (hash && non_duplicated_dedup_hash &&
+        non_duplicated_dedup_hash->hash != hash) {
+        *next_call_first_hash = hash;
+    }
+
+exit:
+    if (!deduped_clusters_nr) {
+        return 0;
+    }
+    return deduped_clusters_nr * s->cluster_sectors - begining_index;
+}
diff --git a/block/qcow2.h b/block/qcow2.h
index ccb24ad..5c18425 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -58,6 +58,11 @@ 
 
 #define DEFAULT_CLUSTER_SIZE 65536
 
+/* indicate that the hash structure is empty and miss offset */
+#define QCOW_FLAG_EMPTY   (1LL << 62)
+/* indicate that the cluster for this hash has QCOW_OFLAG_COPIED on disk */
+#define QCOW_FLAG_FIRST   (1LL << 63)
+
 /* deduplication node */
 typedef struct {
     uint8_t *hash;         /* 32 bytes hash of a given cluster */
@@ -372,5 +377,13 @@  int qcow2_dedup_read_missing_and_concatenate(BlockDriverState *bs,
                                              int sectors_nr,
                                              uint8_t **dedup_cluster_data,
                                              int *dedup_cluster_data_nr);
+int qcow2_dedup(BlockDriverState *bs,
+                UndedupableHashes *u,
+                uint64_t sector_num,
+                uint8_t *data,
+                int data_nr,
+                int *skip_clusters_nr,
+                int *next_non_dedupable_sectors_nr,
+                uint8_t **next_call_first_hash);
 
 #endif