Patchwork [RFC,V6,03/33] qcow2: Add deduplication structures and fields.

login
register
mail settings
Submitter Benoît Canet
Date Feb. 6, 2013, 12:31 p.m.
Message ID <1360153926-9492-4-git-send-email-benoit@irqsave.net>
Download mbox | patch
Permalink /patch/218581/
State New
Headers show

Comments

Benoît Canet - Feb. 6, 2013, 12:31 p.m.
Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
 block/qcow2.h |   69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 68 insertions(+), 1 deletion(-)
Stefan Hajnoczi - Feb. 6, 2013, 4:18 p.m.
On Wed, Feb 06, 2013 at 01:31:36PM +0100, Benoît Canet wrote:
> diff --git a/block/qcow2.h b/block/qcow2.h
> index 718b52b..c7b6860 100644
> --- a/block/qcow2.h
> +++ b/block/qcow2.h
> @@ -43,6 +43,10 @@
>  #define QCOW_OFLAG_COPIED     (1LL << 63)
>  /* indicate that the cluster is compressed (they never have the copied flag) */
>  #define QCOW_OFLAG_COMPRESSED (1LL << 62)
> +/* indicate that the cluster must be processed when deduplication restart

s/restart/restarts/

> + * also indicate that the on disk dedup hash must be ignored and discarded
> + */
> +#define QCOW_OFLAG_TO_DEDUP (1LL << 61)

PENDING_DEDUP or NOT_DEDUPED_YET might be clearer.

I didn't see this defined in the spec, BTW.  Maybe I missed it.

> +/* Used to keep a single precomputed hash between the calls of the dedup
> + * function
> + */
> +typedef struct {
> +    QCowHash hash;
> +    bool reuse;                  /* The hash is precomputed reuse it */

Is this some kind of memory management "shared"/"dont_free" flag?

> +} QcowPersistantHash;

s/Persistant/Persistent/

> +
> +/* deduplication node */
> +typedef struct {
> +    QCowHash hash;
> +    uint64_t physical_sect;       /* where the cluster is stored on disk */
> +    uint64_t first_logical_sect;  /* logical sector of the first occurence of
> +                                   * this cluster
> +                                   */
> +} QCowHashNode;
> +
> +/* Undedupable hashes that must be written later to disk */
> +typedef struct QCowHashElement {
> +    QCowHash hash;
> +    QTAILQ_ENTRY(QCowHashElement) next;
> +} QCowHashElement;
> +
> +typedef struct {
> +    QcowPersistantHash phash;  /* contains a hash persisting between calls of
> +                                * qcow2_dedup()
> +                                */
> +    QTAILQ_HEAD(, QCowHashElement) undedupables;
> +    int nb_clusters_processed;
> +    int nb_undedupable_sectors;

Is int large enough for huge disk images (multi terabyte and beyond)?

> @@ -160,6 +216,17 @@ typedef struct BDRVQcowState {
>      int64_t free_cluster_index;
>      int64_t free_byte_offset;
>  
> +    bool has_dedup;
> +    DedupStatus dedup_status;
> +    QCowHashAlgo dedup_hash_algo;
> +    Coroutine *dedup_resume_co;
> +    int dedup_co_delay;
> +    uint64_t *dedup_table;
> +    uint64_t dedup_table_offset;
> +    int32_t dedup_table_size;

size_t?
Eric Blake - Feb. 6, 2013, 4:37 p.m.
On 02/06/2013 05:31 AM, Benoît Canet wrote:
> Signed-off-by: Benoit Canet <benoit@irqsave.net>
> ---
>  block/qcow2.h |   69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 68 insertions(+), 1 deletion(-)
> 
> diff --git a/block/qcow2.h b/block/qcow2.h
> index 718b52b..c7b6860 100644
> --- a/block/qcow2.h
> +++ b/block/qcow2.h
> @@ -43,6 +43,10 @@
>  #define QCOW_OFLAG_COPIED     (1LL << 63)
>  /* indicate that the cluster is compressed (they never have the copied flag) */
>  #define QCOW_OFLAG_COMPRESSED (1LL << 62)
> +/* indicate that the cluster must be processed when deduplication restart

s/restart/restarts./

> + * also indicate that the on disk dedup hash must be ignored and discarded
> + */
> +#define QCOW_OFLAG_TO_DEDUP (1LL << 61)
>  /* The cluster reads as all zeros */
>  #define QCOW_OFLAG_ZERO (1LL << 0)
>  
> @@ -58,6 +62,54 @@
>  
>  #define DEFAULT_CLUSTER_SIZE 65536
>  
> +#define HASH_LENGTH 32
> +
> +#define QCOW_STRATEGY_RAM     1
> +#define QCOW_STRATEGY_DISK    (1 << 1)
> +#define QCOW_STRATEGY_RUNNING (1 << 2)

For consistency, it would look better to use (1 << 0) for the first bit.
 Also, see my comments on patch 1/33 - reserving QCOW_STRATEGY_DISK when
you don't yet have it implemented doesn't make sense.  Is your plan to
have RAM and DISK mutually exclusive (if so, you don't need anything
now, and later on, you would add a single bit, set to 0 for RAM and 1
for disk) or can they both be set at once (in which case, having the RAM
bit now is okay, but I would rearrange bit orders so that today's RAM
bit is last leaving the door open to stick tomorrow's disk bit adjacent
to it).


> +/* deduplication node */
> +typedef struct {
> +    QCowHash hash;
> +    uint64_t physical_sect;       /* where the cluster is stored on disk */
> +    uint64_t first_logical_sect;  /* logical sector of the first occurence of

s/occurence/occurrence/

Patch

diff --git a/block/qcow2.h b/block/qcow2.h
index 718b52b..c7b6860 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -43,6 +43,10 @@ 
 #define QCOW_OFLAG_COPIED     (1LL << 63)
 /* indicate that the cluster is compressed (they never have the copied flag) */
 #define QCOW_OFLAG_COMPRESSED (1LL << 62)
+/* indicate that the cluster must be processed when deduplication restart
+ * also indicate that the on disk dedup hash must be ignored and discarded
+ */
+#define QCOW_OFLAG_TO_DEDUP (1LL << 61)
 /* The cluster reads as all zeros */
 #define QCOW_OFLAG_ZERO (1LL << 0)
 
@@ -58,6 +62,54 @@ 
 
 #define DEFAULT_CLUSTER_SIZE 65536
 
+#define HASH_LENGTH 32
+
+#define QCOW_STRATEGY_RAM     1
+#define QCOW_STRATEGY_DISK    (1 << 1)
+#define QCOW_STRATEGY_RUNNING (1 << 2)
+
+typedef enum {
+    QCOW_HASH_SHA256 = 0,
+    QCOW_HASH_SHA3   = 1,
+    QCOW_HASH_SKEIN  = 2,
+} QCowHashAlgo;
+
+typedef struct {
+    uint8_t data[HASH_LENGTH]; /* 32 bytes hash of a given cluster */
+} QCowHash;
+
+/* Used to keep a single precomputed hash between the calls of the dedup
+ * function
+ */
+typedef struct {
+    QCowHash hash;
+    bool reuse;                  /* The hash is precomputed reuse it */
+} QcowPersistantHash;
+
+/* deduplication node */
+typedef struct {
+    QCowHash hash;
+    uint64_t physical_sect;       /* where the cluster is stored on disk */
+    uint64_t first_logical_sect;  /* logical sector of the first occurence of
+                                   * this cluster
+                                   */
+} QCowHashNode;
+
+/* Undedupable hashes that must be written later to disk */
+typedef struct QCowHashElement {
+    QCowHash hash;
+    QTAILQ_ENTRY(QCowHashElement) next;
+} QCowHashElement;
+
+typedef struct {
+    QcowPersistantHash phash;  /* contains a hash persisting between calls of
+                                * qcow2_dedup()
+                                */
+    QTAILQ_HEAD(, QCowHashElement) undedupables;
+    int nb_clusters_processed;
+    int nb_undedupable_sectors;
+} QCowDedupState;
+
 typedef struct QCowHeader {
     uint32_t magic;
     uint32_t version;
@@ -114,8 +166,10 @@  enum {
 enum {
     QCOW2_INCOMPAT_DIRTY_BITNR   = 0,
     QCOW2_INCOMPAT_DIRTY         = 1 << QCOW2_INCOMPAT_DIRTY_BITNR,
+    QCOW2_INCOMPAT_DEDUP_BITNR   = 1,
+    QCOW2_INCOMPAT_DEDUP         = 1 << QCOW2_INCOMPAT_DEDUP_BITNR,
 
-    QCOW2_INCOMPAT_MASK          = QCOW2_INCOMPAT_DIRTY,
+    QCOW2_INCOMPAT_MASK          = QCOW2_INCOMPAT_DIRTY | QCOW2_INCOMPAT_DEDUP,
 };
 
 /* Compatible feature bits */
@@ -138,6 +192,7 @@  typedef struct BDRVQcowState {
     int cluster_sectors;
     int l2_bits;
     int l2_size;
+    int hash_block_size;
     int l1_size;
     int l1_vm_state_index;
     int csize_shift;
@@ -148,6 +203,7 @@  typedef struct BDRVQcowState {
 
     Qcow2Cache* l2_table_cache;
     Qcow2Cache* refcount_block_cache;
+    Qcow2Cache *dedup_cluster_cache;
 
     uint8_t *cluster_cache;
     uint8_t *cluster_data;
@@ -160,6 +216,17 @@  typedef struct BDRVQcowState {
     int64_t free_cluster_index;
     int64_t free_byte_offset;
 
+    bool has_dedup;
+    DedupStatus dedup_status;
+    QCowHashAlgo dedup_hash_algo;
+    Coroutine *dedup_resume_co;
+    int dedup_co_delay;
+    uint64_t *dedup_table;
+    uint64_t dedup_table_offset;
+    int32_t dedup_table_size;
+    GTree *dedup_tree_by_hash;
+    GTree *dedup_tree_by_sect;
+
     CoMutex lock;
 
     uint32_t crypt_method; /* current crypt method, 0 if no key yet */