diff mbox

[RFC,V5,02/62] qcow2: Add deduplication structures and fields.

Message ID 1358351321-4891-3-git-send-email-benoit@irqsave.net
State New
Headers show

Commit Message

Benoît Canet Jan. 16, 2013, 3:47 p.m. UTC
Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
 block/qcow2.h |   72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 71 insertions(+), 1 deletion(-)

Comments

Eric Blake Jan. 16, 2013, 4:30 p.m. UTC | #1
On 01/16/2013 08:47 AM, Benoît Canet wrote:
> Signed-off-by: Benoit Canet <benoit@irqsave.net>
> ---
>  block/qcow2.h |   72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 71 insertions(+), 1 deletion(-)
> 
> diff --git a/block/qcow2.h b/block/qcow2.h
> index 718b52b..b31b64e 100644
> --- a/block/qcow2.h
> +++ b/block/qcow2.h
> @@ -43,6 +43,10 @@
>  #define QCOW_OFLAG_COPIED     (1LL << 63)
>  /* indicate that the cluster is compressed (they never have the copied flag) */
>  #define QCOW_OFLAG_COMPRESSED (1LL << 62)
> +/* indicate that the cluster must be processed when deduplication restart
> + * also indicate that the on disk dedup hash must be ignored and discarded

s/restart also/restarts. Also,/


> +/* deduplication node */
> +typedef struct {
> +    QCowHash hash;
> +    uint64_t physical_sect;       /* where the cluster is stored on disk */
> +    uint64_t first_logical_sect;  /* logical sector of the first occurence of

s/occurence/occurrence/
diff mbox

Patch

diff --git a/block/qcow2.h b/block/qcow2.h
index 718b52b..b31b64e 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -43,6 +43,10 @@ 
 #define QCOW_OFLAG_COPIED     (1LL << 63)
 /* indicate that the cluster is compressed (they never have the copied flag) */
 #define QCOW_OFLAG_COMPRESSED (1LL << 62)
+/* indicate that the cluster must be processed when deduplication restarts.
+ * Also indicate that the on disk dedup hash must be ignored and discarded
+ */
+#define QCOW_OFLAG_TO_DEDUP (1LL << 61)
 /* The cluster reads as all zeros */
 #define QCOW_OFLAG_ZERO (1LL << 0)
 
@@ -58,6 +62,57 @@ 
 
 #define DEFAULT_CLUSTER_SIZE 65536
 
+#define HASH_LENGTH 32
+
+typedef enum {
+    QCOW_DEDUP_STOPPED,
+    QCOW_DEDUP_STARTING,
+    QCOW_DEDUP_STARTED,
+    QCOW_DEDUP_STOPPING,
+} QCowDedupStatus;
+
+typedef enum {
+    QCOW_HASH_SHA256 = 0,
+    QCOW_HASH_SHA3   = 1,
+    QCOW_HASH_SKEIN  = 2,
+} QCowHashAlgo;
+
+typedef struct {
+    uint8_t data[HASH_LENGTH]; /* 32 bytes hash of a given cluster */
+} QCowHash;
+
+/* Used to keep a single precomputed hash between the calls of the dedup
+ * function
+ */
+typedef struct {
+    QCowHash hash;
+    bool reuse;                  /* The hash is precomputed; reuse it */
+} QcowPersistantHash;
+
+/* deduplication node */
+typedef struct {
+    QCowHash hash;
+    uint64_t physical_sect;       /* where the cluster is stored on disk */
+    uint64_t first_logical_sect;  /* logical sector of the first occurrence of
+                                   * this cluster
+                                   */
+} QCowHashNode;
+
+/* Undedupable hashes that must be written later to disk */
+typedef struct QCowHashElement {
+    QCowHash hash;
+    QTAILQ_ENTRY(QCowHashElement) next;
+} QCowHashElement;
+
+typedef struct {
+    QcowPersistantHash phash;  /* contains a hash persisting between calls of
+                                * qcow2_dedup()
+                                */
+    QTAILQ_HEAD(, QCowHashElement) undedupables;
+    int nb_clusters_processed;
+    int nb_undedupable_sectors;
+} QCowDedupState;
+
 typedef struct QCowHeader {
     uint32_t magic;
     uint32_t version;
@@ -114,8 +169,10 @@  enum {
 enum {
     QCOW2_INCOMPAT_DIRTY_BITNR   = 0,
     QCOW2_INCOMPAT_DIRTY         = 1 << QCOW2_INCOMPAT_DIRTY_BITNR,
+    QCOW2_INCOMPAT_DEDUP_BITNR   = 1,
+    QCOW2_INCOMPAT_DEDUP         = 1 << QCOW2_INCOMPAT_DEDUP_BITNR,
 
-    QCOW2_INCOMPAT_MASK          = QCOW2_INCOMPAT_DIRTY,
+    QCOW2_INCOMPAT_MASK          = QCOW2_INCOMPAT_DIRTY | QCOW2_INCOMPAT_DEDUP,
 };
 
 /* Compatible feature bits */
@@ -138,6 +195,7 @@  typedef struct BDRVQcowState {
     int cluster_sectors;
     int l2_bits;
     int l2_size;
+    int hash_block_size;
     int l1_size;
     int l1_vm_state_index;
     int csize_shift;
@@ -148,6 +206,7 @@  typedef struct BDRVQcowState {
 
     Qcow2Cache* l2_table_cache;
     Qcow2Cache* refcount_block_cache;
+    Qcow2Cache *dedup_cluster_cache;
 
     uint8_t *cluster_cache;
     uint8_t *cluster_data;
@@ -160,6 +219,17 @@  typedef struct BDRVQcowState {
     int64_t free_cluster_index;
     int64_t free_byte_offset;
 
+    bool has_dedup;
+    QCowDedupStatus dedup_status;
+    QCowHashAlgo dedup_hash_algo;
+    Coroutine *dedup_resume_co;
+    int dedup_co_delay;
+    uint64_t *dedup_table;
+    uint64_t dedup_table_offset;
+    int32_t dedup_table_size;
+    GTree *dedup_tree_by_hash;
+    GTree *dedup_tree_by_sect;
+
     CoMutex lock;
 
     uint32_t crypt_method; /* current crypt method, 0 if no key yet */