Patchwork [RFC,V8,04/24] qcow2: Create the log store.

login
register
mail settings
Submitter Benoît Canet
Date June 20, 2013, 2:26 p.m.
Message ID <1371738392-9594-5-git-send-email-benoit@irqsave.net>
Download mbox | patch
Permalink /patch/253002/
State New
Headers show

Comments

Benoît Canet - June 20, 2013, 2:26 p.m.
The log store is an in-RAM hash table used to index QCowJournalEntries.
Combining the log store with a journal gives the benefits of the journal
(fewer random writes and reduced SSD wear) together with the advantages of a
hash table (fast random access).

Once the journal is full and not packable, or if the log store hash table is
full, the journal is frozen and converted into an on-disk hash table used by
the hash store.

Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
 block/Makefile.objs     |    2 +-
 block/qcow2-log-store.c | 1039 +++++++++++++++++++++++++++++++++++++++++++++++
 block/qcow2.h           |   26 ++
 3 files changed, 1066 insertions(+), 1 deletion(-)
 create mode 100644 block/qcow2-log-store.c

Patch

diff --git a/block/Makefile.objs b/block/Makefile.objs
index ee894b5..8c0b646 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -1,6 +1,6 @@ 
 block-obj-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
 block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
-block-obj-y += qcow2-journal.o
+block-obj-y += qcow2-journal.o qcow2-log-store.o
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
 block-obj-y += vhdx.o
diff --git a/block/qcow2-log-store.c b/block/qcow2-log-store.c
new file mode 100644
index 0000000..24ae310
--- /dev/null
+++ b/block/qcow2-log-store.c
@@ -0,0 +1,1039 @@ 
+/*
+ * QCOW2 log store
+ *
+ * Copyright (C) Nodalink, SARL. 2013
+ *
+ * Author:
+ *   Benoît Canet <benoit.canet@irqsave.net>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "block/qcow2.h"
+
+/* Extract sub-hash number 'index' from a QCowHashInfo and truncate it to
+ * 'order' bits so it can be used as a bucket index in a table of
+ * (1 << order) buckets.
+ *
+ * NOTE(review): the cast assumes hash.data is suitably aligned for
+ * uint64_t access - confirm against the QCowHashInfo layout.
+ */
+static uint64_t qcow2_dedup_get_h(QCowHashInfo *hash_info, int index,
+                                  uint32_t order)
+{
+    uint64_t *array = (uint64_t *) &hash_info->hash.data;
+    uint64_t result = array[index];
+    /* use a 64 bit constant for the shift: "1 << order" is computed as a
+     * 32 bit int and overflows (undefined behavior) for order >= 31
+     */
+    return result & (((uint64_t) 1 << order) - 1);
+}
+
+/* first sub-hash function of the cuckoo hash table */
+uint64_t qcow2_dedup_h1(QCowHashInfo *hash_info, uint32_t order)
+{
+    return qcow2_dedup_get_h(hash_info, 0, order);
+}
+
+/* second (alternate location) sub-hash function of the cuckoo hash table */
+uint64_t qcow2_dedup_h2(QCowHashInfo *hash_info, uint32_t order)
+{
+    return qcow2_dedup_get_h(hash_info, 1, order);
+}
+
+/* true when both sub-hashes collide: the entry has no alternate location */
+static bool qcow2_dedup_h_equals(QCowHashInfo *hash_info, uint32_t order)
+{
+    return qcow2_dedup_h1(hash_info, order) ==
+           qcow2_dedup_h2(hash_info, order);
+}
+
+/* Mark every hash table slot as not being part of the current kick path.
+ *
+ * @store: the store whose kick map is reset
+ */
+static void qcow2_log_store_reset_kick_map(QCowLogStore *store)
+{
+    uint64_t i;
+    for (i = 0; i < qcow2_log_store_nb_entries(store->order); i++) {
+        store->kick_map[i] = false;
+    }
+}
+
+/* true if the slot at in_table_idx is already part of the current kick path */
+static bool qcow2_log_store_is_in_kick_path(QCowLogStore *store,
+                                            uint64_t in_table_idx)
+{
+    return store->kick_map[in_table_idx];
+}
+
+/* flag the slot at in_table_idx as being part of the current kick path */
+static void qcow2_log_store_set_in_kick_path(QCowLogStore *store,
+                                             uint64_t in_table_idx)
+{
+    store->kick_map[in_table_idx] = true;
+}
+
+/* remove the slot at in_table_idx from the current kick path */
+static void qcow2_log_store_unset_from_kick_path(QCowLogStore *store,
+                                                 uint64_t in_table_idx)
+{
+    store->kick_map[in_table_idx] = false;
+}
+
+/* Place 'value' in the first free slot of 'perm' at or after the slot
+ * selected by the random number 'rnd' (linear probing with wrap around).
+ */
+static void qcow2_log_store_set_perm_element(int *perm, int value, int rnd)
+{
+    /* reduce modulo first and probe on the reduced index: incrementing the
+     * raw random value (as the original loop did) could overflow a signed
+     * int, which is undefined behavior
+     */
+    int idx = rnd % QCOW_LOG_STORE_BUCKET_SIZE;
+
+    while (perm[idx] != -1) {
+        idx = (idx + 1) % QCOW_LOG_STORE_BUCKET_SIZE;
+    }
+
+    perm[idx] = value;
+}
+
+/* This function creates a random permutation of the set
+ * [0, QCOW_LOG_STORE_BUCKET_SIZE - 1]
+ *
+ * @perm: array of QCOW_LOG_STORE_BUCKET_SIZE ints receiving the permutation
+ */
+static void qcow2_log_store_make_perm(int *perm)
+{
+    int i, rnd;
+
+    /* mark every slot as free */
+    for (i = 0; i < QCOW_LOG_STORE_BUCKET_SIZE; i++) {
+        perm[i] = -1;
+    }
+
+    /* insert each value at a random free slot */
+    for (i = 0; i < QCOW_LOG_STORE_BUCKET_SIZE; i++) {
+        rnd = rand();
+        qcow2_log_store_set_perm_element(perm, i, rnd);
+    }
+}
+
+/* This function is used to make sure we don't create path cycle while kicking
+ *
+ * The function uses a random permutation to compute the kick idx.
+ * This way two calls to the function should not give the same results.
+ * Randomization increases hash table usage by 13% giving a 93% filling rate.
+ *
+ * @store:      the store to work with
+ * @bucket_idx: the index of the bucket where we are trying to get an entry to
+ *              push
+ * @ret:        [0, QCOW_LOG_STORE_BUCKET_SIZE - 1] on success else -2
+ */
+static int qcow2_log_store_get_kick_idx(QCowLogStore *store,
+                                        uint64_t bucket_idx)
+{
+    QCowHashInfo *entry;
+    uint64_t in_table_idx;
+    int perm[QCOW_LOG_STORE_BUCKET_SIZE];
+    int i;
+
+    /* create a pseudo random permutation */
+    qcow2_log_store_make_perm(&perm[0]);
+
+    /* use the permutation to get entry to kick */
+    for (i = 0; i < QCOW_LOG_STORE_BUCKET_SIZE; i++) {
+        in_table_idx = bucket_idx * QCOW_LOG_STORE_BUCKET_SIZE + perm[i];
+        entry = &store->hash_table[in_table_idx];
+
+        /* we take care of not selecting an entry already moved or an entry
+         * which has no alternate location (both sub-hashes equal)
+         */
+        if (!qcow2_log_store_is_in_kick_path(store, in_table_idx) &&
+            !qcow2_dedup_h_equals(entry, store->order)) {
+            qcow2_log_store_set_in_kick_path(store, in_table_idx);
+            return perm[i];
+        }
+    }
+
+    /* no kickable entry found: the kick path would loop */
+    return -2;
+}
+
+
+/* Total number of QCowHashInfo slots in a log store of the given order.
+ *
+ * The table has (1 << order) buckets of QCOW_LOG_STORE_BUCKET_SIZE entries.
+ */
+uint64_t qcow2_log_store_nb_entries(uint32_t order)
+{
+    /* use a 64 bit constant: a 32 bit int "1 << order" would overflow
+     * (undefined behavior) for order >= 31
+     */
+    return ((uint64_t) 1 << order) * QCOW_LOG_STORE_BUCKET_SIZE;
+}
+
+/* Size in bytes of the on disk journal backing a log store of this order.
+ *
+ * The result is rounded up to a whole number of erase blocks.
+ */
+static uint64_t qcow2_log_store_journal_size(uint32_t order)
+{
+    /* size of a QCowLog hash entry */
+    uint32_t qcow_log_hash_size = sizeof(QCowHashInfo) +
+                                  QCOW_LOG_END_SIZE;
+    /* leave some extra room for insert/delete cycle and other QCOW2 journal
+     * insertion
+     */
+    return qcow2_journal_round_to_erase_blocks(qcow2_log_store_nb_entries(order)
+                                               * qcow_log_hash_size
+                                               * QCOW_LOG_STORE_JOURNAL_RATIO);
+}
+
+/* this function resets a log store state
+ *
+ * Clears the kick map, the in RAM hash table and the write buffer; it does
+ * not touch the journal itself.
+ *
+ * @store: the log store to reset
+ * @order: the bit order
+ */
+static void qcow2_log_store_reset(QCowLogStore *store, uint32_t order)
+{
+    store->order = order;
+    store->nb_kicks = 0;
+    qcow2_log_store_reset_kick_map(store);
+    memset(store->hash_table, 0,
+           qcow2_log_store_nb_entries(order) * sizeof(QCowHashInfo));
+    memset(store->write_buf, 0, HASH_STORE_CLUSTER_SIZE);
+    store->write_buf_offset = 0;
+    store->in_buf_offset = 0;
+}
+
+/* This function creates a new log store
+ *
+ * The function does allocate journal disk space
+ *
+ * @store: the log store to work on
+ * @order: the bit order
+ * @ret:   0 on success, -errno on error
+ */
+int qcow2_log_store_create(BlockDriverState *bs,
+                           QCowLogStore *store,
+                           uint32_t order)
+{
+    qcow2_log_store_reset(store, order);
+    return qcow2_journal_disk_allocate(bs,
+               &store->journal,
+               qcow2_log_store_journal_size(store->order));
+}
+
+
+/* Reset the store state and recycle its journal disk space for reuse.
+ *
+ * @store: the log store to recycle
+ * @ret:   0 on success, -errno on error
+ */
+int qcow2_log_store_recycle(BlockDriverState *bs,
+                            QCowLogStore *store)
+{
+    qcow2_log_store_reset(store, store->order);
+    return qcow2_journal_recycle(bs, &store->journal);
+}
+
+/* This function initializes a log store
+ *
+ * Allocates the in RAM hash table, the kick map and the cluster sized write
+ * buffer. It does not allocate journal disk space (see
+ * qcow2_log_store_create for that).
+ *
+ * @store: the log store to initialize
+ * @order: the bit order
+ */
+void qcow2_log_store_init(BlockDriverState *bs,
+                          QCowLogStore *store,
+                          uint32_t order)
+{
+    store->order = order;
+    qcow2_journal_init(bs, &store->journal);
+    store->hash_table = g_new0(QCowHashInfo,
+                               qcow2_log_store_nb_entries(order));
+    store->kick_map = g_new0(bool, qcow2_log_store_nb_entries(order));
+    store->write_buf = qemu_blockalign(bs, HASH_STORE_CLUSTER_SIZE);
+}
+
+/* Free every resource allocated by qcow2_log_store_init */
+void qcow2_log_store_cleanup(QCowLogStore *store)
+{
+    qemu_vfree(store->write_buf);
+    g_free(store->hash_table);
+    g_free(store->kick_map);
+    qcow2_journal_cleanup(&store->journal);
+}
+
+/* true if the hash table slot at in_table_idx holds a valid entry
+ *
+ * The QCOW_LOG_STORE_ENTRY_USED bit of first_logical_sect flags used slots.
+ */
+static bool qcow2_log_store_is_used_by_table_idx(QCowHashInfo *table,
+                                                 uint64_t in_table_idx)
+{
+    QCowHashInfo *entry;
+    entry = &table[in_table_idx];
+    return entry->first_logical_sect & QCOW_LOG_STORE_ENTRY_USED;
+}
+
+/* copy hash_info into the table slot at in_table_idx and mark it used */
+static void qcow2_log_store_copy_to_hash_table(QCowHashInfo *table,
+                                               uint64_t in_table_idx,
+                                               QCowHashInfo *hash_info)
+{
+    QCowHashInfo *entry;
+
+    entry = &table[in_table_idx];
+    memcpy(entry, hash_info, sizeof(QCowHashInfo));
+    entry->first_logical_sect |= QCOW_LOG_STORE_ENTRY_USED;
+}
+
+/* copy the table slot at in_table_idx into hash_info, stripping the
+ * in RAM only QCOW_LOG_STORE_ENTRY_USED flag from the copy
+ */
+static void qcow2_log_store_copy_from_hash_table(QCowHashInfo *hash_info,
+                                                 QCowHashInfo *table,
+                                                 uint64_t in_table_idx)
+{
+    QCowHashInfo *entry;
+
+    entry = &table[in_table_idx];
+    memcpy(hash_info, entry, sizeof(QCowHashInfo));
+    hash_info->first_logical_sect &= ~QCOW_LOG_STORE_ENTRY_USED;
+}
+
+/* Return the alternate bucket index (the "tag") of an entry.
+ *
+ * Given that the entry currently lives in bucket_idx, which must be one of
+ * its two sub-hash locations, return the other location.
+ */
+static uint64_t qcow2_log_store_tag_by_bucket_idx(QCowLogStore *store,
+                                                  QCowHashInfo *hash_info,
+                                                  uint64_t bucket_idx)
+{
+    uint64_t h[2];
+
+    h[0] = qcow2_dedup_h1(hash_info, store->order);
+    h[1] = qcow2_dedup_h2(hash_info, store->order);
+
+    if (bucket_idx == h[0]) {
+        return h[1];
+    }
+
+    if (bucket_idx == h[1]) {
+        return h[0];
+    }
+
+    /* should not pass here: bucket_idx must be one of the two locations */
+    assert(false);
+    return 0;
+}
+
+/* This function calculates the order (number of bits) required
+ *
+ * @min_nb_entries_required: minimal number of entries the log store must
+ *                           handle
+ * @ret:                     number of bits (order) of the sub hash functions
+ */
+int qcow2_log_store_compute_order(uint64_t min_nb_entries_required)
+{
+    uint32_t result = 1;
+    uint64_t min = min_nb_entries_required / QCOW_LOG_STORE_BUCKET_SIZE;
+    while (min) {
+        min >>= 1;
+        result++;
+    }
+    return result;
+}
+
+/* This function gets a hash info from the table given its table index
+ *
+ * It gets the result only if the given hash info has the same hash as the
+ * one stored in the table
+ *
+ * @store:        the store to work on
+ * @hash_info:    the QCowHashInfo we are working with (will be overwritten)
+ * @in_table_idx: the index of the entry in the cuckoo hash table
+ * @ret:          true if hash match else false
+ */
+static int qcow2_log_store_try_get_hash_info(QCowLogStore *store,
+                                             QCowHashInfo *hash_info,
+                                             uint64_t in_table_idx)
+{
+    bool match;
+    QCowHashInfo *entry;
+
+    entry = &store->hash_table[in_table_idx];
+
+    /* NOTE(review): the USED flag is not checked here; this presumably
+     * relies on an all zero hash never matching a real one - confirm
+     */
+    match = !memcmp(&entry->hash.data,
+                    &hash_info->hash.data,
+                    HASH_LENGTH);
+
+    if (match) {
+        /* export the stored entry, stripping the in RAM only USED flag */
+        memcpy(hash_info, entry, sizeof(QCowHashInfo));
+        hash_info->first_logical_sect &= ~QCOW_LOG_STORE_ENTRY_USED;
+    }
+
+    return match;
+}
+
+/* This function tries to get a QCowHashInfo from the store hash_table
+ *
+ * @store:       the store to work on
+ * @hash_info:   the QCowHashInfo we are indexing
+ * @bucket_idx:  the bucket index
+ * @index_only:  if true only return the in bucket index and do not overwrite
+ *                 the QCowHashInfo parameter.
+ *                 This is used to check if we can overwrite a given entry in
+ *                 the bucket.
+ * @ret:         integer in range [0, (QCOW_LOG_STORE_BUCKET_SIZE - 1)] or
+ *                 -1 if not found
+ */
+static int qcow2_log_store_try_get(QCowLogStore *store,
+                                   QCowHashInfo *hash_info,
+                                   uint64_t bucket_idx,
+                                   bool index_only)
+{
+    int i;
+    bool got = false;
+    uint64_t in_table_idx;
+    QCowHashInfo dummy;
+
+    /* if index_only is true we want to work on a dummy QCowHashInfo copy */
+    memcpy(&dummy, hash_info, sizeof(QCowHashInfo));
+
+    /* for each bucket entry */
+    for (i = 0; i < QCOW_LOG_STORE_BUCKET_SIZE; i++) {
+        in_table_idx = bucket_idx * QCOW_LOG_STORE_BUCKET_SIZE + i;
+
+        got = qcow2_log_store_try_get_hash_info(store,
+                                                index_only ? &dummy :hash_info,
+                                                in_table_idx);
+
+        /* the hash info is indexed by this entry */
+        if (got) {
+            return i;
+        }
+    }
+
+    /* no entry to overwrite -> not found */
+    return -1;
+}
+
+/* This function looks for a given hash info in a log store
+ *
+ * @store:     The QCowLogStore to lookup into
+ * @hash_info: The QCowHashInfo to complete : at least the hash must be
+ *             filled. On success the rest of the structure is filled in.
+ * @ret:       true on successful lookup, false if not found
+ */
+bool qcow2_log_store_get(QCowLogStore *store, QCowHashInfo *hash_info)
+{
+    uint64_t h[2], bucket_idx;
+    int ret = 0;
+    int i;
+
+    h[0] = qcow2_dedup_h1(hash_info, store->order);
+    h[1] = qcow2_dedup_h2(hash_info, store->order);
+
+    /* try to get the hash info at the two alternate locations */
+    for (i = 0; i < 2; i++) {
+        bucket_idx = h[i];
+
+        ret = qcow2_log_store_try_get(store, hash_info, bucket_idx, false);
+
+        /* on success (in range [0, (QCOW_LOG_STORE_BUCKET_SIZE - 1)]) */
+        if (ret >= 0) {
+            return true;
+        }
+    }
+
+    /* not found */
+    return false;
+}
+
+/* This function tries to append an entry in a new journal given its table
+ * index
+ *
+ * The nop operation (no append to do) is considered as a success
+ *
+ * @table:        the hash table to work on
+ * @dest:         the new journal to insert the entry into
+ * @in_table_idx: the index in the hash table of the entry to append
+ * @ret:          0 on success, -1 if the new journal is full,
+ *                -EFBIG on bogus entry offset, -errno on error
+ */
+static int qcow2_log_store_migrate_journal_entry_by_index(BlockDriverState *bs,
+                                                       QCowJournal *dest,
+                                                       QCowHashInfo *table,
+                                                       uint64_t in_table_idx)
+{
+    QCowJournalEntry journal_entry;
+    QCowHashInfo hash_info;
+    int64_t offset;
+
+    /* return success on nop (the entry is not used) */
+    if (!qcow2_log_store_is_used_by_table_idx(table, in_table_idx)) {
+        return 0;
+    }
+
+    qcow2_log_store_copy_from_hash_table(&hash_info,
+                                         table,
+                                         in_table_idx);
+
+    qcow2_journal_entry_from_hash_info(&journal_entry, &hash_info);
+
+    /* get the new offset by writing the journal entry in the new journal */
+    offset = qcow2_journal_append(bs, dest, &journal_entry);
+
+    /* error or full */
+    if (offset < 0) {
+        return offset;
+    }
+
+    return 0;
+}
+
+/* Duplicate the in RAM hash table so packing can be aborted at any time
+ * without corrupting the live table.
+ */
+static void qcow2_log_store_copy_hash_table(QCowLogStore *store)
+{
+    store->hash_table_copy = g_new0(QCowHashInfo,
+                                    qcow2_log_store_nb_entries(store->order));
+    memcpy(store->hash_table_copy, store->hash_table,
+           qcow2_log_store_nb_entries(store->order) * sizeof(QCowHashInfo));
+}
+
+/* this function does the real work of packing the store journal into a new
+ * one
+ *
+ * The function takes care of flushing the new journal
+ *
+ * @store:       the store to work on
+ * @dest:        the destination journal
+ * @ret:          0 on success, -1 if the new journal is full,
+ *                -EFBIG on bogus entry offset, -errno on error
+ */
+static int qcow2_log_store_do_pack_journal(BlockDriverState *bs,
+                                           QCowLogStore *store,
+                                           QCowJournal *dest)
+{
+    uint64_t i;
+    int ret = 0;
+
+    /* we will insert each used QCowLogStore entry into the new journal */
+    for (i = 0; i < qcow2_log_store_nb_entries(store->order); i++) {
+        ret = qcow2_log_store_migrate_journal_entry_by_index(bs,
+                                                         dest,
+                                                         store->hash_table_copy,
+                                                         i);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    return qcow2_journal_flush(bs, dest);
+}
+
+/* Swap in the freshly packed journal and hash table copy.
+ *
+ * The old journal disk space is released and the old in RAM hash table is
+ * freed; the store then takes ownership of new_journal's content and of the
+ * packed hash table copy.
+ */
+static void qcow2_log_store_commit_new_journal(BlockDriverState *bs,
+                                               QCowLogStore *store,
+                                               QCowJournal *new_journal)
+{
+    /* cleanup old data */
+    qcow2_journal_disk_deallocate(bs, &store->journal);
+    g_free(store->hash_table);
+    /* commit new journal and hash table.
+     * Copy sizeof(*new_journal) bytes: the original sizeof(new_journal) was
+     * the size of the pointer and copied only part of the structure.
+     */
+    memcpy(&store->journal, new_journal, sizeof(*new_journal));
+    store->hash_table = store->hash_table_copy;
+    store->hash_table_copy = NULL;
+}
+
+/* This function packs a log store journal into a new one
+ *
+ * This is used to manage pathological cyclic insertion/deletion cases or the
+ * case where a log store journal is full due to too many entries inserted by
+ * other QCOW2 users.
+ * As this event should be rare we don't bother recycling the journal
+ * disk space. As a consequence the QCOW2 file may grow.
+ *
+ * @store:     the log store from which the journal must be packed
+ * @need_save: will be set to true if new journal config must be saved later
+ *               by the upper layers
+ * @ret:       0 on success, -1 if the new journal is full,
+ *               -EFBIG on bogus entry offset, -errno on error
+ */
+static int qcow2_log_store_pack_journal(BlockDriverState *bs,
+                                        QCowLogStore *store,
+                                        bool *need_save)
+{
+    QCowJournal new_journal;
+    int ret = 0;
+    /* we won't need to save journal config on failure */
+    *need_save = false;
+
+    /* TODO: insert journal replay here if other part of QCOW make use of it */
+
+    /* we flush the journal first */
+    ret = qcow2_journal_flush(bs, &store->journal);
+
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* we allocate a new journal, we will copy used entry into */
+    qcow2_journal_init(bs, &new_journal);
+    ret = qcow2_journal_disk_allocate(bs,
+              &new_journal,
+              qcow2_log_store_journal_size(store->order));
+
+    if (ret < 0) {
+        /* release what qcow2_journal_init() prepared: the original code
+         * returned without cleaning up the freshly initialized journal
+         */
+        qcow2_journal_cleanup(&new_journal);
+        return ret;
+    }
+
+    /* copy the hash table to be able to abort packing at any time */
+    qcow2_log_store_copy_hash_table(store);
+
+    ret = qcow2_log_store_do_pack_journal(bs,
+                                          store,
+                                          &new_journal);
+
+    if (ret < 0) {
+        goto free_dealloc_error_exit;
+    }
+
+    qcow2_log_store_commit_new_journal(bs, store, &new_journal);
+
+    /* tell the upper layer to save the new journal config to disk */
+    *need_save = true;
+
+    return 0;
+
+free_dealloc_error_exit:
+    g_free(store->hash_table_copy);
+    /* don't leave a dangling pointer behind us */
+    store->hash_table_copy = NULL;
+    qcow2_journal_disk_deallocate(bs, &new_journal);
+    qcow2_journal_cleanup(&new_journal);
+    return ret;
+}
+
+/* This function appends a QCowHashInfo to the journal
+ *
+ * The function will try to pack the journal and redo the insert if the
+ * journal is full
+ *
+ * @store:      the QCowLogStore to work with
+ * @hash_info:  the QCowHashInfo to insert into the store
+ * @need_save:  will be set to true if new journal info must be saved later
+ * @ret:        the offset of the QCowJournalEntry on success, -1 if a journal
+ *              packing failed or journal is full and unpackable,
+ *              -EFBIG on bogus entry offset while packing,
+ *              -errno on error
+ */
+static int64_t qcow2_log_store_append_to_journal(BlockDriverState *bs,
+                                                 QCowLogStore *store,
+                                                 QCowHashInfo *hash_info,
+                                                 bool *need_save)
+{
+    QCowJournalEntry journal_entry;
+    int ret = 0;
+    int64_t offset;
+    *need_save = false;
+
+    qcow2_journal_entry_from_hash_info(&journal_entry, hash_info);
+
+    offset = qcow2_journal_append(bs, &store->journal, &journal_entry);
+
+    /* the journal is full : maybe a pathological cyclic insertion/deletion
+     * pattern made it full.
+     * We will create a new one and repack the current journal into it.
+     */
+    if (offset == -1) {
+        ret = qcow2_log_store_pack_journal(bs, store, need_save);
+
+        if (ret < 0) {
+            return ret;
+        }
+
+        /* retry the append in the freshly packed journal */
+        offset = qcow2_journal_append(bs, &store->journal, &journal_entry);
+    }
+
+    return offset;
+}
+
+/* This function finds the first free entry index
+ *
+ * @store:      the store to work on
+ * @bucket_idx: the index of the bucket to scan
+ * @ret:        integer in range [0, (QCOW_LOG_STORE_BUCKET_SIZE - 1)]
+ *              or -1 if no room found
+ */
+static int qcow2_log_store_first_free_entry_idx(QCowLogStore *store,
+                                                uint64_t bucket_idx)
+{
+    uint64_t in_table_idx;
+    bool used;
+    int i;
+
+    /* for each entry of the bucket */
+    for (i = 0; i < QCOW_LOG_STORE_BUCKET_SIZE; i++) {
+        in_table_idx = bucket_idx * QCOW_LOG_STORE_BUCKET_SIZE + i;
+
+        /* return index of the first free entry */
+        used = qcow2_log_store_is_used_by_table_idx(store->hash_table,
+                                                    in_table_idx);
+
+        if (!used) {
+            return i;
+        }
+    }
+
+    /* no free entry found -> place not found */
+    return -1;
+}
+
+/* This function recursively kicks an entry to its alternate location
+ *
+ * @store:            the QCowLogStore to work on
+ * @entry:            the entry to kick (zeroed once it has been moved)
+ * @entry_bucket_idx: the index of the bucket currently holding the entry
+ * @ret:              0 on success, -1 if the hash table is full, -2 on kick
+ *                        path loop
+ */
+static int qcow2_log_store_kick_entry(QCowLogStore *store,
+                                      QCowHashInfo *entry,
+                                      uint64_t entry_bucket_idx)
+{
+    QCowHashInfo *dest_entry;
+    int ret = 0, in_bucket_idx;
+    uint64_t in_table_idx, tag;
+
+    /* is hash table full.
+     * NOTE(review): compared with == ; if nb_kicks could ever skip past the
+     * limit this guard would never fire - confirm whether >= is safer
+     */
+    if (store->nb_kicks == (QCOW_LOG_STORE_MAX_KICKS - 1)) {
+        return -1;
+    }
+
+    store->nb_kicks++;
+
+    /* the alternate bucket of the entry being kicked */
+    tag = qcow2_log_store_tag_by_bucket_idx(store, entry, entry_bucket_idx);
+
+    /* look if we can insert the kicked entry in the alternate location */
+    in_bucket_idx = qcow2_log_store_first_free_entry_idx(store, tag);
+
+    /* no room found in the alternate location we must kick again an entry */
+    if (in_bucket_idx == -1) {
+        in_bucket_idx = qcow2_log_store_get_kick_idx(store, tag);
+
+        /* the kick path is doing a loop */
+        if (in_bucket_idx == -2) {
+            return -2;
+        }
+
+        in_table_idx = tag * QCOW_LOG_STORE_BUCKET_SIZE + in_bucket_idx;
+        dest_entry = &store->hash_table[in_table_idx];
+
+        /* kick the random entry */
+        ret = qcow2_log_store_kick_entry(store,
+                                         dest_entry,
+                                         tag);
+
+        /* if table is full or kick path make a loop: propagate to the caller */
+        if (ret < 0) {
+            qcow2_log_store_unset_from_kick_path(store, in_table_idx);
+            return ret;
+        }
+    }
+
+    /* free room found, compute the index in the hash table */
+    in_table_idx = tag * QCOW_LOG_STORE_BUCKET_SIZE + in_bucket_idx;
+    /* the old bucket index become the alternate location (tag) */
+    qcow2_log_store_copy_to_hash_table(store->hash_table,
+                                       in_table_idx,
+                                       entry);
+    memset(entry, 0, sizeof(QCowHashInfo));
+    qcow2_log_store_unset_from_kick_path(store, in_table_idx);
+    return 0;
+}
+
+/* Look for a slot already holding the same hash in either alternate bucket.
+ *
+ * Used to overwrite an existing entry instead of inserting a duplicate.
+ *
+ * @ret: index in the hash table or -1 if not found
+ */
+static int64_t qcow2_log_store_find_overwrite_idx(QCowLogStore *store,
+                                                  QCowHashInfo *hash_info)
+{
+    uint64_t h[2];
+    int i, idx;
+
+    h[0] = qcow2_dedup_h1(hash_info, store->order);
+    h[1] = qcow2_dedup_h2(hash_info, store->order);
+
+    for (i = 0; i < 2; i++) {
+        idx = qcow2_log_store_try_get(store,
+                                      hash_info,
+                                      h[i],      /* index */
+                                      true);
+
+        /* no suitable place found -> continue */
+        if (idx == -1) {
+            continue;
+        }
+
+        return h[i] * QCOW_LOG_STORE_BUCKET_SIZE + idx;
+    }
+
+    return -1;
+}
+
+/* Find a free slot in either of the two alternate buckets.
+ *
+ * @ret: index in the hash table or -1 if both buckets are full
+ */
+static int64_t qcow2_log_store_find_any_free_idx(QCowLogStore *store,
+                                                 QCowHashInfo *hash_info)
+{
+    uint64_t h[2];
+    int i, idx;
+
+    h[0] = qcow2_dedup_h1(hash_info, store->order);
+    h[1] = qcow2_dedup_h2(hash_info, store->order);
+
+    /* try to find a free entry in either alternate location */
+    for (i = 0; i < 2; i++) {
+        idx = qcow2_log_store_first_free_entry_idx(store, h[i]);
+
+        /* no suitable place found -> continue */
+        if (idx == -1) {
+            continue;
+        }
+
+        return h[i] * QCOW_LOG_STORE_BUCKET_SIZE + idx;
+    }
+
+    /* no suitable place found */
+    return -1;
+}
+
+/* this function scans the two alternative buckets of a given QCowHashInfo
+ * and returns an entry index if a suitable place is found
+ *
+ * @store:     the QCowLogStore to work on
+ * @hash_info: the QCowHashInfo we are trying to index
+ * @ret:       index in the hash table or -1 if room not found
+ */
+static int64_t qcow2_log_store_scan_canditates_buckets(QCowLogStore *store,
+                                                       QCowHashInfo *hash_info)
+{
+    /* int64_t, not int: the helpers return full table indexes and storing
+     * them in an int could truncate the value for very large tables
+     */
+    int64_t ret = 0;
+
+    /* we try to overwrite in the two alternate location first because
+     * a kick may have swapped entries and we don't want duplicates.
+     */
+    ret = qcow2_log_store_find_overwrite_idx(store, hash_info);
+
+    /* place found return it */
+    if (ret != -1) {
+        return ret;
+    }
+
+    /* else try to find any free entry in the two alternate location */
+    return qcow2_log_store_find_any_free_idx(store, hash_info);
+}
+
+/* This function will return the index of the hash table entry to copy into
+ *
+ * It's a two step process : first the two alternate buckets are checked.
+ *     If they are not suitable the first entry of the first bucket is kicked
+ *     to its alternate location. This process is recursive up to
+ *     QCOW_LOG_STORE_MAX_KICKS times.
+ *
+ * @store:     the QCowLogStore to work on
+ * @hash_info: the QCowHashInfo we are trying to index
+ * @ret:       the index (in the hash table) or -1 if the hash table is full
+ */
+static int64_t qcow2_log_store_get_in_table_idx(QCowLogStore *store,
+                                                QCowHashInfo *hash_info)
+{
+    QCowHashInfo *dest_entry;
+    int ret = 0, kick_retry;
+    uint64_t h[2];
+    int64_t in_table_idx;
+    int in_bucket_idx;
+
+    /* first check if any of the two candidate buckets has some room to do an
+     * insertion. If so return the index
+     */
+    in_table_idx = qcow2_log_store_scan_canditates_buckets(store, hash_info);
+
+    /* room available */
+    if (in_table_idx >= 0) {
+        return in_table_idx;
+    }
+
+    h[0] = qcow2_dedup_h1(hash_info, store->order);
+    h[1] = qcow2_dedup_h2(hash_info, store->order);
+
+    /* we try 10 times to kick in order to try different kick paths and
+     * avoid kick path loops
+     */
+    kick_retry = 10;
+    do {
+        store->nb_kicks = 0;
+
+        /* clear the previous kick path start. On the first iteration
+         * in_table_idx is still -1 (nothing found above) and must not be
+         * used as an index: the original code indexed the kick map with -1
+         * which is an out of bounds write
+         */
+        if (in_table_idx >= 0) {
+            qcow2_log_store_unset_from_kick_path(store, in_table_idx);
+        }
+
+        /* take the first not yet pushed entry of the h[0] bucket */
+        in_bucket_idx = qcow2_log_store_get_kick_idx(store, h[0]);
+
+        /* kick path cycle found: flag it in ret so the loop retries.
+         * The original code left ret at its previous value (0 on the first
+         * iteration) and could fall out of the loop returning a bogus index
+         */
+        if (in_bucket_idx == -2) {
+            in_table_idx = -1;
+            ret = -2;
+            continue;
+        }
+
+        in_table_idx = h[0] * QCOW_LOG_STORE_BUCKET_SIZE + in_bucket_idx;
+
+        dest_entry = &store->hash_table[in_table_idx];
+
+        ret = qcow2_log_store_kick_entry(store, dest_entry, h[0]);
+    /* decrement the retry counter: the original condition never changed
+     * kick_retry and could loop forever on a persistent kick path cycle
+     */
+    } while (ret == -2 && --kick_retry);
+
+    /* hash table is full, or kick path was cycling multiple time */
+    if (ret < 0) {
+        return -1;
+    }
+
+    qcow2_log_store_unset_from_kick_path(store, in_table_idx);
+    return in_table_idx;
+}
+
+/* This function inserts a given QCowHashInfo in the log store
+ *
+ * This function will overwrite an already inserted entry
+ *
+ * @store:     the QCowLogStore to insert into
+ * @hash_info: the QCowHashInfo to insert : at least the hash must be filled
+ * @need_save: will be set to true if new journal info must be saved later
+ * @ret:       0 on success, 1 if the log store is full, -errno on error
+ */
+int qcow2_log_store_insert(BlockDriverState *bs, QCowLogStore *store,
+                           QCowHashInfo *hash_info, bool *need_save)
+{
+    int64_t in_table_idx, offset;
+
+    *need_save = false;
+
+    /* find the place where to insert the entry in the cuckoo hash table */
+    in_table_idx = qcow2_log_store_get_in_table_idx(store, hash_info);
+
+    /* hash table is full so log store is full -> need to freeze log store */
+    if (in_table_idx == -1) {
+        return 1;
+    }
+
+    offset = qcow2_log_store_append_to_journal(bs, store, hash_info, need_save);
+
+    /* the journal is full and packing failed so we should treat the log
+     * store as full
+     */
+    if (offset == -1) {
+        return 1;
+    }
+
+    /* journal appending failed */
+    if (offset < 0) {
+        return offset;
+    }
+
+    /* finally index the entry in the in RAM cuckoo hash table */
+    qcow2_log_store_copy_to_hash_table(store->hash_table,
+                                       in_table_idx,
+                                       hash_info);
+
+    return 0;
+}
+
+/* This function delegates a flush to the QCowJournal of the log store
+ *
+ * @store: the QCowLogStore to work with
+ * @ret:   0 on success, -errno on error
+ */
+int qcow2_log_store_flush(BlockDriverState *bs, QCowLogStore *store)
+{
+    return qcow2_journal_flush(bs, &store->journal);
+}
+
+/* This function flushes and stops the log store journal
+ *
+ * @store: the QCowLogStore to work with
+ * @ret:   0 on success, -errno on error
+ */
+int qcow2_log_store_stop(BlockDriverState *bs, QCowLogStore *store)
+{
+    return qcow2_journal_stop(bs, &store->journal);
+}
+
+/* This function indexes a QCowJournalEntry into the cuckoo hash table
+ *
+ * @store:  the QCowLogStore to work on
+ * @entry:  the journal entry to validate, convert and index
+ * @offset: the offset of the entry in the journal
+ * @ret:    0 on success, -EINVAL on bogus journal entry
+ */
+static int qcow2_log_store_index_journal_entry(BlockDriverState *bs,
+                                               QCowLogStore *store,
+                                               QCowJournalEntry *entry,
+                                               uint64_t offset)
+{
+    QCowHashInfo hash_info;
+    int64_t in_table_idx;
+    int ret = 0;
+
+    ret = qcow2_hash_info_from_journal_entry(&hash_info, entry);
+
+    /* bogus entry size or invalid type */
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* find the place where to insert the entry in the cuckoo hash table */
+    in_table_idx = qcow2_log_store_get_in_table_idx(store, &hash_info);
+
+    /* the cuckoo hash table should not be full when rebuilding */
+    assert(in_table_idx >= 0);
+
+    /* the full hash info is stored in the in-RAM hash table.
+     * NOTE(review): the offset parameter is unused here — confirm whether
+     * storing the journal offset/tag was intended instead.
+     */
+    qcow2_log_store_copy_to_hash_table(store->hash_table,
+                                       in_table_idx,
+                                       &hash_info);
+
+    return 0;
+}
+
+/* This function rebuilds the log store from its journal
+ *
+ * @store: the QCowLogStore to rebuild
+ * @ret:   0 on success, -errno on error, -EINVAL on bogus journal entry
+ */
+int qcow2_log_store_rebuild_from_journal(BlockDriverState *bs,
+                                         QCowLogStore *store)
+{
+    QCowJournalEntry entry;
+    uint64_t offset = 0;
+    int ret = 0, size = 0;
+
+    /* we walk through the journal (qcow2_journal_read is an iterator) */
+    size = qcow2_journal_read(bs, &store->journal, offset, &entry);
+    /* we stop on an obviously bogus sized entry or at the end of journal */
+    while (size >= QCOW_LOG_END_SIZE && entry.type != QCOW_LOG_NONE) {
+        /* it's a hash: index it */
+        if (entry.type == QCOW_LOG_HASH) {
+            ret = qcow2_log_store_index_journal_entry(bs,
+                                                      store,
+                                                      &entry,
+                                                      offset);
+
+        }
+        /* bogus entry -> return error */
+        if (ret < 0) {
+            return ret;
+        }
+        /* read next journal entry */
+        offset += size;
+        size = qcow2_journal_read(bs, &store->journal, offset, &entry);
+    }
+
+    /* bogus entry size */
+    if (size >= 0 &&
+        size < QCOW_LOG_END_SIZE &&
+        entry.type != QCOW_LOG_NONE) {
+        return -EINVAL;
+    }
+
+    /* on error */
+    if (size < 0) {
+        return size;
+    }
+
+    /* now inform the journal that it must append to existing data */
+    return qcow2_journal_resume(bs, &store->journal, offset);
+}
+
+/* the on disk size of a log store dump
+ *
+ * @ret: the number of bytes a dumped log store occupies on disk
+ */
+size_t qcow2_log_store_dump_size(void)
+{
+    /* sizeof order + sizeof journal */
+    return sizeof(uint32_t) + qcow2_journal_dump_size();
+}
+
+/* This function dumps a log store's info into a buffer
+ *
+ * @buf:   the buffer to dump into
+ * @store: the log store to dump
+ * @ret:   the size of the dump
+ */
+size_t qcow2_log_store_dump(uint8_t *buf, QCowLogStore *store)
+{
+    /* NOTE(review): assumes buf is suitably aligned for a 32-bit access —
+     * verify callers
+     */
+    uint32_t *buf32 = (uint32_t *) buf;
+
+    /* the order is stored in big endian on disk */
+    buf32[0] = cpu_to_be32(store->order);
+
+    return sizeof(store->order) +
+           qcow2_journal_dump(buf + sizeof(store->order),
+                              &store->journal);
+}
+
+/* This function parses a log store from disk
+ *
+ * @store: the store to parse into
+ * @buf:   the buffer to parse from
+ * @ret:   the size of the parsed data
+ */
+size_t qcow2_log_store_parse(QCowLogStore *store, uint8_t *buf)
+{
+    /* NOTE(review): assumes buf is suitably aligned for a 32-bit access —
+     * verify callers
+     */
+    uint32_t *buf32 = (uint32_t *) buf;
+    uint32_t order;
+
+    /* the order is stored in big endian on disk */
+    order = be32_to_cpu(buf32[0]);
+    qcow2_log_store_reset(store, order);
+    qcow2_journal_parse(&store->journal, buf + sizeof(order));
+    return qcow2_log_store_dump_size();
+}
diff --git a/block/qcow2.h b/block/qcow2.h
index adde631..d347471 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -635,4 +635,30 @@  size_t qcow2_journal_dump_size(void);
 size_t qcow2_journal_dump(uint8_t *buf, QCowJournal *journal);
 void qcow2_journal_parse(QCowJournal *journal, uint8_t *buf);
 
+/* qcow2-log-store.c functions */
+
+uint64_t qcow2_dedup_h1(QCowHashInfo *hash_info, uint32_t order);
+uint64_t qcow2_dedup_h2(QCowHashInfo *hash_info, uint32_t order);
+uint64_t qcow2_log_store_nb_entries(uint32_t order);
+int qcow2_log_store_create(BlockDriverState *bs,
+                           QCowLogStore *store,
+                           uint32_t order);
+int qcow2_log_store_recycle(BlockDriverState *bs,
+                            QCowLogStore *store);
+void qcow2_log_store_init(BlockDriverState *bs,
+                          QCowLogStore *store,
+                          uint32_t order);
+void qcow2_log_store_cleanup(QCowLogStore *store);
+int qcow2_log_store_compute_order(uint64_t min_nb_entries_required);
+bool qcow2_log_store_get(QCowLogStore *store, QCowHashInfo *hash_info);
+int qcow2_log_store_insert(BlockDriverState *bs, QCowLogStore *store,
+                           QCowHashInfo *hash_info, bool *need_save);
+int qcow2_log_store_flush(BlockDriverState *bs, QCowLogStore *store);
+int qcow2_log_store_stop(BlockDriverState *bs, QCowLogStore *store);
+int qcow2_log_store_rebuild_from_journal(BlockDriverState *bs,
+                                         QCowLogStore *store);
+size_t qcow2_log_store_dump_size(void);
+size_t qcow2_log_store_dump(uint8_t *buf, QCowLogStore *store);
+size_t qcow2_log_store_parse(QCowLogStore *store, uint8_t *buf);
+
 #endif