Patchwork [v5,2/3] qcow2: Use QcowCache

login
register
mail settings
Submitter Kevin Wolf
Date Jan. 24, 2011, 4:08 p.m.
Message ID <1295885307-23926-3-git-send-email-kwolf@redhat.com>
Download mbox | patch
Permalink /patch/80219/
State New
Headers show

Comments

Kevin Wolf - Jan. 24, 2011, 4:08 p.m.
Use the new functions of qcow2-cache.c for everything that works on refcount
block and L2 tables.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/qcow2-cache.c    |   10 ++
 block/qcow2-cluster.c  |  208 +++++++++++++-------------------------
 block/qcow2-refcount.c |  260 ++++++++++++++++++++----------------------------
 block/qcow2.c          |   48 ++++++++-
 block/qcow2.h          |   12 ++-
 5 files changed, 240 insertions(+), 298 deletions(-)

Patch

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index f7c4e2a..5f1740b 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -104,6 +104,12 @@  static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
         }
     }
 
+    if (c == s->refcount_block_cache) {
+        BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
+    } else if (c == s->l2_table_cache) {
+        BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
+    }
+
     ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->entries[i].table,
         s->cluster_size);
     if (ret < 0) {
@@ -218,6 +224,10 @@  static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
 
     c->entries[i].offset = 0;
     if (read_from_disk) {
+        if (c == s->l2_table_cache) {
+            BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
+        }
+
         ret = bdrv_pread(bs->file, offset, c->entries[i].table, s->cluster_size);
         if (ret < 0) {
             return ret;
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index c3ef550..76e7e07 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -67,7 +67,11 @@  int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size)
         qemu_free(new_l1_table);
         return new_l1_table_offset;
     }
-    bdrv_flush(bs->file);
+
+    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+    if (ret < 0) {
+        return ret;
+    }
 
     BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
     for(i = 0; i < s->l1_size; i++)
@@ -98,63 +102,6 @@  int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size)
     return ret;
 }
 
-void qcow2_l2_cache_reset(BlockDriverState *bs)
-{
-    BDRVQcowState *s = bs->opaque;
-
-    memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
-    memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t));
-    memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t));
-}
-
-static inline int l2_cache_new_entry(BlockDriverState *bs)
-{
-    BDRVQcowState *s = bs->opaque;
-    uint32_t min_count;
-    int min_index, i;
-
-    /* find a new entry in the least used one */
-    min_index = 0;
-    min_count = 0xffffffff;
-    for(i = 0; i < L2_CACHE_SIZE; i++) {
-        if (s->l2_cache_counts[i] < min_count) {
-            min_count = s->l2_cache_counts[i];
-            min_index = i;
-        }
-    }
-    return min_index;
-}
-
-/*
- * seek_l2_table
- *
- * seek l2_offset in the l2_cache table
- * if not found, return NULL,
- * if found,
- *   increments the l2 cache hit count of the entry,
- *   if counter overflow, divide by two all counters
- *   return the pointer to the l2 cache entry
- *
- */
-
-static uint64_t *seek_l2_table(BDRVQcowState *s, uint64_t l2_offset)
-{
-    int i, j;
-
-    for(i = 0; i < L2_CACHE_SIZE; i++) {
-        if (l2_offset == s->l2_cache_offsets[i]) {
-            /* increment the hit count */
-            if (++s->l2_cache_counts[i] == 0xffffffff) {
-                for(j = 0; j < L2_CACHE_SIZE; j++) {
-                    s->l2_cache_counts[j] >>= 1;
-                }
-            }
-            return s->l2_cache + (i << s->l2_bits);
-        }
-    }
-    return NULL;
-}
-
 /*
  * l2_load
  *
@@ -169,33 +116,11 @@  static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
     uint64_t **l2_table)
 {
     BDRVQcowState *s = bs->opaque;
-    int min_index;
     int ret;
 
-    /* seek if the table for the given offset is in the cache */
-
-    *l2_table = seek_l2_table(s, l2_offset);
-    if (*l2_table != NULL) {
-        return 0;
-    }
-
-    /* not found: load a new entry in the least used one */
-
-    min_index = l2_cache_new_entry(bs);
-    *l2_table = s->l2_cache + (min_index << s->l2_bits);
-
-    BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
-    ret = bdrv_pread(bs->file, l2_offset, *l2_table,
-        s->l2_size * sizeof(uint64_t));
-    if (ret < 0) {
-        qcow2_l2_cache_reset(bs);
-        return ret;
-    }
-
-    s->l2_cache_offsets[min_index] = l2_offset;
-    s->l2_cache_counts[min_index] = 1;
+    ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, (void**) l2_table);
 
-    return 0;
+    return ret;
 }
 
 /*
@@ -238,7 +163,6 @@  static int write_l1_entry(BlockDriverState *bs, int l1_index)
 static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
 {
     BDRVQcowState *s = bs->opaque;
-    int min_index;
     uint64_t old_l2_offset;
     uint64_t *l2_table;
     int64_t l2_offset;
@@ -252,29 +176,48 @@  static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
     if (l2_offset < 0) {
         return l2_offset;
     }
-    bdrv_flush(bs->file);
+
+    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+    if (ret < 0) {
+        goto fail;
+    }
 
     /* allocate a new entry in the l2 cache */
 
-    min_index = l2_cache_new_entry(bs);
-    l2_table = s->l2_cache + (min_index << s->l2_bits);
+    ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table);
+    if (ret < 0) {
+        return ret;
+    }
+
+    l2_table = *table;
 
     if (old_l2_offset == 0) {
         /* if there was no old l2 table, clear the new table */
         memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
     } else {
+        uint64_t* old_table;
+
         /* if there was an old l2 table, read it from the disk */
         BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ);
-        ret = bdrv_pread(bs->file, old_l2_offset, l2_table,
-            s->l2_size * sizeof(uint64_t));
+        ret = qcow2_cache_get(bs, s->l2_table_cache, old_l2_offset,
+            (void**) &old_table);
+        if (ret < 0) {
+            goto fail;
+        }
+
+        memcpy(l2_table, old_table, s->cluster_size);
+
+        ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &old_table);
         if (ret < 0) {
             goto fail;
         }
     }
+
     /* write the l2 table to the file */
     BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
-    ret = bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
-        s->l2_size * sizeof(uint64_t));
+
+    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+    ret = qcow2_cache_flush(bs, s->l2_table_cache);
     if (ret < 0) {
         goto fail;
     }
@@ -286,17 +229,12 @@  static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
         goto fail;
     }
 
-    /* update the l2 cache entry */
-
-    s->l2_cache_offsets[min_index] = l2_offset;
-    s->l2_cache_counts[min_index] = 1;
-
     *table = l2_table;
     return 0;
 
 fail:
+    qcow2_cache_put(bs, s->l2_table_cache, (void**) table);
     s->l1_table[l1_index] = old_l2_offset;
-    qcow2_l2_cache_reset(bs);
     return ret;
 }
 
@@ -521,6 +459,8 @@  int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
                 &l2_table[l2_index], 0, QCOW_OFLAG_COPIED);
     }
 
+    qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+
    nb_available = (c * s->cluster_sectors);
 out:
     if (nb_available > nb_needed)
@@ -575,6 +515,7 @@  static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
             return ret;
         }
     } else {
+        /* FIXME Order */
         if (l2_offset)
             qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t));
         ret = l2_allocate(bs, l1_index, &l2_table);
@@ -632,6 +573,7 @@  uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
 
     cluster_offset = qcow2_alloc_bytes(bs, compressed_size);
     if (cluster_offset < 0) {
+        qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
         return 0;
     }
 
@@ -646,38 +588,14 @@  uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
     /* compressed clusters never have the copied flag */
 
     BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
+    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
     l2_table[l2_index] = cpu_to_be64(cluster_offset);
-    if (bdrv_pwrite_sync(bs->file,
-                    l2_offset + l2_index * sizeof(uint64_t),
-                    l2_table + l2_index,
-                    sizeof(uint64_t)) < 0)
-        return 0;
-
-    return cluster_offset;
-}
-
-/*
- * Write L2 table updates to disk, writing whole sectors to avoid a
- * read-modify-write in bdrv_pwrite
- */
-#define L2_ENTRIES_PER_SECTOR (512 / 8)
-static int write_l2_entries(BlockDriverState *bs, uint64_t *l2_table,
-    uint64_t l2_offset, int l2_index, int num)
-{
-    int l2_start_index = l2_index & ~(L1_ENTRIES_PER_SECTOR - 1);
-    int start_offset = (8 * l2_index) & ~511;
-    int end_offset = (8 * (l2_index + num) + 511) & ~511;
-    size_t len = end_offset - start_offset;
-    int ret;
-
-    BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
-    ret = bdrv_pwrite(bs->file, l2_offset + start_offset,
-        &l2_table[l2_start_index], len);
+    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
     if (ret < 0) {
-        return ret;
+        return 0;
     }
 
-    return 0;
+    return cluster_offset;
 }
 
 int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
@@ -686,6 +604,7 @@  int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
     int i, j = 0, l2_index, ret;
     uint64_t *old_cluster, start_sect, l2_offset, *l2_table;
     uint64_t cluster_offset = m->cluster_offset;
+    bool cow = false;
 
     if (m->nb_clusters == 0)
         return 0;
@@ -695,6 +614,7 @@  int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
     /* copy content of unmodified sectors */
     start_sect = (m->offset & ~(s->cluster_size - 1)) >> 9;
     if (m->n_start) {
+        cow = true;
         ret = copy_sectors(bs, start_sect, cluster_offset, 0, m->n_start);
         if (ret < 0)
             goto err;
@@ -702,17 +622,30 @@  int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
 
     if (m->nb_available & (s->cluster_sectors - 1)) {
         uint64_t end = m->nb_available & ~(uint64_t)(s->cluster_sectors - 1);
+        cow = true;
         ret = copy_sectors(bs, start_sect + end, cluster_offset + (end << 9),
                 m->nb_available - end, s->cluster_sectors);
         if (ret < 0)
             goto err;
     }
 
-    /* update L2 table */
+    /*
+     * Update L2 table.
+     *
+     * Before we update the L2 table to actually point to the new cluster, we
+     * need to be sure that the refcounts have been increased and COW was
+     * handled.
+     */
+    if (cow) {
+        bdrv_flush(bs->file);
+    }
+
+    qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache);
     ret = get_cluster_table(bs, m->offset, &l2_table, &l2_offset, &l2_index);
     if (ret < 0) {
         goto err;
     }
+    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
 
     for (i = 0; i < m->nb_clusters; i++) {
         /* if two concurrent writes happen to the same unallocated cluster
@@ -728,16 +661,9 @@  int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
                     (i << s->cluster_bits)) | QCOW_OFLAG_COPIED);
      }
 
-    /*
-     * Before we update the L2 table to actually point to the new cluster, we
-     * need to be sure that the refcounts have been increased and COW was
-     * handled.
-     */
-    bdrv_flush(bs->file);
 
-    ret = write_l2_entries(bs, l2_table, l2_offset, l2_index, m->nb_clusters);
+    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
     if (ret < 0) {
-        qcow2_l2_cache_reset(bs);
         goto err;
     }
 
@@ -746,7 +672,6 @@  int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
      * Also flush bs->file to get the right order for L2 and refcount update.
      */
     if (j != 0) {
-        bdrv_flush(bs->file);
         for (i = 0; i < j; i++) {
             qcow2_free_any_clusters(bs,
                 be64_to_cpu(old_cluster[i]) & ~QCOW_OFLAG_COPIED, 1);
@@ -868,7 +793,8 @@  int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
                 m->depends_on = old_alloc;
                 m->nb_clusters = 0;
                 *num = 0;
-                return 0;
+                ret = 0;
+                goto fail;
             }
         }
     }
@@ -884,7 +810,8 @@  int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
     cluster_offset = qcow2_alloc_clusters(bs, nb_clusters * s->cluster_size);
     if (cluster_offset < 0) {
         QLIST_REMOVE(m, next_in_flight);
-        return cluster_offset;
+        ret = cluster_offset;
+        goto fail;
     }
 
     /* save info needed for meta data update */
@@ -893,12 +820,21 @@  int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
     m->nb_clusters = nb_clusters;
 
 out:
+    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+    if (ret < 0) {
+        return ret;
+    }
+
     m->nb_available = MIN(nb_clusters << (s->cluster_bits - 9), n_end);
     m->cluster_offset = cluster_offset;
 
     *num = m->nb_available - n_start;
 
     return 0;
+
+fail:
+    qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+    return ret;
 }
 
 static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index a10453c..e37e226 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -32,27 +32,6 @@  static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
                             int addend);
 
 
-static int cache_refcount_updates = 0;
-
-static int write_refcount_block(BlockDriverState *bs)
-{
-    BDRVQcowState *s = bs->opaque;
-    size_t size = s->cluster_size;
-
-    if (s->refcount_block_cache_offset == 0) {
-        return 0;
-    }
-
-    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE);
-    if (bdrv_pwrite_sync(bs->file, s->refcount_block_cache_offset,
-            s->refcount_block_cache, size) < 0)
-    {
-        return -EIO;
-    }
-
-    return 0;
-}
-
 /*********************************************************/
 /* refcount handling */
 
@@ -61,7 +40,6 @@  int qcow2_refcount_init(BlockDriverState *bs)
     BDRVQcowState *s = bs->opaque;
     int ret, refcount_table_size2, i;
 
-    s->refcount_block_cache = qemu_malloc(s->cluster_size);
     refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
     s->refcount_table = qemu_malloc(refcount_table_size2);
     if (s->refcount_table_size > 0) {
@@ -81,34 +59,22 @@  int qcow2_refcount_init(BlockDriverState *bs)
 void qcow2_refcount_close(BlockDriverState *bs)
 {
     BDRVQcowState *s = bs->opaque;
-    qemu_free(s->refcount_block_cache);
     qemu_free(s->refcount_table);
 }
 
 
 static int load_refcount_block(BlockDriverState *bs,
-                               int64_t refcount_block_offset)
+                               int64_t refcount_block_offset,
+                               void **refcount_block)
 {
     BDRVQcowState *s = bs->opaque;
     int ret;
 
-    if (cache_refcount_updates) {
-        ret = write_refcount_block(bs);
-        if (ret < 0) {
-            return ret;
-        }
-    }
-
     BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_LOAD);
-    ret = bdrv_pread(bs->file, refcount_block_offset, s->refcount_block_cache,
-                     s->cluster_size);
-    if (ret < 0) {
-        s->refcount_block_cache_offset = 0;
-        return ret;
-    }
+    ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
+        refcount_block);
 
-    s->refcount_block_cache_offset = refcount_block_offset;
-    return 0;
+    return ret;
 }
 
 /*
@@ -122,6 +88,8 @@  static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
     int refcount_table_index, block_index;
     int64_t refcount_block_offset;
     int ret;
+    uint16_t *refcount_block;
+    uint16_t refcount;
 
     refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
     if (refcount_table_index >= s->refcount_table_size)
@@ -129,16 +97,24 @@  static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
     refcount_block_offset = s->refcount_table[refcount_table_index];
     if (!refcount_block_offset)
         return 0;
-    if (refcount_block_offset != s->refcount_block_cache_offset) {
-        /* better than nothing: return allocated if read error */
-        ret = load_refcount_block(bs, refcount_block_offset);
-        if (ret < 0) {
-            return ret;
-        }
+
+    ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
+        (void**) &refcount_block);
+    if (ret < 0) {
+        return ret;
     }
+
     block_index = cluster_index &
         ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
-    return be16_to_cpu(s->refcount_block_cache[block_index]);
+    refcount = be16_to_cpu(refcount_block[block_index]);
+
+    ret = qcow2_cache_put(bs, s->refcount_block_cache,
+        (void**) &refcount_block);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return refcount;
 }
 
 /*
@@ -174,9 +150,10 @@  static int in_same_refcount_block(BDRVQcowState *s, uint64_t offset_a,
  * Loads a refcount block. If it doesn't exist yet, it is allocated first
  * (including growing the refcount table if needed).
  *
- * Returns the offset of the refcount block on success or -errno in error case
+ * Returns 0 on success or -errno in error case
  */
-static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
+static int alloc_refcount_block(BlockDriverState *bs,
+    int64_t cluster_index, uint16_t **refcount_block)
 {
     BDRVQcowState *s = bs->opaque;
     unsigned int refcount_table_index;
@@ -194,13 +171,8 @@  static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
 
         /* If it's already there, we're done */
         if (refcount_block_offset) {
-            if (refcount_block_offset != s->refcount_block_cache_offset) {
-                ret = load_refcount_block(bs, refcount_block_offset);
-                if (ret < 0) {
-                    return ret;
-                }
-            }
-            return refcount_block_offset;
+             return load_refcount_block(bs, refcount_block_offset,
+                 (void**) refcount_block);
         }
     }
 
@@ -226,12 +198,10 @@  static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
      *   refcount block into the cache
      */
 
-    if (cache_refcount_updates) {
-        ret = write_refcount_block(bs);
-        if (ret < 0) {
-            return ret;
-        }
-    }
+    *refcount_block = NULL;
+
+    /* We write to the refcount table, so we might depend on L2 tables */
+    qcow2_cache_flush(bs, s->l2_table_cache);
 
     /* Allocate the refcount block itself and mark it as used */
     int64_t new_block = alloc_clusters_noref(bs, s->cluster_size);
@@ -247,13 +217,18 @@  static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
 
     if (in_same_refcount_block(s, new_block, cluster_index << s->cluster_bits)) {
         /* Zero the new refcount block before updating it */
-        memset(s->refcount_block_cache, 0, s->cluster_size);
-        s->refcount_block_cache_offset = new_block;
+        ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
+            (void**) refcount_block);
+        if (ret < 0) {
+            goto fail_block;
+        }
+
+        memset(*refcount_block, 0, s->cluster_size);
 
         /* The block describes itself, need to update the cache */
         int block_index = (new_block >> s->cluster_bits) &
             ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
-        s->refcount_block_cache[block_index] = cpu_to_be16(1);
+        (*refcount_block)[block_index] = cpu_to_be16(1);
     } else {
         /* Described somewhere else. This can recurse at most twice before we
          * arrive at a block that describes itself. */
@@ -266,14 +241,19 @@  static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
 
         /* Initialize the new refcount block only after updating its refcount,
          * update_refcount uses the refcount cache itself */
-        memset(s->refcount_block_cache, 0, s->cluster_size);
-        s->refcount_block_cache_offset = new_block;
+        ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
+            (void**) refcount_block);
+        if (ret < 0) {
+            goto fail_block;
+        }
+
+        memset(*refcount_block, 0, s->cluster_size);
     }
 
     /* Now the new refcount block needs to be written to disk */
     BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE);
-    ret = bdrv_pwrite_sync(bs->file, new_block, s->refcount_block_cache,
-        s->cluster_size);
+    qcow2_cache_entry_mark_dirty(s->refcount_block_cache, *refcount_block);
+    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
     if (ret < 0) {
         goto fail_block;
     }
@@ -290,7 +270,12 @@  static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
         }
 
         s->refcount_table[refcount_table_index] = new_block;
-        return new_block;
+        return 0;
+    }
+
+    ret = qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block);
+    if (ret < 0) {
+        goto fail_block;
     }
 
     /*
@@ -410,9 +395,9 @@  static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
     qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t));
     s->free_cluster_index = old_free_cluster_index;
 
-    ret = load_refcount_block(bs, new_block);
+    ret = load_refcount_block(bs, new_block, (void**) refcount_block);
     if (ret < 0) {
-        goto fail_block;
+        return ret;
     }
 
     return new_block;
@@ -420,41 +405,10 @@  static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
 fail_table:
     qemu_free(new_table);
 fail_block:
-    s->refcount_block_cache_offset = 0;
-    return ret;
-}
-
-#define REFCOUNTS_PER_SECTOR (512 >> REFCOUNT_SHIFT)
-static int write_refcount_block_entries(BlockDriverState *bs,
-    int64_t refcount_block_offset, int first_index, int last_index)
-{
-    BDRVQcowState *s = bs->opaque;
-    size_t size;
-    int ret;
-
-    if (cache_refcount_updates) {
-        return 0;
-    }
-
-    if (first_index < 0) {
-        return 0;
-    }
-
-    first_index &= ~(REFCOUNTS_PER_SECTOR - 1);
-    last_index = (last_index + REFCOUNTS_PER_SECTOR)
-        & ~(REFCOUNTS_PER_SECTOR - 1);
-
-    size = (last_index - first_index) << REFCOUNT_SHIFT;
-
-    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
-    ret = bdrv_pwrite(bs->file,
-        refcount_block_offset + (first_index << REFCOUNT_SHIFT),
-        &s->refcount_block_cache[first_index], size);
-    if (ret < 0) {
-        return ret;
+    if (*refcount_block != NULL) {
+        qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block);
     }
-
-    return 0;
+    return ret;
 }
 
 /* XXX: cache several refcount block clusters ? */
@@ -463,9 +417,8 @@  static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
 {
     BDRVQcowState *s = bs->opaque;
     int64_t start, last, cluster_offset;
-    int64_t refcount_block_offset = 0;
-    int64_t table_index = -1, old_table_index;
-    int first_index = -1, last_index = -1;
+    uint16_t *refcount_block = NULL;
+    int64_t old_table_index = -1;
     int ret;
 
 #ifdef DEBUG_ALLOC2
@@ -478,6 +431,11 @@  static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
         return 0;
     }
 
+    if (addend < 0) {
+        qcow2_cache_set_dependency(bs, s->refcount_block_cache,
+            s->l2_table_cache);
+    }
+
     start = offset & ~(s->cluster_size - 1);
     last = (offset + length - 1) & ~(s->cluster_size - 1);
     for(cluster_offset = start; cluster_offset <= last;
@@ -485,42 +443,33 @@  static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
     {
         int block_index, refcount;
         int64_t cluster_index = cluster_offset >> s->cluster_bits;
-        int64_t new_block;
+        int64_t table_index =
+            cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
 
-        /* Only write refcount block to disk when we are done with it */
-        old_table_index = table_index;
-        table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
-        if ((old_table_index >= 0) && (table_index != old_table_index)) {
+        /* Load the refcount block and allocate it if needed */
+        if (table_index != old_table_index) {
+            if (refcount_block) {
+                ret = qcow2_cache_put(bs, s->refcount_block_cache,
+                    (void**) &refcount_block);
+                if (ret < 0) {
+                    goto fail;
+                }
+            }
 
-            ret = write_refcount_block_entries(bs, refcount_block_offset,
-                first_index, last_index);
+            ret = alloc_refcount_block(bs, cluster_index, &refcount_block);
             if (ret < 0) {
-                return ret;
+                goto fail;
             }
-
-            first_index = -1;
-            last_index = -1;
         }
+        old_table_index = table_index;
 
-        /* Load the refcount block and allocate it if needed */
-        new_block = alloc_refcount_block(bs, cluster_index);
-        if (new_block < 0) {
-            ret = new_block;
-            goto fail;
-        }
-        refcount_block_offset = new_block;
+        qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refcount_block);
 
         /* we can update the count and save it */
         block_index = cluster_index &
             ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
-        if (first_index == -1 || block_index < first_index) {
-            first_index = block_index;
-        }
-        if (block_index > last_index) {
-            last_index = block_index;
-        }
 
-        refcount = be16_to_cpu(s->refcount_block_cache[block_index]);
+        refcount = be16_to_cpu(refcount_block[block_index]);
         refcount += addend;
         if (refcount < 0 || refcount > 0xffff) {
             ret = -EINVAL;
@@ -529,17 +478,16 @@  static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
         if (refcount == 0 && cluster_index < s->free_cluster_index) {
             s->free_cluster_index = cluster_index;
         }
-        s->refcount_block_cache[block_index] = cpu_to_be16(refcount);
+        refcount_block[block_index] = cpu_to_be16(refcount);
     }
 
     ret = 0;
 fail:
-
     /* Write last changed block to disk */
-    if (refcount_block_offset != 0) {
+    if (refcount_block) {
         int wret;
-        wret = write_refcount_block_entries(bs, refcount_block_offset,
-            first_index, last_index);
+        wret = qcow2_cache_put(bs, s->refcount_block_cache,
+            (void**) &refcount_block);
         if (wret < 0) {
             return ret < 0 ? ret : wret;
         }
@@ -758,9 +706,7 @@  int qcow2_update_snapshot_refcount(BlockDriverState *bs,
     uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated;
     int64_t old_offset, old_l2_offset;
     int l2_size, i, j, l1_modified, l2_modified, nb_csectors, refcount;
-
-    qcow2_l2_cache_reset(bs);
-    cache_refcount_updates = 1;
+    int ret;
 
     l2_table = NULL;
     l1_table = NULL;
@@ -784,7 +730,6 @@  int qcow2_update_snapshot_refcount(BlockDriverState *bs,
     }
 
     l2_size = s->l2_size * sizeof(uint64_t);
-    l2_table = qemu_malloc(l2_size);
     l1_modified = 0;
     for(i = 0; i < l1_size; i++) {
         l2_offset = l1_table[i];
@@ -792,8 +737,13 @@  int qcow2_update_snapshot_refcount(BlockDriverState *bs,
             old_l2_offset = l2_offset;
             l2_offset &= ~QCOW_OFLAG_COPIED;
             l2_modified = 0;
-            if (bdrv_pread(bs->file, l2_offset, l2_table, l2_size) != l2_size)
+
+            ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
+                (void**) &l2_table);
+            if (ret < 0) {
                 goto fail;
+            }
+
             for(j = 0; j < s->l2_size; j++) {
                 offset = be64_to_cpu(l2_table[j]);
                 if (offset != 0) {
@@ -833,17 +783,23 @@  int qcow2_update_snapshot_refcount(BlockDriverState *bs,
                         offset |= QCOW_OFLAG_COPIED;
                     }
                     if (offset != old_offset) {
+                        if (addend > 0) {
+                            qcow2_cache_set_dependency(bs, s->l2_table_cache,
+                                s->refcount_block_cache);
+                        }
                         l2_table[j] = cpu_to_be64(offset);
                         l2_modified = 1;
+                        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
                     }
                 }
             }
-            if (l2_modified) {
-                if (bdrv_pwrite_sync(bs->file,
-                                l2_offset, l2_table, l2_size) < 0)
-                    goto fail;
+
+            ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+            if (ret < 0) {
+                goto fail;
             }
 
+
             if (addend != 0) {
                 refcount = update_cluster_refcount(bs, l2_offset >> s->cluster_bits, addend);
             } else {
@@ -871,16 +827,14 @@  int qcow2_update_snapshot_refcount(BlockDriverState *bs,
     }
     if (l1_allocated)
         qemu_free(l1_table);
-    qemu_free(l2_table);
-    cache_refcount_updates = 0;
-    write_refcount_block(bs);
     return 0;
  fail:
+    if (l2_table) {
+        qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+    }
+
     if (l1_allocated)
         qemu_free(l1_table);
-    qemu_free(l2_table);
-    cache_refcount_updates = 0;
-    write_refcount_block(bs);
     return -EIO;
 }
 
diff --git a/block/qcow2.c b/block/qcow2.c
index b6b094c..49bf7b9 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -143,6 +143,7 @@  static int qcow2_open(BlockDriverState *bs, int flags)
     int len, i, ret = 0;
     QCowHeader header;
     uint64_t ext_end;
+    bool writethrough;
 
     ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
     if (ret < 0) {
@@ -217,8 +218,13 @@  static int qcow2_open(BlockDriverState *bs, int flags)
             be64_to_cpus(&s->l1_table[i]);
         }
     }
-    /* alloc L2 cache */
-    s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
+
+    /* alloc L2 table/refcount block cache */
+    writethrough = ((flags & BDRV_O_CACHE_MASK) == 0);
+    s->l2_table_cache = qcow2_cache_create(bs, L2_CACHE_SIZE, writethrough);
+    s->refcount_block_cache = qcow2_cache_create(bs, REFCOUNT_CACHE_SIZE,
+        writethrough);
+
     s->cluster_cache = qemu_malloc(s->cluster_size);
     /* one more sector for decompressed data alignment */
     s->cluster_data = qemu_malloc(QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
@@ -270,7 +276,9 @@  static int qcow2_open(BlockDriverState *bs, int flags)
     qcow2_free_snapshots(bs);
     qcow2_refcount_close(bs);
     qemu_free(s->l1_table);
-    qemu_free(s->l2_cache);
+    if (s->l2_table_cache) {
+        qcow2_cache_destroy(bs, s->l2_table_cache);
+    }
     qemu_free(s->cluster_cache);
     qemu_free(s->cluster_data);
     return ret;
@@ -719,7 +727,13 @@  static void qcow2_close(BlockDriverState *bs)
 {
     BDRVQcowState *s = bs->opaque;
     qemu_free(s->l1_table);
-    qemu_free(s->l2_cache);
+
+    qcow2_cache_flush(bs, s->l2_table_cache);
+    qcow2_cache_flush(bs, s->refcount_block_cache);
+
+    qcow2_cache_destroy(bs, s->l2_table_cache);
+    qcow2_cache_destroy(bs, s->refcount_block_cache);
+
     qemu_free(s->cluster_cache);
     qemu_free(s->cluster_data);
     qcow2_refcount_close(bs);
@@ -1179,6 +1193,19 @@  static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
 
 static int qcow2_flush(BlockDriverState *bs)
 {
+    BDRVQcowState *s = bs->opaque;
+    int ret;
+
+    ret = qcow2_cache_flush(bs, s->l2_table_cache);
+    if (ret < 0) {
+        return ret;
+    }
+
+    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+    if (ret < 0) {
+        return ret;
+    }
+
     return bdrv_flush(bs->file);
 }
 
@@ -1186,6 +1213,19 @@  static BlockDriverAIOCB *qcow2_aio_flush(BlockDriverState *bs,
                                          BlockDriverCompletionFunc *cb,
                                          void *opaque)
 {
+    BDRVQcowState *s = bs->opaque;
+    int ret;
+
+    ret = qcow2_cache_flush(bs, s->l2_table_cache);
+    if (ret < 0) {
+        return NULL;
+    }
+
+    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+    if (ret < 0) {
+        return NULL;
+    }
+
     return bdrv_aio_flush(bs->file, cb, opaque);
 }
 
diff --git a/block/qcow2.h b/block/qcow2.h
index e5473e1..11cbce3 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -51,6 +51,9 @@ 
 
 #define L2_CACHE_SIZE 16
 
+/* Must be at least 4 to cover all cases of refcount table growth */
+#define REFCOUNT_CACHE_SIZE 4
+
 typedef struct QCowHeader {
     uint32_t magic;
     uint32_t version;
@@ -94,9 +97,10 @@  typedef struct BDRVQcowState {
     uint64_t cluster_offset_mask;
     uint64_t l1_table_offset;
     uint64_t *l1_table;
-    uint64_t *l2_cache;
-    uint64_t l2_cache_offsets[L2_CACHE_SIZE];
-    uint32_t l2_cache_counts[L2_CACHE_SIZE];
+
+    Qcow2Cache* l2_table_cache;
+    Qcow2Cache* refcount_block_cache;
+
     uint8_t *cluster_cache;
     uint8_t *cluster_data;
     uint64_t cluster_cache_offset;
@@ -105,8 +109,6 @@  typedef struct BDRVQcowState {
     uint64_t *refcount_table;
     uint64_t refcount_table_offset;
     uint32_t refcount_table_size;
-    uint64_t refcount_block_cache_offset;
-    uint16_t *refcount_block_cache;
     int64_t free_cluster_index;
     int64_t free_byte_offset;