diff mbox

[1/7] qcow2: use one single memory block for the L2/refcount cache tables

Message ID 9947b50525d2d06d9a9c0bacb11cab8a9710ede1.1430919406.git.berto@igalia.com
State New
Headers show

Commit Message

Alberto Garcia May 6, 2015, 1:39 p.m. UTC
The qcow2 L2/refcount cache contains one separate table for each cache
entry. Doing one allocation per table adds unnecessary overhead and it
also requires us to store the address of each table separately.

Since the size of the cache is constant during its lifetime, it's
better to have an array that contains all the tables using one single
allocation.

In my tests measuring freshly created caches with sizes 128MB (L2) and
32MB (refcount), this uses around 10MB less RAM.

Signed-off-by: Alberto Garcia <berto@igalia.com>
---
 block/qcow2-cache.c    | 55 ++++++++++++++++++++++++--------------------------
 block/qcow2-cluster.c  | 12 +++++------
 block/qcow2-refcount.c |  8 +++++---
 block/qcow2.h          |  3 ++-
 4 files changed, 39 insertions(+), 39 deletions(-)

Comments

Stefan Hajnoczi May 7, 2015, 10:14 a.m. UTC | #1
On Wed, May 06, 2015 at 04:39:25PM +0300, Alberto Garcia wrote:
> The qcow2 L2/refcount cache contains one separate table for each cache
> entry. Doing one allocation per table adds unnecessary overhead and it
> also requires us to store the address of each table separately.
> 
> Since the size of the cache is constant during its lifetime, it's
> better to have an array that contains all the tables using one single
> allocation.
> 
> In my tests measuring freshly created caches with sizes 128MB (L2) and
> 32MB (refcount) this uses around 10MB of RAM less.
> 
> Signed-off-by: Alberto Garcia <berto@igalia.com>
> ---
>  block/qcow2-cache.c    | 55 ++++++++++++++++++++++++--------------------------
>  block/qcow2-cluster.c  | 12 +++++------
>  block/qcow2-refcount.c |  8 +++++---
>  block/qcow2.h          |  3 ++-
>  4 files changed, 39 insertions(+), 39 deletions(-)

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Max Reitz May 8, 2015, 3:03 p.m. UTC | #2
On 06.05.2015 15:39, Alberto Garcia wrote:
> The qcow2 L2/refcount cache contains one separate table for each cache
> entry. Doing one allocation per table adds unnecessary overhead and it
> also requires us to store the address of each table separately.
>
> Since the size of the cache is constant during its lifetime, it's
> better to have an array that contains all the tables using one single
> allocation.
>
> In my tests measuring freshly created caches with sizes 128MB (L2) and
> 32MB (refcount) this uses around 10MB of RAM less.
>
> Signed-off-by: Alberto Garcia <berto@igalia.com>
> ---
>   block/qcow2-cache.c    | 55 ++++++++++++++++++++++++--------------------------
>   block/qcow2-cluster.c  | 12 +++++------
>   block/qcow2-refcount.c |  8 +++++---
>   block/qcow2.h          |  3 ++-
>   4 files changed, 39 insertions(+), 39 deletions(-)

Reviewed-by: Max Reitz <mreitz@redhat.com>
diff mbox

Patch

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index b115549..f0dfb69 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -28,7 +28,6 @@ 
 #include "trace.h"
 
 typedef struct Qcow2CachedTable {
-    void*   table;
     int64_t offset;
     bool    dirty;
     int     cache_hits;
@@ -40,39 +39,35 @@  struct Qcow2Cache {
     struct Qcow2Cache*      depends;
     int                     size;
     bool                    depends_on_flush;
+    void                   *table_array;
 };
 
+static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs,
+                    Qcow2Cache *c, int table)
+{
+    BDRVQcowState *s = bs->opaque;
+    return (uint8_t *) c->table_array + (size_t) table * s->cluster_size;
+}
+
 Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
 {
     BDRVQcowState *s = bs->opaque;
     Qcow2Cache *c;
-    int i;
 
     c = g_new0(Qcow2Cache, 1);
     c->size = num_tables;
     c->entries = g_try_new0(Qcow2CachedTable, num_tables);
-    if (!c->entries) {
-        goto fail;
-    }
-
-    for (i = 0; i < c->size; i++) {
-        c->entries[i].table = qemu_try_blockalign(bs->file, s->cluster_size);
-        if (c->entries[i].table == NULL) {
-            goto fail;
-        }
+    c->table_array = qemu_try_blockalign(bs->file,
+                                         (size_t) num_tables * s->cluster_size);
+
+    if (!c->entries || !c->table_array) {
+        qemu_vfree(c->table_array);
+        g_free(c->entries);
+        g_free(c);
+        c = NULL;
     }
 
     return c;
-
-fail:
-    if (c->entries) {
-        for (i = 0; i < c->size; i++) {
-            qemu_vfree(c->entries[i].table);
-        }
-    }
-    g_free(c->entries);
-    g_free(c);
-    return NULL;
 }
 
 int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c)
@@ -81,9 +76,9 @@  int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c)
 
     for (i = 0; i < c->size; i++) {
         assert(c->entries[i].ref == 0);
-        qemu_vfree(c->entries[i].table);
     }
 
+    qemu_vfree(c->table_array);
     g_free(c->entries);
     g_free(c);
 
@@ -151,8 +146,8 @@  static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
         BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
     }
 
-    ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->entries[i].table,
-        s->cluster_size);
+    ret = bdrv_pwrite(bs->file, c->entries[i].offset,
+                      qcow2_cache_get_table_addr(bs, c, i), s->cluster_size);
     if (ret < 0) {
         return ret;
     }
@@ -304,7 +299,8 @@  static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
             BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
         }
 
-        ret = bdrv_pread(bs->file, offset, c->entries[i].table, s->cluster_size);
+        ret = bdrv_pread(bs->file, offset, qcow2_cache_get_table_addr(bs, c, i),
+                         s->cluster_size);
         if (ret < 0) {
             return ret;
         }
@@ -319,7 +315,7 @@  static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
 found:
     c->entries[i].cache_hits++;
     c->entries[i].ref++;
-    *table = c->entries[i].table;
+    *table = qcow2_cache_get_table_addr(bs, c, i);
 
     trace_qcow2_cache_get_done(qemu_coroutine_self(),
                                c == s->l2_table_cache, i);
@@ -344,7 +340,7 @@  int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
     int i;
 
     for (i = 0; i < c->size; i++) {
-        if (c->entries[i].table == *table) {
+        if (qcow2_cache_get_table_addr(bs, c, i) == *table) {
             goto found;
         }
     }
@@ -358,12 +354,13 @@  found:
     return 0;
 }
 
-void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table)
+void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
+     void *table)
 {
     int i;
 
     for (i = 0; i < c->size; i++) {
-        if (c->entries[i].table == table) {
+        if (qcow2_cache_get_table_addr(bs, c, i) == table) {
             goto found;
         }
     }
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index ed2b44d..5cd418a 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -263,7 +263,7 @@  static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
     BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
 
     trace_qcow2_l2_allocate_write_l2(bs, l1_index);
-    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
     ret = qcow2_cache_flush(bs, s->l2_table_cache);
     if (ret < 0) {
         goto fail;
@@ -692,7 +692,7 @@  uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
     /* compressed clusters never have the copied flag */
 
     BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
-    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
     l2_table[l2_index] = cpu_to_be64(cluster_offset);
     ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
     if (ret < 0) {
@@ -771,7 +771,7 @@  int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
     if (ret < 0) {
         goto err;
     }
-    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
 
     assert(l2_index + m->nb_clusters <= s->l2_size);
     for (i = 0; i < m->nb_clusters; i++) {
@@ -1470,7 +1470,7 @@  static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
         }
 
         /* First remove L2 entries */
-        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+        qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
         if (!full_discard && s->qcow_version >= 3) {
             l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
         } else {
@@ -1558,7 +1558,7 @@  static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
         old_offset = be64_to_cpu(l2_table[l2_index + i]);
 
         /* Update L2 entries */
-        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+        qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
         if (old_offset & QCOW_OFLAG_COMPRESSED) {
             l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
             qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
@@ -1760,7 +1760,7 @@  static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
 
         if (is_active_l1) {
             if (l2_dirty) {
-                qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+                qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
                 qcow2_cache_depends_on_flush(s->l2_table_cache);
             }
             ret = qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index f47260b..35a6a35 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -424,7 +424,7 @@  static int alloc_refcount_block(BlockDriverState *bs,
 
     /* Now the new refcount block needs to be written to disk */
     BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE);
-    qcow2_cache_entry_mark_dirty(s->refcount_block_cache, *refcount_block);
+    qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, *refcount_block);
     ret = qcow2_cache_flush(bs, s->refcount_block_cache);
     if (ret < 0) {
         goto fail_block;
@@ -737,7 +737,8 @@  static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
         }
         old_table_index = table_index;
 
-        qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refcount_block);
+        qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache,
+                                     refcount_block);
 
         /* we can update the count and save it */
         block_index = cluster_index & (s->refcount_block_size - 1);
@@ -1177,7 +1178,8 @@  int qcow2_update_snapshot_refcount(BlockDriverState *bs,
                             s->refcount_block_cache);
                     }
                     l2_table[j] = cpu_to_be64(offset);
-                    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+                    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache,
+                                                 l2_table);
                 }
             }
 
diff --git a/block/qcow2.h b/block/qcow2.h
index 422b825..5d0995f 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -574,7 +574,8 @@  int qcow2_read_snapshots(BlockDriverState *bs);
 Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables);
 int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c);
 
-void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table);
+void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
+     void *table);
 int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c);
 int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
     Qcow2Cache *dependency);