Patchwork [v2] qcow2: Metadata preallocation

login
register
mail settings
Submitter Kevin Wolf
Date Aug. 17, 2009, 1:50 p.m.
Message ID <1250517010-11352-1-git-send-email-kwolf@redhat.com>
Download mbox | patch
Permalink /patch/31518/
State Superseded
Headers show

Comments

Kevin Wolf - Aug. 17, 2009, 1:50 p.m.
This introduces a qemu-img create option for qcow2 which allows the metadata to
be preallocated, i.e. clusters are reserved in the refcount table and L1/L2
tables, but no data is written to them. Metadata is quite small, so this
happens in almost no time.

Especially with qcow2 on virtio this helps to gain a bit of performance during
the initial writes. However, as soon as create a snapshot, we're back to the
normal slow speed, obviously. So this isn't the real fix, but kind of a cheat
while we're still having trouble with qcow2 on virtio.

Note that the option is disabled by default and needs to be specified
explicitly using qemu-img create -f qcow2 -o preallocation=metadata.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---

v2: Use bdrv_truncate() to set the right file size

 block/qcow2.c |   81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 block_int.h   |    1 +
 2 files changed, 80 insertions(+), 2 deletions(-)
Kevin Wolf - Aug. 21, 2009, 11:17 a.m.
Kevin Wolf schrieb:
> This introduces a qemu-img create option for qcow2 which allows the metadata to
> be preallocated, i.e. clusters are reserved in the refcount table and L1/L2
> tables, but no data is written to them. Metadata is quite small, so this
> happens in almost no time.
> 
> Especially with qcow2 on virtio this helps to gain a bit of performance during
> the initial writes. However, as soon as create a snapshot, we're back to the
> normal slow speed, obviously. So this isn't the real fix, but kind of a cheat
> while we're still having trouble with qcow2 on virtio.
> 
> Note that the option is disabled by default and needs to be specified
> explicitly using qemu-img create -f qcow2 -o preallocation=metadata.
> 
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
> 
> v2: Use bdrv_truncate() to set the right file size

This one seems to be wrong actually, possibly some metadata being stored
after the last allocated cluster. Please apply version 1 instead.

Kevin

Patch

diff --git a/block/qcow2.c b/block/qcow2.c
index a5bf205..a9c39bd 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -638,9 +638,54 @@  static int get_bits_from_size(size_t size)
     return res;
 }
 
+
+static int preallocate(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t cluster_offset;
+    uint64_t nb_sectors;
+    uint64_t offset;
+    int num;
+    QCowL2Meta meta;
+
+    nb_sectors = bdrv_getlength(bs) >> 9;
+    offset = 0;
+
+    while (nb_sectors) {
+        num = MIN(nb_sectors, INT_MAX >> 9);
+        cluster_offset = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num,
+            &meta);
+
+        if (cluster_offset == 0) {
+            return -1;
+        }
+
+        if (qcow2_alloc_cluster_link_l2(bs, cluster_offset, &meta) < 0) {
+            qcow2_free_any_clusters(bs, cluster_offset, meta.nb_clusters);
+            return -1;
+        }
+
+        /* TODO Preallocate data if requested */
+
+        nb_sectors -= num;
+        offset += num << 9;
+    }
+
+    /*
+     * It is expected that the image file is large enough to actually contain
+     * all of the allocated clusters (otherwise we get failing reads after
+     * EOF). Extend the image to the last allocated sector.
+     */
+    if (cluster_offset != 0) {
+        bdrv_truncate(s->hd, cluster_offset + (num <<  9));
+    }
+
+    return 0;
+}
+
 static int qcow_create2(const char *filename, int64_t total_size,
                         const char *backing_file, const char *backing_format,
-                        int flags, size_t cluster_size)
+                        int flags, size_t cluster_size, int prealloc)
 {
 
     int fd, header_size, backing_filename_len, l1_size, i, shift, l2_bits;
@@ -762,6 +807,16 @@  static int qcow_create2(const char *filename, int64_t total_size,
     qemu_free(s->refcount_table);
     qemu_free(s->refcount_block);
     close(fd);
+
+    /* Preallocate metadata */
+    if (prealloc) {
+        BlockDriverState *bs;
+        bs = bdrv_new("");
+        bdrv_open(bs, filename, BDRV_O_CACHE_WB);
+        preallocate(bs);
+        bdrv_close(bs);
+    }
+
     return 0;
 }
 
@@ -772,6 +827,7 @@  static int qcow_create(const char *filename, QEMUOptionParameter *options)
     uint64_t sectors = 0;
     int flags = 0;
     size_t cluster_size = 65536;
+    int prealloc = 0;
 
     /* Read out options */
     while (options && options->name) {
@@ -787,12 +843,28 @@  static int qcow_create(const char *filename, QEMUOptionParameter *options)
             if (options->value.n) {
                 cluster_size = options->value.n;
             }
+        } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) {
+            if (!options->value.s || !strcmp(options->value.s, "off")) {
+                prealloc = 0;
+            } else if (!strcmp(options->value.s, "metadata")) {
+                prealloc = 1;
+            } else {
+                fprintf(stderr, "Invalid preallocation mode: '%s'\n",
+                    options->value.s);
+                return -EINVAL;
+            }
         }
         options++;
     }
 
+    if (backing_file && prealloc) {
+        fprintf(stderr, "Backing file and preallocation cannot be used at "
+            "the same time\n");
+        return -EINVAL;
+    }
+
     return qcow_create2(filename, sectors, backing_file, backing_fmt, flags,
-        cluster_size);
+        cluster_size, prealloc);
 }
 
 static int qcow_make_empty(BlockDriverState *bs)
@@ -982,6 +1054,11 @@  static QEMUOptionParameter qcow_create_options[] = {
         .type = OPT_SIZE,
         .help = "qcow2 cluster size"
     },
+    {
+        .name = BLOCK_OPT_PREALLOC,
+        .type = OPT_STRING,
+        .help = "Preallocation mode (allowed values: off, metadata)"
+    },
     { NULL }
 };
 
diff --git a/block_int.h b/block_int.h
index 8898d91..0902fd4 100644
--- a/block_int.h
+++ b/block_int.h
@@ -37,6 +37,7 @@ 
 #define BLOCK_OPT_BACKING_FILE  "backing_file"
 #define BLOCK_OPT_BACKING_FMT   "backing_fmt"
 #define BLOCK_OPT_CLUSTER_SIZE  "cluster_size"
+#define BLOCK_OPT_PREALLOC      "preallocation"
 
 typedef struct AIOPool {
     void (*cancel)(BlockDriverAIOCB *acb);