Patchwork CPU consumption optimization of 'qemu-img convert' using bdrv_is_allocated()

login
register
mail settings
Submitter Dmitry Konishchev
Date June 15, 2011, 10:14 a.m.
Message ID <1308132899-12632-1-git-send-email-konishchev@gmail.com>
Download mbox | patch
Permalink /patch/100506/
State New
Headers show

Comments

Dmitry Konishchev - June 15, 2011, 10:14 a.m.
This patch optimizes the 'qemu-img convert' operation for volumes that are
almost fully unallocated. Here are the results of some simple tests:

We have a snapshot of a volume:
$ qemu-img info snapshot.qcow2
image: snapshot.qcow2
file format: qcow2
virtual size: 5.0G (5372805120 bytes)
disk size: 4.0G
cluster_size: 65536

Create a volume from the snapshot and use it a little:
$ qemu-img create -f qcow2 -o backing_file=snapshot.qcow2 volume.qcow2

For volumes that are almost fully allocated, there is a small regression:
$ time qemu-img convert -O qcow2 volume.qcow2 volume_snapshot.qcow2
real  2m43.864s
user  0m9.257s
sys   0m40.559s
$ time qemu-img-patched convert -O qcow2 volume.qcow2 volume_snapshot.qcow2
real  2m46.899s
user  0m9.749s
sys   0m40.471s

But now create a volume which is almost fully unallocated:
$ qemu-img create -f qcow2 -o backing_file=snapshot.qcow2 volume.qcow2 1T

Now CPU consumption (user time) is reduced by more than half:
$ time qemu-img convert -O qcow2 volume.qcow2 volume_snapshot.qcow2
real  6m40.985s
user  4m13.832s
sys   0m33.738s
$ time qemu-img-patched convert -O qcow2 volume.qcow2 volume_snapshot.qcow2
real  4m28.448s
user  1m43.882s
sys   0m33.894s

Signed-off-by: Dmitry Konishchev <konishchev@gmail.com>
---
 qemu-img.c |  184 ++++++++++++++++++++++++++++++++++++++++++++++-------------
 1 files changed, 143 insertions(+), 41 deletions(-)

Patch

diff --git a/qemu-img.c b/qemu-img.c
index 4f162d1..7f3d853 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -27,6 +27,7 @@ 
 #include "osdep.h"
 #include "sysemu.h"
 #include "block_int.h"
+#include "block.h"
 #include <stdio.h>
 
 #ifdef _WIN32
@@ -586,19 +587,95 @@  static int compare_sectors(const uint8_t *buf1, const uint8_t *buf2, int n,
     return res;
 }
 
+/*
+ * Copies sectors from one image to another.
+ * Writes only non-zero bytes to the output image.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int copy_allocated_sectors(
+    BlockDriverState *out_bs, BlockDriverState *bs, int n,
+    int64_t sector_num, int64_t bs_offset, const uint64_t *bs_geometry, uint8_t *buf)
+{
+    BlockDriverState *cur_bs;
+    uint64_t cur_sectors;
+    uint64_t bs_sector;
+    int backing_depth;
+    int allocated_num;
+    int sector_found;
+    int cur_n;
+
+    while (n > 0) {
+        /* Look for the sectors in the source image and if they are not
+           allocated - sequentially in all its backing images. */
+
+        cur_bs = bs;
+        bs_sector = sector_num - bs_offset;
+        backing_depth = 0;
+        sector_found = 0;
+
+        do {
+            cur_sectors = bs_geometry[backing_depth++];
+
+            if (bs_sector >= cur_sectors) {
+                continue;
+            }
+
+            if (bs_sector + n <= cur_sectors) {
+                cur_n = n;
+            } else {
+                cur_n = cur_sectors - bs_sector;
+            }
+
+            if (bdrv_is_allocated(cur_bs, bs_sector, cur_n, &allocated_num)) {
+                const uint8_t *cur_buf = buf;
+                sector_found = 1;
+
+                if (bdrv_read(cur_bs, bs_sector, buf, allocated_num) < 0) {
+                    error_report("error while reading");
+                    return -1;
+                }
+
+                while (allocated_num > 0) {
+                    if (is_allocated_sectors(cur_buf, allocated_num, &cur_n)) {
+                        if (bdrv_write(out_bs, sector_num, cur_buf, cur_n) < 0) {
+                            error_report("error while writing");
+                            return -1;
+                        }
+                    }
+
+                    n -= cur_n;
+                    sector_num += cur_n;
+                    allocated_num -= cur_n;
+                    cur_buf += cur_n * BDRV_SECTOR_SIZE;
+                }
+
+                break;
+            }
+        } while(( cur_bs = cur_bs->backing_hd ));
+
+        if (!sector_found) {
+            sector_num++;
+            n--;
+        }
+    }
+
+    return 0;
+}
+
 #define IO_BUF_SIZE (2 * 1024 * 1024)
 
 static int img_convert(int argc, char **argv)
 {
-    int c, ret = 0, n, n1, bs_n, bs_i, compress, cluster_size, cluster_sectors;
+    int c, ret = 0, n, cur_n, bs_n, bs_i, compress, cluster_size, cluster_sectors;
     int progress = 0;
     const char *fmt, *out_fmt, *out_baseimg, *out_filename;
     BlockDriver *drv, *proto_drv;
     BlockDriverState **bs = NULL, *out_bs = NULL;
     int64_t total_sectors, nb_sectors, sector_num, bs_offset;
     uint64_t bs_sectors;
+    uint64_t *bs_geometry = NULL;
     uint8_t * buf = NULL;
-    const uint8_t *buf1;
     BlockDriverInfo bdi;
     QEMUOptionParameter *param = NULL, *create_options = NULL;
     QEMUOptionParameter *out_baseimg_param;
@@ -874,14 +951,20 @@  static int img_convert(int argc, char **argv)
         /* signal EOF to align */
         bdrv_write_compressed(out_bs, 0, NULL, 0);
     } else {
+        int bs_i_prev = -1;
+        float progress = 100;
+        BlockDriverState *cur_bs;
         int has_zero_init = bdrv_has_zero_init(out_bs);
 
         sector_num = 0; // total number of sectors converted so far
         nb_sectors = total_sectors - sector_num;
-        local_progress = (float)100 /
-            (nb_sectors / MIN(nb_sectors, IO_BUF_SIZE / 512));
 
         for(;;) {
+            if (total_sectors) {
+                progress = (long double) sector_num / total_sectors * 100;
+            }
+            qemu_progress_print(progress, 0);
+
             nb_sectors = total_sectors - sector_num;
             if (nb_sectors <= 0) {
                 break;
@@ -893,15 +976,38 @@  static int img_convert(int argc, char **argv)
             }
 
             while (sector_num - bs_offset >= bs_sectors) {
-                bs_i ++;
-                assert (bs_i < bs_n);
+                bs_i++;
+                assert(bs_i < bs_n);
                 bs_offset += bs_sectors;
                 bdrv_get_geometry(bs[bs_i], &bs_sectors);
+
                 /* printf("changing part: sector_num=%" PRId64 ", bs_i=%d, "
                   "bs_offset=%" PRId64 ", bs_sectors=%" PRId64 "\n",
                    sector_num, bs_i, bs_offset, bs_sectors); */
             }
 
+            if (bs_i != bs_i_prev) {
+                /* Getting geometry of the image and all its backing images */
+
+                int backing_depth = 1;
+                cur_bs = bs[bs_i];
+                while (( cur_bs = cur_bs->backing_hd )) {
+                    backing_depth++;
+                }
+
+                bs_geometry = (uint64_t *) qemu_realloc(
+                    bs_geometry, backing_depth * sizeof(uint64_t));
+
+                backing_depth = 1;
+                cur_bs = bs[bs_i];
+                *bs_geometry = bs_sectors;
+                while (( cur_bs = cur_bs->backing_hd )) {
+                    bdrv_get_geometry(cur_bs, bs_geometry + backing_depth++);
+                }
+
+                bs_i_prev = bs_i;
+            }
+
             if (n > bs_offset + bs_sectors - sector_num) {
                 n = bs_offset + bs_sectors - sector_num;
             }
@@ -912,55 +1018,51 @@  static int img_convert(int argc, char **argv)
                    are present in both the output's and input's base images (no
                    need to copy them). */
                 if (out_baseimg) {
-                    if (!bdrv_is_allocated(bs[bs_i], sector_num - bs_offset,
-                                           n, &n1)) {
-                        sector_num += n1;
+                    if (!bdrv_is_allocated(bs[bs_i], sector_num - bs_offset, n, &cur_n)) {
+                        sector_num += cur_n;
                         continue;
                     }
-                    /* The next 'n1' sectors are allocated in the input image. Copy
+                    /* The next 'cur_n' sectors are allocated in the input image. Copy
                        only those as they may be followed by unallocated sectors. */
-                    n = n1;
+                    n = cur_n;
                 }
-            } else {
-                n1 = n;
             }
 
-            ret = bdrv_read(bs[bs_i], sector_num - bs_offset, buf, n);
-            if (ret < 0) {
-                error_report("error while reading");
-                goto out;
-            }
-            /* NOTE: at the same time we convert, we do not write zero
-               sectors to have a chance to compress the image. Ideally, we
-               should add a specific call to have the info to go faster */
-            buf1 = buf;
-            while (n > 0) {
-                /* If the output image is being created as a copy on write image,
-                   copy all sectors even the ones containing only NUL bytes,
-                   because they may differ from the sectors in the base image.
-
-                   If the output is to a host device, we also write out
-                   sectors that are entirely 0, since whatever data was
-                   already there is garbage, not 0s. */
-                if (!has_zero_init || out_baseimg ||
-                    is_allocated_sectors(buf1, n, &n1)) {
-                    ret = bdrv_write(out_bs, sector_num, buf1, n1);
-                    if (ret < 0) {
-                        error_report("error while writing");
-                        goto out;
-                    }
+            /* If the output image is being created as a copy on write image,
+               copy all sectors even the ones containing only zero bytes,
+               because they may differ from the sectors in the base image.
+
+               If the output is to a host device, we also write out
+               sectors that are entirely 0, since whatever data was
+               already there is garbage, not 0s. */
+            if (!has_zero_init || out_baseimg) {
+                ret = bdrv_read(bs[bs_i], sector_num - bs_offset, buf, n);
+                if (ret < 0) {
+                    error_report("error while reading");
+                    goto out;
+                }
+
+                ret = bdrv_write(out_bs, sector_num, buf, n);
+                if (ret < 0) {
+                    error_report("error while writing");
+                    goto out;
+                }
+            } else {
+                ret = copy_allocated_sectors(out_bs, bs[bs_i], n,
+                    sector_num, bs_offset, bs_geometry, buf);
+                if (ret < 0) {
+                    goto out;
                 }
-                sector_num += n1;
-                n -= n1;
-                buf1 += n1 * 512;
             }
-            qemu_progress_print(local_progress, 100);
+
+            sector_num += n;
         }
     }
 out:
     qemu_progress_end();
     free_option_parameters(create_options);
     free_option_parameters(param);
+    qemu_free(bs_geometry);
     qemu_free(buf);
     if (out_bs) {
         bdrv_delete(out_bs);