diff mbox

[v4,3/6] qemu-img: Implement commit like QMP

Message ID 1397329060-23599-4-git-send-email-mreitz@redhat.com
State New
Headers show

Commit Message

Max Reitz April 12, 2014, 6:57 p.m. UTC
qemu-img should use QMP commands whenever possible in order to ensure
feature completeness of both online and offline image operations. As
qemu-img itself has no access to QMP (since this would basically require
linking just about everything into qemu-img), imitate QMP's
implementation of block-commit by using commit_active_start() and then
waiting for the block job to finish.

This new implementation does not empty the snapshot image, as opposed to
the old implementation using bdrv_commit(). However, as QMP's
block-commit apparently never did this and as qcow2 (which is probably
qemu's standard image format) does not even implement the required
function (bdrv_make_empty()), it does not seem necessary.

Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/Makefile.objs |  2 +-
 qemu-img.c          | 86 +++++++++++++++++++++++++++++++++++++++++------------
 2 files changed, 68 insertions(+), 20 deletions(-)

Comments

Kevin Wolf April 16, 2014, 2:40 p.m. UTC | #1
Am 12.04.2014 um 20:57 hat Max Reitz geschrieben:
> qemu-img should use QMP commands whenever possible in order to ensure
> feature completeness of both online and offline image operations. As
> qemu-img itself has no access to QMP (since this would basically require
> linking just about everything into qemu-img), imitate QMP's
> implementation of block-commit by using commit_active_start() and then
> waiting for the block job to finish.
> 
> This new implementation does not empty the snapshot image, as opposed to
> the old implementation using bdrv_commit(). However, as QMP's
> block-commit apparently never did this and as qcow2 (which is probably
> qemu's standard image format) does not even implement the required
> function (bdrv_make_empty()), it does not seem necessary.
> 
> Signed-off-by: Max Reitz <mreitz@redhat.com>
> ---
>  block/Makefile.objs |  2 +-
>  qemu-img.c          | 86 +++++++++++++++++++++++++++++++++++++++++------------
>  2 files changed, 68 insertions(+), 20 deletions(-)
> 
> diff --git a/block/Makefile.objs b/block/Makefile.objs
> index fd88c03..2c37e80 100644
> --- a/block/Makefile.objs
> +++ b/block/Makefile.objs
> @@ -9,6 +9,7 @@ block-obj-y += snapshot.o qapi.o
>  block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
>  block-obj-$(CONFIG_POSIX) += raw-posix.o
>  block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
> +block-obj-y += mirror.o
>  
>  ifeq ($(CONFIG_POSIX),y)
>  block-obj-y += nbd.o nbd-client.o sheepdog.o
> @@ -22,7 +23,6 @@ endif
>  
>  common-obj-y += stream.o
>  common-obj-y += commit.o
> -common-obj-y += mirror.o
>  common-obj-y += backup.o
>  
>  iscsi.o-cflags     := $(LIBISCSI_CFLAGS)
> diff --git a/qemu-img.c b/qemu-img.c
> index 8455994..9fe6384 100644
> --- a/qemu-img.c
> +++ b/qemu-img.c
> @@ -30,6 +30,7 @@
>  #include "qemu/osdep.h"
>  #include "sysemu/sysemu.h"
>  #include "block/block_int.h"
> +#include "block/blockjob.h"
>  #include "block/qapi.h"
>  #include <getopt.h>
>  
> @@ -682,12 +683,49 @@ fail:
>      return ret;
>  }
>  
> +struct CommonBlockJobCBInfo {
> +    Error **errp;
> +    bool done;

Looks unused (set, but never read).

> +};
> +
> +static void common_block_job_cb(void *opaque, int ret)
> +{
> +    struct CommonBlockJobCBInfo *cbi = opaque;
> +
> +    if (ret < 0) {
> +        error_setg_errno(cbi->errp, -ret, "Block job failed");
> +    }

In practice, I guess this will give us rather bad error messages.
Perhaps we need to replace 'int ret' with 'Error *errp' for block job
callbacks in a followup.

> +
> +    cbi->done = true;
> +}
> +
> +static void run_block_job(BlockJob *job, struct CommonBlockJobCBInfo *cbi)
> +{
> +    BlockJobInfo *info;
> +
> +    do {
> +        qemu_aio_wait();
> +
> +        info = block_job_query(job);

Where does info get freed?

> +
> +        if (!info->busy && info->offset < info->len) {
> +            block_job_resume(job);
> +        }
> +    } while (info->offset < info->len);
> +
> +    block_job_complete_sync(job, cbi->errp);
> +}
> +
> +/* Same as in block.c */
> +#define COMMIT_BUF_SECTORS 2048
[...]
> +    commit_active_start(bs, base_bs, 0, COMMIT_BUF_SECTORS << BDRV_SECTOR_BITS,
> +                        BLOCKDEV_ON_ERROR_REPORT, common_block_job_cb, &cbi,
> +                        &local_err);

Though bdrv_commit() uses it for a different purpose: There it's the
buffer size that is used for committing. A single request can never be
larger than this value, but depending on bdrv_is_allocated() it can be
smaller. So the granularity for the decision whether to copy data is
still the granularity of bdrv_is_allocated(), i.e. one cluster.

For the mirror block job, the decision is taken on the granularity that
you specify. This should be the same as for bdrv_commit(), i.e. the
default that you get when you specify 0. mirror_start_job() also has a
buf_size parameter, which is however not exposed by
commit_active_start(). This is where COMMIT_BUF_SECTORS would be right.

Kevin
Max Reitz April 16, 2014, 10:03 p.m. UTC | #2
On 16.04.2014 16:40, Kevin Wolf wrote:
> Am 12.04.2014 um 20:57 hat Max Reitz geschrieben:
>> qemu-img should use QMP commands whenever possible in order to ensure
>> feature completeness of both online and offline image operations. As
>> qemu-img itself has no access to QMP (since this would basically require
>> linking just about everything into qemu-img), imitate QMP's
>> implementation of block-commit by using commit_active_start() and then
>> waiting for the block job to finish.
>>
>> This new implementation does not empty the snapshot image, as opposed to
>> the old implementation using bdrv_commit(). However, as QMP's
>> block-commit apparently never did this and as qcow2 (which is probably
>> qemu's standard image format) does not even implement the required
>> function (bdrv_make_empty()), it does not seem necessary.
>>
>> Signed-off-by: Max Reitz <mreitz@redhat.com>
>> ---
>>   block/Makefile.objs |  2 +-
>>   qemu-img.c          | 86 +++++++++++++++++++++++++++++++++++++++++------------
>>   2 files changed, 68 insertions(+), 20 deletions(-)
>>
>> diff --git a/block/Makefile.objs b/block/Makefile.objs
>> index fd88c03..2c37e80 100644
>> --- a/block/Makefile.objs
>> +++ b/block/Makefile.objs
>> @@ -9,6 +9,7 @@ block-obj-y += snapshot.o qapi.o
>>   block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
>>   block-obj-$(CONFIG_POSIX) += raw-posix.o
>>   block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
>> +block-obj-y += mirror.o
>>   
>>   ifeq ($(CONFIG_POSIX),y)
>>   block-obj-y += nbd.o nbd-client.o sheepdog.o
>> @@ -22,7 +23,6 @@ endif
>>   
>>   common-obj-y += stream.o
>>   common-obj-y += commit.o
>> -common-obj-y += mirror.o
>>   common-obj-y += backup.o
>>   
>>   iscsi.o-cflags     := $(LIBISCSI_CFLAGS)
>> diff --git a/qemu-img.c b/qemu-img.c
>> index 8455994..9fe6384 100644
>> --- a/qemu-img.c
>> +++ b/qemu-img.c
>> @@ -30,6 +30,7 @@
>>   #include "qemu/osdep.h"
>>   #include "sysemu/sysemu.h"
>>   #include "block/block_int.h"
>> +#include "block/blockjob.h"
>>   #include "block/qapi.h"
>>   #include <getopt.h>
>>   
>> @@ -682,12 +683,49 @@ fail:
>>       return ret;
>>   }
>>   
>> +struct CommonBlockJobCBInfo {
>> +    Error **errp;
>> +    bool done;
> Looks unused (set, but never read).

Right, it's an artifact from an earlier version and in-between work.

>> +};
>> +
>> +static void common_block_job_cb(void *opaque, int ret)
>> +{
>> +    struct CommonBlockJobCBInfo *cbi = opaque;
>> +
>> +    if (ret < 0) {
>> +        error_setg_errno(cbi->errp, -ret, "Block job failed");
>> +    }
> In practice, I guess this will give us rather bad error messages.
> Perhaps we need to replace 'int ret' with 'Error *errp' for block job
> callbacks in a followup.

Probably, yes.

>> +
>> +    cbi->done = true;
>> +}
>> +
>> +static void run_block_job(BlockJob *job, struct CommonBlockJobCBInfo *cbi)
>> +{
>> +    BlockJobInfo *info;
>> +
>> +    do {
>> +        qemu_aio_wait();
>> +
>> +        info = block_job_query(job);
> Where does info get freed?

That, indeed, is a good question. I'll fix it, thanks.

>> +
>> +        if (!info->busy && info->offset < info->len) {
>> +            block_job_resume(job);
>> +        }
>> +    } while (info->offset < info->len);
>> +
>> +    block_job_complete_sync(job, cbi->errp);
>> +}
>> +
>> +/* Same as in block.c */
>> +#define COMMIT_BUF_SECTORS 2048
> [...]
>> +    commit_active_start(bs, base_bs, 0, COMMIT_BUF_SECTORS << BDRV_SECTOR_BITS,
>> +                        BLOCKDEV_ON_ERROR_REPORT, common_block_job_cb, &cbi,
>> +                        &local_err);
> Though bdrv_commit() uses it for a different purpose: There it's the
> buffer size that is used for committing. A single request can never be
> larger than this value, but depending on bdrv_is_allocated() it can be
> smaller. So the granularity for the decision whether to copy data is
> still the granularity of bdrv_is_allocated(), i.e. one cluster.
>
> For the mirror block job, the decision is taken on the granularity that
> you specify. This should be the same as for bdrv_commit(), i.e. the
> default that you get when you specify 0. mirror_start_job() also has a
> buf_size parameter, which is however not exposed by
> commit_active_start(). This is where COMMIT_BUF_SECTORS would be right.

Hm, interesting. I remember trying this and finding it pretty slow (test
20 and some other test images). However, I can't reproduce that any
longer. I guess that means I can drop patch 1 of this series, too.

Max
diff mbox

Patch

diff --git a/block/Makefile.objs b/block/Makefile.objs
index fd88c03..2c37e80 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -9,6 +9,7 @@  block-obj-y += snapshot.o qapi.o
 block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
 block-obj-$(CONFIG_POSIX) += raw-posix.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
+block-obj-y += mirror.o
 
 ifeq ($(CONFIG_POSIX),y)
 block-obj-y += nbd.o nbd-client.o sheepdog.o
@@ -22,7 +23,6 @@  endif
 
 common-obj-y += stream.o
 common-obj-y += commit.o
-common-obj-y += mirror.o
 common-obj-y += backup.o
 
 iscsi.o-cflags     := $(LIBISCSI_CFLAGS)
diff --git a/qemu-img.c b/qemu-img.c
index 8455994..9fe6384 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -30,6 +30,7 @@ 
 #include "qemu/osdep.h"
 #include "sysemu/sysemu.h"
 #include "block/block_int.h"
+#include "block/blockjob.h"
 #include "block/qapi.h"
 #include <getopt.h>
 
@@ -682,12 +683,49 @@  fail:
     return ret;
 }
 
+struct CommonBlockJobCBInfo {
+    Error **errp;
+    bool done;
+};
+
+static void common_block_job_cb(void *opaque, int ret)
+{
+    struct CommonBlockJobCBInfo *cbi = opaque;
+
+    if (ret < 0) {
+        error_setg_errno(cbi->errp, -ret, "Block job failed");
+    }
+
+    cbi->done = true;
+}
+
+static void run_block_job(BlockJob *job, struct CommonBlockJobCBInfo *cbi)
+{
+    BlockJobInfo *info;
+
+    do {
+        qemu_aio_wait();
+
+        info = block_job_query(job);
+
+        if (!info->busy && info->offset < info->len) {
+            block_job_resume(job);
+        }
+    } while (info->offset < info->len);
+
+    block_job_complete_sync(job, cbi->errp);
+}
+
+/* Same as in block.c */
+#define COMMIT_BUF_SECTORS 2048
+
 static int img_commit(int argc, char **argv)
 {
     int c, ret, flags;
     const char *filename, *fmt, *cache;
-    BlockDriverState *bs;
+    BlockDriverState *bs, *base_bs;
     bool quiet = false;
+    Error *local_err = NULL;
 
     fmt = NULL;
     cache = BDRV_DEFAULT_CACHE;
@@ -728,29 +766,39 @@  static int img_commit(int argc, char **argv)
     if (!bs) {
         return 1;
     }
-    ret = bdrv_commit(bs);
-    switch(ret) {
-    case 0:
-        qprintf(quiet, "Image committed.\n");
-        break;
-    case -ENOENT:
-        error_report("No disk inserted");
-        break;
-    case -EACCES:
-        error_report("Image is read-only");
-        break;
-    case -ENOTSUP:
-        error_report("Image is already committed");
-        break;
-    default:
-        error_report("Error while committing image");
-        break;
+
+    /* This is different from QMP, which by default uses the deepest file in the
+     * backing chain (i.e., the very base); however, the traditional behavior of
+     * qemu-img commit is using the immediate backing file. */
+    base_bs = bs->backing_hd;
+    if (!base_bs) {
+        error_set(&local_err, QERR_BASE_NOT_FOUND, "NULL");
+        goto done;
     }
 
+    struct CommonBlockJobCBInfo cbi = {
+        .errp = &local_err,
+    };
+
+    commit_active_start(bs, base_bs, 0, COMMIT_BUF_SECTORS << BDRV_SECTOR_BITS,
+                        BLOCKDEV_ON_ERROR_REPORT, common_block_job_cb, &cbi,
+                        &local_err);
+    if (local_err) {
+        goto done;
+    }
+
+    run_block_job(bs->job, &cbi);
+
+done:
     bdrv_unref(bs);
-    if (ret) {
+
+    if (local_err) {
+        qerror_report_err(local_err);
+        error_free(local_err);
         return 1;
     }
+
+    qprintf(quiet, "Image committed.\n");
     return 0;
 }