diff mbox

[v3,5/9] qmp: add block_stream command

Message ID 1323784351-25531-6-git-send-email-stefanha@linux.vnet.ibm.com
State New
Headers show

Commit Message

Stefan Hajnoczi Dec. 13, 2011, 1:52 p.m. UTC
Add the block_stream command, which starts copy backing file contents
into the image file.  Later patches add control over the background copy
speed, cancelation, and querying running streaming operations.

Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
---
 blockdev.c       |   68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 hmp-commands.hx  |   13 ++++++++++
 hmp.c            |   14 +++++++++++
 hmp.h            |    1 +
 monitor.c        |    3 ++
 monitor.h        |    1 +
 qapi-schema.json |   53 ++++++++++++++++++++++++++++++++++++++++++
 qerror.c         |    4 +++
 qerror.h         |    3 ++
 qmp-commands.hx  |   18 ++++++++++++++
 trace-events     |    4 +++
 11 files changed, 182 insertions(+), 0 deletions(-)

Comments

Luiz Capitulino Jan. 4, 2012, 12:59 p.m. UTC | #1
On Tue, 13 Dec 2011 13:52:27 +0000
Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> wrote:

> Add the block_stream command, which starts copy backing file contents
> into the image file.  Later patches add control over the background copy
> speed, cancelation, and querying running streaming operations.

Please also mention that you're adding an event, otherwise it comes as a
surprise to the reviewer.

When we talked about this implementation (ie. having events, cancellation etc)
I thought you were going to do something very specific to the streaming api,
like migration. However, you ended up adding async QMP support to the block
layer.

I have the impression this could be generalized a bit more and be moved to
QMP instead (and I strongly feel that's what we should do).

There's only one problem: we are waiting for the new QMP server to work on
that. I hope to have it integrated by the end of this month, but it might
be possible to add async support to the current server if waiting is not
an option.

> 
> Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
> ---
>  blockdev.c       |   68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  hmp-commands.hx  |   13 ++++++++++
>  hmp.c            |   14 +++++++++++
>  hmp.h            |    1 +
>  monitor.c        |    3 ++
>  monitor.h        |    1 +
>  qapi-schema.json |   53 ++++++++++++++++++++++++++++++++++++++++++
>  qerror.c         |    4 +++
>  qerror.h         |    3 ++
>  qmp-commands.hx  |   18 ++++++++++++++
>  trace-events     |    4 +++
>  11 files changed, 182 insertions(+), 0 deletions(-)
> 
> diff --git a/blockdev.c b/blockdev.c
> index dbf0251..08355cf 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -13,8 +13,11 @@
>  #include "qerror.h"
>  #include "qemu-option.h"
>  #include "qemu-config.h"
> +#include "qemu-objects.h"
>  #include "sysemu.h"
>  #include "block_int.h"
> +#include "qmp-commands.h"
> +#include "trace.h"
>  
>  static QTAILQ_HEAD(drivelist, DriveInfo) drives = QTAILQ_HEAD_INITIALIZER(drives);
>  
> @@ -887,3 +890,68 @@ int do_block_resize(Monitor *mon, const QDict *qdict, QObject **ret_data)
>  
>      return 0;
>  }
> +
> +static QObject *qobject_from_block_job(BlockJob *job)
> +{
> +    return qobject_from_jsonf("{ 'type': %s,"
> +                              "'device': %s,"
> +                              "'len': %" PRId64 ","
> +                              "'offset': %" PRId64 ","
> +                              "'speed': %" PRId64 " }",
> +                              job->job_type->job_type,
> +                              bdrv_get_device_name(job->bs),
> +                              job->len,
> +                              job->offset,
> +                              job->speed);
> +}
> +
> +static void block_stream_cb(void *opaque, int ret)
> +{
> +    BlockDriverState *bs = opaque;
> +    QObject *obj;
> +
> +    trace_block_stream_cb(bs, bs->job, ret);
> +
> +    assert(bs->job);
> +    obj = qobject_from_block_job(bs->job);
> +    if (ret < 0) {
> +        QDict *dict = qobject_to_qdict(obj);
> +        qdict_put(dict, "error", qstring_from_str(strerror(-ret)));
> +    }
> +
> +    monitor_protocol_event(QEVENT_BLOCK_JOB_COMPLETED, obj);
> +    qobject_decref(obj);
> +}
> +
> +void qmp_block_stream(const char *device, bool has_base,
> +                      const char *base, Error **errp)
> +{
> +    BlockDriverState *bs;
> +    int ret;
> +
> +    bs = bdrv_find(device);
> +    if (!bs) {
> +        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
> +        return;
> +    }
> +
> +    /* Base device not supported */
> +    if (base) {
> +        error_set(errp, QERR_NOT_SUPPORTED);
> +        return;
> +    }

Is this a future feature? In this case I'd rather drop the argument for
now and add it later. The only detail is that we haven't reached consensus
if it will be required to add a new command for that or if we'll be able
to just extend existing commands.

> +
> +    ret = stream_start(bs, NULL, block_stream_cb, bs);
> +    if (ret < 0) {
> +        switch (ret) {
> +        case -EBUSY:
> +            error_set(errp, QERR_DEVICE_IN_USE, device);
> +            return;
> +        default:
> +            error_set(errp, QERR_NOT_SUPPORTED);
> +            return;
> +        }
> +    }
> +
> +    trace_qmp_block_stream(bs, bs->job);
> +}
> diff --git a/hmp-commands.hx b/hmp-commands.hx
> index 79a9195..c0de41c 100644
> --- a/hmp-commands.hx
> +++ b/hmp-commands.hx
> @@ -70,6 +70,19 @@ but should be used with extreme caution.  Note that this command only
>  resizes image files, it can not resize block devices like LVM volumes.
>  ETEXI
>  
> +    {
> +        .name       = "block_stream",
> +        .args_type  = "device:B,base:s?",
> +        .params     = "device [base]",
> +        .help       = "copy data from a backing file into a block device",
> +        .mhandler.cmd = hmp_block_stream,
> +    },
> +
> +STEXI
> +@item block_stream
> +@findex block_stream
> +Copy data from a backing file into a block device.
> +ETEXI
>  
>      {
>          .name       = "eject",
> diff --git a/hmp.c b/hmp.c
> index dfab7ad..8b7d7d4 100644
> --- a/hmp.c
> +++ b/hmp.c
> @@ -531,3 +531,17 @@ void hmp_cpu(Monitor *mon, const QDict *qdict)
>          monitor_printf(mon, "invalid CPU index\n");
>      }
>  }
> +
> +void hmp_block_stream(Monitor *mon, const QDict *qdict)
> +{
> +    Error *error = NULL;
> +    const char *device = qdict_get_str(qdict, "device");
> +    const char *base = qdict_get_try_str(qdict, "base");
> +
> +    qmp_block_stream(device, base != NULL, base, &error);
> +
> +    if (error) {
> +        monitor_printf(mon, "%s\n", error_get_pretty(error));
> +        error_free(error);
> +    }

There's a hmp_handle_error() helper recently merged that does exactly that.

> +}
> diff --git a/hmp.h b/hmp.h
> index 4422578..3cb6fe5 100644
> --- a/hmp.h
> +++ b/hmp.h
> @@ -37,5 +37,6 @@ void hmp_stop(Monitor *mon, const QDict *qdict);
>  void hmp_system_reset(Monitor *mon, const QDict *qdict);
>  void hmp_system_powerdown(Monitor *mon, const QDict *qdict);
>  void hmp_cpu(Monitor *mon, const QDict *qdict);
> +void hmp_block_stream(Monitor *mon, const QDict *qdict);
>  
>  #endif
> diff --git a/monitor.c b/monitor.c
> index 1be222e..a33218c 100644
> --- a/monitor.c
> +++ b/monitor.c
> @@ -479,6 +479,9 @@ void monitor_protocol_event(MonitorEvent event, QObject *data)
>          case QEVENT_SPICE_DISCONNECTED:
>              event_name = "SPICE_DISCONNECTED";
>              break;
> +        case QEVENT_BLOCK_JOB_COMPLETED:
> +            event_name = "BLOCK_JOB_COMPLETED";
> +            break;
>          default:
>              abort();
>              break;
> diff --git a/monitor.h b/monitor.h
> index e76795f..d9b07dc 100644
> --- a/monitor.h
> +++ b/monitor.h
> @@ -35,6 +35,7 @@ typedef enum MonitorEvent {
>      QEVENT_SPICE_CONNECTED,
>      QEVENT_SPICE_INITIALIZED,
>      QEVENT_SPICE_DISCONNECTED,
> +    QEVENT_BLOCK_JOB_COMPLETED,
>      QEVENT_MAX,
>  } MonitorEvent;
>  
> diff --git a/qapi-schema.json b/qapi-schema.json
> index fbbdbe0..65308d2 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -901,3 +901,56 @@
>  # Notes: Do not use this command.
>  ##
>  { 'command': 'cpu', 'data': {'index': 'int'} }
> +
> +##
> +# @block_stream:

Command names should be verbs and please use an hyphen.

> +#
> +# Copy data from a backing file into a block device.
> +#
> +# The block streaming operation is performed in the background until the entire
> +# backing file has been copied.  This command returns immediately once streaming
> +# has started.  The status of ongoing block streaming operations can be checked
> +# with query-block-jobs.  The operation can be stopped before it has completed
> +# using the block_job_cancel command.
> +#
> +# If a base file is specified then sectors are not copied from that base file and
> +# its backing chain.  When streaming completes the image file will have the base
> +# file as its backing file.  This can be used to stream a subset of the backing
> +# file chain instead of flattening the entire image.
> +#
> +# On successful completion the image file is updated to drop the backing file.

Nice doc.

> +#
> +# Arguments:
> +#
> +# @device: device name
> +# @base:   common backing file
> +#
> +# Errors:
> +#
> +# DeviceInUse:    streaming is already active on this device
> +# DeviceNotFound: device name is invalid
> +# NotSupported:   image streaming is not supported by this device

The convention used in this file to document errors is different.

> +#
> +# Events:
> +#
> +# On completion the BLOCK_JOB_COMPLETED event is raised with the following
> +# fields:
> +#
> +# - type:     job type ("stream" for image streaming, json-string)
> +# - device:   device name (json-string)
> +# - len:      maximum progress value (json-int)
> +# - offset:   current progress value (json-int)
> +# - speed:    rate limit, bytes per second (json-int)
> +# - error:    error message (json-string, only on error)
> +#
> +# The completion event is raised both on success and on failure.  On
> +# success offset is equal to len.  On failure offset and len can be
> +# used to indicate at which point the operation failed.
> +#
> +# On failure the error field contains a human-readable error message.  There are
> +# no semantics other than that streaming has failed and clients should not try
> +# to interpret the error string.

Events should be documented in QMP/qmp-events.txt.

> +#
> +# Since: 1.1
> +##
> +{ 'command': 'block_stream', 'data': { 'device': 'str', '*base': 'str' } }
> diff --git a/qerror.c b/qerror.c
> index 14a1d59..605381a 100644
> --- a/qerror.c
> +++ b/qerror.c
> @@ -174,6 +174,10 @@ static const QErrorStringTable qerror_table[] = {
>          .desc      = "No '%(bus)' bus found for device '%(device)'",
>      },
>      {
> +        .error_fmt = QERR_NOT_SUPPORTED,
> +        .desc      = "Not supported",

We have QERR_UNSUPPORTED already.

> +    },
> +    {
>          .error_fmt = QERR_OPEN_FILE_FAILED,
>          .desc      = "Could not open '%(filename)'",
>      },
> diff --git a/qerror.h b/qerror.h
> index 560d458..b5c0cfe 100644
> --- a/qerror.h
> +++ b/qerror.h
> @@ -147,6 +147,9 @@ QError *qobject_to_qerror(const QObject *obj);
>  #define QERR_NO_BUS_FOR_DEVICE \
>      "{ 'class': 'NoBusForDevice', 'data': { 'device': %s, 'bus': %s } }"
>  
> +#define QERR_NOT_SUPPORTED \
> +    "{ 'class': 'NotSupported', 'data': {} }"
> +
>  #define QERR_OPEN_FILE_FAILED \
>      "{ 'class': 'OpenFileFailed', 'data': { 'filename': %s } }"
>  
> diff --git a/qmp-commands.hx b/qmp-commands.hx
> index 94da2a8..8c1c934 100644
> --- a/qmp-commands.hx
> +++ b/qmp-commands.hx
> @@ -683,6 +683,24 @@ Example:
>  EQMP
>  
>      {
> +        .name       = "block_stream",
> +        .args_type  = "device:B,base:s?",
> +        .mhandler.cmd_new = qmp_marshal_input_block_stream,
> +    },

You can drop the docs below. The above entry is needed because the current
QMP server still uses it (which causes duplication with the schema file,
this will be fixed soon).

> +
> +SQMP
> +block_stream
> +------------
> +See qapi-schema.json for documentation.
> +
> +Examples:
> +
> +-> { "execute": "block_stream", "arguments": { "device": "virtio0" } }
> +<- { "return":  {} }
> +
> +EQMP
> +
> +    {
>          .name       = "blockdev-snapshot-sync",
>          .args_type  = "device:B,snapshot-file:s?,format:s?",
>          .params     = "device [new-image-file] [format]",
> diff --git a/trace-events b/trace-events
> index 4efcd95..6c1eec2 100644
> --- a/trace-events
> +++ b/trace-events
> @@ -73,6 +73,10 @@ bdrv_co_copy_on_readv(void *bs, int64_t sector_num, int nb_sectors, int64_t clus
>  stream_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d"
>  stream_start(void *bs, void *base, void *s, void *co, void *opaque) "bs %p base %p s %p co %p opaque %p"
>  
> +# blockdev.c
> +block_stream_cb(void *bs, void *job, int ret) "bs %p job %p ret %d"
> +qmp_block_stream(void *bs, void *job) "bs %p job %p"
> +
>  # hw/virtio-blk.c
>  virtio_blk_req_complete(void *req, int status) "req %p status %d"
>  virtio_blk_rw_complete(void *req, int ret) "req %p ret %d"
Luiz Capitulino Jan. 4, 2012, 1:11 p.m. UTC | #2
By the way, most of my review comments applies to the other commands being
added in this series:

- Use verbs and separate word with hyphens
- Follow the documentation syntax (see other commands for examples)
- Use the hmp_handle_error() helper
- Drop any SQMP/EQMP documentation
Stefan Hajnoczi Jan. 5, 2012, 1:48 p.m. UTC | #3
On Wed, Jan 4, 2012 at 12:59 PM, Luiz Capitulino <lcapitulino@redhat.com> wrote:
> On Tue, 13 Dec 2011 13:52:27 +0000
> Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> wrote:
>
>> Add the block_stream command, which starts copy backing file contents
>> into the image file.  Later patches add control over the background copy
>> speed, cancelation, and querying running streaming operations.
>
> Please also mention that you're adding an event, otherwise it comes as a
> surprise to the reviewer.

Ok.

> When we talked about this implementation (ie. having events, cancellation etc)
> I thought you were going to do something very specific to the streaming api,
> like migration. However, you ended up adding async QMP support to the block
> layer.
>
> I have the impression this could be generalized a bit more and be moved to
> QMP instead (and I strongly feel that's what we should do).
>
> There's only one problem: we are waiting for the new QMP server to work on
> that. I hope to have it integrated by the end of this month, but it might
> be possible to add async support to the current server if waiting is not
> an option.

I'm not sure what you'd like here, could you be more specific?  I have
introduced the concept of a block job - a long-running operation on
block devices - and the completion event when the job finishes.  I
guess you're thinking of a generic QMP job concept with a completion
event?  Or something else?

What I'd like to avoid at this stage is changing the QMP API as seen
by clients because we already have at least one client which relies on
this API.  I know that sucks and that this is my fault because I've
been dragging out this patch series for too long.  But in a situation
like this I think it's better to live with an API that doesn't meet
the current philosophy on nice APIs but works flawlessly with both new
and existing clients.  But it depends on the specifics, what changes
do you suggest?

>> +    /* Base device not supported */
>> +    if (base) {
>> +        error_set(errp, QERR_NOT_SUPPORTED);
>> +        return;
>> +    }
>
> Is this a future feature? In this case I'd rather drop the argument for
> now and add it later. The only detail is that we haven't reached consensus
> if it will be required to add a new command for that or if we'll be able
> to just extend existing commands.

Marcelo sent out patches for this features.  I think this series and
Marcelo's series can/will be merged one after another so that this
resolves itself without us needing to do anything.

http://www.mail-archive.com/qemu-devel@nongnu.org/msg91742.html

>> +    qmp_block_stream(device, base != NULL, base, &error);
>> +
>> +    if (error) {
>> +        monitor_printf(mon, "%s\n", error_get_pretty(error));
>> +        error_free(error);
>> +    }
>
> There's a hmp_handle_error() helper recently merged that does exactly that.

Ok.

>> diff --git a/qapi-schema.json b/qapi-schema.json
>> index fbbdbe0..65308d2 100644
>> --- a/qapi-schema.json
>> +++ b/qapi-schema.json
>> @@ -901,3 +901,56 @@
>>  # Notes: Do not use this command.
>>  ##
>>  { 'command': 'cpu', 'data': {'index': 'int'} }
>> +
>> +##
>> +# @block_stream:
>
> Command names should be verbs and please use an hyphen.

These commands have been in the Image Streaming API spec from the beginning:

http://wiki.qemu.org/Features/LiveBlockMigration/ImageStreamingAPI

We cannot prettify the API at this stage.  You, Anthony, and I
discussed QAPI command naming on IRC maybe two months ago and this
seemed to be the way to do it.  These commands fit the existing
block_* commands and are already in use by libvirt - if we change
names now we break libvirt.

>> +# Errors:
>> +#
>> +# DeviceInUse:    streaming is already active on this device
>> +# DeviceNotFound: device name is invalid
>> +# NotSupported:   image streaming is not supported by this device
>
> The convention used in this file to document errors is different.

Ok.

>> +#
>> +# Events:
>> +#
>> +# On completion the BLOCK_JOB_COMPLETED event is raised with the following
>> +# fields:
>> +#
>> +# - type:     job type ("stream" for image streaming, json-string)
>> +# - device:   device name (json-string)
>> +# - len:      maximum progress value (json-int)
>> +# - offset:   current progress value (json-int)
>> +# - speed:    rate limit, bytes per second (json-int)
>> +# - error:    error message (json-string, only on error)
>> +#
>> +# The completion event is raised both on success and on failure.  On
>> +# success offset is equal to len.  On failure offset and len can be
>> +# used to indicate at which point the operation failed.
>> +#
>> +# On failure the error field contains a human-readable error message.  There are
>> +# no semantics other than that streaming has failed and clients should not try
>> +# to interpret the error string.
>
> Events should be documented in QMP/qmp-events.txt.

Ok.

>> +#
>> +# Since: 1.1
>> +##
>> +{ 'command': 'block_stream', 'data': { 'device': 'str', '*base': 'str' } }
>> diff --git a/qerror.c b/qerror.c
>> index 14a1d59..605381a 100644
>> --- a/qerror.c
>> +++ b/qerror.c
>> @@ -174,6 +174,10 @@ static const QErrorStringTable qerror_table[] = {
>>          .desc      = "No '%(bus)' bus found for device '%(device)'",
>>      },
>>      {
>> +        .error_fmt = QERR_NOT_SUPPORTED,
>> +        .desc      = "Not supported",
>
> We have QERR_UNSUPPORTED already.

I know.  We're stuck in a hard place here again because NotSupported
has been in the Image Streaming API spec and hence implemented in
libvirt for a while now.  If we change this then an old client which
only understands NotSupported will not know what to do with the
Unsupported error.

(Unsupported was not in QEMU when the Image Streaming API was defined.)

>> diff --git a/qmp-commands.hx b/qmp-commands.hx
>> index 94da2a8..8c1c934 100644
>> --- a/qmp-commands.hx
>> +++ b/qmp-commands.hx
>> @@ -683,6 +683,24 @@ Example:
>>  EQMP
>>
>>      {
>> +        .name       = "block_stream",
>> +        .args_type  = "device:B,base:s?",
>> +        .mhandler.cmd_new = qmp_marshal_input_block_stream,
>> +    },
>
> You can drop the docs below. The above entry is needed because the current
> QMP server still uses it (which causes duplication with the schema file,
> this will be fixed soon).

Ok.
Luiz Capitulino Jan. 5, 2012, 2:16 p.m. UTC | #4
On Thu, 5 Jan 2012 13:48:43 +0000
Stefan Hajnoczi <stefanha@gmail.com> wrote:

> On Wed, Jan 4, 2012 at 12:59 PM, Luiz Capitulino <lcapitulino@redhat.com> wrote:
> > On Tue, 13 Dec 2011 13:52:27 +0000
> > Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> wrote:
> >
> >> Add the block_stream command, which starts copy backing file contents
> >> into the image file.  Later patches add control over the background copy
> >> speed, cancelation, and querying running streaming operations.
> >
> > Please also mention that you're adding an event, otherwise it comes as a
> > surprise to the reviewer.
> 
> Ok.
> 
> > When we talked about this implementation (ie. having events, cancellation etc)
> > I thought you were going to do something very specific to the streaming api,
> > like migration. However, you ended up adding async QMP support to the block
> > layer.
> >
> > I have the impression this could be generalized a bit more and be moved to
> > QMP instead (and I strongly feel that's what we should do).
> >
> > There's only one problem: we are waiting for the new QMP server to work on
> > that. I hope to have it integrated by the end of this month, but it might
> > be possible to add async support to the current server if waiting is not
> > an option.
> 
> I'm not sure what you'd like here, could you be more specific?  I have
> introduced the concept of a block job - a long-running operation on
> block devices - and the completion event when the job finishes.  I
> guess you're thinking of a generic QMP job concept with a completion
> event?

Exactly. We should have QMP_JOB_COMPLETED instead of BLOCK_JOB_COMPLETED,
for example.

>  Or something else?
> 
> What I'd like to avoid at this stage is changing the QMP API as seen
> by clients because we already have at least one client which relies on
> this API.  I know that sucks and that this is my fault because I've
> been dragging out this patch series for too long.  But in a situation
> like this I think it's better to live with an API that doesn't meet
> the current philosophy on nice APIs but works flawlessly with both new
> and existing clients.  But it depends on the specifics, what changes
> do you suggest?

[...]

> >> diff --git a/qapi-schema.json b/qapi-schema.json
> >> index fbbdbe0..65308d2 100644
> >> --- a/qapi-schema.json
> >> +++ b/qapi-schema.json
> >> @@ -901,3 +901,56 @@
> >>  # Notes: Do not use this command.
> >>  ##
> >>  { 'command': 'cpu', 'data': {'index': 'int'} }
> >> +
> >> +##
> >> +# @block_stream:
> >
> > Command names should be verbs and please use an hyphen.
> 
> These commands have been in the Image Streaming API spec from the beginning:
> 
> http://wiki.qemu.org/Features/LiveBlockMigration/ImageStreamingAPI
> 
> We cannot prettify the API at this stage.  You, Anthony, and I
> discussed QAPI command naming on IRC maybe two months ago and this
> seemed to be the way to do it.  These commands fit the existing
> block_* commands and are already in use by libvirt - if we change
> names now we break libvirt.

[...]

> >> +#
> >> +# Since: 1.1
> >> +##
> >> +{ 'command': 'block_stream', 'data': { 'device': 'str', '*base': 'str' } }
> >> diff --git a/qerror.c b/qerror.c
> >> index 14a1d59..605381a 100644
> >> --- a/qerror.c
> >> +++ b/qerror.c
> >> @@ -174,6 +174,10 @@ static const QErrorStringTable qerror_table[] = {
> >>          .desc      = "No '%(bus)' bus found for device '%(device)'",
> >>      },
> >>      {
> >> +        .error_fmt = QERR_NOT_SUPPORTED,
> >> +        .desc      = "Not supported",
> >
> > We have QERR_UNSUPPORTED already.
> 
> I know.  We're stuck in a hard place here again because NotSupported
> has been in the Image Streaming API spec and hence implemented in
> libvirt for a while now.  If we change this then an old client which
> only understands NotSupported will not know what to do with the
> Unsupported error.
> 
> (Unsupported was not in QEMU when the Image Streaming API was defined.)

Let me try to understand it: is libvirt relying on an off tree API and
we are now required to have stable guarantees to off tree APIs?

I can't even express how insane this looks to me, at least not without
being too harsh.
Stefan Hajnoczi Jan. 5, 2012, 2:20 p.m. UTC | #5
On Thu, Jan 5, 2012 at 1:48 PM, Stefan Hajnoczi <stefanha@gmail.com> wrote:
>>> +    /* Base device not supported */
>>> +    if (base) {
>>> +        error_set(errp, QERR_NOT_SUPPORTED);
>>> +        return;
>>> +    }
>>
>> Is this a future feature? In this case I'd rather drop the argument for
>> now and add it later. The only detail is that we haven't reached consensus
>> if it will be required to add a new command for that or if we'll be able
>> to just extend existing commands.
>
> Marcelo sent out patches for this features.  I think this series and
> Marcelo's series can/will be merged one after another so that this
> resolves itself without us needing to do anything.
>
> http://www.mail-archive.com/qemu-devel@nongnu.org/msg91742.html

Just talked to Marcelo.  We'll merge his series into this one so this
issue will be gone completely in v4.
Eric Blake Jan. 5, 2012, 3:35 p.m. UTC | #6
On 01/05/2012 07:16 AM, Luiz Capitulino wrote:
>> I know.  We're stuck in a hard place here again because NotSupported
>> has been in the Image Streaming API spec and hence implemented in
>> libvirt for a while now.  If we change this then an old client which
>> only understands NotSupported will not know what to do with the
>> Unsupported error.
>>
>> (Unsupported was not in QEMU when the Image Streaming API was defined.)
> 
> Let me try to understand it: is libvirt relying on an off tree API and
> we are now required to have stable guarantees to off tree APIs?

No.  Libvirt recognizes the off-tree spelling, but does not rely on it -
after all, the goal of libvirt is to provide the high level action,
using whatever underlying mechanism(s) necessary to get to that action,
even if it means using several different attempts until one actually works.

If a user has the older libvirt, which only expects the off-tree
spelling, then that user's setup will break if they upgrade qemu but not
libvirt.  But that's not a severe problem - they could have only been
relying on the situation if they were using an off-tree build in the
first place, so they should be aware that upgrading qemu is a
potentially risky scenario, and that they may have to deal with the pieces.

Newer libvirt can be easily taught to recognize both the off-tree and
stable spellings of the error (checking the stable first, of course,
since that will be more likely as the off-tree qemu builds filter out
over time).  At which point, using either the off-tree qemu or the
stable qemu should both work with the newer libvirt.
Anthony Liguori Jan. 5, 2012, 3:56 p.m. UTC | #7
On 01/05/2012 09:35 AM, Eric Blake wrote:
> On 01/05/2012 07:16 AM, Luiz Capitulino wrote:
>>> I know.  We're stuck in a hard place here again because NotSupported
>>> has been in the Image Streaming API spec and hence implemented in
>>> libvirt for a while now.  If we change this then an old client which
>>> only understands NotSupported will not know what to do with the
>>> Unsupported error.
>>>
>>> (Unsupported was not in QEMU when the Image Streaming API was defined.)
>>
>> Let me try to understand it: is libvirt relying on an off tree API and
>> we are now required to have stable guarantees to off tree APIs?
>
> No.  Libvirt recognizes the off-tree spelling, but does not rely on it -
> after all, the goal of libvirt is to provide the high level action,
> using whatever underlying mechanism(s) necessary to get to that action,
> even if it means using several different attempts until one actually works.
>
> If a user has the older libvirt, which only expects the off-tree
> spelling, then that user's setup will break if they upgrade qemu but not
> libvirt.  But that's not a severe problem - they could have only been
> relying on the situation if they were using an off-tree build in the
> first place, so they should be aware that upgrading qemu is a
> potentially risky scenario, and that they may have to deal with the pieces.

Right, this is the difference between ABI compatibility and strict backwards 
compatibility.

To achieve ABI compatibility, we need to not overload BLOCK_JOB_COMPLETED to 
mean something other than libvirt what expects it to mean.

We MUST provide ABI compatibility and SHOULD provide backwards compatibility 
whenever possible.

In this case, I'd suggest that in the very least, we should add 
BLOCK_JOB_COMPLETED to qapi-schema.json with gen=False set.  That way it's 
codified in the schema to ensure we maintain ABI compatibility.

That said, I'm inclined to say that we should just use the BLOCK_JOB_COMPLETED 
name because I don't think we gain a lot by using QMP_JOB_COMPLETED (not that we 
shouldn't introduce it, but using it here isn't going to make or break anything).

With respect to libvirt relying on interfaces before they exist in QEMU, we need 
to be a bit flexible here.  We want to get better at co-development to help make 
libvirt support QEMU features as the bleeding edge.

Forcing libvirt to wait until a feature is fully baked in QEMU will ensure 
there's always a feature gap in libvirt which is in none of our best interests.

Now that we have gen=False support in qapi-schema.json, we can agree to an API 
and add it to QEMU before we fully implement it.  This gives libvirt something 
to work off of that they can rely upon.

Regards,

Anthony Liguori

>
> Newer libvirt can be easily taught to recognize both the off-tree and
> stable spellings of the error (checking the stable first, of course,
> since that will be more likely as the off-tree qemu builds filter out
> over time).  At which point, using either the off-tree qemu or the
> stable qemu should both work with the newer libvirt.
>
Luiz Capitulino Jan. 5, 2012, 8:26 p.m. UTC | #8
On Thu, 05 Jan 2012 09:56:44 -0600
Anthony Liguori <aliguori@us.ibm.com> wrote:

> On 01/05/2012 09:35 AM, Eric Blake wrote:
> > On 01/05/2012 07:16 AM, Luiz Capitulino wrote:
> >>> I know.  We're stuck in a hard place here again because NotSupported
> >>> has been in the Image Streaming API spec and hence implemented in
> >>> libvirt for a while now.  If we change this then an old client which
> >>> only understands NotSupported will not know what to do with the
> >>> Unsupported error.
> >>>
> >>> (Unsupported was not in QEMU when the Image Streaming API was defined.)
> >>
> >> Let me try to understand it: is libvirt relying on an off tree API and
> >> we are now required to have stable guarantees to off tree APIs?
> >
> > No.  Libvirt recognizes the off-tree spelling, but does not rely on it -
> > after all, the goal of libvirt is to provide the high level action,
> > using whatever underlying mechanism(s) necessary to get to that action,
> > even if it means using several different attempts until one actually works.
> >
> > If a user has the older libvirt, which only expects the off-tree
> > spelling, then that user's setup will break if they upgrade qemu but not
> > libvirt.  But that's not a severe problem - they could have only been
> > relying on the situation if they were using an off-tree build in the
> > first place, so they should be aware that upgrading qemu is a
> > potentially risky scenario, and that they may have to deal with the pieces.
> 
> Right, this is the difference between ABI compatibility and strict backwards 
> compatibility.
> 
> To achieve ABI compatibility, we need to not overload BLOCK_JOB_COMPLETED to 
> mean something other than libvirt what expects it to mean.
> 
> We MUST provide ABI compatibility and SHOULD provide backwards compatibility 
> whenever possible.
> 
> In this case, I'd suggest that in the very least, we should add 
> BLOCK_JOB_COMPLETED to qapi-schema.json with gen=False set.  That way it's 
> codified in the schema to ensure we maintain ABI compatibility.
> 
> That said, I'm inclined to say that we should just use the BLOCK_JOB_COMPLETED 
> name because I don't think we gain a lot by using QMP_JOB_COMPLETED (not that we 
> shouldn't introduce it, but using it here isn't going to make or break anything).

What I'm proposing is not just a rename, but adding proper async support to QMP,
instead of adding something that is specific to the block layer.

> With respect to libvirt relying on interfaces before they exist in QEMU, we need 
> to be a bit flexible here.  We want to get better at co-development to help make 
> libvirt support QEMU features as the bleeding edge.
> 
> Forcing libvirt to wait until a feature is fully baked in QEMU will ensure 
> there's always a feature gap in libvirt which is in none of our best interests.

We can ask them to wait at least until the API is merged. Most good review
and potential problems will only come when the patches are worked on and
reviewed on the list.

> Now that we have gen=False support in qapi-schema.json, we can agree to an API 
> and add it to QEMU before we fully implement it.  This gives libvirt something 
> to work off of that they can rely upon.

Meaning that we can modify the API later? Okay, but in this case we have
reasons to modify it before it's merged and I don't see why we shouldn't
do it.

> 
> Regards,
> 
> Anthony Liguori
> 
> >
> > Newer libvirt can be easily taught to recognize both the off-tree and
> > stable spellings of the error (checking the stable first, of course,
> > since that will be more likely as the off-tree qemu builds filter out
> > over time).  At which point, using either the off-tree qemu or the
> > stable qemu should both work with the newer libvirt.
> >
>
Stefan Hajnoczi Jan. 6, 2012, 11:06 a.m. UTC | #9
On Thu, Jan 5, 2012 at 8:26 PM, Luiz Capitulino <lcapitulino@redhat.com> wrote:
> On Thu, 05 Jan 2012 09:56:44 -0600
> Anthony Liguori <aliguori@us.ibm.com> wrote:
>
>> On 01/05/2012 09:35 AM, Eric Blake wrote:
>> > On 01/05/2012 07:16 AM, Luiz Capitulino wrote:
>> >>> I know.  We're stuck in a hard place here again because NotSupported
>> >>> has been in the Image Streaming API spec and hence implemented in
>> >>> libvirt for a while now.  If we change this then an old client which
>> >>> only understands NotSupported will not know what to do with the
>> >>> Unsupported error.
>> >>>
>> >>> (Unsupported was not in QEMU when the Image Streaming API was defined.)
>> >>
>> >> Let me try to understand it: is libvirt relying on an off tree API and
>> >> we are now required to have stable guarantees to off tree APIs?
>> >
>> > No.  Libvirt recognizes the off-tree spelling, but does not rely on it -
>> > after all, the goal of libvirt is to provide the high level action,
>> > using whatever underlying mechanism(s) necessary to get to that action,
>> > even if it means using several different attempts until one actually works.
>> >
>> > If a user has the older libvirt, which only expects the off-tree
>> > spelling, then that user's setup will break if they upgrade qemu but not
>> > libvirt.  But that's not a severe problem - they could have only been
>> > relying on the situation if they were using an off-tree build in the
>> > first place, so they should be aware that upgrading qemu is a
>> > potentially risky scenario, and that they may have to deal with the pieces.
>>
>> Right, this is the difference between ABI compatibility and strict backwards
>> compatibility.
>>
>> To achieve ABI compatibility, we need to not overload BLOCK_JOB_COMPLETED to
>> mean something other than libvirt what expects it to mean.
>>
>> We MUST provide ABI compatibility and SHOULD provide backwards compatibility
>> whenever possible.
>>
>> In this case, I'd suggest that in the very least, we should add
>> BLOCK_JOB_COMPLETED to qapi-schema.json with gen=False set.  That way it's
>> codified in the schema to ensure we maintain ABI compatibility.
>>
>> That said, I'm inclined to say that we should just use the BLOCK_JOB_COMPLETED
>> name because I don't think we gain a lot by using QMP_JOB_COMPLETED (not that we
>> shouldn't introduce it, but using it here isn't going to make or break anything).
>
> What I'm proposing is not just a rename, but adding proper async support to QMP,
> instead of adding something that is specific to the block layer.

Proper async support - if you mean the ability to have multiple QMP
commands pending at a time - is harder than just fixing QEMU.  Clients
also need to start taking advantage of it.  Clients that do not will
be unable to continue when a QMP command takes a long time to
complete.

I think avoiding long-running QMP commands is a good idea.  We have
events which can be used to signal completion.  It's easy to implement
and does not require clients to change the way they think about QMP
commands.

Today I doubt many QMP clients have implemented multiple pending
commands, although the wire protocol allows it.

>> With respect to libvirt relying on interfaces before they exist in QEMU, we need
>> to be a bit flexible here.  We want to get better at co-development to help make
>> libvirt support QEMU features as the bleeding edge.
>>
>> Forcing libvirt to wait until a feature is fully baked in QEMU will ensure
>> there's always a feature gap in libvirt which is in none of our best interests.
>
> We can ask them to wait at least until the API is merged. Most good review
> and potential problems will only come when the patches are worked on and
> reviewed on the list.

The API was agreed on between QEMU and libvirt developers on the mailing
lists - you were included in that process.  Back in August I sent
patches which you saw ("[0/4] Image Streaming API").

I know the API is not what we'd design today when it comes to the
cosmetics.  We'd want to name things differently, use the Unsupported
event which was introduced in the meantime, and maybe make the job
completion concept generic.

QMP and QAPI have evolved in the time that this feature has been
reimplemented.  I have tried to keep up with QMP but the API itself is
from August.  We can't keep redrawing the lines.

In summary:
 * The API was designed and agreed several months ago.
 * You saw it back then and I've kept you up-to-date along the way.
 * It predates current QAPI conventions.
 * Merging it poses no problem, changing the API breaks existing libvirt.

I do feel bad that the code has been out of qemu.git for so long and I
certainly won't attempt this again in the future.  But I really think
the pros and cons say we should accept it as an August 2011 API just
like many of the other HMP/QMP commands we carry.

Regarding being more flexible about working together with libvirt, I
do think it's important to work on APIs together.  This avoids use
developing something purely from the QEMU internal perspective which
turns out to be unconsumable by our biggest QMP user :).

Stefan
Luiz Capitulino Jan. 6, 2012, 12:45 p.m. UTC | #10
On Fri, 6 Jan 2012 11:06:12 +0000
Stefan Hajnoczi <stefanha@gmail.com> wrote:

> On Thu, Jan 5, 2012 at 8:26 PM, Luiz Capitulino <lcapitulino@redhat.com> wrote:
> > On Thu, 05 Jan 2012 09:56:44 -0600
> > Anthony Liguori <aliguori@us.ibm.com> wrote:
> >
> >> On 01/05/2012 09:35 AM, Eric Blake wrote:
> >> > On 01/05/2012 07:16 AM, Luiz Capitulino wrote:
> >> >>> I know.  We're stuck in a hard place here again because NotSupported
> >> >>> has been in the Image Streaming API spec and hence implemented in
> >> >>> libvirt for a while now.  If we change this then an old client which
> >> >>> only understands NotSupported will not know what to do with the
> >> >>> Unsupported error.
> >> >>>
> >> >>> (Unsupported was not in QEMU when the Image Streaming API was defined.)
> >> >>
> >> >> Let me try to understand it: is libvirt relying on an off tree API and
> >> >> we are now required to have stable guarantees to off tree APIs?
> >> >
> >> > No.  Libvirt recognizes the off-tree spelling, but does not rely on it -
> >> > after all, the goal of libvirt is to provide the high level action,
> >> > using whatever underlying mechanism(s) necessary to get to that action,
> >> > even if it means using several different attempts until one actually works.
> >> >
> >> > If a user has the older libvirt, which only expects the off-tree
> >> > spelling, then that user's setup will break if they upgrade qemu but not
> >> > libvirt.  But that's not a severe problem - they could have only been
> >> > relying on the situation if they were using an off-tree build in the
> >> > first place, so they should be aware that upgrading qemu is a
> >> > potentially risky scenario, and that they may have to deal with the pieces.
> >>
> >> Right, this is the difference between ABI compatibility and strict backwards
> >> compatibility.
> >>
> >> To achieve ABI compatibility, we need to not overload BLOCK_JOB_COMPLETED to
> >> mean something other than libvirt what expects it to mean.
> >>
> >> We MUST provide ABI compatibility and SHOULD provide backwards compatibility
> >> whenever possible.
> >>
> >> In this case, I'd suggest that in the very least, we should add
> >> BLOCK_JOB_COMPLETED to qapi-schema.json with gen=False set.  That way it's
> >> codified in the schema to ensure we maintain ABI compatibility.
> >>
> >> That said, I'm inclined to say that we should just use the BLOCK_JOB_COMPLETED
> >> name because I don't think we gain a lot by using QMP_JOB_COMPLETED (not that we
> >> shouldn't introduce it, but using it here isn't going to make or break anything).
> >
> > What I'm proposing is not just a rename, but adding proper async support to QMP,
> > instead of adding something that is specific to the block layer.
> 
> Proper async support - if you mean the ability to have multiple QMP
> commands pending at a time - is harder than just fixing QEMU.  Clients
> also need to start taking advantage of it.  Clients that do not will
> be unable to continue when a QMP command takes a long time to
> complete.

They can be fixed if we offer proper async support. Today they can't.

> I think avoiding long-running QMP commands is a good idea.  We have
> events which can be used to signal completion.  It's easy to implement
> and does not require clients to change the way they think about QMP
> commands.

I agree in principle, but in practice we risk having different subsystems
and different commands introducing their own async support which is going
to make our API (which is already far from perfect) impossible to use,
not to mention the maintainability hell that will arise from it.

Note that I'm not exactly advocating for heavyweight async support, I just
want to avoid keeping messing with this area.

Maybe, we could go real simple by having a standard event for
asynchronous commands, say ASYNC_CMD_FINISHED or something and that event
would contain only the command id and if the command succeeded or
failed. The APIs for cancelling and querying would have to be provided
by the command itself.

I can start a new thread to discuss async support. I haven't done it yet
because I don't have a concrete proposal and I also suspect that people are
tired of discussing this over and over again.

> Today I doubt many QMP clients have implemented multiple pending
> commands, although the wire protocol allows it.

That's true, but adding the id field in the command dict was silly, as
we don't support multiple pending commands.

> >> With respect to libvirt relying on interfaces before they exist in QEMU, we need
> >> to be a bit flexible here.  We want to get better at co-development to help make
> >> libvirt support QEMU features as the bleeding edge.
> >>
> >> Forcing libvirt to wait until a feature is fully baked in QEMU will ensure
> >> there's always a feature gap in libvirt which is in none of our best interests.
> >
> > We can ask them to wait at least until the API is merged. Most good review
> > and potential problems will only come when the patches are worked on and
> > reviewed on the list.
> 
> The API was agreed on between QEMU and libvirt developers on the mailing
> lists - you were included in that process.  Back in August I sent
> patches which you saw ("[0/4] Image Streaming API").
> 
> I know the API is not what we'd design today when it comes to the
> cosmetics.  We'd want to name things differently, use the Unsupported
> event which was introduced in the meantime, and maybe make the job
> completion concept generic.
> 
> QMP and QAPI have evolved in the time that this feature has been
> reimplemented.  I have tried to keep up with QMP but the API itself is
> from August.  We can't keep redrawing the lines.
> 
> In summary:
>  * The API was designed and agreed several months ago.
>  * You saw it back then and I've kept you up-to-date along the way.
>  * It predates current QAPI conventions.
>  * Merging it poses no problem, changing the API breaks existing libvirt.

It does pose problems. The name changes I've proposed are not minor
things, it's about conforming to the protocol which is quite important.
Duplicating errors is something that just doesn't make sense either.

And most importantly, you're adding async support to the block layer. This
means that we'll have two different async APIs when we add one to QMP,
or worse other subsystems will be motivated to have their own async APIs
too.

> I do feel bad that the code has been out of qemu.git for so long and I
> certainly won't attempt this again in the future.  But I really think
> the pros and cons say we should accept it as an August 2011 API just
> like many of the other HMP/QMP commands we carry.

I disagree. This should be reviwed and changed as any other submission.

> Regarding being more flexible about working together with libvirt, I
> do think it's important to work on APIs together.  This avoids use
> developing something purely from the QEMU internal perspective which
> turns out to be unconsumable by our biggest QMP user :).

We do work together with them. I've never ignored their opinion and I'm
probably the strongest opinionated when it comes to compatibility.

I just can't see how accepting something that is now rotted is going
to help either of us.
Anthony Liguori Jan. 6, 2012, 3:08 p.m. UTC | #11
On 01/06/2012 06:45 AM, Luiz Capitulino wrote:
> On Fri, 6 Jan 2012 11:06:12 +0000
> Stefan Hajnoczi<stefanha@gmail.com>  wrote:
>
>> Proper async support - if you mean the ability to have multiple QMP
>> commands pending at a time - is harder than just fixing QEMU.  Clients
>> also need to start taking advantage of it.  Clients that do not will
>> be unable to continue when a QMP command takes a long time to
>> complete.
>
> They can be fixed if we offer proper async support. Today they can't.
>
>> I think avoiding long-running QMP commands is a good idea.  We have
>> events which can be used to signal completion.  It's easy to implement
>> and does not require clients to change the way they think about QMP
>> commands.
>
> I agree in principle, but in practice we risk having different subsystems
> and different commands introducing their own async support which is going
> to make our API (which is already far from perfect) impossible to use,
> not to mention the maintainability hell that will arise from it.

I absolutely agree with you but practically speaking, we don't have generic 
async support today.

It's been my experience that holding up patch series for generic infrastructure 
that does exist 1) causes unnecessary angst in contributors 2) puts pressure on 
the infrastructure to get something in fast vs. get something in that's good.

And honestly, it's (2) that I worry the most about.  I don't want us to rush 
async support because we're eager to get block streaming merged.  This is why 
I'm not holding any new devices back while we get QOM merged even if it creates 
more work for me and introduces new compatibility problems to solve.

We also need to look at this interface as a public interface whether we 
technically committed it to or not.  The fact is, an important user is relying 
upon so that makes it a supported interface.  Even though I absolutely hate it, 
this is why we haven't changed the help output even after all of these years. 
Not breaking users should be one of our highest priorities.

Now we could change this command to make it a better QMP interface and we could 
do that in a compatible fashion.

However, since I think we'll get proper async support really soon and that will 
involve fundamentally changing this command (along with a bunch of other 
commands), I don't think there's a lot of value in making cosmetic changes right 
now.  If we're going to break backwards compatibility, I'd rather do it once 
than twice.

What I'd suggest is that we take the command in as-is and we mark it:

Since: 1.1
Deprecated: 1.2
See Also: TBD

The idea being that we'll introduce new generic async commands in 1.2 and 
deprecate this command.  We can figure out the removal schedule then too.  Since 
this command hasn't been around all that long, we can probably have a short 
removal schedule.

We should also mark the other psuedo-async commands this way too FWIW.

Regards,

Anthony Liguori

> Note that I'm not exactly advocating for heavyweight async support, I just
> want to avoid keeping messing with this area.
>
> Maybe, we could go real simple by having a standard event for
> asynchronous commands, say ASYNC_CMD_FINISHED or something and that event
> would contain only the command id and if the command succeeded or
> failed. The APIs for cancelling and querying would have to be provided
> by the command itself.
>
> I can start a new thread to discuss async support. I haven't done it yet
> because I don't have a concrete proposal and I also suspect that people are
> tired of discussing this over and over again.
>
>> Today I doubt many QMP clients have implemented multiple pending
>> commands, although the wire protocol allows it.
>
> That's true, but adding the id field in the command dict was silly, as
> we don't support multiple pending commands.
>
>>>> With respect to libvirt relying on interfaces before they exist in QEMU, we need
>>>> to be a bit flexible here.  We want to get better at co-development to help make
>>>> libvirt support QEMU features as the bleeding edge.
>>>>
>>>> Forcing libvirt to wait until a feature is fully baked in QEMU will ensure
>>>> there's always a feature gap in libvirt which is in none of our best interests.
>>>
>>> We can ask them to wait at least until the API is merged. Most good review
>>> and potential problems will only come when the patches are worked on and
>>> reviewed on the list.
>>
>> The API was agreed on between QEMU and libvirt developers on the mailing
>> lists - you were included in that process.  Back in August I sent
>> patches which you saw ("[0/4] Image Streaming API").
>>
>> I know the API is not what we'd design today when it comes to the
>> cosmetics.  We'd want to name things differently, use the Unsupported
>> event which was introduced in the meantime, and maybe make the job
>> completion concept generic.
>>
>> QMP and QAPI have evolved in the time that this feature has been
>> reimplemented.  I have tried to keep up with QMP but the API itself is
>> from August.  We can't keep redrawing the lines.
>>
>> In summary:
>>   * The API was designed and agreed several months ago.
>>   * You saw it back then and I've kept you up-to-date along the way.
>>   * It predates current QAPI conventions.
>>   * Merging it poses no problem, changing the API breaks existing libvirt.
>
> It does pose problems. The name changes I've proposed are not minor
> things, it's about conforming to the protocol which is quite important.
> Duplicating errors is something that just doesn't make sense either.
>
> And most importantly, you're adding async support to the block layer. This
> means that we'll have two different async APIs when we add one to QMP,
> or worse other subsystems will be motivated to have their own async APIs
> too.
>
>> I do feel bad that the code has been out of qemu.git for so long and I
>> certainly won't attempt this again in the future.  But I really think
>> the pros and cons say we should accept it as an August 2011 API just
>> like many of the other HMP/QMP commands we carry.
>
> I disagree. This should be reviwed and changed as any other submission.
>
>> Regarding being more flexible about working together with libvirt, I
>> do think it's important to work on APIs together.  This avoids use
>> developing something purely from the QEMU internal perspective which
>> turns out to be unconsumable by our biggest QMP user :).
>
> We do work together with them. I've never ignored their opinion and I'm
> probably the strongest opinionated when it comes to compatibility.
>
> I just can't see how accepting something that is now rotted is going
> to help either of us.
>
Luiz Capitulino Jan. 6, 2012, 7:42 p.m. UTC | #12
On Fri, 06 Jan 2012 09:08:19 -0600
Anthony Liguori <anthony@codemonkey.ws> wrote:

> On 01/06/2012 06:45 AM, Luiz Capitulino wrote:
> > On Fri, 6 Jan 2012 11:06:12 +0000
> > Stefan Hajnoczi<stefanha@gmail.com>  wrote:
> >
> >> Proper async support - if you mean the ability to have multiple QMP
> >> commands pending at a time - is harder than just fixing QEMU.  Clients
> >> also need to start taking advantage of it.  Clients that do not will
> >> be unable to continue when a QMP command takes a long time to
> >> complete.
> >
> > They can be fixed if we offer proper async support. Today they can't.
> >
> >> I think avoiding long-running QMP commands is a good idea.  We have
> >> events which can be used to signal completion.  It's easy to implement
> >> and does not require clients to change the way they think about QMP
> >> commands.
> >
> > I agree in principle, but in practice we risk having different subsystems
> > and different commands introducing their own async support which is going
> > to make our API (which is already far from perfect) impossible to use,
> > not to mention the maintainability hell that will arise from it.
> 
> I absolutely agree with you but practically speaking, we don't have generic 
> async support today.
> 
> It's been my experience that holding up patch series for generic infrastructure 
> that does exist 1) causes unnecessary angst in contributors 2) puts pressure on 
> the infrastructure to get something in fast vs. get something in that's good.
> 
> And honestly, it's (2) that I worry the most about.  I don't want us to rush 
> async support because we're eager to get block streaming merged.  This is why 
> I'm not holding any new devices back while we get QOM merged even if it creates 
> more work for me and introduces new compatibility problems to solve.

I agree.

> We also need to look at this interface as a public interface whether we 
> technically committed it to or not.  The fact is, an important user is relying 
> upon so that makes it a supported interface.  Even though I absolutely hate it, 
> this is why we haven't changed the help output even after all of these years. 
> Not breaking users should be one of our highest priorities.

One thing I don't understand: how is libvirt relying on it if it doesn't
exist in qemu.git yet?

> Now we could change this command to make it a better QMP interface and we could 
> do that in a compatible fashion.
> 
> However, since I think we'll get proper async support really soon and that will 
> involve fundamentally changing this command (along with a bunch of other 
> commands), I don't think there's a lot of value in making cosmetic changes right 
> now.  If we're going to break backwards compatibility, I'd rather do it once 
> than twice.

It goes beyond cosmetic changes. For example, will we allow other async
block commands to use this interface? And if we're doing this for block,
why not accept something similar for other subsystems if someone happen to
submit it?

Let me take a non-cosmetic change request example. The BLOCK_JOB_COMPLETED
event has a 'error' field. However, it's impossible to know which error
happened because the 'error' field contains only the human error description.

Another problem: the event is called BLOCK_JOB_COMPLETED, but it's tied
to the streaming API. If we allow other commands to use it, they will likely
have to add fields there, making the event worse than it already is.

There's more, because I skipped this review in v3 as I jumped to the
"proper async support" discussion...

> What I'd suggest is that we take the command in as-is and we mark it:
> 
> Since: 1.1
> Deprecated: 1.2
> See Also: TBD
> 
> The idea being that we'll introduce new generic async commands in 1.2 and 
> deprecate this command.  We can figure out the removal schedule then too.  Since 
> this command hasn't been around all that long, we can probably have a short 
> removal schedule.

That makes its inclusion even discussable :) A few (very honest) questions:

 1. Is it really worth it to have the command for one or two releases?

 2. Will we allow other block commands to use this async API?

 3. Are we going to accept other ad-hoc async APIs until we have a
    proper one?

> 
> We should also mark the other psuedo-async commands this way too FWIW.
> 
> Regards,
> 
> Anthony Liguori
> 
> > Note that I'm not exactly advocating for heavyweight async support, I just
> > want to avoid keeping messing with this area.
> >
> > Maybe, we could go real simple by having a standard event for
> > asynchronous commands, say ASYNC_CMD_FINISHED or something and that event
> > would contain only the command id and if the command succeeded or
> > failed. The APIs for cancelling and querying would have to be provided
> > by the command itself.
> >
> > I can start a new thread to discuss async support. I haven't done it yet
> > because I don't have a concrete proposal and I also suspect that people are
> > tired of discussing this over and over again.
> >
> >> Today I doubt many QMP clients have implemented multiple pending
> >> commands, although the wire protocol allows it.
> >
> > That's true, but adding the id field in the command dict was silly, as
> > we don't support multiple pending commands.
> >
> >>>> With respect to libvirt relying on interfaces before they exist in QEMU, we need
> >>>> to be a bit flexible here.  We want to get better at co-development to help make
> >>>> libvirt support QEMU features as the bleeding edge.
> >>>>
> >>>> Forcing libvirt to wait until a feature is fully baked in QEMU will ensure
> >>>> there's always a feature gap in libvirt which is in none of our best interests.
> >>>
> >>> We can ask them to wait at least until the API is merged. Most good review
> >>> and potential problems will only come when the patches are worked on and
> >>> reviewed on the list.
> >>
> >> The API was agreed on between QEMU and libvirt developers on the mailing
> >> lists - you were included in that process.  Back in August I sent
> >> patches which you saw ("[0/4] Image Streaming API").
> >>
> >> I know the API is not what we'd design today when it comes to the
> >> cosmetics.  We'd want to name things differently, use the Unsupported
> >> event which was introduced in the meantime, and maybe make the job
> >> completion concept generic.
> >>
> >> QMP and QAPI have evolved in the time that this feature has been
> >> reimplemented.  I have tried to keep up with QMP but the API itself is
> >> from August.  We can't keep redrawing the lines.
> >>
> >> In summary:
> >>   * The API was designed and agreed several months ago.
> >>   * You saw it back then and I've kept you up-to-date along the way.
> >>   * It predates current QAPI conventions.
> >>   * Merging it poses no problem, changing the API breaks existing libvirt.
> >
> > It does pose problems. The name changes I've proposed are not minor
> > things, it's about conforming to the protocol which is quite important.
> > Duplicating errors is something that just doesn't make sense either.
> >
> > And most importantly, you're adding async support to the block layer. This
> > means that we'll have two different async APIs when we add one to QMP,
> > or worse other subsystems will be motivated to have their own async APIs
> > too.
> >
> >> I do feel bad that the code has been out of qemu.git for so long and I
> >> certainly won't attempt this again in the future.  But I really think
> >> the pros and cons say we should accept it as an August 2011 API just
> >> like many of the other HMP/QMP commands we carry.
> >
> > I disagree. This should be reviwed and changed as any other submission.
> >
> >> Regarding being more flexible about working together with libvirt, I
> >> do think it's important to work on APIs together.  This avoids use
> >> developing something purely from the QEMU internal perspective which
> >> turns out to be unconsumable by our biggest QMP user :).
> >
> > We do work together with them. I've never ignored their opinion and I'm
> > probably the strongest opinionated when it comes to compatibility.
> >
> > I just can't see how accepting something that is now rotted is going
> > to help either of us.
> >
>
Anthony Liguori Jan. 10, 2012, 7:18 p.m. UTC | #13
On 01/06/2012 01:42 PM, Luiz Capitulino wrote:
> On Fri, 06 Jan 2012 09:08:19 -0600
>> We also need to look at this interface as a public interface whether we
>> technically committed it to or not.  The fact is, an important user is relying
>> upon so that makes it a supported interface.  Even though I absolutely hate it,
>> this is why we haven't changed the help output even after all of these years.
>> Not breaking users should be one of our highest priorities.
>
> One thing I don't understand: how is libvirt relying on it if it doesn't
> exist in qemu.git yet?

Because there was a discussion on qemu-devel and we agreed on an interface that 
both sides would implement to.

I expect this to happen more often in the future too.

>> Now we could change this command to make it a better QMP interface and we could
>> do that in a compatible fashion.
>>
>> However, since I think we'll get proper async support really soon and that will
>> involve fundamentally changing this command (along with a bunch of other
>> commands), I don't think there's a lot of value in making cosmetic changes right
>> now.  If we're going to break backwards compatibility, I'd rather do it once
>> than twice.
>
> It goes beyond cosmetic changes. For example, will we allow other async
> block commands to use this interface? And if we're doing this for block,
> why not accept something similar for other subsystems if someone happen to
> submit it?

Yes, I'm in agreement with you on direction.  But adding proper async support is 
not a simple task, and blocking this interface based on that is a bad idea for 
reasons previously mentioned.

> Let me take a non-cosmetic change request example. The BLOCK_JOB_COMPLETED
> event has a 'error' field. However, it's impossible to know which error
> happened because the 'error' field contains only the human error description.

Ok.  That's worth thinking about.

> Another problem: the event is called BLOCK_JOB_COMPLETED, but it's tied
> to the streaming API. If we allow other commands to use it, they will likely
> have to add fields there, making the event worse than it already is.

But aren't we going to introduce a proper async interface?  This is what has me 
confused.

> There's more, because I skipped this review in v3 as I jumped to the
> "proper async support" discussion...

Well let's do proper async support.  As I mentioned, I'd rather take this 
command in as-is, introduce proper async support, and then deprecate a bunch of 
stuff at once.

>> What I'd suggest is that we take the command in as-is and we mark it:
>>
>> Since: 1.1
>> Deprecated: 1.2
>> See Also: TBD
>>
>> The idea being that we'll introduce new generic async commands in 1.2 and
>> deprecate this command.  We can figure out the removal schedule then too.  Since
>> this command hasn't been around all that long, we can probably have a short
>> removal schedule.
>
> That makes its inclusion even discussable :) A few (very honest) questions:
>
>   1. Is it really worth it to have the command for one or two releases?

Yes.  The most important consideration IMHO is parallelizing development.  We 
want the block layer to evolve to the greatest extent possible independent of 
our other subsystems.  If we don't have the right infrastructure in QMP to 
support a block feature, that shouldn't hold up progress in the block layer to 
the greatest extent possible.

>   2. Will we allow other block commands to use this async API?

Depends on how long it takes to do "proper async support".

>   3. Are we going to accept other ad-hoc async APIs until we have a
>      proper one?

Yes.  So let's get serious about getting a proper interface in :-)

Regards,

Anthony Liguori
Luiz Capitulino Jan. 10, 2012, 8:55 p.m. UTC | #14
On Tue, 10 Jan 2012 13:18:41 -0600
Anthony Liguori <anthony@codemonkey.ws> wrote:

> On 01/06/2012 01:42 PM, Luiz Capitulino wrote:
> > On Fri, 06 Jan 2012 09:08:19 -0600
> >> We also need to look at this interface as a public interface whether we
> >> technically committed it to or not.  The fact is, an important user is relying
> >> upon so that makes it a supported interface.  Even though I absolutely hate it,
> >> this is why we haven't changed the help output even after all of these years.
> >> Not breaking users should be one of our highest priorities.
> >
> > One thing I don't understand: how is libvirt relying on it if it doesn't
> > exist in qemu.git yet?
> 
> Because there was a discussion on qemu-devel and we agreed on an interface that 
> both sides would implement to.
> 
> I expect this to happen more often in the future too.

For future commands we either, implement it right away or ask libvirt to
wait for the command to be merged, or at least pass initial review.

> 
> >> Now we could change this command to make it a better QMP interface and we could
> >> do that in a compatible fashion.
> >>
> >> However, since I think we'll get proper async support really soon and that will
> >> involve fundamentally changing this command (along with a bunch of other
> >> commands), I don't think there's a lot of value in making cosmetic changes right
> >> now.  If we're going to break backwards compatibility, I'd rather do it once
> >> than twice.
> >
> > It goes beyond cosmetic changes. For example, will we allow other async
> > block commands to use this interface? And if we're doing this for block,
> > why not accept something similar for other subsystems if someone happen to
> > submit it?
> 
> Yes, I'm in agreement with you on direction.  But adding proper async support is 
> not a simple task, and blocking this interface based on that is a bad idea for 
> reasons previously mentioned.
> 
> > Let me take a non-cosmetic change request example. The BLOCK_JOB_COMPLETED
> > event has a 'error' field. However, it's impossible to know which error
> > happened because the 'error' field contains only the human error description.
> 
> Ok.  That's worth thinking about.
> 
> > Another problem: the event is called BLOCK_JOB_COMPLETED, but it's tied
> > to the streaming API. If we allow other commands to use it, they will likely
> > have to add fields there, making the event worse than it already is.
> 
> But aren't we going to introduce a proper async interface?  This is what has me 
> confused.

Yes, I was thinking about new block commands using this API before we get
proper async support...

> > There's more, because I skipped this review in v3 as I jumped to the
> > "proper async support" discussion...
> 
> Well let's do proper async support.  As I mentioned, I'd rather take this 
> command in as-is, introduce proper async support, and then deprecate a bunch of 
> stuff at once.

Ok, I'm willing to do this as Stefan has agreed on deprecating this as soon as
we get proper async support.

> 
> >> What I'd suggest is that we take the command in as-is and we mark it:
> >>
> >> Since: 1.1
> >> Deprecated: 1.2
> >> See Also: TBD
> >>
> >> The idea being that we'll introduce new generic async commands in 1.2 and
> >> deprecate this command.  We can figure out the removal schedule then too.  Since
> >> this command hasn't been around all that long, we can probably have a short
> >> removal schedule.
> >
> > That makes its inclusion even discussable :) A few (very honest) questions:
> >
> >   1. Is it really worth it to have the command for one or two releases?
> 
> Yes.  The most important consideration IMHO is parallelizing development.  We 
> want the block layer to evolve to the greatest extent possible independent of 
> our other subsystems.  If we don't have the right infrastructure in QMP to 
> support a block feature, that shouldn't hold up progress in the block layer to 
> the greatest extent possible.
> 
> >   2. Will we allow other block commands to use this async API?
> 
> Depends on how long it takes to do "proper async support".
> 
> >   3. Are we going to accept other ad-hoc async APIs until we have a
> >      proper one?
> 
> Yes.  So let's get serious about getting a proper interface in :-)

ACK

> 
> Regards,
> 
> Anthony Liguori
>
Anthony Liguori Jan. 10, 2012, 9:02 p.m. UTC | #15
On 01/10/2012 02:55 PM, Luiz Capitulino wrote:
> On Tue, 10 Jan 2012 13:18:41 -0600
> Anthony Liguori<anthony@codemonkey.ws>  wrote:
>
>> On 01/06/2012 01:42 PM, Luiz Capitulino wrote:
>>> On Fri, 06 Jan 2012 09:08:19 -0600
>>>> We also need to look at this interface as a public interface whether we
>>>> technically committed it to or not.  The fact is, an important user is relying
>>>> upon so that makes it a supported interface.  Even though I absolutely hate it,
>>>> this is why we haven't changed the help output even after all of these years.
>>>> Not breaking users should be one of our highest priorities.
>>>
>>> One thing I don't understand: how is libvirt relying on it if it doesn't
>>> exist in qemu.git yet?
>>
>> Because there was a discussion on qemu-devel and we agreed on an interface that
>> both sides would implement to.
>>
>> I expect this to happen more often in the future too.
>
> For future commands we either, implement it right away or ask libvirt to
> wait for the command to be merged, or at least pass initial review.

Or commit the schema entry to qapi-schema.json with gen=False.

But when this command was first proposed, qapi-schema.json didn't exist in the 
tree :-)

>> But aren't we going to introduce a proper async interface?  This is what has me
>> confused.
>
> Yes, I was thinking about new block commands using this API before we get
> proper async support...

Well let's avoid that problem by doing proper async support before we get new 
block commands ;-)

>>> There's more, because I skipped this review in v3 as I jumped to the
>>> "proper async support" discussion...
>>
>> Well let's do proper async support.  As I mentioned, I'd rather take this
>> command in as-is, introduce proper async support, and then deprecate a bunch of
>> stuff at once.
>
> Ok, I'm willing to do this as Stefan has agreed on deprecating this as soon as
> we get proper async support.

Excellent.

BTW, you mentioned that you were going to send an RFC for proper async support?

Regards,

Anthony Liguori

>>
>>>> What I'd suggest is that we take the command in as-is and we mark it:
>>>>
>>>> Since: 1.1
>>>> Deprecated: 1.2
>>>> See Also: TBD
>>>>
>>>> The idea being that we'll introduce new generic async commands in 1.2 and
>>>> deprecate this command.  We can figure out the removal schedule then too.  Since
>>>> this command hasn't been around all that long, we can probably have a short
>>>> removal schedule.
>>>
>>> That makes its inclusion even discussable :) A few (very honest) questions:
>>>
>>>    1. Is it really worth it to have the command for one or two releases?
>>
>> Yes.  The most important consideration IMHO is parallelizing development.  We
>> want the block layer to evolve to the greatest extent possible independent of
>> our other subsystems.  If we don't have the right infrastructure in QMP to
>> support a block feature, that shouldn't hold up progress in the block layer to
>> the greatest extent possible.
>>
>>>    2. Will we allow other block commands to use this async API?
>>
>> Depends on how long it takes to do "proper async support".
>>
>>>    3. Are we going to accept other ad-hoc async APIs until we have a
>>>       proper one?
>>
>> Yes.  So let's get serious about getting a proper interface in :-)
>
> ACK
>
>>
>> Regards,
>>
>> Anthony Liguori
>>
>
Luiz Capitulino Jan. 11, 2012, 1:41 a.m. UTC | #16
On Tue, 10 Jan 2012 15:02:48 -0600
Anthony Liguori <aliguori@us.ibm.com> wrote:

> On 01/10/2012 02:55 PM, Luiz Capitulino wrote:
> > On Tue, 10 Jan 2012 13:18:41 -0600
> > Anthony Liguori<anthony@codemonkey.ws>  wrote:
> >
> >> On 01/06/2012 01:42 PM, Luiz Capitulino wrote:
> >>> On Fri, 06 Jan 2012 09:08:19 -0600
> >>>> We also need to look at this interface as a public interface whether we
> >>>> technically committed it to or not.  The fact is, an important user is relying
> >>>> upon so that makes it a supported interface.  Even though I absolutely hate it,
> >>>> this is why we haven't changed the help output even after all of these years.
> >>>> Not breaking users should be one of our highest priorities.
> >>>
> >>> One thing I don't understand: how is libvirt relying on it if it doesn't
> >>> exist in qemu.git yet?
> >>
> >> Because there was a discussion on qemu-devel and we agreed on an interface that
> >> both sides would implement to.
> >>
> >> I expect this to happen more often in the future too.
> >
> > For future commands we either, implement it right away or ask libvirt to
> > wait for the command to be merged, or at least pass initial review.
> 
> Or commit the schema entry to qapi-schema.json with gen=False.
> 
> But when this command was first proposed, qapi-schema.json didn't exist in the 
> tree :-)
> 
> >> But aren't we going to introduce a proper async interface?  This is what has me
> >> confused.
> >
> > Yes, I was thinking about new block commands using this API before we get
> > proper async support...
> 
> Well let's avoid that problem by doing proper async support before we get new 
> block commands ;-)
> 
> >>> There's more, because I skipped this review in v3 as I jumped to the
> >>> "proper async support" discussion...
> >>
> >> Well let's do proper async support.  As I mentioned, I'd rather take this
> >> command in as-is, introduce proper async support, and then deprecate a bunch of
> >> stuff at once.
> >
> > Ok, I'm willing to do this as Stefan has agreed on deprecating this as soon as
> > we get proper async support.
> 
> Excellent.
> 
> BTW, you mentioned that you were going to send an RFC for proper async support?

It's just a few proposals for the high-level API (ie. no patches), I can send it
tomorrow.

> 
> Regards,
> 
> Anthony Liguori
> 
> >>
> >>>> What I'd suggest is that we take the command in as-is and we mark it:
> >>>>
> >>>> Since: 1.1
> >>>> Deprecated: 1.2
> >>>> See Also: TBD
> >>>>
> >>>> The idea being that we'll introduce new generic async commands in 1.2 and
> >>>> deprecate this command.  We can figure out the removal schedule then too.  Since
> >>>> this command hasn't been around all that long, we can probably have a short
> >>>> removal schedule.
> >>>
> >>> That makes its inclusion even discussable :) A few (very honest) questions:
> >>>
> >>>    1. Is it really worth it to have the command for one or two releases?
> >>
> >> Yes.  The most important consideration IMHO is parallelizing development.  We
> >> want the block layer to evolve to the greatest extent possible independent of
> >> our other subsystems.  If we don't have the right infrastructure in QMP to
> >> support a block feature, that shouldn't hold up progress in the block layer to
> >> the greatest extent possible.
> >>
> >>>    2. Will we allow other block commands to use this async API?
> >>
> >> Depends on how long it takes to do "proper async support".
> >>
> >>>    3. Are we going to accept other ad-hoc async APIs until we have a
> >>>       proper one?
> >>
> >> Yes.  So let's get serious about getting a proper interface in :-)
> >
> > ACK
> >
> >>
> >> Regards,
> >>
> >> Anthony Liguori
> >>
> >
>
diff mbox

Patch

diff --git a/blockdev.c b/blockdev.c
index dbf0251..08355cf 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -13,8 +13,11 @@ 
 #include "qerror.h"
 #include "qemu-option.h"
 #include "qemu-config.h"
+#include "qemu-objects.h"
 #include "sysemu.h"
 #include "block_int.h"
+#include "qmp-commands.h"
+#include "trace.h"
 
 static QTAILQ_HEAD(drivelist, DriveInfo) drives = QTAILQ_HEAD_INITIALIZER(drives);
 
@@ -887,3 +890,68 @@  int do_block_resize(Monitor *mon, const QDict *qdict, QObject **ret_data)
 
     return 0;
 }
+
+static QObject *qobject_from_block_job(BlockJob *job)
+{
+    return qobject_from_jsonf("{ 'type': %s,"
+                              "'device': %s,"
+                              "'len': %" PRId64 ","
+                              "'offset': %" PRId64 ","
+                              "'speed': %" PRId64 " }",
+                              job->job_type->job_type,
+                              bdrv_get_device_name(job->bs),
+                              job->len,
+                              job->offset,
+                              job->speed);
+}
+
+static void block_stream_cb(void *opaque, int ret)
+{
+    BlockDriverState *bs = opaque;
+    QObject *obj;
+
+    trace_block_stream_cb(bs, bs->job, ret);
+
+    assert(bs->job);
+    obj = qobject_from_block_job(bs->job);
+    if (ret < 0) {
+        QDict *dict = qobject_to_qdict(obj);
+        qdict_put(dict, "error", qstring_from_str(strerror(-ret)));
+    }
+
+    monitor_protocol_event(QEVENT_BLOCK_JOB_COMPLETED, obj);
+    qobject_decref(obj);
+}
+
+void qmp_block_stream(const char *device, bool has_base,
+                      const char *base, Error **errp)
+{
+    BlockDriverState *bs;
+    int ret;
+
+    bs = bdrv_find(device);
+    if (!bs) {
+        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+        return;
+    }
+
+    /* Base device not supported */
+    if (base) {
+        error_set(errp, QERR_NOT_SUPPORTED);
+        return;
+    }
+
+    ret = stream_start(bs, NULL, block_stream_cb, bs);
+    if (ret < 0) {
+        switch (ret) {
+        case -EBUSY:
+            error_set(errp, QERR_DEVICE_IN_USE, device);
+            return;
+        default:
+            error_set(errp, QERR_NOT_SUPPORTED);
+            return;
+        }
+    }
+
+    trace_qmp_block_stream(bs, bs->job);
+}
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 79a9195..c0de41c 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -70,6 +70,19 @@  but should be used with extreme caution.  Note that this command only
 resizes image files, it can not resize block devices like LVM volumes.
 ETEXI
 
+    {
+        .name       = "block_stream",
+        .args_type  = "device:B,base:s?",
+        .params     = "device [base]",
+        .help       = "copy data from a backing file into a block device",
+        .mhandler.cmd = hmp_block_stream,
+    },
+
+STEXI
+@item block_stream
+@findex block_stream
+Copy data from a backing file into a block device.
+ETEXI
 
     {
         .name       = "eject",
diff --git a/hmp.c b/hmp.c
index dfab7ad..8b7d7d4 100644
--- a/hmp.c
+++ b/hmp.c
@@ -531,3 +531,17 @@  void hmp_cpu(Monitor *mon, const QDict *qdict)
         monitor_printf(mon, "invalid CPU index\n");
     }
 }
+
+void hmp_block_stream(Monitor *mon, const QDict *qdict)
+{
+    Error *error = NULL;
+    const char *device = qdict_get_str(qdict, "device");
+    const char *base = qdict_get_try_str(qdict, "base");
+
+    qmp_block_stream(device, base != NULL, base, &error);
+
+    if (error) {
+        monitor_printf(mon, "%s\n", error_get_pretty(error));
+        error_free(error);
+    }
+}
diff --git a/hmp.h b/hmp.h
index 4422578..3cb6fe5 100644
--- a/hmp.h
+++ b/hmp.h
@@ -37,5 +37,6 @@  void hmp_stop(Monitor *mon, const QDict *qdict);
 void hmp_system_reset(Monitor *mon, const QDict *qdict);
 void hmp_system_powerdown(Monitor *mon, const QDict *qdict);
 void hmp_cpu(Monitor *mon, const QDict *qdict);
+void hmp_block_stream(Monitor *mon, const QDict *qdict);
 
 #endif
diff --git a/monitor.c b/monitor.c
index 1be222e..a33218c 100644
--- a/monitor.c
+++ b/monitor.c
@@ -479,6 +479,9 @@  void monitor_protocol_event(MonitorEvent event, QObject *data)
         case QEVENT_SPICE_DISCONNECTED:
             event_name = "SPICE_DISCONNECTED";
             break;
+        case QEVENT_BLOCK_JOB_COMPLETED:
+            event_name = "BLOCK_JOB_COMPLETED";
+            break;
         default:
             abort();
             break;
diff --git a/monitor.h b/monitor.h
index e76795f..d9b07dc 100644
--- a/monitor.h
+++ b/monitor.h
@@ -35,6 +35,7 @@  typedef enum MonitorEvent {
     QEVENT_SPICE_CONNECTED,
     QEVENT_SPICE_INITIALIZED,
     QEVENT_SPICE_DISCONNECTED,
+    QEVENT_BLOCK_JOB_COMPLETED,
     QEVENT_MAX,
 } MonitorEvent;
 
diff --git a/qapi-schema.json b/qapi-schema.json
index fbbdbe0..65308d2 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -901,3 +901,56 @@ 
 # Notes: Do not use this command.
 ##
 { 'command': 'cpu', 'data': {'index': 'int'} }
+
+##
+# @block_stream:
+#
+# Copy data from a backing file into a block device.
+#
+# The block streaming operation is performed in the background until the entire
+# backing file has been copied.  This command returns immediately once streaming
+# has started.  The status of ongoing block streaming operations can be checked
+# with query-block-jobs.  The operation can be stopped before it has completed
+# using the block_job_cancel command.
+#
+# If a base file is specified then sectors are not copied from that base file and
+# its backing chain.  When streaming completes the image file will have the base
+# file as its backing file.  This can be used to stream a subset of the backing
+# file chain instead of flattening the entire image.
+#
+# On successful completion the image file is updated to drop the backing file.
+#
+# Arguments:
+#
+# @device: device name
+# @base:   common backing file
+#
+# Errors:
+#
+# DeviceInUse:    streaming is already active on this device
+# DeviceNotFound: device name is invalid
+# NotSupported:   image streaming is not supported by this device
+#
+# Events:
+#
+# On completion the BLOCK_JOB_COMPLETED event is raised with the following
+# fields:
+#
+# - type:     job type ("stream" for image streaming, json-string)
+# - device:   device name (json-string)
+# - len:      maximum progress value (json-int)
+# - offset:   current progress value (json-int)
+# - speed:    rate limit, bytes per second (json-int)
+# - error:    error message (json-string, only on error)
+#
+# The completion event is raised both on success and on failure.  On
+# success offset is equal to len.  On failure offset and len can be
+# used to indicate at which point the operation failed.
+#
+# On failure the error field contains a human-readable error message.  There are
+# no semantics other than that streaming has failed and clients should not try
+# to interpret the error string.
+#
+# Since: 1.1
+##
+{ 'command': 'block_stream', 'data': { 'device': 'str', '*base': 'str' } }
diff --git a/qerror.c b/qerror.c
index 14a1d59..605381a 100644
--- a/qerror.c
+++ b/qerror.c
@@ -174,6 +174,10 @@  static const QErrorStringTable qerror_table[] = {
         .desc      = "No '%(bus)' bus found for device '%(device)'",
     },
     {
+        .error_fmt = QERR_NOT_SUPPORTED,
+        .desc      = "Not supported",
+    },
+    {
         .error_fmt = QERR_OPEN_FILE_FAILED,
         .desc      = "Could not open '%(filename)'",
     },
diff --git a/qerror.h b/qerror.h
index 560d458..b5c0cfe 100644
--- a/qerror.h
+++ b/qerror.h
@@ -147,6 +147,9 @@  QError *qobject_to_qerror(const QObject *obj);
 #define QERR_NO_BUS_FOR_DEVICE \
     "{ 'class': 'NoBusForDevice', 'data': { 'device': %s, 'bus': %s } }"
 
+#define QERR_NOT_SUPPORTED \
+    "{ 'class': 'NotSupported', 'data': {} }"
+
 #define QERR_OPEN_FILE_FAILED \
     "{ 'class': 'OpenFileFailed', 'data': { 'filename': %s } }"
 
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 94da2a8..8c1c934 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -683,6 +683,24 @@  Example:
 EQMP
 
     {
+        .name       = "block_stream",
+        .args_type  = "device:B,base:s?",
+        .mhandler.cmd_new = qmp_marshal_input_block_stream,
+    },
+
+SQMP
+block_stream
+------------
+See qapi-schema.json for documentation.
+
+Examples:
+
+-> { "execute": "block_stream", "arguments": { "device": "virtio0" } }
+<- { "return":  {} }
+
+EQMP
+
+    {
         .name       = "blockdev-snapshot-sync",
         .args_type  = "device:B,snapshot-file:s?,format:s?",
         .params     = "device [new-image-file] [format]",
diff --git a/trace-events b/trace-events
index 4efcd95..6c1eec2 100644
--- a/trace-events
+++ b/trace-events
@@ -73,6 +73,10 @@  bdrv_co_copy_on_readv(void *bs, int64_t sector_num, int nb_sectors, int64_t clus
 stream_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d"
 stream_start(void *bs, void *base, void *s, void *co, void *opaque) "bs %p base %p s %p co %p opaque %p"
 
+# blockdev.c
+block_stream_cb(void *bs, void *job, int ret) "bs %p job %p ret %d"
+qmp_block_stream(void *bs, void *job) "bs %p job %p"
+
 # hw/virtio-blk.c
 virtio_blk_req_complete(void *req, int status) "req %p status %d"
 virtio_blk_rw_complete(void *req, int ret) "req %p ret %d"