diff mbox

[V2,2/3] block: Add drive-mirror-replace command to repair quorum files.

Message ID 1394574786-32579-3-git-send-email-benoit.canet@irqsave.net
State New
Headers show

Commit Message

Benoît Canet March 11, 2014, 9:53 p.m. UTC
When a quorum file is totally destroyed (broken NAS or SAN) the user can start a
drive-mirror job on the quorum block backend and then replace the broken
quorum file with drive-mirror-replace given it has a node-name.

Signed-off-by: Benoit Canet <benoit@irqsave.net>
---
 block.c                   |   8 ++--
 block/mirror.c            | 116 ++++++++++++++++++++++++++++++++++++++++++++--
 blockdev.c                |  27 +++++++++++
 include/block/block.h     |   3 ++
 include/block/block_int.h |  15 ++++++
 qapi-schema.json          |  33 +++++++++++++
 qmp-commands.hx           |   5 ++
 trace-events              |   1 +
 8 files changed, 202 insertions(+), 6 deletions(-)

Comments

Max Reitz March 13, 2014, 8:50 p.m. UTC | #1
On 11.03.2014 22:53, Benoît Canet wrote:
> When a quorum file is totally destroyed (broken NAS or SAN) the user can start a
> drive-mirror job on the quorum block backend and then replace the broken
> quorum file with drive-mirror-replace given it has a node-name.
>
> Signed-off-by: Benoit Canet <benoit@irqsave.net>
> ---
>   block.c                   |   8 ++--
>   block/mirror.c            | 116 ++++++++++++++++++++++++++++++++++++++++++++--
>   blockdev.c                |  27 +++++++++++
>   include/block/block.h     |   3 ++
>   include/block/block_int.h |  15 ++++++
>   qapi-schema.json          |  33 +++++++++++++
>   qmp-commands.hx           |   5 ++
>   trace-events              |   1 +
>   8 files changed, 202 insertions(+), 6 deletions(-)
>
> diff --git a/block.c b/block.c
> index 9f2fe85..3fd38b4 100644
> --- a/block.c
> +++ b/block.c
> @@ -787,10 +787,12 @@ static int bdrv_open_flags(BlockDriverState *bs, int flags)
>       return open_flags;
>   }
>   
> -static int bdrv_assign_node_name(BlockDriverState *bs,
> -                                 const char *node_name,
> -                                 Error **errp)
> +int bdrv_assign_node_name(BlockDriverState *bs,
> +                          const char *node_name,
> +                          Error **errp)
>   {
> +    assert(bs->node_name[0] == '\0');
> +
>       if (!node_name) {
>           return 0;
>       }
> diff --git a/block/mirror.c b/block/mirror.c
> index 6dc84e8..9365669 100644
> --- a/block/mirror.c
> +++ b/block/mirror.c
> @@ -49,6 +49,15 @@ typedef struct MirrorBlockJob {
>       unsigned long *in_flight_bitmap;
>       int in_flight;
>       int ret;
> +    /* these four fields are used by drive-mirror-replace */
> +    /* The code must replace a target with the new mirror */
> +    bool must_replace;
> +    /* The block BDS to replace */
> +    BlockDriverState *to_replace;
> +    /* the node-name of the new mirror BDS */
> +    char *new_node_name;
> +    /* Used to block operations on the drive-mirror-replace target. */
> +    Error *replace_blocker;
>   } MirrorBlockJob;
>   
>   typedef struct MirrorOp {
> @@ -299,6 +308,7 @@ static void coroutine_fn mirror_run(void *opaque)
>       MirrorBlockJob *s = opaque;
>       BlockDriverState *bs = s->common.bs;
>       int64_t sector_num, end, sectors_per_chunk, length;
> +    BlockDriverState *to_replace;
>       uint64_t last_pause_ns;
>       BlockDriverInfo bdi;
>       char backing_filename[1024];
> @@ -477,11 +487,17 @@ immediate_exit:
>       g_free(s->in_flight_bitmap);
>       bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
>       bdrv_iostatus_disable(s->target);
> +    /* Here we handle the drive-mirror-replace QMP command */
> +    if (s->must_replace) {
> +        to_replace = s->to_replace;
> +    } else {
> +        to_replace = s->common.bs;
> +    }
>       if (s->should_complete && ret == 0) {
> -        if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
> -            bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
> +        if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
> +            bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
>           }
> -        bdrv_swap(s->target, s->common.bs);
> +        bdrv_swap(s->target, to_replace);
>           if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
>               /* drop the bs loop chain formed by the swap: break the loop then
>                * trigger the unref from the top one */
> @@ -491,6 +507,11 @@ immediate_exit:
>               bdrv_unref(p);
>           }
>       }
> +    if (s->must_replace) {
> +        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
> +        error_free(s->replace_blocker);
> +        bdrv_unref(s->to_replace);
> +    }
>       bdrv_unref(s->target);
>       block_job_completed(&s->common, ret);
>   }
> @@ -513,6 +534,88 @@ static void mirror_iostatus_reset(BlockJob *job)
>       bdrv_iostatus_reset(s->target);
>   }
>   
> +bool mirror_set_replace_target(BlockJob *job, const char *reference,
> +                               bool has_new_node_name,
> +                               const char *new_node_name, Error **errp)
> +{
> +    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
> +    BlockDriverState *to_replace;
> +
> +    /* We don't want to give too much power to the user as this could result in
> +     * BlockDriverState loops if used with something other than sync=full.
> +     */
> +    if (s->is_none_mode || s->base) {
> +        error_setg(errp, "Can only be used on a mirror done with sync=full");
> +        return false;
> +    }
> +
> +    /* check that the target reference is not an empty string */
> +    if (!reference[0]) {
> +        error_setg(errp, "target-reference is an empty string");
> +        return false;
> +    }
> +
> +    /* Get the block driver state to be replaced */
> +    to_replace = bdrv_lookup_bs(reference, reference, errp);
> +    if (!to_replace) {
> +        return false;
> +    }
> +
> +    /* If the BDS to be replaced is a regular node we need a new node name */
> +    if (to_replace->node_name[0] && !has_new_node_name) {
> +        error_setg(errp, "A new-node-name must be provided");
> +        return false;
> +    }
> +
> +    /* Can only replace something else than the source of the mirror */
> +    if (to_replace == job->bs) {
> +        error_setg(errp, "Cannot replace the mirror source");
> +        return false;
> +    }
> +
> +    /* is this bs replace operation blocked */
> +    if (bdrv_op_is_blocked(to_replace, BLOCK_OP_TYPE_REPLACE, errp)) {
> +        return false;
> +    }
> +
> +    /* save useful infos for later */
> +    s->to_replace = to_replace;
> +    assert(has_new_node_name);

Sorry to have missed this in v1: In qapi-schema.json, the description (I 
guess) tells me that the new_node_name is optional and does not need to 
be provided if the target device is not a (regular) node of the BDS graph.

In case it is a regular node and new_node_name is not given, you're 
returning an appropriate error message above. But in any other case, not 
giving new_node_name should be fine. However, you're asserting that it 
is always given here, and if it isn't, this won't return a more useful 
error message, but just abort qemu. Are you planning to add support for 
not passing new_node_name in the future?

> +    s->new_node_name = g_strdup(new_node_name);
> +
> +    return true;
> +}
> +
> +static void mirror_activate_replace_target(BlockJob *job, Error **errp)
> +{
> +    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
> +
> +    /* Set the new node name if the BDS to replace is a regular node
> +     * of the graph.
> +     */
> +    if (s->to_replace->node_name[0]) {
> +        assert(s->new_node_name);
> +        bdrv_assign_node_name(s->target, s->new_node_name, errp);
> +    }
> +
> +    g_free(s->new_node_name);
> +
> +    if (error_is_set(errp)) {
> +        s->to_replace = NULL;
> +        return;
> +    }
> +
> +    /* block all operations on the target bs */
> +    error_setg(&s->replace_blocker,
> +               "block device is in use by drive-mirror-replace");
> +    bdrv_op_block_all(s->to_replace, s->replace_blocker);
> +
> +    bdrv_ref(s->to_replace);
> +
> +    /* activate the replacement operation */
> +    s->must_replace = true;
> +}
> +
>   static void mirror_complete(BlockJob *job, Error **errp)
>   {
>       MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
> @@ -529,6 +632,13 @@ static void mirror_complete(BlockJob *job, Error **errp)
>           return;
>       }
>   
> +    /* drive-mirror-replace is being called on this job so activate the
> +     * replacement target
> +     */
> +    if (s->to_replace) {
> +        mirror_activate_replace_target(job, errp);
> +    }
> +
>       s->should_complete = true;
>       block_job_resume(job);
>   }
> diff --git a/blockdev.c b/blockdev.c
> index 8a6ae0a..901a05d 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -2365,6 +2365,33 @@ void qmp_block_job_complete(const char *device, Error **errp)
>       block_job_complete(job, errp);
>   }
>   
> +void qmp_drive_mirror_replace(const char *device, const char *target_reference,
> +                              bool has_new_node_name,
> +                              const char *new_node_name,
> +                              Error **errp)
> +{
> +    BlockJob *job = find_block_job(device);
> +
> +    if (!job) {
> +        error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
> +        return;
> +    }
> +
> +    if (!job->driver || job->driver->job_type != BLOCK_JOB_TYPE_MIRROR) {
> +        error_setg(errp, "Can only be used on a drive-mirror block job");
> +        return;
> +    }
> +
> +    if (!mirror_set_replace_target(job, target_reference, has_new_node_name,
> +                                   new_node_name, errp)) {
> +        return;
> +    }
> +
> +    trace_qmp_drive_mirror_replace(job, target_reference,
> +                                   has_new_node_name ? new_node_name : "");
> +    block_job_complete(job, errp);
> +}
> +
>   void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
>   {
>       QmpOutputVisitor *ov = qmp_output_visitor_new();
> diff --git a/include/block/block.h b/include/block/block.h
> index ee1582d..a9d6ead 100644
> --- a/include/block/block.h
> +++ b/include/block/block.h
> @@ -171,6 +171,7 @@ typedef enum BlockOpType {
>       BLOCK_OP_TYPE_MIRROR,
>       BLOCK_OP_TYPE_RESIZE,
>       BLOCK_OP_TYPE_STREAM,
> +    BLOCK_OP_TYPE_REPLACE,
>       BLOCK_OP_TYPE_MAX,
>   } BlockOpType;
>   
> @@ -209,6 +210,8 @@ int bdrv_open_image(BlockDriverState **pbs, const char *filename,
>                       QDict *options, const char *bdref_key, int flags,
>                       bool allow_none, Error **errp);
>   void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd);
> +int bdrv_assign_node_name(BlockDriverState *bs, const char *node_name,
> +                          Error **errp);
>   int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp);
>   int bdrv_open(BlockDriverState **pbs, const char *filename,
>                 const char *reference, QDict *options, int flags,
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index 1f4f78b..ea281c8 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -486,6 +486,21 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
>                     void *opaque, Error **errp);
>   
>   /*
> + * mirror_set_replace_target:
> + * @job: An active mirroring block job started with sync=full.
> + * @reference: id or node-name of the BDS to replace when the mirror is done.
> + * @has_new_node_name: Set to true if new_node_name if provided
> + * @new_node_name: The optional new node name of the new mirror.
> + * @errp: Error object.
> + *
> + * Prepare a mirroring operation to replace a BDS pointed to by reference with
> + * the new mirror.
> + */
> +bool mirror_set_replace_target(BlockJob *job, const char *reference,
> +                               bool has_new_node_name,
> +                               const char *new_node_name, Error **errp);
> +
> +/*
>    * backup_start:
>    * @bs: Block device to operate on.
>    * @target: Block device to write to.
> diff --git a/qapi-schema.json b/qapi-schema.json
> index f5a8767..ef830e8 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -2716,6 +2716,39 @@
>   { 'command': 'block-job-complete', 'data': { 'device': 'str' } }
>   
>   ##
> +# @drive-mirror-replace:
> +#
> +# Manually trigger completion of an active background drive-mirror operation
> +# and replace the target reference with the new mirror.
> +# This switches the device to write to the target path only.
> +# The ability to complete is signaled with a BLOCK_JOB_READY event.
> +#
> +# This command completes an active drive-mirror background operation
> +# synchronously and replaces the target reference with the mirror.
> +# The ordering of this command's return with the BLOCK_JOB_COMPLETED event
> +# is not defined.  Note that if an I/O error occurs during the processing of
> +# this command: 1) the command itself will fail; 2) the error will be processed
> +# according to the rerror/werror arguments that were specified when starting
> +# the operation.
> +#
> +# A cancelled or paused drive-mirror job cannot be completed.
> +#
> +# @device:           the device name
> +# @target-reference: the id or node name of the block driver state to replace
> +# @new-node-name:    #optional set the node-name of the new block driver state
> +#                    needed the target reference point to a regular node of the
> +#                    graph

Again, sorry for not noting this in my review for v1, but I seem to have 
problems parsing this sentence (the last two lines). Did you omit an 
"if" and an "-s", so it would read "Needed if the target reference 
points to a regular node of the graph"?

Max

> +#
> +# Returns: Nothing on success
> +#          If no background operation is active on this device, DeviceNotActive
> +#
> +# Since: 2.1
> +##
> +{ 'command': 'drive-mirror-replace',
> +  'data': { 'device': 'str', 'target-reference': 'str',
> +            '*new-node-name': 'str' } }
> +
> +##
>   # @ObjectTypeInfo:
>   #
>   # This structure describes a search result from @qom-list-types
> diff --git a/qmp-commands.hx b/qmp-commands.hx
> index dd336f7..3b5b382 100644
> --- a/qmp-commands.hx
> +++ b/qmp-commands.hx
> @@ -1150,6 +1150,11 @@ EQMP
>           .mhandler.cmd_new = qmp_marshal_input_block_job_complete,
>       },
>       {
> +        .name       = "drive-mirror-replace",
> +        .args_type  = "device:B,target-reference:s,new-node-name:s?",
> +        .mhandler.cmd_new = qmp_marshal_input_drive_mirror_replace,
> +    },
> +    {
>           .name       = "transaction",
>           .args_type  = "actions:q",
>           .mhandler.cmd_new = qmp_marshal_input_transaction,
> diff --git a/trace-events b/trace-events
> index aec4202..5282904 100644
> --- a/trace-events
> +++ b/trace-events
> @@ -103,6 +103,7 @@ qmp_block_job_cancel(void *job) "job %p"
>   qmp_block_job_pause(void *job) "job %p"
>   qmp_block_job_resume(void *job) "job %p"
>   qmp_block_job_complete(void *job) "job %p"
> +qmp_drive_mirror_replace(void *job, const char *target_reference, const char *new_node_name) "job %p target_reference %s new_node_name %s"
>   block_job_cb(void *bs, void *job, int ret) "bs %p job %p ret %d"
>   qmp_block_stream(void *bs, void *job) "bs %p job %p"
>
Benoît Canet March 14, 2014, 3:07 p.m. UTC | #2
The Thursday 13 Mar 2014 à 21:50:52 (+0100), Max Reitz wrote :
> On 11.03.2014 22:53, Benoît Canet wrote:
> >When a quorum file is totally destroyed (broken NAS or SAN) the user can start a
> >drive-mirror job on the quorum block backend and then replace the broken
> >quorum file with drive-mirror-replace given it has a node-name.
> >
> >Signed-off-by: Benoit Canet <benoit@irqsave.net>
> >---
> >  block.c                   |   8 ++--
> >  block/mirror.c            | 116 ++++++++++++++++++++++++++++++++++++++++++++--
> >  blockdev.c                |  27 +++++++++++
> >  include/block/block.h     |   3 ++
> >  include/block/block_int.h |  15 ++++++
> >  qapi-schema.json          |  33 +++++++++++++
> >  qmp-commands.hx           |   5 ++
> >  trace-events              |   1 +
> >  8 files changed, 202 insertions(+), 6 deletions(-)
> >
> >diff --git a/block.c b/block.c
> >index 9f2fe85..3fd38b4 100644
> >--- a/block.c
> >+++ b/block.c
> >@@ -787,10 +787,12 @@ static int bdrv_open_flags(BlockDriverState *bs, int flags)
> >      return open_flags;
> >  }
> >-static int bdrv_assign_node_name(BlockDriverState *bs,
> >-                                 const char *node_name,
> >-                                 Error **errp)
> >+int bdrv_assign_node_name(BlockDriverState *bs,
> >+                          const char *node_name,
> >+                          Error **errp)
> >  {
> >+    assert(bs->node_name[0] == '\0');
> >+
> >      if (!node_name) {
> >          return 0;
> >      }
> >diff --git a/block/mirror.c b/block/mirror.c
> >index 6dc84e8..9365669 100644
> >--- a/block/mirror.c
> >+++ b/block/mirror.c
> >@@ -49,6 +49,15 @@ typedef struct MirrorBlockJob {
> >      unsigned long *in_flight_bitmap;
> >      int in_flight;
> >      int ret;
> >+    /* these four fields are used by drive-mirror-replace */
> >+    /* The code must replace a target with the new mirror */
> >+    bool must_replace;
> >+    /* The block BDS to replace */
> >+    BlockDriverState *to_replace;
> >+    /* the node-name of the new mirror BDS */
> >+    char *new_node_name;
> >+    /* Used to block operations on the drive-mirror-replace target. */
> >+    Error *replace_blocker;
> >  } MirrorBlockJob;
> >  typedef struct MirrorOp {
> >@@ -299,6 +308,7 @@ static void coroutine_fn mirror_run(void *opaque)
> >      MirrorBlockJob *s = opaque;
> >      BlockDriverState *bs = s->common.bs;
> >      int64_t sector_num, end, sectors_per_chunk, length;
> >+    BlockDriverState *to_replace;
> >      uint64_t last_pause_ns;
> >      BlockDriverInfo bdi;
> >      char backing_filename[1024];
> >@@ -477,11 +487,17 @@ immediate_exit:
> >      g_free(s->in_flight_bitmap);
> >      bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
> >      bdrv_iostatus_disable(s->target);
> >+    /* Here we handle the drive-mirror-replace QMP command */
> >+    if (s->must_replace) {
> >+        to_replace = s->to_replace;
> >+    } else {
> >+        to_replace = s->common.bs;
> >+    }
> >      if (s->should_complete && ret == 0) {
> >-        if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
> >-            bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
> >+        if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
> >+            bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
> >          }
> >-        bdrv_swap(s->target, s->common.bs);
> >+        bdrv_swap(s->target, to_replace);
> >          if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
> >              /* drop the bs loop chain formed by the swap: break the loop then
> >               * trigger the unref from the top one */
> >@@ -491,6 +507,11 @@ immediate_exit:
> >              bdrv_unref(p);
> >          }
> >      }
> >+    if (s->must_replace) {
> >+        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
> >+        error_free(s->replace_blocker);
> >+        bdrv_unref(s->to_replace);
> >+    }
> >      bdrv_unref(s->target);
> >      block_job_completed(&s->common, ret);
> >  }
> >@@ -513,6 +534,88 @@ static void mirror_iostatus_reset(BlockJob *job)
> >      bdrv_iostatus_reset(s->target);
> >  }
> >+bool mirror_set_replace_target(BlockJob *job, const char *reference,
> >+                               bool has_new_node_name,
> >+                               const char *new_node_name, Error **errp)
> >+{
> >+    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
> >+    BlockDriverState *to_replace;
> >+
> >+    /* We don't want to give too much power to the user as this could result in
> >+     * BlockDriverState loops if used with something other than sync=full.
> >+     */
> >+    if (s->is_none_mode || s->base) {
> >+        error_setg(errp, "Can only be used on a mirror done with sync=full");
> >+        return false;
> >+    }
> >+
> >+    /* check that the target reference is not an empty string */
> >+    if (!reference[0]) {
> >+        error_setg(errp, "target-reference is an empty string");
> >+        return false;
> >+    }
> >+
> >+    /* Get the block driver state to be replaced */
> >+    to_replace = bdrv_lookup_bs(reference, reference, errp);
> >+    if (!to_replace) {
> >+        return false;
> >+    }
> >+
> >+    /* If the BDS to be replaced is a regular node we need a new node name */
> >+    if (to_replace->node_name[0] && !has_new_node_name) {
> >+        error_setg(errp, "A new-node-name must be provided");
> >+        return false;
> >+    }
> >+
> >+    /* Can only replace something else than the source of the mirror */
> >+    if (to_replace == job->bs) {
> >+        error_setg(errp, "Cannot replace the mirror source");
> >+        return false;
> >+    }
> >+
> >+    /* is this bs replace operation blocked */
> >+    if (bdrv_op_is_blocked(to_replace, BLOCK_OP_TYPE_REPLACE, errp)) {
> >+        return false;
> >+    }
> >+
> >+    /* save useful infos for later */
> >+    s->to_replace = to_replace;
> >+    assert(has_new_node_name);
> 
> Sorry to have missed this in v1: In qapi-schema.json, the
> description (I guess) tells me that the new_node_name is optional
> and does not need to be provided if the target device is not a
> (regular) node of the BDS graph.
> 
> In case it is a regular node and new_node_name is not given, you're
> returning an appropriate error message above. But in any other case,
> not giving new_node_name should be fine. However, you're asserting
> that it is always given here, and if it isn't, this won't return a
> more useful error message, but just abort qemu. Are you planning to
> add support for not passing new_node_name in the future?

This assert is just plain wrong.

> 
> >+    s->new_node_name = g_strdup(new_node_name);
> >+
> >+    return true;
> >+}
> >+
> >+static void mirror_activate_replace_target(BlockJob *job, Error **errp)
> >+{
> >+    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
> >+
> >+    /* Set the new node name if the BDS to replace is a regular node
> >+     * of the graph.
> >+     */
> >+    if (s->to_replace->node_name[0]) {
> >+        assert(s->new_node_name);
> >+        bdrv_assign_node_name(s->target, s->new_node_name, errp);
> >+    }
> >+
> >+    g_free(s->new_node_name);
> >+
> >+    if (error_is_set(errp)) {
> >+        s->to_replace = NULL;
> >+        return;
> >+    }
> >+
> >+    /* block all operations on the target bs */
> >+    error_setg(&s->replace_blocker,
> >+               "block device is in use by drive-mirror-replace");
> >+    bdrv_op_block_all(s->to_replace, s->replace_blocker);
> >+
> >+    bdrv_ref(s->to_replace);
> >+
> >+    /* activate the replacement operation */
> >+    s->must_replace = true;
> >+}
> >+
> >  static void mirror_complete(BlockJob *job, Error **errp)
> >  {
> >      MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
> >@@ -529,6 +632,13 @@ static void mirror_complete(BlockJob *job, Error **errp)
> >          return;
> >      }
> >+    /* drive-mirror-replace is being called on this job so activate the
> >+     * replacement target
> >+     */
> >+    if (s->to_replace) {
> >+        mirror_activate_replace_target(job, errp);
> >+    }
> >+
> >      s->should_complete = true;
> >      block_job_resume(job);
> >  }
> >diff --git a/blockdev.c b/blockdev.c
> >index 8a6ae0a..901a05d 100644
> >--- a/blockdev.c
> >+++ b/blockdev.c
> >@@ -2365,6 +2365,33 @@ void qmp_block_job_complete(const char *device, Error **errp)
> >      block_job_complete(job, errp);
> >  }
> >+void qmp_drive_mirror_replace(const char *device, const char *target_reference,
> >+                              bool has_new_node_name,
> >+                              const char *new_node_name,
> >+                              Error **errp)
> >+{
> >+    BlockJob *job = find_block_job(device);
> >+
> >+    if (!job) {
> >+        error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
> >+        return;
> >+    }
> >+
> >+    if (!job->driver || job->driver->job_type != BLOCK_JOB_TYPE_MIRROR) {
> >+        error_setg(errp, "Can only be used on a drive-mirror block job");
> >+        return;
> >+    }
> >+
> >+    if (!mirror_set_replace_target(job, target_reference, has_new_node_name,
> >+                                   new_node_name, errp)) {
> >+        return;
> >+    }
> >+
> >+    trace_qmp_drive_mirror_replace(job, target_reference,
> >+                                   has_new_node_name ? new_node_name : "");
> >+    block_job_complete(job, errp);
> >+}
> >+
> >  void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
> >  {
> >      QmpOutputVisitor *ov = qmp_output_visitor_new();
> >diff --git a/include/block/block.h b/include/block/block.h
> >index ee1582d..a9d6ead 100644
> >--- a/include/block/block.h
> >+++ b/include/block/block.h
> >@@ -171,6 +171,7 @@ typedef enum BlockOpType {
> >      BLOCK_OP_TYPE_MIRROR,
> >      BLOCK_OP_TYPE_RESIZE,
> >      BLOCK_OP_TYPE_STREAM,
> >+    BLOCK_OP_TYPE_REPLACE,
> >      BLOCK_OP_TYPE_MAX,
> >  } BlockOpType;
> >@@ -209,6 +210,8 @@ int bdrv_open_image(BlockDriverState **pbs, const char *filename,
> >                      QDict *options, const char *bdref_key, int flags,
> >                      bool allow_none, Error **errp);
> >  void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd);
> >+int bdrv_assign_node_name(BlockDriverState *bs, const char *node_name,
> >+                          Error **errp);
> >  int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp);
> >  int bdrv_open(BlockDriverState **pbs, const char *filename,
> >                const char *reference, QDict *options, int flags,
> >diff --git a/include/block/block_int.h b/include/block/block_int.h
> >index 1f4f78b..ea281c8 100644
> >--- a/include/block/block_int.h
> >+++ b/include/block/block_int.h
> >@@ -486,6 +486,21 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
> >                    void *opaque, Error **errp);
> >  /*
> >+ * mirror_set_replace_target:
> >+ * @job: An active mirroring block job started with sync=full.
> >+ * @reference: id or node-name of the BDS to replace when the mirror is done.
> >+ * @has_new_node_name: Set to true if new_node_name if provided
> >+ * @new_node_name: The optional new node name of the new mirror.
> >+ * @errp: Error object.
> >+ *
> >+ * Prepare a mirroring operation to replace a BDS pointed to by reference with
> >+ * the new mirror.
> >+ */
> >+bool mirror_set_replace_target(BlockJob *job, const char *reference,
> >+                               bool has_new_node_name,
> >+                               const char *new_node_name, Error **errp);
> >+
> >+/*
> >   * backup_start:
> >   * @bs: Block device to operate on.
> >   * @target: Block device to write to.
> >diff --git a/qapi-schema.json b/qapi-schema.json
> >index f5a8767..ef830e8 100644
> >--- a/qapi-schema.json
> >+++ b/qapi-schema.json
> >@@ -2716,6 +2716,39 @@
> >  { 'command': 'block-job-complete', 'data': { 'device': 'str' } }
> >  ##
> >+# @drive-mirror-replace:
> >+#
> >+# Manually trigger completion of an active background drive-mirror operation
> >+# and replace the target reference with the new mirror.
> >+# This switches the device to write to the target path only.
> >+# The ability to complete is signaled with a BLOCK_JOB_READY event.
> >+#
> >+# This command completes an active drive-mirror background operation
> >+# synchronously and replaces the target reference with the mirror.
> >+# The ordering of this command's return with the BLOCK_JOB_COMPLETED event
> >+# is not defined.  Note that if an I/O error occurs during the processing of
> >+# this command: 1) the command itself will fail; 2) the error will be processed
> >+# according to the rerror/werror arguments that were specified when starting
> >+# the operation.
> >+#
> >+# A cancelled or paused drive-mirror job cannot be completed.
> >+#
> >+# @device:           the device name
> >+# @target-reference: the id or node name of the block driver state to replace
> >+# @new-node-name:    #optional set the node-name of the new block driver state
> >+#                    needed the target reference point to a regular node of the
> >+#                    graph
> 
> Again, sorry for not noting this in my review for v1, but I seem to
> have problems parsing this sentence (the last two lines). Did you
> omit an "if" and an "-s", so it would read "Needed if the target
> reference points to a regular node of the graph"?
> 
> Max
> 
> >+#
> >+# Returns: Nothing on success
> >+#          If no background operation is active on this device, DeviceNotActive
> >+#
> >+# Since: 2.1
> >+##
> >+{ 'command': 'drive-mirror-replace',
> >+  'data': { 'device': 'str', 'target-reference': 'str',
> >+            '*new-node-name': 'str' } }
> >+
> >+##
> >  # @ObjectTypeInfo:
> >  #
> >  # This structure describes a search result from @qom-list-types
> >diff --git a/qmp-commands.hx b/qmp-commands.hx
> >index dd336f7..3b5b382 100644
> >--- a/qmp-commands.hx
> >+++ b/qmp-commands.hx
> >@@ -1150,6 +1150,11 @@ EQMP
> >          .mhandler.cmd_new = qmp_marshal_input_block_job_complete,
> >      },
> >      {
> >+        .name       = "drive-mirror-replace",
> >+        .args_type  = "device:B,target-reference:s,new-node-name:s?",
> >+        .mhandler.cmd_new = qmp_marshal_input_drive_mirror_replace,
> >+    },
> >+    {
> >          .name       = "transaction",
> >          .args_type  = "actions:q",
> >          .mhandler.cmd_new = qmp_marshal_input_transaction,
> >diff --git a/trace-events b/trace-events
> >index aec4202..5282904 100644
> >--- a/trace-events
> >+++ b/trace-events
> >@@ -103,6 +103,7 @@ qmp_block_job_cancel(void *job) "job %p"
> >  qmp_block_job_pause(void *job) "job %p"
> >  qmp_block_job_resume(void *job) "job %p"
> >  qmp_block_job_complete(void *job) "job %p"
> >+qmp_drive_mirror_replace(void *job, const char *target_reference, const char *new_node_name) "job %p target_reference %s new_node_name %s"
> >  block_job_cb(void *bs, void *job, int ret) "bs %p job %p ret %d"
> >  qmp_block_stream(void *bs, void *job) "bs %p job %p"
>
diff mbox

Patch

diff --git a/block.c b/block.c
index 9f2fe85..3fd38b4 100644
--- a/block.c
+++ b/block.c
@@ -787,10 +787,12 @@  static int bdrv_open_flags(BlockDriverState *bs, int flags)
     return open_flags;
 }
 
-static int bdrv_assign_node_name(BlockDriverState *bs,
-                                 const char *node_name,
-                                 Error **errp)
+int bdrv_assign_node_name(BlockDriverState *bs,
+                          const char *node_name,
+                          Error **errp)
 {
+    assert(bs->node_name[0] == '\0');
+
     if (!node_name) {
         return 0;
     }
diff --git a/block/mirror.c b/block/mirror.c
index 6dc84e8..9365669 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -49,6 +49,15 @@  typedef struct MirrorBlockJob {
     unsigned long *in_flight_bitmap;
     int in_flight;
     int ret;
+    /* these four fields are used by drive-mirror-replace */
+    /* The code must replace a target with the new mirror */
+    bool must_replace;
+    /* The block BDS to replace */
+    BlockDriverState *to_replace;
+    /* the node-name of the new mirror BDS */
+    char *new_node_name;
+    /* Used to block operations on the drive-mirror-replace target. */
+    Error *replace_blocker;
 } MirrorBlockJob;
 
 typedef struct MirrorOp {
@@ -299,6 +308,7 @@  static void coroutine_fn mirror_run(void *opaque)
     MirrorBlockJob *s = opaque;
     BlockDriverState *bs = s->common.bs;
     int64_t sector_num, end, sectors_per_chunk, length;
+    BlockDriverState *to_replace;
     uint64_t last_pause_ns;
     BlockDriverInfo bdi;
     char backing_filename[1024];
@@ -477,11 +487,17 @@  immediate_exit:
     g_free(s->in_flight_bitmap);
     bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
     bdrv_iostatus_disable(s->target);
+    /* Here we handle the drive-mirror-replace QMP command */
+    if (s->must_replace) {
+        to_replace = s->to_replace;
+    } else {
+        to_replace = s->common.bs;
+    }
     if (s->should_complete && ret == 0) {
-        if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
-            bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
+        if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
+            bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
         }
-        bdrv_swap(s->target, s->common.bs);
+        bdrv_swap(s->target, to_replace);
         if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
             /* drop the bs loop chain formed by the swap: break the loop then
              * trigger the unref from the top one */
@@ -491,6 +507,11 @@  immediate_exit:
             bdrv_unref(p);
         }
     }
+    if (s->must_replace) {
+        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
+        error_free(s->replace_blocker);
+        bdrv_unref(s->to_replace);
+    }
     bdrv_unref(s->target);
     block_job_completed(&s->common, ret);
 }
@@ -513,6 +534,88 @@  static void mirror_iostatus_reset(BlockJob *job)
     bdrv_iostatus_reset(s->target);
 }
 
+bool mirror_set_replace_target(BlockJob *job, const char *reference,
+                               bool has_new_node_name,
+                               const char *new_node_name, Error **errp)
+{
+    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+    BlockDriverState *to_replace;
+
+    /* We don't want to give too much power to the user as this could result in
+     * BlockDriverState loops if used with something other than sync=full.
+     */
+    if (s->is_none_mode || s->base) {
+        error_setg(errp, "Can only be used on a mirror done with sync=full");
+        return false;
+    }
+
+    /* check that the target reference is not an empty string */
+    if (!reference[0]) {
+        error_setg(errp, "target-reference is an empty string");
+        return false;
+    }
+
+    /* Get the block driver state to be replaced */
+    to_replace = bdrv_lookup_bs(reference, reference, errp);
+    if (!to_replace) {
+        return false;
+    }
+
+    /* If the BDS to be replaced is a regular node we need a new node name */
+    if (to_replace->node_name[0] && !has_new_node_name) {
+        error_setg(errp, "A new-node-name must be provided");
+        return false;
+    }
+
+    /* Can only replace something else than the source of the mirror */
+    if (to_replace == job->bs) {
+        error_setg(errp, "Cannot replace the mirror source");
+        return false;
+    }
+
+    /* is this bs replace operation blocked */
+    if (bdrv_op_is_blocked(to_replace, BLOCK_OP_TYPE_REPLACE, errp)) {
+        return false;
+    }
+
+    /* save useful infos for later */
+    s->to_replace = to_replace;
+    assert(has_new_node_name);
+    s->new_node_name = g_strdup(new_node_name);
+
+    return true;
+}
+
+static void mirror_activate_replace_target(BlockJob *job, Error **errp)
+{
+    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+
+    /* Set the new node name if the BDS to replace is a regular node
+     * of the graph.
+     */
+    if (s->to_replace->node_name[0]) {
+        assert(s->new_node_name);
+        bdrv_assign_node_name(s->target, s->new_node_name, errp);
+    }
+
+    g_free(s->new_node_name);
+
+    if (error_is_set(errp)) {
+        s->to_replace = NULL;
+        return;
+    }
+
+    /* block all operations on the target bs */
+    error_setg(&s->replace_blocker,
+               "block device is in use by drive-mirror-replace");
+    bdrv_op_block_all(s->to_replace, s->replace_blocker);
+
+    bdrv_ref(s->to_replace);
+
+    /* activate the replacement operation */
+    s->must_replace = true;
+}
+
 static void mirror_complete(BlockJob *job, Error **errp)
 {
     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
@@ -529,6 +632,13 @@  static void mirror_complete(BlockJob *job, Error **errp)
         return;
     }
 
+    /* drive-mirror-replace is being called on this job so activate the
+     * replacement target
+     */
+    if (s->to_replace) {
+        mirror_activate_replace_target(job, errp);
+    }
+
     s->should_complete = true;
     block_job_resume(job);
 }
diff --git a/blockdev.c b/blockdev.c
index 8a6ae0a..901a05d 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2365,6 +2365,33 @@  void qmp_block_job_complete(const char *device, Error **errp)
     block_job_complete(job, errp);
 }
 
+void qmp_drive_mirror_replace(const char *device, const char *target_reference,
+                              bool has_new_node_name,
+                              const char *new_node_name,
+                              Error **errp)
+{
+    BlockJob *job = find_block_job(device);
+
+    if (!job) {
+        error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
+        return;
+    }
+
+    if (!job->driver || job->driver->job_type != BLOCK_JOB_TYPE_MIRROR) {
+        error_setg(errp, "Can only be used on a drive-mirror block job");
+        return;
+    }
+
+    if (!mirror_set_replace_target(job, target_reference, has_new_node_name,
+                                   new_node_name, errp)) {
+        return;
+    }
+
+    trace_qmp_drive_mirror_replace(job, target_reference,
+                                   has_new_node_name ? new_node_name : "");
+    block_job_complete(job, errp);
+}
+
 void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
 {
     QmpOutputVisitor *ov = qmp_output_visitor_new();
diff --git a/include/block/block.h b/include/block/block.h
index ee1582d..a9d6ead 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -171,6 +171,7 @@  typedef enum BlockOpType {
     BLOCK_OP_TYPE_MIRROR,
     BLOCK_OP_TYPE_RESIZE,
     BLOCK_OP_TYPE_STREAM,
+    BLOCK_OP_TYPE_REPLACE,
     BLOCK_OP_TYPE_MAX,
 } BlockOpType;
 
@@ -209,6 +210,8 @@  int bdrv_open_image(BlockDriverState **pbs, const char *filename,
                     QDict *options, const char *bdref_key, int flags,
                     bool allow_none, Error **errp);
 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd);
+int bdrv_assign_node_name(BlockDriverState *bs, const char *node_name,
+                          Error **errp);
 int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp);
 int bdrv_open(BlockDriverState **pbs, const char *filename,
               const char *reference, QDict *options, int flags,
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 1f4f78b..ea281c8 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -486,6 +486,21 @@  void mirror_start(BlockDriverState *bs, BlockDriverState *target,
                   void *opaque, Error **errp);
 
 /*
+ * mirror_set_replace_target:
+ * @job: An active mirroring block job started with sync=full.
+ * @reference: id or node-name of the BDS to replace when the mirror is done.
+ * @has_new_node_name: Set to true if new_node_name if provided
+ * @new_node_name: The optional new node name of the new mirror.
+ * @errp: Error object.
+ *
+ * Prepare a mirroring operation to replace a BDS pointed to by reference with
+ * the new mirror.
+ */
+bool mirror_set_replace_target(BlockJob *job, const char *reference,
+                               bool has_new_node_name,
+                               const char *new_node_name, Error **errp);
+
+/*
  * backup_start:
  * @bs: Block device to operate on.
  * @target: Block device to write to.
diff --git a/qapi-schema.json b/qapi-schema.json
index f5a8767..ef830e8 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2716,6 +2716,39 @@ 
 { 'command': 'block-job-complete', 'data': { 'device': 'str' } }
 
 ##
+# @drive-mirror-replace:
+#
+# Manually trigger completion of an active background drive-mirror operation
+# and replace the target reference with the new mirror.
+# This switches the device to write to the target path only.
+# The ability to complete is signaled with a BLOCK_JOB_READY event.
+#
+# This command completes an active drive-mirror background operation
+# synchronously and replaces the target reference with the mirror.
+# The ordering of this command's return with the BLOCK_JOB_COMPLETED event
+# is not defined.  Note that if an I/O error occurs during the processing of
+# this command: 1) the command itself will fail; 2) the error will be processed
+# according to the rerror/werror arguments that were specified when starting
+# the operation.
+#
+# A cancelled or paused drive-mirror job cannot be completed.
+#
+# @device:           the device name
+# @target-reference: the id or node name of the block driver state to replace
+# @new-node-name:    #optional set the node-name of the new block driver state
+#                    needed the target reference point to a regular node of the
+#                    graph
+#
+# Returns: Nothing on success
+#          If no background operation is active on this device, DeviceNotActive
+#
+# Since: 2.1
+##
+{ 'command': 'drive-mirror-replace',
+  'data': { 'device': 'str', 'target-reference': 'str',
+            '*new-node-name': 'str' } }
+
+##
 # @ObjectTypeInfo:
 #
 # This structure describes a search result from @qom-list-types
diff --git a/qmp-commands.hx b/qmp-commands.hx
index dd336f7..3b5b382 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1150,6 +1150,11 @@  EQMP
         .mhandler.cmd_new = qmp_marshal_input_block_job_complete,
     },
     {
+        .name       = "drive-mirror-replace",
+        .args_type  = "device:B,target-reference:s,new-node-name:s?",
+        .mhandler.cmd_new = qmp_marshal_input_drive_mirror_replace,
+    },
+    {
         .name       = "transaction",
         .args_type  = "actions:q",
         .mhandler.cmd_new = qmp_marshal_input_transaction,
diff --git a/trace-events b/trace-events
index aec4202..5282904 100644
--- a/trace-events
+++ b/trace-events
@@ -103,6 +103,7 @@  qmp_block_job_cancel(void *job) "job %p"
 qmp_block_job_pause(void *job) "job %p"
 qmp_block_job_resume(void *job) "job %p"
 qmp_block_job_complete(void *job) "job %p"
+qmp_drive_mirror_replace(void *job, const char *target_reference, const char *new_node_name) "job %p target_reference %s new_node_name %s"
 block_job_cb(void *bs, void *job, int ret) "bs %p job %p ret %d"
 qmp_block_stream(void *bs, void *job) "bs %p job %p"