Message ID | 1394574786-32579-3-git-send-email-benoit.canet@irqsave.net |
---|---|
State | New |
Headers | show |
On 11.03.2014 22:53, Benoît Canet wrote: > When a quorum file is totally destroyed (broken NAS or SAN) the user can start a > drive-mirror job on the quorum block backend and then replace the broken > quorum file with drive-mirror-replace given it has a node-name. > > Signed-off-by: Benoit Canet <benoit@irqsave.net> > --- > block.c | 8 ++-- > block/mirror.c | 116 ++++++++++++++++++++++++++++++++++++++++++++-- > blockdev.c | 27 +++++++++++ > include/block/block.h | 3 ++ > include/block/block_int.h | 15 ++++++ > qapi-schema.json | 33 +++++++++++++ > qmp-commands.hx | 5 ++ > trace-events | 1 + > 8 files changed, 202 insertions(+), 6 deletions(-) > > diff --git a/block.c b/block.c > index 9f2fe85..3fd38b4 100644 > --- a/block.c > +++ b/block.c > @@ -787,10 +787,12 @@ static int bdrv_open_flags(BlockDriverState *bs, int flags) > return open_flags; > } > > -static int bdrv_assign_node_name(BlockDriverState *bs, > - const char *node_name, > - Error **errp) > +int bdrv_assign_node_name(BlockDriverState *bs, > + const char *node_name, > + Error **errp) > { > + assert(bs->node_name[0] == '\0'); > + > if (!node_name) { > return 0; > } > diff --git a/block/mirror.c b/block/mirror.c > index 6dc84e8..9365669 100644 > --- a/block/mirror.c > +++ b/block/mirror.c > @@ -49,6 +49,15 @@ typedef struct MirrorBlockJob { > unsigned long *in_flight_bitmap; > int in_flight; > int ret; > + /* these four fields are used by drive-mirror-replace */ > + /* The code must replace a target with the new mirror */ > + bool must_replace; > + /* The block BDS to replace */ > + BlockDriverState *to_replace; > + /* the node-name of the new mirror BDS */ > + char *new_node_name; > + /* Used to block operations on the drive-mirror-replace target. */ > + Error *replace_blocker; > } MirrorBlockJob; > > typedef struct MirrorOp { > @@ -299,6 +308,7 @@ static void coroutine_fn mirror_run(void *opaque) > MirrorBlockJob *s = opaque; > BlockDriverState *bs = s->common.bs; > int64_t sector_num, end, sectors_per_chunk, length; > + BlockDriverState *to_replace; > uint64_t last_pause_ns; > BlockDriverInfo bdi; > char backing_filename[1024]; > @@ -477,11 +487,17 @@ immediate_exit: > g_free(s->in_flight_bitmap); > bdrv_release_dirty_bitmap(bs, s->dirty_bitmap); > bdrv_iostatus_disable(s->target); > + /* Here we handle the drive-mirror-replace QMP command */ > + if (s->must_replace) { > + to_replace = s->to_replace; > + } else { > + to_replace = s->common.bs; > + } > if (s->should_complete && ret == 0) { > - if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) { > - bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL); > + if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) { > + bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL); > } > - bdrv_swap(s->target, s->common.bs); > + bdrv_swap(s->target, to_replace); > if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) { > /* drop the bs loop chain formed by the swap: break the loop then > * trigger the unref from the top one */ > @@ -491,6 +507,11 @@ immediate_exit: > bdrv_unref(p); > } > } > + if (s->must_replace) { > + bdrv_op_unblock_all(s->to_replace, s->replace_blocker); > + error_free(s->replace_blocker); > + bdrv_unref(s->to_replace); > + } > bdrv_unref(s->target); > block_job_completed(&s->common, ret); > } > @@ -513,6 +534,88 @@ static void mirror_iostatus_reset(BlockJob *job) > bdrv_iostatus_reset(s->target); > } > > +bool mirror_set_replace_target(BlockJob *job, const char *reference, > + bool has_new_node_name, > + const char *new_node_name, Error **errp) > +{ > + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); > + BlockDriverState *to_replace; > + > + /* We don't want to give too much power to the user as this could result in > + * BlockDriverState loops if used with something other than sync=full. > + */ > + if (s->is_none_mode || s->base) { > + error_setg(errp, "Can only be used on a mirror done with sync=full"); > + return false; > + } > + > + /* check that the target reference is not an empty string */ > + if (!reference[0]) { > + error_setg(errp, "target-reference is an empty string"); > + return false; > + } > + > + /* Get the block driver state to be replaced */ > + to_replace = bdrv_lookup_bs(reference, reference, errp); > + if (!to_replace) { > + return false; > + } > + > + /* If the BDS to be replaced is a regular node we need a new node name */ > + if (to_replace->node_name[0] && !has_new_node_name) { > + error_setg(errp, "A new-node-name must be provided"); > + return false; > + } > + > + /* Can only replace something else than the source of the mirror */ > + if (to_replace == job->bs) { > + error_setg(errp, "Cannot replace the mirror source"); > + return false; > + } > + > + /* is this bs replace operation blocked */ > + if (bdrv_op_is_blocked(to_replace, BLOCK_OP_TYPE_REPLACE, errp)) { > + return false; > + } > + > + /* save useful infos for later */ > + s->to_replace = to_replace; > + assert(has_new_node_name); Sorry to have missed this in v1: In qapi-schema.json, the description (I guess) tells me that the new_node_name is optional and does not need to be provided if the target device is not a (regular) node of the BDS graph. In case it is a regular node and new_node_name is not given, you're returning an appropriate error message above. But in any other case, not giving new_node_name should be fine. However, you're asserting that it is always given here, and if it isn't, this won't return a more useful error message, but just abort qemu. Are you planning to add support for not passing new_node_name in the future? > + s->new_node_name = g_strdup(new_node_name); > + > + return true; > +} > + > +static void mirror_activate_replace_target(BlockJob *job, Error **errp) > +{ > + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); > + > + /* Set the new node name if the BDS to replace is a regular node > + * of the graph. > + */ > + if (s->to_replace->node_name[0]) { > + assert(s->new_node_name); > + bdrv_assign_node_name(s->target, s->new_node_name, errp); > + } > + > + g_free(s->new_node_name); > + > + if (error_is_set(errp)) { > + s->to_replace = NULL; > + return; > + } > + > + /* block all operations on the target bs */ > + error_setg(&s->replace_blocker, > + "block device is in use by drive-mirror-replace"); > + bdrv_op_block_all(s->to_replace, s->replace_blocker); > + > + bdrv_ref(s->to_replace); > + > + /* activate the replacement operation */ > + s->must_replace = true; > +} > + > static void mirror_complete(BlockJob *job, Error **errp) > { > MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); > @@ -529,6 +632,13 @@ static void mirror_complete(BlockJob *job, Error **errp) > return; > } > > + /* drive-mirror-replace is being called on this job so activate the > + * replacement target > + */ > + if (s->to_replace) { > + mirror_activate_replace_target(job, errp); > + } > + > s->should_complete = true; > block_job_resume(job); > } > diff --git a/blockdev.c b/blockdev.c > index 8a6ae0a..901a05d 100644 > --- a/blockdev.c > +++ b/blockdev.c > @@ -2365,6 +2365,33 @@ void qmp_block_job_complete(const char *device, Error **errp) > block_job_complete(job, errp); > } > > +void qmp_drive_mirror_replace(const char *device, const char *target_reference, > + bool has_new_node_name, > + const char *new_node_name, > + Error **errp) > +{ > + BlockJob *job = find_block_job(device); > + > + if (!job) { > + error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device); > + return; > + } > + > + if (!job->driver || job->driver->job_type != BLOCK_JOB_TYPE_MIRROR) { > + error_setg(errp, "Can only be used on a drive-mirror block job"); > + return; > + } > + > + if (!mirror_set_replace_target(job, target_reference, has_new_node_name, > + new_node_name, errp)) { > + return; > + } > + > + trace_qmp_drive_mirror_replace(job, target_reference, > + has_new_node_name ? new_node_name : ""); > + block_job_complete(job, errp); > +} > + > void qmp_blockdev_add(BlockdevOptions *options, Error **errp) > { > QmpOutputVisitor *ov = qmp_output_visitor_new(); > diff --git a/include/block/block.h b/include/block/block.h > index ee1582d..a9d6ead 100644 > --- a/include/block/block.h > +++ b/include/block/block.h > @@ -171,6 +171,7 @@ typedef enum BlockOpType { > BLOCK_OP_TYPE_MIRROR, > BLOCK_OP_TYPE_RESIZE, > BLOCK_OP_TYPE_STREAM, > + BLOCK_OP_TYPE_REPLACE, > BLOCK_OP_TYPE_MAX, > } BlockOpType; > > @@ -209,6 +210,8 @@ int bdrv_open_image(BlockDriverState **pbs, const char *filename, > QDict *options, const char *bdref_key, int flags, > bool allow_none, Error **errp); > void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd); > +int bdrv_assign_node_name(BlockDriverState *bs, const char *node_name, > + Error **errp); > int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp); > int bdrv_open(BlockDriverState **pbs, const char *filename, > const char *reference, QDict *options, int flags, > diff --git a/include/block/block_int.h b/include/block/block_int.h > index 1f4f78b..ea281c8 100644 > --- a/include/block/block_int.h > +++ b/include/block/block_int.h > @@ -486,6 +486,21 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, > void *opaque, Error **errp); > > /* > + * mirror_set_replace_target: > + * @job: An active mirroring block job started with sync=full. > + * @reference: id or node-name of the BDS to replace when the mirror is done. > + * @has_new_node_name: Set to true if new_node_name if provided > + * @new_node_name: The optional new node name of the new mirror. > + * @errp: Error object. > + * > + * Prepare a mirroring operation to replace a BDS pointed to by reference with > + * the new mirror. > + */ > +bool mirror_set_replace_target(BlockJob *job, const char *reference, > + bool has_new_node_name, > + const char *new_node_name, Error **errp); > + > +/* > * backup_start: > * @bs: Block device to operate on. > * @target: Block device to write to. > diff --git a/qapi-schema.json b/qapi-schema.json > index f5a8767..ef830e8 100644 > --- a/qapi-schema.json > +++ b/qapi-schema.json > @@ -2716,6 +2716,39 @@ > { 'command': 'block-job-complete', 'data': { 'device': 'str' } } > > ## > +# @drive-mirror-replace: > +# > +# Manually trigger completion of an active background drive-mirror operation > +# and replace the target reference with the new mirror. > +# This switches the device to write to the target path only. > +# The ability to complete is signaled with a BLOCK_JOB_READY event. > +# > +# This command completes an active drive-mirror background operation > +# synchronously and replaces the target reference with the mirror. > +# The ordering of this command's return with the BLOCK_JOB_COMPLETED event > +# is not defined. Note that if an I/O error occurs during the processing of > +# this command: 1) the command itself will fail; 2) the error will be processed > +# according to the rerror/werror arguments that were specified when starting > +# the operation. > +# > +# A cancelled or paused drive-mirror job cannot be completed. > +# > +# @device: the device name > +# @target-reference: the id or node name of the block driver state to replace > +# @new-node-name: #optional set the node-name of the new block driver state > +# needed the target reference point to a regular node of the > +# graph Again, sorry for not noting this in my review for v1, but I seem to have problems parsing this sentence (the last two lines). Did you omit an "if" and an "-s", so it would read "Needed if the target reference points to a regular node of the graph"? Max > +# > +# Returns: Nothing on success > +# If no background operation is active on this device, DeviceNotActive > +# > +# Since: 2.1 > +## > +{ 'command': 'drive-mirror-replace', > + 'data': { 'device': 'str', 'target-reference': 'str', > + '*new-node-name': 'str' } } > + > +## > # @ObjectTypeInfo: > # > # This structure describes a search result from @qom-list-types > diff --git a/qmp-commands.hx b/qmp-commands.hx > index dd336f7..3b5b382 100644 > --- a/qmp-commands.hx > +++ b/qmp-commands.hx > @@ -1150,6 +1150,11 @@ EQMP > .mhandler.cmd_new = qmp_marshal_input_block_job_complete, > }, > { > + .name = "drive-mirror-replace", > + .args_type = "device:B,target-reference:s,new-node-name:s?", > + .mhandler.cmd_new = qmp_marshal_input_drive_mirror_replace, > + }, > + { > .name = "transaction", > .args_type = "actions:q", > .mhandler.cmd_new = qmp_marshal_input_transaction, > diff --git a/trace-events b/trace-events > index aec4202..5282904 100644 > --- a/trace-events > +++ b/trace-events > @@ -103,6 +103,7 @@ qmp_block_job_cancel(void *job) "job %p" > qmp_block_job_pause(void *job) "job %p" > qmp_block_job_resume(void *job) "job %p" > qmp_block_job_complete(void *job) "job %p" > +qmp_drive_mirror_replace(void *job, const char *target_reference, const char *new_node_name) "job %p target_reference %s new_node_name %s" > block_job_cb(void *bs, void *job, int ret) "bs %p job %p ret %d" > qmp_block_stream(void *bs, void *job) "bs %p job %p" >
The Thursday 13 Mar 2014 à 21:50:52 (+0100), Max Reitz wrote : > On 11.03.2014 22:53, Benoît Canet wrote: > >When a quorum file is totally destroyed (broken NAS or SAN) the user can start a > >drive-mirror job on the quorum block backend and then replace the broken > >quorum file with drive-mirror-replace given it has a node-name. > > > >Signed-off-by: Benoit Canet <benoit@irqsave.net> > >--- > > block.c | 8 ++-- > > block/mirror.c | 116 ++++++++++++++++++++++++++++++++++++++++++++-- > > blockdev.c | 27 +++++++++++ > > include/block/block.h | 3 ++ > > include/block/block_int.h | 15 ++++++ > > qapi-schema.json | 33 +++++++++++++ > > qmp-commands.hx | 5 ++ > > trace-events | 1 + > > 8 files changed, 202 insertions(+), 6 deletions(-) > > > >diff --git a/block.c b/block.c > >index 9f2fe85..3fd38b4 100644 > >--- a/block.c > >+++ b/block.c > >@@ -787,10 +787,12 @@ static int bdrv_open_flags(BlockDriverState *bs, int flags) > > return open_flags; > > } > >-static int bdrv_assign_node_name(BlockDriverState *bs, > >- const char *node_name, > >- Error **errp) > >+int bdrv_assign_node_name(BlockDriverState *bs, > >+ const char *node_name, > >+ Error **errp) > > { > >+ assert(bs->node_name[0] == '\0'); > >+ > > if (!node_name) { > > return 0; > > } > >diff --git a/block/mirror.c b/block/mirror.c > >index 6dc84e8..9365669 100644 > >--- a/block/mirror.c > >+++ b/block/mirror.c > >@@ -49,6 +49,15 @@ typedef struct MirrorBlockJob { > > unsigned long *in_flight_bitmap; > > int in_flight; > > int ret; > >+ /* these four fields are used by drive-mirror-replace */ > >+ /* The code must replace a target with the new mirror */ > >+ bool must_replace; > >+ /* The block BDS to replace */ > >+ BlockDriverState *to_replace; > >+ /* the node-name of the new mirror BDS */ > >+ char *new_node_name; > >+ /* Used to block operations on the drive-mirror-replace target. */ > >+ Error *replace_blocker; > > } MirrorBlockJob; > > typedef struct MirrorOp { > >@@ -299,6 +308,7 @@ static void coroutine_fn mirror_run(void *opaque) > > MirrorBlockJob *s = opaque; > > BlockDriverState *bs = s->common.bs; > > int64_t sector_num, end, sectors_per_chunk, length; > >+ BlockDriverState *to_replace; > > uint64_t last_pause_ns; > > BlockDriverInfo bdi; > > char backing_filename[1024]; > >@@ -477,11 +487,17 @@ immediate_exit: > > g_free(s->in_flight_bitmap); > > bdrv_release_dirty_bitmap(bs, s->dirty_bitmap); > > bdrv_iostatus_disable(s->target); > >+ /* Here we handle the drive-mirror-replace QMP command */ > >+ if (s->must_replace) { > >+ to_replace = s->to_replace; > >+ } else { > >+ to_replace = s->common.bs; > >+ } > > if (s->should_complete && ret == 0) { > >- if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) { > >- bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL); > >+ if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) { > >+ bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL); > > } > >- bdrv_swap(s->target, s->common.bs); > >+ bdrv_swap(s->target, to_replace); > > if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) { > > /* drop the bs loop chain formed by the swap: break the loop then > > * trigger the unref from the top one */ > >@@ -491,6 +507,11 @@ immediate_exit: > > bdrv_unref(p); > > } > > } > >+ if (s->must_replace) { > >+ bdrv_op_unblock_all(s->to_replace, s->replace_blocker); > >+ error_free(s->replace_blocker); > >+ bdrv_unref(s->to_replace); > >+ } > > bdrv_unref(s->target); > > block_job_completed(&s->common, ret); > > } > >@@ -513,6 +534,88 @@ static void mirror_iostatus_reset(BlockJob *job) > > bdrv_iostatus_reset(s->target); > > } > >+bool mirror_set_replace_target(BlockJob *job, const char *reference, > >+ bool has_new_node_name, > >+ const char *new_node_name, Error **errp) > >+{ > >+ MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); > >+ BlockDriverState *to_replace; > >+ > >+ /* We don't want to give too much power to the user as this could result in > >+ * BlockDriverState loops if used with something other than sync=full. > >+ */ > >+ if (s->is_none_mode || s->base) { > >+ error_setg(errp, "Can only be used on a mirror done with sync=full"); > >+ return false; > >+ } > >+ > >+ /* check that the target reference is not an empty string */ > >+ if (!reference[0]) { > >+ error_setg(errp, "target-reference is an empty string"); > >+ return false; > >+ } > >+ > >+ /* Get the block driver state to be replaced */ > >+ to_replace = bdrv_lookup_bs(reference, reference, errp); > >+ if (!to_replace) { > >+ return false; > >+ } > >+ > >+ /* If the BDS to be replaced is a regular node we need a new node name */ > >+ if (to_replace->node_name[0] && !has_new_node_name) { > >+ error_setg(errp, "A new-node-name must be provided"); > >+ return false; > >+ } > >+ > >+ /* Can only replace something else than the source of the mirror */ > >+ if (to_replace == job->bs) { > >+ error_setg(errp, "Cannot replace the mirror source"); > >+ return false; > >+ } > >+ > >+ /* is this bs replace operation blocked */ > >+ if (bdrv_op_is_blocked(to_replace, BLOCK_OP_TYPE_REPLACE, errp)) { > >+ return false; > >+ } > >+ > >+ /* save useful infos for later */ > >+ s->to_replace = to_replace; > >+ assert(has_new_node_name); > > Sorry to have missed this in v1: In qapi-schema.json, the > description (I guess) tells me that the new_node_name is optional > and does not need to be provided if the target device is not a > (regular) node of the BDS graph. > > In case it is a regular node and new_node_name is not given, you're > returning an appropriate error message above. But in any other case, > not giving new_node_name should be fine. However, you're asserting > that it is always given here, and if it isn't, this won't return a > more useful error message, but just abort qemu. Are you planning to > add support for not passing new_node_name in the future? This assert is just plain wrong. > > >+ s->new_node_name = g_strdup(new_node_name); > >+ > >+ return true; > >+} > >+ > >+static void mirror_activate_replace_target(BlockJob *job, Error **errp) > >+{ > >+ MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); > >+ > >+ /* Set the new node name if the BDS to replace is a regular node > >+ * of the graph. > >+ */ > >+ if (s->to_replace->node_name[0]) { > >+ assert(s->new_node_name); > >+ bdrv_assign_node_name(s->target, s->new_node_name, errp); > >+ } > >+ > >+ g_free(s->new_node_name); > >+ > >+ if (error_is_set(errp)) { > >+ s->to_replace = NULL; > >+ return; > >+ } > >+ > >+ /* block all operations on the target bs */ > >+ error_setg(&s->replace_blocker, > >+ "block device is in use by drive-mirror-replace"); > >+ bdrv_op_block_all(s->to_replace, s->replace_blocker); > >+ > >+ bdrv_ref(s->to_replace); > >+ > >+ /* activate the replacement operation */ > >+ s->must_replace = true; > >+} > >+ > > static void mirror_complete(BlockJob *job, Error **errp) > > { > > MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); > >@@ -529,6 +632,13 @@ static void mirror_complete(BlockJob *job, Error **errp) > > return; > > } > >+ /* drive-mirror-replace is being called on this job so activate the > >+ * replacement target > >+ */ > >+ if (s->to_replace) { > >+ mirror_activate_replace_target(job, errp); > >+ } > >+ > > s->should_complete = true; > > block_job_resume(job); > > } > >diff --git a/blockdev.c b/blockdev.c > >index 8a6ae0a..901a05d 100644 > >--- a/blockdev.c > >+++ b/blockdev.c > >@@ -2365,6 +2365,33 @@ void qmp_block_job_complete(const char *device, Error **errp) > > block_job_complete(job, errp); > > } > >+void qmp_drive_mirror_replace(const char *device, const char *target_reference, > >+ bool has_new_node_name, > >+ const char *new_node_name, > >+ Error **errp) > >+{ > >+ BlockJob *job = find_block_job(device); > >+ > >+ if (!job) { > >+ error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device); > >+ return; > >+ } > >+ > >+ if (!job->driver || job->driver->job_type != BLOCK_JOB_TYPE_MIRROR) { > >+ error_setg(errp, "Can only be used on a drive-mirror block job"); > >+ return; > >+ } > >+ > >+ if (!mirror_set_replace_target(job, target_reference, has_new_node_name, > >+ new_node_name, errp)) { > >+ return; > >+ } > >+ > >+ trace_qmp_drive_mirror_replace(job, target_reference, > >+ has_new_node_name ? new_node_name : ""); > >+ block_job_complete(job, errp); > >+} > >+ > > void qmp_blockdev_add(BlockdevOptions *options, Error **errp) > > { > > QmpOutputVisitor *ov = qmp_output_visitor_new(); > >diff --git a/include/block/block.h b/include/block/block.h > >index ee1582d..a9d6ead 100644 > >--- a/include/block/block.h > >+++ b/include/block/block.h > >@@ -171,6 +171,7 @@ typedef enum BlockOpType { > > BLOCK_OP_TYPE_MIRROR, > > BLOCK_OP_TYPE_RESIZE, > > BLOCK_OP_TYPE_STREAM, > >+ BLOCK_OP_TYPE_REPLACE, > > BLOCK_OP_TYPE_MAX, > > } BlockOpType; > >@@ -209,6 +210,8 @@ int bdrv_open_image(BlockDriverState **pbs, const char *filename, > > QDict *options, const char *bdref_key, int flags, > > bool allow_none, Error **errp); > > void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd); > >+int bdrv_assign_node_name(BlockDriverState *bs, const char *node_name, > >+ Error **errp); > > int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp); > > int bdrv_open(BlockDriverState **pbs, const char *filename, > > const char *reference, QDict *options, int flags, > >diff --git a/include/block/block_int.h b/include/block/block_int.h > >index 1f4f78b..ea281c8 100644 > >--- a/include/block/block_int.h > >+++ b/include/block/block_int.h > >@@ -486,6 +486,21 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, > > void *opaque, Error **errp); > > /* > >+ * mirror_set_replace_target: > >+ * @job: An active mirroring block job started with sync=full. > >+ * @reference: id or node-name of the BDS to replace when the mirror is done. > >+ * @has_new_node_name: Set to true if new_node_name if provided > >+ * @new_node_name: The optional new node name of the new mirror. > >+ * @errp: Error object. > >+ * > >+ * Prepare a mirroring operation to replace a BDS pointed to by reference with > >+ * the new mirror. > >+ */ > >+bool mirror_set_replace_target(BlockJob *job, const char *reference, > >+ bool has_new_node_name, > >+ const char *new_node_name, Error **errp); > >+ > >+/* > > * backup_start: > > * @bs: Block device to operate on. > > * @target: Block device to write to. > >diff --git a/qapi-schema.json b/qapi-schema.json > >index f5a8767..ef830e8 100644 > >--- a/qapi-schema.json > >+++ b/qapi-schema.json > >@@ -2716,6 +2716,39 @@ > > { 'command': 'block-job-complete', 'data': { 'device': 'str' } } > > ## > >+# @drive-mirror-replace: > >+# > >+# Manually trigger completion of an active background drive-mirror operation > >+# and replace the target reference with the new mirror. > >+# This switches the device to write to the target path only. > >+# The ability to complete is signaled with a BLOCK_JOB_READY event. > >+# > >+# This command completes an active drive-mirror background operation > >+# synchronously and replaces the target reference with the mirror. > >+# The ordering of this command's return with the BLOCK_JOB_COMPLETED event > >+# is not defined. Note that if an I/O error occurs during the processing of > >+# this command: 1) the command itself will fail; 2) the error will be processed > >+# according to the rerror/werror arguments that were specified when starting > >+# the operation. > >+# > >+# A cancelled or paused drive-mirror job cannot be completed. > >+# > >+# @device: the device name > >+# @target-reference: the id or node name of the block driver state to replace > >+# @new-node-name: #optional set the node-name of the new block driver state > >+# needed the target reference point to a regular node of the > >+# graph > > Again, sorry for not noting this in my review for v1, but I seem to > have problems parsing this sentence (the last two lines). Did you > omit an "if" and an "-s", so it would read "Needed if the target > reference points to a regular node of the graph"? > > Max > > >+# > >+# Returns: Nothing on success > >+# If no background operation is active on this device, DeviceNotActive > >+# > >+# Since: 2.1 > >+## > >+{ 'command': 'drive-mirror-replace', > >+ 'data': { 'device': 'str', 'target-reference': 'str', > >+ '*new-node-name': 'str' } } > >+ > >+## > > # @ObjectTypeInfo: > > # > > # This structure describes a search result from @qom-list-types > >diff --git a/qmp-commands.hx b/qmp-commands.hx > >index dd336f7..3b5b382 100644 > >--- a/qmp-commands.hx > >+++ b/qmp-commands.hx > >@@ -1150,6 +1150,11 @@ EQMP > > .mhandler.cmd_new = qmp_marshal_input_block_job_complete, > > }, > > { > >+ .name = "drive-mirror-replace", > >+ .args_type = "device:B,target-reference:s,new-node-name:s?", > >+ .mhandler.cmd_new = qmp_marshal_input_drive_mirror_replace, > >+ }, > >+ { > > .name = "transaction", > > .args_type = "actions:q", > > .mhandler.cmd_new = qmp_marshal_input_transaction, > >diff --git a/trace-events b/trace-events > >index aec4202..5282904 100644 > >--- a/trace-events > >+++ b/trace-events > >@@ -103,6 +103,7 @@ qmp_block_job_cancel(void *job) "job %p" > > qmp_block_job_pause(void *job) "job %p" > > qmp_block_job_resume(void *job) "job %p" > > qmp_block_job_complete(void *job) "job %p" > >+qmp_drive_mirror_replace(void *job, const char *target_reference, const char *new_node_name) "job %p target_reference %s new_node_name %s" > > block_job_cb(void *bs, void *job, int ret) "bs %p job %p ret %d" > > qmp_block_stream(void *bs, void *job) "bs %p job %p" >
diff --git a/block.c b/block.c index 9f2fe85..3fd38b4 100644 --- a/block.c +++ b/block.c @@ -787,10 +787,12 @@ static int bdrv_open_flags(BlockDriverState *bs, int flags) return open_flags; } -static int bdrv_assign_node_name(BlockDriverState *bs, - const char *node_name, - Error **errp) +int bdrv_assign_node_name(BlockDriverState *bs, + const char *node_name, + Error **errp) { + assert(bs->node_name[0] == '\0'); + if (!node_name) { return 0; } diff --git a/block/mirror.c b/block/mirror.c index 6dc84e8..9365669 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -49,6 +49,15 @@ typedef struct MirrorBlockJob { unsigned long *in_flight_bitmap; int in_flight; int ret; + /* these four fields are used by drive-mirror-replace */ + /* The code must replace a target with the new mirror */ + bool must_replace; + /* The block BDS to replace */ + BlockDriverState *to_replace; + /* the node-name of the new mirror BDS */ + char *new_node_name; + /* Used to block operations on the drive-mirror-replace target. */ + Error *replace_blocker; } MirrorBlockJob; typedef struct MirrorOp { @@ -299,6 +308,7 @@ static void coroutine_fn mirror_run(void *opaque) MirrorBlockJob *s = opaque; BlockDriverState *bs = s->common.bs; int64_t sector_num, end, sectors_per_chunk, length; + BlockDriverState *to_replace; uint64_t last_pause_ns; BlockDriverInfo bdi; char backing_filename[1024]; @@ -477,11 +487,17 @@ immediate_exit: g_free(s->in_flight_bitmap); bdrv_release_dirty_bitmap(bs, s->dirty_bitmap); bdrv_iostatus_disable(s->target); + /* Here we handle the drive-mirror-replace QMP command */ + if (s->must_replace) { + to_replace = s->to_replace; + } else { + to_replace = s->common.bs; + } if (s->should_complete && ret == 0) { - if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) { - bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL); + if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) { + bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL); } - bdrv_swap(s->target, s->common.bs); + bdrv_swap(s->target, to_replace); if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) { /* drop the bs loop chain formed by the swap: break the loop then * trigger the unref from the top one */ @@ -491,6 +507,11 @@ immediate_exit: bdrv_unref(p); } } + if (s->must_replace) { + bdrv_op_unblock_all(s->to_replace, s->replace_blocker); + error_free(s->replace_blocker); + bdrv_unref(s->to_replace); + } bdrv_unref(s->target); block_job_completed(&s->common, ret); } @@ -513,6 +534,88 @@ static void mirror_iostatus_reset(BlockJob *job) bdrv_iostatus_reset(s->target); } +bool mirror_set_replace_target(BlockJob *job, const char *reference, + bool has_new_node_name, + const char *new_node_name, Error **errp) +{ + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); + BlockDriverState *to_replace; + + /* We don't want to give too much power to the user as this could result in + * BlockDriverState loops if used with something other than sync=full. + */ + if (s->is_none_mode || s->base) { + error_setg(errp, "Can only be used on a mirror done with sync=full"); + return false; + } + + /* check that the target reference is not an empty string */ + if (!reference[0]) { + error_setg(errp, "target-reference is an empty string"); + return false; + } + + /* Get the block driver state to be replaced */ + to_replace = bdrv_lookup_bs(reference, reference, errp); + if (!to_replace) { + return false; + } + + /* If the BDS to be replaced is a regular node we need a new node name */ + if (to_replace->node_name[0] && !has_new_node_name) { + error_setg(errp, "A new-node-name must be provided"); + return false; + } + + /* Can only replace something else than the source of the mirror */ + if (to_replace == job->bs) { + error_setg(errp, "Cannot replace the mirror source"); + return false; + } + + /* is this bs replace operation blocked */ + if (bdrv_op_is_blocked(to_replace, BLOCK_OP_TYPE_REPLACE, errp)) { + return false; + } + + /* save useful infos for later */ + s->to_replace = to_replace; + assert(has_new_node_name); + s->new_node_name = g_strdup(new_node_name); + + return true; +} + +static void mirror_activate_replace_target(BlockJob *job, Error **errp) +{ + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); + + /* Set the new node name if the BDS to replace is a regular node + * of the graph. + */ + if (s->to_replace->node_name[0]) { + assert(s->new_node_name); + bdrv_assign_node_name(s->target, s->new_node_name, errp); + } + + g_free(s->new_node_name); + + if (error_is_set(errp)) { + s->to_replace = NULL; + return; + } + + /* block all operations on the target bs */ + error_setg(&s->replace_blocker, + "block device is in use by drive-mirror-replace"); + bdrv_op_block_all(s->to_replace, s->replace_blocker); + + bdrv_ref(s->to_replace); + + /* activate the replacement operation */ + s->must_replace = true; +} + static void mirror_complete(BlockJob *job, Error **errp) { MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); @@ -529,6 +632,13 @@ static void mirror_complete(BlockJob *job, Error **errp) return; } + /* drive-mirror-replace is being called on this job so activate the + * replacement target + */ + if (s->to_replace) { + mirror_activate_replace_target(job, errp); + } + s->should_complete = true; block_job_resume(job); } diff --git a/blockdev.c b/blockdev.c index 8a6ae0a..901a05d 100644 --- a/blockdev.c +++ b/blockdev.c @@ -2365,6 +2365,33 @@ void qmp_block_job_complete(const char *device, Error **errp) block_job_complete(job, errp); } +void qmp_drive_mirror_replace(const char *device, const char *target_reference, + bool has_new_node_name, + const char *new_node_name, + Error **errp) +{ + BlockJob *job = find_block_job(device); + + if (!job) { + error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device); + return; + } + + if (!job->driver || job->driver->job_type != BLOCK_JOB_TYPE_MIRROR) { + error_setg(errp, "Can only be used on a drive-mirror block job"); + return; + } + + if (!mirror_set_replace_target(job, target_reference, has_new_node_name, + new_node_name, errp)) { + return; + } + + trace_qmp_drive_mirror_replace(job, target_reference, + has_new_node_name ? new_node_name : ""); + block_job_complete(job, errp); +} + void qmp_blockdev_add(BlockdevOptions *options, Error **errp) { QmpOutputVisitor *ov = qmp_output_visitor_new(); diff --git a/include/block/block.h b/include/block/block.h index ee1582d..a9d6ead 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -171,6 +171,7 @@ typedef enum BlockOpType { BLOCK_OP_TYPE_MIRROR, BLOCK_OP_TYPE_RESIZE, BLOCK_OP_TYPE_STREAM, + BLOCK_OP_TYPE_REPLACE, BLOCK_OP_TYPE_MAX, } BlockOpType; @@ -209,6 +210,8 @@ int bdrv_open_image(BlockDriverState **pbs, const char *filename, QDict *options, const char *bdref_key, int flags, bool allow_none, Error **errp); void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd); +int bdrv_assign_node_name(BlockDriverState *bs, const char *node_name, + Error **errp); int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp); int bdrv_open(BlockDriverState **pbs, const char *filename, const char *reference, QDict *options, int flags, diff --git a/include/block/block_int.h b/include/block/block_int.h index 1f4f78b..ea281c8 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -486,6 +486,21 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, void *opaque, Error **errp); /* + * mirror_set_replace_target: + * @job: An active mirroring block job started with sync=full. + * @reference: id or node-name of the BDS to replace when the mirror is done. + * @has_new_node_name: Set to true if new_node_name if provided + * @new_node_name: The optional new node name of the new mirror. + * @errp: Error object. + * + * Prepare a mirroring operation to replace a BDS pointed to by reference with + * the new mirror. + */ +bool mirror_set_replace_target(BlockJob *job, const char *reference, + bool has_new_node_name, + const char *new_node_name, Error **errp); + +/* * backup_start: * @bs: Block device to operate on. * @target: Block device to write to. diff --git a/qapi-schema.json b/qapi-schema.json index f5a8767..ef830e8 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -2716,6 +2716,39 @@ { 'command': 'block-job-complete', 'data': { 'device': 'str' } } ## +# @drive-mirror-replace: +# +# Manually trigger completion of an active background drive-mirror operation +# and replace the target reference with the new mirror. +# This switches the device to write to the target path only. +# The ability to complete is signaled with a BLOCK_JOB_READY event. +# +# This command completes an active drive-mirror background operation +# synchronously and replaces the target reference with the mirror. +# The ordering of this command's return with the BLOCK_JOB_COMPLETED event +# is not defined. Note that if an I/O error occurs during the processing of +# this command: 1) the command itself will fail; 2) the error will be processed +# according to the rerror/werror arguments that were specified when starting +# the operation. +# +# A cancelled or paused drive-mirror job cannot be completed. +# +# @device: the device name +# @target-reference: the id or node name of the block driver state to replace +# @new-node-name: #optional set the node-name of the new block driver state +# needed the target reference point to a regular node of the +# graph +# +# Returns: Nothing on success +# If no background operation is active on this device, DeviceNotActive +# +# Since: 2.1 +## +{ 'command': 'drive-mirror-replace', + 'data': { 'device': 'str', 'target-reference': 'str', + '*new-node-name': 'str' } } + +## # @ObjectTypeInfo: # # This structure describes a search result from @qom-list-types diff --git a/qmp-commands.hx b/qmp-commands.hx index dd336f7..3b5b382 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -1150,6 +1150,11 @@ EQMP .mhandler.cmd_new = qmp_marshal_input_block_job_complete, }, { + .name = "drive-mirror-replace", + .args_type = "device:B,target-reference:s,new-node-name:s?", + .mhandler.cmd_new = qmp_marshal_input_drive_mirror_replace, + }, + { .name = "transaction", .args_type = "actions:q", .mhandler.cmd_new = qmp_marshal_input_transaction, diff --git a/trace-events b/trace-events index aec4202..5282904 100644 --- a/trace-events +++ b/trace-events @@ -103,6 +103,7 @@ qmp_block_job_cancel(void *job) "job %p" qmp_block_job_pause(void *job) "job %p" qmp_block_job_resume(void *job) "job %p" qmp_block_job_complete(void *job) "job %p" +qmp_drive_mirror_replace(void *job, const char *target_reference, const char *new_node_name) "job %p target_reference %s new_node_name %s" block_job_cb(void *bs, void *job, int ret) "bs %p job %p ret %d" qmp_block_stream(void *bs, void *job) "bs %p job %p"
When a quorum file is totally destroyed (broken NAS or SAN) the user can start a drive-mirror job on the quorum block backend and then replace the broken quorum file with drive-mirror-replace given it has a node-name. Signed-off-by: Benoit Canet <benoit@irqsave.net> --- block.c | 8 ++-- block/mirror.c | 116 ++++++++++++++++++++++++++++++++++++++++++++-- blockdev.c | 27 +++++++++++ include/block/block.h | 3 ++ include/block/block_int.h | 15 ++++++ qapi-schema.json | 33 +++++++++++++ qmp-commands.hx | 5 ++ trace-events | 1 + 8 files changed, 202 insertions(+), 6 deletions(-)