Patchwork [2/3] block: adjust qmp_transaction to be extendable

login
register
mail settings
Submitter Wayne Xia
Date April 1, 2013, 10:01 a.m.
Message ID <1364810491-21404-3-git-send-email-xiawenc@linux.vnet.ibm.com>
Download mbox | patch
Permalink /patch/232720/
State New
Headers show

Comments

Wayne Xia - April 1, 2013, 10:01 a.m.
Now code for external snapshot are packaged as one case
in qmp_transaction, so later other operation could be added.
  The logic in qmp_transaction is changed a bit: Original code
tries to create all images first and then update all *bdrv
once together, new code create and update one *bdrv one time,
and use bdrv_deappend() to rollback on fail. This allows mixing
different kind of requests in qmp_transaction() later.

Signed-off-by: Wenchao Xia <xiawenc@linux.vnet.ibm.com>
---
 blockdev.c |  250 +++++++++++++++++++++++++++++++++++++-----------------------
 1 files changed, 153 insertions(+), 97 deletions(-)
Kevin Wolf - April 2, 2013, 1:55 p.m.
Am 01.04.2013 um 12:01 hat Wenchao Xia geschrieben:
>   Now code for external snapshot are packaged as one case
> in qmp_transaction, so later other operation could be added.
>   The logic in qmp_transaction is changed a bit: Original code
> tries to create all images first and then update all *bdrv
> once together, new code create and update one *bdrv one time,
> and use bdrv_deappend() to rollback on fail. This allows mixing
> different kind of requests in qmp_transaction() later.
> 
> Signed-off-by: Wenchao Xia <xiawenc@linux.vnet.ibm.com>
> ---
>  blockdev.c |  250 +++++++++++++++++++++++++++++++++++++-----------------------
>  1 files changed, 153 insertions(+), 97 deletions(-)
> 
> diff --git a/blockdev.c b/blockdev.c
> index 8cdc9ce..75416fb 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -779,9 +779,155 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file,
>  
>  
>  /* New and old BlockDriverState structs for group snapshots */
> -typedef struct BlkTransactionStates {
> +typedef struct BdrvActionOps {
> +    int (*commit)(BlockdevAction *action, void **p_opaque, Error **errp);
> +    void (*rollback)(BlockdevAction *action, void *opaque);
> +    void (*clean)(BlockdevAction *action, void *opaque);
> +} BdrvActionOps;

You don't really implement the transactional pattern that was used by
the original code (and is used elsewhere). It should work like this:

1. Prepare: This stage performs all operations that can fail. They are
   not made active yet. For example with snapshotting, open a new
   BlockDriver state, but don't change the backing file chain yet.

2. Commit: Activate the change. This operation can never fail. For this
   reason, you never have to undo anything done here.

3. Rollback: Basically just free everything that prepare did up to the
   error.

If you do it your way, you get into serious trouble if rollback involves
operations that can fail.

In the original code, here start the prepare:

> @@ -806,125 +950,37 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp)
>      /* We don't do anything in this loop that commits us to the snapshot */
>      while (NULL != dev_entry) {
>          BlockdevAction *dev_info = NULL;
> -        BlockDriver *proto_drv;
> -        BlockDriver *drv;
> -        int flags;
> -        enum NewImageMode mode;
> -        const char *new_image_file;
> -        const char *device;
> -        const char *format = "qcow2";
> -
>          dev_info = dev_entry->value;
>          dev_entry = dev_entry->next;
>  
>          states = g_malloc0(sizeof(BlkTransactionStates));
>          QSIMPLEQ_INSERT_TAIL(&snap_bdrv_states, states, entry);
>  
> +        states->action = dev_info;
>          switch (dev_info->kind) {
>          case BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC:
> -            device = dev_info->blockdev_snapshot_sync->device;
> -            if (!dev_info->blockdev_snapshot_sync->has_mode) {
> -                dev_info->blockdev_snapshot_sync->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
> -            }
> -            new_image_file = dev_info->blockdev_snapshot_sync->snapshot_file;
> -            if (dev_info->blockdev_snapshot_sync->has_format) {
> -                format = dev_info->blockdev_snapshot_sync->format;
> -            }
> -            mode = dev_info->blockdev_snapshot_sync->mode;
> +            states->ops = &external_snapshot_ops;
>              break;
>          default:
>              abort();
>          }
>  
> -        drv = bdrv_find_format(format);
> -        if (!drv) {
> -            error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
> -            goto delete_and_fail;
> -        }
> -
> -        states->old_bs = bdrv_find(device);
> -        if (!states->old_bs) {
> -            error_set(errp, QERR_DEVICE_NOT_FOUND, device);
> -            goto delete_and_fail;
> -        }
> -
> -        if (!bdrv_is_inserted(states->old_bs)) {
> -            error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
> -            goto delete_and_fail;
> -        }
> -
> -        if (bdrv_in_use(states->old_bs)) {
> -            error_set(errp, QERR_DEVICE_IN_USE, device);
> -            goto delete_and_fail;
> -        }
> -
> -        if (!bdrv_is_read_only(states->old_bs)) {
> -            if (bdrv_flush(states->old_bs)) {
> -                error_set(errp, QERR_IO_ERROR);
> -                goto delete_and_fail;
> -            }
> -        }
> -
> -        flags = states->old_bs->open_flags;
> -
> -        proto_drv = bdrv_find_protocol(new_image_file);
> -        if (!proto_drv) {
> -            error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
> -            goto delete_and_fail;
> -        }
> -
> -        /* create new image w/backing file */
> -        if (mode != NEW_IMAGE_MODE_EXISTING) {
> -            bdrv_img_create(new_image_file, format,
> -                            states->old_bs->filename,
> -                            states->old_bs->drv->format_name,
> -                            NULL, -1, flags, &local_err, false);
> -            if (error_is_set(&local_err)) {
> -                error_propagate(errp, local_err);
> -                goto delete_and_fail;
> -            }
> -        }
> -
> -        /* We will manually add the backing_hd field to the bs later */
> -        states->new_bs = bdrv_new("");
> -        /* TODO Inherit bs->options or only take explicit options with an
> -         * extended QMP command? */
> -        ret = bdrv_open(states->new_bs, new_image_file, NULL,
> -                        flags | BDRV_O_NO_BACKING, drv);
> -        if (ret != 0) {
> -            error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file);
> +        if (states->ops->commit(states->action, &states->opaque, errp)) {
>              goto delete_and_fail;
>          }
>      }

The following block is the commit:

> -
> -    /* Now we are going to do the actual pivot.  Everything up to this point
> -     * is reversible, but we are committed at this point */
> -    QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) {
> -        /* This removes our old bs from the bdrv_states, and adds the new bs */
> -        bdrv_append(states->new_bs, states->old_bs);
> -        /* We don't need (or want) to use the transactional
> -         * bdrv_reopen_multiple() across all the entries at once, because we
> -         * don't want to abort all of them if one of them fails the reopen */
> -        bdrv_reopen(states->new_bs, states->new_bs->open_flags & ~BDRV_O_RDWR,
> -                    NULL);
> -    }
> -
>      /* success */
>      goto exit;

And this is rollback:

>  delete_and_fail:
> -    /*
> -    * failure, and it is all-or-none; abandon each new bs, and keep using
> -    * the original bs for all images
> -    */
>      QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) {
> -        if (states->new_bs) {
> -             bdrv_delete(states->new_bs);
> -        }
> +        states->ops->rollback(states->action, states->opaque);
>      }

Kevin
Wayne Xia - April 3, 2013, 5:51 a.m.
于 2013-4-2 21:55, Kevin Wolf 写道:
> Am 01.04.2013 um 12:01 hat Wenchao Xia geschrieben:
>>    Now code for external snapshot are packaged as one case
>> in qmp_transaction, so later other operation could be added.
>>    The logic in qmp_transaction is changed a bit: Original code
>> tries to create all images first and then update all *bdrv
>> once together, new code create and update one *bdrv one time,
>> and use bdrv_deappend() to rollback on fail. This allows mixing
>> different kind of requests in qmp_transaction() later.
>>
>> Signed-off-by: Wenchao Xia <xiawenc@linux.vnet.ibm.com>
>> ---
>>   blockdev.c |  250 +++++++++++++++++++++++++++++++++++++-----------------------
>>   1 files changed, 153 insertions(+), 97 deletions(-)
>>
>> diff --git a/blockdev.c b/blockdev.c
>> index 8cdc9ce..75416fb 100644
>> --- a/blockdev.c
>> +++ b/blockdev.c
>> @@ -779,9 +779,155 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file,
>>
>>
>>   /* New and old BlockDriverState structs for group snapshots */
>> -typedef struct BlkTransactionStates {
>> +typedef struct BdrvActionOps {
>> +    int (*commit)(BlockdevAction *action, void **p_opaque, Error **errp);
>> +    void (*rollback)(BlockdevAction *action, void *opaque);
>> +    void (*clean)(BlockdevAction *action, void *opaque);
>> +} BdrvActionOps;
>
> You don't really implement the transactional pattern that was used by
> the original code (and is used elsewhere). It should work like this:
>
> 1. Prepare: This stage performs all operations that can fail. They are
>     not made active yet. For example with snapshotting, open a new
>     BlockDriver state, but don't change the backing file chain yet.
>
> 2. Commit: Activate the change. This operation can never fail. For this
>     reason, you never have to undo anything done here.
>
> 3. Rollback: Basically just free everything that prepare did up to the
>     error.
>
> If you do it your way, you get into serious trouble if rollback involves
> operations that can fail.
>
> In the original code, here start the prepare:
>
   That is a clear comment, thanks. I changed the model since expecting
commit operation may need rollback, if not I am confident to keep
original model.
   Since bdrv_snapshot_delete() can fail, I guess assertion of its
success should be used in the rollback later.

>> @@ -806,125 +950,37 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp)
>>       /* We don't do anything in this loop that commits us to the snapshot */
>>       while (NULL != dev_entry) {
>>           BlockdevAction *dev_info = NULL;
>> -        BlockDriver *proto_drv;
>> -        BlockDriver *drv;
>> -        int flags;
>> -        enum NewImageMode mode;
>> -        const char *new_image_file;
>> -        const char *device;
>> -        const char *format = "qcow2";
>> -
>>           dev_info = dev_entry->value;
>>           dev_entry = dev_entry->next;
>>
>>           states = g_malloc0(sizeof(BlkTransactionStates));
>>           QSIMPLEQ_INSERT_TAIL(&snap_bdrv_states, states, entry);
>>
>> +        states->action = dev_info;
>>           switch (dev_info->kind) {
>>           case BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC:
>> -            device = dev_info->blockdev_snapshot_sync->device;
>> -            if (!dev_info->blockdev_snapshot_sync->has_mode) {
>> -                dev_info->blockdev_snapshot_sync->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
>> -            }
>> -            new_image_file = dev_info->blockdev_snapshot_sync->snapshot_file;
>> -            if (dev_info->blockdev_snapshot_sync->has_format) {
>> -                format = dev_info->blockdev_snapshot_sync->format;
>> -            }
>> -            mode = dev_info->blockdev_snapshot_sync->mode;
>> +            states->ops = &external_snapshot_ops;
>>               break;
>>           default:
>>               abort();
>>           }
>>
>> -        drv = bdrv_find_format(format);
>> -        if (!drv) {
>> -            error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
>> -            goto delete_and_fail;
>> -        }
>> -
>> -        states->old_bs = bdrv_find(device);
>> -        if (!states->old_bs) {
>> -            error_set(errp, QERR_DEVICE_NOT_FOUND, device);
>> -            goto delete_and_fail;
>> -        }
>> -
>> -        if (!bdrv_is_inserted(states->old_bs)) {
>> -            error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
>> -            goto delete_and_fail;
>> -        }
>> -
>> -        if (bdrv_in_use(states->old_bs)) {
>> -            error_set(errp, QERR_DEVICE_IN_USE, device);
>> -            goto delete_and_fail;
>> -        }
>> -
>> -        if (!bdrv_is_read_only(states->old_bs)) {
>> -            if (bdrv_flush(states->old_bs)) {
>> -                error_set(errp, QERR_IO_ERROR);
>> -                goto delete_and_fail;
>> -            }
>> -        }
>> -
>> -        flags = states->old_bs->open_flags;
>> -
>> -        proto_drv = bdrv_find_protocol(new_image_file);
>> -        if (!proto_drv) {
>> -            error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
>> -            goto delete_and_fail;
>> -        }
>> -
>> -        /* create new image w/backing file */
>> -        if (mode != NEW_IMAGE_MODE_EXISTING) {
>> -            bdrv_img_create(new_image_file, format,
>> -                            states->old_bs->filename,
>> -                            states->old_bs->drv->format_name,
>> -                            NULL, -1, flags, &local_err, false);
>> -            if (error_is_set(&local_err)) {
>> -                error_propagate(errp, local_err);
>> -                goto delete_and_fail;
>> -            }
>> -        }
>> -
>> -        /* We will manually add the backing_hd field to the bs later */
>> -        states->new_bs = bdrv_new("");
>> -        /* TODO Inherit bs->options or only take explicit options with an
>> -         * extended QMP command? */
>> -        ret = bdrv_open(states->new_bs, new_image_file, NULL,
>> -                        flags | BDRV_O_NO_BACKING, drv);
>> -        if (ret != 0) {
>> -            error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file);
>> +        if (states->ops->commit(states->action, &states->opaque, errp)) {
>>               goto delete_and_fail;
>>           }
>>       }
>
> The following block is the commit:
>
>> -
>> -    /* Now we are going to do the actual pivot.  Everything up to this point
>> -     * is reversible, but we are committed at this point */
>> -    QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) {
>> -        /* This removes our old bs from the bdrv_states, and adds the new bs */
>> -        bdrv_append(states->new_bs, states->old_bs);
>> -        /* We don't need (or want) to use the transactional
>> -         * bdrv_reopen_multiple() across all the entries at once, because we
>> -         * don't want to abort all of them if one of them fails the reopen */
>> -        bdrv_reopen(states->new_bs, states->new_bs->open_flags & ~BDRV_O_RDWR,
>> -                    NULL);
>> -    }
>> -
>>       /* success */
>>       goto exit;
>
> And this is rollback:
>
>>   delete_and_fail:
>> -    /*
>> -    * failure, and it is all-or-none; abandon each new bs, and keep using
>> -    * the original bs for all images
>> -    */
>>       QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) {
>> -        if (states->new_bs) {
>> -             bdrv_delete(states->new_bs);
>> -        }
>> +        states->ops->rollback(states->action, states->opaque);
>>       }
>
> Kevin
>
Paolo Bonzini - April 3, 2013, 7:21 a.m.
----- Messaggio originale -----
> Da: "Wenchao Xia" <xiawenc@linux.vnet.ibm.com>
> A: "Kevin Wolf" <kwolf@redhat.com>
> Cc: pbonzini@redhat.com, qemu-devel@nongnu.org, dietmar@proxmox.com, stefanha@gmail.com
> Inviato: Mercoledì, 3 aprile 2013 7:51:43
> Oggetto: Re: [Qemu-devel] [PATCH 2/3] block: adjust qmp_transaction to be extendable
> 
> 于 2013-4-2 21:55, Kevin Wolf 写道:
> > Am 01.04.2013 um 12:01 hat Wenchao Xia geschrieben:
> >>    Now code for external snapshot are packaged as one case
> >> in qmp_transaction, so later other operation could be added.
> >>    The logic in qmp_transaction is changed a bit: Original code
> >> tries to create all images first and then update all *bdrv
> >> once together, new code create and update one *bdrv one time,
> >> and use bdrv_deappend() to rollback on fail. This allows mixing
> >> different kind of requests in qmp_transaction() later.
> >>
> >> Signed-off-by: Wenchao Xia <xiawenc@linux.vnet.ibm.com>
> >> ---
> >>   blockdev.c |  250
> >>   +++++++++++++++++++++++++++++++++++++-----------------------
> >>   1 files changed, 153 insertions(+), 97 deletions(-)
> >>
> >> diff --git a/blockdev.c b/blockdev.c
> >> index 8cdc9ce..75416fb 100644
> >> --- a/blockdev.c
> >> +++ b/blockdev.c
> >> @@ -779,9 +779,155 @@ void qmp_blockdev_snapshot_sync(const char *device,
> >> const char *snapshot_file,
> >>
> >>
> >>   /* New and old BlockDriverState structs for group snapshots */
> >> -typedef struct BlkTransactionStates {
> >> +typedef struct BdrvActionOps {
> >> +    int (*commit)(BlockdevAction *action, void **p_opaque, Error **errp);
> >> +    void (*rollback)(BlockdevAction *action, void *opaque);
> >> +    void (*clean)(BlockdevAction *action, void *opaque);
> >> +} BdrvActionOps;
> >
> > You don't really implement the transactional pattern that was used by
> > the original code (and is used elsewhere). It should work like this:
> >
> > 1. Prepare: This stage performs all operations that can fail. They are
> >     not made active yet. For example with snapshotting, open a new
> >     BlockDriver state, but don't change the backing file chain yet.
> >
> > 2. Commit: Activate the change. This operation can never fail. For this
> >     reason, you never have to undo anything done here.
> >
> > 3. Rollback: Basically just free everything that prepare did up to the
> >     error.
> >
> > If you do it your way, you get into serious trouble if rollback involves
> > operations that can fail.
> >
> > In the original code, here start the prepare:
>
>    That is a clear comment, thanks. I changed the model since expecting
> commit operation may need rollback, if not I am confident to keep
> original model.
>    Since bdrv_snapshot_delete() can fail, I guess assertion of its
> success should be used in the rollback later.

No, if bdrv_snapshot_delete() can fail, you need to split it in two
parts: one that can fail, and one that cannot.  If you cannot, then
there are two possibilities:

- if the failures are minor and could be repaired with "qemu-img check -r"
(e.g. lost clusters), then this is not optimal but can still be done;

- otherwise, the operation simply cannot be made transactionable.

In the case of qcow2_snapshot_delete, everything except

    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
                           &header_data, sizeof(header_data));
    if (ret < 0) {
        goto fail;
    }

must be in the prepare phase.  Everything after "fail" (which right now
is nothing, but it should at least undo the qcow2_alloc_clusters operation)
must be in the rollback phase.  Everything in the middle is the commit
phase.

Paolo
 
> 
> >> @@ -806,125 +950,37 @@ void qmp_transaction(BlockdevActionList *dev_list,
> >> Error **errp)
> >>       /* We don't do anything in this loop that commits us to the snapshot
> >>       */
> >>       while (NULL != dev_entry) {
> >>           BlockdevAction *dev_info = NULL;
> >> -        BlockDriver *proto_drv;
> >> -        BlockDriver *drv;
> >> -        int flags;
> >> -        enum NewImageMode mode;
> >> -        const char *new_image_file;
> >> -        const char *device;
> >> -        const char *format = "qcow2";
> >> -
> >>           dev_info = dev_entry->value;
> >>           dev_entry = dev_entry->next;
> >>
> >>           states = g_malloc0(sizeof(BlkTransactionStates));
> >>           QSIMPLEQ_INSERT_TAIL(&snap_bdrv_states, states, entry);
> >>
> >> +        states->action = dev_info;
> >>           switch (dev_info->kind) {
> >>           case BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC:
> >> -            device = dev_info->blockdev_snapshot_sync->device;
> >> -            if (!dev_info->blockdev_snapshot_sync->has_mode) {
> >> -                dev_info->blockdev_snapshot_sync->mode =
> >> NEW_IMAGE_MODE_ABSOLUTE_PATHS;
> >> -            }
> >> -            new_image_file =
> >> dev_info->blockdev_snapshot_sync->snapshot_file;
> >> -            if (dev_info->blockdev_snapshot_sync->has_format) {
> >> -                format = dev_info->blockdev_snapshot_sync->format;
> >> -            }
> >> -            mode = dev_info->blockdev_snapshot_sync->mode;
> >> +            states->ops = &external_snapshot_ops;
> >>               break;
> >>           default:
> >>               abort();
> >>           }
> >>
> >> -        drv = bdrv_find_format(format);
> >> -        if (!drv) {
> >> -            error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
> >> -            goto delete_and_fail;
> >> -        }
> >> -
> >> -        states->old_bs = bdrv_find(device);
> >> -        if (!states->old_bs) {
> >> -            error_set(errp, QERR_DEVICE_NOT_FOUND, device);
> >> -            goto delete_and_fail;
> >> -        }
> >> -
> >> -        if (!bdrv_is_inserted(states->old_bs)) {
> >> -            error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
> >> -            goto delete_and_fail;
> >> -        }
> >> -
> >> -        if (bdrv_in_use(states->old_bs)) {
> >> -            error_set(errp, QERR_DEVICE_IN_USE, device);
> >> -            goto delete_and_fail;
> >> -        }
> >> -
> >> -        if (!bdrv_is_read_only(states->old_bs)) {
> >> -            if (bdrv_flush(states->old_bs)) {
> >> -                error_set(errp, QERR_IO_ERROR);
> >> -                goto delete_and_fail;
> >> -            }
> >> -        }
> >> -
> >> -        flags = states->old_bs->open_flags;
> >> -
> >> -        proto_drv = bdrv_find_protocol(new_image_file);
> >> -        if (!proto_drv) {
> >> -            error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
> >> -            goto delete_and_fail;
> >> -        }
> >> -
> >> -        /* create new image w/backing file */
> >> -        if (mode != NEW_IMAGE_MODE_EXISTING) {
> >> -            bdrv_img_create(new_image_file, format,
> >> -                            states->old_bs->filename,
> >> -                            states->old_bs->drv->format_name,
> >> -                            NULL, -1, flags, &local_err, false);
> >> -            if (error_is_set(&local_err)) {
> >> -                error_propagate(errp, local_err);
> >> -                goto delete_and_fail;
> >> -            }
> >> -        }
> >> -
> >> -        /* We will manually add the backing_hd field to the bs later */
> >> -        states->new_bs = bdrv_new("");
> >> -        /* TODO Inherit bs->options or only take explicit options with an
> >> -         * extended QMP command? */
> >> -        ret = bdrv_open(states->new_bs, new_image_file, NULL,
> >> -                        flags | BDRV_O_NO_BACKING, drv);
> >> -        if (ret != 0) {
> >> -            error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file);
> >> +        if (states->ops->commit(states->action, &states->opaque, errp)) {
> >>               goto delete_and_fail;
> >>           }
> >>       }
> >
> > The following block is the commit:
> >
> >> -
> >> -    /* Now we are going to do the actual pivot.  Everything up to this
> >> point
> >> -     * is reversible, but we are committed at this point */
> >> -    QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) {
> >> -        /* This removes our old bs from the bdrv_states, and adds the new
> >> bs */
> >> -        bdrv_append(states->new_bs, states->old_bs);
> >> -        /* We don't need (or want) to use the transactional
> >> -         * bdrv_reopen_multiple() across all the entries at once, because
> >> we
> >> -         * don't want to abort all of them if one of them fails the
> >> reopen */
> >> -        bdrv_reopen(states->new_bs, states->new_bs->open_flags &
> >> ~BDRV_O_RDWR,
> >> -                    NULL);
> >> -    }
> >> -
> >>       /* success */
> >>       goto exit;
> >
> > And this is rollback:
> >
> >>   delete_and_fail:
> >> -    /*
> >> -    * failure, and it is all-or-none; abandon each new bs, and keep using
> >> -    * the original bs for all images
> >> -    */
> >>       QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) {
> >> -        if (states->new_bs) {
> >> -             bdrv_delete(states->new_bs);
> >> -        }
> >> +        states->ops->rollback(states->action, states->opaque);
> >>       }
> >
> > Kevin
> >
> 
> 
> --
> Best Regards
> 
> Wenchao Xia
> 
>
Wayne Xia - April 3, 2013, 9:02 a.m.
>
> No, if bdrv_snapshot_delete() can fail, you need to split it in two
> parts: one that can fail, and one that cannot.  If you cannot, then
> there are two possibilities:
>
> - if the failures are minor and could be repaired with "qemu-img check -r"
> (e.g. lost clusters), then this is not optimal but can still be done;
>
> - otherwise, the operation simply cannot be made transactionable.
>
> In the case of qcow2_snapshot_delete, everything except
>
>      ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
>                             &header_data, sizeof(header_data));
>      if (ret < 0) {
>          goto fail;
>      }
>
> must be in the prepare phase.  Everything after "fail" (which right now
> is nothing, but it should at least undo the qcow2_alloc_clusters operation)
> must be in the rollback phase.  Everything in the middle is the commit
> phase.
>
> Paolo
>
   Sorry I haven't state it clearly. What about bdrv_snapshot_create()
operation? If it need to be rolled back, I think bdrv_snapshot_delete()
will get called and it may fail. But in most case if
bdrv_snapshot_create() succeed before, the bdrv_snapshot_delete should
succeed also, so if fail there may be unexpected error below, could
assert be used for this?

>>
>>>> @@ -806,125 +950,37 @@ void qmp_transaction(BlockdevActionList *dev_list,
>>>> Error **errp)
>>>>        /* We don't do anything in this loop that commits us to the snapshot
>>>>        */
>>>>        while (NULL != dev_entry) {
>>>>            BlockdevAction *dev_info = NULL;
>>>> -        BlockDriver *proto_drv;
>>>> -        BlockDriver *drv;
>>>> -        int flags;
>>>> -        enum NewImageMode mode;
>>>> -        const char *new_image_file;
>>>> -        const char *device;
>>>> -        const char *format = "qcow2";
>>>> -
>>>>            dev_info = dev_entry->value;
>>>>            dev_entry = dev_entry->next;
>>>>
>>>>            states = g_malloc0(sizeof(BlkTransactionStates));
>>>>            QSIMPLEQ_INSERT_TAIL(&snap_bdrv_states, states, entry);
>>>>
>>>> +        states->action = dev_info;
>>>>            switch (dev_info->kind) {
>>>>            case BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC:
>>>> -            device = dev_info->blockdev_snapshot_sync->device;
>>>> -            if (!dev_info->blockdev_snapshot_sync->has_mode) {
>>>> -                dev_info->blockdev_snapshot_sync->mode =
>>>> NEW_IMAGE_MODE_ABSOLUTE_PATHS;
>>>> -            }
>>>> -            new_image_file =
>>>> dev_info->blockdev_snapshot_sync->snapshot_file;
>>>> -            if (dev_info->blockdev_snapshot_sync->has_format) {
>>>> -                format = dev_info->blockdev_snapshot_sync->format;
>>>> -            }
>>>> -            mode = dev_info->blockdev_snapshot_sync->mode;
>>>> +            states->ops = &external_snapshot_ops;
>>>>                break;
>>>>            default:
>>>>                abort();
>>>>            }
>>>>
>>>> -        drv = bdrv_find_format(format);
>>>> -        if (!drv) {
>>>> -            error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
>>>> -            goto delete_and_fail;
>>>> -        }
>>>> -
>>>> -        states->old_bs = bdrv_find(device);
>>>> -        if (!states->old_bs) {
>>>> -            error_set(errp, QERR_DEVICE_NOT_FOUND, device);
>>>> -            goto delete_and_fail;
>>>> -        }
>>>> -
>>>> -        if (!bdrv_is_inserted(states->old_bs)) {
>>>> -            error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
>>>> -            goto delete_and_fail;
>>>> -        }
>>>> -
>>>> -        if (bdrv_in_use(states->old_bs)) {
>>>> -            error_set(errp, QERR_DEVICE_IN_USE, device);
>>>> -            goto delete_and_fail;
>>>> -        }
>>>> -
>>>> -        if (!bdrv_is_read_only(states->old_bs)) {
>>>> -            if (bdrv_flush(states->old_bs)) {
>>>> -                error_set(errp, QERR_IO_ERROR);
>>>> -                goto delete_and_fail;
>>>> -            }
>>>> -        }
>>>> -
>>>> -        flags = states->old_bs->open_flags;
>>>> -
>>>> -        proto_drv = bdrv_find_protocol(new_image_file);
>>>> -        if (!proto_drv) {
>>>> -            error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
>>>> -            goto delete_and_fail;
>>>> -        }
>>>> -
>>>> -        /* create new image w/backing file */
>>>> -        if (mode != NEW_IMAGE_MODE_EXISTING) {
>>>> -            bdrv_img_create(new_image_file, format,
>>>> -                            states->old_bs->filename,
>>>> -                            states->old_bs->drv->format_name,
>>>> -                            NULL, -1, flags, &local_err, false);
>>>> -            if (error_is_set(&local_err)) {
>>>> -                error_propagate(errp, local_err);
>>>> -                goto delete_and_fail;
>>>> -            }
>>>> -        }
>>>> -
>>>> -        /* We will manually add the backing_hd field to the bs later */
>>>> -        states->new_bs = bdrv_new("");
>>>> -        /* TODO Inherit bs->options or only take explicit options with an
>>>> -         * extended QMP command? */
>>>> -        ret = bdrv_open(states->new_bs, new_image_file, NULL,
>>>> -                        flags | BDRV_O_NO_BACKING, drv);
>>>> -        if (ret != 0) {
>>>> -            error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file);
>>>> +        if (states->ops->commit(states->action, &states->opaque, errp)) {
>>>>                goto delete_and_fail;
>>>>            }
>>>>        }
>>>
>>> The following block is the commit:
>>>
>>>> -
>>>> -    /* Now we are going to do the actual pivot.  Everything up to this
>>>> point
>>>> -     * is reversible, but we are committed at this point */
>>>> -    QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) {
>>>> -        /* This removes our old bs from the bdrv_states, and adds the new
>>>> bs */
>>>> -        bdrv_append(states->new_bs, states->old_bs);
>>>> -        /* We don't need (or want) to use the transactional
>>>> -         * bdrv_reopen_multiple() across all the entries at once, because
>>>> we
>>>> -         * don't want to abort all of them if one of them fails the
>>>> reopen */
>>>> -        bdrv_reopen(states->new_bs, states->new_bs->open_flags &
>>>> ~BDRV_O_RDWR,
>>>> -                    NULL);
>>>> -    }
>>>> -
>>>>        /* success */
>>>>        goto exit;
>>>
>>> And this is rollback:
>>>
>>>>    delete_and_fail:
>>>> -    /*
>>>> -    * failure, and it is all-or-none; abandon each new bs, and keep using
>>>> -    * the original bs for all images
>>>> -    */
>>>>        QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) {
>>>> -        if (states->new_bs) {
>>>> -             bdrv_delete(states->new_bs);
>>>> -        }
>>>> +        states->ops->rollback(states->action, states->opaque);
>>>>        }
>>>
>>> Kevin
>>>
>>
>>
>> --
>> Best Regards
>>
>> Wenchao Xia
>>
>>
>
Paolo Bonzini - April 3, 2013, 9:17 a.m.
Il 03/04/2013 11:02, Wenchao Xia ha scritto:
>>
>   Sorry I haven't state it clearly. What about bdrv_snapshot_create()
> operation? If it need to be rolled back, I think bdrv_snapshot_delete()
> will get called and it may fail. But in most case if
> bdrv_snapshot_create() succeed before, the bdrv_snapshot_delete should
> succeed also, so if fail there may be unexpected error below, could
> assert be used for this?

No.  Transactionable means that commit and rollback cannot fail.

For bdrv_snapshot_create() it is the same as for bdrv_snapshot_delete()
(the overview I wrote was for qcow2_write_snapshots, which is the common
part of bdrv_snapshot_create() and bdrv_snapshot_delete()).

Please do one thing at a time.  Split the series in multiple pieces if
possible.  Otherwise you will just fail, and you will have wasted a lot
of your and other people's time.

Paolo
Kevin Wolf - April 3, 2013, 9:22 a.m.
Am 03.04.2013 um 11:02 hat Wenchao Xia geschrieben:
> >
> >No, if bdrv_snapshot_delete() can fail, you need to split it in two
> >parts: one that can fail, and one that cannot.  If you cannot, then
> >there are two possibilities:
> >
> >- if the failures are minor and could be repaired with "qemu-img check -r"
> >(e.g. lost clusters), then this is not optimal but can still be done;
> >
> >- otherwise, the operation simply cannot be made transactionable.
> >
> >In the case of qcow2_snapshot_delete, everything except
> >
> >     ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
> >                            &header_data, sizeof(header_data));
> >     if (ret < 0) {
> >         goto fail;
> >     }
> >
> >must be in the prepare phase.  Everything after "fail" (which right now
> >is nothing, but it should at least undo the qcow2_alloc_clusters operation)
> >must be in the rollback phase.  Everything in the middle is the commit
> >phase.
> >
> >Paolo
> >
>   Sorry I haven't state it clearly. What about bdrv_snapshot_create()
> operation? If it need to be rolled back, I think bdrv_snapshot_delete()
> will get called and it may fail.

This means that you're doing it wrong. Instead of deleting the snapshot
on rollback, you shouldn't complete the creation until commit.

If the one bdrv_pwrite_sync() in the commit stage fails, we cannot
maintain transactional semantics. I guess its unlikely enough.

> But in most case if
> bdrv_snapshot_create() succeed before, the bdrv_snapshot_delete should
> succeed also, so if fail there may be unexpected error below, could
> assert be used for this?

assert() is only for programming errors, not for things like hardware
failure.

Kevin
Wayne Xia - April 3, 2013, 10:33 a.m.
于 2013-4-3 17:02, Wenchao Xia 写道:
>>
>> No, if bdrv_snapshot_delete() can fail, you need to split it in two
>> parts: one that can fail, and one that cannot.  If you cannot, then
>> there are two possibilities:
>>
>> - if the failures are minor and could be repaired with "qemu-img check
>> -r"
>> (e.g. lost clusters), then this is not optimal but can still be done;
>>
>> - otherwise, the operation simply cannot be made transactionable.
>>
>> In the case of qcow2_snapshot_delete, everything except
>>
>>      ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
>>                             &header_data, sizeof(header_data));
>>      if (ret < 0) {
>>          goto fail;
>>      }
>>
>> must be in the prepare phase.  Everything after "fail" (which right now
>> is nothing, but it should at least undo the qcow2_alloc_clusters
>> operation)
>> must be in the rollback phase.  Everything in the middle is the commit
>> phase.
>>
>> Paolo
>>
>    Sorry I haven't state it clearly. What about bdrv_snapshot_create()
> operation? If it need to be rolled back, I think bdrv_snapshot_delete()
> will get called and it may fail. But in most case if
> bdrv_snapshot_create() succeed before, the bdrv_snapshot_delete should
> succeed also, so if fail there may be unexpected error below, could
> assert be used for this?
>
   After consideration again, I think bdrv_snapshot_create() should be
split apart like above, thank u for the tip.
Stefan Hajnoczi - April 17, 2013, 2:42 p.m.
On Mon, Apr 01, 2013 at 06:01:30PM +0800, Wenchao Xia wrote:
>  /* New and old BlockDriverState structs for group snapshots */
> -typedef struct BlkTransactionStates {
> +typedef struct BdrvActionOps {
> +    int (*commit)(BlockdevAction *action, void **p_opaque, Error **errp);
> +    void (*rollback)(BlockdevAction *action, void *opaque);
> +    void (*clean)(BlockdevAction *action, void *opaque);

Please document these functions.

> +const BdrvActionOps external_snapshot_ops = {
> +    .commit = external_snapshot_commit,
> +    .rollback = external_snapshot_rollback,
> +    .clean = external_snapshot_clean,
> +};
> +
> +typedef struct BlkTransactionStates {
> +    BlockdevAction *action;
> +    void *opaque;
> +    const BdrvActionOps *ops;
>      QSIMPLEQ_ENTRY(BlkTransactionStates) entry;
>  } BlkTransactionStates;

The relationship between BlkTransactionStates and ExternalSnapshotState
can be simplified:

typedef struct {
    int (*commit)(BlkTransactionState *state, Error **errp);
    void (*rollback)(BlkTransactionState *state);
    void (*clean)(BlkTransactionState *state);
    size_t instance_size;
} BdrvActionOps;

typedef struct BlkTransactionState {
    BlockDevAction *action;
    const BdrvActionOps *ops;
    QSIMPLEQ_ENTRY(BlkTransactionState) entry;
} BlkTransactionState;

typedef struct {
    BlkTransactionState common;
    BlockDriverState *old_bs;
    BlockDriverState *new_bs;
} ExternalSnapshotState;

const BdrvActionOps external_snapshot_ops = {
    .commit = external_snapshot_commit,
    .rollback = external_snapshot_rollback,
    .clean = external_snapshot_clean,
    .instance_size = sizeof(ExternalSnapshotState);
};

This eliminates the opaque pointer and g_free(state) can be handled by
qmp_transaction().  This way action types no longer need to free opaque.
Wayne Xia - April 18, 2013, 3 a.m.
于 2013-4-17 22:42, Stefan Hajnoczi 写道:
> On Mon, Apr 01, 2013 at 06:01:30PM +0800, Wenchao Xia wrote:
>>   /* New and old BlockDriverState structs for group snapshots */
>> -typedef struct BlkTransactionStates {
>> +typedef struct BdrvActionOps {
>> +    int (*commit)(BlockdevAction *action, void **p_opaque, Error **errp);
>> +    void (*rollback)(BlockdevAction *action, void *opaque);
>> +    void (*clean)(BlockdevAction *action, void *opaque);
>
> Please document these functions.
>
   OK.

>> +const BdrvActionOps external_snapshot_ops = {
>> +    .commit = external_snapshot_commit,
>> +    .rollback = external_snapshot_rollback,
>> +    .clean = external_snapshot_clean,
>> +};
>> +
>> +typedef struct BlkTransactionStates {
>> +    BlockdevAction *action;
>> +    void *opaque;
>> +    const BdrvActionOps *ops;
>>       QSIMPLEQ_ENTRY(BlkTransactionStates) entry;
>>   } BlkTransactionStates;
>
> The relationship between BlkTransactionStates and ExternalSnapshotState
> can be simplified:
>
> typedef struct {
>      int (*commit)(BlkTransactionState *state, Error **errp);
>      void (*rollback)(BlkTransactionState *state);
>      void (*clean)(BlkTransactionState *state);
>      size_t instance_size;
> } BdrvActionOps;
>
> typedef struct BlkTransactionState {
>      BlockDevAction *action;
>      const BdrvActionOps *ops;
>      QSIMPLEQ_ENTRY(BlkTransactionState) entry;
> } BlkTransactionState;
>
> typedef struct {
>      BlkTransactionState common;
>      BlockDriverState *old_bs;
>      BlockDriverState *new_bs;
> } ExternalSnapshotState;
>
> const BdrvActionOps external_snapshot_ops = {
>      .commit = external_snapshot_commit,
>      .rollback = external_snapshot_rollback,
>      .clean = external_snapshot_clean,
>      .instance_size = sizeof(ExternalSnapshotState);
> };
>
> This eliminates the opaque pointer and g_free(state) can be handled by
> qmp_transaction().  This way action types no longer need to free opaque.
>
   Where should be ExternalSnapshotState* placed, insert it into
BlkTransactionState, or BdrvActionOps?
Stefan Hajnoczi - April 18, 2013, 6:35 a.m.
On Thu, Apr 18, 2013 at 5:00 AM, Wenchao Xia <xiawenc@linux.vnet.ibm.com> wrote:
> 于 2013-4-17 22:42, Stefan Hajnoczi 写道:
>
>> On Mon, Apr 01, 2013 at 06:01:30PM +0800, Wenchao Xia wrote:
>>>
>>>   /* New and old BlockDriverState structs for group snapshots */
>>> -typedef struct BlkTransactionStates {
>>> +typedef struct BdrvActionOps {
>>> +    int (*commit)(BlockdevAction *action, void **p_opaque, Error
>>> **errp);
>>> +    void (*rollback)(BlockdevAction *action, void *opaque);
>>> +    void (*clean)(BlockdevAction *action, void *opaque);
>>
>>
>> Please document these functions.
>>
>   OK.
>
>
>>> +const BdrvActionOps external_snapshot_ops = {
>>> +    .commit = external_snapshot_commit,
>>> +    .rollback = external_snapshot_rollback,
>>> +    .clean = external_snapshot_clean,
>>> +};
>>> +
>>> +typedef struct BlkTransactionStates {
>>> +    BlockdevAction *action;
>>> +    void *opaque;
>>> +    const BdrvActionOps *ops;
>>>       QSIMPLEQ_ENTRY(BlkTransactionStates) entry;
>>>   } BlkTransactionStates;
>>
>>
>> The relationship between BlkTransactionStates and ExternalSnapshotState
>> can be simplified:
>>
>> typedef struct {
>>      int (*commit)(BlkTransactionState *state, Error **errp);
>>      void (*rollback)(BlkTransactionState *state);
>>      void (*clean)(BlkTransactionState *state);
>>      size_t instance_size;
>> } BdrvActionOps;
>>
>> typedef struct BlkTransactionState {
>>      BlockDevAction *action;
>>      const BdrvActionOps *ops;
>>      QSIMPLEQ_ENTRY(BlkTransactionState) entry;
>> } BlkTransactionState;
>>
>> typedef struct {
>>      BlkTransactionState common;
>>      BlockDriverState *old_bs;
>>      BlockDriverState *new_bs;
>> } ExternalSnapshotState;
>>
>> const BdrvActionOps external_snapshot_ops = {
>>      .commit = external_snapshot_commit,
>>      .rollback = external_snapshot_rollback,
>>      .clean = external_snapshot_clean,
>>      .instance_size = sizeof(ExternalSnapshotState);
>> };
>>
>> This eliminates the opaque pointer and g_free(state) can be handled by
>> qmp_transaction().  This way action types no longer need to free opaque.
>>
>   Where should be ExternalSnapshotState* placed, insert it into
> BlkTransactionState, or BdrvActionOps?

No explicit pointer is needed since ExternalSnapshotState embeds
BlkTransactionState.  See container_of() or DO_UPCAST().

Stefan

Patch

diff --git a/blockdev.c b/blockdev.c
index 8cdc9ce..75416fb 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -779,9 +779,155 @@  void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file,
 
 
 /* New and old BlockDriverState structs for group snapshots */
-typedef struct BlkTransactionStates {
+typedef struct BdrvActionOps {
+    int (*commit)(BlockdevAction *action, void **p_opaque, Error **errp);
+    void (*rollback)(BlockdevAction *action, void *opaque);
+    void (*clean)(BlockdevAction *action, void *opaque);
+} BdrvActionOps;
+
+typedef struct ExternalSnapshotState {
     BlockDriverState *old_bs;
     BlockDriverState *new_bs;
+} ExternalSnapshotState;
+
+static int external_snapshot_commit(BlockdevAction *action,
+                                    void **p_opaque,
+                                    Error **errp)
+{
+    const char *device;
+    const char *new_image_file;
+    const char *format = "qcow2";
+    enum NewImageMode mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
+    BlockDriver *drv, *proto_drv;
+    ExternalSnapshotState *states = NULL;
+    int flags, ret;
+    Error *local_err = NULL;
+
+    g_assert(action->kind == BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC);
+
+    /* get parameters */
+    device = action->blockdev_snapshot_sync->device;
+    new_image_file = action->blockdev_snapshot_sync->snapshot_file;
+    if (action->blockdev_snapshot_sync->has_format) {
+        format = action->blockdev_snapshot_sync->format;
+    }
+    if (action->blockdev_snapshot_sync->has_mode) {
+        mode = action->blockdev_snapshot_sync->mode;
+    }
+
+    /* start processing */
+    states = g_malloc0(sizeof(*states));
+    drv = bdrv_find_format(format);
+    if (!drv) {
+        error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
+        goto fail;
+    }
+
+    states->old_bs = bdrv_find(device);
+    if (!states->old_bs) {
+        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+        goto fail;
+    }
+
+    if (!bdrv_is_inserted(states->old_bs)) {
+        error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
+        goto fail;
+    }
+
+    if (bdrv_in_use(states->old_bs)) {
+        error_set(errp, QERR_DEVICE_IN_USE, device);
+        goto fail;
+    }
+
+    if (!bdrv_is_read_only(states->old_bs)) {
+        if (bdrv_flush(states->old_bs)) {
+            error_set(errp, QERR_IO_ERROR);
+            goto fail;
+        }
+    }
+
+    proto_drv = bdrv_find_protocol(new_image_file);
+    if (!proto_drv) {
+        error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
+        goto fail;
+    }
+
+    flags = states->old_bs->open_flags;
+
+    /* create new image w/backing file */
+    if (mode != NEW_IMAGE_MODE_EXISTING) {
+        bdrv_img_create(new_image_file, format,
+                        states->old_bs->filename,
+                        states->old_bs->drv->format_name,
+                        NULL, -1, flags, &local_err, false);
+        if (error_is_set(&local_err)) {
+            error_propagate(errp, local_err);
+            goto fail;
+        }
+    }
+
+    states->new_bs = bdrv_new("");
+    /* TODO Inherit bs->options or only take explicit options with an
+     * extended QMP command? */
+    ret = bdrv_open(states->new_bs, new_image_file, NULL,
+                    flags | BDRV_O_NO_BACKING, drv);
+    if (ret != 0) {
+        error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file);
+        goto fail;
+    }
+
+    /* This removes our old bs from the bdrv_states, and adds the new bs */
+    bdrv_append(states->new_bs, states->old_bs);
+    /* We don't need (or want) to use the transactional
+     * bdrv_reopen_multiple() across all the entries at once, because we
+     * don't want to abort all of them if one of them fails the reopen */
+    bdrv_reopen(states->new_bs, states->new_bs->open_flags & ~BDRV_O_RDWR,
+                    NULL);
+
+    *p_opaque = states;
+    return 0;
+
+fail:
+    if (states) {
+        if (states->new_bs) {
+            bdrv_delete(states->new_bs);
+        }
+        g_free(states);
+    }
+    return -1;
+}
+
+static void external_snapshot_rollback(BlockdevAction *action,
+                                       void *opaque)
+{
+    ExternalSnapshotState *states = opaque;
+    int open_flags;
+
+    if (states) {
+        open_flags = states->old_bs->open_flags;
+        bdrv_deappend(states->old_bs);
+        bdrv_reopen(states->old_bs, open_flags, NULL);
+    }
+}
+
+static void external_snapshot_clean(BlockdevAction *action,
+                                    void *opaque)
+{
+    ExternalSnapshotState *states = opaque;
+
+    g_free(states);
+}
+
+const BdrvActionOps external_snapshot_ops = {
+    .commit = external_snapshot_commit,
+    .rollback = external_snapshot_rollback,
+    .clean = external_snapshot_clean,
+};
+
+typedef struct BlkTransactionStates {
+    BlockdevAction *action;
+    void *opaque;
+    const BdrvActionOps *ops;
     QSIMPLEQ_ENTRY(BlkTransactionStates) entry;
 } BlkTransactionStates;
 
@@ -792,10 +938,8 @@  typedef struct BlkTransactionStates {
  */
 void qmp_transaction(BlockdevActionList *dev_list, Error **errp)
 {
-    int ret = 0;
     BlockdevActionList *dev_entry = dev_list;
     BlkTransactionStates *states, *next;
-    Error *local_err = NULL;
 
     QSIMPLEQ_HEAD(snap_bdrv_states, BlkTransactionStates) snap_bdrv_states;
     QSIMPLEQ_INIT(&snap_bdrv_states);
@@ -806,125 +950,37 @@  void qmp_transaction(BlockdevActionList *dev_list, Error **errp)
     /* We don't do anything in this loop that commits us to the snapshot */
     while (NULL != dev_entry) {
         BlockdevAction *dev_info = NULL;
-        BlockDriver *proto_drv;
-        BlockDriver *drv;
-        int flags;
-        enum NewImageMode mode;
-        const char *new_image_file;
-        const char *device;
-        const char *format = "qcow2";
-
         dev_info = dev_entry->value;
         dev_entry = dev_entry->next;
 
         states = g_malloc0(sizeof(BlkTransactionStates));
         QSIMPLEQ_INSERT_TAIL(&snap_bdrv_states, states, entry);
 
+        states->action = dev_info;
         switch (dev_info->kind) {
         case BLOCKDEV_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC:
-            device = dev_info->blockdev_snapshot_sync->device;
-            if (!dev_info->blockdev_snapshot_sync->has_mode) {
-                dev_info->blockdev_snapshot_sync->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
-            }
-            new_image_file = dev_info->blockdev_snapshot_sync->snapshot_file;
-            if (dev_info->blockdev_snapshot_sync->has_format) {
-                format = dev_info->blockdev_snapshot_sync->format;
-            }
-            mode = dev_info->blockdev_snapshot_sync->mode;
+            states->ops = &external_snapshot_ops;
             break;
         default:
             abort();
         }
 
-        drv = bdrv_find_format(format);
-        if (!drv) {
-            error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
-            goto delete_and_fail;
-        }
-
-        states->old_bs = bdrv_find(device);
-        if (!states->old_bs) {
-            error_set(errp, QERR_DEVICE_NOT_FOUND, device);
-            goto delete_and_fail;
-        }
-
-        if (!bdrv_is_inserted(states->old_bs)) {
-            error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
-            goto delete_and_fail;
-        }
-
-        if (bdrv_in_use(states->old_bs)) {
-            error_set(errp, QERR_DEVICE_IN_USE, device);
-            goto delete_and_fail;
-        }
-
-        if (!bdrv_is_read_only(states->old_bs)) {
-            if (bdrv_flush(states->old_bs)) {
-                error_set(errp, QERR_IO_ERROR);
-                goto delete_and_fail;
-            }
-        }
-
-        flags = states->old_bs->open_flags;
-
-        proto_drv = bdrv_find_protocol(new_image_file);
-        if (!proto_drv) {
-            error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
-            goto delete_and_fail;
-        }
-
-        /* create new image w/backing file */
-        if (mode != NEW_IMAGE_MODE_EXISTING) {
-            bdrv_img_create(new_image_file, format,
-                            states->old_bs->filename,
-                            states->old_bs->drv->format_name,
-                            NULL, -1, flags, &local_err, false);
-            if (error_is_set(&local_err)) {
-                error_propagate(errp, local_err);
-                goto delete_and_fail;
-            }
-        }
-
-        /* We will manually add the backing_hd field to the bs later */
-        states->new_bs = bdrv_new("");
-        /* TODO Inherit bs->options or only take explicit options with an
-         * extended QMP command? */
-        ret = bdrv_open(states->new_bs, new_image_file, NULL,
-                        flags | BDRV_O_NO_BACKING, drv);
-        if (ret != 0) {
-            error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file);
+        if (states->ops->commit(states->action, &states->opaque, errp)) {
             goto delete_and_fail;
         }
     }
 
-
-    /* Now we are going to do the actual pivot.  Everything up to this point
-     * is reversible, but we are committed at this point */
-    QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) {
-        /* This removes our old bs from the bdrv_states, and adds the new bs */
-        bdrv_append(states->new_bs, states->old_bs);
-        /* We don't need (or want) to use the transactional
-         * bdrv_reopen_multiple() across all the entries at once, because we
-         * don't want to abort all of them if one of them fails the reopen */
-        bdrv_reopen(states->new_bs, states->new_bs->open_flags & ~BDRV_O_RDWR,
-                    NULL);
-    }
-
     /* success */
     goto exit;
 
 delete_and_fail:
-    /*
-    * failure, and it is all-or-none; abandon each new bs, and keep using
-    * the original bs for all images
-    */
     QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) {
-        if (states->new_bs) {
-             bdrv_delete(states->new_bs);
-        }
+        states->ops->rollback(states->action, states->opaque);
     }
+
 exit:
     QSIMPLEQ_FOREACH_SAFE(states, &snap_bdrv_states, entry, next) {
+        states->ops->clean(states->action, states->opaque);
         g_free(states);
     }
 }