diff mbox

[07/10] block/backup: support block job transactions

Message ID 1435234332-581-8-git-send-email-stefanha@redhat.com
State New
Headers show

Commit Message

Stefan Hajnoczi June 25, 2015, 12:12 p.m. UTC
Join the transaction when the backup block job is in incremental backup
mode.

This ensures that the sync bitmap is not thrown away if another block
job in the transaction is cancelled or fails.  This is critical so
incremental backup with multiple disks can be retried in case of
cancellation/failure.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/backup.c |  7 +++++-
 blockdev.c     | 73 ++++++++++++++++++++++++++++++++++++++++++----------------
 2 files changed, 59 insertions(+), 21 deletions(-)

Comments

Fam Zheng June 26, 2015, 6:44 a.m. UTC | #1
On Thu, 06/25 13:12, Stefan Hajnoczi wrote:
> Join the transaction when the backup block job is in incremental backup
> mode.
> 
> This ensures that the sync bitmap is not thrown away if another block
> job in the transaction is cancelled or fails.  This is critical so
> incremental backup with multiple disks can be retried in case of
> cancellation/failure.
> 
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>

Reviewed-by: Fam Zheng <famz@redhat.com>

> ---
>  block/backup.c |  7 +++++-
>  blockdev.c     | 73 ++++++++++++++++++++++++++++++++++++++++++----------------
>  2 files changed, 59 insertions(+), 21 deletions(-)
> 
> diff --git a/block/backup.c b/block/backup.c
> index ddf8424..fa86b0e 100644
> --- a/block/backup.c
> +++ b/block/backup.c
> @@ -429,6 +429,8 @@ static void coroutine_fn backup_run(void *opaque)
>      qemu_co_rwlock_wrlock(&job->flush_rwlock);
>      qemu_co_rwlock_unlock(&job->flush_rwlock);
>  
> +    block_job_txn_prepare_to_complete(job->common.txn, &job->common, ret);
> +
>      if (job->sync_bitmap) {
>          BdrvDirtyBitmap *bm;
>          if (ret < 0 || block_job_is_cancelled(&job->common)) {
> @@ -457,7 +459,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
>                    BlockdevOnError on_source_error,
>                    BlockdevOnError on_target_error,
>                    BlockCompletionFunc *cb, void *opaque,
> -                  Error **errp)
> +                  BlockJobTxn *txn, Error **errp)
>  {
>      int64_t len;
>  
> @@ -537,6 +539,9 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
>      job->sync_mode = sync_mode;
>      job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP ?
>                         sync_bitmap : NULL;
> +    if (job->sync_bitmap) {
> +        block_job_txn_add_job(txn, &job->common);
> +    }
>      job->common.len = len;
>      job->common.co = qemu_coroutine_create(backup_run);
>      qemu_coroutine_enter(job->common.co, job);
> diff --git a/blockdev.c b/blockdev.c
> index 453f3ec..4f27c35 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -1586,6 +1586,18 @@ typedef struct DriveBackupState {
>      BlockJob *job;
>  } DriveBackupState;
>  
> +static void do_drive_backup(const char *device, const char *target,
> +                            bool has_format, const char *format,
> +                            enum MirrorSyncMode sync,
> +                            bool has_mode, enum NewImageMode mode,
> +                            bool has_speed, int64_t speed,
> +                            bool has_bitmap, const char *bitmap,
> +                            bool has_on_source_error,
> +                            BlockdevOnError on_source_error,
> +                            bool has_on_target_error,
> +                            BlockdevOnError on_target_error,
> +                            BlockJobTxn *txn, Error **errp);
> +
>  static void drive_backup_prepare(BlkActionState *common, Error **errp)
>  {
>      DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
> @@ -1609,15 +1621,16 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp)
>      state->aio_context = bdrv_get_aio_context(bs);
>      aio_context_acquire(state->aio_context);
>  
> -    qmp_drive_backup(backup->device, backup->target,
> -                     backup->has_format, backup->format,
> -                     backup->sync,
> -                     backup->has_mode, backup->mode,
> -                     backup->has_speed, backup->speed,
> -                     backup->has_bitmap, backup->bitmap,
> -                     backup->has_on_source_error, backup->on_source_error,
> -                     backup->has_on_target_error, backup->on_target_error,
> -                     &local_err);
> +    do_drive_backup(backup->device, backup->target,
> +                    backup->has_format, backup->format,
> +                    backup->sync,
> +                    backup->has_mode, backup->mode,
> +                    backup->has_speed, backup->speed,
> +                    backup->has_bitmap, backup->bitmap,
> +                    backup->has_on_source_error, backup->on_source_error,
> +                    backup->has_on_target_error, backup->on_target_error,
> +                    common->block_job_txn,
> +                    &local_err);
>      if (local_err) {
>          error_propagate(errp, local_err);
>          return;
> @@ -2585,15 +2598,17 @@ out:
>      aio_context_release(aio_context);
>  }
>  
> -void qmp_drive_backup(const char *device, const char *target,
> -                      bool has_format, const char *format,
> -                      enum MirrorSyncMode sync,
> -                      bool has_mode, enum NewImageMode mode,
> -                      bool has_speed, int64_t speed,
> -                      bool has_bitmap, const char *bitmap,
> -                      bool has_on_source_error, BlockdevOnError on_source_error,
> -                      bool has_on_target_error, BlockdevOnError on_target_error,
> -                      Error **errp)
> +static void do_drive_backup(const char *device, const char *target,
> +                            bool has_format, const char *format,
> +                            enum MirrorSyncMode sync,
> +                            bool has_mode, enum NewImageMode mode,
> +                            bool has_speed, int64_t speed,
> +                            bool has_bitmap, const char *bitmap,
> +                            bool has_on_source_error,
> +                            BlockdevOnError on_source_error,
> +                            bool has_on_target_error,
> +                            BlockdevOnError on_target_error,
> +                            BlockJobTxn *txn, Error **errp)
>  {
>      BlockBackend *blk;
>      BlockDriverState *bs;
> @@ -2710,7 +2725,7 @@ void qmp_drive_backup(const char *device, const char *target,
>  
>      backup_start(bs, target_bs, speed, sync, bmap,
>                   on_source_error, on_target_error,
> -                 block_job_cb, bs, &local_err);
> +                 block_job_cb, bs, txn, &local_err);
>      if (local_err != NULL) {
>          bdrv_unref(target_bs);
>          error_propagate(errp, local_err);
> @@ -2721,6 +2736,24 @@ out:
>      aio_context_release(aio_context);
>  }
>  
> +void qmp_drive_backup(const char *device, const char *target,
> +                      bool has_format, const char *format,
> +                      enum MirrorSyncMode sync,
> +                      bool has_mode, enum NewImageMode mode,
> +                      bool has_speed, int64_t speed,
> +                      bool has_bitmap, const char *bitmap,
> +                      bool has_on_source_error, BlockdevOnError on_source_error,
> +                      bool has_on_target_error, BlockdevOnError on_target_error,
> +                      Error **errp)
> +{
> +    return do_drive_backup(device, target, has_format, format, sync,
> +                           has_mode, mode, has_speed, speed,
> +                           has_bitmap, bitmap,
> +                           has_on_source_error, on_source_error,
> +                           has_on_target_error, on_target_error,
> +                           NULL, errp);
> +}
> +
>  BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp)
>  {
>      return bdrv_named_nodes_list(errp);
> @@ -2771,7 +2804,7 @@ void qmp_blockdev_backup(const char *device, const char *target,
>      bdrv_ref(target_bs);
>      bdrv_set_aio_context(target_bs, aio_context);
>      backup_start(bs, target_bs, speed, sync, NULL, on_source_error,
> -                 on_target_error, block_job_cb, bs, &local_err);
> +                 on_target_error, block_job_cb, bs, NULL, &local_err);
>      if (local_err != NULL) {
>          bdrv_unref(target_bs);
>          error_propagate(errp, local_err);
> -- 
> 2.4.3
> 
>
John Snow June 29, 2015, 10:39 p.m. UTC | #2
On 06/25/2015 08:12 AM, Stefan Hajnoczi wrote:
> Join the transaction when the backup block job is in incremental backup
> mode.
> 
> This ensures that the sync bitmap is not thrown away if another block
> job in the transaction is cancelled or fails.  This is critical so
> incremental backup with multiple disks can be retried in case of
> cancellation/failure.
> 
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>  block/backup.c |  7 +++++-
>  blockdev.c     | 73 ++++++++++++++++++++++++++++++++++++++++++----------------
>  2 files changed, 59 insertions(+), 21 deletions(-)
> 
> diff --git a/block/backup.c b/block/backup.c
> index ddf8424..fa86b0e 100644
> --- a/block/backup.c
> +++ b/block/backup.c
> @@ -429,6 +429,8 @@ static void coroutine_fn backup_run(void *opaque)
>      qemu_co_rwlock_wrlock(&job->flush_rwlock);
>      qemu_co_rwlock_unlock(&job->flush_rwlock);
>  
> +    block_job_txn_prepare_to_complete(job->common.txn, &job->common, ret);
> +
>      if (job->sync_bitmap) {
>          BdrvDirtyBitmap *bm;
>          if (ret < 0 || block_job_is_cancelled(&job->common)) {
> @@ -457,7 +459,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
>                    BlockdevOnError on_source_error,
>                    BlockdevOnError on_target_error,
>                    BlockCompletionFunc *cb, void *opaque,
> -                  Error **errp)
> +                  BlockJobTxn *txn, Error **errp)
>  {
>      int64_t len;
>  
> @@ -537,6 +539,9 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
>      job->sync_mode = sync_mode;
>      job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP ?
>                         sync_bitmap : NULL;
> +    if (job->sync_bitmap) {
> +        block_job_txn_add_job(txn, &job->common);
> +    }

Hmm, is this what we want? This will add backup jobs to a transaction
only if they have a bitmap attached to the job.

However, if we're doing a mixture of full and incremental backups, we
may still want to roll back the incremental backups if the full backups
failed as part of the transaction.

The (admittedly more complicated) design I submitted will always add a
job to the transactional group, whether it has a bitmap or not. The
membership test was only if it was launched by the backup transaction
action. The bitmap is only checked for purposes of refcounting and
cleanup mechanics.

Maybe that wasn't what we wanted either, but this is a difference in how
our series will behave.

>      job->common.len = len;
>      job->common.co = qemu_coroutine_create(backup_run);
>      qemu_coroutine_enter(job->common.co, job);
> diff --git a/blockdev.c b/blockdev.c
> index 453f3ec..4f27c35 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -1586,6 +1586,18 @@ typedef struct DriveBackupState {
>      BlockJob *job;
>  } DriveBackupState;
>  
> +static void do_drive_backup(const char *device, const char *target,
> +                            bool has_format, const char *format,
> +                            enum MirrorSyncMode sync,
> +                            bool has_mode, enum NewImageMode mode,
> +                            bool has_speed, int64_t speed,
> +                            bool has_bitmap, const char *bitmap,
> +                            bool has_on_source_error,
> +                            BlockdevOnError on_source_error,
> +                            bool has_on_target_error,
> +                            BlockdevOnError on_target_error,
> +                            BlockJobTxn *txn, Error **errp);
> +
>  static void drive_backup_prepare(BlkActionState *common, Error **errp)
>  {
>      DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
> @@ -1609,15 +1621,16 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp)
>      state->aio_context = bdrv_get_aio_context(bs);
>      aio_context_acquire(state->aio_context);
>  
> -    qmp_drive_backup(backup->device, backup->target,
> -                     backup->has_format, backup->format,
> -                     backup->sync,
> -                     backup->has_mode, backup->mode,
> -                     backup->has_speed, backup->speed,
> -                     backup->has_bitmap, backup->bitmap,
> -                     backup->has_on_source_error, backup->on_source_error,
> -                     backup->has_on_target_error, backup->on_target_error,
> -                     &local_err);
> +    do_drive_backup(backup->device, backup->target,
> +                    backup->has_format, backup->format,
> +                    backup->sync,
> +                    backup->has_mode, backup->mode,
> +                    backup->has_speed, backup->speed,
> +                    backup->has_bitmap, backup->bitmap,
> +                    backup->has_on_source_error, backup->on_source_error,
> +                    backup->has_on_target_error, backup->on_target_error,
> +                    common->block_job_txn,
> +                    &local_err);
>      if (local_err) {
>          error_propagate(errp, local_err);
>          return;
> @@ -2585,15 +2598,17 @@ out:
>      aio_context_release(aio_context);
>  }
>  
> -void qmp_drive_backup(const char *device, const char *target,
> -                      bool has_format, const char *format,
> -                      enum MirrorSyncMode sync,
> -                      bool has_mode, enum NewImageMode mode,
> -                      bool has_speed, int64_t speed,
> -                      bool has_bitmap, const char *bitmap,
> -                      bool has_on_source_error, BlockdevOnError on_source_error,
> -                      bool has_on_target_error, BlockdevOnError on_target_error,
> -                      Error **errp)
> +static void do_drive_backup(const char *device, const char *target,
> +                            bool has_format, const char *format,
> +                            enum MirrorSyncMode sync,
> +                            bool has_mode, enum NewImageMode mode,
> +                            bool has_speed, int64_t speed,
> +                            bool has_bitmap, const char *bitmap,
> +                            bool has_on_source_error,
> +                            BlockdevOnError on_source_error,
> +                            bool has_on_target_error,
> +                            BlockdevOnError on_target_error,
> +                            BlockJobTxn *txn, Error **errp)
>  {
>      BlockBackend *blk;
>      BlockDriverState *bs;
> @@ -2710,7 +2725,7 @@ void qmp_drive_backup(const char *device, const char *target,
>  
>      backup_start(bs, target_bs, speed, sync, bmap,
>                   on_source_error, on_target_error,
> -                 block_job_cb, bs, &local_err);
> +                 block_job_cb, bs, txn, &local_err);
>      if (local_err != NULL) {
>          bdrv_unref(target_bs);
>          error_propagate(errp, local_err);
> @@ -2721,6 +2736,24 @@ out:
>      aio_context_release(aio_context);
>  }
>  
> +void qmp_drive_backup(const char *device, const char *target,
> +                      bool has_format, const char *format,
> +                      enum MirrorSyncMode sync,
> +                      bool has_mode, enum NewImageMode mode,
> +                      bool has_speed, int64_t speed,
> +                      bool has_bitmap, const char *bitmap,
> +                      bool has_on_source_error, BlockdevOnError on_source_error,
> +                      bool has_on_target_error, BlockdevOnError on_target_error,
> +                      Error **errp)
> +{
> +    return do_drive_backup(device, target, has_format, format, sync,
> +                           has_mode, mode, has_speed, speed,
> +                           has_bitmap, bitmap,
> +                           has_on_source_error, on_source_error,
> +                           has_on_target_error, on_target_error,
> +                           NULL, errp);
> +}
> +
>  BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp)
>  {
>      return bdrv_named_nodes_list(errp);
> @@ -2771,7 +2804,7 @@ void qmp_blockdev_backup(const char *device, const char *target,
>      bdrv_ref(target_bs);
>      bdrv_set_aio_context(target_bs, aio_context);
>      backup_start(bs, target_bs, speed, sync, NULL, on_source_error,
> -                 on_target_error, block_job_cb, bs, &local_err);
> +                 on_target_error, block_job_cb, bs, NULL, &local_err);
>      if (local_err != NULL) {
>          bdrv_unref(target_bs);
>          error_propagate(errp, local_err);
> 

Looks solid otherwise.

--js
Stefan Hajnoczi June 30, 2015, 3:27 p.m. UTC | #3
On Mon, Jun 29, 2015 at 06:39:08PM -0400, John Snow wrote:
> 
> 
> On 06/25/2015 08:12 AM, Stefan Hajnoczi wrote:
> > Join the transaction when the backup block job is in incremental backup
> > mode.
> > 
> > This ensures that the sync bitmap is not thrown away if another block
> > job in the transaction is cancelled or fails.  This is critical so
> > incremental backup with multiple disks can be retried in case of
> > cancellation/failure.
> > 
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> >  block/backup.c |  7 +++++-
> >  blockdev.c     | 73 ++++++++++++++++++++++++++++++++++++++++++----------------
> >  2 files changed, 59 insertions(+), 21 deletions(-)
> > 
> > diff --git a/block/backup.c b/block/backup.c
> > index ddf8424..fa86b0e 100644
> > --- a/block/backup.c
> > +++ b/block/backup.c
> > @@ -429,6 +429,8 @@ static void coroutine_fn backup_run(void *opaque)
> >      qemu_co_rwlock_wrlock(&job->flush_rwlock);
> >      qemu_co_rwlock_unlock(&job->flush_rwlock);
> >  
> > +    block_job_txn_prepare_to_complete(job->common.txn, &job->common, ret);
> > +
> >      if (job->sync_bitmap) {
> >          BdrvDirtyBitmap *bm;
> >          if (ret < 0 || block_job_is_cancelled(&job->common)) {
> > @@ -457,7 +459,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
> >                    BlockdevOnError on_source_error,
> >                    BlockdevOnError on_target_error,
> >                    BlockCompletionFunc *cb, void *opaque,
> > -                  Error **errp)
> > +                  BlockJobTxn *txn, Error **errp)
> >  {
> >      int64_t len;
> >  
> > @@ -537,6 +539,9 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
> >      job->sync_mode = sync_mode;
> >      job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP ?
> >                         sync_bitmap : NULL;
> > +    if (job->sync_bitmap) {
> > +        block_job_txn_add_job(txn, &job->common);
> > +    }
> 
> Hmm, is this what we want? This will add backup jobs to a transaction
> only if they have a bitmap attached to the job.
> 
> However, if we're doing a mixture of full and incremental backups, we
> may still want to roll back the incremental backups if the full backups
> failed as part of the transaction.
> 
> The (admittedly more complicated) design I submitted will always add a
> job to the transactional group, whether it has a bitmap or not. The
> membership test was only if it was launched by the backup transaction
> action. The bitmap is only checked for purposes of refcounting and
> cleanup mechanics.
> 
> Maybe that wasn't what we wanted either, but this is a difference in how
> our series will behave.

The 'backup' operation was added to the QMP 'transaction' command in
QEMU 1.6.  If we add non-incremental backup commands to the transaction
then behavior changes.

Perhaps DriveBackup and BlockdevBackup QAPI structures should take an
optional 'transaction' bool argument.  That way the caller decides which
behavior to use.
John Snow June 30, 2015, 3:52 p.m. UTC | #4
On 06/30/2015 11:27 AM, Stefan Hajnoczi wrote:
> On Mon, Jun 29, 2015 at 06:39:08PM -0400, John Snow wrote:
>>
>>
>> On 06/25/2015 08:12 AM, Stefan Hajnoczi wrote:
>>> Join the transaction when the backup block job is in incremental backup
>>> mode.
>>>
>>> This ensures that the sync bitmap is not thrown away if another block
>>> job in the transaction is cancelled or fails.  This is critical so
>>> incremental backup with multiple disks can be retried in case of
>>> cancellation/failure.
>>>
>>> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
>>> ---
>>>  block/backup.c |  7 +++++-
>>>  blockdev.c     | 73 ++++++++++++++++++++++++++++++++++++++++++----------------
>>>  2 files changed, 59 insertions(+), 21 deletions(-)
>>>
>>> diff --git a/block/backup.c b/block/backup.c
>>> index ddf8424..fa86b0e 100644
>>> --- a/block/backup.c
>>> +++ b/block/backup.c
>>> @@ -429,6 +429,8 @@ static void coroutine_fn backup_run(void *opaque)
>>>      qemu_co_rwlock_wrlock(&job->flush_rwlock);
>>>      qemu_co_rwlock_unlock(&job->flush_rwlock);
>>>  
>>> +    block_job_txn_prepare_to_complete(job->common.txn, &job->common, ret);
>>> +
>>>      if (job->sync_bitmap) {
>>>          BdrvDirtyBitmap *bm;
>>>          if (ret < 0 || block_job_is_cancelled(&job->common)) {
>>> @@ -457,7 +459,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
>>>                    BlockdevOnError on_source_error,
>>>                    BlockdevOnError on_target_error,
>>>                    BlockCompletionFunc *cb, void *opaque,
>>> -                  Error **errp)
>>> +                  BlockJobTxn *txn, Error **errp)
>>>  {
>>>      int64_t len;
>>>  
>>> @@ -537,6 +539,9 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
>>>      job->sync_mode = sync_mode;
>>>      job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP ?
>>>                         sync_bitmap : NULL;
>>> +    if (job->sync_bitmap) {
>>> +        block_job_txn_add_job(txn, &job->common);
>>> +    }
>>
>> Hmm, is this what we want? This will add backup jobs to a transaction
>> only if they have a bitmap attached to the job.
>>
>> However, if we're doing a mixture of full and incremental backups, we
>> may still want to roll back the incremental backups if the full backups
>> failed as part of the transaction.
>>
>> The (admittedly more complicated) design I submitted will always add a
>> job to the transactional group, whether it has a bitmap or not. The
>> membership test was only if it was launched by the backup transaction
>> action. The bitmap is only checked for purposes of refcounting and
>> cleanup mechanics.
>>
>> Maybe that wasn't what we wanted either, but this is a difference in how
>> our series will behave.
> 
> The 'backup' operation was added to the QMP 'transaction' command in
> QEMU 1.6.  If we add non-incremental backup commands to the transaction
> then behavior changes.
> 

Ugh, good point...

> Perhaps DriveBackup and BlockdevBackup QAPI structures should take an
> optional 'transaction' bool argument.  That way the caller decides which
> behavior to use.
> 

The way my version operated only changed the cleanup behavior -- it
didn't attempt to cancel other jobs if they failed or not. It naively
let them finish, then performed cleanup based on the overall completion
status.

That let the old behavior continue working like it did, but changed how
incrementals worked upon completion.

(1) Perhaps we can change the forced cancellation aspect and just allow
jobs to finish naturally even in the event of failure. It's wasteful,
but it does allow us to maintain the existing behavior while getting the
behavior we want for incremental transactions.

(2) Or, yes, add some sort of "all or nothing" flag to transactions(?*)
that users can toggle on/off. I had wondered in the past if it wouldn't
be advantageous for libvirt to be able to choose this behavior, if
managing state of partial completions was desirable in some cases to
avoid re-running operations unnecessarily.

*As a thought, perhaps cancel-all-on-error as a flag can be a property
of the QMP transaction command itself. When set, actions that launch
jobs can add the job to the TXN. An error can be raised if the flag is
used in conjunction with an action that doesn't currently/won't ever
support the do-or-die flag.
Stefan Hajnoczi July 1, 2015, 8:45 a.m. UTC | #5
On Tue, Jun 30, 2015 at 11:52:54AM -0400, John Snow wrote:
> On 06/30/2015 11:27 AM, Stefan Hajnoczi wrote:
> > On Mon, Jun 29, 2015 at 06:39:08PM -0400, John Snow wrote:
> >> On 06/25/2015 08:12 AM, Stefan Hajnoczi wrote:
> >>> @@ -537,6 +539,9 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
> >>>      job->sync_mode = sync_mode;
> >>>      job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP ?
> >>>                         sync_bitmap : NULL;
> >>> +    if (job->sync_bitmap) {
> >>> +        block_job_txn_add_job(txn, &job->common);
> >>> +    }
> >>
> >> Hmm, is this what we want? This will add backup jobs to a transaction
> >> only if they have a bitmap attached to the job.
> >>
> >> However, if we're doing a mixture of full and incremental backups, we
> >> may still want to roll back the incremental backups if the full backups
> >> failed as part of the transaction.
> >>
> >> The (admittedly more complicated) design I submitted will always add a
> >> job to the transactional group, whether it has a bitmap or not. The
> >> membership test was only if it was launched by the backup transaction
> >> action. The bitmap is only checked for purposes of refcounting and
> >> cleanup mechanics.
> >>
> >> Maybe that wasn't what we wanted either, but this is a difference in how
> >> our series will behave.
> > 
> > The 'backup' operation was added to the QMP 'transaction' command in
> > QEMU 1.6.  If we add non-incremental backup commands to the transaction
> > then behavior changes.
> > 
> 
> Ugh, good point...
> 
> > Perhaps DriveBackup and BlockdevBackup QAPI structures should take an
> > optional 'transaction' bool argument.  That way the caller decides which
> > behavior to use.
> > 
> 
> The way my version operated only changed the cleanup behavior -- it
> didn't attempt to cancel other jobs if they failed or not. It naively
> let them finish, then performed cleanup based on the overall completion
> status.
> 
> That let the old behavior continue working like it did, but changed how
> incrementals worked upon completion.
> 
> (1) Perhaps we can change the forced cancellation aspect and just allow
> jobs to finish naturally even in the event of failure. It's wasteful,
> but it does allow us to maintain the existing behavior while getting the
> behavior we want for incremental transactions.
> 
> (2) Or, yes, add some sort of "all or nothing" flag to transactions(?*)
> that users can toggle on/off. I had wondered in the past if it wouldn't
> be advantageous for libvirt to be able to choose this behavior, if
> managing state of partial completions was desirable in some cases to
> avoid re-running operations unnecessarily.
> 
> *As a thought, perhaps cancel-all-on-error as a flag can be a property
> of the QMP transaction command itself. When set, actions that launch
> jobs can add the job to the TXN. An error can be raised if the flag is
> used in conjunction with an action that doesn't currently/won't ever
> support the do-or-die flag.

Good, this is easy to add.
diff mbox

Patch

diff --git a/block/backup.c b/block/backup.c
index ddf8424..fa86b0e 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -429,6 +429,8 @@  static void coroutine_fn backup_run(void *opaque)
     qemu_co_rwlock_wrlock(&job->flush_rwlock);
     qemu_co_rwlock_unlock(&job->flush_rwlock);
 
+    block_job_txn_prepare_to_complete(job->common.txn, &job->common, ret);
+
     if (job->sync_bitmap) {
         BdrvDirtyBitmap *bm;
         if (ret < 0 || block_job_is_cancelled(&job->common)) {
@@ -457,7 +459,7 @@  void backup_start(BlockDriverState *bs, BlockDriverState *target,
                   BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
                   BlockCompletionFunc *cb, void *opaque,
-                  Error **errp)
+                  BlockJobTxn *txn, Error **errp)
 {
     int64_t len;
 
@@ -537,6 +539,9 @@  void backup_start(BlockDriverState *bs, BlockDriverState *target,
     job->sync_mode = sync_mode;
     job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP ?
                        sync_bitmap : NULL;
+    if (job->sync_bitmap) {
+        block_job_txn_add_job(txn, &job->common);
+    }
     job->common.len = len;
     job->common.co = qemu_coroutine_create(backup_run);
     qemu_coroutine_enter(job->common.co, job);
diff --git a/blockdev.c b/blockdev.c
index 453f3ec..4f27c35 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1586,6 +1586,18 @@  typedef struct DriveBackupState {
     BlockJob *job;
 } DriveBackupState;
 
+static void do_drive_backup(const char *device, const char *target,
+                            bool has_format, const char *format,
+                            enum MirrorSyncMode sync,
+                            bool has_mode, enum NewImageMode mode,
+                            bool has_speed, int64_t speed,
+                            bool has_bitmap, const char *bitmap,
+                            bool has_on_source_error,
+                            BlockdevOnError on_source_error,
+                            bool has_on_target_error,
+                            BlockdevOnError on_target_error,
+                            BlockJobTxn *txn, Error **errp);
+
 static void drive_backup_prepare(BlkActionState *common, Error **errp)
 {
     DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
@@ -1609,15 +1621,16 @@  static void drive_backup_prepare(BlkActionState *common, Error **errp)
     state->aio_context = bdrv_get_aio_context(bs);
     aio_context_acquire(state->aio_context);
 
-    qmp_drive_backup(backup->device, backup->target,
-                     backup->has_format, backup->format,
-                     backup->sync,
-                     backup->has_mode, backup->mode,
-                     backup->has_speed, backup->speed,
-                     backup->has_bitmap, backup->bitmap,
-                     backup->has_on_source_error, backup->on_source_error,
-                     backup->has_on_target_error, backup->on_target_error,
-                     &local_err);
+    do_drive_backup(backup->device, backup->target,
+                    backup->has_format, backup->format,
+                    backup->sync,
+                    backup->has_mode, backup->mode,
+                    backup->has_speed, backup->speed,
+                    backup->has_bitmap, backup->bitmap,
+                    backup->has_on_source_error, backup->on_source_error,
+                    backup->has_on_target_error, backup->on_target_error,
+                    common->block_job_txn,
+                    &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
         return;
@@ -2585,15 +2598,17 @@  out:
     aio_context_release(aio_context);
 }
 
-void qmp_drive_backup(const char *device, const char *target,
-                      bool has_format, const char *format,
-                      enum MirrorSyncMode sync,
-                      bool has_mode, enum NewImageMode mode,
-                      bool has_speed, int64_t speed,
-                      bool has_bitmap, const char *bitmap,
-                      bool has_on_source_error, BlockdevOnError on_source_error,
-                      bool has_on_target_error, BlockdevOnError on_target_error,
-                      Error **errp)
+static void do_drive_backup(const char *device, const char *target,
+                            bool has_format, const char *format,
+                            enum MirrorSyncMode sync,
+                            bool has_mode, enum NewImageMode mode,
+                            bool has_speed, int64_t speed,
+                            bool has_bitmap, const char *bitmap,
+                            bool has_on_source_error,
+                            BlockdevOnError on_source_error,
+                            bool has_on_target_error,
+                            BlockdevOnError on_target_error,
+                            BlockJobTxn *txn, Error **errp)
 {
     BlockBackend *blk;
     BlockDriverState *bs;
@@ -2710,7 +2725,7 @@  void qmp_drive_backup(const char *device, const char *target,
 
     backup_start(bs, target_bs, speed, sync, bmap,
                  on_source_error, on_target_error,
-                 block_job_cb, bs, &local_err);
+                 block_job_cb, bs, txn, &local_err);
     if (local_err != NULL) {
         bdrv_unref(target_bs);
         error_propagate(errp, local_err);
@@ -2721,6 +2736,24 @@  out:
     aio_context_release(aio_context);
 }
 
+void qmp_drive_backup(const char *device, const char *target,
+                      bool has_format, const char *format,
+                      enum MirrorSyncMode sync,
+                      bool has_mode, enum NewImageMode mode,
+                      bool has_speed, int64_t speed,
+                      bool has_bitmap, const char *bitmap,
+                      bool has_on_source_error, BlockdevOnError on_source_error,
+                      bool has_on_target_error, BlockdevOnError on_target_error,
+                      Error **errp)
+{
+    return do_drive_backup(device, target, has_format, format, sync,
+                           has_mode, mode, has_speed, speed,
+                           has_bitmap, bitmap,
+                           has_on_source_error, on_source_error,
+                           has_on_target_error, on_target_error,
+                           NULL, errp);
+}
+
 BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp)
 {
     return bdrv_named_nodes_list(errp);
@@ -2771,7 +2804,7 @@  void qmp_blockdev_backup(const char *device, const char *target,
     bdrv_ref(target_bs);
     bdrv_set_aio_context(target_bs, aio_context);
     backup_start(bs, target_bs, speed, sync, NULL, on_source_error,
-                 on_target_error, block_job_cb, bs, &local_err);
+                 on_target_error, block_job_cb, bs, NULL, &local_err);
     if (local_err != NULL) {
         bdrv_unref(target_bs);
         error_propagate(errp, local_err);