diff mbox

[07/11] block: drive_backup transaction callback support

Message ID 1425528911-10300-8-git-send-email-jsnow@redhat.com
State New
Headers show

Commit Message

John Snow March 5, 2015, 4:15 a.m. UTC
This patch actually implements the transactional callback system
for the drive_backup transaction.

(1) We manually pick up a reference to the bitmap if present to allow
    its cleanup to be delayed until after all drive_backup jobs launched
    by the transaction have fully completed.

(2) We create a functional closure that envelops the original drive_backup
    callback, to be able to intercept the completion status and return code
    for the job.

(3) We add the drive_backup_cb method for the drive_backup action, which
    unpacks the completion information and invokes the final cleanup.

(4) backup_transaction_complete will perform the final cleanup on the
    backup job.

(5) In the case of transaction cancellation, drive_backup_cb is still
    responsible for cleaning up the mess we may have already made.

Signed-off-by: John Snow <jsnow@redhat.com>
---
 block/backup.c            |  9 +++++++
 blockdev.c                | 64 ++++++++++++++++++++++++++++++++++++++---------
 include/block/block_int.h |  8 ++++++
 3 files changed, 69 insertions(+), 12 deletions(-)

Comments

Max Reitz March 17, 2015, 7:49 p.m. UTC | #1
On 2015-03-04 at 23:15, John Snow wrote:
> This patch actually implements the transactional callback system
> for the drive_backup transaction.
>
> (1) We manually pick up a reference to the bitmap if present to allow
>      its cleanup to be delayed until after all drive_backup jobs launched
>      by the transaction have fully completed.
>
> (2) We create a functional closure that envelops the original drive_backup
>      callback, to be able to intercept the completion status and return code
>      for the job.
>
> (3) We add the drive_backup_cb method for the drive_backup action, which
>      unpacks the completion information and invokes the final cleanup.
>
> (4) backup_transaction_complete will perform the final cleanup on the
>      backup job.
>
> (5) In the case of transaction cancellation, drive_backup_cb is still
>      responsible for cleaning up the mess we may have already made.
>
> Signed-off-by: John Snow <jsnow@redhat.com>
> ---
>   block/backup.c            |  9 +++++++
>   blockdev.c                | 64 ++++++++++++++++++++++++++++++++++++++---------
>   include/block/block_int.h |  8 ++++++
>   3 files changed, 69 insertions(+), 12 deletions(-)
>
> diff --git a/block/backup.c b/block/backup.c
> index 4332df4..3673fb0 100644
> --- a/block/backup.c
> +++ b/block/backup.c
> @@ -233,6 +233,15 @@ typedef struct {
>       int ret;
>   } BackupCompleteData;
>   
> +void backup_transaction_complete(BlockJob *job, int ret)
> +{
> +    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
> +
> +    if (s->sync_bitmap) {
> +        bdrv_dirty_bitmap_decref(job->bs, s->sync_bitmap, ret, NULL);
> +    }
> +}
> +
>   static void backup_complete(BlockJob *job, void *opaque)
>   {
>       BackupBlockJob *s = container_of(job, BackupBlockJob, common);
> diff --git a/blockdev.c b/blockdev.c
> index 9e3b9e9..3ff14a7 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -1363,14 +1363,6 @@ static void transaction_callback(void *opaque, int ret)
>   
>   typedef void (CallbackFn)(void *opaque, int ret);
>   
> -/* Temporary. Removed in the next patch. */
> -CallbackFn *new_transaction_wrapper(BlkTransactionState *common,
> -                                    void *opaque,
> -                                    void (*callback)(void *, int),
> -                                    void **new_opaque);
> -
> -void undo_transaction_wrapper(BlkTransactionState *common);
> -
>   /**
>    * Create a new transactional callback wrapper.
>    *
> @@ -1389,7 +1381,7 @@ void undo_transaction_wrapper(BlkTransactionState *common);
>    *
>    * @return The callback to be used instead of @callback.
>    */
> -CallbackFn *new_transaction_wrapper(BlkTransactionState *common,
> +static CallbackFn *new_transaction_wrapper(BlkTransactionState *common,
>                                              void *opaque,
>                                              CallbackFn *callback,
>                                              void **new_opaque)
> @@ -1416,7 +1408,7 @@ CallbackFn *new_transaction_wrapper(BlkTransactionState *common,
>   /**
>    * Undo any actions performed by the above call.
>    */
> -void undo_transaction_wrapper(BlkTransactionState *common)
> +static void undo_transaction_wrapper(BlkTransactionState *common)
>   {
>       BlkTransactionList *btl = common->list;
>       BlkTransactionState *bts;
> @@ -1449,6 +1441,7 @@ void undo_transaction_wrapper(BlkTransactionState *common)
>       blk_put_transaction_state(common);
>   }
>   
> +static void block_job_cb(void *opaque, int ret);
>   static void _drive_backup(const char *device, const char *target,
>                             bool has_format, const char *format,
>                             enum MirrorSyncMode sync,
> @@ -1767,6 +1760,9 @@ static void drive_backup_prepare(BlkTransactionState *common, Error **errp)
>       BlockDriverState *bs;
>       DriveBackup *backup;
>       Error *local_err = NULL;
> +    CallbackFn *cb;
> +    void *opaque;
> +    BdrvDirtyBitmap *bmap = NULL;
>   
>       assert(common->action->kind == TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
>       backup = common->action->drive_backup;
> @@ -1777,6 +1773,19 @@ static void drive_backup_prepare(BlkTransactionState *common, Error **errp)
>           return;
>       }
>   
> +    /* BackupBlockJob is opaque to us, so look up the bitmap ourselves */
> +    if (backup->has_bitmap) {
> +        bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap);
> +        if (!bmap) {
> +            error_setg(errp, "Bitmap '%s' could not be found", backup->bitmap);
> +            return;
> +        }
> +    }
> +
> +    /* Create our transactional callback wrapper,
> +       and register that we'd like to call .cb() later. */
> +    cb = new_transaction_wrapper(common, bs, block_job_cb, &opaque);
> +
>       /* AioContext is released in .clean() */
>       state->aio_context = bdrv_get_aio_context(bs);
>       aio_context_acquire(state->aio_context);
> @@ -1789,7 +1798,7 @@ static void drive_backup_prepare(BlkTransactionState *common, Error **errp)
>                     backup->has_bitmap, backup->bitmap,
>                     backup->has_on_source_error, backup->on_source_error,
>                     backup->has_on_target_error, backup->on_target_error,
> -                  NULL, NULL,
> +                  cb, opaque,
>                     &local_err);
>       if (local_err) {
>           error_propagate(errp, local_err);
> @@ -1798,6 +1807,11 @@ static void drive_backup_prepare(BlkTransactionState *common, Error **errp)
>   
>       state->bs = bs;
>       state->job = state->bs->job;
> +    /* Keep the job alive until .cb(), too. */
> +    block_job_incref(state->job);
> +    if (bmap) {
> +        bdrv_dirty_bitmap_incref(bmap);
> +    }
>   }
>   
>   static void drive_backup_abort(BlkTransactionState *common)
> @@ -1809,6 +1823,10 @@ static void drive_backup_abort(BlkTransactionState *common)
>       if (bs && bs->job && bs->job == state->job) {
>           block_job_cancel_sync(bs->job);
>       }
> +
> +    /* Undo any callback actions we may have done. Putting down references
> +     * obtained during prepare() is handled by cb(). */
> +    undo_transaction_wrapper(common);
>   }
>   
>   static void drive_backup_clean(BlkTransactionState *common)
> @@ -1820,6 +1838,27 @@ static void drive_backup_clean(BlkTransactionState *common)
>       }
>   }
>   
> +static void drive_backup_cb(BlkTransactionState *common)
> +{
> +    BlkTransactionData *btd = common->opaque;
> +    BlockDriverState *bs = btd->opaque;
> +    DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
> +
> +    assert(state->bs == bs);
> +    if (bs->job) {
> +        assert(state->job == bs->job);
> +    }
> +
> +    state->aio_context = bdrv_get_aio_context(bs);
> +    aio_context_acquire(state->aio_context);
> +
> +    /* Note: We also have the individual job's return code in btd->ret */
> +    backup_transaction_complete(state->job, common->list->status);
> +    block_job_decref(state->job);
> +
> +    aio_context_release(state->aio_context);
> +}
> +
>   typedef struct BlockdevBackupState {
>       BlkTransactionState common;
>       BlockDriverState *bs;
> @@ -2004,7 +2043,8 @@ static const BdrvActionOps actions[] = {
>           .instance_size = sizeof(DriveBackupState),
>           .prepare = drive_backup_prepare,
>           .abort = drive_backup_abort,
> -        .clean = drive_backup_clean
> +        .clean = drive_backup_clean,
> +        .cb = drive_backup_cb
>       },
>       [TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = {
>           .instance_size = sizeof(BlockdevBackupState),
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index e0d5561..731684d 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -619,6 +619,14 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
>                     BlockCompletionFunc *cb, void *opaque,
>                     Error **errp);
>   
> +/*
> + * backup_transaction_complete
> + * @job The BackupJob that was associated with a transaction

s/BackupJob/backup block job/ or s/BackupJob/backup job/? (there is no 
structure named "BackupJob", but this looks like there might be one)

> + * @ret Amalgamated return code for the entire transaction

Hm. The call to this function you're introducing in this patch will 
probably stay the only one so there won't be anyone who'll have to worry 
about what this means, but if there was, they probably won't reach a 
conclusive result.

I know what it means because I've seen patch 3 (right now it means 
"everything OR-ed together so it's 0 for success or some non-zero (maybe 
positive, maybe negative, depending on whether you have an even or an 
odd number of errors, and depending on whether the jobs return negative 
values for errors or not) for error"), but I wouldn't be able to infer 
it from this. At the least you should add that 0 means success and 
everything else means error (if you take my suggestion for patch 3, it 
would be 0 for success and -errno for error, where that error number is 
one of the errors encountered).

Other than that, looks good (as far as I can tell with my still limited 
insights into patch 3).

Max

> + */
> +void backup_transaction_complete(BlockJob *job, int ret);
> +
> +
>   void blk_dev_change_media_cb(BlockBackend *blk, bool load);
>   bool blk_dev_has_removable_media(BlockBackend *blk);
>   void blk_dev_eject_request(BlockBackend *blk, bool force);
John Snow March 17, 2015, 11:27 p.m. UTC | #2
On 03/17/2015 03:49 PM, Max Reitz wrote:
> On 2015-03-04 at 23:15, John Snow wrote:
>> This patch actually implements the transactional callback system
>> for the drive_backup transaction.
>>
>> (1) We manually pick up a reference to the bitmap if present to allow
>>      its cleanup to be delayed until after all drive_backup jobs launched
>>      by the transaction have fully completed.
>>
>> (2) We create a functional closure that envelops the original
>> drive_backup
>>      callback, to be able to intercept the completion status and
>> return code
>>      for the job.
>>
>> (3) We add the drive_backup_cb method for the drive_backup action, which
>>      unpacks the completion information and invokes the final cleanup.
>>
>> (4) backup_transaction_complete will perform the final cleanup on the
>>      backup job.
>>
>> (5) In the case of transaction cancellation, drive_backup_cb is still
>>      responsible for cleaning up the mess we may have already made.
>>
>> Signed-off-by: John Snow <jsnow@redhat.com>
>> ---
>>   block/backup.c            |  9 +++++++
>>   blockdev.c                | 64
>> ++++++++++++++++++++++++++++++++++++++---------
>>   include/block/block_int.h |  8 ++++++
>>   3 files changed, 69 insertions(+), 12 deletions(-)
>>
>> diff --git a/block/backup.c b/block/backup.c
>> index 4332df4..3673fb0 100644
>> --- a/block/backup.c
>> +++ b/block/backup.c
>> @@ -233,6 +233,15 @@ typedef struct {
>>       int ret;
>>   } BackupCompleteData;
>> +void backup_transaction_complete(BlockJob *job, int ret)
>> +{
>> +    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
>> +
>> +    if (s->sync_bitmap) {
>> +        bdrv_dirty_bitmap_decref(job->bs, s->sync_bitmap, ret, NULL);
>> +    }
>> +}
>> +
>>   static void backup_complete(BlockJob *job, void *opaque)
>>   {
>>       BackupBlockJob *s = container_of(job, BackupBlockJob, common);
>> diff --git a/blockdev.c b/blockdev.c
>> index 9e3b9e9..3ff14a7 100644
>> --- a/blockdev.c
>> +++ b/blockdev.c
>> @@ -1363,14 +1363,6 @@ static void transaction_callback(void *opaque,
>> int ret)
>>   typedef void (CallbackFn)(void *opaque, int ret);
>> -/* Temporary. Removed in the next patch. */
>> -CallbackFn *new_transaction_wrapper(BlkTransactionState *common,
>> -                                    void *opaque,
>> -                                    void (*callback)(void *, int),
>> -                                    void **new_opaque);
>> -
>> -void undo_transaction_wrapper(BlkTransactionState *common);
>> -
>>   /**
>>    * Create a new transactional callback wrapper.
>>    *
>> @@ -1389,7 +1381,7 @@ void
>> undo_transaction_wrapper(BlkTransactionState *common);
>>    *
>>    * @return The callback to be used instead of @callback.
>>    */
>> -CallbackFn *new_transaction_wrapper(BlkTransactionState *common,
>> +static CallbackFn *new_transaction_wrapper(BlkTransactionState *common,
>>                                              void *opaque,
>>                                              CallbackFn *callback,
>>                                              void **new_opaque)
>> @@ -1416,7 +1408,7 @@ CallbackFn
>> *new_transaction_wrapper(BlkTransactionState *common,
>>   /**
>>    * Undo any actions performed by the above call.
>>    */
>> -void undo_transaction_wrapper(BlkTransactionState *common)
>> +static void undo_transaction_wrapper(BlkTransactionState *common)
>>   {
>>       BlkTransactionList *btl = common->list;
>>       BlkTransactionState *bts;
>> @@ -1449,6 +1441,7 @@ void
>> undo_transaction_wrapper(BlkTransactionState *common)
>>       blk_put_transaction_state(common);
>>   }
>> +static void block_job_cb(void *opaque, int ret);
>>   static void _drive_backup(const char *device, const char *target,
>>                             bool has_format, const char *format,
>>                             enum MirrorSyncMode sync,
>> @@ -1767,6 +1760,9 @@ static void
>> drive_backup_prepare(BlkTransactionState *common, Error **errp)
>>       BlockDriverState *bs;
>>       DriveBackup *backup;
>>       Error *local_err = NULL;
>> +    CallbackFn *cb;
>> +    void *opaque;
>> +    BdrvDirtyBitmap *bmap = NULL;
>>       assert(common->action->kind ==
>> TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
>>       backup = common->action->drive_backup;
>> @@ -1777,6 +1773,19 @@ static void
>> drive_backup_prepare(BlkTransactionState *common, Error **errp)
>>           return;
>>       }
>> +    /* BackupBlockJob is opaque to us, so look up the bitmap
>> ourselves */
>> +    if (backup->has_bitmap) {
>> +        bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap);
>> +        if (!bmap) {
>> +            error_setg(errp, "Bitmap '%s' could not be found",
>> backup->bitmap);
>> +            return;
>> +        }
>> +    }
>> +
>> +    /* Create our transactional callback wrapper,
>> +       and register that we'd like to call .cb() later. */
>> +    cb = new_transaction_wrapper(common, bs, block_job_cb, &opaque);
>> +
>>       /* AioContext is released in .clean() */
>>       state->aio_context = bdrv_get_aio_context(bs);
>>       aio_context_acquire(state->aio_context);
>> @@ -1789,7 +1798,7 @@ static void
>> drive_backup_prepare(BlkTransactionState *common, Error **errp)
>>                     backup->has_bitmap, backup->bitmap,
>>                     backup->has_on_source_error, backup->on_source_error,
>>                     backup->has_on_target_error, backup->on_target_error,
>> -                  NULL, NULL,
>> +                  cb, opaque,
>>                     &local_err);
>>       if (local_err) {
>>           error_propagate(errp, local_err);
>> @@ -1798,6 +1807,11 @@ static void
>> drive_backup_prepare(BlkTransactionState *common, Error **errp)
>>       state->bs = bs;
>>       state->job = state->bs->job;
>> +    /* Keep the job alive until .cb(), too. */
>> +    block_job_incref(state->job);
>> +    if (bmap) {
>> +        bdrv_dirty_bitmap_incref(bmap);
>> +    }
>>   }
>>   static void drive_backup_abort(BlkTransactionState *common)
>> @@ -1809,6 +1823,10 @@ static void
>> drive_backup_abort(BlkTransactionState *common)
>>       if (bs && bs->job && bs->job == state->job) {
>>           block_job_cancel_sync(bs->job);
>>       }
>> +
>> +    /* Undo any callback actions we may have done. Putting down
>> references
>> +     * obtained during prepare() is handled by cb(). */
>> +    undo_transaction_wrapper(common);
>>   }
>>   static void drive_backup_clean(BlkTransactionState *common)
>> @@ -1820,6 +1838,27 @@ static void
>> drive_backup_clean(BlkTransactionState *common)
>>       }
>>   }
>> +static void drive_backup_cb(BlkTransactionState *common)
>> +{
>> +    BlkTransactionData *btd = common->opaque;
>> +    BlockDriverState *bs = btd->opaque;
>> +    DriveBackupState *state = DO_UPCAST(DriveBackupState, common,
>> common);
>> +
>> +    assert(state->bs == bs);
>> +    if (bs->job) {
>> +        assert(state->job == bs->job);
>> +    }
>> +
>> +    state->aio_context = bdrv_get_aio_context(bs);
>> +    aio_context_acquire(state->aio_context);
>> +
>> +    /* Note: We also have the individual job's return code in
>> btd->ret */
>> +    backup_transaction_complete(state->job, common->list->status);
>> +    block_job_decref(state->job);
>> +
>> +    aio_context_release(state->aio_context);
>> +}
>> +
>>   typedef struct BlockdevBackupState {
>>       BlkTransactionState common;
>>       BlockDriverState *bs;
>> @@ -2004,7 +2043,8 @@ static const BdrvActionOps actions[] = {
>>           .instance_size = sizeof(DriveBackupState),
>>           .prepare = drive_backup_prepare,
>>           .abort = drive_backup_abort,
>> -        .clean = drive_backup_clean
>> +        .clean = drive_backup_clean,
>> +        .cb = drive_backup_cb
>>       },
>>       [TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = {
>>           .instance_size = sizeof(BlockdevBackupState),
>> diff --git a/include/block/block_int.h b/include/block/block_int.h
>> index e0d5561..731684d 100644
>> --- a/include/block/block_int.h
>> +++ b/include/block/block_int.h
>> @@ -619,6 +619,14 @@ void backup_start(BlockDriverState *bs,
>> BlockDriverState *target,
>>                     BlockCompletionFunc *cb, void *opaque,
>>                     Error **errp);
>> +/*
>> + * backup_transaction_complete
>> + * @job The BackupJob that was associated with a transaction
>
> s/BackupJob/backup block job/ or s/BackupJob/backup job/? (there is no
> structure named "BackupJob", but this looks like there might be one)
>
>> + * @ret Amalgamated return code for the entire transaction
>
> Hm. The call to this function you're introducing in this patch will
> probably stay the only one so there won't be anyone who'll have to worry
> about what this means, but if there was, they probably won't reach a
> conclusive result.
>
> I know what it means because I've seen patch 3 (right now it means
> "everything OR-ed together so it's 0 for success or some non-zero (maybe
> positive, maybe negative, depending on whether you have an even or an
> odd number of errors, and depending on whether the jobs return negative
> values for errors or not) for error"), but I wouldn't be able to infer
> it from this. At the least you should add that 0 means success and
> everything else means error (if you take my suggestion for patch 3, it
> would be 0 for success and -errno for error, where that error number is
> one of the errors encountered).
>
> Other than that, looks good (as far as I can tell with my still limited
> insights into patch 3).
>
> Max
>

It's my opinion that this patch should give you insight into patch #3, 
instead of the other way around. This patch is useful for demonstrating 
the general flow, because I kept all drive_backup specific concerns 
strictly separated from patch #3.




I'll write a more comprehensive document for the docs/ folder soon, but 
the general shape of the idea is "The cleanup actions defined in the 
.cb() method will be executed after every job in the transactional group 
has completed."

But there's some fine print:

"'every job' refers only to jobs that have the capability to register a 
post-transaction callback, which currently means only drive_backup."

The general approach to this is, mechanically:

(1) Extend the life of objects of interest with reference counts, 
including Jobs, Bitmaps, and BlkTransactionStates.

(2) "steal" the callback from a BlockJob and, when invoked, update our 
management information for the transaction group in BlkTransactionList.

(3) Once all jobs we have sent out return, execute the .cb() methods as 
indicated in the BlkTransactionList.



So, if you were adding a callback to a different QMP transaction:
- Look at new_transaction_wrapper; you'll use this to bump up various 
references used internally for this whole system, and it'll keep 
qmp_transaction from being able to delete the list and state objects.

   This function will give to you (as a gimmick) a new callback and 
opaque data structure pointer for you to give to the job that you are 
starting. I obfuscate this just a bit to make it clear that you should 
be using this function every time to help manage the transactional state.

- Now, the job will run on its own happy way, When it finishes, it will 
call transaction_callback, which is the function that "intercepts" the 
callbacks, and dispatches the original enveloped callbacks. It ditches 
the original data we used to know how to call the original callback, and 
begins storing completion information for jobs instead.

- transaction_callback will call blk_transaction_complete to store 
completion info for one job. In turn, it calls put_blk_transaction_list 
to decrement the pending jobs count (the "jobs" refcount) and once that 
hits zero ...

- All of the callback mechanisms associated with each 
BlockTransactionState are run via blk_run_transaction_callbacks.

- This is where drive_backup_cb is going to get invoked again, and then 
we will splinter back out into backup-specific concerns, with Jobs and 
Bitmaps and what not.


 From the point of view of the "transaction action developer," the 
interface to the new feature is found via the "new_transaction_wrapper" 
function, But some care must be taken to make sure this callback 
actually gets used once it has been requested. The whole thing can be 
aborted with the "undo_transaction_wrapper" function.

All of the other functions that got added to blockdev.c in Patch #3 are 
there to assist these two "interface" functions in doing what they claim to.

Everything in patch 4 and 5 just adds more reference count flexibility 
to things that are only of interest to drive_backup.

Patch 6 allows us to inject our special callback wrapper into the 
qmp_drive_backup function.


Clear as mud?
--js

>> + */
>> +void backup_transaction_complete(BlockJob *job, int ret);
>> +
>> +
>>   void blk_dev_change_media_cb(BlockBackend *blk, bool load);
>>   bool blk_dev_has_removable_media(BlockBackend *blk);
>>   void blk_dev_eject_request(BlockBackend *blk, bool force);
>
>
Max Reitz March 18, 2015, 1:41 p.m. UTC | #3
On 2015-03-17 at 19:27, John Snow wrote:
>
>
> On 03/17/2015 03:49 PM, Max Reitz wrote:
>> On 2015-03-04 at 23:15, John Snow wrote:
>>> This patch actually implements the transactional callback system
>>> for the drive_backup transaction.
>>>
>>> (1) We manually pick up a reference to the bitmap if present to allow
>>>      its cleanup to be delayed until after all drive_backup jobs 
>>> launched
>>>      by the transaction have fully completed.
>>>
>>> (2) We create a functional closure that envelops the original
>>> drive_backup
>>>      callback, to be able to intercept the completion status and
>>> return code
>>>      for the job.
>>>
>>> (3) We add the drive_backup_cb method for the drive_backup action, 
>>> which
>>>      unpacks the completion information and invokes the final cleanup.
>>>
>>> (4) backup_transaction_complete will perform the final cleanup on the
>>>      backup job.
>>>
>>> (5) In the case of transaction cancellation, drive_backup_cb is still
>>>      responsible for cleaning up the mess we may have already made.
>>>
>>> Signed-off-by: John Snow <jsnow@redhat.com>
>>> ---
>>>   block/backup.c            |  9 +++++++
>>>   blockdev.c                | 64
>>> ++++++++++++++++++++++++++++++++++++++---------
>>>   include/block/block_int.h |  8 ++++++
>>>   3 files changed, 69 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/block/backup.c b/block/backup.c
>>> index 4332df4..3673fb0 100644
>>> --- a/block/backup.c
>>> +++ b/block/backup.c
>>> @@ -233,6 +233,15 @@ typedef struct {
>>>       int ret;
>>>   } BackupCompleteData;
>>> +void backup_transaction_complete(BlockJob *job, int ret)
>>> +{
>>> +    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
>>> +
>>> +    if (s->sync_bitmap) {
>>> +        bdrv_dirty_bitmap_decref(job->bs, s->sync_bitmap, ret, NULL);
>>> +    }
>>> +}
>>> +
>>>   static void backup_complete(BlockJob *job, void *opaque)
>>>   {
>>>       BackupBlockJob *s = container_of(job, BackupBlockJob, common);
>>> diff --git a/blockdev.c b/blockdev.c
>>> index 9e3b9e9..3ff14a7 100644
>>> --- a/blockdev.c
>>> +++ b/blockdev.c
>>> @@ -1363,14 +1363,6 @@ static void transaction_callback(void *opaque,
>>> int ret)
>>>   typedef void (CallbackFn)(void *opaque, int ret);
>>> -/* Temporary. Removed in the next patch. */
>>> -CallbackFn *new_transaction_wrapper(BlkTransactionState *common,
>>> -                                    void *opaque,
>>> -                                    void (*callback)(void *, int),
>>> -                                    void **new_opaque);
>>> -
>>> -void undo_transaction_wrapper(BlkTransactionState *common);
>>> -
>>>   /**
>>>    * Create a new transactional callback wrapper.
>>>    *
>>> @@ -1389,7 +1381,7 @@ void
>>> undo_transaction_wrapper(BlkTransactionState *common);
>>>    *
>>>    * @return The callback to be used instead of @callback.
>>>    */
>>> -CallbackFn *new_transaction_wrapper(BlkTransactionState *common,
>>> +static CallbackFn *new_transaction_wrapper(BlkTransactionState 
>>> *common,
>>>                                              void *opaque,
>>>                                              CallbackFn *callback,
>>>                                              void **new_opaque)
>>> @@ -1416,7 +1408,7 @@ CallbackFn
>>> *new_transaction_wrapper(BlkTransactionState *common,
>>>   /**
>>>    * Undo any actions performed by the above call.
>>>    */
>>> -void undo_transaction_wrapper(BlkTransactionState *common)
>>> +static void undo_transaction_wrapper(BlkTransactionState *common)
>>>   {
>>>       BlkTransactionList *btl = common->list;
>>>       BlkTransactionState *bts;
>>> @@ -1449,6 +1441,7 @@ void
>>> undo_transaction_wrapper(BlkTransactionState *common)
>>>       blk_put_transaction_state(common);
>>>   }
>>> +static void block_job_cb(void *opaque, int ret);
>>>   static void _drive_backup(const char *device, const char *target,
>>>                             bool has_format, const char *format,
>>>                             enum MirrorSyncMode sync,
>>> @@ -1767,6 +1760,9 @@ static void
>>> drive_backup_prepare(BlkTransactionState *common, Error **errp)
>>>       BlockDriverState *bs;
>>>       DriveBackup *backup;
>>>       Error *local_err = NULL;
>>> +    CallbackFn *cb;
>>> +    void *opaque;
>>> +    BdrvDirtyBitmap *bmap = NULL;
>>>       assert(common->action->kind ==
>>> TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
>>>       backup = common->action->drive_backup;
>>> @@ -1777,6 +1773,19 @@ static void
>>> drive_backup_prepare(BlkTransactionState *common, Error **errp)
>>>           return;
>>>       }
>>> +    /* BackupBlockJob is opaque to us, so look up the bitmap
>>> ourselves */
>>> +    if (backup->has_bitmap) {
>>> +        bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap);
>>> +        if (!bmap) {
>>> +            error_setg(errp, "Bitmap '%s' could not be found",
>>> backup->bitmap);
>>> +            return;
>>> +        }
>>> +    }
>>> +
>>> +    /* Create our transactional callback wrapper,
>>> +       and register that we'd like to call .cb() later. */
>>> +    cb = new_transaction_wrapper(common, bs, block_job_cb, &opaque);
>>> +
>>>       /* AioContext is released in .clean() */
>>>       state->aio_context = bdrv_get_aio_context(bs);
>>>       aio_context_acquire(state->aio_context);
>>> @@ -1789,7 +1798,7 @@ static void
>>> drive_backup_prepare(BlkTransactionState *common, Error **errp)
>>>                     backup->has_bitmap, backup->bitmap,
>>>                     backup->has_on_source_error, 
>>> backup->on_source_error,
>>>                     backup->has_on_target_error, 
>>> backup->on_target_error,
>>> -                  NULL, NULL,
>>> +                  cb, opaque,
>>>                     &local_err);
>>>       if (local_err) {
>>>           error_propagate(errp, local_err);
>>> @@ -1798,6 +1807,11 @@ static void
>>> drive_backup_prepare(BlkTransactionState *common, Error **errp)
>>>       state->bs = bs;
>>>       state->job = state->bs->job;
>>> +    /* Keep the job alive until .cb(), too. */
>>> +    block_job_incref(state->job);
>>> +    if (bmap) {
>>> +        bdrv_dirty_bitmap_incref(bmap);
>>> +    }
>>>   }
>>>   static void drive_backup_abort(BlkTransactionState *common)
>>> @@ -1809,6 +1823,10 @@ static void
>>> drive_backup_abort(BlkTransactionState *common)
>>>       if (bs && bs->job && bs->job == state->job) {
>>>           block_job_cancel_sync(bs->job);
>>>       }
>>> +
>>> +    /* Undo any callback actions we may have done. Putting down
>>> references
>>> +     * obtained during prepare() is handled by cb(). */
>>> +    undo_transaction_wrapper(common);
>>>   }
>>>   static void drive_backup_clean(BlkTransactionState *common)
>>> @@ -1820,6 +1838,27 @@ static void
>>> drive_backup_clean(BlkTransactionState *common)
>>>       }
>>>   }
>>> +static void drive_backup_cb(BlkTransactionState *common)
>>> +{
>>> +    BlkTransactionData *btd = common->opaque;
>>> +    BlockDriverState *bs = btd->opaque;
>>> +    DriveBackupState *state = DO_UPCAST(DriveBackupState, common,
>>> common);
>>> +
>>> +    assert(state->bs == bs);
>>> +    if (bs->job) {
>>> +        assert(state->job == bs->job);
>>> +    }
>>> +
>>> +    state->aio_context = bdrv_get_aio_context(bs);
>>> +    aio_context_acquire(state->aio_context);
>>> +
>>> +    /* Note: We also have the individual job's return code in
>>> btd->ret */
>>> +    backup_transaction_complete(state->job, common->list->status);
>>> +    block_job_decref(state->job);
>>> +
>>> +    aio_context_release(state->aio_context);
>>> +}
>>> +
>>>   typedef struct BlockdevBackupState {
>>>       BlkTransactionState common;
>>>       BlockDriverState *bs;
>>> @@ -2004,7 +2043,8 @@ static const BdrvActionOps actions[] = {
>>>           .instance_size = sizeof(DriveBackupState),
>>>           .prepare = drive_backup_prepare,
>>>           .abort = drive_backup_abort,
>>> -        .clean = drive_backup_clean
>>> +        .clean = drive_backup_clean,
>>> +        .cb = drive_backup_cb
>>>       },
>>>       [TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = {
>>>           .instance_size = sizeof(BlockdevBackupState),
>>> diff --git a/include/block/block_int.h b/include/block/block_int.h
>>> index e0d5561..731684d 100644
>>> --- a/include/block/block_int.h
>>> +++ b/include/block/block_int.h
>>> @@ -619,6 +619,14 @@ void backup_start(BlockDriverState *bs,
>>> BlockDriverState *target,
>>>                     BlockCompletionFunc *cb, void *opaque,
>>>                     Error **errp);
>>> +/*
>>> + * backup_transaction_complete
>>> + * @job The BackupJob that was associated with a transaction
>>
>> s/BackupJob/backup block job/ or s/BackupJob/backup job/? (there is no
>> structure named "BackupJob", but this looks like there might be one)
>>
>>> + * @ret Amalgamated return code for the entire transaction
>>
>> Hm. The call to this function you're introducing in this patch will
>> probably stay the only one so there won't be anyone who'll have to worry
>> about what this means, but if there was, they probably won't reach a
>> conclusive result.
>>
>> I know what it means because I've seen patch 3 (right now it means
>> "everything OR-ed together so it's 0 for success or some non-zero (maybe
>> positive, maybe negative, depending on whether you have an even or an
>> odd number of errors, and depending on whether the jobs return negative
>> values for errors or not) for error"), but I wouldn't be able to infer
>> it from this. At the least you should add that 0 means success and
>> everything else means error (if you take my suggestion for patch 3, it
>> would be 0 for success and -errno for error, where that error number is
>> one of the errors encountered).
>>
>> Other than that, looks good (as far as I can tell with my still limited
>> insights into patch 3).
>>
>> Max
>>
>
> It's my opinion that this patch should give you insight into patch #3, 
> instead of the other way around. This patch is useful for 
> demonstrating the general flow, because I kept all drive_backup 
> specific concerns strictly separated from patch #3.

It only truly helps me understand patch 3 under the assumption that this 
patch is correct. For reviewing, I cannot really take that assumption. ;-)

I mean, it does help me with the interfaces patch 3 offers, but it still 
doesn't help me with the objects introduced in patch 3, for instance, 
because those are all contained in patch 3 itself. And if I cannot 
verify patch 3 on its own, I cannot really verify that what this patch 
does is correct in regards to the interfaces offered there.

> I'll write a more comprehensive document for the docs/ folder soon, 
> but the general shape of the idea is "The cleanup actions defined in 
> the .cb() method will be executed after every job in the transactional 
> group has completed."

Right, again, assuming your implementation is correct, but questioning 
that is part of the natural reviewing process. :-)

> But there's some fine print:
>
> "'every job' refers only to jobs that have the capability to register 
> a post-transaction callback, which currently means only drive_backup."
>
> The general approach to this is, mechanically:
>
> (1) Extend the life of objects of interest with reference counts, 
> including Jobs, Bitmaps, and BlkTransactionStates.
>
> (2) "steal" the callback from a BlockJob and, when invoked, update our 
> management information for the transaction group in BlkTransactionList.
>
> (3) Once all jobs we have sent out return, execute the .cb() methods 
> as indicated in the BlkTransactionList.
>
>
>
> So, if you were adding a callback to a different QMP transaction:
> - Look at new_transaction_wrapper; you'll use this to bump up various 
> references used internally for this whole system, and it'll keep 
> qmp_transaction from being able to delete the list and state objects.
>
>   This function will give to you (as a gimmick) a new callback and 
> opaque data structure pointer for you to give to the job that you are 
> starting. I obfuscate this just a bit to make it clear that you should 
> be using this function every time to help manage the transactional state.
>
> - Now, the job will run on its own happy way, When it finishes, it 
> will call transaction_callback, which is the function that 
> "intercepts" the callbacks, and dispatches the original enveloped 
> callbacks. It ditches the original data we used to know how to call 
> the original callback, and begins storing completion information for 
> jobs instead.
>
> - transaction_callback will call blk_transaction_complete to store 
> completion info for one job. In turn, it calls 
> put_blk_transaction_list to decrement the pending jobs count (the 
> "jobs" refcount) and once that hits zero ...
>
> - All of the callback mechanisms associated with each 
> BlockTransactionState are run via blk_run_transaction_callbacks.
>
> - This is where drive_backup_cb is going to get invoked again, and 
> then we will splinter back out into backup-specific concerns, with 
> Jobs and Bitmaps and what not.

Yes, I can see the control flow; my main problem lies in the three 
different structs introduced in patch 3. I'll tell you why I'm confused 
by them:

First, there is BlkTransactionList. Sounds to me like it should be a 
list of transactions. Judging from its members, it is rather a list of 
operations ("actions") for a single transaction, however. Apparently, 
there is a double-use for the word "transaction": I'd consider a 
transaction to be the thing started by the QMP command which is a group 
of atomic operations. However, that one structure is called 
BlkTransactionState, but it apparently this is only the state of a 
single operation in the transaction. Pre-existing, but doesn't make it 
better. You added comments for its list entry members, with the first 
one stating "All transactions in the current group" and the second being 
a subset of that. However, again, I consider these to be operations and 
not transactions. The group is the transaction.

So we have that adding to my confusion. Furthermore, I think we don't 
need to list entry members (entry and list_entry) in 
BlkTransactionState. I think it should be fine to drop the 
snap_bdrv_states list in qmp_transaction() and just add all 
BlkTransactionStates to the BlkTransactionList (or however you name it 
if you rename it) in the preparation while () loop there. I don't think 
we need to add them only in blk_transaction_complete() (and we should 
drop that from there if it's been done in qmp_transaction() already, of 
course). Then we can really just use that list generally as *the* list 
of operations in a transaction.

Second, there is BlkTransactionData. From the name I cannot judge at all 
how it is supposed to differ from BlkTransactionState, because, well, it 
contains some data about a transaction... Maybe, because I consider 
BlkTransactionState to be misnamed, BlkTransactionData actually contains 
data about the transaction and not about a single operation? But from 
looking at the members, I have no idea. It has "opaque" and "ret"... I 
don't know how that is vital data to the transaction itself (and I don't 
think it is, it is rather data for the completion callback, I guess?), 
it only looks like some sort of pattern I have seen for AIO callbacks 
and the like.

Third, there is BlkTransactionWrapper. I suppose it wraps a 
transaction...? Or maybe at least an operation, because apparently I can 
no longer be sure whether a transaction is a transaction or an 
operation... But there is no transaction or even a single operation in 
there. It's just a structure describing a callback. I don't know how 
that is a "transaction wrapper"...

So, I don't understand any of the names given to the objects, and while 
I might understand what they are used for by looking at them, I have a 
*really* hard time understanding the code that uses them, because if I 
don't stare at their definitions all the time, I will immediately forget 
what they contain and what they are used for because I don't understand 
the names at all (and there is nothing which explicitly tells me what 
they are used for either). The fact that all of them are starting with 
BlkTransaction makes mixing them up even easier.

> From the point of view of the "transaction action developer," the 
> interface to the new feature is found via the 
> "new_transaction_wrapper" function, But some care must be taken to 
> make sure this callback actually gets used once it has been requested. 
> The whole thing can be aborted with the "undo_transaction_wrapper" 
> function.
>
> All of the other functions that got added to blockdev.c in Patch #3 
> are there to assist these two "interface" functions in doing what they 
> claim to.
>
> Everything in patch 4 and 5 just adds more reference count flexibility 
> to things that are only of interest to drive_backup.
>
> Patch 6 allows us to inject our special callback wrapper into the 
> qmp_drive_backup function.
>
>
> Clear as mud?

Control flow and concept, clear. Implementation, still very muddy, sorry...

Max

> --js
>
>>> + */
>>> +void backup_transaction_complete(BlockJob *job, int ret);
>>> +
>>> +
>>>   void blk_dev_change_media_cb(BlockBackend *blk, bool load);
>>>   bool blk_dev_has_removable_media(BlockBackend *blk);
>>>   void blk_dev_eject_request(BlockBackend *blk, bool force);
>>
>>
John Snow March 18, 2015, 7:51 p.m. UTC | #4
On 03/18/2015 09:41 AM, Max Reitz wrote:
> On 2015-03-17 at 19:27, John Snow wrote:
>>
>>
>> On 03/17/2015 03:49 PM, Max Reitz wrote:
>>> On 2015-03-04 at 23:15, John Snow wrote:
>>>> This patch actually implements the transactional callback system
>>>> for the drive_backup transaction.
>>>>
>>>> (1) We manually pick up a reference to the bitmap if present to allow
>>>>      its cleanup to be delayed until after all drive_backup jobs
>>>> launched
>>>>      by the transaction have fully completed.
>>>>
>>>> (2) We create a functional closure that envelops the original
>>>> drive_backup
>>>>      callback, to be able to intercept the completion status and
>>>> return code
>>>>      for the job.
>>>>
>>>> (3) We add the drive_backup_cb method for the drive_backup action,
>>>> which
>>>>      unpacks the completion information and invokes the final cleanup.
>>>>
>>>> (4) backup_transaction_complete will perform the final cleanup on the
>>>>      backup job.
>>>>
>>>> (5) In the case of transaction cancellation, drive_backup_cb is still
>>>>      responsible for cleaning up the mess we may have already made.
>>>>
>>>> Signed-off-by: John Snow <jsnow@redhat.com>
>>>> ---
>>>>   block/backup.c            |  9 +++++++
>>>>   blockdev.c                | 64
>>>> ++++++++++++++++++++++++++++++++++++++---------
>>>>   include/block/block_int.h |  8 ++++++
>>>>   3 files changed, 69 insertions(+), 12 deletions(-)
>>>>
>>>> diff --git a/block/backup.c b/block/backup.c
>>>> index 4332df4..3673fb0 100644
>>>> --- a/block/backup.c
>>>> +++ b/block/backup.c
>>>> @@ -233,6 +233,15 @@ typedef struct {
>>>>       int ret;
>>>>   } BackupCompleteData;
>>>> +void backup_transaction_complete(BlockJob *job, int ret)
>>>> +{
>>>> +    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
>>>> +
>>>> +    if (s->sync_bitmap) {
>>>> +        bdrv_dirty_bitmap_decref(job->bs, s->sync_bitmap, ret, NULL);
>>>> +    }
>>>> +}
>>>> +
>>>>   static void backup_complete(BlockJob *job, void *opaque)
>>>>   {
>>>>       BackupBlockJob *s = container_of(job, BackupBlockJob, common);
>>>> diff --git a/blockdev.c b/blockdev.c
>>>> index 9e3b9e9..3ff14a7 100644
>>>> --- a/blockdev.c
>>>> +++ b/blockdev.c
>>>> @@ -1363,14 +1363,6 @@ static void transaction_callback(void *opaque,
>>>> int ret)
>>>>   typedef void (CallbackFn)(void *opaque, int ret);
>>>> -/* Temporary. Removed in the next patch. */
>>>> -CallbackFn *new_transaction_wrapper(BlkTransactionState *common,
>>>> -                                    void *opaque,
>>>> -                                    void (*callback)(void *, int),
>>>> -                                    void **new_opaque);
>>>> -
>>>> -void undo_transaction_wrapper(BlkTransactionState *common);
>>>> -
>>>>   /**
>>>>    * Create a new transactional callback wrapper.
>>>>    *
>>>> @@ -1389,7 +1381,7 @@ void
>>>> undo_transaction_wrapper(BlkTransactionState *common);
>>>>    *
>>>>    * @return The callback to be used instead of @callback.
>>>>    */
>>>> -CallbackFn *new_transaction_wrapper(BlkTransactionState *common,
>>>> +static CallbackFn *new_transaction_wrapper(BlkTransactionState
>>>> *common,
>>>>                                              void *opaque,
>>>>                                              CallbackFn *callback,
>>>>                                              void **new_opaque)
>>>> @@ -1416,7 +1408,7 @@ CallbackFn
>>>> *new_transaction_wrapper(BlkTransactionState *common,
>>>>   /**
>>>>    * Undo any actions performed by the above call.
>>>>    */
>>>> -void undo_transaction_wrapper(BlkTransactionState *common)
>>>> +static void undo_transaction_wrapper(BlkTransactionState *common)
>>>>   {
>>>>       BlkTransactionList *btl = common->list;
>>>>       BlkTransactionState *bts;
>>>> @@ -1449,6 +1441,7 @@ void
>>>> undo_transaction_wrapper(BlkTransactionState *common)
>>>>       blk_put_transaction_state(common);
>>>>   }
>>>> +static void block_job_cb(void *opaque, int ret);
>>>>   static void _drive_backup(const char *device, const char *target,
>>>>                             bool has_format, const char *format,
>>>>                             enum MirrorSyncMode sync,
>>>> @@ -1767,6 +1760,9 @@ static void
>>>> drive_backup_prepare(BlkTransactionState *common, Error **errp)
>>>>       BlockDriverState *bs;
>>>>       DriveBackup *backup;
>>>>       Error *local_err = NULL;
>>>> +    CallbackFn *cb;
>>>> +    void *opaque;
>>>> +    BdrvDirtyBitmap *bmap = NULL;
>>>>       assert(common->action->kind ==
>>>> TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
>>>>       backup = common->action->drive_backup;
>>>> @@ -1777,6 +1773,19 @@ static void
>>>> drive_backup_prepare(BlkTransactionState *common, Error **errp)
>>>>           return;
>>>>       }
>>>> +    /* BackupBlockJob is opaque to us, so look up the bitmap
>>>> ourselves */
>>>> +    if (backup->has_bitmap) {
>>>> +        bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap);
>>>> +        if (!bmap) {
>>>> +            error_setg(errp, "Bitmap '%s' could not be found",
>>>> backup->bitmap);
>>>> +            return;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    /* Create our transactional callback wrapper,
>>>> +       and register that we'd like to call .cb() later. */
>>>> +    cb = new_transaction_wrapper(common, bs, block_job_cb, &opaque);
>>>> +
>>>>       /* AioContext is released in .clean() */
>>>>       state->aio_context = bdrv_get_aio_context(bs);
>>>>       aio_context_acquire(state->aio_context);
>>>> @@ -1789,7 +1798,7 @@ static void
>>>> drive_backup_prepare(BlkTransactionState *common, Error **errp)
>>>>                     backup->has_bitmap, backup->bitmap,
>>>>                     backup->has_on_source_error,
>>>> backup->on_source_error,
>>>>                     backup->has_on_target_error,
>>>> backup->on_target_error,
>>>> -                  NULL, NULL,
>>>> +                  cb, opaque,
>>>>                     &local_err);
>>>>       if (local_err) {
>>>>           error_propagate(errp, local_err);
>>>> @@ -1798,6 +1807,11 @@ static void
>>>> drive_backup_prepare(BlkTransactionState *common, Error **errp)
>>>>       state->bs = bs;
>>>>       state->job = state->bs->job;
>>>> +    /* Keep the job alive until .cb(), too. */
>>>> +    block_job_incref(state->job);
>>>> +    if (bmap) {
>>>> +        bdrv_dirty_bitmap_incref(bmap);
>>>> +    }
>>>>   }
>>>>   static void drive_backup_abort(BlkTransactionState *common)
>>>> @@ -1809,6 +1823,10 @@ static void
>>>> drive_backup_abort(BlkTransactionState *common)
>>>>       if (bs && bs->job && bs->job == state->job) {
>>>>           block_job_cancel_sync(bs->job);
>>>>       }
>>>> +
>>>> +    /* Undo any callback actions we may have done. Putting down
>>>> references
>>>> +     * obtained during prepare() is handled by cb(). */
>>>> +    undo_transaction_wrapper(common);
>>>>   }
>>>>   static void drive_backup_clean(BlkTransactionState *common)
>>>> @@ -1820,6 +1838,27 @@ static void
>>>> drive_backup_clean(BlkTransactionState *common)
>>>>       }
>>>>   }
>>>> +static void drive_backup_cb(BlkTransactionState *common)
>>>> +{
>>>> +    BlkTransactionData *btd = common->opaque;
>>>> +    BlockDriverState *bs = btd->opaque;
>>>> +    DriveBackupState *state = DO_UPCAST(DriveBackupState, common,
>>>> common);
>>>> +
>>>> +    assert(state->bs == bs);
>>>> +    if (bs->job) {
>>>> +        assert(state->job == bs->job);
>>>> +    }
>>>> +
>>>> +    state->aio_context = bdrv_get_aio_context(bs);
>>>> +    aio_context_acquire(state->aio_context);
>>>> +
>>>> +    /* Note: We also have the individual job's return code in
>>>> btd->ret */
>>>> +    backup_transaction_complete(state->job, common->list->status);
>>>> +    block_job_decref(state->job);
>>>> +
>>>> +    aio_context_release(state->aio_context);
>>>> +}
>>>> +
>>>>   typedef struct BlockdevBackupState {
>>>>       BlkTransactionState common;
>>>>       BlockDriverState *bs;
>>>> @@ -2004,7 +2043,8 @@ static const BdrvActionOps actions[] = {
>>>>           .instance_size = sizeof(DriveBackupState),
>>>>           .prepare = drive_backup_prepare,
>>>>           .abort = drive_backup_abort,
>>>> -        .clean = drive_backup_clean
>>>> +        .clean = drive_backup_clean,
>>>> +        .cb = drive_backup_cb
>>>>       },
>>>>       [TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = {
>>>>           .instance_size = sizeof(BlockdevBackupState),
>>>> diff --git a/include/block/block_int.h b/include/block/block_int.h
>>>> index e0d5561..731684d 100644
>>>> --- a/include/block/block_int.h
>>>> +++ b/include/block/block_int.h
>>>> @@ -619,6 +619,14 @@ void backup_start(BlockDriverState *bs,
>>>> BlockDriverState *target,
>>>>                     BlockCompletionFunc *cb, void *opaque,
>>>>                     Error **errp);
>>>> +/*
>>>> + * backup_transaction_complete
>>>> + * @job The BackupJob that was associated with a transaction
>>>
>>> s/BackupJob/backup block job/ or s/BackupJob/backup job/? (there is no
>>> structure named "BackupJob", but this looks like there might be one)
>>>
>>>> + * @ret Amalgamated return code for the entire transaction
>>>
>>> Hm. The call to this function you're introducing in this patch will
>>> probably stay the only one so there won't be anyone who'll have to worry
>>> about what this means, but if there was, they probably won't reach a
>>> conclusive result.
>>>
>>> I know what it means because I've seen patch 3 (right now it means
>>> "everything OR-ed together so it's 0 for success or some non-zero (maybe
>>> positive, maybe negative, depending on whether you have an even or an
>>> odd number of errors, and depending on whether the jobs return negative
>>> values for errors or not) for error"), but I wouldn't be able to infer
>>> it from this. At the least you should add that 0 means success and
>>> everything else means error (if you take my suggestion for patch 3, it
>>> would be 0 for success and -errno for error, where that error number is
>>> one of the errors encountered).
>>>
>>> Other than that, looks good (as far as I can tell with my still limited
>>> insights into patch 3).
>>>
>>> Max
>>>
>>
>> It's my opinion that this patch should give you insight into patch #3,
>> instead of the other way around. This patch is useful for
>> demonstrating the general flow, because I kept all drive_backup
>> specific concerns strictly separated from patch #3.
>
> It only truly helps me understand patch 3 under the assumption that this
> patch is correct. For reviewing, I cannot really take that assumption. ;-)
>
> I mean, it does help me with the interfaces patch 3 offers, but it still
> doesn't help me with the objects introduced in patch 3, for instance,
> because those are all contained in patch 3 itself. And if I cannot
> verify patch 3 on its own, I cannot really verify that what this patch
> does is correct in regards to the interfaces offered there.
>
>> I'll write a more comprehensive document for the docs/ folder soon,
>> but the general shape of the idea is "The cleanup actions defined in
>> the .cb() method will be executed after every job in the transactional
>> group has completed."
>
> Right, again, assuming your implementation is correct, but questioning
> that is part of the natural reviewing process. :-)
>

Yes, that's fine! I am just trying to help you unravel this strange loop 
(ha ha) and/or try to understand what is not necessarily clear with the 
information as presented.

>> But there's some fine print:
>>
>> "'every job' refers only to jobs that have the capability to register
>> a post-transaction callback, which currently means only drive_backup."
>>
>> The general approach to this is, mechanically:
>>
>> (1) Extend the life of objects of interest with reference counts,
>> including Jobs, Bitmaps, and BlkTransactionStates.
>>
>> (2) "steal" the callback from a BlockJob and, when invoked, update our
>> management information for the transaction group in BlkTransactionList.
>>
>> (3) Once all jobs we have sent out return, execute the .cb() methods
>> as indicated in the BlkTransactionList.
>>
>>
>>
>> So, if you were adding a callback to a different QMP transaction:
>> - Look at new_transaction_wrapper; you'll use this to bump up various
>> references used internally for this whole system, and it'll keep
>> qmp_transaction from being able to delete the list and state objects.
>>
>>   This function will give to you (as a gimmick) a new callback and
>> opaque data structure pointer for you to give to the job that you are
>> starting. I obfuscate this just a bit to make it clear that you should
>> be using this function every time to help manage the transactional state.
>>
>> - Now, the job will run on its own happy way, When it finishes, it
>> will call transaction_callback, which is the function that
>> "intercepts" the callbacks, and dispatches the original enveloped
>> callbacks. It ditches the original data we used to know how to call
>> the original callback, and begins storing completion information for
>> jobs instead.
>>
>> - transaction_callback will call blk_transaction_complete to store
>> completion info for one job. In turn, it calls
>> put_blk_transaction_list to decrement the pending jobs count (the
>> "jobs" refcount) and once that hits zero ...
>>
>> - All of the callback mechanisms associated with each
>> BlockTransactionState are run via blk_run_transaction_callbacks.
>>
>> - This is where drive_backup_cb is going to get invoked again, and
>> then we will splinter back out into backup-specific concerns, with
>> Jobs and Bitmaps and what not.
>
> Yes, I can see the control flow; my main problem lies in the three
> different structs introduced in patch 3. I'll tell you why I'm confused
> by them:
>

Paydirt!

> First, there is BlkTransactionList. Sounds to me like it should be a
> list of transactions. Judging from its members, it is rather a list of
> operations ("actions") for a single transaction, however. Apparently,
> there is a double-use for the word "transaction": I'd consider a
> transaction to be the thing started by the QMP command which is a group
> of atomic operations. However, that one structure is called
> BlkTransactionState, but it apparently this is only the state of a
> single operation in the transaction. Pre-existing, but doesn't make it
> better. You added comments for its list entry members, with the first
> one stating "All transactions in the current group" and the second being
> a subset of that. However, again, I consider these to be operations and
> not transactions. The group is the transaction.
>

Ah, yes. Good points. We need to decide on consistent terminology.

Transaction: The entire group.
Action: One particular command within a transaction group.
Action Op: One particular stage of one command (e.g. prepare, commit)

BlkTransactionState is an existing structure that does indeed refer 
instead to one particular action's state, not that of the entire 
transaction. This can be changed to be clearer. Better names might be 
BlkTransactionActionState or BlkActionState.

The BlkTransactionList here is indeed intended to be a list for the 
entire transaction, so it is actually a "list of actions" and not, 
perhaps as its name implies, a list of transactions. (We have no data 
structures with a scope this vast.)

The BlkTransactionList is an object that is created once per transaction 
group (once per qmp_transaction()) and keeps a count of how many jobs 
we've launched (and thus how many to expect to return to us.)

Once a job completes, it adds that *action's state* 
(BlkTransactionState) to its list. Once all jobs sent out have returned, 
it will iterate this list and execute their .cb() actions.

So perhaps an amendment would be:

typedef struct BlkTransactionList {
     int pending_jobs;
     int transaction_rc;
     QLIST(...) completed_actions;
} BlkTransactionList;

to make this part clearer.


> So we have that adding to my confusion. Furthermore, I think we don't
> need to list entry members (entry and list_entry) in
> BlkTransactionState. I think it should be fine to drop the
> snap_bdrv_states list in qmp_transaction() and just add all
> BlkTransactionStates to the BlkTransactionList (or however you name it
> if you rename it) in the preparation while () loop there. I don't think
> we need to add them only in blk_transaction_complete() (and we should
> drop that from there if it's been done in qmp_transaction() already, of
> course). Then we can really just use that list generally as *the* list
> of operations in a transaction.
>

Yes, there's not necessarily a strong reason to keep two lists.

One of the existing reasons, though, is that not all transactions launch 
jobs or have been modified to work with this system yet, so we 
definitely don't want to consider all of these entries in THIS system 
just yet.

So one of the simplifications two lists makes is that we don't have to 
worry about "unsupported actions" making it into the list. Everything in 
*my* list can be processed without further thought.

We can also just add more state into the BlkTransactionState (the 
ActionState) to help us distinguish, and then we can get rid of both lists.

A state machine tag or something might be useful in that way.

> Second, there is BlkTransactionData. From the name I cannot judge at all
> how it is supposed to differ from BlkTransactionState, because, well, it
> contains some data about a transaction... Maybe, because I consider
> BlkTransactionState to be misnamed, BlkTransactionData actually contains
> data about the transaction and not about a single operation? But from
> looking at the members, I have no idea. It has "opaque" and "ret"... I
> don't know how that is vital data to the transaction itself (and I don't
> think it is, it is rather data for the completion callback, I guess?),
> it only looks like some sort of pattern I have seen for AIO callbacks
> and the like.
>

BlkTransactionData was meant to contain completion info about a "job" 
that was launched by an action. I tried to avoid tying it explicitly to 
a BlockJob -- it can be any action with a callback, really.

Anyway, this structure contains "Data that was delivered to the original 
callback." In our case here, it's data that would've gone to 
block_job_complete.

So it contains an opaque pointer (A BDS, we secretly know) and a return 
code for that individual action's task.

> Third, there is BlkTransactionWrapper. I suppose it wraps a
> transaction...? Or maybe at least an operation, because apparently I can
> no longer be sure whether a transaction is a transaction or an
> operation... But there is no transaction or even a single operation in
> there. It's just a structure describing a callback. I don't know how
> that is a "transaction wrapper"...
>

We have achieved maximum Strawberry.

The BlkTransactionWrapper was meant to wrap any data necessary to be 
able to deliver the original callback we're stealing.

So in our case, it's for *one action's callback* and in this one 
instance, it's being used to hold the data needed to manually invoke the 
block_job_complete callback.

So both BTD and BTW here contain callback related information for one 
particular action.

They are differentiated by "Wrapper" having a "Before we execute the 
original callback" context, and "Data" having a "We've already executed 
the original callback" context.

Can they be combined into one structure? Yes, as long as we keep a tag 
that lets us know what state we're in.

Can they be inlined into "BlkTransactionState"? Yes -- but I opted to 
keep it in a separate glob to not clutter up the BlkTransactionState 
with values only useful for an optional feature.

Is that wise? Maybe, maybe not.

I'm open to names. The most semantically accurate would be 
BlkTransactionActionCallbackData if we combined BTD/BTW to be one 
structure. It's a mouthful, though.

> So, I don't understand any of the names given to the objects, and while
> I might understand what they are used for by looking at them, I have a
> *really* hard time understanding the code that uses them, because if I
> don't stare at their definitions all the time, I will immediately forget
> what they contain and what they are used for because I don't understand
> the names at all (and there is nothing which explicitly tells me what
> they are used for either). The fact that all of them are starting with
> BlkTransaction makes mixing them up even easier.
>

Understood.

>> From the point of view of the "transaction action developer," the
>> interface to the new feature is found via the
>> "new_transaction_wrapper" function, But some care must be taken to
>> make sure this callback actually gets used once it has been requested.
>> The whole thing can be aborted with the "undo_transaction_wrapper"
>> function.
>>
>> All of the other functions that got added to blockdev.c in Patch #3
>> are there to assist these two "interface" functions in doing what they
>> claim to.
>>
>> Everything in patch 4 and 5 just adds more reference count flexibility
>> to things that are only of interest to drive_backup.
>>
>> Patch 6 allows us to inject our special callback wrapper into the
>> qmp_drive_backup function.
>>
>>
>> Clear as mud?
>
> Control flow and concept, clear. Implementation, still very muddy, sorry...
>
> Max
>
>> --js
>>
>>>> + */
>>>> +void backup_transaction_complete(BlockJob *job, int ret);
>>>> +
>>>> +
>>>>   void blk_dev_change_media_cb(BlockBackend *blk, bool load);
>>>>   bool blk_dev_has_removable_media(BlockBackend *blk);
>>>>   void blk_dev_eject_request(BlockBackend *blk, bool force);
>>>
>>>
>
Max Reitz March 18, 2015, 8:20 p.m. UTC | #5
On 2015-03-18 at 15:51, John Snow wrote:
>
>
> On 03/18/2015 09:41 AM, Max Reitz wrote:
>> On 2015-03-17 at 19:27, John Snow wrote:
>>>
>>>
>>> On 03/17/2015 03:49 PM, Max Reitz wrote:
>>>> On 2015-03-04 at 23:15, John Snow wrote:
>>>>> This patch actually implements the transactional callback system
>>>>> for the drive_backup transaction.
>>>>>
>>>>> (1) We manually pick up a reference to the bitmap if present to allow
>>>>>      its cleanup to be delayed until after all drive_backup jobs
>>>>> launched
>>>>>      by the transaction have fully completed.
>>>>>
>>>>> (2) We create a functional closure that envelops the original
>>>>> drive_backup
>>>>>      callback, to be able to intercept the completion status and
>>>>> return code
>>>>>      for the job.
>>>>>
>>>>> (3) We add the drive_backup_cb method for the drive_backup action,
>>>>> which
>>>>>      unpacks the completion information and invokes the final 
>>>>> cleanup.
>>>>>
>>>>> (4) backup_transaction_complete will perform the final cleanup on the
>>>>>      backup job.
>>>>>
>>>>> (5) In the case of transaction cancellation, drive_backup_cb is still
>>>>>      responsible for cleaning up the mess we may have already made.
>>>>>
>>>>> Signed-off-by: John Snow <jsnow@redhat.com>
>>>>> ---
>>>>>   block/backup.c            |  9 +++++++
>>>>>   blockdev.c                | 64
>>>>> ++++++++++++++++++++++++++++++++++++++---------
>>>>>   include/block/block_int.h |  8 ++++++
>>>>>   3 files changed, 69 insertions(+), 12 deletions(-)
>>>>>
>>>>> diff --git a/block/backup.c b/block/backup.c
>>>>> index 4332df4..3673fb0 100644
>>>>> --- a/block/backup.c
>>>>> +++ b/block/backup.c
>>>>> @@ -233,6 +233,15 @@ typedef struct {
>>>>>       int ret;
>>>>>   } BackupCompleteData;
>>>>> +void backup_transaction_complete(BlockJob *job, int ret)
>>>>> +{
>>>>> +    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
>>>>> +
>>>>> +    if (s->sync_bitmap) {
>>>>> +        bdrv_dirty_bitmap_decref(job->bs, s->sync_bitmap, ret, 
>>>>> NULL);
>>>>> +    }
>>>>> +}
>>>>> +
>>>>>   static void backup_complete(BlockJob *job, void *opaque)
>>>>>   {
>>>>>       BackupBlockJob *s = container_of(job, BackupBlockJob, common);
>>>>> diff --git a/blockdev.c b/blockdev.c
>>>>> index 9e3b9e9..3ff14a7 100644
>>>>> --- a/blockdev.c
>>>>> +++ b/blockdev.c
>>>>> @@ -1363,14 +1363,6 @@ static void transaction_callback(void *opaque,
>>>>> int ret)
>>>>>   typedef void (CallbackFn)(void *opaque, int ret);
>>>>> -/* Temporary. Removed in the next patch. */
>>>>> -CallbackFn *new_transaction_wrapper(BlkTransactionState *common,
>>>>> -                                    void *opaque,
>>>>> -                                    void (*callback)(void *, int),
>>>>> -                                    void **new_opaque);
>>>>> -
>>>>> -void undo_transaction_wrapper(BlkTransactionState *common);
>>>>> -
>>>>>   /**
>>>>>    * Create a new transactional callback wrapper.
>>>>>    *
>>>>> @@ -1389,7 +1381,7 @@ void
>>>>> undo_transaction_wrapper(BlkTransactionState *common);
>>>>>    *
>>>>>    * @return The callback to be used instead of @callback.
>>>>>    */
>>>>> -CallbackFn *new_transaction_wrapper(BlkTransactionState *common,
>>>>> +static CallbackFn *new_transaction_wrapper(BlkTransactionState
>>>>> *common,
>>>>>                                              void *opaque,
>>>>>                                              CallbackFn *callback,
>>>>>                                              void **new_opaque)
>>>>> @@ -1416,7 +1408,7 @@ CallbackFn
>>>>> *new_transaction_wrapper(BlkTransactionState *common,
>>>>>   /**
>>>>>    * Undo any actions performed by the above call.
>>>>>    */
>>>>> -void undo_transaction_wrapper(BlkTransactionState *common)
>>>>> +static void undo_transaction_wrapper(BlkTransactionState *common)
>>>>>   {
>>>>>       BlkTransactionList *btl = common->list;
>>>>>       BlkTransactionState *bts;
>>>>> @@ -1449,6 +1441,7 @@ void
>>>>> undo_transaction_wrapper(BlkTransactionState *common)
>>>>>       blk_put_transaction_state(common);
>>>>>   }
>>>>> +static void block_job_cb(void *opaque, int ret);
>>>>>   static void _drive_backup(const char *device, const char *target,
>>>>>                             bool has_format, const char *format,
>>>>>                             enum MirrorSyncMode sync,
>>>>> @@ -1767,6 +1760,9 @@ static void
>>>>> drive_backup_prepare(BlkTransactionState *common, Error **errp)
>>>>>       BlockDriverState *bs;
>>>>>       DriveBackup *backup;
>>>>>       Error *local_err = NULL;
>>>>> +    CallbackFn *cb;
>>>>> +    void *opaque;
>>>>> +    BdrvDirtyBitmap *bmap = NULL;
>>>>>       assert(common->action->kind ==
>>>>> TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
>>>>>       backup = common->action->drive_backup;
>>>>> @@ -1777,6 +1773,19 @@ static void
>>>>> drive_backup_prepare(BlkTransactionState *common, Error **errp)
>>>>>           return;
>>>>>       }
>>>>> +    /* BackupBlockJob is opaque to us, so look up the bitmap
>>>>> ourselves */
>>>>> +    if (backup->has_bitmap) {
>>>>> +        bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap);
>>>>> +        if (!bmap) {
>>>>> +            error_setg(errp, "Bitmap '%s' could not be found",
>>>>> backup->bitmap);
>>>>> +            return;
>>>>> +        }
>>>>> +    }
>>>>> +
>>>>> +    /* Create our transactional callback wrapper,
>>>>> +       and register that we'd like to call .cb() later. */
>>>>> +    cb = new_transaction_wrapper(common, bs, block_job_cb, &opaque);
>>>>> +
>>>>>       /* AioContext is released in .clean() */
>>>>>       state->aio_context = bdrv_get_aio_context(bs);
>>>>>       aio_context_acquire(state->aio_context);
>>>>> @@ -1789,7 +1798,7 @@ static void
>>>>> drive_backup_prepare(BlkTransactionState *common, Error **errp)
>>>>>                     backup->has_bitmap, backup->bitmap,
>>>>>                     backup->has_on_source_error,
>>>>> backup->on_source_error,
>>>>>                     backup->has_on_target_error,
>>>>> backup->on_target_error,
>>>>> -                  NULL, NULL,
>>>>> +                  cb, opaque,
>>>>>                     &local_err);
>>>>>       if (local_err) {
>>>>>           error_propagate(errp, local_err);
>>>>> @@ -1798,6 +1807,11 @@ static void
>>>>> drive_backup_prepare(BlkTransactionState *common, Error **errp)
>>>>>       state->bs = bs;
>>>>>       state->job = state->bs->job;
>>>>> +    /* Keep the job alive until .cb(), too. */
>>>>> +    block_job_incref(state->job);
>>>>> +    if (bmap) {
>>>>> +        bdrv_dirty_bitmap_incref(bmap);
>>>>> +    }
>>>>>   }
>>>>>   static void drive_backup_abort(BlkTransactionState *common)
>>>>> @@ -1809,6 +1823,10 @@ static void
>>>>> drive_backup_abort(BlkTransactionState *common)
>>>>>       if (bs && bs->job && bs->job == state->job) {
>>>>>           block_job_cancel_sync(bs->job);
>>>>>       }
>>>>> +
>>>>> +    /* Undo any callback actions we may have done. Putting down
>>>>> references
>>>>> +     * obtained during prepare() is handled by cb(). */
>>>>> +    undo_transaction_wrapper(common);
>>>>>   }
>>>>>   static void drive_backup_clean(BlkTransactionState *common)
>>>>> @@ -1820,6 +1838,27 @@ static void
>>>>> drive_backup_clean(BlkTransactionState *common)
>>>>>       }
>>>>>   }
>>>>> +static void drive_backup_cb(BlkTransactionState *common)
>>>>> +{
>>>>> +    BlkTransactionData *btd = common->opaque;
>>>>> +    BlockDriverState *bs = btd->opaque;
>>>>> +    DriveBackupState *state = DO_UPCAST(DriveBackupState, common,
>>>>> common);
>>>>> +
>>>>> +    assert(state->bs == bs);
>>>>> +    if (bs->job) {
>>>>> +        assert(state->job == bs->job);
>>>>> +    }
>>>>> +
>>>>> +    state->aio_context = bdrv_get_aio_context(bs);
>>>>> +    aio_context_acquire(state->aio_context);
>>>>> +
>>>>> +    /* Note: We also have the individual job's return code in
>>>>> btd->ret */
>>>>> +    backup_transaction_complete(state->job, common->list->status);
>>>>> +    block_job_decref(state->job);
>>>>> +
>>>>> +    aio_context_release(state->aio_context);
>>>>> +}
>>>>> +
>>>>>   typedef struct BlockdevBackupState {
>>>>>       BlkTransactionState common;
>>>>>       BlockDriverState *bs;
>>>>> @@ -2004,7 +2043,8 @@ static const BdrvActionOps actions[] = {
>>>>>           .instance_size = sizeof(DriveBackupState),
>>>>>           .prepare = drive_backup_prepare,
>>>>>           .abort = drive_backup_abort,
>>>>> -        .clean = drive_backup_clean
>>>>> +        .clean = drive_backup_clean,
>>>>> +        .cb = drive_backup_cb
>>>>>       },
>>>>>       [TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = {
>>>>>           .instance_size = sizeof(BlockdevBackupState),
>>>>> diff --git a/include/block/block_int.h b/include/block/block_int.h
>>>>> index e0d5561..731684d 100644
>>>>> --- a/include/block/block_int.h
>>>>> +++ b/include/block/block_int.h
>>>>> @@ -619,6 +619,14 @@ void backup_start(BlockDriverState *bs,
>>>>> BlockDriverState *target,
>>>>>                     BlockCompletionFunc *cb, void *opaque,
>>>>>                     Error **errp);
>>>>> +/*
>>>>> + * backup_transaction_complete
>>>>> + * @job The BackupJob that was associated with a transaction
>>>>
>>>> s/BackupJob/backup block job/ or s/BackupJob/backup job/? (there is no
>>>> structure named "BackupJob", but this looks like there might be one)
>>>>
>>>>> + * @ret Amalgamated return code for the entire transaction
>>>>
>>>> Hm. The call to this function you're introducing in this patch will
>>>> probably stay the only one so there won't be anyone who'll have to 
>>>> worry
>>>> about what this means, but if there was, they probably won't reach a
>>>> conclusive result.
>>>>
>>>> I know what it means because I've seen patch 3 (right now it means
>>>> "everything OR-ed together so it's 0 for success or some non-zero 
>>>> (maybe
>>>> positive, maybe negative, depending on whether you have an even or an
>>>> odd number of errors, and depending on whether the jobs return 
>>>> negative
>>>> values for errors or not) for error"), but I wouldn't be able to infer
>>>> it from this. At the least you should add that 0 means success and
>>>> everything else means error (if you take my suggestion for patch 3, it
>>>> would be 0 for success and -errno for error, where that error 
>>>> number is
>>>> one of the errors encountered).
>>>>
>>>> Other than that, looks good (as far as I can tell with my still 
>>>> limited
>>>> insights into patch 3).
>>>>
>>>> Max
>>>>
>>>
>>> It's my opinion that this patch should give you insight into patch #3,
>>> instead of the other way around. This patch is useful for
>>> demonstrating the general flow, because I kept all drive_backup
>>> specific concerns strictly separated from patch #3.
>>
>> It only truly helps me understand patch 3 under the assumption that this
>> patch is correct. For reviewing, I cannot really take that 
>> assumption. ;-)
>>
>> I mean, it does help me with the interfaces patch 3 offers, but it still
>> doesn't help me with the objects introduced in patch 3, for instance,
>> because those are all contained in patch 3 itself. And if I cannot
>> verify patch 3 on its own, I cannot really verify that what this patch
>> does is correct in regards to the interfaces offered there.
>>
>>> I'll write a more comprehensive document for the docs/ folder soon,
>>> but the general shape of the idea is "The cleanup actions defined in
>>> the .cb() method will be executed after every job in the transactional
>>> group has completed."
>>
>> Right, again, assuming your implementation is correct, but questioning
>> that is part of the natural reviewing process. :-)
>>
>
> Yes, that's fine! I am just trying to help you unravel this strange 
> loop (ha ha) and/or try to understand what is not necessarily clear 
> with the information as presented.
>
>>> But there's some fine print:
>>>
>>> "'every job' refers only to jobs that have the capability to register
>>> a post-transaction callback, which currently means only drive_backup."
>>>
>>> The general approach to this is, mechanically:
>>>
>>> (1) Extend the life of objects of interest with reference counts,
>>> including Jobs, Bitmaps, and BlkTransactionStates.
>>>
>>> (2) "steal" the callback from a BlockJob and, when invoked, update our
>>> management information for the transaction group in BlkTransactionList.
>>>
>>> (3) Once all jobs we have sent out return, execute the .cb() methods
>>> as indicated in the BlkTransactionList.
>>>
>>>
>>>
>>> So, if you were adding a callback to a different QMP transaction:
>>> - Look at new_transaction_wrapper; you'll use this to bump up various
>>> references used internally for this whole system, and it'll keep
>>> qmp_transaction from being able to delete the list and state objects.
>>>
>>>   This function will give to you (as a gimmick) a new callback and
>>> opaque data structure pointer for you to give to the job that you are
>>> starting. I obfuscate this just a bit to make it clear that you should
>>> be using this function every time to help manage the transactional 
>>> state.
>>>
>>> - Now, the job will run on its own happy way, When it finishes, it
>>> will call transaction_callback, which is the function that
>>> "intercepts" the callbacks, and dispatches the original enveloped
>>> callbacks. It ditches the original data we used to know how to call
>>> the original callback, and begins storing completion information for
>>> jobs instead.
>>>
>>> - transaction_callback will call blk_transaction_complete to store
>>> completion info for one job. In turn, it calls
>>> put_blk_transaction_list to decrement the pending jobs count (the
>>> "jobs" refcount) and once that hits zero ...
>>>
>>> - All of the callback mechanisms associated with each
>>> BlockTransactionState are run via blk_run_transaction_callbacks.
>>>
>>> - This is where drive_backup_cb is going to get invoked again, and
>>> then we will splinter back out into backup-specific concerns, with
>>> Jobs and Bitmaps and what not.
>>
>> Yes, I can see the control flow; my main problem lies in the three
>> different structs introduced in patch 3. I'll tell you why I'm confused
>> by them:
>>
>
> Paydirt!
>
>> First, there is BlkTransactionList. Sounds to me like it should be a
>> list of transactions. Judging from its members, it is rather a list of
>> operations ("actions") for a single transaction, however. Apparently,
>> there is a double-use for the word "transaction": I'd consider a
>> transaction to be the thing started by the QMP command which is a group
>> of atomic operations. However, that one structure is called
>> BlkTransactionState, but it apparently this is only the state of a
>> single operation in the transaction. Pre-existing, but doesn't make it
>> better. You added comments for its list entry members, with the first
>> one stating "All transactions in the current group" and the second being
>> a subset of that. However, again, I consider these to be operations and
>> not transactions. The group is the transaction.
>>
>
> Ah, yes. Good points. We need to decide on consistent terminology.
>
> Transaction: The entire group.
> Action: One particular command within a transaction group.
> Action Op: One particular stage of one command (e.g. prepare, commit)

Sounds good.

> BlkTransactionState is an existing structure that does indeed refer 
> instead to one particular action's state, not that of the entire 
> transaction. This can be changed to be clearer. Better names might be 
> BlkTransactionActionState or BlkActionState.
>
> The BlkTransactionList here is indeed intended to be a list for the 
> entire transaction, so it is actually a "list of actions" and not, 
> perhaps as its name implies, a list of transactions. (We have no data 
> structures with a scope this vast.)
>
> The BlkTransactionList is an object that is created once per 
> transaction group (once per qmp_transaction()) and keeps a count of 
> how many jobs we've launched (and thus how many to expect to return to 
> us.)
>
> Once a job completes, it adds that *action's state* 
> (BlkTransactionState) to its list. Once all jobs sent out have 
> returned, it will iterate this list and execute their .cb() actions.
>
> So perhaps an amendment would be:
>
> typedef struct BlkTransactionList {
>     int pending_jobs;
>     int transaction_rc;
>     QLIST(...) completed_actions;
> } BlkTransactionList;
>
> to make this part clearer.
>
>
>> So we have that adding to my confusion. Furthermore, I think we don't
>> need to list entry members (entry and list_entry) in
>> BlkTransactionState. I think it should be fine to drop the
>> snap_bdrv_states list in qmp_transaction() and just add all
>> BlkTransactionStates to the BlkTransactionList (or however you name it
>> if you rename it) in the preparation while () loop there. I don't think
>> we need to add them only in blk_transaction_complete() (and we should
>> drop that from there if it's been done in qmp_transaction() already, of
>> course). Then we can really just use that list generally as *the* list
>> of operations in a transaction.
>>
>
> Yes, there's not necessarily a strong reason to keep two lists.
>
> One of the existing reasons, though, is that not all transactions 
> launch jobs or have been modified to work with this system yet, so we 
> definitely don't want to consider all of these entries in THIS system 
> just yet.
>
> So one of the simplifications two lists makes is that we don't have to 
> worry about "unsupported actions" making it into the list. Everything 
> in *my* list can be processed without further thought.

Hm, okay. I guess here I got confused because there are so many 
different callbacks... Okay. Makes sense.

Would it make sense to you to replace "Transactions in the current group 
with callbacks" by "Actions in the current transaction with (job) 
completion callbacks"? I think it might be helpful to specify exactly 
which callback this is about (because we do have a couple of callbacks 
here...).

> We can also just add more state into the BlkTransactionState (the 
> ActionState) to help us distinguish, and then we can get rid of both 
> lists.
>
> A state machine tag or something might be useful in that way.
>
>> Second, there is BlkTransactionData. From the name I cannot judge at all
>> how it is supposed to differ from BlkTransactionState, because, well, it
>> contains some data about a transaction... Maybe, because I consider
>> BlkTransactionState to be misnamed, BlkTransactionData actually contains
>> data about the transaction and not about a single operation? But from
>> looking at the members, I have no idea. It has "opaque" and "ret"... I
>> don't know how that is vital data to the transaction itself (and I don't
>> think it is, it is rather data for the completion callback, I guess?),
>> it only looks like some sort of pattern I have seen for AIO callbacks
>> and the like.
>>
>
> BlkTransactionData was meant to contain completion info about a "job" 
> that was launched by an action. I tried to avoid tying it explicitly 
> to a BlockJob -- it can be any action with a callback, really.

Hm. I'm in favor of just calling it "job". The problem is that the 
actions regarding block jobs only launch the block jobs; therefore, the 
action itself is completed by the time qmp_transaction() returns, but 
the block job (which has been launched by the action) is probably still 
running. Therefore, there is a difference between an action and a job.

It may help to really make a clear distinction there, like replace the 
comment "Execute this after +all+ jobs in the transaction finish" by 
"Executed after +all+ jobs launched by the transaction finish" (because 
the jobs aren't part of the transaction).

While it can be any action with a callback, it will be only block jobs 
for now, so once we add something else, we can think about the names 
once again. But I think that "job" is a generic enough description and 
will fit any long-term operation started by a transaction action which 
continues even beyond the transaction itself, so I think we should be 
fine even then.

> Anyway, this structure contains "Data that was delivered to the 
> original callback." In our case here, it's data that would've gone to 
> block_job_complete.
>
> So it contains an opaque pointer (A BDS, we secretly know) and a 
> return code for that individual action's task.
>
>> Third, there is BlkTransactionWrapper. I suppose it wraps a
>> transaction...? Or maybe at least an operation, because apparently I can
>> no longer be sure whether a transaction is a transaction or an
>> operation... But there is no transaction or even a single operation in
>> there. It's just a structure describing a callback. I don't know how
>> that is a "transaction wrapper"...
>>
>
> We have achieved maximum Strawberry.
>
> The BlkTransactionWrapper was meant to wrap any data necessary to be 
> able to deliver the original callback we're stealing.
>
> So in our case, it's for *one action's callback* and in this one 
> instance, it's being used to hold the data needed to manually invoke 
> the block_job_complete callback.
>
> So both BTD and BTW here contain callback related information for one 
> particular action.
>
> They are differentiated by "Wrapper" having a "Before we execute the 
> original callback" context, and "Data" having a "We've already 
> executed the original callback" context.
>
> Can they be combined into one structure? Yes, as long as we keep a tag 
> that lets us know what state we're in.
>
> Can they be inlined into "BlkTransactionState"? Yes -- but I opted to 
> keep it in a separate glob to not clutter up the BlkTransactionState 
> with values only useful for an optional feature.
>
> Is that wise? Maybe, maybe not.
>
> I'm open to names. The most semantically accurate would be 
> BlkTransactionActionCallbackData if we combined BTD/BTW to be one 
> structure. It's a mouthful, though.

I was playing a bit dumb here: I did see that 
BlkTransaction{Data,Wrapper} described callbacks ("it is rather data for 
the completion callback, I guess?", "It's just a structure describing a 
callback"), but I mainly wanted to point out what I would infer from the 
names alone.

I'm all in favor of making the names overly long if need be. Maybe I'd 
call BTD BlkTransactionCallbackData, BlkTransactionCompletionCBData, 
BlkTransactionCBData, BlkTransactionCBParams, or just 
BlkTACBData/BlkTACBParams instead (it is a structure local to 
blockdev.c, so I'd be fine with abbreviations); BTW I'd call 
BlkTransactionCB/BlkTACB (because it contains the function pointer which 
actually is the callback itself).

Maybe merging both does make sense. The name you proposed looks good to 
me, but I'd abbreviate it (at least BlkTransactionActionCBData, or 
BlkTAActionCBData (which is ugly), or BlkTransactionOpCBData, or 
BlkTAOpCBData, or BlkTACBData (I think omitting the "Action" should be 
fine here, because the important thing is to have the "CB" in it to 
signify that this is not data describing the transaction but a 
callback), or even omit the "Data": BlkTransactionCB/BlkTACB. You are, 
after all, describing a complete callback with it (the function pointer 
and all the parameters)).

I don't know whether that helps. Naming is always hard, I know. Just 
make sure that data about a callback contains the word "callback" or 
"CB" and it should be fine, and add proper comments which describe why 
the structure is named the way it is. If you can explain it, it's 
probably a good name.

Max

>
>> So, I don't understand any of the names given to the objects, and while
>> I might understand what they are used for by looking at them, I have a
>> *really* hard time understanding the code that uses them, because if I
>> don't stare at their definitions all the time, I will immediately forget
>> what they contain and what they are used for because I don't understand
>> the names at all (and there is nothing which explicitly tells me what
>> they are used for either). The fact that all of them are starting with
>> BlkTransaction makes mixing them up even easier.
>>
>
> Understood.
>
>>> From the point of view of the "transaction action developer," the
>>> interface to the new feature is found via the
>>> "new_transaction_wrapper" function, But some care must be taken to
>>> make sure this callback actually gets used once it has been requested.
>>> The whole thing can be aborted with the "undo_transaction_wrapper"
>>> function.
>>>
>>> All of the other functions that got added to blockdev.c in Patch #3
>>> are there to assist these two "interface" functions in doing what they
>>> claim to.
>>>
>>> Everything in patch 4 and 5 just adds more reference count flexibility
>>> to things that are only of interest to drive_backup.
>>>
>>> Patch 6 allows us to inject our special callback wrapper into the
>>> qmp_drive_backup function.
>>>
>>>
>>> Clear as mud?
>>
>> Control flow and concept, clear. Implementation, still very muddy, 
>> sorry...
>>
>> Max
>>
>>> --js
>>>
>>>>> + */
>>>>> +void backup_transaction_complete(BlockJob *job, int ret);
>>>>> +
>>>>> +
>>>>>   void blk_dev_change_media_cb(BlockBackend *blk, bool load);
>>>>>   bool blk_dev_has_removable_media(BlockBackend *blk);
>>>>>   void blk_dev_eject_request(BlockBackend *blk, bool force);
>>>>
>>>>
>>
>
diff mbox

Patch

diff --git a/block/backup.c b/block/backup.c
index 4332df4..3673fb0 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -233,6 +233,15 @@  typedef struct {
     int ret;
 } BackupCompleteData;
 
+void backup_transaction_complete(BlockJob *job, int ret)
+{
+    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
+
+    if (s->sync_bitmap) {
+        bdrv_dirty_bitmap_decref(job->bs, s->sync_bitmap, ret, NULL);
+    }
+}
+
 static void backup_complete(BlockJob *job, void *opaque)
 {
     BackupBlockJob *s = container_of(job, BackupBlockJob, common);
diff --git a/blockdev.c b/blockdev.c
index 9e3b9e9..3ff14a7 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1363,14 +1363,6 @@  static void transaction_callback(void *opaque, int ret)
 
 typedef void (CallbackFn)(void *opaque, int ret);
 
-/* Temporary. Removed in the next patch. */
-CallbackFn *new_transaction_wrapper(BlkTransactionState *common,
-                                    void *opaque,
-                                    void (*callback)(void *, int),
-                                    void **new_opaque);
-
-void undo_transaction_wrapper(BlkTransactionState *common);
-
 /**
  * Create a new transactional callback wrapper.
  *
@@ -1389,7 +1381,7 @@  void undo_transaction_wrapper(BlkTransactionState *common);
  *
  * @return The callback to be used instead of @callback.
  */
-CallbackFn *new_transaction_wrapper(BlkTransactionState *common,
+static CallbackFn *new_transaction_wrapper(BlkTransactionState *common,
                                            void *opaque,
                                            CallbackFn *callback,
                                            void **new_opaque)
@@ -1416,7 +1408,7 @@  CallbackFn *new_transaction_wrapper(BlkTransactionState *common,
 /**
  * Undo any actions performed by the above call.
  */
-void undo_transaction_wrapper(BlkTransactionState *common)
+static void undo_transaction_wrapper(BlkTransactionState *common)
 {
     BlkTransactionList *btl = common->list;
     BlkTransactionState *bts;
@@ -1449,6 +1441,7 @@  void undo_transaction_wrapper(BlkTransactionState *common)
     blk_put_transaction_state(common);
 }
 
+static void block_job_cb(void *opaque, int ret);
 static void _drive_backup(const char *device, const char *target,
                           bool has_format, const char *format,
                           enum MirrorSyncMode sync,
@@ -1767,6 +1760,9 @@  static void drive_backup_prepare(BlkTransactionState *common, Error **errp)
     BlockDriverState *bs;
     DriveBackup *backup;
     Error *local_err = NULL;
+    CallbackFn *cb;
+    void *opaque;
+    BdrvDirtyBitmap *bmap = NULL;
 
     assert(common->action->kind == TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
     backup = common->action->drive_backup;
@@ -1777,6 +1773,19 @@  static void drive_backup_prepare(BlkTransactionState *common, Error **errp)
         return;
     }
 
+    /* BackupBlockJob is opaque to us, so look up the bitmap ourselves */
+    if (backup->has_bitmap) {
+        bmap = bdrv_find_dirty_bitmap(bs, backup->bitmap);
+        if (!bmap) {
+            error_setg(errp, "Bitmap '%s' could not be found", backup->bitmap);
+            return;
+        }
+    }
+
+    /* Create our transactional callback wrapper,
+       and register that we'd like to call .cb() later. */
+    cb = new_transaction_wrapper(common, bs, block_job_cb, &opaque);
+
     /* AioContext is released in .clean() */
     state->aio_context = bdrv_get_aio_context(bs);
     aio_context_acquire(state->aio_context);
@@ -1789,7 +1798,7 @@  static void drive_backup_prepare(BlkTransactionState *common, Error **errp)
                   backup->has_bitmap, backup->bitmap,
                   backup->has_on_source_error, backup->on_source_error,
                   backup->has_on_target_error, backup->on_target_error,
-                  NULL, NULL,
+                  cb, opaque,
                   &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
@@ -1798,6 +1807,11 @@  static void drive_backup_prepare(BlkTransactionState *common, Error **errp)
 
     state->bs = bs;
     state->job = state->bs->job;
+    /* Keep the job alive until .cb(), too. */
+    block_job_incref(state->job);
+    if (bmap) {
+        bdrv_dirty_bitmap_incref(bmap);
+    }
 }
 
 static void drive_backup_abort(BlkTransactionState *common)
@@ -1809,6 +1823,10 @@  static void drive_backup_abort(BlkTransactionState *common)
     if (bs && bs->job && bs->job == state->job) {
         block_job_cancel_sync(bs->job);
     }
+
+    /* Undo any callback actions we may have done. Putting down references
+     * obtained during prepare() is handled by cb(). */
+    undo_transaction_wrapper(common);
 }
 
 static void drive_backup_clean(BlkTransactionState *common)
@@ -1820,6 +1838,27 @@  static void drive_backup_clean(BlkTransactionState *common)
     }
 }
 
+static void drive_backup_cb(BlkTransactionState *common)
+{
+    BlkTransactionData *btd = common->opaque;
+    BlockDriverState *bs = btd->opaque;
+    DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
+
+    assert(state->bs == bs);
+    if (bs->job) {
+        assert(state->job == bs->job);
+    }
+
+    state->aio_context = bdrv_get_aio_context(bs);
+    aio_context_acquire(state->aio_context);
+
+    /* Note: We also have the individual job's return code in btd->ret */
+    backup_transaction_complete(state->job, common->list->status);
+    block_job_decref(state->job);
+
+    aio_context_release(state->aio_context);
+}
+
 typedef struct BlockdevBackupState {
     BlkTransactionState common;
     BlockDriverState *bs;
@@ -2004,7 +2043,8 @@  static const BdrvActionOps actions[] = {
         .instance_size = sizeof(DriveBackupState),
         .prepare = drive_backup_prepare,
         .abort = drive_backup_abort,
-        .clean = drive_backup_clean
+        .clean = drive_backup_clean,
+        .cb = drive_backup_cb
     },
     [TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = {
         .instance_size = sizeof(BlockdevBackupState),
diff --git a/include/block/block_int.h b/include/block/block_int.h
index e0d5561..731684d 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -619,6 +619,14 @@  void backup_start(BlockDriverState *bs, BlockDriverState *target,
                   BlockCompletionFunc *cb, void *opaque,
                   Error **errp);
 
+/*
+ * backup_transaction_complete
+ * @job The BackupJob that was associated with a transaction
+ * @ret Amalgamated return code for the entire transaction
+ */
+void backup_transaction_complete(BlockJob *job, int ret);
+
+
 void blk_dev_change_media_cb(BlockBackend *blk, bool load);
 bool blk_dev_has_removable_media(BlockBackend *blk);
 void blk_dev_eject_request(BlockBackend *blk, bool force);