diff mbox

[v12,07/17] qmp: Add support of "dirty-bitmap" sync mode for drive-backup

Message ID 1423532117-14490-8-git-send-email-jsnow@redhat.com
State New
Headers show

Commit Message

John Snow Feb. 10, 2015, 1:35 a.m. UTC
For "dirty-bitmap" sync mode, the block job will iterate through the
given dirty bitmap to decide if a sector needs backup (backup all the
dirty clusters and skip clean ones), just as allocation conditions of
"top" sync mode.

Signed-off-by: Fam Zheng <famz@redhat.com>
Signed-off-by: John Snow <jsnow@redhat.com>
---
 block.c                   |   9 +++
 block/backup.c            | 149 +++++++++++++++++++++++++++++++++++++++-------
 block/mirror.c            |   4 ++
 blockdev.c                |  18 +++++-
 hmp.c                     |   3 +-
 include/block/block.h     |   1 +
 include/block/block_int.h |   2 +
 qapi/block-core.json      |  13 ++--
 qmp-commands.hx           |   7 ++-
 9 files changed, 172 insertions(+), 34 deletions(-)

Comments

Max Reitz Feb. 11, 2015, 5:47 p.m. UTC | #1
On 2015-02-09 at 20:35, John Snow wrote:
> For "dirty-bitmap" sync mode, the block job will iterate through the
> given dirty bitmap to decide if a sector needs backup (backup all the
> dirty clusters and skip clean ones), just as allocation conditions of
> "top" sync mode.
>
> Signed-off-by: Fam Zheng <famz@redhat.com>
> Signed-off-by: John Snow <jsnow@redhat.com>
> ---
>   block.c                   |   9 +++
>   block/backup.c            | 149 +++++++++++++++++++++++++++++++++++++++-------
>   block/mirror.c            |   4 ++
>   blockdev.c                |  18 +++++-
>   hmp.c                     |   3 +-
>   include/block/block.h     |   1 +
>   include/block/block_int.h |   2 +
>   qapi/block-core.json      |  13 ++--
>   qmp-commands.hx           |   7 ++-
>   9 files changed, 172 insertions(+), 34 deletions(-)
>
> diff --git a/block.c b/block.c
> index 8d84ace..e93fceb 100644
> --- a/block.c
> +++ b/block.c
> @@ -5648,6 +5648,15 @@ static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
>       }
>   }
>   
> +/**
> + * Advance an HBitmapIter to an arbitrary offset.
> + */
> +void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
> +{
> +    assert(hbi->hb);
> +    hbitmap_iter_init(hbi, hbi->hb, offset);
> +}
> +
>   int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
>   {
>       return hbitmap_count(bitmap->bitmap);
> diff --git a/block/backup.c b/block/backup.c
> index 1c535b1..bf8c0e7 100644
> --- a/block/backup.c
> +++ b/block/backup.c
> @@ -37,6 +37,8 @@ typedef struct CowRequest {
>   typedef struct BackupBlockJob {
>       BlockJob common;
>       BlockDriverState *target;
> +    /* bitmap for sync=dirty-bitmap */
> +    BdrvDirtyBitmap *sync_bitmap;
>       MirrorSyncMode sync_mode;
>       RateLimit limit;
>       BlockdevOnError on_source_error;
> @@ -242,6 +244,31 @@ static void backup_complete(BlockJob *job, void *opaque)
>       g_free(data);
>   }
>   
> +static bool coroutine_fn yield_and_check(BackupBlockJob *job)
> +{
> +    if (block_job_is_cancelled(&job->common)) {
> +        return true;
> +    }
> +
> +    /* we need to yield so that qemu_aio_flush() returns.
> +     * (without, VM does not reboot)
> +     */
> +    if (job->common.speed) {
> +        uint64_t delay_ns = ratelimit_calculate_delay(&job->limit,
> +                                                      job->sectors_read);
> +        job->sectors_read = 0;
> +        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
> +    } else {
> +        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
> +    }
> +
> +    if (block_job_is_cancelled(&job->common)) {
> +        return true;
> +    }
> +
> +    return false;
> +}
> +
>   static void coroutine_fn backup_run(void *opaque)
>   {
>       BackupBlockJob *job = opaque;
> @@ -254,13 +281,13 @@ static void coroutine_fn backup_run(void *opaque)
>       };
>       int64_t start, end;
>       int ret = 0;
> +    bool error_is_read;
>   
>       QLIST_INIT(&job->inflight_reqs);
>       qemu_co_rwlock_init(&job->flush_rwlock);
>   
>       start = 0;
> -    end = DIV_ROUND_UP(job->common.len / BDRV_SECTOR_SIZE,
> -                       BACKUP_SECTORS_PER_CLUSTER);
> +    end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
>   
>       job->bitmap = hbitmap_alloc(end, 0);
>   
> @@ -278,28 +305,61 @@ static void coroutine_fn backup_run(void *opaque)
>               qemu_coroutine_yield();
>               job->common.busy = true;
>           }
> +    } else if (job->sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
> +        /* Dirty Bitmap sync has a slightly different iteration method */
> +        HBitmapIter hbi;
> +        int64_t sector;
> +        int64_t cluster;
> +        int64_t last_cluster = -1;
> +        bool polyrhythmic;
> +
> +        bdrv_dirty_iter_init(bs, job->sync_bitmap, &hbi);
> +        /* Does the granularity happen to match our backup cluster size? */
> +        polyrhythmic = (bdrv_dirty_bitmap_granularity(job->sync_bitmap) !=
> +                        BACKUP_CLUSTER_SIZE);
> +
> +        /* Find the next dirty /sector/ and copy that /cluster/ */
> +        while ((sector = hbitmap_iter_next(&hbi)) != -1) {
> +            if (yield_and_check(job)) {
> +                goto leave;
> +            }
> +            cluster = sector / BACKUP_SECTORS_PER_CLUSTER;
> +
> +            /* Play some catchup with the progress meter */
> +            if (cluster != last_cluster + 1) {
> +                job->common.offset += ((cluster - last_cluster - 1) *
> +                                       BACKUP_CLUSTER_SIZE);
> +            }

I guess the "- 1" in the calculation comes from backup_do_cow() 
modifying job->common.offset, too. How about simply overwriting it like 
"job->common.offset = cluster * BACKUP_CLUSTER_SIZE"? And it may be a 
good idea to move this update above yield_and_check() because that's the 
progress we have actually done up to that point (and I imagine 
yield_and_check() is the point where the caller might check the progress).

> +
> +            do {
> +                ret = backup_do_cow(bs, cluster * BACKUP_SECTORS_PER_CLUSTER,
> +                                    BACKUP_SECTORS_PER_CLUSTER, &error_is_read);
> +                if ((ret < 0) &&
> +                    backup_error_action(job, error_is_read, -ret) ==
> +                    BLOCK_ERROR_ACTION_REPORT) {
> +                    goto leave;
> +                }
> +            } while (ret < 0);
> +
> +            /* Advance (or rewind) our iterator if we need to. */
> +            if (polyrhythmic) {
> +                bdrv_set_dirty_iter(&hbi,
> +                                    (cluster + 1) * BACKUP_SECTORS_PER_CLUSTER);
> +            }
> +
> +            last_cluster = cluster;
> +        }
> +
> +        /* Play some final catchup with the progress meter */
> +        if (last_cluster + 1 < end) {
> +            job->common.offset += ((end - last_cluster - 1) *
> +                                   BACKUP_CLUSTER_SIZE);
> +        }
> +

Superfluous empty line.

>       } else {
>           /* Both FULL and TOP SYNC_MODE's require copying.. */
>           for (; start < end; start++) {
> -            bool error_is_read;
> -
> -            if (block_job_is_cancelled(&job->common)) {
> -                break;
> -            }
> -
> -            /* we need to yield so that qemu_aio_flush() returns.
> -             * (without, VM does not reboot)
> -             */
> -            if (job->common.speed) {
> -                uint64_t delay_ns = ratelimit_calculate_delay(
> -                        &job->limit, job->sectors_read);
> -                job->sectors_read = 0;
> -                block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
> -            } else {
> -                block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
> -            }
> -
> -            if (block_job_is_cancelled(&job->common)) {
> +            if (yield_and_check(job)) {
>                   break;
>               }
>   
> @@ -351,12 +411,26 @@ static void coroutine_fn backup_run(void *opaque)
>           }
>       }
>   
> +leave:
>       notifier_with_return_remove(&before_write);
>   
>       /* wait until pending backup_do_cow() calls have completed */
>       qemu_co_rwlock_wrlock(&job->flush_rwlock);
>       qemu_co_rwlock_unlock(&job->flush_rwlock);
>   
> +    if (job->sync_bitmap) {
> +        BdrvDirtyBitmap *bm;
> +        if (ret < 0) {
> +            /* Merge the successor back into the parent, delete nothing. */
> +            bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL);
> +            assert(bm);
> +            bdrv_enable_dirty_bitmap(job->sync_bitmap);
> +        } else {
> +            /* Everything is fine, delete this bitmap and install the backup. */
> +            bm = bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL);
> +            assert(bm);
> +        }
> +    }
>       hbitmap_free(job->bitmap);
>   
>       bdrv_iostatus_disable(target);
> @@ -369,6 +443,7 @@ static void coroutine_fn backup_run(void *opaque)
>   
>   void backup_start(BlockDriverState *bs, BlockDriverState *target,
>                     int64_t speed, MirrorSyncMode sync_mode,
> +                  BdrvDirtyBitmap *sync_bitmap,
>                     BlockdevOnError on_source_error,
>                     BlockdevOnError on_target_error,
>                     BlockCompletionFunc *cb, void *opaque,
> @@ -412,17 +487,37 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
>           return;
>       }
>   
> +    if (sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
> +        if (!sync_bitmap) {
> +            error_setg(errp, "must provide a valid bitmap name for "
> +                             "\"dirty-bitmap\" sync mode");
> +            return;
> +        }
> +
> +        /* Create a new bitmap, and freeze/disable this one. */
> +        if (bdrv_dirty_bitmap_create_successor(bs, sync_bitmap, errp) < 0) {
> +            return;
> +        }
> +

Another superfluous empty line.

> +    } else if (sync_bitmap) {
> +        error_setg(errp,
> +                   "a sync_bitmap was provided to backup_run, "
> +                   "but received an incompatible sync_mode (%s)",
> +                   MirrorSyncMode_lookup[sync_mode]);
> +        return;
> +    }
> +
>       len = bdrv_getlength(bs);
>       if (len < 0) {
>           error_setg_errno(errp, -len, "unable to get length for '%s'",
>                            bdrv_get_device_name(bs));
> -        return;
> +        goto error;
>       }
>   
>       BackupBlockJob *job = block_job_create(&backup_job_driver, bs, speed,
>                                              cb, opaque, errp);
>       if (!job) {
> -        return;
> +        goto error;
>       }
>   
>       bdrv_op_block_all(target, job->common.blocker);
> @@ -431,7 +526,15 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
>       job->on_target_error = on_target_error;
>       job->target = target;
>       job->sync_mode = sync_mode;
> +    job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP ?
> +                       sync_bitmap : NULL;
>       job->common.len = len;
>       job->common.co = qemu_coroutine_create(backup_run);
>       qemu_coroutine_enter(job->common.co, job);
> +    return;
> +
> + error:
> +    if (sync_bitmap) {
> +        bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL);
> +    }
>   }
> diff --git a/block/mirror.c b/block/mirror.c
> index 77bd1ed..271dbf3 100644
> --- a/block/mirror.c
> +++ b/block/mirror.c
> @@ -718,6 +718,10 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
>       bool is_none_mode;
>       BlockDriverState *base;
>   
> +    if (mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
> +        error_setg(errp, "Sync mode 'dirty-bitmap' not supported");
> +        return;
> +    }
>       is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
>       base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
>       mirror_start_job(bs, target, replaces,
> diff --git a/blockdev.c b/blockdev.c
> index 826d0c1..3f41e82 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -1569,6 +1569,7 @@ static void drive_backup_prepare(BlkTransactionState *common, Error **errp)
>                        backup->sync,
>                        backup->has_mode, backup->mode,
>                        backup->has_speed, backup->speed,
> +                     backup->has_bitmap, backup->bitmap,
>                        backup->has_on_source_error, backup->on_source_error,
>                        backup->has_on_target_error, backup->on_target_error,
>                        &local_err);
> @@ -2429,6 +2430,7 @@ void qmp_drive_backup(const char *device, const char *target,
>                         enum MirrorSyncMode sync,
>                         bool has_mode, enum NewImageMode mode,
>                         bool has_speed, int64_t speed,
> +                      bool has_bitmap, const char *bitmap,
>                         bool has_on_source_error, BlockdevOnError on_source_error,
>                         bool has_on_target_error, BlockdevOnError on_target_error,
>                         Error **errp)
> @@ -2436,6 +2438,7 @@ void qmp_drive_backup(const char *device, const char *target,
>       BlockDriverState *bs;
>       BlockDriverState *target_bs;
>       BlockDriverState *source = NULL;
> +    BdrvDirtyBitmap *bmap = NULL;
>       AioContext *aio_context;
>       BlockDriver *drv = NULL;
>       Error *local_err = NULL;
> @@ -2534,7 +2537,16 @@ void qmp_drive_backup(const char *device, const char *target,
>   
>       bdrv_set_aio_context(target_bs, aio_context);
>   
> -    backup_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
> +    if (has_bitmap) {
> +        bmap = bdrv_find_dirty_bitmap(bs, bitmap);
> +        if (!bmap) {
> +            error_setg(errp, "Bitmap '%s' could not be found", bitmap);
> +            goto out;
> +        }
> +    }
> +
> +    backup_start(bs, target_bs, speed, sync, bmap,
> +                 on_source_error, on_target_error,
>                    block_job_cb, bs, &local_err);
>       if (local_err != NULL) {
>           bdrv_unref(target_bs);
> @@ -2592,8 +2604,8 @@ void qmp_blockdev_backup(const char *device, const char *target,
>   
>       bdrv_ref(target_bs);
>       bdrv_set_aio_context(target_bs, aio_context);
> -    backup_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
> -                 block_job_cb, bs, &local_err);
> +    backup_start(bs, target_bs, speed, sync, NULL, on_source_error,
> +                 on_target_error, block_job_cb, bs, &local_err);
>       if (local_err != NULL) {
>           bdrv_unref(target_bs);
>           error_propagate(errp, local_err);
> diff --git a/hmp.c b/hmp.c
> index b47f331..015499f 100644
> --- a/hmp.c
> +++ b/hmp.c
> @@ -1027,7 +1027,8 @@ void hmp_drive_backup(Monitor *mon, const QDict *qdict)
>   
>       qmp_drive_backup(device, filename, !!format, format,
>                        full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
> -                     true, mode, false, 0, false, 0, false, 0, &err);
> +                     true, mode, false, 0, false, NULL,
> +                     false, 0, false, 0, &err);
>       hmp_handle_error(mon, &err);
>   }
>   
> diff --git a/include/block/block.h b/include/block/block.h
> index b2d84d6..8589e77 100644
> --- a/include/block/block.h
> +++ b/include/block/block.h
> @@ -469,6 +469,7 @@ void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
>                                int64_t cur_sector, int nr_sectors);
>   void bdrv_dirty_iter_init(BlockDriverState *bs,
>                             BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi);
> +void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset);
>   int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
>   
>   void bdrv_enable_copy_on_read(BlockDriverState *bs);
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index 7ad1950..2233790 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -588,6 +588,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
>    * @target: Block device to write to.
>    * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
>    * @sync_mode: What parts of the disk image should be copied to the destination.
> + * @sync_bitmap: The dirty bitmap if sync_mode is MIRROR_SYNC_MODE_DIRTY_BITMAP.
>    * @on_source_error: The action to take upon error reading from the source.
>    * @on_target_error: The action to take upon error writing to the target.
>    * @cb: Completion function for the job.
> @@ -598,6 +599,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
>    */
>   void backup_start(BlockDriverState *bs, BlockDriverState *target,
>                     int64_t speed, MirrorSyncMode sync_mode,
> +                  BdrvDirtyBitmap *sync_bitmap,
>                     BlockdevOnError on_source_error,
>                     BlockdevOnError on_target_error,
>                     BlockCompletionFunc *cb, void *opaque,
> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index 940eff7..9c5a99c 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -508,10 +508,12 @@
>   #
>   # @none: only copy data written from now on
>   #
> +# @dirty-bitmap: only copy data described by the dirty bitmap. Since: 2.3
> +#
>   # Since: 1.3
>   ##
>   { 'enum': 'MirrorSyncMode',
> -  'data': ['top', 'full', 'none'] }
> +  'data': ['top', 'full', 'none', 'dirty-bitmap'] }
>   
>   ##
>   # @BlockJobType:
> @@ -686,14 +688,17 @@
>   #          probe if @mode is 'existing', else the format of the source
>   #
>   # @sync: what parts of the disk image should be copied to the destination
> -#        (all the disk, only the sectors allocated in the topmost image, or
> -#        only new I/O).
> +#        (all the disk, only the sectors allocated in the topmost image, from a
> +#        dirty bitmap, or only new I/O).
>   #
>   # @mode: #optional whether and how QEMU should create a new image, default is
>   #        'absolute-paths'.
>   #
>   # @speed: #optional the maximum speed, in bytes per second
>   #
> +# @bitmap: #optional the name of dirty bitmap if sync is "dirty-bitmap"
> +#          (Since 2.3)
> +#
>   # @on-source-error: #optional the action to take on an error on the source,
>   #                   default 'report'.  'stop' and 'enospc' can only be used
>   #                   if the block device supports io-status (see BlockInfo).
> @@ -711,7 +716,7 @@
>   { 'type': 'DriveBackup',
>     'data': { 'device': 'str', 'target': 'str', '*format': 'str',
>               'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
> -            '*speed': 'int',
> +            '*speed': 'int', '*bitmap': 'str',
>               '*on-source-error': 'BlockdevOnError',
>               '*on-target-error': 'BlockdevOnError' } }
>   
> diff --git a/qmp-commands.hx b/qmp-commands.hx
> index ce7782f..5aa3845 100644
> --- a/qmp-commands.hx
> +++ b/qmp-commands.hx
> @@ -1048,7 +1048,7 @@ EQMP
>       {
>           .name       = "drive-backup",
>           .args_type  = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?,"
> -                      "on-source-error:s?,on-target-error:s?",
> +                      "bitmap:s?,on-source-error:s?,on-target-error:s?",
>           .mhandler.cmd_new = qmp_marshal_input_drive_backup,
>       },
>   
> @@ -1075,8 +1075,9 @@ Arguments:
>               (json-string, optional)
>   - "sync": what parts of the disk image should be copied to the destination;
>     possibilities include "full" for all the disk, "top" for only the sectors
> -  allocated in the topmost image, or "none" to only replicate new I/O
> -  (MirrorSyncMode).
> +  allocated in the topmost image, "dirty-bitmap" for only the dirty sectors in
> +  the bitmap, or "none" to only replicate new I/O (MirrorSyncMode).
> +- "bitmap": dirty bitmap name for sync==dirty-bitmap
>   - "mode": whether and how QEMU should create a new image
>             (NewImageMode, optional, default 'absolute-paths')
>   - "speed": the maximum speed, in bytes per second (json-int, optional)

Looks good to me in general, now I need to find out what the successor 
bitmap is used for; but I guess I'll find that out by reviewing the rest 
of this series.

Max
John Snow Feb. 11, 2015, 5:54 p.m. UTC | #2
On 02/11/2015 12:47 PM, Max Reitz wrote:
> On 2015-02-09 at 20:35, John Snow wrote:
>> For "dirty-bitmap" sync mode, the block job will iterate through the
>> given dirty bitmap to decide if a sector needs backup (backup all the
>> dirty clusters and skip clean ones), just as allocation conditions of
>> "top" sync mode.
>>
>> Signed-off-by: Fam Zheng <famz@redhat.com>
>> Signed-off-by: John Snow <jsnow@redhat.com>
>> ---
>>   block.c                   |   9 +++
>>   block/backup.c            | 149
>> +++++++++++++++++++++++++++++++++++++++-------
>>   block/mirror.c            |   4 ++
>>   blockdev.c                |  18 +++++-
>>   hmp.c                     |   3 +-
>>   include/block/block.h     |   1 +
>>   include/block/block_int.h |   2 +
>>   qapi/block-core.json      |  13 ++--
>>   qmp-commands.hx           |   7 ++-
>>   9 files changed, 172 insertions(+), 34 deletions(-)
>>
>> diff --git a/block.c b/block.c
>> index 8d84ace..e93fceb 100644
>> --- a/block.c
>> +++ b/block.c
>> @@ -5648,6 +5648,15 @@ static void bdrv_reset_dirty(BlockDriverState
>> *bs, int64_t cur_sector,
>>       }
>>   }
>> +/**
>> + * Advance an HBitmapIter to an arbitrary offset.
>> + */
>> +void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
>> +{
>> +    assert(hbi->hb);
>> +    hbitmap_iter_init(hbi, hbi->hb, offset);
>> +}
>> +
>>   int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap
>> *bitmap)
>>   {
>>       return hbitmap_count(bitmap->bitmap);
>> diff --git a/block/backup.c b/block/backup.c
>> index 1c535b1..bf8c0e7 100644
>> --- a/block/backup.c
>> +++ b/block/backup.c
>> @@ -37,6 +37,8 @@ typedef struct CowRequest {
>>   typedef struct BackupBlockJob {
>>       BlockJob common;
>>       BlockDriverState *target;
>> +    /* bitmap for sync=dirty-bitmap */
>> +    BdrvDirtyBitmap *sync_bitmap;
>>       MirrorSyncMode sync_mode;
>>       RateLimit limit;
>>       BlockdevOnError on_source_error;
>> @@ -242,6 +244,31 @@ static void backup_complete(BlockJob *job, void
>> *opaque)
>>       g_free(data);
>>   }
>> +static bool coroutine_fn yield_and_check(BackupBlockJob *job)
>> +{
>> +    if (block_job_is_cancelled(&job->common)) {
>> +        return true;
>> +    }
>> +
>> +    /* we need to yield so that qemu_aio_flush() returns.
>> +     * (without, VM does not reboot)
>> +     */
>> +    if (job->common.speed) {
>> +        uint64_t delay_ns = ratelimit_calculate_delay(&job->limit,
>> +
>> job->sectors_read);
>> +        job->sectors_read = 0;
>> +        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
>> +    } else {
>> +        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
>> +    }
>> +
>> +    if (block_job_is_cancelled(&job->common)) {
>> +        return true;
>> +    }
>> +
>> +    return false;
>> +}
>> +
>>   static void coroutine_fn backup_run(void *opaque)
>>   {
>>       BackupBlockJob *job = opaque;
>> @@ -254,13 +281,13 @@ static void coroutine_fn backup_run(void *opaque)
>>       };
>>       int64_t start, end;
>>       int ret = 0;
>> +    bool error_is_read;
>>       QLIST_INIT(&job->inflight_reqs);
>>       qemu_co_rwlock_init(&job->flush_rwlock);
>>       start = 0;
>> -    end = DIV_ROUND_UP(job->common.len / BDRV_SECTOR_SIZE,
>> -                       BACKUP_SECTORS_PER_CLUSTER);
>> +    end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
>>       job->bitmap = hbitmap_alloc(end, 0);
>> @@ -278,28 +305,61 @@ static void coroutine_fn backup_run(void *opaque)
>>               qemu_coroutine_yield();
>>               job->common.busy = true;
>>           }
>> +    } else if (job->sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
>> +        /* Dirty Bitmap sync has a slightly different iteration
>> method */
>> +        HBitmapIter hbi;
>> +        int64_t sector;
>> +        int64_t cluster;
>> +        int64_t last_cluster = -1;
>> +        bool polyrhythmic;
>> +
>> +        bdrv_dirty_iter_init(bs, job->sync_bitmap, &hbi);
>> +        /* Does the granularity happen to match our backup cluster
>> size? */
>> +        polyrhythmic =
>> (bdrv_dirty_bitmap_granularity(job->sync_bitmap) !=
>> +                        BACKUP_CLUSTER_SIZE);
>> +
>> +        /* Find the next dirty /sector/ and copy that /cluster/ */
>> +        while ((sector = hbitmap_iter_next(&hbi)) != -1) {
>> +            if (yield_and_check(job)) {
>> +                goto leave;
>> +            }
>> +            cluster = sector / BACKUP_SECTORS_PER_CLUSTER;
>> +
>> +            /* Play some catchup with the progress meter */
>> +            if (cluster != last_cluster + 1) {
>> +                job->common.offset += ((cluster - last_cluster - 1) *
>> +                                       BACKUP_CLUSTER_SIZE);
>> +            }
>
> I guess the "- 1" in the calculation comes from backup_do_cow()
> modifying job->common.offset, too. How about simply overwriting it like
> "job->common.offset = cluster * BACKUP_CLUSTER_SIZE"? And it may be a
> good idea to move this update above yield_and_check() because that's the
> progress we have actually done up to that point (and I imagine
> yield_and_check() is the point where the caller might check the progress).
>
>> +
>> +            do {
>> +                ret = backup_do_cow(bs, cluster *
>> BACKUP_SECTORS_PER_CLUSTER,
>> +                                    BACKUP_SECTORS_PER_CLUSTER,
>> &error_is_read);
>> +                if ((ret < 0) &&
>> +                    backup_error_action(job, error_is_read, -ret) ==
>> +                    BLOCK_ERROR_ACTION_REPORT) {
>> +                    goto leave;
>> +                }
>> +            } while (ret < 0);
>> +
>> +            /* Advance (or rewind) our iterator if we need to. */
>> +            if (polyrhythmic) {
>> +                bdrv_set_dirty_iter(&hbi,
>> +                                    (cluster + 1) *
>> BACKUP_SECTORS_PER_CLUSTER);
>> +            }
>> +
>> +            last_cluster = cluster;
>> +        }
>> +
>> +        /* Play some final catchup with the progress meter */
>> +        if (last_cluster + 1 < end) {
>> +            job->common.offset += ((end - last_cluster - 1) *
>> +                                   BACKUP_CLUSTER_SIZE);
>> +        }
>> +
>
> Superfluous empty line.
>
>>       } else {
>>           /* Both FULL and TOP SYNC_MODE's require copying.. */
>>           for (; start < end; start++) {
>> -            bool error_is_read;
>> -
>> -            if (block_job_is_cancelled(&job->common)) {
>> -                break;
>> -            }
>> -
>> -            /* we need to yield so that qemu_aio_flush() returns.
>> -             * (without, VM does not reboot)
>> -             */
>> -            if (job->common.speed) {
>> -                uint64_t delay_ns = ratelimit_calculate_delay(
>> -                        &job->limit, job->sectors_read);
>> -                job->sectors_read = 0;
>> -                block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME,
>> delay_ns);
>> -            } else {
>> -                block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME,
>> 0);
>> -            }
>> -
>> -            if (block_job_is_cancelled(&job->common)) {
>> +            if (yield_and_check(job)) {
>>                   break;
>>               }
>> @@ -351,12 +411,26 @@ static void coroutine_fn backup_run(void *opaque)
>>           }
>>       }
>> +leave:
>>       notifier_with_return_remove(&before_write);
>>       /* wait until pending backup_do_cow() calls have completed */
>>       qemu_co_rwlock_wrlock(&job->flush_rwlock);
>>       qemu_co_rwlock_unlock(&job->flush_rwlock);
>> +    if (job->sync_bitmap) {
>> +        BdrvDirtyBitmap *bm;
>> +        if (ret < 0) {
>> +            /* Merge the successor back into the parent, delete
>> nothing. */
>> +            bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL);
>> +            assert(bm);
>> +            bdrv_enable_dirty_bitmap(job->sync_bitmap);
>> +        } else {
>> +            /* Everything is fine, delete this bitmap and install the
>> backup. */
>> +            bm = bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL);
>> +            assert(bm);
>> +        }
>> +    }
>>       hbitmap_free(job->bitmap);
>>       bdrv_iostatus_disable(target);
>> @@ -369,6 +443,7 @@ static void coroutine_fn backup_run(void *opaque)
>>   void backup_start(BlockDriverState *bs, BlockDriverState *target,
>>                     int64_t speed, MirrorSyncMode sync_mode,
>> +                  BdrvDirtyBitmap *sync_bitmap,
>>                     BlockdevOnError on_source_error,
>>                     BlockdevOnError on_target_error,
>>                     BlockCompletionFunc *cb, void *opaque,
>> @@ -412,17 +487,37 @@ void backup_start(BlockDriverState *bs,
>> BlockDriverState *target,
>>           return;
>>       }
>> +    if (sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
>> +        if (!sync_bitmap) {
>> +            error_setg(errp, "must provide a valid bitmap name for "
>> +                             "\"dirty-bitmap\" sync mode");
>> +            return;
>> +        }
>> +
>> +        /* Create a new bitmap, and freeze/disable this one. */
>> +        if (bdrv_dirty_bitmap_create_successor(bs, sync_bitmap, errp)
>> < 0) {
>> +            return;
>> +        }
>> +
>
> Another superfluous empty line.
>
>> +    } else if (sync_bitmap) {
>> +        error_setg(errp,
>> +                   "a sync_bitmap was provided to backup_run, "
>> +                   "but received an incompatible sync_mode (%s)",
>> +                   MirrorSyncMode_lookup[sync_mode]);
>> +        return;
>> +    }
>> +
>>       len = bdrv_getlength(bs);
>>       if (len < 0) {
>>           error_setg_errno(errp, -len, "unable to get length for '%s'",
>>                            bdrv_get_device_name(bs));
>> -        return;
>> +        goto error;
>>       }
>>       BackupBlockJob *job = block_job_create(&backup_job_driver, bs,
>> speed,
>>                                              cb, opaque, errp);
>>       if (!job) {
>> -        return;
>> +        goto error;
>>       }
>>       bdrv_op_block_all(target, job->common.blocker);
>> @@ -431,7 +526,15 @@ void backup_start(BlockDriverState *bs,
>> BlockDriverState *target,
>>       job->on_target_error = on_target_error;
>>       job->target = target;
>>       job->sync_mode = sync_mode;
>> +    job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP ?
>> +                       sync_bitmap : NULL;
>>       job->common.len = len;
>>       job->common.co = qemu_coroutine_create(backup_run);
>>       qemu_coroutine_enter(job->common.co, job);
>> +    return;
>> +
>> + error:
>> +    if (sync_bitmap) {
>> +        bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL);
>> +    }
>>   }
>> diff --git a/block/mirror.c b/block/mirror.c
>> index 77bd1ed..271dbf3 100644
>> --- a/block/mirror.c
>> +++ b/block/mirror.c
>> @@ -718,6 +718,10 @@ void mirror_start(BlockDriverState *bs,
>> BlockDriverState *target,
>>       bool is_none_mode;
>>       BlockDriverState *base;
>> +    if (mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
>> +        error_setg(errp, "Sync mode 'dirty-bitmap' not supported");
>> +        return;
>> +    }
>>       is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
>>       base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
>>       mirror_start_job(bs, target, replaces,
>> diff --git a/blockdev.c b/blockdev.c
>> index 826d0c1..3f41e82 100644
>> --- a/blockdev.c
>> +++ b/blockdev.c
>> @@ -1569,6 +1569,7 @@ static void
>> drive_backup_prepare(BlkTransactionState *common, Error **errp)
>>                        backup->sync,
>>                        backup->has_mode, backup->mode,
>>                        backup->has_speed, backup->speed,
>> +                     backup->has_bitmap, backup->bitmap,
>>                        backup->has_on_source_error,
>> backup->on_source_error,
>>                        backup->has_on_target_error,
>> backup->on_target_error,
>>                        &local_err);
>> @@ -2429,6 +2430,7 @@ void qmp_drive_backup(const char *device, const
>> char *target,
>>                         enum MirrorSyncMode sync,
>>                         bool has_mode, enum NewImageMode mode,
>>                         bool has_speed, int64_t speed,
>> +                      bool has_bitmap, const char *bitmap,
>>                         bool has_on_source_error, BlockdevOnError
>> on_source_error,
>>                         bool has_on_target_error, BlockdevOnError
>> on_target_error,
>>                         Error **errp)
>> @@ -2436,6 +2438,7 @@ void qmp_drive_backup(const char *device, const
>> char *target,
>>       BlockDriverState *bs;
>>       BlockDriverState *target_bs;
>>       BlockDriverState *source = NULL;
>> +    BdrvDirtyBitmap *bmap = NULL;
>>       AioContext *aio_context;
>>       BlockDriver *drv = NULL;
>>       Error *local_err = NULL;
>> @@ -2534,7 +2537,16 @@ void qmp_drive_backup(const char *device, const
>> char *target,
>>       bdrv_set_aio_context(target_bs, aio_context);
>> -    backup_start(bs, target_bs, speed, sync, on_source_error,
>> on_target_error,
>> +    if (has_bitmap) {
>> +        bmap = bdrv_find_dirty_bitmap(bs, bitmap);
>> +        if (!bmap) {
>> +            error_setg(errp, "Bitmap '%s' could not be found", bitmap);
>> +            goto out;
>> +        }
>> +    }
>> +
>> +    backup_start(bs, target_bs, speed, sync, bmap,
>> +                 on_source_error, on_target_error,
>>                    block_job_cb, bs, &local_err);
>>       if (local_err != NULL) {
>>           bdrv_unref(target_bs);
>> @@ -2592,8 +2604,8 @@ void qmp_blockdev_backup(const char *device,
>> const char *target,
>>       bdrv_ref(target_bs);
>>       bdrv_set_aio_context(target_bs, aio_context);
>> -    backup_start(bs, target_bs, speed, sync, on_source_error,
>> on_target_error,
>> -                 block_job_cb, bs, &local_err);
>> +    backup_start(bs, target_bs, speed, sync, NULL, on_source_error,
>> +                 on_target_error, block_job_cb, bs, &local_err);
>>       if (local_err != NULL) {
>>           bdrv_unref(target_bs);
>>           error_propagate(errp, local_err);
>> diff --git a/hmp.c b/hmp.c
>> index b47f331..015499f 100644
>> --- a/hmp.c
>> +++ b/hmp.c
>> @@ -1027,7 +1027,8 @@ void hmp_drive_backup(Monitor *mon, const QDict
>> *qdict)
>>       qmp_drive_backup(device, filename, !!format, format,
>>                        full ? MIRROR_SYNC_MODE_FULL :
>> MIRROR_SYNC_MODE_TOP,
>> -                     true, mode, false, 0, false, 0, false, 0, &err);
>> +                     true, mode, false, 0, false, NULL,
>> +                     false, 0, false, 0, &err);
>>       hmp_handle_error(mon, &err);
>>   }
>> diff --git a/include/block/block.h b/include/block/block.h
>> index b2d84d6..8589e77 100644
>> --- a/include/block/block.h
>> +++ b/include/block/block.h
>> @@ -469,6 +469,7 @@ void bdrv_reset_dirty_bitmap(BlockDriverState *bs,
>> BdrvDirtyBitmap *bitmap,
>>                                int64_t cur_sector, int nr_sectors);
>>   void bdrv_dirty_iter_init(BlockDriverState *bs,
>>                             BdrvDirtyBitmap *bitmap, struct
>> HBitmapIter *hbi);
>> +void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset);
>>   int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap
>> *bitmap);
>>   void bdrv_enable_copy_on_read(BlockDriverState *bs);
>> diff --git a/include/block/block_int.h b/include/block/block_int.h
>> index 7ad1950..2233790 100644
>> --- a/include/block/block_int.h
>> +++ b/include/block/block_int.h
>> @@ -588,6 +588,7 @@ void mirror_start(BlockDriverState *bs,
>> BlockDriverState *target,
>>    * @target: Block device to write to.
>>    * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
>>    * @sync_mode: What parts of the disk image should be copied to the
>> destination.
>> + * @sync_bitmap: The dirty bitmap if sync_mode is
>> MIRROR_SYNC_MODE_DIRTY_BITMAP.
>>    * @on_source_error: The action to take upon error reading from the
>> source.
>>    * @on_target_error: The action to take upon error writing to the
>> target.
>>    * @cb: Completion function for the job.
>> @@ -598,6 +599,7 @@ void mirror_start(BlockDriverState *bs,
>> BlockDriverState *target,
>>    */
>>   void backup_start(BlockDriverState *bs, BlockDriverState *target,
>>                     int64_t speed, MirrorSyncMode sync_mode,
>> +                  BdrvDirtyBitmap *sync_bitmap,
>>                     BlockdevOnError on_source_error,
>>                     BlockdevOnError on_target_error,
>>                     BlockCompletionFunc *cb, void *opaque,
>> diff --git a/qapi/block-core.json b/qapi/block-core.json
>> index 940eff7..9c5a99c 100644
>> --- a/qapi/block-core.json
>> +++ b/qapi/block-core.json
>> @@ -508,10 +508,12 @@
>>   #
>>   # @none: only copy data written from now on
>>   #
>> +# @dirty-bitmap: only copy data described by the dirty bitmap. Since:
>> 2.3
>> +#
>>   # Since: 1.3
>>   ##
>>   { 'enum': 'MirrorSyncMode',
>> -  'data': ['top', 'full', 'none'] }
>> +  'data': ['top', 'full', 'none', 'dirty-bitmap'] }
>>   ##
>>   # @BlockJobType:
>> @@ -686,14 +688,17 @@
>>   #          probe if @mode is 'existing', else the format of the source
>>   #
>>   # @sync: what parts of the disk image should be copied to the
>> destination
>> -#        (all the disk, only the sectors allocated in the topmost
>> image, or
>> -#        only new I/O).
>> +#        (all the disk, only the sectors allocated in the topmost
>> image, from a
>> +#        dirty bitmap, or only new I/O).
>>   #
>>   # @mode: #optional whether and how QEMU should create a new image,
>> default is
>>   #        'absolute-paths'.
>>   #
>>   # @speed: #optional the maximum speed, in bytes per second
>>   #
>> +# @bitmap: #optional the name of dirty bitmap if sync is "dirty-bitmap"
>> +#          (Since 2.3)
>> +#
>>   # @on-source-error: #optional the action to take on an error on the
>> source,
>>   #                   default 'report'.  'stop' and 'enospc' can only
>> be used
>>   #                   if the block device supports io-status (see
>> BlockInfo).
>> @@ -711,7 +716,7 @@
>>   { 'type': 'DriveBackup',
>>     'data': { 'device': 'str', 'target': 'str', '*format': 'str',
>>               'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
>> -            '*speed': 'int',
>> +            '*speed': 'int', '*bitmap': 'str',
>>               '*on-source-error': 'BlockdevOnError',
>>               '*on-target-error': 'BlockdevOnError' } }
>> diff --git a/qmp-commands.hx b/qmp-commands.hx
>> index ce7782f..5aa3845 100644
>> --- a/qmp-commands.hx
>> +++ b/qmp-commands.hx
>> @@ -1048,7 +1048,7 @@ EQMP
>>       {
>>           .name       = "drive-backup",
>>           .args_type  =
>> "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?,"
>> -                      "on-source-error:s?,on-target-error:s?",
>> +                      "bitmap:s?,on-source-error:s?,on-target-error:s?",
>>           .mhandler.cmd_new = qmp_marshal_input_drive_backup,
>>       },
>> @@ -1075,8 +1075,9 @@ Arguments:
>>               (json-string, optional)
>>   - "sync": what parts of the disk image should be copied to the
>> destination;
>>     possibilities include "full" for all the disk, "top" for only the
>> sectors
>> -  allocated in the topmost image, or "none" to only replicate new I/O
>> -  (MirrorSyncMode).
>> +  allocated in the topmost image, "dirty-bitmap" for only the dirty
>> sectors in
>> +  the bitmap, or "none" to only replicate new I/O (MirrorSyncMode).
>> +- "bitmap": dirty bitmap name for sync==dirty-bitmap
>>   - "mode": whether and how QEMU should create a new image
>>             (NewImageMode, optional, default 'absolute-paths')
>>   - "speed": the maximum speed, in bytes per second (json-int, optional)
>
> Looks good to me in general, now I need to find out what the successor
> bitmap is used for; but I guess I'll find that out by reviewing the rest
> of this series.
>
> Max

They don't really come up again, actually.

The basic idea is this: While the backup is going on, reads and writes 
may occur (albeit delayed) and we want to track those writes in a 
separate bitmap for the duration of the backup operation.

If the backup operation fails, we use the dirty sector tracking info in 
the successor to know what has changed since we started the backup, and 
we merge this bitmap back into the originating bitmap; then if an 
incremental backup is tried again, it includes all of the original data 
plus any data changed while we failed to do a backup.

If the backup operation succeeds, the originating bitmap is deleted and 
the successor is installed in its place.

It's a namespace trick: by having an anonymous bitmap as a child of the 
"real" bitmap, the real bitmap can be frozen and prohibited from being 
moved, renamed, deleted, etc. This prevents the user from adding a new 
bitmap with the same name or similar while the backup is in progress.

A previous approach was to immediately take the bitmap off of the BDS, 
but in the error case here, the logic becomes more complicated when we 
need to re-install the bitmap but the user has already installed a new 
bitmap with the same name, etc.

So the general lifetime is this:

(1) A backup is started. the block/backup routine calls create_successor.
(2) If the backup fails to start, the block/backup routine will call the 
"reclaim" method, which will merge the (empty) successor back into the 
original bitmap, unfreezing it.
(3) If the backup starts, and then fails, the bitmap is "reclaim"ed 
(merged back into one bitmap.)
(4) If the backup succeeds, the bitmap "abdicates" to the successor. 
(The parent bitmap is erased and the successor is installed in its place.)
Max Reitz Feb. 11, 2015, 6:18 p.m. UTC | #3
On 2015-02-11 at 12:54, John Snow wrote:
>
> On 02/11/2015 12:47 PM, Max Reitz wrote:
>> Looks good to me in general, now I need to find out what the successor
>> bitmap is used for; but I guess I'll find that out by reviewing the rest
>> of this series.
>>
>> Max
>
> They don't really come up again, actually.
>
> The basic idea is this: While the backup is going on, reads and writes 
> may occur (albeit delayed) and we want to track those writes in a 
> separate bitmap for the duration of the backup operation.

Yes, I thought as much; but where are writes to the named bitmap being 
redirected to its successor? bdrv_set_dirty() doesn't do that, as far as 
I can see.

> If the backup operation fails, we use the dirty sector tracking info 
> in the successor to know what has changed since we started the backup, 
> and we merge this bitmap back into the originating bitmap; then if an 
> incremental backup is tried again, it includes all of the original 
> data plus any data changed while we failed to do a backup.
>
> If the backup operation succeeds, the originating bitmap is deleted 
> and the successor is installed in its place.
>
> It's a namespace trick: by having an anonymous bitmap as a child of 
> the "real" bitmap, the real bitmap can be frozen and prohibited from 
> being moved, renamed, deleted, etc. This prevents the user from adding 
> a new bitmap with the same name or similar while the backup is in 
> progress.

Hm, if it's just for that, wouldn't disabling the bitmap suffice?

Max

> A previous approach was to immediately take the bitmap off of the BDS, 
> but in the error case here, the logic becomes more complicated when we 
> need to re-install the bitmap but the user has already installed a new 
> bitmap with the same name, etc.
>
> So the general lifetime is this:
>
> (1) A backup is started. the block/backup routine calls create_successor.
> (2) If the backup fails to start, the block/backup routine will call 
> the "reclaim" method, which will merge the (empty) successor back into 
> the original bitmap, unfreezing it.
> (3) If the backup starts, and then fails, the bitmap is "reclaim"ed 
> (merged back into one bitmap.)
> (4) If the backup succeeds, the bitmap "abdicates" to the successor. 
> (The parent bitmap is erased and the successor is installed in its 
> place.)

Yes, see the graph at the whiteboard behind me. :-)

Max
John Snow Feb. 11, 2015, 6:31 p.m. UTC | #4
On 02/11/2015 01:18 PM, Max Reitz wrote:
> On 2015-02-11 at 12:54, John Snow wrote:
>>
>> On 02/11/2015 12:47 PM, Max Reitz wrote:
>>> Looks good to me in general, now I need to find out what the successor
>>> bitmap is used for; but I guess I'll find that out by reviewing the rest
>>> of this series.
>>>
>>> Max
>>
>> They don't really come up again, actually.
>>
>> The basic idea is this: While the backup is going on, reads and writes
>> may occur (albeit delayed) and we want to track those writes in a
>> separate bitmap for the duration of the backup operation.
>
> Yes, I thought as much; but where are writes to the named bitmap being
> redirected to its successor? bdrv_set_dirty() doesn't do that, as far as
> I can see.
>

bdrv_dirty_bitmap_create_successor calls bdrv_create_dirty_bitmap, which 
installs it in the bitmap chain attached to a BDS:

     QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);

which is read by bdrv_set_dirty.

bdrv_set_dirty operates on all bitmaps attached to a BDS, while 
bdrv_set_dirty_bitmap operates on a single specific instance.

>> If the backup operation fails, we use the dirty sector tracking info
>> in the successor to know what has changed since we started the backup,
>> and we merge this bitmap back into the originating bitmap; then if an
>> incremental backup is tried again, it includes all of the original
>> data plus any data changed while we failed to do a backup.
>>
>> If the backup operation succeeds, the originating bitmap is deleted
>> and the successor is installed in its place.
>>
>> It's a namespace trick: by having an anonymous bitmap as a child of
>> the "real" bitmap, the real bitmap can be frozen and prohibited from
>> being moved, renamed, deleted, etc. This prevents the user from adding
>> a new bitmap with the same name or similar while the backup is in
>> progress.
>
> Hm, if it's just for that, wouldn't disabling the bitmap suffice?
>
> Max
>

Kind of? We still want to track writes while it's disabled.

If we try to use a single bitmap, we have no real way to know which bits 
to clear after the operation succeeds. I think two bitmaps is a 
requirement to accommodate both failure and success cases.

A distinction is made between a disabled bitmap (which is just 
read-only: it can be deleted) and a frozen bitmap (which is in-use by an 
operation, implicitly disabled, and cannot be enabled, disabled, 
deleted, cleared, set or reset.)

>> A previous approach was to immediately take the bitmap off of the BDS,
>> but in the error case here, the logic becomes more complicated when we
>> need to re-install the bitmap but the user has already installed a new
>> bitmap with the same name, etc.
>>
>> So the general lifetime is this:
>>
>> (1) A backup is started. the block/backup routine calls create_successor.
>> (2) If the backup fails to start, the block/backup routine will call
>> the "reclaim" method, which will merge the (empty) successor back into
>> the original bitmap, unfreezing it.
>> (3) If the backup starts, and then fails, the bitmap is "reclaim"ed
>> (merged back into one bitmap.)
>> (4) If the backup succeeds, the bitmap "abdicates" to the successor.
>> (The parent bitmap is erased and the successor is installed in its
>> place.)
>
> Yes, see the graph at the whiteboard behind me. :-)
>
> Max
Max Reitz Feb. 11, 2015, 6:33 p.m. UTC | #5
On 2015-02-11 at 13:31, John Snow wrote:
>
>
> On 02/11/2015 01:18 PM, Max Reitz wrote:
>> On 2015-02-11 at 12:54, John Snow wrote:
>>>
>>> On 02/11/2015 12:47 PM, Max Reitz wrote:
>>>> Looks good to me in general, now I need to find out what the successor
>>>> bitmap is used for; but I guess I'll find that out by reviewing the 
>>>> rest
>>>> of this series.
>>>>
>>>> Max
>>>
>>> They don't really come up again, actually.
>>>
>>> The basic idea is this: While the backup is going on, reads and writes
>>> may occur (albeit delayed) and we want to track those writes in a
>>> separate bitmap for the duration of the backup operation.
>>
>> Yes, I thought as much; but where are writes to the named bitmap being
>> redirected to its successor? bdrv_set_dirty() doesn't do that, as far as
>> I can see.
>>
>
> bdrv_dirty_bitmap_create_successor calls bdrv_create_dirty_bitmap, 
> which installs it in the bitmap chain attached to a BDS:
>
>     QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
>
> which is read by bdrv_set_dirty.

Oooh, clever. Right.

>
> bdrv_set_dirty operates on all bitmaps attached to a BDS, while 
> bdrv_set_dirty_bitmap operates on a single specific instance.
>
>>> If the backup operation fails, we use the dirty sector tracking info
>>> in the successor to know what has changed since we started the backup,
>>> and we merge this bitmap back into the originating bitmap; then if an
>>> incremental backup is tried again, it includes all of the original
>>> data plus any data changed while we failed to do a backup.
>>>
>>> If the backup operation succeeds, the originating bitmap is deleted
>>> and the successor is installed in its place.
>>>
>>> It's a namespace trick: by having an anonymous bitmap as a child of
>>> the "real" bitmap, the real bitmap can be frozen and prohibited from
>>> being moved, renamed, deleted, etc. This prevents the user from adding
>>> a new bitmap with the same name or similar while the backup is in
>>> progress.
>>
>> Hm, if it's just for that, wouldn't disabling the bitmap suffice?
>>
>> Max
>>
>
> Kind of? We still want to track writes while it's disabled.

Right, I was assuming here that writes are not tracked in the successor.

Thanks for pointing out that they are!

Max

>
> If we try to use a single bitmap, we have no real way to know which 
> bits to clear after the operation succeeds. I think two bitmaps is a 
> requirement to accommodate both failure and success cases.
>
> A distinction is made between a disabled bitmap (which is just 
> read-only: it can be deleted) and a frozen bitmap (which is in-use by 
> an operation, implicitly disabled, and cannot be enabled, disabled, 
> deleted, cleared, set or reset.)
>
>>> A previous approach was to immediately take the bitmap off of the BDS,
>>> but in the error case here, the logic becomes more complicated when we
>>> need to re-install the bitmap but the user has already installed a new
>>> bitmap with the same name, etc.
>>>
>>> So the general lifetime is this:
>>>
>>> (1) A backup is started. the block/backup routine calls 
>>> create_successor.
>>> (2) If the backup fails to start, the block/backup routine will call
>>> the "reclaim" method, which will merge the (empty) successor back into
>>> the original bitmap, unfreezing it.
>>> (3) If the backup starts, and then fails, the bitmap is "reclaim"ed
>>> (merged back into one bitmap.)
>>> (4) If the backup succeeds, the bitmap "abdicates" to the successor.
>>> (The parent bitmap is erased and the successor is installed in its
>>> place.)
>>
>> Yes, see the graph at the whiteboard behind me. :-)
>>
>> Max
John Snow Feb. 11, 2015, 9:13 p.m. UTC | #6
On 02/11/2015 12:47 PM, Max Reitz wrote:
> On 2015-02-09 at 20:35, John Snow wrote:
>> For "dirty-bitmap" sync mode, the block job will iterate through the
>> given dirty bitmap to decide if a sector needs backup (backup all the
>> dirty clusters and skip clean ones), just as allocation conditions of
>> "top" sync mode.
>>
>> Signed-off-by: Fam Zheng <famz@redhat.com>
>> Signed-off-by: John Snow <jsnow@redhat.com>
>> ---
>>   block.c                   |   9 +++
>>   block/backup.c            | 149
>> +++++++++++++++++++++++++++++++++++++++-------
>>   block/mirror.c            |   4 ++
>>   blockdev.c                |  18 +++++-
>>   hmp.c                     |   3 +-
>>   include/block/block.h     |   1 +
>>   include/block/block_int.h |   2 +
>>   qapi/block-core.json      |  13 ++--
>>   qmp-commands.hx           |   7 ++-
>>   9 files changed, 172 insertions(+), 34 deletions(-)
>>
>> diff --git a/block.c b/block.c
>> index 8d84ace..e93fceb 100644
>> --- a/block.c
>> +++ b/block.c
>> @@ -5648,6 +5648,15 @@ static void bdrv_reset_dirty(BlockDriverState
>> *bs, int64_t cur_sector,
>>       }
>>   }
>> +/**
>> + * Advance an HBitmapIter to an arbitrary offset.
>> + */
>> +void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
>> +{
>> +    assert(hbi->hb);
>> +    hbitmap_iter_init(hbi, hbi->hb, offset);
>> +}
>> +
>>   int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap
>> *bitmap)
>>   {
>>       return hbitmap_count(bitmap->bitmap);
>> diff --git a/block/backup.c b/block/backup.c
>> index 1c535b1..bf8c0e7 100644
>> --- a/block/backup.c
>> +++ b/block/backup.c
>> @@ -37,6 +37,8 @@ typedef struct CowRequest {
>>   typedef struct BackupBlockJob {
>>       BlockJob common;
>>       BlockDriverState *target;
>> +    /* bitmap for sync=dirty-bitmap */
>> +    BdrvDirtyBitmap *sync_bitmap;
>>       MirrorSyncMode sync_mode;
>>       RateLimit limit;
>>       BlockdevOnError on_source_error;
>> @@ -242,6 +244,31 @@ static void backup_complete(BlockJob *job, void
>> *opaque)
>>       g_free(data);
>>   }
>> +static bool coroutine_fn yield_and_check(BackupBlockJob *job)
>> +{
>> +    if (block_job_is_cancelled(&job->common)) {
>> +        return true;
>> +    }
>> +
>> +    /* we need to yield so that qemu_aio_flush() returns.
>> +     * (without, VM does not reboot)
>> +     */
>> +    if (job->common.speed) {
>> +        uint64_t delay_ns = ratelimit_calculate_delay(&job->limit,
>> +
>> job->sectors_read);
>> +        job->sectors_read = 0;
>> +        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
>> +    } else {
>> +        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
>> +    }
>> +
>> +    if (block_job_is_cancelled(&job->common)) {
>> +        return true;
>> +    }
>> +
>> +    return false;
>> +}
>> +
>>   static void coroutine_fn backup_run(void *opaque)
>>   {
>>       BackupBlockJob *job = opaque;
>> @@ -254,13 +281,13 @@ static void coroutine_fn backup_run(void *opaque)
>>       };
>>       int64_t start, end;
>>       int ret = 0;
>> +    bool error_is_read;
>>       QLIST_INIT(&job->inflight_reqs);
>>       qemu_co_rwlock_init(&job->flush_rwlock);
>>       start = 0;
>> -    end = DIV_ROUND_UP(job->common.len / BDRV_SECTOR_SIZE,
>> -                       BACKUP_SECTORS_PER_CLUSTER);
>> +    end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
>>       job->bitmap = hbitmap_alloc(end, 0);
>> @@ -278,28 +305,61 @@ static void coroutine_fn backup_run(void *opaque)
>>               qemu_coroutine_yield();
>>               job->common.busy = true;
>>           }
>> +    } else if (job->sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
>> +        /* Dirty Bitmap sync has a slightly different iteration
>> method */
>> +        HBitmapIter hbi;
>> +        int64_t sector;
>> +        int64_t cluster;
>> +        int64_t last_cluster = -1;
>> +        bool polyrhythmic;
>> +
>> +        bdrv_dirty_iter_init(bs, job->sync_bitmap, &hbi);
>> +        /* Does the granularity happen to match our backup cluster
>> size? */
>> +        polyrhythmic =
>> (bdrv_dirty_bitmap_granularity(job->sync_bitmap) !=
>> +                        BACKUP_CLUSTER_SIZE);
>> +
>> +        /* Find the next dirty /sector/ and copy that /cluster/ */
>> +        while ((sector = hbitmap_iter_next(&hbi)) != -1) {
>> +            if (yield_and_check(job)) {
>> +                goto leave;
>> +            }
>> +            cluster = sector / BACKUP_SECTORS_PER_CLUSTER;
>> +
>> +            /* Play some catchup with the progress meter */
>> +            if (cluster != last_cluster + 1) {
>> +                job->common.offset += ((cluster - last_cluster - 1) *
>> +                                       BACKUP_CLUSTER_SIZE);
>> +            }
>
> I guess the "- 1" in the calculation comes from backup_do_cow()
> modifying job->common.offset, too. How about simply overwriting it like
> "job->common.offset = cluster * BACKUP_CLUSTER_SIZE"? And it may be a
> good idea to move this update above yield_and_check() because that's the
> progress we have actually done up to that point (and I imagine
> yield_and_check() is the point where the caller might check the progress).
>

Yes: If the last cluster we wrote out was 2, and we're on 4 now, we only 
need to make up for #3, because #4 will be handled in the usual ways.

I don't know how wise this was, but I wanted to only "fake" progress 
updates for clusters I *know* I skipped in incremental backups, so I 
wanted to make sure the math worked out. Just overwriting the progress 
seemed like cheating a little, I guess ...

It was kind of just a "testing thing" to just do the additions and see 
if the math checked out in the end. You may notice that unlike Fam's 
iotests, I leave the progress checks enabled for my tests.

Putting it above the cancellation check does make sense, though. I 
didn't test cancellations as well as I ought have.

>> +
>> +            do {
>> +                ret = backup_do_cow(bs, cluster *
>> BACKUP_SECTORS_PER_CLUSTER,
>> +                                    BACKUP_SECTORS_PER_CLUSTER,
>> &error_is_read);
>> +                if ((ret < 0) &&
>> +                    backup_error_action(job, error_is_read, -ret) ==
>> +                    BLOCK_ERROR_ACTION_REPORT) {
>> +                    goto leave;
>> +                }
>> +            } while (ret < 0);
>> +
>> +            /* Advance (or rewind) our iterator if we need to. */
>> +            if (polyrhythmic) {
>> +                bdrv_set_dirty_iter(&hbi,
>> +                                    (cluster + 1) *
>> BACKUP_SECTORS_PER_CLUSTER);
>> +            }
>> +
>> +            last_cluster = cluster;
>> +        }
>> +
>> +        /* Play some final catchup with the progress meter */
>> +        if (last_cluster + 1 < end) {
>> +            job->common.offset += ((end - last_cluster - 1) *
>> +                                   BACKUP_CLUSTER_SIZE);
>> +        }
>> +
>
> Superfluous empty line.
>
>>       } else {
>>           /* Both FULL and TOP SYNC_MODE's require copying.. */
>>           for (; start < end; start++) {
>> -            bool error_is_read;
>> -
>> -            if (block_job_is_cancelled(&job->common)) {
>> -                break;
>> -            }
>> -
>> -            /* we need to yield so that qemu_aio_flush() returns.
>> -             * (without, VM does not reboot)
>> -             */
>> -            if (job->common.speed) {
>> -                uint64_t delay_ns = ratelimit_calculate_delay(
>> -                        &job->limit, job->sectors_read);
>> -                job->sectors_read = 0;
>> -                block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME,
>> delay_ns);
>> -            } else {
>> -                block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME,
>> 0);
>> -            }
>> -
>> -            if (block_job_is_cancelled(&job->common)) {
>> +            if (yield_and_check(job)) {
>>                   break;
>>               }
>> @@ -351,12 +411,26 @@ static void coroutine_fn backup_run(void *opaque)
>>           }
>>       }
>> +leave:
>>       notifier_with_return_remove(&before_write);
>>       /* wait until pending backup_do_cow() calls have completed */
>>       qemu_co_rwlock_wrlock(&job->flush_rwlock);
>>       qemu_co_rwlock_unlock(&job->flush_rwlock);
>> +    if (job->sync_bitmap) {
>> +        BdrvDirtyBitmap *bm;
>> +        if (ret < 0) {
>> +            /* Merge the successor back into the parent, delete
>> nothing. */
>> +            bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL);
>> +            assert(bm);
>> +            bdrv_enable_dirty_bitmap(job->sync_bitmap);
>> +        } else {
>> +            /* Everything is fine, delete this bitmap and install the
>> backup. */
>> +            bm = bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL);
>> +            assert(bm);
>> +        }
>> +    }
>>       hbitmap_free(job->bitmap);
>>       bdrv_iostatus_disable(target);
>> @@ -369,6 +443,7 @@ static void coroutine_fn backup_run(void *opaque)
>>   void backup_start(BlockDriverState *bs, BlockDriverState *target,
>>                     int64_t speed, MirrorSyncMode sync_mode,
>> +                  BdrvDirtyBitmap *sync_bitmap,
>>                     BlockdevOnError on_source_error,
>>                     BlockdevOnError on_target_error,
>>                     BlockCompletionFunc *cb, void *opaque,
>> @@ -412,17 +487,37 @@ void backup_start(BlockDriverState *bs,
>> BlockDriverState *target,
>>           return;
>>       }
>> +    if (sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
>> +        if (!sync_bitmap) {
>> +            error_setg(errp, "must provide a valid bitmap name for "
>> +                             "\"dirty-bitmap\" sync mode");
>> +            return;
>> +        }
>> +
>> +        /* Create a new bitmap, and freeze/disable this one. */
>> +        if (bdrv_dirty_bitmap_create_successor(bs, sync_bitmap, errp)
>> < 0) {
>> +            return;
>> +        }
>> +
>
> Another superfluous empty line.
>
>> +    } else if (sync_bitmap) {
>> +        error_setg(errp,
>> +                   "a sync_bitmap was provided to backup_run, "
>> +                   "but received an incompatible sync_mode (%s)",
>> +                   MirrorSyncMode_lookup[sync_mode]);
>> +        return;
>> +    }
>> +
>>       len = bdrv_getlength(bs);
>>       if (len < 0) {
>>           error_setg_errno(errp, -len, "unable to get length for '%s'",
>>                            bdrv_get_device_name(bs));
>> -        return;
>> +        goto error;
>>       }
>>       BackupBlockJob *job = block_job_create(&backup_job_driver, bs,
>> speed,
>>                                              cb, opaque, errp);
>>       if (!job) {
>> -        return;
>> +        goto error;
>>       }
>>       bdrv_op_block_all(target, job->common.blocker);
>> @@ -431,7 +526,15 @@ void backup_start(BlockDriverState *bs,
>> BlockDriverState *target,
>>       job->on_target_error = on_target_error;
>>       job->target = target;
>>       job->sync_mode = sync_mode;
>> +    job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP ?
>> +                       sync_bitmap : NULL;
>>       job->common.len = len;
>>       job->common.co = qemu_coroutine_create(backup_run);
>>       qemu_coroutine_enter(job->common.co, job);
>> +    return;
>> +
>> + error:
>> +    if (sync_bitmap) {
>> +        bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL);
>> +    }
>>   }
>> diff --git a/block/mirror.c b/block/mirror.c
>> index 77bd1ed..271dbf3 100644
>> --- a/block/mirror.c
>> +++ b/block/mirror.c
>> @@ -718,6 +718,10 @@ void mirror_start(BlockDriverState *bs,
>> BlockDriverState *target,
>>       bool is_none_mode;
>>       BlockDriverState *base;
>> +    if (mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
>> +        error_setg(errp, "Sync mode 'dirty-bitmap' not supported");
>> +        return;
>> +    }
>>       is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
>>       base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
>>       mirror_start_job(bs, target, replaces,
>> diff --git a/blockdev.c b/blockdev.c
>> index 826d0c1..3f41e82 100644
>> --- a/blockdev.c
>> +++ b/blockdev.c
>> @@ -1569,6 +1569,7 @@ static void
>> drive_backup_prepare(BlkTransactionState *common, Error **errp)
>>                        backup->sync,
>>                        backup->has_mode, backup->mode,
>>                        backup->has_speed, backup->speed,
>> +                     backup->has_bitmap, backup->bitmap,
>>                        backup->has_on_source_error,
>> backup->on_source_error,
>>                        backup->has_on_target_error,
>> backup->on_target_error,
>>                        &local_err);
>> @@ -2429,6 +2430,7 @@ void qmp_drive_backup(const char *device, const
>> char *target,
>>                         enum MirrorSyncMode sync,
>>                         bool has_mode, enum NewImageMode mode,
>>                         bool has_speed, int64_t speed,
>> +                      bool has_bitmap, const char *bitmap,
>>                         bool has_on_source_error, BlockdevOnError
>> on_source_error,
>>                         bool has_on_target_error, BlockdevOnError
>> on_target_error,
>>                         Error **errp)
>> @@ -2436,6 +2438,7 @@ void qmp_drive_backup(const char *device, const
>> char *target,
>>       BlockDriverState *bs;
>>       BlockDriverState *target_bs;
>>       BlockDriverState *source = NULL;
>> +    BdrvDirtyBitmap *bmap = NULL;
>>       AioContext *aio_context;
>>       BlockDriver *drv = NULL;
>>       Error *local_err = NULL;
>> @@ -2534,7 +2537,16 @@ void qmp_drive_backup(const char *device, const
>> char *target,
>>       bdrv_set_aio_context(target_bs, aio_context);
>> -    backup_start(bs, target_bs, speed, sync, on_source_error,
>> on_target_error,
>> +    if (has_bitmap) {
>> +        bmap = bdrv_find_dirty_bitmap(bs, bitmap);
>> +        if (!bmap) {
>> +            error_setg(errp, "Bitmap '%s' could not be found", bitmap);
>> +            goto out;
>> +        }
>> +    }
>> +
>> +    backup_start(bs, target_bs, speed, sync, bmap,
>> +                 on_source_error, on_target_error,
>>                    block_job_cb, bs, &local_err);
>>       if (local_err != NULL) {
>>           bdrv_unref(target_bs);
>> @@ -2592,8 +2604,8 @@ void qmp_blockdev_backup(const char *device,
>> const char *target,
>>       bdrv_ref(target_bs);
>>       bdrv_set_aio_context(target_bs, aio_context);
>> -    backup_start(bs, target_bs, speed, sync, on_source_error,
>> on_target_error,
>> -                 block_job_cb, bs, &local_err);
>> +    backup_start(bs, target_bs, speed, sync, NULL, on_source_error,
>> +                 on_target_error, block_job_cb, bs, &local_err);
>>       if (local_err != NULL) {
>>           bdrv_unref(target_bs);
>>           error_propagate(errp, local_err);
>> diff --git a/hmp.c b/hmp.c
>> index b47f331..015499f 100644
>> --- a/hmp.c
>> +++ b/hmp.c
>> @@ -1027,7 +1027,8 @@ void hmp_drive_backup(Monitor *mon, const QDict
>> *qdict)
>>       qmp_drive_backup(device, filename, !!format, format,
>>                        full ? MIRROR_SYNC_MODE_FULL :
>> MIRROR_SYNC_MODE_TOP,
>> -                     true, mode, false, 0, false, 0, false, 0, &err);
>> +                     true, mode, false, 0, false, NULL,
>> +                     false, 0, false, 0, &err);
>>       hmp_handle_error(mon, &err);
>>   }
>> diff --git a/include/block/block.h b/include/block/block.h
>> index b2d84d6..8589e77 100644
>> --- a/include/block/block.h
>> +++ b/include/block/block.h
>> @@ -469,6 +469,7 @@ void bdrv_reset_dirty_bitmap(BlockDriverState *bs,
>> BdrvDirtyBitmap *bitmap,
>>                                int64_t cur_sector, int nr_sectors);
>>   void bdrv_dirty_iter_init(BlockDriverState *bs,
>>                             BdrvDirtyBitmap *bitmap, struct
>> HBitmapIter *hbi);
>> +void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset);
>>   int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap
>> *bitmap);
>>   void bdrv_enable_copy_on_read(BlockDriverState *bs);
>> diff --git a/include/block/block_int.h b/include/block/block_int.h
>> index 7ad1950..2233790 100644
>> --- a/include/block/block_int.h
>> +++ b/include/block/block_int.h
>> @@ -588,6 +588,7 @@ void mirror_start(BlockDriverState *bs,
>> BlockDriverState *target,
>>    * @target: Block device to write to.
>>    * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
>>    * @sync_mode: What parts of the disk image should be copied to the
>> destination.
>> + * @sync_bitmap: The dirty bitmap if sync_mode is
>> MIRROR_SYNC_MODE_DIRTY_BITMAP.
>>    * @on_source_error: The action to take upon error reading from the
>> source.
>>    * @on_target_error: The action to take upon error writing to the
>> target.
>>    * @cb: Completion function for the job.
>> @@ -598,6 +599,7 @@ void mirror_start(BlockDriverState *bs,
>> BlockDriverState *target,
>>    */
>>   void backup_start(BlockDriverState *bs, BlockDriverState *target,
>>                     int64_t speed, MirrorSyncMode sync_mode,
>> +                  BdrvDirtyBitmap *sync_bitmap,
>>                     BlockdevOnError on_source_error,
>>                     BlockdevOnError on_target_error,
>>                     BlockCompletionFunc *cb, void *opaque,
>> diff --git a/qapi/block-core.json b/qapi/block-core.json
>> index 940eff7..9c5a99c 100644
>> --- a/qapi/block-core.json
>> +++ b/qapi/block-core.json
>> @@ -508,10 +508,12 @@
>>   #
>>   # @none: only copy data written from now on
>>   #
>> +# @dirty-bitmap: only copy data described by the dirty bitmap. Since:
>> 2.3
>> +#
>>   # Since: 1.3
>>   ##
>>   { 'enum': 'MirrorSyncMode',
>> -  'data': ['top', 'full', 'none'] }
>> +  'data': ['top', 'full', 'none', 'dirty-bitmap'] }
>>   ##
>>   # @BlockJobType:
>> @@ -686,14 +688,17 @@
>>   #          probe if @mode is 'existing', else the format of the source
>>   #
>>   # @sync: what parts of the disk image should be copied to the
>> destination
>> -#        (all the disk, only the sectors allocated in the topmost
>> image, or
>> -#        only new I/O).
>> +#        (all the disk, only the sectors allocated in the topmost
>> image, from a
>> +#        dirty bitmap, or only new I/O).
>>   #
>>   # @mode: #optional whether and how QEMU should create a new image,
>> default is
>>   #        'absolute-paths'.
>>   #
>>   # @speed: #optional the maximum speed, in bytes per second
>>   #
>> +# @bitmap: #optional the name of dirty bitmap if sync is "dirty-bitmap"
>> +#          (Since 2.3)
>> +#
>>   # @on-source-error: #optional the action to take on an error on the
>> source,
>>   #                   default 'report'.  'stop' and 'enospc' can only
>> be used
>>   #                   if the block device supports io-status (see
>> BlockInfo).
>> @@ -711,7 +716,7 @@
>>   { 'type': 'DriveBackup',
>>     'data': { 'device': 'str', 'target': 'str', '*format': 'str',
>>               'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
>> -            '*speed': 'int',
>> +            '*speed': 'int', '*bitmap': 'str',
>>               '*on-source-error': 'BlockdevOnError',
>>               '*on-target-error': 'BlockdevOnError' } }
>> diff --git a/qmp-commands.hx b/qmp-commands.hx
>> index ce7782f..5aa3845 100644
>> --- a/qmp-commands.hx
>> +++ b/qmp-commands.hx
>> @@ -1048,7 +1048,7 @@ EQMP
>>       {
>>           .name       = "drive-backup",
>>           .args_type  =
>> "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?,"
>> -                      "on-source-error:s?,on-target-error:s?",
>> +                      "bitmap:s?,on-source-error:s?,on-target-error:s?",
>>           .mhandler.cmd_new = qmp_marshal_input_drive_backup,
>>       },
>> @@ -1075,8 +1075,9 @@ Arguments:
>>               (json-string, optional)
>>   - "sync": what parts of the disk image should be copied to the
>> destination;
>>     possibilities include "full" for all the disk, "top" for only the
>> sectors
>> -  allocated in the topmost image, or "none" to only replicate new I/O
>> -  (MirrorSyncMode).
>> +  allocated in the topmost image, "dirty-bitmap" for only the dirty
>> sectors in
>> +  the bitmap, or "none" to only replicate new I/O (MirrorSyncMode).
>> +- "bitmap": dirty bitmap name for sync==dirty-bitmap
>>   - "mode": whether and how QEMU should create a new image
>>             (NewImageMode, optional, default 'absolute-paths')
>>   - "speed": the maximum speed, in bytes per second (json-int, optional)
>
> Looks good to me in general, now I need to find out what the successor
> bitmap is used for; but I guess I'll find that out by reviewing the rest
> of this series.
>
> Max
Vladimir Sementsov-Ogievskiy Feb. 13, 2015, 5:33 p.m. UTC | #7
On 10.02.2015 04:35, John Snow wrote:
> ......
>   
> @@ -278,28 +305,61 @@ static void coroutine_fn backup_run(void *opaque)
>               qemu_coroutine_yield();
>               job->common.busy = true;
>           }
> +    } else if (job->sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
> +        /* Dirty Bitmap sync has a slightly different iteration method */
> +        HBitmapIter hbi;
> +        int64_t sector;
> +        int64_t cluster;
> +        int64_t last_cluster = -1;
> +        bool polyrhythmic;
> +
> +        bdrv_dirty_iter_init(bs, job->sync_bitmap, &hbi);
> +        /* Does the granularity happen to match our backup cluster size? */
> +        polyrhythmic = (bdrv_dirty_bitmap_granularity(job->sync_bitmap) !=
> +                        BACKUP_CLUSTER_SIZE);
let it be false, i.e. granularity == cluster
> +
> +        /* Find the next dirty /sector/ and copy that /cluster/ */
> +        while ((sector = hbitmap_iter_next(&hbi)) != -1) {
then, don't we skip here the very first cluster, if it is dirty?
> +            if (yield_and_check(job)) {
> +                goto leave;
> +            }
> +            cluster = sector / BACKUP_SECTORS_PER_CLUSTER;
> +
> +            /* Play some catchup with the progress meter */
> +            if (cluster != last_cluster + 1) {
> +                job->common.offset += ((cluster - last_cluster - 1) *
> +                                       BACKUP_CLUSTER_SIZE);
> +            }
> +
> +            do {
> +                ret = backup_do_cow(bs, cluster * BACKUP_SECTORS_PER_CLUSTER,
> +                                    BACKUP_SECTORS_PER_CLUSTER, &error_is_read);
> +                if ((ret < 0) &&
> +                    backup_error_action(job, error_is_read, -ret) ==
> +                    BLOCK_ERROR_ACTION_REPORT) {
> +                    goto leave;
> +                }
> +            } while (ret < 0);
> +
> +            /* Advance (or rewind) our iterator if we need to. */
> +            if (polyrhythmic) {
> +                bdrv_set_dirty_iter(&hbi,
> +                                    (cluster + 1) * BACKUP_SECTORS_PER_CLUSTER);
> +            }
> +
> +            last_cluster = cluster;
> +        }
> +
> +        /* Play some final catchup with the progress meter */
> +        if (last_cluster + 1 < end) {
> +            job->common.offset += ((end - last_cluster - 1) *
> +                                   BACKUP_CLUSTER_SIZE);
> +        }
> +
>       } else {
>
John Snow Feb. 13, 2015, 6:35 p.m. UTC | #8
On 02/13/2015 12:33 PM, Vladimir Sementsov-Ogievskiy wrote:
> On 10.02.2015 04:35, John Snow wrote:
>> ......
>> @@ -278,28 +305,61 @@ static void coroutine_fn backup_run(void *opaque)
>>               qemu_coroutine_yield();
>>               job->common.busy = true;
>>           }
>> +    } else if (job->sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
>> +        /* Dirty Bitmap sync has a slightly different iteration
>> method */
>> +        HBitmapIter hbi;
>> +        int64_t sector;
>> +        int64_t cluster;
>> +        int64_t last_cluster = -1;
>> +        bool polyrhythmic;
>> +
>> +        bdrv_dirty_iter_init(bs, job->sync_bitmap, &hbi);
>> +        /* Does the granularity happen to match our backup cluster
>> size? */
>> +        polyrhythmic =
>> (bdrv_dirty_bitmap_granularity(job->sync_bitmap) !=
>> +                        BACKUP_CLUSTER_SIZE);
> let it be false, i.e. granularity == cluster
>> +
>> +        /* Find the next dirty /sector/ and copy that /cluster/ */
>> +        while ((sector = hbitmap_iter_next(&hbi)) != -1) {
> then, don't we skip here the very first cluster, if it is dirty?

I don't think so. The HBitmapIterator is still in its initial state 
here, and it can and will return 0 if the first sector is dirty.

The iotest submitted in v12 tests writes to the very first sector, and I 
just verified it quickly that this function *does* return 0 for the 
first go-around.

>> +            if (yield_and_check(job)) {
>> +                goto leave;
>> +            }
>> +            cluster = sector / BACKUP_SECTORS_PER_CLUSTER;
>> +
>> +            /* Play some catchup with the progress meter */
>> +            if (cluster != last_cluster + 1) {
>> +                job->common.offset += ((cluster - last_cluster - 1) *
>> +                                       BACKUP_CLUSTER_SIZE);
>> +            }
>> +
>> +            do {
>> +                ret = backup_do_cow(bs, cluster *
>> BACKUP_SECTORS_PER_CLUSTER,
>> +                                    BACKUP_SECTORS_PER_CLUSTER,
>> &error_is_read);
>> +                if ((ret < 0) &&
>> +                    backup_error_action(job, error_is_read, -ret) ==
>> +                    BLOCK_ERROR_ACTION_REPORT) {
>> +                    goto leave;
>> +                }
>> +            } while (ret < 0);
>> +
>> +            /* Advance (or rewind) our iterator if we need to. */
>> +            if (polyrhythmic) {
>> +                bdrv_set_dirty_iter(&hbi,
>> +                                    (cluster + 1) *
>> BACKUP_SECTORS_PER_CLUSTER);
>> +            }
>> +
>> +            last_cluster = cluster;
>> +        }
>> +
>> +        /* Play some final catchup with the progress meter */
>> +        if (last_cluster + 1 < end) {
>> +            job->common.offset += ((end - last_cluster - 1) *
>> +                                   BACKUP_CLUSTER_SIZE);
>> +        }
>> +
>>       } else {
>
diff mbox

Patch

diff --git a/block.c b/block.c
index 8d84ace..e93fceb 100644
--- a/block.c
+++ b/block.c
@@ -5648,6 +5648,15 @@  static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
     }
 }
 
+/**
+ * Advance an HBitmapIter to an arbitrary offset.
+ */
+void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
+{
+    assert(hbi->hb);
+    hbitmap_iter_init(hbi, hbi->hb, offset);
+}
+
 int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
 {
     return hbitmap_count(bitmap->bitmap);
diff --git a/block/backup.c b/block/backup.c
index 1c535b1..bf8c0e7 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -37,6 +37,8 @@  typedef struct CowRequest {
 typedef struct BackupBlockJob {
     BlockJob common;
     BlockDriverState *target;
+    /* bitmap for sync=dirty-bitmap */
+    BdrvDirtyBitmap *sync_bitmap;
     MirrorSyncMode sync_mode;
     RateLimit limit;
     BlockdevOnError on_source_error;
@@ -242,6 +244,31 @@  static void backup_complete(BlockJob *job, void *opaque)
     g_free(data);
 }
 
+static bool coroutine_fn yield_and_check(BackupBlockJob *job)
+{
+    if (block_job_is_cancelled(&job->common)) {
+        return true;
+    }
+
+    /* we need to yield so that qemu_aio_flush() returns.
+     * (without, VM does not reboot)
+     */
+    if (job->common.speed) {
+        uint64_t delay_ns = ratelimit_calculate_delay(&job->limit,
+                                                      job->sectors_read);
+        job->sectors_read = 0;
+        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
+    } else {
+        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
+    }
+
+    if (block_job_is_cancelled(&job->common)) {
+        return true;
+    }
+
+    return false;
+}
+
 static void coroutine_fn backup_run(void *opaque)
 {
     BackupBlockJob *job = opaque;
@@ -254,13 +281,13 @@  static void coroutine_fn backup_run(void *opaque)
     };
     int64_t start, end;
     int ret = 0;
+    bool error_is_read;
 
     QLIST_INIT(&job->inflight_reqs);
     qemu_co_rwlock_init(&job->flush_rwlock);
 
     start = 0;
-    end = DIV_ROUND_UP(job->common.len / BDRV_SECTOR_SIZE,
-                       BACKUP_SECTORS_PER_CLUSTER);
+    end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
 
     job->bitmap = hbitmap_alloc(end, 0);
 
@@ -278,28 +305,61 @@  static void coroutine_fn backup_run(void *opaque)
             qemu_coroutine_yield();
             job->common.busy = true;
         }
+    } else if (job->sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
+        /* Dirty Bitmap sync has a slightly different iteration method */
+        HBitmapIter hbi;
+        int64_t sector;
+        int64_t cluster;
+        int64_t last_cluster = -1;
+        bool polyrhythmic;
+
+        bdrv_dirty_iter_init(bs, job->sync_bitmap, &hbi);
+        /* Does the granularity happen to match our backup cluster size? */
+        polyrhythmic = (bdrv_dirty_bitmap_granularity(job->sync_bitmap) !=
+                        BACKUP_CLUSTER_SIZE);
+
+        /* Find the next dirty /sector/ and copy that /cluster/ */
+        while ((sector = hbitmap_iter_next(&hbi)) != -1) {
+            if (yield_and_check(job)) {
+                goto leave;
+            }
+            cluster = sector / BACKUP_SECTORS_PER_CLUSTER;
+
+            /* Play some catchup with the progress meter */
+            if (cluster != last_cluster + 1) {
+                job->common.offset += ((cluster - last_cluster - 1) *
+                                       BACKUP_CLUSTER_SIZE);
+            }
+
+            do {
+                ret = backup_do_cow(bs, cluster * BACKUP_SECTORS_PER_CLUSTER,
+                                    BACKUP_SECTORS_PER_CLUSTER, &error_is_read);
+                if ((ret < 0) &&
+                    backup_error_action(job, error_is_read, -ret) ==
+                    BLOCK_ERROR_ACTION_REPORT) {
+                    goto leave;
+                }
+            } while (ret < 0);
+
+            /* Advance (or rewind) our iterator if we need to. */
+            if (polyrhythmic) {
+                bdrv_set_dirty_iter(&hbi,
+                                    (cluster + 1) * BACKUP_SECTORS_PER_CLUSTER);
+            }
+
+            last_cluster = cluster;
+        }
+
+        /* Play some final catchup with the progress meter */
+        if (last_cluster + 1 < end) {
+            job->common.offset += ((end - last_cluster - 1) *
+                                   BACKUP_CLUSTER_SIZE);
+        }
+
     } else {
         /* Both FULL and TOP SYNC_MODE's require copying.. */
         for (; start < end; start++) {
-            bool error_is_read;
-
-            if (block_job_is_cancelled(&job->common)) {
-                break;
-            }
-
-            /* we need to yield so that qemu_aio_flush() returns.
-             * (without, VM does not reboot)
-             */
-            if (job->common.speed) {
-                uint64_t delay_ns = ratelimit_calculate_delay(
-                        &job->limit, job->sectors_read);
-                job->sectors_read = 0;
-                block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
-            } else {
-                block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
-            }
-
-            if (block_job_is_cancelled(&job->common)) {
+            if (yield_and_check(job)) {
                 break;
             }
 
@@ -351,12 +411,26 @@  static void coroutine_fn backup_run(void *opaque)
         }
     }
 
+leave:
     notifier_with_return_remove(&before_write);
 
     /* wait until pending backup_do_cow() calls have completed */
     qemu_co_rwlock_wrlock(&job->flush_rwlock);
     qemu_co_rwlock_unlock(&job->flush_rwlock);
 
+    if (job->sync_bitmap) {
+        BdrvDirtyBitmap *bm;
+        if (ret < 0) {
+            /* Merge the successor back into the parent, delete nothing. */
+            bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL);
+            assert(bm);
+            bdrv_enable_dirty_bitmap(job->sync_bitmap);
+        } else {
+            /* Everything is fine, delete this bitmap and install the backup. */
+            bm = bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL);
+            assert(bm);
+        }
+    }
     hbitmap_free(job->bitmap);
 
     bdrv_iostatus_disable(target);
@@ -369,6 +443,7 @@  static void coroutine_fn backup_run(void *opaque)
 
 void backup_start(BlockDriverState *bs, BlockDriverState *target,
                   int64_t speed, MirrorSyncMode sync_mode,
+                  BdrvDirtyBitmap *sync_bitmap,
                   BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
                   BlockCompletionFunc *cb, void *opaque,
@@ -412,17 +487,37 @@  void backup_start(BlockDriverState *bs, BlockDriverState *target,
         return;
     }
 
+    if (sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
+        if (!sync_bitmap) {
+            error_setg(errp, "must provide a valid bitmap name for "
+                             "\"dirty-bitmap\" sync mode");
+            return;
+        }
+
+        /* Create a new bitmap, and freeze/disable this one. */
+        if (bdrv_dirty_bitmap_create_successor(bs, sync_bitmap, errp) < 0) {
+            return;
+        }
+
+    } else if (sync_bitmap) {
+        error_setg(errp,
+                   "a sync_bitmap was provided to backup_run, "
+                   "but received an incompatible sync_mode (%s)",
+                   MirrorSyncMode_lookup[sync_mode]);
+        return;
+    }
+
     len = bdrv_getlength(bs);
     if (len < 0) {
         error_setg_errno(errp, -len, "unable to get length for '%s'",
                          bdrv_get_device_name(bs));
-        return;
+        goto error;
     }
 
     BackupBlockJob *job = block_job_create(&backup_job_driver, bs, speed,
                                            cb, opaque, errp);
     if (!job) {
-        return;
+        goto error;
     }
 
     bdrv_op_block_all(target, job->common.blocker);
@@ -431,7 +526,15 @@  void backup_start(BlockDriverState *bs, BlockDriverState *target,
     job->on_target_error = on_target_error;
     job->target = target;
     job->sync_mode = sync_mode;
+    job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP ?
+                       sync_bitmap : NULL;
     job->common.len = len;
     job->common.co = qemu_coroutine_create(backup_run);
     qemu_coroutine_enter(job->common.co, job);
+    return;
+
+ error:
+    if (sync_bitmap) {
+        bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL);
+    }
 }
diff --git a/block/mirror.c b/block/mirror.c
index 77bd1ed..271dbf3 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -718,6 +718,10 @@  void mirror_start(BlockDriverState *bs, BlockDriverState *target,
     bool is_none_mode;
     BlockDriverState *base;
 
+    if (mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
+        error_setg(errp, "Sync mode 'dirty-bitmap' not supported");
+        return;
+    }
     is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
     base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
     mirror_start_job(bs, target, replaces,
diff --git a/blockdev.c b/blockdev.c
index 826d0c1..3f41e82 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1569,6 +1569,7 @@  static void drive_backup_prepare(BlkTransactionState *common, Error **errp)
                      backup->sync,
                      backup->has_mode, backup->mode,
                      backup->has_speed, backup->speed,
+                     backup->has_bitmap, backup->bitmap,
                      backup->has_on_source_error, backup->on_source_error,
                      backup->has_on_target_error, backup->on_target_error,
                      &local_err);
@@ -2429,6 +2430,7 @@  void qmp_drive_backup(const char *device, const char *target,
                       enum MirrorSyncMode sync,
                       bool has_mode, enum NewImageMode mode,
                       bool has_speed, int64_t speed,
+                      bool has_bitmap, const char *bitmap,
                       bool has_on_source_error, BlockdevOnError on_source_error,
                       bool has_on_target_error, BlockdevOnError on_target_error,
                       Error **errp)
@@ -2436,6 +2438,7 @@  void qmp_drive_backup(const char *device, const char *target,
     BlockDriverState *bs;
     BlockDriverState *target_bs;
     BlockDriverState *source = NULL;
+    BdrvDirtyBitmap *bmap = NULL;
     AioContext *aio_context;
     BlockDriver *drv = NULL;
     Error *local_err = NULL;
@@ -2534,7 +2537,16 @@  void qmp_drive_backup(const char *device, const char *target,
 
     bdrv_set_aio_context(target_bs, aio_context);
 
-    backup_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
+    if (has_bitmap) {
+        bmap = bdrv_find_dirty_bitmap(bs, bitmap);
+        if (!bmap) {
+            error_setg(errp, "Bitmap '%s' could not be found", bitmap);
+            goto out;
+        }
+    }
+
+    backup_start(bs, target_bs, speed, sync, bmap,
+                 on_source_error, on_target_error,
                  block_job_cb, bs, &local_err);
     if (local_err != NULL) {
         bdrv_unref(target_bs);
@@ -2592,8 +2604,8 @@  void qmp_blockdev_backup(const char *device, const char *target,
 
     bdrv_ref(target_bs);
     bdrv_set_aio_context(target_bs, aio_context);
-    backup_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
-                 block_job_cb, bs, &local_err);
+    backup_start(bs, target_bs, speed, sync, NULL, on_source_error,
+                 on_target_error, block_job_cb, bs, &local_err);
     if (local_err != NULL) {
         bdrv_unref(target_bs);
         error_propagate(errp, local_err);
diff --git a/hmp.c b/hmp.c
index b47f331..015499f 100644
--- a/hmp.c
+++ b/hmp.c
@@ -1027,7 +1027,8 @@  void hmp_drive_backup(Monitor *mon, const QDict *qdict)
 
     qmp_drive_backup(device, filename, !!format, format,
                      full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
-                     true, mode, false, 0, false, 0, false, 0, &err);
+                     true, mode, false, 0, false, NULL,
+                     false, 0, false, 0, &err);
     hmp_handle_error(mon, &err);
 }
 
diff --git a/include/block/block.h b/include/block/block.h
index b2d84d6..8589e77 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -469,6 +469,7 @@  void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
                              int64_t cur_sector, int nr_sectors);
 void bdrv_dirty_iter_init(BlockDriverState *bs,
                           BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi);
+void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset);
 int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
 
 void bdrv_enable_copy_on_read(BlockDriverState *bs);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 7ad1950..2233790 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -588,6 +588,7 @@  void mirror_start(BlockDriverState *bs, BlockDriverState *target,
  * @target: Block device to write to.
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
  * @sync_mode: What parts of the disk image should be copied to the destination.
+ * @sync_bitmap: The dirty bitmap if sync_mode is MIRROR_SYNC_MODE_DIRTY_BITMAP.
  * @on_source_error: The action to take upon error reading from the source.
  * @on_target_error: The action to take upon error writing to the target.
  * @cb: Completion function for the job.
@@ -598,6 +599,7 @@  void mirror_start(BlockDriverState *bs, BlockDriverState *target,
  */
 void backup_start(BlockDriverState *bs, BlockDriverState *target,
                   int64_t speed, MirrorSyncMode sync_mode,
+                  BdrvDirtyBitmap *sync_bitmap,
                   BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
                   BlockCompletionFunc *cb, void *opaque,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 940eff7..9c5a99c 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -508,10 +508,12 @@ 
 #
 # @none: only copy data written from now on
 #
+# @dirty-bitmap: only copy data described by the dirty bitmap. Since: 2.3
+#
 # Since: 1.3
 ##
 { 'enum': 'MirrorSyncMode',
-  'data': ['top', 'full', 'none'] }
+  'data': ['top', 'full', 'none', 'dirty-bitmap'] }
 
 ##
 # @BlockJobType:
@@ -686,14 +688,17 @@ 
 #          probe if @mode is 'existing', else the format of the source
 #
 # @sync: what parts of the disk image should be copied to the destination
-#        (all the disk, only the sectors allocated in the topmost image, or
-#        only new I/O).
+#        (all the disk, only the sectors allocated in the topmost image, from a
+#        dirty bitmap, or only new I/O).
 #
 # @mode: #optional whether and how QEMU should create a new image, default is
 #        'absolute-paths'.
 #
 # @speed: #optional the maximum speed, in bytes per second
 #
+# @bitmap: #optional the name of dirty bitmap if sync is "dirty-bitmap"
+#          (Since 2.3)
+#
 # @on-source-error: #optional the action to take on an error on the source,
 #                   default 'report'.  'stop' and 'enospc' can only be used
 #                   if the block device supports io-status (see BlockInfo).
@@ -711,7 +716,7 @@ 
 { 'type': 'DriveBackup',
   'data': { 'device': 'str', 'target': 'str', '*format': 'str',
             'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
-            '*speed': 'int',
+            '*speed': 'int', '*bitmap': 'str',
             '*on-source-error': 'BlockdevOnError',
             '*on-target-error': 'BlockdevOnError' } }
 
diff --git a/qmp-commands.hx b/qmp-commands.hx
index ce7782f..5aa3845 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1048,7 +1048,7 @@  EQMP
     {
         .name       = "drive-backup",
         .args_type  = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?,"
-                      "on-source-error:s?,on-target-error:s?",
+                      "bitmap:s?,on-source-error:s?,on-target-error:s?",
         .mhandler.cmd_new = qmp_marshal_input_drive_backup,
     },
 
@@ -1075,8 +1075,9 @@  Arguments:
             (json-string, optional)
 - "sync": what parts of the disk image should be copied to the destination;
   possibilities include "full" for all the disk, "top" for only the sectors
-  allocated in the topmost image, or "none" to only replicate new I/O
-  (MirrorSyncMode).
+  allocated in the topmost image, "dirty-bitmap" for only the dirty sectors in
+  the bitmap, or "none" to only replicate new I/O (MirrorSyncMode).
+- "bitmap": dirty bitmap name for sync==dirty-bitmap
 - "mode": whether and how QEMU should create a new image
           (NewImageMode, optional, default 'absolute-paths')
 - "speed": the maximum speed, in bytes per second (json-int, optional)