diff mbox

[v11,09/13] qmp: Add support of "dirty-bitmap" sync mode for drive-backup

Message ID 1421080265-2228-10-git-send-email-jsnow@redhat.com
State New
Headers show

Commit Message

John Snow Jan. 12, 2015, 4:31 p.m. UTC
For "dirty-bitmap" sync mode, the block job will iterate through the
given dirty bitmap to decide if a sector needs backup (backup all the
dirty clusters and skip clean ones), just as allocation conditions of
"top" sync mode.

Signed-off-by: Fam Zheng <famz@redhat.com>
Signed-off-by: John Snow <jsnow@redhat.com>
---
 block.c                   |   5 ++
 block/backup.c            | 120 ++++++++++++++++++++++++++++++++++++++--------
 block/mirror.c            |   4 ++
 blockdev.c                |  14 +++++-
 hmp.c                     |   3 +-
 include/block/block.h     |   1 +
 include/block/block_int.h |   2 +
 qapi/block-core.json      |  11 +++--
 qmp-commands.hx           |   7 +--
 9 files changed, 137 insertions(+), 30 deletions(-)

Comments

Fam Zheng Jan. 13, 2015, 9:37 a.m. UTC | #1
On Mon, 01/12 11:31, John Snow wrote:
> For "dirty-bitmap" sync mode, the block job will iterate through the
> given dirty bitmap to decide if a sector needs backup (backup all the
> dirty clusters and skip clean ones), just as allocation conditions of
> "top" sync mode.
> 
> Signed-off-by: Fam Zheng <famz@redhat.com>
> Signed-off-by: John Snow <jsnow@redhat.com>
> ---
>  block.c                   |   5 ++
>  block/backup.c            | 120 ++++++++++++++++++++++++++++++++++++++--------
>  block/mirror.c            |   4 ++
>  blockdev.c                |  14 +++++-
>  hmp.c                     |   3 +-
>  include/block/block.h     |   1 +
>  include/block/block_int.h |   2 +
>  qapi/block-core.json      |  11 +++--
>  qmp-commands.hx           |   7 +--
>  9 files changed, 137 insertions(+), 30 deletions(-)
> 
> diff --git a/block.c b/block.c
> index 3f33b9d..5eb6788 100644
> --- a/block.c
> +++ b/block.c
> @@ -5633,6 +5633,11 @@ static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
>      }
>  }
>  
> +void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
> +{
> +    hbitmap_iter_init(hbi, hbi->hb, offset);

What's the reason for this indirection? Can caller just use hbitmap_iter_init?

> +}
> +
>  int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
>  {
>      return hbitmap_count(bitmap->bitmap);
> diff --git a/block/backup.c b/block/backup.c
> index 792e655..0626a3e 100644
> --- a/block/backup.c
> +++ b/block/backup.c
> @@ -37,6 +37,8 @@ typedef struct CowRequest {
>  typedef struct BackupBlockJob {
>      BlockJob common;
>      BlockDriverState *target;
> +    /* bitmap for sync=dirty-bitmap */
> +    BdrvDirtyBitmap *sync_bitmap;
>      MirrorSyncMode sync_mode;
>      RateLimit limit;
>      BlockdevOnError on_source_error;
> @@ -242,6 +244,31 @@ static void backup_complete(BlockJob *job, void *opaque)
>      g_free(data);
>  }
>  
> +static bool coroutine_fn yield_and_check(BackupBlockJob *job)
> +{
> +    if (block_job_is_cancelled(&job->common)) {
> +        return true;
> +    }
> +
> +    /* we need to yield so that qemu_aio_flush() returns.
> +     * (without, VM does not reboot)
> +     */
> +    if (job->common.speed) {
> +        uint64_t delay_ns = ratelimit_calculate_delay(&job->limit,
> +                                                      job->sectors_read);
> +        job->sectors_read = 0;
> +        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
> +    } else {
> +        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
> +    }
> +
> +    if (block_job_is_cancelled(&job->common)) {
> +        return true;
> +    }
> +
> +    return false;
> +}
> +
>  static void coroutine_fn backup_run(void *opaque)
>  {
>      BackupBlockJob *job = opaque;
> @@ -254,13 +281,13 @@ static void coroutine_fn backup_run(void *opaque)
>      };
>      int64_t start, end;
>      int ret = 0;
> +    bool error_is_read;
>  
>      QLIST_INIT(&job->inflight_reqs);
>      qemu_co_rwlock_init(&job->flush_rwlock);
>  
>      start = 0;
> -    end = DIV_ROUND_UP(job->common.len / BDRV_SECTOR_SIZE,
> -                       BACKUP_SECTORS_PER_CLUSTER);
> +    end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
>  
>      job->bitmap = hbitmap_alloc(end, 0);
>  
> @@ -278,28 +305,45 @@ static void coroutine_fn backup_run(void *opaque)
>              qemu_coroutine_yield();
>              job->common.busy = true;
>          }
> +    } else if (job->sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
> +        /* Dirty Bitmap sync has a slightly different iteration method */
> +        HBitmapIter hbi;
> +        int64_t sector;
> +        int64_t cluster;
> +        bool polyrhythmic;
> +
> +        bdrv_dirty_iter_init(bs, job->sync_bitmap, &hbi);
> +        /* Does the granularity happen to match our backup cluster size? */
> +        polyrhythmic = (bdrv_dirty_bitmap_granularity(bs, job->sync_bitmap) !=
> +                        BACKUP_CLUSTER_SIZE);
> +
> +        /* Find the next dirty /sector/ and copy that /cluster/ */
> +        while ((sector = hbitmap_iter_next(&hbi)) != -1) {
> +            if (yield_and_check(job)) {
> +                goto leave;
> +            }
> +            cluster = sector / BACKUP_SECTORS_PER_CLUSTER;
> +
> +            do {
> +                ret = backup_do_cow(bs, cluster * BACKUP_SECTORS_PER_CLUSTER,
> +                                    BACKUP_SECTORS_PER_CLUSTER, &error_is_read);
> +                if ((ret < 0) &&
> +                    backup_error_action(job, error_is_read, -ret) ==
> +                    BLOCK_ERROR_ACTION_REPORT) {
> +                    goto leave;
> +                }
> +            } while (ret < 0);
> +
> +            /* Advance (or rewind) our iterator if we need to. */
> +            if (polyrhythmic) {
> +                bdrv_set_dirty_iter(&hbi,
> +                                    (cluster + 1) * BACKUP_SECTORS_PER_CLUSTER);
> +            }
> +        }
>      } else {
>          /* Both FULL and TOP SYNC_MODE's require copying.. */
>          for (; start < end; start++) {
> -            bool error_is_read;
> -
> -            if (block_job_is_cancelled(&job->common)) {
> -                break;
> -            }
> -
> -            /* we need to yield so that qemu_aio_flush() returns.
> -             * (without, VM does not reboot)
> -             */
> -            if (job->common.speed) {
> -                uint64_t delay_ns = ratelimit_calculate_delay(
> -                        &job->limit, job->sectors_read);
> -                job->sectors_read = 0;
> -                block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
> -            } else {
> -                block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
> -            }
> -
> -            if (block_job_is_cancelled(&job->common)) {
> +            if (yield_and_check(job)) {
>                  break;
>              }
>  
> @@ -351,12 +395,23 @@ static void coroutine_fn backup_run(void *opaque)
>          }
>      }
>  
> +leave:
>      notifier_with_return_remove(&before_write);
>  
>      /* wait until pending backup_do_cow() calls have completed */
>      qemu_co_rwlock_wrlock(&job->flush_rwlock);
>      qemu_co_rwlock_unlock(&job->flush_rwlock);
>  
> +    if (job->sync_bitmap) {
> +        if (ret < 0) {
> +            /* Merge the successor back into the parent, delete nothing. */
> +            assert(bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL));

Why can't reclaim fail here? If we canassert, please move the expression out
because it has a side effect.

> +            bdrv_enable_dirty_bitmap(job->sync_bitmap);
> +        } else {
> +            /* Everything is fine, delete this bitmap and install the backup. */
> +            assert(bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL));
> +        }
> +    }
>      hbitmap_free(job->bitmap);
>  
>      bdrv_iostatus_disable(target);
> @@ -368,6 +423,7 @@ static void coroutine_fn backup_run(void *opaque)
>  
>  void backup_start(BlockDriverState *bs, BlockDriverState *target,
>                    int64_t speed, MirrorSyncMode sync_mode,
> +                  BdrvDirtyBitmap *sync_bitmap,
>                    BlockdevOnError on_source_error,
>                    BlockdevOnError on_target_error,
>                    BlockCompletionFunc *cb, void *opaque,
> @@ -386,6 +442,26 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
>          return;
>      }
>  
> +    if (sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
> +        if (!sync_bitmap) {
> +            error_setg(errp, "must provide a valid bitmap name for "
> +                             "\"dirty-bitmap\" sync mode");
> +            return;
> +        }
> +
> +        /* Create a new bitmap, and freeze/disable this one. */
> +        if (bdrv_dirty_bitmap_create_successor(bs, sync_bitmap, errp) < 0) {
> +            return;
> +        }

There are two error paths in this function after this creation, need to release
it there?

> +
> +    } else if (sync_bitmap) {
> +        error_setg(errp,
> +                   "a sync_bitmap was provided to backup_run, "
> +                   "but received an incompatible sync_mode (%s)",
> +                   MirrorSyncMode_lookup[sync_mode]);
> +        return;
> +    }
> +
>      len = bdrv_getlength(bs);
>      if (len < 0) {
>          error_setg_errno(errp, -len, "unable to get length for '%s'",
> @@ -403,6 +479,8 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
>      job->on_target_error = on_target_error;
>      job->target = target;
>      job->sync_mode = sync_mode;
> +    job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP ?
> +                       sync_bitmap : NULL;
>      job->common.len = len;
>      job->common.co = qemu_coroutine_create(backup_run);
>      qemu_coroutine_enter(job->common.co, job);
> diff --git a/block/mirror.c b/block/mirror.c
> index fc545f1..427c688 100644
> --- a/block/mirror.c
> +++ b/block/mirror.c
> @@ -717,6 +717,10 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
>      bool is_none_mode;
>      BlockDriverState *base;
>  
> +    if (mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
> +        error_setg(errp, "Sync mode 'dirty-bitmap' not supported");
> +        return;
> +    }
>      is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
>      base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
>      mirror_start_job(bs, target, replaces,
> diff --git a/blockdev.c b/blockdev.c
> index 8dde72a..114a94c 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -1569,6 +1569,7 @@ static void drive_backup_prepare(BlkTransactionState *common, Error **errp)
>                       backup->sync,
>                       backup->has_mode, backup->mode,
>                       backup->has_speed, backup->speed,
> +                     backup->has_bitmap, backup->bitmap,
>                       backup->has_on_source_error, backup->on_source_error,
>                       backup->has_on_target_error, backup->on_target_error,
>                       &local_err);
> @@ -2358,6 +2359,7 @@ void qmp_drive_backup(const char *device, const char *target,
>                        enum MirrorSyncMode sync,
>                        bool has_mode, enum NewImageMode mode,
>                        bool has_speed, int64_t speed,
> +                      bool has_bitmap, const char *bitmap,
>                        bool has_on_source_error, BlockdevOnError on_source_error,
>                        bool has_on_target_error, BlockdevOnError on_target_error,
>                        Error **errp)
> @@ -2365,6 +2367,7 @@ void qmp_drive_backup(const char *device, const char *target,
>      BlockDriverState *bs;
>      BlockDriverState *target_bs;
>      BlockDriverState *source = NULL;
> +    BdrvDirtyBitmap *bmap = NULL;
>      AioContext *aio_context;
>      BlockDriver *drv = NULL;
>      Error *local_err = NULL;
> @@ -2460,7 +2463,16 @@ void qmp_drive_backup(const char *device, const char *target,
>  
>      bdrv_set_aio_context(target_bs, aio_context);
>  
> -    backup_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
> +    if (has_bitmap) {
> +        bmap = bdrv_find_dirty_bitmap(bs, bitmap);
> +        if (!bmap) {
> +            error_setg(errp, "Bitmap '%s' could not be found", bitmap);
> +            goto out;
> +        }
> +    }
> +
> +    backup_start(bs, target_bs, speed, sync, bmap,
> +                 on_source_error, on_target_error,
>                   block_job_cb, bs, &local_err);
>      if (local_err != NULL) {
>          bdrv_unref(target_bs);
> diff --git a/hmp.c b/hmp.c
> index 481be80..63b19c7 100644
> --- a/hmp.c
> +++ b/hmp.c
> @@ -1021,7 +1021,8 @@ void hmp_drive_backup(Monitor *mon, const QDict *qdict)
>  
>      qmp_drive_backup(device, filename, !!format, format,
>                       full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
> -                     true, mode, false, 0, false, 0, false, 0, &err);
> +                     true, mode, false, 0, false, NULL,
> +                     false, 0, false, 0, &err);
>      hmp_handle_error(mon, &err);
>  }
>  
> diff --git a/include/block/block.h b/include/block/block.h
> index 6138544..99ed679 100644
> --- a/include/block/block.h
> +++ b/include/block/block.h
> @@ -462,6 +462,7 @@ void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
>  void bdrv_clear_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
>  void bdrv_dirty_iter_init(BlockDriverState *bs,
>                            BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi);
> +void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset);
>  int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
>  
>  void bdrv_enable_copy_on_read(BlockDriverState *bs);
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index 06a21dd..cb1e4a1 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -584,6 +584,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
>   * @target: Block device to write to.
>   * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
>   * @sync_mode: What parts of the disk image should be copied to the destination.
> + * @sync_bitmap: The dirty bitmap if sync_mode is MIRROR_SYNC_MODE_DIRTY_BITMAP.
>   * @on_source_error: The action to take upon error reading from the source.
>   * @on_target_error: The action to take upon error writing to the target.
>   * @cb: Completion function for the job.
> @@ -594,6 +595,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
>   */
>  void backup_start(BlockDriverState *bs, BlockDriverState *target,
>                    int64_t speed, MirrorSyncMode sync_mode,
> +                  BdrvDirtyBitmap *sync_bitmap,
>                    BlockdevOnError on_source_error,
>                    BlockdevOnError on_target_error,
>                    BlockCompletionFunc *cb, void *opaque,
> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index 3e863b9..92c3a84 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -500,7 +500,7 @@
>  # Since: 1.3
>  ##
>  { 'enum': 'MirrorSyncMode',
> -  'data': ['top', 'full', 'none'] }
> +  'data': ['top', 'full', 'none', 'dirty-bitmap'] }

Maybe document version info for 'dirty-bitmap'? Eric?

Fam

<snip>
John Snow Jan. 13, 2015, 5:50 p.m. UTC | #2
On 01/13/2015 04:37 AM, Fam Zheng wrote:
> On Mon, 01/12 11:31, John Snow wrote:
>> For "dirty-bitmap" sync mode, the block job will iterate through the
>> given dirty bitmap to decide if a sector needs backup (backup all the
>> dirty clusters and skip clean ones), just as allocation conditions of
>> "top" sync mode.
>>
>> Signed-off-by: Fam Zheng <famz@redhat.com>
>> Signed-off-by: John Snow <jsnow@redhat.com>
>> ---
>>   block.c                   |   5 ++
>>   block/backup.c            | 120 ++++++++++++++++++++++++++++++++++++++--------
>>   block/mirror.c            |   4 ++
>>   blockdev.c                |  14 +++++-
>>   hmp.c                     |   3 +-
>>   include/block/block.h     |   1 +
>>   include/block/block_int.h |   2 +
>>   qapi/block-core.json      |  11 +++--
>>   qmp-commands.hx           |   7 +--
>>   9 files changed, 137 insertions(+), 30 deletions(-)
>>
>> diff --git a/block.c b/block.c
>> index 3f33b9d..5eb6788 100644
>> --- a/block.c
>> +++ b/block.c
>> @@ -5633,6 +5633,11 @@ static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
>>       }
>>   }
>>
>> +void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
>> +{
>> +    hbitmap_iter_init(hbi, hbi->hb, offset);
>
> What's the reason for this indirection? Can caller just use hbitmap_iter_init?
>

Essentially it is just a rename of hbitmap_iter_init to make its usage 
here clear, that we are manually advancing the pointer. How we 
accomplish that (hbitmap_iter_init) is an implementation detail.

Yes, I can just call this directly from block/backup, but at the time I 
was less sure of how I would advance the pointer, so I created a wrapper 
where I could change details as needed, if I needed to.

>> +}
>> +
>>   int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
>>   {
>>       return hbitmap_count(bitmap->bitmap);
>> diff --git a/block/backup.c b/block/backup.c
>> index 792e655..0626a3e 100644
>> --- a/block/backup.c
>> +++ b/block/backup.c
>> @@ -37,6 +37,8 @@ typedef struct CowRequest {
>>   typedef struct BackupBlockJob {
>>       BlockJob common;
>>       BlockDriverState *target;
>> +    /* bitmap for sync=dirty-bitmap */
>> +    BdrvDirtyBitmap *sync_bitmap;
>>       MirrorSyncMode sync_mode;
>>       RateLimit limit;
>>       BlockdevOnError on_source_error;
>> @@ -242,6 +244,31 @@ static void backup_complete(BlockJob *job, void *opaque)
>>       g_free(data);
>>   }
>>
>> +static bool coroutine_fn yield_and_check(BackupBlockJob *job)
>> +{
>> +    if (block_job_is_cancelled(&job->common)) {
>> +        return true;
>> +    }
>> +
>> +    /* we need to yield so that qemu_aio_flush() returns.
>> +     * (without, VM does not reboot)
>> +     */
>> +    if (job->common.speed) {
>> +        uint64_t delay_ns = ratelimit_calculate_delay(&job->limit,
>> +                                                      job->sectors_read);
>> +        job->sectors_read = 0;
>> +        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
>> +    } else {
>> +        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
>> +    }
>> +
>> +    if (block_job_is_cancelled(&job->common)) {
>> +        return true;
>> +    }
>> +
>> +    return false;
>> +}
>> +
>>   static void coroutine_fn backup_run(void *opaque)
>>   {
>>       BackupBlockJob *job = opaque;
>> @@ -254,13 +281,13 @@ static void coroutine_fn backup_run(void *opaque)
>>       };
>>       int64_t start, end;
>>       int ret = 0;
>> +    bool error_is_read;
>>
>>       QLIST_INIT(&job->inflight_reqs);
>>       qemu_co_rwlock_init(&job->flush_rwlock);
>>
>>       start = 0;
>> -    end = DIV_ROUND_UP(job->common.len / BDRV_SECTOR_SIZE,
>> -                       BACKUP_SECTORS_PER_CLUSTER);
>> +    end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
>>
>>       job->bitmap = hbitmap_alloc(end, 0);
>>
>> @@ -278,28 +305,45 @@ static void coroutine_fn backup_run(void *opaque)
>>               qemu_coroutine_yield();
>>               job->common.busy = true;
>>           }
>> +    } else if (job->sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
>> +        /* Dirty Bitmap sync has a slightly different iteration method */
>> +        HBitmapIter hbi;
>> +        int64_t sector;
>> +        int64_t cluster;
>> +        bool polyrhythmic;
>> +
>> +        bdrv_dirty_iter_init(bs, job->sync_bitmap, &hbi);
>> +        /* Does the granularity happen to match our backup cluster size? */
>> +        polyrhythmic = (bdrv_dirty_bitmap_granularity(bs, job->sync_bitmap) !=
>> +                        BACKUP_CLUSTER_SIZE);
>> +
>> +        /* Find the next dirty /sector/ and copy that /cluster/ */
>> +        while ((sector = hbitmap_iter_next(&hbi)) != -1) {
>> +            if (yield_and_check(job)) {
>> +                goto leave;
>> +            }
>> +            cluster = sector / BACKUP_SECTORS_PER_CLUSTER;
>> +
>> +            do {
>> +                ret = backup_do_cow(bs, cluster * BACKUP_SECTORS_PER_CLUSTER,
>> +                                    BACKUP_SECTORS_PER_CLUSTER, &error_is_read);
>> +                if ((ret < 0) &&
>> +                    backup_error_action(job, error_is_read, -ret) ==
>> +                    BLOCK_ERROR_ACTION_REPORT) {
>> +                    goto leave;
>> +                }
>> +            } while (ret < 0);
>> +
>> +            /* Advance (or rewind) our iterator if we need to. */
>> +            if (polyrhythmic) {
>> +                bdrv_set_dirty_iter(&hbi,
>> +                                    (cluster + 1) * BACKUP_SECTORS_PER_CLUSTER);
>> +            }
>> +        }
>>       } else {
>>           /* Both FULL and TOP SYNC_MODE's require copying.. */
>>           for (; start < end; start++) {
>> -            bool error_is_read;
>> -
>> -            if (block_job_is_cancelled(&job->common)) {
>> -                break;
>> -            }
>> -
>> -            /* we need to yield so that qemu_aio_flush() returns.
>> -             * (without, VM does not reboot)
>> -             */
>> -            if (job->common.speed) {
>> -                uint64_t delay_ns = ratelimit_calculate_delay(
>> -                        &job->limit, job->sectors_read);
>> -                job->sectors_read = 0;
>> -                block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
>> -            } else {
>> -                block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
>> -            }
>> -
>> -            if (block_job_is_cancelled(&job->common)) {
>> +            if (yield_and_check(job)) {
>>                   break;
>>               }
>>
>> @@ -351,12 +395,23 @@ static void coroutine_fn backup_run(void *opaque)
>>           }
>>       }
>>
>> +leave:
>>       notifier_with_return_remove(&before_write);
>>
>>       /* wait until pending backup_do_cow() calls have completed */
>>       qemu_co_rwlock_wrlock(&job->flush_rwlock);
>>       qemu_co_rwlock_unlock(&job->flush_rwlock);
>>
>> +    if (job->sync_bitmap) {
>> +        if (ret < 0) {
>> +            /* Merge the successor back into the parent, delete nothing. */
>> +            assert(bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL));
>
> Why can't reclaim fail here? If we canassert, please move the expression out
> because it has a side effect.
>

It shouldn't fail; if it does, something went very wrong. The only 
chance this has to fail is if the sync bitmap has no successor, but we 
explicitly installed one (and explicitly check that it succeeded) before 
going into the block backup.

I am not sure what other meaningful recovery could happen in this case, 
though we *could* just report an error and continue on, acknowledging 
that the BdrvDirtyBitmap is now compromised and of questionable validity.

Does anyone have an error handling preference here?

>> +            bdrv_enable_dirty_bitmap(job->sync_bitmap);
>> +        } else {
>> +            /* Everything is fine, delete this bitmap and install the backup. */
>> +            assert(bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL));
>> +        }
>> +    }
>>       hbitmap_free(job->bitmap);
>>
>>       bdrv_iostatus_disable(target);
>> @@ -368,6 +423,7 @@ static void coroutine_fn backup_run(void *opaque)
>>
>>   void backup_start(BlockDriverState *bs, BlockDriverState *target,
>>                     int64_t speed, MirrorSyncMode sync_mode,
>> +                  BdrvDirtyBitmap *sync_bitmap,
>>                     BlockdevOnError on_source_error,
>>                     BlockdevOnError on_target_error,
>>                     BlockCompletionFunc *cb, void *opaque,
>> @@ -386,6 +442,26 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
>>           return;
>>       }
>>
>> +    if (sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
>> +        if (!sync_bitmap) {
>> +            error_setg(errp, "must provide a valid bitmap name for "
>> +                             "\"dirty-bitmap\" sync mode");
>> +            return;
>> +        }
>> +
>> +        /* Create a new bitmap, and freeze/disable this one. */
>> +        if (bdrv_dirty_bitmap_create_successor(bs, sync_bitmap, errp) < 0) {
>> +            return;
>> +        }
>
> There are two error paths in this function after this creation, need to release
> it there?
>

Oh, yes. I will do two things:

(1) Add an optimization to reclaim such that if one of the bitmaps is 
empty, it just returns the other one, and
(2) Add a reclaim call in the error pathway.

>> +
>> +    } else if (sync_bitmap) {
>> +        error_setg(errp,
>> +                   "a sync_bitmap was provided to backup_run, "
>> +                   "but received an incompatible sync_mode (%s)",
>> +                   MirrorSyncMode_lookup[sync_mode]);
>> +        return;
>> +    }
>> +
>>       len = bdrv_getlength(bs);
>>       if (len < 0) {
>>           error_setg_errno(errp, -len, "unable to get length for '%s'",
>> @@ -403,6 +479,8 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
>>       job->on_target_error = on_target_error;
>>       job->target = target;
>>       job->sync_mode = sync_mode;
>> +    job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP ?
>> +                       sync_bitmap : NULL;
>>       job->common.len = len;
>>       job->common.co = qemu_coroutine_create(backup_run);
>>       qemu_coroutine_enter(job->common.co, job);
>> diff --git a/block/mirror.c b/block/mirror.c
>> index fc545f1..427c688 100644
>> --- a/block/mirror.c
>> +++ b/block/mirror.c
>> @@ -717,6 +717,10 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
>>       bool is_none_mode;
>>       BlockDriverState *base;
>>
>> +    if (mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
>> +        error_setg(errp, "Sync mode 'dirty-bitmap' not supported");
>> +        return;
>> +    }
>>       is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
>>       base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
>>       mirror_start_job(bs, target, replaces,
>> diff --git a/blockdev.c b/blockdev.c
>> index 8dde72a..114a94c 100644
>> --- a/blockdev.c
>> +++ b/blockdev.c
>> @@ -1569,6 +1569,7 @@ static void drive_backup_prepare(BlkTransactionState *common, Error **errp)
>>                        backup->sync,
>>                        backup->has_mode, backup->mode,
>>                        backup->has_speed, backup->speed,
>> +                     backup->has_bitmap, backup->bitmap,
>>                        backup->has_on_source_error, backup->on_source_error,
>>                        backup->has_on_target_error, backup->on_target_error,
>>                        &local_err);
>> @@ -2358,6 +2359,7 @@ void qmp_drive_backup(const char *device, const char *target,
>>                         enum MirrorSyncMode sync,
>>                         bool has_mode, enum NewImageMode mode,
>>                         bool has_speed, int64_t speed,
>> +                      bool has_bitmap, const char *bitmap,
>>                         bool has_on_source_error, BlockdevOnError on_source_error,
>>                         bool has_on_target_error, BlockdevOnError on_target_error,
>>                         Error **errp)
>> @@ -2365,6 +2367,7 @@ void qmp_drive_backup(const char *device, const char *target,
>>       BlockDriverState *bs;
>>       BlockDriverState *target_bs;
>>       BlockDriverState *source = NULL;
>> +    BdrvDirtyBitmap *bmap = NULL;
>>       AioContext *aio_context;
>>       BlockDriver *drv = NULL;
>>       Error *local_err = NULL;
>> @@ -2460,7 +2463,16 @@ void qmp_drive_backup(const char *device, const char *target,
>>
>>       bdrv_set_aio_context(target_bs, aio_context);
>>
>> -    backup_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
>> +    if (has_bitmap) {
>> +        bmap = bdrv_find_dirty_bitmap(bs, bitmap);
>> +        if (!bmap) {
>> +            error_setg(errp, "Bitmap '%s' could not be found", bitmap);
>> +            goto out;
>> +        }
>> +    }
>> +
>> +    backup_start(bs, target_bs, speed, sync, bmap,
>> +                 on_source_error, on_target_error,
>>                    block_job_cb, bs, &local_err);
>>       if (local_err != NULL) {
>>           bdrv_unref(target_bs);
>> diff --git a/hmp.c b/hmp.c
>> index 481be80..63b19c7 100644
>> --- a/hmp.c
>> +++ b/hmp.c
>> @@ -1021,7 +1021,8 @@ void hmp_drive_backup(Monitor *mon, const QDict *qdict)
>>
>>       qmp_drive_backup(device, filename, !!format, format,
>>                        full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
>> -                     true, mode, false, 0, false, 0, false, 0, &err);
>> +                     true, mode, false, 0, false, NULL,
>> +                     false, 0, false, 0, &err);
>>       hmp_handle_error(mon, &err);
>>   }
>>
>> diff --git a/include/block/block.h b/include/block/block.h
>> index 6138544..99ed679 100644
>> --- a/include/block/block.h
>> +++ b/include/block/block.h
>> @@ -462,6 +462,7 @@ void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
>>   void bdrv_clear_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
>>   void bdrv_dirty_iter_init(BlockDriverState *bs,
>>                             BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi);
>> +void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset);
>>   int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
>>
>>   void bdrv_enable_copy_on_read(BlockDriverState *bs);
>> diff --git a/include/block/block_int.h b/include/block/block_int.h
>> index 06a21dd..cb1e4a1 100644
>> --- a/include/block/block_int.h
>> +++ b/include/block/block_int.h
>> @@ -584,6 +584,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
>>    * @target: Block device to write to.
>>    * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
>>    * @sync_mode: What parts of the disk image should be copied to the destination.
>> + * @sync_bitmap: The dirty bitmap if sync_mode is MIRROR_SYNC_MODE_DIRTY_BITMAP.
>>    * @on_source_error: The action to take upon error reading from the source.
>>    * @on_target_error: The action to take upon error writing to the target.
>>    * @cb: Completion function for the job.
>> @@ -594,6 +595,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
>>    */
>>   void backup_start(BlockDriverState *bs, BlockDriverState *target,
>>                     int64_t speed, MirrorSyncMode sync_mode,
>> +                  BdrvDirtyBitmap *sync_bitmap,
>>                     BlockdevOnError on_source_error,
>>                     BlockdevOnError on_target_error,
>>                     BlockCompletionFunc *cb, void *opaque,
>> diff --git a/qapi/block-core.json b/qapi/block-core.json
>> index 3e863b9..92c3a84 100644
>> --- a/qapi/block-core.json
>> +++ b/qapi/block-core.json
>> @@ -500,7 +500,7 @@
>>   # Since: 1.3
>>   ##
>>   { 'enum': 'MirrorSyncMode',
>> -  'data': ['top', 'full', 'none'] }
>> +  'data': ['top', 'full', 'none', 'dirty-bitmap'] }
>
> Maybe document version info for 'dirty-bitmap'? Eric?
>
> Fam
>
> <snip>
>

Thanks!
--John
Fam Zheng Jan. 14, 2015, 6:29 a.m. UTC | #3
On Tue, 01/13 12:50, John Snow wrote:
> 
> 
> On 01/13/2015 04:37 AM, Fam Zheng wrote:
> >On Mon, 01/12 11:31, John Snow wrote:
> >>For "dirty-bitmap" sync mode, the block job will iterate through the
> >>given dirty bitmap to decide if a sector needs backup (backup all the
> >>dirty clusters and skip clean ones), just as allocation conditions of
> >>"top" sync mode.
> >>
> >>Signed-off-by: Fam Zheng <famz@redhat.com>
> >>Signed-off-by: John Snow <jsnow@redhat.com>
> >>---
> >>  block.c                   |   5 ++
> >>  block/backup.c            | 120 ++++++++++++++++++++++++++++++++++++++--------
> >>  block/mirror.c            |   4 ++
> >>  blockdev.c                |  14 +++++-
> >>  hmp.c                     |   3 +-
> >>  include/block/block.h     |   1 +
> >>  include/block/block_int.h |   2 +
> >>  qapi/block-core.json      |  11 +++--
> >>  qmp-commands.hx           |   7 +--
> >>  9 files changed, 137 insertions(+), 30 deletions(-)
> >>
> >>diff --git a/block.c b/block.c
> >>index 3f33b9d..5eb6788 100644
> >>--- a/block.c
> >>+++ b/block.c
> >>@@ -5633,6 +5633,11 @@ static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
> >>      }
> >>  }
> >>
> >>+void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
> >>+{
> >>+    hbitmap_iter_init(hbi, hbi->hb, offset);
> >
> >What's the reason for this indirection? Can caller just use hbitmap_iter_init?
> >
> 
> Essentially it is just a rename of hbitmap_iter_init to make its usage here
> clear, that we are manually advancing the pointer. How we accomplish that
> (hbitmap_iter_init) is an implementation detail.
> 
> Yes, I can just call this directly from block/backup, but at the time I was
> less sure of how I would advance the pointer, so I created a wrapper where I
> could change details as needed, if I needed to.

OK, either is fine for me.

> 
> >>+}
> >>+
> >>  int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
> >>  {
> >>      return hbitmap_count(bitmap->bitmap);
> >>diff --git a/block/backup.c b/block/backup.c
> >>index 792e655..0626a3e 100644
> >>--- a/block/backup.c
> >>+++ b/block/backup.c
> >>@@ -37,6 +37,8 @@ typedef struct CowRequest {
> >>  typedef struct BackupBlockJob {
> >>      BlockJob common;
> >>      BlockDriverState *target;
> >>+    /* bitmap for sync=dirty-bitmap */
> >>+    BdrvDirtyBitmap *sync_bitmap;
> >>      MirrorSyncMode sync_mode;
> >>      RateLimit limit;
> >>      BlockdevOnError on_source_error;
> >>@@ -242,6 +244,31 @@ static void backup_complete(BlockJob *job, void *opaque)
> >>      g_free(data);
> >>  }
> >>
> >>+static bool coroutine_fn yield_and_check(BackupBlockJob *job)
> >>+{
> >>+    if (block_job_is_cancelled(&job->common)) {
> >>+        return true;
> >>+    }
> >>+
> >>+    /* we need to yield so that qemu_aio_flush() returns.
> >>+     * (without, VM does not reboot)
> >>+     */
> >>+    if (job->common.speed) {
> >>+        uint64_t delay_ns = ratelimit_calculate_delay(&job->limit,
> >>+                                                      job->sectors_read);
> >>+        job->sectors_read = 0;
> >>+        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
> >>+    } else {
> >>+        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
> >>+    }
> >>+
> >>+    if (block_job_is_cancelled(&job->common)) {
> >>+        return true;
> >>+    }
> >>+
> >>+    return false;
> >>+}
> >>+
> >>  static void coroutine_fn backup_run(void *opaque)
> >>  {
> >>      BackupBlockJob *job = opaque;
> >>@@ -254,13 +281,13 @@ static void coroutine_fn backup_run(void *opaque)
> >>      };
> >>      int64_t start, end;
> >>      int ret = 0;
> >>+    bool error_is_read;
> >>
> >>      QLIST_INIT(&job->inflight_reqs);
> >>      qemu_co_rwlock_init(&job->flush_rwlock);
> >>
> >>      start = 0;
> >>-    end = DIV_ROUND_UP(job->common.len / BDRV_SECTOR_SIZE,
> >>-                       BACKUP_SECTORS_PER_CLUSTER);
> >>+    end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
> >>
> >>      job->bitmap = hbitmap_alloc(end, 0);
> >>
> >>@@ -278,28 +305,45 @@ static void coroutine_fn backup_run(void *opaque)
> >>              qemu_coroutine_yield();
> >>              job->common.busy = true;
> >>          }
> >>+    } else if (job->sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
> >>+        /* Dirty Bitmap sync has a slightly different iteration method */
> >>+        HBitmapIter hbi;
> >>+        int64_t sector;
> >>+        int64_t cluster;
> >>+        bool polyrhythmic;
> >>+
> >>+        bdrv_dirty_iter_init(bs, job->sync_bitmap, &hbi);
> >>+        /* Does the granularity happen to match our backup cluster size? */
> >>+        polyrhythmic = (bdrv_dirty_bitmap_granularity(bs, job->sync_bitmap) !=
> >>+                        BACKUP_CLUSTER_SIZE);
> >>+
> >>+        /* Find the next dirty /sector/ and copy that /cluster/ */
> >>+        while ((sector = hbitmap_iter_next(&hbi)) != -1) {
> >>+            if (yield_and_check(job)) {
> >>+                goto leave;
> >>+            }
> >>+            cluster = sector / BACKUP_SECTORS_PER_CLUSTER;
> >>+
> >>+            do {
> >>+                ret = backup_do_cow(bs, cluster * BACKUP_SECTORS_PER_CLUSTER,
> >>+                                    BACKUP_SECTORS_PER_CLUSTER, &error_is_read);
> >>+                if ((ret < 0) &&
> >>+                    backup_error_action(job, error_is_read, -ret) ==
> >>+                    BLOCK_ERROR_ACTION_REPORT) {
> >>+                    goto leave;
> >>+                }
> >>+            } while (ret < 0);
> >>+
> >>+            /* Advance (or rewind) our iterator if we need to. */
> >>+            if (polyrhythmic) {
> >>+                bdrv_set_dirty_iter(&hbi,
> >>+                                    (cluster + 1) * BACKUP_SECTORS_PER_CLUSTER);
> >>+            }
> >>+        }
> >>      } else {
> >>          /* Both FULL and TOP SYNC_MODE's require copying.. */
> >>          for (; start < end; start++) {
> >>-            bool error_is_read;
> >>-
> >>-            if (block_job_is_cancelled(&job->common)) {
> >>-                break;
> >>-            }
> >>-
> >>-            /* we need to yield so that qemu_aio_flush() returns.
> >>-             * (without, VM does not reboot)
> >>-             */
> >>-            if (job->common.speed) {
> >>-                uint64_t delay_ns = ratelimit_calculate_delay(
> >>-                        &job->limit, job->sectors_read);
> >>-                job->sectors_read = 0;
> >>-                block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
> >>-            } else {
> >>-                block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
> >>-            }
> >>-
> >>-            if (block_job_is_cancelled(&job->common)) {
> >>+            if (yield_and_check(job)) {
> >>                  break;
> >>              }
> >>
> >>@@ -351,12 +395,23 @@ static void coroutine_fn backup_run(void *opaque)
> >>          }
> >>      }
> >>
> >>+leave:
> >>      notifier_with_return_remove(&before_write);
> >>
> >>      /* wait until pending backup_do_cow() calls have completed */
> >>      qemu_co_rwlock_wrlock(&job->flush_rwlock);
> >>      qemu_co_rwlock_unlock(&job->flush_rwlock);
> >>
> >>+    if (job->sync_bitmap) {
> >>+        if (ret < 0) {
> >>+            /* Merge the successor back into the parent, delete nothing. */
> >>+            assert(bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL));
> >
> >Why can't reclaim fail here? If we canassert, please move the expression out
> >because it has a side effect.
> >
> 
> It shouldn't fail; if it does, something went very wrong. The only chance
> this has to fail is if the sync bitmap has no successor, but we explicitly
> installed one (and explicitly check that it succeeded) before going into the
> block backup.
> 
> I am not sure what other meaningful recovery could happen in this case,
> though we *could* just report an error and continue on, acknowledging that
> the BdrvDirtyBitmap is now compromised and of questionable validity.
> 
> Does anyone have an error handling preference here?

I don't, as long as you move it out from assert:

int r = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL);
assert(r);

Fam
Max Reitz Jan. 16, 2015, 5:52 p.m. UTC | #4
On 2015-01-12 at 11:31, John Snow wrote:
> For "dirty-bitmap" sync mode, the block job will iterate through the
> given dirty bitmap to decide if a sector needs backup (backup all the
> dirty clusters and skip clean ones), just as allocation conditions of
> "top" sync mode.
>
> Signed-off-by: Fam Zheng <famz@redhat.com>
> Signed-off-by: John Snow <jsnow@redhat.com>
> ---
>   block.c                   |   5 ++
>   block/backup.c            | 120 ++++++++++++++++++++++++++++++++++++++--------
>   block/mirror.c            |   4 ++
>   blockdev.c                |  14 +++++-
>   hmp.c                     |   3 +-
>   include/block/block.h     |   1 +
>   include/block/block_int.h |   2 +
>   qapi/block-core.json      |  11 +++--
>   qmp-commands.hx           |   7 +--
>   9 files changed, 137 insertions(+), 30 deletions(-)

Since you seem to be intending to rethink the "frozen" state, I'm just 
scanning through the series from patch 8 on.

While this patch doesn't seem to have changed much conceptually since 
the last version I reviewed, with it applied to master, qemu fails to 
build due to Fam's blockdev-backup series (some new calls to 
backup_start() which have to be adapted).

Max
John Snow Jan. 16, 2015, 5:59 p.m. UTC | #5
On 01/16/2015 12:52 PM, Max Reitz wrote:
> On 2015-01-12 at 11:31, John Snow wrote:
>> For "dirty-bitmap" sync mode, the block job will iterate through the
>> given dirty bitmap to decide if a sector needs backup (backup all the
>> dirty clusters and skip clean ones), just as allocation conditions of
>> "top" sync mode.
>>
>> Signed-off-by: Fam Zheng <famz@redhat.com>
>> Signed-off-by: John Snow <jsnow@redhat.com>
>> ---
>>   block.c                   |   5 ++
>>   block/backup.c            | 120
>> ++++++++++++++++++++++++++++++++++++++--------
>>   block/mirror.c            |   4 ++
>>   blockdev.c                |  14 +++++-
>>   hmp.c                     |   3 +-
>>   include/block/block.h     |   1 +
>>   include/block/block_int.h |   2 +
>>   qapi/block-core.json      |  11 +++--
>>   qmp-commands.hx           |   7 +--
>>   9 files changed, 137 insertions(+), 30 deletions(-)
>
> Since you seem to be intending to rethink the "frozen" state, I'm just
> scanning through the series from patch 8 on.
>
> While this patch doesn't seem to have changed much conceptually since
> the last version I reviewed, with it applied to master, qemu fails to
> build due to Fam's blockdev-backup series (some new calls to
> backup_start() which have to be adapted).
>
> Max

Understood. "Frozen" will still exist largely similar to what it doe s 
now, but the proposal from fam was to simply fold "enabled/disabled" 
into the same status, and perhaps differentiate between a simple 
"disabled" state and a "frozen" state by the presence or absence of a 
successor.

As for the rebase onto master, here's a v11.5:
https://github.com/jnsnow/qemu/commits/dbm-backup
diff mbox

Patch

diff --git a/block.c b/block.c
index 3f33b9d..5eb6788 100644
--- a/block.c
+++ b/block.c
@@ -5633,6 +5633,11 @@  static void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
     }
 }
 
+void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
+{
+    hbitmap_iter_init(hbi, hbi->hb, offset);
+}
+
 int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
 {
     return hbitmap_count(bitmap->bitmap);
diff --git a/block/backup.c b/block/backup.c
index 792e655..0626a3e 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -37,6 +37,8 @@  typedef struct CowRequest {
 typedef struct BackupBlockJob {
     BlockJob common;
     BlockDriverState *target;
+    /* bitmap for sync=dirty-bitmap */
+    BdrvDirtyBitmap *sync_bitmap;
     MirrorSyncMode sync_mode;
     RateLimit limit;
     BlockdevOnError on_source_error;
@@ -242,6 +244,31 @@  static void backup_complete(BlockJob *job, void *opaque)
     g_free(data);
 }
 
+static bool coroutine_fn yield_and_check(BackupBlockJob *job)
+{
+    if (block_job_is_cancelled(&job->common)) {
+        return true;
+    }
+
+    /* we need to yield so that qemu_aio_flush() returns.
+     * (without, VM does not reboot)
+     */
+    if (job->common.speed) {
+        uint64_t delay_ns = ratelimit_calculate_delay(&job->limit,
+                                                      job->sectors_read);
+        job->sectors_read = 0;
+        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
+    } else {
+        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
+    }
+
+    if (block_job_is_cancelled(&job->common)) {
+        return true;
+    }
+
+    return false;
+}
+
 static void coroutine_fn backup_run(void *opaque)
 {
     BackupBlockJob *job = opaque;
@@ -254,13 +281,13 @@  static void coroutine_fn backup_run(void *opaque)
     };
     int64_t start, end;
     int ret = 0;
+    bool error_is_read;
 
     QLIST_INIT(&job->inflight_reqs);
     qemu_co_rwlock_init(&job->flush_rwlock);
 
     start = 0;
-    end = DIV_ROUND_UP(job->common.len / BDRV_SECTOR_SIZE,
-                       BACKUP_SECTORS_PER_CLUSTER);
+    end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
 
     job->bitmap = hbitmap_alloc(end, 0);
 
@@ -278,28 +305,45 @@  static void coroutine_fn backup_run(void *opaque)
             qemu_coroutine_yield();
             job->common.busy = true;
         }
+    } else if (job->sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
+        /* Dirty Bitmap sync has a slightly different iteration method */
+        HBitmapIter hbi;
+        int64_t sector;
+        int64_t cluster;
+        bool polyrhythmic;
+
+        bdrv_dirty_iter_init(bs, job->sync_bitmap, &hbi);
+        /* Does the granularity happen to match our backup cluster size? */
+        polyrhythmic = (bdrv_dirty_bitmap_granularity(bs, job->sync_bitmap) !=
+                        BACKUP_CLUSTER_SIZE);
+
+        /* Find the next dirty /sector/ and copy that /cluster/ */
+        while ((sector = hbitmap_iter_next(&hbi)) != -1) {
+            if (yield_and_check(job)) {
+                goto leave;
+            }
+            cluster = sector / BACKUP_SECTORS_PER_CLUSTER;
+
+            do {
+                ret = backup_do_cow(bs, cluster * BACKUP_SECTORS_PER_CLUSTER,
+                                    BACKUP_SECTORS_PER_CLUSTER, &error_is_read);
+                if ((ret < 0) &&
+                    backup_error_action(job, error_is_read, -ret) ==
+                    BLOCK_ERROR_ACTION_REPORT) {
+                    goto leave;
+                }
+            } while (ret < 0);
+
+            /* Advance (or rewind) our iterator if we need to. */
+            if (polyrhythmic) {
+                bdrv_set_dirty_iter(&hbi,
+                                    (cluster + 1) * BACKUP_SECTORS_PER_CLUSTER);
+            }
+        }
     } else {
         /* Both FULL and TOP SYNC_MODE's require copying.. */
         for (; start < end; start++) {
-            bool error_is_read;
-
-            if (block_job_is_cancelled(&job->common)) {
-                break;
-            }
-
-            /* we need to yield so that qemu_aio_flush() returns.
-             * (without, VM does not reboot)
-             */
-            if (job->common.speed) {
-                uint64_t delay_ns = ratelimit_calculate_delay(
-                        &job->limit, job->sectors_read);
-                job->sectors_read = 0;
-                block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
-            } else {
-                block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
-            }
-
-            if (block_job_is_cancelled(&job->common)) {
+            if (yield_and_check(job)) {
                 break;
             }
 
@@ -351,12 +395,23 @@  static void coroutine_fn backup_run(void *opaque)
         }
     }
 
+leave:
     notifier_with_return_remove(&before_write);
 
     /* wait until pending backup_do_cow() calls have completed */
     qemu_co_rwlock_wrlock(&job->flush_rwlock);
     qemu_co_rwlock_unlock(&job->flush_rwlock);
 
+    if (job->sync_bitmap) {
+        if (ret < 0) {
+            /* Merge the successor back into the parent, delete nothing. */
+            assert(bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL));
+            bdrv_enable_dirty_bitmap(job->sync_bitmap);
+        } else {
+            /* Everything is fine, delete this bitmap and install the backup. */
+            assert(bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL));
+        }
+    }
     hbitmap_free(job->bitmap);
 
     bdrv_iostatus_disable(target);
@@ -368,6 +423,7 @@  static void coroutine_fn backup_run(void *opaque)
 
 void backup_start(BlockDriverState *bs, BlockDriverState *target,
                   int64_t speed, MirrorSyncMode sync_mode,
+                  BdrvDirtyBitmap *sync_bitmap,
                   BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
                   BlockCompletionFunc *cb, void *opaque,
@@ -386,6 +442,26 @@  void backup_start(BlockDriverState *bs, BlockDriverState *target,
         return;
     }
 
+    if (sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
+        if (!sync_bitmap) {
+            error_setg(errp, "must provide a valid bitmap name for "
+                             "\"dirty-bitmap\" sync mode");
+            return;
+        }
+
+        /* Create a new bitmap, and freeze/disable this one. */
+        if (bdrv_dirty_bitmap_create_successor(bs, sync_bitmap, errp) < 0) {
+            return;
+        }
+
+    } else if (sync_bitmap) {
+        error_setg(errp,
+                   "a sync_bitmap was provided to backup_run, "
+                   "but received an incompatible sync_mode (%s)",
+                   MirrorSyncMode_lookup[sync_mode]);
+        return;
+    }
+
     len = bdrv_getlength(bs);
     if (len < 0) {
         error_setg_errno(errp, -len, "unable to get length for '%s'",
@@ -403,6 +479,8 @@  void backup_start(BlockDriverState *bs, BlockDriverState *target,
     job->on_target_error = on_target_error;
     job->target = target;
     job->sync_mode = sync_mode;
+    job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_DIRTY_BITMAP ?
+                       sync_bitmap : NULL;
     job->common.len = len;
     job->common.co = qemu_coroutine_create(backup_run);
     qemu_coroutine_enter(job->common.co, job);
diff --git a/block/mirror.c b/block/mirror.c
index fc545f1..427c688 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -717,6 +717,10 @@  void mirror_start(BlockDriverState *bs, BlockDriverState *target,
     bool is_none_mode;
     BlockDriverState *base;
 
+    if (mode == MIRROR_SYNC_MODE_DIRTY_BITMAP) {
+        error_setg(errp, "Sync mode 'dirty-bitmap' not supported");
+        return;
+    }
     is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
     base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
     mirror_start_job(bs, target, replaces,
diff --git a/blockdev.c b/blockdev.c
index 8dde72a..114a94c 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1569,6 +1569,7 @@  static void drive_backup_prepare(BlkTransactionState *common, Error **errp)
                      backup->sync,
                      backup->has_mode, backup->mode,
                      backup->has_speed, backup->speed,
+                     backup->has_bitmap, backup->bitmap,
                      backup->has_on_source_error, backup->on_source_error,
                      backup->has_on_target_error, backup->on_target_error,
                      &local_err);
@@ -2358,6 +2359,7 @@  void qmp_drive_backup(const char *device, const char *target,
                       enum MirrorSyncMode sync,
                       bool has_mode, enum NewImageMode mode,
                       bool has_speed, int64_t speed,
+                      bool has_bitmap, const char *bitmap,
                       bool has_on_source_error, BlockdevOnError on_source_error,
                       bool has_on_target_error, BlockdevOnError on_target_error,
                       Error **errp)
@@ -2365,6 +2367,7 @@  void qmp_drive_backup(const char *device, const char *target,
     BlockDriverState *bs;
     BlockDriverState *target_bs;
     BlockDriverState *source = NULL;
+    BdrvDirtyBitmap *bmap = NULL;
     AioContext *aio_context;
     BlockDriver *drv = NULL;
     Error *local_err = NULL;
@@ -2460,7 +2463,16 @@  void qmp_drive_backup(const char *device, const char *target,
 
     bdrv_set_aio_context(target_bs, aio_context);
 
-    backup_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
+    if (has_bitmap) {
+        bmap = bdrv_find_dirty_bitmap(bs, bitmap);
+        if (!bmap) {
+            error_setg(errp, "Bitmap '%s' could not be found", bitmap);
+            goto out;
+        }
+    }
+
+    backup_start(bs, target_bs, speed, sync, bmap,
+                 on_source_error, on_target_error,
                  block_job_cb, bs, &local_err);
     if (local_err != NULL) {
         bdrv_unref(target_bs);
diff --git a/hmp.c b/hmp.c
index 481be80..63b19c7 100644
--- a/hmp.c
+++ b/hmp.c
@@ -1021,7 +1021,8 @@  void hmp_drive_backup(Monitor *mon, const QDict *qdict)
 
     qmp_drive_backup(device, filename, !!format, format,
                      full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
-                     true, mode, false, 0, false, 0, false, 0, &err);
+                     true, mode, false, 0, false, NULL,
+                     false, 0, false, 0, &err);
     hmp_handle_error(mon, &err);
 }
 
diff --git a/include/block/block.h b/include/block/block.h
index 6138544..99ed679 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -462,6 +462,7 @@  void bdrv_reset_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
 void bdrv_clear_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
 void bdrv_dirty_iter_init(BlockDriverState *bs,
                           BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi);
+void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset);
 int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
 
 void bdrv_enable_copy_on_read(BlockDriverState *bs);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 06a21dd..cb1e4a1 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -584,6 +584,7 @@  void mirror_start(BlockDriverState *bs, BlockDriverState *target,
  * @target: Block device to write to.
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
  * @sync_mode: What parts of the disk image should be copied to the destination.
+ * @sync_bitmap: The dirty bitmap if sync_mode is MIRROR_SYNC_MODE_DIRTY_BITMAP.
  * @on_source_error: The action to take upon error reading from the source.
  * @on_target_error: The action to take upon error writing to the target.
  * @cb: Completion function for the job.
@@ -594,6 +595,7 @@  void mirror_start(BlockDriverState *bs, BlockDriverState *target,
  */
 void backup_start(BlockDriverState *bs, BlockDriverState *target,
                   int64_t speed, MirrorSyncMode sync_mode,
+                  BdrvDirtyBitmap *sync_bitmap,
                   BlockdevOnError on_source_error,
                   BlockdevOnError on_target_error,
                   BlockCompletionFunc *cb, void *opaque,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 3e863b9..92c3a84 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -500,7 +500,7 @@ 
 # Since: 1.3
 ##
 { 'enum': 'MirrorSyncMode',
-  'data': ['top', 'full', 'none'] }
+  'data': ['top', 'full', 'none', 'dirty-bitmap'] }
 
 ##
 # @BlockJobType:
@@ -675,14 +675,17 @@ 
 #          probe if @mode is 'existing', else the format of the source
 #
 # @sync: what parts of the disk image should be copied to the destination
-#        (all the disk, only the sectors allocated in the topmost image, or
-#        only new I/O).
+#        (all the disk, only the sectors allocated in the topmost image, from a
+#        dirty bitmap, or only new I/O).
 #
 # @mode: #optional whether and how QEMU should create a new image, default is
 #        'absolute-paths'.
 #
 # @speed: #optional the maximum speed, in bytes per second
 #
+# @bitmap: #optional the name of dirty bitmap if sync is "dirty-bitmap"
+#          (Since 2.3)
+#
 # @on-source-error: #optional the action to take on an error on the source,
 #                   default 'report'.  'stop' and 'enospc' can only be used
 #                   if the block device supports io-status (see BlockInfo).
@@ -700,7 +703,7 @@ 
 { 'type': 'DriveBackup',
   'data': { 'device': 'str', 'target': 'str', '*format': 'str',
             'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
-            '*speed': 'int',
+            '*speed': 'int', '*bitmap': 'str',
             '*on-source-error': 'BlockdevOnError',
             '*on-target-error': 'BlockdevOnError' } }
 
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 59be8eb..479d4f5 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1048,7 +1048,7 @@  EQMP
     {
         .name       = "drive-backup",
         .args_type  = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?,"
-                      "on-source-error:s?,on-target-error:s?",
+                      "bitmap:s?,on-source-error:s?,on-target-error:s?",
         .mhandler.cmd_new = qmp_marshal_input_drive_backup,
     },
 
@@ -1075,8 +1075,9 @@  Arguments:
             (json-string, optional)
 - "sync": what parts of the disk image should be copied to the destination;
   possibilities include "full" for all the disk, "top" for only the sectors
-  allocated in the topmost image, or "none" to only replicate new I/O
-  (MirrorSyncMode).
+  allocated in the topmost image, "dirty-bitmap" for only the dirty sectors in
+  the bitmap, or "none" to only replicate new I/O (MirrorSyncMode).
+- "bitmap": dirty bitmap name for sync==dirty-bitmap
 - "mode": whether and how QEMU should create a new image
           (NewImageMode, optional, default 'absolute-paths')
 - "speed": the maximum speed, in bytes per second (json-int, optional)