diff mbox

[v2] block: per caller dirty bitmap

Message ID 1383557410-18062-1-git-send-email-famz@redhat.com
State New
Headers show

Commit Message

Fam Zheng Nov. 4, 2013, 9:30 a.m. UTC
Previously a BlockDriverState has only one dirty bitmap, so only one
caller (e.g. a block job) can keep track of writing. This changes the
dirty bitmap to a list and creates a BdrvDirtyBitmap for each caller, the
lifecycle is managed with these new functions:

    bdrv_create_dirty_bitmap
    bdrv_release_dirty_bitmap

Where BdrvDirtyBitmap is a linked list wrapper structure of HBitmap.

In place of bdrv_set_dirty_tracking, a BdrvDirtyBitmap pointer argument
is added to these functions, since each caller has its own dirty bitmap:

    bdrv_get_dirty
    bdrv_dirty_iter_init
    bdrv_get_dirty_count

bdrv_set_dirty and bdrv_reset_dirty prototypes are unchanged but will
internally walk the list of all dirty bitmaps and set them one by one.

Signed-off-by: Fam Zheng <famz@redhat.com>

---
v2: Added BdrvDirtyBitmap [Paolo]

Signed-off-by: Fam Zheng <famz@redhat.com>
---
 block-migration.c         | 22 +++++++++----
 block.c                   | 81 ++++++++++++++++++++++++++++-------------------
 block/mirror.c            | 23 ++++++++------
 block/qapi.c              |  8 -----
 include/block/block.h     | 11 ++++---
 include/block/block_int.h |  2 +-
 6 files changed, 85 insertions(+), 62 deletions(-)

Comments

Paolo Bonzini Nov. 4, 2013, 10:37 a.m. UTC | #1
Il 04/11/2013 10:30, Fam Zheng ha scritto:
> diff --git a/include/block/block.h b/include/block/block.h
> index 3560deb..06f424c 100644
> --- a/include/block/block.h
> +++ b/include/block/block.h
> @@ -388,12 +388,15 @@ void *qemu_blockalign(BlockDriverState *bs, size_t size);
>  bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
>  
>  struct HBitmapIter;
> -void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity);
> -int bdrv_get_dirty(BlockDriverState *bs, int64_t sector);
> +typedef struct BdrvDirtyBitmap BdrvDirtyBitmap;
> +BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity);
> +void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
> +int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector);
>  void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
>  void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
> -void bdrv_dirty_iter_init(BlockDriverState *bs, struct HBitmapIter *hbi);
> -int64_t bdrv_get_dirty_count(BlockDriverState *bs);
> +void bdrv_dirty_iter_init(BlockDriverState *bs,
> +                          BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi);
> +int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
>  
>  void bdrv_enable_copy_on_read(BlockDriverState *bs);
>  void bdrv_disable_copy_on_read(BlockDriverState *bs);

You do not really need the BDS argument to the functions, do you?  (Or
do you have other plans?)

Apart from this, looks good.

Paolo
Fam Zheng Nov. 4, 2013, 10:47 a.m. UTC | #2
On 11/04/2013 06:37 PM, Paolo Bonzini wrote:
> Il 04/11/2013 10:30, Fam Zheng ha scritto:
>> diff --git a/include/block/block.h b/include/block/block.h
>> index 3560deb..06f424c 100644
>> --- a/include/block/block.h
>> +++ b/include/block/block.h
>> @@ -388,12 +388,15 @@ void *qemu_blockalign(BlockDriverState *bs, size_t size);
>>   bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
>>   
>>   struct HBitmapIter;
>> -void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity);
>> -int bdrv_get_dirty(BlockDriverState *bs, int64_t sector);
>> +typedef struct BdrvDirtyBitmap BdrvDirtyBitmap;
>> +BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity);
>> +void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
>> +int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector);
>>   void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
>>   void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
>> -void bdrv_dirty_iter_init(BlockDriverState *bs, struct HBitmapIter *hbi);
>> -int64_t bdrv_get_dirty_count(BlockDriverState *bs);
>> +void bdrv_dirty_iter_init(BlockDriverState *bs,
>> +                          BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi);
>> +int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
>>   
>>   void bdrv_enable_copy_on_read(BlockDriverState *bs);
>>   void bdrv_disable_copy_on_read(BlockDriverState *bs);
> You do not really need the BDS argument to the functions, do you?  (Or
> do you have other plans?)

I just wanted to keep the pattern of those bdrv_* family, no other plans 
for it.

Fam
Paolo Bonzini Nov. 4, 2013, 10:55 a.m. UTC | #3
Il 04/11/2013 11:47, Fam Zheng ha scritto:
>>>
>>> -void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity);
>>> -int bdrv_get_dirty(BlockDriverState *bs, int64_t sector);
>>> +typedef struct BdrvDirtyBitmap BdrvDirtyBitmap;
>>> +BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int
>>> granularity);
>>> +void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap
>>> *bitmap);
>>> +int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
>>> int64_t sector);
>>>   void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int
>>> nr_sectors);
>>>   void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int
>>> nr_sectors);
>>> -void bdrv_dirty_iter_init(BlockDriverState *bs, struct HBitmapIter
>>> *hbi);
>>> -int64_t bdrv_get_dirty_count(BlockDriverState *bs);
>>> +void bdrv_dirty_iter_init(BlockDriverState *bs,
>>> +                          BdrvDirtyBitmap *bitmap, struct
>>> HBitmapIter *hbi);
>>> +int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap
>>> *bitmap);
>>>     void bdrv_enable_copy_on_read(BlockDriverState *bs);
>>>   void bdrv_disable_copy_on_read(BlockDriverState *bs);
>> You do not really need the BDS argument to the functions, do you?  (Or
>> do you have other plans?)
> 
> I just wanted to keep the pattern of those bdrv_* family, no other plans
> for it.

Kevin, Stefan, any second opinions?

Paolo
Benoît Canet Nov. 4, 2013, 2:38 p.m. UTC | #4
Le Monday 04 Nov 2013 à 17:30:10 (+0800), Fam Zheng a écrit :
> Previously a BlockDriverState has only one dirty bitmap, so only one
> caller (e.g. a block job) can keep track of writing. This changes the
> dirty bitmap to a list and creates a BdrvDirtyBitmap for each caller, the
> lifecycle is managed with these new functions:
> 
>     bdrv_create_dirty_bitmap
>     bdrv_release_dirty_bitmap
> 
> Where BdrvDirtyBitmap is a linked list wrapper structure of HBitmap.
> 
> In place of bdrv_set_dirty_tracking, a BdrvDirtyBitmap pointer argument
> is added to these functions, since each caller has its own dirty bitmap:
> 
>     bdrv_get_dirty
>     bdrv_dirty_iter_init
>     bdrv_get_dirty_count
> 
> bdrv_set_dirty and bdrv_reset_dirty prototypes are unchanged but will
> internally walk the list of all dirty bitmaps and set them one by one.
> 
> Signed-off-by: Fam Zheng <famz@redhat.com>
> 
> ---
> v2: Added BdrvDirtyBitmap [Paolo]
> 
> Signed-off-by: Fam Zheng <famz@redhat.com>
> ---
>  block-migration.c         | 22 +++++++++----
>  block.c                   | 81 ++++++++++++++++++++++++++++-------------------
>  block/mirror.c            | 23 ++++++++------
>  block/qapi.c              |  8 -----
>  include/block/block.h     | 11 ++++---
>  include/block/block_int.h |  2 +-
>  6 files changed, 85 insertions(+), 62 deletions(-)
> 
> diff --git a/block-migration.c b/block-migration.c
> index daf9ec1..8f4e826 100644
> --- a/block-migration.c
> +++ b/block-migration.c
> @@ -58,6 +58,7 @@ typedef struct BlkMigDevState {
>      /* Protected by block migration lock.  */
>      unsigned long *aio_bitmap;
>      int64_t completed_sectors;
> +    BdrvDirtyBitmap *dirty_bitmap;
>  } BlkMigDevState;
>  
>  typedef struct BlkMigBlock {
> @@ -309,12 +310,21 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
>  
>  /* Called with iothread lock taken.  */
>  
> -static void set_dirty_tracking(int enable)
> +static void set_dirty_tracking(void)
>  {
>      BlkMigDevState *bmds;
>  
>      QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
> -        bdrv_set_dirty_tracking(bmds->bs, enable ? BLOCK_SIZE : 0);
> +        bmds->dirty_bitmap = bdrv_create_dirty_bitmap(bmds->bs, BLOCK_SIZE);
> +    }
> +}
> +
> +static void unset_dirty_tracking(void)
> +{
> +    BlkMigDevState *bmds;
> +
> +    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
> +        bdrv_release_dirty_bitmap(bmds->bs, bmds->dirty_bitmap);
>      }
>  }
>  
> @@ -432,7 +442,7 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
>          } else {
>              blk_mig_unlock();
>          }
> -        if (bdrv_get_dirty(bmds->bs, sector)) {
> +        if (bdrv_get_dirty(bmds->bs, bmds->dirty_bitmap, sector)) {
>  
>              if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
>                  nr_sectors = total_sectors - sector;
> @@ -554,7 +564,7 @@ static int64_t get_remaining_dirty(void)
>      int64_t dirty = 0;
>  
>      QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
> -        dirty += bdrv_get_dirty_count(bmds->bs);
> +        dirty += bdrv_get_dirty_count(bmds->bs, bmds->dirty_bitmap);
>      }
>  
>      return dirty << BDRV_SECTOR_BITS;
> @@ -569,7 +579,7 @@ static void blk_mig_cleanup(void)
>  
>      bdrv_drain_all();
>  
> -    set_dirty_tracking(0);
> +    unset_dirty_tracking();
>  
>      blk_mig_lock();
>      while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
> @@ -604,7 +614,7 @@ static int block_save_setup(QEMUFile *f, void *opaque)
>      init_blk_migration(f);
>  
>      /* start track dirty blocks */
> -    set_dirty_tracking(1);
> +    set_dirty_tracking();
>      qemu_mutex_unlock_iothread();
>  
>      ret = flush_blks(f);
> diff --git a/block.c b/block.c
> index 58efb5b..ef7a55f 100644
> --- a/block.c
> +++ b/block.c
> @@ -49,6 +49,11 @@
>  #include <windows.h>
>  #endif
>  
> +typedef struct BdrvDirtyBitmap {
> +    HBitmap *bitmap;
> +    QLIST_ENTRY(BdrvDirtyBitmap) list;
> +} BdrvDirtyBitmap;
> +
>  #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
>  
>  typedef enum {
> @@ -323,6 +328,7 @@ BlockDriverState *bdrv_new(const char *device_name)
>      BlockDriverState *bs;
>  
>      bs = g_malloc0(sizeof(BlockDriverState));
> +    QLIST_INIT(&bs->dirty_bitmaps);
>      pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
>      if (device_name[0] != '\0') {
>          QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
> @@ -1615,7 +1621,7 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
>      bs_dest->iostatus           = bs_src->iostatus;
>  
>      /* dirty bitmap */
> -    bs_dest->dirty_bitmap       = bs_src->dirty_bitmap;
> +    bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
>  
>      /* reference count */
>      bs_dest->refcnt             = bs_src->refcnt;
> @@ -1648,7 +1654,7 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
>  
>      /* bs_new must be anonymous and shouldn't have anything fancy enabled */
>      assert(bs_new->device_name[0] == '\0');
> -    assert(bs_new->dirty_bitmap == NULL);
> +    assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
>      assert(bs_new->job == NULL);
>      assert(bs_new->dev == NULL);
>      assert(bs_new->in_use == 0);
> @@ -1709,6 +1715,7 @@ static void bdrv_delete(BlockDriverState *bs)
>      assert(!bs->job);
>      assert(!bs->in_use);
>      assert(!bs->refcnt);
> +    assert(QLIST_EMPTY(&bs->dirty_bitmaps));
>  
>      bdrv_close(bs);
>  
> @@ -2785,9 +2792,7 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
>          ret = bdrv_co_flush(bs);
>      }
>  
> -    if (bs->dirty_bitmap) {
>          bdrv_set_dirty(bs, sector_num, nb_sectors);
> -    }
>  
>      if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
>          bs->wr_highest_sector = sector_num + nb_sectors - 1;
> @@ -3323,7 +3328,7 @@ int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
>      if (bdrv_check_request(bs, sector_num, nb_sectors))
>          return -EIO;
>  
> -    assert(!bs->dirty_bitmap);
> +    assert(QLIST_EMPTY(&bs->dirty_bitmaps));
>  
>      return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
>  }
> @@ -4183,9 +4188,7 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
>          return -EROFS;
>      }
>  
> -    if (bs->dirty_bitmap) {
> -        bdrv_reset_dirty(bs, sector_num, nb_sectors);
> -    }
> +    bdrv_reset_dirty(bs, sector_num, nb_sectors);
>  
>      /* Do nothing if disabled.  */
>      if (!(bs->open_flags & BDRV_O_UNMAP)) {
> @@ -4347,58 +4350,70 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
>      return true;
>  }
>  
> -void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity)
> +BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity)
>  {
>      int64_t bitmap_size;
> +    BdrvDirtyBitmap *bitmap;
>  
>      assert((granularity & (granularity - 1)) == 0);
>  
> -    if (granularity) {
> -        granularity >>= BDRV_SECTOR_BITS;
> -        assert(!bs->dirty_bitmap);
> -        bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
> -        bs->dirty_bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
> -    } else {
> -        if (bs->dirty_bitmap) {
> -            hbitmap_free(bs->dirty_bitmap);
> -            bs->dirty_bitmap = NULL;
> +    granularity >>= BDRV_SECTOR_BITS;
> +    assert(granularity);
> +    bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
> +    bitmap = g_malloc0(sizeof(BdrvDirtyBitmap));
> +    bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
> +    QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
> +    return bitmap;
> +}
> +
> +void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
> +{
> +    BdrvDirtyBitmap *bm, *next;
> +    QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
> +        if (bm == bitmap) {
> +            QLIST_REMOVE(bitmap, list);
> +            hbitmap_free(bitmap->bitmap);
> +            g_free(bitmap);
> +            return;
>          }
>      }
>  }
>  
> -int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
> +int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
>  {
> -    if (bs->dirty_bitmap) {
> -        return hbitmap_get(bs->dirty_bitmap, sector);
> +    if (bitmap) {
> +        return hbitmap_get(bitmap->bitmap, sector);

Is see some if (bitmap) here but nowhere some code resetting the various
dirty_bitmap fields to NULL.
Is it missing ?

Best regards

Benoît

>      } else {
>          return 0;
>      }
>  }
>  
> -void bdrv_dirty_iter_init(BlockDriverState *bs, HBitmapIter *hbi)
> +void bdrv_dirty_iter_init(BlockDriverState *bs,
> +                          BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
>  {
> -    hbitmap_iter_init(hbi, bs->dirty_bitmap, 0);
> +    hbitmap_iter_init(hbi, bitmap->bitmap, 0);
>  }
>  
>  void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
>                      int nr_sectors)
>  {
> -    hbitmap_set(bs->dirty_bitmap, cur_sector, nr_sectors);
> +    BdrvDirtyBitmap *bitmap;
> +    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
> +        hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
> +    }
>  }
>  
> -void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
> -                      int nr_sectors)
> +void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors)
>  {
> -    hbitmap_reset(bs->dirty_bitmap, cur_sector, nr_sectors);
> +    BdrvDirtyBitmap *bitmap;
> +    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
> +        hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
> +    }
>  }
>  
> -int64_t bdrv_get_dirty_count(BlockDriverState *bs)
> +int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
>  {
> -    if (bs->dirty_bitmap) {
> -        return hbitmap_count(bs->dirty_bitmap);
> -    } else {
> -        return 0;
> -    }
> +    return hbitmap_count(bitmap->bitmap);
>  }
>  
>  /* Get a reference to bs */
> diff --git a/block/mirror.c b/block/mirror.c
> index 7b95acf..6dc27ad 100644
> --- a/block/mirror.c
> +++ b/block/mirror.c
> @@ -39,6 +39,7 @@ typedef struct MirrorBlockJob {
>      int64_t granularity;
>      size_t buf_size;
>      unsigned long *cow_bitmap;
> +    BdrvDirtyBitmap *dirty_bitmap;
>      HBitmapIter hbi;
>      uint8_t *buf;
>      QSIMPLEQ_HEAD(, MirrorBuffer) buf_free;
> @@ -145,9 +146,10 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
>  
>      s->sector_num = hbitmap_iter_next(&s->hbi);
>      if (s->sector_num < 0) {
> -        bdrv_dirty_iter_init(source, &s->hbi);
> +        bdrv_dirty_iter_init(source, s->dirty_bitmap, &s->hbi);
>          s->sector_num = hbitmap_iter_next(&s->hbi);
> -        trace_mirror_restart_iter(s, bdrv_get_dirty_count(source));
> +        trace_mirror_restart_iter(s,
> +                                  bdrv_get_dirty_count(source, s->dirty_bitmap));
>          assert(s->sector_num >= 0);
>      }
>  
> @@ -183,7 +185,7 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
>      do {
>          int added_sectors, added_chunks;
>  
> -        if (!bdrv_get_dirty(source, next_sector) ||
> +        if (!bdrv_get_dirty(source, s->dirty_bitmap, next_sector) ||
>              test_bit(next_chunk, s->in_flight_bitmap)) {
>              assert(nb_sectors > 0);
>              break;
> @@ -249,7 +251,8 @@ static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
>          /* Advance the HBitmapIter in parallel, so that we do not examine
>           * the same sector twice.
>           */
> -        if (next_sector > hbitmap_next_sector && bdrv_get_dirty(source, next_sector)) {
> +        if (next_sector > hbitmap_next_sector
> +            && bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
>              hbitmap_next_sector = hbitmap_iter_next(&s->hbi);
>          }
>  
> @@ -355,7 +358,7 @@ static void coroutine_fn mirror_run(void *opaque)
>          }
>      }
>  
> -    bdrv_dirty_iter_init(bs, &s->hbi);
> +    bdrv_dirty_iter_init(bs, s->dirty_bitmap, &s->hbi);
>      last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
>      for (;;) {
>          uint64_t delay_ns;
> @@ -367,7 +370,7 @@ static void coroutine_fn mirror_run(void *opaque)
>              goto immediate_exit;
>          }
>  
> -        cnt = bdrv_get_dirty_count(bs);
> +        cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
>  
>          /* Note that even when no rate limit is applied we need to yield
>           * periodically with no pending I/O so that qemu_aio_flush() returns.
> @@ -409,7 +412,7 @@ static void coroutine_fn mirror_run(void *opaque)
>  
>                  should_complete = s->should_complete ||
>                      block_job_is_cancelled(&s->common);
> -                cnt = bdrv_get_dirty_count(bs);
> +                cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
>              }
>          }
>  
> @@ -424,7 +427,7 @@ static void coroutine_fn mirror_run(void *opaque)
>               */
>              trace_mirror_before_drain(s, cnt);
>              bdrv_drain_all();
> -            cnt = bdrv_get_dirty_count(bs);
> +            cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
>          }
>  
>          ret = 0;
> @@ -471,7 +474,7 @@ immediate_exit:
>      qemu_vfree(s->buf);
>      g_free(s->cow_bitmap);
>      g_free(s->in_flight_bitmap);
> -    bdrv_set_dirty_tracking(bs, 0);
> +    bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
>      bdrv_iostatus_disable(s->target);
>      if (s->should_complete && ret == 0) {
>          if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
> @@ -575,7 +578,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
>      s->granularity = granularity;
>      s->buf_size = MAX(buf_size, granularity);
>  
> -    bdrv_set_dirty_tracking(bs, granularity);
> +    s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity);
>      bdrv_set_enable_write_cache(s->target, true);
>      bdrv_set_on_error(s->target, on_target_error, on_target_error);
>      bdrv_iostatus_enable(s->target);
> diff --git a/block/qapi.c b/block/qapi.c
> index 5880b3e..6b0cdcf 100644
> --- a/block/qapi.c
> +++ b/block/qapi.c
> @@ -204,14 +204,6 @@ void bdrv_query_info(BlockDriverState *bs,
>          info->io_status = bs->iostatus;
>      }
>  
> -    if (bs->dirty_bitmap) {
> -        info->has_dirty = true;
> -        info->dirty = g_malloc0(sizeof(*info->dirty));
> -        info->dirty->count = bdrv_get_dirty_count(bs) * BDRV_SECTOR_SIZE;
> -        info->dirty->granularity =
> -         ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bs->dirty_bitmap));
> -    }
> -
>      if (bs->drv) {
>          info->has_inserted = true;
>          info->inserted = g_malloc0(sizeof(*info->inserted));
> diff --git a/include/block/block.h b/include/block/block.h
> index 3560deb..06f424c 100644
> --- a/include/block/block.h
> +++ b/include/block/block.h
> @@ -388,12 +388,15 @@ void *qemu_blockalign(BlockDriverState *bs, size_t size);
>  bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
>  
>  struct HBitmapIter;
> -void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity);
> -int bdrv_get_dirty(BlockDriverState *bs, int64_t sector);
> +typedef struct BdrvDirtyBitmap BdrvDirtyBitmap;
> +BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity);
> +void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
> +int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector);
>  void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
>  void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
> -void bdrv_dirty_iter_init(BlockDriverState *bs, struct HBitmapIter *hbi);
> -int64_t bdrv_get_dirty_count(BlockDriverState *bs);
> +void bdrv_dirty_iter_init(BlockDriverState *bs,
> +                          BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi);
> +int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
>  
>  void bdrv_enable_copy_on_read(BlockDriverState *bs);
>  void bdrv_disable_copy_on_read(BlockDriverState *bs);
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index 1666066..3f2bee1 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -301,7 +301,7 @@ struct BlockDriverState {
>      bool iostatus_enabled;
>      BlockDeviceIoStatus iostatus;
>      char device_name[32];
> -    HBitmap *dirty_bitmap;
> +    QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
>      int refcnt;
>      int in_use; /* users other than guest access, eg. block migration */
>      QTAILQ_ENTRY(BlockDriverState) list;
> -- 
> 1.8.3.1
> 
>
Fam Zheng Nov. 5, 2013, 3:12 a.m. UTC | #5
On 11/04/2013 10:38 PM, Benoît Canet wrote:
> Le Monday 04 Nov 2013 à 17:30:10 (+0800), Fam Zheng a écrit :
>> Previously a BlockDriverState has only one dirty bitmap, so only one
>> caller (e.g. a block job) can keep track of writing. This changes the
>> dirty bitmap to a list and creates a BdrvDirtyBitmap for each caller, the
>> lifecycle is managed with these new functions:
>>
>>      bdrv_create_dirty_bitmap
>>      bdrv_release_dirty_bitmap
>>
>> Where BdrvDirtyBitmap is a linked list wrapper structure of HBitmap.
>>
>> In place of bdrv_set_dirty_tracking, a BdrvDirtyBitmap pointer argument
>> is added to these functions, since each caller has its own dirty bitmap:
>>
>>      bdrv_get_dirty
>>      bdrv_dirty_iter_init
>>      bdrv_get_dirty_count
>>
>> bdrv_set_dirty and bdrv_reset_dirty prototypes are unchanged but will
>> internally walk the list of all dirty bitmaps and set them one by one.
>>
>> Signed-off-by: Fam Zheng <famz@redhat.com>
>>
>> ---
>> v2: Added BdrvDirtyBitmap [Paolo]
>>
>> Signed-off-by: Fam Zheng <famz@redhat.com>
>> ---
>>   block-migration.c         | 22 +++++++++----
>>   block.c                   | 81 ++++++++++++++++++++++++++++-------------------
>>   block/mirror.c            | 23 ++++++++------
>>   block/qapi.c              |  8 -----
>>   include/block/block.h     | 11 ++++---
>>   include/block/block_int.h |  2 +-
>>   6 files changed, 85 insertions(+), 62 deletions(-)
>>
>> diff --git a/block-migration.c b/block-migration.c
>> index daf9ec1..8f4e826 100644
>> --- a/block-migration.c
>> +++ b/block-migration.c
>> @@ -58,6 +58,7 @@ typedef struct BlkMigDevState {
>>       /* Protected by block migration lock.  */
>>       unsigned long *aio_bitmap;
>>       int64_t completed_sectors;
>> +    BdrvDirtyBitmap *dirty_bitmap;
>>   } BlkMigDevState;
>>   
>>   typedef struct BlkMigBlock {
>> @@ -309,12 +310,21 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
>>   
>>   /* Called with iothread lock taken.  */
>>   
>> -static void set_dirty_tracking(int enable)
>> +static void set_dirty_tracking(void)
>>   {
>>       BlkMigDevState *bmds;
>>   
>>       QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
>> -        bdrv_set_dirty_tracking(bmds->bs, enable ? BLOCK_SIZE : 0);
>> +        bmds->dirty_bitmap = bdrv_create_dirty_bitmap(bmds->bs, BLOCK_SIZE);
>> +    }
>> +}
>> +
>> +static void unset_dirty_tracking(void)
>> +{
>> +    BlkMigDevState *bmds;
>> +
>> +    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
>> +        bdrv_release_dirty_bitmap(bmds->bs, bmds->dirty_bitmap);
>>       }
>>   }
>>   
>> @@ -432,7 +442,7 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
>>           } else {
>>               blk_mig_unlock();
>>           }
>> -        if (bdrv_get_dirty(bmds->bs, sector)) {
>> +        if (bdrv_get_dirty(bmds->bs, bmds->dirty_bitmap, sector)) {
>>   
>>               if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
>>                   nr_sectors = total_sectors - sector;
>> @@ -554,7 +564,7 @@ static int64_t get_remaining_dirty(void)
>>       int64_t dirty = 0;
>>   
>>       QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
>> -        dirty += bdrv_get_dirty_count(bmds->bs);
>> +        dirty += bdrv_get_dirty_count(bmds->bs, bmds->dirty_bitmap);
>>       }
>>   
>>       return dirty << BDRV_SECTOR_BITS;
>> @@ -569,7 +579,7 @@ static void blk_mig_cleanup(void)
>>   
>>       bdrv_drain_all();
>>   
>> -    set_dirty_tracking(0);
>> +    unset_dirty_tracking();
>>   
>>       blk_mig_lock();
>>       while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
>> @@ -604,7 +614,7 @@ static int block_save_setup(QEMUFile *f, void *opaque)
>>       init_blk_migration(f);
>>   
>>       /* start track dirty blocks */
>> -    set_dirty_tracking(1);
>> +    set_dirty_tracking();
>>       qemu_mutex_unlock_iothread();
>>   
>>       ret = flush_blks(f);
>> diff --git a/block.c b/block.c
>> index 58efb5b..ef7a55f 100644
>> --- a/block.c
>> +++ b/block.c
>> @@ -49,6 +49,11 @@
>>   #include <windows.h>
>>   #endif
>>   
>> +typedef struct BdrvDirtyBitmap {
>> +    HBitmap *bitmap;
>> +    QLIST_ENTRY(BdrvDirtyBitmap) list;
>> +} BdrvDirtyBitmap;
>> +
>>   #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
>>   
>>   typedef enum {
>> @@ -323,6 +328,7 @@ BlockDriverState *bdrv_new(const char *device_name)
>>       BlockDriverState *bs;
>>   
>>       bs = g_malloc0(sizeof(BlockDriverState));
>> +    QLIST_INIT(&bs->dirty_bitmaps);
>>       pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
>>       if (device_name[0] != '\0') {
>>           QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
>> @@ -1615,7 +1621,7 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
>>       bs_dest->iostatus           = bs_src->iostatus;
>>   
>>       /* dirty bitmap */
>> -    bs_dest->dirty_bitmap       = bs_src->dirty_bitmap;
>> +    bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
>>   
>>       /* reference count */
>>       bs_dest->refcnt             = bs_src->refcnt;
>> @@ -1648,7 +1654,7 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
>>   
>>       /* bs_new must be anonymous and shouldn't have anything fancy enabled */
>>       assert(bs_new->device_name[0] == '\0');
>> -    assert(bs_new->dirty_bitmap == NULL);
>> +    assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
>>       assert(bs_new->job == NULL);
>>       assert(bs_new->dev == NULL);
>>       assert(bs_new->in_use == 0);
>> @@ -1709,6 +1715,7 @@ static void bdrv_delete(BlockDriverState *bs)
>>       assert(!bs->job);
>>       assert(!bs->in_use);
>>       assert(!bs->refcnt);
>> +    assert(QLIST_EMPTY(&bs->dirty_bitmaps));
>>   
>>       bdrv_close(bs);
>>   
>> @@ -2785,9 +2792,7 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
>>           ret = bdrv_co_flush(bs);
>>       }
>>   
>> -    if (bs->dirty_bitmap) {
>>           bdrv_set_dirty(bs, sector_num, nb_sectors);
>> -    }
>>   
>>       if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
>>           bs->wr_highest_sector = sector_num + nb_sectors - 1;
>> @@ -3323,7 +3328,7 @@ int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
>>       if (bdrv_check_request(bs, sector_num, nb_sectors))
>>           return -EIO;
>>   
>> -    assert(!bs->dirty_bitmap);
>> +    assert(QLIST_EMPTY(&bs->dirty_bitmaps));
>>   
>>       return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
>>   }
>> @@ -4183,9 +4188,7 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
>>           return -EROFS;
>>       }
>>   
>> -    if (bs->dirty_bitmap) {
>> -        bdrv_reset_dirty(bs, sector_num, nb_sectors);
>> -    }
>> +    bdrv_reset_dirty(bs, sector_num, nb_sectors);
>>   
>>       /* Do nothing if disabled.  */
>>       if (!(bs->open_flags & BDRV_O_UNMAP)) {
>> @@ -4347,58 +4350,70 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
>>       return true;
>>   }
>>   
>> -void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity)
>> +BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity)
>>   {
>>       int64_t bitmap_size;
>> +    BdrvDirtyBitmap *bitmap;
>>   
>>       assert((granularity & (granularity - 1)) == 0);
>>   
>> -    if (granularity) {
>> -        granularity >>= BDRV_SECTOR_BITS;
>> -        assert(!bs->dirty_bitmap);
>> -        bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
>> -        bs->dirty_bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
>> -    } else {
>> -        if (bs->dirty_bitmap) {
>> -            hbitmap_free(bs->dirty_bitmap);
>> -            bs->dirty_bitmap = NULL;
>> +    granularity >>= BDRV_SECTOR_BITS;
>> +    assert(granularity);
>> +    bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
>> +    bitmap = g_malloc0(sizeof(BdrvDirtyBitmap));
>> +    bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
>> +    QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
>> +    return bitmap;
>> +}
>> +
>> +void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
>> +{
>> +    BdrvDirtyBitmap *bm, *next;
>> +    QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
>> +        if (bm == bitmap) {
>> +            QLIST_REMOVE(bitmap, list);
>> +            hbitmap_free(bitmap->bitmap);
>> +            g_free(bitmap);
>> +            return;
>>           }
>>       }
>>   }
>>   
>> -int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
>> +int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
>>   {
>> -    if (bs->dirty_bitmap) {
>> -        return hbitmap_get(bs->dirty_bitmap, sector);
>> +    if (bitmap) {
>> +        return hbitmap_get(bitmap->bitmap, sector);
> Is see some if (bitmap) here but nowhere some code resetting the various
> dirty_bitmap fields to NULL.
> Is it missing ?
I just kept the logic here, but callers should always pass in a non NULL 
for now. And the dirty_bitmap fields are released by caller with 
bdrv_release_dirty_bitmap(). Is this the answer to your question?

Thanks,
Fam
Stefan Hajnoczi Nov. 11, 2013, 10:31 a.m. UTC | #6
On Mon, Nov 04, 2013 at 11:55:47AM +0100, Paolo Bonzini wrote:
> Il 04/11/2013 11:47, Fam Zheng ha scritto:
> >>>
> >>> -void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity);
> >>> -int bdrv_get_dirty(BlockDriverState *bs, int64_t sector);
> >>> +typedef struct BdrvDirtyBitmap BdrvDirtyBitmap;
> >>> +BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int
> >>> granularity);
> >>> +void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap
> >>> *bitmap);
> >>> +int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
> >>> int64_t sector);
> >>>   void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int
> >>> nr_sectors);
> >>>   void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int
> >>> nr_sectors);
> >>> -void bdrv_dirty_iter_init(BlockDriverState *bs, struct HBitmapIter
> >>> *hbi);
> >>> -int64_t bdrv_get_dirty_count(BlockDriverState *bs);
> >>> +void bdrv_dirty_iter_init(BlockDriverState *bs,
> >>> +                          BdrvDirtyBitmap *bitmap, struct
> >>> HBitmapIter *hbi);
> >>> +int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap
> >>> *bitmap);
> >>>     void bdrv_enable_copy_on_read(BlockDriverState *bs);
> >>>   void bdrv_disable_copy_on_read(BlockDriverState *bs);
> >> You do not really need the BDS argument to the functions, do you?  (Or
> >> do you have other plans?)
> > 
> > I just wanted to keep the pattern of those bdrv_* family, no other plans
> > for it.
> 
> Kevin, Stefan, any second opinions?

I was thinking the same thing and arrived at the conclusion that since
it's bdrv_*() and not bbitmap_*() we should keep the explicit BDS
argument.

Stefan
Stefan Hajnoczi Nov. 11, 2013, 10:34 a.m. UTC | #7
On Mon, Nov 04, 2013 at 05:30:10PM +0800, Fam Zheng wrote:
> @@ -2785,9 +2792,7 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
>          ret = bdrv_co_flush(bs);
>      }
>  
> -    if (bs->dirty_bitmap) {
>          bdrv_set_dirty(bs, sector_num, nb_sectors);
> -    }

Forgot to reduce indentation?
Stefan Hajnoczi Nov. 11, 2013, 10:35 a.m. UTC | #8
On Mon, Nov 04, 2013 at 05:30:10PM +0800, Fam Zheng wrote:
> Previously a BlockDriverState has only one dirty bitmap, so only one
> caller (e.g. a block job) can keep track of writing. This changes the
> dirty bitmap to a list and creates a BdrvDirtyBitmap for each caller, the
> lifecycle is managed with these new functions:
> 
>     bdrv_create_dirty_bitmap
>     bdrv_release_dirty_bitmap
> 
> Where BdrvDirtyBitmap is a linked list wrapper structure of HBitmap.
> 
> In place of bdrv_set_dirty_tracking, a BdrvDirtyBitmap pointer argument
> is added to these functions, since each caller has its own dirty bitmap:
> 
>     bdrv_get_dirty
>     bdrv_dirty_iter_init
>     bdrv_get_dirty_count
> 
> bdrv_set_dirty and bdrv_reset_dirty prototypes are unchanged but will
> internally walk the list of all dirty bitmaps and set them one by one.
> 
> Signed-off-by: Fam Zheng <famz@redhat.com>
> 
> ---
> v2: Added BdrvDirtyBitmap [Paolo]
> 
> Signed-off-by: Fam Zheng <famz@redhat.com>
> ---
>  block-migration.c         | 22 +++++++++----
>  block.c                   | 81 ++++++++++++++++++++++++++++-------------------
>  block/mirror.c            | 23 ++++++++------
>  block/qapi.c              |  8 -----
>  include/block/block.h     | 11 ++++---
>  include/block/block_int.h |  2 +-
>  6 files changed, 85 insertions(+), 62 deletions(-)

Happy with this modulo the indentation fixup I commented on.

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Kevin Wolf Nov. 12, 2013, 10:46 a.m. UTC | #9
Am 04.11.2013 um 10:30 hat Fam Zheng geschrieben:
> Previously a BlockDriverState has only one dirty bitmap, so only one
> caller (e.g. a block job) can keep track of writing. This changes the
> dirty bitmap to a list and creates a BdrvDirtyBitmap for each caller, the
> lifecycle is managed with these new functions:
> 
>     bdrv_create_dirty_bitmap
>     bdrv_release_dirty_bitmap
> 
> Where BdrvDirtyBitmap is a linked list wrapper structure of HBitmap.
> 
> In place of bdrv_set_dirty_tracking, a BdrvDirtyBitmap pointer argument
> is added to these functions, since each caller has its own dirty bitmap:
> 
>     bdrv_get_dirty
>     bdrv_dirty_iter_init
>     bdrv_get_dirty_count
> 
> bdrv_set_dirty and bdrv_reset_dirty prototypes are unchanged but will
> internally walk the list of all dirty bitmaps and set them one by one.
> 
> Signed-off-by: Fam Zheng <famz@redhat.com>

> diff --git a/block/qapi.c b/block/qapi.c
> index 5880b3e..6b0cdcf 100644
> --- a/block/qapi.c
> +++ b/block/qapi.c
> @@ -204,14 +204,6 @@ void bdrv_query_info(BlockDriverState *bs,
>          info->io_status = bs->iostatus;
>      }
>  
> -    if (bs->dirty_bitmap) {
> -        info->has_dirty = true;
> -        info->dirty = g_malloc0(sizeof(*info->dirty));
> -        info->dirty->count = bdrv_get_dirty_count(bs) * BDRV_SECTOR_SIZE;
> -        info->dirty->granularity =
> -         ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bs->dirty_bitmap));
> -    }
> -
>      if (bs->drv) {
>          info->has_inserted = true;
>          info->inserted = g_malloc0(sizeof(*info->inserted));

The dirty field should probably be removed from qapi-schema.json if it
never gets set any more.

It was optional, so perhaps removing it doesn't cause any trouble
indeed, but I'd like to hear Eric on this matter before merging the
patch. Though if libvirt does make use of it, we have a problem because
it doesn't really make sense any more after these changes.

Kevin
Eric Blake Nov. 12, 2013, 1:06 p.m. UTC | #10
On 11/12/2013 03:46 AM, Kevin Wolf wrote:

>> +++ b/block/qapi.c
>> @@ -204,14 +204,6 @@ void bdrv_query_info(BlockDriverState *bs,
>>          info->io_status = bs->iostatus;
>>      }
>>  
>> -    if (bs->dirty_bitmap) {
>> -        info->has_dirty = true;
>> -        info->dirty = g_malloc0(sizeof(*info->dirty));
>> -        info->dirty->count = bdrv_get_dirty_count(bs) * BDRV_SECTOR_SIZE;
>> -        info->dirty->granularity =
>> -         ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bs->dirty_bitmap));
>> -    }
>> -
>>      if (bs->drv) {
>>          info->has_inserted = true;
>>          info->inserted = g_malloc0(sizeof(*info->inserted));
> 
> The dirty field should probably be removed from qapi-schema.json if it
> never gets set any more.
> 
> It was optional, so perhaps removing it doesn't cause any trouble
> indeed, but I'd like to hear Eric on this matter before merging the
> patch. Though if libvirt does make use of it, we have a problem because
> it doesn't really make sense any more after these changes.

Removing an optional output-only field is okay (it is the same as
stating that we are ALWAYS taking the option of not including it).
Since it has always been marked optional, management can't be relying on
it; more specifically, libvirt does not have any code checking for it.
So I'm okay with removing it from the schema as a backwards-compatible
change.

[Removing an optional input field is wrong, but BlockInfo is an output
type, not an input type.]

Eventually, libvirt would like to use persistent dirty bitmaps, in order
to resume long-running operations such as drive-mirror even across
migration between different qemu processes, but I don't think we are
quite there yet, and I also don't think that removal of 'dirty' from
BlockInfo impedes in that goal.
diff mbox

Patch

diff --git a/block-migration.c b/block-migration.c
index daf9ec1..8f4e826 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -58,6 +58,7 @@  typedef struct BlkMigDevState {
     /* Protected by block migration lock.  */
     unsigned long *aio_bitmap;
     int64_t completed_sectors;
+    BdrvDirtyBitmap *dirty_bitmap;
 } BlkMigDevState;
 
 typedef struct BlkMigBlock {
@@ -309,12 +310,21 @@  static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
 
 /* Called with iothread lock taken.  */
 
-static void set_dirty_tracking(int enable)
+static void set_dirty_tracking(void)
 {
     BlkMigDevState *bmds;
 
     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
-        bdrv_set_dirty_tracking(bmds->bs, enable ? BLOCK_SIZE : 0);
+        bmds->dirty_bitmap = bdrv_create_dirty_bitmap(bmds->bs, BLOCK_SIZE);
+    }
+}
+
+static void unset_dirty_tracking(void)
+{
+    BlkMigDevState *bmds;
+
+    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
+        bdrv_release_dirty_bitmap(bmds->bs, bmds->dirty_bitmap);
     }
 }
 
@@ -432,7 +442,7 @@  static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
         } else {
             blk_mig_unlock();
         }
-        if (bdrv_get_dirty(bmds->bs, sector)) {
+        if (bdrv_get_dirty(bmds->bs, bmds->dirty_bitmap, sector)) {
 
             if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
                 nr_sectors = total_sectors - sector;
@@ -554,7 +564,7 @@  static int64_t get_remaining_dirty(void)
     int64_t dirty = 0;
 
     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
-        dirty += bdrv_get_dirty_count(bmds->bs);
+        dirty += bdrv_get_dirty_count(bmds->bs, bmds->dirty_bitmap);
     }
 
     return dirty << BDRV_SECTOR_BITS;
@@ -569,7 +579,7 @@  static void blk_mig_cleanup(void)
 
     bdrv_drain_all();
 
-    set_dirty_tracking(0);
+    unset_dirty_tracking();
 
     blk_mig_lock();
     while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
@@ -604,7 +614,7 @@  static int block_save_setup(QEMUFile *f, void *opaque)
     init_blk_migration(f);
 
     /* start track dirty blocks */
-    set_dirty_tracking(1);
+    set_dirty_tracking();
     qemu_mutex_unlock_iothread();
 
     ret = flush_blks(f);
diff --git a/block.c b/block.c
index 58efb5b..ef7a55f 100644
--- a/block.c
+++ b/block.c
@@ -49,6 +49,11 @@ 
 #include <windows.h>
 #endif
 
+typedef struct BdrvDirtyBitmap {
+    HBitmap *bitmap;
+    QLIST_ENTRY(BdrvDirtyBitmap) list;
+} BdrvDirtyBitmap;
+
 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
 
 typedef enum {
@@ -323,6 +328,7 @@  BlockDriverState *bdrv_new(const char *device_name)
     BlockDriverState *bs;
 
     bs = g_malloc0(sizeof(BlockDriverState));
+    QLIST_INIT(&bs->dirty_bitmaps);
     pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
     if (device_name[0] != '\0') {
         QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
@@ -1615,7 +1621,7 @@  static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
     bs_dest->iostatus           = bs_src->iostatus;
 
     /* dirty bitmap */
-    bs_dest->dirty_bitmap       = bs_src->dirty_bitmap;
+    bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;
 
     /* reference count */
     bs_dest->refcnt             = bs_src->refcnt;
@@ -1648,7 +1654,7 @@  void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
 
     /* bs_new must be anonymous and shouldn't have anything fancy enabled */
     assert(bs_new->device_name[0] == '\0');
-    assert(bs_new->dirty_bitmap == NULL);
+    assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
     assert(bs_new->job == NULL);
     assert(bs_new->dev == NULL);
     assert(bs_new->in_use == 0);
@@ -1709,6 +1715,7 @@  static void bdrv_delete(BlockDriverState *bs)
     assert(!bs->job);
     assert(!bs->in_use);
     assert(!bs->refcnt);
+    assert(QLIST_EMPTY(&bs->dirty_bitmaps));
 
     bdrv_close(bs);
 
@@ -2785,9 +2792,7 @@  static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
         ret = bdrv_co_flush(bs);
     }
 
-    if (bs->dirty_bitmap) {
         bdrv_set_dirty(bs, sector_num, nb_sectors);
-    }
 
     if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
         bs->wr_highest_sector = sector_num + nb_sectors - 1;
@@ -3323,7 +3328,7 @@  int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
     if (bdrv_check_request(bs, sector_num, nb_sectors))
         return -EIO;
 
-    assert(!bs->dirty_bitmap);
+    assert(QLIST_EMPTY(&bs->dirty_bitmaps));
 
     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
 }
@@ -4183,9 +4188,7 @@  int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
         return -EROFS;
     }
 
-    if (bs->dirty_bitmap) {
-        bdrv_reset_dirty(bs, sector_num, nb_sectors);
-    }
+    bdrv_reset_dirty(bs, sector_num, nb_sectors);
 
     /* Do nothing if disabled.  */
     if (!(bs->open_flags & BDRV_O_UNMAP)) {
@@ -4347,58 +4350,70 @@  bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
     return true;
 }
 
-void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity)
+BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity)
 {
     int64_t bitmap_size;
+    BdrvDirtyBitmap *bitmap;
 
     assert((granularity & (granularity - 1)) == 0);
 
-    if (granularity) {
-        granularity >>= BDRV_SECTOR_BITS;
-        assert(!bs->dirty_bitmap);
-        bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
-        bs->dirty_bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
-    } else {
-        if (bs->dirty_bitmap) {
-            hbitmap_free(bs->dirty_bitmap);
-            bs->dirty_bitmap = NULL;
+    granularity >>= BDRV_SECTOR_BITS;
+    assert(granularity);
+    bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS);
+    bitmap = g_malloc0(sizeof(BdrvDirtyBitmap));
+    bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
+    QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
+    return bitmap;
+}
+
+void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
+{
+    BdrvDirtyBitmap *bm, *next;
+    QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
+        if (bm == bitmap) {
+            QLIST_REMOVE(bitmap, list);
+            hbitmap_free(bitmap->bitmap);
+            g_free(bitmap);
+            return;
         }
     }
 }
 
-int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
+int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
 {
-    if (bs->dirty_bitmap) {
-        return hbitmap_get(bs->dirty_bitmap, sector);
+    if (bitmap) {
+        return hbitmap_get(bitmap->bitmap, sector);
     } else {
         return 0;
     }
 }
 
-void bdrv_dirty_iter_init(BlockDriverState *bs, HBitmapIter *hbi)
+void bdrv_dirty_iter_init(BlockDriverState *bs,
+                          BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
 {
-    hbitmap_iter_init(hbi, bs->dirty_bitmap, 0);
+    hbitmap_iter_init(hbi, bitmap->bitmap, 0);
 }
 
 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
                     int nr_sectors)
 {
-    hbitmap_set(bs->dirty_bitmap, cur_sector, nr_sectors);
+    BdrvDirtyBitmap *bitmap;
+    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
+        hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
+    }
 }
 
-void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
-                      int nr_sectors)
+void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors)
 {
-    hbitmap_reset(bs->dirty_bitmap, cur_sector, nr_sectors);
+    BdrvDirtyBitmap *bitmap;
+    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
+        hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
+    }
 }
 
-int64_t bdrv_get_dirty_count(BlockDriverState *bs)
+int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
 {
-    if (bs->dirty_bitmap) {
-        return hbitmap_count(bs->dirty_bitmap);
-    } else {
-        return 0;
-    }
+    return hbitmap_count(bitmap->bitmap);
 }
 
 /* Get a reference to bs */
diff --git a/block/mirror.c b/block/mirror.c
index 7b95acf..6dc27ad 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -39,6 +39,7 @@  typedef struct MirrorBlockJob {
     int64_t granularity;
     size_t buf_size;
     unsigned long *cow_bitmap;
+    BdrvDirtyBitmap *dirty_bitmap;
     HBitmapIter hbi;
     uint8_t *buf;
     QSIMPLEQ_HEAD(, MirrorBuffer) buf_free;
@@ -145,9 +146,10 @@  static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
 
     s->sector_num = hbitmap_iter_next(&s->hbi);
     if (s->sector_num < 0) {
-        bdrv_dirty_iter_init(source, &s->hbi);
+        bdrv_dirty_iter_init(source, s->dirty_bitmap, &s->hbi);
         s->sector_num = hbitmap_iter_next(&s->hbi);
-        trace_mirror_restart_iter(s, bdrv_get_dirty_count(source));
+        trace_mirror_restart_iter(s,
+                                  bdrv_get_dirty_count(source, s->dirty_bitmap));
         assert(s->sector_num >= 0);
     }
 
@@ -183,7 +185,7 @@  static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
     do {
         int added_sectors, added_chunks;
 
-        if (!bdrv_get_dirty(source, next_sector) ||
+        if (!bdrv_get_dirty(source, s->dirty_bitmap, next_sector) ||
             test_bit(next_chunk, s->in_flight_bitmap)) {
             assert(nb_sectors > 0);
             break;
@@ -249,7 +251,8 @@  static void coroutine_fn mirror_iteration(MirrorBlockJob *s)
         /* Advance the HBitmapIter in parallel, so that we do not examine
          * the same sector twice.
          */
-        if (next_sector > hbitmap_next_sector && bdrv_get_dirty(source, next_sector)) {
+        if (next_sector > hbitmap_next_sector
+            && bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
             hbitmap_next_sector = hbitmap_iter_next(&s->hbi);
         }
 
@@ -355,7 +358,7 @@  static void coroutine_fn mirror_run(void *opaque)
         }
     }
 
-    bdrv_dirty_iter_init(bs, &s->hbi);
+    bdrv_dirty_iter_init(bs, s->dirty_bitmap, &s->hbi);
     last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
     for (;;) {
         uint64_t delay_ns;
@@ -367,7 +370,7 @@  static void coroutine_fn mirror_run(void *opaque)
             goto immediate_exit;
         }
 
-        cnt = bdrv_get_dirty_count(bs);
+        cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
 
         /* Note that even when no rate limit is applied we need to yield
          * periodically with no pending I/O so that qemu_aio_flush() returns.
@@ -409,7 +412,7 @@  static void coroutine_fn mirror_run(void *opaque)
 
                 should_complete = s->should_complete ||
                     block_job_is_cancelled(&s->common);
-                cnt = bdrv_get_dirty_count(bs);
+                cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
             }
         }
 
@@ -424,7 +427,7 @@  static void coroutine_fn mirror_run(void *opaque)
              */
             trace_mirror_before_drain(s, cnt);
             bdrv_drain_all();
-            cnt = bdrv_get_dirty_count(bs);
+            cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
         }
 
         ret = 0;
@@ -471,7 +474,7 @@  immediate_exit:
     qemu_vfree(s->buf);
     g_free(s->cow_bitmap);
     g_free(s->in_flight_bitmap);
-    bdrv_set_dirty_tracking(bs, 0);
+    bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
     bdrv_iostatus_disable(s->target);
     if (s->should_complete && ret == 0) {
         if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
@@ -575,7 +578,7 @@  void mirror_start(BlockDriverState *bs, BlockDriverState *target,
     s->granularity = granularity;
     s->buf_size = MAX(buf_size, granularity);
 
-    bdrv_set_dirty_tracking(bs, granularity);
+    s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity);
     bdrv_set_enable_write_cache(s->target, true);
     bdrv_set_on_error(s->target, on_target_error, on_target_error);
     bdrv_iostatus_enable(s->target);
diff --git a/block/qapi.c b/block/qapi.c
index 5880b3e..6b0cdcf 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -204,14 +204,6 @@  void bdrv_query_info(BlockDriverState *bs,
         info->io_status = bs->iostatus;
     }
 
-    if (bs->dirty_bitmap) {
-        info->has_dirty = true;
-        info->dirty = g_malloc0(sizeof(*info->dirty));
-        info->dirty->count = bdrv_get_dirty_count(bs) * BDRV_SECTOR_SIZE;
-        info->dirty->granularity =
-         ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bs->dirty_bitmap));
-    }
-
     if (bs->drv) {
         info->has_inserted = true;
         info->inserted = g_malloc0(sizeof(*info->inserted));
diff --git a/include/block/block.h b/include/block/block.h
index 3560deb..06f424c 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -388,12 +388,15 @@  void *qemu_blockalign(BlockDriverState *bs, size_t size);
 bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
 
 struct HBitmapIter;
-void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity);
-int bdrv_get_dirty(BlockDriverState *bs, int64_t sector);
+typedef struct BdrvDirtyBitmap BdrvDirtyBitmap;
+BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity);
+void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
+int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector);
 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
-void bdrv_dirty_iter_init(BlockDriverState *bs, struct HBitmapIter *hbi);
-int64_t bdrv_get_dirty_count(BlockDriverState *bs);
+void bdrv_dirty_iter_init(BlockDriverState *bs,
+                          BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi);
+int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
 
 void bdrv_enable_copy_on_read(BlockDriverState *bs);
 void bdrv_disable_copy_on_read(BlockDriverState *bs);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 1666066..3f2bee1 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -301,7 +301,7 @@  struct BlockDriverState {
     bool iostatus_enabled;
     BlockDeviceIoStatus iostatus;
     char device_name[32];
-    HBitmap *dirty_bitmap;
+    QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
     int refcnt;
     int in_use; /* users other than guest access, eg. block migration */
     QTAILQ_ENTRY(BlockDriverState) list;