diff mbox

[RFC,v3,05/14] block: add meta bitmaps

Message ID 1424268014-13293-6-git-send-email-vsementsov@parallels.com
State New
Headers show

Commit Message

Vladimir Sementsov-Ogievskiy Feb. 18, 2015, 2 p.m. UTC
Meta bitmap is a 'dirty bitmap' for the BdrvDirtyBitmap. It tracks
changes (set/unset) of this BdrvDirtyBitmap. It is needed for live
migration of block dirty bitmaps.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@parallels.com>
---
 block.c               | 40 ++++++++++++++++++++++++++++++++++++++++
 include/block/block.h |  5 +++++
 2 files changed, 45 insertions(+)

Comments

John Snow Feb. 18, 2015, 11:45 p.m. UTC | #1
On 02/18/2015 09:00 AM, Vladimir Sementsov-Ogievskiy wrote:
> Meta bitmap is a 'dirty bitmap' for the BdrvDirtyBitmap. It tracks
> changes (set/unset) of this BdrvDirtyBitmap. It is needed for live
> migration of block dirty bitmaps.
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@parallels.com>
> ---
>   block.c               | 40 ++++++++++++++++++++++++++++++++++++++++
>   include/block/block.h |  5 +++++
>   2 files changed, 45 insertions(+)
>
> diff --git a/block.c b/block.c
> index a127fd2..aaa08b8 100644
> --- a/block.c
> +++ b/block.c
> @@ -58,9 +58,15 @@
>    * (3) successor is set: frozen mode.
>    *     A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
>    *     or enabled. A frozen bitmap can only abdicate() or reclaim().
> + *
> + * Meta bitmap:
> + * Meta bitmap is a 'dirty bitmap' for the BdrvDirtyBitmap. It tracks changes
> + * (set/unset) of this BdrvDirtyBitmap. It is needed for live migration of
> + * block dirty bitmaps.
>    */
>   struct BdrvDirtyBitmap {
>       HBitmap *bitmap;            /* Dirty sector bitmap implementation */
> +    HBitmap *meta_bitmap;       /* Meta bitmap */
>       BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
>       char *name;                 /* Optional non-empty unique ID */
>       int64_t size;               /* Size of the bitmap (Number of sectors) */
> @@ -5398,6 +5404,31 @@ void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
>       bitmap->name = NULL;
>   }
>
> +HBitmap *bdrv_create_meta_bitmap(BdrvDirtyBitmap *bitmap,
> +                                        uint64_t granularity)
> +{
> +    uint64_t sector_granularity;
> +
> +    assert((granularity & (granularity - 1)) == 0);
> +
> +    granularity *= 8 * bdrv_dirty_bitmap_granularity(bitmap);
> +    sector_granularity = granularity >> BDRV_SECTOR_BITS;
> +    assert(sector_granularity);
> +

The maths here could use a comment, I think.

the "granularity" field here actually describes the desired 
serialization buffer size; e.g. CHUNK_SIZE (1 << 20) or 1 MiB. This 
parameter should be renamed to explain what it's actually for. Something 
like "chunk_size" and a comment explaining that it is in bytes.

...

That said, let's talk about the default chunk size you're using in 
correlation with this function.

a CHUNK_SIZE of 1MiB here is going to lead us to, if we have a bitmap 
with the default granularity of 128 sectors\64KiB bytes, a granularity 
for the meta_bitmap of one billion sectors (1 << 30) or 512GiB.

That's going to be bigger than most drives entirely, which will 
generally lead us to only using a single "chunk" per drive. Which means 
we won't really get a lot of mileage out of the bulk/dirty phases most 
of the time.

It's wild to think about that the first 1,000,000,000 sectors or 
512,000,000,000 bytes will all be represented by the first single bit in 
this bitmap. If a single hair on the drive changes, we resend the 
_entire_ bitmap, possibly over and over again. Will we ever make 
progress? Should we investigate a smaller chunk size?

Here's some quick mappings of chunk size (bytes) to effective 
meta_bitmap byte granularities, assuming the meta_bitmap is tracking a 
bitmap with the default granularity of 64KiB:

(1 << 20) 1MiB   -- 512GiB  // This is too high of a granularity
(1 << 17) 128KiB --  64GiB
(1 << 15) 32KiB  --  16GiB
(1 << 11) 2KiB   --   1GiB
(1 << 10) 1KiB   -- 512MiB
(1 << 9)  512B   -- 256MiB
(1 << 8)  256B   -- 128MiB
(1 << 5)  32 B   --  16MiB  // This is too small of a chunk size.
(1 << 1)   1 B   --   1MiB

We want to make the chunk sends efficient, but we also want to make sure 
that the dirty phase doesn't resend more data than it needs to, so we 
need to strike a balance here, no?

I think arguments could be made for most granularities between 128MiB 
through 1GiB. Anything outside of that is too lopsided, IMO.

What are your thoughts on this?

> +    bitmap->meta_bitmap =
> +        hbitmap_alloc(bitmap->size, ffsll(sector_granularity) - 1);
> +
> +    return bitmap->meta_bitmap;
> +}
> +
> +void bdrv_release_meta_bitmap(BdrvDirtyBitmap *bitmap)
> +{
> +    if (bitmap->meta_bitmap) {
> +        hbitmap_free(bitmap->meta_bitmap);
> +        bitmap->meta_bitmap = NULL;
> +    }
> +}
> +
>   BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
>                                             uint32_t granularity,
>                                             const char *name,
> @@ -5532,6 +5563,9 @@ void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
>               assert(!bdrv_dirty_bitmap_frozen(bm));
>               QLIST_REMOVE(bitmap, list);
>               hbitmap_free(bitmap->bitmap);
> +            if (bitmap->meta_bitmap) {
> +                hbitmap_free(bitmap->meta_bitmap);
> +            }
>               g_free(bitmap->name);
>               g_free(bitmap);
>               return;
> @@ -5659,6 +5693,9 @@ void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
>   {
>       assert(bdrv_dirty_bitmap_enabled(bitmap));
>       hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
> +    if (bitmap->meta_bitmap) {
> +        hbitmap_set(bitmap->meta_bitmap, cur_sector, nr_sectors);
> +    }
>   }
>
>   void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
> @@ -5666,6 +5703,9 @@ void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
>   {
>       assert(bdrv_dirty_bitmap_enabled(bitmap));
>       hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
> +    if (bitmap->meta_bitmap) {
> +        hbitmap_set(bitmap->meta_bitmap, cur_sector, nr_sectors);
> +    }
>   }
>
>   void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap)
> diff --git a/include/block/block.h b/include/block/block.h
> index c6a928d..f2c62f6 100644
> --- a/include/block/block.h
> +++ b/include/block/block.h
> @@ -4,6 +4,7 @@
>   #include "block/aio.h"
>   #include "qemu-common.h"
>   #include "qemu/option.h"
> +#include "qemu/hbitmap.h"
>   #include "block/coroutine.h"
>   #include "block/accounting.h"
>   #include "qapi/qmp/qobject.h"
> @@ -487,6 +488,10 @@ void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap,
>                                             uint64_t start, uint64_t count);
>   void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap);
>
> +HBitmap *bdrv_create_meta_bitmap(BdrvDirtyBitmap *bitmap,
> +                                        uint64_t granularity);
> +void bdrv_release_meta_bitmap(BdrvDirtyBitmap *bitmap);
> +
>   void bdrv_enable_copy_on_read(BlockDriverState *bs);
>   void bdrv_disable_copy_on_read(BlockDriverState *bs);
>
>
Vladimir Sementsov-Ogievskiy Feb. 19, 2015, 11:43 a.m. UTC | #2
On 19.02.2015 02:45, John Snow wrote:
>
>
> On 02/18/2015 09:00 AM, Vladimir Sementsov-Ogievskiy wrote:
>> Meta bitmap is a 'dirty bitmap' for the BdrvDirtyBitmap. It tracks
>> changes (set/unset) of this BdrvDirtyBitmap. It is needed for live
>> migration of block dirty bitmaps.
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@parallels.com>
>> ---
>>   block.c               | 40 ++++++++++++++++++++++++++++++++++++++++
>>   include/block/block.h |  5 +++++
>>   2 files changed, 45 insertions(+)
>>
>> diff --git a/block.c b/block.c
>> index a127fd2..aaa08b8 100644
>> --- a/block.c
>> +++ b/block.c
>> @@ -58,9 +58,15 @@
>>    * (3) successor is set: frozen mode.
>>    *     A frozen bitmap cannot be renamed, deleted, anonymized, 
>> cleared, set,
>>    *     or enabled. A frozen bitmap can only abdicate() or reclaim().
>> + *
>> + * Meta bitmap:
>> + * Meta bitmap is a 'dirty bitmap' for the BdrvDirtyBitmap. It 
>> tracks changes
>> + * (set/unset) of this BdrvDirtyBitmap. It is needed for live 
>> migration of
>> + * block dirty bitmaps.
>>    */
>>   struct BdrvDirtyBitmap {
>>       HBitmap *bitmap;            /* Dirty sector bitmap 
>> implementation */
>> +    HBitmap *meta_bitmap;       /* Meta bitmap */
>>       BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen 
>> status */
>>       char *name;                 /* Optional non-empty unique ID */
>>       int64_t size;               /* Size of the bitmap (Number of 
>> sectors) */
>> @@ -5398,6 +5404,31 @@ void 
>> bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
>>       bitmap->name = NULL;
>>   }
>>
>> +HBitmap *bdrv_create_meta_bitmap(BdrvDirtyBitmap *bitmap,
>> +                                        uint64_t granularity)
>> +{
>> +    uint64_t sector_granularity;
>> +
>> +    assert((granularity & (granularity - 1)) == 0);
>> +
>> +    granularity *= 8 * bdrv_dirty_bitmap_granularity(bitmap);
>> +    sector_granularity = granularity >> BDRV_SECTOR_BITS;
>> +    assert(sector_granularity);
>> +
>
> The maths here could use a comment, I think.
>
> the "granularity" field here actually describes the desired 
> serialization buffer size; e.g. CHUNK_SIZE (1 << 20) or 1 MiB. This 
> parameter should be renamed to explain what it's actually for. 
> Something like "chunk_size" and a comment explaining that it is in bytes.
>
> ...
>
> That said, let's talk about the default chunk size you're using in 
> correlation with this function.
>
> a CHUNK_SIZE of 1MiB here is going to lead us to, if we have a bitmap 
> with the default granularity of 128 sectors\64KiB bytes, a granularity 
> for the meta_bitmap of one billion sectors (1 << 30) or 512GiB.
>
> That's going to be bigger than most drives entirely, which will 
> generally lead us to only using a single "chunk" per drive. Which 
> means we won't really get a lot of mileage out of the bulk/dirty 
> phases most of the time.
>
> It's wild to think about that the first 1,000,000,000 sectors or 
> 512,000,000,000 bytes will all be represented by the first single bit 
> in this bitmap. If a single hair on the drive changes, we resend the 
> _entire_ bitmap, possibly over and over again. Will we ever make 
> progress? Should we investigate a smaller chunk size?
>
> Here's some quick mappings of chunk size (bytes) to effective 
> meta_bitmap byte granularities, assuming the meta_bitmap is tracking a 
> bitmap with the default granularity of 64KiB:
>
> (1 << 20) 1MiB   -- 512GiB  // This is too high of a granularity
> (1 << 17) 128KiB --  64GiB
> (1 << 15) 32KiB  --  16GiB
> (1 << 11) 2KiB   --   1GiB
> (1 << 10) 1KiB   -- 512MiB
> (1 << 9)  512B   -- 256MiB
> (1 << 8)  256B   -- 128MiB
> (1 << 5)  32 B   --  16MiB  // This is too small of a chunk size.
> (1 << 1)   1 B   --   1MiB
>
> We want to make the chunk sends efficient, but we also want to make 
> sure that the dirty phase doesn't resend more data than it needs to, 
> so we need to strike a balance here, no?
>
> I think arguments could be made for most granularities between 128MiB 
> through 1GiB. Anything outside of that is too lopsided, IMO.
>
> What are your thoughts on this?
Ok, interesting thing to discuss.

My thoughts:
* the chunk size for block-migration is 1mb, than the bitmap (64kb 
granularity) for this chunk is 16bit=2bytes long. It's an intuitive 
reason for choosing the chunk size about 2 bytes. But in this case the 
data/metadata ratio is very bad (about 20bytes for the header of the 
chunk). So, taking the nearest value with adequate ratio gives (IMHO) 
'1kb -- 512mb': 20b/1k ~ 2%. Or 512b => 4%.

* for ndb+mirror migration scheme the default chunk is 64kb instead of 
1mb. So the bitmap is more smaller. But the same reason of data/metadata 
ratio leads to 1kb chunk for dirty bitmap migration.

So, what about default to 1kb and additional parameter for migration 
(migration capabilities) to give the user a possibility of chose?

* Yes, in most of user cases the bitmap (64kb granularity) will be small 
(< 1mb). In these cases, I think, it would be better to send the data 
only in complete step, only once. (for exmaple, if pending <= 1mb, 
dosn't send anything in incremental phase).
Live migration is actually needed only for migration of bitmaps for 
disks of several TBs size.

>
>> +    bitmap->meta_bitmap =
>> +        hbitmap_alloc(bitmap->size, ffsll(sector_granularity) - 1);
>> +
>> +    return bitmap->meta_bitmap;
>> +}
>> +
>> +void bdrv_release_meta_bitmap(BdrvDirtyBitmap *bitmap)
>> +{
>> +    if (bitmap->meta_bitmap) {
>> +        hbitmap_free(bitmap->meta_bitmap);
>> +        bitmap->meta_bitmap = NULL;
>> +    }
>> +}
>> +
>>   BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
>>                                             uint32_t granularity,
>>                                             const char *name,
>> @@ -5532,6 +5563,9 @@ void bdrv_release_dirty_bitmap(BlockDriverState 
>> *bs, BdrvDirtyBitmap *bitmap)
>>               assert(!bdrv_dirty_bitmap_frozen(bm));
>>               QLIST_REMOVE(bitmap, list);
>>               hbitmap_free(bitmap->bitmap);
>> +            if (bitmap->meta_bitmap) {
>> +                hbitmap_free(bitmap->meta_bitmap);
>> +            }
>>               g_free(bitmap->name);
>>               g_free(bitmap);
>>               return;
>> @@ -5659,6 +5693,9 @@ void bdrv_set_dirty_bitmap(BdrvDirtyBitmap 
>> *bitmap,
>>   {
>>       assert(bdrv_dirty_bitmap_enabled(bitmap));
>>       hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
>> +    if (bitmap->meta_bitmap) {
>> +        hbitmap_set(bitmap->meta_bitmap, cur_sector, nr_sectors);
>> +    }
>>   }
>>
>>   void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
>> @@ -5666,6 +5703,9 @@ void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap 
>> *bitmap,
>>   {
>>       assert(bdrv_dirty_bitmap_enabled(bitmap));
>>       hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
>> +    if (bitmap->meta_bitmap) {
>> +        hbitmap_set(bitmap->meta_bitmap, cur_sector, nr_sectors);
>> +    }
>>   }
>>
>>   void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap)
>> diff --git a/include/block/block.h b/include/block/block.h
>> index c6a928d..f2c62f6 100644
>> --- a/include/block/block.h
>> +++ b/include/block/block.h
>> @@ -4,6 +4,7 @@
>>   #include "block/aio.h"
>>   #include "qemu-common.h"
>>   #include "qemu/option.h"
>> +#include "qemu/hbitmap.h"
>>   #include "block/coroutine.h"
>>   #include "block/accounting.h"
>>   #include "qapi/qmp/qobject.h"
>> @@ -487,6 +488,10 @@ void 
>> bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap,
>>                                             uint64_t start, uint64_t 
>> count);
>>   void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap);
>>
>> +HBitmap *bdrv_create_meta_bitmap(BdrvDirtyBitmap *bitmap,
>> +                                        uint64_t granularity);
>> +void bdrv_release_meta_bitmap(BdrvDirtyBitmap *bitmap);
>> +
>>   void bdrv_enable_copy_on_read(BlockDriverState *bs);
>>   void bdrv_disable_copy_on_read(BlockDriverState *bs);
>>
>>
John Snow Feb. 21, 2015, 12:53 a.m. UTC | #3
On 02/19/2015 06:43 AM, Vladimir Sementsov-Ogievskiy wrote:
> On 19.02.2015 02:45, John Snow wrote:
>>
>>
>> On 02/18/2015 09:00 AM, Vladimir Sementsov-Ogievskiy wrote:
>>> Meta bitmap is a 'dirty bitmap' for the BdrvDirtyBitmap. It tracks
>>> changes (set/unset) of this BdrvDirtyBitmap. It is needed for live
>>> migration of block dirty bitmaps.
>>>
>>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@parallels.com>
>>> ---
>>>   block.c               | 40 ++++++++++++++++++++++++++++++++++++++++
>>>   include/block/block.h |  5 +++++
>>>   2 files changed, 45 insertions(+)
>>>
>>> diff --git a/block.c b/block.c
>>> index a127fd2..aaa08b8 100644
>>> --- a/block.c
>>> +++ b/block.c
>>> @@ -58,9 +58,15 @@
>>>    * (3) successor is set: frozen mode.
>>>    *     A frozen bitmap cannot be renamed, deleted, anonymized,
>>> cleared, set,
>>>    *     or enabled. A frozen bitmap can only abdicate() or reclaim().
>>> + *
>>> + * Meta bitmap:
>>> + * Meta bitmap is a 'dirty bitmap' for the BdrvDirtyBitmap. It
>>> tracks changes
>>> + * (set/unset) of this BdrvDirtyBitmap. It is needed for live
>>> migration of
>>> + * block dirty bitmaps.
>>>    */
>>>   struct BdrvDirtyBitmap {
>>>       HBitmap *bitmap;            /* Dirty sector bitmap
>>> implementation */
>>> +    HBitmap *meta_bitmap;       /* Meta bitmap */
>>>       BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen
>>> status */
>>>       char *name;                 /* Optional non-empty unique ID */
>>>       int64_t size;               /* Size of the bitmap (Number of
>>> sectors) */
>>> @@ -5398,6 +5404,31 @@ void
>>> bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
>>>       bitmap->name = NULL;
>>>   }
>>>
>>> +HBitmap *bdrv_create_meta_bitmap(BdrvDirtyBitmap *bitmap,
>>> +                                        uint64_t granularity)
>>> +{
>>> +    uint64_t sector_granularity;
>>> +
>>> +    assert((granularity & (granularity - 1)) == 0);
>>> +
>>> +    granularity *= 8 * bdrv_dirty_bitmap_granularity(bitmap);
>>> +    sector_granularity = granularity >> BDRV_SECTOR_BITS;
>>> +    assert(sector_granularity);
>>> +
>>
>> The maths here could use a comment, I think.
>>
>> the "granularity" field here actually describes the desired
>> serialization buffer size; e.g. CHUNK_SIZE (1 << 20) or 1 MiB. This
>> parameter should be renamed to explain what it's actually for.
>> Something like "chunk_size" and a comment explaining that it is in bytes.
>>
>> ...
>>
>> That said, let's talk about the default chunk size you're using in
>> correlation with this function.
>>
>> a CHUNK_SIZE of 1MiB here is going to lead us to, if we have a bitmap
>> with the default granularity of 128 sectors\64KiB bytes, a granularity
>> for the meta_bitmap of one billion sectors (1 << 30) or 512GiB.
>>
>> That's going to be bigger than most drives entirely, which will
>> generally lead us to only using a single "chunk" per drive. Which
>> means we won't really get a lot of mileage out of the bulk/dirty
>> phases most of the time.
>>
>> It's wild to think about that the first 1,000,000,000 sectors or
>> 512,000,000,000 bytes will all be represented by the first single bit
>> in this bitmap. If a single hair on the drive changes, we resend the
>> _entire_ bitmap, possibly over and over again. Will we ever make
>> progress? Should we investigate a smaller chunk size?
>>
>> Here's some quick mappings of chunk size (bytes) to effective
>> meta_bitmap byte granularities, assuming the meta_bitmap is tracking a
>> bitmap with the default granularity of 64KiB:
>>
>> (1 << 20) 1MiB   -- 512GiB  // This is too high of a granularity
>> (1 << 17) 128KiB --  64GiB
>> (1 << 15) 32KiB  --  16GiB
>> (1 << 11) 2KiB   --   1GiB
>> (1 << 10) 1KiB   -- 512MiB
>> (1 << 9)  512B   -- 256MiB
>> (1 << 8)  256B   -- 128MiB
>> (1 << 5)  32 B   --  16MiB  // This is too small of a chunk size.
>> (1 << 1)   1 B   --   1MiB
>>
>> We want to make the chunk sends efficient, but we also want to make
>> sure that the dirty phase doesn't resend more data than it needs to,
>> so we need to strike a balance here, no?
>>
>> I think arguments could be made for most granularities between 128MiB
>> through 1GiB. Anything outside of that is too lopsided, IMO.
>>
>> What are your thoughts on this?
> Ok, interesting thing to discuss.
>
> My thoughts:
> * the chunk size for block-migration is 1mb, than the bitmap (64kb
> granularity) for this chunk is 16bit=2bytes long. It's an intuitive
> reason for choosing the chunk size about 2 bytes. But in this case the
> data/metadata ratio is very bad (about 20bytes for the header of the
> chunk). So, taking the nearest value with adequate ratio gives (IMHO)
> '1kb -- 512mb': 20b/1k ~ 2%. Or 512b => 4%.
>
> * for ndb+mirror migration scheme the default chunk is 64kb instead of
> 1mb. So the bitmap is more smaller. But the same reason of data/metadata
> ratio leads to 1kb chunk for dirty bitmap migration.
>
> So, what about default to 1kb and additional parameter for migration
> (migration capabilities) to give the user a possibility of chose?
>

That sounds good to me. I'm less sure about the parameter, though. In 
practice I doubt most people will be attempting a live migration of data 
sets big enough to need to alter the default. For cases with such large 
data sets, it almost certainly makes more sense to perform the migration 
via some other method.

> * Yes, in most of user cases the bitmap (64kb granularity) will be small
> (< 1mb). In these cases, I think, it would be better to send the data
> only in complete step, only once. (for exmaple, if pending <= 1mb,
> dosn't send anything in incremental phase).
> Live migration is actually needed only for migration of bitmaps for
> disks of several TBs size.
>

You are right; sending only the data upon completion after live 
migration makes a lot of sense. If we acknowledge that we only really 
need dual-phase live migration of dirty bitmaps for extremely huge 
drives, then a chunk size of 1MiB isn't a problem anymore.

Would this be hard to do? We could just skip the live migration phase 
for "small" drives (less than however much we describe with one chunk -- 
for 1MiB, that's a whopping 512GiB) and just wait for the completion 
phase to do a bulk-send of the entire drive.

For large drives, we can do the live migration as "normal."

Is this sane?

>>
>>> +    bitmap->meta_bitmap =
>>> +        hbitmap_alloc(bitmap->size, ffsll(sector_granularity) - 1);
>>> +
>>> +    return bitmap->meta_bitmap;
>>> +}
>>> +
>>> +void bdrv_release_meta_bitmap(BdrvDirtyBitmap *bitmap)
>>> +{
>>> +    if (bitmap->meta_bitmap) {
>>> +        hbitmap_free(bitmap->meta_bitmap);
>>> +        bitmap->meta_bitmap = NULL;
>>> +    }
>>> +}
>>> +
>>>   BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
>>>                                             uint32_t granularity,
>>>                                             const char *name,
>>> @@ -5532,6 +5563,9 @@ void bdrv_release_dirty_bitmap(BlockDriverState
>>> *bs, BdrvDirtyBitmap *bitmap)
>>>               assert(!bdrv_dirty_bitmap_frozen(bm));
>>>               QLIST_REMOVE(bitmap, list);
>>>               hbitmap_free(bitmap->bitmap);
>>> +            if (bitmap->meta_bitmap) {
>>> +                hbitmap_free(bitmap->meta_bitmap);
>>> +            }
>>>               g_free(bitmap->name);
>>>               g_free(bitmap);
>>>               return;
>>> @@ -5659,6 +5693,9 @@ void bdrv_set_dirty_bitmap(BdrvDirtyBitmap
>>> *bitmap,
>>>   {
>>>       assert(bdrv_dirty_bitmap_enabled(bitmap));
>>>       hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
>>> +    if (bitmap->meta_bitmap) {
>>> +        hbitmap_set(bitmap->meta_bitmap, cur_sector, nr_sectors);
>>> +    }
>>>   }
>>>
>>>   void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
>>> @@ -5666,6 +5703,9 @@ void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap
>>> *bitmap,
>>>   {
>>>       assert(bdrv_dirty_bitmap_enabled(bitmap));
>>>       hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
>>> +    if (bitmap->meta_bitmap) {
>>> +        hbitmap_set(bitmap->meta_bitmap, cur_sector, nr_sectors);
>>> +    }
>>>   }
>>>
>>>   void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap)
>>> diff --git a/include/block/block.h b/include/block/block.h
>>> index c6a928d..f2c62f6 100644
>>> --- a/include/block/block.h
>>> +++ b/include/block/block.h
>>> @@ -4,6 +4,7 @@
>>>   #include "block/aio.h"
>>>   #include "qemu-common.h"
>>>   #include "qemu/option.h"
>>> +#include "qemu/hbitmap.h"
>>>   #include "block/coroutine.h"
>>>   #include "block/accounting.h"
>>>   #include "qapi/qmp/qobject.h"
>>> @@ -487,6 +488,10 @@ void
>>> bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap,
>>>                                             uint64_t start, uint64_t
>>> count);
>>>   void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap);
>>>
>>> +HBitmap *bdrv_create_meta_bitmap(BdrvDirtyBitmap *bitmap,
>>> +                                        uint64_t granularity);
>>> +void bdrv_release_meta_bitmap(BdrvDirtyBitmap *bitmap);
>>> +
>>>   void bdrv_enable_copy_on_read(BlockDriverState *bs);
>>>   void bdrv_disable_copy_on_read(BlockDriverState *bs);
>>>
>>>
>
>
diff mbox

Patch

diff --git a/block.c b/block.c
index a127fd2..aaa08b8 100644
--- a/block.c
+++ b/block.c
@@ -58,9 +58,15 @@ 
  * (3) successor is set: frozen mode.
  *     A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
  *     or enabled. A frozen bitmap can only abdicate() or reclaim().
+ *
+ * Meta bitmap:
+ * Meta bitmap is a 'dirty bitmap' for the BdrvDirtyBitmap. It tracks changes
+ * (set/unset) of this BdrvDirtyBitmap. It is needed for live migration of
+ * block dirty bitmaps.
  */
 struct BdrvDirtyBitmap {
     HBitmap *bitmap;            /* Dirty sector bitmap implementation */
+    HBitmap *meta_bitmap;       /* Meta bitmap */
     BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
     char *name;                 /* Optional non-empty unique ID */
     int64_t size;               /* Size of the bitmap (Number of sectors) */
@@ -5398,6 +5404,31 @@  void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
     bitmap->name = NULL;
 }
 
+HBitmap *bdrv_create_meta_bitmap(BdrvDirtyBitmap *bitmap,
+                                        uint64_t granularity)
+{
+    uint64_t sector_granularity;
+
+    assert((granularity & (granularity - 1)) == 0);
+
+    granularity *= 8 * bdrv_dirty_bitmap_granularity(bitmap);
+    sector_granularity = granularity >> BDRV_SECTOR_BITS;
+    assert(sector_granularity);
+
+    bitmap->meta_bitmap =
+        hbitmap_alloc(bitmap->size, ffsll(sector_granularity) - 1);
+
+    return bitmap->meta_bitmap;
+}
+
+void bdrv_release_meta_bitmap(BdrvDirtyBitmap *bitmap)
+{
+    if (bitmap->meta_bitmap) {
+        hbitmap_free(bitmap->meta_bitmap);
+        bitmap->meta_bitmap = NULL;
+    }
+}
+
 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
                                           uint32_t granularity,
                                           const char *name,
@@ -5532,6 +5563,9 @@  void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
             assert(!bdrv_dirty_bitmap_frozen(bm));
             QLIST_REMOVE(bitmap, list);
             hbitmap_free(bitmap->bitmap);
+            if (bitmap->meta_bitmap) {
+                hbitmap_free(bitmap->meta_bitmap);
+            }
             g_free(bitmap->name);
             g_free(bitmap);
             return;
@@ -5659,6 +5693,9 @@  void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
 {
     assert(bdrv_dirty_bitmap_enabled(bitmap));
     hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
+    if (bitmap->meta_bitmap) {
+        hbitmap_set(bitmap->meta_bitmap, cur_sector, nr_sectors);
+    }
 }
 
 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
@@ -5666,6 +5703,9 @@  void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
 {
     assert(bdrv_dirty_bitmap_enabled(bitmap));
     hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
+    if (bitmap->meta_bitmap) {
+        hbitmap_set(bitmap->meta_bitmap, cur_sector, nr_sectors);
+    }
 }
 
 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap)
diff --git a/include/block/block.h b/include/block/block.h
index c6a928d..f2c62f6 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -4,6 +4,7 @@ 
 #include "block/aio.h"
 #include "qemu-common.h"
 #include "qemu/option.h"
+#include "qemu/hbitmap.h"
 #include "block/coroutine.h"
 #include "block/accounting.h"
 #include "qapi/qmp/qobject.h"
@@ -487,6 +488,10 @@  void bdrv_dirty_bitmap_deserialize_zeroes(BdrvDirtyBitmap *bitmap,
                                           uint64_t start, uint64_t count);
 void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap);
 
+HBitmap *bdrv_create_meta_bitmap(BdrvDirtyBitmap *bitmap,
+                                        uint64_t granularity);
+void bdrv_release_meta_bitmap(BdrvDirtyBitmap *bitmap);
+
 void bdrv_enable_copy_on_read(BlockDriverState *bs);
 void bdrv_disable_copy_on_read(BlockDriverState *bs);