Patchwork [30/30] ram: optimize migration bitmap walking

login
register
mail settings
Submitter Juan Quintela
Date Oct. 18, 2012, 7:30 a.m.
Message ID <1350545426-23172-31-git-send-email-quintela@redhat.com>
Download mbox | patch
Permalink /patch/192235/
State New
Headers show

Comments

Juan Quintela - Oct. 18, 2012, 7:30 a.m.
Instead of testing each page individually, we search what is the next
dirty page with a bitmap operation.  We have to reorganize the code to
move from a "for" loop, to a while(dirty) loop.

Signed-off-by: Juan Quintela <quintela@redhat.com>
---
 arch_init.c | 45 ++++++++++++++++++++++++++-------------------
 1 file changed, 26 insertions(+), 19 deletions(-)
Orit Wasserman - Oct. 21, 2012, 1:01 p.m.
On 10/18/2012 09:30 AM, Juan Quintela wrote:
> Instead of testing each page individually, we search what is the next
> dirty page with a bitmap operation.  We have to reorganize the code to
> move from a "for" loop, to a while(dirty) loop.
> 
> Signed-off-by: Juan Quintela <quintela@redhat.com>
> ---
>  arch_init.c | 45 ++++++++++++++++++++++++++-------------------
>  1 file changed, 26 insertions(+), 19 deletions(-)
> 
> diff --git a/arch_init.c b/arch_init.c
> index 8391375..cd6ebef 100644
> --- a/arch_init.c
> +++ b/arch_init.c
> @@ -343,18 +343,21 @@ static unsigned long *migration_bitmap;
>  static uint64_t migration_dirty_pages;
>  static uint32_t last_version;
> 
> -static inline bool migration_bitmap_test_and_reset_dirty(MemoryRegion *mr,
> -                                                         ram_addr_t offset)
> +static inline
> +ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
> +                                                 ram_addr_t start)
>  {
> -    bool ret;
> -    int nr = (mr->ram_addr + offset) >> TARGET_PAGE_BITS;
> +    unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
> +    unsigned long nr = base + (start >> TARGET_PAGE_BITS);
> +    unsigned long size = base + (int128_get64(mr->size) >> TARGET_PAGE_BITS);
> 
> -    ret = test_and_clear_bit(nr, migration_bitmap);
> +    unsigned long next = find_next_bit(migration_bitmap, size, nr);
> 
> -    if (ret) {
> +    if (next < size) {
> +        clear_bit(next, migration_bitmap);
>          migration_dirty_pages--;
>      }
> -    return ret;
> +    return (next - base) << TARGET_PAGE_BITS;
>  }
> 
>  static inline bool migration_bitmap_set_dirty(MemoryRegion *mr,
> @@ -423,6 +426,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
>  {
>      RAMBlock *block = last_seen_block;
>      ram_addr_t offset = last_offset;
> +    bool complete_round = false;
>      int bytes_sent = -1;
>      MemoryRegion *mr;
>      ram_addr_t current_addr;
> @@ -430,9 +434,21 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
>      if (!block)
>          block = QLIST_FIRST(&ram_list.blocks);
> 
> -    do {
> +    while(true) {
>          mr = block->mr;
> -        if (migration_bitmap_test_and_reset_dirty(mr, offset)) {
> +        offset = migration_bitmap_find_and_reset_dirty(mr, offset);
> +        if (complete_round && block == last_seen_block &&
> +            offset >= last_offset) {
> +            break;
> +        }
Juan,
You need to exchange those line, first check to see you did a full round than
calculate and reset the offset, in the way it is written now you may reset a bit and than break of the loop
without sending it.

Orit
> +        if (offset >= block->length) {
> +            offset = 0;
> +            block = QLIST_NEXT(block, next);
> +            if (!block) {
> +                block = QLIST_FIRST(&ram_list.blocks);
> +                complete_round = true;
> +            }
> +        } else {
>              uint8_t *p;
>              int cont = (block == last_sent_block) ?
>                  RAM_SAVE_FLAG_CONTINUE : 0;
> @@ -467,16 +483,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
>                  break;
>              }
>          }
> -
> -        offset += TARGET_PAGE_SIZE;
> -        if (offset >= block->length) {
> -            offset = 0;
> -            block = QLIST_NEXT(block, next);
> -            if (!block)
> -                block = QLIST_FIRST(&ram_list.blocks);
> -        }
> -    } while (block != last_seen_block || offset != last_offset);
> -
> +    }
>      last_seen_block = block;
>      last_offset = offset;
>
Juan Quintela - Oct. 26, 2012, 11:39 a.m.
Orit Wasserman <owasserm@redhat.com> wrote:
> On 10/18/2012 09:30 AM, Juan Quintela wrote:
>> Instead of testing each page individually, we search what is the next
>> dirty page with a bitmap operation.  We have to reorganize the code to
>> move from a "for" loop, to a while(dirty) loop.
>> 

>> 
>> -    do {
>> +    while(true) {
>>          mr = block->mr;
>> -        if (migration_bitmap_test_and_reset_dirty(mr, offset)) {
>> +        offset = migration_bitmap_find_and_reset_dirty(mr, offset);
>> +        if (complete_round && block == last_seen_block &&
>> +            offset >= last_offset) {
>> +            break;
>> +        }
> Juan,
> You need to exchange those line, first check to see you did a full round than
> calculate and reset the offset, in the way it is written now you may
> reset a bit and than break of the loop
> without sending it.

How?

if complete_round == true, it means that we are in the second round.

block == last_seen_block means that we are back at the 1st block that we
have looked.

if offset >= last_offset, there are two options:
a- == last_offset: that was the 1st one that we checked, so it can't be
   true.
b- >= last_offset: it means tat we have already passed that bit, it
   _has_ to be zero, otherwise somebody has changed the bitmap under or
   foot.

Or have I missed something?
Notice that at some point we should allow for concurrent dirty of the
bitmap, but we need to do yet more things.

Later, Juan.
Orit Wasserman - Oct. 28, 2012, 8:35 a.m.
On 10/26/2012 01:39 PM, Juan Quintela wrote:
> Orit Wasserman <owasserm@redhat.com> wrote:
>> On 10/18/2012 09:30 AM, Juan Quintela wrote:
>>> Instead of testing each page individually, we search what is the next
>>> dirty page with a bitmap operation.  We have to reorganize the code to
>>> move from a "for" loop, to a while(dirty) loop.
>>>
> 
>>>
>>> -    do {
>>> +    while(true) {
>>>          mr = block->mr;
>>> -        if (migration_bitmap_test_and_reset_dirty(mr, offset)) {
>>> +        offset = migration_bitmap_find_and_reset_dirty(mr, offset);
>>> +        if (complete_round && block == last_seen_block &&
>>> +            offset >= last_offset) {
>>> +            break;
>>> +        }
>> Juan,
>> You need to exchange those line, first check to see you did a full round than
>> calculate and reset the offset, in the way it is written now you may
>> reset a bit and than break of the loop
>> without sending it.
> 
> How?
> 
> if complete_round == true, it means that we are in the second round.
> 
> block == last_seen_block means that we are back at the 1st block that we
> have looked.
> 
> if offset >= last_offset, there are two options:
> a- == last_offset: that was the 1st one that we checked, so it can't be
>    true.
> b- >= last_offset: it means tat we have already passed that bit, it
>    _has_ to be zero, otherwise somebody has changed the bitmap under or
>    foot.
> 
> Or have I missed something?
If we are in iterate state this means the bitmap is changing all the time,
even when we didn't finish a complete cycle (for example we get to the bandwidth limit, exit ram_save_iterate and sync the bitmap in pending).
This means that bits in part of the bitmap we already walked can get dirty again. 
In that case the if condition can be true for a dirty bit,
in the current code we reset it and than break for the loop, this means it is set clean without sending it, which is a problem.
Changing the line order will fix it (the way it was before).

Regards,
Orit

> Notice that at some point we should allow for concurrent dirty of the
> bitmap, but we need to do yet more things.
> 
> Later, Juan.
>
Orit Wasserman - Oct. 30, 2012, 10:15 a.m.
On 10/28/2012 10:35 AM, Orit Wasserman wrote:
> On 10/26/2012 01:39 PM, Juan Quintela wrote:
>> Orit Wasserman <owasserm@redhat.com> wrote:
>>> On 10/18/2012 09:30 AM, Juan Quintela wrote:
>>>> Instead of testing each page individually, we search what is the next
>>>> dirty page with a bitmap operation.  We have to reorganize the code to
>>>> move from a "for" loop, to a while(dirty) loop.
>>>>
>>
>>>>
>>>> -    do {
>>>> +    while(true) {
>>>>          mr = block->mr;
>>>> -        if (migration_bitmap_test_and_reset_dirty(mr, offset)) {
>>>> +        offset = migration_bitmap_find_and_reset_dirty(mr, offset);
>>>> +        if (complete_round && block == last_seen_block &&
>>>> +            offset >= last_offset) {
>>>> +            break;
>>>> +        }
>>> Juan,
>>> You need to exchange those line, first check to see you did a full round than
>>> calculate and reset the offset, in the way it is written now you may
>>> reset a bit and than break of the loop
>>> without sending it.
>>
>> How?
>>
>> if complete_round == true, it means that we are in the second round.
>>
>> block == last_seen_block means that we are back at the 1st block that we
>> have looked.
>>
>> if offset >= last_offset, there are two options:
>> a- == last_offset: that was the 1st one that we checked, so it can't be
>>    true.
>> b- >= last_offset: it means tat we have already passed that bit, it
>>    _has_ to be zero, otherwise somebody has changed the bitmap under or
>>    foot.
>>
>> Or have I missed something?
> If we are in iterate state this means the bitmap is changing all the time,
> even when we didn't finish a complete cycle (for example we get to the bandwidth limit, exit ram_save_iterate and sync the bitmap in pending).
> This means that bits in part of the bitmap we already walked can get dirty again. 
> In that case the if condition can be true for a dirty bit,
> in the current code we reset it and than break for the loop, this means it is set clean without sending it, which is a problem.
> Changing the line order will fix it (the way it was before).
> 
OK read the code more thoroughly, there is no way that the bitmap will if we do a full cycle in ram_save_block,
We can actually simplify and  the function by checking the number of dirty pages. If it is zero we can return 
immediately.
If it is not zero it means we will exit the loop when we will find the dirty page.

Cheers,
Orit 
> Regards,
> Orit
> 
>> Notice that at some point we should allow for concurrent dirty of the
>> bitmap, but we need to do yet more things.
>>
>> Later, Juan.
>>
> 
>
Juan Quintela - Oct. 30, 2012, 3:33 p.m.
Orit Wasserman <owasserm@redhat.com> wrote:
>> If we are in iterate state this means the bitmap is changing all the time,
>> even when we didn't finish a complete cycle (for example we get to
>> the bandwidth limit, exit ram_save_iterate and sync the bitmap in
>> pending).
>> This means that bits in part of the bitmap we already walked can get
>> dirty again.
>> In that case the if condition can be true for a dirty bit,
>> in the current code we reset it and than break for the loop, this
>> means it is set clean without sending it, which is a problem.
>> Changing the line order will fix it (the way it was before).
>> 
> OK read the code more thoroughly, there is no way that the bitmap will
> if we do a full cycle in ram_save_block,
> We can actually simplify and the function by checking the number of
> dirty pages. If it is zero we can return
> immediately.
> If it is not zero it means we will exit the loop when we will find the
> dirty page.

Very good idea.  Thanks.

Later, Juan.

Patch

diff --git a/arch_init.c b/arch_init.c
index 8391375..cd6ebef 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -343,18 +343,21 @@  static unsigned long *migration_bitmap;
 static uint64_t migration_dirty_pages;
 static uint32_t last_version;

-static inline bool migration_bitmap_test_and_reset_dirty(MemoryRegion *mr,
-                                                         ram_addr_t offset)
+static inline
+ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
+                                                 ram_addr_t start)
 {
-    bool ret;
-    int nr = (mr->ram_addr + offset) >> TARGET_PAGE_BITS;
+    unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
+    unsigned long nr = base + (start >> TARGET_PAGE_BITS);
+    unsigned long size = base + (int128_get64(mr->size) >> TARGET_PAGE_BITS);

-    ret = test_and_clear_bit(nr, migration_bitmap);
+    unsigned long next = find_next_bit(migration_bitmap, size, nr);

-    if (ret) {
+    if (next < size) {
+        clear_bit(next, migration_bitmap);
         migration_dirty_pages--;
     }
-    return ret;
+    return (next - base) << TARGET_PAGE_BITS;
 }

 static inline bool migration_bitmap_set_dirty(MemoryRegion *mr,
@@ -423,6 +426,7 @@  static int ram_save_block(QEMUFile *f, bool last_stage)
 {
     RAMBlock *block = last_seen_block;
     ram_addr_t offset = last_offset;
+    bool complete_round = false;
     int bytes_sent = -1;
     MemoryRegion *mr;
     ram_addr_t current_addr;
@@ -430,9 +434,21 @@  static int ram_save_block(QEMUFile *f, bool last_stage)
     if (!block)
         block = QLIST_FIRST(&ram_list.blocks);

-    do {
+    while(true) {
         mr = block->mr;
-        if (migration_bitmap_test_and_reset_dirty(mr, offset)) {
+        offset = migration_bitmap_find_and_reset_dirty(mr, offset);
+        if (complete_round && block == last_seen_block &&
+            offset >= last_offset) {
+            break;
+        }
+        if (offset >= block->length) {
+            offset = 0;
+            block = QLIST_NEXT(block, next);
+            if (!block) {
+                block = QLIST_FIRST(&ram_list.blocks);
+                complete_round = true;
+            }
+        } else {
             uint8_t *p;
             int cont = (block == last_sent_block) ?
                 RAM_SAVE_FLAG_CONTINUE : 0;
@@ -467,16 +483,7 @@  static int ram_save_block(QEMUFile *f, bool last_stage)
                 break;
             }
         }
-
-        offset += TARGET_PAGE_SIZE;
-        if (offset >= block->length) {
-            offset = 0;
-            block = QLIST_NEXT(block, next);
-            if (!block)
-                block = QLIST_FIRST(&ram_list.blocks);
-        }
-    } while (block != last_seen_block || offset != last_offset);
-
+    }
     last_seen_block = block;
     last_offset = offset;