diff mbox series

[v2,3/3] migration/ram: Optimize ram_save_host_page()

Message ID 20210301082132.1107-4-jiangkunkun@huawei.com
State New
Headers show
Series Some modifications about ram_save_host_page() | expand

Commit Message

Kunkun Jiang March 1, 2021, 8:21 a.m. UTC
Starting from pss->page, ram_save_host_page() will check every page
and send the dirty pages up to the end of the current host page or
the boundary of used_length of the block. If the host page is a huge
page, checking every page individually takes a lot of time.

Using migration_bitmap_find_dirty() to jump to the next dirty page
instead improves performance.

Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
---
 migration/ram.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

Comments

David Edmondson March 3, 2021, 8:56 a.m. UTC | #1
On Monday, 2021-03-01 at 16:21:32 +08, Kunkun Jiang wrote:

> Starting from pss->page, ram_save_host_page() will check every page
> and send the dirty pages up to the end of the current host page or
> the boundary of used_length of the block. If the host page size is
> a huge page, the step "check" will take a lot of time.
>
> This will improve performance to use migration_bitmap_find_dirty().

This is cleaner, thank you.

I was hoping to just invert the body of the loop - something like
(completely untested):

do {
  int pages_this_iteration = 0;

  /* Check if the page is dirty and, if so, send it. */
  if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
    pages_this_iteration = ram_save_target_page(rs, pss, last_stage);
    if (pages_this_iteration < 0) {
      return pages_this_iteration;
    }

    pages += pages_this_iteration;

    /*
     * Allow rate limiting to happen in the middle of huge pages if
     * the current iteration sent something.
     */
    if (pagesize_bits > 1 && pages_this_iteration > 0) {
      migration_rate_limit();
    }
  }
  pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
 } while ((pss->page < hostpage_boundary) &&
          offset_in_ramblock(pss->block,
                             ((ram_addr_t)pss->page) << TARGET_PAGE_BITS));
/* The offset we leave with is the min boundary of host page and block */
pss->page = MIN(pss->page, hostpage_boundary) - 1;

> Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
> Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
> ---
>  migration/ram.c | 12 +++++++-----
>  1 file changed, 7 insertions(+), 5 deletions(-)
>
> diff --git a/migration/ram.c b/migration/ram.c
> index 3a9115b6dc..a1374db356 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -1991,6 +1991,8 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
>      int tmppages, pages = 0;
>      size_t pagesize_bits =
>          qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
> +    unsigned long hostpage_boundary =
> +        QEMU_ALIGN_UP(pss->page + 1, pagesize_bits);
>      unsigned long start_page = pss->page;
>      int res;
>  
> @@ -2002,7 +2004,7 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
>      do {
>          /* Check the pages is dirty and if it is send it */
>          if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
> -            pss->page++;
> +            pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
>              continue;
>          }
>  
> @@ -2012,16 +2014,16 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
>          }
>  
>          pages += tmppages;
> -        pss->page++;
> +        pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
>          /* Allow rate limiting to happen in the middle of huge pages */
>          if (pagesize_bits > 1) {
>              migration_rate_limit();
>          }
> -    } while ((pss->page & (pagesize_bits - 1)) &&
> +    } while ((pss->page < hostpage_boundary) &&
>               offset_in_ramblock(pss->block,
>                                  ((ram_addr_t)pss->page) << TARGET_PAGE_BITS));
> -    /* The offset we leave with is the last one we looked at */
> -    pss->page--;
> +    /* The offset we leave with is the min boundary of host page and block */
> +    pss->page = MIN(pss->page, hostpage_boundary) - 1;
>  
>      res = ram_save_release_protection(rs, pss, start_page);
>      return (res < 0 ? res : pages);
> -- 
> 2.23.0

dme.
Kunkun Jiang March 3, 2021, 11:47 a.m. UTC | #2
On 2021/3/3 16:56, David Edmondson wrote:
> On Monday, 2021-03-01 at 16:21:32 +08, Kunkun Jiang wrote:
>
>> Starting from pss->page, ram_save_host_page() will check every page
>> and send the dirty pages up to the end of the current host page or
>> the boundary of used_length of the block. If the host page size is
>> a huge page, the step "check" will take a lot of time.
>>
>> This will improve performance to use migration_bitmap_find_dirty().
> This is cleaner, thank you.
>
> I was hoping to just invert the body of the loop - something like
> (completely untested):
Sorry for my misunderstanding.
I will improve it in the next version.
> do {
>    int pages_this_iteration = 0;
>
>    /* Check if the page is dirty and, if so, send it. */
>    if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
>      pages_this_iteration = ram_save_target_page(rs, pss, last_stage);
>      if (pages_this_iteration < 0) {
>        return pages_this_iteration;
>      }
>
>      pages += pages_this_iteration;
>
>      /*
>       * Allow rate limiting to happen in the middle of huge pages if
>       * the current iteration sent something.
>       */
>      if (pagesize_bits > 1 && pages_this_iteration > 0) {
>        migration_rate_limit();
>      }
I missed the case where the value of pages_this_iteration is 0. 😅
>    }
>    pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
>   } while ((pss->page < hostpage_boundary) &&
>            offset_in_ramblock(pss->block,
>                               ((ram_addr_t)pss->page) << TARGET_PAGE_BITS));
> /* The offset we leave with is the min boundary of host page and block */
> pss->page = MIN(pss->page, hostpage_boundary) - 1;

Best Regards.

Kunkun Jiang

>> Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
>> Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
>> ---
>>   migration/ram.c | 12 +++++++-----
>>   1 file changed, 7 insertions(+), 5 deletions(-)
>>
>> diff --git a/migration/ram.c b/migration/ram.c
>> index 3a9115b6dc..a1374db356 100644
>> --- a/migration/ram.c
>> +++ b/migration/ram.c
>> @@ -1991,6 +1991,8 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
>>       int tmppages, pages = 0;
>>       size_t pagesize_bits =
>>           qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
>> +    unsigned long hostpage_boundary =
>> +        QEMU_ALIGN_UP(pss->page + 1, pagesize_bits);
>>       unsigned long start_page = pss->page;
>>       int res;
>>   
>> @@ -2002,7 +2004,7 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
>>       do {
>>           /* Check the pages is dirty and if it is send it */
>>           if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
>> -            pss->page++;
>> +            pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
>>               continue;
>>           }
>>   
>> @@ -2012,16 +2014,16 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
>>           }
>>   
>>           pages += tmppages;
>> -        pss->page++;
>> +        pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
>>           /* Allow rate limiting to happen in the middle of huge pages */
>>           if (pagesize_bits > 1) {
>>               migration_rate_limit();
>>           }
>> -    } while ((pss->page & (pagesize_bits - 1)) &&
>> +    } while ((pss->page < hostpage_boundary) &&
>>                offset_in_ramblock(pss->block,
>>                                   ((ram_addr_t)pss->page) << TARGET_PAGE_BITS));
>> -    /* The offset we leave with is the last one we looked at */
>> -    pss->page--;
>> +    /* The offset we leave with is the min boundary of host page and block */
>> +    pss->page = MIN(pss->page, hostpage_boundary) - 1;
>>   
>>       res = ram_save_release_protection(rs, pss, start_page);
>>       return (res < 0 ? res : pages);
>> -- 
>> 2.23.0
> dme.
David Edmondson March 3, 2021, 2:55 p.m. UTC | #3
On Wednesday, 2021-03-03 at 19:47:20 +08, Kunkun Jiang wrote:

> On 2021/3/3 16:56, David Edmondson wrote:
>> On Monday, 2021-03-01 at 16:21:32 +08, Kunkun Jiang wrote:
>>
>>> Starting from pss->page, ram_save_host_page() will check every page
>>> and send the dirty pages up to the end of the current host page or
>>> the boundary of used_length of the block. If the host page size is
>>> a huge page, the step "check" will take a lot of time.
>>>
>>> This will improve performance to use migration_bitmap_find_dirty().
>> This is cleaner, thank you.
>>
>> I was hoping to just invert the body of the loop - something like
>> (completely untested):
> Sorry for my misunderstanding.

No, I explained myself poorly.

> I will improve it in the next version.
>> do {
>>    int pages_this_iteration = 0;
>>
>>    /* Check if the page is dirty and, if so, send it. */
>>    if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
>>      pages_this_iteration = ram_save_target_page(rs, pss, last_stage);
>>      if (pages_this_iteration < 0) {
>>        return pages_this_iteration;
>>      }
>>
>>      pages += pages_this_iteration;
>>
>>      /*
>>       * Allow rate limiting to happen in the middle of huge pages if
>>       * the current iteration sent something.
>>       */
>>      if (pagesize_bits > 1 && pages_this_iteration > 0) {
>>        migration_rate_limit();
>>      }
> I missed the case that the value of pages_this_iteration is 0. 😅

I don't think that your version was wrong, because it returned early
from the loop if there were no candidate pages.

>>    }
>>    pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
>>   } while ((pss->page < hostpage_boundary) &&
>>            offset_in_ramblock(pss->block,
>>                               ((ram_addr_t)pss->page) << TARGET_PAGE_BITS));
>> /* The offset we leave with is the min boundary of host page and block */
>> pss->page = MIN(pss->page, hostpage_boundary) - 1;
>
> Best Regards.
>
> Kunkun Jiang
>
>>> Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
>>> Signed-off-by: Kunkun Jiang <jiangkunkun@huawei.com>
>>> ---
>>>   migration/ram.c | 12 +++++++-----
>>>   1 file changed, 7 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/migration/ram.c b/migration/ram.c
>>> index 3a9115b6dc..a1374db356 100644
>>> --- a/migration/ram.c
>>> +++ b/migration/ram.c
>>> @@ -1991,6 +1991,8 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
>>>       int tmppages, pages = 0;
>>>       size_t pagesize_bits =
>>>           qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
>>> +    unsigned long hostpage_boundary =
>>> +        QEMU_ALIGN_UP(pss->page + 1, pagesize_bits);
>>>       unsigned long start_page = pss->page;
>>>       int res;
>>>   
>>> @@ -2002,7 +2004,7 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
>>>       do {
>>>           /* Check the pages is dirty and if it is send it */
>>>           if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
>>> -            pss->page++;
>>> +            pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
>>>               continue;
>>>           }
>>>   
>>> @@ -2012,16 +2014,16 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
>>>           }
>>>   
>>>           pages += tmppages;
>>> -        pss->page++;
>>> +        pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
>>>           /* Allow rate limiting to happen in the middle of huge pages */
>>>           if (pagesize_bits > 1) {
>>>               migration_rate_limit();
>>>           }
>>> -    } while ((pss->page & (pagesize_bits - 1)) &&
>>> +    } while ((pss->page < hostpage_boundary) &&
>>>                offset_in_ramblock(pss->block,
>>>                                   ((ram_addr_t)pss->page) << TARGET_PAGE_BITS));
>>> -    /* The offset we leave with is the last one we looked at */
>>> -    pss->page--;
>>> +    /* The offset we leave with is the min boundary of host page and block */
>>> +    pss->page = MIN(pss->page, hostpage_boundary) - 1;
>>>   
>>>       res = ram_save_release_protection(rs, pss, start_page);
>>>       return (res < 0 ? res : pages);
>>> -- 
>>> 2.23.0
>> dme.

dme.
diff mbox series

Patch

diff --git a/migration/ram.c b/migration/ram.c
index 3a9115b6dc..a1374db356 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1991,6 +1991,8 @@  static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
     int tmppages, pages = 0;
     size_t pagesize_bits =
         qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
+    unsigned long hostpage_boundary =
+        QEMU_ALIGN_UP(pss->page + 1, pagesize_bits);
     unsigned long start_page = pss->page;
     int res;
 
@@ -2002,7 +2004,7 @@  static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
     do {
         /* Check the pages is dirty and if it is send it */
         if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
-            pss->page++;
+            pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
             continue;
         }
 
@@ -2012,16 +2014,16 @@  static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
         }
 
         pages += tmppages;
-        pss->page++;
+        pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
         /* Allow rate limiting to happen in the middle of huge pages */
         if (pagesize_bits > 1) {
             migration_rate_limit();
         }
-    } while ((pss->page & (pagesize_bits - 1)) &&
+    } while ((pss->page < hostpage_boundary) &&
              offset_in_ramblock(pss->block,
                                 ((ram_addr_t)pss->page) << TARGET_PAGE_BITS));
-    /* The offset we leave with is the last one we looked at */
-    pss->page--;
+    /* The offset we leave with is the min boundary of host page and block */
+    pss->page = MIN(pss->page, hostpage_boundary) - 1;
 
     res = ram_save_release_protection(rs, pss, start_page);
     return (res < 0 ? res : pages);