diff mbox

[RFC,RDMA,support,v6:,6/7] send pc.ram over RDMA

Message ID 1365568180-19593-7-git-send-email-mrhines@linux.vnet.ibm.com
State New
Headers show

Commit Message

mrhines@linux.vnet.ibm.com April 10, 2013, 4:29 a.m. UTC
From: "Michael R. Hines" <mrhines@us.ibm.com>

All that is left for this part of the patch is:

1. use the new (optionally defined) save_ram_page function pointer
   to decide what to do with the page, depending on whether RDMA is
   enabled or not, and return ENOTSUP as agreed.
2. invoke hooks from QEMURamControlOps function pointers to hook
   into the RDMA protocol at the right points in order to perform
   dynamic page registration.

Signed-off-by: Michael R. Hines <mrhines@us.ibm.com>
---
 arch_init.c |   45 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 43 insertions(+), 2 deletions(-)

Comments

Paolo Bonzini April 10, 2013, 7:57 a.m. UTC | #1
Il 10/04/2013 06:29, mrhines@linux.vnet.ibm.com ha scritto:
> From: "Michael R. Hines" <mrhines@us.ibm.com>
> 
> All that is left for this part of the patch is:
> 
> 1. use the new (optionally defined) save_ram_page function pointer
>    to decide what to do with the page if RDMA is enable or not
>    and return ENOTSUP as agreed.
> 2. invoke hooks from QEMURamControlOps function pointers to hook
>    into the RDMA protocol at the right points in order to perform
>    dynamic page registration.
> Signed-off-by: Michael R. Hines <mrhines@us.ibm.com>
> ---
>  arch_init.c |   45 +++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 43 insertions(+), 2 deletions(-)
> 
> diff --git a/arch_init.c b/arch_init.c
> index 769ce77..a7d5b16 100644
> --- a/arch_init.c
> +++ b/arch_init.c
> @@ -115,6 +115,7 @@ const uint32_t arch_type = QEMU_ARCH;
>  #define RAM_SAVE_FLAG_EOS      0x10
>  #define RAM_SAVE_FLAG_CONTINUE 0x20
>  #define RAM_SAVE_FLAG_XBZRLE   0x40
> +#define RAM_SAVE_FLAG_REGISTER 0x80 /* perform hook during iteration */

Please rename this to RAM_SAVE_FLAG_HOOK.

>  
>  
>  static struct defconfig_file {
> @@ -170,6 +171,13 @@ static struct {
>      .cache = NULL,
>  };
>  
> +#ifdef CONFIG_RDMA
> +void qemu_ram_registration_start(QEMUFile *f, void *opaque, int section)
> +{
> +    DPRINTF("start section: %d\n", section);
> +    qemu_put_be64(f, RAM_SAVE_FLAG_REGISTER);
> +}
> +#endif

Please put this in migration-rdma.c together with the other QEMUFileOps.

>  int64_t xbzrle_cache_resize(int64_t new_size)
>  {
> @@ -447,15 +455,22 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
>                  ram_bulk_stage = false;
>              }
>          } else {
> +            bool zero;
>              uint8_t *p;
>              int cont = (block == last_sent_block) ?
>                  RAM_SAVE_FLAG_CONTINUE : 0;
>  
>              p = memory_region_get_ram_ptr(mr) + offset;
>  
> +            /* use capability now, defaults to true */
> +            zero = migrate_check_for_zero() ? is_zero_page(p) : false;
> +
>              /* In doubt sent page as normal */
>              bytes_sent = -1;
> -            if (is_zero_page(p)) {
> +            if ((bytes_sent = ram_control_save_page(f, block->offset, 
> +                            offset, cont, TARGET_PAGE_SIZE, zero)) >= 0) {
> +                acct_info.norm_pages++;
> +            } else if (zero) {
>                  acct_info.dup_pages++;
>                  if (!ram_bulk_stage) {
>                      bytes_sent = save_block_hdr(f, block, offset, cont,
> @@ -476,7 +491,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
>              }
>  
>              /* XBZRLE overflow or normal page */
> -            if (bytes_sent == -1) {
> +            if (bytes_sent == -1 || bytes_sent == -ENOTSUP) {
>                  bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
>                  qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
>                  bytes_sent += TARGET_PAGE_SIZE;
> @@ -598,6 +613,18 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
>      }
>  
>      qemu_mutex_unlock_ramlist();
> +
> +    /*
> +     * These following calls generate reserved messages for future expansion of the RDMA
> +     * protocol. If the ops are not defined, nothing will happen.
> +     *
> +     * Please leave in place. They are intended to be used to pre-register
> +     * memory in the future to mitigate the extremely high cost of dynamic page
> +     * registration.
> +     */
> +    ram_control_before_iterate(f, RAM_CONTROL_SETUP);
> +    ram_control_after_iterate(f, RAM_CONTROL_SETUP);
> +
>      qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>  
>      return 0;
> @@ -616,6 +643,8 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
>          reset_ram_globals();
>      }
>  
> +    ram_control_before_iterate(f, RAM_CONTROL_ROUND);
> +
>      t0 = qemu_get_clock_ns(rt_clock);
>      i = 0;
>      while ((ret = qemu_file_rate_limit(f)) == 0) {
> @@ -646,6 +675,12 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
>  
>      qemu_mutex_unlock_ramlist();
>  
> +    /* 
> +     * must occur before EOS (or any QEMUFile operation) 
> +     * because of RDMA protocol 
> +     */
> +    ram_control_after_iterate(f, RAM_CONTROL_ROUND);
> +
>      if (ret < 0) {
>          bytes_transferred += total_sent;
>          return ret;
> @@ -663,6 +698,8 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
>      qemu_mutex_lock_ramlist();
>      migration_bitmap_sync();
>  
> +    ram_control_before_iterate(f, RAM_CONTROL_FINISH);
> +
>      /* try transferring iterative blocks of memory */
>  
>      /* flush all remaining blocks regardless of rate limiting */
> @@ -676,6 +713,8 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
>          }
>          bytes_transferred += bytes_sent;
>      }
> +
> +    ram_control_after_iterate(f, RAM_CONTROL_FINISH);
>      migration_end();
>  
>      qemu_mutex_unlock_ramlist();
> @@ -864,6 +903,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
>                  ret = -EINVAL;
>                  goto done;
>              }
> +        } else if (flags & RAM_SAVE_FLAG_REGISTER) {
> +            ram_control_register_iterate(f, RAM_CONTROL_REGISTER); 

Please rename this function to ram_control_load_hook(f, flags).

Paolo

>          }
>          error = qemu_file_get_error(f);
>          if (error) {
>
mrhines@linux.vnet.ibm.com April 10, 2013, 12:38 p.m. UTC | #2
Acknowledged.

On 04/10/2013 03:57 AM, Paolo Bonzini wrote:
> Il 10/04/2013 06:29, mrhines@linux.vnet.ibm.com ha scritto:
>> From: "Michael R. Hines" <mrhines@us.ibm.com>
>>
>> All that is left for this part of the patch is:
>>
>> 1. use the new (optionally defined) save_ram_page function pointer
>>     to decide what to do with the page if RDMA is enable or not
>>     and return ENOTSUP as agreed.
>> 2. invoke hooks from QEMURamControlOps function pointers to hook
>>     into the RDMA protocol at the right points in order to perform
>>     dynamic page registration.
>> Signed-off-by: Michael R. Hines <mrhines@us.ibm.com>
>> ---
>>   arch_init.c |   45 +++++++++++++++++++++++++++++++++++++++++++--
>>   1 file changed, 43 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch_init.c b/arch_init.c
>> index 769ce77..a7d5b16 100644
>> --- a/arch_init.c
>> +++ b/arch_init.c
>> @@ -115,6 +115,7 @@ const uint32_t arch_type = QEMU_ARCH;
>>   #define RAM_SAVE_FLAG_EOS      0x10
>>   #define RAM_SAVE_FLAG_CONTINUE 0x20
>>   #define RAM_SAVE_FLAG_XBZRLE   0x40
>> +#define RAM_SAVE_FLAG_REGISTER 0x80 /* perform hook during iteration */
> Please rename this to RAM_SAVE_FLAG_HOOK.
>
>>   
>>   
>>   static struct defconfig_file {
>> @@ -170,6 +171,13 @@ static struct {
>>       .cache = NULL,
>>   };
>>   
>> +#ifdef CONFIG_RDMA
>> +void qemu_ram_registration_start(QEMUFile *f, void *opaque, int section)
>> +{
>> +    DPRINTF("start section: %d\n", section);
>> +    qemu_put_be64(f, RAM_SAVE_FLAG_REGISTER);
>> +}
>> +#endif
> Please put this in migration-rdma.c together with the other QEMUFileOps.
>
>>   int64_t xbzrle_cache_resize(int64_t new_size)
>>   {
>> @@ -447,15 +455,22 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
>>                   ram_bulk_stage = false;
>>               }
>>           } else {
>> +            bool zero;
>>               uint8_t *p;
>>               int cont = (block == last_sent_block) ?
>>                   RAM_SAVE_FLAG_CONTINUE : 0;
>>   
>>               p = memory_region_get_ram_ptr(mr) + offset;
>>   
>> +            /* use capability now, defaults to true */
>> +            zero = migrate_check_for_zero() ? is_zero_page(p) : false;
>> +
>>               /* In doubt sent page as normal */
>>               bytes_sent = -1;
>> -            if (is_zero_page(p)) {
>> +            if ((bytes_sent = ram_control_save_page(f, block->offset,
>> +                            offset, cont, TARGET_PAGE_SIZE, zero)) >= 0) {
>> +                acct_info.norm_pages++;
>> +            } else if (zero) {
>>                   acct_info.dup_pages++;
>>                   if (!ram_bulk_stage) {
>>                       bytes_sent = save_block_hdr(f, block, offset, cont,
>> @@ -476,7 +491,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
>>               }
>>   
>>               /* XBZRLE overflow or normal page */
>> -            if (bytes_sent == -1) {
>> +            if (bytes_sent == -1 || bytes_sent == -ENOTSUP) {
>>                   bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
>>                   qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
>>                   bytes_sent += TARGET_PAGE_SIZE;
>> @@ -598,6 +613,18 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
>>       }
>>   
>>       qemu_mutex_unlock_ramlist();
>> +
>> +    /*
>> +     * These following calls generate reserved messages for future expansion of the RDMA
>> +     * protocol. If the ops are not defined, nothing will happen.
>> +     *
>> +     * Please leave in place. They are intended to be used to pre-register
>> +     * memory in the future to mitigate the extremely high cost of dynamic page
>> +     * registration.
>> +     */
>> +    ram_control_before_iterate(f, RAM_CONTROL_SETUP);
>> +    ram_control_after_iterate(f, RAM_CONTROL_SETUP);
>> +
>>       qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>>   
>>       return 0;
>> @@ -616,6 +643,8 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
>>           reset_ram_globals();
>>       }
>>   
>> +    ram_control_before_iterate(f, RAM_CONTROL_ROUND);
>> +
>>       t0 = qemu_get_clock_ns(rt_clock);
>>       i = 0;
>>       while ((ret = qemu_file_rate_limit(f)) == 0) {
>> @@ -646,6 +675,12 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
>>   
>>       qemu_mutex_unlock_ramlist();
>>   
>> +    /*
>> +     * must occur before EOS (or any QEMUFile operation)
>> +     * because of RDMA protocol
>> +     */
>> +    ram_control_after_iterate(f, RAM_CONTROL_ROUND);
>> +
>>       if (ret < 0) {
>>           bytes_transferred += total_sent;
>>           return ret;
>> @@ -663,6 +698,8 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
>>       qemu_mutex_lock_ramlist();
>>       migration_bitmap_sync();
>>   
>> +    ram_control_before_iterate(f, RAM_CONTROL_FINISH);
>> +
>>       /* try transferring iterative blocks of memory */
>>   
>>       /* flush all remaining blocks regardless of rate limiting */
>> @@ -676,6 +713,8 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
>>           }
>>           bytes_transferred += bytes_sent;
>>       }
>> +
>> +    ram_control_after_iterate(f, RAM_CONTROL_FINISH);
>>       migration_end();
>>   
>>       qemu_mutex_unlock_ramlist();
>> @@ -864,6 +903,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
>>                   ret = -EINVAL;
>>                   goto done;
>>               }
>> +        } else if (flags & RAM_SAVE_FLAG_REGISTER) {
>> +            ram_control_register_iterate(f, RAM_CONTROL_REGISTER);
> Please rename this function to ram_control_load_hook(f, flags).
>
> Paolo
>
>>           }
>>           error = qemu_file_get_error(f);
>>           if (error) {
>>
diff mbox

Patch

diff --git a/arch_init.c b/arch_init.c
index 769ce77..a7d5b16 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -115,6 +115,7 @@  const uint32_t arch_type = QEMU_ARCH;
 #define RAM_SAVE_FLAG_EOS      0x10
 #define RAM_SAVE_FLAG_CONTINUE 0x20
 #define RAM_SAVE_FLAG_XBZRLE   0x40
+#define RAM_SAVE_FLAG_REGISTER 0x80 /* perform hook during iteration */
 
 
 static struct defconfig_file {
@@ -170,6 +171,13 @@  static struct {
     .cache = NULL,
 };
 
+#ifdef CONFIG_RDMA
+void qemu_ram_registration_start(QEMUFile *f, void *opaque, int section)
+{
+    DPRINTF("start section: %d\n", section);
+    qemu_put_be64(f, RAM_SAVE_FLAG_REGISTER);
+}
+#endif
 
 int64_t xbzrle_cache_resize(int64_t new_size)
 {
@@ -447,15 +455,22 @@  static int ram_save_block(QEMUFile *f, bool last_stage)
                 ram_bulk_stage = false;
             }
         } else {
+            bool zero;
             uint8_t *p;
             int cont = (block == last_sent_block) ?
                 RAM_SAVE_FLAG_CONTINUE : 0;
 
             p = memory_region_get_ram_ptr(mr) + offset;
 
+            /* use capability now, defaults to true */
+            zero = migrate_check_for_zero() ? is_zero_page(p) : false;
+
             /* In doubt sent page as normal */
             bytes_sent = -1;
-            if (is_zero_page(p)) {
+            if ((bytes_sent = ram_control_save_page(f, block->offset, 
+                            offset, cont, TARGET_PAGE_SIZE, zero)) >= 0) {
+                acct_info.norm_pages++;
+            } else if (zero) {
                 acct_info.dup_pages++;
                 if (!ram_bulk_stage) {
                     bytes_sent = save_block_hdr(f, block, offset, cont,
@@ -476,7 +491,7 @@  static int ram_save_block(QEMUFile *f, bool last_stage)
             }
 
             /* XBZRLE overflow or normal page */
-            if (bytes_sent == -1) {
+            if (bytes_sent == -1 || bytes_sent == -ENOTSUP) {
                 bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
                 qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
                 bytes_sent += TARGET_PAGE_SIZE;
@@ -598,6 +613,18 @@  static int ram_save_setup(QEMUFile *f, void *opaque)
     }
 
     qemu_mutex_unlock_ramlist();
+
+    /*
+     * These following calls generate reserved messages for future expansion of the RDMA
+     * protocol. If the ops are not defined, nothing will happen.
+     *
+     * Please leave in place. They are intended to be used to pre-register
+     * memory in the future to mitigate the extremely high cost of dynamic page
+     * registration.
+     */
+    ram_control_before_iterate(f, RAM_CONTROL_SETUP);
+    ram_control_after_iterate(f, RAM_CONTROL_SETUP);
+
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
 
     return 0;
@@ -616,6 +643,8 @@  static int ram_save_iterate(QEMUFile *f, void *opaque)
         reset_ram_globals();
     }
 
+    ram_control_before_iterate(f, RAM_CONTROL_ROUND);
+
     t0 = qemu_get_clock_ns(rt_clock);
     i = 0;
     while ((ret = qemu_file_rate_limit(f)) == 0) {
@@ -646,6 +675,12 @@  static int ram_save_iterate(QEMUFile *f, void *opaque)
 
     qemu_mutex_unlock_ramlist();
 
+    /* 
+     * must occur before EOS (or any QEMUFile operation) 
+     * because of RDMA protocol 
+     */
+    ram_control_after_iterate(f, RAM_CONTROL_ROUND);
+
     if (ret < 0) {
         bytes_transferred += total_sent;
         return ret;
@@ -663,6 +698,8 @@  static int ram_save_complete(QEMUFile *f, void *opaque)
     qemu_mutex_lock_ramlist();
     migration_bitmap_sync();
 
+    ram_control_before_iterate(f, RAM_CONTROL_FINISH);
+
     /* try transferring iterative blocks of memory */
 
     /* flush all remaining blocks regardless of rate limiting */
@@ -676,6 +713,8 @@  static int ram_save_complete(QEMUFile *f, void *opaque)
         }
         bytes_transferred += bytes_sent;
     }
+
+    ram_control_after_iterate(f, RAM_CONTROL_FINISH);
     migration_end();
 
     qemu_mutex_unlock_ramlist();
@@ -864,6 +903,8 @@  static int ram_load(QEMUFile *f, void *opaque, int version_id)
                 ret = -EINVAL;
                 goto done;
             }
+        } else if (flags & RAM_SAVE_FLAG_REGISTER) {
+            ram_control_register_iterate(f, RAM_CONTROL_REGISTER); 
         }
         error = qemu_file_get_error(f);
         if (error) {