Patchwork [RFC,RDMA,support,v1:,11/13] send pc.ram over RDMA

login
register
mail settings
Submitter mrhines@linux.vnet.ibm.com
Date April 10, 2013, 10:28 p.m.
Message ID <1365632901-15470-12-git-send-email-mrhines@linux.vnet.ibm.com>
Download mbox | patch
Permalink /patch/235508/
State New
Headers show

Comments

mrhines@linux.vnet.ibm.com - April 10, 2013, 10:28 p.m.
From: "Michael R. Hines" <mrhines@us.ibm.com>

This takes advantage of the previous patches:
1. use the new QEMUFileOps hook 'save_page' and return
   ENOTSUP if not supported.
2. call out to the right accessor methods to invoke
   the iteration hooks defined in QEMUFileOps

Signed-off-by: Michael R. Hines <mrhines@us.ibm.com>
---
 arch_init.c |   46 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 44 insertions(+), 2 deletions(-)
Paolo Bonzini - April 11, 2013, 6:26 a.m.
Il 11/04/2013 00:28, mrhines@linux.vnet.ibm.com ha scritto:
>  
> +#ifdef CONFIG_RDMA
> +int qemu_rdma_registration_start(QEMUFile *f, void *opaque, uint32_t flags)
> +{
> +    DPRINTF("start section: %d\n", flags);
> +    qemu_put_be64(f, RAM_SAVE_FLAG_HOOK);
> +    return 0;
> +}
> +#endif
>  

This must be in migration-rdma.c.  Move RAM_SAVE_FLAG_HOOK to
migration/migration.h with a comment like this:

/* Whenever this is found in the data stream, the flags
 * will be passed to ram_load_hook in the incoming-migration
 * side.  This lets before_ram_iterate/after_ram_iterate add
 * transport-specific sections to the RAM migration data.
 */

Paolo
mrhines@linux.vnet.ibm.com - April 11, 2013, 12:41 p.m.
Acknowledged.

On 04/11/2013 02:26 AM, Paolo Bonzini wrote:
> Il 11/04/2013 00:28, mrhines@linux.vnet.ibm.com ha scritto:
>>   
>> +#ifdef CONFIG_RDMA
>> +int qemu_rdma_registration_start(QEMUFile *f, void *opaque, uint32_t flags)
>> +{
>> +    DPRINTF("start section: %d\n", flags);
>> +    qemu_put_be64(f, RAM_SAVE_FLAG_HOOK);
>> +    return 0;
>> +}
>> +#endif
>>   
> This must be in migration-rdma.c.  Move RAM_SAVE_FLAG_HOOK to
> migration/migration.h with a comment like this:
>
> /* Whenever this is found in the data stream, the flags
>   * will be passed to ram_load_hook in the incoming-migration
>   * side.  This lets before_ram_iterate/after_ram_iterate add
>   * transport-specific sections to the RAM migration data.
>   */
>
> Paolo
>

Patch

diff --git a/arch_init.c b/arch_init.c
index 769ce77..eea3091 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -115,6 +115,7 @@  const uint32_t arch_type = QEMU_ARCH;
 #define RAM_SAVE_FLAG_EOS      0x10
 #define RAM_SAVE_FLAG_CONTINUE 0x20
 #define RAM_SAVE_FLAG_XBZRLE   0x40
+#define RAM_SAVE_FLAG_HOOK     0x80 /* perform hook during iteration */
 
 
 static struct defconfig_file {
@@ -170,6 +171,14 @@  static struct {
     .cache = NULL,
 };
 
+#ifdef CONFIG_RDMA
+int qemu_rdma_registration_start(QEMUFile *f, void *opaque, uint32_t flags)
+{
+    DPRINTF("start section: %d\n", flags);
+    qemu_put_be64(f, RAM_SAVE_FLAG_HOOK);
+    return 0;
+}
+#endif
 
 int64_t xbzrle_cache_resize(int64_t new_size)
 {
@@ -447,15 +456,22 @@  static int ram_save_block(QEMUFile *f, bool last_stage)
                 ram_bulk_stage = false;
             }
         } else {
+            bool zero;
             uint8_t *p;
             int cont = (block == last_sent_block) ?
                 RAM_SAVE_FLAG_CONTINUE : 0;
 
             p = memory_region_get_ram_ptr(mr) + offset;
 
+            /* use capability now, defaults to true */
+            zero = migrate_check_for_zero() ? is_zero_page(p) : false;
+
             /* In doubt sent page as normal */
             bytes_sent = -1;
-            if (is_zero_page(p)) {
+            if ((bytes_sent = ram_control_save_page(f, block->offset, 
+                            offset, cont, TARGET_PAGE_SIZE, zero)) >= 0) {
+                acct_info.norm_pages++;
+            } else if (zero) {
                 acct_info.dup_pages++;
                 if (!ram_bulk_stage) {
                     bytes_sent = save_block_hdr(f, block, offset, cont,
@@ -476,7 +492,7 @@  static int ram_save_block(QEMUFile *f, bool last_stage)
             }
 
             /* XBZRLE overflow or normal page */
-            if (bytes_sent == -1) {
+            if (bytes_sent == -1 || bytes_sent == -ENOTSUP) {
                 bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
                 qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
                 bytes_sent += TARGET_PAGE_SIZE;
@@ -598,6 +614,18 @@  static int ram_save_setup(QEMUFile *f, void *opaque)
     }
 
     qemu_mutex_unlock_ramlist();
+
+    /*
+     * The following calls generate reserved messages for future expansion of the RDMA
+     * protocol. If the ops are not defined, nothing will happen.
+     *
+     * Please leave in place. They are intended to be used to pre-register
+     * memory in the future to mitigate the extremely high cost of dynamic page
+     * registration.
+     */
+    ram_control_before_iterate(f, RAM_CONTROL_SETUP);
+    ram_control_after_iterate(f, RAM_CONTROL_SETUP);
+
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
 
     return 0;
@@ -616,6 +644,8 @@  static int ram_save_iterate(QEMUFile *f, void *opaque)
         reset_ram_globals();
     }
 
+    ram_control_before_iterate(f, RAM_CONTROL_ROUND);
+
     t0 = qemu_get_clock_ns(rt_clock);
     i = 0;
     while ((ret = qemu_file_rate_limit(f)) == 0) {
@@ -646,6 +676,12 @@  static int ram_save_iterate(QEMUFile *f, void *opaque)
 
     qemu_mutex_unlock_ramlist();
 
+    /* 
+     * must occur before EOS (or any QEMUFile operation) 
+     * because of RDMA protocol 
+     */
+    ram_control_after_iterate(f, RAM_CONTROL_ROUND);
+
     if (ret < 0) {
         bytes_transferred += total_sent;
         return ret;
@@ -663,6 +699,8 @@  static int ram_save_complete(QEMUFile *f, void *opaque)
     qemu_mutex_lock_ramlist();
     migration_bitmap_sync();
 
+    ram_control_before_iterate(f, RAM_CONTROL_FINISH);
+
     /* try transferring iterative blocks of memory */
 
     /* flush all remaining blocks regardless of rate limiting */
@@ -676,6 +714,8 @@  static int ram_save_complete(QEMUFile *f, void *opaque)
         }
         bytes_transferred += bytes_sent;
     }
+
+    ram_control_after_iterate(f, RAM_CONTROL_FINISH);
     migration_end();
 
     qemu_mutex_unlock_ramlist();
@@ -864,6 +904,8 @@  static int ram_load(QEMUFile *f, void *opaque, int version_id)
                 ret = -EINVAL;
                 goto done;
             }
+        } else if (flags & RAM_SAVE_FLAG_HOOK) {
+            ram_control_load_hook(f, RAM_CONTROL_REGISTER); 
         }
         error = qemu_file_get_error(f);
         if (error) {