Patchwork [RFC,5/6] savevm: Migrate RAM based on name/offset

login
register
mail settings
Submitter Alex Williamson
Date June 8, 2010, 7:16 p.m.
Message ID <20100608191633.4451.59848.stgit@localhost.localdomain>
Download mbox | patch
Permalink /patch/55021/
State New
Headers show

Comments

Alex Williamson - June 8, 2010, 7:16 p.m.
Synchronize RAM blocks with the target and migrate using name/offset
pairs.  This ensures both source and target have the same view of
RAM and that we get the right bits into the right slot.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---

 arch_init.c |  103 +++++++++++++++++++++++++++++++++++++++++++++++++++++------
 vl.c        |    2 +
 2 files changed, 93 insertions(+), 12 deletions(-)
Anthony Liguori - June 8, 2010, 8:12 p.m.
On 06/08/2010 02:16 PM, Alex Williamson wrote:
> Synchronize RAM blocks with the target and migrate using name/offset
> pairs.  This ensures both source and target have the same view of
> RAM and that we get the right bits into the right slot.
>
> Signed-off-by: Alex Williamson<alex.williamson@redhat.com>
> ---
>
>   arch_init.c |  103 +++++++++++++++++++++++++++++++++++++++++++++++++++++------
>   vl.c        |    2 +
>   2 files changed, 93 insertions(+), 12 deletions(-)
>
> diff --git a/arch_init.c b/arch_init.c
> index 6103461..8ba92ec 100644
> --- a/arch_init.c
> +++ b/arch_init.c
> @@ -113,20 +113,29 @@ static int ram_save_block(QEMUFile *f)
>
>       while (addr<  total_ram) {
>           if (cpu_physical_memory_get_dirty(current_addr, MIGRATION_DIRTY_FLAG)) {
> +            RAMBlock *block;
> +            ram_addr_t offset;
>               uint8_t *p;
>
>               cpu_physical_memory_reset_dirty(current_addr,
>                                               current_addr + TARGET_PAGE_SIZE,
>                                               MIGRATION_DIRTY_FLAG);
>
> -            p = qemu_get_ram_ptr(current_addr);
> +            QLIST_FOREACH(block,&ram.blocks, next) {
> +                if (current_addr - block->offset<  block->length)
> +                    break;
> +            }
> +            offset = current_addr - block->offset;
> +            p = block->host + offset;
>
>               if (is_dup_page(p, *p)) {
> -                qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_COMPRESS);
> +                qemu_put_be64(f, offset | RAM_SAVE_FLAG_COMPRESS);
> +                qemu_put_buffer(f, (uint8_t *)block->name, sizeof(block->name));
>                   qemu_put_byte(f, *p);
>    

I think we could use some trickery like use another flag in 
current_address to determine whether this was a different section from 
the previous section and then only encode the section name if that's true.

Regards,

Anthony Liguori

>                   bytes_sent = 1;
>               } else {
> -                qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_PAGE);
> +                qemu_put_be64(f, offset | RAM_SAVE_FLAG_PAGE);
> +                qemu_put_buffer(f, (uint8_t *)block->name, sizeof(block->name));
>                   qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
>                   bytes_sent = TARGET_PAGE_SIZE;
>               }
> @@ -196,6 +205,7 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
>       }
>
>       if (stage == 1) {
> +        RAMBlock *block;
>           uint64_t total_ram = ram_bytes_total();
>           bytes_transferred = 0;
>
> @@ -210,6 +220,11 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
>           cpu_physical_memory_set_dirty_tracking(1);
>
>           qemu_put_be64(f, total_ram | RAM_SAVE_FLAG_MEM_SIZE);
> +
> +        QLIST_FOREACH(block,&ram.blocks, next) {
> +            qemu_put_buffer(f, (uint8_t *)block->name, sizeof(block->name));
> +            qemu_put_be64(f, block->length);
> +        }
>       }
>
>       bytes_transferred_last = bytes_transferred;
> @@ -257,7 +272,7 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>       ram_addr_t addr;
>       int flags;
>
> -    if (version_id != 3) {
> +    if (version_id<  3) {
>           return -EINVAL;
>       }
>
> @@ -268,23 +283,89 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>           addr&= TARGET_PAGE_MASK;
>
>           if (flags&  RAM_SAVE_FLAG_MEM_SIZE) {
> -            if (addr != ram_bytes_total()) {
> -                return -EINVAL;
> +            if (version_id == 3) {
> +                if (addr != ram_bytes_total()) {
> +                    return -EINVAL;
> +                }
> +            } else {
> +                /* Synchronize RAM block list */
> +                char name[64];
> +                ram_addr_t length;
> +                ram_addr_t total_ram_bytes = addr;
> +
> +                while (total_ram_bytes) {
> +                    RAMBlock *block;
> +                    qemu_get_buffer(f, (uint8_t *)name, sizeof(name));
> +                    length = qemu_get_be64(f);
> +
> +                    QLIST_FOREACH(block,&ram.blocks, next) {
> +                        if (!strncmp(name, block->name, sizeof(name))) {
> +                            if (block->length != length)
> +                                return -EINVAL;
> +                            break;
> +                        }
> +                    }
> +
> +                    if (!block) {
> +                        if (!qemu_ram_alloc(name, length))
> +                            return -ENOMEM;
> +                    }
> +
> +                    total_ram_bytes -= length;
> +                }
>               }
>           }
>
>           if (flags&  RAM_SAVE_FLAG_COMPRESS) {
> -            uint8_t ch = qemu_get_byte(f);
> -            memset(qemu_get_ram_ptr(addr), ch, TARGET_PAGE_SIZE);
> +            void *host;
> +            uint8_t ch;
> +
> +            if (version_id == 3) {
> +                host = qemu_get_ram_ptr(addr);
> +            } else {
> +                RAMBlock *block;
> +                char name[64];
> +
> +                qemu_get_buffer(f, (uint8_t *)name, sizeof(name));
> +
> +                QLIST_FOREACH(block,&ram.blocks, next) {
> +                    if (!strncmp(name, block->name, sizeof(name)))
> +                        break;
> +                }
> +                if (!block)
> +                    return -EINVAL;
> +
> +                host = block->host + addr;
> +            }
> +            ch = qemu_get_byte(f);
> +            memset(host, ch, TARGET_PAGE_SIZE);
>   #ifndef _WIN32
>               if (ch == 0&&
>                   (!kvm_enabled() || kvm_has_sync_mmu())) {
> -                madvise(qemu_get_ram_ptr(addr), TARGET_PAGE_SIZE,
> -                        MADV_DONTNEED);
> +                madvise(host, TARGET_PAGE_SIZE, MADV_DONTNEED);
>               }
>   #endif
>           } else if (flags&  RAM_SAVE_FLAG_PAGE) {
> -            qemu_get_buffer(f, qemu_get_ram_ptr(addr), TARGET_PAGE_SIZE);
> +            void *host;
> +
> +            if (version_id == 3) {
> +                host = qemu_get_ram_ptr(addr);
> +            } else {
> +                RAMBlock *block;
> +                char name[64];
> +
> +                qemu_get_buffer(f, (uint8_t *)name, sizeof(name));
> +
> +                QLIST_FOREACH(block,&ram.blocks, next) {
> +                    if (!strncmp(name, block->name, sizeof(name)))
> +                        break;
> +                }
> +                if (!block)
> +                    return -EINVAL;
> +
> +                host = block->host + addr;
> +            }
> +            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
>           }
>           if (qemu_file_has_error(f)) {
>               return -EIO;
> diff --git a/vl.c b/vl.c
> index 9e9c176..a871895 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -3731,7 +3731,7 @@ int main(int argc, char **argv, char **envp)
>       if (qemu_opts_foreach(&qemu_drive_opts, drive_init_func, machine, 1) != 0)
>           exit(1);
>
> -    register_savevm_live("ram", 0, 3, NULL, ram_save_live, NULL,
> +    register_savevm_live("ram", 0, 4, NULL, ram_save_live, NULL,
>                            ram_load, NULL);
>
>       if (nb_numa_nodes>  0) {
>
>
Chris Wright - June 8, 2010, 8:54 p.m.
* Alex Williamson (alex.williamson@redhat.com) wrote:
> @@ -257,7 +272,7 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>      ram_addr_t addr;
>      int flags;
>  
> -    if (version_id != 3) {
> +    if (version_id < 3) {
>          return -EINVAL;

Should we clamp to 3 and 4?

>      }
>  
> @@ -268,23 +283,89 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>          addr &= TARGET_PAGE_MASK;
>  
>          if (flags & RAM_SAVE_FLAG_MEM_SIZE) {

Does it simplify anything to simply add a new flag?

> -            if (addr != ram_bytes_total()) {
> -                return -EINVAL;
> +            if (version_id == 3) {
> +                if (addr != ram_bytes_total()) {
> +                    return -EINVAL;
> +                }
> +            } else {
> +                /* Synchronize RAM block list */
> +                char name[64];
> +                ram_addr_t length;
> +                ram_addr_t total_ram_bytes = addr;
> +
> +                while (total_ram_bytes) {
> +                    RAMBlock *block;
> +                    qemu_get_buffer(f, (uint8_t *)name, sizeof(name));
> +                    length = qemu_get_be64(f);
> +
> +                    QLIST_FOREACH(block, &ram.blocks, next) {
> +                        if (!strncmp(name, block->name, sizeof(name))) {
> +                            if (block->length != length)
> +                                return -EINVAL;
> +                            break;
> +                        }
> +                    }
> +
> +                    if (!block) {
> +                        if (!qemu_ram_alloc(name, length))
> +                            return -ENOMEM;

Is there any usee to finding blocks in stream such that we simply allocate
them all dynamically?

thanks,
-chris
Alex Williamson - June 8, 2010, 9:12 p.m.
On Tue, 2010-06-08 at 15:12 -0500, Anthony Liguori wrote:
> On 06/08/2010 02:16 PM, Alex Williamson wrote:
> >               if (is_dup_page(p, *p)) {
> > -                qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_COMPRESS);
> > +                qemu_put_be64(f, offset | RAM_SAVE_FLAG_COMPRESS);
> > +                qemu_put_buffer(f, (uint8_t *)block->name, sizeof(block->name));
> >                   qemu_put_byte(f, *p);
> >    
> 
> I think we could use some trickery like use another flag in 
> current_address to determine whether this was a different section from 
> the previous section and then only encode the section name if that's true.

Good suggestion, see patch 7/6 ;)

Alex
Alex Williamson - June 8, 2010, 9:22 p.m.
On Tue, 2010-06-08 at 13:54 -0700, Chris Wright wrote:
> * Alex Williamson (alex.williamson@redhat.com) wrote:
> > @@ -257,7 +272,7 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
> >      ram_addr_t addr;
> >      int flags;
> >  
> > -    if (version_id != 3) {
> > +    if (version_id < 3) {
> >          return -EINVAL;
> 
> Should we clamp to 3 and 4?

Yep, definitely a good idea.

> >      }
> >  
> > @@ -268,23 +283,89 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
> >          addr &= TARGET_PAGE_MASK;
> >  
> >          if (flags & RAM_SAVE_FLAG_MEM_SIZE) {
> 
> Does it simplify anything to simply add a new flag?

Not that I can see.  Appending it to this existing flag conveniently
lets the receiving side know when it's done since SUM(block->length)
equals MEM_SIZE.  Let me know if I'm missing an optimization or use case
you're thinking of.

> > +                    QLIST_FOREACH(block, &ram.blocks, next) {
> > +                        if (!strncmp(name, block->name, sizeof(name))) {
> > +                            if (block->length != length)
> > +                                return -EINVAL;
> > +                            break;
> > +                        }
> > +                    }
> > +
> > +                    if (!block) {
> > +                        if (!qemu_ram_alloc(name, length))
> > +                            return -ENOMEM;
> 
> Is there any use to finding blocks in stream such that we simply allocate
> them all dynamically?

Maybe.  It seems like we'd be doing a lot of reallocs as we go though.
I think we're pretty locked down once the migration starts, so nothing
should be changing on the source once we get started.  If that's the
case (someone correct me if it's not), sending the block list layout
first seems more efficient.  Thanks,

Alex

Patch

diff --git a/arch_init.c b/arch_init.c
index 6103461..8ba92ec 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -113,20 +113,29 @@  static int ram_save_block(QEMUFile *f)
 
     while (addr < total_ram) {
         if (cpu_physical_memory_get_dirty(current_addr, MIGRATION_DIRTY_FLAG)) {
+            RAMBlock *block;
+            ram_addr_t offset;
             uint8_t *p;
 
             cpu_physical_memory_reset_dirty(current_addr,
                                             current_addr + TARGET_PAGE_SIZE,
                                             MIGRATION_DIRTY_FLAG);
 
-            p = qemu_get_ram_ptr(current_addr);
+            QLIST_FOREACH(block, &ram.blocks, next) {
+                if (current_addr - block->offset < block->length)
+                    break;
+            }
+            offset = current_addr - block->offset;
+            p = block->host + offset;
 
             if (is_dup_page(p, *p)) {
-                qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_COMPRESS);
+                qemu_put_be64(f, offset | RAM_SAVE_FLAG_COMPRESS);
+                qemu_put_buffer(f, (uint8_t *)block->name, sizeof(block->name));
                 qemu_put_byte(f, *p);
                 bytes_sent = 1;
             } else {
-                qemu_put_be64(f, current_addr | RAM_SAVE_FLAG_PAGE);
+                qemu_put_be64(f, offset | RAM_SAVE_FLAG_PAGE);
+                qemu_put_buffer(f, (uint8_t *)block->name, sizeof(block->name));
                 qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
                 bytes_sent = TARGET_PAGE_SIZE;
             }
@@ -196,6 +205,7 @@  int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
     }
 
     if (stage == 1) {
+        RAMBlock *block;
         uint64_t total_ram = ram_bytes_total();
         bytes_transferred = 0;
 
@@ -210,6 +220,11 @@  int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
         cpu_physical_memory_set_dirty_tracking(1);
 
         qemu_put_be64(f, total_ram | RAM_SAVE_FLAG_MEM_SIZE);
+
+        QLIST_FOREACH(block, &ram.blocks, next) {
+            qemu_put_buffer(f, (uint8_t *)block->name, sizeof(block->name));
+            qemu_put_be64(f, block->length);
+        }
     }
 
     bytes_transferred_last = bytes_transferred;
@@ -257,7 +272,7 @@  int ram_load(QEMUFile *f, void *opaque, int version_id)
     ram_addr_t addr;
     int flags;
 
-    if (version_id != 3) {
+    if (version_id < 3) {
         return -EINVAL;
     }
 
@@ -268,23 +283,89 @@  int ram_load(QEMUFile *f, void *opaque, int version_id)
         addr &= TARGET_PAGE_MASK;
 
         if (flags & RAM_SAVE_FLAG_MEM_SIZE) {
-            if (addr != ram_bytes_total()) {
-                return -EINVAL;
+            if (version_id == 3) {
+                if (addr != ram_bytes_total()) {
+                    return -EINVAL;
+                }
+            } else {
+                /* Synchronize RAM block list */
+                char name[64];
+                ram_addr_t length;
+                ram_addr_t total_ram_bytes = addr;
+
+                while (total_ram_bytes) {
+                    RAMBlock *block;
+                    qemu_get_buffer(f, (uint8_t *)name, sizeof(name));
+                    length = qemu_get_be64(f);
+
+                    QLIST_FOREACH(block, &ram.blocks, next) {
+                        if (!strncmp(name, block->name, sizeof(name))) {
+                            if (block->length != length)
+                                return -EINVAL;
+                            break;
+                        }
+                    }
+
+                    if (!block) {
+                        if (!qemu_ram_alloc(name, length))
+                            return -ENOMEM;
+                    }
+
+                    total_ram_bytes -= length;
+                }
             }
         }
 
         if (flags & RAM_SAVE_FLAG_COMPRESS) {
-            uint8_t ch = qemu_get_byte(f);
-            memset(qemu_get_ram_ptr(addr), ch, TARGET_PAGE_SIZE);
+            void *host;
+            uint8_t ch;
+
+            if (version_id == 3) {
+                host = qemu_get_ram_ptr(addr);
+            } else {
+                RAMBlock *block;
+                char name[64];
+
+                qemu_get_buffer(f, (uint8_t *)name, sizeof(name));
+
+                QLIST_FOREACH(block, &ram.blocks, next) {
+                    if (!strncmp(name, block->name, sizeof(name)))
+                        break;
+                }
+                if (!block)
+                    return -EINVAL;
+
+                host = block->host + addr;
+            }
+            ch = qemu_get_byte(f);
+            memset(host, ch, TARGET_PAGE_SIZE);
 #ifndef _WIN32
             if (ch == 0 &&
                 (!kvm_enabled() || kvm_has_sync_mmu())) {
-                madvise(qemu_get_ram_ptr(addr), TARGET_PAGE_SIZE,
-                        MADV_DONTNEED);
+                madvise(host, TARGET_PAGE_SIZE, MADV_DONTNEED);
             }
 #endif
         } else if (flags & RAM_SAVE_FLAG_PAGE) {
-            qemu_get_buffer(f, qemu_get_ram_ptr(addr), TARGET_PAGE_SIZE);
+            void *host;
+
+            if (version_id == 3) {
+                host = qemu_get_ram_ptr(addr);
+            } else {
+                RAMBlock *block;
+                char name[64];
+
+                qemu_get_buffer(f, (uint8_t *)name, sizeof(name));
+
+                QLIST_FOREACH(block, &ram.blocks, next) {
+                    if (!strncmp(name, block->name, sizeof(name)))
+                        break;
+                }
+                if (!block)
+                    return -EINVAL;
+
+                host = block->host + addr;
+            }
+            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
         }
         if (qemu_file_has_error(f)) {
             return -EIO;
diff --git a/vl.c b/vl.c
index 9e9c176..a871895 100644
--- a/vl.c
+++ b/vl.c
@@ -3731,7 +3731,7 @@  int main(int argc, char **argv, char **envp)
     if (qemu_opts_foreach(&qemu_drive_opts, drive_init_func, machine, 1) != 0)
         exit(1);
 
-    register_savevm_live("ram", 0, 3, NULL, ram_save_live, NULL, 
+    register_savevm_live("ram", 0, 4, NULL, ram_save_live, NULL, 
                          ram_load, NULL);
 
     if (nb_numa_nodes > 0) {