Patchwork [v13,11/13] Add XBZRLE to ram_save_block and ram_save_live

login
register
mail settings
Submitter Orit Wasserman
Date June 27, 2012, 10:34 a.m.
Message ID <1340793261-11400-12-git-send-email-owasserm@redhat.com>
Download mbox | patch
Permalink /patch/167605/
State New
Headers show

Comments

Orit Wasserman - June 27, 2012, 10:34 a.m.
In the outgoing migration check to see if the page is cached and
changed than send compressed page by using save_xbrle_page function.
In the incoming migration check to see if RAM_SAVE_FLAG_XBRLE is set
and decompress the page (by using load_xbrle function).

Signed-off-by: Benoit Hudzia <benoit.hudzia@sap.com>
Signed-off-by: Petter Svard <petters@cs.umu.se>
Signed-off-by: Aidan Shribman <aidan.shribman@sap.com>
Signed-off-by: Orit Wasserman <owasserm@redhat.com>
---
 arch_init.c |  179 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 migration.c |   26 ++++++++-
 migration.h |    4 +
 3 files changed, 205 insertions(+), 4 deletions(-)
Eric Blake - June 27, 2012, 7:56 p.m.
On 06/27/2012 04:34 AM, Orit Wasserman wrote:
> In the outgoing migration check to see if the page is cached and
> changed than send compressed page by using save_xbrle_page function.
> In the incoming migration check to see if RAM_SAVE_FLAG_XBRLE is set
> and decompress the page (by using load_xbrle function).

Mismatch between commit comment and actual code (XBRLE vs. XBZRLE).


> +
> +    /* Stage 1 cache the page and exit.
> +       Stage 2 check to see if page is cached , if not cache the page.

s/cached ,/cached,/


> +    if (encoded_len == 0) {
> +        DPRINTF("Unmodifed page skipping\n");

spelling and grammar:
s/Unmodifed page skipping/Skipping unmodified page/


>      if (!block)
>          block = QLIST_FIRST(&ram_list.blocks);
>  
> +
> +
>      do {

Why the spurious whitespace addition?


> +            } else if (migrate_use_xbzrle() && stage != 3) {
> +                current_addr = block->offset + offset;
> +                /* In stage 1 we only cache the pages before sending them
> +                   from the cache (uncompressed).
> +                   We doen't use compression for stage 3.

s/doen't/don't/

> +
> +            /* either we didn't send yet (we may got XBZRLE overflow) */

s/may got/may have had/

>  
> -            break;
> +            /* if page is unmodified lets continue to the next */

s/lets/let's/

or even shorter:

s/ lets/,/

> @@ -331,6 +440,17 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
>          last_offset = 0;
>          sort_ram_list();
>  
> +        if (migrate_use_xbzrle()) {
> +            XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
> +                                      TARGET_PAGE_SIZE,
> +                                      TARGET_PAGE_SIZE);
> +            if (!XBZRLE.cache) {
> +                DPRINTF("Error creating cache\n");
> +                return -1;
> +            }
> +            XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE);

This guarantees long alignment (good, per my proposed g_assert in
10/13), but not page alignment.  Do we get any benefits by using a
different allocation to guarantee page alignment?  (Off-hand, I'm not
thinking of any.)

> @@ -389,7 +509,7 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
>          while ((bytes_sent = ram_save_block(f, stage)) != -1) {
>              bytes_transferred += bytes_sent;
>          }
> -        memory_global_dirty_log_stop();
> +        migration_end();
>      }
>  
>      qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

This hunk should have been squashed into 9/13.

> @@ -512,6 +675,16 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>              host = host_from_stream_offset(f, addr, flags);
>  
>              qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
> +        } else if (flags & RAM_SAVE_FLAG_XBZRLE) {

We should probably be erroring out if we detect this incoming flag, but
the management app explicitly disabled the xbzrle migration capability,
since that represents a case of user error, and failing the migration is
better than proceeding to decode things anyway.
Orit Wasserman - June 28, 2012, 7:17 a.m.
On 06/27/2012 10:56 PM, Eric Blake wrote:
> On 06/27/2012 04:34 AM, Orit Wasserman wrote:
>> In the outgoing migration check to see if the page is cached and
>> changed than send compressed page by using save_xbrle_page function.
>> In the incoming migration check to see if RAM_SAVE_FLAG_XBRLE is set
>> and decompress the page (by using load_xbrle function).
> 
> Mismatch between commit comment and actual code (XBRLE vs. XBZRLE).
> 
> 
>> +
>> +    /* Stage 1 cache the page and exit.
>> +       Stage 2 check to see if page is cached , if not cache the page.
> 
> s/cached ,/cached,/
> 
> 
>> +    if (encoded_len == 0) {
>> +        DPRINTF("Unmodifed page skipping\n");
> 
> spelling and grammar:
> s/Unmodifed page skipping/Skipping unmodified page/
> 
> 
>>      if (!block)
>>          block = QLIST_FIRST(&ram_list.blocks);
>>  
>> +
>> +
>>      do {
> 
> Why the spurious whitespace addition?
> 
> 
>> +            } else if (migrate_use_xbzrle() && stage != 3) {
>> +                current_addr = block->offset + offset;
>> +                /* In stage 1 we only cache the pages before sending them
>> +                   from the cache (uncompressed).
>> +                   We doen't use compression for stage 3.
> 
> s/doen't/don't/
> 
>> +
>> +            /* either we didn't send yet (we may got XBZRLE overflow) */
> 
> s/may got/may have had/
> 
>>  
>> -            break;
>> +            /* if page is unmodified lets continue to the next */
> 
> s/lets/let's/
> 
> or even shorter:
> 
> s/ lets/,/
> 
>> @@ -331,6 +440,17 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
>>          last_offset = 0;
>>          sort_ram_list();
>>  
>> +        if (migrate_use_xbzrle()) {
>> +            XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
>> +                                      TARGET_PAGE_SIZE,
>> +                                      TARGET_PAGE_SIZE);
>> +            if (!XBZRLE.cache) {
>> +                DPRINTF("Error creating cache\n");
>> +                return -1;
>> +            }
>> +            XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE);
> 
> This guarantees long alignment (good, per my proposed g_assert in
> 10/13), but not page alignment.  Do we get any benefits by using a
> different allocation to guarantee page alignment?  (Off-hand, I'm not
> thinking of any.)
> 
>> @@ -389,7 +509,7 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
>>          while ((bytes_sent = ram_save_block(f, stage)) != -1) {
>>              bytes_transferred += bytes_sent;
>>          }
>> -        memory_global_dirty_log_stop();
>> +        migration_end();
>>      }
>>  
>>      qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
> 
> This hunk should have been squashed into 9/13.
> 
>> @@ -512,6 +675,16 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>>              host = host_from_stream_offset(f, addr, flags);
>>  
>>              qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
>> +        } else if (flags & RAM_SAVE_FLAG_XBZRLE) {
> 
> We should probably be erroring out if we detect this incoming flag, but
> the management app explicitly disabled the xbzrle migration capability,
> since that represents a case of user error, and failing the migration is
> better than proceeding to decode things anyway.
> 
correct, I will fix it.

Orit

Patch

diff --git a/arch_init.c b/arch_init.c
index 6703c72..bfc9f27 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -43,6 +43,7 @@ 
 #include "hw/smbios.h"
 #include "exec-memory.h"
 #include "hw/pcspk.h"
+#include "qemu/cache.h"
 
 #ifdef DEBUG_ARCH_INIT
 #define DPRINTF(fmt, ...) \
@@ -102,6 +103,7 @@  const uint32_t arch_type = QEMU_ARCH;
 #define RAM_SAVE_FLAG_PAGE     0x08
 #define RAM_SAVE_FLAG_EOS      0x10
 #define RAM_SAVE_FLAG_CONTINUE 0x20
+#define RAM_SAVE_FLAG_XBZRLE   0x40
 
 #ifdef __ALTIVEC__
 #include <altivec.h>
@@ -169,6 +171,27 @@  static int is_dup_page(uint8_t *page)
     return 1;
 }
 
+/* XBZRLE (Xor Based Zero Length Encoding */
+typedef struct XBZRLEHeader {
+    uint16_t xh_len;
+    uint8_t xh_flags;
+} XBZRLEHeader;
+
+/* struct contains XBZRLE cache and a static page
+   used by the compression */
+static struct {
+    /* buffer used for XBZRLE encoding */
+    uint8_t *encoded_buf;
+    /* buffer used for XBZRLE decoding */
+    uint8_t *decoded_buf;
+    /* Cache for XBZRLE */
+    Cache *cache;
+} XBZRLE = {
+    .encoded_buf = NULL,
+    .decoded_buf = NULL,
+    .cache = NULL,
+};
+
 static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
         int cont, int flag)
 {
@@ -181,6 +204,64 @@  static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
 
 }
 
+#define ENCODING_FLAG_XBZRLE 0x1
+
+static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
+                            ram_addr_t current_addr, RAMBlock *block,
+                            ram_addr_t offset, int cont, int stage)
+{
+    int encoded_len = 0, bytes_sent = -1, ret = -1;
+    XBZRLEHeader hdr = {
+        .xh_len = 0,
+        .xh_flags = 0,
+    };
+    uint8_t *prev_cached_page;
+
+    /* Stage 1 cache the page and exit.
+       Stage 2 check to see if page is cached , if not cache the page.
+       Stage 3 check if the page is cached and if not exit.
+    */
+    if (stage == 1 || !cache_is_cached(XBZRLE.cache, current_addr)) {
+        cache_insert(XBZRLE.cache, current_addr,
+                     g_memdup(current_data, TARGET_PAGE_SIZE));
+        return -1;
+    }
+
+    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
+
+    /* XBZRLE encoding (if there is no overflow) */
+    encoded_len = xbzrle_encode_buffer(prev_cached_page, current_data,
+                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
+                                       TARGET_PAGE_SIZE);
+    if (encoded_len == 0) {
+        DPRINTF("Unmodifed page skipping\n");
+        return 0;
+    } else if (encoded_len == -1) {
+        DPRINTF("Overflow\n");
+        /* update data in the cache */
+        memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE);
+        return -1;
+    }
+
+    /* we need to update the data in the cache, in order to get the same data
+       we cached we decode the encoded page on the cached data */
+    ret = xbzrle_decode_buffer(XBZRLE.encoded_buf, encoded_len,
+                               prev_cached_page, TARGET_PAGE_SIZE);
+    g_assert(ret != -1);
+
+    hdr.xh_len = encoded_len;
+    hdr.xh_flags |= ENCODING_FLAG_XBZRLE;
+
+    /* Send XBZRLE based compressed page */
+    save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
+    qemu_put_byte(f, hdr.xh_flags);
+    qemu_put_be16(f, hdr.xh_len);
+    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
+    bytes_sent = encoded_len + sizeof(hdr);
+
+    return bytes_sent;
+}
+
 static RAMBlock *last_block;
 static ram_addr_t last_offset;
 
@@ -190,10 +271,13 @@  static int ram_save_block(QEMUFile *f, int stage)
     ram_addr_t offset = last_offset;
     int bytes_sent = -1;
     MemoryRegion *mr;
+    ram_addr_t current_addr;
 
     if (!block)
         block = QLIST_FIRST(&ram_list.blocks);
 
+
+
     do {
         mr = block->mr;
         if (memory_region_get_dirty(mr, offset, TARGET_PAGE_SIZE,
@@ -210,13 +294,31 @@  static int ram_save_block(QEMUFile *f, int stage)
                 save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS);
                 qemu_put_byte(f, *p);
                 bytes_sent = 1;
-            } else {
+            } else if (migrate_use_xbzrle() && stage != 3) {
+                current_addr = block->offset + offset;
+                /* In stage 1 we only cache the pages before sending them
+                   from the cache (uncompressed).
+                   We doen't use compression for stage 3.
+                */
+                bytes_sent = save_xbzrle_page(f, p, current_addr, block,
+                                              offset, cont, stage);
+
+                /* send the cached page copy for consistency
+                   In stage 3 we send the host page */
+                p = get_cached_data(XBZRLE.cache, current_addr);
+            }
+
+            /* either we didn't send yet (we may got XBZRLE overflow) */
+            if (bytes_sent == -1) {
                 save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
                 qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
                 bytes_sent = TARGET_PAGE_SIZE;
             }
 
-            break;
+            /* if page is unmodified lets continue to the next */
+            if (bytes_sent != 0) {
+                break;
+            }
         }
 
         offset += TARGET_PAGE_SIZE;
@@ -307,6 +409,13 @@  static void sort_ram_list(void)
 static void migration_end(void)
 {
     memory_global_dirty_log_stop();
+
+    if (migrate_use_xbzrle()) {
+        cache_fini(XBZRLE.cache);
+        g_free(XBZRLE.cache);
+        g_free(XBZRLE.encoded_buf);
+        XBZRLE.cache = NULL;
+    }
 }
 
 int ram_save_live(QEMUFile *f, int stage, void *opaque)
@@ -331,6 +440,17 @@  int ram_save_live(QEMUFile *f, int stage, void *opaque)
         last_offset = 0;
         sort_ram_list();
 
+        if (migrate_use_xbzrle()) {
+            XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
+                                      TARGET_PAGE_SIZE,
+                                      TARGET_PAGE_SIZE);
+            if (!XBZRLE.cache) {
+                DPRINTF("Error creating cache\n");
+                return -1;
+            }
+            XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE);
+        }
+
         /* Make sure all dirty bits are set */
         QLIST_FOREACH(block, &ram_list.blocks, next) {
             for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
@@ -389,7 +509,7 @@  int ram_save_live(QEMUFile *f, int stage, void *opaque)
         while ((bytes_sent = ram_save_block(f, stage)) != -1) {
             bytes_transferred += bytes_sent;
         }
-        memory_global_dirty_log_stop();
+        migration_end();
     }
 
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
@@ -402,6 +522,49 @@  int ram_save_live(QEMUFile *f, int stage, void *opaque)
     return (stage == 2) && (expected_time <= migrate_max_downtime());
 }
 
+static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
+{
+    int ret, rc = 0;
+    XBZRLEHeader hdr = {
+        .xh_len = 0,
+        .xh_flags = 0,
+    };
+
+    if (!XBZRLE.decoded_buf) {
+        XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
+    }
+
+    /* extract RLE header */
+    hdr.xh_flags = qemu_get_byte(f);
+    hdr.xh_len = qemu_get_be16(f);
+
+    if (!(hdr.xh_flags & ENCODING_FLAG_XBZRLE)) {
+        fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n");
+        return -1;
+    }
+
+    if (hdr.xh_len > TARGET_PAGE_SIZE) {
+        fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n");
+        return -1;
+    }
+    /* load data and decode */
+    qemu_get_buffer(f, XBZRLE.decoded_buf, hdr.xh_len);
+
+    /* decode RLE */
+    ret = xbzrle_decode_buffer(XBZRLE.decoded_buf, hdr.xh_len, host,
+                               TARGET_PAGE_SIZE);
+    if (ret == -1) {
+        fprintf(stderr, "Failed to load XBZRLE page - decode error!\n");
+        rc = -1;
+    } else  if (ret > TARGET_PAGE_SIZE) {
+        fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n",
+                ret, TARGET_PAGE_SIZE);
+        rc = -1;
+    }
+
+    return rc;
+}
+
 static inline void *host_from_stream_offset(QEMUFile *f,
                                             ram_addr_t offset,
                                             int flags)
@@ -512,6 +675,16 @@  int ram_load(QEMUFile *f, void *opaque, int version_id)
             host = host_from_stream_offset(f, addr, flags);
 
             qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
+        } else if (flags & RAM_SAVE_FLAG_XBZRLE) {
+            void *host = host_from_stream_offset(f, addr, flags);
+            if (!host) {
+                return -EINVAL;
+            }
+
+            if (load_xbzrle(f, addr, host) < 0) {
+                ret = -EINVAL;
+                goto done;
+            }
         }
         error = qemu_file_get_error(f);
         if (error) {
diff --git a/migration.c b/migration.c
index c113111..6063ed8 100644
--- a/migration.c
+++ b/migration.c
@@ -43,6 +43,9 @@  enum {
 
 #define MAX_THROTTLE  (32 << 20)      /* Migration speed throttling */
 
+/* Migration XBZRLE cache size */
+#define DEFAULT_MIGRATE_CACHE_SIZE (64 * 1024 * 1024)
+
 static NotifierList migration_state_notifiers =
     NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
 
@@ -55,7 +58,8 @@  static MigrationState *migrate_get_current(void)
     static MigrationState current_migration = {
         .state = MIG_STATE_SETUP,
         .bandwidth_limit = MAX_THROTTLE,
-    };
+        .xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE,
+};
 
     return &current_migration;
 }
@@ -426,6 +430,7 @@  static MigrationState *migrate_init(const MigrationParams *params)
     MigrationState *s = migrate_get_current();
     int64_t bandwidth_limit = s->bandwidth_limit;
     bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
+    int64_t xbzrle_cache_size = s->xbzrle_cache_size;
 
     memcpy(enabled_capabilities, s->enabled_capabilities,
            sizeof(enabled_capabilities));
@@ -435,6 +440,7 @@  static MigrationState *migrate_init(const MigrationParams *params)
     s->params = *params;
     memcpy(s->enabled_capabilities, enabled_capabilities,
            sizeof(enabled_capabilities));
+    s->xbzrle_cache_size = xbzrle_cache_size;
 
     s->state = MIG_STATE_SETUP;
 
@@ -532,3 +538,21 @@  void qmp_migrate_set_downtime(double value, Error **errp)
     value = MAX(0, MIN(UINT64_MAX, value));
     max_downtime = (uint64_t)value;
 }
+
+int migrate_use_xbzrle(void)
+{
+    MigrationState *s;
+
+    s = migrate_get_current();
+
+    return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
+}
+
+int64_t migrate_xbzrle_cache_size(void)
+{
+    MigrationState *s;
+
+    s = migrate_get_current();
+
+    return s->xbzrle_cache_size;
+}
diff --git a/migration.h b/migration.h
index 7582ecb..4fe8122 100644
--- a/migration.h
+++ b/migration.h
@@ -39,6 +39,7 @@  struct MigrationState
     void *opaque;
     MigrationParams params;
     bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
+    int64_t xbzrle_cache_size;
 };
 
 void process_incoming_migration(QEMUFile *f);
@@ -103,4 +104,7 @@  int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
                          uint8_t *dst, int dlen);
 int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen);
 
+int migrate_use_xbzrle(void);
+int64_t migrate_xbzrle_cache_size(void);
+
 #endif