Patchwork [v7,05/11] Add XBZRLE to ram_save_block and ram_save_live

login
register
mail settings
Submitter Orit Wasserman
Date Jan. 26, 2012, 2:24 p.m.
Message ID <1327587897-31192-6-git-send-email-owasserm@redhat.com>
Download mbox | patch
Permalink /patch/137949/
State New
Headers show

Comments

Orit Wasserman - Jan. 26, 2012, 2:24 p.m.
Add migration state to store XBZRLE params (enablement and cache size).
In the outgoing migration, check whether the page is cached and has
changed; if so, send a compressed page using the save_xbzrle_page function.
In the incoming migration, check whether RAM_SAVE_FLAG_XBZRLE is set
and, if so, decompress the page (using the load_xbzrle function).

Signed-off-by: Orit Wasserman <owasserm@redhat.com>
---
 arch_init.c |  169 ++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 migration.h |    3 +
 savevm.c    |   93 ++++++++++++++++++++++++++++++++
 3 files changed, 253 insertions(+), 12 deletions(-)
Avi Kivity - Jan. 29, 2012, 10:52 a.m.
On 01/26/2012 04:24 PM, Orit Wasserman wrote:
> Add migration state to store XBZRLE params (enablement and cache size).
> In the outgoing migration, check whether the page is cached and has
> changed; if so, send a compressed page using the save_xbzrle_page function.
> In the incoming migration, check whether RAM_SAVE_FLAG_XBZRLE is set
> and, if so, decompress the page (using the load_xbzrle function).
>
> +/* XBZRLE (Xor Based Zero Run Length Encoding) */
> +typedef struct XBZRLEHeader {
> +    uint8_t xh_flags;
> +    uint16_t xh_len;
> +    uint32_t xh_cksum;
> +} XBZRLEHeader;

__attribute__((packed))

> +
> +/* RAM Migration State */
> +typedef struct ArchMigrationState {
> +    int use_xbzrle;
> +    int64_t xbzrle_cache_size;
> +} ArchMigrationState;
> +
> +static ArchMigrationState arch_mig_state;

Strange name.

>  
> +#define ENCODING_FLAG_XBZRLE 0x1
> +
> +static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
> +                            ram_addr_t current_addr, RAMBlock *block,
> +                            ram_addr_t offset, int cont)
> +{

...

> +    /* Send XBZRLE based compressed page */
> +    save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
> +    qemu_put_buffer(f, (uint8_t *) &hdr, sizeof(hdr));

Or better, encode the members separately so they get proper endian encoding.

> +    qemu_put_buffer(f, encoded_buf, encoded_len);
> +    bytes_sent = encoded_len + sizeof(hdr);
> +
> +done:
> +    g_free(encoded_buf);
> +    return bytes_sent;
> +}
> +
>  static RAMBlock *last_block;
>  static ram_addr_t last_offset;
>  
>
>  
> +/*
> +  page = zrun
> +       | zrun nzrun
> +       | zrun nzrun page

This is no longer accurate.

> +
> +  zrun = length
> +
> +  nzrun = length byte...
> +
> +  length = uleb128 encoded integer
> + */
> +int encode_page(uint8_t *old_buf, uint8_t *new_buf, int slen, uint8_t *dst,
> +                int dlen)
> +{
...
> +}
> +
> +int decode_page(uint8_t *src, int slen, uint8_t *dst, int dlen)
> +{
> +    int i = 0, d = 0;
> +    uint32_t count = 0;
> +
> +    while (i < slen - 1) {
> +        /* zrun */
> +        i += uleb128_decode_small(src + i, &count);
> +        d += count;
> +
> +        /* overflow */
> +        if (d > dlen) {
> +            return -1;

assert instead?

> +        }
> +
> +        /* completed decoding */
> +        if (i == slen) {
> +            return d + 1;
> +        }
> +
> +        /* nzrun */
> +        i += uleb128_decode_small(src + i, &count);
> +        /* overflow */
> +        if (d + count > dlen) {
> +            return -1;
> +        }
> +        memcpy(dst + d, src + i, count);
> +        d += count;
> +        i += count;
> +    }
> +

memset() for the tail?

> +    return d + 1;
> +}

Patch

diff --git a/arch_init.c b/arch_init.c
index 0f6037e..4ebf080 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -103,6 +103,7 @@  const uint32_t arch_type = QEMU_ARCH;
 #define RAM_SAVE_FLAG_PAGE     0x08
 #define RAM_SAVE_FLAG_EOS      0x10
 #define RAM_SAVE_FLAG_CONTINUE 0x20
+#define RAM_SAVE_FLAG_XBZRLE  0x40
 
 #ifdef __ALTIVEC__
 #include <altivec.h>
@@ -163,6 +164,21 @@  static void cache_insert(ram_addr_t id, uint8_t *pdata);
 static unsigned long cache_get_cache_pos(ram_addr_t address);
 static CacheItem *cache_item_get(unsigned long pos, int item);
 
+/* XBZRLE (Xor Based Zero Run Length Encoding) */
+typedef struct XBZRLEHeader {
+    uint8_t xh_flags;
+    uint16_t xh_len;
+    uint32_t xh_cksum;
+} XBZRLEHeader;
+
+/* RAM Migration State */
+typedef struct ArchMigrationState {
+    int use_xbzrle;
+    int64_t xbzrle_cache_size;
+} ArchMigrationState;
+
+static ArchMigrationState arch_mig_state;
+
 /***********************************************************/
 /* XBRLE page cache implementation */
 static CacheItem *cache_item_get(unsigned long pos, int item)
@@ -311,19 +327,66 @@  static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
         }
 }
 
+#define ENCODING_FLAG_XBZRLE 0x1
+
+static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
+                            ram_addr_t current_addr, RAMBlock *block,
+                            ram_addr_t offset, int cont)
+{
+    int cache_location = -1, slot = -1, encoded_len = 0, bytes_sent = 0;
+    XBZRLEHeader hdr = {0};
+    CacheItem *it;
+    uint8_t *encoded_buf = NULL;
+
+    /* get location */
+    slot = cache_is_cached(current_addr);
+    if (slot == -1) {
+        goto done;
+    }
+    cache_location = cache_get_cache_pos(current_addr);
+
+    /* abort if page changed too much */
+    it = cache_item_get(cache_location, slot);
+
+    /* XBZRLE encoding (if there is no overflow) */
+    encoded_buf = (uint8_t *) g_malloc(TARGET_PAGE_SIZE);
+    encoded_len = encode_page(it->it_data, current_data, TARGET_PAGE_SIZE,
+                              encoded_buf, TARGET_PAGE_SIZE);
+    if (encoded_len < 0) {
+        DPRINTF("Unmodifed page - skipping\n");
+        goto done;
+    }
+
+    hdr.xh_len = encoded_len;
+    hdr.xh_flags |= ENCODING_FLAG_XBZRLE;
+
+    /* Send XBZRLE based compressed page */
+    save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
+    qemu_put_buffer(f, (uint8_t *) &hdr, sizeof(hdr));
+    qemu_put_buffer(f, encoded_buf, encoded_len);
+    bytes_sent = encoded_len + sizeof(hdr);
+
+done:
+    g_free(encoded_buf);
+    return bytes_sent;
+}
+
 static RAMBlock *last_block;
 static ram_addr_t last_offset;
 
-static int ram_save_block(QEMUFile *f)
+static int ram_save_block(QEMUFile *f, int stage)
 {
     RAMBlock *block = last_block;
     ram_addr_t offset = last_offset;
     int bytes_sent = 0;
     MemoryRegion *mr;
+    ram_addr_t current_addr;
 
     if (!block)
         block = QLIST_FIRST(&ram_list.blocks);
 
+    current_addr = block->offset + offset;
+
     do {
         mr = block->mr;
         if (memory_region_get_dirty(mr, offset, DIRTY_MEMORY_MIGRATION)) {
@@ -339,11 +402,18 @@  static int ram_save_block(QEMUFile *f)
                 save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS);
                 qemu_put_byte(f, *p);
                 bytes_sent = 1;
-            } else {
+            } else if (stage == 2 && arch_mig_state.use_xbzrle) {
+                bytes_sent = save_xbzrle_page(f, p, current_addr, block,
+                    offset, cont);
+            }
+            if (!bytes_sent) {
                 save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
                 qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
                 bytes_sent = TARGET_PAGE_SIZE;
             }
+            if (arch_mig_state.use_xbzrle) {
+                cache_insert(current_addr, p);
+            }
 
             break;
         }
@@ -443,6 +513,9 @@  int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
 
     if (stage < 0) {
         memory_global_dirty_log_stop();
+        if (arch_mig_state.use_xbzrle) {
+            cache_fini();
+        }
         return 0;
     }
 
@@ -455,6 +528,10 @@  int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
         last_offset = 0;
         sort_ram_list();
 
+        if (arch_mig_state.use_xbzrle) {
+            cache_init(arch_mig_state.xbzrle_cache_size);
+        }
+
         /* Make sure all dirty bits are set */
         QLIST_FOREACH(block, &ram_list.blocks, next) {
             for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
@@ -482,9 +559,11 @@  int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
     while ((ret = qemu_file_rate_limit(f)) == 0) {
         int bytes_sent;
 
-        bytes_sent = ram_save_block(f);
-        bytes_transferred += bytes_sent;
-        if (bytes_sent == 0) { /* no more blocks */
+        bytes_sent = ram_save_block(f, stage);
+        /* bytes_sent -1 represent unchanged page */
+        if (bytes_sent > 0) {
+            bytes_transferred += bytes_sent;
+        } else if (bytes_sent == 0) { /* no more blocks */
             break;
         }
     }
@@ -507,19 +586,67 @@  int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
         int bytes_sent;
 
         /* flush all remaining blocks regardless of rate limiting */
-        while ((bytes_sent = ram_save_block(f)) != 0) {
+        while ((bytes_sent = ram_save_block(f, stage)) != 0) {
             bytes_transferred += bytes_sent;
         }
         memory_global_dirty_log_stop();
+        if (arch_mig_state.use_xbzrle) {
+            cache_fini();
+        }
     }
 
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
 
     expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
 
+    DPRINTF("ram_save_live: expected(%ld) <= max(%ld)?\n", expected_time,
+        migrate_max_downtime());
+
     return (stage == 2) && (expected_time <= migrate_max_downtime());
 }
 
+static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
+{
+    int ret, rc = -1;
+    uint8_t *xbzrle_buf = NULL;
+    XBZRLEHeader hdr = {0};
+
+    /* extract RLE header */
+    qemu_get_buffer(f, (uint8_t *) &hdr, sizeof(hdr));
+    if (!(hdr.xh_flags & ENCODING_FLAG_XBZRLE)) {
+        fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n");
+        goto done;
+    }
+
+    if (hdr.xh_len > TARGET_PAGE_SIZE) {
+        fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n");
+        goto done;
+    }
+
+    /* load data and decode */
+    xbzrle_buf = (uint8_t *) g_malloc0(TARGET_PAGE_SIZE);
+    qemu_get_buffer(f, xbzrle_buf, hdr.xh_len);
+
+    /* decode RLE */
+    ret = decode_page(xbzrle_buf, hdr.xh_len, host, TARGET_PAGE_SIZE);
+    if (ret == -1) {
+        fprintf(stderr, "Failed to load XBZRLE page - decode error!\n");
+        goto done;
+    }
+
+    if (ret > TARGET_PAGE_SIZE) {
+        fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n",
+                ret, TARGET_PAGE_SIZE);
+        goto done;
+    }
+
+    rc = 0;
+
+done:
+    g_free(xbzrle_buf);
+    return rc;
+}
+
 static inline void *host_from_stream_offset(QEMUFile *f,
                                             ram_addr_t offset,
                                             int flags)
@@ -565,14 +692,18 @@  static inline void *host_from_stream_offset_versioned(int version_id,
 int ram_load(QEMUFile *f, void *opaque, int version_id)
 {
     ram_addr_t addr;
-    int flags;
+    int flags, ret = 0;
     int error;
+    static uint64_t seq_iter;
+
+    seq_iter++;
 
     if (version_id < 4 || version_id > 4) {
         return -EINVAL;
     }
 
     do {
+        void *host;
         addr = qemu_get_be64(f);
 
         flags = addr & ~TARGET_PAGE_MASK;
@@ -596,8 +727,10 @@  int ram_load(QEMUFile *f, void *opaque, int version_id)
 
                     QLIST_FOREACH(block, &ram_list.blocks, next) {
                         if (!strncmp(id, block->idstr, sizeof(id))) {
-                            if (block->length != length)
-                                return -EINVAL;
+                            if (block->length != length) {
+                                ret =  -EINVAL;
+                                goto done;
+                            }
                             break;
                         }
                     }
@@ -605,7 +738,8 @@  int ram_load(QEMUFile *f, void *opaque, int version_id)
                     if (!block) {
                         fprintf(stderr, "Unknown ramblock \"%s\", cannot "
                                 "accept migration\n", id);
-                        return -EINVAL;
+                        ret = -EINVAL;
+                        goto done;
                     }
 
                     total_ram_bytes -= length;
@@ -644,14 +778,25 @@  int ram_load(QEMUFile *f, void *opaque, int version_id)
                 return -EINVAL;
             }
             qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
+        } else if (flags & RAM_SAVE_FLAG_XBZRLE) {
+            host = host_from_stream_offset_versioned(version_id,
+                            f, addr, flags);
+            if (load_xbzrle(f, addr, host) < 0) {
+                ret = -EINVAL;
+                goto done;
+            }
         }
         error = qemu_file_get_error(f);
         if (error) {
-            return error;
+            ret = error;
+            goto done;
         }
     } while (!(flags & RAM_SAVE_FLAG_EOS));
 
-    return 0;
+done:
+    DPRINTF("Completed load of VM with exit code %d seq iteration %ld\n",
+            ret, seq_iter);
+    return ret;
 }
 
 #ifdef HAS_AUDIO
diff --git a/migration.h b/migration.h
index 50dec18..c8cadbc 100644
--- a/migration.h
+++ b/migration.h
@@ -98,5 +98,8 @@  void migrate_del_blocker(Error *reason);
 /* ULEB128 */
 int uleb128_encode_small(uint8_t *out, uint32_t n);
 int uleb128_decode_small(const uint8 *in, uint32_t *n);
+int encode_page(uint8_t *old_buf, uint8_t *new_buf, int slen,
+                uint8_t *dst, int dlen);
+int decode_page(uint8_t *src, int slen, uint8_t *dst, int dlen);
 
 #endif
diff --git a/savevm.c b/savevm.c
index 74c674f..41852d9 100644
--- a/savevm.c
+++ b/savevm.c
@@ -2325,3 +2325,96 @@  int uleb128_decode_small(const uint8 *in, uint32_t *n)
     }
 }
 
+/*
+  page = zrun
+       | zrun nzrun
+       | zrun nzrun page
+
+  zrun = length
+
+  nzrun = length byte...
+
+  length = uleb128 encoded integer
+ */
+int encode_page(uint8_t *old_buf, uint8_t *new_buf, int slen, uint8_t *dst,
+                int dlen)
+{
+    uint32_t zrun_len = 0, nzrun_len = 0;
+    int d = 0 , i = 0;
+    uint8_t *nzrun_start = NULL;
+
+    while (i < slen) {
+        /* overflow */
+        if (d + 2 > dlen) {
+            return 0;
+        }
+
+        while (!(old_buf[i] ^ new_buf[i]) && ++i < slen) {
+            zrun_len++;
+        }
+
+        /* buffer unchanged */
+        if (zrun_len == slen) {
+            return -1;
+        }
+
+        /* skip last zero run */
+        if (i == slen) {
+            return d;
+        }
+
+        d += uleb128_encode_small(dst + d, zrun_len);
+        zrun_len = 0;
+
+        nzrun_start = new_buf + i;
+        while ((old_buf[i] ^ new_buf[i]) != 0 && ++i < slen) {
+            nzrun_len++;
+        }
+
+        /* overflow */
+        if (d + nzrun_len + 2 > dlen) {
+            return 0;
+        }
+
+        d += uleb128_encode_small(dst + d, nzrun_len);
+        memcpy(dst + d, nzrun_start, nzrun_len);
+        d += nzrun_len;
+        nzrun_len = 0;
+    }
+
+    return d;
+}
+
+int decode_page(uint8_t *src, int slen, uint8_t *dst, int dlen)
+{
+    int i = 0, d = 0;
+    uint32_t count = 0;
+
+    while (i < slen - 1) {
+        /* zrun */
+        i += uleb128_decode_small(src + i, &count);
+        d += count;
+
+        /* overflow */
+        if (d > dlen) {
+            return -1;
+        }
+
+        /* completed decoding */
+        if (i == slen) {
+            return d + 1;
+        }
+
+        /* nzrun */
+        i += uleb128_decode_small(src + i, &count);
+        /* overflow */
+        if (d + count > dlen) {
+            return -1;
+        }
+        memcpy(dst + d, src + i, count);
+        d += count;
+        i += count;
+    }
+
+    return d + 1;
+}