diff mbox

XBRLE page delta compression for live migration of large memory apps

Message ID AB5A8C7661872E428D6B8E1C2DFA35085CABC948DD@DEWDFECCR02.wdf.sap.corp
State New
Headers show

Commit Message

Shribman, Aidan May 22, 2011, noon UTC
Subject: [PATCH] XBRLE page delta compression for live migration of large memory apps
From: Aidan Shribman <aidan.shribman@sap.com>

By using XBRLE (Xor Based Run-Length-Encoding) we can reduce required
bandwidth for transferring dirty memory pages during live migration.
        migrate_set_cachesize <size>
        migrate -x <url>
Qemu host: Ubuntu 10.10
Testing: live migration (w and w/o XBRLE) tested successfully.

Signed-off-by: Benoit Hudzia <benoit.hudzia@sap.com>
Signed-off-by: Petter Svard <petters@cs.umu.se>
Signed-off-by: Aidan Shribman <aidan.shribman@sap.com>

---

 arch_init.c       |  647 +++++++++++++++++++++++++++++++++++++++++++++++++----
 block-migration.c |    3 +-
 hmp-commands.hx   |   36 +++-
 hw/hw.h           |    3 +-
 migration-exec.c  |    6 +-
 migration-fd.c    |    6 +-
 migration-tcp.c   |    6 +-
 migration-unix.c  |    6 +-
 migration.c       |   33 +++-
 migration.h       |   23 ++-
 qmp-commands.hx   |   43 +++-
 savevm.c          |   13 +-
 sysemu.h          |    3 +-
 13 files changed, 749 insertions(+), 79 deletions(-)

Comments

Shribman, Aidan June 13, 2011, 1:10 p.m. UTC | #1
The proposed patch slipped by with no apparent response - kindly provide feedback,
Aidan


> -----Original Message-----
> From: qemu-devel-bounces+aidan.shribman=sap.com@nongnu.org
> [mailto:qemu-devel-bounces+aidan.shribman=sap.com@nongnu.org]
> On Behalf Of Shribman, Aidan
> Sent: Sunday, May 22, 2011 3:01 PM
> To: qemu-devel@nongnu.org
> Subject: [Qemu-devel] [PATCH] XBRLE page delta compression
> for live migration of large memory apps
>
> Subject: [PATCH] XBRLE page delta compression for live
> migration of large memory apps
> From: Aidan Shribman <aidan.shribman@sap.com>
>
> By using XBRLE (Xor Based Run-Length-Encoding) we can reduce required
> bandwidth for transferring dirty memory pages during live migration.
>         migrate_set_cachesize <size>
>         migrate -x <url>
> Qemu host: Ubuntu 10.10
> Testing: live migration (w and w/o XBRLE) tested successfully.
>
> Signed-off-by: Benoit Hudzia <benoit.hudzia@sap.com>
> Signed-off-by: Petter Svard <petters@cs.umu.se>
> Signed-off-by: Aidan Shribman <aidan.shribman@sap.com>
>
> ---
>
>  arch_init.c       |  647
> +++++++++++++++++++++++++++++++++++++++++++++++++----
>  block-migration.c |    3 +-
>  hmp-commands.hx   |   36 +++-
>  hw/hw.h           |    3 +-
>  migration-exec.c  |    6 +-
>  migration-fd.c    |    6 +-
>  migration-tcp.c   |    6 +-
>  migration-unix.c  |    6 +-
>  migration.c       |   33 +++-
>  migration.h       |   23 ++-
>  qmp-commands.hx   |   43 +++-
>  savevm.c          |   13 +-
>  sysemu.h          |    3 +-
>  13 files changed, 749 insertions(+), 79 deletions(-)
>
> diff --git a/arch_init.c b/arch_init.c
> index 4486925..069cd67 100644
> --- a/arch_init.c
> +++ b/arch_init.c
> @@ -27,6 +27,7 @@
>  #include <sys/types.h>
>  #include <sys/mman.h>
>  #endif
> +#include <assert.h>
>  #include "config.h"
>  #include "monitor.h"
>  #include "sysemu.h"
> @@ -41,6 +42,24 @@
>  #include "gdbstub.h"
>  #include "hw/smbios.h"
>
> +//#define DEBUG_ARCH_INIT
> +#ifdef DEBUG_ARCH_INIT
> +#define DPRINTF(fmt, ...) \
> +    do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__);
> } while (0)
> +#else
> +#define DPRINTF(fmt, ...) \
> +    do { } while (0)
> +#endif
> +
> +//#define DEBUG_ARCH_INIT_CKSUM
> +#ifdef DEBUG_ARCH_INIT_CKSUM
> +#define PAGE_LOG(addr, pdata, fmt, ...) \
> +    do { page_log(addr, pdata, fmt, ## __VA_ARGS__); } while (0)
> +#else
> +#define PAGE_LOG(addr, pdata, fmt, ...) \
> +    do { } while (0)
> +#endif
> +
>  #ifdef TARGET_SPARC
>  int graphic_width = 1024;
>  int graphic_height = 768;
> @@ -88,6 +107,402 @@ const uint32_t arch_type = QEMU_ARCH;
>  #define RAM_SAVE_FLAG_PAGE     0x08
>  #define RAM_SAVE_FLAG_EOS      0x10
>  #define RAM_SAVE_FLAG_CONTINUE 0x20
> +#define RAM_SAVE_FLAG_XBRLE    0x40
> +
> +/***********************************************************/
> +/* Page cache for storing previous pages as basis for XBRLE
> compression */
> +#define CACHE_N_WAY 2 /* 2-way associative cache */
> +
> +typedef struct CacheItem {
> +    ram_addr_t it_addr;
> +    unsigned long it_age;
> +    uint8_t *it_data;
> +} CacheItem;
> +
> +typedef struct CacheBucket {
> +    CacheItem bkt_item[CACHE_N_WAY];
> +} CacheBucket;
> +
> +static CacheBucket *page_cache;
> +static int64_t cache_num_buckets;
> +static uint64_t cache_max_item_age;
> +static int64_t cache_num_items;
> +
> +static void cache_init(ssize_t num_buckets);
> +static void cache_fini(void);
> +static int cache_is_cached(ram_addr_t addr);
> +static int cache_get_oldest(CacheBucket *buck);
> +static int cache_get_newest(CacheBucket *buck, ram_addr_t addr);
> +static void cache_insert(ram_addr_t id, uint8_t *pdata);
> +static unsigned long cache_get_cache_pos(ram_addr_t address);
> +static CacheItem *cache_item_get(unsigned long pos, int item);
> +
> +/***********************************************************/
> +/* RAM Migration State */
> +typedef struct ArchMigrationState {
> +    int use_xbrle;
> +    int64_t xbrle_cache_size;
> +} ArchMigrationState;
> +
> +static ArchMigrationState arch_mig_state;
> +
> +void arch_set_params(int blk_enable, int shared_base, int use_xbrle,
> +        int64_t xbrle_cache_size, void *opaque)
> +{
> +    arch_mig_state.use_xbrle = use_xbrle;
> +    arch_mig_state.xbrle_cache_size = xbrle_cache_size;
> +}
> +
> +/***********************************************************/
> +/* XBRLE (Xor Based Run-Length Encoding) */
> +typedef struct XBRLEHeader {
> +    uint8_t xh_flags;
> +    uint16_t xh_len;
> +    uint32_t xh_cksum;
> +} XBRLEHeader;
> +
> +static int rle_encode(uint8_t *src, int slen, uint8_t *dst,
> int dlen);
> +static int rle_decode(uint8_t *src, int slen, uint8_t *dst,
> int dlen);
> +#ifdef DEBUG_ARCH_INIT_CKSUM
> +static uint32_t page_cksum(uint8_t *buf);
> +static void page_log(ram_addr_t addr, uint8_t *pdata, const
> char *fmt, ...);
> +#endif
> +
> +/***********************************************************/
> +/* benchmarking */
> +typedef struct BenchInfo {
> +    uint64_t normal_pages;
> +    uint64_t xbrle_pages;
> +    uint64_t xbrle_bytes;
> +    uint64_t xbrle_pages_aborted;
> +    uint64_t dup_pages;
> +    uint64_t iterations;
> +} BenchInfo;
> +
> +static BenchInfo bench;
> +
> +/***********************************************************/
> +/* XBRLE page cache implementation */
> +static CacheItem *cache_item_get(unsigned long pos, int item)
> +{
> +    assert(page_cache);
> +    return &page_cache[pos].bkt_item[item];
> +}
> +
> +#ifdef DEBUG_ARCH_INIT_CKSUM
> +static int64_t cache_max_items(void)
> +{
> +    return cache_num_buckets * CACHE_N_WAY;
> +}
> +#endif /* DEBUG_ARCH_INIT_CKSUM */
> +
> +static void cache_init(int64_t num_bytes)
> +{
> +    int i;
> +
> +    cache_num_items = 0;
> +    cache_max_item_age = 0;
> +    cache_num_buckets = num_bytes / (TARGET_PAGE_SIZE * CACHE_N_WAY);
> +    assert(cache_num_buckets);
> +    DPRINTF("Setting cache buckets to %ld\n", cache_num_buckets);
> +
> +    assert(!page_cache);
> +    page_cache = (CacheBucket *)qemu_mallocz((cache_num_buckets) *
> +            sizeof(CacheBucket));
> +
> +    for (i = 0; i < cache_num_buckets; i++) {
> +        int j;
> +        for (j = 0; j < CACHE_N_WAY; j++) {
> +            CacheItem *it = cache_item_get(i, j);
> +            it->it_data = NULL;
> +            it->it_age = 0;
> +            it->it_addr = -1;
> +        }
> +    }
> +}
> +
> +static void cache_fini(void)
> +{
> +    int i;
> +
> +    assert(page_cache);
> +
> +    for (i = 0; i < cache_num_buckets; i++) {
> +        int j;
> +        for (j = 0; j < CACHE_N_WAY; j++) {
> +            CacheItem *it = cache_item_get(i, j);
> +            qemu_free(it->it_data);
> +            it->it_data = 0;
> +        }
> +    }
> +
> +    qemu_free(page_cache);
> +    page_cache = NULL;
> +}
> +
> +static unsigned long cache_get_cache_pos(ram_addr_t address)
> +{
> +    unsigned long pos;
> +
> +    assert(cache_num_buckets);
> +    pos = (address/TARGET_PAGE_SIZE) & (cache_num_buckets - 1);
> +    return pos;
> +}
> +
> +static int cache_get_newest(CacheBucket *buck, ram_addr_t addr)
> +{
> +    unsigned long big = 0;
> +    int big_pos = -1;
> +    int j;
> +
> +    assert(page_cache);
> +
> +    for (j = 0; j < CACHE_N_WAY; j++) {
> +        CacheItem *it = &buck->bkt_item[j];
> +
> +        if (it->it_addr != addr) {
> +            continue;
> +        }
> +
> +        if (!j || it->it_age > big) {
> +            big = it->it_age;
> +            big_pos = j;
> +        }
> +    }
> +
> +    return big_pos;
> +}
> +
> +static int cache_get_oldest(CacheBucket *buck)
> +{
> +    unsigned long small = 0;
> +    int small_pos = -1;
> +    int j;
> +
> +    assert(page_cache);
> +
> +    for (j = 0; j < CACHE_N_WAY; j++) {
> +        CacheItem *it = &buck->bkt_item[j];
> +
> +        if (!j || it->it_age <  small) {
> +            small = it->it_age;
> +            small_pos = j;
> +        }
> +    }
> +
> +    return small_pos;
> +}
> +
> +static int cache_is_cached(ram_addr_t addr)
> +{
> +    unsigned long pos = cache_get_cache_pos(addr);
> +
> +    assert(page_cache);
> +    CacheBucket *bucket = &page_cache[pos];
> +    return cache_get_newest(bucket, addr);
> +}
> +
> +static void cache_insert(unsigned long addr, uint8_t *pdata)
> +{
> +    unsigned long pos;
> +    int slot = -1;
> +    CacheBucket *bucket;
> +
> +    pos = cache_get_cache_pos(addr);
> +    assert(page_cache);
> +    bucket = &page_cache[pos];
> +    slot = cache_get_oldest(bucket); /* evict LRU */
> +
> +    /* actual update of entry */
> +    CacheItem *it = cache_item_get(pos, slot);
> +    if (!it->it_data) {
> +        cache_num_items++;
> +    }
> +    qemu_free(it->it_data);
> +    it->it_data = pdata;
> +    it->it_age = ++cache_max_item_age;
> +    it->it_addr = addr;
> +}
> +
> +/* XBRLE (Xor Based Run-Length Encoding) */
> +static int rle_encode(uint8_t *src, int slen, uint8_t *dst, int dlen)
> +{
> +    int d = 0, ch_run = 0, i;
> +    uint8_t prev, ch;
> +
> +    for (i = 0; i <= slen; i++) {
> +        if (i != slen) {
> +            ch = src[i];
> +        }
> +
> +        if (!i || (i != slen && ch == prev && ch_run < 255)) {
> +            ch_run++;
> +        } else {
> +            if (d+2 > dlen)
> +                return -1;
> +            *dst++ = ch_run;
> +            *dst++ = prev;
> +            d += 2;
> +            ch_run = 1;
> +        }
> +
> +        prev = ch;
> +    }
> +    return d;
> +}
> +
> +static int rle_decode(uint8_t *src, int slen, uint8_t *dst, int dlen)
> +{
> +    int d = 0, s;
> +
> +    for (s = 0; s < slen-1; s += 2) {
> +        uint8_t ch_run = src[s];
> +        uint8_t ch = src[s+1];
> +        while (ch_run--) {
> +            if (d == dlen) {
> +                return -1;
> +            }
> +            dst[d] = ch;
> +            d++;
> +        }
> +    }
> +    return d;
> +}
> +
> +#define PAGE_SAMPLE_PERCENT 0.02
> +#define PAGE_SAMPLE_SIZE (TARGET_PAGE_SIZE * PAGE_SAMPLE_PERCENT)
> +#define BYTES_CHANGED_PERCENT 0.30
> +
> +static int is_page_good_for_xbrle(uint8_t *old, uint8_t *new)
> +{
> +    int i, bytes_changed = 0;
> +
> +    srand(time(NULL)+getpid()+getpid()*987654+rand());
> +
> +    for (i = 0; i < PAGE_SAMPLE_SIZE; i++) {
> +        unsigned long pos = (int) (rand() * TARGET_PAGE_SIZE
> / (RAND_MAX+1.0));
> +
> +         if (old[pos] != new[pos]) {
> +             bytes_changed++;
> +         }
> +    }
> +
> +    return (((float) bytes_changed) / PAGE_SAMPLE_SIZE) <
> BYTES_CHANGED_PERCENT;
> +}
> +
> +static void xor_encode(uint8_t *dst, uint8_t *src1, uint8_t *src2)
> +{
> +    int i;
> +
> +    for (i = 0; i < TARGET_PAGE_SIZE; i++) {
> +        dst[i] = src1[i] ^ src2[i];
> +    }
> +}
> +
> +static void save_block_hdr(QEMUFile *f,
> +        RAMBlock *block, ram_addr_t offset, int cont, int flag)
> +{
> +        qemu_put_be64(f, offset | cont | flag);
> +        if (!cont) {
> +                qemu_put_byte(f, strlen(block->idstr));
> +                qemu_put_buffer(f, (uint8_t *)block->idstr,
> +                                strlen(block->idstr));
> +        }
> +}
> +
> +#define ENCODING_FLAG_XBRLE 0x1
> +#define ENCODING_FLAG_CKSUM 0x2
> +
> +static int save_xbrle_page(QEMUFile *f, uint8_t *current_data,
> +        ram_addr_t current_addr, RAMBlock *block, ram_addr_t
> offset, int cont)
> +{
> +    int cache_location = -1, slot = -1, encoded_len = 0,
> bytes_sent = 0;
> +    XBRLEHeader hdr = {0};
> +    CacheItem *it;
> +    uint8_t *xor_buf = NULL, *xbrle_buf = NULL;
> +
> +    /* get location */
> +    slot = cache_is_cached(current_addr);
> +    if (slot == -1) {
> +        goto done;
> +    }
> +    cache_location = cache_get_cache_pos(current_addr);
> +
> +    /* abort if page changed too much */
> +    it = cache_item_get(cache_location, slot);
> +    if (!is_page_good_for_xbrle(it->it_data, current_data)) {
> +        DPRINTF("Page changed too much! Aborting XBRLE.\n");
> +        bench.xbrle_pages_aborted++;
> +        goto done;
> +    }
> +
> +    /* XOR encoding */
> +    xor_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE);
> +    xor_encode(xor_buf, it->it_data, current_data);
> +
> +    /* XBRLE (XOR+RLE) encoding */
> +    xbrle_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE);
> +    encoded_len = rle_encode(xor_buf, TARGET_PAGE_SIZE, xbrle_buf,
> +            TARGET_PAGE_SIZE);
> +
> +    if (encoded_len < 0) {
> +        DPRINTF("XBRLE encoding oeverflow - sending uncompressed\n");
> +        goto done;
> +    }
> +
> +    hdr.xh_len = encoded_len;
> +    hdr.xh_flags |= ENCODING_FLAG_XBRLE;
> +#ifdef DEBUG_ARCH_INIT_CKSUM
> +    hdr.xh_cksum = page_cksum(current_data);
> +    hdr.xh_flags |= ENCODING_FLAG_CKSUM;
> +#endif
> +
> +    /* Send XBRLE compressed page */
> +    save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBRLE);
> +    qemu_put_buffer(f, (uint8_t *) &hdr, sizeof(hdr));
> +    qemu_put_buffer(f, xbrle_buf, encoded_len);
> +    PAGE_LOG(current_addr, current_data, "XBRLE page (enc len %d)\n",
> +            encoded_len);
> +    bench.xbrle_pages++;
> +    bytes_sent = encoded_len + sizeof(hdr);
> +    bench.xbrle_bytes += bytes_sent;
> +
> +done:
> +    qemu_free(xor_buf);
> +    qemu_free(xbrle_buf);
> +    return bytes_sent;
> +}
> +
> +#ifdef DEBUG_ARCH_INIT_CKSUM
> +static uint32_t page_cksum(uint8_t *buf)
> +{
> +    uint32_t res = 0;
> +    int i;
> +
> +    for (i = 0; i < TARGET_PAGE_SIZE; i++) {
> +        res += buf[i];
> +    }
> +
> +    return res;
> +}
> +
> +static void page_log(ram_addr_t addr, uint8_t *pdata, const
> char *fmt, ...)
> +{
> +    va_list arg_ptr;
> +    static FILE *fp;
> +    static uint32_t page_seq;
> +
> +    va_start(arg_ptr, fmt);
> +    if (!fp) {
> +        fp = fopen("mig.log", "w");
> +    }
> +    page_seq++;
> +    fprintf(fp, "[seq %d addr 0x%lX cksum 0x%X] ", page_seq,
> +            (unsigned long) addr, page_cksum(pdata));
> +    vfprintf(fp, fmt, arg_ptr);
> +    va_end(arg_ptr);
> +}
> +#endif /* DEBUG_ARCH_INIT_CKSUM */
>
>  static int is_dup_page(uint8_t *page, uint8_t ch)
>  {
> @@ -107,7 +522,7 @@ static int is_dup_page(uint8_t *page, uint8_t ch)
>  static RAMBlock *last_block;
>  static ram_addr_t last_offset;
>
> -static int ram_save_block(QEMUFile *f)
> +static int ram_save_block(QEMUFile *f, int stage)
>  {
>      RAMBlock *block = last_block;
>      ram_addr_t offset = last_offset;
> @@ -128,28 +543,32 @@ static int ram_save_block(QEMUFile *f)
>                                              current_addr +
> TARGET_PAGE_SIZE,
>                                              MIGRATION_DIRTY_FLAG);
>
> -            p = block->host + offset;
> +            p = qemu_mallocz(TARGET_PAGE_SIZE);
> +            memcpy(p, block->host + offset, TARGET_PAGE_SIZE);
>
>              if (is_dup_page(p, *p)) {
> -                qemu_put_be64(f, offset | cont |
> RAM_SAVE_FLAG_COMPRESS);
> -                if (!cont) {
> -                    qemu_put_byte(f, strlen(block->idstr));
> -                    qemu_put_buffer(f, (uint8_t *)block->idstr,
> -                                    strlen(block->idstr));
> -                }
> +                save_block_hdr(f, block, offset, cont,
> RAM_SAVE_FLAG_COMPRESS);
>                  qemu_put_byte(f, *p);
>                  bytes_sent = 1;
> -            } else {
> -                qemu_put_be64(f, offset | cont | RAM_SAVE_FLAG_PAGE);
> -                if (!cont) {
> -                    qemu_put_byte(f, strlen(block->idstr));
> -                    qemu_put_buffer(f, (uint8_t *)block->idstr,
> -                                    strlen(block->idstr));
> +                bench.dup_pages++;
> +                PAGE_LOG(current_addr, p, "DUP page\n");
> +            } else if (stage == 2 && arch_mig_state.use_xbrle) {
> +                bytes_sent = save_xbrle_page(f, p,
> current_addr, block,
> +                    offset, cont);
> +                if (bytes_sent) {
> +                    cache_insert(current_addr, p);
>                  }
> +            }
> +            if (!bytes_sent) {
> +                save_block_hdr(f, block, offset, cont,
> RAM_SAVE_FLAG_PAGE);
>                  qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
>                  bytes_sent = TARGET_PAGE_SIZE;
> +                bench.normal_pages++;
> +                PAGE_LOG(current_addr, p, "NORMAL page\n");
> +                if (arch_mig_state.use_xbrle) {
> +                    cache_insert(current_addr, p);
> +                }
>              }
> -
>              break;
>          }
>
> @@ -212,6 +631,55 @@ uint64_t ram_bytes_total(void)
>      return total;
>  }
>
> +#ifdef DEBUG_ARCH_INIT
> +static void dump_percentage(const char *label, unsigned long
> absolute,
> +        unsigned long total)
> +{
> +    printf("%s: %ld (%0.2f%%)\n", label, absolute,
> +            (total ? (100.0 * absolute / total) : 0));
> +}
> +
> +static void dump_migration_statistics(void)
> +{
> +    unsigned long normal_bytes = bench.normal_pages *
> TARGET_PAGE_SIZE;
> +    unsigned long total_pages = bench.normal_pages +
> bench.xbrle_pages
> +        + bench.dup_pages;
> +    unsigned long total_bytes = normal_bytes + bench.xbrle_bytes
> +        + bench.dup_pages;
> +
> +    printf("\n");
> +
> printf("=====================================================\n");
> +    printf("Save VM Memory Statistics (SUCCESS or FAILURE):\n");
> +    printf("Iterations: %ld\n", bench.iterations);
> +
> +    dump_percentage("Normal pages", bench.normal_pages, total_pages);
> +    dump_percentage("Normal bytes", normal_bytes, total_bytes);
> +
> +    dump_percentage("Dup pages", bench.dup_pages, total_pages);
> +    dump_percentage("Dup bytes", bench.dup_pages, total_bytes);
> +
> +    if (arch_mig_state.use_xbrle) {
> +        dump_percentage("XBRLE pages", bench.xbrle_pages,
> total_pages);
> +        dump_percentage("XBRLE bytes", bench.xbrle_bytes,
> total_bytes);
> +        dump_percentage("Aborted XBRLE pages from XBRLE",
> +            bench.xbrle_pages_aborted,
> +            bench.xbrle_pages + bench.xbrle_pages_aborted);
> +    }
> +
> +    dump_percentage("Total pages", total_pages, total_pages);
> +    dump_percentage("Total bytes", total_bytes, total_bytes);
> +
> +    if (arch_mig_state.use_xbrle) {
> +        printf("Cache number of inserts: %ld\n", cache_max_item_age);
> +        printf("Cache max items: %ld\n", cache_max_items());
> +        dump_percentage("Cache number of items", cache_num_items,
> +            cache_max_items());
> +    }
> +
> +
> printf("=====================================================\n");
> +}
> +#endif /* DEBUG_ARCH_INIT */
> +
>  int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
>  {
>      ram_addr_t addr;
> @@ -235,6 +703,10 @@ int ram_save_live(Monitor *mon, QEMUFile
> *f, int stage, void *opaque)
>          last_block = NULL;
>          last_offset = 0;
>
> +        if (arch_mig_state.use_xbrle) {
> +            cache_init(arch_mig_state.xbrle_cache_size);
> +        }
> +
>          /* Make sure all dirty bits are set */
>          QLIST_FOREACH(block, &ram_list.blocks, next) {
>              for (addr = block->offset; addr < block->offset
> + block->length;
> @@ -264,8 +736,9 @@ int ram_save_live(Monitor *mon, QEMUFile
> *f, int stage, void *opaque)
>      while (!qemu_file_rate_limit(f)) {
>          int bytes_sent;
>
> -        bytes_sent = ram_save_block(f);
> +        bytes_sent = ram_save_block(f, stage);
>          bytes_transferred += bytes_sent;
> +        bench.iterations++;
>          if (bytes_sent == 0) { /* no more blocks */
>              break;
>          }
> @@ -285,19 +758,83 @@ int ram_save_live(Monitor *mon,
> QEMUFile *f, int stage, void *opaque)
>          int bytes_sent;
>
>          /* flush all remaining blocks regardless of rate limiting */
> -        while ((bytes_sent = ram_save_block(f)) != 0) {
> +        while ((bytes_sent = ram_save_block(f, stage))) {
>              bytes_transferred += bytes_sent;
>          }
>          cpu_physical_memory_set_dirty_tracking(0);
> +        if (arch_mig_state.use_xbrle) {
> +            cache_fini();
> +#ifdef DEBUG_ARCH_INIT
> +            dump_migration_statistics();
> +#endif
> +        }
>      }
>
>      qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>
>      expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
>
> +    DPRINTF("ram_save_live: expected(%ld) <= max(%ld)?\n",
> expected_time,
> +        migrate_max_downtime());
> +
>      return (stage == 2) && (expected_time <= migrate_max_downtime());
>  }
>
> +static int load_xbrle(QEMUFile *f, ram_addr_t addr, void *host)
> +{
> +    int ret, rc = -1;
> +    uint8_t *prev_page, *xor_buf, *xbrle_buf;
> +    XBRLEHeader hdr = {0};
> +
> +    /* extract RLE header */
> +    qemu_get_buffer(f, (uint8_t *) &hdr, sizeof(hdr));
> +    if (!(hdr.xh_flags & ENCODING_FLAG_XBRLE)) {
> +        fprintf(stderr, "Failed to load XBRLE page - wrong
> compression!\n");
> +        goto done;
> +    }
> +
> +    if (hdr.xh_len > TARGET_PAGE_SIZE) {
> +        fprintf(stderr, "Failed to load XBRLE page - len
> overflow!\n");
> +        goto done;
> +    }
> +
> +    /* load data and decode */
> +    xbrle_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE);
> +    qemu_get_buffer(f, xbrle_buf, hdr.xh_len);
> +
> +    /* decode RLE */
> +    xor_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE);
> +    ret = rle_decode(xbrle_buf, hdr.xh_len, xor_buf,
> TARGET_PAGE_SIZE);
> +    if (ret == -1) {
> +        fprintf(stderr, "Failed to load XBRLE page - decode
> error!\n");
> +        goto done;
> +    }
> +
> +    if (ret != TARGET_PAGE_SIZE) {
> +        fprintf(stderr, "Failed to load XBRLE page - size %d
> expected %d!\n",
> +            ret, TARGET_PAGE_SIZE);
> +        goto done;
> +    }
> +
> +    /* decode XOR delta */
> +    prev_page = host;
> +    xor_encode(prev_page, prev_page, xor_buf);
> +#ifdef DEBUG_ARCH_INIT_CKSUM
> +    if (hdr.xh_flags & ENCODING_FLAG_CKSUM &&
> +            hdr.xh_cksum != page_cksum(prev_page)) {
> +        fprintf(stderr, "Failed to load XBRLE page - bad
> checksum!\n");
> +        goto done;
> +    }
> +#endif
> +
> +    PAGE_LOG(addr, prev_page, "XBRLE page (enc len %d)\n",
> hdr.xh_len);
> +    rc = 0;
> +done:
> +    qemu_free(xor_buf);
> +    qemu_free(xbrle_buf);
> +    return rc;
> +}
> +
>  static inline void *host_from_stream_offset(QEMUFile *f,
>                                              ram_addr_t offset,
>                                              int flags)
> @@ -328,16 +865,38 @@ static inline void
> *host_from_stream_offset(QEMUFile *f,
>      return NULL;
>  }
>
> +static inline void *host_from_stream_offset_versioned(int version_id,
> +        QEMUFile *f, ram_addr_t offset, int flags)
> +{
> +        void *host;
> +        if (version_id == 3) {
> +                host = qemu_get_ram_ptr(offset);
> +        } else {
> +                host = host_from_stream_offset(f, offset, flags);
> +        }
> +        if (!host) {
> +            fprintf(stderr, "Failed to convert RAM address to host"
> +                    " for offset 0x%lX!\n", offset);
> +            abort();
> +        }
> +        return host;
> +}
> +
>  int ram_load(QEMUFile *f, void *opaque, int version_id)
>  {
>      ram_addr_t addr;
> -    int flags;
> +    int flags, ret = 0;
> +    static uint64_t seq_iter;
> +
> +    seq_iter++;
>
>      if (version_id < 3 || version_id > 4) {
> -        return -EINVAL;
> +        ret = -EINVAL;
> +        goto done;
>      }
>
>      do {
> +        void *host;
>          addr = qemu_get_be64(f);
>
>          flags = addr & ~TARGET_PAGE_MASK;
> @@ -346,7 +905,8 @@ int ram_load(QEMUFile *f, void *opaque,
> int version_id)
>          if (flags & RAM_SAVE_FLAG_MEM_SIZE) {
>              if (version_id == 3) {
>                  if (addr != ram_bytes_total()) {
> -                    return -EINVAL;
> +                    ret = -EINVAL;
> +                    goto done;
>                  }
>              } else {
>                  /* Synchronize RAM block list */
> @@ -365,8 +925,10 @@ int ram_load(QEMUFile *f, void *opaque,
> int version_id)
>
>                      QLIST_FOREACH(block, &ram_list.blocks, next) {
>                          if (!strncmp(id, block->idstr, sizeof(id))) {
> -                            if (block->length != length)
> -                                return -EINVAL;
> +                            if (block->length != length) {
> +                                ret = -EINVAL;
> +                                goto done;
> +                            }
>                              break;
>                          }
>                      }
> @@ -374,7 +936,8 @@ int ram_load(QEMUFile *f, void *opaque,
> int version_id)
>                      if (!block) {
>                          fprintf(stderr, "Unknown ramblock
> \"%s\", cannot "
>                                  "accept migration\n", id);
> -                        return -EINVAL;
> +                        ret = -EINVAL;
> +                        goto done;
>                      }
>
>                      total_ram_bytes -= length;
> @@ -383,17 +946,10 @@ int ram_load(QEMUFile *f, void *opaque,
> int version_id)
>          }
>
>          if (flags & RAM_SAVE_FLAG_COMPRESS) {
> -            void *host;
>              uint8_t ch;
>
> -            if (version_id == 3)
> -                host = qemu_get_ram_ptr(addr);
> -            else
> -                host = host_from_stream_offset(f, addr, flags);
> -            if (!host) {
> -                return -EINVAL;
> -            }
> -
> +            host = host_from_stream_offset_versioned(version_id,
> +                            f, addr, flags);
>              ch = qemu_get_byte(f);
>              memset(host, ch, TARGET_PAGE_SIZE);
>  #ifndef _WIN32
> @@ -402,22 +958,31 @@ int ram_load(QEMUFile *f, void *opaque,
> int version_id)
>                  qemu_madvise(host, TARGET_PAGE_SIZE,
> QEMU_MADV_DONTNEED);
>              }
>  #endif
> +            PAGE_LOG(addr, host, "DUP page\n");
>          } else if (flags & RAM_SAVE_FLAG_PAGE) {
> -            void *host;
> -
> -            if (version_id == 3)
> -                host = qemu_get_ram_ptr(addr);
> -            else
> -                host = host_from_stream_offset(f, addr, flags);
> -
> +            host = host_from_stream_offset_versioned(version_id,
> +                            f, addr, flags);
>              qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
> +            PAGE_LOG(addr, host, "NORMAL page\n");
> +        } else if (flags & RAM_SAVE_FLAG_XBRLE) {
> +            host = host_from_stream_offset_versioned(version_id,
> +                            f, addr, flags);
> +            if (load_xbrle(f, addr, host) < 0) {
> +                ret = -EINVAL;
> +                goto done;
> +            }
>          }
> +
>          if (qemu_file_has_error(f)) {
> -            return -EIO;
> +            ret = -EIO;
> +            goto done;
>          }
>      } while (!(flags & RAM_SAVE_FLAG_EOS));
>
> -    return 0;
> +done:
> +    DPRINTF("Completed load of VM with exit code %d seq
> iteration %ld\n",
> +            ret, seq_iter);
> +    return ret;
>  }
>
>  void qemu_service_io(void)
> diff --git a/block-migration.c b/block-migration.c
> index 3e66f49..004fc12 100644
> --- a/block-migration.c
> +++ b/block-migration.c
> @@ -689,7 +689,8 @@ static int block_load(QEMUFile *f, void
> *opaque, int version_id)
>      return 0;
>  }
>
> -static void block_set_params(int blk_enable, int
> shared_base, void *opaque)
> +static void block_set_params(int blk_enable, int shared_base,
> +       int use_xbrle, int64_t xbrle_cache_size, void *opaque)
>  {
>      block_mig_state.blk_enable = blk_enable;
>      block_mig_state.shared_base = shared_base;
> diff --git a/hmp-commands.hx b/hmp-commands.hx
> index e5585ba..e49d5be 100644
> --- a/hmp-commands.hx
> +++ b/hmp-commands.hx
> @@ -717,24 +717,27 @@ ETEXI
>
>      {
>          .name       = "migrate",
> -        .args_type  = "detach:-d,blk:-b,inc:-i,uri:s",
> -        .params     = "[-d] [-b] [-i] uri",
> -        .help       = "migrate to URI (using -d to not wait
> for completion)"
> -                     "\n\t\t\t -b for migration without
> shared storage with"
> -                     " full copy of disk\n\t\t\t -i for
> migration without "
> -                     "shared storage with incremental copy of disk "
> -                     "(base image shared between src and
> destination)",
> +        .args_type  = "detach:-d,blk:-b,inc:-i,xbrle:-x,uri:s",
> +        .params     = "[-d] [-b] [-i] [-x] uri",
> +        .help       = "migrate to URI"
> +                      "\n\t -d to not wait for completion"
> +                      "\n\t -b for migration without shared
> storage with"
> +                      " full copy of disk"
> +                      "\n\t -i for migration without"
> +                      " shared storage with incremental copy of disk"
> +                      " (base image shared between source
> and destination)"
> +                      "\n\t -x to use XBRLE page delta compression",
>          .user_print = monitor_user_noop,
>         .mhandler.cmd_new = do_migrate,
>      },
>
> -
>  STEXI
> -@item migrate [-d] [-b] [-i] @var{uri}
> +@item migrate [-d] [-b] [-i] [-x] @var{uri}
>  @findex migrate
>  Migrate to @var{uri} (using -d to not wait for completion).
>         -b for migration with full copy of disk
>         -i for migration with incremental copy of disk (base
> image is shared)
> +    -x to use XBRLE page delta compression
>  ETEXI
>
>      {
> @@ -753,10 +756,23 @@ Cancel the current VM migration.
>  ETEXI
>
>      {
> +        .name       = "migrate_set_cachesize",
> +        .args_type  = "value:s",
> +        .params     = "value",
> +        .help       = "set cache size (in MB) for XBRLE migrations",
> +        .mhandler.cmd = do_migrate_set_cachesize,
> +    },
> +
> +STEXI
> +@item migrate_set_cachesize @var{value}
> +Set cache size (in MB) for XBRLE migrations.
> +ETEXI
> +
> +    {
>          .name       = "migrate_set_speed",
>          .args_type  = "value:o",
>          .params     = "value",
> -        .help       = "set maximum speed (in bytes) for migrations. "
> +        .help       = "set maximum XBRLE cache size (in
> bytes) for migrations. "
>         "Defaults to MB if no size suffix is specified, ie.
> B/K/M/G/T",
>          .user_print = monitor_user_noop,
>          .mhandler.cmd_new = do_migrate_set_speed,
> diff --git a/hw/hw.h b/hw/hw.h
> index 9d2cfc2..de9e5a6 100644
> --- a/hw/hw.h
> +++ b/hw/hw.h
> @@ -239,7 +239,8 @@ static inline void
> qemu_get_sbe64s(QEMUFile *f, int64_t *pv)
>  int64_t qemu_ftell(QEMUFile *f);
>  int64_t qemu_fseek(QEMUFile *f, int64_t pos, int whence);
>
> -typedef void SaveSetParamsHandler(int blk_enable, int
> shared, void * opaque);
> +typedef void SaveSetParamsHandler(int blk_enable, int shared,
> +       int use_xbrle, int64_t xbrle_cache_size, void * opaque);
>  typedef void SaveStateHandler(QEMUFile *f, void *opaque);
>  typedef int SaveLiveStateHandler(Monitor *mon, QEMUFile *f,
> int stage,
>                                   void *opaque);
> diff --git a/migration-exec.c b/migration-exec.c
> index 14718dd..fe8254a 100644
> --- a/migration-exec.c
> +++ b/migration-exec.c
> @@ -67,7 +67,9 @@ MigrationState
> *exec_start_outgoing_migration(Monitor *mon,
>                                               int64_t bandwidth_limit,
>                                               int detach,
>                                               int blk,
> -                                             int inc)
> +                          int inc,
> +                          int use_xbrle,
> +                          int64_t xbrle_cache_size)
>  {
>      FdMigrationState *s;
>      FILE *f;
> @@ -99,6 +101,8 @@ MigrationState
> *exec_start_outgoing_migration(Monitor *mon,
>
>      s->mig_state.blk = blk;
>      s->mig_state.shared = inc;
> +    s->mig_state.use_xbrle = use_xbrle;
> +    s->mig_state.xbrle_cache_size = xbrle_cache_size;
>
>      s->state = MIG_STATE_ACTIVE;
>      s->mon = NULL;
> diff --git a/migration-fd.c b/migration-fd.c
> index 6d14505..4a1ddbd 100644
> --- a/migration-fd.c
> +++ b/migration-fd.c
> @@ -56,7 +56,9 @@ MigrationState
> *fd_start_outgoing_migration(Monitor *mon,
>                                             int64_t bandwidth_limit,
>                                             int detach,
>                                             int blk,
> -                                           int inc)
> +                        int inc,
> +                        int use_xbrle,
> +                        int64_t xbrle_cache_size)
>  {
>      FdMigrationState *s;
>
> @@ -82,6 +84,8 @@ MigrationState
> *fd_start_outgoing_migration(Monitor *mon,
>
>      s->mig_state.blk = blk;
>      s->mig_state.shared = inc;
> +    s->mig_state.use_xbrle = use_xbrle;
> +    s->mig_state.xbrle_cache_size = xbrle_cache_size;
>
>      s->state = MIG_STATE_ACTIVE;
>      s->mon = NULL;
> diff --git a/migration-tcp.c b/migration-tcp.c
> index b55f419..4ca5bf6 100644
> --- a/migration-tcp.c
> +++ b/migration-tcp.c
> @@ -81,7 +81,9 @@ MigrationState
> *tcp_start_outgoing_migration(Monitor *mon,
>                                               int64_t bandwidth_limit,
>                                               int detach,
>                                              int blk,
> -                                            int inc)
> +                         int inc,
> +                         int use_xbrle,
> +                         int64_t xbrle_cache_size)
>  {
>      struct sockaddr_in addr;
>      FdMigrationState *s;
> @@ -101,6 +103,8 @@ MigrationState
> *tcp_start_outgoing_migration(Monitor *mon,
>
>      s->mig_state.blk = blk;
>      s->mig_state.shared = inc;
> +    s->mig_state.use_xbrle = use_xbrle;
> +    s->mig_state.xbrle_cache_size = xbrle_cache_size;
>
>      s->state = MIG_STATE_ACTIVE;
>      s->mon = NULL;
> diff --git a/migration-unix.c b/migration-unix.c
> index 57232c0..0813902 100644
> --- a/migration-unix.c
> +++ b/migration-unix.c
> @@ -80,7 +80,9 @@ MigrationState
> *unix_start_outgoing_migration(Monitor *mon,
>                                               int64_t bandwidth_limit,
>                                               int detach,
>                                               int blk,
> -                                             int inc)
> +                          int inc,
> +                          int use_xbrle,
> +                          int64_t xbrle_cache_size)
>  {
>      FdMigrationState *s;
>      struct sockaddr_un addr;
> @@ -100,6 +102,8 @@ MigrationState
> *unix_start_outgoing_migration(Monitor *mon,
>
>      s->mig_state.blk = blk;
>      s->mig_state.shared = inc;
> +    s->mig_state.use_xbrle = use_xbrle;
> +    s->mig_state.xbrle_cache_size = xbrle_cache_size;
>
>      s->state = MIG_STATE_ACTIVE;
>      s->mon = NULL;
> diff --git a/migration.c b/migration.c
> index 9ee8b17..b5b530b 100644
> --- a/migration.c
> +++ b/migration.c
> @@ -34,6 +34,9 @@
>  /* Migration speed throttling */
>  static uint32_t max_throttle = (32 << 20);
>
> +/* Migration XBRLE cache size */
> +static int64_t migrate_cache_size = 0x8000000; /* 128 MB size */
> +
>  static MigrationState *current_migration;
>
>  int qemu_start_incoming_migration(const char *uri)
> @@ -80,6 +83,7 @@ int do_migrate(Monitor *mon, const QDict
> *qdict, QObject **ret_data)
>      int detach = qdict_get_try_bool(qdict, "detach", 0);
>      int blk = qdict_get_try_bool(qdict, "blk", 0);
>      int inc = qdict_get_try_bool(qdict, "inc", 0);
> +    int use_xbrle = qdict_get_try_bool(qdict, "xbrle", 0);
>      const char *uri = qdict_get_str(qdict, "uri");
>
>      if (current_migration &&
> @@ -90,17 +94,21 @@ int do_migrate(Monitor *mon, const QDict
> *qdict, QObject **ret_data)
>
>      if (strstart(uri, "tcp:", &p)) {
>          s = tcp_start_outgoing_migration(mon, p,
> max_throttle, detach,
> -                                         blk, inc);
> +                                         blk, inc, use_xbrle,
> +                                         migrate_cache_size);
>  #if !defined(WIN32)
>      } else if (strstart(uri, "exec:", &p)) {
>          s = exec_start_outgoing_migration(mon, p,
> max_throttle, detach,
> -                                          blk, inc);
> +                                          blk, inc, use_xbrle,
> +                                          migrate_cache_size);
>      } else if (strstart(uri, "unix:", &p)) {
>          s = unix_start_outgoing_migration(mon, p,
> max_throttle, detach,
> -                                          blk, inc);
> +                                          blk, inc, use_xbrle,
> +                                          migrate_cache_size);
>      } else if (strstart(uri, "fd:", &p)) {
>          s = fd_start_outgoing_migration(mon, p, max_throttle, detach,
> -                                        blk, inc);
> +                                        blk, inc, use_xbrle,
> +                                        migrate_cache_size);
>  #endif
>      } else {
>          monitor_printf(mon, "unknown migration protocol: %s\n", uri);
> @@ -341,7 +349,8 @@ void migrate_fd_connect(FdMigrationState *s)
>
>      DPRINTF("beginning savevm\n");
>      ret = qemu_savevm_state_begin(s->mon, s->file, s->mig_state.blk,
> -                                  s->mig_state.shared);
> +                                  s->mig_state.shared,
> s->mig_state.use_xbrle,
> +                                  s->mig_state.xbrle_cache_size);
>      if (ret < 0) {
>          DPRINTF("failed, %d\n", ret);
>          migrate_fd_error(s);
> @@ -448,3 +457,17 @@ int migrate_fd_close(void *opaque)
>      qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
>      return s->close(s);
>  }
> +
> +void do_migrate_set_cachesize(Monitor *mon, const QDict *qdict)
> +{
> +    ssize_t bytes;
> +    char *ptr;
> +    const char *value = qdict_get_str(qdict, "value");
> +
> +    bytes = strtod(value, &ptr);
> +    if (bytes > 0) {
> +        migrate_cache_size = bytes;
> +        monitor_printf(mon, "Cache size set to %ld bytes\n", bytes);
> +    }
> +}
> +
> diff --git a/migration.h b/migration.h
> index d13ed4f..62be2f8 100644
> --- a/migration.h
> +++ b/migration.h
> @@ -32,6 +32,8 @@ struct MigrationState
>      void (*release)(MigrationState *s);
>      int blk;
>      int shared;
> +    int use_xbrle;
> +    int64_t xbrle_cache_size;
>  };
>
>  typedef struct FdMigrationState FdMigrationState;
> @@ -76,7 +78,9 @@ MigrationState
> *exec_start_outgoing_migration(Monitor *mon,
>                                               int64_t bandwidth_limit,
>                                               int detach,
>                                               int blk,
> -                                             int inc);
> +                          int inc,
> +                          int use_xbrle,
> +                          int64_t xbrle_cache_size);
>
>  int tcp_start_incoming_migration(const char *host_port);
>
> @@ -85,7 +89,9 @@ MigrationState
> *tcp_start_outgoing_migration(Monitor *mon,
>                                              int64_t bandwidth_limit,
>                                              int detach,
>                                              int blk,
> -                                            int inc);
> +                         int inc,
> +                         int use_xbrle,
> +                         int64_t xbrle_cache_size);
>
>  int unix_start_incoming_migration(const char *path);
>
> @@ -94,7 +100,9 @@ MigrationState
> *unix_start_outgoing_migration(Monitor *mon,
>                                               int64_t bandwidth_limit,
>                                               int detach,
>                                               int blk,
> -                                             int inc);
> +                          int inc,
> +                          int use_xbrle,
> +                          int64_t xbrle_cache_size);
>
>  int fd_start_incoming_migration(const char *path);
>
> @@ -103,7 +111,9 @@ MigrationState
> *fd_start_outgoing_migration(Monitor *mon,
>                                             int64_t bandwidth_limit,
>                                             int detach,
>                                             int blk,
> -                                           int inc);
> +                        int inc,
> +                        int use_xbrle,
> +                        int64_t xbrle_cache_size);
>
>  void migrate_fd_monitor_suspend(FdMigrationState *s, Monitor *mon);
>
> @@ -134,4 +144,9 @@ static inline FdMigrationState
> *migrate_to_fms(MigrationState *mig_state)
>      return container_of(mig_state, FdMigrationState, mig_state);
>  }
>
> +void do_migrate_set_cachesize(Monitor *mon, const QDict *qdict);
> +
> +void arch_set_params(int blk_enable, int shared_base,
> +        int use_xbrle, int64_t xbrle_cache_size, void *opaque);
> +
>  #endif
> diff --git a/qmp-commands.hx b/qmp-commands.hx
> index 793cf1c..8fbe64b 100644
> --- a/qmp-commands.hx
> +++ b/qmp-commands.hx
> @@ -431,13 +431,16 @@ EQMP
>
>      {
>          .name       = "migrate",
> -        .args_type  = "detach:-d,blk:-b,inc:-i,uri:s",
> -        .params     = "[-d] [-b] [-i] uri",
> -        .help       = "migrate to URI (using -d to not wait
> for completion)"
> -                     "\n\t\t\t -b for migration without
> shared storage with"
> -                     " full copy of disk\n\t\t\t -i for
> migration without "
> -                     "shared storage with incremental copy of disk "
> -                     "(base image shared between src and
> destination)",
> +        .args_type  = "detach:-d,blk:-b,inc:-i,xbrle:-x,uri:s",
> +        .params     = "[-d] [-b] [-i] [-x] uri",
> +        .help       = "migrate to URI"
> +                      "\n\t -d to not wait for completion"
> +                      "\n\t -b for migration without shared
> storage with"
> +                      " full copy of disk"
> +                      "\n\t -i for migration without"
> +                      " shared storage with incremental copy of disk"
> +                      " (base image shared between source
> and destination)"
> +                      "\n\t -x to use XBRLE page delta compression",
>          .user_print = monitor_user_noop,
>         .mhandler.cmd_new = do_migrate,
>      },
> @@ -453,6 +456,7 @@ Arguments:
>  - "blk": block migration, full disk copy (json-bool, optional)
>  - "inc": incremental disk copy (json-bool, optional)
>  - "uri": Destination URI (json-string)
> +- "xbrle": use XBRLE page delta compression (json-bool, optional)
>
>  Example:
>
> @@ -494,6 +498,31 @@ Example:
>  EQMP
>
>      {
> +        .name       = "migrate_set_cachesize",
> +        .args_type  = "value:s",
> +        .params     = "value",
> +        .help       = "set cache size (in MB) for xbrle migrations",
> +        .mhandler.cmd = do_migrate_set_cachesize,
> +    },
> +
> +SQMP
> +migrate_set_cachesize
> +---------------------
> +
> +Set cache size to be used by XBRLE migration
> +
> +Arguments:
> +
> +- "value": cache size in bytes (json-number)
> +
> +Example:
> +
> +-> { "execute": "migrate_set_cachesize", "arguments": {
> "value": 500M } }
> +<- { "return": {} }
> +
> +EQMP
> +
> +    {
>          .name       = "migrate_set_speed",
>          .args_type  = "value:f",
>          .params     = "value",
> diff --git a/savevm.c b/savevm.c
> index 4e49765..93b512b 100644
> --- a/savevm.c
> +++ b/savevm.c
> @@ -1141,7 +1141,8 @@ int register_savevm(DeviceState *dev,
>                      void *opaque)
>  {
>      return register_savevm_live(dev, idstr, instance_id, version_id,
> -                                NULL, NULL, save_state,
> load_state, opaque);
> +                                arch_set_params, NULL, save_state,
> +                                load_state, opaque);
>  }
>
>  void unregister_savevm(DeviceState *dev, const char *idstr,
> void *opaque)
> @@ -1428,15 +1429,17 @@ static int vmstate_save(QEMUFile *f,
> SaveStateEntry *se)
>  #define QEMU_VM_SUBSECTION           0x05
>
>  int qemu_savevm_state_begin(Monitor *mon, QEMUFile *f, int
> blk_enable,
> -                            int shared)
> +                            int shared, int use_xbrle,
> +                            int64_t xbrle_cache_size)
>  {
>      SaveStateEntry *se;
>
>      QTAILQ_FOREACH(se, &savevm_handlers, entry) {
>          if(se->set_params == NULL) {
>              continue;
> -       }
> -       se->set_params(blk_enable, shared, se->opaque);
> +        }
> +        se->set_params(blk_enable, shared, use_xbrle,
> xbrle_cache_size,
> +                se->opaque);
>      }
>
>      qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
> @@ -1577,7 +1580,7 @@ static int qemu_savevm_state(Monitor
> *mon, QEMUFile *f)
>
>      bdrv_flush_all();
>
> -    ret = qemu_savevm_state_begin(mon, f, 0, 0);
> +    ret = qemu_savevm_state_begin(mon, f, 0, 0, 0, 0);
>      if (ret < 0)
>          goto out;
>
> diff --git a/sysemu.h b/sysemu.h
> index b81a70e..15a0664 100644
> --- a/sysemu.h
> +++ b/sysemu.h
> @@ -74,7 +74,8 @@ void qemu_announce_self(void);
>  void main_loop_wait(int nonblocking);
>
>  int qemu_savevm_state_begin(Monitor *mon, QEMUFile *f, int
> blk_enable,
> -                            int shared);
> +                            int shared, int use_xbrle,
> +                            int64_t xbrle_cache_size);
>  int qemu_savevm_state_iterate(Monitor *mon, QEMUFile *f);
>  int qemu_savevm_state_complete(Monitor *mon, QEMUFile *f);
>  void qemu_savevm_state_cancel(Monitor *mon, QEMUFile *f);
>
>
>
>
Anthony Liguori June 22, 2011, 12:01 p.m. UTC | #2
On 05/22/2011 07:00 AM, Shribman, Aidan wrote:
> Subject: [PATCH] XBRLE page delta compression for live migration of large memory apps
> From: Aidan Shribman<aidan.shribman@sap.com>
>
> By using XBRLE (Xor Based Run-Length-Encoding) we can reduce required
> bandwidth for transferring dirty memory pages during live migration
>          migrate_set_cachesize<size>
>          migrate -x<url>
> Qemu host: Ubuntu 10.10
> Testing: live migration (w and w/o XBRLE) tested successfully.

By how much?

This is a change to the live migration protocol, it would also require 
documentation and an understanding of how it affects compatibility.

The patch really needs to be split into logical pieces too.  It's a bit 
too big for a meaningful review.

Regards,

Anthony Liguori

>
> Signed-off-by: Benoit Hudzia<benoit.hudzia@sap.com>
> Signed-off-by: Petter Svard<petters@cs.umu.se>
> Signed-off-by: Aidan Shribman<aidan.shribman@sap.com>
>
> ---
>
>   arch_init.c       |  647 +++++++++++++++++++++++++++++++++++++++++++++++++----
>   block-migration.c |    3 +-
>   hmp-commands.hx   |   36 +++-
>   hw/hw.h           |    3 +-
>   migration-exec.c  |    6 +-
>   migration-fd.c    |    6 +-
>   migration-tcp.c   |    6 +-
>   migration-unix.c  |    6 +-
>   migration.c       |   33 +++-
>   migration.h       |   23 ++-
>   qmp-commands.hx   |   43 +++-
>   savevm.c          |   13 +-
>   sysemu.h          |    3 +-
>   13 files changed, 749 insertions(+), 79 deletions(-)
>
> diff --git a/arch_init.c b/arch_init.c
> index 4486925..069cd67 100644
> --- a/arch_init.c
> +++ b/arch_init.c
> @@ -27,6 +27,7 @@
>   #include<sys/types.h>
>   #include<sys/mman.h>
>   #endif
> +#include<assert.h>
>   #include "config.h"
>   #include "monitor.h"
>   #include "sysemu.h"
> @@ -41,6 +42,24 @@
>   #include "gdbstub.h"
>   #include "hw/smbios.h"
>
> +//#define DEBUG_ARCH_INIT
> +#ifdef DEBUG_ARCH_INIT
> +#define DPRINTF(fmt, ...) \
> +    do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0)
> +#else
> +#define DPRINTF(fmt, ...) \
> +    do { } while (0)
> +#endif
> +
> +//#define DEBUG_ARCH_INIT_CKSUM
> +#ifdef DEBUG_ARCH_INIT_CKSUM
> +#define PAGE_LOG(addr, pdata, fmt, ...) \
> +    do { page_log(addr, pdata, fmt, ## __VA_ARGS__); } while (0)
> +#else
> +#define PAGE_LOG(addr, pdata, fmt, ...) \
> +    do { } while (0)
> +#endif
> +
>   #ifdef TARGET_SPARC
>   int graphic_width = 1024;
>   int graphic_height = 768;
> @@ -88,6 +107,402 @@ const uint32_t arch_type = QEMU_ARCH;
>   #define RAM_SAVE_FLAG_PAGE     0x08
>   #define RAM_SAVE_FLAG_EOS      0x10
>   #define RAM_SAVE_FLAG_CONTINUE 0x20
> +#define RAM_SAVE_FLAG_XBRLE    0x40
> +
> +/***********************************************************/
> +/* Page cache for storing previous pages as basis for XBRLE compression */
> +#define CACHE_N_WAY 2 /* 2-way associative cache */
> +
> +typedef struct CacheItem {
> +    ram_addr_t it_addr;
> +    unsigned long it_age;
> +    uint8_t *it_data;
> +} CacheItem;
> +
> +typedef struct CacheBucket {
> +    CacheItem bkt_item[CACHE_N_WAY];
> +} CacheBucket;
> +
> +static CacheBucket *page_cache;
> +static int64_t cache_num_buckets;
> +static uint64_t cache_max_item_age;
> +static int64_t cache_num_items;
> +
> +static void cache_init(ssize_t num_buckets);
> +static void cache_fini(void);
> +static int cache_is_cached(ram_addr_t addr);
> +static int cache_get_oldest(CacheBucket *buck);
> +static int cache_get_newest(CacheBucket *buck, ram_addr_t addr);
> +static void cache_insert(ram_addr_t id, uint8_t *pdata);
> +static unsigned long cache_get_cache_pos(ram_addr_t address);
> +static CacheItem *cache_item_get(unsigned long pos, int item);
> +
> +/***********************************************************/
> +/* RAM Migration State */
> +typedef struct ArchMigrationState {
> +    int use_xbrle;
> +    int64_t xbrle_cache_size;
> +} ArchMigrationState;
> +
> +static ArchMigrationState arch_mig_state;
> +
> +void arch_set_params(int blk_enable, int shared_base, int use_xbrle,
> +        int64_t xbrle_cache_size, void *opaque)
> +{
> +    arch_mig_state.use_xbrle = use_xbrle;
> +    arch_mig_state.xbrle_cache_size = xbrle_cache_size;
> +}
> +
> +/***********************************************************/
> +/* XBRLE (Xor Based Run-Length Encoding) */
> +typedef struct XBRLEHeader {
> +    uint8_t xh_flags;
> +    uint16_t xh_len;
> +    uint32_t xh_cksum;
> +} XBRLEHeader;
> +
> +static int rle_encode(uint8_t *src, int slen, uint8_t *dst, int dlen);
> +static int rle_decode(uint8_t *src, int slen, uint8_t *dst, int dlen);
> +#ifdef DEBUG_ARCH_INIT_CKSUM
> +static uint32_t page_cksum(uint8_t *buf);
> +static void page_log(ram_addr_t addr, uint8_t *pdata, const char *fmt, ...);
> +#endif
> +
> +/***********************************************************/
> +/* benchmarking */
> +typedef struct BenchInfo {
> +    uint64_t normal_pages;
> +    uint64_t xbrle_pages;
> +    uint64_t xbrle_bytes;
> +    uint64_t xbrle_pages_aborted;
> +    uint64_t dup_pages;
> +    uint64_t iterations;
> +} BenchInfo;
> +
> +static BenchInfo bench;
> +
> +/***********************************************************/
> +/* XBRLE page cache implementation */
> +static CacheItem *cache_item_get(unsigned long pos, int item)
> +{
> +    assert(page_cache);
> +    return&page_cache[pos].bkt_item[item];
> +}
> +
> +#ifdef DEBUG_ARCH_INIT_CKSUM
> +static int64_t cache_max_items(void)
> +{
> +    return cache_num_buckets * CACHE_N_WAY;
> +}
> +#endif /* DEBUG_ARCH_INIT_CKSUM */
> +
> +static void cache_init(int64_t num_bytes)
> +{
> +    int i;
> +
> +    cache_num_items = 0;
> +    cache_max_item_age = 0;
> +    cache_num_buckets = num_bytes / (TARGET_PAGE_SIZE * CACHE_N_WAY);
> +    assert(cache_num_buckets);
> +    DPRINTF("Setting cache buckets to %ld\n", cache_num_buckets);
> +
> +    assert(!page_cache);
> +    page_cache = (CacheBucket *)qemu_mallocz((cache_num_buckets) *
> +            sizeof(CacheBucket));
> +
> +    for (i = 0; i<  cache_num_buckets; i++) {
> +        int j;
> +        for (j = 0; j<  CACHE_N_WAY; j++) {
> +            CacheItem *it = cache_item_get(i, j);
> +            it->it_data = NULL;
> +            it->it_age = 0;
> +            it->it_addr = -1;
> +        }
> +    }
> +}
> +
> +static void cache_fini(void)
> +{
> +    int i;
> +
> +    assert(page_cache);
> +
> +    for (i = 0; i<  cache_num_buckets; i++) {
> +        int j;
> +        for (j = 0; j<  CACHE_N_WAY; j++) {
> +            CacheItem *it = cache_item_get(i, j);
> +            qemu_free(it->it_data);
> +            it->it_data = 0;
> +        }
> +    }
> +
> +    qemu_free(page_cache);
> +    page_cache = NULL;
> +}
> +
> +static unsigned long cache_get_cache_pos(ram_addr_t address)
> +{
> +    unsigned long pos;
> +
> +    assert(cache_num_buckets);
> +    pos = (address/TARGET_PAGE_SIZE)&  (cache_num_buckets - 1);
> +    return pos;
> +}
> +
> +static int cache_get_newest(CacheBucket *buck, ram_addr_t addr)
> +{
> +    unsigned long big = 0;
> +    int big_pos = -1;
> +    int j;
> +
> +    assert(page_cache);
> +
> +    for (j = 0; j<  CACHE_N_WAY; j++) {
> +        CacheItem *it =&buck->bkt_item[j];
> +
> +        if (it->it_addr != addr) {
> +            continue;
> +        }
> +
> +        if (!j || it->it_age>  big) {
> +            big = it->it_age;
> +            big_pos = j;
> +        }
> +    }
> +
> +    return big_pos;
> +}
> +
> +static int cache_get_oldest(CacheBucket *buck)
> +{
> +    unsigned long small = 0;
> +    int small_pos = -1;
> +    int j;
> +
> +    assert(page_cache);
> +
> +    for (j = 0; j<  CACHE_N_WAY; j++) {
> +        CacheItem *it =&buck->bkt_item[j];
> +
> +        if (!j || it->it_age<   small) {
> +            small = it->it_age;
> +            small_pos = j;
> +        }
> +    }
> +
> +    return small_pos;
> +}
> +
> +static int cache_is_cached(ram_addr_t addr)
> +{
> +    unsigned long pos = cache_get_cache_pos(addr);
> +
> +    assert(page_cache);
> +    CacheBucket *bucket =&page_cache[pos];
> +    return cache_get_newest(bucket, addr);
> +}
> +
> +static void cache_insert(unsigned long addr, uint8_t *pdata)
> +{
> +    unsigned long pos;
> +    int slot = -1;
> +    CacheBucket *bucket;
> +
> +    pos = cache_get_cache_pos(addr);
> +    assert(page_cache);
> +    bucket =&page_cache[pos];
> +    slot = cache_get_oldest(bucket); /* evict LRU */
> +
> +    /* actual update of entry */
> +    CacheItem *it = cache_item_get(pos, slot);
> +    if (!it->it_data) {
> +        cache_num_items++;
> +    }
> +    qemu_free(it->it_data);
> +    it->it_data = pdata;
> +    it->it_age = ++cache_max_item_age;
> +    it->it_addr = addr;
> +}
> +
> +/* XBRLE (Xor Based Run-Length Encoding) */
> +static int rle_encode(uint8_t *src, int slen, uint8_t *dst, int dlen)
> +{
> +    int d = 0, ch_run = 0, i;
> +    uint8_t prev, ch;
> +
> +    for (i = 0; i<= slen; i++) {
> +        if (i != slen) {
> +            ch = src[i];
> +        }
> +
> +        if (!i || (i != slen&&  ch == prev&&  ch_run<  255)) {
> +            ch_run++;
> +        } else {
> +            if (d+2>  dlen)
> +                return -1;
> +            *dst++ = ch_run;
> +            *dst++ = prev;
> +            d += 2;
> +            ch_run = 1;
> +        }
> +
> +        prev = ch;
> +    }
> +    return d;
> +}
> +
> +static int rle_decode(uint8_t *src, int slen, uint8_t *dst, int dlen)
> +{
> +    int d = 0, s;
> +
> +    for (s = 0; s<  slen-1; s += 2) {
> +        uint8_t ch_run = src[s];
> +        uint8_t ch = src[s+1];
> +        while (ch_run--) {
> +            if (d == dlen) {
> +                return -1;
> +            }
> +            dst[d] = ch;
> +            d++;
> +        }
> +    }
> +    return d;
> +}
> +
> +#define PAGE_SAMPLE_PERCENT 0.02
> +#define PAGE_SAMPLE_SIZE (TARGET_PAGE_SIZE * PAGE_SAMPLE_PERCENT)
> +#define BYTES_CHANGED_PERCENT 0.30
> +
> +static int is_page_good_for_xbrle(uint8_t *old, uint8_t *new)
> +{
> +    int i, bytes_changed = 0;
> +
> +    srand(time(NULL)+getpid()+getpid()*987654+rand());
> +
> +    for (i = 0; i<  PAGE_SAMPLE_SIZE; i++) {
> +        unsigned long pos = (int) (rand() * TARGET_PAGE_SIZE / (RAND_MAX+1.0));
> +
> +         if (old[pos] != new[pos]) {
> +             bytes_changed++;
> +         }
> +    }
> +
> +    return (((float) bytes_changed) / PAGE_SAMPLE_SIZE)<  BYTES_CHANGED_PERCENT;
> +}
> +
> +static void xor_encode(uint8_t *dst, uint8_t *src1, uint8_t *src2)
> +{
> +    int i;
> +
> +    for (i = 0; i<  TARGET_PAGE_SIZE; i++) {
> +        dst[i] = src1[i] ^ src2[i];
> +    }
> +}
> +
> +static void save_block_hdr(QEMUFile *f,
> +        RAMBlock *block, ram_addr_t offset, int cont, int flag)
> +{
> +        qemu_put_be64(f, offset | cont | flag);
> +        if (!cont) {
> +                qemu_put_byte(f, strlen(block->idstr));
> +                qemu_put_buffer(f, (uint8_t *)block->idstr,
> +                                strlen(block->idstr));
> +        }
> +}
> +
> +#define ENCODING_FLAG_XBRLE 0x1
> +#define ENCODING_FLAG_CKSUM 0x2
> +
> +static int save_xbrle_page(QEMUFile *f, uint8_t *current_data,
> +        ram_addr_t current_addr, RAMBlock *block, ram_addr_t offset, int cont)
> +{
> +    int cache_location = -1, slot = -1, encoded_len = 0, bytes_sent = 0;
> +    XBRLEHeader hdr = {0};
> +    CacheItem *it;
> +    uint8_t *xor_buf = NULL, *xbrle_buf = NULL;
> +
> +    /* get location */
> +    slot = cache_is_cached(current_addr);
> +    if (slot == -1) {
> +        goto done;
> +    }
> +    cache_location = cache_get_cache_pos(current_addr);
> +
> +    /* abort if page changed too much */
> +    it = cache_item_get(cache_location, slot);
> +    if (!is_page_good_for_xbrle(it->it_data, current_data)) {
> +        DPRINTF("Page changed too much! Aborting XBRLE.\n");
> +        bench.xbrle_pages_aborted++;
> +        goto done;
> +    }
> +
> +    /* XOR encoding */
> +    xor_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE);
> +    xor_encode(xor_buf, it->it_data, current_data);
> +
> +    /* XBRLE (XOR+RLE) encoding */
> +    xbrle_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE);
> +    encoded_len = rle_encode(xor_buf, TARGET_PAGE_SIZE, xbrle_buf,
> +            TARGET_PAGE_SIZE);
> +
> +    if (encoded_len<  0) {
> +        DPRINTF("XBRLE encoding oeverflow - sending uncompressed\n");
> +        goto done;
> +    }
> +
> +    hdr.xh_len = encoded_len;
> +    hdr.xh_flags |= ENCODING_FLAG_XBRLE;
> +#ifdef DEBUG_ARCH_INIT_CKSUM
> +    hdr.xh_cksum = page_cksum(current_data);
> +    hdr.xh_flags |= ENCODING_FLAG_CKSUM;
> +#endif
> +
> +    /* Send XBRLE compressed page */
> +    save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBRLE);
> +    qemu_put_buffer(f, (uint8_t *)&hdr, sizeof(hdr));
> +    qemu_put_buffer(f, xbrle_buf, encoded_len);
> +    PAGE_LOG(current_addr, current_data, "XBRLE page (enc len %d)\n",
> +            encoded_len);
> +    bench.xbrle_pages++;
> +    bytes_sent = encoded_len + sizeof(hdr);
> +    bench.xbrle_bytes += bytes_sent;
> +
> +done:
> +    qemu_free(xor_buf);
> +    qemu_free(xbrle_buf);
> +    return bytes_sent;
> +}
> +
> +#ifdef DEBUG_ARCH_INIT_CKSUM
> +static uint32_t page_cksum(uint8_t *buf)
> +{
> +    uint32_t res = 0;
> +    int i;
> +
> +    for (i = 0; i<  TARGET_PAGE_SIZE; i++) {
> +        res += buf[i];
> +    }
> +
> +    return res;
> +}
> +
> +static void page_log(ram_addr_t addr, uint8_t *pdata, const char *fmt, ...)
> +{
> +    va_list arg_ptr;
> +    static FILE *fp;
> +    static uint32_t page_seq;
> +
> +    va_start(arg_ptr, fmt);
> +    if (!fp) {
> +        fp = fopen("mig.log", "w");
> +    }
> +    page_seq++;
> +    fprintf(fp, "[seq %d addr 0x%lX cksum 0x%X] ", page_seq,
> +            (unsigned long) addr, page_cksum(pdata));
> +    vfprintf(fp, fmt, arg_ptr);
> +    va_end(arg_ptr);
> +}
> +#endif /* DEBUG_ARCH_INIT_CKSUM */
>
>   static int is_dup_page(uint8_t *page, uint8_t ch)
>   {
> @@ -107,7 +522,7 @@ static int is_dup_page(uint8_t *page, uint8_t ch)
>   static RAMBlock *last_block;
>   static ram_addr_t last_offset;
>
> -static int ram_save_block(QEMUFile *f)
> +static int ram_save_block(QEMUFile *f, int stage)
>   {
>       RAMBlock *block = last_block;
>       ram_addr_t offset = last_offset;
> @@ -128,28 +543,32 @@ static int ram_save_block(QEMUFile *f)
>                                               current_addr + TARGET_PAGE_SIZE,
>                                               MIGRATION_DIRTY_FLAG);
>
> -            p = block->host + offset;
> +            p = qemu_mallocz(TARGET_PAGE_SIZE);
> +            memcpy(p, block->host + offset, TARGET_PAGE_SIZE);
>
>               if (is_dup_page(p, *p)) {
> -                qemu_put_be64(f, offset | cont | RAM_SAVE_FLAG_COMPRESS);
> -                if (!cont) {
> -                    qemu_put_byte(f, strlen(block->idstr));
> -                    qemu_put_buffer(f, (uint8_t *)block->idstr,
> -                                    strlen(block->idstr));
> -                }
> +                save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS);
>                   qemu_put_byte(f, *p);
>                   bytes_sent = 1;
> -            } else {
> -                qemu_put_be64(f, offset | cont | RAM_SAVE_FLAG_PAGE);
> -                if (!cont) {
> -                    qemu_put_byte(f, strlen(block->idstr));
> -                    qemu_put_buffer(f, (uint8_t *)block->idstr,
> -                                    strlen(block->idstr));
> +                bench.dup_pages++;
> +                PAGE_LOG(current_addr, p, "DUP page\n");
> +            } else if (stage == 2&&  arch_mig_state.use_xbrle) {
> +                bytes_sent = save_xbrle_page(f, p, current_addr, block,
> +                    offset, cont);
> +                if (bytes_sent) {
> +                    cache_insert(current_addr, p);
>                   }
> +            }
> +            if (!bytes_sent) {
> +                save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
>                   qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
>                   bytes_sent = TARGET_PAGE_SIZE;
> +                bench.normal_pages++;
> +                PAGE_LOG(current_addr, p, "NORMAL page\n");
> +                if (arch_mig_state.use_xbrle) {
> +                    cache_insert(current_addr, p);
> +                }
>               }
> -
>               break;
>           }
>
> @@ -212,6 +631,55 @@ uint64_t ram_bytes_total(void)
>       return total;
>   }
>
> +#ifdef DEBUG_ARCH_INIT
> +static void dump_percentage(const char *label, unsigned long absolute,
> +        unsigned long total)
> +{
> +    printf("%s: %ld (%0.2f%%)\n", label, absolute,
> +            (total ? (100.0 * absolute / total) : 0));
> +}
> +
> +static void dump_migration_statistics(void)
> +{
> +    unsigned long normal_bytes = bench.normal_pages * TARGET_PAGE_SIZE;
> +    unsigned long total_pages = bench.normal_pages + bench.xbrle_pages
> +        + bench.dup_pages;
> +    unsigned long total_bytes = normal_bytes + bench.xbrle_bytes
> +        + bench.dup_pages;
> +
> +    printf("\n");
> +    printf("=====================================================\n");
> +    printf("Save VM Memory Statistics (SUCCESS or FAILURE):\n");
> +    printf("Iterations: %ld\n", bench.iterations);
> +
> +    dump_percentage("Normal pages", bench.normal_pages, total_pages);
> +    dump_percentage("Normal bytes", normal_bytes, total_bytes);
> +
> +    dump_percentage("Dup pages", bench.dup_pages, total_pages);
> +    dump_percentage("Dup bytes", bench.dup_pages, total_bytes);
> +
> +    if (arch_mig_state.use_xbrle) {
> +        dump_percentage("XBRLE pages", bench.xbrle_pages, total_pages);
> +        dump_percentage("XBRLE bytes", bench.xbrle_bytes, total_bytes);
> +        dump_percentage("Aborted XBRLE pages from XBRLE",
> +            bench.xbrle_pages_aborted,
> +            bench.xbrle_pages + bench.xbrle_pages_aborted);
> +    }
> +
> +    dump_percentage("Total pages", total_pages, total_pages);
> +    dump_percentage("Total bytes", total_bytes, total_bytes);
> +
> +    if (arch_mig_state.use_xbrle) {
> +        printf("Cache number of inserts: %ld\n", cache_max_item_age);
> +        printf("Cache max items: %ld\n", cache_max_items());
> +        dump_percentage("Cache number of items", cache_num_items,
> +            cache_max_items());
> +    }
> +
> +    printf("=====================================================\n");
> +}
> +#endif /* DEBUG_ARCH_INIT */
> +
>   int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
>   {
>       ram_addr_t addr;
> @@ -235,6 +703,10 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
>           last_block = NULL;
>           last_offset = 0;
>
> +        if (arch_mig_state.use_xbrle) {
> +            cache_init(arch_mig_state.xbrle_cache_size);
> +        }
> +
>           /* Make sure all dirty bits are set */
>           QLIST_FOREACH(block,&ram_list.blocks, next) {
>               for (addr = block->offset; addr<  block->offset + block->length;
> @@ -264,8 +736,9 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
>       while (!qemu_file_rate_limit(f)) {
>           int bytes_sent;
>
> -        bytes_sent = ram_save_block(f);
> +        bytes_sent = ram_save_block(f, stage);
>           bytes_transferred += bytes_sent;
> +        bench.iterations++;
>           if (bytes_sent == 0) { /* no more blocks */
>               break;
>           }
> @@ -285,19 +758,83 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
>           int bytes_sent;
>
>           /* flush all remaining blocks regardless of rate limiting */
> -        while ((bytes_sent = ram_save_block(f)) != 0) {
> +        while ((bytes_sent = ram_save_block(f, stage))) {
>               bytes_transferred += bytes_sent;
>           }
>           cpu_physical_memory_set_dirty_tracking(0);
> +        if (arch_mig_state.use_xbrle) {
> +            cache_fini();
> +#ifdef DEBUG_ARCH_INIT
> +            dump_migration_statistics();
> +#endif
> +        }
>       }
>
>       qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>
>       expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
>
> +    DPRINTF("ram_save_live: expected(%ld)<= max(%ld)?\n", expected_time,
> +        migrate_max_downtime());
> +
>       return (stage == 2)&&  (expected_time<= migrate_max_downtime());
>   }
>
> +static int load_xbrle(QEMUFile *f, ram_addr_t addr, void *host)
> +{
> +    int ret, rc = -1;
> +    uint8_t *prev_page, *xor_buf, *xbrle_buf;
> +    XBRLEHeader hdr = {0};
> +
> +    /* extract RLE header */
> +    qemu_get_buffer(f, (uint8_t *)&hdr, sizeof(hdr));
> +    if (!(hdr.xh_flags&  ENCODING_FLAG_XBRLE)) {
> +        fprintf(stderr, "Failed to load XBRLE page - wrong compression!\n");
> +        goto done;
> +    }
> +
> +    if (hdr.xh_len>  TARGET_PAGE_SIZE) {
> +        fprintf(stderr, "Failed to load XBRLE page - len overflow!\n");
> +        goto done;
> +    }
> +
> +    /* load data and decode */
> +    xbrle_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE);
> +    qemu_get_buffer(f, xbrle_buf, hdr.xh_len);
> +
> +    /* decode RLE */
> +    xor_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE);
> +    ret = rle_decode(xbrle_buf, hdr.xh_len, xor_buf, TARGET_PAGE_SIZE);
> +    if (ret == -1) {
> +        fprintf(stderr, "Failed to load XBRLE page - decode error!\n");
> +        goto done;
> +    }
> +
> +    if (ret != TARGET_PAGE_SIZE) {
> +        fprintf(stderr, "Failed to load XBRLE page - size %d expected %d!\n",
> +            ret, TARGET_PAGE_SIZE);
> +        goto done;
> +    }
> +
> +    /* decode XOR delta */
> +    prev_page = host;
> +    xor_encode(prev_page, prev_page, xor_buf);
> +#ifdef DEBUG_ARCH_INIT_CKSUM
> +    if (hdr.xh_flags&  ENCODING_FLAG_CKSUM&&
> +            hdr.xh_cksum != page_cksum(prev_page)) {
> +        fprintf(stderr, "Failed to load XBRLE page - bad checksum!\n");
> +        goto done;
> +    }
> +#endif
> +
> +    PAGE_LOG(addr, prev_page, "XBRLE page (enc len %d)\n", hdr.xh_len);
> +    rc = 0;
> +done:
> +    qemu_free(xor_buf);
> +    qemu_free(xbrle_buf);
> +    return rc;
> +}
> +
>   static inline void *host_from_stream_offset(QEMUFile *f,
>                                               ram_addr_t offset,
>                                               int flags)
> @@ -328,16 +865,38 @@ static inline void *host_from_stream_offset(QEMUFile *f,
>       return NULL;
>   }
>
> +static inline void *host_from_stream_offset_versioned(int version_id,
> +        QEMUFile *f, ram_addr_t offset, int flags)
> +{
> +        void *host;
> +        if (version_id == 3) {
> +                host = qemu_get_ram_ptr(offset);
> +        } else {
> +                host = host_from_stream_offset(f, offset, flags);
> +        }
> +        if (!host) {
> +            fprintf(stderr, "Failed to convert RAM address to host"
> +                    " for offset 0x%lX!\n", offset);
> +            abort();
> +        }
> +        return host;
> +}
> +
>   int ram_load(QEMUFile *f, void *opaque, int version_id)
>   {
>       ram_addr_t addr;
> -    int flags;
> +    int flags, ret = 0;
> +    static uint64_t seq_iter;
> +
> +    seq_iter++;
>
>       if (version_id<  3 || version_id>  4) {
> -        return -EINVAL;
> +        ret = -EINVAL;
> +        goto done;
>       }
>
>       do {
> +        void *host;
>           addr = qemu_get_be64(f);
>
>           flags = addr&  ~TARGET_PAGE_MASK;
> @@ -346,7 +905,8 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>           if (flags&  RAM_SAVE_FLAG_MEM_SIZE) {
>               if (version_id == 3) {
>                   if (addr != ram_bytes_total()) {
> -                    return -EINVAL;
> +                    ret = -EINVAL;
> +                    goto done;
>                   }
>               } else {
>                   /* Synchronize RAM block list */
> @@ -365,8 +925,10 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>
>                       QLIST_FOREACH(block,&ram_list.blocks, next) {
>                           if (!strncmp(id, block->idstr, sizeof(id))) {
> -                            if (block->length != length)
> -                                return -EINVAL;
> +                            if (block->length != length) {
> +                                ret = -EINVAL;
> +                                goto done;
> +                            }
>                               break;
>                           }
>                       }
> @@ -374,7 +936,8 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>                       if (!block) {
>                           fprintf(stderr, "Unknown ramblock \"%s\", cannot "
>                                   "accept migration\n", id);
> -                        return -EINVAL;
> +                        ret = -EINVAL;
> +                        goto done;
>                       }
>
>                       total_ram_bytes -= length;
> @@ -383,17 +946,10 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>           }
>
>           if (flags&  RAM_SAVE_FLAG_COMPRESS) {
> -            void *host;
>               uint8_t ch;
>
> -            if (version_id == 3)
> -                host = qemu_get_ram_ptr(addr);
> -            else
> -                host = host_from_stream_offset(f, addr, flags);
> -            if (!host) {
> -                return -EINVAL;
> -            }
> -
> +            host = host_from_stream_offset_versioned(version_id,
> +                            f, addr, flags);
>               ch = qemu_get_byte(f);
>               memset(host, ch, TARGET_PAGE_SIZE);
>   #ifndef _WIN32
> @@ -402,22 +958,31 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>                   qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
>               }
>   #endif
> +            PAGE_LOG(addr, host, "DUP page\n");
>           } else if (flags&  RAM_SAVE_FLAG_PAGE) {
> -            void *host;
> -
> -            if (version_id == 3)
> -                host = qemu_get_ram_ptr(addr);
> -            else
> -                host = host_from_stream_offset(f, addr, flags);
> -
> +            host = host_from_stream_offset_versioned(version_id,
> +                            f, addr, flags);
>               qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
> +            PAGE_LOG(addr, host, "NORMAL page\n");
> +        } else if (flags&  RAM_SAVE_FLAG_XBRLE) {
> +            host = host_from_stream_offset_versioned(version_id,
> +                            f, addr, flags);
> +            if (load_xbrle(f, addr, host)<  0) {
> +                ret = -EINVAL;
> +                goto done;
> +            }
>           }
> +
>           if (qemu_file_has_error(f)) {
> -            return -EIO;
> +            ret = -EIO;
> +            goto done;
>           }
>       } while (!(flags&  RAM_SAVE_FLAG_EOS));
>
> -    return 0;
> +done:
> +    DPRINTF("Completed load of VM with exit code %d seq iteration %ld\n",
> +            ret, seq_iter);
> +    return ret;
>   }
>
>   void qemu_service_io(void)
> diff --git a/block-migration.c b/block-migration.c
> index 3e66f49..004fc12 100644
> --- a/block-migration.c
> +++ b/block-migration.c
> @@ -689,7 +689,8 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
>       return 0;
>   }
>
> -static void block_set_params(int blk_enable, int shared_base, void *opaque)
> +static void block_set_params(int blk_enable, int shared_base,
> +       int use_xbrle, int64_t xbrle_cache_size, void *opaque)
>   {
>       block_mig_state.blk_enable = blk_enable;
>       block_mig_state.shared_base = shared_base;
> diff --git a/hmp-commands.hx b/hmp-commands.hx
> index e5585ba..e49d5be 100644
> --- a/hmp-commands.hx
> +++ b/hmp-commands.hx
> @@ -717,24 +717,27 @@ ETEXI
>
>       {
>           .name       = "migrate",
> -        .args_type  = "detach:-d,blk:-b,inc:-i,uri:s",
> -        .params     = "[-d] [-b] [-i] uri",
> -        .help       = "migrate to URI (using -d to not wait for completion)"
> -                     "\n\t\t\t -b for migration without shared storage with"
> -                     " full copy of disk\n\t\t\t -i for migration without "
> -                     "shared storage with incremental copy of disk "
> -                     "(base image shared between src and destination)",
> +        .args_type  = "detach:-d,blk:-b,inc:-i,xbrle:-x,uri:s",
> +        .params     = "[-d] [-b] [-i] [-x] uri",
> +        .help       = "migrate to URI"
> +                      "\n\t -d to not wait for completion"
> +                      "\n\t -b for migration without shared storage with"
> +                      " full copy of disk"
> +                      "\n\t -i for migration without"
> +                      " shared storage with incremental copy of disk"
> +                      " (base image shared between source and destination)"
> +                      "\n\t -x to use XBRLE page delta compression",
>           .user_print = monitor_user_noop,
>          .mhandler.cmd_new = do_migrate,
>       },
>
> -
>   STEXI
> -@item migrate [-d] [-b] [-i] @var{uri}
> +@item migrate [-d] [-b] [-i] [-x] @var{uri}
>   @findex migrate
>   Migrate to @var{uri} (using -d to not wait for completion).
>          -b for migration with full copy of disk
>          -i for migration with incremental copy of disk (base image is shared)
> +    -x to use XBRLE page delta compression
>   ETEXI
>
>       {
> @@ -753,10 +756,23 @@ Cancel the current VM migration.
>   ETEXI
>
>       {
> +        .name       = "migrate_set_cachesize",
> +        .args_type  = "value:s",
> +        .params     = "value",
> +        .help       = "set cache size (in MB) for XBRLE migrations",
> +        .mhandler.cmd = do_migrate_set_cachesize,
> +    },
> +
> +STEXI
> +@item migrate_set_cachesize @var{value}
> +Set cache size (in MB) for xbrle migrations.
> +ETEXI
> +
> +    {
>           .name       = "migrate_set_speed",
>           .args_type  = "value:o",
>           .params     = "value",
> -        .help       = "set maximum speed (in bytes) for migrations. "
> +        .help       = "set maximum XBRLE cache size (in bytes) for migrations. "
>          "Defaults to MB if no size suffix is specified, ie. B/K/M/G/T",
>           .user_print = monitor_user_noop,
>           .mhandler.cmd_new = do_migrate_set_speed,
> diff --git a/hw/hw.h b/hw/hw.h
> index 9d2cfc2..de9e5a6 100644
> --- a/hw/hw.h
> +++ b/hw/hw.h
> @@ -239,7 +239,8 @@ static inline void qemu_get_sbe64s(QEMUFile *f, int64_t *pv)
>   int64_t qemu_ftell(QEMUFile *f);
>   int64_t qemu_fseek(QEMUFile *f, int64_t pos, int whence);
>
> -typedef void SaveSetParamsHandler(int blk_enable, int shared, void * opaque);
> +typedef void SaveSetParamsHandler(int blk_enable, int shared,
> +       int use_xbrle, int64_t xbrle_cache_size, void * opaque);
>   typedef void SaveStateHandler(QEMUFile *f, void *opaque);
>   typedef int SaveLiveStateHandler(Monitor *mon, QEMUFile *f, int stage,
>                                    void *opaque);
> diff --git a/migration-exec.c b/migration-exec.c
> index 14718dd..fe8254a 100644
> --- a/migration-exec.c
> +++ b/migration-exec.c
> @@ -67,7 +67,9 @@ MigrationState *exec_start_outgoing_migration(Monitor *mon,
>                                                int64_t bandwidth_limit,
>                                                int detach,
>                                                int blk,
> -                                             int inc)
> +                          int inc,
> +                          int use_xbrle,
> +                          int64_t xbrle_cache_size)
>   {
>       FdMigrationState *s;
>       FILE *f;
> @@ -99,6 +101,8 @@ MigrationState *exec_start_outgoing_migration(Monitor *mon,
>
>       s->mig_state.blk = blk;
>       s->mig_state.shared = inc;
> +    s->mig_state.use_xbrle = use_xbrle;
> +    s->mig_state.xbrle_cache_size = xbrle_cache_size;
>
>       s->state = MIG_STATE_ACTIVE;
>       s->mon = NULL;
> diff --git a/migration-fd.c b/migration-fd.c
> index 6d14505..4a1ddbd 100644
> --- a/migration-fd.c
> +++ b/migration-fd.c
> @@ -56,7 +56,9 @@ MigrationState *fd_start_outgoing_migration(Monitor *mon,
>                                              int64_t bandwidth_limit,
>                                              int detach,
>                                              int blk,
> -                                           int inc)
> +                        int inc,
> +                        int use_xbrle,
> +                        int64_t xbrle_cache_size)
>   {
>       FdMigrationState *s;
>
> @@ -82,6 +84,8 @@ MigrationState *fd_start_outgoing_migration(Monitor *mon,
>
>       s->mig_state.blk = blk;
>       s->mig_state.shared = inc;
> +    s->mig_state.use_xbrle = use_xbrle;
> +    s->mig_state.xbrle_cache_size = xbrle_cache_size;
>
>       s->state = MIG_STATE_ACTIVE;
>       s->mon = NULL;
> diff --git a/migration-tcp.c b/migration-tcp.c
> index b55f419..4ca5bf6 100644
> --- a/migration-tcp.c
> +++ b/migration-tcp.c
> @@ -81,7 +81,9 @@ MigrationState *tcp_start_outgoing_migration(Monitor *mon,
>                                                int64_t bandwidth_limit,
>                                                int detach,
>                                               int blk,
> -                                            int inc)
> +                         int inc,
> +                         int use_xbrle,
> +                         int64_t xbrle_cache_size)
>   {
>       struct sockaddr_in addr;
>       FdMigrationState *s;
> @@ -101,6 +103,8 @@ MigrationState *tcp_start_outgoing_migration(Monitor *mon,
>
>       s->mig_state.blk = blk;
>       s->mig_state.shared = inc;
> +    s->mig_state.use_xbrle = use_xbrle;
> +    s->mig_state.xbrle_cache_size = xbrle_cache_size;
>
>       s->state = MIG_STATE_ACTIVE;
>       s->mon = NULL;
> diff --git a/migration-unix.c b/migration-unix.c
> index 57232c0..0813902 100644
> --- a/migration-unix.c
> +++ b/migration-unix.c
> @@ -80,7 +80,9 @@ MigrationState *unix_start_outgoing_migration(Monitor *mon,
>                                                int64_t bandwidth_limit,
>                                                int detach,
>                                                int blk,
> -                                             int inc)
> +                          int inc,
> +                          int use_xbrle,
> +                          int64_t xbrle_cache_size)
>   {
>       FdMigrationState *s;
>       struct sockaddr_un addr;
> @@ -100,6 +102,8 @@ MigrationState *unix_start_outgoing_migration(Monitor *mon,
>
>       s->mig_state.blk = blk;
>       s->mig_state.shared = inc;
> +    s->mig_state.use_xbrle = use_xbrle;
> +    s->mig_state.xbrle_cache_size = xbrle_cache_size;
>
>       s->state = MIG_STATE_ACTIVE;
>       s->mon = NULL;
> diff --git a/migration.c b/migration.c
> index 9ee8b17..b5b530b 100644
> --- a/migration.c
> +++ b/migration.c
> @@ -34,6 +34,9 @@
>   /* Migration speed throttling */
>   static uint32_t max_throttle = (32<<  20);
>
> +/* Migration XBRLE cache size */
> +static int64_t migrate_cache_size = 0x8000000; /* 256 MB size */
> +
>   static MigrationState *current_migration;
>
>   int qemu_start_incoming_migration(const char *uri)
> @@ -80,6 +83,7 @@ int do_migrate(Monitor *mon, const QDict *qdict, QObject **ret_data)
>       int detach = qdict_get_try_bool(qdict, "detach", 0);
>       int blk = qdict_get_try_bool(qdict, "blk", 0);
>       int inc = qdict_get_try_bool(qdict, "inc", 0);
> +    int use_xbrle = qdict_get_try_bool(qdict, "xbrle", 0);
>       const char *uri = qdict_get_str(qdict, "uri");
>
>       if (current_migration&&
> @@ -90,17 +94,21 @@ int do_migrate(Monitor *mon, const QDict *qdict, QObject **ret_data)
>
>       if (strstart(uri, "tcp:",&p)) {
>           s = tcp_start_outgoing_migration(mon, p, max_throttle, detach,
> -                                         blk, inc);
> +                                         blk, inc, use_xbrle,
> +                                         migrate_cache_size);
>   #if !defined(WIN32)
>       } else if (strstart(uri, "exec:",&p)) {
>           s = exec_start_outgoing_migration(mon, p, max_throttle, detach,
> -                                          blk, inc);
> +                                          blk, inc, use_xbrle,
> +                                          migrate_cache_size);
>       } else if (strstart(uri, "unix:",&p)) {
>           s = unix_start_outgoing_migration(mon, p, max_throttle, detach,
> -                                          blk, inc);
> +                                          blk, inc, use_xbrle,
> +                                          migrate_cache_size);
>       } else if (strstart(uri, "fd:",&p)) {
>           s = fd_start_outgoing_migration(mon, p, max_throttle, detach,
> -                                        blk, inc);
> +                                        blk, inc, use_xbrle,
> +                                        migrate_cache_size);
>   #endif
>       } else {
>           monitor_printf(mon, "unknown migration protocol: %s\n", uri);
> @@ -341,7 +349,8 @@ void migrate_fd_connect(FdMigrationState *s)
>
>       DPRINTF("beginning savevm\n");
>       ret = qemu_savevm_state_begin(s->mon, s->file, s->mig_state.blk,
> -                                  s->mig_state.shared);
> +                                  s->mig_state.shared, s->mig_state.use_xbrle,
> +                                  s->mig_state.xbrle_cache_size);
>       if (ret<  0) {
>           DPRINTF("failed, %d\n", ret);
>           migrate_fd_error(s);
> @@ -448,3 +457,17 @@ int migrate_fd_close(void *opaque)
>       qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
>       return s->close(s);
>   }
> +
> +void do_migrate_set_cachesize(Monitor *mon, const QDict *qdict)
> +{
> +    ssize_t bytes;
> +    char *ptr;
> +    const char *value = qdict_get_str(qdict, "value");
> +
> +    bytes = strtod(value,&ptr);
> +    if (bytes>  0) {
> +        migrate_cache_size = bytes;
> +        monitor_printf(mon, "Cache size set to %ld bytes\n", bytes);
> +    }
> +}
> +
> diff --git a/migration.h b/migration.h
> index d13ed4f..62be2f8 100644
> --- a/migration.h
> +++ b/migration.h
> @@ -32,6 +32,8 @@ struct MigrationState
>       void (*release)(MigrationState *s);
>       int blk;
>       int shared;
> +    int use_xbrle;
> +    int64_t xbrle_cache_size;
>   };
>
>   typedef struct FdMigrationState FdMigrationState;
> @@ -76,7 +78,9 @@ MigrationState *exec_start_outgoing_migration(Monitor *mon,
>                                                int64_t bandwidth_limit,
>                                                int detach,
>                                                int blk,
> -                                             int inc);
> +                          int inc,
> +                          int use_xbrle,
> +                          int64_t xbrle_cache_size);
>
>   int tcp_start_incoming_migration(const char *host_port);
>
> @@ -85,7 +89,9 @@ MigrationState *tcp_start_outgoing_migration(Monitor *mon,
>                                               int64_t bandwidth_limit,
>                                               int detach,
>                                               int blk,
> -                                            int inc);
> +                         int inc,
> +                         int use_xbrle,
> +                         int64_t xbrle_cache_size);
>
>   int unix_start_incoming_migration(const char *path);
>
> @@ -94,7 +100,9 @@ MigrationState *unix_start_outgoing_migration(Monitor *mon,
>                                                int64_t bandwidth_limit,
>                                                int detach,
>                                                int blk,
> -                                             int inc);
> +                          int inc,
> +                          int use_xbrle,
> +                          int64_t xbrle_cache_size);
>
>   int fd_start_incoming_migration(const char *path);
>
> @@ -103,7 +111,9 @@ MigrationState *fd_start_outgoing_migration(Monitor *mon,
>                                              int64_t bandwidth_limit,
>                                              int detach,
>                                              int blk,
> -                                           int inc);
> +                        int inc,
> +                        int use_xbrle,
> +                        int64_t xbrle_cache_size);
>
>   void migrate_fd_monitor_suspend(FdMigrationState *s, Monitor *mon);
>
> @@ -134,4 +144,9 @@ static inline FdMigrationState *migrate_to_fms(MigrationState *mig_state)
>       return container_of(mig_state, FdMigrationState, mig_state);
>   }
>
> +void do_migrate_set_cachesize(Monitor *mon, const QDict *qdict);
> +
> +void arch_set_params(int blk_enable, int shared_base,
> +        int use_xbrle, int64_t xbrle_cache_size, void *opaque);
> +
>   #endif
> diff --git a/qmp-commands.hx b/qmp-commands.hx
> index 793cf1c..8fbe64b 100644
> --- a/qmp-commands.hx
> +++ b/qmp-commands.hx
> @@ -431,13 +431,16 @@ EQMP
>
>       {
>           .name       = "migrate",
> -        .args_type  = "detach:-d,blk:-b,inc:-i,uri:s",
> -        .params     = "[-d] [-b] [-i] uri",
> -        .help       = "migrate to URI (using -d to not wait for completion)"
> -                     "\n\t\t\t -b for migration without shared storage with"
> -                     " full copy of disk\n\t\t\t -i for migration without "
> -                     "shared storage with incremental copy of disk "
> -                     "(base image shared between src and destination)",
> +        .args_type  = "detach:-d,blk:-b,inc:-i,xbrle:-x,uri:s",
> +        .params     = "[-d] [-b] [-i] [-x] uri",
> +        .help       = "migrate to URI"
> +                      "\n\t -d to not wait for completion"
> +                      "\n\t -b for migration without shared storage with"
> +                      " full copy of disk"
> +                      "\n\t -i for migration without"
> +                      " shared storage with incremental copy of disk"
> +                      " (base image shared between source and destination)"
> +                      "\n\t -x to use XBRLE page delta compression",
>           .user_print = monitor_user_noop,
>          .mhandler.cmd_new = do_migrate,
>       },
> @@ -453,6 +456,7 @@ Arguments:
>   - "blk": block migration, full disk copy (json-bool, optional)
>   - "inc": incremental disk copy (json-bool, optional)
>   - "uri": Destination URI (json-string)
> +- "xbrle": to use XBRLE page delta compression
>
>   Example:
>
> @@ -494,6 +498,31 @@ Example:
>   EQMP
>
>       {
> +        .name       = "migrate_set_cachesize",
> +        .args_type  = "value:s",
> +        .params     = "value",
> +        .help       = "set cache size (in MB) for xbrle migrations",
> +        .mhandler.cmd = do_migrate_set_cachesize,
> +    },
> +
> +SQMP
> +migrate_set_cachesize
> +---------------------
> +
> +Set cache size to be used by XBRLE migration
> +
> +Arguments:
> +
> +- "value": cache size in bytes (json-number)
> +
> +Example:
> +
> +->  { "execute": "migrate_set_cachesize", "arguments": { "value": 500M } }
> +<- { "return": {} }
> +
> +EQMP
> +
> +    {
>           .name       = "migrate_set_speed",
>           .args_type  = "value:f",
>           .params     = "value",
> diff --git a/savevm.c b/savevm.c
> index 4e49765..93b512b 100644
> --- a/savevm.c
> +++ b/savevm.c
> @@ -1141,7 +1141,8 @@ int register_savevm(DeviceState *dev,
>                       void *opaque)
>   {
>       return register_savevm_live(dev, idstr, instance_id, version_id,
> -                                NULL, NULL, save_state, load_state, opaque);
> +                                arch_set_params, NULL, save_state,
> +                                load_state, opaque);
>   }
>
>   void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque)
> @@ -1428,15 +1429,17 @@ static int vmstate_save(QEMUFile *f, SaveStateEntry *se)
>   #define QEMU_VM_SUBSECTION           0x05
>
>   int qemu_savevm_state_begin(Monitor *mon, QEMUFile *f, int blk_enable,
> -                            int shared)
> +                            int shared, int use_xbrle,
> +                            int64_t xbrle_cache_size)
>   {
>       SaveStateEntry *se;
>
>       QTAILQ_FOREACH(se,&savevm_handlers, entry) {
>           if(se->set_params == NULL) {
>               continue;
> -       }
> -       se->set_params(blk_enable, shared, se->opaque);
> +        }
> +        se->set_params(blk_enable, shared, use_xbrle, xbrle_cache_size,
> +                se->opaque);
>       }
>
>       qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
> @@ -1577,7 +1580,7 @@ static int qemu_savevm_state(Monitor *mon, QEMUFile *f)
>
>       bdrv_flush_all();
>
> -    ret = qemu_savevm_state_begin(mon, f, 0, 0);
> +    ret = qemu_savevm_state_begin(mon, f, 0, 0, 0, 0);
>       if (ret<  0)
>           goto out;
>
> diff --git a/sysemu.h b/sysemu.h
> index b81a70e..15a0664 100644
> --- a/sysemu.h
> +++ b/sysemu.h
> @@ -74,7 +74,8 @@ void qemu_announce_self(void);
>   void main_loop_wait(int nonblocking);
>
>   int qemu_savevm_state_begin(Monitor *mon, QEMUFile *f, int blk_enable,
> -                            int shared);
> +                            int shared, int use_xbrle,
> +                            int64_t xbrle_cache_size);
>   int qemu_savevm_state_iterate(Monitor *mon, QEMUFile *f);
>   int qemu_savevm_state_complete(Monitor *mon, QEMUFile *f);
>   void qemu_savevm_state_cancel(Monitor *mon, QEMUFile *f);
>
>
>
Stefan Hajnoczi June 22, 2011, 12:25 p.m. UTC | #3
On Wed, Jun 22, 2011 at 1:01 PM, Anthony Liguori <anthony@codemonkey.ws> wrote:
> On 05/22/2011 07:00 AM, Shribman, Aidan wrote:
>>
>> Subject: [PATCH] XBRLE page delta compression for live migration of large
>> memory apps
>> From: Aidan Shribman<aidan.shribman@sap.com>
>>
>> By using XBRLE (Xor Based Run-Length-Encoding) we can reduce required
>> bandwidth for transfering of dirty memory pages during live migration
>>         migrate_set_cachesize<size>
>>         migrate -x<url>
>> Qemu host: Ubuntu 10.10
>> Testing: live migration (w and w/o XBRLE) tested successfully.
>
> By how much?
>
> This is a change to the live migration protocol, it would also require
> documentation and an understanding of how it affects compatibility.
>
> The patch really needs to be split into logical pieces too.  It's a bit too
> big for a meaningful review.

Two places where you could consider splitting the patch are the caching
and the sampling.  Are they necessary for correctness and could they
be submitted as follow-up patches to a core patch which does just the
XBRLE?

Also, whenever there are heuristics and use of floating point then
there is some magic going on.  It may be necessary and give a huge
performance boost but needs explanation so it is not a black box or
fragile mechanism once it has been merged upstream.

Stefan
Shribman, Aidan July 6, 2011, 12:04 p.m. UTC | #4
> From: Stefan Hajnoczi [mailto:stefanha@gmail.com] 

> Sent: Wednesday, June 22, 2011 3:26 PM

> 

> On Wed, Jun 22, 2011 at 1:01 PM, Anthony Liguori 

> <anthony@codemonkey.ws> wrote:

> >>

> >> By using XBRLE (Xor Based Run-Length-Encoding) we can 

> reduce required

> >> bandwidth for transfering of dirty memory pages during 

> live migration

> >>         migrate_set_cachesize<size>

> >>         migrate -x<url>

> >

> > By how much?


See "Evaluation of delta compression techniques for efficient live migration of large virtual machines" (http://portal.acm.org/citation.cfm?id=1952698) subsection 5.2.3:

In the final test, a VM running a SAP Central Instance ERP system
was migrated over Gigabit Ethernet. With XBRLE, the total migration time was reduced from 235 s to 139 s, the suspension time was reduced from 3 s to 0.2 s, and the ping downtime from 5 s to 1 s...

> >

> > This is a change to the live migration protocol, it would 

> also require

> > documentation and an understanding of how it affects compatibility.


The default behavior (i.e. without the -x migrate option) has not changed and therefore remains compatible with previous qemu versions. When initiating a migration with XBRLE, the remote peer must also support XBRLE; otherwise the migration will fail.

With regard to documentation, please advise.

> >

> > The patch really needs to be split into logical pieces too. 

>  It's a bit too

> > big for a meaningful review.

> 

> Two places where you could consider splitting the patch are the caching

> and the sampling.  Are they necessary for correctness and could they

> be submitted as follow-up patches to a core patch which does just the

> XBRLE?


Some changes were done to reduce code size:
(1) Sampling code - which was used for early detection of pages that had changed so much that XBRLE was not applicable - has been replaced with a simple check that the XBRLE delta does not exceed 1/3 of the page size (4096 bytes).
(2) Check-summing/page-logging code - which existed only under a debug compile-time ifdef option - was removed.
(3) XBRLE migration statistics - were replaced with a detailed 'info migrate' output - vital for tracking the XBRLE operation.
(4) 2-way associative cache - was not separated from the XBRLE code, as the cache is a fundamental part of the XBRLE implementation (XBRLE updates are the difference between the new page and the old cached page on the sender side).

Currently I don't see how to split the code into smaller meaningful pieces - for now I have re-submitted a single patch with corrections (see separate email [PATCH v2]).

> 

> Also, whenever there are heuristics and use of floating point then

> there is some magic going on.  It may be necessary and give a huge

> performance boost but needs explanation so it is not a black box or

> fragile mechanism once it has been merged upstream.


The code mentioned (which was responsible for sampling a page to test whether it was eligible for XBRLE encoding) has been removed.

> 

> Stefan


Aidan
diff mbox

Patch

diff --git a/arch_init.c b/arch_init.c
index 4486925..069cd67 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -27,6 +27,7 @@ 
 #include <sys/types.h>
 #include <sys/mman.h>
 #endif
+#include <assert.h>
 #include "config.h"
 #include "monitor.h"
 #include "sysemu.h"
@@ -41,6 +42,24 @@ 
 #include "gdbstub.h"
 #include "hw/smbios.h"

+//#define DEBUG_ARCH_INIT
+#ifdef DEBUG_ARCH_INIT
+#define DPRINTF(fmt, ...) \
+    do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
+
+//#define DEBUG_ARCH_INIT_CKSUM
+#ifdef DEBUG_ARCH_INIT_CKSUM
+#define PAGE_LOG(addr, pdata, fmt, ...) \
+    do { page_log(addr, pdata, fmt, ## __VA_ARGS__); } while (0)
+#else
+#define PAGE_LOG(addr, pdata, fmt, ...) \
+    do { } while (0)
+#endif
+
 #ifdef TARGET_SPARC
 int graphic_width = 1024;
 int graphic_height = 768;
@@ -88,6 +107,402 @@  const uint32_t arch_type = QEMU_ARCH;
 #define RAM_SAVE_FLAG_PAGE     0x08
 #define RAM_SAVE_FLAG_EOS      0x10
 #define RAM_SAVE_FLAG_CONTINUE 0x20
+#define RAM_SAVE_FLAG_XBRLE    0x40
+
+/***********************************************************/
+/* Page cache for storing previous pages as basis for XBRLE compression */
+#define CACHE_N_WAY 2 /* 2-way associative cache */
+
+typedef struct CacheItem {
+    ram_addr_t it_addr;
+    unsigned long it_age;
+    uint8_t *it_data;
+} CacheItem;
+
+typedef struct CacheBucket {
+    CacheItem bkt_item[CACHE_N_WAY];
+} CacheBucket;
+
+static CacheBucket *page_cache;
+static int64_t cache_num_buckets;
+static uint64_t cache_max_item_age;
+static int64_t cache_num_items;
+
+static void cache_init(ssize_t num_buckets);
+static void cache_fini(void);
+static int cache_is_cached(ram_addr_t addr);
+static int cache_get_oldest(CacheBucket *buck);
+static int cache_get_newest(CacheBucket *buck, ram_addr_t addr);
+static void cache_insert(ram_addr_t id, uint8_t *pdata);
+static unsigned long cache_get_cache_pos(ram_addr_t address);
+static CacheItem *cache_item_get(unsigned long pos, int item);
+
+/***********************************************************/
+/* RAM Migration State */
+typedef struct ArchMigrationState {
+    int use_xbrle;
+    int64_t xbrle_cache_size;
+} ArchMigrationState;
+
+static ArchMigrationState arch_mig_state;
+
+void arch_set_params(int blk_enable, int shared_base, int use_xbrle,
+        int64_t xbrle_cache_size, void *opaque)
+{
+    arch_mig_state.use_xbrle = use_xbrle;
+    arch_mig_state.xbrle_cache_size = xbrle_cache_size;
+}
+
+/***********************************************************/
+/* XBRLE (Xor Based Run-Length Encoding) */
+typedef struct XBRLEHeader {
+    uint8_t xh_flags;
+    uint16_t xh_len;
+    uint32_t xh_cksum;
+} XBRLEHeader;
+
+static int rle_encode(uint8_t *src, int slen, uint8_t *dst, int dlen);
+static int rle_decode(uint8_t *src, int slen, uint8_t *dst, int dlen);
+#ifdef DEBUG_ARCH_INIT_CKSUM
+static uint32_t page_cksum(uint8_t *buf);
+static void page_log(ram_addr_t addr, uint8_t *pdata, const char *fmt, ...);
+#endif
+
+/***********************************************************/
+/* benchmarking */
+typedef struct BenchInfo {
+    uint64_t normal_pages;
+    uint64_t xbrle_pages;
+    uint64_t xbrle_bytes;
+    uint64_t xbrle_pages_aborted;
+    uint64_t dup_pages;
+    uint64_t iterations;
+} BenchInfo;
+
+static BenchInfo bench;
+
+/***********************************************************/
+/* XBRLE page cache implementation */
+static CacheItem *cache_item_get(unsigned long pos, int item)
+{
+    assert(page_cache);
+    return &page_cache[pos].bkt_item[item];
+}
+
+#ifdef DEBUG_ARCH_INIT_CKSUM
+static int64_t cache_max_items(void)
+{
+    return cache_num_buckets * CACHE_N_WAY;
+}
+#endif /* DEBUG_ARCH_INIT_CKSUM */
+
+static void cache_init(int64_t num_bytes)
+{
+    int i;
+
+    cache_num_items = 0;
+    cache_max_item_age = 0;
+    cache_num_buckets = num_bytes / (TARGET_PAGE_SIZE * CACHE_N_WAY);
+    assert(cache_num_buckets);
+    DPRINTF("Setting cache buckets to %ld\n", cache_num_buckets);
+
+    assert(!page_cache);
+    page_cache = (CacheBucket *)qemu_mallocz((cache_num_buckets) *
+            sizeof(CacheBucket));
+
+    for (i = 0; i < cache_num_buckets; i++) {
+        int j;
+        for (j = 0; j < CACHE_N_WAY; j++) {
+            CacheItem *it = cache_item_get(i, j);
+            it->it_data = NULL;
+            it->it_age = 0;
+            it->it_addr = -1;
+        }
+    }
+}
+
+static void cache_fini(void)
+{
+    int i;
+
+    assert(page_cache);
+
+    for (i = 0; i < cache_num_buckets; i++) {
+        int j;
+        for (j = 0; j < CACHE_N_WAY; j++) {
+            CacheItem *it = cache_item_get(i, j);
+            qemu_free(it->it_data);
+            it->it_data = 0;
+        }
+    }
+
+    qemu_free(page_cache);
+    page_cache = NULL;
+}
+
+static unsigned long cache_get_cache_pos(ram_addr_t address)
+{
+    unsigned long pos;
+
+    assert(cache_num_buckets);
+    pos = (address/TARGET_PAGE_SIZE) & (cache_num_buckets - 1);
+    return pos;
+}
+
+static int cache_get_newest(CacheBucket *buck, ram_addr_t addr)
+{
+    unsigned long big = 0;
+    int big_pos = -1;
+    int j;
+
+    assert(page_cache);
+
+    for (j = 0; j < CACHE_N_WAY; j++) {
+        CacheItem *it = &buck->bkt_item[j];
+
+        if (it->it_addr != addr) {
+            continue;
+        }
+
+        if (!j || it->it_age > big) {
+            big = it->it_age;
+            big_pos = j;
+        }
+    }
+
+    return big_pos;
+}
+
+static int cache_get_oldest(CacheBucket *buck)
+{
+    unsigned long small = 0;
+    int small_pos = -1;
+    int j;
+
+    assert(page_cache);
+
+    for (j = 0; j < CACHE_N_WAY; j++) {
+        CacheItem *it = &buck->bkt_item[j];
+
+        if (!j || it->it_age <  small) {
+            small = it->it_age;
+            small_pos = j;
+        }
+    }
+
+    return small_pos;
+}
+
+static int cache_is_cached(ram_addr_t addr)
+{
+    unsigned long pos = cache_get_cache_pos(addr);
+
+    assert(page_cache);
+    CacheBucket *bucket = &page_cache[pos];
+    return cache_get_newest(bucket, addr);
+}
+
+static void cache_insert(unsigned long addr, uint8_t *pdata)
+{
+    unsigned long pos;
+    int slot = -1;
+    CacheBucket *bucket;
+
+    pos = cache_get_cache_pos(addr);
+    assert(page_cache);
+    bucket = &page_cache[pos];
+    slot = cache_get_oldest(bucket); /* evict LRU */
+
+    /* actual update of entry */
+    CacheItem *it = cache_item_get(pos, slot);
+    if (!it->it_data) {
+        cache_num_items++;
+    }
+    qemu_free(it->it_data);
+    it->it_data = pdata;
+    it->it_age = ++cache_max_item_age;
+    it->it_addr = addr;
+}
+
+/* XBRLE (Xor Based Run-Length Encoding) */
+static int rle_encode(uint8_t *src, int slen, uint8_t *dst, int dlen)
+{
+    int d = 0, ch_run = 0, i;
+    uint8_t prev, ch;
+
+    for (i = 0; i <= slen; i++) {
+        if (i != slen) {
+            ch = src[i];
+        }
+
+        if (!i || (i != slen && ch == prev && ch_run < 255)) {
+            ch_run++;
+        } else {
+            if (d+2 > dlen)
+                return -1;
+            *dst++ = ch_run;
+            *dst++ = prev;
+            d += 2;
+            ch_run = 1;
+        }
+
+        prev = ch;
+    }
+    return d;
+}
+
+static int rle_decode(uint8_t *src, int slen, uint8_t *dst, int dlen)
+{
+    int d = 0, s;
+
+    for (s = 0; s < slen-1; s += 2) {
+        uint8_t ch_run = src[s];
+        uint8_t ch = src[s+1];
+        while (ch_run--) {
+            if (d == dlen) {
+                return -1;
+            }
+            dst[d] = ch;
+            d++;
+        }
+    }
+    return d;
+}
+
+#define PAGE_SAMPLE_PERCENT 0.02
+#define PAGE_SAMPLE_SIZE (TARGET_PAGE_SIZE * PAGE_SAMPLE_PERCENT)
+#define BYTES_CHANGED_PERCENT 0.30
+
+static int is_page_good_for_xbrle(uint8_t *old, uint8_t *new)
+{
+    int i, bytes_changed = 0;
+
+    srand(time(NULL)+getpid()+getpid()*987654+rand());
+
+    for (i = 0; i < PAGE_SAMPLE_SIZE; i++) {
+        unsigned long pos = (int) (rand() * TARGET_PAGE_SIZE / (RAND_MAX+1.0));
+
+         if (old[pos] != new[pos]) {
+             bytes_changed++;
+         }
+    }
+
+    return (((float) bytes_changed) / PAGE_SAMPLE_SIZE) < BYTES_CHANGED_PERCENT;
+}
+
+static void xor_encode(uint8_t *dst, uint8_t *src1, uint8_t *src2)
+{
+    int i;
+
+    for (i = 0; i < TARGET_PAGE_SIZE; i++) {
+        dst[i] = src1[i] ^ src2[i];
+    }
+}
+
+static void save_block_hdr(QEMUFile *f,
+        RAMBlock *block, ram_addr_t offset, int cont, int flag)
+{
+        qemu_put_be64(f, offset | cont | flag);
+        if (!cont) {
+                qemu_put_byte(f, strlen(block->idstr));
+                qemu_put_buffer(f, (uint8_t *)block->idstr,
+                                strlen(block->idstr));
+        }
+}
+
+#define ENCODING_FLAG_XBRLE 0x1
+#define ENCODING_FLAG_CKSUM 0x2
+
+static int save_xbrle_page(QEMUFile *f, uint8_t *current_data,
+        ram_addr_t current_addr, RAMBlock *block, ram_addr_t offset, int cont)
+{
+    int cache_location = -1, slot = -1, encoded_len = 0, bytes_sent = 0;
+    XBRLEHeader hdr = {0};
+    CacheItem *it;
+    uint8_t *xor_buf = NULL, *xbrle_buf = NULL;
+
+    /* get location */
+    slot = cache_is_cached(current_addr);
+    if (slot == -1) {
+        goto done;
+    }
+    cache_location = cache_get_cache_pos(current_addr);
+
+    /* abort if page changed too much */
+    it = cache_item_get(cache_location, slot);
+    if (!is_page_good_for_xbrle(it->it_data, current_data)) {
+        DPRINTF("Page changed too much! Aborting XBRLE.\n");
+        bench.xbrle_pages_aborted++;
+        goto done;
+    }
+
+    /* XOR encoding */
+    xor_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE);
+    xor_encode(xor_buf, it->it_data, current_data);
+
+    /* XBRLE (XOR+RLE) encoding */
+    xbrle_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE);
+    encoded_len = rle_encode(xor_buf, TARGET_PAGE_SIZE, xbrle_buf,
+            TARGET_PAGE_SIZE);
+
+    if (encoded_len < 0) {
+        DPRINTF("XBRLE encoding oeverflow - sending uncompressed\n");
+        goto done;
+    }
+
+    hdr.xh_len = encoded_len;
+    hdr.xh_flags |= ENCODING_FLAG_XBRLE;
+#ifdef DEBUG_ARCH_INIT_CKSUM
+    hdr.xh_cksum = page_cksum(current_data);
+    hdr.xh_flags |= ENCODING_FLAG_CKSUM;
+#endif
+
+    /* Send XBRLE compressed page */
+    save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBRLE);
+    qemu_put_buffer(f, (uint8_t *) &hdr, sizeof(hdr));
+    qemu_put_buffer(f, xbrle_buf, encoded_len);
+    PAGE_LOG(current_addr, current_data, "XBRLE page (enc len %d)\n",
+            encoded_len);
+    bench.xbrle_pages++;
+    bytes_sent = encoded_len + sizeof(hdr);
+    bench.xbrle_bytes += bytes_sent;
+
+done:
+    qemu_free(xor_buf);
+    qemu_free(xbrle_buf);
+    return bytes_sent;
+}
+
+#ifdef DEBUG_ARCH_INIT_CKSUM
+static uint32_t page_cksum(uint8_t *buf)
+{
+    uint32_t res = 0;
+    int i;
+
+    for (i = 0; i < TARGET_PAGE_SIZE; i++) {
+        res += buf[i];
+    }
+
+    return res;
+}
+
+static void page_log(ram_addr_t addr, uint8_t *pdata, const char *fmt, ...)
+{
+    va_list arg_ptr;
+    static FILE *fp;
+    static uint32_t page_seq;
+
+    va_start(arg_ptr, fmt);
+    if (!fp) {
+        fp = fopen("mig.log", "w");
+    }
+    page_seq++;
+    fprintf(fp, "[seq %d addr 0x%lX cksum 0x%X] ", page_seq,
+            (unsigned long) addr, page_cksum(pdata));
+    vfprintf(fp, fmt, arg_ptr);
+    va_end(arg_ptr);
+}
+#endif /* DEBUG_ARCH_INIT_CKSUM */

 static int is_dup_page(uint8_t *page, uint8_t ch)
 {
@@ -107,7 +522,7 @@  static int is_dup_page(uint8_t *page, uint8_t ch)
 static RAMBlock *last_block;
 static ram_addr_t last_offset;

-static int ram_save_block(QEMUFile *f)
+static int ram_save_block(QEMUFile *f, int stage)
 {
     RAMBlock *block = last_block;
     ram_addr_t offset = last_offset;
@@ -128,28 +543,32 @@  static int ram_save_block(QEMUFile *f)
                                             current_addr + TARGET_PAGE_SIZE,
                                             MIGRATION_DIRTY_FLAG);

-            p = block->host + offset;
+            p = qemu_mallocz(TARGET_PAGE_SIZE);
+            memcpy(p, block->host + offset, TARGET_PAGE_SIZE);

             if (is_dup_page(p, *p)) {
-                qemu_put_be64(f, offset | cont | RAM_SAVE_FLAG_COMPRESS);
-                if (!cont) {
-                    qemu_put_byte(f, strlen(block->idstr));
-                    qemu_put_buffer(f, (uint8_t *)block->idstr,
-                                    strlen(block->idstr));
-                }
+                save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS);
                 qemu_put_byte(f, *p);
                 bytes_sent = 1;
-            } else {
-                qemu_put_be64(f, offset | cont | RAM_SAVE_FLAG_PAGE);
-                if (!cont) {
-                    qemu_put_byte(f, strlen(block->idstr));
-                    qemu_put_buffer(f, (uint8_t *)block->idstr,
-                                    strlen(block->idstr));
+                bench.dup_pages++;
+                PAGE_LOG(current_addr, p, "DUP page\n");
+            } else if (stage == 2 && arch_mig_state.use_xbrle) {
+                bytes_sent = save_xbrle_page(f, p, current_addr, block,
+                    offset, cont);
+                if (bytes_sent) {
+                    cache_insert(current_addr, p);
                 }
+            }
+            if (!bytes_sent) {
+                save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
                 qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
                 bytes_sent = TARGET_PAGE_SIZE;
+                bench.normal_pages++;
+                PAGE_LOG(current_addr, p, "NORMAL page\n");
+                if (arch_mig_state.use_xbrle) {
+                    cache_insert(current_addr, p);
+                }
             }
-
             break;
         }

@@ -212,6 +631,55 @@  uint64_t ram_bytes_total(void)
     return total;
 }

+#ifdef DEBUG_ARCH_INIT
+static void dump_percentage(const char *label, unsigned long absolute,
+        unsigned long total)
+{
+    printf("%s: %ld (%0.2f%%)\n", label, absolute,
+            (total ? (100.0 * absolute / total) : 0));
+}
+
+static void dump_migration_statistics(void)
+{
+    unsigned long normal_bytes = bench.normal_pages * TARGET_PAGE_SIZE;
+    unsigned long total_pages = bench.normal_pages + bench.xbrle_pages
+        + bench.dup_pages;
+    unsigned long total_bytes = normal_bytes + bench.xbrle_bytes
+        + bench.dup_pages;
+
+    printf("\n");
+    printf("=====================================================\n");
+    printf("Save VM Memory Statistics (SUCCESS or FAILURE):\n");
+    printf("Iterations: %ld\n", bench.iterations);
+
+    dump_percentage("Normal pages", bench.normal_pages, total_pages);
+    dump_percentage("Normal bytes", normal_bytes, total_bytes);
+
+    dump_percentage("Dup pages", bench.dup_pages, total_pages);
+    dump_percentage("Dup bytes", bench.dup_pages, total_bytes);
+
+    if (arch_mig_state.use_xbrle) {
+        dump_percentage("XBRLE pages", bench.xbrle_pages, total_pages);
+        dump_percentage("XBRLE bytes", bench.xbrle_bytes, total_bytes);
+        dump_percentage("Aborted XBRLE pages from XBRLE",
+            bench.xbrle_pages_aborted,
+            bench.xbrle_pages + bench.xbrle_pages_aborted);
+    }
+
+    dump_percentage("Total pages", total_pages, total_pages);
+    dump_percentage("Total bytes", total_bytes, total_bytes);
+
+    if (arch_mig_state.use_xbrle) {
+        printf("Cache number of inserts: %ld\n", cache_max_item_age);
+        printf("Cache max items: %ld\n", cache_max_items());
+        dump_percentage("Cache number of items", cache_num_items,
+            cache_max_items());
+    }
+
+    printf("=====================================================\n");
+}
+#endif /* DEBUG_ARCH_INIT */
+
 int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
 {
     ram_addr_t addr;
@@ -235,6 +703,10 @@  int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
         last_block = NULL;
         last_offset = 0;

+        if (arch_mig_state.use_xbrle) {
+            cache_init(arch_mig_state.xbrle_cache_size);
+        }
+
         /* Make sure all dirty bits are set */
         QLIST_FOREACH(block, &ram_list.blocks, next) {
             for (addr = block->offset; addr < block->offset + block->length;
@@ -264,8 +736,9 @@  int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
     while (!qemu_file_rate_limit(f)) {
         int bytes_sent;

-        bytes_sent = ram_save_block(f);
+        bytes_sent = ram_save_block(f, stage);
         bytes_transferred += bytes_sent;
+        bench.iterations++;
         if (bytes_sent == 0) { /* no more blocks */
             break;
         }
@@ -285,19 +758,83 @@  int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
         int bytes_sent;

         /* flush all remaining blocks regardless of rate limiting */
-        while ((bytes_sent = ram_save_block(f)) != 0) {
+        while ((bytes_sent = ram_save_block(f, stage))) {
             bytes_transferred += bytes_sent;
         }
         cpu_physical_memory_set_dirty_tracking(0);
+        if (arch_mig_state.use_xbrle) {
+            cache_fini();
+#ifdef DEBUG_ARCH_INIT
+            dump_migration_statistics();
+#endif
+        }
     }

     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

     expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;

+    DPRINTF("ram_save_live: expected(%ld) <= max(%ld)?\n", expected_time,
+        migrate_max_downtime());
+
     return (stage == 2) && (expected_time <= migrate_max_downtime());
 }

+static int load_xbrle(QEMUFile *f, ram_addr_t addr, void *host)
+{
+    int ret, rc = -1;
+    uint8_t *prev_page, *xor_buf, *xbrle_buf;
+    XBRLEHeader hdr = {0};
+
+    /* extract RLE header */
+    qemu_get_buffer(f, (uint8_t *) &hdr, sizeof(hdr));
+    if (!(hdr.xh_flags & ENCODING_FLAG_XBRLE)) {
+        fprintf(stderr, "Failed to load XBRLE page - wrong compression!\n");
+        goto done;
+    }
+
+    if (hdr.xh_len > TARGET_PAGE_SIZE) {
+        fprintf(stderr, "Failed to load XBRLE page - len overflow!\n");
+        goto done;
+    }
+
+    /* load data and decode */
+    xbrle_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE);
+    qemu_get_buffer(f, xbrle_buf, hdr.xh_len);
+
+    /* decode RLE */
+    xor_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE);
+    ret = rle_decode(xbrle_buf, hdr.xh_len, xor_buf, TARGET_PAGE_SIZE);
+    if (ret == -1) {
+        fprintf(stderr, "Failed to load XBRLE page - decode error!\n");
+        goto done;
+    }
+
+    if (ret != TARGET_PAGE_SIZE) {
+        fprintf(stderr, "Failed to load XBRLE page - size %d expected %d!\n",
+            ret, TARGET_PAGE_SIZE);
+        goto done;
+    }
+
+    /* decode XOR delta */
+    prev_page = host;
+    xor_encode(prev_page, prev_page, xor_buf);
+#ifdef DEBUG_ARCH_INIT_CKSUM
+    if (hdr.xh_flags & ENCODING_FLAG_CKSUM &&
+            hdr.xh_cksum != page_cksum(prev_page)) {
+        fprintf(stderr, "Failed to load XBRLE page - bad checksum!\n");
+        goto done;
+    }
+#endif
+
+    PAGE_LOG(addr, prev_page, "XBRLE page (enc len %d)\n", hdr.xh_len);
+    rc = 0;
+done:
+    qemu_free(xor_buf);
+    qemu_free(xbrle_buf);
+    return rc;
+}
+
 static inline void *host_from_stream_offset(QEMUFile *f,
                                             ram_addr_t offset,
                                             int flags)
@@ -328,16 +865,38 @@  static inline void *host_from_stream_offset(QEMUFile *f,
     return NULL;
 }

+static inline void *host_from_stream_offset_versioned(int version_id,
+        QEMUFile *f, ram_addr_t offset, int flags)
+{
+        void *host;
+        if (version_id == 3) {
+                host = qemu_get_ram_ptr(offset);
+        } else {
+                host = host_from_stream_offset(f, offset, flags);
+        }
+        if (!host) {
+            fprintf(stderr, "Failed to convert RAM address to host"
+                    " for offset 0x%lX!\n", offset);
+            abort();
+        }
+        return host;
+}
+
 int ram_load(QEMUFile *f, void *opaque, int version_id)
 {
     ram_addr_t addr;
-    int flags;
+    int flags, ret = 0;
+    static uint64_t seq_iter;
+
+    seq_iter++;

     if (version_id < 3 || version_id > 4) {
-        return -EINVAL;
+        ret = -EINVAL;
+        goto done;
     }

     do {
+        void *host;
         addr = qemu_get_be64(f);

         flags = addr & ~TARGET_PAGE_MASK;
@@ -346,7 +905,8 @@  int ram_load(QEMUFile *f, void *opaque, int version_id)
         if (flags & RAM_SAVE_FLAG_MEM_SIZE) {
             if (version_id == 3) {
                 if (addr != ram_bytes_total()) {
-                    return -EINVAL;
+                    ret = -EINVAL;
+                    goto done;
                 }
             } else {
                 /* Synchronize RAM block list */
@@ -365,8 +925,10 @@  int ram_load(QEMUFile *f, void *opaque, int version_id)

                     QLIST_FOREACH(block, &ram_list.blocks, next) {
                         if (!strncmp(id, block->idstr, sizeof(id))) {
-                            if (block->length != length)
-                                return -EINVAL;
+                            if (block->length != length) {
+                                ret = -EINVAL;
+                                goto done;
+                            }
                             break;
                         }
                     }
@@ -374,7 +936,8 @@  int ram_load(QEMUFile *f, void *opaque, int version_id)
                     if (!block) {
                         fprintf(stderr, "Unknown ramblock \"%s\", cannot "
                                 "accept migration\n", id);
-                        return -EINVAL;
+                        ret = -EINVAL;
+                        goto done;
                     }

                     total_ram_bytes -= length;
@@ -383,17 +946,10 @@  int ram_load(QEMUFile *f, void *opaque, int version_id)
         }

         if (flags & RAM_SAVE_FLAG_COMPRESS) {
-            void *host;
             uint8_t ch;

-            if (version_id == 3)
-                host = qemu_get_ram_ptr(addr);
-            else
-                host = host_from_stream_offset(f, addr, flags);
-            if (!host) {
-                return -EINVAL;
-            }
-
+            host = host_from_stream_offset_versioned(version_id,
+                            f, addr, flags);
             ch = qemu_get_byte(f);
             memset(host, ch, TARGET_PAGE_SIZE);
 #ifndef _WIN32
@@ -402,22 +958,31 @@  int ram_load(QEMUFile *f, void *opaque, int version_id)
                 qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
             }
 #endif
+            PAGE_LOG(addr, host, "DUP page\n");
         } else if (flags & RAM_SAVE_FLAG_PAGE) {
-            void *host;
-
-            if (version_id == 3)
-                host = qemu_get_ram_ptr(addr);
-            else
-                host = host_from_stream_offset(f, addr, flags);
-
+            host = host_from_stream_offset_versioned(version_id,
+                            f, addr, flags);
             qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
+            PAGE_LOG(addr, host, "NORMAL page\n");
+        } else if (flags & RAM_SAVE_FLAG_XBRLE) {
+            host = host_from_stream_offset_versioned(version_id,
+                            f, addr, flags);
+            if (load_xbrle(f, addr, host) < 0) {
+                ret = -EINVAL;
+                goto done;
+            }
         }
+
         if (qemu_file_has_error(f)) {
-            return -EIO;
+            ret = -EIO;
+            goto done;
         }
     } while (!(flags & RAM_SAVE_FLAG_EOS));

-    return 0;
+done:
+    DPRINTF("Completed load of VM with exit code %d seq iteration %ld\n",
+            ret, seq_iter);
+    return ret;
 }

 void qemu_service_io(void)
diff --git a/block-migration.c b/block-migration.c
index 3e66f49..004fc12 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -689,7 +689,8 @@  static int block_load(QEMUFile *f, void *opaque, int version_id)
     return 0;
 }

-static void block_set_params(int blk_enable, int shared_base, void *opaque)
+static void block_set_params(int blk_enable, int shared_base,
+       int use_xbrle, int64_t xbrle_cache_size, void *opaque)
 {
     block_mig_state.blk_enable = blk_enable;
     block_mig_state.shared_base = shared_base;
diff --git a/hmp-commands.hx b/hmp-commands.hx
index e5585ba..e49d5be 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -717,24 +717,27 @@  ETEXI

     {
         .name       = "migrate",
-        .args_type  = "detach:-d,blk:-b,inc:-i,uri:s",
-        .params     = "[-d] [-b] [-i] uri",
-        .help       = "migrate to URI (using -d to not wait for completion)"
-                     "\n\t\t\t -b for migration without shared storage with"
-                     " full copy of disk\n\t\t\t -i for migration without "
-                     "shared storage with incremental copy of disk "
-                     "(base image shared between src and destination)",
+        .args_type  = "detach:-d,blk:-b,inc:-i,xbrle:-x,uri:s",
+        .params     = "[-d] [-b] [-i] [-x] uri",
+        .help       = "migrate to URI"
+                      "\n\t -d to not wait for completion"
+                      "\n\t -b for migration without shared storage with"
+                      " full copy of disk"
+                      "\n\t -i for migration without"
+                      " shared storage with incremental copy of disk"
+                      " (base image shared between source and destination)"
+                      "\n\t -x to use XBRLE page delta compression",
         .user_print = monitor_user_noop,
        .mhandler.cmd_new = do_migrate,
     },

-
 STEXI
-@item migrate [-d] [-b] [-i] @var{uri}
+@item migrate [-d] [-b] [-i] [-x] @var{uri}
 @findex migrate
 Migrate to @var{uri} (using -d to not wait for completion).
        -b for migration with full copy of disk
        -i for migration with incremental copy of disk (base image is shared)
+    -x to use XBRLE page delta compression
 ETEXI

     {
@@ -753,10 +756,23 @@  Cancel the current VM migration.
 ETEXI

     {
+        .name       = "migrate_set_cachesize",
+        .args_type  = "value:s",
+        .params     = "value",
+        .help       = "set cache size (in MB) for XBRLE migrations",
+        .mhandler.cmd = do_migrate_set_cachesize,
+    },
+
+STEXI
+@item migrate_set_cachesize @var{value}
+Set cache size (in MB) for XBRLE migrations.
+ETEXI
+
+    {
         .name       = "migrate_set_speed",
         .args_type  = "value:o",
         .params     = "value",
-        .help       = "set maximum speed (in bytes) for migrations. "
+        .help       = "set maximum XBRLE cache size (in bytes) for migrations. "
        "Defaults to MB if no size suffix is specified, ie. B/K/M/G/T",
         .user_print = monitor_user_noop,
         .mhandler.cmd_new = do_migrate_set_speed,
diff --git a/hw/hw.h b/hw/hw.h
index 9d2cfc2..de9e5a6 100644
--- a/hw/hw.h
+++ b/hw/hw.h
@@ -239,7 +239,8 @@  static inline void qemu_get_sbe64s(QEMUFile *f, int64_t *pv)
 int64_t qemu_ftell(QEMUFile *f);
 int64_t qemu_fseek(QEMUFile *f, int64_t pos, int whence);

-typedef void SaveSetParamsHandler(int blk_enable, int shared, void * opaque);
+typedef void SaveSetParamsHandler(int blk_enable, int shared,
+       int use_xbrle, int64_t xbrle_cache_size, void * opaque);
 typedef void SaveStateHandler(QEMUFile *f, void *opaque);
 typedef int SaveLiveStateHandler(Monitor *mon, QEMUFile *f, int stage,
                                  void *opaque);
diff --git a/migration-exec.c b/migration-exec.c
index 14718dd..fe8254a 100644
--- a/migration-exec.c
+++ b/migration-exec.c
@@ -67,7 +67,9 @@  MigrationState *exec_start_outgoing_migration(Monitor *mon,
                                              int64_t bandwidth_limit,
                                              int detach,
                                              int blk,
-                                             int inc)
+                          int inc,
+                          int use_xbrle,
+                          int64_t xbrle_cache_size)
 {
     FdMigrationState *s;
     FILE *f;
@@ -99,6 +101,8 @@  MigrationState *exec_start_outgoing_migration(Monitor *mon,

     s->mig_state.blk = blk;
     s->mig_state.shared = inc;
+    s->mig_state.use_xbrle = use_xbrle;
+    s->mig_state.xbrle_cache_size = xbrle_cache_size;

     s->state = MIG_STATE_ACTIVE;
     s->mon = NULL;
diff --git a/migration-fd.c b/migration-fd.c
index 6d14505..4a1ddbd 100644
--- a/migration-fd.c
+++ b/migration-fd.c
@@ -56,7 +56,9 @@  MigrationState *fd_start_outgoing_migration(Monitor *mon,
                                            int64_t bandwidth_limit,
                                            int detach,
                                            int blk,
-                                           int inc)
+                        int inc,
+                        int use_xbrle,
+                        int64_t xbrle_cache_size)
 {
     FdMigrationState *s;

@@ -82,6 +84,8 @@  MigrationState *fd_start_outgoing_migration(Monitor *mon,

     s->mig_state.blk = blk;
     s->mig_state.shared = inc;
+    s->mig_state.use_xbrle = use_xbrle;
+    s->mig_state.xbrle_cache_size = xbrle_cache_size;

     s->state = MIG_STATE_ACTIVE;
     s->mon = NULL;
diff --git a/migration-tcp.c b/migration-tcp.c
index b55f419..4ca5bf6 100644
--- a/migration-tcp.c
+++ b/migration-tcp.c
@@ -81,7 +81,9 @@  MigrationState *tcp_start_outgoing_migration(Monitor *mon,
                                              int64_t bandwidth_limit,
                                              int detach,
                                             int blk,
-                                            int inc)
+                         int inc,
+                         int use_xbrle,
+                         int64_t xbrle_cache_size)
 {
     struct sockaddr_in addr;
     FdMigrationState *s;
@@ -101,6 +103,8 @@  MigrationState *tcp_start_outgoing_migration(Monitor *mon,

     s->mig_state.blk = blk;
     s->mig_state.shared = inc;
+    s->mig_state.use_xbrle = use_xbrle;
+    s->mig_state.xbrle_cache_size = xbrle_cache_size;

     s->state = MIG_STATE_ACTIVE;
     s->mon = NULL;
diff --git a/migration-unix.c b/migration-unix.c
index 57232c0..0813902 100644
--- a/migration-unix.c
+++ b/migration-unix.c
@@ -80,7 +80,9 @@  MigrationState *unix_start_outgoing_migration(Monitor *mon,
                                              int64_t bandwidth_limit,
                                              int detach,
                                              int blk,
-                                             int inc)
+                          int inc,
+                          int use_xbrle,
+                          int64_t xbrle_cache_size)
 {
     FdMigrationState *s;
     struct sockaddr_un addr;
@@ -100,6 +102,8 @@  MigrationState *unix_start_outgoing_migration(Monitor *mon,

     s->mig_state.blk = blk;
     s->mig_state.shared = inc;
+    s->mig_state.use_xbrle = use_xbrle;
+    s->mig_state.xbrle_cache_size = xbrle_cache_size;

     s->state = MIG_STATE_ACTIVE;
     s->mon = NULL;
diff --git a/migration.c b/migration.c
index 9ee8b17..b5b530b 100644
--- a/migration.c
+++ b/migration.c
@@ -34,6 +34,9 @@ 
 /* Migration speed throttling */
 static uint32_t max_throttle = (32 << 20);

+/* Migration XBRLE cache size */
+static int64_t migrate_cache_size = 0x8000000; /* 128 MB (0x8000000 bytes) */
+
 static MigrationState *current_migration;

 int qemu_start_incoming_migration(const char *uri)
@@ -80,6 +83,7 @@  int do_migrate(Monitor *mon, const QDict *qdict, QObject **ret_data)
     int detach = qdict_get_try_bool(qdict, "detach", 0);
     int blk = qdict_get_try_bool(qdict, "blk", 0);
     int inc = qdict_get_try_bool(qdict, "inc", 0);
+    int use_xbrle = qdict_get_try_bool(qdict, "xbrle", 0);
     const char *uri = qdict_get_str(qdict, "uri");

     if (current_migration &&
@@ -90,17 +94,21 @@  int do_migrate(Monitor *mon, const QDict *qdict, QObject **ret_data)

     if (strstart(uri, "tcp:", &p)) {
         s = tcp_start_outgoing_migration(mon, p, max_throttle, detach,
-                                         blk, inc);
+                                         blk, inc, use_xbrle,
+                                         migrate_cache_size);
 #if !defined(WIN32)
     } else if (strstart(uri, "exec:", &p)) {
         s = exec_start_outgoing_migration(mon, p, max_throttle, detach,
-                                          blk, inc);
+                                          blk, inc, use_xbrle,
+                                          migrate_cache_size);
     } else if (strstart(uri, "unix:", &p)) {
         s = unix_start_outgoing_migration(mon, p, max_throttle, detach,
-                                          blk, inc);
+                                          blk, inc, use_xbrle,
+                                          migrate_cache_size);
     } else if (strstart(uri, "fd:", &p)) {
         s = fd_start_outgoing_migration(mon, p, max_throttle, detach,
-                                        blk, inc);
+                                        blk, inc, use_xbrle,
+                                        migrate_cache_size);
 #endif
     } else {
         monitor_printf(mon, "unknown migration protocol: %s\n", uri);
@@ -341,7 +349,8 @@  void migrate_fd_connect(FdMigrationState *s)

     DPRINTF("beginning savevm\n");
     ret = qemu_savevm_state_begin(s->mon, s->file, s->mig_state.blk,
-                                  s->mig_state.shared);
+                                  s->mig_state.shared, s->mig_state.use_xbrle,
+                                  s->mig_state.xbrle_cache_size);
     if (ret < 0) {
         DPRINTF("failed, %d\n", ret);
         migrate_fd_error(s);
@@ -448,3 +457,17 @@  int migrate_fd_close(void *opaque)
     qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
     return s->close(s);
 }
+
+/* Monitor command: set the XBRLE cache size (in bytes) from "value". */
+void do_migrate_set_cachesize(Monitor *mon, const QDict *qdict)
+{
+    double bytes = strtod(qdict_get_str(qdict, "value"), NULL);
+
+    /* Ignore zero, negative, NaN and values that overflow int64_t. */
+    if (bytes >= 1 && bytes < 9223372036854775808.0) {
+        migrate_cache_size = bytes;
+        monitor_printf(mon, "Cache size set to %lld bytes\n",
+                       (long long)migrate_cache_size);
+    }
+}
+
diff --git a/migration.h b/migration.h
index d13ed4f..62be2f8 100644
--- a/migration.h
+++ b/migration.h
@@ -32,6 +32,8 @@  struct MigrationState
     void (*release)(MigrationState *s);
     int blk;
     int shared;
+    int use_xbrle;
+    int64_t xbrle_cache_size;
 };

 typedef struct FdMigrationState FdMigrationState;
@@ -76,7 +78,9 @@  MigrationState *exec_start_outgoing_migration(Monitor *mon,
                                              int64_t bandwidth_limit,
                                              int detach,
                                              int blk,
-                                             int inc);
+                          int inc,
+                          int use_xbrle,
+                          int64_t xbrle_cache_size);

 int tcp_start_incoming_migration(const char *host_port);

@@ -85,7 +89,9 @@  MigrationState *tcp_start_outgoing_migration(Monitor *mon,
                                             int64_t bandwidth_limit,
                                             int detach,
                                             int blk,
-                                            int inc);
+                         int inc,
+                         int use_xbrle,
+                         int64_t xbrle_cache_size);

 int unix_start_incoming_migration(const char *path);

@@ -94,7 +100,9 @@  MigrationState *unix_start_outgoing_migration(Monitor *mon,
                                              int64_t bandwidth_limit,
                                              int detach,
                                              int blk,
-                                             int inc);
+                          int inc,
+                          int use_xbrle,
+                          int64_t xbrle_cache_size);

 int fd_start_incoming_migration(const char *path);

@@ -103,7 +111,9 @@  MigrationState *fd_start_outgoing_migration(Monitor *mon,
                                            int64_t bandwidth_limit,
                                            int detach,
                                            int blk,
-                                           int inc);
+                        int inc,
+                        int use_xbrle,
+                        int64_t xbrle_cache_size);

 void migrate_fd_monitor_suspend(FdMigrationState *s, Monitor *mon);

@@ -134,4 +144,9 @@  static inline FdMigrationState *migrate_to_fms(MigrationState *mig_state)
     return container_of(mig_state, FdMigrationState, mig_state);
 }

+void do_migrate_set_cachesize(Monitor *mon, const QDict *qdict);
+
+void arch_set_params(int blk_enable, int shared_base,
+        int use_xbrle, int64_t xbrle_cache_size, void *opaque);
+
 #endif
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 793cf1c..8fbe64b 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -431,13 +431,16 @@  EQMP

     {
         .name       = "migrate",
-        .args_type  = "detach:-d,blk:-b,inc:-i,uri:s",
-        .params     = "[-d] [-b] [-i] uri",
-        .help       = "migrate to URI (using -d to not wait for completion)"
-                     "\n\t\t\t -b for migration without shared storage with"
-                     " full copy of disk\n\t\t\t -i for migration without "
-                     "shared storage with incremental copy of disk "
-                     "(base image shared between src and destination)",
+        .args_type  = "detach:-d,blk:-b,inc:-i,xbrle:-x,uri:s",
+        .params     = "[-d] [-b] [-i] [-x] uri",
+        .help       = "migrate to URI"
+                      "\n\t -d to not wait for completion"
+                      "\n\t -b for migration without shared storage with"
+                      " full copy of disk"
+                      "\n\t -i for migration without"
+                      " shared storage with incremental copy of disk"
+                      " (base image shared between source and destination)"
+                      "\n\t -x to use XBRLE page delta compression",
         .user_print = monitor_user_noop,
        .mhandler.cmd_new = do_migrate,
     },
@@ -453,6 +456,7 @@  Arguments:
 - "blk": block migration, full disk copy (json-bool, optional)
 - "inc": incremental disk copy (json-bool, optional)
 - "uri": Destination URI (json-string)
+- "xbrle": use XBRLE page delta compression (json-bool, optional)

 Example:

@@ -494,6 +498,31 @@  Example:
 EQMP

     {
+        .name       = "migrate_set_cachesize",
+        .args_type  = "value:s",
+        .params     = "value",
+        .help       = "set cache size (in MB) for xbrle migrations",
+        .mhandler.cmd = do_migrate_set_cachesize,
+    },
+
+SQMP
+migrate_set_cachesize
+---------------------
+
+Set cache size to be used by XBRLE migration
+
+Arguments:
+
+- "value": cache size in bytes (json-string)
+
+Example:
+
+-> { "execute": "migrate_set_cachesize", "arguments": { "value": "536870912" } }
+<- { "return": {} }
+
+EQMP
+
+    {
         .name       = "migrate_set_speed",
         .args_type  = "value:f",
         .params     = "value",
diff --git a/savevm.c b/savevm.c
index 4e49765..93b512b 100644
--- a/savevm.c
+++ b/savevm.c
@@ -1141,7 +1141,8 @@  int register_savevm(DeviceState *dev,
                     void *opaque)
 {
     return register_savevm_live(dev, idstr, instance_id, version_id,
-                                NULL, NULL, save_state, load_state, opaque);
+                                arch_set_params, NULL, save_state,
+                                load_state, opaque);
 }

 void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque)
@@ -1428,15 +1429,17 @@  static int vmstate_save(QEMUFile *f, SaveStateEntry *se)
 #define QEMU_VM_SUBSECTION           0x05

 int qemu_savevm_state_begin(Monitor *mon, QEMUFile *f, int blk_enable,
-                            int shared)
+                            int shared, int use_xbrle,
+                            int64_t xbrle_cache_size)
 {
     SaveStateEntry *se;

     QTAILQ_FOREACH(se, &savevm_handlers, entry) {
         if(se->set_params == NULL) {
             continue;
-       }
-       se->set_params(blk_enable, shared, se->opaque);
+        }
+        se->set_params(blk_enable, shared, use_xbrle, xbrle_cache_size,
+                se->opaque);
     }

     qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
@@ -1577,7 +1580,7 @@  static int qemu_savevm_state(Monitor *mon, QEMUFile *f)

     bdrv_flush_all();

-    ret = qemu_savevm_state_begin(mon, f, 0, 0);
+    ret = qemu_savevm_state_begin(mon, f, 0, 0, 0, 0);
     if (ret < 0)
         goto out;

diff --git a/sysemu.h b/sysemu.h
index b81a70e..15a0664 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -74,7 +74,8 @@  void qemu_announce_self(void);
 void main_loop_wait(int nonblocking);

 int qemu_savevm_state_begin(Monitor *mon, QEMUFile *f, int blk_enable,
-                            int shared);
+                            int shared, int use_xbrle,
+                            int64_t xbrle_cache_size);
 int qemu_savevm_state_iterate(Monitor *mon, QEMUFile *f);
 int qemu_savevm_state_complete(Monitor *mon, QEMUFile *f);
 void qemu_savevm_state_cancel(Monitor *mon, QEMUFile *f);