diff mbox

[v9,06/10] Add XBZRLE to ram_save_block and ram_save_live

Message ID 1334170153-9503-7-git-send-email-owasserm@redhat.com
State New
Headers show

Commit Message

Orit Wasserman April 11, 2012, 6:49 p.m. UTC
Add migration state to store XBRLE params (enablement and cache size).
In the outgoing migration check to see if the page is cached and
changed than send compressed page by using save_xbrle_page function.
In the incoming migration check to see if RAM_SAVE_FLAG_XBRLE is set
and decompress the page (by using load_xbrle function).

Signed-off-by: Orit Wasserman <owasserm@redhat.com>
Signed-off-by: Benoit Hudzia <benoit.hudzia@sap.com>
Signed-off-by: Petter Svard <petters@cs.umu.se>
Signed-off-by: Aidan Shribman <aidan.shribman@sap.com>
---
 arch_init.c |  176 +++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 migration.c |   18 ++++++
 migration.h |    8 +++
 savevm.c    |   94 +++++++++++++++++++++++++++++++-
 4 files changed, 283 insertions(+), 13 deletions(-)

Comments

Juan Quintela April 18, 2012, 4:54 p.m. UTC | #1
Orit Wasserman <owasserm@redhat.com> wrote:

> @@ -104,6 +104,7 @@ const uint32_t arch_type = QEMU_ARCH;
>  #define RAM_SAVE_FLAG_PAGE     0x08
>  #define RAM_SAVE_FLAG_EOS      0x10
>  #define RAM_SAVE_FLAG_CONTINUE 0x20
> +#define RAM_SAVE_FLAG_XBZRLE  0x40

missing space for alignment?

>  
>  #ifdef __ALTIVEC__
>  #include <altivec.h>
> @@ -165,6 +166,15 @@ static unsigned long cache_get_cache_pos(ram_addr_t address);
>  static CacheItem *cache_item_get(unsigned long pos, int item);
>  static void cache_resize(int64_t new_size);
>  
> +/* XBZRLE (Xor Based Zero Length Encoding */
> +typedef struct __attribute__((packed)) XBZRLEHeader {
> +    uint8_t xh_flags;
> +    uint16_t xh_len;
> +    uint32_t xh_cksum;
> +} XBZRLEHeader;

We shouldn't use packed here. Explanation later.
And if we are worried about space, it is much better ordering to do it
by size:

typedef struct {
    uint32_t xh_cksum;
    uint16_t xh_len;
    uint8_t xh_flags;
} XBZRLEHeader;


Once here, are we using the xh_cksum at all?


> +static uint8 *prev_cache_page;

Move this also to the bucket struct?

> @@ -359,19 +374,74 @@ static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
>  
>  }
>  
> +#define ENCODING_FLAG_XBZRLE 0x1
> +
> +static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
> +                            ram_addr_t current_addr, RAMBlock *block,
> +                            ram_addr_t offset, int cont)
> +{
> +    int cache_location = -1, slot = -1, encoded_len = 0, bytes_sent = 0;
> +    XBZRLEHeader hdr = {0};
> +    CacheItem *it;
> +    uint8_t *encoded_buf = NULL;
> +
> +    /* get location */
> +    slot = cache_is_cached(current_addr);
> +    if (slot == -1) {
> +        cache_insert(current_addr, current_data, 0);
> +        goto done;
> +    }
> +    cache_location = cache_get_cache_pos(current_addr);
> +
> +    /* abort if page changed too much */
> +    it = cache_item_get(cache_location, slot);

Comment and code don't match?

> +
> +    /* we need to copy the current data before encoding so it won't change
> +       during encoding. cache_insert copies the data.
> +    */
> +    memcpy(it->it_data, prev_cache_page, TARGET_PAGE_SIZE);
> +    cache_insert(current_addr, current_data, 0);
> +
> +    /* XBZRLE encoding (if there is no overflow) */
> +    encoded_buf = (uint8_t *) g_malloc(TARGET_PAGE_SIZE);

Cast not needed.  Can we have a pagge allocated for this in Bucket
struct, XBRLE struct or whatever we want to call it?  Having to do a
malloc for each page thaht we sent looks excesive?

> +    encoded_len = encode_page(prev_cache_page, it->it_data, TARGET_PAGE_SIZE,
> +                              encoded_buf, TARGET_PAGE_SIZE);
> +    if (encoded_len < 0) {
> +        DPRINTF("Unmodifed page - skipping\n");
> +        goto done;
> +    }
> +
> +    hdr.xh_len = encoded_len;
> +    hdr.xh_flags |= ENCODING_FLAG_XBZRLE;
> +
> +    /* Send XBZRLE based compressed page */
> +    save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
> +    qemu_put_buffer(f, (uint8_t *) &hdr, sizeof(hdr));

This is wrong (same for load part), we are breking inter-arch migration.

qemu_put_byte(f, hdr->xh_flags);
qemu_put_be16(f, hdr->xh_len);
qemu_put_be32(f, hdr->xh_cksum);

And the inverse on the load side.

> +    qemu_put_buffer(f, encoded_buf, encoded_len);
> +    bytes_sent = encoded_len + sizeof(hdr);
> +
> +done:
> +    g_free(encoded_buf);
> +    return bytes_sent;
> +}
> +
>  static RAMBlock *last_block;
>  static ram_addr_t last_offset;
>  
> -static int ram_save_block(QEMUFile *f)
> +static int ram_save_block(QEMUFile *f, int stage)
>  {
>      RAMBlock *block = last_block;
>      ram_addr_t offset = last_offset;
>      int bytes_sent = 0;
>      MemoryRegion *mr;
> +    ram_addr_t current_addr;
> +    int use_xbzrle =  migrate_use_xbzrle();
>  
>      if (!block)
>          block = QLIST_FIRST(&ram_list.blocks);
>  
> +    current_addr = block->offset + offset;
> +
>      do {
>          mr = block->mr;
>          if (memory_region_get_dirty(mr, offset, TARGET_PAGE_SIZE,
> @@ -388,7 +458,14 @@ static int ram_save_block(QEMUFile *f)
>                  save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS);
>                  qemu_put_byte(f, *p);
>                  bytes_sent = 1;
> -            } else {
> +            } else if (stage == 2 && use_xbzrle) {
here
> +                bytes_sent = save_xbzrle_page(f, p, current_addr, block,
> +                    offset, cont);
> +            } else if (use_xbzrle && stage == 1) {
and here, can we used the check in the same order?

> +                cache_insert(current_addr, p, 0);
> +            }

Or even better, change the code to something like:

else if (use_xbzrle) {
     if (stage == 1) {
        cache_insert(current_addr, p, 0);
     } else if (cache == 2) {
        bytes_sent = save_xbzrle_page(...);
     }

And I don't understand why this function care at all about the stages.
Normal code sent a page on stage == 1, here it is only saved on the
cache and sent as a normal page (without compression).  And on stage==3,
it is also always sent without compression.  I guess there is a method
on this, but a comment will help?

> +
> +            if (!bytes_sent) {
>                  save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
>                  qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
>                  bytes_sent = TARGET_PAGE_SIZE;
> @@ -492,6 +569,9 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
>  
>      if (stage < 0) {
>          memory_global_dirty_log_stop();
> +        if (migrate_use_xbzrle()) {
> +            cache_fini();
> +        }
>          return 0;
>      }
>  
> @@ -504,6 +584,10 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
>          last_offset = 0;
>          sort_ram_list();
>  
> +        if (migrate_use_xbzrle()) {
> +            cache_init(migrate_xbzrle_cache_size());
> +        }
> +
>          /* Make sure all dirty bits are set */
>          QLIST_FOREACH(block, &ram_list.blocks, next) {
>              for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
> @@ -531,9 +615,11 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)

>      while ((ret = qemu_file_rate_limit(f)) == 0) {
>          int bytes_sent;
>  
> -        bytes_sent = ram_save_block(f);
> -        bytes_transferred += bytes_sent;
> -        if (bytes_sent == 0) { /* no more blocks */
> +        bytes_sent = ram_save_block(f, stage);
> +        /* bytes_sent -1 represent unchanged page */
Interesting represtantion.  Yes, I know that zero is already used (would
be the obvious here).  Perhaps switching it with the old zero value, and
-1 pass to mean no more blocks?

> +        if (bytes_sent > 0) {
> +            bytes_transferred += bytes_sent;
> +        } else if (bytes_sent == 0) { /* no more blocks */
>              break;
>          }
>      }
> @@ -556,19 +642,67 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
>          int bytes_sent;
>  
>          /* flush all remaining blocks regardless of rate limiting */
> -        while ((bytes_sent = ram_save_block(f)) != 0) {
> +        while ((bytes_sent = ram_save_block(f, stage)) != 0) {
>              bytes_transferred += bytes_sent;
>          }
>          memory_global_dirty_log_stop();
> +        if (migrate_use_xbzrle()) {
> +            cache_fini();
> +        }
>      }

Not that this problem is introduced on this series, but could we get
this into a function migration_end/fini() that it just called in the two
places where a migration can end.

>      qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>  
>      expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
>  
> +    DPRINTF("ram_save_live: expected(%ld) <= max(%ld)?\n", expected_time,
> +        migrate_max_downtime());
> +
>      return (stage == 2) && (expected_time <= migrate_max_downtime());
>  }
>  
> +static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
> +{
> +    int ret, rc = -1;
> +    uint8_t *xbzrle_buf = NULL;
> +    XBZRLEHeader hdr = {0};
> +
> +    /* extract RLE header */
> +    qemu_get_buffer(f, (uint8_t *) &hdr, sizeof(hdr));

This needs to be done by hand as told.

> +    if (!(hdr.xh_flags & ENCODING_FLAG_XBZRLE)) {
> +        fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n");
> +        goto done;
> +    }
> +
> +    if (hdr.xh_len > TARGET_PAGE_SIZE) {
> +        fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n");
> +        goto done;
> +    }
> +
> +    /* load data and decode */
> +    xbzrle_buf = (uint8_t *) g_malloc0(TARGET_PAGE_SIZE);

cast not needed.  Again, this can be done only _once_ for the whole
migration.

> +    qemu_get_buffer(f, xbzrle_buf, hdr.xh_len);
> +
> +    /* decode RLE */
> +    ret = decode_page(xbzrle_buf, hdr.xh_len, host, TARGET_PAGE_SIZE);
> +    if (ret == -1) {
> +        fprintf(stderr, "Failed to load XBZRLE page - decode error!\n");
> +        goto done;
> +    }
> +
> +    if (ret > TARGET_PAGE_SIZE) {
> +        fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n",
> +                ret, TARGET_PAGE_SIZE);
> +        goto done;
> +    }
> +
> +    rc = 0;
> +
> +done:
> +    g_free(xbzrle_buf);
> +    return rc;
> +}
> +
>  static inline void *host_from_stream_offset(QEMUFile *f,
>                                              ram_addr_t offset,
>                                              int flags)
> @@ -614,14 +748,18 @@ static inline void *host_from_stream_offset_versioned(int version_id,
>  int ram_load(QEMUFile *f, void *opaque, int version_id)
>  {
>      ram_addr_t addr;
> -    int flags;
> +    int flags, ret = 0;
>      int error;
> +    static uint64_t seq_iter;
> +
> +    seq_iter++;
>  
>      if (version_id < 4 || version_id > 4) {
>          return -EINVAL;
>      }
>  
>      do {
> +        void *host;
>          addr = qemu_get_be64(f);
>  
>          flags = addr & ~TARGET_PAGE_MASK;
> @@ -645,8 +783,10 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>  
>                      QLIST_FOREACH(block, &ram_list.blocks, next) {
>                          if (!strncmp(id, block->idstr, sizeof(id))) {
> -                            if (block->length != length)
> -                                return -EINVAL;
> +                            if (block->length != length) {
> +                                ret =  -EINVAL;
> +                                goto done;
> +                            }
>                              break;
>                          }
>                      }
> @@ -654,7 +794,8 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>                      if (!block) {
>                          fprintf(stderr, "Unknown ramblock \"%s\", cannot "
>                                  "accept migration\n", id);
> -                        return -EINVAL;
> +                        ret = -EINVAL;
> +                        goto done;
>                      }
>  
>                      total_ram_bytes -= length;
> @@ -693,14 +834,25 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>                  return -EINVAL;
>              }
>              qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
> +        } else if (flags & RAM_SAVE_FLAG_XBZRLE) {
> +            host = host_from_stream_offset_versioned(version_id,
> +                            f, addr, flags);
> +            if (load_xbzrle(f, addr, host) < 0) {
> +                ret = -EINVAL;
> +                goto done;
> +            }

Why we don't needto check that host is NULL in this case?

[ I assume that encode/decode functions work ok, head hurts]

Can we change the name to xbzrle_page_encode()/decode()?  name sounds
really too general. As they are not related to pages at all, we could
also call it: xbrzle_buffer_encode()/decode?

Thanks, Juan.
Anthony Liguori April 18, 2012, 5:32 p.m. UTC | #2
On 04/11/2012 01:49 PM, Orit Wasserman wrote:
> Add migration state to store XBRLE params (enablement and cache size).
> In the outgoing migration check to see if the page is cached and
> changed than send compressed page by using save_xbrle_page function.
> In the incoming migration check to see if RAM_SAVE_FLAG_XBRLE is set
> and decompress the page (by using load_xbrle function).
>
> Signed-off-by: Orit Wasserman<owasserm@redhat.com>
> Signed-off-by: Benoit Hudzia<benoit.hudzia@sap.com>
> Signed-off-by: Petter Svard<petters@cs.umu.se>
> Signed-off-by: Aidan Shribman<aidan.shribman@sap.com>
> ---
>   arch_init.c |  176 +++++++++++++++++++++++++++++++++++++++++++++++++++++++----
>   migration.c |   18 ++++++
>   migration.h |    8 +++
>   savevm.c    |   94 +++++++++++++++++++++++++++++++-
>   4 files changed, 283 insertions(+), 13 deletions(-)
>
> diff --git a/arch_init.c b/arch_init.c
> index f1690cf..793f0be 100644
> --- a/arch_init.c
> +++ b/arch_init.c
> @@ -104,6 +104,7 @@ const uint32_t arch_type = QEMU_ARCH;
>   #define RAM_SAVE_FLAG_PAGE     0x08
>   #define RAM_SAVE_FLAG_EOS      0x10
>   #define RAM_SAVE_FLAG_CONTINUE 0x20
> +#define RAM_SAVE_FLAG_XBZRLE  0x40
>
>   #ifdef __ALTIVEC__
>   #include<altivec.h>
> @@ -165,6 +166,15 @@ static unsigned long cache_get_cache_pos(ram_addr_t address);
>   static CacheItem *cache_item_get(unsigned long pos, int item);
>   static void cache_resize(int64_t new_size);
>
> +/* XBZRLE (Xor Based Zero Length Encoding */
> +typedef struct __attribute__((packed)) XBZRLEHeader {

QEMU_PACKED.

Although this is a bit of a red flag.  Why does this structure need to be 
packed?  memcpy()'ing off the wire and then doing endian conversion is usually a 
recipe for bugs.  I think it's generally better to explicitly decode from the 
wire into a structure.  It forces you to use the correct wrappers and avoids 
making mistakes.

> +    uint8_t xh_flags;
> +    uint16_t xh_len;
> +    uint32_t xh_cksum;
> +} XBZRLEHeader;
> +
> +static uint8 *prev_cache_page;
> +
>   /***********************************************************/
>   /* XBRLE page cache implementation */
>   static CacheItem *cache_item_get(unsigned long pos, int item)
> @@ -196,6 +206,8 @@ static void cache_init(int64_t num_bytes)
>               it->it_addr = -1;
>           }
>       }
> +
> +    prev_cache_page = g_malloc(TARGET_PAGE_SIZE);
>   }
>
>   static void cache_fini(void)
> @@ -215,6 +227,9 @@ static void cache_fini(void)
>
>       g_free(page_cache);
>       page_cache = NULL;
> +
> +    g_free(prev_cache_page);
> +    prev_cache_page = NULL;
>   }
>
>   static unsigned long cache_get_cache_pos(ram_addr_t address)
> @@ -359,19 +374,74 @@ static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
>
>   }
>
> +#define ENCODING_FLAG_XBZRLE 0x1
> +
> +static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
> +                            ram_addr_t current_addr, RAMBlock *block,
> +                            ram_addr_t offset, int cont)
> +{
> +    int cache_location = -1, slot = -1, encoded_len = 0, bytes_sent = 0;
> +    XBZRLEHeader hdr = {0};
> +    CacheItem *it;
> +    uint8_t *encoded_buf = NULL;
> +
> +    /* get location */
> +    slot = cache_is_cached(current_addr);
> +    if (slot == -1) {
> +        cache_insert(current_addr, current_data, 0);
> +        goto done;
> +    }
> +    cache_location = cache_get_cache_pos(current_addr);
> +
> +    /* abort if page changed too much */
> +    it = cache_item_get(cache_location, slot);
> +
> +    /* we need to copy the current data before encoding so it won't change
> +       during encoding. cache_insert copies the data.
> +    */
> +    memcpy(it->it_data, prev_cache_page, TARGET_PAGE_SIZE);
> +    cache_insert(current_addr, current_data, 0);
> +
> +    /* XBZRLE encoding (if there is no overflow) */
> +    encoded_buf = (uint8_t *) g_malloc(TARGET_PAGE_SIZE);
> +    encoded_len = encode_page(prev_cache_page, it->it_data, TARGET_PAGE_SIZE,
> +                              encoded_buf, TARGET_PAGE_SIZE);
> +    if (encoded_len<  0) {
> +        DPRINTF("Unmodifed page - skipping\n");
> +        goto done;
> +    }
> +
> +    hdr.xh_len = encoded_len;
> +    hdr.xh_flags |= ENCODING_FLAG_XBZRLE;
> +
> +    /* Send XBZRLE based compressed page */
> +    save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
> +    qemu_put_buffer(f, (uint8_t *)&hdr, sizeof(hdr));

And indeed, you've made the mistake of not considering endianness.  FWIW, we use 
big endian for the most part in our migration protocol.

> +    qemu_put_buffer(f, encoded_buf, encoded_len);
> +    bytes_sent = encoded_len + sizeof(hdr);
> +
> +done:
> +    g_free(encoded_buf);
> +    return bytes_sent;
> +}
> +
>   static RAMBlock *last_block;
>   static ram_addr_t last_offset;
>
> -static int ram_save_block(QEMUFile *f)
> +static int ram_save_block(QEMUFile *f, int stage)
>   {
>       RAMBlock *block = last_block;
>       ram_addr_t offset = last_offset;
>       int bytes_sent = 0;
>       MemoryRegion *mr;
> +    ram_addr_t current_addr;
> +    int use_xbzrle =  migrate_use_xbzrle();
>
>       if (!block)
>           block = QLIST_FIRST(&ram_list.blocks);
>
> +    current_addr = block->offset + offset;
> +
>       do {
>           mr = block->mr;
>           if (memory_region_get_dirty(mr, offset, TARGET_PAGE_SIZE,
> @@ -388,7 +458,14 @@ static int ram_save_block(QEMUFile *f)
>                   save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS);
>                   qemu_put_byte(f, *p);
>                   bytes_sent = 1;
> -            } else {
> +            } else if (stage == 2&&  use_xbzrle) {
> +                bytes_sent = save_xbzrle_page(f, p, current_addr, block,
> +                    offset, cont);
> +            } else if (use_xbzrle&&  stage == 1) {
> +                cache_insert(current_addr, p, 0);
> +            }
> +
> +            if (!bytes_sent) {
>                   save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
>                   qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
>                   bytes_sent = TARGET_PAGE_SIZE;
> @@ -492,6 +569,9 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
>
>       if (stage<  0) {
>           memory_global_dirty_log_stop();
> +        if (migrate_use_xbzrle()) {
> +            cache_fini();
> +        }
>           return 0;
>       }
>
> @@ -504,6 +584,10 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
>           last_offset = 0;
>           sort_ram_list();
>
> +        if (migrate_use_xbzrle()) {
> +            cache_init(migrate_xbzrle_cache_size());
> +        }
> +
>           /* Make sure all dirty bits are set */
>           QLIST_FOREACH(block,&ram_list.blocks, next) {
>               for (addr = 0; addr<  block->length; addr += TARGET_PAGE_SIZE) {
> @@ -531,9 +615,11 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
>       while ((ret = qemu_file_rate_limit(f)) == 0) {
>           int bytes_sent;
>
> -        bytes_sent = ram_save_block(f);
> -        bytes_transferred += bytes_sent;
> -        if (bytes_sent == 0) { /* no more blocks */
> +        bytes_sent = ram_save_block(f, stage);
> +        /* bytes_sent -1 represent unchanged page */
> +        if (bytes_sent>  0) {
> +            bytes_transferred += bytes_sent;
> +        } else if (bytes_sent == 0) { /* no more blocks */
>               break;
>           }
>       }
> @@ -556,19 +642,67 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
>           int bytes_sent;
>
>           /* flush all remaining blocks regardless of rate limiting */
> -        while ((bytes_sent = ram_save_block(f)) != 0) {
> +        while ((bytes_sent = ram_save_block(f, stage)) != 0) {
>               bytes_transferred += bytes_sent;
>           }
>           memory_global_dirty_log_stop();
> +        if (migrate_use_xbzrle()) {
> +            cache_fini();
> +        }
>       }
>
>       qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>
>       expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
>
> +    DPRINTF("ram_save_live: expected(%ld)<= max(%ld)?\n", expected_time,
> +        migrate_max_downtime());
> +
>       return (stage == 2)&&  (expected_time<= migrate_max_downtime());
>   }
>
> +static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
> +{
> +    int ret, rc = -1;
> +    uint8_t *xbzrle_buf = NULL;
> +    XBZRLEHeader hdr = {0};
> +
> +    /* extract RLE header */
> +    qemu_get_buffer(f, (uint8_t *)&hdr, sizeof(hdr));
> +    if (!(hdr.xh_flags&  ENCODING_FLAG_XBZRLE)) {
> +        fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n");
> +        goto done;
> +    }
> +
> +    if (hdr.xh_len>  TARGET_PAGE_SIZE) {
> +        fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n");
> +        goto done;
> +    }
> +
> +    /* load data and decode */
> +    xbzrle_buf = (uint8_t *) g_malloc0(TARGET_PAGE_SIZE);
> +    qemu_get_buffer(f, xbzrle_buf, hdr.xh_len);
> +
> +    /* decode RLE */
> +    ret = decode_page(xbzrle_buf, hdr.xh_len, host, TARGET_PAGE_SIZE);
> +    if (ret == -1) {
> +        fprintf(stderr, "Failed to load XBZRLE page - decode error!\n");
> +        goto done;
> +    }
> +
> +    if (ret>  TARGET_PAGE_SIZE) {
> +        fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n",
> +                ret, TARGET_PAGE_SIZE);
> +        goto done;
> +    }
> +
> +    rc = 0;
> +
> +done:
> +    g_free(xbzrle_buf);
> +    return rc;
> +}
> +
>   static inline void *host_from_stream_offset(QEMUFile *f,
>                                               ram_addr_t offset,
>                                               int flags)
> @@ -614,14 +748,18 @@ static inline void *host_from_stream_offset_versioned(int version_id,
>   int ram_load(QEMUFile *f, void *opaque, int version_id)
>   {
>       ram_addr_t addr;
> -    int flags;
> +    int flags, ret = 0;
>       int error;
> +    static uint64_t seq_iter;
> +
> +    seq_iter++;
>
>       if (version_id<  4 || version_id>  4) {
>           return -EINVAL;
>       }
>
>       do {
> +        void *host;
>           addr = qemu_get_be64(f);
>
>           flags = addr&  ~TARGET_PAGE_MASK;
> @@ -645,8 +783,10 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>
>                       QLIST_FOREACH(block,&ram_list.blocks, next) {
>                           if (!strncmp(id, block->idstr, sizeof(id))) {
> -                            if (block->length != length)
> -                                return -EINVAL;
> +                            if (block->length != length) {
> +                                ret =  -EINVAL;
> +                                goto done;
> +                            }
>                               break;
>                           }
>                       }
> @@ -654,7 +794,8 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>                       if (!block) {
>                           fprintf(stderr, "Unknown ramblock \"%s\", cannot "
>                                   "accept migration\n", id);
> -                        return -EINVAL;
> +                        ret = -EINVAL;
> +                        goto done;
>                       }
>
>                       total_ram_bytes -= length;
> @@ -693,14 +834,25 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>                   return -EINVAL;
>               }
>               qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
> +        } else if (flags&  RAM_SAVE_FLAG_XBZRLE) {
> +            host = host_from_stream_offset_versioned(version_id,
> +                            f, addr, flags);
> +            if (load_xbzrle(f, addr, host)<  0) {
> +                ret = -EINVAL;
> +                goto done;
> +            }
>           }
>           error = qemu_file_get_error(f);
>           if (error) {
> -            return error;
> +            ret = error;
> +            goto done;
>           }
>       } while (!(flags&  RAM_SAVE_FLAG_EOS));
>
> -    return 0;
> +done:
> +    DPRINTF("Completed load of VM with exit code %d seq iteration %ld\n",
> +            ret, seq_iter);
> +    return ret;
>   }
>
>   #ifdef HAS_AUDIO
> diff --git a/migration.c b/migration.c
> index fc9511c..a524d5a 100644
> --- a/migration.c
> +++ b/migration.c
> @@ -464,3 +464,21 @@ void qmp_migrate_set_downtime(double value, Error **errp)
>       value = MAX(0, MIN(UINT64_MAX, value));
>       max_downtime = (uint64_t)value;
>   }
> +
> +int migrate_use_xbzrle(void)
> +{
> +    MigrationState *s;
> +
> +    s = migrate_get_current();
> +
> +    return s->params.use_xbzrle;
> +}
> +
> +int64_t migrate_xbzrle_cache_size(void)
> +{
> +    MigrationState *s;
> +
> +    s = migrate_get_current();
> +
> +    return s->params.xbzrle_cache_size;
> +}
> diff --git a/migration.h b/migration.h
> index b2097a9..06ce8ab 100644
> --- a/migration.h
> +++ b/migration.h
> @@ -22,6 +22,8 @@
>   struct MigrationParams {
>       int blk;
>       int shared;
> +    int use_xbzrle;
> +    int64_t xbzrle_cache_size;
>   };

I really would prefer not to punt decisions like whether to compress migration 
traffic to the user.  They really can't make an informed decision here.  I think 
we need to do something smarter than just stick another flag on migrate.

Regards,

Anthony Liguori
Orit Wasserman April 19, 2012, 6:11 a.m. UTC | #3
On 04/18/2012 07:54 PM, Juan Quintela wrote:
> Orit Wasserman <owasserm@redhat.com> wrote:
> 
>> @@ -104,6 +104,7 @@ const uint32_t arch_type = QEMU_ARCH;
>>  #define RAM_SAVE_FLAG_PAGE     0x08
>>  #define RAM_SAVE_FLAG_EOS      0x10
>>  #define RAM_SAVE_FLAG_CONTINUE 0x20
>> +#define RAM_SAVE_FLAG_XBZRLE  0x40
> 
> missing space for alignment?
> 
>>  
>>  #ifdef __ALTIVEC__
>>  #include <altivec.h>
>> @@ -165,6 +166,15 @@ static unsigned long cache_get_cache_pos(ram_addr_t address);
>>  static CacheItem *cache_item_get(unsigned long pos, int item);
>>  static void cache_resize(int64_t new_size);
>>  
>> +/* XBZRLE (Xor Based Zero Length Encoding */
>> +typedef struct __attribute__((packed)) XBZRLEHeader {
>> +    uint8_t xh_flags;
>> +    uint16_t xh_len;
>> +    uint32_t xh_cksum;
>> +} XBZRLEHeader;
> 
> We shouldn't use packed here. Explanation later.
> And if we are worried about space, it is much better ordering to do it
> by size:
> 
> typedef struct {
>     uint32_t xh_cksum;
>     uint16_t xh_len;
>     uint8_t xh_flags;
> } XBZRLEHeader;
> 
> 
> Once here, are we using the xh_cksum at all?

Not at the moment , maybe someday. do you want me to remove it ?

> 
> 
>> +static uint8 *prev_cache_page;
> 
> Move this also to the bucket struct?
> 
>> @@ -359,19 +374,74 @@ static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
>>  
>>  }
>>  
>> +#define ENCODING_FLAG_XBZRLE 0x1
>> +
>> +static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
>> +                            ram_addr_t current_addr, RAMBlock *block,
>> +                            ram_addr_t offset, int cont)
>> +{
>> +    int cache_location = -1, slot = -1, encoded_len = 0, bytes_sent = 0;
>> +    XBZRLEHeader hdr = {0};
>> +    CacheItem *it;
>> +    uint8_t *encoded_buf = NULL;
>> +
>> +    /* get location */
>> +    slot = cache_is_cached(current_addr);
>> +    if (slot == -1) {
>> +        cache_insert(current_addr, current_data, 0);
>> +        goto done;
>> +    }
>> +    cache_location = cache_get_cache_pos(current_addr);
>> +
>> +    /* abort if page changed too much */
>> +    it = cache_item_get(cache_location, slot);
> 
> Comment and code don't match?

I will move it to it's correct place (few lines down)

> 
>> +
>> +    /* we need to copy the current data before encoding so it won't change
>> +       during encoding. cache_insert copies the data.
>> +    */
>> +    memcpy(it->it_data, prev_cache_page, TARGET_PAGE_SIZE);
>> +    cache_insert(current_addr, current_data, 0);
>> +
>> +    /* XBZRLE encoding (if there is no overflow) */
>> +    encoded_buf = (uint8_t *) g_malloc(TARGET_PAGE_SIZE);
> 
> Cast not needed.  Can we have a pagge allocated for this in Bucket
> struct, XBRLE struct or whatever we want to call it?  Having to do a
> malloc for each page thaht we sent looks excesive?

Yes this is the current implementation. I will fix it we can use a static buffer
and zero it each time.

> 
>> +    encoded_len = encode_page(prev_cache_page, it->it_data, TARGET_PAGE_SIZE,
>> +                              encoded_buf, TARGET_PAGE_SIZE);
>> +    if (encoded_len < 0) {
>> +        DPRINTF("Unmodifed page - skipping\n");
>> +        goto done;
>> +    }
>> +
>> +    hdr.xh_len = encoded_len;
>> +    hdr.xh_flags |= ENCODING_FLAG_XBZRLE;
>> +
>> +    /* Send XBZRLE based compressed page */
>> +    save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
>> +    qemu_put_buffer(f, (uint8_t *) &hdr, sizeof(hdr));
> 
> This is wrong (same for load part), we are breking inter-arch migration.
I will fix it (and the load).
> 
> qemu_put_byte(f, hdr->xh_flags);
> qemu_put_be16(f, hdr->xh_len);
> qemu_put_be32(f, hdr->xh_cksum);
> 
> And the inverse on the load side.
> 
>> +    qemu_put_buffer(f, encoded_buf, encoded_len);
>> +    bytes_sent = encoded_len + sizeof(hdr);
>> +
>> +done:
>> +    g_free(encoded_buf);
>> +    return bytes_sent;
>> +}
>> +
>>  static RAMBlock *last_block;
>>  static ram_addr_t last_offset;
>>  
>> -static int ram_save_block(QEMUFile *f)
>> +static int ram_save_block(QEMUFile *f, int stage)
>>  {
>>      RAMBlock *block = last_block;
>>      ram_addr_t offset = last_offset;
>>      int bytes_sent = 0;
>>      MemoryRegion *mr;
>> +    ram_addr_t current_addr;
>> +    int use_xbzrle =  migrate_use_xbzrle();
>>  
>>      if (!block)
>>          block = QLIST_FIRST(&ram_list.blocks);
>>  
>> +    current_addr = block->offset + offset;
>> +
>>      do {
>>          mr = block->mr;
>>          if (memory_region_get_dirty(mr, offset, TARGET_PAGE_SIZE,
>> @@ -388,7 +458,14 @@ static int ram_save_block(QEMUFile *f)
>>                  save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS);
>>                  qemu_put_byte(f, *p);
>>                  bytes_sent = 1;
>> -            } else {
>> +            } else if (stage == 2 && use_xbzrle) {
> here
>> +                bytes_sent = save_xbzrle_page(f, p, current_addr, block,
>> +                    offset, cont);
>> +            } else if (use_xbzrle && stage == 1) {
> and here, can we used the check in the same order?

of course it will be fixed.

> 
>> +                cache_insert(current_addr, p, 0);
>> +            }
> 
> Or even better, change the code to something like:
> 
> else if (use_xbzrle) {
>      if (stage == 1) {
>         cache_insert(current_addr, p, 0);
>      } else if (cache == 2) {
>         bytes_sent = save_xbzrle_page(...);
>      }
> 
> And I don't understand why this function care at all about the stages.
> Normal code sent a page on stage == 1, here it is only saved on the
> cache and sent as a normal page (without compression).  And on stage==3,
> it is also always sent without compression.  I guess there is a method
> on this, but a comment will help?
sure
> 
>> +
>> +            if (!bytes_sent) {
>>                  save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
>>                  qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
>>                  bytes_sent = TARGET_PAGE_SIZE;
>> @@ -492,6 +569,9 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
>>  
>>      if (stage < 0) {
>>          memory_global_dirty_log_stop();
>> +        if (migrate_use_xbzrle()) {
>> +            cache_fini();
>> +        }
>>          return 0;
>>      }
>>  
>> @@ -504,6 +584,10 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
>>          last_offset = 0;
>>          sort_ram_list();
>>  
>> +        if (migrate_use_xbzrle()) {
>> +            cache_init(migrate_xbzrle_cache_size());
>> +        }
>> +
>>          /* Make sure all dirty bits are set */
>>          QLIST_FOREACH(block, &ram_list.blocks, next) {
>>              for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
>> @@ -531,9 +615,11 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
> 
>>      while ((ret = qemu_file_rate_limit(f)) == 0) {
>>          int bytes_sent;
>>  
>> -        bytes_sent = ram_save_block(f);
>> -        bytes_transferred += bytes_sent;
>> -        if (bytes_sent == 0) { /* no more blocks */
>> +        bytes_sent = ram_save_block(f, stage);
>> +        /* bytes_sent -1 represent unchanged page */
> Interesting represtantion.  Yes, I know that zero is already used (would
> be the obvious here).  Perhaps switching it with the old zero value, and
> -1 pass to mean no more blocks?

yes that would make the code more readable.
> 
>> +        if (bytes_sent > 0) {
>> +            bytes_transferred += bytes_sent;
>> +        } else if (bytes_sent == 0) { /* no more blocks */
>>              break;
>>          }
>>      }
>> @@ -556,19 +642,67 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
>>          int bytes_sent;
>>  
>>          /* flush all remaining blocks regardless of rate limiting */
>> -        while ((bytes_sent = ram_save_block(f)) != 0) {
>> +        while ((bytes_sent = ram_save_block(f, stage)) != 0) {
>>              bytes_transferred += bytes_sent;
>>          }
>>          memory_global_dirty_log_stop();
>> +        if (migrate_use_xbzrle()) {
>> +            cache_fini();
>> +        }
>>      }
> 
> Not that this problem is introduced on this series, but could we get
> this into a function migration_end/fini() that it just called in the two
> places where a migration can end.

I will add migration_end function.
> 
>>      qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>>  
>>      expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
>>  
>> +    DPRINTF("ram_save_live: expected(%ld) <= max(%ld)?\n", expected_time,
>> +        migrate_max_downtime());
>> +
>>      return (stage == 2) && (expected_time <= migrate_max_downtime());
>>  }
>>  
>> +static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
>> +{
>> +    int ret, rc = -1;
>> +    uint8_t *xbzrle_buf = NULL;
>> +    XBZRLEHeader hdr = {0};
>> +
>> +    /* extract RLE header */
>> +    qemu_get_buffer(f, (uint8_t *) &hdr, sizeof(hdr));
> 
> This needs to be done by hand as told.
> 
>> +    if (!(hdr.xh_flags & ENCODING_FLAG_XBZRLE)) {
>> +        fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n");
>> +        goto done;
>> +    }
>> +
>> +    if (hdr.xh_len > TARGET_PAGE_SIZE) {
>> +        fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n");
>> +        goto done;
>> +    }
>> +
>> +    /* load data and decode */
>> +    xbzrle_buf = (uint8_t *) g_malloc0(TARGET_PAGE_SIZE);
> 
> cast not needed.  Again, this can be done only _once_ for the whole
> migration.
will be removed
> 
>> +    qemu_get_buffer(f, xbzrle_buf, hdr.xh_len);
>> +
>> +    /* decode RLE */
>> +    ret = decode_page(xbzrle_buf, hdr.xh_len, host, TARGET_PAGE_SIZE);
>> +    if (ret == -1) {
>> +        fprintf(stderr, "Failed to load XBZRLE page - decode error!\n");
>> +        goto done;
>> +    }
>> +
>> +    if (ret > TARGET_PAGE_SIZE) {
>> +        fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n",
>> +                ret, TARGET_PAGE_SIZE);
>> +        goto done;
>> +    }
>> +
>> +    rc = 0;
>> +
>> +done:
>> +    g_free(xbzrle_buf);
>> +    return rc;
>> +}
>> +
>>  static inline void *host_from_stream_offset(QEMUFile *f,
>>                                              ram_addr_t offset,
>>                                              int flags)
>> @@ -614,14 +748,18 @@ static inline void *host_from_stream_offset_versioned(int version_id,
>>  int ram_load(QEMUFile *f, void *opaque, int version_id)
>>  {
>>      ram_addr_t addr;
>> -    int flags;
>> +    int flags, ret = 0;
>>      int error;
>> +    static uint64_t seq_iter;
>> +
>> +    seq_iter++;
>>  
>>      if (version_id < 4 || version_id > 4) {
>>          return -EINVAL;
>>      }
>>  
>>      do {
>> +        void *host;
>>          addr = qemu_get_be64(f);
>>  
>>          flags = addr & ~TARGET_PAGE_MASK;
>> @@ -645,8 +783,10 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>>  
>>                      QLIST_FOREACH(block, &ram_list.blocks, next) {
>>                          if (!strncmp(id, block->idstr, sizeof(id))) {
>> -                            if (block->length != length)
>> -                                return -EINVAL;
>> +                            if (block->length != length) {
>> +                                ret =  -EINVAL;
>> +                                goto done;
>> +                            }
>>                              break;
>>                          }
>>                      }
>> @@ -654,7 +794,8 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>>                      if (!block) {
>>                          fprintf(stderr, "Unknown ramblock \"%s\", cannot "
>>                                  "accept migration\n", id);
>> -                        return -EINVAL;
>> +                        ret = -EINVAL;
>> +                        goto done;
>>                      }
>>  
>>                      total_ram_bytes -= length;
>> @@ -693,14 +834,25 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>>                  return -EINVAL;
>>              }
>>              qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
>> +        } else if (flags & RAM_SAVE_FLAG_XBZRLE) {
>> +            host = host_from_stream_offset_versioned(version_id,
>> +                            f, addr, flags);
>> +            if (load_xbzrle(f, addr, host) < 0) {
>> +                ret = -EINVAL;
>> +                goto done;
>> +            }
> 
> Why we don't needto check that host is NULL in this case?
Oops You are right I will add the check.

> 
> [ I assume that encode/decode functions work ok, head hurts]
I hope so :). 
> 
> Can we change the name to xbzrle_page_encode()/decode()?  name sounds
> really too general. As they are not related to pages at all, we could
> also call it: xbrzle_buffer_encode()/decode?
sure.

Thanks,
Orit
> 
> Thanks, Juan.
Orit Wasserman April 19, 2012, 6:49 a.m. UTC | #4
On 04/18/2012 08:32 PM, Anthony Liguori wrote:
> On 04/11/2012 01:49 PM, Orit Wasserman wrote:
>> Add migration state to store XBRLE params (enablement and cache size).
>> In the outgoing migration check to see if the page is cached and
>> changed than send compressed page by using save_xbrle_page function.
>> In the incoming migration check to see if RAM_SAVE_FLAG_XBRLE is set
>> and decompress the page (by using load_xbrle function).
>>
>> Signed-off-by: Orit Wasserman<owasserm@redhat.com>
>> Signed-off-by: Benoit Hudzia<benoit.hudzia@sap.com>
>> Signed-off-by: Petter Svard<petters@cs.umu.se>
>> Signed-off-by: Aidan Shribman<aidan.shribman@sap.com>
>> ---
>>   arch_init.c |  176 +++++++++++++++++++++++++++++++++++++++++++++++++++++++----
>>   migration.c |   18 ++++++
>>   migration.h |    8 +++
>>   savevm.c    |   94 +++++++++++++++++++++++++++++++-
>>   4 files changed, 283 insertions(+), 13 deletions(-)
>>
>> diff --git a/arch_init.c b/arch_init.c
>> index f1690cf..793f0be 100644
>> --- a/arch_init.c
>> +++ b/arch_init.c
>> @@ -104,6 +104,7 @@ const uint32_t arch_type = QEMU_ARCH;
>>   #define RAM_SAVE_FLAG_PAGE     0x08
>>   #define RAM_SAVE_FLAG_EOS      0x10
>>   #define RAM_SAVE_FLAG_CONTINUE 0x20
>> +#define RAM_SAVE_FLAG_XBZRLE  0x40
>>
>>   #ifdef __ALTIVEC__
>>   #include<altivec.h>
>> @@ -165,6 +166,15 @@ static unsigned long cache_get_cache_pos(ram_addr_t address);
>>   static CacheItem *cache_item_get(unsigned long pos, int item);
>>   static void cache_resize(int64_t new_size);
>>
>> +/* XBZRLE (Xor Based Zero Length Encoding */
>> +typedef struct __attribute__((packed)) XBZRLEHeader {
> 
> QEMU_PACKED.
> 
> Although this is a bit of a red flag.  Why does this structure need to be packed?  memcpy()'ing off the wire and then doing endian conversion is usually a recipe for bugs.  I think it's generally better to explicitly decode from the wire into a structure.  It forces you to use the correct wrappers and avoids making mistakes.
> 
I will remove the packed and fix the load/save of the structure.

>> +    uint8_t xh_flags;
>> +    uint16_t xh_len;
>> +    uint32_t xh_cksum;
>> +} XBZRLEHeader;
>> +
>> +static uint8 *prev_cache_page;
>> +
>>   /***********************************************************/
>>   /* XBRLE page cache implementation */
>>   static CacheItem *cache_item_get(unsigned long pos, int item)
>> @@ -196,6 +206,8 @@ static void cache_init(int64_t num_bytes)
>>               it->it_addr = -1;
>>           }
>>       }
>> +
>> +    prev_cache_page = g_malloc(TARGET_PAGE_SIZE);
>>   }
>>
>>   static void cache_fini(void)
>> @@ -215,6 +227,9 @@ static void cache_fini(void)
>>
>>       g_free(page_cache);
>>       page_cache = NULL;
>> +
>> +    g_free(prev_cache_page);
>> +    prev_cache_page = NULL;
>>   }
>>
>>   static unsigned long cache_get_cache_pos(ram_addr_t address)
>> @@ -359,19 +374,74 @@ static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
>>
>>   }
>>
>> +#define ENCODING_FLAG_XBZRLE 0x1
>> +
>> +static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
>> +                            ram_addr_t current_addr, RAMBlock *block,
>> +                            ram_addr_t offset, int cont)
>> +{
>> +    int cache_location = -1, slot = -1, encoded_len = 0, bytes_sent = 0;
>> +    XBZRLEHeader hdr = {0};
>> +    CacheItem *it;
>> +    uint8_t *encoded_buf = NULL;
>> +
>> +    /* get location */
>> +    slot = cache_is_cached(current_addr);
>> +    if (slot == -1) {
>> +        cache_insert(current_addr, current_data, 0);
>> +        goto done;
>> +    }
>> +    cache_location = cache_get_cache_pos(current_addr);
>> +
>> +    /* abort if page changed too much */
>> +    it = cache_item_get(cache_location, slot);
>> +
>> +    /* we need to copy the current data before encoding so it won't change
>> +       during encoding. cache_insert copies the data.
>> +    */
>> +    memcpy(it->it_data, prev_cache_page, TARGET_PAGE_SIZE);
>> +    cache_insert(current_addr, current_data, 0);
>> +
>> +    /* XBZRLE encoding (if there is no overflow) */
>> +    encoded_buf = (uint8_t *) g_malloc(TARGET_PAGE_SIZE);
>> +    encoded_len = encode_page(prev_cache_page, it->it_data, TARGET_PAGE_SIZE,
>> +                              encoded_buf, TARGET_PAGE_SIZE);
>> +    if (encoded_len<  0) {
>> +        DPRINTF("Unmodifed page - skipping\n");
>> +        goto done;
>> +    }
>> +
>> +    hdr.xh_len = encoded_len;
>> +    hdr.xh_flags |= ENCODING_FLAG_XBZRLE;
>> +
>> +    /* Send XBZRLE based compressed page */
>> +    save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
>> +    qemu_put_buffer(f, (uint8_t *)&hdr, sizeof(hdr));
> 
> And indeed, you've made the mistake of not considering endianness.  FWIW, we use big endian for the most part in our migration protocol.
I will fix it.
> 
>> +    qemu_put_buffer(f, encoded_buf, encoded_len);
>> +    bytes_sent = encoded_len + sizeof(hdr);
>> +
>> +done:
>> +    g_free(encoded_buf);
>> +    return bytes_sent;
>> +}
>> +
>>   static RAMBlock *last_block;
>>   static ram_addr_t last_offset;
>>
>> -static int ram_save_block(QEMUFile *f)
>> +static int ram_save_block(QEMUFile *f, int stage)
>>   {
>>       RAMBlock *block = last_block;
>>       ram_addr_t offset = last_offset;
>>       int bytes_sent = 0;
>>       MemoryRegion *mr;
>> +    ram_addr_t current_addr;
>> +    int use_xbzrle =  migrate_use_xbzrle();
>>
>>       if (!block)
>>           block = QLIST_FIRST(&ram_list.blocks);
>>
>> +    current_addr = block->offset + offset;
>> +
>>       do {
>>           mr = block->mr;
>>           if (memory_region_get_dirty(mr, offset, TARGET_PAGE_SIZE,
>> @@ -388,7 +458,14 @@ static int ram_save_block(QEMUFile *f)
>>                   save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS);
>>                   qemu_put_byte(f, *p);
>>                   bytes_sent = 1;
>> -            } else {
>> +            } else if (stage == 2&&  use_xbzrle) {
>> +                bytes_sent = save_xbzrle_page(f, p, current_addr, block,
>> +                    offset, cont);
>> +            } else if (use_xbzrle&&  stage == 1) {
>> +                cache_insert(current_addr, p, 0);
>> +            }
>> +
>> +            if (!bytes_sent) {
>>                   save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
>>                   qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
>>                   bytes_sent = TARGET_PAGE_SIZE;
>> @@ -492,6 +569,9 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
>>
>>       if (stage<  0) {
>>           memory_global_dirty_log_stop();
>> +        if (migrate_use_xbzrle()) {
>> +            cache_fini();
>> +        }
>>           return 0;
>>       }
>>
>> @@ -504,6 +584,10 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
>>           last_offset = 0;
>>           sort_ram_list();
>>
>> +        if (migrate_use_xbzrle()) {
>> +            cache_init(migrate_xbzrle_cache_size());
>> +        }
>> +
>>           /* Make sure all dirty bits are set */
>>           QLIST_FOREACH(block,&ram_list.blocks, next) {
>>               for (addr = 0; addr<  block->length; addr += TARGET_PAGE_SIZE) {
>> @@ -531,9 +615,11 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
>>       while ((ret = qemu_file_rate_limit(f)) == 0) {
>>           int bytes_sent;
>>
>> -        bytes_sent = ram_save_block(f);
>> -        bytes_transferred += bytes_sent;
>> -        if (bytes_sent == 0) { /* no more blocks */
>> +        bytes_sent = ram_save_block(f, stage);
>> +        /* bytes_sent -1 represent unchanged page */
>> +        if (bytes_sent>  0) {
>> +            bytes_transferred += bytes_sent;
>> +        } else if (bytes_sent == 0) { /* no more blocks */
>>               break;
>>           }
>>       }
>> @@ -556,19 +642,67 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
>>           int bytes_sent;
>>
>>           /* flush all remaining blocks regardless of rate limiting */
>> -        while ((bytes_sent = ram_save_block(f)) != 0) {
>> +        while ((bytes_sent = ram_save_block(f, stage)) != 0) {
>>               bytes_transferred += bytes_sent;
>>           }
>>           memory_global_dirty_log_stop();
>> +        if (migrate_use_xbzrle()) {
>> +            cache_fini();
>> +        }
>>       }
>>
>>       qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>>
>>       expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
>>
>> +    DPRINTF("ram_save_live: expected(%ld)<= max(%ld)?\n", expected_time,
>> +        migrate_max_downtime());
>> +
>>       return (stage == 2)&&  (expected_time<= migrate_max_downtime());
>>   }
>>
>> +static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
>> +{
>> +    int ret, rc = -1;
>> +    uint8_t *xbzrle_buf = NULL;
>> +    XBZRLEHeader hdr = {0};
>> +
>> +    /* extract RLE header */
>> +    qemu_get_buffer(f, (uint8_t *)&hdr, sizeof(hdr));
>> +    if (!(hdr.xh_flags&  ENCODING_FLAG_XBZRLE)) {
>> +        fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n");
>> +        goto done;
>> +    }
>> +
>> +    if (hdr.xh_len>  TARGET_PAGE_SIZE) {
>> +        fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n");
>> +        goto done;
>> +    }
>> +
>> +    /* load data and decode */
>> +    xbzrle_buf = (uint8_t *) g_malloc0(TARGET_PAGE_SIZE);
>> +    qemu_get_buffer(f, xbzrle_buf, hdr.xh_len);
>> +
>> +    /* decode RLE */
>> +    ret = decode_page(xbzrle_buf, hdr.xh_len, host, TARGET_PAGE_SIZE);
>> +    if (ret == -1) {
>> +        fprintf(stderr, "Failed to load XBZRLE page - decode error!\n");
>> +        goto done;
>> +    }
>> +
>> +    if (ret>  TARGET_PAGE_SIZE) {
>> +        fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n",
>> +                ret, TARGET_PAGE_SIZE);
>> +        goto done;
>> +    }
>> +
>> +    rc = 0;
>> +
>> +done:
>> +    g_free(xbzrle_buf);
>> +    return rc;
>> +}
>> +
>>   static inline void *host_from_stream_offset(QEMUFile *f,
>>                                               ram_addr_t offset,
>>                                               int flags)
>> @@ -614,14 +748,18 @@ static inline void *host_from_stream_offset_versioned(int version_id,
>>   int ram_load(QEMUFile *f, void *opaque, int version_id)
>>   {
>>       ram_addr_t addr;
>> -    int flags;
>> +    int flags, ret = 0;
>>       int error;
>> +    static uint64_t seq_iter;
>> +
>> +    seq_iter++;
>>
>>       if (version_id<  4 || version_id>  4) {
>>           return -EINVAL;
>>       }
>>
>>       do {
>> +        void *host;
>>           addr = qemu_get_be64(f);
>>
>>           flags = addr&  ~TARGET_PAGE_MASK;
>> @@ -645,8 +783,10 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>>
>>                       QLIST_FOREACH(block,&ram_list.blocks, next) {
>>                           if (!strncmp(id, block->idstr, sizeof(id))) {
>> -                            if (block->length != length)
>> -                                return -EINVAL;
>> +                            if (block->length != length) {
>> +                                ret =  -EINVAL;
>> +                                goto done;
>> +                            }
>>                               break;
>>                           }
>>                       }
>> @@ -654,7 +794,8 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>>                       if (!block) {
>>                           fprintf(stderr, "Unknown ramblock \"%s\", cannot "
>>                                   "accept migration\n", id);
>> -                        return -EINVAL;
>> +                        ret = -EINVAL;
>> +                        goto done;
>>                       }
>>
>>                       total_ram_bytes -= length;
>> @@ -693,14 +834,25 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>>                   return -EINVAL;
>>               }
>>               qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
>> +        } else if (flags&  RAM_SAVE_FLAG_XBZRLE) {
>> +            host = host_from_stream_offset_versioned(version_id,
>> +                            f, addr, flags);
>> +            if (load_xbzrle(f, addr, host)<  0) {
>> +                ret = -EINVAL;
>> +                goto done;
>> +            }
>>           }
>>           error = qemu_file_get_error(f);
>>           if (error) {
>> -            return error;
>> +            ret = error;
>> +            goto done;
>>           }
>>       } while (!(flags&  RAM_SAVE_FLAG_EOS));
>>
>> -    return 0;
>> +done:
>> +    DPRINTF("Completed load of VM with exit code %d seq iteration %ld\n",
>> +            ret, seq_iter);
>> +    return ret;
>>   }
>>
>>   #ifdef HAS_AUDIO
>> diff --git a/migration.c b/migration.c
>> index fc9511c..a524d5a 100644
>> --- a/migration.c
>> +++ b/migration.c
>> @@ -464,3 +464,21 @@ void qmp_migrate_set_downtime(double value, Error **errp)
>>       value = MAX(0, MIN(UINT64_MAX, value));
>>       max_downtime = (uint64_t)value;
>>   }
>> +
>> +int migrate_use_xbzrle(void)
>> +{
>> +    MigrationState *s;
>> +
>> +    s = migrate_get_current();
>> +
>> +    return s->params.use_xbzrle;
>> +}
>> +
>> +int64_t migrate_xbzrle_cache_size(void)
>> +{
>> +    MigrationState *s;
>> +
>> +    s = migrate_get_current();
>> +
>> +    return s->params.xbzrle_cache_size;
>> +}
>> diff --git a/migration.h b/migration.h
>> index b2097a9..06ce8ab 100644
>> --- a/migration.h
>> +++ b/migration.h
>> @@ -22,6 +22,8 @@
>>   struct MigrationParams {
>>       int blk;
>>       int shared;
>> +    int use_xbzrle;
>> +    int64_t xbzrle_cache_size;
>>   };
> 
> I really would prefer not to punt decisions like whether to compress migration traffic to the user.  They really can't make an informed decision here.  I think we need to do something smarter than just stick another flag on migrate.
> 

I agree, and I'm planning to add automation later.
We will still need the migration flag in the command because only the management can ensure that the destination can decode the data.
so management will need to use the migration_caps command to check what the dest QEMU can handle and only than set the flag when activating the migration.
Maybe some day we will have feature negotiation as part of the migration protocol.

Thanks,
Orit 
> Regards,
> 
> Anthony Liguori
>
diff mbox

Patch

diff --git a/arch_init.c b/arch_init.c
index f1690cf..793f0be 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -104,6 +104,7 @@  const uint32_t arch_type = QEMU_ARCH;
 #define RAM_SAVE_FLAG_PAGE     0x08
 #define RAM_SAVE_FLAG_EOS      0x10
 #define RAM_SAVE_FLAG_CONTINUE 0x20
+#define RAM_SAVE_FLAG_XBZRLE  0x40
 
 #ifdef __ALTIVEC__
 #include <altivec.h>
@@ -165,6 +166,15 @@  static unsigned long cache_get_cache_pos(ram_addr_t address);
 static CacheItem *cache_item_get(unsigned long pos, int item);
 static void cache_resize(int64_t new_size);
 
+/* XBZRLE (Xor Based Zero Length Encoding */
+typedef struct __attribute__((packed)) XBZRLEHeader {
+    uint8_t xh_flags;
+    uint16_t xh_len;
+    uint32_t xh_cksum;
+} XBZRLEHeader;
+
+static uint8 *prev_cache_page;
+
 /***********************************************************/
 /* XBRLE page cache implementation */
 static CacheItem *cache_item_get(unsigned long pos, int item)
@@ -196,6 +206,8 @@  static void cache_init(int64_t num_bytes)
             it->it_addr = -1;
         }
     }
+
+    prev_cache_page = g_malloc(TARGET_PAGE_SIZE);
 }
 
 static void cache_fini(void)
@@ -215,6 +227,9 @@  static void cache_fini(void)
 
     g_free(page_cache);
     page_cache = NULL;
+
+    g_free(prev_cache_page);
+    prev_cache_page = NULL;
 }
 
 static unsigned long cache_get_cache_pos(ram_addr_t address)
@@ -359,19 +374,74 @@  static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
 
 }
 
+#define ENCODING_FLAG_XBZRLE 0x1
+
+static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
+                            ram_addr_t current_addr, RAMBlock *block,
+                            ram_addr_t offset, int cont)
+{
+    int cache_location = -1, slot = -1, encoded_len = 0, bytes_sent = 0;
+    XBZRLEHeader hdr = {0};
+    CacheItem *it;
+    uint8_t *encoded_buf = NULL;
+
+    /* get location */
+    slot = cache_is_cached(current_addr);
+    if (slot == -1) {
+        cache_insert(current_addr, current_data, 0);
+        goto done;
+    }
+    cache_location = cache_get_cache_pos(current_addr);
+
+    /* abort if page changed too much */
+    it = cache_item_get(cache_location, slot);
+
+    /* we need to copy the current data before encoding so it won't change
+       during encoding. cache_insert copies the data.
+    */
+    memcpy(it->it_data, prev_cache_page, TARGET_PAGE_SIZE);
+    cache_insert(current_addr, current_data, 0);
+
+    /* XBZRLE encoding (if there is no overflow) */
+    encoded_buf = (uint8_t *) g_malloc(TARGET_PAGE_SIZE);
+    encoded_len = encode_page(prev_cache_page, it->it_data, TARGET_PAGE_SIZE,
+                              encoded_buf, TARGET_PAGE_SIZE);
+    if (encoded_len < 0) {
+        DPRINTF("Unmodifed page - skipping\n");
+        goto done;
+    }
+
+    hdr.xh_len = encoded_len;
+    hdr.xh_flags |= ENCODING_FLAG_XBZRLE;
+
+    /* Send XBZRLE based compressed page */
+    save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
+    qemu_put_buffer(f, (uint8_t *) &hdr, sizeof(hdr));
+    qemu_put_buffer(f, encoded_buf, encoded_len);
+    bytes_sent = encoded_len + sizeof(hdr);
+
+done:
+    g_free(encoded_buf);
+    return bytes_sent;
+}
+
 static RAMBlock *last_block;
 static ram_addr_t last_offset;
 
-static int ram_save_block(QEMUFile *f)
+static int ram_save_block(QEMUFile *f, int stage)
 {
     RAMBlock *block = last_block;
     ram_addr_t offset = last_offset;
     int bytes_sent = 0;
     MemoryRegion *mr;
+    ram_addr_t current_addr;
+    int use_xbzrle =  migrate_use_xbzrle();
 
     if (!block)
         block = QLIST_FIRST(&ram_list.blocks);
 
+    current_addr = block->offset + offset;
+
     do {
         mr = block->mr;
         if (memory_region_get_dirty(mr, offset, TARGET_PAGE_SIZE,
@@ -388,7 +458,14 @@  static int ram_save_block(QEMUFile *f)
                 save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS);
                 qemu_put_byte(f, *p);
                 bytes_sent = 1;
-            } else {
+            } else if (stage == 2 && use_xbzrle) {
+                bytes_sent = save_xbzrle_page(f, p, current_addr, block,
+                    offset, cont);
+            } else if (use_xbzrle && stage == 1) {
+                cache_insert(current_addr, p, 0);
+            }
+
+            if (!bytes_sent) {
                 save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
                 qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
                 bytes_sent = TARGET_PAGE_SIZE;
@@ -492,6 +569,9 @@  int ram_save_live(QEMUFile *f, int stage, void *opaque)
 
     if (stage < 0) {
         memory_global_dirty_log_stop();
+        if (migrate_use_xbzrle()) {
+            cache_fini();
+        }
         return 0;
     }
 
@@ -504,6 +584,10 @@  int ram_save_live(QEMUFile *f, int stage, void *opaque)
         last_offset = 0;
         sort_ram_list();
 
+        if (migrate_use_xbzrle()) {
+            cache_init(migrate_xbzrle_cache_size());
+        }
+
         /* Make sure all dirty bits are set */
         QLIST_FOREACH(block, &ram_list.blocks, next) {
             for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
@@ -531,9 +615,11 @@  int ram_save_live(QEMUFile *f, int stage, void *opaque)
     while ((ret = qemu_file_rate_limit(f)) == 0) {
         int bytes_sent;
 
-        bytes_sent = ram_save_block(f);
-        bytes_transferred += bytes_sent;
-        if (bytes_sent == 0) { /* no more blocks */
+        bytes_sent = ram_save_block(f, stage);
+        /* bytes_sent -1 represent unchanged page */
+        if (bytes_sent > 0) {
+            bytes_transferred += bytes_sent;
+        } else if (bytes_sent == 0) { /* no more blocks */
             break;
         }
     }
@@ -556,19 +642,67 @@  int ram_save_live(QEMUFile *f, int stage, void *opaque)
         int bytes_sent;
 
         /* flush all remaining blocks regardless of rate limiting */
-        while ((bytes_sent = ram_save_block(f)) != 0) {
+        while ((bytes_sent = ram_save_block(f, stage)) != 0) {
             bytes_transferred += bytes_sent;
         }
         memory_global_dirty_log_stop();
+        if (migrate_use_xbzrle()) {
+            cache_fini();
+        }
     }
 
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
 
     expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
 
+    DPRINTF("ram_save_live: expected(%ld) <= max(%ld)?\n", expected_time,
+        migrate_max_downtime());
+
     return (stage == 2) && (expected_time <= migrate_max_downtime());
 }
 
+static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
+{
+    int ret, rc = -1;
+    uint8_t *xbzrle_buf = NULL;
+    XBZRLEHeader hdr = {0};
+
+    /* extract RLE header */
+    qemu_get_buffer(f, (uint8_t *) &hdr, sizeof(hdr));
+    if (!(hdr.xh_flags & ENCODING_FLAG_XBZRLE)) {
+        fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n");
+        goto done;
+    }
+
+    if (hdr.xh_len > TARGET_PAGE_SIZE) {
+        fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n");
+        goto done;
+    }
+
+    /* load data and decode */
+    xbzrle_buf = (uint8_t *) g_malloc0(TARGET_PAGE_SIZE);
+    qemu_get_buffer(f, xbzrle_buf, hdr.xh_len);
+
+    /* decode RLE */
+    ret = decode_page(xbzrle_buf, hdr.xh_len, host, TARGET_PAGE_SIZE);
+    if (ret == -1) {
+        fprintf(stderr, "Failed to load XBZRLE page - decode error!\n");
+        goto done;
+    }
+
+    if (ret > TARGET_PAGE_SIZE) {
+        fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n",
+                ret, TARGET_PAGE_SIZE);
+        goto done;
+    }
+
+    rc = 0;
+
+done:
+    g_free(xbzrle_buf);
+    return rc;
+}
+
 static inline void *host_from_stream_offset(QEMUFile *f,
                                             ram_addr_t offset,
                                             int flags)
@@ -614,14 +748,18 @@  static inline void *host_from_stream_offset_versioned(int version_id,
 int ram_load(QEMUFile *f, void *opaque, int version_id)
 {
     ram_addr_t addr;
-    int flags;
+    int flags, ret = 0;
     int error;
+    static uint64_t seq_iter;
+
+    seq_iter++;
 
     if (version_id < 4 || version_id > 4) {
         return -EINVAL;
     }
 
     do {
+        void *host;
         addr = qemu_get_be64(f);
 
         flags = addr & ~TARGET_PAGE_MASK;
@@ -645,8 +783,10 @@  int ram_load(QEMUFile *f, void *opaque, int version_id)
 
                     QLIST_FOREACH(block, &ram_list.blocks, next) {
                         if (!strncmp(id, block->idstr, sizeof(id))) {
-                            if (block->length != length)
-                                return -EINVAL;
+                            if (block->length != length) {
+                                ret =  -EINVAL;
+                                goto done;
+                            }
                             break;
                         }
                     }
@@ -654,7 +794,8 @@  int ram_load(QEMUFile *f, void *opaque, int version_id)
                     if (!block) {
                         fprintf(stderr, "Unknown ramblock \"%s\", cannot "
                                 "accept migration\n", id);
-                        return -EINVAL;
+                        ret = -EINVAL;
+                        goto done;
                     }
 
                     total_ram_bytes -= length;
@@ -693,14 +834,25 @@  int ram_load(QEMUFile *f, void *opaque, int version_id)
                 return -EINVAL;
             }
             qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
+        } else if (flags & RAM_SAVE_FLAG_XBZRLE) {
+            host = host_from_stream_offset_versioned(version_id,
+                            f, addr, flags);
+            if (load_xbzrle(f, addr, host) < 0) {
+                ret = -EINVAL;
+                goto done;
+            }
         }
         error = qemu_file_get_error(f);
         if (error) {
-            return error;
+            ret = error;
+            goto done;
         }
     } while (!(flags & RAM_SAVE_FLAG_EOS));
 
-    return 0;
+done:
+    DPRINTF("Completed load of VM with exit code %d seq iteration %ld\n",
+            ret, seq_iter);
+    return ret;
 }
 
 #ifdef HAS_AUDIO
diff --git a/migration.c b/migration.c
index fc9511c..a524d5a 100644
--- a/migration.c
+++ b/migration.c
@@ -464,3 +464,21 @@  void qmp_migrate_set_downtime(double value, Error **errp)
     value = MAX(0, MIN(UINT64_MAX, value));
     max_downtime = (uint64_t)value;
 }
+
+int migrate_use_xbzrle(void)
+{
+    MigrationState *s;
+
+    s = migrate_get_current();
+
+    return s->params.use_xbzrle;
+}
+
+int64_t migrate_xbzrle_cache_size(void)
+{
+    MigrationState *s;
+
+    s = migrate_get_current();
+
+    return s->params.xbzrle_cache_size;
+}
diff --git a/migration.h b/migration.h
index b2097a9..06ce8ab 100644
--- a/migration.h
+++ b/migration.h
@@ -22,6 +22,8 @@ 
 struct MigrationParams {
     int blk;
     int shared;
+    int use_xbzrle;
+    int64_t xbzrle_cache_size;
 };
 
 typedef struct MigrationState MigrationState;
@@ -100,5 +102,11 @@  void migrate_del_blocker(Error *reason);
 /* ULEB128 */
 int uleb128_encode_small(uint8_t *out, uint32_t n);
 int uleb128_decode_small(const uint8 *in, uint32_t *n);
+int encode_page(uint8_t *old_buf, uint8_t *new_buf, int slen,
+                uint8_t *dst, int dlen);
+int decode_page(uint8_t *src, int slen, uint8_t *dst, int dlen);
+
+int migrate_use_xbzrle(void);
+int64_t migrate_xbzrle_cache_size(void);
 
 #endif
diff --git a/savevm.c b/savevm.c
index 4736784..fbf1903 100644
--- a/savevm.c
+++ b/savevm.c
@@ -1711,7 +1711,9 @@  static int qemu_savevm_state(QEMUFile *f)
     int ret;
     MigrationParams params = {
         .blk = 0,
-        .shared = 0
+        .shared = 0,
+        .use_xbzrle = 0,
+        .xbzrle_cache_size = 0
     };
 
     if (qemu_savevm_state_blocked(NULL)) {
@@ -2401,3 +2403,93 @@  int uleb128_decode_small(const uint8 *in, uint32_t *n)
     }
 }
 
+/*
+  page = zrun nzrun
+       | zrun nzrun page
+
+  zrun = length
+
+  nzrun = length byte...
+
+  length = uleb128 encoded integer
+ */
+int encode_page(uint8_t *old_buf, uint8_t *new_buf, int slen, uint8_t *dst,
+                int dlen)
+{
+    uint32_t zrun_len = 0, nzrun_len = 0;
+    int d = 0 , i = 0;
+    uint8_t *nzrun_start = NULL;
+
+    while (i < slen) {
+        /* overflow */
+        if (d + 2 > dlen) {
+            return 0;
+        }
+
+        while (!(old_buf[i] ^ new_buf[i]) && ++i <= slen) {
+            zrun_len++;
+        }
+
+        /* buffer unchanged */
+        if (zrun_len == slen) {
+            return -1;
+        }
+
+        /* skip last zero run */
+        if (i == slen + 1) {
+            return d;
+        }
+
+        d += uleb128_encode_small(dst + d, zrun_len);
+
+        zrun_len = 0;
+        nzrun_start = new_buf + i;
+        while ((old_buf[i] ^ new_buf[i]) != 0 && ++i <= slen) {
+            nzrun_len++;
+        }
+
+        /* overflow */
+        if (d + nzrun_len + 2 > dlen) {
+            return 0;
+        }
+
+        d += uleb128_encode_small(dst + d, nzrun_len);
+        memcpy(dst + d, nzrun_start, nzrun_len);
+        d += nzrun_len;
+        nzrun_len = 0;
+    }
+
+    return d;
+}
+
+int decode_page(uint8_t *src, int slen, uint8_t *dst, int dlen)
+{
+    int i = 0, d = 0;
+    uint32_t count = 0;
+
+    while (i < slen) {
+
+        /* zrun */
+        i += uleb128_decode_small(src + i, &count);
+        d += count;
+
+        /* overflow */
+        assert(d <= dlen);
+
+        /* completed decoding */
+        if (i == slen - 1) {
+            return d;
+        }
+
+        /* nzrun */
+        i += uleb128_decode_small(src + i, &count);
+
+        assert(d + count <= dlen);
+
+        memcpy(dst + d , src + i, count);
+        d += count;
+        i += count;
+    }
+
+    return d;
+}