[PATCHv2] migration: move ram migration support

Message ID 20130307133248.GA27676@redhat.com
State New

Commit Message

Michael S. Tsirkin March 7, 2013, 1:32 p.m. UTC
Move RAM migration code from arch_init to savevm-ram.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---

Note: this is on top of Juan's pull request

Changes from v1:
    - renamed source file, rebased on top of migration.next as
      suggested by Paolo

 Makefile.target |   2 +-
 arch_init.c     | 763 -----------------------------------------------------
 savevm-ram.c    | 804 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 805 insertions(+), 764 deletions(-)
 create mode 100644 savevm-ram.c

Comments

Paolo Bonzini March 7, 2013, 3:20 p.m. UTC | #1
On 07/03/2013 14:32, Michael S. Tsirkin wrote:
> +#ifdef DEBUG_ARCH_INIT
> +#define DPRINTF(fmt, ...) \
> +    do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0)

These need to be adjusted, but it can be a follow-up.

Paolo
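
A possible shape for that follow-up (a sketch only; the DEBUG_SAVEVM_RAM guard
and the "savevm-ram: " prefix are illustrative names, not something this patch
or the thread specifies):

    #ifdef DEBUG_SAVEVM_RAM
    #define DPRINTF(fmt, ...) \
        do { fprintf(stdout, "savevm-ram: " fmt, ## __VA_ARGS__); } while (0)
    #else
    #define DPRINTF(fmt, ...) \
        do { } while (0)
    #endif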

> +#else
> +#define DPRINTF(fmt, ...) \
> +    do { } while (0)
> +#endif
> +
> +/***********************************************************/
> +/* ram save/restore */
> +
> +#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
> +#define RAM_SAVE_FLAG_COMPRESS 0x02
> +#define RAM_SAVE_FLAG_MEM_SIZE 0x04
> +#define RAM_SAVE_FLAG_PAGE     0x08
> +#define RAM_SAVE_FLAG_EOS      0x10
> +#define RAM_SAVE_FLAG_CONTINUE 0x20
> +#define RAM_SAVE_FLAG_XBZRLE   0x40
> +
> +#ifdef __ALTIVEC__
> +#include <altivec.h>
> +#define VECTYPE        vector unsigned char
> +#define SPLAT(p)       vec_splat(vec_ld(0, p), 0)
> +#define ALL_EQ(v1, v2) vec_all_eq(v1, v2)
> +/* altivec.h may redefine the bool macro as vector type.
> + * Reset it to POSIX semantics. */
> +#undef bool
> +#define bool _Bool
> +#elif defined __SSE2__
> +#include <emmintrin.h>
> +#define VECTYPE        __m128i
> +#define SPLAT(p)       _mm_set1_epi8(*(p))
> +#define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 0xFFFF)
> +#else
> +#define VECTYPE        unsigned long
> +#define SPLAT(p)       (*(p) * (~0UL / 255))
> +#define ALL_EQ(v1, v2) ((v1) == (v2))
> +#endif
> +
> +static int is_dup_page(uint8_t *page)
> +{
> +    VECTYPE *p = (VECTYPE *)page;
> +    VECTYPE val = SPLAT(page);
> +    int i;
> +
> +    for (i = 0; i < TARGET_PAGE_SIZE / sizeof(VECTYPE); i++) {
> +        if (!ALL_EQ(val, p[i])) {
> +            return 0;
> +        }
> +    }
> +
> +    return 1;
> +}
> +
> +/* struct contains XBZRLE cache and a static page
> +   used by the compression */
> +static struct {
> +    /* buffer used for XBZRLE encoding */
> +    uint8_t *encoded_buf;
> +    /* buffer for storing page content */
> +    uint8_t *current_buf;
> +    /* buffer used for XBZRLE decoding */
> +    uint8_t *decoded_buf;
> +    /* Cache for XBZRLE */
> +    PageCache *cache;
> +} XBZRLE = {
> +    .encoded_buf = NULL,
> +    .current_buf = NULL,
> +    .decoded_buf = NULL,
> +    .cache = NULL,
> +};
> +
> +
> +int64_t xbzrle_cache_resize(int64_t new_size)
> +{
> +    if (XBZRLE.cache != NULL) {
> +        return cache_resize(XBZRLE.cache, new_size / TARGET_PAGE_SIZE) *
> +            TARGET_PAGE_SIZE;
> +    }
> +    return pow2floor(new_size);
> +}
> +
> +/* accounting for migration statistics */
> +typedef struct AccountingInfo {
> +    uint64_t dup_pages;
> +    uint64_t norm_pages;
> +    uint64_t iterations;
> +    uint64_t xbzrle_bytes;
> +    uint64_t xbzrle_pages;
> +    uint64_t xbzrle_cache_miss;
> +    uint64_t xbzrle_overflows;
> +} AccountingInfo;
> +
> +static AccountingInfo acct_info;
> +
> +static void acct_clear(void)
> +{
> +    memset(&acct_info, 0, sizeof(acct_info));
> +}
> +
> +uint64_t dup_mig_bytes_transferred(void)
> +{
> +    return acct_info.dup_pages * TARGET_PAGE_SIZE;
> +}
> +
> +uint64_t dup_mig_pages_transferred(void)
> +{
> +    return acct_info.dup_pages;
> +}
> +
> +uint64_t norm_mig_bytes_transferred(void)
> +{
> +    return acct_info.norm_pages * TARGET_PAGE_SIZE;
> +}
> +
> +uint64_t norm_mig_pages_transferred(void)
> +{
> +    return acct_info.norm_pages;
> +}
> +
> +uint64_t xbzrle_mig_bytes_transferred(void)
> +{
> +    return acct_info.xbzrle_bytes;
> +}
> +
> +uint64_t xbzrle_mig_pages_transferred(void)
> +{
> +    return acct_info.xbzrle_pages;
> +}
> +
> +uint64_t xbzrle_mig_pages_cache_miss(void)
> +{
> +    return acct_info.xbzrle_cache_miss;
> +}
> +
> +uint64_t xbzrle_mig_pages_overflow(void)
> +{
> +    return acct_info.xbzrle_overflows;
> +}
> +
> +static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
> +                             int cont, int flag)
> +{
> +    size_t size;
> +
> +    qemu_put_be64(f, offset | cont | flag);
> +    size = 8;
> +
> +    if (!cont) {
> +        qemu_put_byte(f, strlen(block->idstr));
> +        qemu_put_buffer(f, (uint8_t *)block->idstr,
> +                        strlen(block->idstr));
> +        size += 1 + strlen(block->idstr);
> +    }
> +    return size;
> +}
> +
> +#define ENCODING_FLAG_XBZRLE 0x1
> +
> +static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
> +                            ram_addr_t current_addr, RAMBlock *block,
> +                            ram_addr_t offset, int cont, bool last_stage)
> +{
> +    int encoded_len = 0, bytes_sent = -1;
> +    uint8_t *prev_cached_page;
> +
> +    if (!cache_is_cached(XBZRLE.cache, current_addr)) {
> +        if (!last_stage) {
> +            cache_insert(XBZRLE.cache, current_addr, current_data);
> +        }
> +        acct_info.xbzrle_cache_miss++;
> +        return -1;
> +    }
> +
> +    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
> +
> +    /* save current buffer into memory */
> +    memcpy(XBZRLE.current_buf, current_data, TARGET_PAGE_SIZE);
> +
> +    /* XBZRLE encoding (if there is no overflow) */
> +    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
> +                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
> +                                       TARGET_PAGE_SIZE);
> +    if (encoded_len == 0) {
> +        DPRINTF("Skipping unmodified page\n");
> +        return 0;
> +    } else if (encoded_len == -1) {
> +        DPRINTF("Overflow\n");
> +        acct_info.xbzrle_overflows++;
> +        /* update data in the cache */
> +        memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE);
> +        return -1;
> +    }
> +
> +    /* we need to update the data in the cache, in order to get the same data */
> +    if (!last_stage) {
> +        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
> +    }
> +
> +    /* Send XBZRLE based compressed page */
> +    bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
> +    qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
> +    qemu_put_be16(f, encoded_len);
> +    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
> +    bytes_sent += encoded_len + 1 + 2;
> +    acct_info.xbzrle_pages++;
> +    acct_info.xbzrle_bytes += bytes_sent;
> +
> +    return bytes_sent;
> +}
> +
> +
> +/* This is the last block that we have visited searching for dirty pages
> + */
> +static RAMBlock *last_seen_block;
> +/* This is the last block from where we have sent data */
> +static RAMBlock *last_sent_block;
> +static ram_addr_t last_offset;
> +static unsigned long *migration_bitmap;
> +static uint64_t migration_dirty_pages;
> +static uint32_t last_version;
> +
> +static inline
> +ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
> +                                                 ram_addr_t start)
> +{
> +    unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
> +    unsigned long nr = base + (start >> TARGET_PAGE_BITS);
> +    unsigned long size = base + (int128_get64(mr->size) >> TARGET_PAGE_BITS);
> +
> +    unsigned long next = find_next_bit(migration_bitmap, size, nr);
> +
> +    if (next < size) {
> +        clear_bit(next, migration_bitmap);
> +        migration_dirty_pages--;
> +    }
> +    return (next - base) << TARGET_PAGE_BITS;
> +}
> +
> +static inline bool migration_bitmap_set_dirty(MemoryRegion *mr,
> +                                              ram_addr_t offset)
> +{
> +    bool ret;
> +    int nr = (mr->ram_addr + offset) >> TARGET_PAGE_BITS;
> +
> +    ret = test_and_set_bit(nr, migration_bitmap);
> +
> +    if (!ret) {
> +        migration_dirty_pages++;
> +    }
> +    return ret;
> +}
> +
> +/* Needs iothread lock! */
> +
> +static void migration_bitmap_sync(void)
> +{
> +    RAMBlock *block;
> +    ram_addr_t addr;
> +    uint64_t num_dirty_pages_init = migration_dirty_pages;
> +    MigrationState *s = migrate_get_current();
> +    static int64_t start_time;
> +    static int64_t num_dirty_pages_period;
> +    int64_t end_time;
> +
> +    if (!start_time) {
> +        start_time = qemu_get_clock_ms(rt_clock);
> +    }
> +
> +    trace_migration_bitmap_sync_start();
> +    memory_global_sync_dirty_bitmap(get_system_memory());
> +
> +    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> +        for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
> +            if (memory_region_test_and_clear_dirty(block->mr,
> +                                                   addr, TARGET_PAGE_SIZE,
> +                                                   DIRTY_MEMORY_MIGRATION)) {
> +                migration_bitmap_set_dirty(block->mr, addr);
> +            }
> +        }
> +    }
> +    trace_migration_bitmap_sync_end(migration_dirty_pages
> +                                    - num_dirty_pages_init);
> +    num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
> +    end_time = qemu_get_clock_ms(rt_clock);
> +
> +    /* more than 1 second = 1000 milliseconds */
> +    if (end_time > start_time + 1000) {
> +        s->dirty_pages_rate = num_dirty_pages_period * 1000
> +            / (end_time - start_time);
> +        s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
> +        start_time = end_time;
> +        num_dirty_pages_period = 0;
> +    }
> +}
> +
> +/*
> + * ram_save_block: Writes a page of memory to the stream f
> + *
> + * Returns:  The number of bytes written.
> + *           0 means no dirty pages
> + */
> +
> +static int ram_save_block(QEMUFile *f, bool last_stage)
> +{
> +    RAMBlock *block = last_seen_block;
> +    ram_addr_t offset = last_offset;
> +    bool complete_round = false;
> +    int bytes_sent = 0;
> +    MemoryRegion *mr;
> +    ram_addr_t current_addr;
> +
> +    if (!block)
> +        block = QTAILQ_FIRST(&ram_list.blocks);
> +
> +    while (true) {
> +        mr = block->mr;
> +        offset = migration_bitmap_find_and_reset_dirty(mr, offset);
> +        if (complete_round && block == last_seen_block &&
> +            offset >= last_offset) {
> +            break;
> +        }
> +        if (offset >= block->length) {
> +            offset = 0;
> +            block = QTAILQ_NEXT(block, next);
> +            if (!block) {
> +                block = QTAILQ_FIRST(&ram_list.blocks);
> +                complete_round = true;
> +            }
> +        } else {
> +            uint8_t *p;
> +            int cont = (block == last_sent_block) ?
> +                RAM_SAVE_FLAG_CONTINUE : 0;
> +
> +            p = memory_region_get_ram_ptr(mr) + offset;
> +
> +            /* When in doubt, send the page as a normal page */
> +            bytes_sent = -1;
> +            if (is_dup_page(p)) {
> +                acct_info.dup_pages++;
> +                bytes_sent = save_block_hdr(f, block, offset, cont,
> +                                            RAM_SAVE_FLAG_COMPRESS);
> +                qemu_put_byte(f, *p);
> +                bytes_sent += 1;
> +            } else if (migrate_use_xbzrle()) {
> +                current_addr = block->offset + offset;
> +                bytes_sent = save_xbzrle_page(f, p, current_addr, block,
> +                                              offset, cont, last_stage);
> +                if (!last_stage) {
> +                    p = get_cached_data(XBZRLE.cache, current_addr);
> +                }
> +            }
> +
> +            /* XBZRLE overflow or normal page */
> +            if (bytes_sent == -1) {
> +                bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
> +                qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
> +                bytes_sent += TARGET_PAGE_SIZE;
> +                acct_info.norm_pages++;
> +            }
> +
> +            /* if page is unmodified, continue to the next */
> +            if (bytes_sent > 0) {
> +                last_sent_block = block;
> +                break;
> +            }
> +        }
> +    }
> +    last_seen_block = block;
> +    last_offset = offset;
> +
> +    return bytes_sent;
> +}
> +
> +static uint64_t bytes_transferred;
> +
> +static ram_addr_t ram_save_remaining(void)
> +{
> +    return migration_dirty_pages;
> +}
> +
> +uint64_t ram_bytes_remaining(void)
> +{
> +    return ram_save_remaining() * TARGET_PAGE_SIZE;
> +}
> +
> +uint64_t ram_bytes_transferred(void)
> +{
> +    return bytes_transferred;
> +}
> +
> +uint64_t ram_bytes_total(void)
> +{
> +    RAMBlock *block;
> +    uint64_t total = 0;
> +
> +    QTAILQ_FOREACH(block, &ram_list.blocks, next)
> +        total += block->length;
> +
> +    return total;
> +}
> +
> +static void migration_end(void)
> +{
> +    if (migration_bitmap) {
> +        memory_global_dirty_log_stop();
> +        g_free(migration_bitmap);
> +        migration_bitmap = NULL;
> +    }
> +
> +    if (XBZRLE.cache) {
> +        cache_fini(XBZRLE.cache);
> +        g_free(XBZRLE.cache);
> +        g_free(XBZRLE.encoded_buf);
> +        g_free(XBZRLE.current_buf);
> +        g_free(XBZRLE.decoded_buf);
> +        XBZRLE.cache = NULL;
> +    }
> +}
> +
> +static void ram_migration_cancel(void *opaque)
> +{
> +    migration_end();
> +}
> +
> +static void reset_ram_globals(void)
> +{
> +    last_seen_block = NULL;
> +    last_sent_block = NULL;
> +    last_offset = 0;
> +    last_version = ram_list.version;
> +}
> +
> +#define MAX_WAIT 50 /* ms, half buffered_file limit */
> +
> +static int ram_save_setup(QEMUFile *f, void *opaque)
> +{
> +    RAMBlock *block;
> +    int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
> +
> +    migration_bitmap = bitmap_new(ram_pages);
> +    bitmap_set(migration_bitmap, 0, ram_pages);
> +    migration_dirty_pages = ram_pages;
> +
> +    if (migrate_use_xbzrle()) {
> +        XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
> +                                  TARGET_PAGE_SIZE,
> +                                  TARGET_PAGE_SIZE);
> +        if (!XBZRLE.cache) {
> +            DPRINTF("Error creating cache\n");
> +            return -1;
> +        }
> +        XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE);
> +        XBZRLE.current_buf = g_malloc(TARGET_PAGE_SIZE);
> +        acct_clear();
> +    }
> +
> +    qemu_mutex_lock_iothread();
> +    qemu_mutex_lock_ramlist();
> +    bytes_transferred = 0;
> +    reset_ram_globals();
> +
> +    memory_global_dirty_log_start();
> +    migration_bitmap_sync();
> +    qemu_mutex_unlock_iothread();
> +
> +    qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
> +
> +    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> +        qemu_put_byte(f, strlen(block->idstr));
> +        qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
> +        qemu_put_be64(f, block->length);
> +    }
> +
> +    qemu_mutex_unlock_ramlist();
> +    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
> +
> +    return 0;
> +}
> +
> +static int ram_save_iterate(QEMUFile *f, void *opaque)
> +{
> +    int ret;
> +    int i;
> +    int64_t t0;
> +    int total_sent = 0;
> +
> +    qemu_mutex_lock_ramlist();
> +
> +    if (ram_list.version != last_version) {
> +        reset_ram_globals();
> +    }
> +
> +    t0 = qemu_get_clock_ns(rt_clock);
> +    i = 0;
> +    while ((ret = qemu_file_rate_limit(f)) == 0) {
> +        int bytes_sent;
> +
> +        bytes_sent = ram_save_block(f, false);
> +        /* no more blocks to send */
> +        if (bytes_sent == 0) {
> +            break;
> +        }
> +        total_sent += bytes_sent;
> +        acct_info.iterations++;
> +        /* we want to check in the 1st loop, just in case it was the 1st time
> +           and we had to sync the dirty bitmap.
> +           qemu_get_clock_ns() is a bit expensive, so we only check every
> +           few iterations
> +        */
> +        if ((i & 63) == 0) {
> +            uint64_t t1 = (qemu_get_clock_ns(rt_clock) - t0) / 1000000;
> +            if (t1 > MAX_WAIT) {
> +                DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
> +                        t1, i);
> +                break;
> +            }
> +        }
> +        i++;
> +    }
> +
> +    qemu_mutex_unlock_ramlist();
> +
> +    if (ret < 0) {
> +        bytes_transferred += total_sent;
> +        return ret;
> +    }
> +
> +    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
> +    total_sent += 8;
> +    bytes_transferred += total_sent;
> +
> +    return total_sent;
> +}
> +
> +static int ram_save_complete(QEMUFile *f, void *opaque)
> +{
> +    qemu_mutex_lock_ramlist();
> +    migration_bitmap_sync();
> +
> +    /* try transferring iterative blocks of memory */
> +
> +    /* flush all remaining blocks regardless of rate limiting */
> +    while (true) {
> +        int bytes_sent;
> +
> +        bytes_sent = ram_save_block(f, true);
> +        /* no more blocks to send */
> +        if (bytes_sent == 0) {
> +            break;
> +        }
> +        bytes_transferred += bytes_sent;
> +    }
> +    migration_end();
> +
> +    qemu_mutex_unlock_ramlist();
> +    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
> +
> +    return 0;
> +}
> +
> +static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
> +{
> +    uint64_t remaining_size;
> +
> +    remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
> +
> +    if (remaining_size < max_size) {
> +        qemu_mutex_lock_iothread();
> +        migration_bitmap_sync();
> +        qemu_mutex_unlock_iothread();
> +        remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
> +    }
> +    return remaining_size;
> +}
> +
> +static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
> +{
> +    int ret, rc = 0;
> +    unsigned int xh_len;
> +    int xh_flags;
> +
> +    if (!XBZRLE.decoded_buf) {
> +        XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
> +    }
> +
> +    /* extract RLE header */
> +    xh_flags = qemu_get_byte(f);
> +    xh_len = qemu_get_be16(f);
> +
> +    if (xh_flags != ENCODING_FLAG_XBZRLE) {
> +        fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n");
> +        return -1;
> +    }
> +
> +    if (xh_len > TARGET_PAGE_SIZE) {
> +        fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n");
> +        return -1;
> +    }
> +    /* load data and decode */
> +    qemu_get_buffer(f, XBZRLE.decoded_buf, xh_len);
> +
> +    /* decode RLE */
> +    ret = xbzrle_decode_buffer(XBZRLE.decoded_buf, xh_len, host,
> +                               TARGET_PAGE_SIZE);
> +    if (ret == -1) {
> +        fprintf(stderr, "Failed to load XBZRLE page - decode error!\n");
> +        rc = -1;
> +    } else  if (ret > TARGET_PAGE_SIZE) {
> +        fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n",
> +                ret, TARGET_PAGE_SIZE);
> +        abort();
> +    }
> +
> +    return rc;
> +}
> +
> +static inline void *host_from_stream_offset(QEMUFile *f,
> +                                            ram_addr_t offset,
> +                                            int flags)
> +{
> +    static RAMBlock *block = NULL;
> +    char id[256];
> +    uint8_t len;
> +
> +    if (flags & RAM_SAVE_FLAG_CONTINUE) {
> +        if (!block) {
> +            fprintf(stderr, "Ack, bad migration stream!\n");
> +            return NULL;
> +        }
> +
> +        return memory_region_get_ram_ptr(block->mr) + offset;
> +    }
> +
> +    len = qemu_get_byte(f);
> +    qemu_get_buffer(f, (uint8_t *)id, len);
> +    id[len] = 0;
> +
> +    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> +        if (!strncmp(id, block->idstr, sizeof(id)))
> +            return memory_region_get_ram_ptr(block->mr) + offset;
> +    }
> +
> +    fprintf(stderr, "Can't find block %s!\n", id);
> +    return NULL;
> +}
> +
> +static int ram_load(QEMUFile *f, void *opaque, int version_id)
> +{
> +    ram_addr_t addr;
> +    int flags, ret = 0;
> +    int error;
> +    static uint64_t seq_iter;
> +
> +    seq_iter++;
> +
> +    if (version_id < 4 || version_id > 4) {
> +        return -EINVAL;
> +    }
> +
> +    do {
> +        addr = qemu_get_be64(f);
> +
> +        flags = addr & ~TARGET_PAGE_MASK;
> +        addr &= TARGET_PAGE_MASK;
> +
> +        if (flags & RAM_SAVE_FLAG_MEM_SIZE) {
> +            if (version_id == 4) {
> +                /* Synchronize RAM block list */
> +                char id[256];
> +                ram_addr_t length;
> +                ram_addr_t total_ram_bytes = addr;
> +
> +                while (total_ram_bytes) {
> +                    RAMBlock *block;
> +                    uint8_t len;
> +
> +                    len = qemu_get_byte(f);
> +                    qemu_get_buffer(f, (uint8_t *)id, len);
> +                    id[len] = 0;
> +                    length = qemu_get_be64(f);
> +
> +                    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> +                        if (!strncmp(id, block->idstr, sizeof(id))) {
> +                            if (block->length != length) {
> +                                ret =  -EINVAL;
> +                                goto done;
> +                            }
> +                            break;
> +                        }
> +                    }
> +
> +                    if (!block) {
> +                        fprintf(stderr, "Unknown ramblock \"%s\", cannot "
> +                                "accept migration\n", id);
> +                        ret = -EINVAL;
> +                        goto done;
> +                    }
> +
> +                    total_ram_bytes -= length;
> +                }
> +            }
> +        }
> +
> +        if (flags & RAM_SAVE_FLAG_COMPRESS) {
> +            void *host;
> +            uint8_t ch;
> +
> +            host = host_from_stream_offset(f, addr, flags);
> +            if (!host) {
> +                return -EINVAL;
> +            }
> +
> +            ch = qemu_get_byte(f);
> +            memset(host, ch, TARGET_PAGE_SIZE);
> +#ifndef _WIN32
> +            if (ch == 0 &&
> +                (!kvm_enabled() || kvm_has_sync_mmu()) &&
> +                getpagesize() <= TARGET_PAGE_SIZE) {
> +                qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
> +            }
> +#endif
> +        } else if (flags & RAM_SAVE_FLAG_PAGE) {
> +            void *host;
> +
> +            host = host_from_stream_offset(f, addr, flags);
> +            if (!host) {
> +                return -EINVAL;
> +            }
> +
> +            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
> +        } else if (flags & RAM_SAVE_FLAG_XBZRLE) {
> +            void *host = host_from_stream_offset(f, addr, flags);
> +            if (!host) {
> +                return -EINVAL;
> +            }
> +
> +            if (load_xbzrle(f, addr, host) < 0) {
> +                ret = -EINVAL;
> +                goto done;
> +            }
> +        }
> +        error = qemu_file_get_error(f);
> +        if (error) {
> +            ret = error;
> +            goto done;
> +        }
> +    } while (!(flags & RAM_SAVE_FLAG_EOS));
> +
> +done:
> +    DPRINTF("Completed load of VM with exit code %d seq iteration "
> +            "%" PRIu64 "\n", ret, seq_iter);
> +    return ret;
> +}
> +
> +SaveVMHandlers savevm_ram_handlers = {
> +    .save_live_setup = ram_save_setup,
> +    .save_live_iterate = ram_save_iterate,
> +    .save_live_complete = ram_save_complete,
> +    .save_live_pending = ram_save_pending,
> +    .load_state = ram_load,
> +    .cancel = ram_migration_cancel,
> +};
>
Paolo Bonzini March 7, 2013, 3:57 p.m. UTC | #2
On 07/03/2013 14:32, Michael S. Tsirkin wrote:
> Move RAM migration code from arch_init to savevm-ram.
> 
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
> 
> Note: this is on top of Juan's pull request
> 
> Changes from v1:
>     - renamed source file, rebased on top of migration.next as
>       suggested by Paolo

The output of

diff -u <(sed -n 's/^-//p' foo ) <(sed -n 's/^+//p' foo )

is trivial.
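
That is: with the patch body saved to a file foo, the first sed prints the
removed lines and the second prints the added lines, so a near-empty diff
between the two confirms the change is a pure code move. A rough sketch of
running the check (how foo is produced is not specified in the thread; git
format-patch is just one way to obtain it):

    git format-patch -1 --stdout > foo
    diff -u <(sed -n 's/^-//p' foo) <(sed -n 's/^+//p' foo)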

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>

>  Makefile.target |   2 +-
>  arch_init.c     | 763 -----------------------------------------------------
>  savevm-ram.c    | 804 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 805 insertions(+), 764 deletions(-)
>  create mode 100644 savevm-ram.c
> 
> diff --git a/Makefile.target b/Makefile.target
> index ca657b3..54bc21b 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -108,7 +108,7 @@ CONFIG_NO_XEN = $(if $(subst n,,$(CONFIG_XEN)),n,y)
>  CONFIG_NO_GET_MEMORY_MAPPING = $(if $(subst n,,$(CONFIG_HAVE_GET_MEMORY_MAPPING)),n,y)
>  CONFIG_NO_CORE_DUMP = $(if $(subst n,,$(CONFIG_HAVE_CORE_DUMP)),n,y)
>  
> -obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o
> +obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o savevm-ram.o
>  obj-y += qtest.o
>  obj-y += hw/
>  obj-$(CONFIG_KVM) += kvm-all.o
> diff --git a/arch_init.c b/arch_init.c
> index 98e2bc6..9943ed4 100644
> --- a/arch_init.c
> +++ b/arch_init.c
> @@ -31,20 +31,15 @@
>  #include "config.h"
>  #include "monitor/monitor.h"
>  #include "sysemu/sysemu.h"
> -#include "qemu/bitops.h"
> -#include "qemu/bitmap.h"
>  #include "sysemu/arch_init.h"
>  #include "audio/audio.h"
>  #include "hw/pc.h"
>  #include "hw/pci/pci.h"
>  #include "hw/audiodev.h"
>  #include "sysemu/kvm.h"
> -#include "migration/migration.h"
>  #include "exec/gdbstub.h"
>  #include "hw/smbios.h"
> -#include "exec/address-spaces.h"
>  #include "hw/pcspk.h"
> -#include "migration/page_cache.h"
>  #include "qemu/config-file.h"
>  #include "qmp-commands.h"
>  #include "trace.h"
> @@ -103,38 +98,6 @@ int graphic_depth = 15;
>  
>  const uint32_t arch_type = QEMU_ARCH;
>  
> -/***********************************************************/
> -/* ram save/restore */
> -
> -#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
> -#define RAM_SAVE_FLAG_COMPRESS 0x02
> -#define RAM_SAVE_FLAG_MEM_SIZE 0x04
> -#define RAM_SAVE_FLAG_PAGE     0x08
> -#define RAM_SAVE_FLAG_EOS      0x10
> -#define RAM_SAVE_FLAG_CONTINUE 0x20
> -#define RAM_SAVE_FLAG_XBZRLE   0x40
> -
> -#ifdef __ALTIVEC__
> -#include <altivec.h>
> -#define VECTYPE        vector unsigned char
> -#define SPLAT(p)       vec_splat(vec_ld(0, p), 0)
> -#define ALL_EQ(v1, v2) vec_all_eq(v1, v2)
> -/* altivec.h may redefine the bool macro as vector type.
> - * Reset it to POSIX semantics. */
> -#undef bool
> -#define bool _Bool
> -#elif defined __SSE2__
> -#include <emmintrin.h>
> -#define VECTYPE        __m128i
> -#define SPLAT(p)       _mm_set1_epi8(*(p))
> -#define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 0xFFFF)
> -#else
> -#define VECTYPE        unsigned long
> -#define SPLAT(p)       (*(p) * (~0UL / 255))
> -#define ALL_EQ(v1, v2) ((v1) == (v2))
> -#endif
> -
> -
>  static struct defconfig_file {
>      const char *filename;
>      /* Indicates it is an user config file (disabled by -no-user-config) */
> @@ -145,7 +108,6 @@ static struct defconfig_file {
>      { NULL }, /* end of list */
>  };
>  
> -
>  int qemu_read_default_config_files(bool userconfig)
>  {
>      int ret;
> @@ -164,731 +126,6 @@ int qemu_read_default_config_files(bool userconfig)
>      return 0;
>  }
>  
> -static int is_dup_page(uint8_t *page)
> -{
> -    VECTYPE *p = (VECTYPE *)page;
> -    VECTYPE val = SPLAT(page);
> -    int i;
> -
> -    for (i = 0; i < TARGET_PAGE_SIZE / sizeof(VECTYPE); i++) {
> -        if (!ALL_EQ(val, p[i])) {
> -            return 0;
> -        }
> -    }
> -
> -    return 1;
> -}
> -
> -/* struct contains XBZRLE cache and a static page
> -   used by the compression */
> -static struct {
> -    /* buffer used for XBZRLE encoding */
> -    uint8_t *encoded_buf;
> -    /* buffer for storing page content */
> -    uint8_t *current_buf;
> -    /* buffer used for XBZRLE decoding */
> -    uint8_t *decoded_buf;
> -    /* Cache for XBZRLE */
> -    PageCache *cache;
> -} XBZRLE = {
> -    .encoded_buf = NULL,
> -    .current_buf = NULL,
> -    .decoded_buf = NULL,
> -    .cache = NULL,
> -};
> -
> -
> -int64_t xbzrle_cache_resize(int64_t new_size)
> -{
> -    if (XBZRLE.cache != NULL) {
> -        return cache_resize(XBZRLE.cache, new_size / TARGET_PAGE_SIZE) *
> -            TARGET_PAGE_SIZE;
> -    }
> -    return pow2floor(new_size);
> -}
> -
> -/* accounting for migration statistics */
> -typedef struct AccountingInfo {
> -    uint64_t dup_pages;
> -    uint64_t norm_pages;
> -    uint64_t iterations;
> -    uint64_t xbzrle_bytes;
> -    uint64_t xbzrle_pages;
> -    uint64_t xbzrle_cache_miss;
> -    uint64_t xbzrle_overflows;
> -} AccountingInfo;
> -
> -static AccountingInfo acct_info;
> -
> -static void acct_clear(void)
> -{
> -    memset(&acct_info, 0, sizeof(acct_info));
> -}
> -
> -uint64_t dup_mig_bytes_transferred(void)
> -{
> -    return acct_info.dup_pages * TARGET_PAGE_SIZE;
> -}
> -
> -uint64_t dup_mig_pages_transferred(void)
> -{
> -    return acct_info.dup_pages;
> -}
> -
> -uint64_t norm_mig_bytes_transferred(void)
> -{
> -    return acct_info.norm_pages * TARGET_PAGE_SIZE;
> -}
> -
> -uint64_t norm_mig_pages_transferred(void)
> -{
> -    return acct_info.norm_pages;
> -}
> -
> -uint64_t xbzrle_mig_bytes_transferred(void)
> -{
> -    return acct_info.xbzrle_bytes;
> -}
> -
> -uint64_t xbzrle_mig_pages_transferred(void)
> -{
> -    return acct_info.xbzrle_pages;
> -}
> -
> -uint64_t xbzrle_mig_pages_cache_miss(void)
> -{
> -    return acct_info.xbzrle_cache_miss;
> -}
> -
> -uint64_t xbzrle_mig_pages_overflow(void)
> -{
> -    return acct_info.xbzrle_overflows;
> -}
> -
> -static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
> -                             int cont, int flag)
> -{
> -    size_t size;
> -
> -    qemu_put_be64(f, offset | cont | flag);
> -    size = 8;
> -
> -    if (!cont) {
> -        qemu_put_byte(f, strlen(block->idstr));
> -        qemu_put_buffer(f, (uint8_t *)block->idstr,
> -                        strlen(block->idstr));
> -        size += 1 + strlen(block->idstr);
> -    }
> -    return size;
> -}
> -
> -#define ENCODING_FLAG_XBZRLE 0x1
> -
> -static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
> -                            ram_addr_t current_addr, RAMBlock *block,
> -                            ram_addr_t offset, int cont, bool last_stage)
> -{
> -    int encoded_len = 0, bytes_sent = -1;
> -    uint8_t *prev_cached_page;
> -
> -    if (!cache_is_cached(XBZRLE.cache, current_addr)) {
> -        if (!last_stage) {
> -            cache_insert(XBZRLE.cache, current_addr, current_data);
> -        }
> -        acct_info.xbzrle_cache_miss++;
> -        return -1;
> -    }
> -
> -    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
> -
> -    /* save current buffer into memory */
> -    memcpy(XBZRLE.current_buf, current_data, TARGET_PAGE_SIZE);
> -
> -    /* XBZRLE encoding (if there is no overflow) */
> -    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
> -                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
> -                                       TARGET_PAGE_SIZE);
> -    if (encoded_len == 0) {
> -        DPRINTF("Skipping unmodified page\n");
> -        return 0;
> -    } else if (encoded_len == -1) {
> -        DPRINTF("Overflow\n");
> -        acct_info.xbzrle_overflows++;
> -        /* update data in the cache */
> -        memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE);
> -        return -1;
> -    }
> -
> -    /* we need to update the data in the cache, in order to get the same data */
> -    if (!last_stage) {
> -        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
> -    }
> -
> -    /* Send XBZRLE based compressed page */
> -    bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
> -    qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
> -    qemu_put_be16(f, encoded_len);
> -    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
> -    bytes_sent += encoded_len + 1 + 2;
> -    acct_info.xbzrle_pages++;
> -    acct_info.xbzrle_bytes += bytes_sent;
> -
> -    return bytes_sent;
> -}
> -
> -
> -/* This is the last block that we have visited serching for dirty pages
> - */
> -static RAMBlock *last_seen_block;
> -/* This is the last block from where we have sent data */
> -static RAMBlock *last_sent_block;
> -static ram_addr_t last_offset;
> -static unsigned long *migration_bitmap;
> -static uint64_t migration_dirty_pages;
> -static uint32_t last_version;
> -
> -static inline
> -ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
> -                                                 ram_addr_t start)
> -{
> -    unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
> -    unsigned long nr = base + (start >> TARGET_PAGE_BITS);
> -    unsigned long size = base + (int128_get64(mr->size) >> TARGET_PAGE_BITS);
> -
> -    unsigned long next = find_next_bit(migration_bitmap, size, nr);
> -
> -    if (next < size) {
> -        clear_bit(next, migration_bitmap);
> -        migration_dirty_pages--;
> -    }
> -    return (next - base) << TARGET_PAGE_BITS;
> -}
> -
> -static inline bool migration_bitmap_set_dirty(MemoryRegion *mr,
> -                                              ram_addr_t offset)
> -{
> -    bool ret;
> -    int nr = (mr->ram_addr + offset) >> TARGET_PAGE_BITS;
> -
> -    ret = test_and_set_bit(nr, migration_bitmap);
> -
> -    if (!ret) {
> -        migration_dirty_pages++;
> -    }
> -    return ret;
> -}
> -
> -/* Needs iothread lock! */
> -
> -static void migration_bitmap_sync(void)
> -{
> -    RAMBlock *block;
> -    ram_addr_t addr;
> -    uint64_t num_dirty_pages_init = migration_dirty_pages;
> -    MigrationState *s = migrate_get_current();
> -    static int64_t start_time;
> -    static int64_t num_dirty_pages_period;
> -    int64_t end_time;
> -
> -    if (!start_time) {
> -        start_time = qemu_get_clock_ms(rt_clock);
> -    }
> -
> -    trace_migration_bitmap_sync_start();
> -    memory_global_sync_dirty_bitmap(get_system_memory());
> -
> -    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> -        for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
> -            if (memory_region_test_and_clear_dirty(block->mr,
> -                                                   addr, TARGET_PAGE_SIZE,
> -                                                   DIRTY_MEMORY_MIGRATION)) {
> -                migration_bitmap_set_dirty(block->mr, addr);
> -            }
> -        }
> -    }
> -    trace_migration_bitmap_sync_end(migration_dirty_pages
> -                                    - num_dirty_pages_init);
> -    num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
> -    end_time = qemu_get_clock_ms(rt_clock);
> -
> -    /* more than 1 second = 1000 millisecons */
> -    if (end_time > start_time + 1000) {
> -        s->dirty_pages_rate = num_dirty_pages_period * 1000
> -            / (end_time - start_time);
> -        s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
> -        start_time = end_time;
> -        num_dirty_pages_period = 0;
> -    }
> -}
> -
> -/*
> - * ram_save_block: Writes a page of memory to the stream f
> - *
> - * Returns:  The number of bytes written.
> - *           0 means no dirty pages
> - */
> -
> -static int ram_save_block(QEMUFile *f, bool last_stage)
> -{
> -    RAMBlock *block = last_seen_block;
> -    ram_addr_t offset = last_offset;
> -    bool complete_round = false;
> -    int bytes_sent = 0;
> -    MemoryRegion *mr;
> -    ram_addr_t current_addr;
> -
> -    if (!block)
> -        block = QTAILQ_FIRST(&ram_list.blocks);
> -
> -    while (true) {
> -        mr = block->mr;
> -        offset = migration_bitmap_find_and_reset_dirty(mr, offset);
> -        if (complete_round && block == last_seen_block &&
> -            offset >= last_offset) {
> -            break;
> -        }
> -        if (offset >= block->length) {
> -            offset = 0;
> -            block = QTAILQ_NEXT(block, next);
> -            if (!block) {
> -                block = QTAILQ_FIRST(&ram_list.blocks);
> -                complete_round = true;
> -            }
> -        } else {
> -            uint8_t *p;
> -            int cont = (block == last_sent_block) ?
> -                RAM_SAVE_FLAG_CONTINUE : 0;
> -
> -            p = memory_region_get_ram_ptr(mr) + offset;
> -
> -            /* In doubt sent page as normal */
> -            bytes_sent = -1;
> -            if (is_dup_page(p)) {
> -                acct_info.dup_pages++;
> -                bytes_sent = save_block_hdr(f, block, offset, cont,
> -                                            RAM_SAVE_FLAG_COMPRESS);
> -                qemu_put_byte(f, *p);
> -                bytes_sent += 1;
> -            } else if (migrate_use_xbzrle()) {
> -                current_addr = block->offset + offset;
> -                bytes_sent = save_xbzrle_page(f, p, current_addr, block,
> -                                              offset, cont, last_stage);
> -                if (!last_stage) {
> -                    p = get_cached_data(XBZRLE.cache, current_addr);
> -                }
> -            }
> -
> -            /* XBZRLE overflow or normal page */
> -            if (bytes_sent == -1) {
> -                bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
> -                qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
> -                bytes_sent += TARGET_PAGE_SIZE;
> -                acct_info.norm_pages++;
> -            }
> -
> -            /* if page is unmodified, continue to the next */
> -            if (bytes_sent > 0) {
> -                last_sent_block = block;
> -                break;
> -            }
> -        }
> -    }
> -    last_seen_block = block;
> -    last_offset = offset;
> -
> -    return bytes_sent;
> -}
> -
> -static uint64_t bytes_transferred;
> -
> -static ram_addr_t ram_save_remaining(void)
> -{
> -    return migration_dirty_pages;
> -}
> -
> -uint64_t ram_bytes_remaining(void)
> -{
> -    return ram_save_remaining() * TARGET_PAGE_SIZE;
> -}
> -
> -uint64_t ram_bytes_transferred(void)
> -{
> -    return bytes_transferred;
> -}
> -
> -uint64_t ram_bytes_total(void)
> -{
> -    RAMBlock *block;
> -    uint64_t total = 0;
> -
> -    QTAILQ_FOREACH(block, &ram_list.blocks, next)
> -        total += block->length;
> -
> -    return total;
> -}
> -
> -static void migration_end(void)
> -{
> -    if (migration_bitmap) {
> -        memory_global_dirty_log_stop();
> -        g_free(migration_bitmap);
> -        migration_bitmap = NULL;
> -    }
> -
> -    if (XBZRLE.cache) {
> -        cache_fini(XBZRLE.cache);
> -        g_free(XBZRLE.cache);
> -        g_free(XBZRLE.encoded_buf);
> -        g_free(XBZRLE.current_buf);
> -        g_free(XBZRLE.decoded_buf);
> -        XBZRLE.cache = NULL;
> -    }
> -}
> -
> -static void ram_migration_cancel(void *opaque)
> -{
> -    migration_end();
> -}
> -
> -static void reset_ram_globals(void)
> -{
> -    last_seen_block = NULL;
> -    last_sent_block = NULL;
> -    last_offset = 0;
> -    last_version = ram_list.version;
> -}
> -
> -#define MAX_WAIT 50 /* ms, half buffered_file limit */
> -
> -static int ram_save_setup(QEMUFile *f, void *opaque)
> -{
> -    RAMBlock *block;
> -    int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
> -
> -    migration_bitmap = bitmap_new(ram_pages);
> -    bitmap_set(migration_bitmap, 0, ram_pages);
> -    migration_dirty_pages = ram_pages;
> -
> -    if (migrate_use_xbzrle()) {
> -        XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
> -                                  TARGET_PAGE_SIZE,
> -                                  TARGET_PAGE_SIZE);
> -        if (!XBZRLE.cache) {
> -            DPRINTF("Error creating cache\n");
> -            return -1;
> -        }
> -        XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE);
> -        XBZRLE.current_buf = g_malloc(TARGET_PAGE_SIZE);
> -        acct_clear();
> -    }
> -
> -    qemu_mutex_lock_iothread();
> -    qemu_mutex_lock_ramlist();
> -    bytes_transferred = 0;
> -    reset_ram_globals();
> -
> -    memory_global_dirty_log_start();
> -    migration_bitmap_sync();
> -    qemu_mutex_unlock_iothread();
> -
> -    qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
> -
> -    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> -        qemu_put_byte(f, strlen(block->idstr));
> -        qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
> -        qemu_put_be64(f, block->length);
> -    }
> -
> -    qemu_mutex_unlock_ramlist();
> -    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
> -
> -    return 0;
> -}
> -
> -static int ram_save_iterate(QEMUFile *f, void *opaque)
> -{
> -    int ret;
> -    int i;
> -    int64_t t0;
> -    int total_sent = 0;
> -
> -    qemu_mutex_lock_ramlist();
> -
> -    if (ram_list.version != last_version) {
> -        reset_ram_globals();
> -    }
> -
> -    t0 = qemu_get_clock_ns(rt_clock);
> -    i = 0;
> -    while ((ret = qemu_file_rate_limit(f)) == 0) {
> -        int bytes_sent;
> -
> -        bytes_sent = ram_save_block(f, false);
> -        /* no more blocks to sent */
> -        if (bytes_sent == 0) {
> -            break;
> -        }
> -        total_sent += bytes_sent;
> -        acct_info.iterations++;
> -        /* we want to check in the 1st loop, just in case it was the 1st time
> -           and we had to sync the dirty bitmap.
> -           qemu_get_clock_ns() is a bit expensive, so we only check each some
> -           iterations
> -        */
> -        if ((i & 63) == 0) {
> -            uint64_t t1 = (qemu_get_clock_ns(rt_clock) - t0) / 1000000;
> -            if (t1 > MAX_WAIT) {
> -                DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
> -                        t1, i);
> -                break;
> -            }
> -        }
> -        i++;
> -    }
> -
> -    qemu_mutex_unlock_ramlist();
> -
> -    if (ret < 0) {
> -        bytes_transferred += total_sent;
> -        return ret;
> -    }
> -
> -    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
> -    total_sent += 8;
> -    bytes_transferred += total_sent;
> -
> -    return total_sent;
> -}
> -
> -static int ram_save_complete(QEMUFile *f, void *opaque)
> -{
> -    qemu_mutex_lock_ramlist();
> -    migration_bitmap_sync();
> -
> -    /* try transferring iterative blocks of memory */
> -
> -    /* flush all remaining blocks regardless of rate limiting */
> -    while (true) {
> -        int bytes_sent;
> -
> -        bytes_sent = ram_save_block(f, true);
> -        /* no more blocks to sent */
> -        if (bytes_sent == 0) {
> -            break;
> -        }
> -        bytes_transferred += bytes_sent;
> -    }
> -    migration_end();
> -
> -    qemu_mutex_unlock_ramlist();
> -    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
> -
> -    return 0;
> -}
> -
> -static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
> -{
> -    uint64_t remaining_size;
> -
> -    remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
> -
> -    if (remaining_size < max_size) {
> -        qemu_mutex_lock_iothread();
> -        migration_bitmap_sync();
> -        qemu_mutex_unlock_iothread();
> -        remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
> -    }
> -    return remaining_size;
> -}
> -
> -static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
> -{
> -    int ret, rc = 0;
> -    unsigned int xh_len;
> -    int xh_flags;
> -
> -    if (!XBZRLE.decoded_buf) {
> -        XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
> -    }
> -
> -    /* extract RLE header */
> -    xh_flags = qemu_get_byte(f);
> -    xh_len = qemu_get_be16(f);
> -
> -    if (xh_flags != ENCODING_FLAG_XBZRLE) {
> -        fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n");
> -        return -1;
> -    }
> -
> -    if (xh_len > TARGET_PAGE_SIZE) {
> -        fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n");
> -        return -1;
> -    }
> -    /* load data and decode */
> -    qemu_get_buffer(f, XBZRLE.decoded_buf, xh_len);
> -
> -    /* decode RLE */
> -    ret = xbzrle_decode_buffer(XBZRLE.decoded_buf, xh_len, host,
> -                               TARGET_PAGE_SIZE);
> -    if (ret == -1) {
> -        fprintf(stderr, "Failed to load XBZRLE page - decode error!\n");
> -        rc = -1;
> -    } else  if (ret > TARGET_PAGE_SIZE) {
> -        fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n",
> -                ret, TARGET_PAGE_SIZE);
> -        abort();
> -    }
> -
> -    return rc;
> -}
> -
> -static inline void *host_from_stream_offset(QEMUFile *f,
> -                                            ram_addr_t offset,
> -                                            int flags)
> -{
> -    static RAMBlock *block = NULL;
> -    char id[256];
> -    uint8_t len;
> -
> -    if (flags & RAM_SAVE_FLAG_CONTINUE) {
> -        if (!block) {
> -            fprintf(stderr, "Ack, bad migration stream!\n");
> -            return NULL;
> -        }
> -
> -        return memory_region_get_ram_ptr(block->mr) + offset;
> -    }
> -
> -    len = qemu_get_byte(f);
> -    qemu_get_buffer(f, (uint8_t *)id, len);
> -    id[len] = 0;
> -
> -    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> -        if (!strncmp(id, block->idstr, sizeof(id)))
> -            return memory_region_get_ram_ptr(block->mr) + offset;
> -    }
> -
> -    fprintf(stderr, "Can't find block %s!\n", id);
> -    return NULL;
> -}
> -
> -static int ram_load(QEMUFile *f, void *opaque, int version_id)
> -{
> -    ram_addr_t addr;
> -    int flags, ret = 0;
> -    int error;
> -    static uint64_t seq_iter;
> -
> -    seq_iter++;
> -
> -    if (version_id < 4 || version_id > 4) {
> -        return -EINVAL;
> -    }
> -
> -    do {
> -        addr = qemu_get_be64(f);
> -
> -        flags = addr & ~TARGET_PAGE_MASK;
> -        addr &= TARGET_PAGE_MASK;
> -
> -        if (flags & RAM_SAVE_FLAG_MEM_SIZE) {
> -            if (version_id == 4) {
> -                /* Synchronize RAM block list */
> -                char id[256];
> -                ram_addr_t length;
> -                ram_addr_t total_ram_bytes = addr;
> -
> -                while (total_ram_bytes) {
> -                    RAMBlock *block;
> -                    uint8_t len;
> -
> -                    len = qemu_get_byte(f);
> -                    qemu_get_buffer(f, (uint8_t *)id, len);
> -                    id[len] = 0;
> -                    length = qemu_get_be64(f);
> -
> -                    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> -                        if (!strncmp(id, block->idstr, sizeof(id))) {
> -                            if (block->length != length) {
> -                                ret =  -EINVAL;
> -                                goto done;
> -                            }
> -                            break;
> -                        }
> -                    }
> -
> -                    if (!block) {
> -                        fprintf(stderr, "Unknown ramblock \"%s\", cannot "
> -                                "accept migration\n", id);
> -                        ret = -EINVAL;
> -                        goto done;
> -                    }
> -
> -                    total_ram_bytes -= length;
> -                }
> -            }
> -        }
> -
> -        if (flags & RAM_SAVE_FLAG_COMPRESS) {
> -            void *host;
> -            uint8_t ch;
> -
> -            host = host_from_stream_offset(f, addr, flags);
> -            if (!host) {
> -                return -EINVAL;
> -            }
> -
> -            ch = qemu_get_byte(f);
> -            memset(host, ch, TARGET_PAGE_SIZE);
> -#ifndef _WIN32
> -            if (ch == 0 &&
> -                (!kvm_enabled() || kvm_has_sync_mmu()) &&
> -                getpagesize() <= TARGET_PAGE_SIZE) {
> -                qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
> -            }
> -#endif
> -        } else if (flags & RAM_SAVE_FLAG_PAGE) {
> -            void *host;
> -
> -            host = host_from_stream_offset(f, addr, flags);
> -            if (!host) {
> -                return -EINVAL;
> -            }
> -
> -            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
> -        } else if (flags & RAM_SAVE_FLAG_XBZRLE) {
> -            void *host = host_from_stream_offset(f, addr, flags);
> -            if (!host) {
> -                return -EINVAL;
> -            }
> -
> -            if (load_xbzrle(f, addr, host) < 0) {
> -                ret = -EINVAL;
> -                goto done;
> -            }
> -        }
> -        error = qemu_file_get_error(f);
> -        if (error) {
> -            ret = error;
> -            goto done;
> -        }
> -    } while (!(flags & RAM_SAVE_FLAG_EOS));
> -
> -done:
> -    DPRINTF("Completed load of VM with exit code %d seq iteration "
> -            "%" PRIu64 "\n", ret, seq_iter);
> -    return ret;
> -}
> -
> -SaveVMHandlers savevm_ram_handlers = {
> -    .save_live_setup = ram_save_setup,
> -    .save_live_iterate = ram_save_iterate,
> -    .save_live_complete = ram_save_complete,
> -    .save_live_pending = ram_save_pending,
> -    .load_state = ram_load,
> -    .cancel = ram_migration_cancel,
> -};
> -
>  #ifdef HAS_AUDIO
>  struct soundhw {
>      const char *name;
> diff --git a/savevm-ram.c b/savevm-ram.c
> new file mode 100644
> index 0000000..cea656c
> --- /dev/null
> +++ b/savevm-ram.c
> @@ -0,0 +1,804 @@
> +/*
> + * RAM Migration support
> + *
> + * Copyright (c) 2003-2008 Fabrice Bellard
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +#include <stdint.h>
> +#include <stdarg.h>
> +#include <stdlib.h>
> +#ifndef _WIN32
> +#include <sys/types.h>
> +#include <sys/mman.h>
> +#endif
> +#include "sysemu/sysemu.h"
> +#include "qemu/bitops.h"
> +#include "qemu/bitmap.h"
> +#include "sysemu/kvm.h"
> +#include "migration/migration.h"
> +#include "exec/address-spaces.h"
> +#include "migration/page_cache.h"
> +#include "migration/qemu-file.h"
> +#include "trace.h"
> +#include "exec/cpu-all.h"
> +
> +#ifdef DEBUG_ARCH_INIT
> +#define DPRINTF(fmt, ...) \
> +    do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0)
> +#else
> +#define DPRINTF(fmt, ...) \
> +    do { } while (0)
> +#endif
> +
> +/***********************************************************/
> +/* ram save/restore */
> +
> +#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
> +#define RAM_SAVE_FLAG_COMPRESS 0x02
> +#define RAM_SAVE_FLAG_MEM_SIZE 0x04
> +#define RAM_SAVE_FLAG_PAGE     0x08
> +#define RAM_SAVE_FLAG_EOS      0x10
> +#define RAM_SAVE_FLAG_CONTINUE 0x20
> +#define RAM_SAVE_FLAG_XBZRLE   0x40
> +
> +#ifdef __ALTIVEC__
> +#include <altivec.h>
> +#define VECTYPE        vector unsigned char
> +#define SPLAT(p)       vec_splat(vec_ld(0, p), 0)
> +#define ALL_EQ(v1, v2) vec_all_eq(v1, v2)
> +/* altivec.h may redefine the bool macro as vector type.
> + * Reset it to POSIX semantics. */
> +#undef bool
> +#define bool _Bool
> +#elif defined __SSE2__
> +#include <emmintrin.h>
> +#define VECTYPE        __m128i
> +#define SPLAT(p)       _mm_set1_epi8(*(p))
> +#define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 0xFFFF)
> +#else
> +#define VECTYPE        unsigned long
> +#define SPLAT(p)       (*(p) * (~0UL / 255))
> +#define ALL_EQ(v1, v2) ((v1) == (v2))
> +#endif
> +
> +static int is_dup_page(uint8_t *page)
> +{
> +    VECTYPE *p = (VECTYPE *)page;
> +    VECTYPE val = SPLAT(page);
> +    int i;
> +
> +    for (i = 0; i < TARGET_PAGE_SIZE / sizeof(VECTYPE); i++) {
> +        if (!ALL_EQ(val, p[i])) {
> +            return 0;
> +        }
> +    }
> +
> +    return 1;
> +}
> +
> +/* struct contains XBZRLE cache and a static page
> +   used by the compression */
> +static struct {
> +    /* buffer used for XBZRLE encoding */
> +    uint8_t *encoded_buf;
> +    /* buffer for storing page content */
> +    uint8_t *current_buf;
> +    /* buffer used for XBZRLE decoding */
> +    uint8_t *decoded_buf;
> +    /* Cache for XBZRLE */
> +    PageCache *cache;
> +} XBZRLE = {
> +    .encoded_buf = NULL,
> +    .current_buf = NULL,
> +    .decoded_buf = NULL,
> +    .cache = NULL,
> +};
> +
> +
> +int64_t xbzrle_cache_resize(int64_t new_size)
> +{
> +    if (XBZRLE.cache != NULL) {
> +        return cache_resize(XBZRLE.cache, new_size / TARGET_PAGE_SIZE) *
> +            TARGET_PAGE_SIZE;
> +    }
> +    return pow2floor(new_size);
> +}
> +
> +/* accounting for migration statistics */
> +typedef struct AccountingInfo {
> +    uint64_t dup_pages;
> +    uint64_t norm_pages;
> +    uint64_t iterations;
> +    uint64_t xbzrle_bytes;
> +    uint64_t xbzrle_pages;
> +    uint64_t xbzrle_cache_miss;
> +    uint64_t xbzrle_overflows;
> +} AccountingInfo;
> +
> +static AccountingInfo acct_info;
> +
> +static void acct_clear(void)
> +{
> +    memset(&acct_info, 0, sizeof(acct_info));
> +}
> +
> +uint64_t dup_mig_bytes_transferred(void)
> +{
> +    return acct_info.dup_pages * TARGET_PAGE_SIZE;
> +}
> +
> +uint64_t dup_mig_pages_transferred(void)
> +{
> +    return acct_info.dup_pages;
> +}
> +
> +uint64_t norm_mig_bytes_transferred(void)
> +{
> +    return acct_info.norm_pages * TARGET_PAGE_SIZE;
> +}
> +
> +uint64_t norm_mig_pages_transferred(void)
> +{
> +    return acct_info.norm_pages;
> +}
> +
> +uint64_t xbzrle_mig_bytes_transferred(void)
> +{
> +    return acct_info.xbzrle_bytes;
> +}
> +
> +uint64_t xbzrle_mig_pages_transferred(void)
> +{
> +    return acct_info.xbzrle_pages;
> +}
> +
> +uint64_t xbzrle_mig_pages_cache_miss(void)
> +{
> +    return acct_info.xbzrle_cache_miss;
> +}
> +
> +uint64_t xbzrle_mig_pages_overflow(void)
> +{
> +    return acct_info.xbzrle_overflows;
> +}
> +
> +static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
> +                             int cont, int flag)
> +{
> +    size_t size;
> +
> +    qemu_put_be64(f, offset | cont | flag);
> +    size = 8;
> +
> +    if (!cont) {
> +        qemu_put_byte(f, strlen(block->idstr));
> +        qemu_put_buffer(f, (uint8_t *)block->idstr,
> +                        strlen(block->idstr));
> +        size += 1 + strlen(block->idstr);
> +    }
> +    return size;
> +}
> +
> +#define ENCODING_FLAG_XBZRLE 0x1
> +
> +static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
> +                            ram_addr_t current_addr, RAMBlock *block,
> +                            ram_addr_t offset, int cont, bool last_stage)
> +{
> +    int encoded_len = 0, bytes_sent = -1;
> +    uint8_t *prev_cached_page;
> +
> +    if (!cache_is_cached(XBZRLE.cache, current_addr)) {
> +        if (!last_stage) {
> +            cache_insert(XBZRLE.cache, current_addr, current_data);
> +        }
> +        acct_info.xbzrle_cache_miss++;
> +        return -1;
> +    }
> +
> +    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
> +
> +    /* save current buffer into memory */
> +    memcpy(XBZRLE.current_buf, current_data, TARGET_PAGE_SIZE);
> +
> +    /* XBZRLE encoding (if there is no overflow) */
> +    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
> +                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
> +                                       TARGET_PAGE_SIZE);
> +    if (encoded_len == 0) {
> +        DPRINTF("Skipping unmodified page\n");
> +        return 0;
> +    } else if (encoded_len == -1) {
> +        DPRINTF("Overflow\n");
> +        acct_info.xbzrle_overflows++;
> +        /* update data in the cache */
> +        memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE);
> +        return -1;
> +    }
> +
> +    /* update the cached copy so later passes diff against the data we just sent */
> +    if (!last_stage) {
> +        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
> +    }
> +
> +    /* Send XBZRLE based compressed page */
> +    bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
> +    qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
> +    qemu_put_be16(f, encoded_len);
> +    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
> +    bytes_sent += encoded_len + 1 + 2;
> +    acct_info.xbzrle_pages++;
> +    acct_info.xbzrle_bytes += bytes_sent;
> +
> +    return bytes_sent;
> +}
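
The record sent here is the usual block header, a one-byte
ENCODING_FLAG_XBZRLE marker, a big-endian 16-bit encoded length and then
the encoded bytes, which is why the accounting adds encoded_len + 1 + 2 on
top of the header size.  The real encoder is xbzrle_encode_buffer(); as a
rough intuition only, the technique amounts to XOR-ing the new page with
the cached copy and run-length encoding the zero runs, along the lines of
this toy (not the actual wire format):

    #include <stdint.h>
    #include <stddef.h>

    /* Toy XOR + zero-run-length encoder producing alternating
     * <zero-run length> <one literal delta byte> pairs.  It only shows
     * why a barely-modified page shrinks to a handful of bytes. */
    static size_t toy_xor_rle(const uint8_t *old, const uint8_t *cur,
                              size_t len, uint8_t *out, size_t out_max)
    {
        size_t i = 0, n = 0;

        while (i < len) {
            size_t run = 0;

            while (i + run < len && (old[i + run] ^ cur[i + run]) == 0 &&
                   run < 255) {
                run++;
            }
            if (n >= out_max) {
                return (size_t)-1;          /* encoding would not fit */
            }
            out[n++] = (uint8_t)run;        /* zero-run length */
            i += run;
            if (i < len) {
                if (n >= out_max) {
                    return (size_t)-1;
                }
                out[n++] = old[i] ^ cur[i]; /* literal delta byte */
                i++;
            }
        }
        return n;
    }
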
> +
> +
> +/* This is the last block that we have visited searching for dirty pages
> + */
> +static RAMBlock *last_seen_block;
> +/* This is the last block from where we have sent data */
> +static RAMBlock *last_sent_block;
> +static ram_addr_t last_offset;
> +static unsigned long *migration_bitmap;
> +static uint64_t migration_dirty_pages;
> +static uint32_t last_version;
> +
> +static inline
> +ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
> +                                                 ram_addr_t start)
> +{
> +    unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
> +    unsigned long nr = base + (start >> TARGET_PAGE_BITS);
> +    unsigned long size = base + (int128_get64(mr->size) >> TARGET_PAGE_BITS);
> +
> +    unsigned long next = find_next_bit(migration_bitmap, size, nr);
> +
> +    if (next < size) {
> +        clear_bit(next, migration_bitmap);
> +        migration_dirty_pages--;
> +    }
> +    return (next - base) << TARGET_PAGE_BITS;
> +}
> +
> +static inline bool migration_bitmap_set_dirty(MemoryRegion *mr,
> +                                              ram_addr_t offset)
> +{
> +    bool ret;
> +    int nr = (mr->ram_addr + offset) >> TARGET_PAGE_BITS;
> +
> +    ret = test_and_set_bit(nr, migration_bitmap);
> +
> +    if (!ret) {
> +        migration_dirty_pages++;
> +    }
> +    return ret;
> +}
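
migration_bitmap holds one bit per target page across all of guest RAM;
the find-and-reset helper pulls out the next dirty page and clears its
bit, while set-dirty marks a page and bumps the counter only if the bit
was previously clear.  The real primitives come from qemu/bitops.h; the
simplified standalone versions below (the *_demo names are invented) are
only meant to make the bookkeeping concrete:

    #include <limits.h>

    #define BITS_PER_LONG_DEMO (sizeof(unsigned long) * CHAR_BIT)

    /* Returns the previous value of bit nr and sets it. */
    static int test_and_set_bit_demo(unsigned long nr, unsigned long *map)
    {
        unsigned long mask = 1UL << (nr % BITS_PER_LONG_DEMO);
        unsigned long *p = map + nr / BITS_PER_LONG_DEMO;
        int was_set = (*p & mask) != 0;

        *p |= mask;
        return was_set;
    }

    /* Returns the index of the next set bit at or after start,
     * or size if there is none. */
    static unsigned long find_next_bit_demo(const unsigned long *map,
                                            unsigned long size,
                                            unsigned long start)
    {
        unsigned long nr;

        for (nr = start; nr < size; nr++) {
            if (map[nr / BITS_PER_LONG_DEMO] &
                (1UL << (nr % BITS_PER_LONG_DEMO))) {
                return nr;
            }
        }
        return size;
    }
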
> +
> +/* Needs iothread lock! */
> +
> +static void migration_bitmap_sync(void)
> +{
> +    RAMBlock *block;
> +    ram_addr_t addr;
> +    uint64_t num_dirty_pages_init = migration_dirty_pages;
> +    MigrationState *s = migrate_get_current();
> +    static int64_t start_time;
> +    static int64_t num_dirty_pages_period;
> +    int64_t end_time;
> +
> +    if (!start_time) {
> +        start_time = qemu_get_clock_ms(rt_clock);
> +    }
> +
> +    trace_migration_bitmap_sync_start();
> +    memory_global_sync_dirty_bitmap(get_system_memory());
> +
> +    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> +        for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
> +            if (memory_region_test_and_clear_dirty(block->mr,
> +                                                   addr, TARGET_PAGE_SIZE,
> +                                                   DIRTY_MEMORY_MIGRATION)) {
> +                migration_bitmap_set_dirty(block->mr, addr);
> +            }
> +        }
> +    }
> +    trace_migration_bitmap_sync_end(migration_dirty_pages
> +                                    - num_dirty_pages_init);
> +    num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
> +    end_time = qemu_get_clock_ms(rt_clock);
> +
> +    /* more than 1 second = 1000 milliseconds */
> +    if (end_time > start_time + 1000) {
> +        s->dirty_pages_rate = num_dirty_pages_period * 1000
> +            / (end_time - start_time);
> +        s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
> +        start_time = end_time;
> +        num_dirty_pages_period = 0;
> +    }
> +}
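
As a worked example of the rate bookkeeping at the end: if a sync window
of 1250 ms saw 50,000 pages re-dirtied, dirty_pages_rate becomes
50000 * 1000 / 1250 = 40,000 pages per second, and with 4 KiB target pages
dirty_bytes_rate is 40,000 * 4096 = 163,840,000 bytes per second, roughly
156 MiB/s of data the guest keeps re-dirtying while migration runs.
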
> +
> +/*
> + * ram_save_block: Writes a page of memory to the stream f
> + *
> + * Returns:  The number of bytes written.
> + *           0 means no dirty pages
> + */
> +
> +static int ram_save_block(QEMUFile *f, bool last_stage)
> +{
> +    RAMBlock *block = last_seen_block;
> +    ram_addr_t offset = last_offset;
> +    bool complete_round = false;
> +    int bytes_sent = 0;
> +    MemoryRegion *mr;
> +    ram_addr_t current_addr;
> +
> +    if (!block)
> +        block = QTAILQ_FIRST(&ram_list.blocks);
> +
> +    while (true) {
> +        mr = block->mr;
> +        offset = migration_bitmap_find_and_reset_dirty(mr, offset);
> +        if (complete_round && block == last_seen_block &&
> +            offset >= last_offset) {
> +            break;
> +        }
> +        if (offset >= block->length) {
> +            offset = 0;
> +            block = QTAILQ_NEXT(block, next);
> +            if (!block) {
> +                block = QTAILQ_FIRST(&ram_list.blocks);
> +                complete_round = true;
> +            }
> +        } else {
> +            uint8_t *p;
> +            int cont = (block == last_sent_block) ?
> +                RAM_SAVE_FLAG_CONTINUE : 0;
> +
> +            p = memory_region_get_ram_ptr(mr) + offset;
> +
> +            /* If in doubt, send the page as normal */
> +            bytes_sent = -1;
> +            if (is_dup_page(p)) {
> +                acct_info.dup_pages++;
> +                bytes_sent = save_block_hdr(f, block, offset, cont,
> +                                            RAM_SAVE_FLAG_COMPRESS);
> +                qemu_put_byte(f, *p);
> +                bytes_sent += 1;
> +            } else if (migrate_use_xbzrle()) {
> +                current_addr = block->offset + offset;
> +                bytes_sent = save_xbzrle_page(f, p, current_addr, block,
> +                                              offset, cont, last_stage);
> +                if (!last_stage) {
> +                    p = get_cached_data(XBZRLE.cache, current_addr);
> +                }
> +            }
> +
> +            /* XBZRLE overflow or normal page */
> +            if (bytes_sent == -1) {
> +                bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
> +                qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
> +                bytes_sent += TARGET_PAGE_SIZE;
> +                acct_info.norm_pages++;
> +            }
> +
> +            /* if page is unmodified, continue to the next */
> +            if (bytes_sent > 0) {
> +                last_sent_block = block;
> +                break;
> +            }
> +        }
> +    }
> +    last_seen_block = block;
> +    last_offset = offset;
> +
> +    return bytes_sent;
> +}
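
Worth spelling out the termination logic: the scan resumes at
last_seen_block/last_offset, moves to the next block when it runs off the
end of the current one, wraps around to the head of the list (setting
complete_round), and returns 0 only once a complete round has come back to
the starting point without sending anything.  A bytes_sent of 0 from
save_xbzrle_page (page found unmodified) does not end the loop; the search
simply continues with the next dirty page.
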
> +
> +static uint64_t bytes_transferred;
> +
> +static ram_addr_t ram_save_remaining(void)
> +{
> +    return migration_dirty_pages;
> +}
> +
> +uint64_t ram_bytes_remaining(void)
> +{
> +    return ram_save_remaining() * TARGET_PAGE_SIZE;
> +}
> +
> +uint64_t ram_bytes_transferred(void)
> +{
> +    return bytes_transferred;
> +}
> +
> +uint64_t ram_bytes_total(void)
> +{
> +    RAMBlock *block;
> +    uint64_t total = 0;
> +
> +    QTAILQ_FOREACH(block, &ram_list.blocks, next)
> +        total += block->length;
> +
> +    return total;
> +}
> +
> +static void migration_end(void)
> +{
> +    if (migration_bitmap) {
> +        memory_global_dirty_log_stop();
> +        g_free(migration_bitmap);
> +        migration_bitmap = NULL;
> +    }
> +
> +    if (XBZRLE.cache) {
> +        cache_fini(XBZRLE.cache);
> +        g_free(XBZRLE.cache);
> +        g_free(XBZRLE.encoded_buf);
> +        g_free(XBZRLE.current_buf);
> +        g_free(XBZRLE.decoded_buf);
> +        XBZRLE.cache = NULL;
> +    }
> +}
> +
> +static void ram_migration_cancel(void *opaque)
> +{
> +    migration_end();
> +}
> +
> +static void reset_ram_globals(void)
> +{
> +    last_seen_block = NULL;
> +    last_sent_block = NULL;
> +    last_offset = 0;
> +    last_version = ram_list.version;
> +}
> +
> +#define MAX_WAIT 50 /* ms, half buffered_file limit */
> +
> +static int ram_save_setup(QEMUFile *f, void *opaque)
> +{
> +    RAMBlock *block;
> +    int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
> +
> +    migration_bitmap = bitmap_new(ram_pages);
> +    bitmap_set(migration_bitmap, 0, ram_pages);
> +    migration_dirty_pages = ram_pages;
> +
> +    if (migrate_use_xbzrle()) {
> +        XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
> +                                  TARGET_PAGE_SIZE,
> +                                  TARGET_PAGE_SIZE);
> +        if (!XBZRLE.cache) {
> +            DPRINTF("Error creating cache\n");
> +            return -1;
> +        }
> +        XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE);
> +        XBZRLE.current_buf = g_malloc(TARGET_PAGE_SIZE);
> +        acct_clear();
> +    }
> +
> +    qemu_mutex_lock_iothread();
> +    qemu_mutex_lock_ramlist();
> +    bytes_transferred = 0;
> +    reset_ram_globals();
> +
> +    memory_global_dirty_log_start();
> +    migration_bitmap_sync();
> +    qemu_mutex_unlock_iothread();
> +
> +    qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
> +
> +    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> +        qemu_put_byte(f, strlen(block->idstr));
> +        qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
> +        qemu_put_be64(f, block->length);
> +    }
> +
> +    qemu_mutex_unlock_ramlist();
> +    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
> +
> +    return 0;
> +}
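
So the setup stage emits, in order: a be64 of the total RAM size with
RAM_SAVE_FLAG_MEM_SIZE set, then for every RAMBlock a one-byte idstr
length, the idstr itself and a be64 block length, and finally a be64
RAM_SAVE_FLAG_EOS marker.  ram_load() below parses exactly this layout on
the destination and uses it to cross-check its own block list.
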
> +
> +static int ram_save_iterate(QEMUFile *f, void *opaque)
> +{
> +    int ret;
> +    int i;
> +    int64_t t0;
> +    int total_sent = 0;
> +
> +    qemu_mutex_lock_ramlist();
> +
> +    if (ram_list.version != last_version) {
> +        reset_ram_globals();
> +    }
> +
> +    t0 = qemu_get_clock_ns(rt_clock);
> +    i = 0;
> +    while ((ret = qemu_file_rate_limit(f)) == 0) {
> +        int bytes_sent;
> +
> +        bytes_sent = ram_save_block(f, false);
> +        /* no more blocks to send */
> +        if (bytes_sent == 0) {
> +            break;
> +        }
> +        total_sent += bytes_sent;
> +        acct_info.iterations++;
> +        /* we want to check on the first iteration as well, in case it was the
> +           first time and we had to sync the dirty bitmap.
> +           qemu_get_clock_ns() is a bit expensive, so we only check every
> +           few iterations
> +        */
> +        if ((i & 63) == 0) {
> +            uint64_t t1 = (qemu_get_clock_ns(rt_clock) - t0) / 1000000;
> +            if (t1 > MAX_WAIT) {
> +                DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
> +                        t1, i);
> +                break;
> +            }
> +        }
> +        i++;
> +    }
> +
> +    qemu_mutex_unlock_ramlist();
> +
> +    if (ret < 0) {
> +        bytes_transferred += total_sent;
> +        return ret;
> +    }
> +
> +    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
> +    total_sent += 8;
> +    bytes_transferred += total_sent;
> +
> +    return total_sent;
> +}
> +
> +static int ram_save_complete(QEMUFile *f, void *opaque)
> +{
> +    qemu_mutex_lock_ramlist();
> +    migration_bitmap_sync();
> +
> +    /* try transferring iterative blocks of memory */
> +
> +    /* flush all remaining blocks regardless of rate limiting */
> +    while (true) {
> +        int bytes_sent;
> +
> +        bytes_sent = ram_save_block(f, true);
> +        /* no more blocks to send */
> +        if (bytes_sent == 0) {
> +            break;
> +        }
> +        bytes_transferred += bytes_sent;
> +    }
> +    migration_end();
> +
> +    qemu_mutex_unlock_ramlist();
> +    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
> +
> +    return 0;
> +}
> +
> +static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
> +{
> +    uint64_t remaining_size;
> +
> +    remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
> +
> +    if (remaining_size < max_size) {
> +        qemu_mutex_lock_iothread();
> +        migration_bitmap_sync();
> +        qemu_mutex_unlock_iothread();
> +        remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
> +    }
> +    return remaining_size;
> +}
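
The pending estimate is just the dirty-page count times the page size; the
bitmap is only re-synced (which needs the iothread lock and a full pass
over RAM) once the stale estimate has already dropped below max_size, so
the expensive path is taken only when the cheap estimate suggests the
remaining work might already fit within max_size.
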
> +
> +static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
> +{
> +    int ret, rc = 0;
> +    unsigned int xh_len;
> +    int xh_flags;
> +
> +    if (!XBZRLE.decoded_buf) {
> +        XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
> +    }
> +
> +    /* extract RLE header */
> +    xh_flags = qemu_get_byte(f);
> +    xh_len = qemu_get_be16(f);
> +
> +    if (xh_flags != ENCODING_FLAG_XBZRLE) {
> +        fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n");
> +        return -1;
> +    }
> +
> +    if (xh_len > TARGET_PAGE_SIZE) {
> +        fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n");
> +        return -1;
> +    }
> +    /* load data and decode */
> +    qemu_get_buffer(f, XBZRLE.decoded_buf, xh_len);
> +
> +    /* decode RLE */
> +    ret = xbzrle_decode_buffer(XBZRLE.decoded_buf, xh_len, host,
> +                               TARGET_PAGE_SIZE);
> +    if (ret == -1) {
> +        fprintf(stderr, "Failed to load XBZRLE page - decode error!\n");
> +        rc = -1;
> +    } else  if (ret > TARGET_PAGE_SIZE) {
> +        fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n",
> +                ret, TARGET_PAGE_SIZE);
> +        abort();
> +    }
> +
> +    return rc;
> +}
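
On the load side the reference data for the delta is simply the current
content of the destination page (the host pointer), which is expected to
match the copy the source kept in its XBZRLE cache when it last sent this
page; xbzrle_decode_buffer() then rewrites only the bytes that changed.
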
> +
> +static inline void *host_from_stream_offset(QEMUFile *f,
> +                                            ram_addr_t offset,
> +                                            int flags)
> +{
> +    static RAMBlock *block = NULL;
> +    char id[256];
> +    uint8_t len;
> +
> +    if (flags & RAM_SAVE_FLAG_CONTINUE) {
> +        if (!block) {
> +            fprintf(stderr, "Ack, bad migration stream!\n");
> +            return NULL;
> +        }
> +
> +        return memory_region_get_ram_ptr(block->mr) + offset;
> +    }
> +
> +    len = qemu_get_byte(f);
> +    qemu_get_buffer(f, (uint8_t *)id, len);
> +    id[len] = 0;
> +
> +    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> +        if (!strncmp(id, block->idstr, sizeof(id)))
> +            return memory_region_get_ram_ptr(block->mr) + offset;
> +    }
> +
> +    fprintf(stderr, "Can't find block %s!\n", id);
> +    return NULL;
> +}
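
The static block pointer is what makes RAM_SAVE_FLAG_CONTINUE work on the
load side: as long as consecutive pages come from the same RAMBlock the
sender omits the idstr and the receiver keeps reusing the block it
resolved last time.
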
> +
> +static int ram_load(QEMUFile *f, void *opaque, int version_id)
> +{
> +    ram_addr_t addr;
> +    int flags, ret = 0;
> +    int error;
> +    static uint64_t seq_iter;
> +
> +    seq_iter++;
> +
> +    if (version_id < 4 || version_id > 4) {
> +        return -EINVAL;
> +    }
> +
> +    do {
> +        addr = qemu_get_be64(f);
> +
> +        flags = addr & ~TARGET_PAGE_MASK;
> +        addr &= TARGET_PAGE_MASK;
> +
> +        if (flags & RAM_SAVE_FLAG_MEM_SIZE) {
> +            if (version_id == 4) {
> +                /* Synchronize RAM block list */
> +                char id[256];
> +                ram_addr_t length;
> +                ram_addr_t total_ram_bytes = addr;
> +
> +                while (total_ram_bytes) {
> +                    RAMBlock *block;
> +                    uint8_t len;
> +
> +                    len = qemu_get_byte(f);
> +                    qemu_get_buffer(f, (uint8_t *)id, len);
> +                    id[len] = 0;
> +                    length = qemu_get_be64(f);
> +
> +                    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> +                        if (!strncmp(id, block->idstr, sizeof(id))) {
> +                            if (block->length != length) {
> +                                ret =  -EINVAL;
> +                                goto done;
> +                            }
> +                            break;
> +                        }
> +                    }
> +
> +                    if (!block) {
> +                        fprintf(stderr, "Unknown ramblock \"%s\", cannot "
> +                                "accept migration\n", id);
> +                        ret = -EINVAL;
> +                        goto done;
> +                    }
> +
> +                    total_ram_bytes -= length;
> +                }
> +            }
> +        }
> +
> +        if (flags & RAM_SAVE_FLAG_COMPRESS) {
> +            void *host;
> +            uint8_t ch;
> +
> +            host = host_from_stream_offset(f, addr, flags);
> +            if (!host) {
> +                return -EINVAL;
> +            }
> +
> +            ch = qemu_get_byte(f);
> +            memset(host, ch, TARGET_PAGE_SIZE);
> +#ifndef _WIN32
> +            if (ch == 0 &&
> +                (!kvm_enabled() || kvm_has_sync_mmu()) &&
> +                getpagesize() <= TARGET_PAGE_SIZE) {
> +                qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
> +            }
> +#endif
> +        } else if (flags & RAM_SAVE_FLAG_PAGE) {
> +            void *host;
> +
> +            host = host_from_stream_offset(f, addr, flags);
> +            if (!host) {
> +                return -EINVAL;
> +            }
> +
> +            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
> +        } else if (flags & RAM_SAVE_FLAG_XBZRLE) {
> +            void *host = host_from_stream_offset(f, addr, flags);
> +            if (!host) {
> +                return -EINVAL;
> +            }
> +
> +            if (load_xbzrle(f, addr, host) < 0) {
> +                ret = -EINVAL;
> +                goto done;
> +            }
> +        }
> +        error = qemu_file_get_error(f);
> +        if (error) {
> +            ret = error;
> +            goto done;
> +        }
> +    } while (!(flags & RAM_SAVE_FLAG_EOS));
> +
> +done:
> +    DPRINTF("Completed load of VM with exit code %d seq iteration "
> +            "%" PRIu64 "\n", ret, seq_iter);
> +    return ret;
> +}
> +
> +SaveVMHandlers savevm_ram_handlers = {
> +    .save_live_setup = ram_save_setup,
> +    .save_live_iterate = ram_save_iterate,
> +    .save_live_complete = ram_save_complete,
> +    .save_live_pending = ram_save_pending,
> +    .load_state = ram_load,
> +    .cancel = ram_migration_cancel,
> +};
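
For context, the handlers are wired into the migration core elsewhere in
the tree; the registration at this point in time looks roughly like the
call below (the call site is not part of this patch, so treat the exact
arguments as an approximation rather than a quote):

    register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
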
>
Michael S. Tsirkin March 7, 2013, 4:38 p.m. UTC | #3
On Thu, Mar 07, 2013 at 04:20:48PM +0100, Paolo Bonzini wrote:
> Il 07/03/2013 14:32, Michael S. Tsirkin ha scritto:
> > +#ifdef DEBUG_ARCH_INIT
> > +#define DPRINTF(fmt, ...) \
> > +    do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0)
> 
> These need to be adjusted, but it can be a follow-up.
> 
> Paolo

Thanks, will do a follow-up.

diff mbox

Patch

diff --git a/Makefile.target b/Makefile.target
index ca657b3..54bc21b 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -108,7 +108,7 @@  CONFIG_NO_XEN = $(if $(subst n,,$(CONFIG_XEN)),n,y)
 CONFIG_NO_GET_MEMORY_MAPPING = $(if $(subst n,,$(CONFIG_HAVE_GET_MEMORY_MAPPING)),n,y)
 CONFIG_NO_CORE_DUMP = $(if $(subst n,,$(CONFIG_HAVE_CORE_DUMP)),n,y)
 
-obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o
+obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o savevm-ram.o
 obj-y += qtest.o
 obj-y += hw/
 obj-$(CONFIG_KVM) += kvm-all.o
diff --git a/arch_init.c b/arch_init.c
index 98e2bc6..9943ed4 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -31,20 +31,15 @@ 
 #include "config.h"
 #include "monitor/monitor.h"
 #include "sysemu/sysemu.h"
-#include "qemu/bitops.h"
-#include "qemu/bitmap.h"
 #include "sysemu/arch_init.h"
 #include "audio/audio.h"
 #include "hw/pc.h"
 #include "hw/pci/pci.h"
 #include "hw/audiodev.h"
 #include "sysemu/kvm.h"
-#include "migration/migration.h"
 #include "exec/gdbstub.h"
 #include "hw/smbios.h"
-#include "exec/address-spaces.h"
 #include "hw/pcspk.h"
-#include "migration/page_cache.h"
 #include "qemu/config-file.h"
 #include "qmp-commands.h"
 #include "trace.h"
@@ -103,38 +98,6 @@  int graphic_depth = 15;
 
 const uint32_t arch_type = QEMU_ARCH;
 
-/***********************************************************/
-/* ram save/restore */
-
-#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
-#define RAM_SAVE_FLAG_COMPRESS 0x02
-#define RAM_SAVE_FLAG_MEM_SIZE 0x04
-#define RAM_SAVE_FLAG_PAGE     0x08
-#define RAM_SAVE_FLAG_EOS      0x10
-#define RAM_SAVE_FLAG_CONTINUE 0x20
-#define RAM_SAVE_FLAG_XBZRLE   0x40
-
-#ifdef __ALTIVEC__
-#include <altivec.h>
-#define VECTYPE        vector unsigned char
-#define SPLAT(p)       vec_splat(vec_ld(0, p), 0)
-#define ALL_EQ(v1, v2) vec_all_eq(v1, v2)
-/* altivec.h may redefine the bool macro as vector type.
- * Reset it to POSIX semantics. */
-#undef bool
-#define bool _Bool
-#elif defined __SSE2__
-#include <emmintrin.h>
-#define VECTYPE        __m128i
-#define SPLAT(p)       _mm_set1_epi8(*(p))
-#define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 0xFFFF)
-#else
-#define VECTYPE        unsigned long
-#define SPLAT(p)       (*(p) * (~0UL / 255))
-#define ALL_EQ(v1, v2) ((v1) == (v2))
-#endif
-
-
 static struct defconfig_file {
     const char *filename;
     /* Indicates it is an user config file (disabled by -no-user-config) */
@@ -145,7 +108,6 @@  static struct defconfig_file {
     { NULL }, /* end of list */
 };
 
-
 int qemu_read_default_config_files(bool userconfig)
 {
     int ret;
@@ -164,731 +126,6 @@  int qemu_read_default_config_files(bool userconfig)
     return 0;
 }
 
-static int is_dup_page(uint8_t *page)
-{
-    VECTYPE *p = (VECTYPE *)page;
-    VECTYPE val = SPLAT(page);
-    int i;
-
-    for (i = 0; i < TARGET_PAGE_SIZE / sizeof(VECTYPE); i++) {
-        if (!ALL_EQ(val, p[i])) {
-            return 0;
-        }
-    }
-
-    return 1;
-}
-
-/* struct contains XBZRLE cache and a static page
-   used by the compression */
-static struct {
-    /* buffer used for XBZRLE encoding */
-    uint8_t *encoded_buf;
-    /* buffer for storing page content */
-    uint8_t *current_buf;
-    /* buffer used for XBZRLE decoding */
-    uint8_t *decoded_buf;
-    /* Cache for XBZRLE */
-    PageCache *cache;
-} XBZRLE = {
-    .encoded_buf = NULL,
-    .current_buf = NULL,
-    .decoded_buf = NULL,
-    .cache = NULL,
-};
-
-
-int64_t xbzrle_cache_resize(int64_t new_size)
-{
-    if (XBZRLE.cache != NULL) {
-        return cache_resize(XBZRLE.cache, new_size / TARGET_PAGE_SIZE) *
-            TARGET_PAGE_SIZE;
-    }
-    return pow2floor(new_size);
-}
-
-/* accounting for migration statistics */
-typedef struct AccountingInfo {
-    uint64_t dup_pages;
-    uint64_t norm_pages;
-    uint64_t iterations;
-    uint64_t xbzrle_bytes;
-    uint64_t xbzrle_pages;
-    uint64_t xbzrle_cache_miss;
-    uint64_t xbzrle_overflows;
-} AccountingInfo;
-
-static AccountingInfo acct_info;
-
-static void acct_clear(void)
-{
-    memset(&acct_info, 0, sizeof(acct_info));
-}
-
-uint64_t dup_mig_bytes_transferred(void)
-{
-    return acct_info.dup_pages * TARGET_PAGE_SIZE;
-}
-
-uint64_t dup_mig_pages_transferred(void)
-{
-    return acct_info.dup_pages;
-}
-
-uint64_t norm_mig_bytes_transferred(void)
-{
-    return acct_info.norm_pages * TARGET_PAGE_SIZE;
-}
-
-uint64_t norm_mig_pages_transferred(void)
-{
-    return acct_info.norm_pages;
-}
-
-uint64_t xbzrle_mig_bytes_transferred(void)
-{
-    return acct_info.xbzrle_bytes;
-}
-
-uint64_t xbzrle_mig_pages_transferred(void)
-{
-    return acct_info.xbzrle_pages;
-}
-
-uint64_t xbzrle_mig_pages_cache_miss(void)
-{
-    return acct_info.xbzrle_cache_miss;
-}
-
-uint64_t xbzrle_mig_pages_overflow(void)
-{
-    return acct_info.xbzrle_overflows;
-}
-
-static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
-                             int cont, int flag)
-{
-    size_t size;
-
-    qemu_put_be64(f, offset | cont | flag);
-    size = 8;
-
-    if (!cont) {
-        qemu_put_byte(f, strlen(block->idstr));
-        qemu_put_buffer(f, (uint8_t *)block->idstr,
-                        strlen(block->idstr));
-        size += 1 + strlen(block->idstr);
-    }
-    return size;
-}
-
-#define ENCODING_FLAG_XBZRLE 0x1
-
-static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
-                            ram_addr_t current_addr, RAMBlock *block,
-                            ram_addr_t offset, int cont, bool last_stage)
-{
-    int encoded_len = 0, bytes_sent = -1;
-    uint8_t *prev_cached_page;
-
-    if (!cache_is_cached(XBZRLE.cache, current_addr)) {
-        if (!last_stage) {
-            cache_insert(XBZRLE.cache, current_addr, current_data);
-        }
-        acct_info.xbzrle_cache_miss++;
-        return -1;
-    }
-
-    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
-
-    /* save current buffer into memory */
-    memcpy(XBZRLE.current_buf, current_data, TARGET_PAGE_SIZE);
-
-    /* XBZRLE encoding (if there is no overflow) */
-    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
-                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
-                                       TARGET_PAGE_SIZE);
-    if (encoded_len == 0) {
-        DPRINTF("Skipping unmodified page\n");
-        return 0;
-    } else if (encoded_len == -1) {
-        DPRINTF("Overflow\n");
-        acct_info.xbzrle_overflows++;
-        /* update data in the cache */
-        memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE);
-        return -1;
-    }
-
-    /* we need to update the data in the cache, in order to get the same data */
-    if (!last_stage) {
-        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
-    }
-
-    /* Send XBZRLE based compressed page */
-    bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
-    qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
-    qemu_put_be16(f, encoded_len);
-    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
-    bytes_sent += encoded_len + 1 + 2;
-    acct_info.xbzrle_pages++;
-    acct_info.xbzrle_bytes += bytes_sent;
-
-    return bytes_sent;
-}
-
-
-/* This is the last block that we have visited serching for dirty pages
- */
-static RAMBlock *last_seen_block;
-/* This is the last block from where we have sent data */
-static RAMBlock *last_sent_block;
-static ram_addr_t last_offset;
-static unsigned long *migration_bitmap;
-static uint64_t migration_dirty_pages;
-static uint32_t last_version;
-
-static inline
-ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
-                                                 ram_addr_t start)
-{
-    unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
-    unsigned long nr = base + (start >> TARGET_PAGE_BITS);
-    unsigned long size = base + (int128_get64(mr->size) >> TARGET_PAGE_BITS);
-
-    unsigned long next = find_next_bit(migration_bitmap, size, nr);
-
-    if (next < size) {
-        clear_bit(next, migration_bitmap);
-        migration_dirty_pages--;
-    }
-    return (next - base) << TARGET_PAGE_BITS;
-}
-
-static inline bool migration_bitmap_set_dirty(MemoryRegion *mr,
-                                              ram_addr_t offset)
-{
-    bool ret;
-    int nr = (mr->ram_addr + offset) >> TARGET_PAGE_BITS;
-
-    ret = test_and_set_bit(nr, migration_bitmap);
-
-    if (!ret) {
-        migration_dirty_pages++;
-    }
-    return ret;
-}
-
-/* Needs iothread lock! */
-
-static void migration_bitmap_sync(void)
-{
-    RAMBlock *block;
-    ram_addr_t addr;
-    uint64_t num_dirty_pages_init = migration_dirty_pages;
-    MigrationState *s = migrate_get_current();
-    static int64_t start_time;
-    static int64_t num_dirty_pages_period;
-    int64_t end_time;
-
-    if (!start_time) {
-        start_time = qemu_get_clock_ms(rt_clock);
-    }
-
-    trace_migration_bitmap_sync_start();
-    memory_global_sync_dirty_bitmap(get_system_memory());
-
-    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
-        for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
-            if (memory_region_test_and_clear_dirty(block->mr,
-                                                   addr, TARGET_PAGE_SIZE,
-                                                   DIRTY_MEMORY_MIGRATION)) {
-                migration_bitmap_set_dirty(block->mr, addr);
-            }
-        }
-    }
-    trace_migration_bitmap_sync_end(migration_dirty_pages
-                                    - num_dirty_pages_init);
-    num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
-    end_time = qemu_get_clock_ms(rt_clock);
-
-    /* more than 1 second = 1000 millisecons */
-    if (end_time > start_time + 1000) {
-        s->dirty_pages_rate = num_dirty_pages_period * 1000
-            / (end_time - start_time);
-        s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
-        start_time = end_time;
-        num_dirty_pages_period = 0;
-    }
-}
-
-/*
- * ram_save_block: Writes a page of memory to the stream f
- *
- * Returns:  The number of bytes written.
- *           0 means no dirty pages
- */
-
-static int ram_save_block(QEMUFile *f, bool last_stage)
-{
-    RAMBlock *block = last_seen_block;
-    ram_addr_t offset = last_offset;
-    bool complete_round = false;
-    int bytes_sent = 0;
-    MemoryRegion *mr;
-    ram_addr_t current_addr;
-
-    if (!block)
-        block = QTAILQ_FIRST(&ram_list.blocks);
-
-    while (true) {
-        mr = block->mr;
-        offset = migration_bitmap_find_and_reset_dirty(mr, offset);
-        if (complete_round && block == last_seen_block &&
-            offset >= last_offset) {
-            break;
-        }
-        if (offset >= block->length) {
-            offset = 0;
-            block = QTAILQ_NEXT(block, next);
-            if (!block) {
-                block = QTAILQ_FIRST(&ram_list.blocks);
-                complete_round = true;
-            }
-        } else {
-            uint8_t *p;
-            int cont = (block == last_sent_block) ?
-                RAM_SAVE_FLAG_CONTINUE : 0;
-
-            p = memory_region_get_ram_ptr(mr) + offset;
-
-            /* In doubt sent page as normal */
-            bytes_sent = -1;
-            if (is_dup_page(p)) {
-                acct_info.dup_pages++;
-                bytes_sent = save_block_hdr(f, block, offset, cont,
-                                            RAM_SAVE_FLAG_COMPRESS);
-                qemu_put_byte(f, *p);
-                bytes_sent += 1;
-            } else if (migrate_use_xbzrle()) {
-                current_addr = block->offset + offset;
-                bytes_sent = save_xbzrle_page(f, p, current_addr, block,
-                                              offset, cont, last_stage);
-                if (!last_stage) {
-                    p = get_cached_data(XBZRLE.cache, current_addr);
-                }
-            }
-
-            /* XBZRLE overflow or normal page */
-            if (bytes_sent == -1) {
-                bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
-                qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
-                bytes_sent += TARGET_PAGE_SIZE;
-                acct_info.norm_pages++;
-            }
-
-            /* if page is unmodified, continue to the next */
-            if (bytes_sent > 0) {
-                last_sent_block = block;
-                break;
-            }
-        }
-    }
-    last_seen_block = block;
-    last_offset = offset;
-
-    return bytes_sent;
-}
-
-static uint64_t bytes_transferred;
-
-static ram_addr_t ram_save_remaining(void)
-{
-    return migration_dirty_pages;
-}
-
-uint64_t ram_bytes_remaining(void)
-{
-    return ram_save_remaining() * TARGET_PAGE_SIZE;
-}
-
-uint64_t ram_bytes_transferred(void)
-{
-    return bytes_transferred;
-}
-
-uint64_t ram_bytes_total(void)
-{
-    RAMBlock *block;
-    uint64_t total = 0;
-
-    QTAILQ_FOREACH(block, &ram_list.blocks, next)
-        total += block->length;
-
-    return total;
-}
-
-static void migration_end(void)
-{
-    if (migration_bitmap) {
-        memory_global_dirty_log_stop();
-        g_free(migration_bitmap);
-        migration_bitmap = NULL;
-    }
-
-    if (XBZRLE.cache) {
-        cache_fini(XBZRLE.cache);
-        g_free(XBZRLE.cache);
-        g_free(XBZRLE.encoded_buf);
-        g_free(XBZRLE.current_buf);
-        g_free(XBZRLE.decoded_buf);
-        XBZRLE.cache = NULL;
-    }
-}
-
-static void ram_migration_cancel(void *opaque)
-{
-    migration_end();
-}
-
-static void reset_ram_globals(void)
-{
-    last_seen_block = NULL;
-    last_sent_block = NULL;
-    last_offset = 0;
-    last_version = ram_list.version;
-}
-
-#define MAX_WAIT 50 /* ms, half buffered_file limit */
-
-static int ram_save_setup(QEMUFile *f, void *opaque)
-{
-    RAMBlock *block;
-    int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
-
-    migration_bitmap = bitmap_new(ram_pages);
-    bitmap_set(migration_bitmap, 0, ram_pages);
-    migration_dirty_pages = ram_pages;
-
-    if (migrate_use_xbzrle()) {
-        XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
-                                  TARGET_PAGE_SIZE,
-                                  TARGET_PAGE_SIZE);
-        if (!XBZRLE.cache) {
-            DPRINTF("Error creating cache\n");
-            return -1;
-        }
-        XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE);
-        XBZRLE.current_buf = g_malloc(TARGET_PAGE_SIZE);
-        acct_clear();
-    }
-
-    qemu_mutex_lock_iothread();
-    qemu_mutex_lock_ramlist();
-    bytes_transferred = 0;
-    reset_ram_globals();
-
-    memory_global_dirty_log_start();
-    migration_bitmap_sync();
-    qemu_mutex_unlock_iothread();
-
-    qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
-
-    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
-        qemu_put_byte(f, strlen(block->idstr));
-        qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
-        qemu_put_be64(f, block->length);
-    }
-
-    qemu_mutex_unlock_ramlist();
-    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
-
-    return 0;
-}
-
-static int ram_save_iterate(QEMUFile *f, void *opaque)
-{
-    int ret;
-    int i;
-    int64_t t0;
-    int total_sent = 0;
-
-    qemu_mutex_lock_ramlist();
-
-    if (ram_list.version != last_version) {
-        reset_ram_globals();
-    }
-
-    t0 = qemu_get_clock_ns(rt_clock);
-    i = 0;
-    while ((ret = qemu_file_rate_limit(f)) == 0) {
-        int bytes_sent;
-
-        bytes_sent = ram_save_block(f, false);
-        /* no more blocks to sent */
-        if (bytes_sent == 0) {
-            break;
-        }
-        total_sent += bytes_sent;
-        acct_info.iterations++;
-        /* we want to check in the 1st loop, just in case it was the 1st time
-           and we had to sync the dirty bitmap.
-           qemu_get_clock_ns() is a bit expensive, so we only check each some
-           iterations
-        */
-        if ((i & 63) == 0) {
-            uint64_t t1 = (qemu_get_clock_ns(rt_clock) - t0) / 1000000;
-            if (t1 > MAX_WAIT) {
-                DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
-                        t1, i);
-                break;
-            }
-        }
-        i++;
-    }
-
-    qemu_mutex_unlock_ramlist();
-
-    if (ret < 0) {
-        bytes_transferred += total_sent;
-        return ret;
-    }
-
-    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
-    total_sent += 8;
-    bytes_transferred += total_sent;
-
-    return total_sent;
-}
-
-static int ram_save_complete(QEMUFile *f, void *opaque)
-{
-    qemu_mutex_lock_ramlist();
-    migration_bitmap_sync();
-
-    /* try transferring iterative blocks of memory */
-
-    /* flush all remaining blocks regardless of rate limiting */
-    while (true) {
-        int bytes_sent;
-
-        bytes_sent = ram_save_block(f, true);
-        /* no more blocks to sent */
-        if (bytes_sent == 0) {
-            break;
-        }
-        bytes_transferred += bytes_sent;
-    }
-    migration_end();
-
-    qemu_mutex_unlock_ramlist();
-    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
-
-    return 0;
-}
-
-static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
-{
-    uint64_t remaining_size;
-
-    remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
-
-    if (remaining_size < max_size) {
-        qemu_mutex_lock_iothread();
-        migration_bitmap_sync();
-        qemu_mutex_unlock_iothread();
-        remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
-    }
-    return remaining_size;
-}
-
-static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
-{
-    int ret, rc = 0;
-    unsigned int xh_len;
-    int xh_flags;
-
-    if (!XBZRLE.decoded_buf) {
-        XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
-    }
-
-    /* extract RLE header */
-    xh_flags = qemu_get_byte(f);
-    xh_len = qemu_get_be16(f);
-
-    if (xh_flags != ENCODING_FLAG_XBZRLE) {
-        fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n");
-        return -1;
-    }
-
-    if (xh_len > TARGET_PAGE_SIZE) {
-        fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n");
-        return -1;
-    }
-    /* load data and decode */
-    qemu_get_buffer(f, XBZRLE.decoded_buf, xh_len);
-
-    /* decode RLE */
-    ret = xbzrle_decode_buffer(XBZRLE.decoded_buf, xh_len, host,
-                               TARGET_PAGE_SIZE);
-    if (ret == -1) {
-        fprintf(stderr, "Failed to load XBZRLE page - decode error!\n");
-        rc = -1;
-    } else  if (ret > TARGET_PAGE_SIZE) {
-        fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n",
-                ret, TARGET_PAGE_SIZE);
-        abort();
-    }
-
-    return rc;
-}
-
-static inline void *host_from_stream_offset(QEMUFile *f,
-                                            ram_addr_t offset,
-                                            int flags)
-{
-    static RAMBlock *block = NULL;
-    char id[256];
-    uint8_t len;
-
-    if (flags & RAM_SAVE_FLAG_CONTINUE) {
-        if (!block) {
-            fprintf(stderr, "Ack, bad migration stream!\n");
-            return NULL;
-        }
-
-        return memory_region_get_ram_ptr(block->mr) + offset;
-    }
-
-    len = qemu_get_byte(f);
-    qemu_get_buffer(f, (uint8_t *)id, len);
-    id[len] = 0;
-
-    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
-        if (!strncmp(id, block->idstr, sizeof(id)))
-            return memory_region_get_ram_ptr(block->mr) + offset;
-    }
-
-    fprintf(stderr, "Can't find block %s!\n", id);
-    return NULL;
-}
-
-static int ram_load(QEMUFile *f, void *opaque, int version_id)
-{
-    ram_addr_t addr;
-    int flags, ret = 0;
-    int error;
-    static uint64_t seq_iter;
-
-    seq_iter++;
-
-    if (version_id < 4 || version_id > 4) {
-        return -EINVAL;
-    }
-
-    do {
-        addr = qemu_get_be64(f);
-
-        flags = addr & ~TARGET_PAGE_MASK;
-        addr &= TARGET_PAGE_MASK;
-
-        if (flags & RAM_SAVE_FLAG_MEM_SIZE) {
-            if (version_id == 4) {
-                /* Synchronize RAM block list */
-                char id[256];
-                ram_addr_t length;
-                ram_addr_t total_ram_bytes = addr;
-
-                while (total_ram_bytes) {
-                    RAMBlock *block;
-                    uint8_t len;
-
-                    len = qemu_get_byte(f);
-                    qemu_get_buffer(f, (uint8_t *)id, len);
-                    id[len] = 0;
-                    length = qemu_get_be64(f);
-
-                    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
-                        if (!strncmp(id, block->idstr, sizeof(id))) {
-                            if (block->length != length) {
-                                ret =  -EINVAL;
-                                goto done;
-                            }
-                            break;
-                        }
-                    }
-
-                    if (!block) {
-                        fprintf(stderr, "Unknown ramblock \"%s\", cannot "
-                                "accept migration\n", id);
-                        ret = -EINVAL;
-                        goto done;
-                    }
-
-                    total_ram_bytes -= length;
-                }
-            }
-        }
-
-        if (flags & RAM_SAVE_FLAG_COMPRESS) {
-            void *host;
-            uint8_t ch;
-
-            host = host_from_stream_offset(f, addr, flags);
-            if (!host) {
-                return -EINVAL;
-            }
-
-            ch = qemu_get_byte(f);
-            memset(host, ch, TARGET_PAGE_SIZE);
-#ifndef _WIN32
-            if (ch == 0 &&
-                (!kvm_enabled() || kvm_has_sync_mmu()) &&
-                getpagesize() <= TARGET_PAGE_SIZE) {
-                qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
-            }
-#endif
-        } else if (flags & RAM_SAVE_FLAG_PAGE) {
-            void *host;
-
-            host = host_from_stream_offset(f, addr, flags);
-            if (!host) {
-                return -EINVAL;
-            }
-
-            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
-        } else if (flags & RAM_SAVE_FLAG_XBZRLE) {
-            void *host = host_from_stream_offset(f, addr, flags);
-            if (!host) {
-                return -EINVAL;
-            }
-
-            if (load_xbzrle(f, addr, host) < 0) {
-                ret = -EINVAL;
-                goto done;
-            }
-        }
-        error = qemu_file_get_error(f);
-        if (error) {
-            ret = error;
-            goto done;
-        }
-    } while (!(flags & RAM_SAVE_FLAG_EOS));
-
-done:
-    DPRINTF("Completed load of VM with exit code %d seq iteration "
-            "%" PRIu64 "\n", ret, seq_iter);
-    return ret;
-}
-
-SaveVMHandlers savevm_ram_handlers = {
-    .save_live_setup = ram_save_setup,
-    .save_live_iterate = ram_save_iterate,
-    .save_live_complete = ram_save_complete,
-    .save_live_pending = ram_save_pending,
-    .load_state = ram_load,
-    .cancel = ram_migration_cancel,
-};
-
 #ifdef HAS_AUDIO
 struct soundhw {
     const char *name;
diff --git a/savevm-ram.c b/savevm-ram.c
new file mode 100644
index 0000000..cea656c
--- /dev/null
+++ b/savevm-ram.c
@@ -0,0 +1,804 @@ 
+/*
+ * RAM Migration support
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <stdint.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#ifndef _WIN32
+#include <sys/types.h>
+#include <sys/mman.h>
+#endif
+#include "sysemu/sysemu.h"
+#include "qemu/bitops.h"
+#include "qemu/bitmap.h"
+#include "sysemu/kvm.h"
+#include "migration/migration.h"
+#include "exec/address-spaces.h"
+#include "migration/page_cache.h"
+#include "migration/qemu-file.h"
+#include "trace.h"
+#include "exec/cpu-all.h"
+
+#ifdef DEBUG_ARCH_INIT
+#define DPRINTF(fmt, ...) \
+    do { fprintf(stdout, "savevm-ram: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
+
+/***********************************************************/
+/* ram save/restore */
+
+#define RAM_SAVE_FLAG_FULL     0x01 /* Obsolete, not used anymore */
+#define RAM_SAVE_FLAG_COMPRESS 0x02
+#define RAM_SAVE_FLAG_MEM_SIZE 0x04
+#define RAM_SAVE_FLAG_PAGE     0x08
+#define RAM_SAVE_FLAG_EOS      0x10
+#define RAM_SAVE_FLAG_CONTINUE 0x20
+#define RAM_SAVE_FLAG_XBZRLE   0x40
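+/* These flags are stored in the low bits of the page offset/address written
+ * to the stream; ram_load() recovers them with ~TARGET_PAGE_MASK. */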
+
+#ifdef __ALTIVEC__
+#include <altivec.h>
+#define VECTYPE        vector unsigned char
+#define SPLAT(p)       vec_splat(vec_ld(0, p), 0)
+#define ALL_EQ(v1, v2) vec_all_eq(v1, v2)
+/* altivec.h may redefine the bool macro as vector type.
+ * Reset it to POSIX semantics. */
+#undef bool
+#define bool _Bool
+#elif defined __SSE2__
+#include <emmintrin.h>
+#define VECTYPE        __m128i
+#define SPLAT(p)       _mm_set1_epi8(*(p))
+#define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 0xFFFF)
+#else
+#define VECTYPE        unsigned long
+#define SPLAT(p)       (*(p) * (~0UL / 255))
+#define ALL_EQ(v1, v2) ((v1) == (v2))
+#endif
+
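+/* Return 1 if every byte of the page equals its first byte, so the page can
+ * be sent as a single byte with RAM_SAVE_FLAG_COMPRESS; 0 otherwise. */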
+static int is_dup_page(uint8_t *page)
+{
+    VECTYPE *p = (VECTYPE *)page;
+    VECTYPE val = SPLAT(page);
+    int i;
+
+    for (i = 0; i < TARGET_PAGE_SIZE / sizeof(VECTYPE); i++) {
+        if (!ALL_EQ(val, p[i])) {
+            return 0;
+        }
+    }
+
+    return 1;
+}
+
+/* struct containing the XBZRLE cache and the page-sized static buffers
+   used by the compression */
+static struct {
+    /* buffer used for XBZRLE encoding */
+    uint8_t *encoded_buf;
+    /* buffer for storing page content */
+    uint8_t *current_buf;
+    /* buffer used for XBZRLE decoding */
+    uint8_t *decoded_buf;
+    /* Cache for XBZRLE */
+    PageCache *cache;
+} XBZRLE = {
+    .encoded_buf = NULL,
+    .current_buf = NULL,
+    .decoded_buf = NULL,
+    .cache = NULL,
+};
+
+
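+/* Resize the XBZRLE page cache.  Returns the new cache size in bytes, or,
+ * if the cache has not been allocated yet, the power-of-two floor of the
+ * requested size. */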
+int64_t xbzrle_cache_resize(int64_t new_size)
+{
+    if (XBZRLE.cache != NULL) {
+        return cache_resize(XBZRLE.cache, new_size / TARGET_PAGE_SIZE) *
+            TARGET_PAGE_SIZE;
+    }
+    return pow2floor(new_size);
+}
+
+/* accounting for migration statistics */
+typedef struct AccountingInfo {
+    uint64_t dup_pages;
+    uint64_t norm_pages;
+    uint64_t iterations;
+    uint64_t xbzrle_bytes;
+    uint64_t xbzrle_pages;
+    uint64_t xbzrle_cache_miss;
+    uint64_t xbzrle_overflows;
+} AccountingInfo;
+
+static AccountingInfo acct_info;
+
+static void acct_clear(void)
+{
+    memset(&acct_info, 0, sizeof(acct_info));
+}
+
+uint64_t dup_mig_bytes_transferred(void)
+{
+    return acct_info.dup_pages * TARGET_PAGE_SIZE;
+}
+
+uint64_t dup_mig_pages_transferred(void)
+{
+    return acct_info.dup_pages;
+}
+
+uint64_t norm_mig_bytes_transferred(void)
+{
+    return acct_info.norm_pages * TARGET_PAGE_SIZE;
+}
+
+uint64_t norm_mig_pages_transferred(void)
+{
+    return acct_info.norm_pages;
+}
+
+uint64_t xbzrle_mig_bytes_transferred(void)
+{
+    return acct_info.xbzrle_bytes;
+}
+
+uint64_t xbzrle_mig_pages_transferred(void)
+{
+    return acct_info.xbzrle_pages;
+}
+
+uint64_t xbzrle_mig_pages_cache_miss(void)
+{
+    return acct_info.xbzrle_cache_miss;
+}
+
+uint64_t xbzrle_mig_pages_overflow(void)
+{
+    return acct_info.xbzrle_overflows;
+}
+
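+/* Write the header for one page: the page offset within its RAMBlock ORed
+ * with the cont/flag bits, followed by the block idstr unless
+ * RAM_SAVE_FLAG_CONTINUE marks the block as the same as the previous page.
+ * Returns the number of bytes written. */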
+static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
+                             int cont, int flag)
+{
+    size_t size;
+
+    qemu_put_be64(f, offset | cont | flag);
+    size = 8;
+
+    if (!cont) {
+        qemu_put_byte(f, strlen(block->idstr));
+        qemu_put_buffer(f, (uint8_t *)block->idstr,
+                        strlen(block->idstr));
+        size += 1 + strlen(block->idstr);
+    }
+    return size;
+}
+
+#define ENCODING_FLAG_XBZRLE 0x1
+
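+/* Try to send a page as an XBZRLE delta against the cached copy.
+ * Returns the number of bytes written, 0 if the page was unmodified,
+ * or -1 on a cache miss or encoding overflow (the caller then sends
+ * the page as a normal page). */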
+static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
+                            ram_addr_t current_addr, RAMBlock *block,
+                            ram_addr_t offset, int cont, bool last_stage)
+{
+    int encoded_len = 0, bytes_sent = -1;
+    uint8_t *prev_cached_page;
+
+    if (!cache_is_cached(XBZRLE.cache, current_addr)) {
+        if (!last_stage) {
+            cache_insert(XBZRLE.cache, current_addr, current_data);
+        }
+        acct_info.xbzrle_cache_miss++;
+        return -1;
+    }
+
+    prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
+
+    /* save current buffer into memory */
+    memcpy(XBZRLE.current_buf, current_data, TARGET_PAGE_SIZE);
+
+    /* XBZRLE encoding (if there is no overflow) */
+    encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
+                                       TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
+                                       TARGET_PAGE_SIZE);
+    if (encoded_len == 0) {
+        DPRINTF("Skipping unmodified page\n");
+        return 0;
+    } else if (encoded_len == -1) {
+        DPRINTF("Overflow\n");
+        acct_info.xbzrle_overflows++;
+        /* update data in the cache */
+        memcpy(prev_cached_page, current_data, TARGET_PAGE_SIZE);
+        return -1;
+    }
+
+    /* update the page in the cache, so the next delta is computed against
+       the data the destination now holds */
+    if (!last_stage) {
+        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
+    }
+
+    /* Send XBZRLE based compressed page */
+    bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
+    qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
+    qemu_put_be16(f, encoded_len);
+    qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
+    bytes_sent += encoded_len + 1 + 2;
+    acct_info.xbzrle_pages++;
+    acct_info.xbzrle_bytes += bytes_sent;
+
+    return bytes_sent;
+}
+
+
+/* This is the last block that we have visited searching for dirty pages */
+static RAMBlock *last_seen_block;
+/* This is the last block from where we have sent data */
+static RAMBlock *last_sent_block;
+static ram_addr_t last_offset;
+static unsigned long *migration_bitmap;
+static uint64_t migration_dirty_pages;
+static uint32_t last_version;
+
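+/* Find the first dirty page in 'mr' at or after 'start', clear its bit in
+ * migration_bitmap and return its offset within the block; the result is
+ * past the end of the block when nothing dirty was found. */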
+static inline
+ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
+                                                 ram_addr_t start)
+{
+    unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
+    unsigned long nr = base + (start >> TARGET_PAGE_BITS);
+    unsigned long size = base + (int128_get64(mr->size) >> TARGET_PAGE_BITS);
+
+    unsigned long next = find_next_bit(migration_bitmap, size, nr);
+
+    if (next < size) {
+        clear_bit(next, migration_bitmap);
+        migration_dirty_pages--;
+    }
+    return (next - base) << TARGET_PAGE_BITS;
+}
+
+static inline bool migration_bitmap_set_dirty(MemoryRegion *mr,
+                                              ram_addr_t offset)
+{
+    bool ret;
+    int nr = (mr->ram_addr + offset) >> TARGET_PAGE_BITS;
+
+    ret = test_and_set_bit(nr, migration_bitmap);
+
+    if (!ret) {
+        migration_dirty_pages++;
+    }
+    return ret;
+}
+
+/* Needs iothread lock! */
+
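+/* Pull the dirty log from the memory API into migration_bitmap, updating
+ * migration_dirty_pages, and recompute dirty_pages_rate roughly once per
+ * second. */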
+static void migration_bitmap_sync(void)
+{
+    RAMBlock *block;
+    ram_addr_t addr;
+    uint64_t num_dirty_pages_init = migration_dirty_pages;
+    MigrationState *s = migrate_get_current();
+    static int64_t start_time;
+    static int64_t num_dirty_pages_period;
+    int64_t end_time;
+
+    if (!start_time) {
+        start_time = qemu_get_clock_ms(rt_clock);
+    }
+
+    trace_migration_bitmap_sync_start();
+    memory_global_sync_dirty_bitmap(get_system_memory());
+
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+        for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
+            if (memory_region_test_and_clear_dirty(block->mr,
+                                                   addr, TARGET_PAGE_SIZE,
+                                                   DIRTY_MEMORY_MIGRATION)) {
+                migration_bitmap_set_dirty(block->mr, addr);
+            }
+        }
+    }
+    trace_migration_bitmap_sync_end(migration_dirty_pages
+                                    - num_dirty_pages_init);
+    num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
+    end_time = qemu_get_clock_ms(rt_clock);
+
+    /* more than 1 second = 1000 milliseconds */
+    if (end_time > start_time + 1000) {
+        s->dirty_pages_rate = num_dirty_pages_period * 1000
+            / (end_time - start_time);
+        s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
+        start_time = end_time;
+        num_dirty_pages_period = 0;
+    }
+}
+
+/*
+ * ram_save_block: Writes a page of memory to the stream f
+ *
+ * Returns:  The number of bytes written.
+ *           0 means no dirty pages
+ */
+
+static int ram_save_block(QEMUFile *f, bool last_stage)
+{
+    RAMBlock *block = last_seen_block;
+    ram_addr_t offset = last_offset;
+    bool complete_round = false;
+    int bytes_sent = 0;
+    MemoryRegion *mr;
+    ram_addr_t current_addr;
+
+    if (!block) {
+        block = QTAILQ_FIRST(&ram_list.blocks);
+    }
+
+    while (true) {
+        mr = block->mr;
+        offset = migration_bitmap_find_and_reset_dirty(mr, offset);
+        if (complete_round && block == last_seen_block &&
+            offset >= last_offset) {
+            break;
+        }
+        if (offset >= block->length) {
+            offset = 0;
+            block = QTAILQ_NEXT(block, next);
+            if (!block) {
+                block = QTAILQ_FIRST(&ram_list.blocks);
+                complete_round = true;
+            }
+        } else {
+            uint8_t *p;
+            int cont = (block == last_sent_block) ?
+                RAM_SAVE_FLAG_CONTINUE : 0;
+
+            p = memory_region_get_ram_ptr(mr) + offset;
+
+            /* When in doubt, send the page as a normal page */
+            bytes_sent = -1;
+            if (is_dup_page(p)) {
+                acct_info.dup_pages++;
+                bytes_sent = save_block_hdr(f, block, offset, cont,
+                                            RAM_SAVE_FLAG_COMPRESS);
+                qemu_put_byte(f, *p);
+                bytes_sent += 1;
+            } else if (migrate_use_xbzrle()) {
+                current_addr = block->offset + offset;
+                bytes_sent = save_xbzrle_page(f, p, current_addr, block,
+                                              offset, cont, last_stage);
+                if (!last_stage) {
+                    p = get_cached_data(XBZRLE.cache, current_addr);
+                }
+            }
+
+            /* XBZRLE overflow or normal page */
+            if (bytes_sent == -1) {
+                bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
+                qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
+                bytes_sent += TARGET_PAGE_SIZE;
+                acct_info.norm_pages++;
+            }
+
+            /* if the page was unmodified (nothing sent), keep searching */
+            if (bytes_sent > 0) {
+                last_sent_block = block;
+                break;
+            }
+        }
+    }
+    last_seen_block = block;
+    last_offset = offset;
+
+    return bytes_sent;
+}
+
+static uint64_t bytes_transferred;
+
+static ram_addr_t ram_save_remaining(void)
+{
+    return migration_dirty_pages;
+}
+
+uint64_t ram_bytes_remaining(void)
+{
+    return ram_save_remaining() * TARGET_PAGE_SIZE;
+}
+
+uint64_t ram_bytes_transferred(void)
+{
+    return bytes_transferred;
+}
+
+uint64_t ram_bytes_total(void)
+{
+    RAMBlock *block;
+    uint64_t total = 0;
+
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+        total += block->length;
+    }
+
+    return total;
+}
+
+static void migration_end(void)
+{
+    if (migration_bitmap) {
+        memory_global_dirty_log_stop();
+        g_free(migration_bitmap);
+        migration_bitmap = NULL;
+    }
+
+    if (XBZRLE.cache) {
+        cache_fini(XBZRLE.cache);
+        g_free(XBZRLE.cache);
+        g_free(XBZRLE.encoded_buf);
+        g_free(XBZRLE.current_buf);
+        g_free(XBZRLE.decoded_buf);
+        XBZRLE.cache = NULL;
+    }
+}
+
+static void ram_migration_cancel(void *opaque)
+{
+    migration_end();
+}
+
+static void reset_ram_globals(void)
+{
+    last_seen_block = NULL;
+    last_sent_block = NULL;
+    last_offset = 0;
+    last_version = ram_list.version;
+}
+
+#define MAX_WAIT 50 /* ms, half buffered_file limit */
+
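+/* Live migration setup stage: mark every page dirty in a fresh bitmap,
+ * set up the XBZRLE cache if requested, start dirty logging, and send the
+ * list of RAMBlocks (idstr + length) so the destination can validate it. */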
+static int ram_save_setup(QEMUFile *f, void *opaque)
+{
+    RAMBlock *block;
+    int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
+
+    migration_bitmap = bitmap_new(ram_pages);
+    bitmap_set(migration_bitmap, 0, ram_pages);
+    migration_dirty_pages = ram_pages;
+
+    if (migrate_use_xbzrle()) {
+        XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
+                                  TARGET_PAGE_SIZE,
+                                  TARGET_PAGE_SIZE);
+        if (!XBZRLE.cache) {
+            DPRINTF("Error creating cache\n");
+            return -1;
+        }
+        XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE);
+        XBZRLE.current_buf = g_malloc(TARGET_PAGE_SIZE);
+        acct_clear();
+    }
+
+    qemu_mutex_lock_iothread();
+    qemu_mutex_lock_ramlist();
+    bytes_transferred = 0;
+    reset_ram_globals();
+
+    memory_global_dirty_log_start();
+    migration_bitmap_sync();
+    qemu_mutex_unlock_iothread();
+
+    qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
+
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+        qemu_put_byte(f, strlen(block->idstr));
+        qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
+        qemu_put_be64(f, block->length);
+    }
+
+    qemu_mutex_unlock_ramlist();
+    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+
+    return 0;
+}
+
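+/* Iterative stage: send dirty pages until the bandwidth limit is reached,
+ * no dirty pages remain, or MAX_WAIT ms have passed (checked every 64
+ * iterations), then terminate the section with RAM_SAVE_FLAG_EOS. */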
+static int ram_save_iterate(QEMUFile *f, void *opaque)
+{
+    int ret;
+    int i;
+    int64_t t0;
+    int total_sent = 0;
+
+    qemu_mutex_lock_ramlist();
+
+    if (ram_list.version != last_version) {
+        reset_ram_globals();
+    }
+
+    t0 = qemu_get_clock_ns(rt_clock);
+    i = 0;
+    while ((ret = qemu_file_rate_limit(f)) == 0) {
+        int bytes_sent;
+
+        bytes_sent = ram_save_block(f, false);
+        /* no more blocks to send */
+        if (bytes_sent == 0) {
+            break;
+        }
+        total_sent += bytes_sent;
+        acct_info.iterations++;
+        /* we want to check in the 1st loop, just in case it was the 1st time
+           and we had to sync the dirty bitmap.
+           qemu_get_clock_ns() is a bit expensive, so we only check once
+           every few iterations
+        */
+        if ((i & 63) == 0) {
+            uint64_t t1 = (qemu_get_clock_ns(rt_clock) - t0) / 1000000;
+            if (t1 > MAX_WAIT) {
+                DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
+                        t1, i);
+                break;
+            }
+        }
+        i++;
+    }
+
+    qemu_mutex_unlock_ramlist();
+
+    if (ret < 0) {
+        bytes_transferred += total_sent;
+        return ret;
+    }
+
+    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+    total_sent += 8;
+    bytes_transferred += total_sent;
+
+    return total_sent;
+}
+
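+/* Final stage: sync the dirty bitmap one last time and flush every
+ * remaining dirty page, ignoring the rate limit, then tear down the
+ * migration state. */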
+static int ram_save_complete(QEMUFile *f, void *opaque)
+{
+    qemu_mutex_lock_ramlist();
+    migration_bitmap_sync();
+
+    /* try transferring iterative blocks of memory */
+
+    /* flush all remaining blocks regardless of rate limiting */
+    while (true) {
+        int bytes_sent;
+
+        bytes_sent = ram_save_block(f, true);
+        /* no more blocks to send */
+        if (bytes_sent == 0) {
+            break;
+        }
+        bytes_transferred += bytes_sent;
+    }
+    migration_end();
+
+    qemu_mutex_unlock_ramlist();
+    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+
+    return 0;
+}
+
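+/* Report how many bytes are still to be sent.  If the estimate drops below
+ * max_size, resync the dirty bitmap (under the iothread lock) first so the
+ * caller decides on completion with up-to-date data. */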
+static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
+{
+    uint64_t remaining_size;
+
+    remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
+
+    if (remaining_size < max_size) {
+        qemu_mutex_lock_iothread();
+        migration_bitmap_sync();
+        qemu_mutex_unlock_iothread();
+        remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
+    }
+    return remaining_size;
+}
+
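+/* Load one XBZRLE-encoded page: read the encoding flag byte and the be16
+ * encoded length, then decode into the host page.  Returns 0 on success,
+ * -1 on a malformed header or decode error. */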
+static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
+{
+    int ret, rc = 0;
+    unsigned int xh_len;
+    int xh_flags;
+
+    if (!XBZRLE.decoded_buf) {
+        XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
+    }
+
+    /* extract RLE header */
+    xh_flags = qemu_get_byte(f);
+    xh_len = qemu_get_be16(f);
+
+    if (xh_flags != ENCODING_FLAG_XBZRLE) {
+        fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n");
+        return -1;
+    }
+
+    if (xh_len > TARGET_PAGE_SIZE) {
+        fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n");
+        return -1;
+    }
+    /* load data and decode */
+    qemu_get_buffer(f, XBZRLE.decoded_buf, xh_len);
+
+    /* decode RLE */
+    ret = xbzrle_decode_buffer(XBZRLE.decoded_buf, xh_len, host,
+                               TARGET_PAGE_SIZE);
+    if (ret == -1) {
+        fprintf(stderr, "Failed to load XBZRLE page - decode error!\n");
+        rc = -1;
+    } else if (ret > TARGET_PAGE_SIZE) {
+        fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n",
+                ret, TARGET_PAGE_SIZE);
+        abort();
+    }
+
+    return rc;
+}
+
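+/* Translate a (block idstr, offset) pair from the stream into a host
+ * pointer.  With RAM_SAVE_FLAG_CONTINUE the block from the previous call
+ * is reused, so only the offset is on the wire. */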
+static inline void *host_from_stream_offset(QEMUFile *f,
+                                            ram_addr_t offset,
+                                            int flags)
+{
+    static RAMBlock *block = NULL;
+    char id[256];
+    uint8_t len;
+
+    if (flags & RAM_SAVE_FLAG_CONTINUE) {
+        if (!block) {
+            fprintf(stderr, "Ack, bad migration stream!\n");
+            return NULL;
+        }
+
+        return memory_region_get_ram_ptr(block->mr) + offset;
+    }
+
+    len = qemu_get_byte(f);
+    qemu_get_buffer(f, (uint8_t *)id, len);
+    id[len] = 0;
+
+    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+        if (!strncmp(id, block->idstr, sizeof(id))) {
+            return memory_region_get_ram_ptr(block->mr) + offset;
+        }
+    }
+
+    fprintf(stderr, "Can't find block %s!\n", id);
+    return NULL;
+}
+
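+/* Incoming side: process records until RAM_SAVE_FLAG_EOS.  Each record is
+ * a be64 page address with the RAM_SAVE_FLAG_* bits in its low bits,
+ * followed by flag-dependent data (block list, a fill byte, a full page,
+ * or an XBZRLE delta).  Only stream version 4 is accepted. */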
+static int ram_load(QEMUFile *f, void *opaque, int version_id)
+{
+    ram_addr_t addr;
+    int flags, ret = 0;
+    int error;
+    static uint64_t seq_iter;
+
+    seq_iter++;
+
+    if (version_id != 4) {
+        return -EINVAL;
+    }
+
+    do {
+        addr = qemu_get_be64(f);
+
+        flags = addr & ~TARGET_PAGE_MASK;
+        addr &= TARGET_PAGE_MASK;
+
+        if (flags & RAM_SAVE_FLAG_MEM_SIZE) {
+            if (version_id == 4) {
+                /* Synchronize RAM block list */
+                char id[256];
+                ram_addr_t length;
+                ram_addr_t total_ram_bytes = addr;
+
+                while (total_ram_bytes) {
+                    RAMBlock *block;
+                    uint8_t len;
+
+                    len = qemu_get_byte(f);
+                    qemu_get_buffer(f, (uint8_t *)id, len);
+                    id[len] = 0;
+                    length = qemu_get_be64(f);
+
+                    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+                        if (!strncmp(id, block->idstr, sizeof(id))) {
+                            if (block->length != length) {
+                                ret =  -EINVAL;
+                                goto done;
+                            }
+                            break;
+                        }
+                    }
+
+                    if (!block) {
+                        fprintf(stderr, "Unknown ramblock \"%s\", cannot "
+                                "accept migration\n", id);
+                        ret = -EINVAL;
+                        goto done;
+                    }
+
+                    total_ram_bytes -= length;
+                }
+            }
+        }
+
+        if (flags & RAM_SAVE_FLAG_COMPRESS) {
+            void *host;
+            uint8_t ch;
+
+            host = host_from_stream_offset(f, addr, flags);
+            if (!host) {
+                return -EINVAL;
+            }
+
+            ch = qemu_get_byte(f);
+            memset(host, ch, TARGET_PAGE_SIZE);
+#ifndef _WIN32
+            if (ch == 0 &&
+                (!kvm_enabled() || kvm_has_sync_mmu()) &&
+                getpagesize() <= TARGET_PAGE_SIZE) {
+                qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
+            }
+#endif
+        } else if (flags & RAM_SAVE_FLAG_PAGE) {
+            void *host;
+
+            host = host_from_stream_offset(f, addr, flags);
+            if (!host) {
+                return -EINVAL;
+            }
+
+            qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
+        } else if (flags & RAM_SAVE_FLAG_XBZRLE) {
+            void *host = host_from_stream_offset(f, addr, flags);
+            if (!host) {
+                return -EINVAL;
+            }
+
+            if (load_xbzrle(f, addr, host) < 0) {
+                ret = -EINVAL;
+                goto done;
+            }
+        }
+        error = qemu_file_get_error(f);
+        if (error) {
+            ret = error;
+            goto done;
+        }
+    } while (!(flags & RAM_SAVE_FLAG_EOS));
+
+done:
+    DPRINTF("Completed load of VM with exit code %d seq iteration "
+            "%" PRIu64 "\n", ret, seq_iter);
+    return ret;
+}
+
+SaveVMHandlers savevm_ram_handlers = {
+    .save_live_setup = ram_save_setup,
+    .save_live_iterate = ram_save_iterate,
+    .save_live_complete = ram_save_complete,
+    .save_live_pending = ram_save_pending,
+    .load_state = ram_load,
+    .cancel = ram_migration_cancel,
+};