@@ -114,6 +114,13 @@ struct MigrationState
/* Flag set once the migration has been asked to enter postcopy */
bool start_postcopy;
+
+
+    /*
+     * Bitmap of pages that have been sent at least once.
+     * Currently only maintained and used during postcopy, where it is
+     * used to build the discard bitmap sent to the destination at the
+     * start of the postcopy phase.
+     */
+ unsigned long *sentmap;
};
void process_incoming_migration(QEMUFile *f);
@@ -183,6 +190,11 @@ double xbzrle_mig_cache_miss_rate(void);
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size);
void ram_debug_dump_bitmap(unsigned long *todump, bool expected);
+/* For outgoing discard bitmap */
+int ram_postcopy_send_discard_bitmap(MigrationState *ms);
+/* For incoming postcopy discard */
+int ram_discard_range(MigrationIncomingState *mis, const char *block_name,
+ uint64_t start, uint64_t end);
/**
* @migrate_add_blocker - prevent migration from proceeding
@@ -16,4 +16,39 @@
/* Return true if the host supports everything we need to do postcopy-ram */
bool postcopy_ram_supported_by_host(void);
+/*
+ * Discard the contents of memory start..end inclusive.
+ * We can assume that if we've been called then
+ * postcopy_ram_supported_by_host() returned true.
+ */
+int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
+ uint8_t *end);
+
+
+/*
+ * Called at the start of each RAMBlock by the bitmap code
+ * 'offset' is the bitmap offset of the named RAMBlock in the migration
+ * bitmap.
+ * Returns a new PDS
+ */
+PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms,
+ unsigned long offset,
+ const char *name);
+
+/*
+ * Called by the bitmap code for each chunk to discard
+ * May send a discard message, may just leave it queued to
+ * be sent later
+ * 'start' and 'end' describe an inclusive range of pages in the
+ * migration bitmap in the RAM block passed to postcopy_discard_send_init
+ */
+void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds,
+ unsigned long start, unsigned long end);
+
+/*
+ * Called at the end of each RAMBlock by the bitmap code
+ * Sends any outstanding discard messages, frees the PDS
+ */
+void postcopy_discard_send_finish(MigrationState *ms,
+ PostcopyDiscardState *pds);
+
#endif
@@ -61,6 +61,7 @@ typedef struct PCIExpressHost PCIExpressHost;
typedef struct PCIHostState PCIHostState;
typedef struct PCMCIACardState PCMCIACardState;
typedef struct PixelFormat PixelFormat;
+typedef struct PostcopyDiscardState PostcopyDiscardState;
typedef struct PropertyInfo PropertyInfo;
typedef struct Property Property;
typedef struct QEMUBH QEMUBH;
@@ -22,6 +22,7 @@
#include "block/block.h"
#include "qemu/sockets.h"
#include "migration/block.h"
+#include "migration/postcopy-ram.h"
#include "qemu/thread.h"
#include "qmp-commands.h"
#include "trace.h"
@@ -27,6 +27,22 @@
#include "qemu/error-report.h"
#include "trace.h"
+#define MAX_DISCARDS_PER_COMMAND 12
+
/*
 * State used to batch discard ranges for one RAMBlock so several
 * ranges can be sent in a single discard command on the wire.
 */
struct PostcopyDiscardState {
    const char *name;   /* idstr of the RAMBlock the entries refer to */
    uint64_t offset; /* Bitmap entry for the 1st bit of this RAMBlock */
    uint16_t cur_entry; /* Number of valid entries in start/end_list */
    /*
     * Start and end address of a discard range; end_list points to the byte
     * after the end of the range.
     */
    uint64_t start_list[MAX_DISCARDS_PER_COMMAND];
    uint64_t end_list[MAX_DISCARDS_PER_COMMAND];
    unsigned int nsentwords; /* stats: ranges queued via _send_range */
    unsigned int nsentcmds;  /* stats: discard commands actually sent */
};
+
/* Postcopy needs to detect accesses to pages that haven't yet been copied
* across, and efficiently map new pages in, the techniques for doing this
* are target OS specific.
@@ -145,6 +161,22 @@ out:
return ret;
}
+/*
+ * Discard the contents of memory start..end inclusive.
+ * We can assume that if we've been called postcopy_ram_hosttest returned true
+ */
+int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
+ uint8_t *end)
+{
+ trace_postcopy_ram_discard_range(start, end);
+ if (madvise(start, (end-start)+1, MADV_DONTNEED)) {
+ error_report("%s MADV_DONTNEED: %s", __func__, strerror(errno));
+ return -1;
+ }
+
+ return 0;
+}
+
#else
/* No target OS support, stubs just fail */
@@ -154,5 +186,81 @@ bool postcopy_ram_supported_by_host(void)
return false;
}
+int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
+ uint8_t *end)
+{
+ assert(0);
+}
#endif
+/* ------------------------------------------------------------------------- */
+
+/*
+ * Called at the start of each RAMBlock by the bitmap code
+ * 'offset' is the bitmap offset of the named RAMBlock in the migration
+ * bitmap.
+ * Returns a new PDS
+ */
+PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms,
+ unsigned long offset,
+ const char *name)
+{
+ PostcopyDiscardState *res = g_try_malloc(sizeof(PostcopyDiscardState));
+
+ if (res) {
+ res->name = name;
+ res->cur_entry = 0;
+ res->nsentwords = 0;
+ res->nsentcmds = 0;
+ res->offset = offset;
+ }
+
+ return res;
+}
+
+/*
+ * Called by the bitmap code for each chunk to discard
+ * May send a discard message, may just leave it queued to
+ * be sent later
+ * 'start' and 'end' describe an inclusive range of pages in the
+ * migration bitmap in the RAM block passed to postcopy_discard_send_init
+ */
+void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds,
+ unsigned long start, unsigned long end)
+{
+ size_t tp_bits = qemu_target_page_bits();
+ /* Convert to byte offsets within the RAM block */
+ pds->start_list[pds->cur_entry] = (start - pds->offset) << tp_bits;
+ pds->end_list[pds->cur_entry] = (1 + end - pds->offset) << tp_bits;
+ pds->cur_entry++;
+ pds->nsentwords++;
+
+ if (pds->cur_entry == MAX_DISCARDS_PER_COMMAND) {
+ /* Full set, ship it! */
+ qemu_savevm_send_postcopy_ram_discard(ms->file, pds->name,
+ pds->cur_entry,
+ pds->start_list, pds->end_list);
+ pds->nsentcmds++;
+ pds->cur_entry = 0;
+ }
+}
+
+/*
+ * Called at the end of each RAMBlock by the bitmap code
+ * Sends any outstanding discard messages, frees the PDS
+ */
+void postcopy_discard_send_finish(MigrationState *ms, PostcopyDiscardState *pds)
+{
+ /* Anything unsent? */
+ if (pds->cur_entry) {
+ qemu_savevm_send_postcopy_ram_discard(ms->file, pds->name,
+ pds->cur_entry,
+ pds->start_list, pds->end_list);
+ pds->nsentcmds++;
+ }
+
+ trace_postcopy_discard_send_finish(pds->name, pds->nsentwords,
+ pds->nsentcmds);
+
+ g_free(pds);
+}
@@ -32,6 +32,7 @@
#include "qemu/timer.h"
#include "qemu/main-loop.h"
#include "migration/migration.h"
+#include "migration/postcopy-ram.h"
#include "exec/address-spaces.h"
#include "migration/page_cache.h"
#include "qemu/error-report.h"
@@ -494,9 +495,17 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
return 1;
}
+/* mr: The region to search for dirty pages in
+ * start: Start address (typically so we can continue from previous page)
+ * ram_addr_abs: Pointer into which to store the address of the dirty page
+ * within the global ram_addr space
+ *
+ * Returns: byte offset within memory region of the start of a dirty page
+ */
static inline
ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
- ram_addr_t start)
+ ram_addr_t start,
+ ram_addr_t *ram_addr_abs)
{
unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
unsigned long nr = base + (start >> TARGET_PAGE_BITS);
@@ -515,6 +524,7 @@ ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
clear_bit(next, migration_bitmap);
migration_dirty_pages--;
}
+ *ram_addr_abs = next << TARGET_PAGE_BITS;
return (next - base) << TARGET_PAGE_BITS;
}
@@ -642,6 +652,19 @@ static int save_zero_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
return pages;
}
+static RAMBlock *ram_find_block(const char *id)
+{
+ RAMBlock *block;
+
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+ if (!strcmp(id, block->idstr)) {
+ return block;
+ }
+ }
+
+ return NULL;
+}
+
/**
* ram_save_page: Send the given page to the stream
*
@@ -921,13 +944,16 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
bool complete_round = false;
int pages = 0;
MemoryRegion *mr;
+ ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
+ ram_addr_t space */
if (!block)
block = QLIST_FIRST_RCU(&ram_list.blocks);
while (true) {
mr = block->mr;
- offset = migration_bitmap_find_and_reset_dirty(mr, offset);
+ offset = migration_bitmap_find_and_reset_dirty(mr, offset,
+ &dirty_ram_abs);
if (complete_round && block == last_seen_block &&
offset >= last_offset) {
break;
@@ -958,6 +984,11 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
/* if page is unmodified, continue to the next */
if (pages > 0) {
+ MigrationState *ms = migrate_get_current();
+ if (ms->sentmap) {
+ set_bit(dirty_ram_abs >> TARGET_PAGE_BITS, ms->sentmap);
+ }
+
last_sent_block = block;
break;
}
@@ -1017,12 +1048,19 @@ void free_xbzrle_decoded_buf(void)
static void migration_end(void)
{
+ MigrationState *s = migrate_get_current();
+
if (migration_bitmap) {
memory_global_dirty_log_stop();
g_free(migration_bitmap);
migration_bitmap = NULL;
}
+ if (s->sentmap) {
+ g_free(s->sentmap);
+ s->sentmap = NULL;
+ }
+
XBZRLE_cache_lock();
if (XBZRLE.cache) {
cache_fini(XBZRLE.cache);
@@ -1090,6 +1128,161 @@ void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
}
}
+/* **** functions for postcopy ***** */
+
+/*
+ * Callback from postcopy_each_ram_send_discard for each RAMBlock
+ * start,end: Indexes into the bitmap for the first and last bit
+ * representing the named block
+ */
+static int postcopy_send_discard_bm_ram(MigrationState *ms,
+ PostcopyDiscardState *pds,
+ unsigned long start, unsigned long end)
+{
+ unsigned long current;
+
+ for (current = start; current <= end; ) {
+ unsigned long set = find_next_bit(ms->sentmap, end + 1, current);
+
+ if (set <= end) {
+ unsigned long zero = find_next_zero_bit(ms->sentmap,
+ end + 1, set + 1);
+
+ if (zero > end) {
+ zero = end + 1;
+ }
+ postcopy_discard_send_range(ms, pds, set, zero - 1);
+ current = zero + 1;
+ } else {
+ current = set;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Utility for the outgoing postcopy code.
+ * Calls postcopy_send_discard_bm_ram for each RAMBlock
+ * passing it bitmap indexes and name.
+ * Returns: 0 on success
+ * (qemu_ram_foreach_block ends up passing unscaled lengths
+ * which would mean postcopy code would have to deal with target page)
+ */
+static int postcopy_each_ram_send_discard(MigrationState *ms)
+{
+ struct RAMBlock *block;
+ int ret;
+
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+ unsigned long first = block->offset >> TARGET_PAGE_BITS;
+ unsigned long last = (block->offset + (block->max_length-1))
+ >> TARGET_PAGE_BITS;
+ PostcopyDiscardState *pds = postcopy_discard_send_init(ms,
+ first,
+ block->idstr);
+
+ /*
+ * Postcopy sends chunks of bitmap over the wire, but it
+ * just needs indexes at this point, avoids it having
+ * target page specific code.
+ */
+ ret = postcopy_send_discard_bm_ram(ms, pds, first, last);
+ postcopy_discard_send_finish(ms, pds);
+ if (ret) {
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
/*
 * Transmit the set of pages to be discarded after precopy to the target
 * these are pages that have been sent previously but have been dirtied
 * Hopefully this is pretty sparse
 *
 * Returns 0 on success, or the error from postcopy_each_ram_send_discard.
 * Must be called with the source VM paused: the bitmap sync below is the
 * final one, after which the sentmap/dirty intersection is stable.
 */
int ram_postcopy_send_discard_bitmap(MigrationState *ms)
{
    int ret;

    rcu_read_lock();
    /* This should be our last sync, the src is now paused */
    migration_bitmap_sync();

    /*
     * Update the sentmap to be sentmap&=dirty
     * i.e. keep only pages that were sent at least once AND have been
     * re-dirtied since; those are the stale copies the destination must
     * discard.
     */
    bitmap_and(ms->sentmap, ms->sentmap, migration_bitmap,
               last_ram_offset() >> TARGET_PAGE_BITS);


    trace_ram_postcopy_send_discard_bitmap();
#ifdef DEBUG_POSTCOPY
    ram_debug_dump_bitmap(ms->sentmap, false);
#endif

    ret = postcopy_each_ram_send_discard(ms);
    rcu_read_unlock();

    return ret;
}
+
/*
 * At the start of the postcopy phase of migration, any now-dirty
 * precopied pages are discarded.
 *
 * start..end is an inclusive byte address range within the RAMBlock
 *
 * Returns 0 on success.
 *
 * NOTE(review): the sender builds end offsets as "the byte after the end
 * of the range" (see PostcopyDiscardState), i.e. exclusive — confirm the
 * caller converts to the inclusive convention documented here, otherwise
 * the discard below overruns by one byte.
 */
int ram_discard_range(MigrationIncomingState *mis,
                      const char *block_name,
                      uint64_t start, uint64_t end)
{
    int ret = -1;

    assert(end >= start);

    /* RCU read lock protects ram_find_block's walk of the block list */
    rcu_read_lock();
    RAMBlock *rb = ram_find_block(block_name);

    if (!rb) {
        error_report("ram_discard_range: Failed to find block '%s'",
                     block_name);
        goto err;
    }

    uint8_t *host_startaddr = rb->host + start;
    uint8_t *host_endaddr;

    /* Discard works on host-page granularity; reject unaligned starts */
    if ((uintptr_t)host_startaddr & (qemu_host_page_size - 1)) {
        error_report("ram_discard_range: Unaligned start address: %p",
                     host_startaddr);
        goto err;
    }

    /*
     * NOTE(review): with 'end' inclusive the last valid index is
     * used_length - 1, so this admits a one-byte overrun when
     * end == used_length — verify against the wire-format convention
     * before tightening to 'end < rb->used_length'.
     */
    if (end <= rb->used_length) {
        host_endaddr = rb->host + end;
        /* 'end' is inclusive: the byte AFTER it must be page aligned */
        if (((uintptr_t)host_endaddr + 1) & (qemu_host_page_size - 1)) {
            error_report("ram_discard_range: Unaligned end address: %p",
                         host_endaddr);
            goto err;
        }
        ret = postcopy_ram_discard_range(mis, host_startaddr, host_endaddr);
    } else {
        /*
         * NOTE(review): %zu assumes used_length is size_t; if it is
         * ram_addr_t (uint64_t) this should be PRIu64 — confirm.
         */
        error_report("ram_discard_range: Overrun block '%s' (%" PRIu64
                     "/%" PRIu64 "/%zu)",
                     block_name, start, end, rb->used_length);
    }

err:
    rcu_read_unlock();

    return ret;
}
+
+
/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
* long-running RCU critical section. When rcu-reclaims in the code
* start to become numerous it will be necessary to reduce the
@@ -1147,6 +1340,12 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
migration_bitmap = bitmap_new(ram_bitmap_pages);
bitmap_set(migration_bitmap, 0, ram_bitmap_pages);
+ if (migrate_postcopy_ram()) {
+ MigrationState *s = migrate_get_current();
+ s->sentmap = bitmap_new(ram_bitmap_pages);
+ bitmap_clear(s->sentmap, 0, ram_bitmap_pages);
+ }
+
/*
* Count the total number of pages used by ram blocks not including any
* gaps due to alignment or unplugs.
@@ -1295,7 +1295,6 @@ static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
}
trace_loadvm_postcopy_ram_handle_discard_header(ramid, len);
while (len) {
- /* TODO - ram_discard_range gets added in a later patch
uint64_t start_addr, end_addr;
start_addr = qemu_get_be64(mis->file);
end_addr = qemu_get_be64(mis->file);
@@ -1305,7 +1304,6 @@ static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
if (ret) {
return ret;
}
- */
}
trace_loadvm_postcopy_ram_handle_discard_end();
@@ -1231,6 +1231,7 @@ qemu_file_fclose(void) ""
migration_bitmap_sync_start(void) ""
migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64""
migration_throttle(void) ""
+ram_postcopy_send_discard_bitmap(void) ""
# hw/display/qxl.c
disable qxl_interface_set_mm_time(int qid, uint32_t mm_time) "%d %d"
@@ -1495,6 +1496,10 @@ rdma_start_incoming_migration_after_rdma_listen(void) ""
rdma_start_outgoing_migration_after_rdma_connect(void) ""
rdma_start_outgoing_migration_after_rdma_source_init(void) ""
+# migration/postcopy-ram.c
+postcopy_discard_send_finish(const char *ramblock, int nwords, int ncmds) "%s mask words sent=%d in %d commands"
+postcopy_ram_discard_range(void *start, void *end) "%p,%p"
+
# kvm-all.c
kvm_ioctl(int type, void *arg) "type 0x%x, arg %p"
kvm_vm_ioctl(int type, void *arg) "type 0x%x, arg %p"