Message ID | 1434450415-11339-27-git-send-email-dgilbert@redhat.com |
---|---|
State | New |
Headers | show |
"Dr. David Alan Gilbert (git)" <dgilbert@redhat.com> wrote: > From: "Dr. David Alan Gilbert" <dgilbert@redhat.com> > > Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > Reviewed-by: David Gibson <david@gibson.dropbear.id.au> > --- > include/migration/migration.h | 3 + > include/migration/postcopy-ram.h | 12 ++++ > migration/postcopy-ram.c | 116 +++++++++++++++++++++++++++++++++++++++ > migration/ram.c | 11 ++++ > migration/savevm.c | 4 ++ > trace-events | 2 + > 6 files changed, 148 insertions(+) > qemu_hugepage_enable(host_addr, length)? > +#ifdef MADV_NOHUGEPAGE > + if (madvise(host_addr, length, MADV_NOHUGEPAGE)) { > + error_report("%s: NOHUGEPAGE: %s", __func__, strerror(errno)); > + return -1; > + } > +#endif qemu_hugepage_disable(host_addr, length)? > +#ifdef MADV_HUGEPAGE > + if (madvise(host_addr, length, MADV_HUGEPAGE)) { > + error_report("%s HUGEPAGE: %s", __func__, strerror(errno)); > + return -1; > + } > +#endif > + > + /* > + * We can also turn off userfault now since we should have all the > + * pages. It can be useful to leave it on to debug postcopy > + * if you're not sure it's always getting every page. > + */ qemu_userfault_unregister(host_addr, length)? > + range_struct.start = (uintptr_t)host_addr; > + range_struct.len = length; > + > + if (ioctl(mis->userfault_fd, UFFDIO_UNREGISTER, &range_struct)) { > + error_report("%s: userfault unregister %s", __func__, strerror(errno)); > + > + return -1; > + } > > +/* > + * Allocate data structures etc needed by incoming migration with postcopy-ram > + * postcopy-ram's similarly names postcopy_ram_incoming_init does the work > + */ > +int ram_postcopy_incoming_init(MigrationIncomingState *mis) > +{ > + size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS; > + > + return postcopy_ram_incoming_init(mis, ram_pages); > +} > + ram_postocpy_incoming_init() and postcopy_ram_incoming_init() ouch Thinking about better names .... > static int ram_load(QEMUFile *f, void *opaque, int version_id) > { > int flags = 0, ret = 0; > diff --git a/migration/savevm.c b/migration/savevm.c > index e6398dd..f4de52d 100644 > --- a/migration/savevm.c > +++ b/migration/savevm.c > @@ -1238,6 +1238,10 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis, > return -1; > } > > + if (ram_postcopy_incoming_init(mis)) { > + return -1; > + } > + how/where we know that this is called soon enough? > postcopy_state_set(mis, POSTCOPY_INCOMING_ADVISE); > > return 0; > diff --git a/trace-events b/trace-events > index 5e8a120..2ffc1c6 100644 > --- a/trace-events > +++ b/trace-events > @@ -1498,7 +1498,9 @@ rdma_start_outgoing_migration_after_rdma_source_init(void) "" > > # migration/postcopy-ram.c > postcopy_discard_send_finish(const char *ramblock, int nwords, int ncmds) "%s mask words sent=%d in %d commands" > +postcopy_cleanup_area(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx" > postcopy_ram_discard_range(void *start, void *end) "%p,%p" > +postcopy_init_area(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx" once here, if we have range names before, what about: postcopy_ram_cleanup_range() postcopy_ram_init_range() And let the ram* functions the same?
On (Tue) 16 Jun 2015 [11:26:39], Dr. David Alan Gilbert (git) wrote: > From: "Dr. David Alan Gilbert" <dgilbert@redhat.com> > > Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Reviewed-by: Amit Shah <amit.shah@redhat.com> Amit
* Juan Quintela (quintela@redhat.com) wrote: > "Dr. David Alan Gilbert (git)" <dgilbert@redhat.com> wrote: > > From: "Dr. David Alan Gilbert" <dgilbert@redhat.com> > > > > Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com> > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au> > > --- > > include/migration/migration.h | 3 + > > include/migration/postcopy-ram.h | 12 ++++ > > migration/postcopy-ram.c | 116 +++++++++++++++++++++++++++++++++++++++ > > migration/ram.c | 11 ++++ > > migration/savevm.c | 4 ++ > > trace-events | 2 + > > 6 files changed, 148 insertions(+) > > > > qemu_hugepage_enable(host_addr, length)? > > > +#ifdef MADV_NOHUGEPAGE > > + if (madvise(host_addr, length, MADV_NOHUGEPAGE)) { > > + error_report("%s: NOHUGEPAGE: %s", __func__, strerror(errno)); > > + return -1; > > + } > > +#endif > > qemu_hugepage_disable(host_addr, length)? I've flipped those both to use the qemu_madvise which is how it's done in most other places in the codebase (I added the QEMU_MADV_NOHUGEPAGE since HUGEPAGE was there but not the opposite). > > +#ifdef MADV_HUGEPAGE > > + if (madvise(host_addr, length, MADV_HUGEPAGE)) { > > + error_report("%s HUGEPAGE: %s", __func__, strerror(errno)); > > + return -1; > > + } > > +#endif > > + > > + /* > > + * We can also turn off userfault now since we should have all the > > + * pages. It can be useful to leave it on to debug postcopy > > + * if you're not sure it's always getting every page. > > + */ > > qemu_userfault_unregister(host_addr, length)? Is it worth wrapping that ioctl, when it's already in a function that has to do other calls around it? > > + range_struct.start = (uintptr_t)host_addr; > > + range_struct.len = length; > > + > > + if (ioctl(mis->userfault_fd, UFFDIO_UNREGISTER, &range_struct)) { > > + error_report("%s: userfault unregister %s", __func__, strerror(errno)); > > + > > + return -1; > > + } > > > > > +/* > > + * Allocate data structures etc needed by incoming migration with postcopy-ram > > + * postcopy-ram's similarly names postcopy_ram_incoming_init does the work > > + */ > > +int ram_postcopy_incoming_init(MigrationIncomingState *mis) > > +{ > > + size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS; > > + > > + return postcopy_ram_incoming_init(mis, ram_pages); > > +} > > + > > ram_postocpy_incoming_init() > and > postcopy_ram_incoming_init() > > ouch Thinking about better names .... Agreed; suggestions welcome. the 'ram' and 'postcopy_ram' are both the convention based on the filename. > > static int ram_load(QEMUFile *f, void *opaque, int version_id) > > { > > int flags = 0, ret = 0; > > diff --git a/migration/savevm.c b/migration/savevm.c > > index e6398dd..f4de52d 100644 > > --- a/migration/savevm.c > > +++ b/migration/savevm.c > > @@ -1238,6 +1238,10 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis, > > return -1; > > } > > > > + if (ram_postcopy_incoming_init(mis)) { > > + return -1; > > + } > > + > > how/where we know that this is called soon enough? It's driven by the sequence of commands byte that walk it through the state machine. > > postcopy_state_set(mis, POSTCOPY_INCOMING_ADVISE); > > > > return 0; > > diff --git a/trace-events b/trace-events > > index 5e8a120..2ffc1c6 100644 > > --- a/trace-events > > +++ b/trace-events > > @@ -1498,7 +1498,9 @@ rdma_start_outgoing_migration_after_rdma_source_init(void) "" > > > > # migration/postcopy-ram.c > > postcopy_discard_send_finish(const char *ramblock, int nwords, int ncmds) "%s mask words sent=%d in %d commands" > > +postcopy_cleanup_area(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx" > > postcopy_ram_discard_range(void *start, void *end) "%p,%p" > > +postcopy_init_area(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx" > > once here, if we have range names before, what about: > > postcopy_ram_cleanup_range() > postcopy_ram_init_range() Done. > And let the ram* functions the same? Not sure which ones those refer to? Dave -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff --git a/include/migration/migration.h b/include/migration/migration.h index 4c6cf95..98e2568 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -69,6 +69,8 @@ struct MigrationIncomingState { */ QemuEvent main_thread_load_event; + /* For the kernel to send us notifications */ + int userfault_fd; QEMUFile *return_path; QemuMutex rp_mutex; /* We send replies from multiple threads */ PostcopyState postcopy_state; @@ -195,6 +197,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms); /* For incoming postcopy discard */ int ram_discard_range(MigrationIncomingState *mis, const char *block_name, uint64_t start, uint64_t end); +int ram_postcopy_incoming_init(MigrationIncomingState *mis); /** * @migrate_add_blocker - prevent migration from proceeding diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h index 1d38f76..4192108 100644 --- a/include/migration/postcopy-ram.h +++ b/include/migration/postcopy-ram.h @@ -17,6 +17,18 @@ bool postcopy_ram_supported_by_host(void); /* + * Initialise postcopy-ram, setting the RAM to a state where we can go into + * postcopy later; must be called prior to any precopy. + * called from ram.c's similarly named ram_postcopy_incoming_init + */ +int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages); + +/* + * At the end of a migration where postcopy_ram_incoming_init was called. + */ +int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis); + +/* * Discard the contents of memory start..end inclusive. * We can assume that if we've been called postcopy_ram_hosttest returned true */ diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 9c76472..35c87b4 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -177,6 +177,111 @@ int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start, return 0; } +/* + * Setup an area of RAM so that it *can* be used for postcopy later; this + * must be done right at the start prior to pre-copy. + * opaque should be the MIS. + */ +static int init_area(const char *block_name, void *host_addr, + ram_addr_t offset, ram_addr_t length, void *opaque) +{ + MigrationIncomingState *mis = opaque; + + trace_postcopy_init_area(block_name, host_addr, offset, length); + + /* + * We need the whole of RAM to be truly empty for postcopy, so things + * like ROMs and any data tables built during init must be zero'd + * - we're going to get the copy from the source anyway. + * (Precopy will just overwrite this data, so doesn't need the discard) + */ + if (postcopy_ram_discard_range(mis, host_addr, (host_addr + length - 1))) { + return -1; + } + + /* + * We also need the area to be normal 4k pages, not huge pages + * (otherwise we can't be sure we can atomically place the + * 4k page in later). THP might come along and map a 2MB page + * and when it's partially accessed in precopy it might not break + * it down, but leave a 2MB zero'd page. + */ +#ifdef MADV_NOHUGEPAGE + if (madvise(host_addr, length, MADV_NOHUGEPAGE)) { + error_report("%s: NOHUGEPAGE: %s", __func__, strerror(errno)); + return -1; + } +#endif + + return 0; +} + +/* + * At the end of migration, undo the effects of init_area + * opaque should be the MIS. + */ +static int cleanup_area(const char *block_name, void *host_addr, + ram_addr_t offset, ram_addr_t length, void *opaque) +{ + MigrationIncomingState *mis = opaque; + struct uffdio_range range_struct; + trace_postcopy_cleanup_area(block_name, host_addr, offset, length); + + /* + * We turned off hugepage for the precopy stage with postcopy enabled + * we can turn it back on now. + */ +#ifdef MADV_HUGEPAGE + if (madvise(host_addr, length, MADV_HUGEPAGE)) { + error_report("%s HUGEPAGE: %s", __func__, strerror(errno)); + return -1; + } +#endif + + /* + * We can also turn off userfault now since we should have all the + * pages. It can be useful to leave it on to debug postcopy + * if you're not sure it's always getting every page. + */ + range_struct.start = (uintptr_t)host_addr; + range_struct.len = length; + + if (ioctl(mis->userfault_fd, UFFDIO_UNREGISTER, &range_struct)) { + error_report("%s: userfault unregister %s", __func__, strerror(errno)); + + return -1; + } + + return 0; +} + +/* + * Initialise postcopy-ram, setting the RAM to a state where we can go into + * postcopy later; must be called prior to any precopy. + * called from arch_init's similarly named ram_postcopy_incoming_init + */ +int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages) +{ + if (qemu_ram_foreach_block(init_area, mis)) { + return -1; + } + + return 0; +} + +/* + * At the end of a migration where postcopy_ram_incoming_init was called. + */ +int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) +{ + /* TODO: Join the fault thread once we're sure it will exit */ + if (qemu_ram_foreach_block(cleanup_area, mis)) { + return -1; + } + + return 0; +} + #else /* No target OS support, stubs just fail */ @@ -186,6 +291,17 @@ bool postcopy_ram_supported_by_host(void) return false; } +int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages) +{ + error_report("postcopy_ram_incoming_init: No OS support"); + return -1; +} + +int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) +{ + assert(0); +} + int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start, uint8_t *end) { diff --git a/migration/ram.c b/migration/ram.c index 8e681f2..f7d957e 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1667,6 +1667,17 @@ static void decompress_data_with_multi_threads(uint8_t *compbuf, } } +/* + * Allocate data structures etc needed by incoming migration with postcopy-ram + * postcopy-ram's similarly names postcopy_ram_incoming_init does the work + */ +int ram_postcopy_incoming_init(MigrationIncomingState *mis) +{ + size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS; + + return postcopy_ram_incoming_init(mis, ram_pages); +} + static int ram_load(QEMUFile *f, void *opaque, int version_id) { int flags = 0, ret = 0; diff --git a/migration/savevm.c b/migration/savevm.c index e6398dd..f4de52d 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1238,6 +1238,10 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis, return -1; } + if (ram_postcopy_incoming_init(mis)) { + return -1; + } + postcopy_state_set(mis, POSTCOPY_INCOMING_ADVISE); return 0; diff --git a/trace-events b/trace-events index 5e8a120..2ffc1c6 100644 --- a/trace-events +++ b/trace-events @@ -1498,7 +1498,9 @@ rdma_start_outgoing_migration_after_rdma_source_init(void) "" # migration/postcopy-ram.c postcopy_discard_send_finish(const char *ramblock, int nwords, int ncmds) "%s mask words sent=%d in %d commands" +postcopy_cleanup_area(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx" postcopy_ram_discard_range(void *start, void *end) "%p,%p" +postcopy_init_area(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx" # kvm-all.c kvm_ioctl(int type, void *arg) "type 0x%x, arg %p"