diff mbox

[v7,26/42] postcopy: Incoming initialisation

Message ID 1434450415-11339-27-git-send-email-dgilbert@redhat.com
State New
Headers show

Commit Message

Dr. David Alan Gilbert June 16, 2015, 10:26 a.m. UTC
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
---
 include/migration/migration.h    |   3 +
 include/migration/postcopy-ram.h |  12 ++++
 migration/postcopy-ram.c         | 116 +++++++++++++++++++++++++++++++++++++++
 migration/ram.c                  |  11 ++++
 migration/savevm.c               |   4 ++
 trace-events                     |   2 +
 6 files changed, 148 insertions(+)

Comments

Juan Quintela July 13, 2015, 12:04 p.m. UTC | #1
"Dr. David Alan Gilbert (git)" <dgilbert@redhat.com> wrote:
> From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
>
> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> ---
>  include/migration/migration.h    |   3 +
>  include/migration/postcopy-ram.h |  12 ++++
>  migration/postcopy-ram.c         | 116 +++++++++++++++++++++++++++++++++++++++
>  migration/ram.c                  |  11 ++++
>  migration/savevm.c               |   4 ++
>  trace-events                     |   2 +
>  6 files changed, 148 insertions(+)
>

qemu_hugepage_enable(host_addr, length)?

> +#ifdef MADV_NOHUGEPAGE
> +    if (madvise(host_addr, length, MADV_NOHUGEPAGE)) {
> +        error_report("%s: NOHUGEPAGE: %s", __func__, strerror(errno));
> +        return -1;
> +    }
> +#endif

qemu_hugepage_disable(host_addr, length)?
> +#ifdef MADV_HUGEPAGE
> +    if (madvise(host_addr, length, MADV_HUGEPAGE)) {
> +        error_report("%s HUGEPAGE: %s", __func__, strerror(errno));
> +        return -1;
> +    }
> +#endif
> +
> +    /*
> +     * We can also turn off userfault now since we should have all the
> +     * pages.   It can be useful to leave it on to debug postcopy
> +     * if you're not sure it's always getting every page.
> +     */

qemu_userfault_unregister(host_addr, length)?

> +    range_struct.start = (uintptr_t)host_addr;
> +    range_struct.len = length;
> +
> +    if (ioctl(mis->userfault_fd, UFFDIO_UNREGISTER, &range_struct)) {
> +        error_report("%s: userfault unregister %s", __func__, strerror(errno));
> +
> +        return -1;
> +    }

>  
> +/*
> + * Allocate data structures etc needed by incoming migration with postcopy-ram
> + * postcopy-ram's similarly names postcopy_ram_incoming_init does the work
> + */
> +int ram_postcopy_incoming_init(MigrationIncomingState *mis)
> +{
> +    size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
> +
> +    return postcopy_ram_incoming_init(mis, ram_pages);
> +}
> +

ram_postocpy_incoming_init()
and
postcopy_ram_incoming_init()

ouch  Thinking about better names ....



>  static int ram_load(QEMUFile *f, void *opaque, int version_id)
>  {
>      int flags = 0, ret = 0;
> diff --git a/migration/savevm.c b/migration/savevm.c
> index e6398dd..f4de52d 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -1238,6 +1238,10 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis,
>          return -1;
>      }
>  
> +    if (ram_postcopy_incoming_init(mis)) {
> +        return -1;
> +    }
> +

how/where we know that this is called soon enough?

>      postcopy_state_set(mis, POSTCOPY_INCOMING_ADVISE);
>  
>      return 0;
> diff --git a/trace-events b/trace-events
> index 5e8a120..2ffc1c6 100644
> --- a/trace-events
> +++ b/trace-events
> @@ -1498,7 +1498,9 @@ rdma_start_outgoing_migration_after_rdma_source_init(void) ""
>  
>  # migration/postcopy-ram.c
>  postcopy_discard_send_finish(const char *ramblock, int nwords, int ncmds) "%s mask words sent=%d in %d commands"
> +postcopy_cleanup_area(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
>  postcopy_ram_discard_range(void *start, void *end) "%p,%p"
> +postcopy_init_area(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"

once here, if we have range names before, what about:

postcopy_ram_cleanup_range()
postcopy_ram_init_range()

And let the ram* functions the same?
Amit Shah July 22, 2015, 6:19 a.m. UTC | #2
On (Tue) 16 Jun 2015 [11:26:39], Dr. David Alan Gilbert (git) wrote:
> From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> 
> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

Reviewed-by: Amit Shah <amit.shah@redhat.com>

		Amit
Dr. David Alan Gilbert Sept. 23, 2015, 7:06 p.m. UTC | #3
* Juan Quintela (quintela@redhat.com) wrote:
> "Dr. David Alan Gilbert (git)" <dgilbert@redhat.com> wrote:
> > From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> >
> > Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> >  include/migration/migration.h    |   3 +
> >  include/migration/postcopy-ram.h |  12 ++++
> >  migration/postcopy-ram.c         | 116 +++++++++++++++++++++++++++++++++++++++
> >  migration/ram.c                  |  11 ++++
> >  migration/savevm.c               |   4 ++
> >  trace-events                     |   2 +
> >  6 files changed, 148 insertions(+)
> >
> 
> qemu_hugepage_enable(host_addr, length)?
> 
> > +#ifdef MADV_NOHUGEPAGE
> > +    if (madvise(host_addr, length, MADV_NOHUGEPAGE)) {
> > +        error_report("%s: NOHUGEPAGE: %s", __func__, strerror(errno));
> > +        return -1;
> > +    }
> > +#endif
> 
> qemu_hugepage_disable(host_addr, length)?

I've flipped those both to use the qemu_madvise which is how
it's done in most other places in the codebase (I added
the QEMU_MADV_NOHUGEPAGE since HUGEPAGE was there but not the opposite).

> > +#ifdef MADV_HUGEPAGE
> > +    if (madvise(host_addr, length, MADV_HUGEPAGE)) {
> > +        error_report("%s HUGEPAGE: %s", __func__, strerror(errno));
> > +        return -1;
> > +    }
> > +#endif
> > +
> > +    /*
> > +     * We can also turn off userfault now since we should have all the
> > +     * pages.   It can be useful to leave it on to debug postcopy
> > +     * if you're not sure it's always getting every page.
> > +     */
> 
> qemu_userfault_unregister(host_addr, length)?

Is it worth wrapping that ioctl, when it's already in a function
that has to do other calls around it?

> > +    range_struct.start = (uintptr_t)host_addr;
> > +    range_struct.len = length;
> > +
> > +    if (ioctl(mis->userfault_fd, UFFDIO_UNREGISTER, &range_struct)) {
> > +        error_report("%s: userfault unregister %s", __func__, strerror(errno));
> > +
> > +        return -1;
> > +    }
> 
> >  
> > +/*
> > + * Allocate data structures etc needed by incoming migration with postcopy-ram
> > + * postcopy-ram's similarly names postcopy_ram_incoming_init does the work
> > + */
> > +int ram_postcopy_incoming_init(MigrationIncomingState *mis)
> > +{
> > +    size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
> > +
> > +    return postcopy_ram_incoming_init(mis, ram_pages);
> > +}
> > +
> 
> ram_postocpy_incoming_init()
> and
> postcopy_ram_incoming_init()
> 
> ouch  Thinking about better names ....

Agreed; suggestions welcome. the 'ram' and 'postcopy_ram' are
both the convention based on the filename.

> >  static int ram_load(QEMUFile *f, void *opaque, int version_id)
> >  {
> >      int flags = 0, ret = 0;
> > diff --git a/migration/savevm.c b/migration/savevm.c
> > index e6398dd..f4de52d 100644
> > --- a/migration/savevm.c
> > +++ b/migration/savevm.c
> > @@ -1238,6 +1238,10 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis,
> >          return -1;
> >      }
> >  
> > +    if (ram_postcopy_incoming_init(mis)) {
> > +        return -1;
> > +    }
> > +
> 
> how/where we know that this is called soon enough?

It's driven by the sequence of commands byte that walk it through
the state machine.

> >      postcopy_state_set(mis, POSTCOPY_INCOMING_ADVISE);
> >  
> >      return 0;
> > diff --git a/trace-events b/trace-events
> > index 5e8a120..2ffc1c6 100644
> > --- a/trace-events
> > +++ b/trace-events
> > @@ -1498,7 +1498,9 @@ rdma_start_outgoing_migration_after_rdma_source_init(void) ""
> >  
> >  # migration/postcopy-ram.c
> >  postcopy_discard_send_finish(const char *ramblock, int nwords, int ncmds) "%s mask words sent=%d in %d commands"
> > +postcopy_cleanup_area(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
> >  postcopy_ram_discard_range(void *start, void *end) "%p,%p"
> > +postcopy_init_area(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
> 
> once here, if we have range names before, what about:
> 
> postcopy_ram_cleanup_range()
> postcopy_ram_init_range()

Done.

> And let the ram* functions the same?

Not sure which ones those refer to?

Dave

--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff mbox

Patch

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 4c6cf95..98e2568 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -69,6 +69,8 @@  struct MigrationIncomingState {
      */
     QemuEvent      main_thread_load_event;
 
+    /* For the kernel to send us notifications */
+    int            userfault_fd;
     QEMUFile *return_path;
     QemuMutex      rp_mutex;    /* We send replies from multiple threads */
     PostcopyState postcopy_state;
@@ -195,6 +197,7 @@  int ram_postcopy_send_discard_bitmap(MigrationState *ms);
 /* For incoming postcopy discard */
 int ram_discard_range(MigrationIncomingState *mis, const char *block_name,
                       uint64_t start, uint64_t end);
+int ram_postcopy_incoming_init(MigrationIncomingState *mis);
 
 /**
  * @migrate_add_blocker - prevent migration from proceeding
diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h
index 1d38f76..4192108 100644
--- a/include/migration/postcopy-ram.h
+++ b/include/migration/postcopy-ram.h
@@ -17,6 +17,18 @@ 
 bool postcopy_ram_supported_by_host(void);
 
 /*
+ * Initialise postcopy-ram, setting the RAM to a state where we can go into
+ * postcopy later; must be called prior to any precopy.
+ * called from ram.c's similarly named ram_postcopy_incoming_init
+ */
+int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages);
+
+/*
+ * At the end of a migration where postcopy_ram_incoming_init was called.
+ */
+int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis);
+
+/*
  * Discard the contents of memory start..end inclusive.
  * We can assume that if we've been called postcopy_ram_hosttest returned true
  */
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 9c76472..35c87b4 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -177,6 +177,111 @@  int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
     return 0;
 }
 
+/*
+ * Setup an area of RAM so that it *can* be used for postcopy later; this
+ * must be done right at the start prior to pre-copy.
+ * opaque should be the MIS.
+ */
+static int init_area(const char *block_name, void *host_addr,
+                     ram_addr_t offset, ram_addr_t length, void *opaque)
+{
+    MigrationIncomingState *mis = opaque;
+
+    trace_postcopy_init_area(block_name, host_addr, offset, length);
+
+    /*
+     * We need the whole of RAM to be truly empty for postcopy, so things
+     * like ROMs and any data tables built during init must be zero'd
+     * - we're going to get the copy from the source anyway.
+     * (Precopy will just overwrite this data, so doesn't need the discard)
+     */
+    if (postcopy_ram_discard_range(mis, host_addr, (host_addr + length - 1))) {
+        return -1;
+    }
+
+    /*
+     * We also need the area to be normal 4k pages, not huge pages
+     * (otherwise we can't be sure we can atomically place the
+     * 4k page in later).  THP might come along and map a 2MB page
+     * and when it's partially accessed in precopy it might not break
+     * it down, but leave a 2MB zero'd page.
+     */
+#ifdef MADV_NOHUGEPAGE
+    if (madvise(host_addr, length, MADV_NOHUGEPAGE)) {
+        error_report("%s: NOHUGEPAGE: %s", __func__, strerror(errno));
+        return -1;
+    }
+#endif
+
+    return 0;
+}
+
+/*
+ * At the end of migration, undo the effects of init_area
+ * opaque should be the MIS.
+ */
+static int cleanup_area(const char *block_name, void *host_addr,
+                        ram_addr_t offset, ram_addr_t length, void *opaque)
+{
+    MigrationIncomingState *mis = opaque;
+    struct uffdio_range range_struct;
+    trace_postcopy_cleanup_area(block_name, host_addr, offset, length);
+
+    /*
+     * We turned off hugepage for the precopy stage with postcopy enabled
+     * we can turn it back on now.
+     */
+#ifdef MADV_HUGEPAGE
+    if (madvise(host_addr, length, MADV_HUGEPAGE)) {
+        error_report("%s HUGEPAGE: %s", __func__, strerror(errno));
+        return -1;
+    }
+#endif
+
+    /*
+     * We can also turn off userfault now since we should have all the
+     * pages.   It can be useful to leave it on to debug postcopy
+     * if you're not sure it's always getting every page.
+     */
+    range_struct.start = (uintptr_t)host_addr;
+    range_struct.len = length;
+
+    if (ioctl(mis->userfault_fd, UFFDIO_UNREGISTER, &range_struct)) {
+        error_report("%s: userfault unregister %s", __func__, strerror(errno));
+
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Initialise postcopy-ram, setting the RAM to a state where we can go into
+ * postcopy later; must be called prior to any precopy.
+ * called from arch_init's similarly named ram_postcopy_incoming_init
+ */
+int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
+{
+    if (qemu_ram_foreach_block(init_area, mis)) {
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * At the end of a migration where postcopy_ram_incoming_init was called.
+ */
+int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
+{
+    /* TODO: Join the fault thread once we're sure it will exit */
+    if (qemu_ram_foreach_block(cleanup_area, mis)) {
+        return -1;
+    }
+
+    return 0;
+}
+
 #else
 /* No target OS support, stubs just fail */
 
@@ -186,6 +291,17 @@  bool postcopy_ram_supported_by_host(void)
     return false;
 }
 
+int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
+{
+    error_report("postcopy_ram_incoming_init: No OS support");
+    return -1;
+}
+
+int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
+{
+    assert(0);
+}
+
 int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
                                uint8_t *end)
 {
diff --git a/migration/ram.c b/migration/ram.c
index 8e681f2..f7d957e 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1667,6 +1667,17 @@  static void decompress_data_with_multi_threads(uint8_t *compbuf,
     }
 }
 
+/*
+ * Allocate data structures etc needed by incoming migration with postcopy-ram
+ * postcopy-ram's similarly names postcopy_ram_incoming_init does the work
+ */
+int ram_postcopy_incoming_init(MigrationIncomingState *mis)
+{
+    size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
+
+    return postcopy_ram_incoming_init(mis, ram_pages);
+}
+
 static int ram_load(QEMUFile *f, void *opaque, int version_id)
 {
     int flags = 0, ret = 0;
diff --git a/migration/savevm.c b/migration/savevm.c
index e6398dd..f4de52d 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1238,6 +1238,10 @@  static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis,
         return -1;
     }
 
+    if (ram_postcopy_incoming_init(mis)) {
+        return -1;
+    }
+
     postcopy_state_set(mis, POSTCOPY_INCOMING_ADVISE);
 
     return 0;
diff --git a/trace-events b/trace-events
index 5e8a120..2ffc1c6 100644
--- a/trace-events
+++ b/trace-events
@@ -1498,7 +1498,9 @@  rdma_start_outgoing_migration_after_rdma_source_init(void) ""
 
 # migration/postcopy-ram.c
 postcopy_discard_send_finish(const char *ramblock, int nwords, int ncmds) "%s mask words sent=%d in %d commands"
+postcopy_cleanup_area(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
 postcopy_ram_discard_range(void *start, void *end) "%p,%p"
+postcopy_init_area(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
 
 # kvm-all.c
 kvm_ioctl(int type, void *arg) "type 0x%x, arg %p"