diff mbox

[v5,35/45] postcopy_ram.c: place_page and helpers

Message ID 1424883128-9841-36-git-send-email-dgilbert@redhat.com
State New
Headers show

Commit Message

Dr. David Alan Gilbert Feb. 25, 2015, 4:51 p.m. UTC
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>

postcopy_place_page (etc) provide a way for postcopy to place a page
into guests memory atomically (using the copy ioctl on the ufd).

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 include/migration/migration.h    |   2 +
 include/migration/postcopy-ram.h |  16 ++++++
 migration/postcopy-ram.c         | 113 ++++++++++++++++++++++++++++++++++++++-
 trace-events                     |   1 +
 4 files changed, 130 insertions(+), 2 deletions(-)

Comments

David Gibson March 24, 2015, 2:33 a.m. UTC | #1
On Wed, Feb 25, 2015 at 04:51:58PM +0000, Dr. David Alan Gilbert (git) wrote:
> From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> 
> postcopy_place_page (etc) provide a way for postcopy to place a page
> into guests memory atomically (using the copy ioctl on the ufd).
> 
> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> ---
>  include/migration/migration.h    |   2 +
>  include/migration/postcopy-ram.h |  16 ++++++
>  migration/postcopy-ram.c         | 113 ++++++++++++++++++++++++++++++++++++++-
>  trace-events                     |   1 +
>  4 files changed, 130 insertions(+), 2 deletions(-)
> 
> diff --git a/include/migration/migration.h b/include/migration/migration.h
> index b1c7cad..139bb1b 100644
> --- a/include/migration/migration.h
> +++ b/include/migration/migration.h
> @@ -94,6 +94,8 @@ struct MigrationIncomingState {
>      QEMUFile *return_path;
>      QemuMutex      rp_mutex;    /* We send replies from multiple threads */
>      PostcopyPMI    postcopy_pmi;
> +    void          *postcopy_tmp_page;
> +    long           postcopy_place_skipped; /* Check for incorrect place ops */
>  };
>  
>  MigrationIncomingState *migration_incoming_get_current(void);
> diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h
> index fbb2a93..3d30280 100644
> --- a/include/migration/postcopy-ram.h
> +++ b/include/migration/postcopy-ram.h
> @@ -80,4 +80,20 @@ void postcopy_discard_send_chunk(MigrationState *ms, PostcopyDiscardState *pds,
>  void postcopy_discard_send_finish(MigrationState *ms,
>                                    PostcopyDiscardState *pds);
>  
> +/*
> + * Place a page (from) at (host) efficiently
> + *    There are restrictions on how 'from' must be mapped, in general best
> + *    to use other postcopy_ routines to allocate.
> + * returns 0 on success
> + */
> +int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
> +                        long bitmap_offset, bool all_zero);
> +
> +/*
> + * Allocate a page of memory that can be mapped at a later point in time
> + * using postcopy_place_page
> + * Returns: Pointer to allocated page
> + */
> +void *postcopy_get_tmp_page(MigrationIncomingState *mis);
> +
>  #endif
> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> index 33dd332..86fa5a0 100644
> --- a/migration/postcopy-ram.c
> +++ b/migration/postcopy-ram.c
> @@ -197,7 +197,6 @@ static PostcopyPMIState postcopy_pmi_get_state_nolock(
>  }
>  
>  /* Retrieve the state of the given page */
> -__attribute__ (( unused )) /* Until later in patch series */
>  static PostcopyPMIState postcopy_pmi_get_state(MigrationIncomingState *mis,
>                                                 size_t bitmap_index)
>  {
> @@ -213,7 +212,6 @@ static PostcopyPMIState postcopy_pmi_get_state(MigrationIncomingState *mis,
>   * Set the page state to the given state if the previous state was as expected
>   * Return the actual previous state.
>   */
> -__attribute__ (( unused )) /* Until later in patch series */
>  static PostcopyPMIState postcopy_pmi_change_state(MigrationIncomingState *mis,
>                                             size_t bitmap_index,
>                                             PostcopyPMIState expected_state,
> @@ -477,6 +475,7 @@ static int cleanup_area(const char *block_name, void *host_addr,
>  int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
>  {
>      postcopy_pmi_init(mis, ram_pages);
> +    mis->postcopy_place_skipped = -1;
>  
>      if (qemu_ram_foreach_block(init_area, mis)) {
>          return -1;
> @@ -495,6 +494,10 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
>          return -1;
>      }
>  
> +    if (mis->postcopy_tmp_page) {
> +        munmap(mis->postcopy_tmp_page, getpagesize());
> +        mis->postcopy_tmp_page = NULL;
> +    }
>      return 0;
>  }
>  
> @@ -561,6 +564,100 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
>      return 0;
>  }
>  
> +/*
> + * Place a host page (from) at (host) tomically

s/tomically/atomically/

> + *    There are restrictions on how 'from' must be mapped, in general best
> + *    to use other postcopy_ routines to allocate.
> + * all_zero: Hint that the page being placed is 0 throughout
> + * returns 0 on success
> + * bitmap_offset: Index into the migration bitmaps
> + *
> + * State changes:
> + *   none -> received
> + *   requested -> received (ack)
> + *
> + * Note the UF thread is also updating the state, and maybe none->requested
> + * at the same time.

Hrm.. these facts do tend me towards thinking that separate and
explicit requested and received bits will be clearer than treating it
as an enum state variable

> + */
> +int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
> +                        long bitmap_offset, bool all_zero)
> +{
> +    PostcopyPMIState old_state, tmp_state, new_state;
> +
> +    if (!all_zero) {
> +        struct uffdio_copy copy_struct;
> +
> +        copy_struct.dst = (uint64_t)(uintptr_t)host;
> +        copy_struct.src = (uint64_t)(uintptr_t)from;
> +        copy_struct.len = getpagesize();
> +        copy_struct.mode = 0;
> +
> +        /* copy also acks to the kernel waking the stalled thread up
> +         * TODO: We can inhibit that ack and only do it if it was requested
> +         * which would be slightly cheaper, but we'd have to be careful
> +         * of the order of updating our page state.
> +         */
> +        if (ioctl(mis->userfault_fd, UFFDIO_COPY, &copy_struct)) {
> +            int e = errno;
> +            error_report("%s: %s copy host: %p from: %p pmi=%d",
> +                         __func__, strerror(e), host, from,
> +                         postcopy_pmi_get_state(mis, bitmap_offset));
> +
> +            return -e;
> +        }
> +    } else {
> +        struct uffdio_zeropage zero_struct;
> +
> +        zero_struct.range.start = (uint64_t)(uintptr_t)host;
> +        zero_struct.range.len = getpagesize();
> +        zero_struct.mode = 0;
> +
> +        if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
> +            int e = errno;
> +            error_report("%s: %s zero host: %p from: %p pmi=%d",
> +                         __func__, strerror(e), host, from,
> +                         postcopy_pmi_get_state(mis, bitmap_offset));
> +
> +            return -e;
> +        }
> +    }
> +
> +    bitmap_offset &= ~(mis->postcopy_pmi.host_bits-1);
> +    new_state = POSTCOPY_PMI_RECEIVED;
> +    tmp_state = postcopy_pmi_get_state(mis, bitmap_offset);
> +    do {
> +        old_state = tmp_state;
> +        tmp_state = postcopy_pmi_change_state(mis, bitmap_offset, old_state,
> +                                              new_state);
> +    } while (old_state != tmp_state);

Yeah.. see treating the state as two separate booleans, here you'd
just need to update received, without caring about whether requested
has changed.

> +    trace_postcopy_place_page(bitmap_offset, host, all_zero, old_state);
> +
> +    return 0;
> +}
> +
> +/*
> + * Returns a target page of memory that can be mapped at a later point in time
> + * using postcopy_place_page
> + * The same address is used repeatedly, postcopy_place_page just takes the
> + * backing page away.
> + * Returns: Pointer to allocated page
> + *
> + */
> +void *postcopy_get_tmp_page(MigrationIncomingState *mis)
> +{
> +    if (!mis->postcopy_tmp_page) {
> +        mis->postcopy_tmp_page = mmap(NULL, getpagesize(),
> +                             PROT_READ | PROT_WRITE, MAP_PRIVATE |
> +                             MAP_ANONYMOUS, -1, 0);
> +        if (!mis->postcopy_tmp_page) {
> +            perror("mapping postcopy tmp page");
> +            return NULL;
> +        }
> +    }
> +
> +    return mis->postcopy_tmp_page;
> +}
> +
>  #else
>  /* No target OS support, stubs just fail */
>  bool postcopy_ram_supported_by_host(void)
> @@ -608,6 +705,18 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
>  {
>      assert(0);
>  }
> +
> +int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
> +                        long bitmap_offset, bool all_zero)
> +{
> +    assert(0);
> +}
> +
> +void *postcopy_get_tmp_page(MigrationIncomingState *mis)
> +{
> +    assert(0);
> +}
> +
>  #endif
>  
>  /* ------------------------------------------------------------------------- */
> diff --git a/trace-events b/trace-events
> index 781cf5c..16a91d9 100644
> --- a/trace-events
> +++ b/trace-events
> @@ -1497,6 +1497,7 @@ rdma_start_outgoing_migration_after_rdma_source_init(void) ""
>  postcopy_discard_send_finish(const char *ramblock, int nwords, int ncmds) "%s mask words sent=%d in %d commands"
>  postcopy_cleanup_area(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
>  postcopy_init_area(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
> +postcopy_place_page(unsigned long offset, void *host_addr, bool all_zero, int old_state) "offset=%lx host=%p all_zero=%d old_state=%d"
>  
>  # kvm-all.c
>  kvm_ioctl(int type, void *arg) "type 0x%x, arg %p"
Dr. David Alan Gilbert March 25, 2015, 5:46 p.m. UTC | #2
* David Gibson (david@gibson.dropbear.id.au) wrote:
> On Wed, Feb 25, 2015 at 04:51:58PM +0000, Dr. David Alan Gilbert (git) wrote:
> > From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
> > 
> > postcopy_place_page (etc) provide a way for postcopy to place a page
> > into guests memory atomically (using the copy ioctl on the ufd).
> > 
> > Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> > ---
> >  include/migration/migration.h    |   2 +
> >  include/migration/postcopy-ram.h |  16 ++++++
> >  migration/postcopy-ram.c         | 113 ++++++++++++++++++++++++++++++++++++++-
> >  trace-events                     |   1 +
> >  4 files changed, 130 insertions(+), 2 deletions(-)
> > 
> > diff --git a/include/migration/migration.h b/include/migration/migration.h
> > index b1c7cad..139bb1b 100644
> > --- a/include/migration/migration.h
> > +++ b/include/migration/migration.h
> > @@ -94,6 +94,8 @@ struct MigrationIncomingState {
> >      QEMUFile *return_path;
> >      QemuMutex      rp_mutex;    /* We send replies from multiple threads */
> >      PostcopyPMI    postcopy_pmi;
> > +    void          *postcopy_tmp_page;
> > +    long           postcopy_place_skipped; /* Check for incorrect place ops */
> >  };
> >  
> >  MigrationIncomingState *migration_incoming_get_current(void);
> > diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h
> > index fbb2a93..3d30280 100644
> > --- a/include/migration/postcopy-ram.h
> > +++ b/include/migration/postcopy-ram.h
> > @@ -80,4 +80,20 @@ void postcopy_discard_send_chunk(MigrationState *ms, PostcopyDiscardState *pds,
> >  void postcopy_discard_send_finish(MigrationState *ms,
> >                                    PostcopyDiscardState *pds);
> >  
> > +/*
> > + * Place a page (from) at (host) efficiently
> > + *    There are restrictions on how 'from' must be mapped, in general best
> > + *    to use other postcopy_ routines to allocate.
> > + * returns 0 on success
> > + */
> > +int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
> > +                        long bitmap_offset, bool all_zero);
> > +
> > +/*
> > + * Allocate a page of memory that can be mapped at a later point in time
> > + * using postcopy_place_page
> > + * Returns: Pointer to allocated page
> > + */
> > +void *postcopy_get_tmp_page(MigrationIncomingState *mis);
> > +
> >  #endif
> > diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> > index 33dd332..86fa5a0 100644
> > --- a/migration/postcopy-ram.c
> > +++ b/migration/postcopy-ram.c
> > @@ -197,7 +197,6 @@ static PostcopyPMIState postcopy_pmi_get_state_nolock(
> >  }
> >  
> >  /* Retrieve the state of the given page */
> > -__attribute__ (( unused )) /* Until later in patch series */
> >  static PostcopyPMIState postcopy_pmi_get_state(MigrationIncomingState *mis,
> >                                                 size_t bitmap_index)
> >  {
> > @@ -213,7 +212,6 @@ static PostcopyPMIState postcopy_pmi_get_state(MigrationIncomingState *mis,
> >   * Set the page state to the given state if the previous state was as expected
> >   * Return the actual previous state.
> >   */
> > -__attribute__ (( unused )) /* Until later in patch series */
> >  static PostcopyPMIState postcopy_pmi_change_state(MigrationIncomingState *mis,
> >                                             size_t bitmap_index,
> >                                             PostcopyPMIState expected_state,
> > @@ -477,6 +475,7 @@ static int cleanup_area(const char *block_name, void *host_addr,
> >  int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
> >  {
> >      postcopy_pmi_init(mis, ram_pages);
> > +    mis->postcopy_place_skipped = -1;
> >  
> >      if (qemu_ram_foreach_block(init_area, mis)) {
> >          return -1;
> > @@ -495,6 +494,10 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
> >          return -1;
> >      }
> >  
> > +    if (mis->postcopy_tmp_page) {
> > +        munmap(mis->postcopy_tmp_page, getpagesize());
> > +        mis->postcopy_tmp_page = NULL;
> > +    }
> >      return 0;
> >  }
> >  
> > @@ -561,6 +564,100 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
> >      return 0;
> >  }
> >  
> > +/*
> > + * Place a host page (from) at (host) tomically
> 
> s/tomically/atomically/

Done.

> > + *    There are restrictions on how 'from' must be mapped, in general best
> > + *    to use other postcopy_ routines to allocate.
> > + * all_zero: Hint that the page being placed is 0 throughout
> > + * returns 0 on success
> > + * bitmap_offset: Index into the migration bitmaps
> > + *
> > + * State changes:
> > + *   none -> received
> > + *   requested -> received (ack)
> > + *
> > + * Note the UF thread is also updating the state, and maybe none->requested
> > + * at the same time.
> 
> Hrm.. these facts do tend me towards thinking that separate and
> explicit requested and received bits will be clearer than treating it
> as an enum state variable

This is all about to get simpler; Andrea realised he can make a small
change to the kernel interface semantics that means that I no longer have
to keep the destination side bitmaps.  That saves all this messing about
updating page states at all.

> > + */
> > +int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
> > +                        long bitmap_offset, bool all_zero)
> > +{
> > +    PostcopyPMIState old_state, tmp_state, new_state;
> > +
> > +    if (!all_zero) {
> > +        struct uffdio_copy copy_struct;
> > +
> > +        copy_struct.dst = (uint64_t)(uintptr_t)host;
> > +        copy_struct.src = (uint64_t)(uintptr_t)from;
> > +        copy_struct.len = getpagesize();
> > +        copy_struct.mode = 0;
> > +
> > +        /* copy also acks to the kernel waking the stalled thread up
> > +         * TODO: We can inhibit that ack and only do it if it was requested
> > +         * which would be slightly cheaper, but we'd have to be careful
> > +         * of the order of updating our page state.
> > +         */
> > +        if (ioctl(mis->userfault_fd, UFFDIO_COPY, &copy_struct)) {
> > +            int e = errno;
> > +            error_report("%s: %s copy host: %p from: %p pmi=%d",
> > +                         __func__, strerror(e), host, from,
> > +                         postcopy_pmi_get_state(mis, bitmap_offset));
> > +
> > +            return -e;
> > +        }
> > +    } else {
> > +        struct uffdio_zeropage zero_struct;
> > +
> > +        zero_struct.range.start = (uint64_t)(uintptr_t)host;
> > +        zero_struct.range.len = getpagesize();
> > +        zero_struct.mode = 0;
> > +
> > +        if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
> > +            int e = errno;
> > +            error_report("%s: %s zero host: %p from: %p pmi=%d",
> > +                         __func__, strerror(e), host, from,
> > +                         postcopy_pmi_get_state(mis, bitmap_offset));
> > +
> > +            return -e;
> > +        }
> > +    }
> > +
> > +    bitmap_offset &= ~(mis->postcopy_pmi.host_bits-1);
> > +    new_state = POSTCOPY_PMI_RECEIVED;
> > +    tmp_state = postcopy_pmi_get_state(mis, bitmap_offset);
> > +    do {
> > +        old_state = tmp_state;
> > +        tmp_state = postcopy_pmi_change_state(mis, bitmap_offset, old_state,
> > +                                              new_state);
> > +    } while (old_state != tmp_state);
> 
> Yeah.. see treating the state as two separate booleans, here you'd
> just need to update received, without caring about whether requested
> has changed.

It's all very dependent on the exact things you need to do with the states;
whenever you get into a situation where you have to do something on a combination
of the bits you suddenly have to be a lot more careful.

Anyway, as I say, both of those bits are about to disappear.

Dave

> 
> > +    trace_postcopy_place_page(bitmap_offset, host, all_zero, old_state);
> > +
> > +    return 0;
> > +}
> > +
> > +/*
> > + * Returns a target page of memory that can be mapped at a later point in time
> > + * using postcopy_place_page
> > + * The same address is used repeatedly, postcopy_place_page just takes the
> > + * backing page away.
> > + * Returns: Pointer to allocated page
> > + *
> > + */
> > +void *postcopy_get_tmp_page(MigrationIncomingState *mis)
> > +{
> > +    if (!mis->postcopy_tmp_page) {
> > +        mis->postcopy_tmp_page = mmap(NULL, getpagesize(),
> > +                             PROT_READ | PROT_WRITE, MAP_PRIVATE |
> > +                             MAP_ANONYMOUS, -1, 0);
> > +        if (!mis->postcopy_tmp_page) {
> > +            perror("mapping postcopy tmp page");
> > +            return NULL;
> > +        }
> > +    }
> > +
> > +    return mis->postcopy_tmp_page;
> > +}
> > +
> >  #else
> >  /* No target OS support, stubs just fail */
> >  bool postcopy_ram_supported_by_host(void)
> > @@ -608,6 +705,18 @@ int postcopy_ram_enable_notify(MigrationIncomingState *mis)
> >  {
> >      assert(0);
> >  }
> > +
> > +int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
> > +                        long bitmap_offset, bool all_zero)
> > +{
> > +    assert(0);
> > +}
> > +
> > +void *postcopy_get_tmp_page(MigrationIncomingState *mis)
> > +{
> > +    assert(0);
> > +}
> > +
> >  #endif
> >  
> >  /* ------------------------------------------------------------------------- */
> > diff --git a/trace-events b/trace-events
> > index 781cf5c..16a91d9 100644
> > --- a/trace-events
> > +++ b/trace-events
> > @@ -1497,6 +1497,7 @@ rdma_start_outgoing_migration_after_rdma_source_init(void) ""
> >  postcopy_discard_send_finish(const char *ramblock, int nwords, int ncmds) "%s mask words sent=%d in %d commands"
> >  postcopy_cleanup_area(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
> >  postcopy_init_area(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
> > +postcopy_place_page(unsigned long offset, void *host_addr, bool all_zero, int old_state) "offset=%lx host=%p all_zero=%d old_state=%d"
> >  
> >  # kvm-all.c
> >  kvm_ioctl(int type, void *arg) "type 0x%x, arg %p"
> 
> -- 
> David Gibson			| I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
> 				| _way_ _around_!
> http://www.ozlabs.org/~dgibson


--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff mbox

Patch

diff --git a/include/migration/migration.h b/include/migration/migration.h
index b1c7cad..139bb1b 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -94,6 +94,8 @@  struct MigrationIncomingState {
     QEMUFile *return_path;
     QemuMutex      rp_mutex;    /* We send replies from multiple threads */
     PostcopyPMI    postcopy_pmi;
+    void          *postcopy_tmp_page;
+    long           postcopy_place_skipped; /* Check for incorrect place ops */
 };
 
 MigrationIncomingState *migration_incoming_get_current(void);
diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h
index fbb2a93..3d30280 100644
--- a/include/migration/postcopy-ram.h
+++ b/include/migration/postcopy-ram.h
@@ -80,4 +80,20 @@  void postcopy_discard_send_chunk(MigrationState *ms, PostcopyDiscardState *pds,
 void postcopy_discard_send_finish(MigrationState *ms,
                                   PostcopyDiscardState *pds);
 
+/*
+ * Place a page (from) at (host) efficiently
+ *    There are restrictions on how 'from' must be mapped, in general best
+ *    to use other postcopy_ routines to allocate.
+ * returns 0 on success
+ */
+int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
+                        long bitmap_offset, bool all_zero);
+
+/*
+ * Allocate a page of memory that can be mapped at a later point in time
+ * using postcopy_place_page
+ * Returns: Pointer to allocated page
+ */
+void *postcopy_get_tmp_page(MigrationIncomingState *mis);
+
 #endif
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 33dd332..86fa5a0 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -197,7 +197,6 @@  static PostcopyPMIState postcopy_pmi_get_state_nolock(
 }
 
 /* Retrieve the state of the given page */
-__attribute__ (( unused )) /* Until later in patch series */
 static PostcopyPMIState postcopy_pmi_get_state(MigrationIncomingState *mis,
                                                size_t bitmap_index)
 {
@@ -213,7 +212,6 @@  static PostcopyPMIState postcopy_pmi_get_state(MigrationIncomingState *mis,
  * Set the page state to the given state if the previous state was as expected
  * Return the actual previous state.
  */
-__attribute__ (( unused )) /* Until later in patch series */
 static PostcopyPMIState postcopy_pmi_change_state(MigrationIncomingState *mis,
                                            size_t bitmap_index,
                                            PostcopyPMIState expected_state,
@@ -477,6 +475,7 @@  static int cleanup_area(const char *block_name, void *host_addr,
 int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
 {
     postcopy_pmi_init(mis, ram_pages);
+    mis->postcopy_place_skipped = -1;
 
     if (qemu_ram_foreach_block(init_area, mis)) {
         return -1;
@@ -495,6 +494,10 @@  int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
         return -1;
     }
 
+    if (mis->postcopy_tmp_page) {
+        munmap(mis->postcopy_tmp_page, getpagesize());
+        mis->postcopy_tmp_page = NULL;
+    }
     return 0;
 }
 
@@ -561,6 +564,100 @@  int postcopy_ram_enable_notify(MigrationIncomingState *mis)
     return 0;
 }
 
+/*
+ * Place a host page (from) at (host) tomically
+ *    There are restrictions on how 'from' must be mapped, in general best
+ *    to use other postcopy_ routines to allocate.
+ * all_zero: Hint that the page being placed is 0 throughout
+ * returns 0 on success
+ * bitmap_offset: Index into the migration bitmaps
+ *
+ * State changes:
+ *   none -> received
+ *   requested -> received (ack)
+ *
+ * Note the UF thread is also updating the state, and maybe none->requested
+ * at the same time.
+ */
+int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
+                        long bitmap_offset, bool all_zero)
+{
+    PostcopyPMIState old_state, tmp_state, new_state;
+
+    if (!all_zero) {
+        struct uffdio_copy copy_struct;
+
+        copy_struct.dst = (uint64_t)(uintptr_t)host;
+        copy_struct.src = (uint64_t)(uintptr_t)from;
+        copy_struct.len = getpagesize();
+        copy_struct.mode = 0;
+
+        /* copy also acks to the kernel waking the stalled thread up
+         * TODO: We can inhibit that ack and only do it if it was requested
+         * which would be slightly cheaper, but we'd have to be careful
+         * of the order of updating our page state.
+         */
+        if (ioctl(mis->userfault_fd, UFFDIO_COPY, &copy_struct)) {
+            int e = errno;
+            error_report("%s: %s copy host: %p from: %p pmi=%d",
+                         __func__, strerror(e), host, from,
+                         postcopy_pmi_get_state(mis, bitmap_offset));
+
+            return -e;
+        }
+    } else {
+        struct uffdio_zeropage zero_struct;
+
+        zero_struct.range.start = (uint64_t)(uintptr_t)host;
+        zero_struct.range.len = getpagesize();
+        zero_struct.mode = 0;
+
+        if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
+            int e = errno;
+            error_report("%s: %s zero host: %p from: %p pmi=%d",
+                         __func__, strerror(e), host, from,
+                         postcopy_pmi_get_state(mis, bitmap_offset));
+
+            return -e;
+        }
+    }
+
+    bitmap_offset &= ~(mis->postcopy_pmi.host_bits-1);
+    new_state = POSTCOPY_PMI_RECEIVED;
+    tmp_state = postcopy_pmi_get_state(mis, bitmap_offset);
+    do {
+        old_state = tmp_state;
+        tmp_state = postcopy_pmi_change_state(mis, bitmap_offset, old_state,
+                                              new_state);
+    } while (old_state != tmp_state);
+    trace_postcopy_place_page(bitmap_offset, host, all_zero, old_state);
+
+    return 0;
+}
+
+/*
+ * Returns a target page of memory that can be mapped at a later point in time
+ * using postcopy_place_page
+ * The same address is used repeatedly, postcopy_place_page just takes the
+ * backing page away.
+ * Returns: Pointer to allocated page
+ *
+ */
+void *postcopy_get_tmp_page(MigrationIncomingState *mis)
+{
+    if (!mis->postcopy_tmp_page) {
+        mis->postcopy_tmp_page = mmap(NULL, getpagesize(),
+                             PROT_READ | PROT_WRITE, MAP_PRIVATE |
+                             MAP_ANONYMOUS, -1, 0);
+        if (!mis->postcopy_tmp_page) {
+            perror("mapping postcopy tmp page");
+            return NULL;
+        }
+    }
+
+    return mis->postcopy_tmp_page;
+}
+
 #else
 /* No target OS support, stubs just fail */
 bool postcopy_ram_supported_by_host(void)
@@ -608,6 +705,18 @@  int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 {
     assert(0);
 }
+
+int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
+                        long bitmap_offset, bool all_zero)
+{
+    assert(0);
+}
+
+void *postcopy_get_tmp_page(MigrationIncomingState *mis)
+{
+    assert(0);
+}
+
 #endif
 
 /* ------------------------------------------------------------------------- */
diff --git a/trace-events b/trace-events
index 781cf5c..16a91d9 100644
--- a/trace-events
+++ b/trace-events
@@ -1497,6 +1497,7 @@  rdma_start_outgoing_migration_after_rdma_source_init(void) ""
 postcopy_discard_send_finish(const char *ramblock, int nwords, int ncmds) "%s mask words sent=%d in %d commands"
 postcopy_cleanup_area(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
 postcopy_init_area(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx"
+postcopy_place_page(unsigned long offset, void *host_addr, bool all_zero, int old_state) "offset=%lx host=%p all_zero=%d old_state=%d"
 
 # kvm-all.c
 kvm_ioctl(int type, void *arg) "type 0x%x, arg %p"