diff mbox series

[v2,5/8] migration/ram: ensure write persistence on loading zero pages to PMEM

Message ID 20180207073331.14158-6-haozhong.zhang@intel.com
State New
Headers show
Series nvdimm: guarantee persistence of QEMU writes to persistent memory | expand

Commit Message

Haozhong Zhang Feb. 7, 2018, 7:33 a.m. UTC
When loading a zero page, check whether it will be loaded to
persistent memory. If yes, load it by libpmem function
pmem_memset_nodrain().  Combined with a call to pmem_drain() at the
end of RAM loading, we can guarantee all those zero pages are
persistently loaded.

Depending on the host HW/SW configurations, pmem_drain() can be
"sfence".  Therefore, we do not call pmem_drain() after each
pmem_memset_nodrain(), or use pmem_memset_persist() (equally
pmem_memset_nodrain() + pmem_drain()), in order to avoid unnecessary
overhead.

Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
---
 include/qemu/pmem.h |  9 +++++++++
 migration/ram.c     | 34 +++++++++++++++++++++++++++++-----
 2 files changed, 38 insertions(+), 5 deletions(-)

Comments

Pankaj Gupta Feb. 7, 2018, 10:17 a.m. UTC | #1
> 
> When loading a zero page, check whether it will be loaded to
> persistent memory If yes, load it by libpmem function
> pmem_memset_nodrain().  Combined with a call to pmem_drain() at the
> end of RAM loading, we can guarantee all those zero pages are
> persistently loaded.
> 
> Depending on the host HW/SW configurations, pmem_drain() can be
> "sfence".  Therefore, we do not call pmem_drain() after each
> pmem_memset_nodrain(), or use pmem_memset_persist() (equally
> pmem_memset_nodrain() + pmem_drain()), in order to avoid unnecessary
> overhead.

Are you saying we don't need 'pmem_drain()'?

I can see its called in patch 5 in ram_load? But
I also see empty definition. Anything I am missing here?

Thanks,
Pankaj

> 
> Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
> ---
>  include/qemu/pmem.h |  9 +++++++++
>  migration/ram.c     | 34 +++++++++++++++++++++++++++++-----
>  2 files changed, 38 insertions(+), 5 deletions(-)
> 
> diff --git a/include/qemu/pmem.h b/include/qemu/pmem.h
> index 9017596ff0..861d8ecc21 100644
> --- a/include/qemu/pmem.h
> +++ b/include/qemu/pmem.h
> @@ -26,6 +26,15 @@ pmem_memcpy_persist(void *pmemdest, const void *src,
> size_t len)
>      return memcpy(pmemdest, src, len);
>  }
>  
> +static inline void *pmem_memset_nodrain(void *pmemdest, int c, size_t len)
> +{
> +    return memset(pmemdest, c, len);
> +}
> +
> +static inline void pmem_drain(void)
> +{
> +}
> +
>  #endif /* CONFIG_LIBPMEM */
>  
>  #endif /* !QEMU_PMEM_H */
> diff --git a/migration/ram.c b/migration/ram.c
> index cb1950f3eb..5a0e503818 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -49,6 +49,7 @@
>  #include "qemu/rcu_queue.h"
>  #include "migration/colo.h"
>  #include "migration/block.h"
> +#include "qemu/pmem.h"
>  
>  /***********************************************************/
>  /* ram save/restore */
> @@ -2467,6 +2468,20 @@ static inline void
> *host_from_ram_block_offset(RAMBlock *block,
>      return block->host + offset;
>  }
>  
> +static void ram_handle_compressed_common(void *host, uint8_t ch, uint64_t
> size,
> +                                         bool is_pmem)
> +{
> +    if (!ch && is_zero_range(host, size)) {
> +        return;
> +    }
> +
> +    if (!is_pmem) {
> +        memset(host, ch, size);
> +    } else {
> +        pmem_memset_nodrain(host, ch, size);
> +    }
> +}
> +
>  /**
>   * ram_handle_compressed: handle the zero page case
>   *
> @@ -2479,9 +2494,7 @@ static inline void *host_from_ram_block_offset(RAMBlock
> *block,
>   */
>  void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
>  {
> -    if (ch != 0 || !is_zero_range(host, size)) {
> -        memset(host, ch, size);
> -    }
> +    return ram_handle_compressed_common(host, ch, size, false);
>  }
>  
>  static void *do_data_decompress(void *opaque)
> @@ -2823,6 +2836,7 @@ static int ram_load(QEMUFile *f, void *opaque, int
> version_id)
>      bool postcopy_running = postcopy_is_running();
>      /* ADVISE is earlier, it shows the source has the postcopy capability on
>      */
>      bool postcopy_advised = postcopy_is_advised();
> +    bool need_pmem_drain = false;
>  
>      seq_iter++;
>  
> @@ -2848,6 +2862,8 @@ static int ram_load(QEMUFile *f, void *opaque, int
> version_id)
>          ram_addr_t addr, total_ram_bytes;
>          void *host = NULL;
>          uint8_t ch;
> +        RAMBlock *block = NULL;
> +        bool is_pmem = false;
>  
>          addr = qemu_get_be64(f);
>          flags = addr & ~TARGET_PAGE_MASK;
> @@ -2864,7 +2880,7 @@ static int ram_load(QEMUFile *f, void *opaque, int
> version_id)
>  
>          if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
>                       RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
> -            RAMBlock *block = ram_block_from_stream(f, flags);
> +            block = ram_block_from_stream(f, flags);
>  
>              host = host_from_ram_block_offset(block, addr);
>              if (!host) {
> @@ -2874,6 +2890,9 @@ static int ram_load(QEMUFile *f, void *opaque, int
> version_id)
>              }
>              ramblock_recv_bitmap_set(block, host);
>              trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
> +
> +            is_pmem = ramblock_is_pmem(block);
> +            need_pmem_drain = need_pmem_drain || is_pmem;
>          }
>  
>          switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
> @@ -2927,7 +2946,7 @@ static int ram_load(QEMUFile *f, void *opaque, int
> version_id)
>  
>          case RAM_SAVE_FLAG_ZERO:
>              ch = qemu_get_byte(f);
> -            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
> +            ram_handle_compressed_common(host, ch, TARGET_PAGE_SIZE,
> is_pmem);
>              break;
>  
>          case RAM_SAVE_FLAG_PAGE:
> @@ -2970,6 +2989,11 @@ static int ram_load(QEMUFile *f, void *opaque, int
> version_id)
>      }
>  
>      wait_for_decompress_done();
> +
> +    if (need_pmem_drain) {
> +        pmem_drain();
> +    }
> +
>      rcu_read_unlock();
>      trace_ram_load_complete(ret, seq_iter);
>      return ret;
> --
> 2.14.1
> 
> 
>
Haozhong Zhang Feb. 7, 2018, 11:18 a.m. UTC | #2
On 02/07/18 05:17 -0500, Pankaj Gupta wrote:
> 
> > 
> > When loading a zero page, check whether it will be loaded to
> > persistent memory If yes, load it by libpmem function
> > pmem_memset_nodrain().  Combined with a call to pmem_drain() at the
> > end of RAM loading, we can guarantee all those zero pages are
> > persistently loaded.
> > 
> > Depending on the host HW/SW configurations, pmem_drain() can be
> > "sfence".  Therefore, we do not call pmem_drain() after each
> > pmem_memset_nodrain(), or use pmem_memset_persist() (equally
> > pmem_memset_nodrain() + pmem_drain()), in order to avoid unnecessary
> > overhead.
> 
> Are you saying we don't need 'pmem_drain()'?

pmem_drain() is called only once after all ram blocks are loaded.

> 
> I can see its called in patch 5 in ram_load? But
> I also see empty definition. Anything I am missing here?

Functions defined in include/qemu/pmem.h are stubs and used only when
QEMU is not compiled with libpmem. When QEMU is compiled with
--enable-libpmem, the one in libpmem is used.

Haozhong

> 
> Thanks,
> Pankaj
> 
> > 
> > Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
> > ---
> >  include/qemu/pmem.h |  9 +++++++++
> >  migration/ram.c     | 34 +++++++++++++++++++++++++++++-----
> >  2 files changed, 38 insertions(+), 5 deletions(-)
> > 
> > diff --git a/include/qemu/pmem.h b/include/qemu/pmem.h
> > index 9017596ff0..861d8ecc21 100644
> > --- a/include/qemu/pmem.h
> > +++ b/include/qemu/pmem.h
> > @@ -26,6 +26,15 @@ pmem_memcpy_persist(void *pmemdest, const void *src,
> > size_t len)
> >      return memcpy(pmemdest, src, len);
> >  }
> >  
> > +static inline void *pmem_memset_nodrain(void *pmemdest, int c, size_t len)
> > +{
> > +    return memset(pmemdest, c, len);
> > +}
> > +
> > +static inline void pmem_drain(void)
> > +{
> > +}
> > +
> >  #endif /* CONFIG_LIBPMEM */
> >  
> >  #endif /* !QEMU_PMEM_H */
> > diff --git a/migration/ram.c b/migration/ram.c
> > index cb1950f3eb..5a0e503818 100644
> > --- a/migration/ram.c
> > +++ b/migration/ram.c
> > @@ -49,6 +49,7 @@
> >  #include "qemu/rcu_queue.h"
> >  #include "migration/colo.h"
> >  #include "migration/block.h"
> > +#include "qemu/pmem.h"
> >  
> >  /***********************************************************/
> >  /* ram save/restore */
> > @@ -2467,6 +2468,20 @@ static inline void
> > *host_from_ram_block_offset(RAMBlock *block,
> >      return block->host + offset;
> >  }
> >  
> > +static void ram_handle_compressed_common(void *host, uint8_t ch, uint64_t
> > size,
> > +                                         bool is_pmem)
> > +{
> > +    if (!ch && is_zero_range(host, size)) {
> > +        return;
> > +    }
> > +
> > +    if (!is_pmem) {
> > +        memset(host, ch, size);
> > +    } else {
> > +        pmem_memset_nodrain(host, ch, size);
> > +    }
> > +}
> > +
> >  /**
> >   * ram_handle_compressed: handle the zero page case
> >   *
> > @@ -2479,9 +2494,7 @@ static inline void *host_from_ram_block_offset(RAMBlock
> > *block,
> >   */
> >  void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
> >  {
> > -    if (ch != 0 || !is_zero_range(host, size)) {
> > -        memset(host, ch, size);
> > -    }
> > +    return ram_handle_compressed_common(host, ch, size, false);
> >  }
> >  
> >  static void *do_data_decompress(void *opaque)
> > @@ -2823,6 +2836,7 @@ static int ram_load(QEMUFile *f, void *opaque, int
> > version_id)
> >      bool postcopy_running = postcopy_is_running();
> >      /* ADVISE is earlier, it shows the source has the postcopy capability on
> >      */
> >      bool postcopy_advised = postcopy_is_advised();
> > +    bool need_pmem_drain = false;
> >  
> >      seq_iter++;
> >  
> > @@ -2848,6 +2862,8 @@ static int ram_load(QEMUFile *f, void *opaque, int
> > version_id)
> >          ram_addr_t addr, total_ram_bytes;
> >          void *host = NULL;
> >          uint8_t ch;
> > +        RAMBlock *block = NULL;
> > +        bool is_pmem = false;
> >  
> >          addr = qemu_get_be64(f);
> >          flags = addr & ~TARGET_PAGE_MASK;
> > @@ -2864,7 +2880,7 @@ static int ram_load(QEMUFile *f, void *opaque, int
> > version_id)
> >  
> >          if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
> >                       RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
> > -            RAMBlock *block = ram_block_from_stream(f, flags);
> > +            block = ram_block_from_stream(f, flags);
> >  
> >              host = host_from_ram_block_offset(block, addr);
> >              if (!host) {
> > @@ -2874,6 +2890,9 @@ static int ram_load(QEMUFile *f, void *opaque, int
> > version_id)
> >              }
> >              ramblock_recv_bitmap_set(block, host);
> >              trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
> > +
> > +            is_pmem = ramblock_is_pmem(block);
> > +            need_pmem_drain = need_pmem_drain || is_pmem;
> >          }
> >  
> >          switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
> > @@ -2927,7 +2946,7 @@ static int ram_load(QEMUFile *f, void *opaque, int
> > version_id)
> >  
> >          case RAM_SAVE_FLAG_ZERO:
> >              ch = qemu_get_byte(f);
> > -            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
> > +            ram_handle_compressed_common(host, ch, TARGET_PAGE_SIZE,
> > is_pmem);
> >              break;
> >  
> >          case RAM_SAVE_FLAG_PAGE:
> > @@ -2970,6 +2989,11 @@ static int ram_load(QEMUFile *f, void *opaque, int
> > version_id)
> >      }
> >  
> >      wait_for_decompress_done();
> > +
> > +    if (need_pmem_drain) {
> > +        pmem_drain();
> > +    }
> > +
> >      rcu_read_unlock();
> >      trace_ram_load_complete(ret, seq_iter);
> >      return ret;
> > --
> > 2.14.1
> > 
> > 
> >
Pankaj Gupta Feb. 7, 2018, 11:30 a.m. UTC | #3
> 
> On 02/07/18 05:17 -0500, Pankaj Gupta wrote:
> > 
> > > 
> > > When loading a zero page, check whether it will be loaded to
> > > persistent memory If yes, load it by libpmem function
> > > pmem_memset_nodrain().  Combined with a call to pmem_drain() at the
> > > end of RAM loading, we can guarantee all those zero pages are
> > > persistently loaded.
> > > 
> > > Depending on the host HW/SW configurations, pmem_drain() can be
> > > "sfence".  Therefore, we do not call pmem_drain() after each
> > > pmem_memset_nodrain(), or use pmem_memset_persist() (equally
> > > pmem_memset_nodrain() + pmem_drain()), in order to avoid unnecessary
> > > overhead.
> > 
> > Are you saying we don't need 'pmem_drain()'?
> 
> pmem_drain() is called only once after all ram blocks are loaded.

o.k

> 
> > 
> > I can see its called in patch 5 in ram_load? But
> > I also see empty definition. Anything I am missing here?
> 
> Functions defined in include/qemu/pmem.h are stubs and used only when
> QEMU is not compiled with libpmem. When QEMU is compiled with
> --enabled-libpmem, the one in libpmem is used.

o.k.

Thanks,
Pankaj

> 
> Haozhong
> 
> > 
> > Thanks,
> > Pankaj
> > 
> > > 
> > > Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
> > > ---
> > >  include/qemu/pmem.h |  9 +++++++++
> > >  migration/ram.c     | 34 +++++++++++++++++++++++++++++-----
> > >  2 files changed, 38 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/include/qemu/pmem.h b/include/qemu/pmem.h
> > > index 9017596ff0..861d8ecc21 100644
> > > --- a/include/qemu/pmem.h
> > > +++ b/include/qemu/pmem.h
> > > @@ -26,6 +26,15 @@ pmem_memcpy_persist(void *pmemdest, const void *src,
> > > size_t len)
> > >      return memcpy(pmemdest, src, len);
> > >  }
> > >  
> > > +static inline void *pmem_memset_nodrain(void *pmemdest, int c, size_t
> > > len)
> > > +{
> > > +    return memset(pmemdest, c, len);
> > > +}
> > > +
> > > +static inline void pmem_drain(void)
> > > +{
> > > +}
> > > +
> > >  #endif /* CONFIG_LIBPMEM */
> > >  
> > >  #endif /* !QEMU_PMEM_H */
> > > diff --git a/migration/ram.c b/migration/ram.c
> > > index cb1950f3eb..5a0e503818 100644
> > > --- a/migration/ram.c
> > > +++ b/migration/ram.c
> > > @@ -49,6 +49,7 @@
> > >  #include "qemu/rcu_queue.h"
> > >  #include "migration/colo.h"
> > >  #include "migration/block.h"
> > > +#include "qemu/pmem.h"
> > >  
> > >  /***********************************************************/
> > >  /* ram save/restore */
> > > @@ -2467,6 +2468,20 @@ static inline void
> > > *host_from_ram_block_offset(RAMBlock *block,
> > >      return block->host + offset;
> > >  }
> > >  
> > > +static void ram_handle_compressed_common(void *host, uint8_t ch,
> > > uint64_t
> > > size,
> > > +                                         bool is_pmem)
> > > +{
> > > +    if (!ch && is_zero_range(host, size)) {
> > > +        return;
> > > +    }
> > > +
> > > +    if (!is_pmem) {
> > > +        memset(host, ch, size);
> > > +    } else {
> > > +        pmem_memset_nodrain(host, ch, size);
> > > +    }
> > > +}
> > > +
> > >  /**
> > >   * ram_handle_compressed: handle the zero page case
> > >   *
> > > @@ -2479,9 +2494,7 @@ static inline void
> > > *host_from_ram_block_offset(RAMBlock
> > > *block,
> > >   */
> > >  void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
> > >  {
> > > -    if (ch != 0 || !is_zero_range(host, size)) {
> > > -        memset(host, ch, size);
> > > -    }
> > > +    return ram_handle_compressed_common(host, ch, size, false);
> > >  }
> > >  
> > >  static void *do_data_decompress(void *opaque)
> > > @@ -2823,6 +2836,7 @@ static int ram_load(QEMUFile *f, void *opaque, int
> > > version_id)
> > >      bool postcopy_running = postcopy_is_running();
> > >      /* ADVISE is earlier, it shows the source has the postcopy
> > >      capability on
> > >      */
> > >      bool postcopy_advised = postcopy_is_advised();
> > > +    bool need_pmem_drain = false;
> > >  
> > >      seq_iter++;
> > >  
> > > @@ -2848,6 +2862,8 @@ static int ram_load(QEMUFile *f, void *opaque, int
> > > version_id)
> > >          ram_addr_t addr, total_ram_bytes;
> > >          void *host = NULL;
> > >          uint8_t ch;
> > > +        RAMBlock *block = NULL;
> > > +        bool is_pmem = false;
> > >  
> > >          addr = qemu_get_be64(f);
> > >          flags = addr & ~TARGET_PAGE_MASK;
> > > @@ -2864,7 +2880,7 @@ static int ram_load(QEMUFile *f, void *opaque, int
> > > version_id)
> > >  
> > >          if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
> > >                       RAM_SAVE_FLAG_COMPRESS_PAGE |
> > >                       RAM_SAVE_FLAG_XBZRLE)) {
> > > -            RAMBlock *block = ram_block_from_stream(f, flags);
> > > +            block = ram_block_from_stream(f, flags);
> > >  
> > >              host = host_from_ram_block_offset(block, addr);
> > >              if (!host) {
> > > @@ -2874,6 +2890,9 @@ static int ram_load(QEMUFile *f, void *opaque, int
> > > version_id)
> > >              }
> > >              ramblock_recv_bitmap_set(block, host);
> > >              trace_ram_load_loop(block->idstr, (uint64_t)addr, flags,
> > >              host);
> > > +
> > > +            is_pmem = ramblock_is_pmem(block);
> > > +            need_pmem_drain = need_pmem_drain || is_pmem;
> > >          }
> > >  
> > >          switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
> > > @@ -2927,7 +2946,7 @@ static int ram_load(QEMUFile *f, void *opaque, int
> > > version_id)
> > >  
> > >          case RAM_SAVE_FLAG_ZERO:
> > >              ch = qemu_get_byte(f);
> > > -            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
> > > +            ram_handle_compressed_common(host, ch, TARGET_PAGE_SIZE,
> > > is_pmem);
> > >              break;
> > >  
> > >          case RAM_SAVE_FLAG_PAGE:
> > > @@ -2970,6 +2989,11 @@ static int ram_load(QEMUFile *f, void *opaque, int
> > > version_id)
> > >      }
> > >  
> > >      wait_for_decompress_done();
> > > +
> > > +    if (need_pmem_drain) {
> > > +        pmem_drain();
> > > +    }
> > > +
> > >      rcu_read_unlock();
> > >      trace_ram_load_complete(ret, seq_iter);
> > >      return ret;
> > > --
> > > 2.14.1
> > > 
> > > 
> > > 
>
Dr. David Alan Gilbert Feb. 7, 2018, 11:38 a.m. UTC | #4
* Haozhong Zhang (haozhong.zhang@intel.com) wrote:
> When loading a zero page, check whether it will be loaded to
> persistent memory If yes, load it by libpmem function
> pmem_memset_nodrain().  Combined with a call to pmem_drain() at the
> end of RAM loading, we can guarantee all those zero pages are
> persistently loaded.

I'm surprised pmem is this invasive to be honest;   I hadn't expected
the need for special memcpy's etc everywhere.  We're bound to miss some.
I assume there's separate kernel work needed to make postcopy work;
certainly the patches here don't seem to touch the postcopy path.

> Depending on the host HW/SW configurations, pmem_drain() can be
> "sfence".  Therefore, we do not call pmem_drain() after each
> pmem_memset_nodrain(), or use pmem_memset_persist() (equally
> pmem_memset_nodrain() + pmem_drain()), in order to avoid unnecessary
> overhead.
> 
> Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
> ---
>  include/qemu/pmem.h |  9 +++++++++
>  migration/ram.c     | 34 +++++++++++++++++++++++++++++-----
>  2 files changed, 38 insertions(+), 5 deletions(-)
> 
> diff --git a/include/qemu/pmem.h b/include/qemu/pmem.h
> index 9017596ff0..861d8ecc21 100644
> --- a/include/qemu/pmem.h
> +++ b/include/qemu/pmem.h
> @@ -26,6 +26,15 @@ pmem_memcpy_persist(void *pmemdest, const void *src, size_t len)
>      return memcpy(pmemdest, src, len);
>  }
>  
> +static inline void *pmem_memset_nodrain(void *pmemdest, int c, size_t len)
> +{
> +    return memset(pmemdest, c, len);
> +}
> +
> +static inline void pmem_drain(void)
> +{
> +}
> +
>  #endif /* CONFIG_LIBPMEM */
>  
>  #endif /* !QEMU_PMEM_H */
> diff --git a/migration/ram.c b/migration/ram.c
> index cb1950f3eb..5a0e503818 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -49,6 +49,7 @@
>  #include "qemu/rcu_queue.h"
>  #include "migration/colo.h"
>  #include "migration/block.h"
> +#include "qemu/pmem.h"
>  
>  /***********************************************************/
>  /* ram save/restore */
> @@ -2467,6 +2468,20 @@ static inline void *host_from_ram_block_offset(RAMBlock *block,
>      return block->host + offset;
>  }
>  
> +static void ram_handle_compressed_common(void *host, uint8_t ch, uint64_t size,
> +                                         bool is_pmem)

I don't think there's any advantage of splitting out this _common
routine; lets just add the parameter to ram_handle_compressed.

> +{
> +    if (!ch && is_zero_range(host, size)) {
> +        return;
> +    }
> +
> +    if (!is_pmem) {
> +        memset(host, ch, size);
> +    } else {
> +        pmem_memset_nodrain(host, ch, size);
> +    }

I'm wondering if it would be easier to pass in a memsetfunc ptr and call
that (defualting to memset if it's NULL).

> +}
> +
>  /**
>   * ram_handle_compressed: handle the zero page case
>   *
> @@ -2479,9 +2494,7 @@ static inline void *host_from_ram_block_offset(RAMBlock *block,
>   */
>  void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
>  {
> -    if (ch != 0 || !is_zero_range(host, size)) {
> -        memset(host, ch, size);
> -    }
> +    return ram_handle_compressed_common(host, ch, size, false);
>  }
>  
>  static void *do_data_decompress(void *opaque)
> @@ -2823,6 +2836,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
>      bool postcopy_running = postcopy_is_running();
>      /* ADVISE is earlier, it shows the source has the postcopy capability on */
>      bool postcopy_advised = postcopy_is_advised();
> +    bool need_pmem_drain = false;
>  
>      seq_iter++;
>  
> @@ -2848,6 +2862,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
>          ram_addr_t addr, total_ram_bytes;
>          void *host = NULL;
>          uint8_t ch;
> +        RAMBlock *block = NULL;
> +        bool is_pmem = false;
>  
>          addr = qemu_get_be64(f);
>          flags = addr & ~TARGET_PAGE_MASK;
> @@ -2864,7 +2880,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
>  
>          if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
>                       RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
> -            RAMBlock *block = ram_block_from_stream(f, flags);
> +            block = ram_block_from_stream(f, flags);
>  
>              host = host_from_ram_block_offset(block, addr);
>              if (!host) {
> @@ -2874,6 +2890,9 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
>              }
>              ramblock_recv_bitmap_set(block, host);
>              trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
> +
> +            is_pmem = ramblock_is_pmem(block);
> +            need_pmem_drain = need_pmem_drain || is_pmem;
>          }
>  
>          switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
> @@ -2927,7 +2946,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
>  
>          case RAM_SAVE_FLAG_ZERO:
>              ch = qemu_get_byte(f);
> -            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
> +            ram_handle_compressed_common(host, ch, TARGET_PAGE_SIZE, is_pmem);
>              break;
>  
>          case RAM_SAVE_FLAG_PAGE:
> @@ -2970,6 +2989,11 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
>      }
>  
>      wait_for_decompress_done();
> +
> +    if (need_pmem_drain) {
> +        pmem_drain();
> +    }
> +
>      rcu_read_unlock();
>      trace_ram_load_complete(ret, seq_iter);
>      return ret;
> -- 
> 2.14.1

Dave

> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Haozhong Zhang Feb. 7, 2018, 11:52 a.m. UTC | #5
On 02/07/18 11:38 +0000, Dr. David Alan Gilbert wrote:
> * Haozhong Zhang (haozhong.zhang@intel.com) wrote:
> > When loading a zero page, check whether it will be loaded to
> > persistent memory If yes, load it by libpmem function
> > pmem_memset_nodrain().  Combined with a call to pmem_drain() at the
> > end of RAM loading, we can guarantee all those zero pages are
> > persistently loaded.
> 
> I'm surprised pmem is this invasive to be honest;   I hadn't expected
> the need for special memcpy's etc everywhere.  We're bound to miss some.
> I assume there's separate kernel work needed to make postcopy work;
> certainly the patches here don't seem to touch the postcopy path.

This link at
https://wiki.qemu.org/Features/PostCopyLiveMigration#Conflicts shows
that postcopy with memory-backend-file requires kernel support. Can
you point me the details of the required kernel support, so that I can
understand what would be needed to NVDIMM postcopy?

> 
> > Depending on the host HW/SW configurations, pmem_drain() can be
> > "sfence".  Therefore, we do not call pmem_drain() after each
> > pmem_memset_nodrain(), or use pmem_memset_persist() (equally
> > pmem_memset_nodrain() + pmem_drain()), in order to avoid unnecessary
> > overhead.
> > 
> > Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
> > ---
> >  include/qemu/pmem.h |  9 +++++++++
> >  migration/ram.c     | 34 +++++++++++++++++++++++++++++-----
> >  2 files changed, 38 insertions(+), 5 deletions(-)
> > 
> > diff --git a/include/qemu/pmem.h b/include/qemu/pmem.h
> > index 9017596ff0..861d8ecc21 100644
> > --- a/include/qemu/pmem.h
> > +++ b/include/qemu/pmem.h
> > @@ -26,6 +26,15 @@ pmem_memcpy_persist(void *pmemdest, const void *src, size_t len)
> >      return memcpy(pmemdest, src, len);
> >  }
> >  
> > +static inline void *pmem_memset_nodrain(void *pmemdest, int c, size_t len)
> > +{
> > +    return memset(pmemdest, c, len);
> > +}
> > +
> > +static inline void pmem_drain(void)
> > +{
> > +}
> > +
> >  #endif /* CONFIG_LIBPMEM */
> >  
> >  #endif /* !QEMU_PMEM_H */
> > diff --git a/migration/ram.c b/migration/ram.c
> > index cb1950f3eb..5a0e503818 100644
> > --- a/migration/ram.c
> > +++ b/migration/ram.c
> > @@ -49,6 +49,7 @@
> >  #include "qemu/rcu_queue.h"
> >  #include "migration/colo.h"
> >  #include "migration/block.h"
> > +#include "qemu/pmem.h"
> >  
> >  /***********************************************************/
> >  /* ram save/restore */
> > @@ -2467,6 +2468,20 @@ static inline void *host_from_ram_block_offset(RAMBlock *block,
> >      return block->host + offset;
> >  }
> >  
> > +static void ram_handle_compressed_common(void *host, uint8_t ch, uint64_t size,
> > +                                         bool is_pmem)
> 
> I don't think there's any advantage of splitting out this _common
> routine; lets just add the parameter to ram_handle_compressed.
> 
> > +{
> > +    if (!ch && is_zero_range(host, size)) {
> > +        return;
> > +    }
> > +
> > +    if (!is_pmem) {
> > +        memset(host, ch, size);
> > +    } else {
> > +        pmem_memset_nodrain(host, ch, size);
> > +    }
> 
> I'm wondering if it would be easier to pass in a memsetfunc ptr and call
> that (defualting to memset if it's NULL).

Yes, it would be more extensible if we have other special memory
devices in the future.

Thanks,
Haozhong

> 
> > +}
> > +
> >  /**
> >   * ram_handle_compressed: handle the zero page case
> >   *
> > @@ -2479,9 +2494,7 @@ static inline void *host_from_ram_block_offset(RAMBlock *block,
> >   */
> >  void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
> >  {
> > -    if (ch != 0 || !is_zero_range(host, size)) {
> > -        memset(host, ch, size);
> > -    }
> > +    return ram_handle_compressed_common(host, ch, size, false);
> >  }
> >  
> >  static void *do_data_decompress(void *opaque)
> > @@ -2823,6 +2836,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
> >      bool postcopy_running = postcopy_is_running();
> >      /* ADVISE is earlier, it shows the source has the postcopy capability on */
> >      bool postcopy_advised = postcopy_is_advised();
> > +    bool need_pmem_drain = false;
> >  
> >      seq_iter++;
> >  
> > @@ -2848,6 +2862,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
> >          ram_addr_t addr, total_ram_bytes;
> >          void *host = NULL;
> >          uint8_t ch;
> > +        RAMBlock *block = NULL;
> > +        bool is_pmem = false;
> >  
> >          addr = qemu_get_be64(f);
> >          flags = addr & ~TARGET_PAGE_MASK;
> > @@ -2864,7 +2880,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
> >  
> >          if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
> >                       RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
> > -            RAMBlock *block = ram_block_from_stream(f, flags);
> > +            block = ram_block_from_stream(f, flags);
> >  
> >              host = host_from_ram_block_offset(block, addr);
> >              if (!host) {
> > @@ -2874,6 +2890,9 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
> >              }
> >              ramblock_recv_bitmap_set(block, host);
> >              trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
> > +
> > +            is_pmem = ramblock_is_pmem(block);
> > +            need_pmem_drain = need_pmem_drain || is_pmem;
> >          }
> >  
> >          switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
> > @@ -2927,7 +2946,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
> >  
> >          case RAM_SAVE_FLAG_ZERO:
> >              ch = qemu_get_byte(f);
> > -            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
> > +            ram_handle_compressed_common(host, ch, TARGET_PAGE_SIZE, is_pmem);
> >              break;
> >  
> >          case RAM_SAVE_FLAG_PAGE:
> > @@ -2970,6 +2989,11 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
> >      }
> >  
> >      wait_for_decompress_done();
> > +
> > +    if (need_pmem_drain) {
> > +        pmem_drain();
> > +    }
> > +
> >      rcu_read_unlock();
> >      trace_ram_load_complete(ret, seq_iter);
> >      return ret;
> > -- 
> > 2.14.1
> 
> Dave
> 
> > 
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Haozhong Zhang Feb. 7, 2018, 12:51 p.m. UTC | #6
On 02/07/18 19:52 +0800, Haozhong Zhang wrote:
> On 02/07/18 11:38 +0000, Dr. David Alan Gilbert wrote:
> > * Haozhong Zhang (haozhong.zhang@intel.com) wrote:
> > > When loading a zero page, check whether it will be loaded to
> > > persistent memory If yes, load it by libpmem function
> > > pmem_memset_nodrain().  Combined with a call to pmem_drain() at the
> > > end of RAM loading, we can guarantee all those zero pages are
> > > persistently loaded.
> > 
> > I'm surprised pmem is this invasive to be honest;   I hadn't expected
> > the need for special memcpy's etc everywhere.  We're bound to miss some.
> > I assume there's separate kernel work needed to make postcopy work;
> > certainly the patches here don't seem to touch the postcopy path.
> 
> This link at
> https://wiki.qemu.org/Features/PostCopyLiveMigration#Conflicts shows
> that postcopy with memory-backend-file requires kernel support. Can
> you point me the details of the required kernel support, so that I can
> understand what would be needed to NVDIMM postcopy?

I saw test_ramblock_postcopiable() requires the ram block not be
mmap'ed with MAP_SHARED. The only pmem device (i.e., device DAX e.g.,
/dev/dax0.0) that can be safely used as the backend of vNVDIMM must be
shared mapped which is required by kernel, so postcopy does not work
with pmem right now. Even if the private mmap was supported for device
dax, it would still make little sense to private mmap it in QEMU,
because vNVDIMM intends to be non-volatile.

Haozhong
Dr. David Alan Gilbert Feb. 7, 2018, 12:56 p.m. UTC | #7
* Haozhong Zhang (haozhong.zhang@intel.com) wrote:
> On 02/07/18 11:38 +0000, Dr. David Alan Gilbert wrote:
> > * Haozhong Zhang (haozhong.zhang@intel.com) wrote:
> > > When loading a zero page, check whether it will be loaded to
> > > persistent memory. If yes, load it by libpmem function
> > > pmem_memset_nodrain().  Combined with a call to pmem_drain() at the
> > > end of RAM loading, we can guarantee all those zero pages are
> > > persistently loaded.
> > 
> > I'm surprised pmem is this invasive to be honest;   I hadn't expected
> > the need for special memcpy's etc everywhere.  We're bound to miss some.
> > I assume there's separate kernel work needed to make postcopy work;
> > certainly the patches here don't seem to touch the postcopy path.
> 
> This link at
> https://wiki.qemu.org/Features/PostCopyLiveMigration#Conflicts shows
> that postcopy with memory-backend-file requires kernel support. Can
> you point me the details of the required kernel support, so that I can
> understand what would be needed to NVDIMM postcopy?

I can't, but ask Andrea Arcangeli ( aarcange@redhat.com ); he wrote the
userfault kernel code.  Note that we have a mechanism for atomically
placing a page into memory, so that might also need modifications for
pmem; again check with Andrea.

Dave

> > 
> > > Depending on the host HW/SW configurations, pmem_drain() can be
> > > "sfence".  Therefore, we do not call pmem_drain() after each
> > > pmem_memset_nodrain(), or use pmem_memset_persist() (equally
> > > pmem_memset_nodrain() + pmem_drain()), in order to avoid unnecessary
> > > overhead.
> > > 
> > > Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
> > > ---
> > >  include/qemu/pmem.h |  9 +++++++++
> > >  migration/ram.c     | 34 +++++++++++++++++++++++++++++-----
> > >  2 files changed, 38 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/include/qemu/pmem.h b/include/qemu/pmem.h
> > > index 9017596ff0..861d8ecc21 100644
> > > --- a/include/qemu/pmem.h
> > > +++ b/include/qemu/pmem.h
> > > @@ -26,6 +26,15 @@ pmem_memcpy_persist(void *pmemdest, const void *src, size_t len)
> > >      return memcpy(pmemdest, src, len);
> > >  }
> > >  
> > > +static inline void *pmem_memset_nodrain(void *pmemdest, int c, size_t len)
> > > +{
> > > +    return memset(pmemdest, c, len);
> > > +}
> > > +
> > > +static inline void pmem_drain(void)
> > > +{
> > > +}
> > > +
> > >  #endif /* CONFIG_LIBPMEM */
> > >  
> > >  #endif /* !QEMU_PMEM_H */
> > > diff --git a/migration/ram.c b/migration/ram.c
> > > index cb1950f3eb..5a0e503818 100644
> > > --- a/migration/ram.c
> > > +++ b/migration/ram.c
> > > @@ -49,6 +49,7 @@
> > >  #include "qemu/rcu_queue.h"
> > >  #include "migration/colo.h"
> > >  #include "migration/block.h"
> > > +#include "qemu/pmem.h"
> > >  
> > >  /***********************************************************/
> > >  /* ram save/restore */
> > > @@ -2467,6 +2468,20 @@ static inline void *host_from_ram_block_offset(RAMBlock *block,
> > >      return block->host + offset;
> > >  }
> > >  
> > > +static void ram_handle_compressed_common(void *host, uint8_t ch, uint64_t size,
> > > +                                         bool is_pmem)
> > 
> > I don't think there's any advantage of splitting out this _common
> > routine; lets just add the parameter to ram_handle_compressed.
> > 
> > > +{
> > > +    if (!ch && is_zero_range(host, size)) {
> > > +        return;
> > > +    }
> > > +
> > > +    if (!is_pmem) {
> > > +        memset(host, ch, size);
> > > +    } else {
> > > +        pmem_memset_nodrain(host, ch, size);
> > > +    }
> > 
> > I'm wondering if it would be easier to pass in a memsetfunc ptr and call
> > that (defualting to memset if it's NULL).
> 
> Yes, it would be more extensible if we have other special memory
> devices in the future.
> 
> Thank,
> Haozhong
> 
> > 
> > > +}
> > > +
> > >  /**
> > >   * ram_handle_compressed: handle the zero page case
> > >   *
> > > @@ -2479,9 +2494,7 @@ static inline void *host_from_ram_block_offset(RAMBlock *block,
> > >   */
> > >  void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
> > >  {
> > > -    if (ch != 0 || !is_zero_range(host, size)) {
> > > -        memset(host, ch, size);
> > > -    }
> > > +    return ram_handle_compressed_common(host, ch, size, false);
> > >  }
> > >  
> > >  static void *do_data_decompress(void *opaque)
> > > @@ -2823,6 +2836,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
> > >      bool postcopy_running = postcopy_is_running();
> > >      /* ADVISE is earlier, it shows the source has the postcopy capability on */
> > >      bool postcopy_advised = postcopy_is_advised();
> > > +    bool need_pmem_drain = false;
> > >  
> > >      seq_iter++;
> > >  
> > > @@ -2848,6 +2862,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
> > >          ram_addr_t addr, total_ram_bytes;
> > >          void *host = NULL;
> > >          uint8_t ch;
> > > +        RAMBlock *block = NULL;
> > > +        bool is_pmem = false;
> > >  
> > >          addr = qemu_get_be64(f);
> > >          flags = addr & ~TARGET_PAGE_MASK;
> > > @@ -2864,7 +2880,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
> > >  
> > >          if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
> > >                       RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
> > > -            RAMBlock *block = ram_block_from_stream(f, flags);
> > > +            block = ram_block_from_stream(f, flags);
> > >  
> > >              host = host_from_ram_block_offset(block, addr);
> > >              if (!host) {
> > > @@ -2874,6 +2890,9 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
> > >              }
> > >              ramblock_recv_bitmap_set(block, host);
> > >              trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
> > > +
> > > +            is_pmem = ramblock_is_pmem(block);
> > > +            need_pmem_drain = need_pmem_drain || is_pmem;
> > >          }
> > >  
> > >          switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
> > > @@ -2927,7 +2946,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
> > >  
> > >          case RAM_SAVE_FLAG_ZERO:
> > >              ch = qemu_get_byte(f);
> > > -            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
> > > +            ram_handle_compressed_common(host, ch, TARGET_PAGE_SIZE, is_pmem);
> > >              break;
> > >  
> > >          case RAM_SAVE_FLAG_PAGE:
> > > @@ -2970,6 +2989,11 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
> > >      }
> > >  
> > >      wait_for_decompress_done();
> > > +
> > > +    if (need_pmem_drain) {
> > > +        pmem_drain();
> > > +    }
> > > +
> > >      rcu_read_unlock();
> > >      trace_ram_load_complete(ret, seq_iter);
> > >      return ret;
> > > -- 
> > > 2.14.1
> > 
> > Dave
> > 
> > > 
> > --
> > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Dr. David Alan Gilbert Feb. 7, 2018, 12:59 p.m. UTC | #8
* Haozhong Zhang (haozhong.zhang@intel.com) wrote:
> On 02/07/18 19:52 +0800, Haozhong Zhang wrote:
> > On 02/07/18 11:38 +0000, Dr. David Alan Gilbert wrote:
> > > * Haozhong Zhang (haozhong.zhang@intel.com) wrote:
> > > > When loading a zero page, check whether it will be loaded to
> > > > persistent memory. If yes, load it by libpmem function
> > > > pmem_memset_nodrain().  Combined with a call to pmem_drain() at the
> > > > end of RAM loading, we can guarantee all those zero pages are
> > > > persistently loaded.
> > > 
> > > I'm surprised pmem is this invasive to be honest;   I hadn't expected
> > > the need for special memcpy's etc everywhere.  We're bound to miss some.
> > > I assume there's separate kernel work needed to make postcopy work;
> > > certainly the patches here don't seem to touch the postcopy path.
> > 
> > This link at
> > https://wiki.qemu.org/Features/PostCopyLiveMigration#Conflicts shows
> > that postcopy with memory-backend-file requires kernel support. Can
> > you point me the details of the required kernel support, so that I can
> > understand what would be needed to NVDIMM postcopy?
> 
> I saw test_ramblock_postcopiable() requires the ram block not be
> mmap'ed with MAP_SHARED. The only pmem device (i.e., device DAX e.g.,
> /dev/dax0.0) that can be safely used as the backend of vNVDIMM must be
> shared mapped which is required by kernel, so postcopy does not work
> with pmem right now. Even if the private mmap was supported for device
> dax, it would still make little sense to private mmap it in QEMU,
> because vNVDIMM intends to be non-volatile.

I've got patches which do shareable for vhost-user at the moment;
(I've posted a few versions and I'm working on an updated set).
However, it's probably more than just the shareable that needs thinking
about for a device like that.

Dave

> Haozhong
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Pankaj Gupta Feb. 7, 2018, 2:10 p.m. UTC | #9
> 
> On 02/07/18 19:52 +0800, Haozhong Zhang wrote:
> > On 02/07/18 11:38 +0000, Dr. David Alan Gilbert wrote:
> > > * Haozhong Zhang (haozhong.zhang@intel.com) wrote:
> > > > When loading a zero page, check whether it will be loaded to
> > > > persistent memory. If yes, load it by libpmem function
> > > > pmem_memset_nodrain().  Combined with a call to pmem_drain() at the
> > > > end of RAM loading, we can guarantee all those zero pages are
> > > > persistently loaded.
> > > 
> > > I'm surprised pmem is this invasive to be honest;   I hadn't expected
> > > the need for special memcpy's etc everywhere.  We're bound to miss some.
> > > I assume there's separate kernel work needed to make postcopy work;
> > > certainly the patches here don't seem to touch the postcopy path.
> > 
> > This link at
> > https://wiki.qemu.org/Features/PostCopyLiveMigration#Conflicts shows
> > that postcopy with memory-backend-file requires kernel support. Can
> > you point me the details of the required kernel support, so that I can
> > understand what would be needed to NVDIMM postcopy?
> 
> I saw test_ramblock_postcopiable() requires the ram block not be
> mmap'ed with MAP_SHARED. The only pmem device (i.e., device DAX e.g.,
> /dev/dax0.0) that can be safely used as the backend of vNVDIMM must be
> shared mapped which is required by kernel, so postcopy does not work
> with pmem right now. Even if the private mmap was supported for device
> dax, it would still make little sense to private mmap it in QEMU,
> because vNVDIMM intends to be non-volatile.

o.k that's the reason post-copy pages do not work with THP.

If we want to support device DAX with postcopy we need to handle
this use-case? 

Adding 'Andrea' to the thread for more clear answer.


Thanks,
Pankaj
diff mbox series

Patch

diff --git a/include/qemu/pmem.h b/include/qemu/pmem.h
index 9017596ff0..861d8ecc21 100644
--- a/include/qemu/pmem.h
+++ b/include/qemu/pmem.h
@@ -26,6 +26,15 @@  pmem_memcpy_persist(void *pmemdest, const void *src, size_t len)
     return memcpy(pmemdest, src, len);
 }
 
+static inline void *pmem_memset_nodrain(void *pmemdest, int c, size_t len)
+{
+    return memset(pmemdest, c, len);
+}
+
+static inline void pmem_drain(void)
+{
+}
+
 #endif /* CONFIG_LIBPMEM */
 
 #endif /* !QEMU_PMEM_H */
diff --git a/migration/ram.c b/migration/ram.c
index cb1950f3eb..5a0e503818 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -49,6 +49,7 @@ 
 #include "qemu/rcu_queue.h"
 #include "migration/colo.h"
 #include "migration/block.h"
+#include "qemu/pmem.h"
 
 /***********************************************************/
 /* ram save/restore */
@@ -2467,6 +2468,20 @@  static inline void *host_from_ram_block_offset(RAMBlock *block,
     return block->host + offset;
 }
 
+static void ram_handle_compressed_common(void *host, uint8_t ch, uint64_t size,
+                                         bool is_pmem)
+{
+    if (!ch && is_zero_range(host, size)) {
+        return;
+    }
+
+    if (!is_pmem) {
+        memset(host, ch, size);
+    } else {
+        pmem_memset_nodrain(host, ch, size);
+    }
+}
+
 /**
  * ram_handle_compressed: handle the zero page case
  *
@@ -2479,9 +2494,7 @@  static inline void *host_from_ram_block_offset(RAMBlock *block,
  */
 void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
 {
-    if (ch != 0 || !is_zero_range(host, size)) {
-        memset(host, ch, size);
-    }
+    return ram_handle_compressed_common(host, ch, size, false);
 }
 
 static void *do_data_decompress(void *opaque)
@@ -2823,6 +2836,7 @@  static int ram_load(QEMUFile *f, void *opaque, int version_id)
     bool postcopy_running = postcopy_is_running();
     /* ADVISE is earlier, it shows the source has the postcopy capability on */
     bool postcopy_advised = postcopy_is_advised();
+    bool need_pmem_drain = false;
 
     seq_iter++;
 
@@ -2848,6 +2862,8 @@  static int ram_load(QEMUFile *f, void *opaque, int version_id)
         ram_addr_t addr, total_ram_bytes;
         void *host = NULL;
         uint8_t ch;
+        RAMBlock *block = NULL;
+        bool is_pmem = false;
 
         addr = qemu_get_be64(f);
         flags = addr & ~TARGET_PAGE_MASK;
@@ -2864,7 +2880,7 @@  static int ram_load(QEMUFile *f, void *opaque, int version_id)
 
         if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
                      RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
-            RAMBlock *block = ram_block_from_stream(f, flags);
+            block = ram_block_from_stream(f, flags);
 
             host = host_from_ram_block_offset(block, addr);
             if (!host) {
@@ -2874,6 +2890,9 @@  static int ram_load(QEMUFile *f, void *opaque, int version_id)
             }
             ramblock_recv_bitmap_set(block, host);
             trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
+
+            is_pmem = ramblock_is_pmem(block);
+            need_pmem_drain = need_pmem_drain || is_pmem;
         }
 
         switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
@@ -2927,7 +2946,7 @@  static int ram_load(QEMUFile *f, void *opaque, int version_id)
 
         case RAM_SAVE_FLAG_ZERO:
             ch = qemu_get_byte(f);
-            ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
+            ram_handle_compressed_common(host, ch, TARGET_PAGE_SIZE, is_pmem);
             break;
 
         case RAM_SAVE_FLAG_PAGE:
@@ -2970,6 +2989,11 @@  static int ram_load(QEMUFile *f, void *opaque, int version_id)
     }
 
     wait_for_decompress_done();
+
+    if (need_pmem_drain) {
+        pmem_drain();
+    }
+
     rcu_read_unlock();
     trace_ram_load_complete(ret, seq_iter);
     return ret;