migration: initialize RAM to zero

Message ID 1365418028-2546-1-git-send-email-pbonzini@redhat.com
State New

Commit Message

Paolo Bonzini April 8, 2013, 10:47 a.m. UTC
Using qemu_memalign only leaves the RAM zero by chance, because libc
will usually use mmap to satisfy our huge requests.  But memory will
not be zero when using MALLOC_PERTURB_ with a nonzero value.  In the
case of incoming migration, this breaks a recently-introduced
invariant (commit f1c7279, migration: do not sent zero pages in
bulk stage, 2013-03-26).

To fix this, use mmap ourselves to get a well-aligned, always zero
block for the RAM.  Mmap-ed memory is easy to "trim" at the sides.

This also removes the need to do something special on valgrind
(see commit c2a8238a, Support running QEMU on Valgrind, 2011-10-31).

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 util/oslib-posix.c | 30 +++++++++++++-----------------
 1 file changed, 13 insertions(+), 17 deletions(-)
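
For context, the MALLOC_PERTURB_ problem described above is easy to
reproduce outside QEMU.  The program below is an illustrative standalone
sketch, not part of the patch (the file name perturb-demo.c is made up):
run with MALLOC_PERTURB_ set to a nonzero value, the large malloc'd block
comes back filled with a perturb pattern even though glibc satisfies the
request with mmap, while a direct anonymous mmap is always zero-filled.

/* perturb-demo.c
 * build: gcc -O2 -o perturb-demo perturb-demo.c
 * run:   MALLOC_PERTURB_=165 ./perturb-demo
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>

static int all_zero(const unsigned char *p, size_t len)
{
    for (size_t i = 0; i < len; i++) {
        if (p[i] != 0) {
            return 0;
        }
    }
    return 1;
}

int main(void)
{
    size_t len = 16 * 1024 * 1024;   /* large enough that glibc mmaps it */

    unsigned char *m = malloc(len);
    printf("malloc'd block all zero: %s\n",
           m && all_zero(m, len) ? "yes" : "no");
    free(m);

    unsigned char *a = mmap(NULL, len, PROT_READ | PROT_WRITE,
                            MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    printf("anonymous mmap all zero: %s\n",
           a != MAP_FAILED && all_zero(a, len) ? "yes" : "no");
    if (a != MAP_FAILED) {
        munmap(a, len);
    }
    return 0;
}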

Comments

Peter Lieven April 8, 2013, 2:55 p.m. UTC | #1
On 08.04.2013 at 12:47, Paolo Bonzini <pbonzini@redhat.com> wrote:

> Using qemu_memalign only leaves the RAM zero by chance, because libc
> will usually use mmap to satisfy our huge requests.  But memory will
> not be zero when using MALLOC_PERTURB_ with a nonzero value.  In the
> case of incoming migration, this breaks a recently-introduced
> invariant (commit f1c7279, migration: do not sent zero pages in
> bulk stage, 2013-03-26).
> 
> To fix this, use mmap ourselves to get a well-aligned, always zero
> block for the RAM.  Mmap-ed memory is easy to "trim" at the sides.
> 
> This also removes the need to do something special on valgrind
> (see commit c2a8238a, Support running QEMU on Valgrind, 2011-10-31).
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> util/oslib-posix.c | 30 +++++++++++++-----------------
> 1 file changed, 13 insertions(+), 17 deletions(-)
> 
> diff --git a/util/oslib-posix.c b/util/oslib-posix.c
> index 433dd68..91f5aab 100644
> --- a/util/oslib-posix.c
> +++ b/util/oslib-posix.c
> @@ -52,12 +52,8 @@ extern int daemon(int, int);
> #include "sysemu/sysemu.h"
> #include "trace.h"
> #include "qemu/sockets.h"
> +#include <sys/mman.h>
> 
> -#if defined(CONFIG_VALGRIND)
> -static int running_on_valgrind = -1;
> -#else
> -#  define running_on_valgrind 0
> -#endif
> #ifdef CONFIG_LINUX
> #include <sys/syscall.h>
> #endif
> @@ -108,22 +104,22 @@ void *qemu_memalign(size_t alignment, size_t size)
> /* alloc shared memory pages */
> void *qemu_vmalloc(size_t size)
> {
> -    void *ptr;
>     size_t align = QEMU_VMALLOC_ALIGN;
> +    size_t total = size + align - getpagesize();
> +    void *ptr = mmap(0, total, PROT_READ | PROT_WRITE,
> +                     MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> +    size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
> 
> -#if defined(CONFIG_VALGRIND)
> -    if (running_on_valgrind < 0) {
> -        /* First call, test whether we are running on Valgrind.
> -           This is a substitute for RUNNING_ON_VALGRIND from valgrind.h. */
> -        const char *ld = getenv("LD_PRELOAD");
> -        running_on_valgrind = (ld != NULL && strstr(ld, "vgpreload"));
> -    }
> -#endif
> +    ptr += offset;
> +    total -= offset;
> 
> -    if (size < align || running_on_valgrind) {
> -        align = getpagesize();
> +    if (offset > 0) {
> +        munmap(ptr - offset, offset);
> +    }
> +    if (total > size) {
> +        munmap(ptr + size, total - size);
>     }
> -    ptr = qemu_memalign(align, size);
> +
>     trace_qemu_vmalloc(size, ptr);
>     return ptr;
> }
> -- 
> 1.8.1.4
> 

Reviewed-by: Peter Lieven <pl@kamp.de>
Markus Armbruster April 9, 2013, 8:52 a.m. UTC | #2
Paolo Bonzini <pbonzini@redhat.com> writes:

> Using qemu_memalign only leaves the RAM zero by chance, because libc
> will usually use mmap to satisfy our huge requests.  But memory will
> not be zero when using MALLOC_PERTURB_ with a nonzero value.  In the
> case of incoming migration, this breaks a recently-introduced
> invariant (commit f1c7279, migration: do not sent zero pages in
> bulk stage, 2013-03-26).
>
> To fix this, use mmap ourselves to get a well-aligned, always zero
> block for the RAM.  Mmap-ed memory is easy to "trim" at the sides.
>
> This also removes the need to do something special on valgrind
> (see commit c2a8238a, Support running QEMU on Valgrind, 2011-10-31).

Suggest to state explicitly that you effectively revert it.

You left #define CONFIG_VALGRIND in, even though it's no longer used.
Intentional?

> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  util/oslib-posix.c | 30 +++++++++++++-----------------
>  1 file changed, 13 insertions(+), 17 deletions(-)
>
> diff --git a/util/oslib-posix.c b/util/oslib-posix.c
> index 433dd68..91f5aab 100644
> --- a/util/oslib-posix.c
> +++ b/util/oslib-posix.c
> @@ -52,12 +52,8 @@ extern int daemon(int, int);
>  #include "sysemu/sysemu.h"
>  #include "trace.h"
>  #include "qemu/sockets.h"
> +#include <sys/mman.h>
>  
> -#if defined(CONFIG_VALGRIND)
> -static int running_on_valgrind = -1;
> -#else
> -#  define running_on_valgrind 0
> -#endif
>  #ifdef CONFIG_LINUX
>  #include <sys/syscall.h>
>  #endif
> @@ -108,22 +104,22 @@ void *qemu_memalign(size_t alignment, size_t size)
>  /* alloc shared memory pages */
>  void *qemu_vmalloc(size_t size)
>  {
> -    void *ptr;
>      size_t align = QEMU_VMALLOC_ALIGN;
> +    size_t total = size + align - getpagesize();
> +    void *ptr = mmap(0, total, PROT_READ | PROT_WRITE,
> +                     MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> +    size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
>  
> -#if defined(CONFIG_VALGRIND)
> -    if (running_on_valgrind < 0) {
> -        /* First call, test whether we are running on Valgrind.
> -           This is a substitute for RUNNING_ON_VALGRIND from valgrind.h. */
> -        const char *ld = getenv("LD_PRELOAD");
> -        running_on_valgrind = (ld != NULL && strstr(ld, "vgpreload"));
> -    }
> -#endif

Please check for mmap() failure.

The old code uses qemu_memalign(), which treats allocation failure as a
programming error: calls abort().  Not sure that's actually appropriate
here.
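
For concreteness, here is a sketch of what such a check could look like
(an illustration only, under the assumptions of this thread, not the
actual v2 that Paolo sent later); whether to abort() like qemu_memalign()
does, or to return NULL and let callers decide, is exactly the open
question raised above:

/* Illustrative sketch only: not the real follow-up patch.  abort() is
 * shown here just to mirror the old qemu_memalign() behaviour.
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

void *vmalloc_sketch(size_t size, size_t align)
{
    size_t total = size + align - getpagesize();
    void *ptr = mmap(0, total, PROT_READ | PROT_WRITE,
                     MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);

    if (ptr == MAP_FAILED) {
        fprintf(stderr, "cannot mmap %zu bytes for RAM: %s\n",
                total, strerror(errno));
        abort();
    }

    /* ... alignment trimming with munmap() as in the patch above ... */
    return ptr;
}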

> +    ptr += offset;
> +    total -= offset;
>  
> -    if (size < align || running_on_valgrind) {
> -        align = getpagesize();
> +    if (offset > 0) {
> +        munmap(ptr - offset, offset);
> +    }
> +    if (total > size) {
> +        munmap(ptr + size, total - size);
>      }
> -    ptr = qemu_memalign(align, size);
> +
>      trace_qemu_vmalloc(size, ptr);
>      return ptr;
>  }
Paolo Bonzini April 9, 2013, 8:56 a.m. UTC | #3
On 09/04/2013 at 10:52, Markus Armbruster wrote:
>> > This also removes the need to do something special on valgrind
>> > (see commit c2a8238a, Support running QEMU on Valgrind, 2011-10-31).
> Suggest to state explicitly that you effectively revert it.
> 
> You left #define CONFIG_VALGRIND in, even though it's no longer used.
> Intentional?

Oh, there's both CONFIG_VALGRIND and CONFIG_VALGRIND_H.  Nice.  I'll
send v2.

Paolo
Juan Quintela April 9, 2013, 11:23 a.m. UTC | #4
Paolo Bonzini <pbonzini@redhat.com> wrote:
> Using qemu_memalign only leaves the RAM zero by chance, because libc
> will usually use mmap to satisfy our huge requests.  But memory will
> not be zero when using MALLOC_PERTURB_ with a nonzero value.  In the
> case of incoming migration, this breaks a recently-introduced
> invariant (commit f1c7279, migration: do not sent zero pages in
> bulk stage, 2013-03-26).
>
> To fix this, use mmap ourselves to get a well-aligned, always zero
> block for the RAM.  Mmap-ed memory is easy to "trim" at the sides.
>
> This also removes the need to do something special on valgrind
> (see commit c2a8238a, Support running QEMU on Valgrind, 2011-10-31).
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Except for the mmap return check and the missing valgrind check, the
patch looks good.

Thanks, Juan.
Markus Armbruster April 9, 2013, 11:27 a.m. UTC | #5
Paolo Bonzini <pbonzini@redhat.com> writes:

> On 09/04/2013 at 10:52, Markus Armbruster wrote:
>>> > This also removes the need to do something special on valgrind
>>> > (see commit c2a8238a, Support running QEMU on Valgrind, 2011-10-31).
>> Suggest to state explicitly that you effectively revert it.
>> 
>> You left #define CONFIG_VALGRIND in, even though it's no longer used.
>> Intentional?
>
> Oh, there's both CONFIG_VALGRIND and CONFIG_VALGRIND_H.  Nice.  I'll
> send v2.

That confused me briefly, too :)

Make sure to address the mmap() error checking as well.

Patch

diff --git a/util/oslib-posix.c b/util/oslib-posix.c
index 433dd68..91f5aab 100644
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -52,12 +52,8 @@  extern int daemon(int, int);
 #include "sysemu/sysemu.h"
 #include "trace.h"
 #include "qemu/sockets.h"
+#include <sys/mman.h>
 
-#if defined(CONFIG_VALGRIND)
-static int running_on_valgrind = -1;
-#else
-#  define running_on_valgrind 0
-#endif
 #ifdef CONFIG_LINUX
 #include <sys/syscall.h>
 #endif
@@ -108,22 +104,22 @@  void *qemu_memalign(size_t alignment, size_t size)
 /* alloc shared memory pages */
 void *qemu_vmalloc(size_t size)
 {
-    void *ptr;
     size_t align = QEMU_VMALLOC_ALIGN;
+    size_t total = size + align - getpagesize();
+    void *ptr = mmap(0, total, PROT_READ | PROT_WRITE,
+                     MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+    size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
 
-#if defined(CONFIG_VALGRIND)
-    if (running_on_valgrind < 0) {
-        /* First call, test whether we are running on Valgrind.
-           This is a substitute for RUNNING_ON_VALGRIND from valgrind.h. */
-        const char *ld = getenv("LD_PRELOAD");
-        running_on_valgrind = (ld != NULL && strstr(ld, "vgpreload"));
-    }
-#endif
+    ptr += offset;
+    total -= offset;
 
-    if (size < align || running_on_valgrind) {
-        align = getpagesize();
+    if (offset > 0) {
+        munmap(ptr - offset, offset);
+    }
+    if (total > size) {
+        munmap(ptr + size, total - size);
     }
-    ptr = qemu_memalign(align, size);
+
     trace_qemu_vmalloc(size, ptr);
     return ptr;
 }