Patchwork [v3] Introduce qemu_madvise()

login
register
mail settings
Submitter Andreas Färber
Date Sept. 12, 2010, 12:55 p.m.
Message ID <1284296134-2503-1-git-send-email-andreas.faerber@web.de>
Download mbox | patch
Permalink /patch/64548/
State New
Headers show

Comments

Andreas Färber - Sept. 12, 2010, 12:55 p.m.
From: Andreas Färber <afaerber@opensolaris.org>

vl.c has a Sun-specific hack to supply a prototype for madvise(),
but the call site has apparently moved to arch_init.c.

Haiku doesn't implement madvise() in favor of posix_madvise().
OpenBSD and Solaris 10 don't implement posix_madvise() but madvise().

Check for madvise() and posix_madvise() in configure and supply qemu_madvise()
as wrapper. Prefer madvise() over posix_madvise() due to flag availability.
Convert all callers to use qemu_madvise() and QEMU_MADV_*. No functional change
except for arch_init.c:ram_load() now potentially falling back to posix_madvise()
or no-op in lack of both.

v2 -> v3:
* Reuse the *_MADV_* defines for QEMU_MADV_*. Suggested by Alexander Graf.
* Add configure check for madvise(), too.
  Add defines to Makefile, not QEMU_CFLAGS.
  Convert all callers, untested. Suggested by Blue Swirl.
* Keep Solaris' madvise() prototype around. Pointed out by Alexander Graf.

v1 -> v2:
* Don't rely on posix_madvise() availability, add qemu_madvise().
  Suggested by Blue Swirl.

Signed-off-by: Andreas Färber <afaerber@opensolaris.org>
Cc: Blue Swirl <blauwirbel@gmail.com>
Cc: Alexander Graf <agraf@suse.de>
---
 arch_init.c         |    2 +-
 configure           |   33 +++++++++++++++++++++++++++++++++
 exec.c              |    8 ++++----
 hw/virtio-balloon.c |    4 ++--
 kvm-all.c           |    6 +++---
 osdep.c             |   15 +++++++++++++++
 osdep.h             |   25 +++++++++++++++++++++++++
 vl.c                |    3 ---
 8 files changed, 83 insertions(+), 13 deletions(-)
Blue Swirl - Sept. 12, 2010, 5:29 p.m.
On Sun, Sep 12, 2010 at 12:55 PM, Andreas Färber <andreas.faerber@web.de> wrote:
> From: Andreas Färber <afaerber@opensolaris.org>
>
> vl.c has a Sun-specific hack to supply a prototype for madvise(),
> but the call site has apparently moved to arch_init.c.
>
> Haiku doesn't implement madvise() in favor of posix_madvise().
> OpenBSD and Solaris 10 don't implement posix_madvise() but madvise().
>
> Check for madvise() and posix_madvise() in configure and supply qemu_madvise()
> as wrapper. Prefer madvise() over posix_madvise() due to flag availability.
> Convert all callers to use qemu_madvise() and QEMU_MADV_*. No functional change
> except for arch_init.c:ram_load() now potentially falling back to posix_madvise()
> or no-op in lack of both.
>
> v2 -> v3:
> * Reuse the *_MADV_* defines for QEMU_MADV_*. Suggested by Alexander Graf.
> * Add configure check for madvise(), too.
>  Add defines to Makefile, not QEMU_CFLAGS.
>  Convert all callers, untested. Suggested by Blue Swirl.
> * Keep Solaris' madvise() prototype around. Pointed out by Alexander Graf.
>
> v1 -> v2:
> * Don't rely on posix_madvise() availability, add qemu_madvise().
>  Suggested by Blue Swirl.
>
> Signed-off-by: Andreas Färber <afaerber@opensolaris.org>
> Cc: Blue Swirl <blauwirbel@gmail.com>
> Cc: Alexander Graf <agraf@suse.de>
> ---
>  arch_init.c         |    2 +-
>  configure           |   33 +++++++++++++++++++++++++++++++++
>  exec.c              |    8 ++++----
>  hw/virtio-balloon.c |    4 ++--
>  kvm-all.c           |    6 +++---
>  osdep.c             |   15 +++++++++++++++
>  osdep.h             |   25 +++++++++++++++++++++++++
>  vl.c                |    3 ---
>  8 files changed, 83 insertions(+), 13 deletions(-)
>
> diff --git a/arch_init.c b/arch_init.c
> index e468c0c..a910033 100644
> --- a/arch_init.c
> +++ b/arch_init.c
> @@ -396,7 +396,7 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>  #ifndef _WIN32
>             if (ch == 0 &&
>                 (!kvm_enabled() || kvm_has_sync_mmu())) {
> -                madvise(host, TARGET_PAGE_SIZE, MADV_DONTNEED);
> +                qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
>             }
>  #endif
>         } else if (flags & RAM_SAVE_FLAG_PAGE) {
> diff --git a/configure b/configure
> index 4061cb7..86558eb 100755
> --- a/configure
> +++ b/configure
> @@ -2069,6 +2069,31 @@ if compile_prog "" "" ; then
>  fi
>
>  ##########################################
> +# check if we have madvise
> +
> +madvise=no
> +cat > $TMPC << EOF
> +#include <sys/types.h>
> +#include <sys/mman.h>
> +int main(void) { return madvise(NULL, 0, MADV_DONTNEED); }
> +EOF
> +if compile_prog "" "" ; then
> +    madvise=yes
> +fi
> +
> +##########################################
> +# check if we have posix_madvise
> +
> +posix_madvise=no
> +cat > $TMPC << EOF
> +#include <sys/mman.h>
> +int main(void) { return posix_madvise(NULL, 0, POSIX_MADV_DONTNEED); }
> +EOF
> +if compile_prog "" "" ; then
> +    posix_madvise=yes
> +fi
> +
> +##########################################
>  # check if trace backend exists
>
>  sh "$source_path/tracetool" "--$trace_backend" --check-backend > /dev/null 2> /dev/null
> @@ -2226,6 +2251,8 @@ echo "KVM support       $kvm"
>  echo "fdt support       $fdt"
>  echo "preadv support    $preadv"
>  echo "fdatasync         $fdatasync"
> +echo "madvise           $madvise"
> +echo "posix_madvise     $posix_madvise"
>  echo "uuid support      $uuid"
>  echo "vhost-net support $vhost_net"
>  echo "Trace backend     $trace_backend"
> @@ -2466,6 +2493,12 @@ fi
>  if test "$fdatasync" = "yes" ; then
>   echo "CONFIG_FDATASYNC=y" >> $config_host_mak
>  fi
> +if test "$madvise" = "yes" ; then
> +  echo "CONFIG_MADVISE=y" >> $config_host_mak
> +fi
> +if test "$posix_madvise" = "yes" ; then
> +  echo "CONFIG_POSIX_MADVISE=y" >> $config_host_mak
> +fi
>
>  # XXX: suppress that
>  if [ "$bsd" = "yes" ] ; then
> diff --git a/exec.c b/exec.c
> index 380dab5..b1fe3e9 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -2841,8 +2841,8 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
>             new_block->host = file_ram_alloc(new_block, size, mem_path);
>             if (!new_block->host) {
>                 new_block->host = qemu_vmalloc(size);
> -#ifdef MADV_MERGEABLE
> -                madvise(new_block->host, size, MADV_MERGEABLE);
> +#ifdef QEMU_MADV_MERGEABLE

I'd like to avoid these #ifdefs. How about always #defining
QEMU_MADV_MERGEABLE? QEMU_MADV_* values could be synthetic and not
rely on MADV_*.

> +                qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
>  #endif
>             }
>  #else
> @@ -2858,8 +2858,8 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
>  #else
>             new_block->host = qemu_vmalloc(size);
>  #endif
> -#ifdef MADV_MERGEABLE
> -            madvise(new_block->host, size, MADV_MERGEABLE);
> +#ifdef QEMU_MADV_MERGEABLE
> +            qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
>  #endif
>         }
>     }
> diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c
> index 9fe3886..1e74674 100644
> --- a/hw/virtio-balloon.c
> +++ b/hw/virtio-balloon.c
> @@ -51,8 +51,8 @@ static void balloon_page(void *addr, int deflate)
>  {
>  #if defined(__linux__)
>     if (!kvm_enabled() || kvm_has_sync_mmu())
> -        madvise(addr, TARGET_PAGE_SIZE,
> -                deflate ? MADV_WILLNEED : MADV_DONTNEED);
> +        qemu_madvise(addr, TARGET_PAGE_SIZE,
> +                deflate ? QEMU_MADV_WILLNEED : QEMU_MADV_DONTNEED);
>  #endif
>  }
>
> diff --git a/kvm-all.c b/kvm-all.c
> index 58b0404..9393419 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -1031,11 +1031,11 @@ int kvm_has_xcrs(void)
>  void kvm_setup_guest_memory(void *start, size_t size)
>  {
>     if (!kvm_has_sync_mmu()) {
> -#ifdef MADV_DONTFORK
> -        int ret = madvise(start, size, MADV_DONTFORK);
> +#ifdef QEMU_MADV_DONTFORK
> +        int ret = qemu_madvise(start, size, QEMU_MADV_DONTFORK);
>
>         if (ret) {
> -            perror("madvice");
> +            perror("qemu_madvise");
>             exit(1);
>         }
>  #else

Here the approach could be that if the return value is -ENOTSUP, we
don't print the error message. Then the #ifdefs could be dropped.
Alexander Graf - Sept. 13, 2010, 12:02 p.m.
Blue Swirl wrote:
> On Sun, Sep 12, 2010 at 12:55 PM, Andreas Färber <andreas.faerber@web.de> wrote:
>   
>> From: Andreas Färber <afaerber@opensolaris.org>
>>
>> vl.c has a Sun-specific hack to supply a prototype for madvise(),
>> but the call site has apparently moved to arch_init.c.
>>
>> Haiku doesn't implement madvise() in favor of posix_madvise().
>> OpenBSD and Solaris 10 don't implement posix_madvise() but madvise().
>>
>> Check for madvise() and posix_madvise() in configure and supply qemu_madvise()
>> as wrapper. Prefer madvise() over posix_madvise() due to flag availability.
>> Convert all callers to use qemu_madvise() and QEMU_MADV_*. No functional change
>> except for arch_init.c:ram_load() now potentially falling back to posix_madvise()
>> or no-op in lack of both.
>>
>> v2 -> v3:
>> * Reuse the *_MADV_* defines for QEMU_MADV_*. Suggested by Alexander Graf.
>> * Add configure check for madvise(), too.
>>  Add defines to Makefile, not QEMU_CFLAGS.
>>  Convert all callers, untested. Suggested by Blue Swirl.
>> * Keep Solaris' madvise() prototype around. Pointed out by Alexander Graf.
>>
>> v1 -> v2:
>> * Don't rely on posix_madvise() availability, add qemu_madvise().
>>  Suggested by Blue Swirl.
>>
>> Signed-off-by: Andreas Färber <afaerber@opensolaris.org>
>> Cc: Blue Swirl <blauwirbel@gmail.com>
>> Cc: Alexander Graf <agraf@suse.de>
>> ---
>>  arch_init.c         |    2 +-
>>  configure           |   33 +++++++++++++++++++++++++++++++++
>>  exec.c              |    8 ++++----
>>  hw/virtio-balloon.c |    4 ++--
>>  kvm-all.c           |    6 +++---
>>  osdep.c             |   15 +++++++++++++++
>>  osdep.h             |   25 +++++++++++++++++++++++++
>>  vl.c                |    3 ---
>>  8 files changed, 83 insertions(+), 13 deletions(-)
>>
>> diff --git a/arch_init.c b/arch_init.c
>> index e468c0c..a910033 100644
>> --- a/arch_init.c
>> +++ b/arch_init.c
>> @@ -396,7 +396,7 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>>  #ifndef _WIN32
>>             if (ch == 0 &&
>>                 (!kvm_enabled() || kvm_has_sync_mmu())) {
>> -                madvise(host, TARGET_PAGE_SIZE, MADV_DONTNEED);
>> +                qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
>>             }
>>  #endif
>>         } else if (flags & RAM_SAVE_FLAG_PAGE) {
>> diff --git a/configure b/configure
>> index 4061cb7..86558eb 100755
>> --- a/configure
>> +++ b/configure
>> @@ -2069,6 +2069,31 @@ if compile_prog "" "" ; then
>>  fi
>>
>>  ##########################################
>> +# check if we have madvise
>> +
>> +madvise=no
>> +cat > $TMPC << EOF
>> +#include <sys/types.h>
>> +#include <sys/mman.h>
>> +int main(void) { return madvise(NULL, 0, MADV_DONTNEED); }
>> +EOF
>> +if compile_prog "" "" ; then
>> +    madvise=yes
>> +fi
>> +
>> +##########################################
>> +# check if we have posix_madvise
>> +
>> +posix_madvise=no
>> +cat > $TMPC << EOF
>> +#include <sys/mman.h>
>> +int main(void) { return posix_madvise(NULL, 0, POSIX_MADV_DONTNEED); }
>> +EOF
>> +if compile_prog "" "" ; then
>> +    posix_madvise=yes
>> +fi
>> +
>> +##########################################
>>  # check if trace backend exists
>>
>>  sh "$source_path/tracetool" "--$trace_backend" --check-backend > /dev/null 2> /dev/null
>> @@ -2226,6 +2251,8 @@ echo "KVM support       $kvm"
>>  echo "fdt support       $fdt"
>>  echo "preadv support    $preadv"
>>  echo "fdatasync         $fdatasync"
>> +echo "madvise           $madvise"
>> +echo "posix_madvise     $posix_madvise"
>>  echo "uuid support      $uuid"
>>  echo "vhost-net support $vhost_net"
>>  echo "Trace backend     $trace_backend"
>> @@ -2466,6 +2493,12 @@ fi
>>  if test "$fdatasync" = "yes" ; then
>>   echo "CONFIG_FDATASYNC=y" >> $config_host_mak
>>  fi
>> +if test "$madvise" = "yes" ; then
>> +  echo "CONFIG_MADVISE=y" >> $config_host_mak
>> +fi
>> +if test "$posix_madvise" = "yes" ; then
>> +  echo "CONFIG_POSIX_MADVISE=y" >> $config_host_mak
>> +fi
>>
>>  # XXX: suppress that
>>  if [ "$bsd" = "yes" ] ; then
>> diff --git a/exec.c b/exec.c
>> index 380dab5..b1fe3e9 100644
>> --- a/exec.c
>> +++ b/exec.c
>> @@ -2841,8 +2841,8 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
>>             new_block->host = file_ram_alloc(new_block, size, mem_path);
>>             if (!new_block->host) {
>>                 new_block->host = qemu_vmalloc(size);
>> -#ifdef MADV_MERGEABLE
>> -                madvise(new_block->host, size, MADV_MERGEABLE);
>> +#ifdef QEMU_MADV_MERGEABLE
>>     
>
> I'd like to avoid these #ifdefs. How about always #defining
> QEMU_MADV_MERGEABLE? QEMU_MADV_* values could be synthetic and not
> rely on MADV_*.
>   

Hrm. Something like

#ifdef MADV_MEREABLE
#define QEMU_MADV_MERGEABLE MADV_MERGEABLE
#else
#define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
#endif

would work, right? Only need to find an unused bit in madv ...


Alex

Patch

diff --git a/arch_init.c b/arch_init.c
index e468c0c..a910033 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -396,7 +396,7 @@  int ram_load(QEMUFile *f, void *opaque, int version_id)
 #ifndef _WIN32
             if (ch == 0 &&
                 (!kvm_enabled() || kvm_has_sync_mmu())) {
-                madvise(host, TARGET_PAGE_SIZE, MADV_DONTNEED);
+                qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
             }
 #endif
         } else if (flags & RAM_SAVE_FLAG_PAGE) {
diff --git a/configure b/configure
index 4061cb7..86558eb 100755
--- a/configure
+++ b/configure
@@ -2069,6 +2069,31 @@  if compile_prog "" "" ; then
 fi
 
 ##########################################
+# check if we have madvise
+
+madvise=no
+cat > $TMPC << EOF
+#include <sys/types.h>
+#include <sys/mman.h>
+int main(void) { return madvise(NULL, 0, MADV_DONTNEED); }
+EOF
+if compile_prog "" "" ; then
+    madvise=yes
+fi
+
+##########################################
+# check if we have posix_madvise
+
+posix_madvise=no
+cat > $TMPC << EOF
+#include <sys/mman.h>
+int main(void) { return posix_madvise(NULL, 0, POSIX_MADV_DONTNEED); }
+EOF
+if compile_prog "" "" ; then
+    posix_madvise=yes
+fi
+
+##########################################
 # check if trace backend exists
 
 sh "$source_path/tracetool" "--$trace_backend" --check-backend > /dev/null 2> /dev/null
@@ -2226,6 +2251,8 @@  echo "KVM support       $kvm"
 echo "fdt support       $fdt"
 echo "preadv support    $preadv"
 echo "fdatasync         $fdatasync"
+echo "madvise           $madvise"
+echo "posix_madvise     $posix_madvise"
 echo "uuid support      $uuid"
 echo "vhost-net support $vhost_net"
 echo "Trace backend     $trace_backend"
@@ -2466,6 +2493,12 @@  fi
 if test "$fdatasync" = "yes" ; then
   echo "CONFIG_FDATASYNC=y" >> $config_host_mak
 fi
+if test "$madvise" = "yes" ; then
+  echo "CONFIG_MADVISE=y" >> $config_host_mak
+fi
+if test "$posix_madvise" = "yes" ; then
+  echo "CONFIG_POSIX_MADVISE=y" >> $config_host_mak
+fi
 
 # XXX: suppress that
 if [ "$bsd" = "yes" ] ; then
diff --git a/exec.c b/exec.c
index 380dab5..b1fe3e9 100644
--- a/exec.c
+++ b/exec.c
@@ -2841,8 +2841,8 @@  ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
             new_block->host = file_ram_alloc(new_block, size, mem_path);
             if (!new_block->host) {
                 new_block->host = qemu_vmalloc(size);
-#ifdef MADV_MERGEABLE
-                madvise(new_block->host, size, MADV_MERGEABLE);
+#ifdef QEMU_MADV_MERGEABLE
+                qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
 #endif
             }
 #else
@@ -2858,8 +2858,8 @@  ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
 #else
             new_block->host = qemu_vmalloc(size);
 #endif
-#ifdef MADV_MERGEABLE
-            madvise(new_block->host, size, MADV_MERGEABLE);
+#ifdef QEMU_MADV_MERGEABLE
+            qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
 #endif
         }
     }
diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c
index 9fe3886..1e74674 100644
--- a/hw/virtio-balloon.c
+++ b/hw/virtio-balloon.c
@@ -51,8 +51,8 @@  static void balloon_page(void *addr, int deflate)
 {
 #if defined(__linux__)
     if (!kvm_enabled() || kvm_has_sync_mmu())
-        madvise(addr, TARGET_PAGE_SIZE,
-                deflate ? MADV_WILLNEED : MADV_DONTNEED);
+        qemu_madvise(addr, TARGET_PAGE_SIZE,
+                deflate ? QEMU_MADV_WILLNEED : QEMU_MADV_DONTNEED);
 #endif
 }
 
diff --git a/kvm-all.c b/kvm-all.c
index 58b0404..9393419 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1031,11 +1031,11 @@  int kvm_has_xcrs(void)
 void kvm_setup_guest_memory(void *start, size_t size)
 {
     if (!kvm_has_sync_mmu()) {
-#ifdef MADV_DONTFORK
-        int ret = madvise(start, size, MADV_DONTFORK);
+#ifdef QEMU_MADV_DONTFORK
+        int ret = qemu_madvise(start, size, QEMU_MADV_DONTFORK);
 
         if (ret) {
-            perror("madvice");
+            perror("qemu_madvise");
             exit(1);
         }
 #else
diff --git a/osdep.c b/osdep.c
index 30426ff..b5e006f 100644
--- a/osdep.c
+++ b/osdep.c
@@ -28,6 +28,7 @@ 
 #include <errno.h>
 #include <unistd.h>
 #include <fcntl.h>
+#include <sys/mman.h>
 
 /* Needed early for CONFIG_BSD etc. */
 #include "config-host.h"
@@ -35,6 +36,9 @@ 
 #ifdef CONFIG_SOLARIS
 #include <sys/types.h>
 #include <sys/statvfs.h>
+/* See MySQL bug #7156 (http://bugs.mysql.com/bug.php?id=7156) for
+   discussion about Solaris header problems */
+extern int madvise(caddr_t, size_t, int);
 #endif
 
 #ifdef CONFIG_EVENTFD
@@ -139,6 +143,17 @@  void qemu_vfree(void *ptr)
 
 #endif
 
+int qemu_madvise(void *addr, size_t len, int advice)
+{
+#if defined(CONFIG_MADVISE)
+    return madvise(addr, len, advice);
+#elif defined(CONFIG_POSIX_MADVISE)
+    return posix_madvise(addr, len, advice);
+#else
+    return -ENOTSUP;
+#endif
+}
+
 int qemu_create_pidfile(const char *filename)
 {
     char buffer[128];
diff --git a/osdep.h b/osdep.h
index 1cdc7e2..552e453 100644
--- a/osdep.h
+++ b/osdep.h
@@ -90,6 +90,31 @@  void *qemu_memalign(size_t alignment, size_t size);
 void *qemu_vmalloc(size_t size);
 void qemu_vfree(void *ptr);
 
+#if defined(CONFIG_MADVISE)
+
+#define QEMU_MADV_WILLNEED MADV_WILLNEED
+#define QEMU_MADV_DONTNEED MADV_DONTNEED
+#ifdef MADV_DONTFORK
+#define QEMU_MADV_DONTFORK MADV_DONTFORK
+#endif
+#ifdef MADV_MERGEABLE
+#define QEMU_MADV_MERGEABLE MADV_MERGEABLE
+#endif
+
+#elif defined(CONFIG_POSIX_MADVISE)
+
+#define QEMU_MADV_WILLNEED POSIX_MADV_WILLNEED
+#define QEMU_MADV_DONTNEED POSIX_MADV_DONTNEED
+
+#else /* no-op */
+
+#define QEMU_MADV_WILLNEED (1 << 0)
+#define QEMU_MADV_DONTNEED (2 << 0)
+
+#endif
+
+int qemu_madvise(void *addr, size_t len, int advice);
+
 int qemu_create_pidfile(const char *filename);
 
 #ifdef _WIN32
diff --git a/vl.c b/vl.c
index 3f45aa9..d352d18 100644
--- a/vl.c
+++ b/vl.c
@@ -80,9 +80,6 @@ 
 #include <net/if.h>
 #include <syslog.h>
 #include <stropts.h>
-/* See MySQL bug #7156 (http://bugs.mysql.com/bug.php?id=7156) for
-   discussion about Solaris header problems */
-extern int madvise(caddr_t, size_t, int);
 #endif
 #endif
 #endif