Message ID | 1284224755-11299-1-git-send-email-andreas.faerber@web.de |
---|---|
State | New |
Headers | show |
On 11.09.2010, at 19:05, Andreas Färber wrote: > From: Andreas Färber <afaerber@opensolaris.org> > > vl.c has a Sun-specific hack to supply a prototype for madvise(), > but the call site has apparently moved to arch_init.c. > The underlying issue is that madvise() is not a POSIX function, > therefore Solaris' _POSIX_C_SOURCE suppresses the prototype. > > Haiku doesn't implement madvise() at all. > OpenBSD however doesn't implement posix_madvise(). > > Check for posix_madvise() in configure and supply qemu_madvise() as wrapper. > Use qemu_madvise() in arch_init.c's ram_load(). > > http://www.opengroup.org/onlinepubs/9699919799/functions/posix_madvise.html > > Remaining madvise() users: > exec.c: limited to __linux__ and/or MADV_MERGEABLE (no POSIX equivalent) > kvm-all.c: limited to MADV_DONTFORK (no POSIX equivalent), > otherwise runtime error if !kvm_has_sync_mmu() > hw/virtio-balloon.c: limited to __linux__ > > v1 -> v2: > * Don't rely on posix_madvise() availability, add qemu_madvise(). > Suggested by Blue Swirl. > > Signed-off-by: Andreas Färber <afaerber@opensolaris.org> > Cc: Blue Swirl <blauwirbel@gmail.com> > --- > arch_init.c | 2 +- > configure | 11 +++++++++++ > osdep.c | 26 ++++++++++++++++++++++++++ > osdep.h | 4 ++++ > vl.c | 3 --- > 5 files changed, 42 insertions(+), 4 deletions(-) > > diff --git a/arch_init.c b/arch_init.c > index e468c0c..a910033 100644 > --- a/arch_init.c > +++ b/arch_init.c > @@ -396,7 +396,7 @@ int ram_load(QEMUFile *f, void *opaque, int version_id) > #ifndef _WIN32 > if (ch == 0 && > (!kvm_enabled() || kvm_has_sync_mmu())) { > - madvise(host, TARGET_PAGE_SIZE, MADV_DONTNEED); > + qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED); Is this the only occurence in the whole code base? This patch should convert all users, right? > } > #endif > } else if (flags & RAM_SAVE_FLAG_PAGE) { > diff --git a/configure b/configure > index 4061cb7..5e6f3e1 100755 > --- a/configure > +++ b/configure > @@ -2069,6 +2069,17 @@ if compile_prog "" "" ; then > fi > > ########################################## > +# check if we have posix_madvise > + > +cat > $TMPC << EOF > +#include <sys/mman.h> > +int main(void) { posix_madvise(NULL, 0, POSIX_MADV_DONTNEED); return 0; } > +EOF > +if compile_prog "" "" ; then > + QEMU_CFLAGS="-DCONFIG_POSIX_MADVISE ${QEMU_CFLAGS}" > +fi > + > +########################################## > # check if trace backend exists > > sh "$source_path/tracetool" "--$trace_backend" --check-backend > /dev/null 2> /dev/null > diff --git a/osdep.c b/osdep.c > index 30426ff..8c09597 100644 > --- a/osdep.c > +++ b/osdep.c > @@ -28,6 +28,7 @@ > #include <errno.h> > #include <unistd.h> > #include <fcntl.h> > +#include <sys/mman.h> > > /* Needed early for CONFIG_BSD etc. */ > #include "config-host.h" > @@ -139,6 +140,31 @@ void qemu_vfree(void *ptr) > > #endif > > +int qemu_madvise(void *addr, size_t len, int advice) > +{ > +#ifdef CONFIG_POSIX_MADVISE > + switch (advice) { > + case QEMU_MADV_DONTNEED: > + advice = POSIX_MADV_DONTNEED; > + break; > + default: > + fprintf(stderr, "Advice %d unhandled.\n", advice); Advise :) It should probably also be 'madvise' here. Plus, I'd recommend %x as that makes the bits slightly more obvious. > + abort(); > + } > + return posix_madvise(addr, len, advice); > +#else > + switch (advice) { > + case QEMU_MADV_DONTNEED: > + advice = MADV_DONTNEED; > + break; > + default: > + fprintf(stderr, "Advice %d unhandled.\n", advice); > + abort(); > + } > + return madvise(addr, len, advice); So what do you do on haiku where there's no madvise? > +#endif > +} > + > int qemu_create_pidfile(const char *filename) > { > char buffer[128]; > diff --git a/osdep.h b/osdep.h > index 1cdc7e2..9f0da46 100644 > --- a/osdep.h > +++ b/osdep.h > @@ -90,6 +90,10 @@ void *qemu_memalign(size_t alignment, size_t size); > void *qemu_vmalloc(size_t size); > void qemu_vfree(void *ptr); > > +#define QEMU_MADV_DONTNEED 1 (1 << 0) There are probably more to follow. I'm also not sure if it wouldn't make sense to just directly map qemu_madvise and real madvise bits. Something like #ifdef MADV_DONTNEED #define QEMU_MADV_DONTNEED (1 << 0) #else #define QEMU_MADV_DONTNEED 0 #endif Unless of course a flag is mandatory - but I don't think any of the madvise bits are. > + > +int qemu_madvise(void *addr, size_t len, int advice); > + > int qemu_create_pidfile(const char *filename); > > #ifdef _WIN32 > diff --git a/vl.c b/vl.c > index 3f45aa9..d352d18 100644 > --- a/vl.c > +++ b/vl.c > @@ -80,9 +80,6 @@ > #include <net/if.h> > #include <syslog.h> > #include <stropts.h> > -/* See MySQL bug #7156 (http://bugs.mysql.com/bug.php?id=7156) for > - discussion about Solaris header problems */ > -extern int madvise(caddr_t, size_t, int); Does Solaris have posix_madvise? Otherwise it would still require this header hack, no? Alex
Am 11.09.2010 um 23:37 schrieb Alexander Graf: > On 11.09.2010, at 19:05, Andreas Färber wrote: > >> Remaining madvise() users: >> exec.c: limited to __linux__ and/or MADV_MERGEABLE (no POSIX >> equivalent) >> kvm-all.c: limited to MADV_DONTFORK (no POSIX equivalent), >> otherwise runtime error if !kvm_has_sync_mmu() >> hw/virtio-balloon.c: limited to __linux__ >> diff --git a/arch_init.c b/arch_init.c >> index e468c0c..a910033 100644 >> --- a/arch_init.c >> +++ b/arch_init.c >> @@ -396,7 +396,7 @@ int ram_load(QEMUFile *f, void *opaque, int >> version_id) >> #ifndef _WIN32 >> if (ch == 0 && >> (!kvm_enabled() || kvm_has_sync_mmu())) { >> - madvise(host, TARGET_PAGE_SIZE, MADV_DONTNEED); >> + qemu_madvise(host, TARGET_PAGE_SIZE, >> QEMU_MADV_DONTNEED); > > Is this the only occurence in the whole code base? This patch should > convert all users, right? It's the only relevant occurrence, cf. description above. We could in theory create QEMU_MADV_MERGEABLE and QEMU_MADV_DONTFORK and convert the callers, too, but what's the point when only madvise supports it? Using the current #ifdef mechanism we can detect/handle it at compile- time; inside qemu_madvise it would be deferred to runtime. Or do you have a suggestion how to handle that scenario other than returning an error? >> diff --git a/osdep.c b/osdep.c >> index 30426ff..8c09597 100644 >> --- a/osdep.c >> +++ b/osdep.c >> @@ -139,6 +140,31 @@ void qemu_vfree(void *ptr) >> >> #endif >> >> +int qemu_madvise(void *addr, size_t len, int advice) >> +{ >> +#ifdef CONFIG_POSIX_MADVISE >> + switch (advice) { >> + case QEMU_MADV_DONTNEED: >> + advice = POSIX_MADV_DONTNEED; >> + break; >> + default: >> + fprintf(stderr, "Advice %d unhandled.\n", advice); > > Advise :) I'd advise against that advice. ;) (*hint hint*) > It should probably also be 'madvise' here. Plus, I'd recommend %x as > that makes the bits slightly more obvious. You mean, "qemu_madvise: Advice %x ..."? Or "Advice %x for posix_madvise ..."? >> + abort(); >> + } >> + return posix_madvise(addr, len, advice); >> +#else >> + switch (advice) { >> + case QEMU_MADV_DONTNEED: >> + advice = MADV_DONTNEED; >> + break; >> + default: >> + fprintf(stderr, "Advice %d unhandled.\n", advice); >> + abort(); >> + } >> + return madvise(addr, len, advice); > > So what do you do on haiku where there's no madvise? It should detect posix_madvise and not run into this code path, just like OpenSolaris. I was hoping for Michael Lotz to resurface though and haven't retried myself yet. Platforms that have neither posix_madvise nor madvise are equally broken before and after. > >> +#endif >> +} >> + >> int qemu_create_pidfile(const char *filename) >> { >> char buffer[128]; >> diff --git a/osdep.h b/osdep.h >> index 1cdc7e2..9f0da46 100644 >> --- a/osdep.h >> +++ b/osdep.h >> @@ -90,6 +90,10 @@ void *qemu_memalign(size_t alignment, size_t >> size); >> void *qemu_vmalloc(size_t size); >> void qemu_vfree(void *ptr); >> >> +#define QEMU_MADV_DONTNEED 1 > > (1 << 0) > > There are probably more to follow. I'm also not sure if it wouldn't > make sense to just directly map qemu_madvise and real madvise bits. > Something like > > #ifdef MADV_DONTNEED > #define QEMU_MADV_DONTNEED (1 << 0) > #else > #define QEMU_MADV_DONTNEED 0 > #endif > > Unless of course a flag is mandatory - but I don't think any of the > madvise bits are. I don't follow. Something like the following? #ifdef CONFIG_POSIX_MADVISE #define QEMU_MADV_DONTNEED POSIX_MADV_DONTNEED #define qemu_madvise posix_madvise #else #ifdef MADV_DONTNEED #define QEMU_MADV_DONTNEED MADV_DONTNEED #endif #define qemu_madvise madvise #endif >> + >> +int qemu_madvise(void *addr, size_t len, int advice); >> + >> int qemu_create_pidfile(const char *filename); >> >> #ifdef _WIN32 >> diff --git a/vl.c b/vl.c >> index 3f45aa9..d352d18 100644 >> --- a/vl.c >> +++ b/vl.c >> @@ -80,9 +80,6 @@ >> #include <net/if.h> >> #include <syslog.h> >> #include <stropts.h> >> -/* See MySQL bug #7156 (http://bugs.mysql.com/bug.php?id=7156) for >> - discussion about Solaris header problems */ >> -extern int madvise(caddr_t, size_t, int); > > Does Solaris have posix_madvise? Otherwise it would still require > this header hack, no? I tested on OpenSolaris 2009.06. Haven't tested on Solaris 10 yet, don't have access to older ones. We could wrap it in #ifndef CONFIG_POSIX_MADVISE to be fool-safe. Thanks for the review, Andreas
On 12.09.2010, at 00:39, Andreas Färber wrote: > Am 11.09.2010 um 23:37 schrieb Alexander Graf: > >> On 11.09.2010, at 19:05, Andreas Färber wrote: >> >>> Remaining madvise() users: >>> exec.c: limited to __linux__ and/or MADV_MERGEABLE (no POSIX equivalent) >>> kvm-all.c: limited to MADV_DONTFORK (no POSIX equivalent), >>> otherwise runtime error if !kvm_has_sync_mmu() >>> hw/virtio-balloon.c: limited to __linux__ > >>> diff --git a/arch_init.c b/arch_init.c >>> index e468c0c..a910033 100644 >>> --- a/arch_init.c >>> +++ b/arch_init.c >>> @@ -396,7 +396,7 @@ int ram_load(QEMUFile *f, void *opaque, int version_id) >>> #ifndef _WIN32 >>> if (ch == 0 && >>> (!kvm_enabled() || kvm_has_sync_mmu())) { >>> - madvise(host, TARGET_PAGE_SIZE, MADV_DONTNEED); >>> + qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED); >> >> Is this the only occurence in the whole code base? This patch should convert all users, right? > > It's the only relevant occurrence, cf. description above. > > We could in theory create QEMU_MADV_MERGEABLE and QEMU_MADV_DONTFORK and convert the callers, too, but what's the point when only madvise supports it? > Using the current #ifdef mechanism we can detect/handle it at compile-time; inside qemu_madvise it would be deferred to runtime. Or do you have a suggestion how to handle that scenario other than returning an error? Hrm. I'm not sure. Do we have to fail madvise at all? > >>> diff --git a/osdep.c b/osdep.c >>> index 30426ff..8c09597 100644 >>> --- a/osdep.c >>> +++ b/osdep.c >>> @@ -139,6 +140,31 @@ void qemu_vfree(void *ptr) >>> >>> #endif >>> >>> +int qemu_madvise(void *addr, size_t len, int advice) >>> +{ >>> +#ifdef CONFIG_POSIX_MADVISE >>> + switch (advice) { >>> + case QEMU_MADV_DONTNEED: >>> + advice = POSIX_MADV_DONTNEED; >>> + break; >>> + default: >>> + fprintf(stderr, "Advice %d unhandled.\n", advice); >> >> Advise :) > > I'd advise against that advice. ;) (*hint hint*) > >> It should probably also be 'madvise' here. Plus, I'd recommend %x as that makes the bits slightly more obvious. > > You mean, "qemu_madvise: Advice %x ..."? Or "Advice %x for posix_madvise ..."? I'd remove the "Advice" part: "qemu_madvise: Flag %x unknown". > >>> + abort(); >>> + } >>> + return posix_madvise(addr, len, advice); >>> +#else >>> + switch (advice) { >>> + case QEMU_MADV_DONTNEED: >>> + advice = MADV_DONTNEED; >>> + break; >>> + default: >>> + fprintf(stderr, "Advice %d unhandled.\n", advice); >>> + abort(); >>> + } >>> + return madvise(addr, len, advice); >> >> So what do you do on haiku where there's no madvise? > > It should detect posix_madvise and not run into this code path, just like OpenSolaris. > I was hoping for Michael Lotz to resurface though and haven't retried myself yet. Oh, so OpenSolaris and Haiku have posix_madvise? Nice. > > Platforms that have neither posix_madvise nor madvise are equally broken before and after. Sure :). > >> >>> +#endif >>> +} >>> + >>> int qemu_create_pidfile(const char *filename) >>> { >>> char buffer[128]; >>> diff --git a/osdep.h b/osdep.h >>> index 1cdc7e2..9f0da46 100644 >>> --- a/osdep.h >>> +++ b/osdep.h >>> @@ -90,6 +90,10 @@ void *qemu_memalign(size_t alignment, size_t size); >>> void *qemu_vmalloc(size_t size); >>> void qemu_vfree(void *ptr); >>> >>> +#define QEMU_MADV_DONTNEED 1 >> >> (1 << 0) >> >> There are probably more to follow. I'm also not sure if it wouldn't make sense to just directly map qemu_madvise and real madvise bits. Something like >> >> #ifdef MADV_DONTNEED >> #define QEMU_MADV_DONTNEED (1 << 0) >> #else >> #define QEMU_MADV_DONTNEED 0 >> #endif >> >> Unless of course a flag is mandatory - but I don't think any of the madvise bits are. > > I don't follow. Something like the following? > > #ifdef CONFIG_POSIX_MADVISE > #define QEMU_MADV_DONTNEED POSIX_MADV_DONTNEED > #define qemu_madvise posix_madvise > #else > #ifdef MADV_DONTNEED > #define QEMU_MADV_DONTNEED MADV_DONTNEED > #endif > #define qemu_madvise madvise > #endif This could work, though in general preprocessor magic is ... too magic. I was thinking: #ifdef CONFIG_POSIX_MADVISE #define QEMU_MADV_DONTNEED POSIX_MADV_DONTNEED #else #define QEMU_MADV_DONTNEED MADV_DONTNEED #endif and keep the qemu_madvise implementation in C. But then there's no need to convert between QEMU_MADV and real MADV bits. > >>> + >>> +int qemu_madvise(void *addr, size_t len, int advice); >>> + >>> int qemu_create_pidfile(const char *filename); >>> >>> #ifdef _WIN32 >>> diff --git a/vl.c b/vl.c >>> index 3f45aa9..d352d18 100644 >>> --- a/vl.c >>> +++ b/vl.c >>> @@ -80,9 +80,6 @@ >>> #include <net/if.h> >>> #include <syslog.h> >>> #include <stropts.h> >>> -/* See MySQL bug #7156 (http://bugs.mysql.com/bug.php?id=7156) for >>> - discussion about Solaris header problems */ >>> -extern int madvise(caddr_t, size_t, int); >> >> Does Solaris have posix_madvise? Otherwise it would still require this header hack, no? > > I tested on OpenSolaris 2009.06. Haven't tested on Solaris 10 yet, don't have access to older ones. > We could wrap it in #ifndef CONFIG_POSIX_MADVISE to be fool-safe. *shrug* I was just afraid this was there for a reason. But maybe the author of those lines simply didn't know about posix_madvise. > Thanks for the review, You're welcome :) Alex
On Sat, Sep 11, 2010 at 5:05 PM, Andreas Färber <andreas.faerber@web.de> wrote: > From: Andreas Färber <afaerber@opensolaris.org> > > vl.c has a Sun-specific hack to supply a prototype for madvise(), > but the call site has apparently moved to arch_init.c. > The underlying issue is that madvise() is not a POSIX function, > therefore Solaris' _POSIX_C_SOURCE suppresses the prototype. > > Haiku doesn't implement madvise() at all. > OpenBSD however doesn't implement posix_madvise(). > > Check for posix_madvise() in configure and supply qemu_madvise() as wrapper. > Use qemu_madvise() in arch_init.c's ram_load(). > > http://www.opengroup.org/onlinepubs/9699919799/functions/posix_madvise.html > > Remaining madvise() users: > exec.c: limited to __linux__ and/or MADV_MERGEABLE (no POSIX equivalent) > kvm-all.c: limited to MADV_DONTFORK (no POSIX equivalent), > otherwise runtime error if !kvm_has_sync_mmu() > hw/virtio-balloon.c: limited to __linux__ For consistency, I'd convert all users. If an OS doesn't support a flag, we can return something like -ENOTSUP which may be checked by the caller if it cares. > > v1 -> v2: > * Don't rely on posix_madvise() availability, add qemu_madvise(). > Suggested by Blue Swirl. > > Signed-off-by: Andreas Färber <afaerber@opensolaris.org> > Cc: Blue Swirl <blauwirbel@gmail.com> > --- > arch_init.c | 2 +- > configure | 11 +++++++++++ > osdep.c | 26 ++++++++++++++++++++++++++ > osdep.h | 4 ++++ > vl.c | 3 --- > 5 files changed, 42 insertions(+), 4 deletions(-) > > diff --git a/arch_init.c b/arch_init.c > index e468c0c..a910033 100644 > --- a/arch_init.c > +++ b/arch_init.c > @@ -396,7 +396,7 @@ int ram_load(QEMUFile *f, void *opaque, int version_id) > #ifndef _WIN32 > if (ch == 0 && > (!kvm_enabled() || kvm_has_sync_mmu())) { > - madvise(host, TARGET_PAGE_SIZE, MADV_DONTNEED); > + qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED); > } > #endif > } else if (flags & RAM_SAVE_FLAG_PAGE) { > diff --git a/configure b/configure > index 4061cb7..5e6f3e1 100755 > --- a/configure > +++ b/configure > @@ -2069,6 +2069,17 @@ if compile_prog "" "" ; then > fi > > ########################################## > +# check if we have posix_madvise > + > +cat > $TMPC << EOF > +#include <sys/mman.h> > +int main(void) { posix_madvise(NULL, 0, POSIX_MADV_DONTNEED); return 0; } > +EOF > +if compile_prog "" "" ; then > + QEMU_CFLAGS="-DCONFIG_POSIX_MADVISE ${QEMU_CFLAGS}" Please take a look around configure:2444 how to add new config flags. I'd also add a similar check for madvise() if posix_madvise() check fails. > +fi > + > +########################################## > # check if trace backend exists > > sh "$source_path/tracetool" "--$trace_backend" --check-backend > /dev/null 2> /dev/null > diff --git a/osdep.c b/osdep.c > index 30426ff..8c09597 100644 > --- a/osdep.c > +++ b/osdep.c > @@ -28,6 +28,7 @@ > #include <errno.h> > #include <unistd.h> > #include <fcntl.h> > +#include <sys/mman.h> > > /* Needed early for CONFIG_BSD etc. */ > #include "config-host.h" > @@ -139,6 +140,31 @@ void qemu_vfree(void *ptr) > > #endif > > +int qemu_madvise(void *addr, size_t len, int advice) > +{ > +#ifdef CONFIG_POSIX_MADVISE > + switch (advice) { > + case QEMU_MADV_DONTNEED: > + advice = POSIX_MADV_DONTNEED; > + break; > + default: > + fprintf(stderr, "Advice %d unhandled.\n", advice); > + abort(); This should be an assert, it's an internal error. It's also common to both cases. > + } > + return posix_madvise(addr, len, advice); > +#else #elif defined(CONFIG_MADVISE) > + switch (advice) { > + case QEMU_MADV_DONTNEED: > + advice = MADV_DONTNEED; > + break; case QEMU_MADV_DONTFORK: #ifndef MADV_DONTFORK return -ENOTSUP; #else advice = MADV_DONTFORK; break; #endif Same goes for MADV_MERGEABLE. > + default: > + fprintf(stderr, "Advice %d unhandled.\n", advice); > + abort(); > + } > + return madvise(addr, len, advice); #else return -ENOTSUP;
Am 12.09.2010 um 09:20 schrieb Blue Swirl: > On Sat, Sep 11, 2010 at 5:05 PM, Andreas Färber <andreas.faerber@web.de > > wrote: >> Use qemu_madvise() in arch_init.c's ram_load(). >> >> http://www.opengroup.org/onlinepubs/9699919799/functions/posix_madvise.html >> >> Remaining madvise() users: >> exec.c: limited to __linux__ and/or MADV_MERGEABLE (no POSIX >> equivalent) >> kvm-all.c: limited to MADV_DONTFORK (no POSIX equivalent), >> otherwise runtime error if !kvm_has_sync_mmu() >> hw/virtio-balloon.c: limited to __linux__ > > For consistency, I'd convert all users. If an OS doesn't support a > flag, we can return something like -ENOTSUP which may be checked by > the caller if it cares. Will do. >> diff --git a/configure b/configure >> index 4061cb7..5e6f3e1 100755 >> --- a/configure >> +++ b/configure >> @@ -2069,6 +2069,17 @@ if compile_prog "" "" ; then >> fi >> >> ########################################## >> +# check if we have posix_madvise >> + >> +cat > $TMPC << EOF >> +#include <sys/mman.h> >> +int main(void) { posix_madvise(NULL, 0, POSIX_MADV_DONTNEED); >> return 0; } >> +EOF >> +if compile_prog "" "" ; then >> + QEMU_CFLAGS="-DCONFIG_POSIX_MADVISE ${QEMU_CFLAGS}" > > Please take a look around configure:2444 how to add new config flags. I did. It's just not obvious that they find their way from the Makefile to a C header, unlike autoconf. > I'd also add a similar check for madvise() if posix_madvise() check > fails. Fine with me. >> diff --git a/osdep.c b/osdep.c >> index 30426ff..8c09597 100644 >> --- a/osdep.c >> +++ b/osdep.c >> @@ -28,6 +28,7 @@ >> #include <errno.h> >> #include <unistd.h> >> #include <fcntl.h> >> +#include <sys/mman.h> >> >> /* Needed early for CONFIG_BSD etc. */ >> #include "config-host.h" >> @@ -139,6 +140,31 @@ void qemu_vfree(void *ptr) >> >> #endif >> >> +int qemu_madvise(void *addr, size_t len, int advice) >> +{ >> +#ifdef CONFIG_POSIX_MADVISE >> + switch (advice) { >> + case QEMU_MADV_DONTNEED: >> + advice = POSIX_MADV_DONTNEED; >> + break; >> + default: >> + fprintf(stderr, "Advice %d unhandled.\n", advice); >> + abort(); > > This should be an assert, it's an internal error. It's also common to > both cases. > >> + } >> + return posix_madvise(addr, len, advice); >> +#else > > #elif defined(CONFIG_MADVISE) > >> + switch (advice) { >> + case QEMU_MADV_DONTNEED: >> + advice = MADV_DONTNEED; >> + break; > > case QEMU_MADV_DONTFORK: > #ifndef MADV_DONTFORK > return -ENOTSUP; > #else > advice = MADV_DONTFORK; > break; > #endif > > Same goes for MADV_MERGEABLE. So you disagree with or didn't yet read Alex' suggestion of eliminating this mapping code in qemu_madvise() altogether? Doing that in a sensible way would allow code to do: #ifdef QEMU_MADV_MERGEABLE ... as before at compile-time. The only qemu_madvise() error condition would then be the #else below. >> + default: >> + fprintf(stderr, "Advice %d unhandled.\n", advice); >> + abort(); >> + } >> + return madvise(addr, len, advice); > > #else > return -ENOTSUP; Thanks, Andreas
On Sun, Sep 12, 2010 at 8:50 AM, Andreas Färber <andreas.faerber@web.de> wrote: > Am 12.09.2010 um 09:20 schrieb Blue Swirl: > >> On Sat, Sep 11, 2010 at 5:05 PM, Andreas Färber <andreas.faerber@web.de> >> wrote: >>> >>> Use qemu_madvise() in arch_init.c's ram_load(). >>> >>> >>> http://www.opengroup.org/onlinepubs/9699919799/functions/posix_madvise.html >>> >>> Remaining madvise() users: >>> exec.c: limited to __linux__ and/or MADV_MERGEABLE (no POSIX equivalent) >>> kvm-all.c: limited to MADV_DONTFORK (no POSIX equivalent), >>> otherwise runtime error if !kvm_has_sync_mmu() >>> hw/virtio-balloon.c: limited to __linux__ >> >> For consistency, I'd convert all users. If an OS doesn't support a >> flag, we can return something like -ENOTSUP which may be checked by >> the caller if it cares. > > Will do. > >>> diff --git a/configure b/configure >>> index 4061cb7..5e6f3e1 100755 >>> --- a/configure >>> +++ b/configure >>> @@ -2069,6 +2069,17 @@ if compile_prog "" "" ; then >>> fi >>> >>> ########################################## >>> +# check if we have posix_madvise >>> + >>> +cat > $TMPC << EOF >>> +#include <sys/mman.h> >>> +int main(void) { posix_madvise(NULL, 0, POSIX_MADV_DONTNEED); return 0; >>> } >>> +EOF >>> +if compile_prog "" "" ; then >>> + QEMU_CFLAGS="-DCONFIG_POSIX_MADVISE ${QEMU_CFLAGS}" >> >> Please take a look around configure:2444 how to add new config flags. > > I did. It's just not obvious that they find their way from the Makefile to a > C header, unlike autoconf. > >> I'd also add a similar check for madvise() if posix_madvise() check >> fails. > > Fine with me. > >>> diff --git a/osdep.c b/osdep.c >>> index 30426ff..8c09597 100644 >>> --- a/osdep.c >>> +++ b/osdep.c >>> @@ -28,6 +28,7 @@ >>> #include <errno.h> >>> #include <unistd.h> >>> #include <fcntl.h> >>> +#include <sys/mman.h> >>> >>> /* Needed early for CONFIG_BSD etc. */ >>> #include "config-host.h" >>> @@ -139,6 +140,31 @@ void qemu_vfree(void *ptr) >>> >>> #endif >>> >>> +int qemu_madvise(void *addr, size_t len, int advice) >>> +{ >>> +#ifdef CONFIG_POSIX_MADVISE >>> + switch (advice) { >>> + case QEMU_MADV_DONTNEED: >>> + advice = POSIX_MADV_DONTNEED; >>> + break; >>> + default: >>> + fprintf(stderr, "Advice %d unhandled.\n", advice); >>> + abort(); >> >> This should be an assert, it's an internal error. It's also common to >> both cases. >> >>> + } >>> + return posix_madvise(addr, len, advice); >>> +#else >> >> #elif defined(CONFIG_MADVISE) >> >>> + switch (advice) { >>> + case QEMU_MADV_DONTNEED: >>> + advice = MADV_DONTNEED; >>> + break; >> >> case QEMU_MADV_DONTFORK: >> #ifndef MADV_DONTFORK >> return -ENOTSUP; >> #else >> advice = MADV_DONTFORK; >> break; >> #endif >> >> Same goes for MADV_MERGEABLE. > > So you disagree with or didn't yet read Alex' suggestion of eliminating this > mapping code in qemu_madvise() altogether? > Doing that in a sensible way would allow code to do: > > #ifdef QEMU_MADV_MERGEABLE > ... > > as before at compile-time. The only qemu_madvise() error condition would > then be the #else below. That's one way too, if nobody cares about madvise() return values for MADV_MERGEABLE. In any case I'd like to eliminate the #ifdefs in other places than osdep.[ch].
diff --git a/arch_init.c b/arch_init.c index e468c0c..a910033 100644 --- a/arch_init.c +++ b/arch_init.c @@ -396,7 +396,7 @@ int ram_load(QEMUFile *f, void *opaque, int version_id) #ifndef _WIN32 if (ch == 0 && (!kvm_enabled() || kvm_has_sync_mmu())) { - madvise(host, TARGET_PAGE_SIZE, MADV_DONTNEED); + qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED); } #endif } else if (flags & RAM_SAVE_FLAG_PAGE) { diff --git a/configure b/configure index 4061cb7..5e6f3e1 100755 --- a/configure +++ b/configure @@ -2069,6 +2069,17 @@ if compile_prog "" "" ; then fi ########################################## +# check if we have posix_madvise + +cat > $TMPC << EOF +#include <sys/mman.h> +int main(void) { posix_madvise(NULL, 0, POSIX_MADV_DONTNEED); return 0; } +EOF +if compile_prog "" "" ; then + QEMU_CFLAGS="-DCONFIG_POSIX_MADVISE ${QEMU_CFLAGS}" +fi + +########################################## # check if trace backend exists sh "$source_path/tracetool" "--$trace_backend" --check-backend > /dev/null 2> /dev/null diff --git a/osdep.c b/osdep.c index 30426ff..8c09597 100644 --- a/osdep.c +++ b/osdep.c @@ -28,6 +28,7 @@ #include <errno.h> #include <unistd.h> #include <fcntl.h> +#include <sys/mman.h> /* Needed early for CONFIG_BSD etc. */ #include "config-host.h" @@ -139,6 +140,31 @@ void qemu_vfree(void *ptr) #endif +int qemu_madvise(void *addr, size_t len, int advice) +{ +#ifdef CONFIG_POSIX_MADVISE + switch (advice) { + case QEMU_MADV_DONTNEED: + advice = POSIX_MADV_DONTNEED; + break; + default: + fprintf(stderr, "Advice %d unhandled.\n", advice); + abort(); + } + return posix_madvise(addr, len, advice); +#else + switch (advice) { + case QEMU_MADV_DONTNEED: + advice = MADV_DONTNEED; + break; + default: + fprintf(stderr, "Advice %d unhandled.\n", advice); + abort(); + } + return madvise(addr, len, advice); +#endif +} + int qemu_create_pidfile(const char *filename) { char buffer[128]; diff --git a/osdep.h b/osdep.h index 1cdc7e2..9f0da46 100644 --- a/osdep.h +++ b/osdep.h @@ -90,6 +90,10 @@ void *qemu_memalign(size_t alignment, size_t size); void *qemu_vmalloc(size_t size); void qemu_vfree(void *ptr); +#define QEMU_MADV_DONTNEED 1 + +int qemu_madvise(void *addr, size_t len, int advice); + int qemu_create_pidfile(const char *filename); #ifdef _WIN32 diff --git a/vl.c b/vl.c index 3f45aa9..d352d18 100644 --- a/vl.c +++ b/vl.c @@ -80,9 +80,6 @@ #include <net/if.h> #include <syslog.h> #include <stropts.h> -/* See MySQL bug #7156 (http://bugs.mysql.com/bug.php?id=7156) for - discussion about Solaris header problems */ -extern int madvise(caddr_t, size_t, int); #endif #endif #endif