
[v3,24/25] tcg: Allocate a guard page after code_gen_buffer

Message ID: 1442953507-4074-25-git-send-email-rth@twiddle.net
State: New

Commit Message

Richard Henderson Sept. 22, 2015, 8:25 p.m. UTC
This will catch any overflow of the buffer.

Add a native win32 alternative for alloc_code_gen_buffer;
remove the malloc alternative.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 translate-all.c | 210 ++++++++++++++++++++++++++++++++------------------------
 1 file changed, 119 insertions(+), 91 deletions(-)
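
For readers new to the technique, the guard page works by reserving one extra page past the writable buffer and leaving it inaccessible, so any code generation that runs off the end faults immediately instead of silently corrupting whatever sits next in memory. A minimal standalone sketch of the pattern on POSIX (not part of the patch; the sizes and names are purely illustrative):

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
    size_t page = sysconf(_SC_PAGESIZE);
    size_t size = 16 * page;                  /* usable buffer size */

    /* Reserve the buffer plus one extra page, all inaccessible.  */
    char *buf = mmap(NULL, size + page, PROT_NONE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (buf == MAP_FAILED) {
        return 1;
    }

    /* Make only the first 'size' bytes usable; the final page stays
       PROT_NONE and acts as the guard page.  */
    mprotect(buf, size, PROT_READ | PROT_WRITE);

    memset(buf, 0, size);                     /* fine */
    /* buf[size] = 0; */                      /* would fault on the guard page */

    printf("buffer at %p, guard page at %p\n",
           (void *)buf, (void *)(buf + size));
    munmap(buf, size + page);
    return 0;
}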

Comments

Peter Maydell Sept. 23, 2015, 7:39 p.m. UTC | #1
On 22 September 2015 at 13:25, Richard Henderson <rth@twiddle.net> wrote:
> This will catch any overflow of the buffer.
>
> Add a native win32 alternative for alloc_code_gen_buffer;
> remove the malloc alternative.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  translate-all.c | 210 ++++++++++++++++++++++++++++++++------------------------
>  1 file changed, 119 insertions(+), 91 deletions(-)
>
> diff --git a/translate-all.c b/translate-all.c
> index 4c994bb..0049927 100644
> --- a/translate-all.c
> +++ b/translate-all.c
> @@ -311,31 +311,6 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t retaddr)
>      return false;
>  }
>
> -#ifdef _WIN32
> -static __attribute__((unused)) void map_exec(void *addr, long size)
> -{
> -    DWORD old_protect;
> -    VirtualProtect(addr, size,
> -                   PAGE_EXECUTE_READWRITE, &old_protect);
> -}
> -#else
> -static __attribute__((unused)) void map_exec(void *addr, long size)
> -{
> -    unsigned long start, end, page_size;
> -
> -    page_size = getpagesize();
> -    start = (unsigned long)addr;
> -    start &= ~(page_size - 1);
> -
> -    end = (unsigned long)addr + size;
> -    end += page_size - 1;
> -    end &= ~(page_size - 1);
> -
> -    mprotect((void *)start, end - start,
> -             PROT_READ | PROT_WRITE | PROT_EXEC);
> -}
> -#endif
> -
>  void page_size_init(void)
>  {
>      /* NOTE: we can always suppose that qemu_host_page_size >=
> @@ -472,14 +447,6 @@ static inline PageDesc *page_find(tb_page_addr_t index)
>  #define USE_STATIC_CODE_GEN_BUFFER
>  #endif
>
> -/* ??? Should configure for this, not list operating systems here.  */
> -#if (defined(__linux__) \
> -    || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
> -    || defined(__DragonFly__) || defined(__OpenBSD__) \
> -    || defined(__NetBSD__))
> -# define USE_MMAP
> -#endif
> -
>  /* Minimum size of the code gen buffer.  This number is randomly chosen,
>     but not so small that we can't have a fair number of TB's live.  */
>  #define MIN_CODE_GEN_BUFFER_SIZE     (1024u * 1024)
> @@ -567,22 +534,102 @@ static inline void *split_cross_256mb(void *buf1, size_t size1)
>  static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
>      __attribute__((aligned(CODE_GEN_ALIGN)));
>
> +# ifdef _WIN32

Why the space before ifdef here?

> +static inline void do_protect(void *addr, long size, int prot)
> +{
> +    DWORD old_protect;
> +    VirtualProtect(addr, size, PAGE_EXECUTE_READWRITE, &old_protect);

The 'prot' argument isn't used -- did you mean to pass it
in as VirtualProtect argument 3?

> +}
> +
> +static inline void map_exec(void *addr, long size)
> +{
> +    do_protect(addr, size, PAGE_EXECUTE_READWRITE);
> +}
> +
> +static inline void map_none(void *addr, long size)
> +{
> +    do_protect(addr, size, PAGE_NOACCESS);
> +}
> +# else
> +static inline void do_protect(void *addr, long size, int prot)
> +{
> +    uintptr_t start, end;
> +
> +    start = (uintptr_t)addr;
> +    start &= qemu_real_host_page_mask;
> +
> +    end = (uintptr_t)addr + size;
> +    end = ROUND_UP(end, qemu_real_host_page_size);
> +
> +    mprotect((void *)start, end - start, prot);
> +}
> +
> +static inline void map_exec(void *addr, long size)
> +{
> +    do_protect(addr, size, PROT_READ | PROT_WRITE | PROT_EXEC);
> +}
> +
> +static inline void map_none(void *addr, long size)
> +{
> +    do_protect(addr, size, PROT_NONE);
> +}
> +# endif /* WIN32 */
> +
>  static inline void *alloc_code_gen_buffer(void)
>  {
>      void *buf = static_code_gen_buffer;
> +    size_t full_size, size;
> +
> +    /* The size of the buffer, rounded down to end on a page boundary.  */
> +    full_size = (((uintptr_t)buf + sizeof(static_code_gen_buffer))
> +                 & qemu_real_host_page_mask) - (uintptr_t)buf;
> +
> +    /* Reserve a guard page.  */
> +    size = full_size - qemu_real_host_page_size;
> +
> +    /* Honor a command-line option limiting the size of the buffer.  */
> +    if (size > tcg_ctx.code_gen_buffer_size) {
> +        size = (((uintptr_t)buf + tcg_ctx.code_gen_buffer_size)
> +                & qemu_real_host_page_mask) - (uintptr_t)buf;
> +    }
> +    tcg_ctx.code_gen_buffer_size = size;
> +
>  #ifdef __mips__
> -    if (cross_256mb(buf, tcg_ctx.code_gen_buffer_size)) {
> -        buf = split_cross_256mb(buf, tcg_ctx.code_gen_buffer_size);
> +    if (cross_256mb(buf, size)) {
> +        buf = split_cross_256mb(buf, size);
> +        size = tcg_ctx.code_gen_buffer_size;
>      }
>  #endif
> -    map_exec(buf, tcg_ctx.code_gen_buffer_size);
> +
> +    map_exec(buf, size);
> +    map_none(buf + size, qemu_real_host_page_size);
> +    qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);

I think we're now doing the MADV_HUGEPAGE over "buffer size
minus a page" rather than "buffer size". Does that mean
we've gone from doing the madvise on a whole number of
hugepages to doing it on something that's not a whole number
of hugepages, and if so does the kernel decide not to use
hugepages here?

(aka, should we make the buffer size we allocate size + a
guard page, rather than taking the guard page out of the size?)
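
One way to make the hint unambiguous either way would be to trim the madvise range inward to whole hugepage boundaries before issuing it. A sketch only, reusing the ROUND_UP and qemu_madvise helpers already used in this patch; the 2 MB HPAGE_SIZE constant and the function name are illustrative, not anything QEMU defines:

#define HPAGE_SIZE (2 * 1024 * 1024)

static void madvise_hugepage_aligned(void *addr, size_t size)
{
    /* Hint only the fully hugepage-aligned middle of the region.  */
    uintptr_t start = ROUND_UP((uintptr_t)addr, HPAGE_SIZE);
    uintptr_t end = ((uintptr_t)addr + size) & ~(uintptr_t)(HPAGE_SIZE - 1);

    if (end > start) {
        qemu_madvise((void *)start, end - start, QEMU_MADV_HUGEPAGE);
    }
}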


> +
>      return buf;
>  }
> -#elif defined(USE_MMAP)
> +#elif defined(_WIN32)
> +static inline void *alloc_code_gen_buffer(void)
> +{
> +    size_t size = tcg_ctx.code_gen_buffer_size;
> +    void *buf1, *buf2;
> +
> +    /* Perform the allocation in two steps, so that the guard page
> +       is reserved but uncommitted.  */
> +    buf1 = VirtualAlloc(NULL, size + qemu_real_host_page_size,
> +                        MEM_RESERVE, PAGE_NOACCESS);
> +    if (buf1 != NULL) {
> +        buf2 = VirtualAlloc(buf1, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
> +        assert(buf1 == buf2);
> +    }
> +
> +    return buf1;
> +}
> +#else
>  static inline void *alloc_code_gen_buffer(void)
>  {
>      int flags = MAP_PRIVATE | MAP_ANONYMOUS;
>      uintptr_t start = 0;
> +    size_t size = tcg_ctx.code_gen_buffer_size;
>      void *buf;
>
>      /* Constrain the position of the buffer based on the host cpu.
> @@ -598,86 +645,70 @@ static inline void *alloc_code_gen_buffer(void)
>         Leave the choice of exact location with the kernel.  */
>      flags |= MAP_32BIT;
>      /* Cannot expect to map more than 800MB in low memory.  */
> -    if (tcg_ctx.code_gen_buffer_size > 800u * 1024 * 1024) {
> -        tcg_ctx.code_gen_buffer_size = 800u * 1024 * 1024;
> +    if (size > 800u * 1024 * 1024) {
> +        tcg_ctx.code_gen_buffer_size = size = 800u * 1024 * 1024;
>      }
>  # elif defined(__sparc__)
>      start = 0x40000000ul;
>  # elif defined(__s390x__)
>      start = 0x90000000ul;
>  # elif defined(__mips__)
> -    /* ??? We ought to more explicitly manage layout for softmmu too.  */
> -#  ifdef CONFIG_USER_ONLY
> -    start = 0x68000000ul;
> -#  elif _MIPS_SIM == _ABI64
> +#  if _MIPS_SIM == _ABI64
>      start = 0x128000000ul;
>  #  else
>      start = 0x08000000ul;
>  #  endif
>  # endif
>
> -    buf = mmap((void *)start, tcg_ctx.code_gen_buffer_size,
> -               PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
> +    buf = mmap((void *)start, size + qemu_real_host_page_size,
> +               PROT_NONE, flags, -1, 0);
>      if (buf == MAP_FAILED) {
>          return NULL;
>      }
>
>  #ifdef __mips__
> -    if (cross_256mb(buf, tcg_ctx.code_gen_buffer_size)) {
> +    if (cross_256mb(buf, size)) {
>          /* Try again, with the original still mapped, to avoid re-acquiring
>             that 256mb crossing.  This time don't specify an address.  */
> -        size_t size2, size1 = tcg_ctx.code_gen_buffer_size;
> -        void *buf2 = mmap(NULL, size1, PROT_WRITE | PROT_READ | PROT_EXEC,
> -                          flags, -1, 0);
> -        if (buf2 != MAP_FAILED) {
> -            if (!cross_256mb(buf2, size1)) {
> +        size_t size2;
> +        void *buf2 = mmap(NULL, size + qemu_real_host_page_size,
> +                          PROT_NONE, flags, -1, 0);
> +        switch (buf2 != MAP_FAILED) {
> +        case 1:
> +            if (!cross_256mb(buf2, size)) {
>                  /* Success!  Use the new buffer.  */
> -                munmap(buf, size1);
> -                return buf2;
> +                munmap(buf, size);
> +                break;
>              }
>              /* Failure.  Work with what we had.  */
> -            munmap(buf2, size1);
> +            munmap(buf2, size);
> +            /* fallthru */
> +        default:
> +            /* Split the original buffer.  Free the smaller half.  */
> +            buf2 = split_cross_256mb(buf, size);
> +            size2 = tcg_ctx.code_gen_buffer_size;
> +            if (buf == buf2) {
> +                munmap(buf + size2 + qemu_real_host_page_size, size - size2);
> +            } else {
> +                munmap(buf, size - size2);
> +            }
> +            size = size2;
> +            break;
>          }
> -
> -        /* Split the original buffer.  Free the smaller half.  */
> -        buf2 = split_cross_256mb(buf, size1);
> -        size2 = tcg_ctx.code_gen_buffer_size;
> -        munmap(buf + (buf == buf2 ? size2 : 0), size1 - size2);
> -        return buf2;
> +        buf = buf2;
>      }
>  #endif
>
> -    return buf;
> -}
> -#else
> -static inline void *alloc_code_gen_buffer(void)
> -{
> -    void *buf = g_try_malloc(tcg_ctx.code_gen_buffer_size);
> +    /* Make the final buffer accessable.  The guard page at the end
> +       will remain inaccessable with PROT_NONE.  */

"accessible"; "inaccessible".

> +    mprotect(buf, size, PROT_WRITE | PROT_READ | PROT_EXEC);
>
> -    if (buf == NULL) {
> -        return NULL;
> -    }
> +    /* Request large pages for the buffer.  */
> +    qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
>
> -#ifdef __mips__
> -    if (cross_256mb(buf, tcg_ctx.code_gen_buffer_size)) {
> -        void *buf2 = g_malloc(tcg_ctx.code_gen_buffer_size);
> -        if (buf2 != NULL && !cross_256mb(buf2, size1)) {
> -            /* Success!  Use the new buffer.  */
> -            free(buf);
> -            buf = buf2;
> -        } else {
> -            /* Failure.  Work with what we had.  Since this is malloc
> -               and not mmap, we can't free the other half.  */
> -            free(buf2);
> -            buf = split_cross_256mb(buf, tcg_ctx.code_gen_buffer_size);
> -        }
> -    }
> -#endif
> -
> -    map_exec(buf, tcg_ctx.code_gen_buffer_size);
>      return buf;
>  }
> -#endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
> +#endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */
>
>  static inline void code_gen_alloc(size_t tb_size)
>  {
> @@ -688,9 +719,6 @@ static inline void code_gen_alloc(size_t tb_size)
>          exit(1);
>      }
>
> -    qemu_madvise(tcg_ctx.code_gen_buffer, tcg_ctx.code_gen_buffer_size,
> -                 QEMU_MADV_HUGEPAGE);
> -
>      /* Estimate a good size for the number of TBs we can support.  We
>         still haven't deducted the prologue from the buffer size here,
>         but that's minimal and won't affect the estimate much.  */
> @@ -708,8 +736,8 @@ static inline void code_gen_alloc(size_t tb_size)
>  void tcg_exec_init(unsigned long tb_size)
>  {
>      cpu_gen_init();
> -    code_gen_alloc(tb_size);
>      page_init();
> +    code_gen_alloc(tb_size);
>  #if defined(CONFIG_SOFTMMU)
>      /* There's no guest base to take into account, so go ahead and
>         initialize the prologue now.  */
> --
> 2.4.3
>

thanks
-- PMM
Richard Henderson Sept. 23, 2015, 8 p.m. UTC | #2
On 09/23/2015 12:39 PM, Peter Maydell wrote:
>> +# ifdef _WIN32
> 
> Why the space before ifdef here?

#ifdef USE_STATIC_CODE_GEN_BUFFER
# ifdef _WIN32
# else
# endif /* WIN32 */
#elif defined(_WIN32)
#else
#endif

It's something that glibc requires for its coding style, and I find myself
using it most of the time.

>> +static inline void do_protect(void *addr, long size, int prot)
>> +{
>> +    DWORD old_protect;
>> +    VirtualProtect(addr, size, PAGE_EXECUTE_READWRITE, &old_protect);
> 
> The 'prot' argument isn't used -- did you mean to pass it
> in as VirtualProtect argument 3?

Oops, yes.

>>  static inline void *alloc_code_gen_buffer(void)
>>  {
>>      void *buf = static_code_gen_buffer;
>> +    size_t full_size, size;
>> +
>> +    /* The size of the buffer, rounded down to end on a page boundary.  */
>> +    full_size = (((uintptr_t)buf + sizeof(static_code_gen_buffer))
>> +                 & qemu_real_host_page_mask) - (uintptr_t)buf;
>> +
>> +    /* Reserve a guard page.  */
>> +    size = full_size - qemu_real_host_page_size;
>> +
>> +    /* Honor a command-line option limiting the size of the buffer.  */
>> +    if (size > tcg_ctx.code_gen_buffer_size) {
>> +        size = (((uintptr_t)buf + tcg_ctx.code_gen_buffer_size)
>> +                & qemu_real_host_page_mask) - (uintptr_t)buf;
>> +    }
>> +    tcg_ctx.code_gen_buffer_size = size;
>> +
>>  #ifdef __mips__
>> -    if (cross_256mb(buf, tcg_ctx.code_gen_buffer_size)) {
>> -        buf = split_cross_256mb(buf, tcg_ctx.code_gen_buffer_size);
>> +    if (cross_256mb(buf, size)) {
>> +        buf = split_cross_256mb(buf, size);
>> +        size = tcg_ctx.code_gen_buffer_size;
>>      }
>>  #endif
>> -    map_exec(buf, tcg_ctx.code_gen_buffer_size);
>> +
>> +    map_exec(buf, size);
>> +    map_none(buf + size, qemu_real_host_page_size);
>> +    qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
> 
> I think we're now doing the MADV_HUGEPAGE over "buffer size
> minus a page" rather than "buffer size". Does that mean
> we've gone from doing the madvise on a whole number of
> hugepages to doing it on something that's not a whole number
> of hugepages, and if so does the kernel decide not to use
> hugepages here?

On the whole I don't think it matters.  The static buffer isn't page aligned to
begin with, much less hugepage aligned, so the fact that we're allocating a
round number like 32mb here doesn't really mean much.  The beginning and/or end
pages of the buffer definitely aren't going to be hugepage.

Worse, the same is true for the mmap path, since I've never seen the kernel
select a hugepage aligned address.  You'd think that adding MAP_HUGEPAGE would
be akin to MADV_HUGEPAGE, with the additional hint that the address should be
appropriately aligned for the hugepage, but no.  It implies forced use of
something from the hugepage pool and that requires extra suid capabilities.

I've wondered about over-allocating on the mmap path, so that we can choose the
hugepage aligned subregion.  But as far as I can tell, my kernel doesn't
allocate hugepages at all, no matter what we do.  So it seems a little silly to
go so far out of the way to get an aligned buffer.
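
For concreteness, the over-allocation idea would look something like the sketch below: reserve one extra hugepage worth of address space with PROT_NONE, then return the unaligned head and tail to the kernel so the remaining region starts on a hugepage boundary. The 2 MB constant and the helper name are illustrative, not part of the patch:

#include <stdint.h>
#include <sys/mman.h>

#define HPAGE_SIZE (2 * 1024 * 1024)

static void *mmap_hugepage_aligned(size_t size)
{
    size_t total = size + HPAGE_SIZE;
    char *raw, *aligned;

    raw = mmap(NULL, total, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (raw == MAP_FAILED) {
        return NULL;
    }

    /* First hugepage boundary at or above the start of the mapping.  */
    aligned = (char *)(((uintptr_t)raw + HPAGE_SIZE - 1)
                       & ~(uintptr_t)(HPAGE_SIZE - 1));

    /* Unmap the unused head and tail of the reservation.  The caller
       still has to mprotect/madvise the aligned region and leave room
       for the guard page, as the patch does.  */
    if (aligned > raw) {
        munmap(raw, aligned - raw);
    }
    munmap(aligned + size, (raw + total) - (aligned + size));

    return aligned;
}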


r~
Peter Maydell Sept. 23, 2015, 8:37 p.m. UTC | #3
On 23 September 2015 at 13:00, Richard Henderson <rth@twiddle.net> wrote:
> On 09/23/2015 12:39 PM, Peter Maydell wrote:
>> I think we're now doing the MADV_HUGEPAGE over "buffer size
>> minus a page" rather than "buffer size". Does that mean
>> we've gone from doing the madvise on a whole number of
>> hugepages to doing it on something that's not a whole number
>> of hugepages, and if so does the kernel decide not to use
>> hugepages here?
>
> On the whole I don't think it matters.  The static buffer isn't page aligned to
> begin with, much less hugepage aligned, so the fact that we're allocating a
> round number like 32mb here doesn't really mean much.  The beginning and/or end
> pages of the buffer definitely aren't going to be hugepage.
>
> Worse, the same is true for the mmap path, since I've never seen the kernel
> select a hugepage aligned address.  You'd think that adding MAP_HUGEPAGE would
> be akin to MADV_HUGEPAGE, with the additional hint that the address should be
> appropriately aligned for the hugepage, but no.  It implies forced use of
> something from the hugepage pool and that requires extra suid capabilities.
>
> I've wondered about over-allocating on the mmap path, so that we can choose the
> hugepage aligned subregion.  But as far as I can tell, my kernel doesn't
> allocate hugepages at all, no matter what we do.  So it seems a little silly to
> go so far out of the way to get an aligned buffer.

This raises the converse question of "why are we bothering with
MADV_HUGEPAGE at all?" :-)

-- PMM
Richard Henderson Sept. 23, 2015, 10:12 p.m. UTC | #4
On 09/23/2015 01:37 PM, Peter Maydell wrote:
> On 23 September 2015 at 13:00, Richard Henderson <rth@twiddle.net> wrote:
>> I've wondered about over-allocating on the mmap path, so that we can choose the
>> hugepage aligned subregion.  But as far as I can tell, my kernel doesn't
>> allocate hugepages at all, no matter what we do.  So it seems a little silly to
>> go so far out of the way to get an aligned buffer.
>
> This raises the converse question of "why are we bothering with
> MADV_HUGEPAGE at all?" :-)

I beg your pardon -- I was merely looking in the wrong place for the info. 
/proc/pid/smaps does show that nearly all of the area is using huge pages:

Main memory:
7fc130000000-7fc1b0000000 rw-p 00000000 00:00 0
Size:            2097152 kB
Rss:               88064 kB
Pss:               88064 kB
Shared_Clean:          0 kB
Shared_Dirty:          0 kB
Private_Clean:         0 kB
Private_Dirty:     88064 kB
Referenced:        88064 kB
Anonymous:         88064 kB
AnonHugePages:     88064 kB
Swap:                  0 kB
KernelPageSize:        4 kB
MMUPageSize:           4 kB
Locked:                0 kB

code_gen_buffer:
7fc1d76e6000-7fc1f76e6000 rwxp 00000000 00:00 0
Size:             524288 kB
Rss:               58472 kB
Pss:               58472 kB
Shared_Clean:          0 kB
Shared_Dirty:          0 kB
Private_Clean:         0 kB
Private_Dirty:     58472 kB
Referenced:        58472 kB
Anonymous:         58472 kB
AnonHugePages:     57344 kB
Swap:                  0 kB
KernelPageSize:        4 kB
MMUPageSize:           4 kB
Locked:                0 kB
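
A small sketch (illustrative, not from the thread) of pulling the same figure out programmatically: scan /proc/self/smaps for the mapping that contains a given address and print its AnonHugePages line.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void report_thp(void *addr)
{
    FILE *f = fopen("/proc/self/smaps", "r");
    char line[256];
    unsigned long long lo, hi, p = (uintptr_t)addr;
    int inside = 0;

    if (!f) {
        return;
    }
    while (fgets(line, sizeof(line), f)) {
        if (sscanf(line, "%llx-%llx", &lo, &hi) == 2) {
            /* A new "start-end perms ..." header line; note whether it
               covers the address we care about.  */
            inside = (p >= lo && p < hi);
        } else if (inside && strncmp(line, "AnonHugePages:", 14) == 0) {
            fputs(line, stdout);
            break;
        }
    }
    fclose(f);
}

Calling this with tcg_ctx.code_gen_buffer after allocation would print the AnonHugePages line for the code_gen_buffer mapping shown above.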


r~

Patch

diff --git a/translate-all.c b/translate-all.c
index 4c994bb..0049927 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -311,31 +311,6 @@  bool cpu_restore_state(CPUState *cpu, uintptr_t retaddr)
     return false;
 }
 
-#ifdef _WIN32
-static __attribute__((unused)) void map_exec(void *addr, long size)
-{
-    DWORD old_protect;
-    VirtualProtect(addr, size,
-                   PAGE_EXECUTE_READWRITE, &old_protect);
-}
-#else
-static __attribute__((unused)) void map_exec(void *addr, long size)
-{
-    unsigned long start, end, page_size;
-
-    page_size = getpagesize();
-    start = (unsigned long)addr;
-    start &= ~(page_size - 1);
-
-    end = (unsigned long)addr + size;
-    end += page_size - 1;
-    end &= ~(page_size - 1);
-
-    mprotect((void *)start, end - start,
-             PROT_READ | PROT_WRITE | PROT_EXEC);
-}
-#endif
-
 void page_size_init(void)
 {
     /* NOTE: we can always suppose that qemu_host_page_size >=
@@ -472,14 +447,6 @@  static inline PageDesc *page_find(tb_page_addr_t index)
 #define USE_STATIC_CODE_GEN_BUFFER
 #endif
 
-/* ??? Should configure for this, not list operating systems here.  */
-#if (defined(__linux__) \
-    || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
-    || defined(__DragonFly__) || defined(__OpenBSD__) \
-    || defined(__NetBSD__))
-# define USE_MMAP
-#endif
-
 /* Minimum size of the code gen buffer.  This number is randomly chosen,
    but not so small that we can't have a fair number of TB's live.  */
 #define MIN_CODE_GEN_BUFFER_SIZE     (1024u * 1024)
@@ -567,22 +534,102 @@  static inline void *split_cross_256mb(void *buf1, size_t size1)
 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
     __attribute__((aligned(CODE_GEN_ALIGN)));
 
+# ifdef _WIN32
+static inline void do_protect(void *addr, long size, int prot)
+{
+    DWORD old_protect;
+    VirtualProtect(addr, size, PAGE_EXECUTE_READWRITE, &old_protect);
+}
+
+static inline void map_exec(void *addr, long size)
+{
+    do_protect(addr, size, PAGE_EXECUTE_READWRITE);
+}
+
+static inline void map_none(void *addr, long size)
+{
+    do_protect(addr, size, PAGE_NOACCESS);
+}
+# else
+static inline void do_protect(void *addr, long size, int prot)
+{
+    uintptr_t start, end;
+
+    start = (uintptr_t)addr;
+    start &= qemu_real_host_page_mask;
+
+    end = (uintptr_t)addr + size;
+    end = ROUND_UP(end, qemu_real_host_page_size);
+
+    mprotect((void *)start, end - start, prot);
+}
+
+static inline void map_exec(void *addr, long size)
+{
+    do_protect(addr, size, PROT_READ | PROT_WRITE | PROT_EXEC);
+}
+
+static inline void map_none(void *addr, long size)
+{
+    do_protect(addr, size, PROT_NONE);
+}
+# endif /* WIN32 */
+
 static inline void *alloc_code_gen_buffer(void)
 {
     void *buf = static_code_gen_buffer;
+    size_t full_size, size;
+
+    /* The size of the buffer, rounded down to end on a page boundary.  */
+    full_size = (((uintptr_t)buf + sizeof(static_code_gen_buffer))
+                 & qemu_real_host_page_mask) - (uintptr_t)buf;
+
+    /* Reserve a guard page.  */
+    size = full_size - qemu_real_host_page_size;
+
+    /* Honor a command-line option limiting the size of the buffer.  */
+    if (size > tcg_ctx.code_gen_buffer_size) {
+        size = (((uintptr_t)buf + tcg_ctx.code_gen_buffer_size)
+                & qemu_real_host_page_mask) - (uintptr_t)buf;
+    }
+    tcg_ctx.code_gen_buffer_size = size;
+
 #ifdef __mips__
-    if (cross_256mb(buf, tcg_ctx.code_gen_buffer_size)) {
-        buf = split_cross_256mb(buf, tcg_ctx.code_gen_buffer_size);
+    if (cross_256mb(buf, size)) {
+        buf = split_cross_256mb(buf, size);
+        size = tcg_ctx.code_gen_buffer_size;
     }
 #endif
-    map_exec(buf, tcg_ctx.code_gen_buffer_size);
+
+    map_exec(buf, size);
+    map_none(buf + size, qemu_real_host_page_size);
+    qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
+
     return buf;
 }
-#elif defined(USE_MMAP)
+#elif defined(_WIN32)
+static inline void *alloc_code_gen_buffer(void)
+{
+    size_t size = tcg_ctx.code_gen_buffer_size;
+    void *buf1, *buf2;
+
+    /* Perform the allocation in two steps, so that the guard page
+       is reserved but uncommitted.  */
+    buf1 = VirtualAlloc(NULL, size + qemu_real_host_page_size,
+                        MEM_RESERVE, PAGE_NOACCESS);
+    if (buf1 != NULL) {
+        buf2 = VirtualAlloc(buf1, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
+        assert(buf1 == buf2);
+    }
+
+    return buf1;
+}
+#else
 static inline void *alloc_code_gen_buffer(void)
 {
     int flags = MAP_PRIVATE | MAP_ANONYMOUS;
     uintptr_t start = 0;
+    size_t size = tcg_ctx.code_gen_buffer_size;
     void *buf;
 
     /* Constrain the position of the buffer based on the host cpu.
@@ -598,86 +645,70 @@  static inline void *alloc_code_gen_buffer(void)
        Leave the choice of exact location with the kernel.  */
     flags |= MAP_32BIT;
     /* Cannot expect to map more than 800MB in low memory.  */
-    if (tcg_ctx.code_gen_buffer_size > 800u * 1024 * 1024) {
-        tcg_ctx.code_gen_buffer_size = 800u * 1024 * 1024;
+    if (size > 800u * 1024 * 1024) {
+        tcg_ctx.code_gen_buffer_size = size = 800u * 1024 * 1024;
     }
 # elif defined(__sparc__)
     start = 0x40000000ul;
 # elif defined(__s390x__)
     start = 0x90000000ul;
 # elif defined(__mips__)
-    /* ??? We ought to more explicitly manage layout for softmmu too.  */
-#  ifdef CONFIG_USER_ONLY
-    start = 0x68000000ul;
-#  elif _MIPS_SIM == _ABI64
+#  if _MIPS_SIM == _ABI64
     start = 0x128000000ul;
 #  else
     start = 0x08000000ul;
 #  endif
 # endif
 
-    buf = mmap((void *)start, tcg_ctx.code_gen_buffer_size,
-               PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
+    buf = mmap((void *)start, size + qemu_real_host_page_size,
+               PROT_NONE, flags, -1, 0);
     if (buf == MAP_FAILED) {
         return NULL;
     }
 
 #ifdef __mips__
-    if (cross_256mb(buf, tcg_ctx.code_gen_buffer_size)) {
+    if (cross_256mb(buf, size)) {
         /* Try again, with the original still mapped, to avoid re-acquiring
            that 256mb crossing.  This time don't specify an address.  */
-        size_t size2, size1 = tcg_ctx.code_gen_buffer_size;
-        void *buf2 = mmap(NULL, size1, PROT_WRITE | PROT_READ | PROT_EXEC,
-                          flags, -1, 0);
-        if (buf2 != MAP_FAILED) {
-            if (!cross_256mb(buf2, size1)) {
+        size_t size2;
+        void *buf2 = mmap(NULL, size + qemu_real_host_page_size,
+                          PROT_NONE, flags, -1, 0);
+        switch (buf2 != MAP_FAILED) {
+        case 1:
+            if (!cross_256mb(buf2, size)) {
                 /* Success!  Use the new buffer.  */
-                munmap(buf, size1);
-                return buf2;
+                munmap(buf, size);
+                break;
             }
             /* Failure.  Work with what we had.  */
-            munmap(buf2, size1);
+            munmap(buf2, size);
+            /* fallthru */
+        default:
+            /* Split the original buffer.  Free the smaller half.  */
+            buf2 = split_cross_256mb(buf, size);
+            size2 = tcg_ctx.code_gen_buffer_size;
+            if (buf == buf2) {
+                munmap(buf + size2 + qemu_real_host_page_size, size - size2);
+            } else {
+                munmap(buf, size - size2);
+            }
+            size = size2;
+            break;
         }
-
-        /* Split the original buffer.  Free the smaller half.  */
-        buf2 = split_cross_256mb(buf, size1);
-        size2 = tcg_ctx.code_gen_buffer_size;
-        munmap(buf + (buf == buf2 ? size2 : 0), size1 - size2);
-        return buf2;
+        buf = buf2;
     }
 #endif
 
-    return buf;
-}
-#else
-static inline void *alloc_code_gen_buffer(void)
-{
-    void *buf = g_try_malloc(tcg_ctx.code_gen_buffer_size);
+    /* Make the final buffer accessable.  The guard page at the end
+       will remain inaccessable with PROT_NONE.  */
+    mprotect(buf, size, PROT_WRITE | PROT_READ | PROT_EXEC);
 
-    if (buf == NULL) {
-        return NULL;
-    }
+    /* Request large pages for the buffer.  */
+    qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
 
-#ifdef __mips__
-    if (cross_256mb(buf, tcg_ctx.code_gen_buffer_size)) {
-        void *buf2 = g_malloc(tcg_ctx.code_gen_buffer_size);
-        if (buf2 != NULL && !cross_256mb(buf2, size1)) {
-            /* Success!  Use the new buffer.  */
-            free(buf);
-            buf = buf2;
-        } else {
-            /* Failure.  Work with what we had.  Since this is malloc
-               and not mmap, we can't free the other half.  */
-            free(buf2);
-            buf = split_cross_256mb(buf, tcg_ctx.code_gen_buffer_size);
-        }
-    }
-#endif
-
-    map_exec(buf, tcg_ctx.code_gen_buffer_size);
     return buf;
 }
-#endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
+#endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */
 
 static inline void code_gen_alloc(size_t tb_size)
 {
@@ -688,9 +719,6 @@  static inline void code_gen_alloc(size_t tb_size)
         exit(1);
     }
 
-    qemu_madvise(tcg_ctx.code_gen_buffer, tcg_ctx.code_gen_buffer_size,
-                 QEMU_MADV_HUGEPAGE);
-
     /* Estimate a good size for the number of TBs we can support.  We
        still haven't deducted the prologue from the buffer size here,
        but that's minimal and won't affect the estimate much.  */
@@ -708,8 +736,8 @@  static inline void code_gen_alloc(size_t tb_size)
 void tcg_exec_init(unsigned long tb_size)
 {
     cpu_gen_init();
-    code_gen_alloc(tb_size);
     page_init();
+    code_gen_alloc(tb_size);
 #if defined(CONFIG_SOFTMMU)
     /* There's no guest base to take into account, so go ahead and
        initialize the prologue now.  */