Patchwork exec: Don't request an address for code_gen_buffer if -fpie

login
register
mail settings
Submitter Richard Henderson
Date Oct. 4, 2012, 9:31 p.m.
Message ID <1349386306-24764-1-git-send-email-rth@twiddle.net>
Download mbox | patch
Permalink /patch/189340/
State New
Headers show

Comments

Richard Henderson - Oct. 4, 2012, 9:31 p.m.
The hard-coded addresses inside code_gen_alloc only make sense if
we're building an executable that will actually run at the address
we've put into the linker scripts.

When we're building with -fpie, the executable will run at some
random location chosen by the kernel.  We get better placement for
the code_gen_buffer if we allow the kernel to place the memory,
as it will tend to place it near the executable, based on the
PROT_EXEC bit.

Since code_gen_prologue is always inside the executable, this effect
is easily seen at the end of most TB, with the exit_tb opcode:

Before:
0x40b82024:  mov    $0x7fa97bd5c296,%r10
0x40b8202e:  jmpq   *%r10

After:
0x7f1191ff1024:  jmpq   0x7f119edc0296

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 exec.c | 127 +++++++++++++++++++++++++++++++----------------------------------
 1 file changed, 60 insertions(+), 67 deletions(-)
Blue Swirl - Oct. 7, 2012, 4:34 p.m.
On Thu, Oct 4, 2012 at 9:31 PM, Richard Henderson <rth@twiddle.net> wrote:
> The hard-coded addresses inside code_gen_alloc only make sense if
> we're building an executable that will actually run at the address
> we've put into the linker scripts.
>
> When we're building with -fpie, the executable will run at some
> random location chosen by the kernel.  We get better placement for
> the code_gen_buffer if we allow the kernel to place the memory,
> as it will tend to place it near the executable, based on the
> PROT_EXEC bit.
>
> Since code_gen_prologue is always inside the executable, this effect
> is easily seen at the end of most TB, with the exit_tb opcode:
>
> Before:
> 0x40b82024:  mov    $0x7fa97bd5c296,%r10
> 0x40b8202e:  jmpq   *%r10
>
> After:
> 0x7f1191ff1024:  jmpq   0x7f119edc0296
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  exec.c | 127 +++++++++++++++++++++++++++++++----------------------------------
>  1 file changed, 60 insertions(+), 67 deletions(-)
>
> diff --git a/exec.c b/exec.c
> index bb6aa4a..0ddc07a 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -510,6 +510,14 @@ static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
>                 __attribute__((aligned (CODE_GEN_ALIGN)));
>  #endif
>
> +/* ??? Should configure for this not list operating systems here.  */
> +#if defined(__linux__) \
> +    || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
> +    || defined(__DragonFly__) || defined(__OpenBSD__) \
> +    || defined(__NetBSD__)
> +# define USE_MMAP
> +#endif
> +
>  static void code_gen_alloc(unsigned long tb_size)
>  {
>  #ifdef USE_STATIC_CODE_GEN_BUFFER
> @@ -517,6 +525,45 @@ static void code_gen_alloc(unsigned long tb_size)
>      code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
>      map_exec(code_gen_buffer, code_gen_buffer_size);
>  #else
> +#ifdef USE_MMAP
> +    int flags = MAP_PRIVATE | MAP_ANONYMOUS;
> +#endif
> +    uintptr_t max_buf = -1, start = 0;
> +
> +    /* Constrain the size and position of the buffer based on the host cpu.  */
> +#if defined(__x86_64__)
> +# if !defined(__PIE__) && !defined(__PIC__) && defined(MAP_32BIT)
> +    /* Force the memory down into low memory with the executable.
> +       Leave the choice of exact location with the kernel.  */
> +    flags |= MAP_32BIT;
> +    /* Cannot expect to map more than 800MB in low memory.  */
> +    max_buf = 800 * 1024 * 1024;
> +# else
> +    /* Maximum range of direct branches.  */
> +    max_buf = 2ul * 1024 * 1024 * 1024;
> +# endif
> +#elif defined(__sparc__) && HOST_LONG_BITS == 64
> +    /* Maximum range of direct branches between TB (via "call").  */
> +    max_buf = 2ul * 1024 * 1024 * 1024;
> +    start = 0x40000000ul;
> +#elif defined(__arm__)
> +    /* Keep the buffer no bigger than 16MB to branch between blocks */
> +    max_buf = 16 * 1024 * 1024;
> +#elif defined(__s390x__)
> +    /* Map the buffer so that we can use direct calls and branches.  */
> +    /* We have a +- 4GB range on the branches; leave some slop.  */
> +    max_buf = 3ul * 1024 * 1024 * 1024;
> +    start = 0x90000000ul;
> +#endif
> +#if defined(__PIE__) || defined(__PIC__)
> +    /* Don't bother setting a preferred location if we're building
> +       a position-independent executable.  We're more likely to get
> +       an address near the main executable if we let the kernel
> +       choose the address.  */
> +    start = 0;
> +#endif
> +
> +    /* Size the buffer.  */
>      code_gen_buffer_size = tb_size;
>      if (code_gen_buffer_size == 0) {
>  #if defined(CONFIG_USER_ONLY)
> @@ -526,81 +573,27 @@ static void code_gen_alloc(unsigned long tb_size)
>          code_gen_buffer_size = (unsigned long)(ram_size / 4);
>  #endif
>      }
> -    if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
> +    if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE) {
>          code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
> -    /* The code gen buffer location may have constraints depending on
> -       the host cpu and OS */
> -#if defined(__linux__)
> -    {
> -        int flags;
> -        void *start = NULL;
> -
> -        flags = MAP_PRIVATE | MAP_ANONYMOUS;
> -#if defined(__x86_64__)
> -        flags |= MAP_32BIT;
> -        /* Cannot map more than that */
> -        if (code_gen_buffer_size > (800 * 1024 * 1024))
> -            code_gen_buffer_size = (800 * 1024 * 1024);
> -#elif defined(__sparc__) && HOST_LONG_BITS == 64
> -        // Map the buffer below 2G, so we can use direct calls and branches
> -        start = (void *) 0x40000000UL;
> -        if (code_gen_buffer_size > (512 * 1024 * 1024))
> -            code_gen_buffer_size = (512 * 1024 * 1024);
> -#elif defined(__arm__)
> -        /* Keep the buffer no bigger than 16MB to branch between blocks */
> -        if (code_gen_buffer_size > 16 * 1024 * 1024)
> -            code_gen_buffer_size = 16 * 1024 * 1024;
> -#elif defined(__s390x__)
> -        /* Map the buffer so that we can use direct calls and branches.  */
> -        /* We have a +- 4GB range on the branches; leave some slop.  */
> -        if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
> -            code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
> -        }
> -        start = (void *)0x90000000UL;
> -#endif
> -        code_gen_buffer = mmap(start, code_gen_buffer_size,
> -                               PROT_WRITE | PROT_READ | PROT_EXEC,
> -                               flags, -1, 0);
> -        if (code_gen_buffer == MAP_FAILED) {
> -            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
> -            exit(1);
> -        }
>      }
> -#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
> -    || defined(__DragonFly__) || defined(__OpenBSD__) \
> -    || defined(__NetBSD__)
> -    {
> -        int flags;
> -        void *addr = NULL;
> -        flags = MAP_PRIVATE | MAP_ANONYMOUS;
> -#if defined(__x86_64__)
> -        /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
> -         * 0x40000000 is free */
> -        flags |= MAP_FIXED;
> -        addr = (void *)0x40000000;
> -        /* Cannot map more than that */
> -        if (code_gen_buffer_size > (800 * 1024 * 1024))
> -            code_gen_buffer_size = (800 * 1024 * 1024);
> -#elif defined(__sparc__) && HOST_LONG_BITS == 64
> -        // Map the buffer below 2G, so we can use direct calls and branches
> -        addr = (void *) 0x40000000UL;
> -        if (code_gen_buffer_size > (512 * 1024 * 1024)) {
> -            code_gen_buffer_size = (512 * 1024 * 1024);
> -        }
> -#endif
> -        code_gen_buffer = mmap(addr, code_gen_buffer_size,
> -                               PROT_WRITE | PROT_READ | PROT_EXEC,
> -                               flags, -1, 0);
> -        if (code_gen_buffer == MAP_FAILED) {
> -            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
> -            exit(1);
> -        }
> +    if (code_gen_buffer_size > max_buf) {
> +        code_gen_buffer_size = max_buf;
> +    }
> +
> +#ifdef USE_MMAP
> +    code_gen_buffer = mmap((void *)start, code_gen_buffer_size,
> +                           PROT_WRITE | PROT_READ | PROT_EXEC,
> +                           flags, -1, 0);
> +    if (code_gen_buffer == MAP_FAILED) {
> +        fprintf(stderr, "Could not allocate dynamic translator buffer\n");
> +        exit(1);
>      }
>  #else
>      code_gen_buffer = g_malloc(code_gen_buffer_size);
>      map_exec(code_gen_buffer, code_gen_buffer_size);

In this branch (e.g. mingw32), 'start' is unused:
/src/qemu/exec.c: In function 'code_gen_alloc':
/src/qemu/exec.c:531: warning: unused variable 'start'

>  #endif
>  #endif /* !USE_STATIC_CODE_GEN_BUFFER */
> +
>      map_exec(code_gen_prologue, sizeof(code_gen_prologue));
>      code_gen_buffer_max_size = code_gen_buffer_size -
>          (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
> --
> 1.7.11.4
>
>

Patch

diff --git a/exec.c b/exec.c
index bb6aa4a..0ddc07a 100644
--- a/exec.c
+++ b/exec.c
@@ -510,6 +510,14 @@  static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
                __attribute__((aligned (CODE_GEN_ALIGN)));
 #endif
 
+/* ??? Should configure for this not list operating systems here.  */
+#if defined(__linux__) \
+    || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
+    || defined(__DragonFly__) || defined(__OpenBSD__) \
+    || defined(__NetBSD__)
+# define USE_MMAP
+#endif
+
 static void code_gen_alloc(unsigned long tb_size)
 {
 #ifdef USE_STATIC_CODE_GEN_BUFFER
@@ -517,6 +525,45 @@  static void code_gen_alloc(unsigned long tb_size)
     code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
     map_exec(code_gen_buffer, code_gen_buffer_size);
 #else
+#ifdef USE_MMAP
+    int flags = MAP_PRIVATE | MAP_ANONYMOUS;
+#endif
+    uintptr_t max_buf = -1, start = 0;
+
+    /* Constrain the size and position of the buffer based on the host cpu.  */
+#if defined(__x86_64__)
+# if !defined(__PIE__) && !defined(__PIC__) && defined(MAP_32BIT)
+    /* Force the memory down into low memory with the executable.
+       Leave the choice of exact location with the kernel.  */
+    flags |= MAP_32BIT;
+    /* Cannot expect to map more than 800MB in low memory.  */
+    max_buf = 800 * 1024 * 1024;
+# else
+    /* Maximum range of direct branches.  */
+    max_buf = 2ul * 1024 * 1024 * 1024;
+# endif
+#elif defined(__sparc__) && HOST_LONG_BITS == 64
+    /* Maximum range of direct branches between TB (via "call").  */
+    max_buf = 2ul * 1024 * 1024 * 1024;
+    start = 0x40000000ul;
+#elif defined(__arm__)
+    /* Keep the buffer no bigger than 16MB to branch between blocks */
+    max_buf = 16 * 1024 * 1024;
+#elif defined(__s390x__)
+    /* Map the buffer so that we can use direct calls and branches.  */
+    /* We have a +- 4GB range on the branches; leave some slop.  */
+    max_buf = 3ul * 1024 * 1024 * 1024;
+    start = 0x90000000ul;
+#endif
+#if defined(__PIE__) || defined(__PIC__)
+    /* Don't bother setting a preferred location if we're building
+       a position-independent executable.  We're more likely to get
+       an address near the main executable if we let the kernel
+       choose the address.  */
+    start = 0;
+#endif
+
+    /* Size the buffer.  */
     code_gen_buffer_size = tb_size;
     if (code_gen_buffer_size == 0) {
 #if defined(CONFIG_USER_ONLY)
@@ -526,81 +573,27 @@  static void code_gen_alloc(unsigned long tb_size)
         code_gen_buffer_size = (unsigned long)(ram_size / 4);
 #endif
     }
-    if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
+    if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE) {
         code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
-    /* The code gen buffer location may have constraints depending on
-       the host cpu and OS */
-#if defined(__linux__) 
-    {
-        int flags;
-        void *start = NULL;
-
-        flags = MAP_PRIVATE | MAP_ANONYMOUS;
-#if defined(__x86_64__)
-        flags |= MAP_32BIT;
-        /* Cannot map more than that */
-        if (code_gen_buffer_size > (800 * 1024 * 1024))
-            code_gen_buffer_size = (800 * 1024 * 1024);
-#elif defined(__sparc__) && HOST_LONG_BITS == 64
-        // Map the buffer below 2G, so we can use direct calls and branches
-        start = (void *) 0x40000000UL;
-        if (code_gen_buffer_size > (512 * 1024 * 1024))
-            code_gen_buffer_size = (512 * 1024 * 1024);
-#elif defined(__arm__)
-        /* Keep the buffer no bigger than 16MB to branch between blocks */
-        if (code_gen_buffer_size > 16 * 1024 * 1024)
-            code_gen_buffer_size = 16 * 1024 * 1024;
-#elif defined(__s390x__)
-        /* Map the buffer so that we can use direct calls and branches.  */
-        /* We have a +- 4GB range on the branches; leave some slop.  */
-        if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
-            code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
-        }
-        start = (void *)0x90000000UL;
-#endif
-        code_gen_buffer = mmap(start, code_gen_buffer_size,
-                               PROT_WRITE | PROT_READ | PROT_EXEC,
-                               flags, -1, 0);
-        if (code_gen_buffer == MAP_FAILED) {
-            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
-            exit(1);
-        }
     }
-#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
-    || defined(__DragonFly__) || defined(__OpenBSD__) \
-    || defined(__NetBSD__)
-    {
-        int flags;
-        void *addr = NULL;
-        flags = MAP_PRIVATE | MAP_ANONYMOUS;
-#if defined(__x86_64__)
-        /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
-         * 0x40000000 is free */
-        flags |= MAP_FIXED;
-        addr = (void *)0x40000000;
-        /* Cannot map more than that */
-        if (code_gen_buffer_size > (800 * 1024 * 1024))
-            code_gen_buffer_size = (800 * 1024 * 1024);
-#elif defined(__sparc__) && HOST_LONG_BITS == 64
-        // Map the buffer below 2G, so we can use direct calls and branches
-        addr = (void *) 0x40000000UL;
-        if (code_gen_buffer_size > (512 * 1024 * 1024)) {
-            code_gen_buffer_size = (512 * 1024 * 1024);
-        }
-#endif
-        code_gen_buffer = mmap(addr, code_gen_buffer_size,
-                               PROT_WRITE | PROT_READ | PROT_EXEC, 
-                               flags, -1, 0);
-        if (code_gen_buffer == MAP_FAILED) {
-            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
-            exit(1);
-        }
+    if (code_gen_buffer_size > max_buf) {
+        code_gen_buffer_size = max_buf;
+    }
+
+#ifdef USE_MMAP
+    code_gen_buffer = mmap((void *)start, code_gen_buffer_size,
+                           PROT_WRITE | PROT_READ | PROT_EXEC,
+                           flags, -1, 0);
+    if (code_gen_buffer == MAP_FAILED) {
+        fprintf(stderr, "Could not allocate dynamic translator buffer\n");
+        exit(1);
     }
 #else
     code_gen_buffer = g_malloc(code_gen_buffer_size);
     map_exec(code_gen_buffer, code_gen_buffer_size);
 #endif
 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
+
     map_exec(code_gen_prologue, sizeof(code_gen_prologue));
     code_gen_buffer_max_size = code_gen_buffer_size -
         (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);