Message ID | 1349386306-24764-1-git-send-email-rth@twiddle.net |
---|---|
State | New |
Headers | show |
On Thu, Oct 4, 2012 at 9:31 PM, Richard Henderson <rth@twiddle.net> wrote: > The hard-coded addresses inside code_gen_alloc only make sense if > we're building an executable that will actually run at the address > we've put into the linker scripts. > > When we're building with -fpie, the executable will run at some > random location chosen by the kernel. We get better placement for > the code_gen_buffer if we allow the kernel to place the memory, > as it will tend to to place it near the executable, based on the > PROT_EXEC bit. > > Since code_gen_prologue is always inside the executable, this effect > is easily seen at the end of most TB, with the exit_tb opcode: > > Before: > 0x40b82024: mov $0x7fa97bd5c296,%r10 > 0x40b8202e: jmpq *%r10 > > After: > 0x7f1191ff1024: jmpq 0x7f119edc0296 > > Signed-off-by: Richard Henderson <rth@twiddle.net> > --- > exec.c | 127 +++++++++++++++++++++++++++++++---------------------------------- > 1 file changed, 60 insertions(+), 67 deletions(-) > > diff --git a/exec.c b/exec.c > index bb6aa4a..0ddc07a 100644 > --- a/exec.c > +++ b/exec.c > @@ -510,6 +510,14 @@ static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE] > __attribute__((aligned (CODE_GEN_ALIGN))); > #endif > > +/* ??? Should configure for this not list operating systems here. */ > +#if defined(__linux__) \ > + || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \ > + || defined(__DragonFly__) || defined(__OpenBSD__) \ > + || defined(__NetBSD__) > +# define USE_MMAP > +#endif > + > static void code_gen_alloc(unsigned long tb_size) > { > #ifdef USE_STATIC_CODE_GEN_BUFFER > @@ -517,6 +525,45 @@ static void code_gen_alloc(unsigned long tb_size) > code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE; > map_exec(code_gen_buffer, code_gen_buffer_size); > #else > +#ifdef USE_MMAP > + int flags = MAP_PRIVATE | MAP_ANONYMOUS; > +#endif > + uintptr_t max_buf = -1, start = 0; > + > + /* Constrain the size and position of the buffer based on the host cpu. 
*/ > +#if defined(__x86_64__) > +# if !defined(__PIE__) && !defined(__PIC__) && defined(MAP_32BIT) > + /* Force the memory down into low memory with the executable. > + Leave the choice of exact location with the kernel. */ > + flags |= MAP_32BIT; > + /* Cannot expect to map more than 800MB in low memory. */ > + max_buf = 800 * 1024 * 1024; > +# else > + /* Maximum range of direct branches. */ > + max_buf = 2ul * 1024 * 1024 * 1024; > +# endif > +#elif defined(__sparc__) && HOST_LONG_BITS == 64 > + /* Maximum range of direct branches between TB (via "call"). */ > + max_buf = 2ul * 1024 * 1024 * 1024; > + start = 0x40000000ul; > +#elif defined(__arm__) > + /* Keep the buffer no bigger than 16MB to branch between blocks */ > + max_buf = 16 * 1024 * 1024; > +#elif defined(__s390x__) > + /* Map the buffer so that we can use direct calls and branches. */ > + /* We have a +- 4GB range on the branches; leave some slop. */ > + max_buf = 3ul * 1024 * 1024 * 1024; > + start = 0x90000000ul; > +#endif > +#if defined(__PIE__) || defined(__PIC__) > + /* Don't bother setting a preferred location if we're building > + a position-independent executable. We're more likely to get > + an address near the main executable if we let the kernel > + choose the address. */ > + start = 0; > +#endif > + > + /* Size the buffer. 
*/ > code_gen_buffer_size = tb_size; > if (code_gen_buffer_size == 0) { > #if defined(CONFIG_USER_ONLY) > @@ -526,81 +573,27 @@ static void code_gen_alloc(unsigned long tb_size) > code_gen_buffer_size = (unsigned long)(ram_size / 4); > #endif > } > - if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE) > + if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE) { > code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE; > - /* The code gen buffer location may have constraints depending on > - the host cpu and OS */ > -#if defined(__linux__) > - { > - int flags; > - void *start = NULL; > - > - flags = MAP_PRIVATE | MAP_ANONYMOUS; > -#if defined(__x86_64__) > - flags |= MAP_32BIT; > - /* Cannot map more than that */ > - if (code_gen_buffer_size > (800 * 1024 * 1024)) > - code_gen_buffer_size = (800 * 1024 * 1024); > -#elif defined(__sparc__) && HOST_LONG_BITS == 64 > - // Map the buffer below 2G, so we can use direct calls and branches > - start = (void *) 0x40000000UL; > - if (code_gen_buffer_size > (512 * 1024 * 1024)) > - code_gen_buffer_size = (512 * 1024 * 1024); > -#elif defined(__arm__) > - /* Keep the buffer no bigger than 16MB to branch between blocks */ > - if (code_gen_buffer_size > 16 * 1024 * 1024) > - code_gen_buffer_size = 16 * 1024 * 1024; > -#elif defined(__s390x__) > - /* Map the buffer so that we can use direct calls and branches. */ > - /* We have a +- 4GB range on the branches; leave some slop. 
*/ > - if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) { > - code_gen_buffer_size = 3ul * 1024 * 1024 * 1024; > - } > - start = (void *)0x90000000UL; > -#endif > - code_gen_buffer = mmap(start, code_gen_buffer_size, > - PROT_WRITE | PROT_READ | PROT_EXEC, > - flags, -1, 0); > - if (code_gen_buffer == MAP_FAILED) { > - fprintf(stderr, "Could not allocate dynamic translator buffer\n"); > - exit(1); > - } > } > -#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \ > - || defined(__DragonFly__) || defined(__OpenBSD__) \ > - || defined(__NetBSD__) > - { > - int flags; > - void *addr = NULL; > - flags = MAP_PRIVATE | MAP_ANONYMOUS; > -#if defined(__x86_64__) > - /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume > - * 0x40000000 is free */ > - flags |= MAP_FIXED; > - addr = (void *)0x40000000; > - /* Cannot map more than that */ > - if (code_gen_buffer_size > (800 * 1024 * 1024)) > - code_gen_buffer_size = (800 * 1024 * 1024); > -#elif defined(__sparc__) && HOST_LONG_BITS == 64 > - // Map the buffer below 2G, so we can use direct calls and branches > - addr = (void *) 0x40000000UL; > - if (code_gen_buffer_size > (512 * 1024 * 1024)) { > - code_gen_buffer_size = (512 * 1024 * 1024); > - } > -#endif > - code_gen_buffer = mmap(addr, code_gen_buffer_size, > - PROT_WRITE | PROT_READ | PROT_EXEC, > - flags, -1, 0); > - if (code_gen_buffer == MAP_FAILED) { > - fprintf(stderr, "Could not allocate dynamic translator buffer\n"); > - exit(1); > - } > + if (code_gen_buffer_size > max_buf) { > + code_gen_buffer_size = max_buf; > + } > + > +#ifdef USE_MMAP > + code_gen_buffer = mmap((void *)start, code_gen_buffer_size, > + PROT_WRITE | PROT_READ | PROT_EXEC, > + flags, -1, 0); > + if (code_gen_buffer == MAP_FAILED) { > + fprintf(stderr, "Could not allocate dynamic translator buffer\n"); > + exit(1); > } > #else > code_gen_buffer = g_malloc(code_gen_buffer_size); > map_exec(code_gen_buffer, code_gen_buffer_size); In this branch (e.g. 
mingw32), 'start' is unused: /src/qemu/exec.c: In function 'code_gen_alloc': /src/qemu/exec.c:531: warning: unused variable 'start' > #endif > #endif /* !USE_STATIC_CODE_GEN_BUFFER */ > + > map_exec(code_gen_prologue, sizeof(code_gen_prologue)); > code_gen_buffer_max_size = code_gen_buffer_size - > (TCG_MAX_OP_SIZE * OPC_BUF_SIZE); > -- > 1.7.11.4 > >
diff --git a/exec.c b/exec.c index bb6aa4a..0ddc07a 100644 --- a/exec.c +++ b/exec.c @@ -510,6 +510,14 @@ static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE] __attribute__((aligned (CODE_GEN_ALIGN))); #endif +/* ??? Should configure for this not list operating systems here. */ +#if defined(__linux__) \ + || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \ + || defined(__DragonFly__) || defined(__OpenBSD__) \ + || defined(__NetBSD__) +# define USE_MMAP +#endif + static void code_gen_alloc(unsigned long tb_size) { #ifdef USE_STATIC_CODE_GEN_BUFFER @@ -517,6 +525,45 @@ static void code_gen_alloc(unsigned long tb_size) code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE; map_exec(code_gen_buffer, code_gen_buffer_size); #else +#ifdef USE_MMAP + int flags = MAP_PRIVATE | MAP_ANONYMOUS; +#endif + uintptr_t max_buf = -1, start = 0; + + /* Constrain the size and position of the buffer based on the host cpu. */ +#if defined(__x86_64__) +# if !defined(__PIE__) && !defined(__PIC__) && defined(MAP_32BIT) + /* Force the memory down into low memory with the executable. + Leave the choice of exact location with the kernel. */ + flags |= MAP_32BIT; + /* Cannot expect to map more than 800MB in low memory. */ + max_buf = 800 * 1024 * 1024; +# else + /* Maximum range of direct branches. */ + max_buf = 2ul * 1024 * 1024 * 1024; +# endif +#elif defined(__sparc__) && HOST_LONG_BITS == 64 + /* Maximum range of direct branches between TB (via "call"). */ + max_buf = 2ul * 1024 * 1024 * 1024; + start = 0x40000000ul; +#elif defined(__arm__) + /* Keep the buffer no bigger than 16MB to branch between blocks */ + max_buf = 16 * 1024 * 1024; +#elif defined(__s390x__) + /* Map the buffer so that we can use direct calls and branches. */ + /* We have a +- 4GB range on the branches; leave some slop. 
*/ + max_buf = 3ul * 1024 * 1024 * 1024; + start = 0x90000000ul; +#endif +#if defined(__PIE__) || defined(__PIC__) + /* Don't bother setting a preferred location if we're building + a position-independent executable. We're more likely to get + an address near the main executable if we let the kernel + choose the address. */ + start = 0; +#endif + + /* Size the buffer. */ code_gen_buffer_size = tb_size; if (code_gen_buffer_size == 0) { #if defined(CONFIG_USER_ONLY) @@ -526,81 +573,27 @@ static void code_gen_alloc(unsigned long tb_size) code_gen_buffer_size = (unsigned long)(ram_size / 4); #endif } - if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE) + if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE) { code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE; - /* The code gen buffer location may have constraints depending on - the host cpu and OS */ -#if defined(__linux__) - { - int flags; - void *start = NULL; - - flags = MAP_PRIVATE | MAP_ANONYMOUS; -#if defined(__x86_64__) - flags |= MAP_32BIT; - /* Cannot map more than that */ - if (code_gen_buffer_size > (800 * 1024 * 1024)) - code_gen_buffer_size = (800 * 1024 * 1024); -#elif defined(__sparc__) && HOST_LONG_BITS == 64 - // Map the buffer below 2G, so we can use direct calls and branches - start = (void *) 0x40000000UL; - if (code_gen_buffer_size > (512 * 1024 * 1024)) - code_gen_buffer_size = (512 * 1024 * 1024); -#elif defined(__arm__) - /* Keep the buffer no bigger than 16MB to branch between blocks */ - if (code_gen_buffer_size > 16 * 1024 * 1024) - code_gen_buffer_size = 16 * 1024 * 1024; -#elif defined(__s390x__) - /* Map the buffer so that we can use direct calls and branches. */ - /* We have a +- 4GB range on the branches; leave some slop. 
*/ - if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) { - code_gen_buffer_size = 3ul * 1024 * 1024 * 1024; - } - start = (void *)0x90000000UL; -#endif - code_gen_buffer = mmap(start, code_gen_buffer_size, - PROT_WRITE | PROT_READ | PROT_EXEC, - flags, -1, 0); - if (code_gen_buffer == MAP_FAILED) { - fprintf(stderr, "Could not allocate dynamic translator buffer\n"); - exit(1); - } } -#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \ - || defined(__DragonFly__) || defined(__OpenBSD__) \ - || defined(__NetBSD__) - { - int flags; - void *addr = NULL; - flags = MAP_PRIVATE | MAP_ANONYMOUS; -#if defined(__x86_64__) - /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume - * 0x40000000 is free */ - flags |= MAP_FIXED; - addr = (void *)0x40000000; - /* Cannot map more than that */ - if (code_gen_buffer_size > (800 * 1024 * 1024)) - code_gen_buffer_size = (800 * 1024 * 1024); -#elif defined(__sparc__) && HOST_LONG_BITS == 64 - // Map the buffer below 2G, so we can use direct calls and branches - addr = (void *) 0x40000000UL; - if (code_gen_buffer_size > (512 * 1024 * 1024)) { - code_gen_buffer_size = (512 * 1024 * 1024); - } -#endif - code_gen_buffer = mmap(addr, code_gen_buffer_size, - PROT_WRITE | PROT_READ | PROT_EXEC, - flags, -1, 0); - if (code_gen_buffer == MAP_FAILED) { - fprintf(stderr, "Could not allocate dynamic translator buffer\n"); - exit(1); - } + if (code_gen_buffer_size > max_buf) { + code_gen_buffer_size = max_buf; + } + +#ifdef USE_MMAP + code_gen_buffer = mmap((void *)start, code_gen_buffer_size, + PROT_WRITE | PROT_READ | PROT_EXEC, + flags, -1, 0); + if (code_gen_buffer == MAP_FAILED) { + fprintf(stderr, "Could not allocate dynamic translator buffer\n"); + exit(1); } #else code_gen_buffer = g_malloc(code_gen_buffer_size); map_exec(code_gen_buffer, code_gen_buffer_size); #endif #endif /* !USE_STATIC_CODE_GEN_BUFFER */ + map_exec(code_gen_prologue, sizeof(code_gen_prologue)); code_gen_buffer_max_size = code_gen_buffer_size - 
(TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
The hard-coded addresses inside code_gen_alloc only make sense if we're building an executable that will actually run at the address we've put into the linker scripts. When we're building with -fpie, the executable will run at some random location chosen by the kernel. We get better placement for the code_gen_buffer if we allow the kernel to place the memory, as it will tend to place it near the executable, based on the PROT_EXEC bit. Since code_gen_prologue is always inside the executable, this effect is easily seen at the end of most TB, with the exit_tb opcode: Before: 0x40b82024: mov $0x7fa97bd5c296,%r10 0x40b8202e: jmpq *%r10 After: 0x7f1191ff1024: jmpq 0x7f119edc0296 Signed-off-by: Richard Henderson <rth@twiddle.net> --- exec.c | 127 +++++++++++++++++++++++++++++++---------------------------------- 1 file changed, 60 insertions(+), 67 deletions(-)