Patchwork [1/4] exec: Split up and tidy code_gen_buffer

login
register
mail settings
Submitter Richard Henderson
Date Oct. 12, 2012, 9:20 p.m.
Message ID <1350076850-7099-2-git-send-email-rth@twiddle.net>
Download mbox | patch
Permalink /patch/191222/
State New
Headers show

Comments

Richard Henderson - Oct. 12, 2012, 9:20 p.m.
The code_gen_buffer allocation code now consists of:

A macro definition of MAX_CODE_GEN_BUFFER_SIZE with host-specific values,

A function size_code_gen_buffer that applies most of the reasoning for
choosing a buffer size,

Three variations of a function alloc_code_gen_buffer that contain all
of the logic for allocating executable memory via a given allocation
mechanism.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 exec.c | 193 ++++++++++++++++++++++++++++++++++-------------------------------
 1 file changed, 102 insertions(+), 91 deletions(-)
Blue Swirl - Oct. 13, 2012, 1:33 p.m.
On Fri, Oct 12, 2012 at 9:20 PM, Richard Henderson <rth@twiddle.net> wrote:
> It now consists of:
>
> A macro definition of MAX_CODE_GEN_BUFFER_SIZE with host-specific values,
>
> A function size_code_gen_buffer that applies most of the reasoning for
> choosing a buffer size,
>
> Three variations of a function alloc_code_gen_buffer that contain all
> of the logic for allocating executable memory via a given allocation
> mechanism.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  exec.c | 193 ++++++++++++++++++++++++++++++++++-------------------------------
>  1 file changed, 102 insertions(+), 91 deletions(-)
>
> diff --git a/exec.c b/exec.c
> index 7899042..db735dd 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -103,9 +103,9 @@ spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
>
>  uint8_t code_gen_prologue[1024] code_gen_section;
>  static uint8_t *code_gen_buffer;
> -static unsigned long code_gen_buffer_size;
> +static size_t code_gen_buffer_size;
>  /* threshold to flush the translated code buffer */
> -static unsigned long code_gen_buffer_max_size;
> +static size_t code_gen_buffer_max_size;

This breaks build on i386:
/src/qemu/exec.c: In function 'dump_exec_info':
/src/qemu/exec.c:4208: error: format '%ld' expects type 'long int',
but argument 4 has type 'size_t'

>  static uint8_t *code_gen_ptr;
>
>  #if !defined(CONFIG_USER_ONLY)
> @@ -497,110 +497,121 @@ bool memory_region_is_unassigned(MemoryRegion *mr)
>  #define mmap_unlock() do { } while(0)
>  #endif
>
> -#define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
> -
>  #if defined(CONFIG_USER_ONLY)
>  /* Currently it is not recommended to allocate big chunks of data in
> -   user mode. It will change when a dedicated libc will be used */
> +   user mode. It will change when a dedicated libc will be used.  */
> +/* ??? 64-bit hosts ought to have no problem mmaping data outside the
> +   region in which the guest needs to run.  Revisit this.  */
>  #define USE_STATIC_CODE_GEN_BUFFER
>  #endif
>
> -#ifdef USE_STATIC_CODE_GEN_BUFFER
> -static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
> -               __attribute__((aligned (CODE_GEN_ALIGN)));
> +/* ??? Should configure for this, not list operating systems here.  */
> +#if (defined(__linux__) \
> +    || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
> +    || defined(__DragonFly__) || defined(__OpenBSD__) \
> +    || defined(__NetBSD__))
> +# define USE_MMAP
>  #endif
>
> -static void code_gen_alloc(unsigned long tb_size)
> +/* Maximum size of the code gen buffer we'd like to use.  Unless otherwise
> +   indicated, this is constrained by the range of direct branches on the
> +   host cpu, as used by the TCG implementation of goto_tb.  */
> +#if defined(__x86_64__)
> +# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
> +#elif defined(__sparc__)
> +# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
> +#elif defined(__arm__)
> +# define MAX_CODE_GEN_BUFFER_SIZE  (16u * 1024 * 1024)
> +#elif defined(__s390x__)
> +  /* We have a +- 4GB range on the branches; leave some slop.  */
> +# define MAX_CODE_GEN_BUFFER_SIZE  (3ul * 1024 * 1024 * 1024)
> +#else
> +# define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
> +#endif
> +
> +#define DEFAULT_CODE_GEN_BUFFER_SIZE (32u * 1024 * 1024)
> +
> +static inline size_t size_code_gen_buffer(size_t tb_size)
>  {
> +    /* Size the buffer.  */
> +    if (tb_size == 0) {
>  #ifdef USE_STATIC_CODE_GEN_BUFFER
> -    code_gen_buffer = static_code_gen_buffer;
> -    code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
> -    map_exec(code_gen_buffer, code_gen_buffer_size);
> -#else
> -    code_gen_buffer_size = tb_size;
> -    if (code_gen_buffer_size == 0) {
> -#if defined(CONFIG_USER_ONLY)
> -        code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
> +        tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
>  #else
> -        /* XXX: needs adjustments */
> -        code_gen_buffer_size = (unsigned long)(ram_size / 4);
> +        /* ??? Needs adjustments.  */
> +        /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
> +           static buffer, we could size this on RESERVED_VA, on the text
> +           segment size of the executable, or continue to use the default.  */
> +        tb_size = (unsigned long)(ram_size / 4);
>  #endif
>      }
> -    if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
> -        code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
> -    /* The code gen buffer location may have constraints depending on
> -       the host cpu and OS */
> -#if defined(__linux__)
> -    {
> -        int flags;
> -        void *start = NULL;
> -
> -        flags = MAP_PRIVATE | MAP_ANONYMOUS;
> -#if defined(__x86_64__)
> -        flags |= MAP_32BIT;
> -        /* Cannot map more than that */
> -        if (code_gen_buffer_size > (800 * 1024 * 1024))
> -            code_gen_buffer_size = (800 * 1024 * 1024);
> -#elif defined(__sparc__) && HOST_LONG_BITS == 64
> -        // Map the buffer below 2G, so we can use direct calls and branches
> -        start = (void *) 0x40000000UL;
> -        if (code_gen_buffer_size > (512 * 1024 * 1024))
> -            code_gen_buffer_size = (512 * 1024 * 1024);
> -#elif defined(__arm__)
> -        /* Keep the buffer no bigger than 16MB to branch between blocks */
> -        if (code_gen_buffer_size > 16 * 1024 * 1024)
> -            code_gen_buffer_size = 16 * 1024 * 1024;
> -#elif defined(__s390x__)
> -        /* Map the buffer so that we can use direct calls and branches.  */
> -        /* We have a +- 4GB range on the branches; leave some slop.  */
> -        if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
> -            code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
> -        }
> -        start = (void *)0x90000000UL;
> -#endif
> -        code_gen_buffer = mmap(start, code_gen_buffer_size,
> -                               PROT_WRITE | PROT_READ | PROT_EXEC,
> -                               flags, -1, 0);
> -        if (code_gen_buffer == MAP_FAILED) {
> -            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
> -            exit(1);
> -        }
> +    if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
> +        tb_size = MIN_CODE_GEN_BUFFER_SIZE;
>      }
> -#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
> -    || defined(__DragonFly__) || defined(__OpenBSD__) \
> -    || defined(__NetBSD__)
> -    {
> -        int flags;
> -        void *addr = NULL;
> -        flags = MAP_PRIVATE | MAP_ANONYMOUS;
> -#if defined(__x86_64__)
> -        /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
> -         * 0x40000000 is free */
> -        flags |= MAP_FIXED;
> -        addr = (void *)0x40000000;
> -        /* Cannot map more than that */
> -        if (code_gen_buffer_size > (800 * 1024 * 1024))
> -            code_gen_buffer_size = (800 * 1024 * 1024);
> -#elif defined(__sparc__) && HOST_LONG_BITS == 64
> -        // Map the buffer below 2G, so we can use direct calls and branches
> -        addr = (void *) 0x40000000UL;
> -        if (code_gen_buffer_size > (512 * 1024 * 1024)) {
> -            code_gen_buffer_size = (512 * 1024 * 1024);
> -        }
> -#endif
> -        code_gen_buffer = mmap(addr, code_gen_buffer_size,
> -                               PROT_WRITE | PROT_READ | PROT_EXEC,
> -                               flags, -1, 0);
> -        if (code_gen_buffer == MAP_FAILED) {
> -            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
> -            exit(1);
> -        }
> +    if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
> +        tb_size = MAX_CODE_GEN_BUFFER_SIZE;
>      }
> +    code_gen_buffer_size = tb_size;
> +    return tb_size;
> +}
> +
> +#ifdef USE_STATIC_CODE_GEN_BUFFER
> +static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
> +    __attribute__((aligned(CODE_GEN_ALIGN)));
> +
> +static inline void *alloc_code_gen_buffer(void)
> +{
> +    map_exec(static_code_gen_buffer, code_gen_buffer_size);
> +    return static_code_gen_buffer;
> +}
> +#elif defined(USE_MMAP)
> +static inline void *alloc_code_gen_buffer(void)
> +{
> +    int flags = MAP_PRIVATE | MAP_ANONYMOUS;
> +    uintptr_t start = 0;
> +    void *buf;
> +
> +    /* Constrain the position of the buffer based on the host cpu.
> +       Note that these addresses are chosen in concert with the
> +       addresses assigned in the relevant linker script file.  */
> +# if defined(__x86_64__) && defined(MAP_32BIT)
> +    /* Force the memory down into low memory with the executable.
> +       Leave the choice of exact location with the kernel.  */
> +    flags |= MAP_32BIT;
> +    /* Cannot expect to map more than 800MB in low memory.  */
> +    if (code_gen_buffer_size > 800u * 1024 * 1024) {
> +        code_gen_buffer_size = 800u * 1024 * 1024;
> +    }
> +# elif defined(__sparc__)
> +    start = 0x40000000ul;
> +# elif defined(__s390x__)
> +    start = 0x90000000ul;
> +# endif
> +
> +    buf = mmap((void *)start, code_gen_buffer_size,
> +               PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
> +    return buf == MAP_FAILED ? NULL : buf;
> +}
>  #else
> -    code_gen_buffer = g_malloc(code_gen_buffer_size);
> -    map_exec(code_gen_buffer, code_gen_buffer_size);
> -#endif
> -#endif /* !USE_STATIC_CODE_GEN_BUFFER */
> +static inline void *alloc_code_gen_buffer(void)
> +{
> +    void *buf = g_malloc(code_gen_buffer_size);
> +    if (buf) {
> +        map_exec(buf, code_gen_buffer_size);
> +    }
> +    return buf;
> +}
> +#endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
> +
> +static inline void code_gen_alloc(size_t tb_size)
> +{
> +    code_gen_buffer_size = size_code_gen_buffer(tb_size);
> +    code_gen_buffer = alloc_code_gen_buffer();
> +    if (code_gen_buffer == NULL) {
> +        fprintf(stderr, "Could not allocate dynamic translator buffer\n");
> +        exit(1);
> +    }
> +
>      map_exec(code_gen_prologue, sizeof(code_gen_prologue));
>      code_gen_buffer_max_size = code_gen_buffer_size -
>          (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
> --
> 1.7.11.7
>
Richard Henderson - Oct. 15, 2012, 8:16 p.m.
On 2012-10-13 23:33, Blue Swirl wrote:
> /src/qemu/exec.c:4208: error: format '%ld' expects type 'long int',
> but argument 4 has type 'size_t'

Dang it.  And here I thought I was helping get the type right for win64.
That printf format should be changed to %zu (the conversion specifier for
size_t)...


r~

Patch

diff --git a/exec.c b/exec.c
index 7899042..db735dd 100644
--- a/exec.c
+++ b/exec.c
@@ -103,9 +103,9 @@  spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
 
 uint8_t code_gen_prologue[1024] code_gen_section;
 static uint8_t *code_gen_buffer;
-static unsigned long code_gen_buffer_size;
+static size_t code_gen_buffer_size;
 /* threshold to flush the translated code buffer */
-static unsigned long code_gen_buffer_max_size;
+static size_t code_gen_buffer_max_size;
 static uint8_t *code_gen_ptr;
 
 #if !defined(CONFIG_USER_ONLY)
@@ -497,110 +497,121 @@  bool memory_region_is_unassigned(MemoryRegion *mr)
 #define mmap_unlock() do { } while(0)
 #endif
 
-#define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
-
 #if defined(CONFIG_USER_ONLY)
 /* Currently it is not recommended to allocate big chunks of data in
-   user mode. It will change when a dedicated libc will be used */
+   user mode. It will change when a dedicated libc will be used.  */
+/* ??? 64-bit hosts ought to have no problem mmaping data outside the
+   region in which the guest needs to run.  Revisit this.  */
 #define USE_STATIC_CODE_GEN_BUFFER
 #endif
 
-#ifdef USE_STATIC_CODE_GEN_BUFFER
-static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
-               __attribute__((aligned (CODE_GEN_ALIGN)));
+/* ??? Should configure for this, not list operating systems here.  */
+#if (defined(__linux__) \
+    || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
+    || defined(__DragonFly__) || defined(__OpenBSD__) \
+    || defined(__NetBSD__))
+# define USE_MMAP
 #endif
 
-static void code_gen_alloc(unsigned long tb_size)
+/* Maximum size of the code gen buffer we'd like to use.  Unless otherwise
+   indicated, this is constrained by the range of direct branches on the
+   host cpu, as used by the TCG implementation of goto_tb.  */
+#if defined(__x86_64__)
+# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
+#elif defined(__sparc__)
+# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
+#elif defined(__arm__)
+# define MAX_CODE_GEN_BUFFER_SIZE  (16u * 1024 * 1024)
+#elif defined(__s390x__)
+  /* We have a +- 4GB range on the branches; leave some slop.  */
+# define MAX_CODE_GEN_BUFFER_SIZE  (3ul * 1024 * 1024 * 1024)
+#else
+# define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
+#endif
+
+#define DEFAULT_CODE_GEN_BUFFER_SIZE (32u * 1024 * 1024)
+
+static inline size_t size_code_gen_buffer(size_t tb_size)
 {
+    /* Size the buffer.  */
+    if (tb_size == 0) {
 #ifdef USE_STATIC_CODE_GEN_BUFFER
-    code_gen_buffer = static_code_gen_buffer;
-    code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
-    map_exec(code_gen_buffer, code_gen_buffer_size);
-#else
-    code_gen_buffer_size = tb_size;
-    if (code_gen_buffer_size == 0) {
-#if defined(CONFIG_USER_ONLY)
-        code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
+        tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
 #else
-        /* XXX: needs adjustments */
-        code_gen_buffer_size = (unsigned long)(ram_size / 4);
+        /* ??? Needs adjustments.  */
+        /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
+           static buffer, we could size this on RESERVED_VA, on the text
+           segment size of the executable, or continue to use the default.  */
+        tb_size = (unsigned long)(ram_size / 4);
 #endif
     }
-    if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
-        code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
-    /* The code gen buffer location may have constraints depending on
-       the host cpu and OS */
-#if defined(__linux__) 
-    {
-        int flags;
-        void *start = NULL;
-
-        flags = MAP_PRIVATE | MAP_ANONYMOUS;
-#if defined(__x86_64__)
-        flags |= MAP_32BIT;
-        /* Cannot map more than that */
-        if (code_gen_buffer_size > (800 * 1024 * 1024))
-            code_gen_buffer_size = (800 * 1024 * 1024);
-#elif defined(__sparc__) && HOST_LONG_BITS == 64
-        // Map the buffer below 2G, so we can use direct calls and branches
-        start = (void *) 0x40000000UL;
-        if (code_gen_buffer_size > (512 * 1024 * 1024))
-            code_gen_buffer_size = (512 * 1024 * 1024);
-#elif defined(__arm__)
-        /* Keep the buffer no bigger than 16MB to branch between blocks */
-        if (code_gen_buffer_size > 16 * 1024 * 1024)
-            code_gen_buffer_size = 16 * 1024 * 1024;
-#elif defined(__s390x__)
-        /* Map the buffer so that we can use direct calls and branches.  */
-        /* We have a +- 4GB range on the branches; leave some slop.  */
-        if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
-            code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
-        }
-        start = (void *)0x90000000UL;
-#endif
-        code_gen_buffer = mmap(start, code_gen_buffer_size,
-                               PROT_WRITE | PROT_READ | PROT_EXEC,
-                               flags, -1, 0);
-        if (code_gen_buffer == MAP_FAILED) {
-            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
-            exit(1);
-        }
+    if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
+        tb_size = MIN_CODE_GEN_BUFFER_SIZE;
     }
-#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
-    || defined(__DragonFly__) || defined(__OpenBSD__) \
-    || defined(__NetBSD__)
-    {
-        int flags;
-        void *addr = NULL;
-        flags = MAP_PRIVATE | MAP_ANONYMOUS;
-#if defined(__x86_64__)
-        /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
-         * 0x40000000 is free */
-        flags |= MAP_FIXED;
-        addr = (void *)0x40000000;
-        /* Cannot map more than that */
-        if (code_gen_buffer_size > (800 * 1024 * 1024))
-            code_gen_buffer_size = (800 * 1024 * 1024);
-#elif defined(__sparc__) && HOST_LONG_BITS == 64
-        // Map the buffer below 2G, so we can use direct calls and branches
-        addr = (void *) 0x40000000UL;
-        if (code_gen_buffer_size > (512 * 1024 * 1024)) {
-            code_gen_buffer_size = (512 * 1024 * 1024);
-        }
-#endif
-        code_gen_buffer = mmap(addr, code_gen_buffer_size,
-                               PROT_WRITE | PROT_READ | PROT_EXEC, 
-                               flags, -1, 0);
-        if (code_gen_buffer == MAP_FAILED) {
-            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
-            exit(1);
-        }
+    if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
+        tb_size = MAX_CODE_GEN_BUFFER_SIZE;
     }
+    code_gen_buffer_size = tb_size;
+    return tb_size;
+}
+
+#ifdef USE_STATIC_CODE_GEN_BUFFER
+static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
+    __attribute__((aligned(CODE_GEN_ALIGN)));
+
+static inline void *alloc_code_gen_buffer(void)
+{
+    map_exec(static_code_gen_buffer, code_gen_buffer_size);
+    return static_code_gen_buffer;
+}
+#elif defined(USE_MMAP)
+static inline void *alloc_code_gen_buffer(void)
+{
+    int flags = MAP_PRIVATE | MAP_ANONYMOUS;
+    uintptr_t start = 0;
+    void *buf;
+
+    /* Constrain the position of the buffer based on the host cpu.
+       Note that these addresses are chosen in concert with the
+       addresses assigned in the relevant linker script file.  */
+# if defined(__x86_64__) && defined(MAP_32BIT)
+    /* Force the memory down into low memory with the executable.
+       Leave the choice of exact location with the kernel.  */
+    flags |= MAP_32BIT;
+    /* Cannot expect to map more than 800MB in low memory.  */
+    if (code_gen_buffer_size > 800u * 1024 * 1024) {
+        code_gen_buffer_size = 800u * 1024 * 1024;
+    }
+# elif defined(__sparc__)
+    start = 0x40000000ul;
+# elif defined(__s390x__)
+    start = 0x90000000ul;
+# endif
+
+    buf = mmap((void *)start, code_gen_buffer_size,
+               PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
+    return buf == MAP_FAILED ? NULL : buf;
+}
 #else
-    code_gen_buffer = g_malloc(code_gen_buffer_size);
-    map_exec(code_gen_buffer, code_gen_buffer_size);
-#endif
-#endif /* !USE_STATIC_CODE_GEN_BUFFER */
+static inline void *alloc_code_gen_buffer(void)
+{
+    void *buf = g_malloc(code_gen_buffer_size);
+    if (buf) {
+        map_exec(buf, code_gen_buffer_size);
+    }
+    return buf;
+}
+#endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
+
+static inline void code_gen_alloc(size_t tb_size)
+{
+    code_gen_buffer_size = size_code_gen_buffer(tb_size);
+    code_gen_buffer = alloc_code_gen_buffer();
+    if (code_gen_buffer == NULL) {
+        fprintf(stderr, "Could not allocate dynamic translator buffer\n");
+        exit(1);
+    }
+
     map_exec(code_gen_prologue, sizeof(code_gen_prologue));
     code_gen_buffer_max_size = code_gen_buffer_size -
         (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);