diff mbox

[v3,23/25] tcg: Emit prologue to the beginning of code_gen_buffer

Message ID 1442953507-4074-24-git-send-email-rth@twiddle.net
State New
Headers show

Commit Message

Richard Henderson Sept. 22, 2015, 8:25 p.m. UTC
By putting the prologue at the end, we risk overwriting the
prologue should our estimate of maximum TB size prove too small.  Given the
two different placements of the call to tcg_prologue_init,
move the high water mark computation into tcg_prologue_init.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/tcg.c       | 25 +++++++++++++++++++------
 translate-all.c | 29 ++++++++++-------------------
 2 files changed, 29 insertions(+), 25 deletions(-)

Comments

Peter Maydell Sept. 23, 2015, 7:28 p.m. UTC | #1
On 22 September 2015 at 13:25, Richard Henderson <rth@twiddle.net> wrote:
> By putting the prologue at the end, we risk overwriting the
> prologue should our estimate of maximum TB size prove too small.  Given the
> two different placements of the call to tcg_prologue_init,
> move the high water mark computation into tcg_prologue_init.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/tcg.c       | 25 +++++++++++++++++++------
>  translate-all.c | 29 ++++++++++-------------------
>  2 files changed, 29 insertions(+), 25 deletions(-)
>
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index 8126af9..db4032a 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -363,17 +363,30 @@ void tcg_context_init(TCGContext *s)
>
>  void tcg_prologue_init(TCGContext *s)
>  {
> -    /* init global prologue and epilogue */
> -    s->code_buf = s->code_gen_prologue;
> -    s->code_ptr = s->code_buf;
> +    size_t prologue_size, total_size;
> +
> +    /* Put the prologue at the beginning of code_gen_buffer.  */
> +    s->code_ptr = s->code_buf = s->code_gen_prologue = s->code_gen_buffer;
> +
> +    /* Generate the prologue.  */
>      tcg_target_qemu_prologue(s);
>      flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
>
> +    /* Deduct the prologue from the buffer.  */
> +    prologue_size = tcg_current_code_size(s);
> +    s->code_gen_ptr = s->code_gen_buffer = s->code_buf = s->code_ptr;
> +
> +    /* Compute a high-water mark, at which we voluntarily flush the
> +       buffer and start over.  */
> +    total_size = s->code_gen_buffer_size -= prologue_size;

-= on the RHS of an assignment seems unnecessarily tricky to me;
I think splitting this into two lines would be clearer.

> +    s->code_gen_buffer_max_size = total_size - TCG_MAX_OP_SIZE * OPC_BUF_SIZE;
> +
> +    tcg_register_jit(s->code_gen_buffer, total_size);
> +
>  #ifdef DEBUG_DISAS
>      if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
> -        size_t size = tcg_current_code_size(s);
> -        qemu_log("PROLOGUE: [size=%zu]\n", size);
> -        log_disas(s->code_buf, size);
> +        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
> +        log_disas(s->code_gen_prologue, prologue_size);
>          qemu_log("\n");
>          qemu_log_flush();
>      }
> diff --git a/translate-all.c b/translate-all.c
> index f6b8148..4c994bb 100644
> --- a/translate-all.c
> +++ b/translate-all.c
> @@ -689,23 +689,16 @@ static inline void code_gen_alloc(size_t tb_size)
>      }
>
>      qemu_madvise(tcg_ctx.code_gen_buffer, tcg_ctx.code_gen_buffer_size,
> -            QEMU_MADV_HUGEPAGE);
> -
> -    /* Steal room for the prologue at the end of the buffer.  This ensures
> -       (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
> -       from TB's to the prologue are going to be in range.  It also means
> -       that we don't need to mark (additional) portions of the data segment
> -       as executable.  */

I take it we don't have any annoying targets with branch-forwards
ranges larger than their branch-backwards ranges...

> -    tcg_ctx.code_gen_prologue = tcg_ctx.code_gen_buffer +
> -            tcg_ctx.code_gen_buffer_size - 1024;
> -    tcg_ctx.code_gen_buffer_size -= 1024;
> -
> -    tcg_ctx.code_gen_buffer_max_size = tcg_ctx.code_gen_buffer_size -
> -        (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
> -    tcg_ctx.code_gen_max_blocks = tcg_ctx.code_gen_buffer_size /
> -            CODE_GEN_AVG_BLOCK_SIZE;
> -    tcg_ctx.tb_ctx.tbs =
> -            g_malloc(tcg_ctx.code_gen_max_blocks * sizeof(TranslationBlock));
> +                 QEMU_MADV_HUGEPAGE);
> +
> +    /* Estimate a good size for the number of TBs we can support.  We
> +       still haven't deducted the prologue from the buffer size here,
> +       but that's minimal and won't affect the estimate much.  */
> +    tcg_ctx.code_gen_max_blocks
> +        = tcg_ctx.code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
> +    tcg_ctx.tb_ctx.tbs
> +        = g_malloc(tcg_ctx.code_gen_max_blocks * sizeof(TranslationBlock));

Prefer g_new(TranslationBlock, tcg_ctx.code_gen_max_blocks).

> +
>      qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
>  }
>
> @@ -716,8 +709,6 @@ void tcg_exec_init(unsigned long tb_size)
>  {
>      cpu_gen_init();
>      code_gen_alloc(tb_size);
> -    tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer;
> -    tcg_register_jit(tcg_ctx.code_gen_buffer, tcg_ctx.code_gen_buffer_size);
>      page_init();
>  #if defined(CONFIG_SOFTMMU)
>      /* There's no guest base to take into account, so go ahead and
> --
> 2.4.3
>

Otherwise
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM
Richard Henderson Sept. 23, 2015, 7:39 p.m. UTC | #2
On 09/23/2015 12:28 PM, Peter Maydell wrote:
>> -    /* Steal room for the prologue at the end of the buffer.  This ensures
>> -       (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
>> -       from TB's to the prologue are going to be in range.  It also means
>> -       that we don't need to mark (additional) portions of the data segment
>> -       as executable.  */
> 
> I take it we don't have any annoying targets with branch-forwards
> ranges larger than their branch-backwards ranges...

No, thankfully.


r~
diff mbox

Patch

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 8126af9..db4032a 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -363,17 +363,30 @@  void tcg_context_init(TCGContext *s)
 
 void tcg_prologue_init(TCGContext *s)
 {
-    /* init global prologue and epilogue */
-    s->code_buf = s->code_gen_prologue;
-    s->code_ptr = s->code_buf;
+    size_t prologue_size, total_size;
+
+    /* Put the prologue at the beginning of code_gen_buffer.  */
+    s->code_ptr = s->code_buf = s->code_gen_prologue = s->code_gen_buffer;
+
+    /* Generate the prologue.  */
     tcg_target_qemu_prologue(s);
     flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
 
+    /* Deduct the prologue from the buffer.  */
+    prologue_size = tcg_current_code_size(s);
+    s->code_gen_ptr = s->code_gen_buffer = s->code_buf = s->code_ptr;
+
+    /* Compute a high-water mark, at which we voluntarily flush the
+       buffer and start over.  */
+    total_size = s->code_gen_buffer_size -= prologue_size;
+    s->code_gen_buffer_max_size = total_size - TCG_MAX_OP_SIZE * OPC_BUF_SIZE;
+
+    tcg_register_jit(s->code_gen_buffer, total_size);
+
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
-        size_t size = tcg_current_code_size(s);
-        qemu_log("PROLOGUE: [size=%zu]\n", size);
-        log_disas(s->code_buf, size);
+        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
+        log_disas(s->code_gen_prologue, prologue_size);
         qemu_log("\n");
         qemu_log_flush();
     }
diff --git a/translate-all.c b/translate-all.c
index f6b8148..4c994bb 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -689,23 +689,16 @@  static inline void code_gen_alloc(size_t tb_size)
     }
 
     qemu_madvise(tcg_ctx.code_gen_buffer, tcg_ctx.code_gen_buffer_size,
-            QEMU_MADV_HUGEPAGE);
-
-    /* Steal room for the prologue at the end of the buffer.  This ensures
-       (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
-       from TB's to the prologue are going to be in range.  It also means
-       that we don't need to mark (additional) portions of the data segment
-       as executable.  */
-    tcg_ctx.code_gen_prologue = tcg_ctx.code_gen_buffer +
-            tcg_ctx.code_gen_buffer_size - 1024;
-    tcg_ctx.code_gen_buffer_size -= 1024;
-
-    tcg_ctx.code_gen_buffer_max_size = tcg_ctx.code_gen_buffer_size -
-        (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
-    tcg_ctx.code_gen_max_blocks = tcg_ctx.code_gen_buffer_size /
-            CODE_GEN_AVG_BLOCK_SIZE;
-    tcg_ctx.tb_ctx.tbs =
-            g_malloc(tcg_ctx.code_gen_max_blocks * sizeof(TranslationBlock));
+                 QEMU_MADV_HUGEPAGE);
+
+    /* Estimate a good size for the number of TBs we can support.  We
+       still haven't deducted the prologue from the buffer size here,
+       but that's minimal and won't affect the estimate much.  */
+    tcg_ctx.code_gen_max_blocks
+        = tcg_ctx.code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
+    tcg_ctx.tb_ctx.tbs
+        = g_malloc(tcg_ctx.code_gen_max_blocks * sizeof(TranslationBlock));
+
     qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
 }
 
@@ -716,8 +709,6 @@  void tcg_exec_init(unsigned long tb_size)
 {
     cpu_gen_init();
     code_gen_alloc(tb_size);
-    tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer;
-    tcg_register_jit(tcg_ctx.code_gen_buffer, tcg_ctx.code_gen_buffer_size);
     page_init();
 #if defined(CONFIG_SOFTMMU)
     /* There's no guest base to take into account, so go ahead and