Message ID | 1442953507-4074-24-git-send-email-rth@twiddle.net |
---|---|
State | New |
Headers | show |
On 22 September 2015 at 13:25, Richard Henderson <rth@twiddle.net> wrote: > By putting the prologue at the end, we risk overwriting the > prologue should our estimate of maximum TB size. Given the > two different placements of the call to tcg_prologue_init, > move the high water mark computation into tcg_prologue_init. > > Signed-off-by: Richard Henderson <rth@twiddle.net> > --- > tcg/tcg.c | 25 +++++++++++++++++++------ > translate-all.c | 29 ++++++++++------------------- > 2 files changed, 29 insertions(+), 25 deletions(-) > > diff --git a/tcg/tcg.c b/tcg/tcg.c > index 8126af9..db4032a 100644 > --- a/tcg/tcg.c > +++ b/tcg/tcg.c > @@ -363,17 +363,30 @@ void tcg_context_init(TCGContext *s) > > void tcg_prologue_init(TCGContext *s) > { > - /* init global prologue and epilogue */ > - s->code_buf = s->code_gen_prologue; > - s->code_ptr = s->code_buf; > + size_t prologue_size, total_size; > + > + /* Put the prologue at the beginning of code_gen_buffer. */ > + s->code_ptr = s->code_buf = s->code_gen_prologue = s->code_gen_buffer; > + > + /* Generate the prologue. */ > tcg_target_qemu_prologue(s); > flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr); > > + /* Deduct the prologue from the buffer. */ > + prologue_size = tcg_current_code_size(s); > + s->code_gen_ptr = s->code_gen_buffer = s->code_buf = s->code_ptr; > + > + /* Compute a high-water mark, at which we voluntarily flush the > + buffer and start over. */ > + total_size = s->code_gen_buffer_size -= prologue_size; -= on the RHS of an assignment seems unnecessarily tricky to me; I think splitting this into two lines would be clearer. > + s->code_gen_buffer_max_size = total_size - TCG_MAX_OP_SIZE * OPC_BUF_SIZE; > + > + tcg_register_jit(s->code_gen_buffer, total_size); > + > #ifdef DEBUG_DISAS > if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { > - size_t size = tcg_current_code_size(s); > - qemu_log("PROLOGUE: [size=%zu]\n", size); > - log_disas(s->code_buf, size); > + qemu_log("PROLOGUE: [size=%zu]\n", prologue_size); > + log_disas(s->code_gen_prologue, prologue_size); > qemu_log("\n"); > qemu_log_flush(); > } > diff --git a/translate-all.c b/translate-all.c > index f6b8148..4c994bb 100644 > --- a/translate-all.c > +++ b/translate-all.c > @@ -689,23 +689,16 @@ static inline void code_gen_alloc(size_t tb_size) > } > > qemu_madvise(tcg_ctx.code_gen_buffer, tcg_ctx.code_gen_buffer_size, > - QEMU_MADV_HUGEPAGE); > - > - /* Steal room for the prologue at the end of the buffer. This ensures > - (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches > - from TB's to the prologue are going to be in range. It also means > - that we don't need to mark (additional) portions of the data segment > - as executable. */ I take it we don't have any annoying targets with branch-forwards ranges larger than their branch-backwards ranges... > - tcg_ctx.code_gen_prologue = tcg_ctx.code_gen_buffer + > - tcg_ctx.code_gen_buffer_size - 1024; > - tcg_ctx.code_gen_buffer_size -= 1024; > - > - tcg_ctx.code_gen_buffer_max_size = tcg_ctx.code_gen_buffer_size - > - (TCG_MAX_OP_SIZE * OPC_BUF_SIZE); > - tcg_ctx.code_gen_max_blocks = tcg_ctx.code_gen_buffer_size / > - CODE_GEN_AVG_BLOCK_SIZE; > - tcg_ctx.tb_ctx.tbs = > - g_malloc(tcg_ctx.code_gen_max_blocks * sizeof(TranslationBlock)); > + QEMU_MADV_HUGEPAGE); > + > + /* Estimate a good size for the number of TBs we can support. We > + still haven't deducted the prologue from the buffer size here, > + but that's minimal and won't affect the estimate much. */ > + tcg_ctx.code_gen_max_blocks > + = tcg_ctx.code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE; > + tcg_ctx.tb_ctx.tbs > + = g_malloc(tcg_ctx.code_gen_max_blocks * sizeof(TranslationBlock)); Prefer g_new(TranslationBlock, tcg_ctx.code_gen_max_blocks). > + > qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock); > } > > @@ -716,8 +709,6 @@ void tcg_exec_init(unsigned long tb_size) > { > cpu_gen_init(); > code_gen_alloc(tb_size); > - tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer; > - tcg_register_jit(tcg_ctx.code_gen_buffer, tcg_ctx.code_gen_buffer_size); > page_init(); > #if defined(CONFIG_SOFTMMU) > /* There's no guest base to take into account, so go ahead and > -- > 2.4.3 > Otherwise Reviewed-by: Peter Maydell <peter.maydell@linaro.org> thanks -- PMM
On 09/23/2015 12:28 PM, Peter Maydell wrote: >> - /* Steal room for the prologue at the end of the buffer. This ensures >> - (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches >> - from TB's to the prologue are going to be in range. It also means >> - that we don't need to mark (additional) portions of the data segment >> - as executable. */ > > I take it we don't have any annoying targets with branch-forwards > ranges larger than their branch-backwards ranges... No, thankfully. r~
diff --git a/tcg/tcg.c b/tcg/tcg.c index 8126af9..db4032a 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -363,17 +363,30 @@ void tcg_context_init(TCGContext *s) void tcg_prologue_init(TCGContext *s) { - /* init global prologue and epilogue */ - s->code_buf = s->code_gen_prologue; - s->code_ptr = s->code_buf; + size_t prologue_size, total_size; + + /* Put the prologue at the beginning of code_gen_buffer. */ + s->code_ptr = s->code_buf = s->code_gen_prologue = s->code_gen_buffer; + + /* Generate the prologue. */ tcg_target_qemu_prologue(s); flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr); + /* Deduct the prologue from the buffer. */ + prologue_size = tcg_current_code_size(s); + s->code_gen_ptr = s->code_gen_buffer = s->code_buf = s->code_ptr; + + /* Compute a high-water mark, at which we voluntarily flush the + buffer and start over. */ + total_size = s->code_gen_buffer_size -= prologue_size; + s->code_gen_buffer_max_size = total_size - TCG_MAX_OP_SIZE * OPC_BUF_SIZE; + + tcg_register_jit(s->code_gen_buffer, total_size); + #ifdef DEBUG_DISAS if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) { - size_t size = tcg_current_code_size(s); - qemu_log("PROLOGUE: [size=%zu]\n", size); - log_disas(s->code_buf, size); + qemu_log("PROLOGUE: [size=%zu]\n", prologue_size); + log_disas(s->code_gen_prologue, prologue_size); qemu_log("\n"); qemu_log_flush(); } diff --git a/translate-all.c b/translate-all.c index f6b8148..4c994bb 100644 --- a/translate-all.c +++ b/translate-all.c @@ -689,23 +689,16 @@ static inline void code_gen_alloc(size_t tb_size) } qemu_madvise(tcg_ctx.code_gen_buffer, tcg_ctx.code_gen_buffer_size, - QEMU_MADV_HUGEPAGE); - - /* Steal room for the prologue at the end of the buffer. This ensures - (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches - from TB's to the prologue are going to be in range. It also means - that we don't need to mark (additional) portions of the data segment - as executable. */ - tcg_ctx.code_gen_prologue = tcg_ctx.code_gen_buffer + - tcg_ctx.code_gen_buffer_size - 1024; - tcg_ctx.code_gen_buffer_size -= 1024; - - tcg_ctx.code_gen_buffer_max_size = tcg_ctx.code_gen_buffer_size - - (TCG_MAX_OP_SIZE * OPC_BUF_SIZE); - tcg_ctx.code_gen_max_blocks = tcg_ctx.code_gen_buffer_size / - CODE_GEN_AVG_BLOCK_SIZE; - tcg_ctx.tb_ctx.tbs = - g_malloc(tcg_ctx.code_gen_max_blocks * sizeof(TranslationBlock)); + QEMU_MADV_HUGEPAGE); + + /* Estimate a good size for the number of TBs we can support. We + still haven't deducted the prologue from the buffer size here, + but that's minimal and won't affect the estimate much. */ + tcg_ctx.code_gen_max_blocks + = tcg_ctx.code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE; + tcg_ctx.tb_ctx.tbs + = g_malloc(tcg_ctx.code_gen_max_blocks * sizeof(TranslationBlock)); + qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock); } @@ -716,8 +709,6 @@ void tcg_exec_init(unsigned long tb_size) { cpu_gen_init(); code_gen_alloc(tb_size); - tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer; - tcg_register_jit(tcg_ctx.code_gen_buffer, tcg_ctx.code_gen_buffer_size); page_init(); #if defined(CONFIG_SOFTMMU) /* There's no guest base to take into account, so go ahead and
By putting the prologue at the end, we risk overwriting the prologue should our estimate of maximum TB size. Given the two different placements of the call to tcg_prologue_init, move the high water mark computation into tcg_prologue_init. Signed-off-by: Richard Henderson <rth@twiddle.net> --- tcg/tcg.c | 25 +++++++++++++++++++------ translate-all.c | 29 ++++++++++------------------- 2 files changed, 29 insertions(+), 25 deletions(-)