diff mbox

[v4,02/11] tcg: light re-factor and pass down TranslationBlock

Message ID 1438593291-27109-3-git-send-email-alex.bennee@linaro.org
State New
Headers show

Commit Message

Alex Bennée Aug. 3, 2015, 9:14 a.m. UTC
My later debugging patches need access to the origin PC. At the same
time we have a slightly clumsy pass-by-reference access to the size of
the translated block again for debugging purposes.

To simplify the code I have expanded the TranslationBlock structure to
include a tc_size variable to compliment the tc_ptr (and the subject pc,
block size). This is set on code generation and then accessed directly
by all the people that need it.

I've also cleaned up some comments and removed un-used return variables.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

---
v1
 - checkpatch fixes
---
 include/exec/exec-all.h |  4 ++--
 tcg/tcg.c               | 22 +++++++++++++---------
 tcg/tcg.h               |  5 ++---
 translate-all.c         | 43 ++++++++++++++++++-------------------------
 4 files changed, 35 insertions(+), 39 deletions(-)

Comments

Aurelien Jarno Aug. 4, 2015, 12:36 p.m. UTC | #1
On 2015-08-03 10:14, Alex Bennée wrote:
> My later debugging patches need access to the origin PC. At the same
> time we have a slightly clumsy pass-by-reference access to the size of
> the translated block again for debugging purposes.
> 
> To simplify the code I have expanded the TranslationBlock structure to
> include a tc_size variable to compliment the tc_ptr (and the subject pc,
> block size). This is set on code generation and then accessed directly
> by all the people that need it.
> 
> I've also cleaned up some comments and removed un-used return variables.
> 
> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
> 
> ---
> v1
>  - checkpatch fixes
> ---
>  include/exec/exec-all.h |  4 ++--
>  tcg/tcg.c               | 22 +++++++++++++---------
>  tcg/tcg.h               |  5 ++---
>  translate-all.c         | 43 ++++++++++++++++++-------------------------
>  4 files changed, 35 insertions(+), 39 deletions(-)
> 
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index a6fce04..7ac8e7e 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -78,8 +78,7 @@ void restore_state_to_opc(CPUArchState *env, struct TranslationBlock *tb,
>                            int pc_pos);
>  
>  void cpu_gen_init(void);
> -int cpu_gen_code(CPUArchState *env, struct TranslationBlock *tb,
> -                 int *gen_code_size_ptr);
> +void cpu_gen_code(CPUArchState *env, struct TranslationBlock *tb);
>  bool cpu_restore_state(CPUState *cpu, uintptr_t searched_pc);
>  void page_size_init(void);
>  
> @@ -153,6 +152,7 @@ struct TranslationBlock {
>  #define CF_USE_ICOUNT  0x20000
>  
>      void *tc_ptr;    /* pointer to the translated code */
> +    uint32_t tc_size;/* size of translated code */
>      /* next matching tb for physical address. */
>      struct TranslationBlock *phys_hash_next;
>      /* first and second physical page containing code. The lower bit

What's the impact on the memory here? Given the alignement, we add 8
bytes to the structure on a 64-bit host.

> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index 0892a9b..587bd89 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -2295,12 +2295,15 @@ void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
>  #endif
>  
>  
> -static inline int tcg_gen_code_common(TCGContext *s,
> -                                      tcg_insn_unit *gen_code_buf,
> +static inline int tcg_gen_code_common(TCGContext *s, TranslationBlock *tb,
>                                        long search_pc)
>  {
>      int oi, oi_next;
>  
> +    /* if we are coming via cpu_restore_state we already have a
> +       generated block */
> +     g_assert(tb->tc_size == 0 || search_pc > 0);

In TCG code, you should use tcg_debug_assert.

> +
>  #ifdef DEBUG_DISAS
>      if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
>          qemu_log("OP:\n");
> @@ -2338,8 +2341,8 @@ static inline int tcg_gen_code_common(TCGContext *s,
>  
>      tcg_reg_alloc_start(s);
>  
> -    s->code_buf = gen_code_buf;
> -    s->code_ptr = gen_code_buf;
> +    s->code_buf = tb->tc_ptr;
> +    s->code_ptr = tb->tc_ptr;
>  
>      tcg_out_tb_init(s);
>  
> @@ -2402,7 +2405,7 @@ static inline int tcg_gen_code_common(TCGContext *s,
>      return -1;
>  }
>  
> -int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf)
> +void tcg_gen_code(TCGContext *s, TranslationBlock *tb)
>  {
>  #ifdef CONFIG_PROFILER
>      {
> @@ -2422,22 +2425,23 @@ int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf)
>      }
>  #endif
>  
> -    tcg_gen_code_common(s, gen_code_buf, -1);
> +    tcg_gen_code_common(s, tb, -1);
>  
>      /* flush instruction cache */
>      flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
>  
> -    return tcg_current_code_size(s);
> +    tb->tc_size = tcg_current_code_size(s);
> +    return;
>  }
>  
>  /* Return the index of the micro operation such as the pc after is <
>     offset bytes from the start of the TB.  The contents of gen_code_buf must
>     not be changed, though writing the same values is ok.
>     Return -1 if not found. */
> -int tcg_gen_code_search_pc(TCGContext *s, tcg_insn_unit *gen_code_buf,
> +int tcg_gen_code_search_pc(TCGContext *s, TranslationBlock *tb,
>                             long offset)
>  {
> -    return tcg_gen_code_common(s, gen_code_buf, offset);
> +    return tcg_gen_code_common(s, tb, offset);
>  }
>  
>  #ifdef CONFIG_PROFILER
> diff --git a/tcg/tcg.h b/tcg/tcg.h
> index 231a781..e4f9f97 100644
> --- a/tcg/tcg.h
> +++ b/tcg/tcg.h
> @@ -613,9 +613,8 @@ void tcg_context_init(TCGContext *s);
>  void tcg_prologue_init(TCGContext *s);
>  void tcg_func_start(TCGContext *s);
>  
> -int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf);
> -int tcg_gen_code_search_pc(TCGContext *s, tcg_insn_unit *gen_code_buf,
> -                           long offset);
> +void tcg_gen_code(TCGContext *s, TranslationBlock *tb);
> +int  tcg_gen_code_search_pc(TCGContext *s, TranslationBlock *tb, long offset);
>  
>  void tcg_set_frame(TCGContext *s, int reg, intptr_t start, intptr_t size);
>  
> diff --git a/translate-all.c b/translate-all.c
> index c05e2a5..e8072d8 100644
> --- a/translate-all.c
> +++ b/translate-all.c
> @@ -157,17 +157,12 @@ void cpu_gen_init(void)
>      tcg_context_init(&tcg_ctx); 
>  }
>  
> -/* return non zero if the very first instruction is invalid so that
> -   the virtual CPU can trigger an exception.
> -
> -   '*gen_code_size_ptr' contains the size of the generated code (host
> -   code).
> +/* code generation. On return tb->tc_size will reflect the size of
> + * generated code.
>  */
> -int cpu_gen_code(CPUArchState *env, TranslationBlock *tb, int *gen_code_size_ptr)
> +void cpu_gen_code(CPUArchState *env, TranslationBlock *tb)
>  {
>      TCGContext *s = &tcg_ctx;
> -    tcg_insn_unit *gen_code_buf;
> -    int gen_code_size;
>  #ifdef CONFIG_PROFILER
>      int64_t ti;
>  #endif
> @@ -184,7 +179,6 @@ int cpu_gen_code(CPUArchState *env, TranslationBlock *tb, int *gen_code_size_ptr
>      trace_translate_block(tb, tb->pc, tb->tc_ptr);
>  
>      /* generate machine code */
> -    gen_code_buf = tb->tc_ptr;
>      tb->tb_next_offset[0] = 0xffff;
>      tb->tb_next_offset[1] = 0xffff;
>      s->tb_next_offset = tb->tb_next_offset;
> @@ -201,24 +195,23 @@ int cpu_gen_code(CPUArchState *env, TranslationBlock *tb, int *gen_code_size_ptr
>      s->interm_time += profile_getclock() - ti;
>      s->code_time -= profile_getclock();
>  #endif
> -    gen_code_size = tcg_gen_code(s, gen_code_buf);
> -    *gen_code_size_ptr = gen_code_size;
> +   tcg_gen_code(s, tb);

Watch the indentation here.

> +
>  #ifdef CONFIG_PROFILER
>      s->code_time += profile_getclock();
>      s->code_in_len += tb->size;
> -    s->code_out_len += gen_code_size;
> +    s->code_out_len += tb->tc_size;
>  #endif
>  
> -    tb_write_perfmap(gen_code_buf, gen_code_size, tb->pc);
> +    tb_write_perfmap(tb->tc_ptr, tb->tc_size, tb->pc);
>  #ifdef DEBUG_DISAS
>      if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
> -        qemu_log("OUT: [size=%d]\n", gen_code_size);
> -        log_disas(tb->tc_ptr, gen_code_size);
> +        qemu_log("OUT: [size=%d]\n", tb->tc_size);
> +        log_disas(tb->tc_ptr, tb->tc_size);
>          qemu_log("\n");
>          qemu_log_flush();
>      }
>  #endif
> -    return 0;
>  }
>  
>  /* The cpu state corresponding to 'searched_pc' is restored.
> @@ -229,7 +222,6 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
>      CPUArchState *env = cpu->env_ptr;
>      TCGContext *s = &tcg_ctx;
>      int j;
> -    uintptr_t tc_ptr;
>  #ifdef CONFIG_PROFILER
>      int64_t ti;
>  #endif
> @@ -249,9 +241,9 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
>      }
>  
>      /* find opc index corresponding to search_pc */
> -    tc_ptr = (uintptr_t)tb->tc_ptr;
> -    if (searched_pc < tc_ptr)
> +    if (searched_pc < (uintptr_t) tb->tc_ptr) {
>          return -1;
> +    }
>  
>      s->tb_next_offset = tb->tb_next_offset;
>  #ifdef USE_DIRECT_JUMP
> @@ -261,8 +253,8 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
>      s->tb_jmp_offset = NULL;
>      s->tb_next = tb->tb_next;
>  #endif
> -    j = tcg_gen_code_search_pc(s, (tcg_insn_unit *)tc_ptr,
> -                               searched_pc - tc_ptr);
> +    j = tcg_gen_code_search_pc(s, tb,
> +                               searched_pc - (uintptr_t) tb->tc_ptr);
>      if (j < 0)
>          return -1;
>      /* now find start of instruction before */
> @@ -1029,7 +1021,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>      TranslationBlock *tb;
>      tb_page_addr_t phys_pc, phys_page2;
>      target_ulong virt_page2;
> -    int code_gen_size;
>  
>      phys_pc = get_page_addr_code(env, pc);
>      if (use_icount) {
> @@ -1045,12 +1036,14 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>          tcg_ctx.tb_ctx.tb_invalidated_flag = 1;
>      }
>      tb->tc_ptr = tcg_ctx.code_gen_ptr;
> +    tb->tc_size = 0;
>      tb->cs_base = cs_base;
>      tb->flags = flags;
>      tb->cflags = cflags;
> -    cpu_gen_code(env, tb, &code_gen_size);
> -    tcg_ctx.code_gen_ptr = (void *)(((uintptr_t)tcg_ctx.code_gen_ptr +
> -            code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
> +    cpu_gen_code(env, tb);
> +    tcg_ctx.code_gen_ptr = (void *) (
> +        ((uintptr_t) tcg_ctx.code_gen_ptr + tb->tc_size + CODE_GEN_ALIGN - 1)
> +        & ~(CODE_GEN_ALIGN - 1));
>  
>      /* check next page if needed */
>      virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
> -- 
> 2.5.0
> 
>
Alex Bennée Feb. 3, 2016, 6:38 p.m. UTC | #2
Aurelien Jarno <aurelien@aurel32.net> writes:

> On 2015-08-03 10:14, Alex Bennée wrote:
>> My later debugging patches need access to the origin PC. At the same
>> time we have a slightly clumsy pass-by-reference access to the size of
>> the translated block again for debugging purposes.
<snip>
>>      void *tc_ptr;    /* pointer to the translated code */
>> +    uint32_t tc_size;/* size of translated code */
>>      /* next matching tb for physical address. */
>>      struct TranslationBlock *phys_hash_next;
>>      /* first and second physical page containing code. The lower bit
>
> What's the impact on the memory here? Given the alignement, we add 8
> bytes to the structure on a 64-bit host.
<snip>

Well this has all got a lot simpler thanks to Richard's clean-up work
since this was last posted.

--
Alex Bennée
diff mbox

Patch

diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index a6fce04..7ac8e7e 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -78,8 +78,7 @@  void restore_state_to_opc(CPUArchState *env, struct TranslationBlock *tb,
                           int pc_pos);
 
 void cpu_gen_init(void);
-int cpu_gen_code(CPUArchState *env, struct TranslationBlock *tb,
-                 int *gen_code_size_ptr);
+void cpu_gen_code(CPUArchState *env, struct TranslationBlock *tb);
 bool cpu_restore_state(CPUState *cpu, uintptr_t searched_pc);
 void page_size_init(void);
 
@@ -153,6 +152,7 @@  struct TranslationBlock {
 #define CF_USE_ICOUNT  0x20000
 
     void *tc_ptr;    /* pointer to the translated code */
+    uint32_t tc_size;/* size of translated code */
     /* next matching tb for physical address. */
     struct TranslationBlock *phys_hash_next;
     /* first and second physical page containing code. The lower bit
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 0892a9b..587bd89 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -2295,12 +2295,15 @@  void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
 #endif
 
 
-static inline int tcg_gen_code_common(TCGContext *s,
-                                      tcg_insn_unit *gen_code_buf,
+static inline int tcg_gen_code_common(TCGContext *s, TranslationBlock *tb,
                                       long search_pc)
 {
     int oi, oi_next;
 
+    /* if we are coming via cpu_restore_state we already have a
+       generated block */
+     g_assert(tb->tc_size == 0 || search_pc > 0);
+
 #ifdef DEBUG_DISAS
     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
         qemu_log("OP:\n");
@@ -2338,8 +2341,8 @@  static inline int tcg_gen_code_common(TCGContext *s,
 
     tcg_reg_alloc_start(s);
 
-    s->code_buf = gen_code_buf;
-    s->code_ptr = gen_code_buf;
+    s->code_buf = tb->tc_ptr;
+    s->code_ptr = tb->tc_ptr;
 
     tcg_out_tb_init(s);
 
@@ -2402,7 +2405,7 @@  static inline int tcg_gen_code_common(TCGContext *s,
     return -1;
 }
 
-int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf)
+void tcg_gen_code(TCGContext *s, TranslationBlock *tb)
 {
 #ifdef CONFIG_PROFILER
     {
@@ -2422,22 +2425,23 @@  int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf)
     }
 #endif
 
-    tcg_gen_code_common(s, gen_code_buf, -1);
+    tcg_gen_code_common(s, tb, -1);
 
     /* flush instruction cache */
     flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
 
-    return tcg_current_code_size(s);
+    tb->tc_size = tcg_current_code_size(s);
+    return;
 }
 
 /* Return the index of the micro operation such as the pc after is <
    offset bytes from the start of the TB.  The contents of gen_code_buf must
    not be changed, though writing the same values is ok.
    Return -1 if not found. */
-int tcg_gen_code_search_pc(TCGContext *s, tcg_insn_unit *gen_code_buf,
+int tcg_gen_code_search_pc(TCGContext *s, TranslationBlock *tb,
                            long offset)
 {
-    return tcg_gen_code_common(s, gen_code_buf, offset);
+    return tcg_gen_code_common(s, tb, offset);
 }
 
 #ifdef CONFIG_PROFILER
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 231a781..e4f9f97 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -613,9 +613,8 @@  void tcg_context_init(TCGContext *s);
 void tcg_prologue_init(TCGContext *s);
 void tcg_func_start(TCGContext *s);
 
-int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf);
-int tcg_gen_code_search_pc(TCGContext *s, tcg_insn_unit *gen_code_buf,
-                           long offset);
+void tcg_gen_code(TCGContext *s, TranslationBlock *tb);
+int  tcg_gen_code_search_pc(TCGContext *s, TranslationBlock *tb, long offset);
 
 void tcg_set_frame(TCGContext *s, int reg, intptr_t start, intptr_t size);
 
diff --git a/translate-all.c b/translate-all.c
index c05e2a5..e8072d8 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -157,17 +157,12 @@  void cpu_gen_init(void)
     tcg_context_init(&tcg_ctx); 
 }
 
-/* return non zero if the very first instruction is invalid so that
-   the virtual CPU can trigger an exception.
-
-   '*gen_code_size_ptr' contains the size of the generated code (host
-   code).
+/* code generation. On return tb->tc_size will reflect the size of
+ * generated code.
 */
-int cpu_gen_code(CPUArchState *env, TranslationBlock *tb, int *gen_code_size_ptr)
+void cpu_gen_code(CPUArchState *env, TranslationBlock *tb)
 {
     TCGContext *s = &tcg_ctx;
-    tcg_insn_unit *gen_code_buf;
-    int gen_code_size;
 #ifdef CONFIG_PROFILER
     int64_t ti;
 #endif
@@ -184,7 +179,6 @@  int cpu_gen_code(CPUArchState *env, TranslationBlock *tb, int *gen_code_size_ptr
     trace_translate_block(tb, tb->pc, tb->tc_ptr);
 
     /* generate machine code */
-    gen_code_buf = tb->tc_ptr;
     tb->tb_next_offset[0] = 0xffff;
     tb->tb_next_offset[1] = 0xffff;
     s->tb_next_offset = tb->tb_next_offset;
@@ -201,24 +195,23 @@  int cpu_gen_code(CPUArchState *env, TranslationBlock *tb, int *gen_code_size_ptr
     s->interm_time += profile_getclock() - ti;
     s->code_time -= profile_getclock();
 #endif
-    gen_code_size = tcg_gen_code(s, gen_code_buf);
-    *gen_code_size_ptr = gen_code_size;
+   tcg_gen_code(s, tb);
+
 #ifdef CONFIG_PROFILER
     s->code_time += profile_getclock();
     s->code_in_len += tb->size;
-    s->code_out_len += gen_code_size;
+    s->code_out_len += tb->tc_size;
 #endif
 
-    tb_write_perfmap(gen_code_buf, gen_code_size, tb->pc);
+    tb_write_perfmap(tb->tc_ptr, tb->tc_size, tb->pc);
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
-        qemu_log("OUT: [size=%d]\n", gen_code_size);
-        log_disas(tb->tc_ptr, gen_code_size);
+        qemu_log("OUT: [size=%d]\n", tb->tc_size);
+        log_disas(tb->tc_ptr, tb->tc_size);
         qemu_log("\n");
         qemu_log_flush();
     }
 #endif
-    return 0;
 }
 
 /* The cpu state corresponding to 'searched_pc' is restored.
@@ -229,7 +222,6 @@  static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
     CPUArchState *env = cpu->env_ptr;
     TCGContext *s = &tcg_ctx;
     int j;
-    uintptr_t tc_ptr;
 #ifdef CONFIG_PROFILER
     int64_t ti;
 #endif
@@ -249,9 +241,9 @@  static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
     }
 
     /* find opc index corresponding to search_pc */
-    tc_ptr = (uintptr_t)tb->tc_ptr;
-    if (searched_pc < tc_ptr)
+    if (searched_pc < (uintptr_t) tb->tc_ptr) {
         return -1;
+    }
 
     s->tb_next_offset = tb->tb_next_offset;
 #ifdef USE_DIRECT_JUMP
@@ -261,8 +253,8 @@  static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
     s->tb_jmp_offset = NULL;
     s->tb_next = tb->tb_next;
 #endif
-    j = tcg_gen_code_search_pc(s, (tcg_insn_unit *)tc_ptr,
-                               searched_pc - tc_ptr);
+    j = tcg_gen_code_search_pc(s, tb,
+                               searched_pc - (uintptr_t) tb->tc_ptr);
     if (j < 0)
         return -1;
     /* now find start of instruction before */
@@ -1029,7 +1021,6 @@  TranslationBlock *tb_gen_code(CPUState *cpu,
     TranslationBlock *tb;
     tb_page_addr_t phys_pc, phys_page2;
     target_ulong virt_page2;
-    int code_gen_size;
 
     phys_pc = get_page_addr_code(env, pc);
     if (use_icount) {
@@ -1045,12 +1036,14 @@  TranslationBlock *tb_gen_code(CPUState *cpu,
         tcg_ctx.tb_ctx.tb_invalidated_flag = 1;
     }
     tb->tc_ptr = tcg_ctx.code_gen_ptr;
+    tb->tc_size = 0;
     tb->cs_base = cs_base;
     tb->flags = flags;
     tb->cflags = cflags;
-    cpu_gen_code(env, tb, &code_gen_size);
-    tcg_ctx.code_gen_ptr = (void *)(((uintptr_t)tcg_ctx.code_gen_ptr +
-            code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
+    cpu_gen_code(env, tb);
+    tcg_ctx.code_gen_ptr = (void *) (
+        ((uintptr_t) tcg_ctx.code_gen_ptr + tb->tc_size + CODE_GEN_ALIGN - 1)
+        & ~(CODE_GEN_ALIGN - 1));
 
     /* check next page if needed */
     virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;