Patchwork [2/2] TCG: Move translation block variables to new context inside tcg_ctx: tb_ctx

login
register
mail settings
Submitter Evgeny Voevodin
Date Jan. 31, 2013, 6:47 p.m.
Message ID <1359658043-2425-3-git-send-email-evgenyvoevodin@gmail.com>
Download mbox | patch
Permalink /patch/217256/
State New
Headers show

Comments

Evgeny Voevodin - Jan. 31, 2013, 6:47 p.m.
It's worth to clean-up translation blocks variables and move them
into one context as was suggested by Swirl.
Also if we use this context directly inside tcg_ctx, then it
speeds up code generation a bit.

Signed-off-by: Evgeny Voevodin <evgenyvoevodin@gmail.com>
---
 cpu-exec.c              |   18 ++++-----
 include/exec/exec-all.h |   27 +++++++++----
 linux-user/main.c       |    6 +--
 tcg/tcg.h               |    2 +
 translate-all.c         |   96 +++++++++++++++++++++++------------------------
 5 files changed, 79 insertions(+), 70 deletions(-)

Patch

diff --git a/cpu-exec.c b/cpu-exec.c
index 19ebb4a..ff9a884 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -23,8 +23,6 @@ 
 #include "qemu/atomic.h"
 #include "sysemu/qtest.h"
 
-int tb_invalidated_flag;
-
 //#define CONFIG_DEBUG_EXEC
 
 bool qemu_cpu_has_work(CPUState *cpu)
@@ -90,13 +88,13 @@  static TranslationBlock *tb_find_slow(CPUArchState *env,
     tb_page_addr_t phys_pc, phys_page1;
     target_ulong virt_page2;
 
-    tb_invalidated_flag = 0;
+    tcg_ctx.tb_ctx.tb_invalidated_flag = 0;
 
     /* find translated block using physical mappings */
     phys_pc = get_page_addr_code(env, pc);
     phys_page1 = phys_pc & TARGET_PAGE_MASK;
     h = tb_phys_hash_func(phys_pc);
-    ptb1 = &tb_phys_hash[h];
+    ptb1 = &tcg_ctx.tb_ctx.tb_phys_hash[h];
     for(;;) {
         tb = *ptb1;
         if (!tb)
@@ -128,8 +126,8 @@  static TranslationBlock *tb_find_slow(CPUArchState *env,
     /* Move the last found TB to the head of the list */
     if (likely(*ptb1)) {
         *ptb1 = tb->phys_hash_next;
-        tb->phys_hash_next = tb_phys_hash[h];
-        tb_phys_hash[h] = tb;
+        tb->phys_hash_next = tcg_ctx.tb_ctx.tb_phys_hash[h];
+        tcg_ctx.tb_ctx.tb_phys_hash[h] = tb;
     }
     /* we add the TB in the virtual pc hash table */
     env->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb;
@@ -563,16 +561,16 @@  int cpu_exec(CPUArchState *env)
 #endif
                 }
 #endif /* DEBUG_DISAS || CONFIG_DEBUG_EXEC */
-                spin_lock(&tb_lock);
+                spin_lock(&tcg_ctx.tb_ctx.tb_lock);
                 tb = tb_find_fast(env);
                 /* Note: we do it here to avoid a gcc bug on Mac OS X when
                    doing it in tb_find_slow */
-                if (tb_invalidated_flag) {
+                if (tcg_ctx.tb_ctx.tb_invalidated_flag) {
                     /* as some TB could have been invalidated because
                        of memory exceptions while generating the code, we
                        must recompute the hash index here */
                     next_tb = 0;
-                    tb_invalidated_flag = 0;
+                    tcg_ctx.tb_ctx.tb_invalidated_flag = 0;
                 }
 #ifdef CONFIG_DEBUG_EXEC
                 qemu_log_mask(CPU_LOG_EXEC, "Trace %p [" TARGET_FMT_lx "] %s\n",
@@ -585,7 +583,7 @@  int cpu_exec(CPUArchState *env)
                 if (next_tb != 0 && tb->page_addr[1] == -1) {
                     tb_add_jump((TranslationBlock *)(next_tb & ~3), next_tb & 3, tb);
                 }
-                spin_unlock(&tb_lock);
+                spin_unlock(&tcg_ctx.tb_ctx.tb_lock);
 
                 /* cpu_interrupt might be called while translating the
                    TB, but before it is linked into a potentially
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index d235ef8..f685c28 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -168,6 +168,25 @@  struct TranslationBlock {
     uint32_t icount;
 };
 
+#include "exec/spinlock.h"
+
+typedef struct TBContext TBContext;
+
+struct TBContext {
+
+    TranslationBlock *tbs;
+    TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
+    int nb_tbs;
+    /* any access to the tbs or the page table must use this lock */
+    spinlock_t tb_lock;
+
+    /* statistics */
+    int tb_flush_count;
+    int tb_phys_invalidate_count;
+
+    int tb_invalidated_flag;
+};
+
 static inline unsigned int tb_jmp_cache_hash_page(target_ulong pc)
 {
     target_ulong tmp;
@@ -192,8 +211,6 @@  void tb_free(TranslationBlock *tb);
 void tb_flush(CPUArchState *env);
 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
 
-extern TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
-
 #if defined(USE_DIRECT_JUMP)
 
 #if defined(CONFIG_TCG_INTERPRETER)
@@ -275,12 +292,6 @@  static inline void tb_add_jump(TranslationBlock *tb, int n,
     }
 }
 
-#include "exec/spinlock.h"
-
-extern spinlock_t tb_lock;
-
-extern int tb_invalidated_flag;
-
 /* The return address may point to the start of the next instruction.
    Subtracting one gets us the call instruction itself.  */
 #if defined(CONFIG_TCG_INTERPRETER)
diff --git a/linux-user/main.c b/linux-user/main.c
index 0181bc2..8f09abd 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -111,7 +111,7 @@  static int pending_cpus;
 /* Make sure everything is in a consistent state for calling fork().  */
 void fork_start(void)
 {
-    pthread_mutex_lock(&tb_lock);
+    pthread_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
     pthread_mutex_lock(&exclusive_lock);
     mmap_fork_start();
 }
@@ -129,11 +129,11 @@  void fork_end(int child)
         pthread_mutex_init(&cpu_list_mutex, NULL);
         pthread_cond_init(&exclusive_cond, NULL);
         pthread_cond_init(&exclusive_resume, NULL);
-        pthread_mutex_init(&tb_lock, NULL);
+        pthread_mutex_init(&tcg_ctx.tb_ctx.tb_lock, NULL);
         gdbserver_fork(thread_env);
     } else {
         pthread_mutex_unlock(&exclusive_lock);
-        pthread_mutex_unlock(&tb_lock);
+        pthread_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
     }
 }
 
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 4086e98..51c8176 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -471,6 +471,8 @@  struct TCGContext {
     size_t code_gen_buffer_max_size;
     uint8_t *code_gen_ptr;
 
+    TBContext tb_ctx;
+
 #if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
     /* labels info for qemu_ld/st IRs
        The labels help to generate TLB miss case codes at the end of TB */
diff --git a/translate-all.c b/translate-all.c
index d666562..efeb247 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -72,13 +72,6 @@ 
 
 #define SMC_BITMAP_USE_THRESHOLD 10
 
-/* Translation blocks */
-static TranslationBlock *tbs;
-TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
-static int nb_tbs;
-/* any access to the tbs or the page table must use this lock */
-spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
-
 typedef struct PageDesc {
     /* list of TBs intersecting this ram page */
     TranslationBlock *first_tb;
@@ -125,10 +118,6 @@  uintptr_t qemu_host_page_mask;
    The bottom level has pointers to PageDesc.  */
 static void *l1_map[V_L1_SIZE];
 
-/* statistics */
-static int tb_flush_count;
-static int tb_phys_invalidate_count;
-
 /* code generation context */
 TCGContext tcg_ctx;
 
@@ -589,7 +578,8 @@  static inline void code_gen_alloc(size_t tb_size)
         (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
     tcg_ctx.code_gen_max_blocks = tcg_ctx.code_gen_buffer_size /
             CODE_GEN_AVG_BLOCK_SIZE;
-    tbs = g_malloc(tcg_ctx.code_gen_max_blocks * sizeof(TranslationBlock));
+    tcg_ctx.tb_ctx.tbs =
+            g_malloc(tcg_ctx.code_gen_max_blocks * sizeof(TranslationBlock));
 }
 
 /* Must be called before using the QEMU cpus. 'tb_size' is the size
@@ -620,12 +610,12 @@  static TranslationBlock *tb_alloc(target_ulong pc)
 {
     TranslationBlock *tb;
 
-    if (nb_tbs >= tcg_ctx.code_gen_max_blocks ||
+    if (tcg_ctx.tb_ctx.nb_tbs >= tcg_ctx.code_gen_max_blocks ||
         (tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer) >=
          tcg_ctx.code_gen_buffer_max_size) {
         return NULL;
     }
-    tb = &tbs[nb_tbs++];
+    tb = &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs++];
     tb->pc = pc;
     tb->cflags = 0;
     return tb;
@@ -636,9 +626,10 @@  void tb_free(TranslationBlock *tb)
     /* In practice this is mostly used for single use temporary TB
        Ignore the hard cases and just back up if this TB happens to
        be the last one generated.  */
-    if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
+    if (tcg_ctx.tb_ctx.nb_tbs > 0 &&
+            tb == &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs - 1]) {
         tcg_ctx.code_gen_ptr = tb->tc_ptr;
-        nb_tbs--;
+        tcg_ctx.tb_ctx.nb_tbs--;
     }
 }
 
@@ -693,27 +684,28 @@  void tb_flush(CPUArchState *env1)
 #if defined(DEBUG_FLUSH)
     printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
            (unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer),
-           nb_tbs, nb_tbs > 0 ?
+           tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.tb_ctx.nb_tbs > 0 ?
            ((unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer)) /
-           nb_tbs : 0);
+           tcg_ctx.tb_ctx.nb_tbs : 0);
 #endif
     if ((unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer)
         > tcg_ctx.code_gen_buffer_size) {
         cpu_abort(env1, "Internal error: code buffer overflow\n");
     }
-    nb_tbs = 0;
+    tcg_ctx.tb_ctx.nb_tbs = 0;
 
     for (env = first_cpu; env != NULL; env = env->next_cpu) {
         memset(env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *));
     }
 
-    memset(tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof(void *));
+    memset(tcg_ctx.tb_ctx.tb_phys_hash, 0,
+            CODE_GEN_PHYS_HASH_SIZE * sizeof(void *));
     page_flush_tb();
 
     tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer;
     /* XXX: flush processor icache at this point if cache flush is
        expensive */
-    tb_flush_count++;
+    tcg_ctx.tb_ctx.tb_flush_count++;
 }
 
 #ifdef DEBUG_TB_CHECK
@@ -725,7 +717,7 @@  static void tb_invalidate_check(target_ulong address)
 
     address &= TARGET_PAGE_MASK;
     for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
-        for (tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
+        for (tb = tb_ctx.tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
             if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
                   address >= tb->pc + tb->size)) {
                 printf("ERROR invalidate: address=" TARGET_FMT_lx
@@ -743,7 +735,8 @@  static void tb_page_check(void)
     int i, flags1, flags2;
 
     for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
-        for (tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
+        for (tb = tcg_ctx.tb_ctx.tb_phys_hash[i]; tb != NULL;
+                tb = tb->phys_hash_next) {
             flags1 = page_get_flags(tb->pc);
             flags2 = page_get_flags(tb->pc + tb->size - 1);
             if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
@@ -835,7 +828,7 @@  void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
     /* remove the TB from the hash list */
     phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
     h = tb_phys_hash_func(phys_pc);
-    tb_hash_remove(&tb_phys_hash[h], tb);
+    tb_hash_remove(&tcg_ctx.tb_ctx.tb_phys_hash[h], tb);
 
     /* remove the TB from the page list */
     if (tb->page_addr[0] != page_addr) {
@@ -849,7 +842,7 @@  void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
         invalidate_page_bitmap(p);
     }
 
-    tb_invalidated_flag = 1;
+    tcg_ctx.tb_ctx.tb_invalidated_flag = 1;
 
     /* remove the TB from the hash list */
     h = tb_jmp_cache_hash_func(tb->pc);
@@ -878,7 +871,7 @@  void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
     }
     tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
 
-    tb_phys_invalidate_count++;
+    tcg_ctx.tb_ctx.tb_phys_invalidate_count++;
 }
 
 static inline void set_bits(uint8_t *tab, int start, int len)
@@ -955,7 +948,7 @@  TranslationBlock *tb_gen_code(CPUArchState *env,
         /* cannot fail at this point */
         tb = tb_alloc(pc);
         /* Don't forget to invalidate previous TB info.  */
-        tb_invalidated_flag = 1;
+        tcg_ctx.tb_ctx.tb_invalidated_flag = 1;
     }
     tc_ptr = tcg_ctx.code_gen_ptr;
     tb->tc_ptr = tc_ptr;
@@ -1273,7 +1266,7 @@  static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
     mmap_lock();
     /* add in the physical hash table */
     h = tb_phys_hash_func(phys_pc);
-    ptb = &tb_phys_hash[h];
+    ptb = &tcg_ctx.tb_ctx.tb_phys_hash[h];
     tb->phys_hash_next = *ptb;
     *ptb = tb;
 
@@ -1323,7 +1316,7 @@  static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
     uintptr_t v;
     TranslationBlock *tb;
 
-    if (nb_tbs <= 0) {
+    if (tcg_ctx.tb_ctx.nb_tbs <= 0) {
         return NULL;
     }
     if (tc_ptr < (uintptr_t)tcg_ctx.code_gen_buffer ||
@@ -1332,10 +1325,10 @@  static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
     }
     /* binary search (cf Knuth) */
     m_min = 0;
-    m_max = nb_tbs - 1;
+    m_max = tcg_ctx.tb_ctx.nb_tbs - 1;
     while (m_min <= m_max) {
         m = (m_min + m_max) >> 1;
-        tb = &tbs[m];
+        tb = &tcg_ctx.tb_ctx.tbs[m];
         v = (uintptr_t)tb->tc_ptr;
         if (v == tc_ptr) {
             return tb;
@@ -1345,7 +1338,7 @@  static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
             m_min = m + 1;
         }
     }
-    return &tbs[m_max];
+    return &tcg_ctx.tb_ctx.tbs[m_max];
 }
 
 static void tb_reset_jump_recursive(TranslationBlock *tb);
@@ -1566,8 +1559,8 @@  void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
     cross_page = 0;
     direct_jmp_count = 0;
     direct_jmp2_count = 0;
-    for (i = 0; i < nb_tbs; i++) {
-        tb = &tbs[i];
+    for (i = 0; i < tcg_ctx.tb_ctx.nb_tbs; i++) {
+        tb = &tcg_ctx.tb_ctx.tbs[i];
         target_code_size += tb->size;
         if (tb->size > max_target_code_size) {
             max_target_code_size = tb->size;
@@ -1588,27 +1581,32 @@  void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
                 tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer,
                 tcg_ctx.code_gen_buffer_max_size);
     cpu_fprintf(f, "TB count            %d/%d\n",
-                nb_tbs, tcg_ctx.code_gen_max_blocks);
+            tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.code_gen_max_blocks);
     cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
-                nb_tbs ? target_code_size / nb_tbs : 0,
-                max_target_code_size);
+            tcg_ctx.tb_ctx.nb_tbs ? target_code_size /
+                    tcg_ctx.tb_ctx.nb_tbs : 0,
+            max_target_code_size);
     cpu_fprintf(f, "TB avg host size    %td bytes (expansion ratio: %0.1f)\n",
-                nb_tbs ? (tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer) /
-                        nb_tbs : 0,
-                target_code_size ?
-                (double) (tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer) /
-                    target_code_size : 0);
-    cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
-            cross_page,
-            nb_tbs ? (cross_page * 100) / nb_tbs : 0);
+            tcg_ctx.tb_ctx.nb_tbs ? (tcg_ctx.code_gen_ptr -
+                                     tcg_ctx.code_gen_buffer) /
+                                     tcg_ctx.tb_ctx.nb_tbs : 0,
+                target_code_size ? (double) (tcg_ctx.code_gen_ptr -
+                                             tcg_ctx.code_gen_buffer) /
+                                             target_code_size : 0);
+    cpu_fprintf(f, "cross page TB count %d (%d%%)\n", cross_page,
+            tcg_ctx.tb_ctx.nb_tbs ? (cross_page * 100) /
+                                    tcg_ctx.tb_ctx.nb_tbs : 0);
     cpu_fprintf(f, "direct jump count   %d (%d%%) (2 jumps=%d %d%%)\n",
                 direct_jmp_count,
-                nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
+                tcg_ctx.tb_ctx.nb_tbs ? (direct_jmp_count * 100) /
+                        tcg_ctx.tb_ctx.nb_tbs : 0,
                 direct_jmp2_count,
-                nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
+                tcg_ctx.tb_ctx.nb_tbs ? (direct_jmp2_count * 100) /
+                        tcg_ctx.tb_ctx.nb_tbs : 0);
     cpu_fprintf(f, "\nStatistics:\n");
-    cpu_fprintf(f, "TB flush count      %d\n", tb_flush_count);
-    cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
+    cpu_fprintf(f, "TB flush count      %d\n", tcg_ctx.tb_ctx.tb_flush_count);
+    cpu_fprintf(f, "TB invalidate count %d\n",
+            tcg_ctx.tb_ctx.tb_phys_invalidate_count);
     cpu_fprintf(f, "TLB flush count     %d\n", tlb_flush_count);
     tcg_dump_info(f, cpu_fprintf);
 }