[RFC,05/10] extract TBContext from TCGContext.

Message ID 1421428797-23697-6-git-send-email-fred.konrad@greensocs.com
State New

Commit Message

fred.konrad@greensocs.com Jan. 16, 2015, 5:19 p.m. UTC
From: KONRAD Frederic <fred.konrad@greensocs.com>

In order to have one TCGContext per thread and a single TBContext, we have to
extract TBContext from TCGContext.

Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
---
 cpu-exec.c        | 18 ++++++-------
 linux-user/main.c |  6 ++---
 tcg/tcg.h         |  3 +--
 translate-all.c   | 79 +++++++++++++++++++++++++++----------------------------
 4 files changed, 52 insertions(+), 54 deletions(-)
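
In outline, the patch deletes the embedded TBContext field from struct
TCGContext and promotes it to a standalone global, so every former
tcg_ctx.tb_ctx.X access becomes tb_ctx.X. Condensed from the tcg/tcg.h
hunk below:

    struct TCGContext {
        ...
        /* TBContext tb_ctx;  -- removed, no longer embedded */

        /* The TCGBackendData structure is private to tcg-target.c.  */
        struct TCGBackendData *be;
    };

    extern TCGContext tcg_ctx;
    extern TBContext tb_ctx;   /* new global, defined in translate-all.c */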

Comments

Peter Maydell Jan. 29, 2015, 3:44 p.m. UTC | #1
On 16 January 2015 at 17:19,  <fred.konrad@greensocs.com> wrote:
> From: KONRAD Frederic <fred.konrad@greensocs.com>
>
> In order to have one TCGContext per thread and a single TBContext, we have to
> extract TBContext from TCGContext.

This seems a bit odd. It's not clear to me what the advantages
are of having one TCGContext per thread but only a single
TBContext (as opposed to either (1) having a single TCGContext
and TBContext with locks protecting against multiple threads
generating code at once, or (2) having each thread have its
own TCGContext and TBContext and completely independent codegen).

Maybe it would help if you sketched out your design in a little
more detail in the cover letter, with emphasis on which data
structures are going to be per-thread and which are going to
be shared (and if so how shared).

(Long term we would want to be able to have multiple
TBContexts to support heterogeneous systems where CPUs
might be different architectures or have different views
of physical memory...)

thanks
-- PMM
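
A rough sketch of the three layouts under discussion, for concreteness
(a C illustration; the __thread qualifier and the gen_lock name are
assumptions, not taken from this series):

    /* (1) one shared pair, all code generation serialized by a lock */
    TCGContext tcg_ctx;
    TBContext  tb_ctx;
    QemuMutex  gen_lock;           /* hypothetical lock held around codegen */

    /* (2) fully independent per-thread code generation */
    __thread TCGContext tcg_ctx;
    __thread TBContext  tb_ctx;

    /* the direction this patch suggests: per-thread generation state,
       shared translation-block bookkeeping */
    __thread TCGContext tcg_ctx;   /* one per generator thread */
    TBContext tb_ctx;              /* shared, guarded by tb_ctx.tb_lock */
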
Richard Henderson Feb. 3, 2015, 4:30 p.m. UTC | #2
On 01/29/2015 07:44 AM, Peter Maydell wrote:
> On 16 January 2015 at 17:19,  <fred.konrad@greensocs.com> wrote:
>> From: KONRAD Frederic <fred.konrad@greensocs.com>
>>
>> In order to have one TCGContext per thread and a single TBContext, we have to
>> extract TBContext from TCGContext.
> 
> This seems a bit odd. It's not clear to me what the advantages
> are of having one TCGContext per thread but only a single
> TBContext (as opposed to either (1) having a single TCGContext
> and TBContext with locks protecting against multiple threads
> generating code at once, or (2) having each thread have its
> own TCGContext and TBContext and completely independent codegen).
> 
> Maybe it would help if you sketched out your design in a little
> more detail in the cover letter, with emphasis on which data
> structures are going to be per-thread and which are going to
> be shared (and if so how shared).
> 
> (Long term we would want to be able to have multiple
> TBContexts to support heterogeneous systems where CPUs
> might be different architectures or have different views
> of physical memory...)

Seconded.


r~

Patch

diff --git a/cpu-exec.c b/cpu-exec.c
index 1e7513c..4d22252 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -237,13 +237,13 @@  static TranslationBlock *tb_find_slow(CPUArchState *env,
     tb_page_addr_t phys_pc, phys_page1;
     target_ulong virt_page2;
 
-    tcg_ctx.tb_ctx.tb_invalidated_flag = 0;
+    tb_ctx.tb_invalidated_flag = 0;
 
     /* find translated block using physical mappings */
     phys_pc = get_page_addr_code(env, pc);
     phys_page1 = phys_pc & TARGET_PAGE_MASK;
     h = tb_phys_hash_func(phys_pc);
-    ptb1 = &tcg_ctx.tb_ctx.tb_phys_hash[h];
+    ptb1 = &tb_ctx.tb_phys_hash[h];
     for(;;) {
         tb = *ptb1;
         if (!tb)
@@ -275,8 +275,8 @@  static TranslationBlock *tb_find_slow(CPUArchState *env,
     /* Move the last found TB to the head of the list */
     if (likely(*ptb1)) {
         *ptb1 = tb->phys_hash_next;
-        tb->phys_hash_next = tcg_ctx.tb_ctx.tb_phys_hash[h];
-        tcg_ctx.tb_ctx.tb_phys_hash[h] = tb;
+        tb->phys_hash_next = tb_ctx.tb_phys_hash[h];
+        tb_ctx.tb_phys_hash[h] = tb;
     }
     /* we add the TB in the virtual pc hash table */
     cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb;
@@ -454,18 +454,18 @@  int cpu_exec(CPUArchState *env)
                     cpu_loop_exit(cpu);
                 }
 #if defined(CONFIG_USER_ONLY)
-                qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
+                qemu_mutex_lock(&tb_ctx.tb_lock);
                 have_tb_lock = true;
 #endif
                 tb = tb_find_fast(env);
                 /* Note: we do it here to avoid a gcc bug on Mac OS X when
                    doing it in tb_find_slow */
-                if (tcg_ctx.tb_ctx.tb_invalidated_flag) {
+                if (tb_ctx.tb_invalidated_flag) {
                     /* as some TB could have been invalidated because
                        of memory exceptions while generating the code, we
                        must recompute the hash index here */
                     next_tb = 0;
-                    tcg_ctx.tb_ctx.tb_invalidated_flag = 0;
+                    tb_ctx.tb_invalidated_flag = 0;
                 }
                 if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
                     qemu_log("Trace %p [" TARGET_FMT_lx "] %s\n",
@@ -480,7 +480,7 @@  int cpu_exec(CPUArchState *env)
                 }
 #if defined(CONFIG_USER_ONLY)
                 have_tb_lock = false;
-                qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
+                qemu_mutex_unlock(&tb_ctx.tb_lock);
 #endif
                 /* cpu_interrupt might be called while translating the
                    TB, but before it is linked into a potentially
@@ -556,7 +556,7 @@  int cpu_exec(CPUArchState *env)
 #endif
 #if defined(CONFIG_USER_ONLY)
             if (have_tb_lock) {
-                qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
+                qemu_mutex_unlock(&tb_ctx.tb_lock);
                 have_tb_lock = false;
             }
 #endif
diff --git a/linux-user/main.c b/linux-user/main.c
index 97e7d50..2a4c948 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -107,7 +107,7 @@  static int pending_cpus;
 /* Make sure everything is in a consistent state for calling fork().  */
 void fork_start(void)
 {
-    qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
+    qemu_mutex_lock(&tb_ctx.tb_lock);
     pthread_mutex_lock(&exclusive_lock);
     mmap_fork_start();
 }
@@ -129,11 +129,11 @@  void fork_end(int child)
         pthread_mutex_init(&cpu_list_mutex, NULL);
         pthread_cond_init(&exclusive_cond, NULL);
         pthread_cond_init(&exclusive_resume, NULL);
-        qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
+        qemu_mutex_init(&tb_ctx.tb_lock);
         gdbserver_fork((CPUArchState *)thread_cpu->env_ptr);
     } else {
         pthread_mutex_unlock(&exclusive_lock);
-        qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
+        qemu_mutex_unlock(&tb_ctx.tb_lock);
     }
 }
 
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 944b877..baf053a 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -529,13 +529,12 @@  struct TCGContext {
     size_t code_gen_buffer_max_size;
     void *code_gen_ptr;
 
-    TBContext tb_ctx;
-
     /* The TCGBackendData structure is private to tcg-target.c.  */
     struct TCGBackendData *be;
 };
 
 extern TCGContext tcg_ctx;
+extern TBContext tb_ctx;
 
 /* pool based memory allocation */
 
diff --git a/translate-all.c b/translate-all.c
index 0e11c70..e393d30 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -127,6 +127,9 @@  static void *l1_map[V_L1_SIZE];
 /* code generation context */
 TCGContext tcg_ctx;
 
+/* translation block context */
+TBContext tb_ctx;
+
 static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
                          tb_page_addr_t phys_page2);
 static TranslationBlock *tb_find_pc(uintptr_t tc_ptr);
@@ -684,8 +687,8 @@  static inline void code_gen_alloc(size_t tb_size)
         (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
     tcg_ctx.code_gen_max_blocks = tcg_ctx.code_gen_buffer_size /
             CODE_GEN_AVG_BLOCK_SIZE;
-    tcg_ctx.tb_ctx.tbs =
-            g_malloc(tcg_ctx.code_gen_max_blocks * sizeof(TranslationBlock));
+    tb_ctx.tbs = g_malloc(tcg_ctx.code_gen_max_blocks
+                          * sizeof(TranslationBlock));
 }
 
 /* Must be called before using the QEMU cpus. 'tb_size' is the size
@@ -716,12 +719,12 @@  static TranslationBlock *tb_alloc(target_ulong pc)
 {
     TranslationBlock *tb;
 
-    if (tcg_ctx.tb_ctx.nb_tbs >= tcg_ctx.code_gen_max_blocks ||
+    if (tb_ctx.nb_tbs >= tcg_ctx.code_gen_max_blocks ||
         (tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer) >=
          tcg_ctx.code_gen_buffer_max_size) {
         return NULL;
     }
-    tb = &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs++];
+    tb = &tb_ctx.tbs[tb_ctx.nb_tbs++];
     tb->pc = pc;
     tb->cflags = 0;
     return tb;
@@ -732,10 +735,10 @@  void tb_free(TranslationBlock *tb)
     /* In practice this is mostly used for single use temporary TB
        Ignore the hard cases and just back up if this TB happens to
        be the last one generated.  */
-    if (tcg_ctx.tb_ctx.nb_tbs > 0 &&
-            tb == &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs - 1]) {
+    if (tb_ctx.nb_tbs > 0 &&
+            tb == &tb_ctx.tbs[tb_ctx.nb_tbs - 1]) {
         tcg_ctx.code_gen_ptr = tb->tc_ptr;
-        tcg_ctx.tb_ctx.nb_tbs--;
+        tb_ctx.nb_tbs--;
     }
 }
 
@@ -792,27 +795,27 @@  void tb_flush(CPUArchState *env1)
 #if defined(DEBUG_FLUSH)
     printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
            (unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer),
-           tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.tb_ctx.nb_tbs > 0 ?
+           tb_ctx.nb_tbs, tb_ctx.nb_tbs > 0 ?
            ((unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer)) /
-           tcg_ctx.tb_ctx.nb_tbs : 0);
+           tb_ctx.nb_tbs : 0);
 #endif
     if ((unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer)
         > tcg_ctx.code_gen_buffer_size) {
         cpu_abort(cpu, "Internal error: code buffer overflow\n");
     }
-    tcg_ctx.tb_ctx.nb_tbs = 0;
+    tb_ctx.nb_tbs = 0;
 
     CPU_FOREACH(cpu) {
         memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
     }
 
-    memset(tcg_ctx.tb_ctx.tb_phys_hash, 0, sizeof(tcg_ctx.tb_ctx.tb_phys_hash));
+    memset(tb_ctx.tb_phys_hash, 0, sizeof(tb_ctx.tb_phys_hash));
     page_flush_tb();
 
     tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer;
     /* XXX: flush processor icache at this point if cache flush is
        expensive */
-    tcg_ctx.tb_ctx.tb_flush_count++;
+    tb_ctx.tb_flush_count++;
 }
 
 #ifdef DEBUG_TB_CHECK
@@ -842,7 +845,7 @@  static void tb_page_check(void)
     int i, flags1, flags2;
 
     for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
-        for (tb = tcg_ctx.tb_ctx.tb_phys_hash[i]; tb != NULL;
+        for (tb = tb_ctx.tb_phys_hash[i]; tb != NULL;
                 tb = tb->phys_hash_next) {
             flags1 = page_get_flags(tb->pc);
             flags2 = page_get_flags(tb->pc + tb->size - 1);
@@ -935,7 +938,7 @@  void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
     /* remove the TB from the hash list */
     phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
     h = tb_phys_hash_func(phys_pc);
-    tb_hash_remove(&tcg_ctx.tb_ctx.tb_phys_hash[h], tb);
+    tb_hash_remove(&tb_ctx.tb_phys_hash[h], tb);
 
     /* remove the TB from the page list */
     if (tb->page_addr[0] != page_addr) {
@@ -949,7 +952,7 @@  void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
         invalidate_page_bitmap(p);
     }
 
-    tcg_ctx.tb_ctx.tb_invalidated_flag = 1;
+    tb_ctx.tb_invalidated_flag = 1;
 
     /* remove the TB from the hash list */
     h = tb_jmp_cache_hash_func(tb->pc);
@@ -978,7 +981,7 @@  void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
     }
     tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
 
-    tcg_ctx.tb_ctx.tb_phys_invalidate_count++;
+    tb_ctx.tb_phys_invalidate_count++;
 }
 
 static inline void set_bits(uint8_t *tab, int start, int len)
@@ -1058,7 +1061,7 @@  TranslationBlock *tb_gen_code(CPUState *cpu,
         /* cannot fail at this point */
         tb = tb_alloc(pc);
         /* Don't forget to invalidate previous TB info.  */
-        tcg_ctx.tb_ctx.tb_invalidated_flag = 1;
+        tb_ctx.tb_invalidated_flag = 1;
     }
     tb->tc_ptr = tcg_ctx.code_gen_ptr;
     tb->cs_base = cs_base;
@@ -1392,7 +1395,7 @@  static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
     mmap_lock();
     /* add in the physical hash table */
     h = tb_phys_hash_func(phys_pc);
-    ptb = &tcg_ctx.tb_ctx.tb_phys_hash[h];
+    ptb = &tb_ctx.tb_phys_hash[h];
     tb->phys_hash_next = *ptb;
     *ptb = tb;
 
@@ -1430,7 +1433,7 @@  static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
     uintptr_t v;
     TranslationBlock *tb;
 
-    if (tcg_ctx.tb_ctx.nb_tbs <= 0) {
+    if (tb_ctx.nb_tbs <= 0) {
         return NULL;
     }
     if (tc_ptr < (uintptr_t)tcg_ctx.code_gen_buffer ||
@@ -1439,10 +1442,10 @@  static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
     }
     /* binary search (cf Knuth) */
     m_min = 0;
-    m_max = tcg_ctx.tb_ctx.nb_tbs - 1;
+    m_max = tb_ctx.nb_tbs - 1;
     while (m_min <= m_max) {
         m = (m_min + m_max) >> 1;
-        tb = &tcg_ctx.tb_ctx.tbs[m];
+        tb = &tb_ctx.tbs[m];
         v = (uintptr_t)tb->tc_ptr;
         if (v == tc_ptr) {
             return tb;
@@ -1452,7 +1455,7 @@  static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
             m_min = m + 1;
         }
     }
-    return &tcg_ctx.tb_ctx.tbs[m_max];
+    return &tb_ctx.tbs[m_max];
 }
 
 #if defined(TARGET_HAS_ICE) && !defined(CONFIG_USER_ONLY)
@@ -1606,8 +1609,8 @@  void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
     cross_page = 0;
     direct_jmp_count = 0;
     direct_jmp2_count = 0;
-    for (i = 0; i < tcg_ctx.tb_ctx.nb_tbs; i++) {
-        tb = &tcg_ctx.tb_ctx.tbs[i];
+    for (i = 0; i < tb_ctx.nb_tbs; i++) {
+        tb = &tb_ctx.tbs[i];
         target_code_size += tb->size;
         if (tb->size > max_target_code_size) {
             max_target_code_size = tb->size;
@@ -1628,32 +1631,28 @@  void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
                 tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer,
                 tcg_ctx.code_gen_buffer_max_size);
     cpu_fprintf(f, "TB count            %d/%d\n",
-            tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.code_gen_max_blocks);
+                tb_ctx.nb_tbs, tcg_ctx.code_gen_max_blocks);
     cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
-            tcg_ctx.tb_ctx.nb_tbs ? target_code_size /
-                    tcg_ctx.tb_ctx.nb_tbs : 0,
-            max_target_code_size);
+                tb_ctx.nb_tbs ? target_code_size / tb_ctx.nb_tbs : 0,
+                max_target_code_size);
     cpu_fprintf(f, "TB avg host size    %td bytes (expansion ratio: %0.1f)\n",
-            tcg_ctx.tb_ctx.nb_tbs ? (tcg_ctx.code_gen_ptr -
-                                     tcg_ctx.code_gen_buffer) /
-                                     tcg_ctx.tb_ctx.nb_tbs : 0,
+                tb_ctx.nb_tbs ? (tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer)
+                               / tb_ctx.nb_tbs : 0,
                 target_code_size ? (double) (tcg_ctx.code_gen_ptr -
                                              tcg_ctx.code_gen_buffer) /
                                              target_code_size : 0);
     cpu_fprintf(f, "cross page TB count %d (%d%%)\n", cross_page,
-            tcg_ctx.tb_ctx.nb_tbs ? (cross_page * 100) /
-                                    tcg_ctx.tb_ctx.nb_tbs : 0);
+                tb_ctx.nb_tbs ? (cross_page * 100) / tb_ctx.nb_tbs : 0);
     cpu_fprintf(f, "direct jump count   %d (%d%%) (2 jumps=%d %d%%)\n",
-                direct_jmp_count,
-                tcg_ctx.tb_ctx.nb_tbs ? (direct_jmp_count * 100) /
-                        tcg_ctx.tb_ctx.nb_tbs : 0,
+                direct_jmp_count, tb_ctx.nb_tbs ? (direct_jmp_count * 100) /
+                tb_ctx.nb_tbs : 0,
                 direct_jmp2_count,
-                tcg_ctx.tb_ctx.nb_tbs ? (direct_jmp2_count * 100) /
-                        tcg_ctx.tb_ctx.nb_tbs : 0);
+                tb_ctx.nb_tbs ? (direct_jmp2_count * 100) /
+                tb_ctx.nb_tbs : 0);
     cpu_fprintf(f, "\nStatistics:\n");
-    cpu_fprintf(f, "TB flush count      %d\n", tcg_ctx.tb_ctx.tb_flush_count);
+    cpu_fprintf(f, "TB flush count      %d\n", tb_ctx.tb_flush_count);
     cpu_fprintf(f, "TB invalidate count %d\n",
-            tcg_ctx.tb_ctx.tb_phys_invalidate_count);
+                tb_ctx.tb_phys_invalidate_count);
     cpu_fprintf(f, "TLB flush count     %d\n", tlb_flush_count);
     tcg_dump_info(f, cpu_fprintf);
 }
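
For context, a minimal sketch of how the now-global tb_ctx could pair with
a per-thread TCGContext in later patches of this series (the __thread
qualifier and the locked_tb_alloc helper are illustrative assumptions; the
helper also presumes it lives in translate-all.c, where tb_alloc is
visible):

    /* Illustrative sketch only, not part of this patch. */
    __thread TCGContext tcg_ctx;   /* per-thread code generation state */
    TBContext tb_ctx;              /* shared TB bookkeeping (this patch) */

    static TranslationBlock *locked_tb_alloc(target_ulong pc)
    {
        TranslationBlock *tb;

        /* tb_ctx is shared between threads, so serialize on its lock */
        qemu_mutex_lock(&tb_ctx.tb_lock);
        tb = tb_alloc(pc);
        qemu_mutex_unlock(&tb_ctx.tb_lock);
        return tb;
    }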