Patchwork TCG: Convert global variables to be TLS.

login
register
mail settings
Submitter Evgeny Voevodin
Date Feb. 27, 2012, 11:06 a.m.
Message ID <1330340818-4125-2-git-send-email-e.voevodin@samsung.com>
Download mbox | patch
Permalink /patch/143177/
State New
Headers show

Comments

Evgeny Voevodin - Feb. 27, 2012, 11:06 a.m.
This commit converts code_gen_buffer, code_gen_ptr, tbs and nb_tbs to
thread-local storage (TLS). This is needed for TCG to become
multithreaded.

Initialization of code_gen_buffer and code_gen_ptr is moved to a new
tcg_gen_buffer_init() function. This is done because we do not need
to allocate and initialize TCG buffers for the IO thread.
Initialization is now done in qemu_tcg_cpu_thread_fn() by each TCG
CPU thread individually.

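Also, tcg_enabled() now returns a dedicated flag (code_gen_enabled)
instead of (code_gen_buffer != NULL): code_gen_buffer is now
thread-local and is never initialized in the IO thread, so the old
check would always return false when called from there.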

This patch also includes some code formatting changes.

Signed-off-by: Evgeny Voevodin <e.voevodin@samsung.com>
---
 bsd-user/main.c    |    1 +
 cpus.c             |    2 +
 darwin-user/main.c |    1 +
 exec.c             |  121 ++++++++++++++++++++++++++++++---------------------
 linux-user/main.c  |    1 +
 qemu-common.h      |    1 +
 6 files changed, 77 insertions(+), 50 deletions(-)

Patch

diff --git a/bsd-user/main.c b/bsd-user/main.c
index cc7d4a3..11e4540 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -906,6 +906,7 @@  int main(int argc, char **argv)
 #endif
     }
     tcg_exec_init(0);
+    tcg_gen_buffer_init();
     cpu_exec_init_all();
     /* NOTE: we need to init the CPU at this stage to get
        qemu_host_page_size */
diff --git a/cpus.c b/cpus.c
index f45a438..6190250 100644
--- a/cpus.c
+++ b/cpus.c
@@ -746,6 +746,8 @@  static void *qemu_tcg_cpu_thread_fn(void *arg)
 {
     CPUState *env = arg;
 
+    tcg_gen_buffer_init();
+
     qemu_tcg_init_cpu_signals();
     qemu_thread_get_self(env->thread);
 
diff --git a/darwin-user/main.c b/darwin-user/main.c
index 9b57c20..8618a52 100644
--- a/darwin-user/main.c
+++ b/darwin-user/main.c
@@ -851,6 +851,7 @@  int main(int argc, char **argv)
 #endif
     }
     tcg_exec_init(0);
+    tcg_gen_buffer_init();
     cpu_exec_init_all();
     /* NOTE: we need to init the CPU at this stage to get
        qemu_host_page_size */
diff --git a/exec.c b/exec.c
index b81677a..51a93d9 100644
--- a/exec.c
+++ b/exec.c
@@ -79,10 +79,10 @@ 
 
 #define SMC_BITMAP_USE_THRESHOLD 10
 
-static TranslationBlock *tbs;
+static DEFINE_TLS(TranslationBlock*, tbs);
 static int code_gen_max_blocks;
 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
-static int nb_tbs;
+static DEFINE_TLS(int, nb_tbs);
 /* any access to the tbs or the page table must use this lock */
 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
 
@@ -103,11 +103,12 @@  spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
 #endif
 
 uint8_t code_gen_prologue[1024] code_gen_section;
-static uint8_t *code_gen_buffer;
+static bool code_gen_enabled;
+static DEFINE_TLS(uint8_t*, code_gen_buffer);
 static unsigned long code_gen_buffer_size;
 /* threshold to flush the translated code buffer */
 static unsigned long code_gen_buffer_max_size;
-static uint8_t *code_gen_ptr;
+static DEFINE_TLS(uint8_t*, code_gen_ptr);
 
 #if !defined(CONFIG_USER_ONLY)
 int phys_ram_fd;
@@ -469,18 +470,17 @@  static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
 #endif
 
 #ifdef USE_STATIC_CODE_GEN_BUFFER
-static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
-               __attribute__((aligned (CODE_GEN_ALIGN)));
+static DEFINE_TLS(uint8_t [DEFAULT_CODE_GEN_BUFFER_SIZE],
+        static_code_gen_buffer) __attribute__((aligned(CODE_GEN_ALIGN)));
 #endif
 
-static void code_gen_alloc(unsigned long tb_size)
+static void code_gen_alloc(void)
 {
 #ifdef USE_STATIC_CODE_GEN_BUFFER
-    code_gen_buffer = static_code_gen_buffer;
+    tls_var(code_gen_buffer) = tls_var(static_code_gen_buffer);
     code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
-    map_exec(code_gen_buffer, code_gen_buffer_size);
+    map_exec(tls_var(code_gen_buffer), code_gen_buffer_size);
 #else
-    code_gen_buffer_size = tb_size;
     if (code_gen_buffer_size == 0) {
 #if defined(CONFIG_USER_ONLY)
         code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
@@ -522,10 +522,10 @@  static void code_gen_alloc(unsigned long tb_size)
         }
         start = (void *)0x90000000UL;
 #endif
-        code_gen_buffer = mmap(start, code_gen_buffer_size,
+        tls_var(code_gen_buffer) = mmap(start, code_gen_buffer_size,
                                PROT_WRITE | PROT_READ | PROT_EXEC,
                                flags, -1, 0);
-        if (code_gen_buffer == MAP_FAILED) {
+        if (tls_var(code_gen_buffer) == MAP_FAILED) {
             fprintf(stderr, "Could not allocate dynamic translator buffer\n");
             exit(1);
         }
@@ -553,24 +553,31 @@  static void code_gen_alloc(unsigned long tb_size)
             code_gen_buffer_size = (512 * 1024 * 1024);
         }
 #endif
-        code_gen_buffer = mmap(addr, code_gen_buffer_size,
+        tls_var(code_gen_buffer) = mmap(addr, code_gen_buffer_size,
                                PROT_WRITE | PROT_READ | PROT_EXEC, 
                                flags, -1, 0);
-        if (code_gen_buffer == MAP_FAILED) {
+        if (tls_var(code_gen_buffer) == MAP_FAILED) {
             fprintf(stderr, "Could not allocate dynamic translator buffer\n");
             exit(1);
         }
     }
 #else
-    code_gen_buffer = g_malloc(code_gen_buffer_size);
-    map_exec(code_gen_buffer, code_gen_buffer_size);
+    tls_var(code_gen_buffer) = g_malloc(code_gen_buffer_size);
+    map_exec(tls_var(code_gen_buffer), code_gen_buffer_size);
 #endif
 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
     map_exec(code_gen_prologue, sizeof(code_gen_prologue));
     code_gen_buffer_max_size = code_gen_buffer_size -
         (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
     code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
-    tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
+    tls_var(tbs) = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
+    code_gen_enabled = 1;
+}
+
+void tcg_gen_buffer_init(void)
+{
+    code_gen_alloc();
+    tls_var(code_gen_ptr) = tls_var(code_gen_buffer);
 }
 
 /* Must be called before using the QEMU cpus. 'tb_size' is the size
@@ -579,8 +586,7 @@  static void code_gen_alloc(unsigned long tb_size)
 void tcg_exec_init(unsigned long tb_size)
 {
     cpu_gen_init();
-    code_gen_alloc(tb_size);
-    code_gen_ptr = code_gen_buffer;
+    code_gen_buffer_size = tb_size;
     page_init();
 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
     /* There's no guest base to take into account, so go ahead and
@@ -591,7 +597,7 @@  void tcg_exec_init(unsigned long tb_size)
 
 bool tcg_enabled(void)
 {
-    return code_gen_buffer != NULL;
+    return code_gen_enabled;
 }
 
 void cpu_exec_init_all(void)
@@ -682,10 +688,13 @@  static TranslationBlock *tb_alloc(target_ulong pc)
 {
     TranslationBlock *tb;
 
-    if (nb_tbs >= code_gen_max_blocks ||
-        (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
+    if (tls_var(nb_tbs) >= code_gen_max_blocks ||
+        (tls_var(code_gen_ptr) - tls_var(code_gen_buffer)) >=
+        code_gen_buffer_max_size) {
         return NULL;
-    tb = &tbs[nb_tbs++];
+    }
+
+    tb = &tls_var(tbs)[tls_var(nb_tbs)++];
     tb->pc = pc;
     tb->cflags = 0;
     return tb;
@@ -696,9 +705,9 @@  void tb_free(TranslationBlock *tb)
     /* In practice this is mostly used for single use temporary TB
        Ignore the hard cases and just back up if this TB happens to
        be the last one generated.  */
-    if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
-        code_gen_ptr = tb->tc_ptr;
-        nb_tbs--;
+    if (tls_var(nb_tbs) > 0 && tb == &tls_var(tbs)[tls_var(nb_tbs) - 1]) {
+        tls_var(code_gen_ptr) = tb->tc_ptr;
+        tls_var(nb_tbs)--;
     }
 }
 
@@ -749,14 +758,17 @@  void tb_flush(CPUState *env1)
     CPUState *env;
 #if defined(DEBUG_FLUSH)
     printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
-           (unsigned long)(code_gen_ptr - code_gen_buffer),
-           nb_tbs, nb_tbs > 0 ?
-           ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
+           (unsigned long)(tls_var(code_gen_ptr) - tls_var(code_gen_buffer)),
+           tls_var(nb_tbs), tls_var(nb_tbs) > 0 ?
+           ((unsigned long)(tls_var(code_gen_ptr) - tls_var(code_gen_buffer))) /
+           tls_var(nb_tbs) : 0);
 #endif
-    if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
+    if ((unsigned long)(tls_var(code_gen_ptr) - tls_var(code_gen_buffer)) >
+        code_gen_buffer_size) {
         cpu_abort(env1, "Internal error: code buffer overflow\n");
+    }
 
-    nb_tbs = 0;
+    tls_var(nb_tbs) = 0;
 
     for(env = first_cpu; env != NULL; env = env->next_cpu) {
         memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
@@ -765,7 +777,7 @@  void tb_flush(CPUState *env1)
     memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
     page_flush_tb();
 
-    code_gen_ptr = code_gen_buffer;
+    tls_var(code_gen_ptr) = tls_var(code_gen_buffer);
     /* XXX: flush processor icache at this point if cache flush is
        expensive */
     tb_flush_count++;
@@ -1008,13 +1020,14 @@  TranslationBlock *tb_gen_code(CPUState *env,
         /* Don't forget to invalidate previous TB info.  */
         tb_invalidated_flag = 1;
     }
-    tc_ptr = code_gen_ptr;
+    tc_ptr = tls_var(code_gen_ptr);
     tb->tc_ptr = tc_ptr;
     tb->cs_base = cs_base;
     tb->flags = flags;
     tb->cflags = cflags;
     cpu_gen_code(env, tb, &code_gen_size);
-    code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
+    tls_var(code_gen_ptr) = (void *)(((unsigned long)tls_var(code_gen_ptr) +
+                code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
 
     /* check next page if needed */
     virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
@@ -1330,17 +1343,19 @@  TranslationBlock *tb_find_pc(unsigned long tc_ptr)
     unsigned long v;
     TranslationBlock *tb;
 
-    if (nb_tbs <= 0)
+    if (tls_var(nb_tbs) <= 0) {
         return NULL;
-    if (tc_ptr < (unsigned long)code_gen_buffer ||
-        tc_ptr >= (unsigned long)code_gen_ptr)
+    }
+    if (tc_ptr < (unsigned long)tls_var(code_gen_buffer) ||
+        tc_ptr >= (unsigned long)tls_var(code_gen_ptr)) {
         return NULL;
+    }
     /* binary search (cf Knuth) */
     m_min = 0;
-    m_max = nb_tbs - 1;
+    m_max = tls_var(nb_tbs) - 1;
     while (m_min <= m_max) {
         m = (m_min + m_max) >> 1;
-        tb = &tbs[m];
+        tb = &tls_var(tbs)[m];
         v = (unsigned long)tb->tc_ptr;
         if (v == tc_ptr)
             return tb;
@@ -1350,7 +1365,7 @@  TranslationBlock *tb_find_pc(unsigned long tc_ptr)
             m_min = m + 1;
         }
     }
-    return &tbs[m_max];
+    return &tls_var(tbs)[m_max];
 }
 
 static void tb_reset_jump_recursive(TranslationBlock *tb);
@@ -4332,8 +4347,8 @@  void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
     cross_page = 0;
     direct_jmp_count = 0;
     direct_jmp2_count = 0;
-    for(i = 0; i < nb_tbs; i++) {
-        tb = &tbs[i];
+    for(i = 0; i < tls_var(nb_tbs); i++) {
+        tb = &tls_var(tbs)[i];
         target_code_size += tb->size;
         if (tb->size > max_target_code_size)
             max_target_code_size = tb->size;
@@ -4349,23 +4364,29 @@  void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
     /* XXX: avoid using doubles ? */
     cpu_fprintf(f, "Translation buffer state:\n");
     cpu_fprintf(f, "gen code size       %td/%ld\n",
-                code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
+                tls_var(code_gen_ptr) - tls_var(code_gen_buffer),
+                code_gen_buffer_max_size);
     cpu_fprintf(f, "TB count            %d/%d\n", 
-                nb_tbs, code_gen_max_blocks);
+                tls_var(nb_tbs), code_gen_max_blocks);
     cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
-                nb_tbs ? target_code_size / nb_tbs : 0,
+                tls_var(nb_tbs) ? target_code_size / tls_var(nb_tbs) : 0,
                 max_target_code_size);
     cpu_fprintf(f, "TB avg host size    %td bytes (expansion ratio: %0.1f)\n",
-                nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
-                target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
+                tls_var(nb_tbs) ?
+                    (tls_var(code_gen_ptr) - tls_var(code_gen_buffer)) /
+                    tls_var(nb_tbs) : 0,
+                target_code_size ? (double) (tls_var(code_gen_ptr) -
+                    tls_var(code_gen_buffer)) / target_code_size : 0);
     cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
             cross_page,
-            nb_tbs ? (cross_page * 100) / nb_tbs : 0);
+            tls_var(nb_tbs) ? (cross_page * 100) / tls_var(nb_tbs) : 0);
     cpu_fprintf(f, "direct jump count   %d (%d%%) (2 jumps=%d %d%%)\n",
                 direct_jmp_count,
-                nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
+                tls_var(nb_tbs) ?
+                    (direct_jmp_count * 100) / tls_var(nb_tbs) : 0,
                 direct_jmp2_count,
-                nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
+                tls_var(nb_tbs) ?
+                    (direct_jmp2_count * 100) / tls_var(nb_tbs) : 0);
     cpu_fprintf(f, "\nStatistics:\n");
     cpu_fprintf(f, "TB flush count      %d\n", tb_flush_count);
     cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
diff --git a/linux-user/main.c b/linux-user/main.c
index 14bf5f0..483482f 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -3364,6 +3364,7 @@  int main(int argc, char **argv, char **envp)
 #endif
     }
     tcg_exec_init(0);
+    tcg_gen_buffer_init();
     cpu_exec_init_all();
     /* NOTE: we need to init the CPU at this stage to get
        qemu_host_page_size */
diff --git a/qemu-common.h b/qemu-common.h
index c5e9cad..13d45e0 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -258,6 +258,7 @@  typedef enum LostTickPolicy {
     LOST_TICK_MAX
 } LostTickPolicy;
 
+void tcg_gen_buffer_init(void);
 void tcg_exec_init(unsigned long tb_size);
 bool tcg_enabled(void);