From patchwork Thu Jan 31 18:47:23 2013
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Evgeny Voevodin <evgenyvoevodin@gmail.com>
X-Patchwork-Id: 217256
From: Evgeny Voevodin <evgenyvoevodin@gmail.com>
To: qemu-devel@nongnu.org
Cc: blauwirbel@gmail.com, Evgeny Voevodin <evgenyvoevodin@gmail.com>
Date: Fri, 1 Feb 2013 01:47:23 +0700
Message-Id: <1359658043-2425-3-git-send-email-evgenyvoevodin@gmail.com>
In-Reply-To: <1359658043-2425-1-git-send-email-evgenyvoevodin@gmail.com>
References: <1359658043-2425-1-git-send-email-evgenyvoevodin@gmail.com>
X-Mailer: git-send-email 1.7.9.5
Subject: [Qemu-devel] [PATCH 2/2] TCG: Move translation block variables to new context inside tcg_ctx: tb_ctx

It's worth cleaning up the translation block variables and moving them
into a single context, as suggested by Swirl. Using this context directly
inside tcg_ctx also speeds up code generation a bit.
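For illustration, here is the access pattern this change establishes. The
sketch below is not part of the patch: TranslationBlock is reduced to the
two fields the example needs, CODE_GEN_PHYS_HASH_SIZE and the modulo hash
are simplified stand-ins, and lookup_tb_phys_hash is a hypothetical helper
that mirrors the bucket walk tb_find_slow does in the diff.

/* Illustrative sketch only -- not part of the patch.  All TB bookkeeping
 * hangs off the single global tcg_ctx, so call sites say
 * tcg_ctx.tb_ctx.<field> instead of touching scattered file-scope
 * globals. */
#include <stdlib.h>

#define CODE_GEN_PHYS_HASH_SIZE 512

typedef struct TranslationBlock TranslationBlock;
struct TranslationBlock {
    unsigned long pc;                 /* guest PC this TB translates     */
    TranslationBlock *phys_hash_next; /* next TB in the same hash bucket */
};

typedef struct TBContext {
    TranslationBlock *tbs;            /* array of all allocated TBs      */
    TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
    int nb_tbs;
    int tb_invalidated_flag;
} TBContext;

typedef struct TCGContext {
    TBContext tb_ctx;                 /* embedded by value, not a pointer */
} TCGContext;

TCGContext tcg_ctx;                   /* single global, zero-initialized */

/* Hypothetical helper mirroring the hash-bucket walk in tb_find_slow. */
static TranslationBlock *lookup_tb_phys_hash(unsigned long phys_pc)
{
    unsigned int h = phys_pc % CODE_GEN_PHYS_HASH_SIZE;
    TranslationBlock *tb;

    for (tb = tcg_ctx.tb_ctx.tb_phys_hash[h]; tb != NULL;
         tb = tb->phys_hash_next) {
        if (tb->pc == phys_pc) {
            return tb;
        }
    }
    return NULL;                      /* empty bucket: no TB cached yet */
}

int main(void)
{
    /* All buckets are NULL in a fresh context, so the lookup misses. */
    return lookup_tb_phys_hash(0x1000) ? EXIT_FAILURE : EXIT_SUCCESS;
}

Because tb_ctx is embedded by value in the one global tcg_ctx, every
tcg_ctx.tb_ctx.<field> access is a load at a fixed offset from a single
symbol, which is presumably where the small code generation speedup
comes from.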
Signed-off-by: Evgeny Voevodin <evgenyvoevodin@gmail.com>
---
 cpu-exec.c              | 18 ++++-----
 include/exec/exec-all.h | 27 +++++++++----
 linux-user/main.c       |  6 +--
 tcg/tcg.h               |  2 +
 translate-all.c         | 96 +++++++++++++++++++++++------------------------
 5 files changed, 79 insertions(+), 70 deletions(-)

diff --git a/cpu-exec.c b/cpu-exec.c
index 19ebb4a..ff9a884 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -23,8 +23,6 @@
 #include "qemu/atomic.h"
 #include "sysemu/qtest.h"

-int tb_invalidated_flag;
-
 //#define CONFIG_DEBUG_EXEC

 bool qemu_cpu_has_work(CPUState *cpu)
@@ -90,13 +88,13 @@ static TranslationBlock *tb_find_slow(CPUArchState *env,
     tb_page_addr_t phys_pc, phys_page1;
     target_ulong virt_page2;

-    tb_invalidated_flag = 0;
+    tcg_ctx.tb_ctx.tb_invalidated_flag = 0;

     /* find translated block using physical mappings */
     phys_pc = get_page_addr_code(env, pc);
     phys_page1 = phys_pc & TARGET_PAGE_MASK;
     h = tb_phys_hash_func(phys_pc);
-    ptb1 = &tb_phys_hash[h];
+    ptb1 = &tcg_ctx.tb_ctx.tb_phys_hash[h];
     for(;;) {
         tb = *ptb1;
         if (!tb)
@@ -128,8 +126,8 @@ static TranslationBlock *tb_find_slow(CPUArchState *env,
     /* Move the last found TB to the head of the list */
     if (likely(*ptb1)) {
         *ptb1 = tb->phys_hash_next;
-        tb->phys_hash_next = tb_phys_hash[h];
-        tb_phys_hash[h] = tb;
+        tb->phys_hash_next = tcg_ctx.tb_ctx.tb_phys_hash[h];
+        tcg_ctx.tb_ctx.tb_phys_hash[h] = tb;
     }
     /* we add the TB in the virtual pc hash table */
     env->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb;
@@ -563,16 +561,16 @@ int cpu_exec(CPUArchState *env)
 #endif
                 }
 #endif /* DEBUG_DISAS || CONFIG_DEBUG_EXEC */
-                spin_lock(&tb_lock);
+                spin_lock(&tcg_ctx.tb_ctx.tb_lock);
                 tb = tb_find_fast(env);
                 /* Note: we do it here to avoid a gcc bug on Mac OS X
                    when doing it in tb_find_slow */
-                if (tb_invalidated_flag) {
+                if (tcg_ctx.tb_ctx.tb_invalidated_flag) {
                     /* as some TB could have been invalidated because
                        of memory exceptions while generating the code, we
                        must recompute the hash index here */
                     next_tb = 0;
-                    tb_invalidated_flag = 0;
+                    tcg_ctx.tb_ctx.tb_invalidated_flag = 0;
                 }
 #ifdef CONFIG_DEBUG_EXEC
                 qemu_log_mask(CPU_LOG_EXEC, "Trace %p [" TARGET_FMT_lx "] %s\n",
@@ -585,7 +583,7 @@ int cpu_exec(CPUArchState *env)
                 if (next_tb != 0 && tb->page_addr[1] == -1) {
                     tb_add_jump((TranslationBlock *)(next_tb & ~3), next_tb & 3, tb);
                 }
-                spin_unlock(&tb_lock);
+                spin_unlock(&tcg_ctx.tb_ctx.tb_lock);

                 /* cpu_interrupt might be called while translating the
                    TB, but before it is linked into a potentially
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index d235ef8..f685c28 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -168,6 +168,25 @@ struct TranslationBlock {
     uint32_t icount;
 };

+#include "exec/spinlock.h"
+
+typedef struct TBContext TBContext;
+
+struct TBContext {
+
+    TranslationBlock *tbs;
+    TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
+    int nb_tbs;
+    /* any access to the tbs or the page table must use this lock */
+    spinlock_t tb_lock;
+
+    /* statistics */
+    int tb_flush_count;
+    int tb_phys_invalidate_count;
+
+    int tb_invalidated_flag;
+};
+
 static inline unsigned int tb_jmp_cache_hash_page(target_ulong pc)
 {
     target_ulong tmp;
@@ -192,8 +211,6 @@ void tb_free(TranslationBlock *tb);
 void tb_flush(CPUArchState *env);
 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);

-extern TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
-
 #if defined(USE_DIRECT_JUMP)

 #if defined(CONFIG_TCG_INTERPRETER)
@@ -275,12 +292,6 @@ static inline void tb_add_jump(TranslationBlock *tb, int n,
     }
 }

-#include "exec/spinlock.h"
-
-extern spinlock_t tb_lock;
-
-extern int tb_invalidated_flag;
-
 /* The return address may point to the start of the next instruction.
    Subtracting one gets us the call instruction itself.  */
 #if defined(CONFIG_TCG_INTERPRETER)
diff --git a/linux-user/main.c b/linux-user/main.c
index 0181bc2..8f09abd 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -111,7 +111,7 @@ static int pending_cpus;
 /* Make sure everything is in a consistent state for calling fork().  */
 void fork_start(void)
 {
-    pthread_mutex_lock(&tb_lock);
+    pthread_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
     pthread_mutex_lock(&exclusive_lock);
     mmap_fork_start();
 }
@@ -129,11 +129,11 @@ void fork_end(int child)
         pthread_mutex_init(&cpu_list_mutex, NULL);
         pthread_cond_init(&exclusive_cond, NULL);
         pthread_cond_init(&exclusive_resume, NULL);
-        pthread_mutex_init(&tb_lock, NULL);
+        pthread_mutex_init(&tcg_ctx.tb_ctx.tb_lock, NULL);
         gdbserver_fork(thread_env);
     } else {
         pthread_mutex_unlock(&exclusive_lock);
-        pthread_mutex_unlock(&tb_lock);
+        pthread_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
     }
 }
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 4086e98..51c8176 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -471,6 +471,8 @@ struct TCGContext {
     size_t code_gen_buffer_max_size;
     uint8_t *code_gen_ptr;

+    TBContext tb_ctx;
+
 #if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
     /* labels info for qemu_ld/st IRs
        The labels help to generate TLB miss case codes at the end of TB */
diff --git a/translate-all.c b/translate-all.c
index d666562..efeb247 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -72,13 +72,6 @@

 #define SMC_BITMAP_USE_THRESHOLD 10

-/* Translation blocks */
-static TranslationBlock *tbs;
-TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
-static int nb_tbs;
-/* any access to the tbs or the page table must use this lock */
-spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
-
 typedef struct PageDesc {
     /* list of TBs intersecting this ram page */
     TranslationBlock *first_tb;
@@ -125,10 +118,6 @@ uintptr_t qemu_host_page_mask;
    The bottom level has pointers to PageDesc.  */
 static void *l1_map[V_L1_SIZE];

-/* statistics */
-static int tb_flush_count;
-static int tb_phys_invalidate_count;
-
 /* code generation context */
 TCGContext tcg_ctx;

@@ -589,7 +578,8 @@ static inline void code_gen_alloc(size_t tb_size)
             (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
     tcg_ctx.code_gen_max_blocks = tcg_ctx.code_gen_buffer_size /
             CODE_GEN_AVG_BLOCK_SIZE;
-    tbs = g_malloc(tcg_ctx.code_gen_max_blocks * sizeof(TranslationBlock));
+    tcg_ctx.tb_ctx.tbs =
+            g_malloc(tcg_ctx.code_gen_max_blocks * sizeof(TranslationBlock));
 }

 /* Must be called before using the QEMU cpus. 'tb_size' is the size
@@ -620,12 +610,12 @@ static TranslationBlock *tb_alloc(target_ulong pc)
 {
     TranslationBlock *tb;

-    if (nb_tbs >= tcg_ctx.code_gen_max_blocks ||
+    if (tcg_ctx.tb_ctx.nb_tbs >= tcg_ctx.code_gen_max_blocks ||
         (tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer) >=
          tcg_ctx.code_gen_buffer_max_size) {
         return NULL;
     }
-    tb = &tbs[nb_tbs++];
+    tb = &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs++];
     tb->pc = pc;
     tb->cflags = 0;
     return tb;
@@ -636,9 +626,10 @@ void tb_free(TranslationBlock *tb)
     /* In practice this is mostly used for single use temporary TB
        Ignore the hard cases and just back up if this TB happens to
        be the last one generated.  */
-    if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
+    if (tcg_ctx.tb_ctx.nb_tbs > 0 &&
+            tb == &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs - 1]) {
         tcg_ctx.code_gen_ptr = tb->tc_ptr;
-        nb_tbs--;
+        tcg_ctx.tb_ctx.nb_tbs--;
     }
 }

@@ -693,27 +684,28 @@ void tb_flush(CPUArchState *env1)
 #if defined(DEBUG_FLUSH)
     printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
            (unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer),
-           nb_tbs, nb_tbs > 0 ?
+           tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.tb_ctx.nb_tbs > 0 ?
            ((unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer)) /
-           nb_tbs : 0);
+           tcg_ctx.tb_ctx.nb_tbs : 0);
 #endif
     if ((unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer) >
         tcg_ctx.code_gen_buffer_size) {
         cpu_abort(env1, "Internal error: code buffer overflow\n");
     }
-    nb_tbs = 0;
+    tcg_ctx.tb_ctx.nb_tbs = 0;

     for (env = first_cpu; env != NULL; env = env->next_cpu) {
         memset(env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *));
     }

-    memset(tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof(void *));
+    memset(tcg_ctx.tb_ctx.tb_phys_hash, 0,
+           CODE_GEN_PHYS_HASH_SIZE * sizeof(void *));
     page_flush_tb();

     tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer;
     /* XXX: flush processor icache at this point if cache flush is
        expensive */
-    tb_flush_count++;
+    tcg_ctx.tb_ctx.tb_flush_count++;
 }

 #ifdef DEBUG_TB_CHECK
@@ -725,7 +717,7 @@ static void tb_invalidate_check(target_ulong address)

     address &= TARGET_PAGE_MASK;
     for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
-        for (tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
+        for (tb = tcg_ctx.tb_ctx.tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
             if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
                   address >= tb->pc + tb->size)) {
                 printf("ERROR invalidate: address=" TARGET_FMT_lx
@@ -743,7 +735,8 @@ static void tb_page_check(void)
     int i, flags1, flags2;

     for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
-        for (tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
+        for (tb = tcg_ctx.tb_ctx.tb_phys_hash[i]; tb != NULL;
+                tb = tb->phys_hash_next) {
             flags1 = page_get_flags(tb->pc);
             flags2 = page_get_flags(tb->pc + tb->size - 1);
             if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
@@ -835,7 +828,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
     /* remove the TB from the hash list */
     phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
     h = tb_phys_hash_func(phys_pc);
-    tb_hash_remove(&tb_phys_hash[h], tb);
+    tb_hash_remove(&tcg_ctx.tb_ctx.tb_phys_hash[h], tb);

     /* remove the TB from the page list */
     if (tb->page_addr[0] != page_addr) {
@@ -849,7 +842,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
         invalidate_page_bitmap(p);
     }

-    tb_invalidated_flag = 1;
+    tcg_ctx.tb_ctx.tb_invalidated_flag = 1;

     /* remove the TB from the hash list */
     h = tb_jmp_cache_hash_func(tb->pc);
@@ -878,7 +871,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
     }
     tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */

-    tb_phys_invalidate_count++;
+    tcg_ctx.tb_ctx.tb_phys_invalidate_count++;
 }

 static inline void set_bits(uint8_t *tab, int start, int len)
@@ -955,7 +948,7 @@ TranslationBlock *tb_gen_code(CPUArchState *env,
         /* cannot fail at this point */
         tb = tb_alloc(pc);
         /* Don't forget to invalidate previous TB info.  */
-        tb_invalidated_flag = 1;
+        tcg_ctx.tb_ctx.tb_invalidated_flag = 1;
     }
     tc_ptr = tcg_ctx.code_gen_ptr;
     tb->tc_ptr = tc_ptr;
@@ -1273,7 +1266,7 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
     mmap_lock();
     /* add in the physical hash table */
     h = tb_phys_hash_func(phys_pc);
-    ptb = &tb_phys_hash[h];
+    ptb = &tcg_ctx.tb_ctx.tb_phys_hash[h];
     tb->phys_hash_next = *ptb;
     *ptb = tb;

@@ -1323,7 +1316,7 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
     uintptr_t v;
     TranslationBlock *tb;

-    if (nb_tbs <= 0) {
+    if (tcg_ctx.tb_ctx.nb_tbs <= 0) {
         return NULL;
     }
     if (tc_ptr < (uintptr_t)tcg_ctx.code_gen_buffer ||
@@ -1332,10 +1325,10 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
     }
     /* binary search (cf Knuth) */
     m_min = 0;
-    m_max = nb_tbs - 1;
+    m_max = tcg_ctx.tb_ctx.nb_tbs - 1;
     while (m_min <= m_max) {
         m = (m_min + m_max) >> 1;
-        tb = &tbs[m];
+        tb = &tcg_ctx.tb_ctx.tbs[m];
         v = (uintptr_t)tb->tc_ptr;
         if (v == tc_ptr) {
             return tb;
@@ -1345,7 +1338,7 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
             m_min = m + 1;
         }
     }
-    return &tbs[m_max];
+    return &tcg_ctx.tb_ctx.tbs[m_max];
 }

 static void tb_reset_jump_recursive(TranslationBlock *tb);
@@ -1566,8 +1559,8 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
     cross_page = 0;
     direct_jmp_count = 0;
     direct_jmp2_count = 0;
-    for (i = 0; i < nb_tbs; i++) {
-        tb = &tbs[i];
+    for (i = 0; i < tcg_ctx.tb_ctx.nb_tbs; i++) {
+        tb = &tcg_ctx.tb_ctx.tbs[i];
         target_code_size += tb->size;
         if (tb->size > max_target_code_size) {
             max_target_code_size = tb->size;
@@ -1588,27 +1581,32 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
                 tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer,
                 tcg_ctx.code_gen_buffer_max_size);
     cpu_fprintf(f, "TB count %d/%d\n",
-                nb_tbs, tcg_ctx.code_gen_max_blocks);
+                tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.code_gen_max_blocks);
     cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
-                nb_tbs ? target_code_size / nb_tbs : 0,
-                max_target_code_size);
+                tcg_ctx.tb_ctx.nb_tbs ? target_code_size /
+                        tcg_ctx.tb_ctx.nb_tbs : 0,
+                max_target_code_size);
     cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
-                nb_tbs ? (tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer) /
-                nb_tbs : 0,
-                target_code_size ?
-                (double) (tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer) /
-                target_code_size : 0);
-    cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
-                cross_page,
-                nb_tbs ? (cross_page * 100) / nb_tbs : 0);
+                tcg_ctx.tb_ctx.nb_tbs ? (tcg_ctx.code_gen_ptr -
+                        tcg_ctx.code_gen_buffer) /
+                        tcg_ctx.tb_ctx.nb_tbs : 0,
+                target_code_size ? (double) (tcg_ctx.code_gen_ptr -
+                        tcg_ctx.code_gen_buffer) /
+                        target_code_size : 0);
+    cpu_fprintf(f, "cross page TB count %d (%d%%)\n", cross_page,
+                tcg_ctx.tb_ctx.nb_tbs ? (cross_page * 100) /
+                        tcg_ctx.tb_ctx.nb_tbs : 0);
     cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
                 direct_jmp_count,
-                nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
+                tcg_ctx.tb_ctx.nb_tbs ? (direct_jmp_count * 100) /
+                        tcg_ctx.tb_ctx.nb_tbs : 0,
                 direct_jmp2_count,
-                nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
+                tcg_ctx.tb_ctx.nb_tbs ? (direct_jmp2_count * 100) /
+                        tcg_ctx.tb_ctx.nb_tbs : 0);
     cpu_fprintf(f, "\nStatistics:\n");
-    cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
-    cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
+    cpu_fprintf(f, "TB flush count %d\n", tcg_ctx.tb_ctx.tb_flush_count);
+    cpu_fprintf(f, "TB invalidate count %d\n",
+                tcg_ctx.tb_ctx.tb_phys_invalidate_count);
     cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
     tcg_dump_info(f, cpu_fprintf);
 }