This will allow us to safely look up TBs without taking any locks.
Note however that tb_lock protects the valid field, so if chaining is
an option then we'll have to acquire the lock.

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 cpu-exec.c              | 23 +++++++----------------
 include/exec/exec-all.h |  3 +--
 translate-all.c         | 51 +++++++++++++++++----------------------------------
 3 files changed, 25 insertions(+), 52 deletions(-)

--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -239,9 +239,7 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
tb_lock();
tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags,
max_cycles | CF_NOCACHE);
- tb->orig_tb = (atomic_mb_read(&tcg_ctx.tb_ctx.tb_invalidated_flag)
- ? NULL
- : orig_tb);
+ tb->orig_tb = orig_tb->valid ? orig_tb : NULL;
cpu->current_tb = tb;
tb_unlock();
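The hunk above shows the shape of the new scheme: the barrier-guarded
read of the global tb_invalidated_flag (atomic_mb_read) becomes a plain
read of orig_tb->valid, which needs no barriers because the enclosing
tb_lock()/tb_unlock() pair already serializes it. As an illustration
only (not part of the patch), a minimal sketch of that pattern, using
invented stand-in names (struct tb, tb_lock_()) rather than QEMU's real
types:

#include <stdbool.h>
#include <stddef.h>
#include <pthread.h>

struct tb {                     /* toy stand-in for TranslationBlock */
    bool valid;                 /* protected by tb_lock */
    struct tb *orig_tb;
};

static pthread_mutex_t tb_mutex = PTHREAD_MUTEX_INITIALIZER;

static void tb_lock_(void)   { pthread_mutex_lock(&tb_mutex); }
static void tb_unlock_(void) { pthread_mutex_unlock(&tb_mutex); }

/* Mirrors the cpu_exec_nocache() hunk: record orig_tb only while it
 * is still valid; the caller must hold tb_lock. */
static void set_orig_tb(struct tb *tb, struct tb *orig_tb)
{
    tb->orig_tb = orig_tb->valid ? orig_tb : NULL;
}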
@@ -268,8 +266,6 @@ static TranslationBlock *tb_find_physical(CPUState *cpu,
tb_page_addr_t phys_pc, phys_page1;
target_ulong virt_page2;
- atomic_mb_set(&tcg_ctx.tb_ctx.tb_invalidated_flag, 0);
-
/* find translated block using physical mappings */
phys_pc = get_page_addr_code(env, pc);
phys_page1 = phys_pc & TARGET_PAGE_MASK;
@@ -536,15 +532,6 @@ int cpu_exec(CPUState *cpu)
cpu_loop_exit(cpu);
}
tb = tb_find_fast(cpu);
- /* Note: we do it here to avoid a gcc bug on Mac OS X when
- doing it in tb_find_slow */
- if (atomic_mb_read(&tcg_ctx.tb_ctx.tb_invalidated_flag)) {
- /* as some TB could have been invalidated because
- of memory exceptions while generating the code, we
- must recompute the hash index here */
- next_tb = 0;
- atomic_mb_set(&tcg_ctx.tb_ctx.tb_invalidated_flag, 0);
- }
if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
qemu_log("Trace %p [" TARGET_FMT_lx "] %s\n",
tb->tc_ptr, tb->pc, lookup_symbol(tb->pc));
@@ -553,9 +540,13 @@ int cpu_exec(CPUState *cpu)
spans two pages, we cannot safely do a direct
jump. */
if (next_tb != 0 && tb->page_addr[1] == -1) {
+ TranslationBlock *next;
+
tb_lock_recursive();
- tb_add_jump((TranslationBlock *)(next_tb & ~TB_EXIT_MASK),
- next_tb & TB_EXIT_MASK, tb);
+ next = (TranslationBlock *)(next_tb & ~TB_EXIT_MASK);
+ if (tb->valid && next->valid) {
+ tb_add_jump(next, next_tb & TB_EXIT_MASK, tb);
+ }
}
/* The lock may not be taken if we went through the
* fast lookup path and did not have to do any patching.
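To make the chaining rule from this hunk concrete, here is a sketch
(again illustration only) that reuses the toy struct tb and
tb_lock_()/tb_unlock_() helpers from above. tb_add_jump_() is a stub
standing in for QEMU's tb_add_jump(), and unlike the real code, which
uses tb_lock_recursive() and unlocks further down, it takes and drops
the lock in one place:

/* Would patch jump slot n of tb to point at next's code. */
static void tb_add_jump_(struct tb *next, int n, struct tb *tb)
{
    (void)next; (void)n; (void)tb;
}

/* Chain tb -> next only when both ends are still valid, and only
 * under tb_lock, since tb_lock protects the valid field. */
static void maybe_chain(struct tb *tb, struct tb *next, int exit_idx)
{
    tb_lock_();
    if (tb->valid && next->valid) {
        tb_add_jump_(next, exit_idx, tb);
    }
    tb_unlock_();
}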
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -178,6 +178,7 @@ struct TranslationBlock {
jmp_first */
struct TranslationBlock *jmp_next[2];
struct TranslationBlock *jmp_first;
+ bool valid; /* protected by tb_lock */
};
#include "qemu/thread.h"
@@ -195,8 +196,6 @@ struct TBContext {
/* statistics */
int tb_flush_count;
int tb_phys_invalidate_count;
-
- int tb_invalidated_flag;
};
void tb_free(TranslationBlock *tb);
--- a/translate-all.c
+++ b/translate-all.c
@@ -791,6 +791,17 @@ static inline void invalidate_page_bitmap(PageDesc *p)
#endif
}
+static void tb_invalidate_all(void)
+{
+ int i;
+
+ for (i = 0; i < tcg_ctx.tb_ctx.nb_tbs; i++) {
+ TranslationBlock *tb = &tcg_ctx.tb_ctx.tbs[i];
+
+ tb->valid = false;
+ }
+}
+
/* Set to NULL all the 'first_tb' fields in all PageDescs. */
static void page_flush_tb_1(int level, void **lp)
{
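tb_invalidate_all() is a linear sweep over every TB allocated so far;
that is acceptable because it only runs from tb_flush(), which already
discards all translated code anyway. In the toy model from the earlier
sketches it would look like this (tb_pool is an invented type standing
in for the tbs array in tcg_ctx):

struct tb_pool {
    struct tb *tbs;             /* array of all allocated TBs */
    int nb_tbs;
};

/* Bulk-invalidate at flush time; caller holds tb_lock. */
static void invalidate_all(struct tb_pool *pool)
{
    int i;

    for (i = 0; i < pool->nb_tbs; i++) {
        pool->tbs[i].valid = false;
    }
}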
@@ -866,6 +877,7 @@ void tb_flush(CPUState *cpu)
cpu_tb_jmp_cache_clear(cpu);
}
+ tb_invalidate_all();
memset(tcg_ctx.tb_ctx.tb_phys_hash, 0, sizeof(tcg_ctx.tb_ctx.tb_phys_hash));
page_flush_tb();
@@ -1021,11 +1033,6 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
tb_page_addr_t phys_pc;
TranslationBlock *tb1, *tb2;
- /* Set the invalidated_flag first, to block patching a
- * jump to tb. FIXME: invalidated_flag should be per TB.
- */
- atomic_mb_set(&tcg_ctx.tb_ctx.tb_invalidated_flag, 1);
-
/* Now remove the TB from the hash list, so that tb_find_slow
* cannot find it anymore.
*/
@@ -1045,8 +1052,6 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
invalidate_page_bitmap(p);
}
- tcg_ctx.tb_ctx.tb_invalidated_flag = 1;
-
/* remove the TB from the hash list */
CPU_FOREACH(cpu) {
tb_jmp_cache_entry_clear(cpu, tb);
@@ -1070,33 +1075,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
}
tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
-#if 0
- /* TODO: I think this barrier is not necessary. On the
- * cpu_exec side, it is okay if the read from tb_jmp_cache
- * comes after the read from tb_phys_hash. This is because
- * the read would be bleeding into the tb_lock critical
- * section, hence there cannot be any concurrent tb_invalidate.
- * And if you don't need a barrier there, you shouldn't need
- * one here, either.
- */
- smp_wmb();
-#endif
-
- /* Finally, remove the TB from the per-CPU cache that is
- * accessed without tb_lock. The tb can still be executed
- * once after returning, if the cache was accessed before
- * this point, but that's it.
- *
- * The cache cannot be filled with this tb anymore, because
- * the lists are accessed with tb_lock held.
- */
- h = tb_jmp_cache_hash_func(tb->pc);
- CPU_FOREACH(cpu) {
- if (cpu->tb_jmp_cache[h] == tb) {
- cpu->tb_jmp_cache[h] = NULL;
- }
- }
-
+ tb->valid = false;
tcg_ctx.tb_ctx.tb_phys_invalidate_count++;
}
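Note what changed at the end of tb_phys_invalidate(): instead of
purging tb from every CPU's tb_jmp_cache, it just clears valid. A stale
cache entry may therefore still be found and executed until the cache
is cleared elsewhere, but no new chains to the dead TB can be patched
in, because every would-be patcher re-checks valid under tb_lock. In
the toy model (illustration only, unlinking elided):

/* Caller holds tb_lock; hash-list unlinking and jump-list reset
 * are elided.  Clearing valid is what fences off future chaining. */
static void phys_invalidate(struct tb *tb)
{
    tb->valid = false;
}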
@@ -1157,12 +1136,16 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
tb_flush_safe(cpu);
#endif
cpu_loop_exit(cpu);
+ tb_flush(cpu);
+ /* cannot fail at this point */
+ tb = tb_alloc(pc);
}
tb->tc_ptr = tcg_ctx.code_gen_ptr;
tb->cs_base = cs_base;
tb->flags = flags;
tb->cflags = cflags;
+ tb->valid = true;
cpu_gen_code(env, tb, &code_gen_size);
tcg_ctx.code_gen_ptr = (void *)(((uintptr_t)tcg_ctx.code_gen_ptr +
code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
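Finally, a small self-contained model of the scheme the patch
implements -- invented names, not QEMU code -- that compiles with
cc -pthread and demonstrates the invariant: generation marks a TB
valid, invalidation clears the flag, and chaining is refused unless
both ends are valid at patch time.

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <pthread.h>

struct tb {
    bool valid;                 /* protected by tb_mutex */
    struct tb *jmp_dest;        /* stand-in for a patched direct jump */
};

static pthread_mutex_t tb_mutex = PTHREAD_MUTEX_INITIALIZER;

static void gen_code(struct tb *tb)        /* cf. tb_gen_code() */
{
    pthread_mutex_lock(&tb_mutex);
    tb->valid = true;
    tb->jmp_dest = NULL;
    pthread_mutex_unlock(&tb_mutex);
}

static void invalidate(struct tb *tb)      /* cf. tb_phys_invalidate() */
{
    pthread_mutex_lock(&tb_mutex);
    tb->valid = false;
    pthread_mutex_unlock(&tb_mutex);
}

/* cf. the chaining hunk in cpu_exec(); returns whether it patched */
static bool chain(struct tb *from, struct tb *to)
{
    bool patched = false;

    pthread_mutex_lock(&tb_mutex);
    if (from->valid && to->valid) {
        from->jmp_dest = to;
        patched = true;
    }
    pthread_mutex_unlock(&tb_mutex);
    return patched;
}

int main(void)
{
    struct tb a, b;

    gen_code(&a);
    gen_code(&b);
    assert(chain(&a, &b));      /* both valid: jump gets patched */

    invalidate(&b);
    a.jmp_dest = NULL;
    assert(!chain(&a, &b));     /* dead destination: patch refused */

    printf("per-TB valid flag model OK\n");
    return 0;
}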