Message ID | 20190720010235.32444-3-vandersonmr2@gmail.com |
---|---|
State | New |
Headers | show |
Series | Measure Tiny Code Generation Quality | expand |
vandersonmr <vandersonmr2@gmail.com> writes: > If a TB has a TBS (TBStatistics) with the TB_EXEC_STATS > enabled, then we instrument the start code of the TB > to atomically count the number of times it is executed. > The execution count of the TB is stored in its respective > TBS. > > Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com> > --- > accel/tcg/tcg-runtime.c | 7 +++++++ > accel/tcg/tcg-runtime.h | 2 ++ > accel/tcg/translate-all.c | 8 ++++++++ > accel/tcg/translator.c | 1 + > include/exec/gen-icount.h | 9 +++++++++ > include/exec/tb-stats.h | 11 +++++++++++ > include/qemu/log.h | 6 ++++++ > util/log.c | 11 +++++++++++ > 8 files changed, 55 insertions(+) > > diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c > index 8a1e408e31..f332eae334 100644 > --- a/accel/tcg/tcg-runtime.c > +++ b/accel/tcg/tcg-runtime.c > @@ -167,3 +167,10 @@ void HELPER(exit_atomic)(CPUArchState *env) > { > cpu_loop_exit_atomic(env_cpu(env), GETPC()); > } > + > +void HELPER(inc_exec_freq)(void *ptr) > +{ > + TBStatistics *stats = (TBStatistics *) ptr; > + g_assert(stats); > + atomic_inc(&stats->executions.total); > +} > diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h > index 4fa61b49b4..bf0b75dbe8 100644 > --- a/accel/tcg/tcg-runtime.h > +++ b/accel/tcg/tcg-runtime.h > @@ -28,6 +28,8 @@ DEF_HELPER_FLAGS_1(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env) > > DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env) > > +DEF_HELPER_FLAGS_1(inc_exec_freq, TCG_CALL_NO_RWG, void, ptr) > + > #ifdef CONFIG_SOFTMMU > > DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG, > diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c > index a574890a80..7497dae508 100644 > --- a/accel/tcg/translate-all.c > +++ b/accel/tcg/translate-all.c > @@ -1785,6 +1785,14 @@ TranslationBlock *tb_gen_code(CPUState *cpu, > */ > if (tb_stats_collection_enabled()) { > tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags); > + uint32_t flag = 
get_default_tbstats_flag(); > + > + if (qemu_log_in_addr_range(tb->pc)) { > + if (flag & TB_EXEC_STATS) { > + tb->tb_stats->stats_enabled |= TB_EXEC_STATS; > + } > + } > + > } else { > tb->tb_stats = NULL; > } > diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c > index 9226a348a3..396a11e828 100644 > --- a/accel/tcg/translator.c > +++ b/accel/tcg/translator.c > @@ -46,6 +46,7 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db, > > ops->init_disas_context(db, cpu); > tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */ > + gen_tb_exec_count(tb); > > /* Reset the temp count so that we can identify leaks */ > tcg_clear_temp_count(); > diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h > index f7669b6841..b3efe41894 100644 > --- a/include/exec/gen-icount.h > +++ b/include/exec/gen-icount.h > @@ -7,6 +7,15 @@ > > static TCGOp *icount_start_insn; > > +static inline void gen_tb_exec_count(TranslationBlock *tb) > +{ > + if (tb_stats_enabled(tb, TB_EXEC_STATS)) { > + TCGv_ptr ptr = tcg_const_ptr(tb->tb_stats); > + gen_helper_inc_exec_freq(ptr); > + tcg_temp_free_ptr(ptr); > + } > +} > + > static inline void gen_tb_start(TranslationBlock *tb) > { > TCGv_i32 count, imm; > diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h > index 0913155ec3..ee1e8de0d3 100644 > --- a/include/exec/tb-stats.h > +++ b/include/exec/tb-stats.h > @@ -6,6 +6,9 @@ > #include "exec/tb-context.h" > #include "tcg.h" > > +#define tb_stats_enabled(tb, JIT_STATS) \ > + (tb && tb->tb_stats && (tb->tb_stats->stats_enabled & JIT_STATS)) > + > typedef struct TBStatistics TBStatistics; > > /* > @@ -22,6 +25,14 @@ struct TBStatistics { > uint32_t flags; > /* cs_base isn't included in the hash but we do check for matches */ > target_ulong cs_base; > + > + uint32_t stats_enabled; > + > + /* Execution stats */ > + struct { > + unsigned long total; > + unsigned long atomic; We are not incrementing atomic in this patch. 
Also it's not total so maybe "normal" makes more sense. Something like: fixup! accel: collecting TB execution count 4 files changed, 11 insertions(+), 6 deletions(-) accel/tcg/cpu-exec.c | 4 ++++ accel/tcg/tb-stats.c | 9 +++++---- accel/tcg/tcg-runtime.c | 2 +- include/exec/tb-stats.h | 2 +- modified accel/tcg/cpu-exec.c @@ -252,6 +252,10 @@ void cpu_exec_step_atomic(CPUState *cpu) start_exclusive(); + if (tb_stats_enabled(tb, TB_EXEC_STATS)) { + tb->tb_stats->executions.atomic++; + } + /* Since we got here, we know that parallel_cpus must be true. */ parallel_cpus = false; in_exclusive_region = true; modified accel/tcg/tb-stats.c @@ -233,11 +233,12 @@ static void dump_tb_header(TBStatistics *tbs) float guest_host_prop = g ? ((float) h / g) : 0; qemu_log("TB%d: phys:0x"TB_PAGE_ADDR_FMT" virt:0x"TARGET_FMT_lx - " flags:%#08x (trans:%lu uncached:%lu exec:%lu ints: g:%u op:%u op_opt:%u h:%u h/g:%.2f spills:%d)\n", + " flags:%#08x (trans:%lu uncached:%lu exec:%lu/%lu ints: g:%u op:%u op_opt:%u h:%u h/g:%.2f spills:%d)\n", tbs->display_id, tbs->phys_pc, tbs->pc, tbs->flags, tbs->translations.total, tbs->translations.uncached, - tbs->executions.total, g, ops, ops_opt, h, guest_host_prop, + tbs->executions.normal, tbs->executions.atomic, + g, ops, ops_opt, h, guest_host_prop, spills); } @@ -254,8 +255,8 @@ inverse_sort_tbs(gconstpointer p1, gconstpointer p2, gpointer psort_by) c1 = tbs1->code.spills; c2 = tbs2->code.spills; } else if (likely(sort_by == SORT_BY_HOTNESS)) { - c1 = tbs1->executions.total; - c2 = tbs2->executions.total; + c1 = tbs1->executions.normal; + c2 = tbs2->executions.normal; } else if (likely(sort_by == SORT_BY_HG)) { if (tbs1->code.num_guest_inst == 0) { return -1; modified accel/tcg/tcg-runtime.c @@ -172,5 +172,5 @@ void HELPER(inc_exec_freq)(void *ptr) { TBStatistics *stats = (TBStatistics *) ptr; g_assert(stats); - atomic_inc(&stats->executions.total); + atomic_inc(&stats->executions.normal); } modified include/exec/tb-stats.h @@ -33,7 +33,7 @@ 
struct TBStatistics { /* Execution stats */ struct { - unsigned long total; + unsigned long normal; unsigned long atomic; } executions; > diff --git a/include/qemu/log.h b/include/qemu/log.h > index e175d4d5d0..b213411836 100644 > --- a/include/qemu/log.h > +++ b/include/qemu/log.h > @@ -129,10 +129,16 @@ void qemu_log_flush(void); > /* Close the log file */ > void qemu_log_close(void); > > +#define TB_NOTHING 0 > +#define TB_EXEC_STATS (1 << 1) > + > void enable_collect_tb_stats(void); > void disable_collect_tb_stats(void); > void pause_collect_tb_stats(void); > bool tb_stats_collection_enabled(void); > bool tb_stats_collection_paused(void); > > +void set_default_tbstats_flag(uint32_t flag); > +uint32_t get_default_tbstats_flag(void); > + > #endif > diff --git a/util/log.c b/util/log.c > index ab73fdc100..f81653d712 100644 > --- a/util/log.c > +++ b/util/log.c > @@ -354,3 +354,14 @@ bool tb_stats_collection_paused(void) > return tcg_collect_tb_stats == 2; > } > > +uint32_t default_tbstats_flag; > + > +void set_default_tbstats_flag(uint32_t flag) > +{ > + default_tbstats_flag = flag; > +} > + > +uint32_t get_default_tbstats_flag(void) > +{ > + return default_tbstats_flag; > +} Same comment as before about not overloading log.c. -- Alex Bennée
diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c index 8a1e408e31..f332eae334 100644 --- a/accel/tcg/tcg-runtime.c +++ b/accel/tcg/tcg-runtime.c @@ -167,3 +167,10 @@ void HELPER(exit_atomic)(CPUArchState *env) { cpu_loop_exit_atomic(env_cpu(env), GETPC()); } + +void HELPER(inc_exec_freq)(void *ptr) +{ + TBStatistics *stats = (TBStatistics *) ptr; + g_assert(stats); + atomic_inc(&stats->executions.total); +} diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h index 4fa61b49b4..bf0b75dbe8 100644 --- a/accel/tcg/tcg-runtime.h +++ b/accel/tcg/tcg-runtime.h @@ -28,6 +28,8 @@ DEF_HELPER_FLAGS_1(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env) DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env) +DEF_HELPER_FLAGS_1(inc_exec_freq, TCG_CALL_NO_RWG, void, ptr) + #ifdef CONFIG_SOFTMMU DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG, diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index a574890a80..7497dae508 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -1785,6 +1785,14 @@ TranslationBlock *tb_gen_code(CPUState *cpu, */ if (tb_stats_collection_enabled()) { tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags); + uint32_t flag = get_default_tbstats_flag(); + + if (qemu_log_in_addr_range(tb->pc)) { + if (flag & TB_EXEC_STATS) { + tb->tb_stats->stats_enabled |= TB_EXEC_STATS; + } + } + } else { tb->tb_stats = NULL; } diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c index 9226a348a3..396a11e828 100644 --- a/accel/tcg/translator.c +++ b/accel/tcg/translator.c @@ -46,6 +46,7 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db, ops->init_disas_context(db, cpu); tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */ + gen_tb_exec_count(tb); /* Reset the temp count so that we can identify leaks */ tcg_clear_temp_count(); diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h index f7669b6841..b3efe41894 100644 --- a/include/exec/gen-icount.h +++ 
b/include/exec/gen-icount.h @@ -7,6 +7,15 @@ static TCGOp *icount_start_insn; +static inline void gen_tb_exec_count(TranslationBlock *tb) +{ + if (tb_stats_enabled(tb, TB_EXEC_STATS)) { + TCGv_ptr ptr = tcg_const_ptr(tb->tb_stats); + gen_helper_inc_exec_freq(ptr); + tcg_temp_free_ptr(ptr); + } +} + static inline void gen_tb_start(TranslationBlock *tb) { TCGv_i32 count, imm; diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h index 0913155ec3..ee1e8de0d3 100644 --- a/include/exec/tb-stats.h +++ b/include/exec/tb-stats.h @@ -6,6 +6,9 @@ #include "exec/tb-context.h" #include "tcg.h" +#define tb_stats_enabled(tb, JIT_STATS) \ + (tb && tb->tb_stats && (tb->tb_stats->stats_enabled & JIT_STATS)) + typedef struct TBStatistics TBStatistics; /* @@ -22,6 +25,14 @@ struct TBStatistics { uint32_t flags; /* cs_base isn't included in the hash but we do check for matches */ target_ulong cs_base; + + uint32_t stats_enabled; + + /* Execution stats */ + struct { + unsigned long total; + unsigned long atomic; + } executions; }; bool tb_stats_cmp(const void *ap, const void *bp); diff --git a/include/qemu/log.h b/include/qemu/log.h index e175d4d5d0..b213411836 100644 --- a/include/qemu/log.h +++ b/include/qemu/log.h @@ -129,10 +129,16 @@ void qemu_log_flush(void); /* Close the log file */ void qemu_log_close(void); +#define TB_NOTHING 0 +#define TB_EXEC_STATS (1 << 1) + void enable_collect_tb_stats(void); void disable_collect_tb_stats(void); void pause_collect_tb_stats(void); bool tb_stats_collection_enabled(void); bool tb_stats_collection_paused(void); +void set_default_tbstats_flag(uint32_t flag); +uint32_t get_default_tbstats_flag(void); + #endif diff --git a/util/log.c b/util/log.c index ab73fdc100..f81653d712 100644 --- a/util/log.c +++ b/util/log.c @@ -354,3 +354,14 @@ bool tb_stats_collection_paused(void) return tcg_collect_tb_stats == 2; } +uint32_t default_tbstats_flag; + +void set_default_tbstats_flag(uint32_t flag) +{ + default_tbstats_flag = flag; +} + +uint32_t 
get_default_tbstats_flag(void) +{ + return default_tbstats_flag; +}
If a TB has a TBS (TBStatistics) with the TB_EXEC_STATS flag enabled, then we instrument the start of the TB's code to atomically count the number of times it is executed. The execution count of the TB is stored in its respective TBS. Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com> --- accel/tcg/tcg-runtime.c | 7 +++++++ accel/tcg/tcg-runtime.h | 2 ++ accel/tcg/translate-all.c | 8 ++++++++ accel/tcg/translator.c | 1 + include/exec/gen-icount.h | 9 +++++++++ include/exec/tb-stats.h | 11 +++++++++++ include/qemu/log.h | 6 ++++++ util/log.c | 11 +++++++++++ 8 files changed, 55 insertions(+)