Message ID | 20190821172329.2062-6-vandersonmr2@gmail.com |
---|---|
State | New |
Headers | show |
Series | Measure Tiny Code Generation Quality | expand |
vandersonmr <vandersonmr2@gmail.com> writes: > Replace all others CONFIG_PROFILER statistics and migrate it to > TBStatistics system. However, TCGProfiler still exists and can > be use to store global statistics and times. All TB related > statistics goes to TBStatistics. > > Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com> > --- > accel/tcg/tb-stats.c | 91 +++++++++++++++++++--------- > accel/tcg/translate-all.c | 47 ++++++++------- > configure | 3 - > cpus.c | 14 ++--- > include/exec/tb-stats.h | 19 +++++- > include/qemu/timer.h | 5 +- > monitor/misc.c | 28 ++------- > tcg/tcg.c | 124 +++++++++++--------------------------- > tcg/tcg.h | 10 +-- > vl.c | 8 +-- > 10 files changed, 156 insertions(+), 193 deletions(-) > > diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c > index f53e889d01..e0ac254fc1 100644 > --- a/accel/tcg/tb-stats.c > +++ b/accel/tcg/tb-stats.c > @@ -8,6 +8,8 @@ > > #include "exec/tb-stats.h" > > +uint64_t dev_time; > + > struct jit_profile_info { > uint64_t translations; > uint64_t aborted; > @@ -19,6 +21,13 @@ struct jit_profile_info { > uint64_t host; > uint64_t guest; > uint64_t search_data; > + > + uint64_t interm_time; > + uint64_t code_time; > + uint64_t restore_count; > + uint64_t restore_time; > + uint64_t opt_time; > + uint64_t la_time; > }; > > /* accumulate the statistics from all TBs */ > @@ -40,6 +49,29 @@ static void collect_jit_profile_info(void *p, uint32_t hash, void *userp) > jpi->host += tbs->code.out_len; > jpi->guest += tbs->code.in_len; > jpi->search_data += tbs->code.search_out_len; > + > + jpi->interm_time += stat_per_translation(tbs, time.interm); > + jpi->code_time += stat_per_translation(tbs, time.code); > + jpi->opt_time += stat_per_translation(tbs, time.opt); > + jpi->la_time += stat_per_translation(tbs, time.la); > + jpi->restore_time += tbs->time.restore; > + jpi->restore_count += tbs->time.restore_count; > +} > + > +void dump_jit_exec_time_info(uint64_t dev_time) > +{ > + static uint64_t last_cpu_exec_time; > + uint64_t cpu_exec_time; > + uint64_t delta; > + > + cpu_exec_time = tcg_cpu_exec_time(); > + delta = cpu_exec_time - last_cpu_exec_time; > + > + qemu_printf("async time %" PRId64 " (%0.3f)\n", > + dev_time, dev_time / (double) NANOSECONDS_PER_SECOND); > + qemu_printf("qemu time %" PRId64 " (%0.3f)\n", > + delta, delta / (double) NANOSECONDS_PER_SECOND); > + last_cpu_exec_time = cpu_exec_time; > } > > /* dump JIT statisticis using TCGProfile and TBStats */ > @@ -66,34 +98,39 @@ void dump_jit_profile_info(TCGProfile *s) > qemu_printf("avg search data/TB %0.1f\n", > jpi->search_data / (double) jpi->translations); > > + uint64_t tot = jpi->interm_time + jpi->code_time; > + > + qemu_printf("JIT cycles %" PRId64 " (%0.3fs at 2.4 GHz)\n", > + tot, tot / 2.4e9); > + qemu_printf(" cycles/op %0.1f\n", > + jpi->ops ? (double)tot / jpi->ops : 0); > + qemu_printf(" cycles/in byte %0.1f\n", > + jpi->guest ? (double)tot / jpi->guest : 0); > + qemu_printf(" cycles/out byte %0.1f\n", > + jpi->host ? (double)tot / jpi->host : 0); > + qemu_printf(" cycles/search byte %0.1f\n", > + jpi->search_data ? (double)tot / jpi->search_data : 0); > + if (tot == 0) { > + tot = 1; > + } > + > + qemu_printf(" gen_interm time %0.1f%%\n", > + (double)jpi->interm_time / tot * 100.0); > + qemu_printf(" gen_code time %0.1f%%\n", > + (double)jpi->code_time / tot * 100.0); > + > + qemu_printf(" optim./code time %0.1f%%\n", > + (double)jpi->opt_time / (jpi->code_time ? jpi->code_time : 1) * 100.0); > + qemu_printf(" liveness/code time %0.1f%%\n", > + (double)jpi->la_time / (jpi->code_time ? jpi->code_time : 1) * 100.0); > + > + qemu_printf("cpu_restore count %" PRId64 "\n", jpi->restore_count); > + qemu_printf(" avg cycles %0.1f\n", > + jpi->restore_count ? (double)jpi->restore_time / jpi->restore_count : 0); > + > if (s) { > - int64_t tot = s->interm_time + s->code_time; > - qemu_printf("JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n", > - tot, tot / 2.4e9); > - qemu_printf("cycles/op %0.1f\n", > - jpi->ops ? (double)tot / jpi->ops : 0); > - qemu_printf("cycles/in byte %0.1f\n", > - jpi->guest ? (double)tot / jpi->guest : 0); > - qemu_printf("cycles/out byte %0.1f\n", > - jpi->host ? (double)tot / jpi->host : 0); > - qemu_printf("cycles/search byte %0.1f\n", > - jpi->search_data ? (double)tot / jpi->search_data : 0); > - if (tot == 0) { > - tot = 1; > - } > - qemu_printf(" gen_interm time %0.1f%%\n", > - (double)s->interm_time / tot * 100.0); > - qemu_printf(" gen_code time %0.1f%%\n", > - (double)s->code_time / tot * 100.0); > - qemu_printf("optim./code time %0.1f%%\n", > - (double)s->opt_time / (s->code_time ? s->code_time : 1) > - * 100.0); > - qemu_printf("liveness/code time %0.1f%%\n", > - (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0); > - qemu_printf("cpu_restore count %" PRId64 "\n", > - s->restore_count); > - qemu_printf(" avg cycles %0.1f\n", > - s->restore_count ? (double)s->restore_time / s->restore_count : 0); > + qemu_printf("cpu exec time %" PRId64 " (%0.3fs)\n", > + s->cpu_exec_time, s->cpu_exec_time / (double) NANOSECONDS_PER_SECOND); > } > } > g_free(jpi); > diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c > index fe890f0782..a2e65bb85c 100644 > --- a/accel/tcg/translate-all.c > +++ b/accel/tcg/translate-all.c > @@ -337,10 +337,11 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, > CPUArchState *env = cpu->env_ptr; > uint8_t *p = tb->tc.ptr + tb->tc.size; > int i, j, num_insns = tb->icount; > -#ifdef CONFIG_PROFILER > - TCGProfile *prof = &tcg_ctx->prof; > - int64_t ti = profile_getclock(); > -#endif > + uint64_t ti = 0; > + > + if (tb_stats_enabled(tb, TB_JIT_TIME)) { > + ti = profile_getclock(); > + } > > searched_pc -= GETPC_ADJ; > > @@ -370,11 +371,11 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, > } > restore_state_to_opc(env, tb, data); > > -#ifdef CONFIG_PROFILER > - atomic_set(&prof->restore_time, > - prof->restore_time + profile_getclock() - ti); > - atomic_set(&prof->restore_count, prof->restore_count + 1); > -#endif > + if (tb_stats_enabled(tb, TB_JIT_TIME)) { > + atomic_add(&tb->tb_stats->time.restore, profile_getclock() - ti); > + atomic_inc(&tb->tb_stats->time.restore_count); > + } > + > return 0; > } > > @@ -1724,10 +1725,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu, > target_ulong virt_page2; > tcg_insn_unit *gen_code_buf; > int gen_code_size, search_size, max_insns; > -#ifdef CONFIG_PROFILER > - TCGProfile *prof = &tcg_ctx->prof; > - int64_t ti; > -#endif > + uint64_t ti = 0; > + > assert_memory_lock(); > > phys_pc = get_page_addr_code(env, pc); > @@ -1773,9 +1772,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu, > tcg_ctx->tb_cflags = cflags; > tb_overflow: > > -#ifdef CONFIG_PROFILER > - ti = profile_getclock(); > -#endif > > /* > * We want to fetch the stats structure before we start code > @@ -1796,6 +1792,11 @@ TranslationBlock *tb_gen_code(CPUState *cpu, > tb->tb_stats->stats_enabled |= TB_JIT_STATS; > atomic_inc(&tb->tb_stats->translations.total); > } > + > + if (flag & TB_JIT_TIME) { > + tb->tb_stats->stats_enabled |= TB_JIT_TIME; > + ti = profile_getclock(); > + } > } else { > tb->tb_stats = NULL; > } > @@ -1820,10 +1821,10 @@ TranslationBlock *tb_gen_code(CPUState *cpu, > tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg; > } > > -#ifdef CONFIG_PROFILER > - atomic_set(&prof->interm_time, prof->interm_time + profile_getclock() - ti); > - ti = profile_getclock(); > -#endif > + if (tb_stats_enabled(tb, TB_JIT_TIME)) { > + atomic_add(&tb->tb_stats->time.interm, profile_getclock() - ti); > + ti = profile_getclock(); > + } > > gen_code_size = tcg_gen_code(tcg_ctx, tb); > if (unlikely(gen_code_size < 0)) { > @@ -1865,9 +1866,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu, > } > tb->tc.size = gen_code_size; > > -#ifdef CONFIG_PROFILER > - atomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti); > -#endif > + if (tb_stats_enabled(tb, TB_JIT_TIME)) { > + atomic_add(&tb->tb_stats->time.code, profile_getclock() - ti); > + } > > if (tb_stats_enabled(tb, TB_JIT_STATS)) { > atomic_add(&tb->tb_stats->code.in_len, tb->size); > diff --git a/configure b/configure > index 714e7fb6a1..0b3c45c05b 100755 > --- a/configure > +++ b/configure > @@ -6614,9 +6614,6 @@ fi > if test "$static" = "yes" ; then > echo "CONFIG_STATIC=y" >> $config_host_mak > fi > -if test "$profiler" = "yes" ; then > - echo "CONFIG_PROFILER=y" >> $config_host_mak > -fi > if test "$slirp" != "no"; then > echo "CONFIG_SLIRP=y" >> $config_host_mak > echo "CONFIG_SMBD_COMMAND=\"$smbd\"" >> $config_host_mak > diff --git a/cpus.c b/cpus.c > index 927a00aa90..d8e4e89c19 100644 > --- a/cpus.c > +++ b/cpus.c > @@ -1423,21 +1423,17 @@ static void process_icount_data(CPUState *cpu) > static int tcg_cpu_exec(CPUState *cpu) > { > int ret; > -#ifdef CONFIG_PROFILER > - int64_t ti; > -#endif > + uint64_t ti; > > assert(tcg_enabled()); > -#ifdef CONFIG_PROFILER > ti = profile_getclock(); > -#endif > + > cpu_exec_start(cpu); > ret = cpu_exec(cpu); > cpu_exec_end(cpu); > -#ifdef CONFIG_PROFILER > - atomic_set(&tcg_ctx->prof.cpu_exec_time, > - tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti); > -#endif > + > + atomic_add(&tcg_ctx->prof.cpu_exec_time, profile_getclock() - ti); > + > return ret; > } > > diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h > index 3f062fda5a..0ea2639fd2 100644 > --- a/include/exec/tb-stats.h > +++ b/include/exec/tb-stats.h > @@ -57,22 +57,39 @@ struct TBStatistics { > unsigned long spanning; > } translations; > > + struct { > + int64_t restore; > + uint64_t restore_count; > + int64_t interm; > + int64_t code; > + int64_t opt; > + int64_t la; > + } time; > + > /* current TB linked to this TBStatistics */ > TranslationBlock *tb; > }; > > bool tb_stats_cmp(const void *ap, const void *bp); > > +void dump_jit_exec_time_info(uint64_t dev_time); > + > void init_tb_stats_htable_if_not(void); > > void dump_jit_profile_info(TCGProfile *s); > > /* TBStatistic collection controls */ > -enum TBStatsStatus { TB_STATS_RUNNING, TB_STATS_PAUSED, TB_STATS_STOPPED }; > +enum TBStatsStatus { > + TB_STATS_DISABLED = 0, > + TB_STATS_RUNNING, > + TB_STATS_PAUSED, > + TB_STATS_STOPPED > +}; > > #define TB_NOTHING (1 << 0) > #define TB_EXEC_STATS (1 << 1) > #define TB_JIT_STATS (1 << 2) > +#define TB_JIT_STATS (1 << 3) This looks like a dupe, how could it compile? Should it in fact be TB_JIT_TIME? > > extern int tcg_collect_tb_stats; > extern uint32_t default_tbstats_flag; > diff --git a/include/qemu/timer.h b/include/qemu/timer.h > index 5d978e1634..27571a7037 100644 > --- a/include/qemu/timer.h > +++ b/include/qemu/timer.h > @@ -1037,13 +1037,10 @@ static inline int64_t cpu_get_host_ticks(void) > } > #endif > > -#ifdef CONFIG_PROFILER > static inline int64_t profile_getclock(void) > { > return get_clock(); > } > > -extern int64_t dev_time; > -#endif > - > +extern uint64_t dev_time; > #endif > diff --git a/monitor/misc.c b/monitor/misc.c > index 00338c002a..4839d6875f 100644 > --- a/monitor/misc.c > +++ b/monitor/misc.c > @@ -472,6 +472,11 @@ static void hmp_info_jit(Monitor *mon, const QDict *qdict) > > static void hmp_info_opcount(Monitor *mon, const QDict *qdict) > { > + if (!tb_stats_collection_enabled()) { > + error_report("TB information not being recorded."); > + return; > + } > + > dump_opcount_info(); > } > #endif > @@ -1082,32 +1087,11 @@ static void hmp_info_mtree(Monitor *mon, const QDict *qdict) > mtree_info(flatview, dispatch_tree, owner); > } > > -#ifdef CONFIG_PROFILER > - > -int64_t dev_time; > - > static void hmp_info_profile(Monitor *mon, const QDict *qdict) > { > - static int64_t last_cpu_exec_time; > - int64_t cpu_exec_time; > - int64_t delta; > - > - cpu_exec_time = tcg_cpu_exec_time(); > - delta = cpu_exec_time - last_cpu_exec_time; > - > - monitor_printf(mon, "async time %" PRId64 " (%0.3f)\n", > - dev_time, dev_time / (double)NANOSECONDS_PER_SECOND); > - monitor_printf(mon, "qemu time %" PRId64 " (%0.3f)\n", > - delta, delta / (double)NANOSECONDS_PER_SECOND); > - last_cpu_exec_time = cpu_exec_time; > + dump_jit_exec_time_info(dev_time); > dev_time = 0; > } > -#else > -static void hmp_info_profile(Monitor *mon, const QDict *qdict) > -{ > - monitor_printf(mon, "Internal profiler not compiled\n"); > -} > -#endif > > /* Capture support */ > static QLIST_HEAD (capture_list_head, CaptureState) capture_head; > diff --git a/tcg/tcg.c b/tcg/tcg.c > index 46b31a2f68..1a306e1ec9 100644 > --- a/tcg/tcg.c > +++ b/tcg/tcg.c > @@ -3885,82 +3885,34 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) > } > } > > -#ifdef CONFIG_PROFILER > - > /* avoid copy/paste errors */ > #define PROF_ADD(to, from, field) \ > do { \ > (to)->field += atomic_read(&((from)->field)); \ > } while (0) > > -#define PROF_MAX(to, from, field) \ > - do { \ > - typeof((from)->field) val__ = atomic_read(&((from)->field)); \ > - if (val__ > (to)->field) { \ > - (to)->field = val__; \ > - } \ > - } while (0) > - > -/* Pass in a zero'ed @prof */ > -static inline > -void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table) > +static void collect_tcg_profiler(TCGProfile *prof) > { > + int i; > unsigned int n_ctxs = atomic_read(&n_tcg_ctxs); > - unsigned int i; > > for (i = 0; i < n_ctxs; i++) { > TCGContext *s = atomic_read(&tcg_ctxs[i]); > const TCGProfile *orig = &s->prof; > > - if (counters) { > - PROF_ADD(prof, orig, cpu_exec_time); > - PROF_ADD(prof, orig, interm_time); > - PROF_ADD(prof, orig, code_time); > - PROF_ADD(prof, orig, la_time); > - PROF_ADD(prof, orig, opt_time); > - PROF_ADD(prof, orig, restore_count); > - PROF_ADD(prof, orig, restore_time); > - } > - if (table) { > - int i; > + PROF_ADD(prof, orig, cpu_exec_time); > > - for (i = 0; i < NB_OPS; i++) { > - PROF_ADD(prof, orig, table_op_count[i]); > - } > + for (i = 0; i < NB_OPS; i++) { > + PROF_ADD(prof, orig, table_op_count[i]); > } > } > } > > -#undef PROF_ADD > -#undef PROF_MAX > - > -static void tcg_profile_snapshot_counters(TCGProfile *prof) > -{ > - tcg_profile_snapshot(prof, true, false); > -} > - > -static void tcg_profile_snapshot_table(TCGProfile *prof) > -{ > - tcg_profile_snapshot(prof, false, true); > -} > - > -void tcg_dump_op_count(void) > -{ > - TCGProfile prof = {}; > - int i; > - > - tcg_profile_snapshot_table(&prof); > - for (i = 0; i < NB_OPS; i++) { > - qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name, > - prof.table_op_count[i]); > - } > -} > - > -int64_t tcg_cpu_exec_time(void) > +uint64_t tcg_cpu_exec_time(void) > { > unsigned int n_ctxs = atomic_read(&n_tcg_ctxs); > unsigned int i; > - int64_t ret = 0; > + uint64_t ret = 0; > > for (i = 0; i < n_ctxs; i++) { > const TCGContext *s = atomic_read(&tcg_ctxs[i]); > @@ -3970,25 +3922,9 @@ int64_t tcg_cpu_exec_time(void) > } > return ret; > } > -#else > -void tcg_dump_op_count(void) > -{ > - qemu_printf("[TCG profiler not compiled]\n"); > -} > - > -int64_t tcg_cpu_exec_time(void) > -{ > - error_report("%s: TCG profiler not compiled", __func__); > - exit(EXIT_FAILURE); > -} > -#endif > - > > int tcg_gen_code(TCGContext *s, TranslationBlock *tb) > { > -#ifdef CONFIG_PROFILER > - TCGProfile *prof = &s->prof; > -#endif > int i, num_insns; > TCGOp *op; > > @@ -4041,18 +3977,18 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) > } > #endif > > -#ifdef CONFIG_PROFILER > - atomic_set(&prof->opt_time, prof->opt_time - profile_getclock()); > -#endif > + if (tb_stats_enabled(tb, TB_JIT_TIME)) { > + atomic_add(&tb->tb_stats->time.opt, -profile_getclock()); > + } > > #ifdef USE_TCG_OPTIMIZATIONS > tcg_optimize(s); > #endif > > -#ifdef CONFIG_PROFILER > - atomic_set(&prof->opt_time, prof->opt_time + profile_getclock()); > - atomic_set(&prof->la_time, prof->la_time - profile_getclock()); > -#endif > + if (tb_stats_enabled(tb, TB_JIT_TIME)) { > + atomic_add(&tb->tb_stats->time.opt, profile_getclock()); > + atomic_add(&tb->tb_stats->time.la, -profile_getclock()); > + } > > reachable_code_pass(s); > liveness_pass_1(s); > @@ -4075,9 +4011,9 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) > } > } > > -#ifdef CONFIG_PROFILER > - atomic_set(&prof->la_time, prof->la_time + profile_getclock()); > -#endif > + if (tb_stats_enabled(tb, TB_JIT_TIME)) { > + atomic_add(&tb->tb_stats->time.la, profile_getclock()); > + } > > #ifdef DEBUG_DISAS > if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) > @@ -4110,14 +4046,17 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) > s->pool_labels = NULL; > #endif > > + if (!tb_stats_collection_enabled()) { > + QTAILQ_FOREACH(op, &s->ops, link) { > + TCGOpcode opc = op->opc; > + atomic_add(&s->prof.table_op_count[opc], 1); > + } > + } > + > num_insns = -1; > QTAILQ_FOREACH(op, &s->ops, link) { > TCGOpcode opc = op->opc; > > -#ifdef CONFIG_PROFILER > - atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1); > -#endif > - > switch (opc) { > case INDEX_op_mov_i32: > case INDEX_op_mov_i64: > @@ -4210,14 +4149,23 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) > return tcg_current_code_size(s); > } > > +void tcg_dump_op_count(void) > +{ > + TCGProfile prof = {}; > + int i; > + > + collect_tcg_profiler(&prof); > + for (i = 0; i < NB_OPS; i++) { > + qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name, prof.table_op_count[i]); > + } > +} > + > void tcg_dump_info(void) > { > TCGProfile *s = NULL; > -#ifdef CONFIG_PROFILER > TCGProfile prof = {}; > - tcg_profile_snapshot_counters(&prof); > s = &prof; > -#endif > + collect_tcg_profiler(s); > dump_jit_profile_info(s); > } > > diff --git a/tcg/tcg.h b/tcg/tcg.h > index 026a066b9a..12f54283db 100644 > --- a/tcg/tcg.h > +++ b/tcg/tcg.h > @@ -644,12 +644,6 @@ QEMU_BUILD_BUG_ON(NB_OPS > (1 << 8)); > > typedef struct TCGProfile { > int64_t cpu_exec_time; > - int64_t interm_time; > - int64_t code_time; > - int64_t la_time; > - int64_t opt_time; > - int64_t restore_count; > - int64_t restore_time; > int64_t table_op_count[NB_OPS]; > } TCGProfile; > > @@ -677,9 +671,7 @@ struct TCGContext { > > tcg_insn_unit *code_ptr; > > -#ifdef CONFIG_PROFILER > TCGProfile prof; > -#endif > > #ifdef CONFIG_DEBUG_TCG > int temps_in_use; > @@ -1011,7 +1003,7 @@ int tcg_check_temp_count(void); > #define tcg_check_temp_count() 0 > #endif > > -int64_t tcg_cpu_exec_time(void); > +uint64_t tcg_cpu_exec_time(void); > void tcg_dump_info(void); > void tcg_dump_op_count(void); > > diff --git a/vl.c b/vl.c > index b426b32134..ef7737a7bc 100644 > --- a/vl.c > +++ b/vl.c > @@ -1781,17 +1781,11 @@ static bool main_loop_should_exit(void) > > static void main_loop(void) > { > -#ifdef CONFIG_PROFILER > - int64_t ti; > -#endif > + uint64_t ti; > while (!main_loop_should_exit()) { > -#ifdef CONFIG_PROFILER > ti = profile_getclock(); > -#endif > main_loop_wait(false); > -#ifdef CONFIG_PROFILER > dev_time += profile_getclock() - ti; > -#endif > } > } -- Alex Bennée
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c index f53e889d01..e0ac254fc1 100644 --- a/accel/tcg/tb-stats.c +++ b/accel/tcg/tb-stats.c @@ -8,6 +8,8 @@ #include "exec/tb-stats.h" +uint64_t dev_time; + struct jit_profile_info { uint64_t translations; uint64_t aborted; @@ -19,6 +21,13 @@ struct jit_profile_info { uint64_t host; uint64_t guest; uint64_t search_data; + + uint64_t interm_time; + uint64_t code_time; + uint64_t restore_count; + uint64_t restore_time; + uint64_t opt_time; + uint64_t la_time; }; /* accumulate the statistics from all TBs */ @@ -40,6 +49,29 @@ static void collect_jit_profile_info(void *p, uint32_t hash, void *userp) jpi->host += tbs->code.out_len; jpi->guest += tbs->code.in_len; jpi->search_data += tbs->code.search_out_len; + + jpi->interm_time += stat_per_translation(tbs, time.interm); + jpi->code_time += stat_per_translation(tbs, time.code); + jpi->opt_time += stat_per_translation(tbs, time.opt); + jpi->la_time += stat_per_translation(tbs, time.la); + jpi->restore_time += tbs->time.restore; + jpi->restore_count += tbs->time.restore_count; +} + +void dump_jit_exec_time_info(uint64_t dev_time) +{ + static uint64_t last_cpu_exec_time; + uint64_t cpu_exec_time; + uint64_t delta; + + cpu_exec_time = tcg_cpu_exec_time(); + delta = cpu_exec_time - last_cpu_exec_time; + + qemu_printf("async time %" PRId64 " (%0.3f)\n", + dev_time, dev_time / (double) NANOSECONDS_PER_SECOND); + qemu_printf("qemu time %" PRId64 " (%0.3f)\n", + delta, delta / (double) NANOSECONDS_PER_SECOND); + last_cpu_exec_time = cpu_exec_time; } /* dump JIT statisticis using TCGProfile and TBStats */ @@ -66,34 +98,39 @@ void dump_jit_profile_info(TCGProfile *s) qemu_printf("avg search data/TB %0.1f\n", jpi->search_data / (double) jpi->translations); + uint64_t tot = jpi->interm_time + jpi->code_time; + + qemu_printf("JIT cycles %" PRId64 " (%0.3fs at 2.4 GHz)\n", + tot, tot / 2.4e9); + qemu_printf(" cycles/op %0.1f\n", + jpi->ops ? (double)tot / jpi->ops : 0); + qemu_printf(" cycles/in byte %0.1f\n", + jpi->guest ? (double)tot / jpi->guest : 0); + qemu_printf(" cycles/out byte %0.1f\n", + jpi->host ? (double)tot / jpi->host : 0); + qemu_printf(" cycles/search byte %0.1f\n", + jpi->search_data ? (double)tot / jpi->search_data : 0); + if (tot == 0) { + tot = 1; + } + + qemu_printf(" gen_interm time %0.1f%%\n", + (double)jpi->interm_time / tot * 100.0); + qemu_printf(" gen_code time %0.1f%%\n", + (double)jpi->code_time / tot * 100.0); + + qemu_printf(" optim./code time %0.1f%%\n", + (double)jpi->opt_time / (jpi->code_time ? jpi->code_time : 1) * 100.0); + qemu_printf(" liveness/code time %0.1f%%\n", + (double)jpi->la_time / (jpi->code_time ? jpi->code_time : 1) * 100.0); + + qemu_printf("cpu_restore count %" PRId64 "\n", jpi->restore_count); + qemu_printf(" avg cycles %0.1f\n", + jpi->restore_count ? (double)jpi->restore_time / jpi->restore_count : 0); + if (s) { - int64_t tot = s->interm_time + s->code_time; - qemu_printf("JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n", - tot, tot / 2.4e9); - qemu_printf("cycles/op %0.1f\n", - jpi->ops ? (double)tot / jpi->ops : 0); - qemu_printf("cycles/in byte %0.1f\n", - jpi->guest ? (double)tot / jpi->guest : 0); - qemu_printf("cycles/out byte %0.1f\n", - jpi->host ? (double)tot / jpi->host : 0); - qemu_printf("cycles/search byte %0.1f\n", - jpi->search_data ? (double)tot / jpi->search_data : 0); - if (tot == 0) { - tot = 1; - } - qemu_printf(" gen_interm time %0.1f%%\n", - (double)s->interm_time / tot * 100.0); - qemu_printf(" gen_code time %0.1f%%\n", - (double)s->code_time / tot * 100.0); - qemu_printf("optim./code time %0.1f%%\n", - (double)s->opt_time / (s->code_time ? s->code_time : 1) - * 100.0); - qemu_printf("liveness/code time %0.1f%%\n", - (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0); - qemu_printf("cpu_restore count %" PRId64 "\n", - s->restore_count); - qemu_printf(" avg cycles %0.1f\n", - s->restore_count ? (double)s->restore_time / s->restore_count : 0); + qemu_printf("cpu exec time %" PRId64 " (%0.3fs)\n", + s->cpu_exec_time, s->cpu_exec_time / (double) NANOSECONDS_PER_SECOND); } } g_free(jpi); diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index fe890f0782..a2e65bb85c 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -337,10 +337,11 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, CPUArchState *env = cpu->env_ptr; uint8_t *p = tb->tc.ptr + tb->tc.size; int i, j, num_insns = tb->icount; -#ifdef CONFIG_PROFILER - TCGProfile *prof = &tcg_ctx->prof; - int64_t ti = profile_getclock(); -#endif + uint64_t ti = 0; + + if (tb_stats_enabled(tb, TB_JIT_TIME)) { + ti = profile_getclock(); + } searched_pc -= GETPC_ADJ; @@ -370,11 +371,11 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, } restore_state_to_opc(env, tb, data); -#ifdef CONFIG_PROFILER - atomic_set(&prof->restore_time, - prof->restore_time + profile_getclock() - ti); - atomic_set(&prof->restore_count, prof->restore_count + 1); -#endif + if (tb_stats_enabled(tb, TB_JIT_TIME)) { + atomic_add(&tb->tb_stats->time.restore, profile_getclock() - ti); + atomic_inc(&tb->tb_stats->time.restore_count); + } + return 0; } @@ -1724,10 +1725,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong virt_page2; tcg_insn_unit *gen_code_buf; int gen_code_size, search_size, max_insns; -#ifdef CONFIG_PROFILER - TCGProfile *prof = &tcg_ctx->prof; - int64_t ti; -#endif + uint64_t ti = 0; + assert_memory_lock(); phys_pc = get_page_addr_code(env, pc); @@ -1773,9 +1772,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tcg_ctx->tb_cflags = cflags; tb_overflow: -#ifdef CONFIG_PROFILER - ti = profile_getclock(); -#endif /* * We want to fetch the stats structure before we start code @@ -1796,6 +1792,11 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tb->tb_stats->stats_enabled |= TB_JIT_STATS; atomic_inc(&tb->tb_stats->translations.total); } + + if (flag & TB_JIT_TIME) { + tb->tb_stats->stats_enabled |= TB_JIT_TIME; + ti = profile_getclock(); + } } else { tb->tb_stats = NULL; } @@ -1820,10 +1821,10 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg; } -#ifdef CONFIG_PROFILER - atomic_set(&prof->interm_time, prof->interm_time + profile_getclock() - ti); - ti = profile_getclock(); -#endif + if (tb_stats_enabled(tb, TB_JIT_TIME)) { + atomic_add(&tb->tb_stats->time.interm, profile_getclock() - ti); + ti = profile_getclock(); + } gen_code_size = tcg_gen_code(tcg_ctx, tb); if (unlikely(gen_code_size < 0)) { @@ -1865,9 +1866,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu, } tb->tc.size = gen_code_size; -#ifdef CONFIG_PROFILER - atomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti); -#endif + if (tb_stats_enabled(tb, TB_JIT_TIME)) { + atomic_add(&tb->tb_stats->time.code, profile_getclock() - ti); + } if (tb_stats_enabled(tb, TB_JIT_STATS)) { atomic_add(&tb->tb_stats->code.in_len, tb->size); diff --git a/configure b/configure index 714e7fb6a1..0b3c45c05b 100755 --- a/configure +++ b/configure @@ -6614,9 +6614,6 @@ fi if test "$static" = "yes" ; then echo "CONFIG_STATIC=y" >> $config_host_mak fi -if test "$profiler" = "yes" ; then - echo "CONFIG_PROFILER=y" >> $config_host_mak -fi if test "$slirp" != "no"; then echo "CONFIG_SLIRP=y" >> $config_host_mak echo "CONFIG_SMBD_COMMAND=\"$smbd\"" >> $config_host_mak diff --git a/cpus.c b/cpus.c index 927a00aa90..d8e4e89c19 100644 --- a/cpus.c +++ b/cpus.c @@ -1423,21 +1423,17 @@ static void process_icount_data(CPUState *cpu) static int tcg_cpu_exec(CPUState *cpu) { int ret; -#ifdef CONFIG_PROFILER - int64_t ti; -#endif + uint64_t ti; assert(tcg_enabled()); -#ifdef CONFIG_PROFILER ti = profile_getclock(); -#endif + cpu_exec_start(cpu); ret = cpu_exec(cpu); cpu_exec_end(cpu); -#ifdef CONFIG_PROFILER - atomic_set(&tcg_ctx->prof.cpu_exec_time, - tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti); -#endif + + atomic_add(&tcg_ctx->prof.cpu_exec_time, profile_getclock() - ti); + return ret; } diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h index 3f062fda5a..0ea2639fd2 100644 --- a/include/exec/tb-stats.h +++ b/include/exec/tb-stats.h @@ -57,22 +57,39 @@ struct TBStatistics { unsigned long spanning; } translations; + struct { + int64_t restore; + uint64_t restore_count; + int64_t interm; + int64_t code; + int64_t opt; + int64_t la; + } time; + /* current TB linked to this TBStatistics */ TranslationBlock *tb; }; bool tb_stats_cmp(const void *ap, const void *bp); +void dump_jit_exec_time_info(uint64_t dev_time); + void init_tb_stats_htable_if_not(void); void dump_jit_profile_info(TCGProfile *s); /* TBStatistic collection controls */ -enum TBStatsStatus { TB_STATS_RUNNING, TB_STATS_PAUSED, TB_STATS_STOPPED }; +enum TBStatsStatus { + TB_STATS_DISABLED = 0, + TB_STATS_RUNNING, + TB_STATS_PAUSED, + TB_STATS_STOPPED +}; #define TB_NOTHING (1 << 0) #define TB_EXEC_STATS (1 << 1) #define TB_JIT_STATS (1 << 2) +#define TB_JIT_STATS (1 << 3) extern int tcg_collect_tb_stats; extern uint32_t default_tbstats_flag; diff --git a/include/qemu/timer.h b/include/qemu/timer.h index 5d978e1634..27571a7037 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -1037,13 +1037,10 @@ static inline int64_t cpu_get_host_ticks(void) } #endif -#ifdef CONFIG_PROFILER static inline int64_t profile_getclock(void) { return get_clock(); } -extern int64_t dev_time; -#endif - +extern uint64_t dev_time; #endif diff --git a/monitor/misc.c b/monitor/misc.c index 00338c002a..4839d6875f 100644 --- a/monitor/misc.c +++ b/monitor/misc.c @@ -472,6 +472,11 @@ static void hmp_info_jit(Monitor *mon, const QDict *qdict) static void hmp_info_opcount(Monitor *mon, const QDict *qdict) { + if (!tb_stats_collection_enabled()) { + error_report("TB information not being recorded."); + return; + } + dump_opcount_info(); } #endif @@ -1082,32 +1087,11 @@ static void hmp_info_mtree(Monitor *mon, const QDict *qdict) mtree_info(flatview, dispatch_tree, owner); } -#ifdef CONFIG_PROFILER - -int64_t dev_time; - static void hmp_info_profile(Monitor *mon, const QDict *qdict) { - static int64_t last_cpu_exec_time; - int64_t cpu_exec_time; - int64_t delta; - - cpu_exec_time = tcg_cpu_exec_time(); - delta = cpu_exec_time - last_cpu_exec_time; - - monitor_printf(mon, "async time %" PRId64 " (%0.3f)\n", - dev_time, dev_time / (double)NANOSECONDS_PER_SECOND); - monitor_printf(mon, "qemu time %" PRId64 " (%0.3f)\n", - delta, delta / (double)NANOSECONDS_PER_SECOND); - last_cpu_exec_time = cpu_exec_time; + dump_jit_exec_time_info(dev_time); dev_time = 0; } -#else -static void hmp_info_profile(Monitor *mon, const QDict *qdict) -{ - monitor_printf(mon, "Internal profiler not compiled\n"); -} -#endif /* Capture support */ static QLIST_HEAD (capture_list_head, CaptureState) capture_head; diff --git a/tcg/tcg.c b/tcg/tcg.c index 46b31a2f68..1a306e1ec9 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -3885,82 +3885,34 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op) } } -#ifdef CONFIG_PROFILER - /* avoid copy/paste errors */ #define PROF_ADD(to, from, field) \ do { \ (to)->field += atomic_read(&((from)->field)); \ } while (0) -#define PROF_MAX(to, from, field) \ - do { \ - typeof((from)->field) val__ = atomic_read(&((from)->field)); \ - if (val__ > (to)->field) { \ - (to)->field = val__; \ - } \ - } while (0) - -/* Pass in a zero'ed @prof */ -static inline -void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table) +static void collect_tcg_profiler(TCGProfile *prof) { + int i; unsigned int n_ctxs = atomic_read(&n_tcg_ctxs); - unsigned int i; for (i = 0; i < n_ctxs; i++) { TCGContext *s = atomic_read(&tcg_ctxs[i]); const TCGProfile *orig = &s->prof; - if (counters) { - PROF_ADD(prof, orig, cpu_exec_time); - PROF_ADD(prof, orig, interm_time); - PROF_ADD(prof, orig, code_time); - PROF_ADD(prof, orig, la_time); - PROF_ADD(prof, orig, opt_time); - PROF_ADD(prof, orig, restore_count); - PROF_ADD(prof, orig, restore_time); - } - if (table) { - int i; + PROF_ADD(prof, orig, cpu_exec_time); - for (i = 0; i < NB_OPS; i++) { - PROF_ADD(prof, orig, table_op_count[i]); - } + for (i = 0; i < NB_OPS; i++) { + PROF_ADD(prof, orig, table_op_count[i]); } } } -#undef PROF_ADD -#undef PROF_MAX - -static void tcg_profile_snapshot_counters(TCGProfile *prof) -{ - tcg_profile_snapshot(prof, true, false); -} - -static void tcg_profile_snapshot_table(TCGProfile *prof) -{ - tcg_profile_snapshot(prof, false, true); -} - -void tcg_dump_op_count(void) -{ - TCGProfile prof = {}; - int i; - - tcg_profile_snapshot_table(&prof); - for (i = 0; i < NB_OPS; i++) { - qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name, - prof.table_op_count[i]); - } -} - -int64_t tcg_cpu_exec_time(void) +uint64_t tcg_cpu_exec_time(void) { unsigned int n_ctxs = atomic_read(&n_tcg_ctxs); unsigned int i; - int64_t ret = 0; + uint64_t ret = 0; for (i = 0; i < n_ctxs; i++) { const TCGContext *s = atomic_read(&tcg_ctxs[i]); @@ -3970,25 +3922,9 @@ int64_t tcg_cpu_exec_time(void) } return ret; } -#else -void tcg_dump_op_count(void) -{ - qemu_printf("[TCG profiler not compiled]\n"); -} - -int64_t tcg_cpu_exec_time(void) -{ - error_report("%s: TCG profiler not compiled", __func__); - exit(EXIT_FAILURE); -} -#endif - int tcg_gen_code(TCGContext *s, TranslationBlock *tb) { -#ifdef CONFIG_PROFILER - TCGProfile *prof = &s->prof; -#endif int i, num_insns; TCGOp *op; @@ -4041,18 +3977,18 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) } #endif -#ifdef CONFIG_PROFILER - atomic_set(&prof->opt_time, prof->opt_time - profile_getclock()); -#endif + if (tb_stats_enabled(tb, TB_JIT_TIME)) { + atomic_add(&tb->tb_stats->time.opt, -profile_getclock()); + } #ifdef USE_TCG_OPTIMIZATIONS tcg_optimize(s); #endif -#ifdef CONFIG_PROFILER - atomic_set(&prof->opt_time, prof->opt_time + profile_getclock()); - atomic_set(&prof->la_time, prof->la_time - profile_getclock()); -#endif + if (tb_stats_enabled(tb, TB_JIT_TIME)) { + atomic_add(&tb->tb_stats->time.opt, profile_getclock()); + atomic_add(&tb->tb_stats->time.la, -profile_getclock()); + } reachable_code_pass(s); liveness_pass_1(s); @@ -4075,9 +4011,9 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) } } -#ifdef CONFIG_PROFILER - atomic_set(&prof->la_time, prof->la_time + profile_getclock()); -#endif + if (tb_stats_enabled(tb, TB_JIT_TIME)) { + atomic_add(&tb->tb_stats->time.la, profile_getclock()); + } #ifdef DEBUG_DISAS if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT) @@ -4110,14 +4046,17 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) s->pool_labels = NULL; #endif + if (!tb_stats_collection_enabled()) { + QTAILQ_FOREACH(op, &s->ops, link) { + TCGOpcode opc = op->opc; + atomic_add(&s->prof.table_op_count[opc], 1); + } + } + num_insns = -1; QTAILQ_FOREACH(op, &s->ops, link) { TCGOpcode opc = op->opc; -#ifdef CONFIG_PROFILER - atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1); -#endif - switch (opc) { case INDEX_op_mov_i32: case INDEX_op_mov_i64: @@ -4210,14 +4149,23 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) return tcg_current_code_size(s); } +void tcg_dump_op_count(void) +{ + TCGProfile prof = {}; + int i; + + collect_tcg_profiler(&prof); + for (i = 0; i < NB_OPS; i++) { + qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name, prof.table_op_count[i]); + } +} + void tcg_dump_info(void) { TCGProfile *s = NULL; -#ifdef CONFIG_PROFILER TCGProfile prof = {}; - tcg_profile_snapshot_counters(&prof); s = &prof; -#endif + collect_tcg_profiler(s); dump_jit_profile_info(s); } diff --git a/tcg/tcg.h b/tcg/tcg.h index 026a066b9a..12f54283db 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -644,12 +644,6 @@ QEMU_BUILD_BUG_ON(NB_OPS > (1 << 8)); typedef struct TCGProfile { int64_t cpu_exec_time; - int64_t interm_time; - int64_t code_time; - int64_t la_time; - int64_t opt_time; - int64_t restore_count; - int64_t restore_time; int64_t table_op_count[NB_OPS]; } TCGProfile; @@ -677,9 +671,7 @@ struct TCGContext { tcg_insn_unit *code_ptr; -#ifdef CONFIG_PROFILER TCGProfile prof; -#endif #ifdef CONFIG_DEBUG_TCG int temps_in_use; @@ -1011,7 +1003,7 @@ int tcg_check_temp_count(void); #define tcg_check_temp_count() 0 #endif -int64_t tcg_cpu_exec_time(void); +uint64_t tcg_cpu_exec_time(void); void tcg_dump_info(void); void tcg_dump_op_count(void); diff --git a/vl.c b/vl.c index b426b32134..ef7737a7bc 100644 --- a/vl.c +++ b/vl.c @@ -1781,17 +1781,11 @@ static bool main_loop_should_exit(void) static void main_loop(void) { -#ifdef CONFIG_PROFILER - int64_t ti; -#endif + uint64_t ti; while (!main_loop_should_exit()) { -#ifdef CONFIG_PROFILER ti = profile_getclock(); -#endif main_loop_wait(false); -#ifdef CONFIG_PROFILER dev_time += profile_getclock() - ti; -#endif } }
Replace all others CONFIG_PROFILER statistics and migrate it to TBStatistics system. However, TCGProfiler still exists and can be use to store global statistics and times. All TB related statistics goes to TBStatistics. Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com> --- accel/tcg/tb-stats.c | 91 +++++++++++++++++++--------- accel/tcg/translate-all.c | 47 ++++++++------- configure | 3 - cpus.c | 14 ++--- include/exec/tb-stats.h | 19 +++++- include/qemu/timer.h | 5 +- monitor/misc.c | 28 ++------- tcg/tcg.c | 124 +++++++++++--------------------------- tcg/tcg.h | 10 +-- vl.c | 8 +-- 10 files changed, 156 insertions(+), 193 deletions(-)