diff mbox series

[v4,2/7] accel: collecting TB execution count

Message ID 20190720010235.32444-3-vandersonmr2@gmail.com
State New
Headers show
Series Measure Tiny Code Generation Quality | expand

Commit Message

vandersonmr July 20, 2019, 1:02 a.m. UTC
If a TB has a TBS (TBStatistics) with the TB_EXEC_STATS
enabled, then we instrument the start code of the TB
to atomically count the number of times it is executed.
The execution count of the TB is stored in its respective
TBS.

Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
---
 accel/tcg/tcg-runtime.c   |  7 +++++++
 accel/tcg/tcg-runtime.h   |  2 ++
 accel/tcg/translate-all.c |  8 ++++++++
 accel/tcg/translator.c    |  1 +
 include/exec/gen-icount.h |  9 +++++++++
 include/exec/tb-stats.h   | 11 +++++++++++
 include/qemu/log.h        |  6 ++++++
 util/log.c                | 11 +++++++++++
 8 files changed, 55 insertions(+)

Comments

Alex Bennée July 26, 2019, 1:38 p.m. UTC | #1
vandersonmr <vandersonmr2@gmail.com> writes:

> If a TB has a TBS (TBStatistics) with the TB_EXEC_STATS
> enabled, then we instrument the start code of the TB
> to atomically count the number of times it is executed.
> The execution count of the TB is stored in its respective
> TBS.
>
> Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
> ---
>  accel/tcg/tcg-runtime.c   |  7 +++++++
>  accel/tcg/tcg-runtime.h   |  2 ++
>  accel/tcg/translate-all.c |  8 ++++++++
>  accel/tcg/translator.c    |  1 +
>  include/exec/gen-icount.h |  9 +++++++++
>  include/exec/tb-stats.h   | 11 +++++++++++
>  include/qemu/log.h        |  6 ++++++
>  util/log.c                | 11 +++++++++++
>  8 files changed, 55 insertions(+)
>
> diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c
> index 8a1e408e31..f332eae334 100644
> --- a/accel/tcg/tcg-runtime.c
> +++ b/accel/tcg/tcg-runtime.c
> @@ -167,3 +167,10 @@ void HELPER(exit_atomic)(CPUArchState *env)
>  {
>      cpu_loop_exit_atomic(env_cpu(env), GETPC());
>  }
> +
> +void HELPER(inc_exec_freq)(void *ptr)
> +{
> +    TBStatistics *stats = (TBStatistics *) ptr;
> +    g_assert(stats);
> +    atomic_inc(&stats->executions.total);
> +}
> diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
> index 4fa61b49b4..bf0b75dbe8 100644
> --- a/accel/tcg/tcg-runtime.h
> +++ b/accel/tcg/tcg-runtime.h
> @@ -28,6 +28,8 @@ DEF_HELPER_FLAGS_1(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env)
>
>  DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
>
> +DEF_HELPER_FLAGS_1(inc_exec_freq, TCG_CALL_NO_RWG, void, ptr)
> +
>  #ifdef CONFIG_SOFTMMU
>
>  DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> index a574890a80..7497dae508 100644
> --- a/accel/tcg/translate-all.c
> +++ b/accel/tcg/translate-all.c
> @@ -1785,6 +1785,14 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>       */
>      if (tb_stats_collection_enabled()) {
>          tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags);
> +        uint32_t flag = get_default_tbstats_flag();
> +
> +        if (qemu_log_in_addr_range(tb->pc)) {
> +            if (flag & TB_EXEC_STATS) {
> +                tb->tb_stats->stats_enabled |= TB_EXEC_STATS;
> +            }
> +        }
> +
>      } else {
>          tb->tb_stats = NULL;
>      }
> diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
> index 9226a348a3..396a11e828 100644
> --- a/accel/tcg/translator.c
> +++ b/accel/tcg/translator.c
> @@ -46,6 +46,7 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
>
>      ops->init_disas_context(db, cpu);
>      tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */
> +    gen_tb_exec_count(tb);
>
>      /* Reset the temp count so that we can identify leaks */
>      tcg_clear_temp_count();
> diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h
> index f7669b6841..b3efe41894 100644
> --- a/include/exec/gen-icount.h
> +++ b/include/exec/gen-icount.h
> @@ -7,6 +7,15 @@
>
>  static TCGOp *icount_start_insn;
>
> +static inline void gen_tb_exec_count(TranslationBlock *tb)
> +{
> +    if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
> +        TCGv_ptr ptr = tcg_const_ptr(tb->tb_stats);
> +        gen_helper_inc_exec_freq(ptr);
> +        tcg_temp_free_ptr(ptr);
> +    }
> +}
> +
>  static inline void gen_tb_start(TranslationBlock *tb)
>  {
>      TCGv_i32 count, imm;
> diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h
> index 0913155ec3..ee1e8de0d3 100644
> --- a/include/exec/tb-stats.h
> +++ b/include/exec/tb-stats.h
> @@ -6,6 +6,9 @@
>  #include "exec/tb-context.h"
>  #include "tcg.h"
>
> +#define tb_stats_enabled(tb, JIT_STATS) \
> +    (tb && tb->tb_stats && (tb->tb_stats->stats_enabled & JIT_STATS))
> +
>  typedef struct TBStatistics TBStatistics;
>
>  /*
> @@ -22,6 +25,14 @@ struct TBStatistics {
>      uint32_t     flags;
>      /* cs_base isn't included in the hash but we do check for matches */
>      target_ulong cs_base;
> +
> +    uint32_t stats_enabled;
> +
> +    /* Execution stats */
> +    struct {
> +        unsigned long total;
> +        unsigned long atomic;

We are not incrementing atomic in this patch. Also it's not total so
maybe "normal" makes more sense. Something like:

fixup! accel: collecting TB execution count

4 files changed, 11 insertions(+), 6 deletions(-)
accel/tcg/cpu-exec.c    | 4 ++++
accel/tcg/tb-stats.c    | 9 +++++----
accel/tcg/tcg-runtime.c | 2 +-
include/exec/tb-stats.h | 2 +-

modified   accel/tcg/cpu-exec.c
@@ -252,6 +252,10 @@ void cpu_exec_step_atomic(CPUState *cpu)

         start_exclusive();

+        if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
+            tb->tb_stats->executions.atomic++;
+        }
+
         /* Since we got here, we know that parallel_cpus must be true.  */
         parallel_cpus = false;
         in_exclusive_region = true;
modified   accel/tcg/tb-stats.c
@@ -233,11 +233,12 @@ static void dump_tb_header(TBStatistics *tbs)
     float guest_host_prop = g ? ((float) h / g) : 0;

     qemu_log("TB%d: phys:0x"TB_PAGE_ADDR_FMT" virt:0x"TARGET_FMT_lx
-             " flags:%#08x (trans:%lu uncached:%lu exec:%lu ints: g:%u op:%u op_opt:%u h:%u h/g:%.2f spills:%d)\n",
+             " flags:%#08x (trans:%lu uncached:%lu exec:%lu/%lu ints: g:%u op:%u op_opt:%u h:%u h/g:%.2f spills:%d)\n",
              tbs->display_id,
              tbs->phys_pc, tbs->pc, tbs->flags,
              tbs->translations.total, tbs->translations.uncached,
-             tbs->executions.total, g, ops, ops_opt, h, guest_host_prop,
+             tbs->executions.normal, tbs->executions.atomic,
+             g, ops, ops_opt, h, guest_host_prop,
              spills);
 }

@@ -254,8 +255,8 @@ inverse_sort_tbs(gconstpointer p1, gconstpointer p2, gpointer psort_by)
         c1 = tbs1->code.spills;
         c2 = tbs2->code.spills;
     } else if (likely(sort_by == SORT_BY_HOTNESS)) {
-        c1 = tbs1->executions.total;
-        c2 = tbs2->executions.total;
+        c1 = tbs1->executions.normal;
+        c2 = tbs2->executions.normal;
     } else if (likely(sort_by == SORT_BY_HG)) {
         if (tbs1->code.num_guest_inst == 0) {
             return -1;
modified   accel/tcg/tcg-runtime.c
@@ -172,5 +172,5 @@ void HELPER(inc_exec_freq)(void *ptr)
 {
     TBStatistics *stats = (TBStatistics *) ptr;
     g_assert(stats);
-    atomic_inc(&stats->executions.total);
+    atomic_inc(&stats->executions.normal);
 }
modified   include/exec/tb-stats.h
@@ -33,7 +33,7 @@ struct TBStatistics {

     /* Execution stats */
     struct {
-        unsigned long total;
+        unsigned long normal;
         unsigned long atomic;
     } executions;

> diff --git a/include/qemu/log.h b/include/qemu/log.h
> index e175d4d5d0..b213411836 100644
> --- a/include/qemu/log.h
> +++ b/include/qemu/log.h
> @@ -129,10 +129,16 @@ void qemu_log_flush(void);
>  /* Close the log file */
>  void qemu_log_close(void);
>
> +#define TB_NOTHING    0
> +#define TB_EXEC_STATS (1 << 1)
> +
>  void enable_collect_tb_stats(void);
>  void disable_collect_tb_stats(void);
>  void pause_collect_tb_stats(void);
>  bool tb_stats_collection_enabled(void);
>  bool tb_stats_collection_paused(void);
>
> +void set_default_tbstats_flag(uint32_t flag);
> +uint32_t get_default_tbstats_flag(void);
> +
>  #endif
> diff --git a/util/log.c b/util/log.c
> index ab73fdc100..f81653d712 100644
> --- a/util/log.c
> +++ b/util/log.c
> @@ -354,3 +354,14 @@ bool tb_stats_collection_paused(void)
>      return tcg_collect_tb_stats == 2;
>  }
>
> +uint32_t default_tbstats_flag;
> +
> +void set_default_tbstats_flag(uint32_t flag)
> +{
> +    default_tbstats_flag = flag;
> +}
> +
> +uint32_t get_default_tbstats_flag(void)
> +{
> +    return default_tbstats_flag;
> +}

Some comment about not overloading log.c as before.

--
Alex Bennée
diff mbox series

Patch

diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c
index 8a1e408e31..f332eae334 100644
--- a/accel/tcg/tcg-runtime.c
+++ b/accel/tcg/tcg-runtime.c
@@ -167,3 +167,10 @@  void HELPER(exit_atomic)(CPUArchState *env)
 {
     cpu_loop_exit_atomic(env_cpu(env), GETPC());
 }
+
+void HELPER(inc_exec_freq)(void *ptr)
+{
+    TBStatistics *stats = (TBStatistics *) ptr;
+    g_assert(stats);
+    atomic_inc(&stats->executions.total);
+}
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index 4fa61b49b4..bf0b75dbe8 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -28,6 +28,8 @@  DEF_HELPER_FLAGS_1(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env)
 
 DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
 
+DEF_HELPER_FLAGS_1(inc_exec_freq, TCG_CALL_NO_RWG, void, ptr)
+
 #ifdef CONFIG_SOFTMMU
 
 DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index a574890a80..7497dae508 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -1785,6 +1785,14 @@  TranslationBlock *tb_gen_code(CPUState *cpu,
      */
     if (tb_stats_collection_enabled()) {
         tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags);
+        uint32_t flag = get_default_tbstats_flag();
+
+        if (qemu_log_in_addr_range(tb->pc)) {
+            if (flag & TB_EXEC_STATS) {
+                tb->tb_stats->stats_enabled |= TB_EXEC_STATS;
+            }
+        }
+
     } else {
         tb->tb_stats = NULL;
     }
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index 9226a348a3..396a11e828 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -46,6 +46,7 @@  void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
 
     ops->init_disas_context(db, cpu);
     tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */
+    gen_tb_exec_count(tb);
 
     /* Reset the temp count so that we can identify leaks */
     tcg_clear_temp_count();
diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h
index f7669b6841..b3efe41894 100644
--- a/include/exec/gen-icount.h
+++ b/include/exec/gen-icount.h
@@ -7,6 +7,15 @@ 
 
 static TCGOp *icount_start_insn;
 
+static inline void gen_tb_exec_count(TranslationBlock *tb)
+{
+    if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
+        TCGv_ptr ptr = tcg_const_ptr(tb->tb_stats);
+        gen_helper_inc_exec_freq(ptr);
+        tcg_temp_free_ptr(ptr);
+    }
+}
+
 static inline void gen_tb_start(TranslationBlock *tb)
 {
     TCGv_i32 count, imm;
diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h
index 0913155ec3..ee1e8de0d3 100644
--- a/include/exec/tb-stats.h
+++ b/include/exec/tb-stats.h
@@ -6,6 +6,9 @@ 
 #include "exec/tb-context.h"
 #include "tcg.h"
 
+#define tb_stats_enabled(tb, JIT_STATS) \
+    (tb && tb->tb_stats && (tb->tb_stats->stats_enabled & JIT_STATS))
+
 typedef struct TBStatistics TBStatistics;
 
 /*
@@ -22,6 +25,14 @@  struct TBStatistics {
     uint32_t     flags;
     /* cs_base isn't included in the hash but we do check for matches */
     target_ulong cs_base;
+
+    uint32_t stats_enabled;
+
+    /* Execution stats */
+    struct {
+        unsigned long total;
+        unsigned long atomic;
+    } executions;
 };
 
 bool tb_stats_cmp(const void *ap, const void *bp);
diff --git a/include/qemu/log.h b/include/qemu/log.h
index e175d4d5d0..b213411836 100644
--- a/include/qemu/log.h
+++ b/include/qemu/log.h
@@ -129,10 +129,16 @@  void qemu_log_flush(void);
 /* Close the log file */
 void qemu_log_close(void);
 
+#define TB_NOTHING    0
+#define TB_EXEC_STATS (1 << 1)
+
 void enable_collect_tb_stats(void);
 void disable_collect_tb_stats(void);
 void pause_collect_tb_stats(void);
 bool tb_stats_collection_enabled(void);
 bool tb_stats_collection_paused(void);
 
+void set_default_tbstats_flag(uint32_t flag);
+uint32_t get_default_tbstats_flag(void);
+
 #endif
diff --git a/util/log.c b/util/log.c
index ab73fdc100..f81653d712 100644
--- a/util/log.c
+++ b/util/log.c
@@ -354,3 +354,14 @@  bool tb_stats_collection_paused(void)
     return tcg_collect_tb_stats == 2;
 }
 
+uint32_t default_tbstats_flag;
+
+void set_default_tbstats_flag(uint32_t flag)
+{
+    default_tbstats_flag = flag;
+}
+
+uint32_t get_default_tbstats_flag(void)
+{
+    return default_tbstats_flag;
+}