diff mbox series

[v5,03/10] accel: collecting JIT statistics

Message ID 20190815021857.19526-4-vandersonmr2@gmail.com
State New
Headers show
Series Measure Tiny Code Generation Quality | expand

Commit Message

vandersonmr Aug. 15, 2019, 2:18 a.m. UTC
If a TB has a TBS (TBStatistics) with the TB_JIT_STATS
enabled then we collect statistics of its translation
processes and code translation.

Collecting the number of host instructions seems to be
not simple as it would imply in having to modify several
target source files. So, for now, we are only collecting
the size of the host gen code.

Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
---
 accel/tcg/translate-all.c | 14 ++++++++++++++
 accel/tcg/translator.c    |  4 ++++
 include/exec/tb-stats.h   | 15 +++++++++++++++
 tcg/tcg.c                 | 23 +++++++++++++++++++++++
 tcg/tcg.h                 |  2 ++
 5 files changed, 58 insertions(+)

Comments

Alex Bennée Aug. 15, 2019, 2:29 p.m. UTC | #1
vandersonmr <vandersonmr2@gmail.com> writes:

> If a TB has a TBS (TBStatistics) with the TB_JIT_STATS
> enabled then we collect statistics of its translation
> processes and code translation.
>
> Collecting the number of host instructions seems to be
> not simple as it would imply in having to modify several
> target source files. So, for now, we are only collecting
> the size of the host gen code.
>
> Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

> ---
>  accel/tcg/translate-all.c | 14 ++++++++++++++
>  accel/tcg/translator.c    |  4 ++++
>  include/exec/tb-stats.h   | 15 +++++++++++++++
>  tcg/tcg.c                 | 23 +++++++++++++++++++++++
>  tcg/tcg.h                 |  2 ++
>  5 files changed, 58 insertions(+)
>
> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> index df08d183df..85c6b7b409 100644
> --- a/accel/tcg/translate-all.c
> +++ b/accel/tcg/translate-all.c
> @@ -1696,6 +1696,7 @@ static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, target_ulong pc,
>      new_stats->cs_base = cs_base;
>      new_stats->flags = flags;
>      new_stats->tb = current_tb;
> +    new_stats->translations.total = 1;
>
>      qht_insert(&tb_ctx.tb_stats, new_stats, hash, &existing_stats);
>
> @@ -1705,6 +1706,7 @@ static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, target_ulong pc,
>           * then just make the new TB point to the older TBStatistic
>           */
>          g_free(new_stats);
> +        ((TBStatistics *) existing_stats)->tb = current_tb;
>          return existing_stats;
>      } else {
>          return new_stats;
> @@ -1792,6 +1794,11 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>                  tb->tb_stats->stats_enabled |= TB_EXEC_STATS;
>              }
>          }
> +
> +        if (flag & TB_JIT_STATS) {
> +            tb->tb_stats->stats_enabled |= TB_JIT_STATS;
> +            atomic_inc(&tb->tb_stats->translations.total);
> +        }
>      } else {
>          tb->tb_stats = NULL;
>      }
> @@ -1869,6 +1876,10 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>      atomic_set(&prof->search_out_len, prof->search_out_len + search_size);
>  #endif
>
> +    if (tb_stats_enabled(tb, TB_JIT_STATS)) {
> +        atomic_add(&tb->tb_stats->code.out_len, gen_code_size);
> +    }
> +
>  #ifdef DEBUG_DISAS
>      if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
>          qemu_log_in_addr_range(tb->pc)) {
> @@ -1926,6 +1937,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
>      phys_page2 = -1;
>      if ((pc & TARGET_PAGE_MASK) != virt_page2) {
>          phys_page2 = get_page_addr_code(env, virt_page2);
> +        if (tb_stats_enabled(tb, TB_JIT_STATS)) {
> +            atomic_inc(&tb->tb_stats->translations.spanning);
> +        }
>      }
>      /*
>       * No explicit memory barrier is required -- tb_link_page() makes the
> diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
> index 396a11e828..834265d5be 100644
> --- a/accel/tcg/translator.c
> +++ b/accel/tcg/translator.c
> @@ -117,6 +117,10 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
>      db->tb->size = db->pc_next - db->pc_first;
>      db->tb->icount = db->num_insns;
>
> +    if (tb_stats_enabled(tb, TB_JIT_STATS)) {
> +        atomic_add(&db->tb->tb_stats->code.num_guest_inst, db->num_insns);
> +    }
> +
>  #ifdef DEBUG_DISAS
>      if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
>          && qemu_log_in_addr_range(db->pc_first)) {
> diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h
> index 0265050b79..3c219123c2 100644
> --- a/include/exec/tb-stats.h
> +++ b/include/exec/tb-stats.h
> @@ -34,6 +34,20 @@ struct TBStatistics {
>          unsigned long atomic;
>      } executions;
>
> +    struct {
> +        unsigned num_guest_inst;
> +        unsigned num_tcg_ops;
> +        unsigned num_tcg_ops_opt;
> +        unsigned spills;
> +        unsigned out_len;
> +    } code;
> +
> +    struct {
> +        unsigned long total;
> +        unsigned long uncached;
> +        unsigned long spanning;
> +    } translations;
> +
>      /* current TB linked to this TBStatistics */
>      TranslationBlock *tb;
>  };
> @@ -47,6 +61,7 @@ enum TBStatsStatus { TB_STATS_RUNNING, TB_STATS_PAUSED, TB_STATS_STOPPED };
>
>  #define TB_NOTHING    0
>  #define TB_EXEC_STATS 1
> +#define TB_JIT_STATS  (1 << 2)
>
>  extern int tcg_collect_tb_stats;
>  extern uint32_t default_tbstats_flag;
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index be2c33c400..446e3d1708 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -3126,6 +3126,11 @@ static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
>          case TEMP_VAL_REG:
>              tcg_out_st(s, ts->type, ts->reg,
>                         ts->mem_base->reg, ts->mem_offset);
> +
> +            /* Count number of spills */
> +            if (tb_stats_enabled(s->current_tb, TB_JIT_STATS)) {
> +                atomic_inc(&s->current_tb->tb_stats->code.spills);
> +            }
>              break;
>
>          case TEMP_VAL_MEM:
> @@ -3997,6 +4002,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
>      int i, num_insns;
>      TCGOp *op;
>
> +    s->current_tb = tb;
> +
>  #ifdef CONFIG_PROFILER
>      {
>          int n = 0;
> @@ -4028,6 +4035,14 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
>      }
>  #endif
>
> +    if (tb_stats_enabled(tb, TB_JIT_STATS)) {
> +        int n = 0;
> +        QTAILQ_FOREACH(op, &s->ops, link) {
> +            n++;
> +        }
> +        atomic_add(&tb->tb_stats->code.num_tcg_ops, n);
> +    }
> +
>  #ifdef CONFIG_DEBUG_TCG
>      /* Ensure all labels referenced have been emitted.  */
>      {
> @@ -4094,6 +4109,14 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
>      }
>  #endif
>
> +    if (tb_stats_enabled(tb, TB_JIT_STATS)) {
> +        int n = 0;
> +        QTAILQ_FOREACH(op, &s->ops, link) {
> +            n++;
> +        }
> +        atomic_add(&tb->tb_stats->code.num_tcg_ops_opt, n);
> +    }
> +
>      tcg_reg_alloc_start(s);
>
>      s->code_buf = tb->tc.ptr;
> diff --git a/tcg/tcg.h b/tcg/tcg.h
> index b411e17a28..bf6f3bcba3 100644
> --- a/tcg/tcg.h
> +++ b/tcg/tcg.h
> @@ -738,6 +738,8 @@ struct TCGContext {
>
>      uint16_t gen_insn_end_off[TCG_MAX_INSNS];
>      target_ulong gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS];
> +
> +    TranslationBlock *current_tb;
>  };
>
>  extern TCGContext tcg_init_ctx;


--
Alex Bennée
diff mbox series

Patch

diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index df08d183df..85c6b7b409 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -1696,6 +1696,7 @@  static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, target_ulong pc,
     new_stats->cs_base = cs_base;
     new_stats->flags = flags;
     new_stats->tb = current_tb;
+    new_stats->translations.total = 1;
 
     qht_insert(&tb_ctx.tb_stats, new_stats, hash, &existing_stats);
 
@@ -1705,6 +1706,7 @@  static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc, target_ulong pc,
          * then just make the new TB point to the older TBStatistic
          */
         g_free(new_stats);
+        ((TBStatistics *) existing_stats)->tb = current_tb;
         return existing_stats;
     } else {
         return new_stats;
@@ -1792,6 +1794,11 @@  TranslationBlock *tb_gen_code(CPUState *cpu,
                 tb->tb_stats->stats_enabled |= TB_EXEC_STATS;
             }
         }
+
+        if (flag & TB_JIT_STATS) {
+            tb->tb_stats->stats_enabled |= TB_JIT_STATS;
+            atomic_inc(&tb->tb_stats->translations.total);
+        }
     } else {
         tb->tb_stats = NULL;
     }
@@ -1869,6 +1876,10 @@  TranslationBlock *tb_gen_code(CPUState *cpu,
     atomic_set(&prof->search_out_len, prof->search_out_len + search_size);
 #endif
 
+    if (tb_stats_enabled(tb, TB_JIT_STATS)) {
+        atomic_add(&tb->tb_stats->code.out_len, gen_code_size);
+    }
+
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
         qemu_log_in_addr_range(tb->pc)) {
@@ -1926,6 +1937,9 @@  TranslationBlock *tb_gen_code(CPUState *cpu,
     phys_page2 = -1;
     if ((pc & TARGET_PAGE_MASK) != virt_page2) {
         phys_page2 = get_page_addr_code(env, virt_page2);
+        if (tb_stats_enabled(tb, TB_JIT_STATS)) {
+            atomic_inc(&tb->tb_stats->translations.spanning);
+        }
     }
     /*
      * No explicit memory barrier is required -- tb_link_page() makes the
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index 396a11e828..834265d5be 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -117,6 +117,10 @@  void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
     db->tb->size = db->pc_next - db->pc_first;
     db->tb->icount = db->num_insns;
 
+    if (tb_stats_enabled(tb, TB_JIT_STATS)) {
+        atomic_add(&db->tb->tb_stats->code.num_guest_inst, db->num_insns);
+    }
+
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
         && qemu_log_in_addr_range(db->pc_first)) {
diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h
index 0265050b79..3c219123c2 100644
--- a/include/exec/tb-stats.h
+++ b/include/exec/tb-stats.h
@@ -34,6 +34,20 @@  struct TBStatistics {
         unsigned long atomic;
     } executions;
 
+    struct {
+        unsigned num_guest_inst;
+        unsigned num_tcg_ops;
+        unsigned num_tcg_ops_opt;
+        unsigned spills;
+        unsigned out_len;
+    } code;
+
+    struct {
+        unsigned long total;
+        unsigned long uncached;
+        unsigned long spanning;
+    } translations;
+
     /* current TB linked to this TBStatistics */
     TranslationBlock *tb;
 };
@@ -47,6 +61,7 @@  enum TBStatsStatus { TB_STATS_RUNNING, TB_STATS_PAUSED, TB_STATS_STOPPED };
 
 #define TB_NOTHING    0
 #define TB_EXEC_STATS 1
+#define TB_JIT_STATS  (1 << 2)
 
 extern int tcg_collect_tb_stats;
 extern uint32_t default_tbstats_flag;
diff --git a/tcg/tcg.c b/tcg/tcg.c
index be2c33c400..446e3d1708 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -3126,6 +3126,11 @@  static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
         case TEMP_VAL_REG:
             tcg_out_st(s, ts->type, ts->reg,
                        ts->mem_base->reg, ts->mem_offset);
+
+            /* Count number of spills */
+            if (tb_stats_enabled(s->current_tb, TB_JIT_STATS)) {
+                atomic_inc(&s->current_tb->tb_stats->code.spills);
+            }
             break;
 
         case TEMP_VAL_MEM:
@@ -3997,6 +4002,8 @@  int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
     int i, num_insns;
     TCGOp *op;
 
+    s->current_tb = tb;
+
 #ifdef CONFIG_PROFILER
     {
         int n = 0;
@@ -4028,6 +4035,14 @@  int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
     }
 #endif
 
+    if (tb_stats_enabled(tb, TB_JIT_STATS)) {
+        int n = 0;
+        QTAILQ_FOREACH(op, &s->ops, link) {
+            n++;
+        }
+        atomic_add(&tb->tb_stats->code.num_tcg_ops, n);
+    }
+
 #ifdef CONFIG_DEBUG_TCG
     /* Ensure all labels referenced have been emitted.  */
     {
@@ -4094,6 +4109,14 @@  int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
     }
 #endif
 
+    if (tb_stats_enabled(tb, TB_JIT_STATS)) {
+        int n = 0;
+        QTAILQ_FOREACH(op, &s->ops, link) {
+            n++;
+        }
+        atomic_add(&tb->tb_stats->code.num_tcg_ops_opt, n);
+    }
+
     tcg_reg_alloc_start(s);
 
     s->code_buf = tb->tc.ptr;
diff --git a/tcg/tcg.h b/tcg/tcg.h
index b411e17a28..bf6f3bcba3 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -738,6 +738,8 @@  struct TCGContext {
 
     uint16_t gen_insn_end_off[TCG_MAX_INSNS];
     target_ulong gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS];
+
+    TranslationBlock *current_tb;
 };
 
 extern TCGContext tcg_init_ctx;