diff mbox series

[v2,2/4] Adding an optional tb execution counter.

Message ID 20190624055442.2973-3-vandersonmr2@gmail.com
State New
Headers show
Series dumping hot TBs | expand

Commit Message

vandersonmr June 24, 2019, 5:54 a.m. UTC
We collect the number of times each TB is executed
and store it in the its TBStatistics.
We also count the number of times the execution counter overflows.

Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
---
 accel/tcg/tcg-runtime.c   | 10 ++++++++++
 accel/tcg/tcg-runtime.h   |  2 ++
 accel/tcg/translator.c    |  1 +
 include/exec/gen-icount.h |  9 +++++++++
 4 files changed, 22 insertions(+)

Comments

Alex Bennée June 25, 2019, 9:51 a.m. UTC | #1
vandersonmr <vandersonmr2@gmail.com> writes:

> We collect the number of times each TB is executed
> and store it in the its TBStatistics.
> We also count the number of times the execution counter overflows.
>
> Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
> ---
>  accel/tcg/tcg-runtime.c   | 10 ++++++++++
>  accel/tcg/tcg-runtime.h   |  2 ++
>  accel/tcg/translator.c    |  1 +
>  include/exec/gen-icount.h |  9 +++++++++
>  4 files changed, 22 insertions(+)
>
> diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c
> index 8a1e408e31..9888a7fce8 100644
> --- a/accel/tcg/tcg-runtime.c
> +++ b/accel/tcg/tcg-runtime.c
> @@ -167,3 +167,13 @@ void HELPER(exit_atomic)(CPUArchState *env)
>  {
>      cpu_loop_exit_atomic(env_cpu(env), GETPC());
>  }
> +
> +void HELPER(inc_exec_freq)(void *ptr)
> +{
> +    TranslationBlock *tb = (TranslationBlock*) ptr;

To make things clearer:

  TBStatistics *stats = tb->tb_stats;

> +    // if overflows, then reset the execution counter and increment the overflow counter
> +    if (atomic_cmpxchg(&tb->tb_stats->exec_count, 0xFFFFFFFF, 0) == 0xFFFFFFFF) {
> +        atomic_inc(&tb->tb_stats->exec_count_overflows);
> +    }

This is all very nice but how often do you actually see overflows?
You'll see them even less on 32 bit hosts as they have much less memory
for translations so will flush them out more often. I'd suggest making
the counter "unsigned long" and just living with the fact the 32 bit
might get the execution stats wrong if a block executes more than 4
billion times.

> +    atomic_inc(&tb->tb_stats->exec_count);
> +}
> diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
> index 4fa61b49b4..bf0b75dbe8 100644
> --- a/accel/tcg/tcg-runtime.h
> +++ b/accel/tcg/tcg-runtime.h
> @@ -28,6 +28,8 @@ DEF_HELPER_FLAGS_1(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env)
>
>  DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
>
> +DEF_HELPER_FLAGS_1(inc_exec_freq, TCG_CALL_NO_RWG, void, ptr)
> +
>  #ifdef CONFIG_SOFTMMU
>
>  DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
> diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
> index 9226a348a3..cc06070e7e 100644
> --- a/accel/tcg/translator.c
> +++ b/accel/tcg/translator.c
> @@ -54,6 +54,7 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
>      gen_tb_start(db->tb);
>      ops->tb_start(db, cpu);
>      tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */
> +    gen_tb_exec_count(tb);
>
>      while (true) {
>          db->num_insns++;
> diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h
> index f7669b6841..6d38b6e1fb 100644
> --- a/include/exec/gen-icount.h
> +++ b/include/exec/gen-icount.h
> @@ -7,6 +7,15 @@
>
>  static TCGOp *icount_start_insn;
>
> +static inline void gen_tb_exec_count(TranslationBlock *tb)
> +{
> +  if (qemu_loglevel_mask(CPU_LOG_HOT_TBS)) {
> +    TCGv_ptr tb_ptr = tcg_const_ptr(tb);

Why not pass the address of the counter itself to save the cost of the
indirection calculation at runtime?

> +    gen_helper_inc_exec_freq(tb_ptr);
> +    tcg_temp_free_ptr(tb_ptr);
> +  }
> +}
> +
>  static inline void gen_tb_start(TranslationBlock *tb)
>  {
>      TCGv_i32 count, imm;


--
Alex Bennée
diff mbox series

Patch

diff --git a/accel/tcg/tcg-runtime.c b/accel/tcg/tcg-runtime.c
index 8a1e408e31..9888a7fce8 100644
--- a/accel/tcg/tcg-runtime.c
+++ b/accel/tcg/tcg-runtime.c
@@ -167,3 +167,13 @@  void HELPER(exit_atomic)(CPUArchState *env)
 {
     cpu_loop_exit_atomic(env_cpu(env), GETPC());
 }
+
+void HELPER(inc_exec_freq)(void *ptr)
+{
+    TranslationBlock *tb = (TranslationBlock*) ptr;
+    // if overflows, then reset the execution counter and increment the overflow counter
+    if (atomic_cmpxchg(&tb->tb_stats->exec_count, 0xFFFFFFFF, 0) == 0xFFFFFFFF) {
+        atomic_inc(&tb->tb_stats->exec_count_overflows);
+    }
+    atomic_inc(&tb->tb_stats->exec_count);
+}
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index 4fa61b49b4..bf0b75dbe8 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -28,6 +28,8 @@  DEF_HELPER_FLAGS_1(lookup_tb_ptr, TCG_CALL_NO_WG_SE, ptr, env)
 
 DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
 
+DEF_HELPER_FLAGS_1(inc_exec_freq, TCG_CALL_NO_RWG, void, ptr)
+
 #ifdef CONFIG_SOFTMMU
 
 DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index 9226a348a3..cc06070e7e 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -54,6 +54,7 @@  void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
     gen_tb_start(db->tb);
     ops->tb_start(db, cpu);
     tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */
+    gen_tb_exec_count(tb);
 
     while (true) {
         db->num_insns++;
diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h
index f7669b6841..6d38b6e1fb 100644
--- a/include/exec/gen-icount.h
+++ b/include/exec/gen-icount.h
@@ -7,6 +7,15 @@ 
 
 static TCGOp *icount_start_insn;
 
+static inline void gen_tb_exec_count(TranslationBlock *tb)
+{
+  if (qemu_loglevel_mask(CPU_LOG_HOT_TBS)) {
+    TCGv_ptr tb_ptr = tcg_const_ptr(tb);
+    gen_helper_inc_exec_freq(tb_ptr);
+    tcg_temp_free_ptr(tb_ptr);
+  }
+}
+
 static inline void gen_tb_start(TranslationBlock *tb)
 {
     TCGv_i32 count, imm;