diff mbox

[12/22] translate-all: report correct avg host TB size

Message ID 1499586614-20507-13-git-send-email-cota@braap.org
State New
Headers show

Commit Message

Emilio Cota July 9, 2017, 7:50 a.m. UTC
Since commit 6e3b2bfd6 ("tcg: allocate TB structs before the
corresponding translated code") we are not fully utilizing
code_gen_buffer for translated code, and therefore are
incorrectly reporting the amount of translated code as well as
the average host TB size. Address this by:

- Making the conscious choice of misreporting the total translated code;
  doing otherwise would mislead users into thinking "-tb-size" is not
  honoured.

- Expanding tb_tree_stats to accurately count the bytes of translated code on
  the host, and using this for reporting the average tb host size,
  as well as the expansion ratio.

In the future we might want to consider reporting the accurate numbers for
the total translated code, together with a "bookkeeping/overhead" field to
account for the TB structs.

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 accel/tcg/translate-all.c | 34 ++++++++++++++++++++++++----------
 1 file changed, 24 insertions(+), 10 deletions(-)

Comments

Alex Bennée July 12, 2017, 3:25 p.m. UTC | #1
Emilio G. Cota <cota@braap.org> writes:

> Since commit 6e3b2bfd6 ("tcg: allocate TB structs before the
> corresponding translated code") we are not fully utilizing
> code_gen_buffer for translated code, and therefore are
> incorrectly reporting the amount of translated code as well as
> the average host TB size. Address this by:
>
> - Making the conscious choice of misreporting the total translated code;
>   doing otherwise would mislead users into thinking "-tb-size" is not
>   honoured.
>
> - Expanding tb_tree_stats to accurately count the bytes of translated code on
>   the host, and using this for reporting the average tb host size,
>   as well as the expansion ratio.
>
> In the future we might want to consider reporting the accurate numbers for
> the total translated code, together with a "bookkeeping/overhead" field to
> account for the TB structs.
>
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  accel/tcg/translate-all.c | 34 ++++++++++++++++++++++++----------
>  1 file changed, 24 insertions(+), 10 deletions(-)
>
> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> index aa3a08b..aa71292 100644
> --- a/accel/tcg/translate-all.c
> +++ b/accel/tcg/translate-all.c
> @@ -898,9 +898,20 @@ static void page_flush_tb(void)
>      }
>  }
>
> +static __attribute__((unused))
> +gboolean tb_host_size_iter(gpointer key, gpointer value, gpointer data)
> +{
> +    const TranslationBlock *tb = value;
> +    size_t *size = data;
> +
> +    *size += tb->tc_size;
> +    return false;
> +}
> +

I think having the __attribute__ stuff is confusing. Why don't we just
do what the newer debug stuff does:

modified   accel/tcg/translate-all.c
@@ -66,6 +66,12 @@
 /* make various TB consistency checks */
 /* #define DEBUG_TB_CHECK */

+#if defined(DEBUG_TB_FLUSH)
+#define DEBUG_TB_FLUSH_GATE 1
+#else
+#define DEBUG_TB_FLUSH_GATE 0
+#endif
+
 #if !defined(CONFIG_USER_ONLY)
 /* TB consistency checks only implemented for usermode emulation.  */
 #undef DEBUG_TB_CHECK
@@ -948,8 +954,7 @@ static void page_flush_tb(void)
     }
 }

-static __attribute__((unused))
-gboolean tb_host_size_iter(gpointer key, gpointer value, gpointer data)
+static gboolean tb_host_size_iter(gpointer key, gpointer value, gpointer data)
 {
     const TranslationBlock *tb = value;
     size_t *size = data;
@@ -958,11 +963,22 @@ gboolean tb_host_size_iter(gpointer key, gpointer value, gpointer data)
     return false;
 }

+static void dump_tb_sizes(void)
+{
+    if (DEBUG_TB_FLUSH_GATE) {
+        size_t host_size = 0;
+        int nb_tbs;
+
+        g_tree_foreach(tb_ctx.tb_tree, tb_host_size_iter, &host_size);
+        nb_tbs = g_tree_nnodes(tb_ctx.tb_tree);
+        fprintf(stderr, "qemu: flush code_size=%zu nb_tbs=%d avg_tb_size=%zu\n",
+                tcg_code_size(), nb_tbs, nb_tbs > 0 ? host_size / nb_tbs : 0);
+    }
+}
+
 /* flush all the translation blocks */
 static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
 {
-    size_t host_size __attribute__((unused)) = 0;
-    int nb_tbs __attribute__((unused));

     tb_lock();

@@ -973,12 +989,7 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
         goto done;
     }

-#if defined(DEBUG_TB_FLUSH)
-    g_tree_foreach(tb_ctx.tb_tree, tb_host_size_iter, &host_size);
-    nb_tbs = g_tree_nnodes(tb_ctx.tb_tree);
-    fprintf(stderr, "qemu: flush code_size=%zu nb_tbs=%d avg_tb_size=%zu\n",
-           tcg_code_size(), nb_tbs, nb_tbs > 0 ? host_size / nb_tbs : 0);
-#endif
+    dump_tb_sizes();


Which will a) ensure all the debug code is compiled even when not
enabled and b) the compiler won't bitch at you when it optimises stuff
away.

Better?

--
Alex Bennée
Emilio Cota July 12, 2017, 6:45 p.m. UTC | #2
On Wed, Jul 12, 2017 at 16:25:45 +0100, Alex Bennée wrote:
> Emilio G. Cota <cota@braap.org> writes:
(snip)
> I think having the __attribute__ stuff is confusing. Why don't we just
> do what the newer debug stuff does:
> 
> modified   accel/tcg/translate-all.c
> @@ -66,6 +66,12 @@
>  /* make various TB consistency checks */
>  /* #define DEBUG_TB_CHECK */
> 
> +#if defined(DEBUG_TB_FLUSH)
> +#define DEBUG_TB_FLUSH_GATE 1
> +#else
> +#define DEBUG_TB_FLUSH_GATE 0
> +#endif
(snip)
> Which will a) ensure all the debug code is compiled even when not
> enabled and b) the compiler won't bitch at you when it optimises stuff
> away.
> 
> Better?

Much better! The unused attribute was there to achieve this, but what
you propose is much better.

		E.
diff mbox

Patch

diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index aa3a08b..aa71292 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -898,9 +898,20 @@  static void page_flush_tb(void)
     }
 }
 
+static __attribute__((unused))
+gboolean tb_host_size_iter(gpointer key, gpointer value, gpointer data)
+{
+    const TranslationBlock *tb = value;
+    size_t *size = data;
+
+    *size += tb->tc_size;
+    return false;
+}
+
 /* flush all the translation blocks */
 static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
 {
+    size_t host_size __attribute__((unused)) = 0;
     int nb_tbs __attribute__((unused));
 
     tb_lock();
@@ -913,12 +924,11 @@  static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
     }
 
 #if defined(DEBUG_TB_FLUSH)
+    g_tree_foreach(tcg_ctx.tb_ctx.tb_tree, tb_host_size_iter, &host_size);
     nb_tbs = g_tree_nnodes(tcg_ctx.tb_ctx.tb_tree);
-    printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
+    printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%zu\n",
            (unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer),
-           nb_tbs, nb_tbs > 0 ?
-           ((unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer)) /
-           nb_tbs : 0);
+           nb_tbs, nb_tbs > 0 ? host_size / nb_tbs : 0);
 #endif
     if ((unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer)
         > tcg_ctx.code_gen_buffer_size) {
@@ -1847,6 +1857,7 @@  static void print_qht_statistics(FILE *f, fprintf_function cpu_fprintf,
 }
 
 struct tb_tree_stats {
+    size_t host_size;
     size_t target_size;
     size_t max_target_size;
     size_t direct_jmp_count;
@@ -1859,6 +1870,7 @@  static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
     const TranslationBlock *tb = value;
     struct tb_tree_stats *tst = data;
 
+    tst->host_size += tb->tc_size;
     tst->target_size += tb->size;
     if (tb->size > tst->max_target_size) {
         tst->max_target_size = tb->size;
@@ -1887,6 +1899,11 @@  void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
     g_tree_foreach(tcg_ctx.tb_ctx.tb_tree, tb_tree_stats_iter, &tst);
     /* XXX: avoid using doubles ? */
     cpu_fprintf(f, "Translation buffer state:\n");
+    /*
+     * Report total code size including the padding and TB structs;
+     * otherwise users might think "-tb-size" is not honoured.
+     * For avg host size we use the precise numbers from tb_tree_stats though.
+     */
     cpu_fprintf(f, "gen code size       %td/%zd\n",
                 tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer,
                 tcg_ctx.code_gen_highwater - tcg_ctx.code_gen_buffer);
@@ -1894,12 +1911,9 @@  void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
     cpu_fprintf(f, "TB avg target size  %zu max=%zu bytes\n",
                 nb_tbs ? tst.target_size / nb_tbs : 0,
                 tst.max_target_size);
-    cpu_fprintf(f, "TB avg host size    %td bytes (expansion ratio: %0.1f)\n",
-                nb_tbs ? (tcg_ctx.code_gen_ptr -
-                          tcg_ctx.code_gen_buffer) / nb_tbs : 0,
-                tst.target_size ? (double) (tcg_ctx.code_gen_ptr -
-                                            tcg_ctx.code_gen_buffer) /
-                                            tst.target_size : 0);
+    cpu_fprintf(f, "TB avg host size    %zu bytes (expansion ratio: %0.1f)\n",
+                nb_tbs ? tst.host_size / nb_tbs : 0,
+                tst.target_size ? (double)tst.host_size / tst.target_size : 0);
     cpu_fprintf(f, "cross page TB count %zu (%zu%%)\n", tst.cross_page,
             nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
     cpu_fprintf(f, "direct jump count   %zu (%zu%%) (2 jumps=%zu %zu%%)\n",