diff mbox series

[RFC] accel/tcg/translator: add tb_enter TCG trace

Message ID 20190628113917.15869-1-luc.michel@greensocs.com
State New
Headers show
Series [RFC] accel/tcg/translator: add tb_enter TCG trace | expand

Commit Message

Luc Michel June 28, 2019, 11:39 a.m. UTC
Add a TCG trace at the begining of a translation block recording the
first and last (past-the-end) PC values.

Signed-off-by: Luc Michel <luc.michel@greensocs.com>
---
This can be used to trace the execution of the guest quite efficiently.
It will report each time a TB is entered (using the tb_enter_exec
trace). The traces arguments give the PC start and past-the-end values.
It has very little to no performance impact since the trace is actually
emitted in the generated code only when it is enabled at run time.

It works already quite well on its own to trace guest execution. However
it does not handle the case where a TB is exited in the middle of
execution. I'm not sure how to properly trace that. A trace could be
added when `cpu_loop_exit()' is called to report the current PC, but in
most cases the interesting value (the PC of the instruction that
caused the exit) is already lost at this stage.

I'm not sure there is a generic (i.e. not target specific) way of
recovering the last PC executed when cpu_loop_exit() is called. Do you
think of a better way?

Thanks to the Xilinx's QEMU team who sponsored this work.
---
 accel/tcg/translator.c | 24 ++++++++++++++++++++++++
 trace-events           |  3 +++
 2 files changed, 27 insertions(+)

Comments

Alex Bennée June 28, 2019, 4:33 p.m. UTC | #1
Luc Michel <luc.michel@greensocs.com> writes:

> Add a TCG trace at the begining of a translation block recording the
> first and last (past-the-end) PC values.
>
> Signed-off-by: Luc Michel <luc.michel@greensocs.com>
> ---
> This can be used to trace the execution of the guest quite efficiently.
> It will report each time a TB is entered (using the tb_enter_exec
> trace). The traces arguments give the PC start and past-the-end values.
> It has very little to no performance impact since the trace is actually
> emitted in the generated code only when it is enabled at run time.
>
> It works already quite well on its own to trace guest execution.

You could just remove the disable from:

disable exec_tb(void *tb, uintptr_t pc) "tb:%p pc=0x%"PRIxPTR

And call qemu with -d nochain,trace:exec_tb

> However
> it does not handle the case where a TB is exited in the middle of
> execution. I'm not sure how to properly trace that. A trace could be
> added when `cpu_loop_exit()' is called to report the current PC, but in
> most cases the interesting value (the PC of the instruction that
> caused the exit) is already lost at this stage.
>
> I'm not sure there is a generic (i.e. not target specific) way of
> recovering the last PC executed when cpu_loop_exit() is called. Do you
> think of a better way?
>
> Thanks to the Xilinx's QEMU team who sponsored this work.
> ---
>  accel/tcg/translator.c | 24 ++++++++++++++++++++++++
>  trace-events           |  3 +++
>  2 files changed, 27 insertions(+)
>
> diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
> index 9226a348a3..c55377aa18 100644
> --- a/accel/tcg/translator.c
> +++ b/accel/tcg/translator.c
> @@ -14,10 +14,11 @@
>  #include "tcg/tcg-op.h"
>  #include "exec/exec-all.h"
>  #include "exec/gen-icount.h"
>  #include "exec/log.h"
>  #include "exec/translator.h"
> +#include "trace-tcg.h"
>
>  /* Pairs with tcg_clear_temp_count.
>     To be called by #TranslatorOps.{translate_insn,tb_stop} if
>     (1) the target is sufficiently clean to support reporting,
>     (2) as and when all temporaries are known to be consumed.
> @@ -28,14 +29,31 @@ void translator_loop_temp_check(DisasContextBase *db)
>          qemu_log("warning: TCG temporary leaks before "
>                   TARGET_FMT_lx "\n", db->pc_next);
>      }
>  }
>
> +static TCGOp *gen_trace_tb_enter(TranslationBlock *tb)
> +{
> +    TCGOp *last_pc_op;
> +
> +    TCGv pc_end = tcg_temp_new();
> +
> +    /* The last PC value is not known yet */
> +    tcg_gen_movi_tl(pc_end, 0xdeadbeef);
> +    last_pc_op = tcg_last_op();
> +
> +    trace_tb_enter_tcg(tcg_ctx->cpu, cpu_env, tb->pc, pc_end);
> +    tcg_temp_free(pc_end);
> +
> +    return last_pc_op;
> +}
> +
>  void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
>                       CPUState *cpu, TranslationBlock *tb, int max_insns)
>  {
>      int bp_insn = 0;
> +    TCGOp *trace_pc_end;
>
>      /* Initialize DisasContext */
>      db->tb = tb;
>      db->pc_first = tb->pc;
>      db->pc_next = db->pc_first;
> @@ -50,10 +68,13 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
>      /* Reset the temp count so that we can identify leaks */
>      tcg_clear_temp_count();
>
>      /* Start translating.  */
>      gen_tb_start(db->tb);
> +
> +    trace_pc_end = gen_trace_tb_enter(tb);
> +
>      ops->tb_start(db, cpu);
>      tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */
>
>      while (true) {
>          db->num_insns++;
> @@ -110,10 +131,13 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
>
>      /* Emit code to exit the TB, as indicated by db->is_jmp.  */
>      ops->tb_stop(db, cpu);
>      gen_tb_end(db->tb, db->num_insns - bp_insn);
>
> +    /* Fixup the last PC value in the tb_enter trace now that we know it */
> +    tcg_set_insn_param(trace_pc_end, 1, db->pc_next);
> +
>      /* The disas_log hook may use these values rather than recompute.  */
>      db->tb->size = db->pc_next - db->pc_first;
>      db->tb->icount = db->num_insns;
>
>  #ifdef DEBUG_DISAS
> diff --git a/trace-events b/trace-events
> index aeea3c2bdb..e37fa12ef0 100644
> --- a/trace-events
> +++ b/trace-events
> @@ -157,10 +157,13 @@ vcpu guest_cpu_reset(void)
>  #
>  # Mode: user, softmmu
>  # Targets: TCG(all)
>  vcpu tcg guest_mem_before(TCGv vaddr, uint8_t info) "info=%d", "vaddr=0x%016"PRIx64" info=%d"
>
> +# translator.c
> +vcpu tcg tb_enter(uint64_t pc_start, TCGv pc_end) "pc_start:0x%"PRIx64, "pc:0x%"PRIx64" pc_end:0x%"PRIx64
> +
>  # linux-user/syscall.c
>  # bsd-user/syscall.c
>
>  # @num: System call number.
>  # @arg*: System call argument value.


--
Alex Bennée
Edgar E. Iglesias July 1, 2019, 1:19 p.m. UTC | #2
On Fri, Jun 28, 2019 at 02:16:41PM +0200, Richard Henderson wrote:
> On 6/28/19 1:39 PM, Luc Michel wrote:
> > Add a TCG trace at the begining of a translation block recording the
> > first and last (past-the-end) PC values.
> > 
> > Signed-off-by: Luc Michel <luc.michel@greensocs.com>
> > ---
> > This can be used to trace the execution of the guest quite efficiently.
> > It will report each time a TB is entered (using the tb_enter_exec
> > trace). The traces arguments give the PC start and past-the-end values.
> > It has very little to no performance impact since the trace is actually
> > emitted in the generated code only when it is enabled at run time.
> > 
> > It works already quite well on its own to trace guest execution. However
> > it does not handle the case where a TB is exited in the middle of
> > execution. I'm not sure how to properly trace that. A trace could be
> > added when `cpu_loop_exit()' is called to report the current PC, but in
> > most cases the interesting value (the PC of the instruction that
> > caused the exit) is already lost at this stage.
> > 
> > I'm not sure there is a generic (i.e. not target specific) way of
> > recovering the last PC executed when cpu_loop_exit() is called. Do you
> > think of a better way?
> > 
> > Thanks to the Xilinx's QEMU team who sponsored this work.
> > ---
> >  accel/tcg/translator.c | 24 ++++++++++++++++++++++++
> >  trace-events           |  3 +++
> >  2 files changed, 27 insertions(+)
> > 
> > diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
> > index 9226a348a3..c55377aa18 100644
> > --- a/accel/tcg/translator.c
> > +++ b/accel/tcg/translator.c
> > @@ -14,10 +14,11 @@
> >  #include "tcg/tcg-op.h"
> >  #include "exec/exec-all.h"
> >  #include "exec/gen-icount.h"
> >  #include "exec/log.h"
> >  #include "exec/translator.h"
> > +#include "trace-tcg.h"
> >  
> >  /* Pairs with tcg_clear_temp_count.
> >     To be called by #TranslatorOps.{translate_insn,tb_stop} if
> >     (1) the target is sufficiently clean to support reporting,
> >     (2) as and when all temporaries are known to be consumed.
> > @@ -28,14 +29,31 @@ void translator_loop_temp_check(DisasContextBase *db)
> >          qemu_log("warning: TCG temporary leaks before "
> >                   TARGET_FMT_lx "\n", db->pc_next);
> >      }
> >  }
> >  
> > +static TCGOp *gen_trace_tb_enter(TranslationBlock *tb)
> > +{
> > +    TCGOp *last_pc_op;
> > +
> > +    TCGv pc_end = tcg_temp_new();
> > +
> > +    /* The last PC value is not known yet */
> > +    tcg_gen_movi_tl(pc_end, 0xdeadbeef);
> > +    last_pc_op = tcg_last_op();
> 
> TL is a target-specific type that does not necessarily correspond to uint64_t,
> as you assume in the print message.  More importantly, on a 32-bit host with a
> 64-bit guest, this movi will generate *two* ops...
> 
> > +    /* Fixup the last PC value in the tb_enter trace now that we know it */
> > +    tcg_set_insn_param(trace_pc_end, 1, db->pc_next);
> 
> ... which means that this operation does not do what you think it does.  It
> will only set one (unknown) half of the _i64 temporary.
> 
> Moreover, this isn't quite as zero overhead as I would like, because the pc_end
> temporary is always created, even if the trace_tb condition is not satisfied
> and so it (eventually) gets removed as unused.
> 
> I'm not quite sure what you're after with pc_end anyway.  As you note within
> the cover, you can't reliably use it for anything.  If you remove that, then
> you've also removed all of the other problems with this patch.
>

Hi,

One of the use cases is to be able to collect code-coverage from these traces.
In that case we're going to need a reliable pc_end. We may need to recover it
from outside of TCG in some cases though...

Cheers,
Edgar
Alex Bennée July 1, 2019, 1:33 p.m. UTC | #3
Edgar E. Iglesias <edgar.iglesias@xilinx.com> writes:

> On Fri, Jun 28, 2019 at 02:16:41PM +0200, Richard Henderson wrote:
>> On 6/28/19 1:39 PM, Luc Michel wrote:
>> > Add a TCG trace at the begining of a translation block recording the
>> > first and last (past-the-end) PC values.
>> >
>> > Signed-off-by: Luc Michel <luc.michel@greensocs.com>
>> > ---
>> > This can be used to trace the execution of the guest quite efficiently.
>> > It will report each time a TB is entered (using the tb_enter_exec
>> > trace). The traces arguments give the PC start and past-the-end values.
>> > It has very little to no performance impact since the trace is actually
>> > emitted in the generated code only when it is enabled at run time.
>> >
>> > It works already quite well on its own to trace guest execution. However
>> > it does not handle the case where a TB is exited in the middle of
>> > execution. I'm not sure how to properly trace that. A trace could be
>> > added when `cpu_loop_exit()' is called to report the current PC, but in
>> > most cases the interesting value (the PC of the instruction that
>> > caused the exit) is already lost at this stage.
>> >
>> > I'm not sure there is a generic (i.e. not target specific) way of
>> > recovering the last PC executed when cpu_loop_exit() is called. Do you
>> > think of a better way?
>> >
>> > Thanks to the Xilinx's QEMU team who sponsored this work.
>> > ---
>> >  accel/tcg/translator.c | 24 ++++++++++++++++++++++++
>> >  trace-events           |  3 +++
>> >  2 files changed, 27 insertions(+)
>> >
>> > diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
>> > index 9226a348a3..c55377aa18 100644
>> > --- a/accel/tcg/translator.c
>> > +++ b/accel/tcg/translator.c
>> > @@ -14,10 +14,11 @@
>> >  #include "tcg/tcg-op.h"
>> >  #include "exec/exec-all.h"
>> >  #include "exec/gen-icount.h"
>> >  #include "exec/log.h"
>> >  #include "exec/translator.h"
>> > +#include "trace-tcg.h"
>> >
>> >  /* Pairs with tcg_clear_temp_count.
>> >     To be called by #TranslatorOps.{translate_insn,tb_stop} if
>> >     (1) the target is sufficiently clean to support reporting,
>> >     (2) as and when all temporaries are known to be consumed.
>> > @@ -28,14 +29,31 @@ void translator_loop_temp_check(DisasContextBase *db)
>> >          qemu_log("warning: TCG temporary leaks before "
>> >                   TARGET_FMT_lx "\n", db->pc_next);
>> >      }
>> >  }
>> >
>> > +static TCGOp *gen_trace_tb_enter(TranslationBlock *tb)
>> > +{
>> > +    TCGOp *last_pc_op;
>> > +
>> > +    TCGv pc_end = tcg_temp_new();
>> > +
>> > +    /* The last PC value is not known yet */
>> > +    tcg_gen_movi_tl(pc_end, 0xdeadbeef);
>> > +    last_pc_op = tcg_last_op();
>>
>> TL is a target-specific type that does not necessarily correspond to uint64_t,
>> as you assume in the print message.  More importantly, on a 32-bit host with a
>> 64-bit guest, this movi will generate *two* ops...
>>
>> > +    /* Fixup the last PC value in the tb_enter trace now that we know it */
>> > +    tcg_set_insn_param(trace_pc_end, 1, db->pc_next);
>>
>> ... which means that this operation does not do what you think it does.  It
>> will only set one (unknown) half of the _i64 temporary.
>>
>> Moreover, this isn't quite as zero overhead as I would like, because the pc_end
>> temporary is always created, even if the trace_tb condition is not satisfied
>> and so it (eventually) gets removed as unused.
>>
>> I'm not quite sure what you're after with pc_end anyway.  As you note within
>> the cover, you can't reliably use it for anything.  If you remove that, then
>> you've also removed all of the other problems with this patch.
>>
>
> Hi,
>
> One of the use cases is to be able to collect code-coverage from these traces.
> In that case we're going to need a reliable pc_end. We may need to recover it
> from outside of TCG in some cases though...

Why?

The only place you need to recover pc_end is when processing an
exception and for that case the front end should be using
cpu_loop_exit_restore() to ensure registers are valid before the
exception is processed. Otherwise you know where you've been given the
next block starts at pc_next (with -d nochain).

However I suspect if you want to build more sophisticated tools to track
execution the plugin approach might be better. This seems like a bit of
an arbitrary addition to the TCG core for a single use case where we
already have logging facilities that will give you the same information.

--
Alex Bennée
Edgar E. Iglesias July 1, 2019, 1:47 p.m. UTC | #4
On Mon, Jul 01, 2019 at 02:33:11PM +0100, Alex Bennée wrote:
> 
> Edgar E. Iglesias <edgar.iglesias@xilinx.com> writes:
> 
> > On Fri, Jun 28, 2019 at 02:16:41PM +0200, Richard Henderson wrote:
> >> On 6/28/19 1:39 PM, Luc Michel wrote:
> >> > Add a TCG trace at the begining of a translation block recording the
> >> > first and last (past-the-end) PC values.
> >> >
> >> > Signed-off-by: Luc Michel <luc.michel@greensocs.com>
> >> > ---
> >> > This can be used to trace the execution of the guest quite efficiently.
> >> > It will report each time a TB is entered (using the tb_enter_exec
> >> > trace). The traces arguments give the PC start and past-the-end values.
> >> > It has very little to no performance impact since the trace is actually
> >> > emitted in the generated code only when it is enabled at run time.
> >> >
> >> > It works already quite well on its own to trace guest execution. However
> >> > it does not handle the case where a TB is exited in the middle of
> >> > execution. I'm not sure how to properly trace that. A trace could be
> >> > added when `cpu_loop_exit()' is called to report the current PC, but in
> >> > most cases the interesting value (the PC of the instruction that
> >> > caused the exit) is already lost at this stage.
> >> >
> >> > I'm not sure there is a generic (i.e. not target specific) way of
> >> > recovering the last PC executed when cpu_loop_exit() is called. Do you
> >> > think of a better way?
> >> >
> >> > Thanks to the Xilinx's QEMU team who sponsored this work.
> >> > ---
> >> >  accel/tcg/translator.c | 24 ++++++++++++++++++++++++
> >> >  trace-events           |  3 +++
> >> >  2 files changed, 27 insertions(+)
> >> >
> >> > diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
> >> > index 9226a348a3..c55377aa18 100644
> >> > --- a/accel/tcg/translator.c
> >> > +++ b/accel/tcg/translator.c
> >> > @@ -14,10 +14,11 @@
> >> >  #include "tcg/tcg-op.h"
> >> >  #include "exec/exec-all.h"
> >> >  #include "exec/gen-icount.h"
> >> >  #include "exec/log.h"
> >> >  #include "exec/translator.h"
> >> > +#include "trace-tcg.h"
> >> >
> >> >  /* Pairs with tcg_clear_temp_count.
> >> >     To be called by #TranslatorOps.{translate_insn,tb_stop} if
> >> >     (1) the target is sufficiently clean to support reporting,
> >> >     (2) as and when all temporaries are known to be consumed.
> >> > @@ -28,14 +29,31 @@ void translator_loop_temp_check(DisasContextBase *db)
> >> >          qemu_log("warning: TCG temporary leaks before "
> >> >                   TARGET_FMT_lx "\n", db->pc_next);
> >> >      }
> >> >  }
> >> >
> >> > +static TCGOp *gen_trace_tb_enter(TranslationBlock *tb)
> >> > +{
> >> > +    TCGOp *last_pc_op;
> >> > +
> >> > +    TCGv pc_end = tcg_temp_new();
> >> > +
> >> > +    /* The last PC value is not known yet */
> >> > +    tcg_gen_movi_tl(pc_end, 0xdeadbeef);
> >> > +    last_pc_op = tcg_last_op();
> >>
> >> TL is a target-specific type that does not necessarily correspond to uint64_t,
> >> as you assume in the print message.  More importantly, on a 32-bit host with a
> >> 64-bit guest, this movi will generate *two* ops...
> >>
> >> > +    /* Fixup the last PC value in the tb_enter trace now that we know it */
> >> > +    tcg_set_insn_param(trace_pc_end, 1, db->pc_next);
> >>
> >> ... which means that this operation does not do what you think it does.  It
> >> will only set one (unknown) half of the _i64 temporary.
> >>
> >> Moreover, this isn't quite as zero overhead as I would like, because the pc_end
> >> temporary is always created, even if the trace_tb condition is not satisfied
> >> and so it (eventually) gets removed as unused.
> >>
> >> I'm not quite sure what you're after with pc_end anyway.  As you note within
> >> the cover, you can't reliably use it for anything.  If you remove that, then
> >> you've also removed all of the other problems with this patch.
> >>
> >
> > Hi,
> >
> > One of the use cases is to be able to collect code-coverage from these traces.
> > In that case we're going to need a reliable pc_end. We may need to recover it
> > from outside of TCG in some cases though...
> 
> Why?
> 
> The only place you need to recover pc_end is when processing an
> exception and for that case the front end should be using
> cpu_loop_exit_restore() to ensure registers are valid before the
> exception is processed. Otherwise you know where you've been given the
> next block starts at pc_next (with -d nochain).

That's what I meant, that it may not be known to tcg generated code until after the fact.


> 
> However I suspect if you want to build more sophisticated tools to track
> execution the plugin approach might be better. This seems like a bit of
> an arbitrary addition to the TCG core for a single use case where we
> already have logging facilities that will give you the same information.

Sure, if there are other better ways to do the tracing, it sounds good to me.

Preferably it should be binary traces (otherwise it's to slow to generate and
process them plus the traces get huge). And better if we can optionally avoid
storing them in files (i.e stream and process them live via sockets) to support
large runs without storing huge intermediate files.

Cheers,
Edgar



> 
> --
> Alex Bennée
diff mbox series

Patch

diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index 9226a348a3..c55377aa18 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -14,10 +14,11 @@ 
 #include "tcg/tcg-op.h"
 #include "exec/exec-all.h"
 #include "exec/gen-icount.h"
 #include "exec/log.h"
 #include "exec/translator.h"
+#include "trace-tcg.h"
 
 /* Pairs with tcg_clear_temp_count.
    To be called by #TranslatorOps.{translate_insn,tb_stop} if
    (1) the target is sufficiently clean to support reporting,
    (2) as and when all temporaries are known to be consumed.
@@ -28,14 +29,31 @@  void translator_loop_temp_check(DisasContextBase *db)
         qemu_log("warning: TCG temporary leaks before "
                  TARGET_FMT_lx "\n", db->pc_next);
     }
 }
 
+static TCGOp *gen_trace_tb_enter(TranslationBlock *tb)
+{
+    TCGOp *last_pc_op;
+
+    TCGv pc_end = tcg_temp_new();
+
+    /* The last PC value is not known yet */
+    tcg_gen_movi_tl(pc_end, 0xdeadbeef);
+    last_pc_op = tcg_last_op();
+
+    trace_tb_enter_tcg(tcg_ctx->cpu, cpu_env, tb->pc, pc_end);
+    tcg_temp_free(pc_end);
+
+    return last_pc_op;
+}
+
 void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
                      CPUState *cpu, TranslationBlock *tb, int max_insns)
 {
     int bp_insn = 0;
+    TCGOp *trace_pc_end;
 
     /* Initialize DisasContext */
     db->tb = tb;
     db->pc_first = tb->pc;
     db->pc_next = db->pc_first;
@@ -50,10 +68,13 @@  void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
     /* Reset the temp count so that we can identify leaks */
     tcg_clear_temp_count();
 
     /* Start translating.  */
     gen_tb_start(db->tb);
+
+    trace_pc_end = gen_trace_tb_enter(tb);
+
     ops->tb_start(db, cpu);
     tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */
 
     while (true) {
         db->num_insns++;
@@ -110,10 +131,13 @@  void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
 
     /* Emit code to exit the TB, as indicated by db->is_jmp.  */
     ops->tb_stop(db, cpu);
     gen_tb_end(db->tb, db->num_insns - bp_insn);
 
+    /* Fixup the last PC value in the tb_enter trace now that we know it */
+    tcg_set_insn_param(trace_pc_end, 1, db->pc_next);
+
     /* The disas_log hook may use these values rather than recompute.  */
     db->tb->size = db->pc_next - db->pc_first;
     db->tb->icount = db->num_insns;
 
 #ifdef DEBUG_DISAS
diff --git a/trace-events b/trace-events
index aeea3c2bdb..e37fa12ef0 100644
--- a/trace-events
+++ b/trace-events
@@ -157,10 +157,13 @@  vcpu guest_cpu_reset(void)
 #
 # Mode: user, softmmu
 # Targets: TCG(all)
 vcpu tcg guest_mem_before(TCGv vaddr, uint8_t info) "info=%d", "vaddr=0x%016"PRIx64" info=%d"
 
+# translator.c
+vcpu tcg tb_enter(uint64_t pc_start, TCGv pc_end) "pc_start:0x%"PRIx64, "pc:0x%"PRIx64" pc_end:0x%"PRIx64
+
 # linux-user/syscall.c
 # bsd-user/syscall.c
 
 # @num: System call number.
 # @arg*: System call argument value.