[4/6] trace: Add per-vCPU tracing states for events with the 'vcpu' property

Message ID: 145641258612.30097.7127731954660712163.stgit@localhost
State: New

Commit Message

Lluís Vilanova Feb. 25, 2016, 3:03 p.m. UTC
Each vCPU gets a 'trace_dstate' bitmap to control the per-vCPU dynamic
tracing state of events with the 'vcpu' property.

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
---
 Makefile.objs            |    1 +
 bsd-user/main.c          |    2 +
 include/qom/cpu.h        |   12 ++++++++
 linux-user/main.c        |    2 +
 qom/cpu.c                |    1 +
 trace/Makefile.objs      |   26 ++++++++++++++++++
 trace/control-internal.h |   35 ++++++++++++++++++++----
 trace/control-stub.c     |   29 ++++++++++++++++++++
 trace/control-target.c   |   58 +++++++++++++++++++++++++++++++++++++++
 trace/control.c          |   25 ++++++++++++++++-
 trace/control.h          |   68 +++++++++++++++++++++++++++++++++++++++++++++-
 vl.c                     |    1 +
 12 files changed, 251 insertions(+), 9 deletions(-)
 create mode 100644 trace/control-stub.c
 create mode 100644 trace/control-target.c

Comments

Stefan Hajnoczi June 9, 2016, 12:07 p.m. UTC | #1
On Thu, Feb 25, 2016 at 04:03:06PM +0100, Lluís Vilanova wrote:
>  @@ -332,6 +334,16 @@ struct CPUState {
>      struct KVMState *kvm_state;
>      struct kvm_run *kvm_run;
>  
> +#define TRACE_VCPU_DSTATE_TYPE uint32_t
> +    TRACE_VCPU_DSTATE_TYPE trace_dstate;

Please use typedef instead of #define.
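
For example (the type name below is just illustrative, not something this
series defines):

    typedef uint32_t TraceVCPUDState;   /* hypothetical name */
    TraceVCPUDState trace_dstate;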

> +    /*
> +     * Ensure 'trace_dstate' can encode event states as a bitmask. This limits
> +     * the number of events with the 'vcpu' property and *without* the
> +     * 'disabled' property.
> +     */
> +    bool too_many_vcpu_events[
> +        TRACE_VCPU_EVENT_COUNT > sizeof(TRACE_VCPU_DSTATE_TYPE)*8 ? -1 : 0];

Why limit ourselves to a scalar when "qemu/bitops.h" and "qemu/bitmap.h"
provide functions for arbitrary length bitmaps?

See DECLARE_BITMAP(), set_bit(), clear_bit(), test_bit().
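
For example (a rough sketch; 'vcpu_ev_id' stands for the per-vCPU event ID
this series already computes with trace_event_get_cpu_id()):

    /* in struct CPUState, instead of a fixed-width scalar: */
    DECLARE_BITMAP(trace_dstate, TRACE_VCPU_EVENT_COUNT);

    /* enable/disable/query one 'vcpu' event for this vCPU */
    set_bit(vcpu_ev_id, cpu->trace_dstate);
    clear_bit(vcpu_ev_id, cpu->trace_dstate);
    if (test_bit(vcpu_ev_id, cpu->trace_dstate)) {
        /* event is enabled on this vCPU */
    }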

> @@ -61,7 +69,7 @@ static inline bool trace_event_get_state_static(TraceEvent *ev)
>  static inline bool trace_event_get_state_dynamic_by_id(TraceEventID id)
>  {
>      /* it's on fast path, avoid consistency checks (asserts) */
> -    return unlikely(trace_events_enabled_count) && trace_events_dstate[id];
> +    return unlikely(trace_events_enabled_count) && (trace_events_dstate[id] > 0);

typeof(trace_events_dstate[0]) is size_t, so trace_events_dstate[id] > 0
is equivalent to trace_events_dstate[id] (due to unsigned).  Why change
this line?

> +void trace_event_set_state_dynamic(TraceEvent *ev, bool state)
> +{
> +    CPUState *cpu;
> +    assert(ev != NULL);
> +    assert(trace_event_get_state_static(ev));
> +    if (trace_event_get_cpu_id(ev) != trace_event_cpu_count()) {
> +        CPU_FOREACH(cpu) {
> +            trace_event_set_cpu_state_dynamic(cpu, ev, state);
> +        }
> +    } else {
> +        TraceEventID id = trace_event_get_id(ev);
> +        trace_events_enabled_count += state - trace_events_dstate[id];
> +        trace_events_dstate[id] = state;
> +    }
> +}

I find it a little confusing to use different semantics for
trace_events_dstate[] elements depending on trace_event_get_cpu_id(ev)
!= trace_event_cpu_count().  In other words, it either acts as a vcpu
enabled counter or as an enable/disable flag.

That said, it's nice to preserve the non-cpu_id case since it was
written by Paolo as a performance optimization.  Changing it could
introduce a regression so I think your approach is okay.

> +
> +void trace_event_set_cpu_state_dynamic(CPUState *cpu,
> +                                       TraceEvent *ev, bool state)
> +{
> +    TraceEventID id;
> +    TraceEventVCPUID cpu_id;
> +    TRACE_VCPU_DSTATE_TYPE bit;
> +    bool state_pre;
> +    assert(cpu != NULL);
> +    assert(ev != NULL);
> +    assert(trace_event_get_state_static(ev));
> +    assert(trace_event_get_cpu_id(ev) != trace_event_cpu_count());
> +    id = trace_event_get_id(ev);
> +    cpu_id = trace_event_get_cpu_id(ev);
> +    bit = ((TRACE_VCPU_DSTATE_TYPE)1) << cpu_id;
> +    state_pre = cpu->trace_dstate & bit;
> +    if ((state_pre == 0) != (state == 0)) {

Simpler expression:

if (state_pre != state)

> @@ -21,7 +21,10 @@
>  #include "qemu/error-report.h"
>  
>  int trace_events_enabled_count;
> -bool trace_events_dstate[TRACE_EVENT_COUNT];
> +/* With the 'vcpu' property, counts how many vCPUs have it enabled. */
> +size_t trace_events_dstate[TRACE_EVENT_COUNT];

The number of cpus has type int (see CPUState *qemu_get_cpu(int index)).

Why did you choose size_t?

Lluís Vilanova June 9, 2016, 2:17 p.m. UTC | #2
Stefan Hajnoczi writes:

> On Thu, Feb 25, 2016 at 04:03:06PM +0100, Lluís Vilanova wrote:
>  @@ -332,6 +334,16 @@ struct CPUState {
>> struct KVMState *kvm_state;
>> struct kvm_run *kvm_run;
>> 
>> +#define TRACE_VCPU_DSTATE_TYPE uint32_t
>> +    TRACE_VCPU_DSTATE_TYPE trace_dstate;

> Please use typedef instead of #define.

>> +    /*
>> +     * Ensure 'trace_dstate' can encode event states as a bitmask. This limits
>> +     * the number of events with the 'vcpu' property and *without* the
>> +     * 'disabled' property.
>> +     */
>> +    bool too_many_vcpu_events[
>> +        TRACE_VCPU_EVENT_COUNT > sizeof(TRACE_VCPU_DSTATE_TYPE)*8 ? -1 : 0];

> Why limit ourselves to a scalar when "qemu/bitops.h" and "qemu/bitmap.h"
> provide functions for arbitrary length bitmaps?

> See DECLARE_BITMAP(), set_bit(), clear_bit(), test_bit().

Oh, I wasn't aware of the bitmap functions. Nice catch.


>> @@ -61,7 +69,7 @@ static inline bool trace_event_get_state_static(TraceEvent *ev)
>> static inline bool trace_event_get_state_dynamic_by_id(TraceEventID id)
>> {
>> /* it's on fast path, avoid consistency checks (asserts) */
>> -    return unlikely(trace_events_enabled_count) && trace_events_dstate[id];
>> +    return unlikely(trace_events_enabled_count) && (trace_events_dstate[id] > 0);

> typeof(trace_events_dstate[0]) is size_t, so trace_events_dstate[id] > 0
> is equivalent to trace_events_dstate[id] (due to unsigned).  Why change
> this line?

Sorry, I have a tendency to make this kind of check explicit when the types are
not boolean (for a maybe-false sense of future-proofing). I can leave it as it
was if it bothers you.


>> +void trace_event_set_state_dynamic(TraceEvent *ev, bool state)
>> +{
>> +    CPUState *cpu;
>> +    assert(ev != NULL);
>> +    assert(trace_event_get_state_static(ev));
>> +    if (trace_event_get_cpu_id(ev) != trace_event_cpu_count()) {
>> +        CPU_FOREACH(cpu) {
>> +            trace_event_set_cpu_state_dynamic(cpu, ev, state);
>> +        }
>> +    } else {
>> +        TraceEventID id = trace_event_get_id(ev);
>> +        trace_events_enabled_count += state - trace_events_dstate[id];
>> +        trace_events_dstate[id] = state;
>> +    }
>> +}

> I find it a little confusing to use different semantics for
> trace_events_dstate[] elements depending on trace_event_get_cpu_id(ev)
> != trace_event_cpu_count().  In other words, it either acts as a vcpu
> enabled counter or as an enable/disable flag.

> That said, it's nice to preserve the non-cpu_id case since it was
> written by Paolo as a performance optimization.  Changing it could
> introduce a regression so I think your approach is okay.

Yes, it's a bit messy. I'll add some proper documentation about how this is
interpreted.
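
Something along these lines, perhaps (exact wording still to be decided; the
element type is the one from the posted patch):

    /*
     * Dynamic state of an event:
     *  - without the 'vcpu' property: 0 (disabled) or 1 (enabled);
     *  - with the 'vcpu' property: number of vCPUs that currently have
     *    the event enabled.
     */
    size_t trace_events_dstate[TRACE_EVENT_COUNT];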


>> +
>> +void trace_event_set_cpu_state_dynamic(CPUState *cpu,
>> +                                       TraceEvent *ev, bool state)
>> +{
>> +    TraceEventID id;
>> +    TraceEventVCPUID cpu_id;
>> +    TRACE_VCPU_DSTATE_TYPE bit;
>> +    bool state_pre;
>> +    assert(cpu != NULL);
>> +    assert(ev != NULL);
>> +    assert(trace_event_get_state_static(ev));
>> +    assert(trace_event_get_cpu_id(ev) != trace_event_cpu_count());
>> +    id = trace_event_get_id(ev);
>> +    cpu_id = trace_event_get_cpu_id(ev);
>> +    bit = ((TRACE_VCPU_DSTATE_TYPE)1) << cpu_id;
>> +    state_pre = cpu->trace_dstate & bit;
>> +    if ((state_pre == 0) != (state == 0)) {

> Simpler expression:

> if (state_pre != state)

>> @@ -21,7 +21,10 @@
>> #include "qemu/error-report.h"
>> 
>> int trace_events_enabled_count;
>> -bool trace_events_dstate[TRACE_EVENT_COUNT];
>> +/* With the 'vcpu' property, counts how many vCPUs have it enabled. */
>> +size_t trace_events_dstate[TRACE_EVENT_COUNT];

> The number of cpus has type int (see CPUState *qemu_get_cpu(int index)).

> Why did you choose size_t?

It just sounds proper to me to use size_t, since the state can never be negative
(it's either interpreted as a boolean or as an unsigned counter, depending on
the "vcpu" property).


Thanks,
  Lluis

Stefan Hajnoczi June 10, 2016, 4:25 p.m. UTC | #3
On Thu, Jun 09, 2016 at 04:17:11PM +0200, Lluís Vilanova wrote:
> >> @@ -61,7 +69,7 @@ static inline bool trace_event_get_state_static(TraceEvent *ev)
> >> static inline bool trace_event_get_state_dynamic_by_id(TraceEventID id)
> >> {
> >> /* it's on fast path, avoid consistency checks (asserts) */
> >> -    return unlikely(trace_events_enabled_count) && trace_events_dstate[id];
> >> +    return unlikely(trace_events_enabled_count) && (trace_events_dstate[id] > 0);
> 
> > typeof(trace_events_dstate[0]) is size_t, so trace_events_dstate[id] > 0
> > is equivalent to trace_events_dstate[id] (due to unsigned).  Why change
> > this line?
> 
> Sorry, I have a tendency to make this type of checks explicit when the types are
> not boolean (for a maybe-false sense of future-proofing). I can leave it as it
> was if it bothers you.

When reviewing patches I try to understand each change.  When I don't
see a reason for a change I need to ask.

In general it's easier to leave code as-is unless there is a need to
change it.  But there are no hard rules :).

> >> +void trace_event_set_state_dynamic(TraceEvent *ev, bool state)
> >> +{
> >> +    CPUState *cpu;
> >> +    assert(ev != NULL);
> >> +    assert(trace_event_get_state_static(ev));
> >> +    if (trace_event_get_cpu_id(ev) != trace_event_cpu_count()) {
> >> +        CPU_FOREACH(cpu) {
> >> +            trace_event_set_cpu_state_dynamic(cpu, ev, state);
> >> +        }
> >> +    } else {
> >> +        TraceEventID id = trace_event_get_id(ev);
> >> +        trace_events_enabled_count += state - trace_events_dstate[id];
> >> +        trace_events_dstate[id] = state;
> >> +    }
> >> +}
> 
> > I find it a little confusing to use different semantics for
> > trace_events_dstate[] elements depending on trace_event_get_cpu_id(ev)
> > != trace_event_cpu_count().  In other words, it either acts as a vcpu
> > enabled counter or as an enable/disable flag.
> 
> > That said, it's nice to preserve the non-cpu_id case since it was
> > written by Paolo as a performance optimization.  Changing it could
> > introduce a regression so I think your approach is okay.
> 
> Yes, it's a bit messy. I'll add some proper documentation about how this is
> interpreted.

Thanks!

> > The number of cpus has type int (see CPUState *qemu_get_cpu(int index)).
> 
> > Why did you choose size_t?
> 
> It just sounds proper to me to use size_t, since the state can never be negative
> (it's either interpreted as a boolean or as an unsigned counter, depending on
> the "vcpu" property).

If you feel strongly about it, feel free to keep it.  Alternative
reasoning about the type:

int is the CPU index type used in qemu_get_cpu().  It is guaranteed to
be large enough for the vcpu count.  IMO there's no need to select a new
type, but there's more...

size_t is larger than necessary on 64-bit machines and has an impact on
the CPU cache performance that Paolo's optimization takes advantage of
(if you trigger adjacent trace event IDs they will probably already be
in cache).

size_t made me have to think hard when reading the "int += bool -
size_t" statement for updating trace_events_enabled_count.

If int is used then it's clear that int = (int)bool - int will be one of
[-1, 0, +1].

But with size_t you have to start wondering whether the type coercion
is portable and works as expected:

int = (int)((size_t)bool - size_t);

In "6.3.1.3 Signed and unsigned integers" the C99 standard says:

  [If] the new type is signed and the value cannot be represented in
  it; either the result is implementation-defined or an
  implementation-defined signal is raised.

The size_t -> int conversion is therefore implementation-defined.  This
is not portable although QEMU probably does it in many places.

So for these reasons, I think int is the natural choice.
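
For illustration, the counter-style update with everything kept as int (only
the element types differ from what the series posted):

    int trace_events_enabled_count;
    int trace_events_dstate[TRACE_EVENT_COUNT];

    void trace_event_set_state_dynamic(TraceEvent *ev, bool state)
    {
        TraceEventID id = trace_event_get_id(ev);
        /* (int)bool - int is trivially one of -1, 0 or +1 */
        trace_events_enabled_count += (int)state - trace_events_dstate[id];
        trace_events_dstate[id] = state;
    }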

Stefan

Lluís Vilanova June 10, 2016, 5:52 p.m. UTC | #4
Stefan Hajnoczi writes:

> On Thu, Jun 09, 2016 at 04:17:11PM +0200, Lluís Vilanova wrote:
>> >> @@ -61,7 +69,7 @@ static inline bool trace_event_get_state_static(TraceEvent *ev)
>> >> static inline bool trace_event_get_state_dynamic_by_id(TraceEventID id)
>> >> {
>> >> /* it's on fast path, avoid consistency checks (asserts) */
>> >> -    return unlikely(trace_events_enabled_count) && trace_events_dstate[id];
>> >> +    return unlikely(trace_events_enabled_count) && (trace_events_dstate[id] > 0);
>> 
>> > typeof(trace_events_dstate[0]) is size_t, so trace_events_dstate[id] > 0
>> > is equivalent to trace_events_dstate[id] (due to unsigned).  Why change
>> > this line?
>> 
>> Sorry, I have a tendency to make this type of checks explicit when the types are
>> not boolean (for a maybe-false sense of future-proofing). I can leave it as it
>> was if it bothers you.

> When reviewing patches I try to understand each change.  When I don't
> see a reason for a change I need to ask.

> In general it's easier to leave code as-is unless there is a need to
> change it.  But there are no hard rules :).

I'll refrain from pushing my manias into QEMU :)


[...]
>> > The number of cpus has type int (see CPUState *qemu_get_cpu(int index)).
>> 
>> > Why did you choose size_t?
>> 
>> It just sounds proper to me to use size_t, since the state can never be negative
>> (it's either interpreted as a boolean or as an unsigned counter, depending on
>> the "vcpu" property).

> If you feel strongly about it, feel free to keep it.  Alternative
> reasoning about the type:

> int is the CPU index type used in qemu_get_cpu().  It is guaranteed to
> be large enough for the vcpu count.  IMO there's no need to select a new
> type, but there's more...

> size_t is larger than necessary on 64-bit machines and has an impact on
> the CPU cache performance that Paolo's optimization takes advantage of
> (if you trigger adjacent trace event IDs they will probably already be
> in cache).

> size_t made me have to think hard when reading the "int += bool -
> size_t" statement for updating trace_events_enabled_count.

> If int is used then it's clear that int = (int)bool - int will be one of
> [-1, 0, +1].

> But with size_t you have to starting wondering whether the type coercion
> is portable and works as expected:

> int = (int)((size_t)bool - size_t);

> In "6.3.1.3 Signed and unsigned integers" the C99 standard says:

>   [If] the new type is signed and the value cannot be represented in
>   it; either the result is implementation-defined or an
>   implementation-defined signal is raised.

> The size_t -> int conversion is therefore implementation-defined.  This
> is not portable although QEMU probably does it in many places.

> So for these reasons, I think int is the natural choice.

Fair point. But now I feel tempted to change both trace_events_dstate and
trace_events_enabled_count into unsigned int... it burns me when I see signed
types used only on their positives by design.

But don't worry, I'll change trace_events_dstate into int :)


Thanks!
  Lluis

Paolo Bonzini June 13, 2016, 8:38 a.m. UTC | #5
On 10/06/2016 19:52, Lluís Vilanova wrote:
> Fair point. But now I feel tempted to change both trace_events_dstate and
> trace_events_enabled_count into unsigned int... it burns me when I see signed
> types used only on their positives by design.
> 
> But don't worry, I'll change trace_events_dstate into int :)

unsigned int would be fine too.

Paolo

Paolo Bonzini June 13, 2016, 9:13 a.m. UTC | #6
First of all, a generic problem I see with your patches is that the
newly-introduced APIs are not providing a good abstraction.

If something is only used internally, as is the case for
trace_event_get_cpu_id, you don't need accessors.  On the other hand,
when you have a repeated expression such as

     trace_event_get_cpu_id(ev) != trace_event_cpu_count()

then you need an API such as trace_event_is_vcpu(ev).
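
Something like this, assuming the 'cpu_id' field your generated events
already carry:

    static inline bool trace_event_is_vcpu(TraceEvent *ev)
    {
        return ev->cpu_id != trace_event_cpu_count();
    }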

Another small ugliness is that you are using "vcpu" in trace-events and
in the generated files, but "cpu" in the C file.  My suggestion is to
prefix functions with vcpu_trace_event if they refer to per-VCPU trace
events, and only use the VCPU ids in those functions.
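
For example (names only; the signatures are the ones posted in this series):

    void trace_event_set_state_dynamic(TraceEvent *ev, bool state);
    void vcpu_trace_event_set_state_dynamic(CPUState *cpu,
                                            TraceEvent *ev, bool state);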

On 25/02/2016 16:03, Lluís Vilanova wrote:
> +static inline bool trace_event_get_cpu_state_dynamic(CPUState *cpu,
> +                                                     TraceEvent *ev)
>  {
> -    int id = trace_event_get_id(ev);
> +    TraceEventVCPUID id;
> +    assert(cpu != NULL);
>      assert(ev != NULL);

Please do not add more "!= NULL" asserts.  In fact, we should remove the
others; in this case the ev != NULL assertion is particularly pointless
since it comes after a dereference.

>      assert(trace_event_get_state_static(ev));
> -    trace_events_enabled_count += state - trace_events_dstate[id];
> -    trace_events_dstate[id] = state;
> +    assert(trace_event_get_cpu_id(ev) != trace_event_cpu_count());
> +    id = trace_event_get_cpu_id(ev);
> +    return trace_event_get_cpu_state_dynamic_by_cpu_id(cpu, id);

Based on the above suggestion regarding APIs:

    assert(trace_event_is_vcpu(ev));
    return vcpu_trace_event_get_state_dynamic(cpu, ev->cpu_id);

>  }
>  
>  #endif  /* TRACE__CONTROL_INTERNAL_H */
> diff --git a/trace/control-stub.c b/trace/control-stub.c
> new file mode 100644
> index 0000000..858b13e
> --- /dev/null
> +++ b/trace/control-stub.c
> @@ -0,0 +1,29 @@
> +/*
> + * Interface for configuring and controlling the state of tracing events.
> + *
> + * Copyright (C) 2014-2016 Lluís Vilanova <vilanova@ac.upc.edu>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "trace/control.h"

This is not a stub; in fact it duplicates a bunch of code from
trace/control.c.

The actual stubs are trace_event_set_cpu_state_dynamic() (which I'd
rename to vcpu_trace_event_set_state_dynamic) and
vcpu_trace_event_set_state_dynamic_all that does a CPU_FOREACH.
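
Roughly (the bodies here are only a sketch of what I mean, not tested code):

    /* trace/control-target.c */
    void vcpu_trace_event_set_state_dynamic_all(TraceEvent *ev, bool state)
    {
        CPUState *cpu;

        CPU_FOREACH(cpu) {
            vcpu_trace_event_set_state_dynamic(cpu, ev, state);
        }
    }

    /* trace/control-stub.c: no vCPUs exist, so this is a no-op */
    void vcpu_trace_event_set_state_dynamic_all(TraceEvent *ev, bool state)
    {
    }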

That said, I am skeptical about the benefit of the interfaces you are
adding.  They add a lot of complication and overhead (especially
regarding the memory/cache overhead of the dstate array) without a clear
use case, in my opinion; all the processing you do at run-time is just
as well suited for later filtering.

I also believe that it's a bad idea to add "stuff" to trace-tool without
a user; unless I'm mistaken neither "vcpu" nor "tcg" trace events are
unused in qemu.git, and this means that the ~400 lines added in this
series are actually dead code.

Paolo

Lluís Vilanova June 13, 2016, 12:15 p.m. UTC | #7
Paolo Bonzini writes:

> First of all, a generic problem I see with your patches is that the
> newly-introduced APIs are not providing a good abstraction.

> If something is only used internally, as is the case for
> trace_event_get_cpu_id, you don't need accessors.  On the other hand,
> when you have a repeated expression such as

>      trace_event_get_cpu_id(ev) != trace_event_cpu_count()

> then you need an API such as trace_event_is_vcpu(ev).

> Another small ugliness is that you are using "vcpu" in trace-events and
> in the generated files, but "cpu" in the C file.  My suggestion is to
> prefix functions with vcpu_trace_event if they refer to per-VCPU trace
> events, and only use the VCPU ids in those functions.

I'll fix these two.


> On 25/02/2016 16:03, Lluís Vilanova wrote:
>> +static inline bool trace_event_get_cpu_state_dynamic(CPUState *cpu,
>> +                                                     TraceEvent *ev)
>> {
>> -    int id = trace_event_get_id(ev);
>> +    TraceEventVCPUID id;
>> +    assert(cpu != NULL);
>> assert(ev != NULL);

> Please do not add more "!= NULL" asserts.  In fact, we should remove the
> others; in this case the ev != NULL assertion is particularly pointless
> since it comes after a dereference.

And the asserts too.


>> assert(trace_event_get_state_static(ev));
>> -    trace_events_enabled_count += state - trace_events_dstate[id];
>> -    trace_events_dstate[id] = state;
>> +    assert(trace_event_get_cpu_id(ev) != trace_event_cpu_count());
>> +    id = trace_event_get_cpu_id(ev);
>> +    return trace_event_get_cpu_state_dynamic_by_cpu_id(cpu, id);

> Based on the above suggestion regarding APIs:

>     assert(trace_event_is_vcpu(ev));
>     return vcpu_trace_event_get_state_dynamic(cpu, ev->cpu_id);

>> }
>> 
>> #endif  /* TRACE__CONTROL_INTERNAL_H */
>> diff --git a/trace/control-stub.c b/trace/control-stub.c
>> new file mode 100644
>> index 0000000..858b13e
>> --- /dev/null
>> +++ b/trace/control-stub.c
>> @@ -0,0 +1,29 @@
>> +/*
>> + * Interface for configuring and controlling the state of tracing events.
>> + *
>> + * Copyright (C) 2014-2016 Lluís Vilanova <vilanova@ac.upc.edu>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> + * See the COPYING file in the top-level directory.
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "trace/control.h"

> This is not a stub, in fact it has a bunch of duplicate code with
> trace/control.c.

> The actual stubs are trace_event_set_cpu_state_dynamic() (which I'd
> rename to vcpu_trace_event_set_state_dynamic) and
> vcpu_trace_event_set_state_dynamic_all that does a CPU_FOREACH.

That follows the naming convention of "sources compiled when there is no
target" (like some command-line tools). If there is another naming convention
for such cases, please let me know.


> That said, I am skeptical about the benefit of the interfaces you are
> adding.  They add a lot of complication and overhead (especially
> regarding the memory/cache overhead of the dstate array) without a clear
> use case, in my opinion; all the processing you do at run-time is just
> as well suited for later filtering.

This should make tracing faster in the future with multi-threaded TCG, as well
as make trace files much smaller if you're tracing something like memory
accesses. Also, bear in mind this series was split from a much larger one for
simplicity. The follow-up one provides much larger performance benefits by
avoiding the generation of TCG code to call the tracing backend when a vCPU is
not traced.


> I also believe that it's a bad idea to add "stuff" to trace-tool without
> a user; unless I'm mistaken neither "vcpu" nor "tcg" trace events are
> unused in qemu.git, and this means that the ~400 lines added in this
> series are actually dead code.

Events using these features are being added in parallel to this series, like the
recently accepted "guest_mem_before". I also have some events in my queue
tracing bbl and instruction execution, as well as user-mode syscalls.


Thanks,
  Lluis

Lluís Vilanova June 13, 2016, 12:17 p.m. UTC | #8
Paolo Bonzini writes:

> On 10/06/2016 19:52, Lluís Vilanova wrote:
>> Fair point. But now I feel tempted to change both trace_events_dstate and
>> trace_events_enabled_count into unsigned int... it burns me when I see signed
>> types used only on their positives by design.
>> 
>> But don't worry, I'll change trace_events_dstate into int :)

> unsigned int would be fine too.

You mean only for this patch, or changing all vcpu indexes to unsigned int?

Thanks,
  Lluis

Paolo Bonzini June 13, 2016, 2:08 p.m. UTC | #9
On 13/06/2016 14:15, Lluís Vilanova wrote:
> > That said, I am skeptical about the benefit of the interfaces you are
> > adding.  They add a lot of complication and overhead (especially
> > regarding the memory/cache overhead of the dstate array) without a clear
> > use case, in my opinion; all the processing you do at run-time is just
> > as well suited for later filtering.
> 
> This should make tracing faster on the future with multi-threaded TCG, as well
> as trace files much smaller if you're tracing something like memory
> accesses. Also, bear in mind this series was split from a much larger one for
> simplicity. The follow-up one provides much larger performance benefits by
> avoiding the generation of TCG code to call the tracing backend when a vCPU is
> not traced.

This still assumes that tracing only some VCPUs is a common use case.
Is it?...

Paolo

Lluís Vilanova June 13, 2016, 2:38 p.m. UTC | #10
Lluís Vilanova writes:

> Paolo Bonzini writes:
>> First of all, a generic problem I see with your patches is that the
>> newly-introduced APIs are not providing a good abstraction.

>> If something is only used internally, as is the case for
>> trace_event_get_cpu_id, you don't need accessors.  On the other hand,
>> when you have a repeated expression such as

>> trace_event_get_cpu_id(ev) != trace_event_cpu_count()

>> then you need an API such as trace_event_is_vcpu(ev).

>> Another small ugliness is that you are using "vcpu" in trace-events and
>> in the generated files, but "cpu" in the C file.  My suggestion is to
>> prefix functions with vcpu_trace_event if they refer to per-VCPU trace
>> events, and only use the VCPU ids in those functions.

> I'll fix these two.

BTW, I'd rather keep the getters for this series, if only for the sake of
tracing API consistency (e.g., we already have 'trace_event_get_id()').

I will send a separate series removing the existing superfluous asserts (I won't
be adding more in this series), and can extend it to remove the trivial getters
from the tracing API if that's necessary.


Thanks,
  Lluis

Lluís Vilanova June 13, 2016, 4:39 p.m. UTC | #11
Paolo Bonzini writes:

> On 13/06/2016 14:15, Lluís Vilanova wrote:
>> > That said, I am skeptical about the benefit of the interfaces you are
>> > adding.  They add a lot of complication and overhead (especially
>> > regarding the memory/cache overhead of the dstate array) without a clear
>> > use case, in my opinion; all the processing you do at run-time is just
>> > as well suited for later filtering.
>> 
>> This should make tracing faster on the future with multi-threaded TCG, as well
>> as trace files much smaller if you're tracing something like memory
>> accesses. Also, bear in mind this series was split from a much larger one for
>> simplicity. The follow-up one provides much larger performance benefits by
>> avoiding the generation of TCG code to call the tracing backend when a vCPU is
>> not traced.

> This still assumes that tracing only some VCPUs is a common use case.
> Is it?...

I use it for code profiling by sampling across vCPUs, or only on vCPUs that I
know run processes I'm interested in. The profiles can then be used for
analyzing the application/system behaviour.

Also, with the fast-path checks already in place ('trace_events_enabled_count'),
performance when not tracing should never be worse with this series.

If this feature does not look useful to overall QEMU I will fold it into my
other out-of-tree patches.


Cheers,
  Lluis

Stefan Hajnoczi June 14, 2016, 8:39 a.m. UTC | #12
On Mon, Jun 13, 2016 at 06:39:46PM +0200, Lluís Vilanova wrote:
> Paolo Bonzini writes:
> 
> > On 13/06/2016 14:15, Lluís Vilanova wrote:
> >> > That said, I am skeptical about the benefit of the interfaces you are
> >> > adding.  They add a lot of complication and overhead (especially
> >> > regarding the memory/cache overhead of the dstate array) without a clear
> >> > use case, in my opinion; all the processing you do at run-time is just
> >> > as well suited for later filtering.
> >> 
> >> This should make tracing faster on the future with multi-threaded TCG, as well
> >> as trace files much smaller if you're tracing something like memory
> >> accesses. Also, bear in mind this series was split from a much larger one for
> >> simplicity. The follow-up one provides much larger performance benefits by
> >> avoiding the generation of TCG code to call the tracing backend when a vCPU is
> >> not traced.
> 
> > This still assumes that tracing only some VCPUs is a common use case.
> > Is it?...
> 
> I use it for code profiling by sampling across vCPUs, or only on vCPUs I know
> run processes of my interest. The profiles can then be used for analyzing the
> application/system behaviour.
> 
> Also, with the fast-path checks already in place ('trace_events_enabled_count'),
> performance when not tracing should never be worse with this series.
> 
> If this feature does not look useful to overall QEMU I will fold it into my
> other out-of-tree patches.

I think the per-vcpu tracing feature is reasonable for qemu.git as long
as it does not introduce performance regressions for existing users.

Stefan

Paolo Bonzini June 14, 2016, 9:13 a.m. UTC | #13
On 14/06/2016 10:39, Stefan Hajnoczi wrote:
> On Mon, Jun 13, 2016 at 06:39:46PM +0200, Lluís Vilanova wrote:
>> Paolo Bonzini writes:
>>
>>> On 13/06/2016 14:15, Lluís Vilanova wrote:
>>>>> That said, I am skeptical about the benefit of the interfaces you are
>>>>> adding.  They add a lot of complication and overhead (especially
>>>>> regarding the memory/cache overhead of the dstate array) without a clear
>>>>> use case, in my opinion; all the processing you do at run-time is just
>>>>> as well suited for later filtering.
>>>>
>>>> This should make tracing faster on the future with multi-threaded TCG, as well
>>>> as trace files much smaller if you're tracing something like memory
>>>> accesses. Also, bear in mind this series was split from a much larger one for
>>>> simplicity. The follow-up one provides much larger performance benefits by
>>>> avoiding the generation of TCG code to call the tracing backend when a vCPU is
>>>> not traced.
>>
>>> This still assumes that tracing only some VCPUs is a common use case.
>>> Is it?...
>>
>> I use it for code profiling by sampling across vCPUs, or only on vCPUs I know
>> run processes of my interest. The profiles can then be used for analyzing the
>> application/system behaviour.
>>
>> Also, with the fast-path checks already in place ('trace_events_enabled_count'),
>> performance when not tracing should never be worse with this series.
>>
>> If this feature does not look useful to overall QEMU I will fold it into my
>> other out-of-tree patches.
> 
> I think the per-vcpu tracing feature is reasonable for qemu.git as long
> as it does not introduce performance regressions for existing users.

I'm okay with it if the dstate array is changed to uint16_t.
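
That is, something like (just sketching the type change):

    /* per-event count of vCPUs that have the event enabled */
    uint16_t trace_events_dstate[TRACE_EVENT_COUNT];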

Thanks,

Paolo

Lluís Vilanova June 14, 2016, 12:17 p.m. UTC | #14
Paolo Bonzini writes:

> On 14/06/2016 10:39, Stefan Hajnoczi wrote:
>> On Mon, Jun 13, 2016 at 06:39:46PM +0200, Lluís Vilanova wrote:
>>> Paolo Bonzini writes:
>>> 
>>>> On 13/06/2016 14:15, Lluís Vilanova wrote:
>>>>>> That said, I am skeptical about the benefit of the interfaces you are
>>>>>> adding.  They add a lot of complication and overhead (especially
>>>>>> regarding the memory/cache overhead of the dstate array) without a clear
>>>>>> use case, in my opinion; all the processing you do at run-time is just
>>>>>> as well suited for later filtering.
>>>>> 
>>>>> This should make tracing faster on the future with multi-threaded TCG, as well
>>>>> as trace files much smaller if you're tracing something like memory
>>>>> accesses. Also, bear in mind this series was split from a much larger one for
>>>>> simplicity. The follow-up one provides much larger performance benefits by
>>>>> avoiding the generation of TCG code to call the tracing backend when a vCPU is
>>>>> not traced.
>>> 
>>>> This still assumes that tracing only some VCPUs is a common use case.
>>>> Is it?...
>>> 
>>> I use it for code profiling by sampling across vCPUs, or only on vCPUs I know
>>> run processes of my interest. The profiles can then be used for analyzing the
>>> application/system behaviour.
>>> 
>>> Also, with the fast-path checks already in place ('trace_events_enabled_count'),
>>> performance when not tracing should never be worse with this series.
>>> 
>>> If this feature does not look useful to overall QEMU I will fold it into my
>>> other out-of-tree patches.
>> 
>> I think the per-vcpu tracing feature is reasonable for qemu.git as long
>> as it does not introduce performance regressions for existing users.

> I'm okay with it if the dstate array is changed to uint16_t.

That should be fine. I don't think QEMU is gonna run more than 2**16 vCPUs
anytime soon :)


Cheers,
  Lluis

Patch

diff --git a/Makefile.objs b/Makefile.objs
index fbcaa74..402efb3 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -103,6 +103,7 @@  version-lobj-$(CONFIG_WIN32) += $(BUILD_DIR)/version.lo
 # tracing
 util-obj-y +=  trace/
 target-obj-y += trace/
+stub-obj-y += trace/
 
 ######################################################################
 # guest agent
diff --git a/bsd-user/main.c b/bsd-user/main.c
index 287ec1d..79d2ad0 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -25,6 +25,7 @@ 
 /* For tb_lock */
 #include "cpu.h"
 #include "tcg.h"
+#include "trace/control.h"
 #include "qemu/timer.h"
 #include "qemu/envlist.h"
 #include "exec/log.h"
@@ -1116,6 +1117,7 @@  int main(int argc, char **argv)
         gdbserver_start (gdbstub_port);
         gdb_handlesig(cpu, 0);
     }
+    trace_init_vcpu_events();
     cpu_loop(env);
     /* never exits */
     return 0;
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 1df7cb4..7e19be3 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -28,6 +28,7 @@ 
 #include "qemu/queue.h"
 #include "qemu/thread.h"
 #include "qemu/typedefs.h"
+#include "trace/generated-events.h"
 
 typedef int (*WriteCoreDumpFunction)(const void *buf, size_t size,
                                      void *opaque);
@@ -266,6 +267,7 @@  struct kvm_run;
  * @kvm_fd: vCPU file descriptor for KVM.
  * @work_mutex: Lock to prevent multiple access to queued_work_*.
  * @queued_work_first: First asynchronous work pending.
+ * @trace_dstate: Dynamic tracing state of events for this vCPU.
  *
  * State of one CPU core or thread.
  */
@@ -332,6 +334,16 @@  struct CPUState {
     struct KVMState *kvm_state;
     struct kvm_run *kvm_run;
 
+#define TRACE_VCPU_DSTATE_TYPE uint32_t
+    TRACE_VCPU_DSTATE_TYPE trace_dstate;
+    /*
+     * Ensure 'trace_dstate' can encode event states as a bitmask. This limits
+     * the number of events with the 'vcpu' property and *without* the
+     * 'disabled' property.
+     */
+    bool too_many_vcpu_events[
+        TRACE_VCPU_EVENT_COUNT > sizeof(TRACE_VCPU_DSTATE_TYPE)*8 ? -1 : 0];
+
     /* TODO Move common fields from CPUArchState here. */
     int cpu_index; /* used by alpha TCG */
     uint32_t halted; /* used by alpha, cris, ppc TCG */
diff --git a/linux-user/main.c b/linux-user/main.c
index e719a2d..13ce851 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -25,6 +25,7 @@ 
 #include "qemu-common.h"
 #include "cpu.h"
 #include "tcg.h"
+#include "trace/control.h"
 #include "qemu/timer.h"
 #include "qemu/envlist.h"
 #include "elf.h"
@@ -4650,6 +4651,7 @@  int main(int argc, char **argv, char **envp)
         }
         gdb_handlesig(cpu, 0);
     }
+    trace_init_vcpu_events();
     cpu_loop(env);
     /* never exits */
     return 0;
diff --git a/qom/cpu.c b/qom/cpu.c
index c45d0bb..7363776 100644
--- a/qom/cpu.c
+++ b/qom/cpu.c
@@ -330,6 +330,7 @@  static void cpu_common_initfn(Object *obj)
     qemu_mutex_init(&cpu->work_mutex);
     QTAILQ_INIT(&cpu->breakpoints);
     QTAILQ_INIT(&cpu->watchpoints);
+    cpu->trace_dstate = 0;
 }
 
 static void cpu_common_finalize(Object *obj)
diff --git a/trace/Makefile.objs b/trace/Makefile.objs
index 5145b34..902d47b 100644
--- a/trace/Makefile.objs
+++ b/trace/Makefile.objs
@@ -12,6 +12,8 @@  tracetool-y += $(shell find $(SRC_PATH)/scripts/tracetool -name "*.py")
 # Auto-generated event descriptions for LTTng ust code
 
 ifeq ($(findstring ust,$(TRACE_BACKENDS)),ust)
+
+ifndef MAKEFILE_GUARD_TRACE
 $(obj)/generated-ust-provider.h: $(obj)/generated-ust-provider.h-timestamp
 	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
 $(obj)/generated-ust-provider.h-timestamp: $(SRC_PATH)/trace-events $(tracetool-y)
@@ -30,11 +32,14 @@  $(obj)/generated-ust.c-timestamp: $(SRC_PATH)/trace-events $(tracetool-y)
 
 $(obj)/generated-events.h: $(obj)/generated-ust-provider.h
 $(obj)/generated-events.c: $(obj)/generated-ust.c
+endif					# MAKEFILE_GUARD_TRACE
+
 endif
 
 ######################################################################
 # Auto-generated event descriptions
 
+ifndef MAKEFILE_GUARD_TRACE
 $(obj)/generated-events.h: $(obj)/generated-events.h-timestamp
 	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
 $(obj)/generated-events.h-timestamp: $(SRC_PATH)/trace-events $(tracetool-y)
@@ -50,6 +55,7 @@  $(obj)/generated-events.c-timestamp: $(SRC_PATH)/trace-events $(tracetool-y)
 		--format=events-c \
 		--backends=$(TRACE_BACKENDS) \
 		< $< > $@,"  GEN   $(patsubst %-timestamp,%,$@)")
+endif					# MAKEFILE_GUARD_TRACE
 
 util-obj-y += generated-events.o
 
@@ -60,6 +66,7 @@  util-obj-y += generated-events.o
 ##################################################
 # Execution level
 
+ifndef MAKEFILE_GUARD_TRACE
 $(obj)/generated-tracers.h: $(obj)/generated-tracers.h-timestamp
 	@cmp -s $< $@ || cp $< $@
 $(obj)/generated-tracers.h-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak $(tracetool-y)
@@ -67,10 +74,12 @@  $(obj)/generated-tracers.h-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/conf
 		--format=h \
 		--backends=$(TRACE_BACKENDS) \
 		< $< > $@,"  GEN   $(patsubst %-timestamp,%,$@)")
+endif					# MAKEFILE_GUARD_TRACE
 
 ##############################
 # non-DTrace
 
+ifndef MAKEFILE_GUARD_TRACE
 $(obj)/generated-tracers.c: $(obj)/generated-tracers.c-timestamp
 	@cmp -s $< $@ || cp $< $@
 $(obj)/generated-tracers.c-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak $(tracetool-y)
@@ -80,6 +89,7 @@  $(obj)/generated-tracers.c-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/conf
 		< $< > $@,"  GEN   $(patsubst %-timestamp,%,$@)")
 
 $(obj)/generated-tracers.o: $(obj)/generated-tracers.c $(obj)/generated-tracers.h
+endif					# MAKEFILE_GUARD_TRACE
 
 ##############################
 # DTrace
@@ -88,6 +98,8 @@  $(obj)/generated-tracers.o: $(obj)/generated-tracers.c $(obj)/generated-tracers.
 # but that gets picked up by QEMU's Makefile as an external dependency
 # rule file. So we use '.dtrace' instead
 ifeq ($(findstring dtrace,$(TRACE_BACKENDS)),dtrace)
+
+ifndef MAKEFILE_GUARD_TRACE
 $(obj)/generated-tracers-dtrace.dtrace: $(obj)/generated-tracers-dtrace.dtrace-timestamp
 	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
 $(obj)/generated-tracers-dtrace.dtrace-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak $(tracetool-y)
@@ -100,6 +112,7 @@  $(obj)/generated-tracers-dtrace.h: $(obj)/generated-tracers-dtrace.dtrace
 	$(call quiet-command,dtrace -o $@ -h -s $<, "  GEN   $@")
 
 $(obj)/generated-tracers-dtrace.o: $(obj)/generated-tracers-dtrace.dtrace
+endif					# MAKEFILE_GUARD_TRACE
 
 util-obj-y += generated-tracers-dtrace.o
 endif
@@ -107,6 +120,7 @@  endif
 ##################################################
 # Translation level
 
+ifndef MAKEFILE_GUARD_TRACE
 $(obj)/generated-helpers-wrappers.h: $(obj)/generated-helpers-wrappers.h-timestamp
 	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
 $(obj)/generated-helpers-wrappers.h-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak $(tracetool-y)
@@ -132,10 +146,12 @@  $(obj)/generated-helpers.c-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/conf
 		< $< > $@,"  GEN   $(patsubst %-timestamp,%,$@)")
 
 $(obj)/generated-helpers.o: $(obj)/generated-helpers.c
+endif					# MAKEFILE_GUARD_TRACE
 
 target-obj-y += generated-helpers.o
 
 
+ifndef MAKEFILE_GUARD_TRACE
 $(obj)/generated-tcg-tracers.h: $(obj)/generated-tcg-tracers.h-timestamp
 	@cmp $< $@ >/dev/null 2>&1 || cp $< $@
 $(obj)/generated-tcg-tracers.h-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak $(tracetool-y)
@@ -143,6 +159,7 @@  $(obj)/generated-tcg-tracers.h-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/
 		--format=tcg-h \
 		--backend=$(TRACE_BACKENDS) \
 		< $< > $@,"  GEN   $(patsubst %-timestamp,%,$@)")
+endif					# MAKEFILE_GUARD_TRACE
 
 
 ######################################################################
@@ -152,4 +169,13 @@  util-obj-$(CONFIG_TRACE_SIMPLE) += simple.o generated-tracers.o
 util-obj-$(CONFIG_TRACE_FTRACE) += ftrace.o
 util-obj-$(CONFIG_TRACE_UST) += generated-ust.o
 util-obj-y += control.o
+target-obj-y += control-target.o
+stub-obj-y += control-stub.o
 util-obj-y += qmp.o
+
+
+######################################################################
+# Avoid rule overrides when included from multiple top-level variables
+ifndef MAKEFILE_GUARD_TRACE
+MAKEFILE_GUARD_TRACE = 1
+endif
diff --git a/trace/control-internal.h b/trace/control-internal.h
index d1f99e3..2189063 100644
--- a/trace/control-internal.h
+++ b/trace/control-internal.h
@@ -10,10 +10,13 @@ 
 #ifndef TRACE__CONTROL_INTERNAL_H
 #define TRACE__CONTROL_INTERNAL_H
 
+#include <stddef.h>                     /* size_t */
+
+#include "qom/cpu.h"
 
 
 extern TraceEvent trace_events[];
-extern bool trace_events_dstate[];
+extern size_t trace_events_dstate[];
 extern int trace_events_enabled_count;
 
 
@@ -22,6 +25,11 @@  static inline TraceEventID trace_event_count(void)
     return TRACE_EVENT_COUNT;
 }
 
+static inline TraceEventVCPUID trace_event_cpu_count(void)
+{
+    return TRACE_VCPU_EVENT_COUNT;
+}
+
 static inline TraceEvent *trace_event_id(TraceEventID id)
 {
     assert(id < trace_event_count());
@@ -61,7 +69,7 @@  static inline bool trace_event_get_state_static(TraceEvent *ev)
 static inline bool trace_event_get_state_dynamic_by_id(TraceEventID id)
 {
     /* it's on fast path, avoid consistency checks (asserts) */
-    return unlikely(trace_events_enabled_count) && trace_events_dstate[id];
+    return unlikely(trace_events_enabled_count) && (trace_events_dstate[id] > 0);
 }
 
 static inline bool trace_event_get_state_dynamic(TraceEvent *ev)
@@ -73,13 +81,28 @@  static inline bool trace_event_get_state_dynamic(TraceEvent *ev)
     return trace_event_get_state_dynamic_by_id(id);
 }
 
-static inline void trace_event_set_state_dynamic(TraceEvent *ev, bool state)
+static inline bool trace_event_get_cpu_state_dynamic_by_cpu_id(CPUState *cpu,
+                                                               TraceEventVCPUID id)
+{
+    /* it's on fast path, avoid consistency checks (asserts) */
+    if (unlikely(trace_events_enabled_count)) {
+        TRACE_VCPU_DSTATE_TYPE bit = ((TRACE_VCPU_DSTATE_TYPE)1) << id;
+        return cpu->trace_dstate & bit;
+    } else {
+        return false;
+    }
+}
+
+static inline bool trace_event_get_cpu_state_dynamic(CPUState *cpu,
+                                                     TraceEvent *ev)
 {
-    int id = trace_event_get_id(ev);
+    TraceEventVCPUID id;
+    assert(cpu != NULL);
     assert(ev != NULL);
     assert(trace_event_get_state_static(ev));
-    trace_events_enabled_count += state - trace_events_dstate[id];
-    trace_events_dstate[id] = state;
+    assert(trace_event_get_cpu_id(ev) != trace_event_cpu_count());
+    id = trace_event_get_cpu_id(ev);
+    return trace_event_get_cpu_state_dynamic_by_cpu_id(cpu, id);
 }
 
 #endif  /* TRACE__CONTROL_INTERNAL_H */
diff --git a/trace/control-stub.c b/trace/control-stub.c
new file mode 100644
index 0000000..858b13e
--- /dev/null
+++ b/trace/control-stub.c
@@ -0,0 +1,29 @@ 
+/*
+ * Interface for configuring and controlling the state of tracing events.
+ *
+ * Copyright (C) 2014-2016 Lluís Vilanova <vilanova@ac.upc.edu>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "trace/control.h"
+
+
+void trace_event_set_state_dynamic(TraceEvent *ev, bool state)
+{
+    TraceEventID id;
+    assert(ev != NULL);
+    assert(trace_event_get_state_static(ev));
+    id = trace_event_get_id(ev);
+    trace_events_enabled_count += state - trace_events_dstate[id];
+    trace_events_dstate[id] = state;
+}
+
+void trace_event_set_cpu_state_dynamic(CPUState *cpu,
+                                       TraceEvent *ev, bool state)
+{
+    /* should never be called on non-target binaries */
+    abort();
+}
diff --git a/trace/control-target.c b/trace/control-target.c
new file mode 100644
index 0000000..5ae672e
--- /dev/null
+++ b/trace/control-target.c
@@ -0,0 +1,58 @@ 
+/*
+ * Interface for configuring and controlling the state of tracing events.
+ *
+ * Copyright (C) 2014-2016 Lluís Vilanova <vilanova@ac.upc.edu>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "trace/control.h"
+#include "translate-all.h"
+
+
+void trace_event_set_state_dynamic(TraceEvent *ev, bool state)
+{
+    CPUState *cpu;
+    assert(ev != NULL);
+    assert(trace_event_get_state_static(ev));
+    if (trace_event_get_cpu_id(ev) != trace_event_cpu_count()) {
+        CPU_FOREACH(cpu) {
+            trace_event_set_cpu_state_dynamic(cpu, ev, state);
+        }
+    } else {
+        TraceEventID id = trace_event_get_id(ev);
+        trace_events_enabled_count += state - trace_events_dstate[id];
+        trace_events_dstate[id] = state;
+    }
+}
+
+void trace_event_set_cpu_state_dynamic(CPUState *cpu,
+                                       TraceEvent *ev, bool state)
+{
+    TraceEventID id;
+    TraceEventVCPUID cpu_id;
+    TRACE_VCPU_DSTATE_TYPE bit;
+    bool state_pre;
+    assert(cpu != NULL);
+    assert(ev != NULL);
+    assert(trace_event_get_state_static(ev));
+    assert(trace_event_get_cpu_id(ev) != trace_event_cpu_count());
+    id = trace_event_get_id(ev);
+    cpu_id = trace_event_get_cpu_id(ev);
+    bit = ((TRACE_VCPU_DSTATE_TYPE)1) << cpu_id;
+    state_pre = cpu->trace_dstate & bit;
+    if ((state_pre == 0) != (state == 0)) {
+        if (state) {
+            trace_events_enabled_count++;
+            cpu->trace_dstate |= bit;
+            trace_events_dstate[id]++;
+        } else {
+            trace_events_enabled_count--;
+            cpu->trace_dstate &= ~bit;
+            trace_events_dstate[id]--;
+        }
+    }
+}
diff --git a/trace/control.c b/trace/control.c
index 20d3370..501dc29 100644
--- a/trace/control.c
+++ b/trace/control.c
@@ -1,7 +1,7 @@ 
 /*
  * Interface for configuring and controlling the state of tracing events.
  *
- * Copyright (C) 2011-2014 Lluís Vilanova <vilanova@ac.upc.edu>
+ * Copyright (C) 2011-2016 Lluís Vilanova <vilanova@ac.upc.edu>
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
@@ -21,7 +21,10 @@ 
 #include "qemu/error-report.h"
 
 int trace_events_enabled_count;
-bool trace_events_dstate[TRACE_EVENT_COUNT];
+/* With the 'vcpu' property, counts how many vCPUs have it enabled. */
+size_t trace_events_dstate[TRACE_EVENT_COUNT];
+/* Marks events for late vCPU state init */
+static bool trace_events_dstate_init[TRACE_EVENT_COUNT];
 
 TraceEvent *trace_event_name(const char *name)
 {
@@ -110,7 +113,10 @@  static void do_trace_enable_events(const char *line_buf)
         TraceEvent *ev = NULL;
         while ((ev = trace_event_pattern(line_ptr, ev)) != NULL) {
             if (trace_event_get_state_static(ev)) {
+                /* start tracing */
                 trace_event_set_state_dynamic(ev, enable);
+                /* mark for late vCPU init */
+                trace_events_dstate_init[ev->id] = true;
             }
         }
     } else {
@@ -122,7 +128,10 @@  static void do_trace_enable_events(const char *line_buf)
             error_report("WARNING: trace event '%s' is not traceable",
                          line_ptr);
         } else {
+            /* start tracing */
             trace_event_set_state_dynamic(ev, enable);
+            /* mark for late vCPU init */
+            trace_events_dstate_init[ev->id] = true;
         }
     }
 }
@@ -212,3 +221,15 @@  bool trace_init_backends(void)
 
     return true;
 }
+
+void trace_init_vcpu_events(void)
+{
+    TraceEvent *ev = NULL;
+    while ((ev = trace_event_pattern("*", ev)) != NULL) {
+        if (trace_event_get_cpu_id(ev) != trace_event_cpu_count() &&
+            trace_event_get_state_static(ev) &&
+            trace_events_dstate_init[ev->id]) {
+            trace_event_set_state_dynamic(ev, true);
+        }
+    }
+}
diff --git a/trace/control.h b/trace/control.h
index f014e11..fb7d18e 100644
--- a/trace/control.h
+++ b/trace/control.h
@@ -77,6 +77,13 @@  static bool trace_event_is_pattern(const char *str);
  */
 static TraceEventID trace_event_count(void);
 
+/**
+ * trace_event_cpu_count:
+ *
+ * Return the number of events with the 'vcpu' property.
+ */
+static TraceEventVCPUID trace_event_cpu_count(void);
+
 
 
 /**
@@ -118,6 +125,23 @@  static const char * trace_event_get_name(TraceEvent *ev);
     ((id ##_ENABLED) && trace_event_get_state_dynamic_by_id(id))
 
 /**
+ * trace_event_get_cpu_state:
+ * @cpu: Target vCPU.
+ * @id: Event identifier (TraceEventID).
+ * @cpu_id: Per-vCPU event identifier (TraceEventVCPUID).
+ *
+ * Get the tracing state of an event (both static and dynamic) for the given
+ * vCPU.
+ *
+ * If the event has the disabled property, the check will have no performance
+ * impact.
+ *
+ * As a down side, you must always use an immediate #TraceEventID value.
+ */
+#define trace_event_get_cpu_state(cpu, id, cpu_id)                      \
+    ((id ##_ENABLED) && trace_event_get_cpu_state_dynamic_by_cpu_id(cpu, cpu_id))
+
+/**
  * trace_event_get_state_static:
  * @id: Event identifier.
  *
@@ -132,10 +156,19 @@  static bool trace_event_get_state_static(TraceEvent *ev);
  * trace_event_get_state_dynamic:
  *
  * Get the dynamic tracing state of an event.
+ *
+ * If the event has the 'vcpu' property, gets the OR'ed state of all vCPUs.
  */
 static bool trace_event_get_state_dynamic(TraceEvent *ev);
 
 /**
+ * trace_event_get_cpu_state_dynamic:
+ *
+ * Get the dynamic tracing state of an event for the given vCPU.
+ */
+static bool trace_event_get_cpu_state_dynamic(CPUState *cpu, TraceEvent *ev);
+
+/**
  * trace_event_set_state:
  *
  * Set the tracing state of an event (only if possible).
@@ -149,13 +182,38 @@  static bool trace_event_get_state_dynamic(TraceEvent *ev);
     } while (0)
 
 /**
+ * trace_event_set_cpu_state:
+ *
+ * Set the tracing state of an event for the given vCPU (only if not disabled).
+ */
+#define trace_event_set_cpu_state(cpu, id, state)               \
+    do {                                                        \
+        if ((id ##_ENABLED)) {                                  \
+            TraceEvent *_e = trace_event_id(id);                \
+            trace_event_set_cpu_state_dynamic(cpu, _e, state);  \
+        }                                                       \
+    } while (0)
+
+/**
  * trace_event_set_state_dynamic:
  *
  * Set the dynamic tracing state of an event.
  *
+ * If the event has the 'vcpu' property, sets the state on all vCPUs.
+ *
  * Pre-condition: trace_event_get_state_static(ev) == true
  */
-static void trace_event_set_state_dynamic(TraceEvent *ev, bool state);
+void trace_event_set_state_dynamic(TraceEvent *ev, bool state);
+
+/**
+ * trace_event_set_cpu_state_dynamic:
+ *
+ * Set the dynamic tracing state of an event for the given vCPU.
+ *
+ * Pre-condition: trace_event_get_cpu_state_static(ev) == true
+ */
+void trace_event_set_cpu_state_dynamic(CPUState *cpu,
+                                       TraceEvent *ev, bool state);
 
 
 
@@ -208,6 +266,14 @@  void trace_list_events(void);
  */
 void trace_enable_events(const char *line_buf);
 
+/**
+ * trace_init_vcpu_events:
+ *
+ * Re-synchronize initial event state with vCPUs (which can be created after
+ * trace_init_events()).
+ */
+void trace_init_vcpu_events(void);
+
 
 #include "trace/control-internal.h"
 
diff --git a/vl.c b/vl.c
index adeddd9..7a1d7f6 100644
--- a/vl.c
+++ b/vl.c
@@ -4642,6 +4642,7 @@  int main(int argc, char **argv, char **envp)
 
     os_setup_post();
 
+    trace_init_vcpu_events();
     main_loop();
     replay_disable_events();