[v6,3/7] trace: [tcg] Delay changes to dynamic state when translating

Message ID 148295047061.19871.11792107348459066542.stgit@fimbulvetr.bsc.es
State New

Commit Message

Lluís Vilanova Dec. 28, 2016, 6:41 p.m. UTC
This keeps consistency across all decisions taken during translation
when the dynamic state of a vCPU is changed in the middle of translating
some guest code.

Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
---
 cpu-exec.c             |   26 ++++++++++++++++++++++++++
 include/qom/cpu.h      |    7 +++++++
 qom/cpu.c              |    4 ++++
 trace/control-target.c |   11 +++++++++--
 4 files changed, 46 insertions(+), 2 deletions(-)

Comments

Stefan Hajnoczi Jan. 9, 2017, 5:01 p.m. UTC | #1
On Wed, Dec 28, 2016 at 07:41:10PM +0100, Lluís Vilanova wrote:
> This keeps consistency across all decisions taken during translation
> when the dynamic state of a vCPU is changed in the middle of translating
> some guest code.
> 
> Signed-off-by: Lluís Vilanova <vilanova@ac.upc.edu>
> ---
>  cpu-exec.c             |   26 ++++++++++++++++++++++++++
>  include/qom/cpu.h      |    7 +++++++
>  qom/cpu.c              |    4 ++++
>  trace/control-target.c |   11 +++++++++--
>  4 files changed, 46 insertions(+), 2 deletions(-)
> 
> diff --git a/cpu-exec.c b/cpu-exec.c
> index 4188fed3c6..1b7366efb0 100644
> --- a/cpu-exec.c
> +++ b/cpu-exec.c
> @@ -33,6 +33,7 @@
>  #include "hw/i386/apic.h"
>  #endif
>  #include "sysemu/replay.h"
> +#include "trace/control.h"
>  
>  /* -icount align implementation. */
>  
> @@ -451,9 +452,21 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
>  #ifndef CONFIG_USER_ONLY
>      } else if (replay_has_exception()
>                 && cpu->icount_decr.u16.low + cpu->icount_extra == 0) {
> +        /* delay changes to this vCPU's dstate during translation */
> +        atomic_set(&cpu->trace_dstate_delayed_req, false);
> +        atomic_set(&cpu->trace_dstate_must_delay, true);
> +
>          /* try to cause an exception pending in the log */
>          cpu_exec_nocache(cpu, 1, tb_find(cpu, NULL, 0), true);
>          *ret = -1;
> +
> +        /* apply and disable delayed dstate changes */
> +        atomic_set(&cpu->trace_dstate_must_delay, false);
> +        if (unlikely(atomic_read(&cpu->trace_dstate_delayed_req))) {
> +            bitmap_copy(cpu->trace_dstate, cpu->trace_dstate_delayed,
> +                        trace_get_vcpu_event_count());
> +        }
> +
>          return true;
>  #endif
>      }
> @@ -634,8 +647,21 @@ int cpu_exec(CPUState *cpu)
>  
>              for(;;) {
>                  cpu_handle_interrupt(cpu, &last_tb);
> +
> +                /* delay changes to this vCPU's dstate during translation */
> +                atomic_set(&cpu->trace_dstate_delayed_req, false);
> +                atomic_set(&cpu->trace_dstate_must_delay, true);
> +
>                  tb = tb_find(cpu, last_tb, tb_exit);
>                  cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc);
> +
> +                /* apply and disable delayed dstate changes */
> +                atomic_set(&cpu->trace_dstate_must_delay, false);
> +                if (unlikely(atomic_read(&cpu->trace_dstate_delayed_req))) {
> +                    bitmap_copy(cpu->trace_dstate, cpu->trace_dstate_delayed,
> +                                trace_get_vcpu_event_count());
> +                }
> +
>                  /* Try to align the host and virtual clocks
>                     if the guest is in advance */
>                  align_clocks(&sc, cpu);
> diff --git a/include/qom/cpu.h b/include/qom/cpu.h
> index 3f79a8e955..58255d06fa 100644
> --- a/include/qom/cpu.h
> +++ b/include/qom/cpu.h
> @@ -295,6 +295,10 @@ struct qemu_work_item;
>   * @kvm_fd: vCPU file descriptor for KVM.
>   * @work_mutex: Lock to prevent multiple access to queued_work_*.
>   * @queued_work_first: First asynchronous work pending.
> + * @trace_dstate_must_delay: Whether a change to trace_dstate must be delayed.
> + * @trace_dstate_delayed_req: Whether a change to trace_dstate was delayed.
> + * @trace_dstate_delayed: Delayed changes to trace_dstate (includes all changes
> + *                        to @trace_dstate).
>   * @trace_dstate: Dynamic tracing state of events for this vCPU (bitmask).
>   *
>   * State of one CPU core or thread.
> @@ -370,6 +374,9 @@ struct CPUState {
>       * Dynamically allocated based on bitmap requried to hold up to
>       * trace_get_vcpu_event_count() entries.
>       */
> +    bool trace_dstate_must_delay;
> +    bool trace_dstate_delayed_req;
> +    unsigned long *trace_dstate_delayed;
>      unsigned long *trace_dstate;
>  
>      /* TODO Move common fields from CPUArchState here. */
> diff --git a/qom/cpu.c b/qom/cpu.c
> index 03d9190f8c..d56496d28d 100644
> --- a/qom/cpu.c
> +++ b/qom/cpu.c
> @@ -367,6 +367,9 @@ static void cpu_common_initfn(Object *obj)
>      QTAILQ_INIT(&cpu->breakpoints);
>      QTAILQ_INIT(&cpu->watchpoints);
>  
> +    cpu->trace_dstate_must_delay = false;
> +    cpu->trace_dstate_delayed_req = false;
> +    cpu->trace_dstate_delayed = bitmap_new(trace_get_vcpu_event_count());
>      cpu->trace_dstate = bitmap_new(trace_get_vcpu_event_count());
>  
>      cpu_exec_initfn(cpu);
> @@ -375,6 +378,7 @@ static void cpu_common_initfn(Object *obj)
>  static void cpu_common_finalize(Object *obj)
>  {
>      CPUState *cpu = CPU(obj);
> +    g_free(cpu->trace_dstate_delayed);
>      g_free(cpu->trace_dstate);
>  }
>  
> diff --git a/trace/control-target.c b/trace/control-target.c
> index 7ebf6e0bcb..aba8db55de 100644
> --- a/trace/control-target.c
> +++ b/trace/control-target.c
> @@ -69,13 +69,20 @@ void trace_event_set_vcpu_state_dynamic(CPUState *vcpu,
>      if (state_pre != state) {
>          if (state) {
>              trace_events_enabled_count++;
> -            set_bit(vcpu_id, vcpu->trace_dstate);
> +            set_bit(vcpu_id, vcpu->trace_dstate_delayed);
> +            if (!atomic_read(&vcpu->trace_dstate_must_delay)) {
> +                set_bit(vcpu_id, vcpu->trace_dstate);
> +            }
>              (*ev->dstate)++;
>          } else {
>              trace_events_enabled_count--;
> -            clear_bit(vcpu_id, vcpu->trace_dstate);
> +            clear_bit(vcpu_id, vcpu->trace_dstate_delayed);
> +            if (!atomic_read(&vcpu->trace_dstate_must_delay)) {
> +                clear_bit(vcpu_id, vcpu->trace_dstate);
> +            }
>              (*ev->dstate)--;
>          }
> +        atomic_set(&vcpu->trace_dstate_delayed_req, true);
>      }
>  }

This lock-free scheme looks broken to me.  Consider the following case
with threads A and B:

A: atomic_set(&cpu->trace_dstate_delayed_req, false);
A: atomic_set(&cpu->trace_dstate_must_delay, true);
B: if (!atomic_read(&vcpu->trace_dstate_must_delay)) { /* false */
A: atomic_set(&cpu->trace_dstate_must_delay, false);
A: if (unlikely(atomic_read(&cpu->trace_dstate_delayed_req))) { /* false */
B: atomic_set(&vcpu->trace_dstate_delayed_req, true);

Oops, we missed the delayed update.

Now, when A runs the next iteration, we forget there was a delayed req:

A: atomic_set(&cpu->trace_dstate_delayed_req, false);

As a result, even the next iteration may not copy the delayed bitmap.

Perhaps you should use RCU.

Or use a simpler scheme:

struct CPUState {
    ...
    uint32_t dstate_update_count;
};

In trace_event_set_vcpu_state_dynamic():

    if (state) {
        trace_events_enabled_count++;
        set_bit(vcpu_id, vcpu->trace_dstate_delayed);
	atomic_inc(&vcpu->dstate_update_count, 1);
        (*ev->dstate)++;
    } ...

In cpu_exec() and friends:

    last_dstate_update_count = atomic_read(&vcpu->dstate_update_count);

    tb = tb_find(cpu, last_tb, tb_exit);
    cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc);

    /* apply and disable delayed dstate changes */
    if (unlikely(atomic_read(&cpu->dstate_update_count) != last_dstate_update_count)) {
        bitmap_copy(cpu->trace_dstate, cpu->trace_dstate_delayed,
        trace_get_vcpu_event_count());
    }

(You'll need to adjust the details but the update counter approach
should be workable.)
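
A slightly fuller sketch of that counter variant (the field and variable
names are assumed here, not taken from the patch, and QEMU's atomic_inc()
takes a single pointer argument).  The snapshot is kept across loop
iterations, so an update that races with one iteration is applied by the
next one at the latest:

    /* include/qom/cpu.h: new per-vCPU fields */
    unsigned long *trace_dstate;          /* consulted while translating */
    unsigned long *trace_dstate_delayed;  /* written by tracing control  */
    uint32_t trace_dstate_update_count;   /* bumped on every change      */

    /* trace/control-target.c: the writer only touches the delayed bitmap */
    if (state) {
        trace_events_enabled_count++;
        set_bit(vcpu_id, vcpu->trace_dstate_delayed);
        (*ev->dstate)++;
    } else {
        trace_events_enabled_count--;
        clear_bit(vcpu_id, vcpu->trace_dstate_delayed);
        (*ev->dstate)--;
    }
    atomic_inc(&vcpu->trace_dstate_update_count);

    /* cpu-exec.c: the vCPU thread applies pending changes between TBs */
    uint32_t seen_count, cur_count;

    seen_count = atomic_read(&cpu->trace_dstate_update_count);
    for (;;) {
        cpu_handle_interrupt(cpu, &last_tb);

        tb = tb_find(cpu, last_tb, tb_exit);
        cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc);

        /* Only advance the snapshot once the copy has been done, so a
         * change landing between this check and the next tb_find()
         * cannot be lost. */
        cur_count = atomic_read(&cpu->trace_dstate_update_count);
        if (unlikely(cur_count != seen_count)) {
            bitmap_copy(cpu->trace_dstate, cpu->trace_dstate_delayed,
                        trace_get_vcpu_event_count());
            seen_count = cur_count;
        }

        align_clocks(&sc, cpu);
    }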

Stefan
Paolo Bonzini Jan. 10, 2017, 4:31 p.m. UTC | #2
On 09/01/2017 18:01, Stefan Hajnoczi wrote:
> Or use a simpler scheme:
> 
> struct CPUState {
>     ...
>     uint32_t dstate_update_count;
> };
> 
> In trace_event_set_vcpu_state_dynamic():
> 
>     if (state) {
>         trace_events_enabled_count++;
>         set_bit(vcpu_id, vcpu->trace_dstate_delayed);
> 	atomic_inc(&vcpu->dstate_update_count, 1);
>         (*ev->dstate)++;
>     } ...
> 
> In cpu_exec() and friends:
> 
>     last_dstate_update_count = atomic_read(&vcpu->dstate_update_count);
> 
>     tb = tb_find(cpu, last_tb, tb_exit);
>     cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc);
> 
>     /* apply and disable delayed dstate changes */
>     if (unlikely(atomic_read(&cpu->dstate_update_count) != last_dstate_update_count)) {
>         bitmap_copy(cpu->trace_dstate, cpu->trace_dstate_delayed,
>         trace_get_vcpu_event_count());
>     }
> 
> (You'll need to adjust the details but the update counter approach
> should be workable.)

Would it work to use async_run_on_cpu?

Thanks,

Paolo
Stefan Hajnoczi Jan. 11, 2017, 4:16 p.m. UTC | #3
On Tue, Jan 10, 2017 at 05:31:37PM +0100, Paolo Bonzini wrote:
> On 09/01/2017 18:01, Stefan Hajnoczi wrote:
> > Or use a simpler scheme:
> > 
> > struct CPUState {
> >     ...
> >     uint32_t dstate_update_count;
> > };
> > 
> > In trace_event_set_vcpu_state_dynamic():
> > 
> >     if (state) {
> >         trace_events_enabled_count++;
> >         set_bit(vcpu_id, vcpu->trace_dstate_delayed);
> > 	atomic_inc(&vcpu->dstate_update_count, 1);
> >         (*ev->dstate)++;
> >     } ...
> > 
> > In cpu_exec() and friends:
> > 
> >     last_dstate_update_count = atomic_read(&vcpu->dstate_update_count);
> > 
> >     tb = tb_find(cpu, last_tb, tb_exit);
> >     cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc);
> > 
> >     /* apply and disable delayed dstate changes */
> >     if (unlikely(atomic_read(&cpu->dstate_update_count) != last_dstate_update_count)) {
> >         bitmap_copy(cpu->trace_dstate, cpu->trace_dstate_delayed,
> >         trace_get_vcpu_event_count());
> >     }
> > 
> > (You'll need to adjust the details but the update counter approach
> > should be workable.)
> 
> Would it work to use async_run_on_cpu?

I think so.

Stefan
Lluís Vilanova Jan. 12, 2017, 7:37 p.m. UTC | #4
Stefan Hajnoczi writes:

> On Tue, Jan 10, 2017 at 05:31:37PM +0100, Paolo Bonzini wrote:
>> On 09/01/2017 18:01, Stefan Hajnoczi wrote:
>> > Or use a simpler scheme:
>> > 
>> > struct CPUState {
>> >     ...
>> >     uint32_t dstate_update_count;
>> > };
>> > 
>> > In trace_event_set_vcpu_state_dynamic():
>> > 
>> >     if (state) {
>> >         trace_events_enabled_count++;
>> >         set_bit(vcpu_id, vcpu->trace_dstate_delayed);
>> > 	atomic_inc(&vcpu->dstate_update_count, 1);
>> >         (*ev->dstate)++;
>> >     } ...
>> > 
>> > In cpu_exec() and friends:
>> > 
>> >     last_dstate_update_count = atomic_read(&vcpu->dstate_update_count);
>> > 
>> >     tb = tb_find(cpu, last_tb, tb_exit);
>> >     cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc);
>> > 
>> >     /* apply and disable delayed dstate changes */
>> >     if (unlikely(atomic_read(&cpu->dstate_update_count) != last_dstate_update_count)) {
>> >         bitmap_copy(cpu->trace_dstate, cpu->trace_dstate_delayed,
>> >         trace_get_vcpu_event_count());
>> >     }
>> > 
>> > (You'll need to adjust the details but the update counter approach
>> > should be workable.)
>> 
>> Would it work to use async_run_on_cpu?

> I think so.

AFAIU we cannot use async_run_on_cpu(), since we need to reset the local
variable "last_tb" to avoid chaining TBs with different dstates, and we cannot
use cpu_loop_exit() inside the callback.

To make it work, we'd need to add some new boolean flag on the vCPU to control
when to reset "last_tb", and then we're just as good as implementing the async
work "protocol" manually for this specific case.

What I'll do is fix the race condition by simplifying that code (haven't looked
at the problem yet).


Thanks,
  Lluis
Paolo Bonzini Jan. 12, 2017, 9:25 p.m. UTC | #5
On 12/01/2017 20:37, Lluís Vilanova wrote:
> Stefan Hajnoczi writes:
> 
>> On Tue, Jan 10, 2017 at 05:31:37PM +0100, Paolo Bonzini wrote:
>>> On 09/01/2017 18:01, Stefan Hajnoczi wrote:
>>>> Or use a simpler scheme:
>>>>
>>>> struct CPUState {
>>>>     ...
>>>>     uint32_t dstate_update_count;
>>>> };
>>>>
>>>> In trace_event_set_vcpu_state_dynamic():
>>>>
>>>>     if (state) {
>>>>         trace_events_enabled_count++;
>>>>         set_bit(vcpu_id, vcpu->trace_dstate_delayed);
>>>> 	atomic_inc(&vcpu->dstate_update_count, 1);
>>>>         (*ev->dstate)++;
>>>>     } ...
>>>>
>>>> In cpu_exec() and friends:
>>>>
>>>>     last_dstate_update_count = atomic_read(&vcpu->dstate_update_count);
>>>>
>>>>     tb = tb_find(cpu, last_tb, tb_exit);
>>>>     cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc);
>>>>
>>>>     /* apply and disable delayed dstate changes */
>>>>     if (unlikely(atomic_read(&cpu->dstate_update_count) != last_dstate_update_count)) {
>>>>         bitmap_copy(cpu->trace_dstate, cpu->trace_dstate_delayed,
>>>>         trace_get_vcpu_event_count());
>>>>     }
>>>>
>>>> (You'll need to adjust the details but the update counter approach
>>>> should be workable.)
>>>
>>> Would it work to use async_run_on_cpu?
> 
>> I think so.
> 
> AFAIU we cannot use async_run_on_cpu(), since we need to reset the local
> variable "last_tb" to avoid chaining TBs with different dstates, and we cannot
> use cpu_loop_exit() inside the callback.

async_run_on_cpu would run as soon as the currently executing TB
finishes, and would leave cpu_exec completely, so there would be no
chaining.
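
For concreteness, a rough sketch of what that could look like (the helper
names are made up, using the run_on_cpu_data-based signature that
async_run_on_cpu() has on current master):

    /* Runs in the vCPU thread, outside any TB, so it is safe to touch
     * trace_dstate directly. */
    static void trace_event_apply_dstate(CPUState *vcpu, run_on_cpu_data data)
    {
        bitmap_copy(vcpu->trace_dstate, vcpu->trace_dstate_delayed,
                    trace_get_vcpu_event_count());
    }

    /* Called by trace_event_set_vcpu_state_dynamic() after it has updated
     * vcpu->trace_dstate_delayed. */
    static void trace_event_schedule_dstate_update(CPUState *vcpu)
    {
        async_run_on_cpu(vcpu, trace_event_apply_dstate, RUN_ON_CPU_NULL);
    }

Since the work item only runs once the vCPU has left cpu_exec(), the next
call starts over with last_tb == NULL, so a TB generated with the old
dstate should never get chained to one generated with the new dstate.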

Paolo

> To make it work, we'd need to add some new boolean flag on the vCPU to control
> when to reset "last_tb", and then we're just as good as implementing the async
> work "protocol" manually for this specific case.
> 
> What I'll do is fix the race condition by simplifying that code (haven't looked
> at the problem yet).
> 
> 
> Thanks,
>   Lluis
>
Lluís Vilanova Jan. 13, 2017, 8:08 p.m. UTC | #6
Paolo Bonzini writes:

> On 12/01/2017 20:37, Lluís Vilanova wrote:
>> Stefan Hajnoczi writes:
>> 
>>> On Tue, Jan 10, 2017 at 05:31:37PM +0100, Paolo Bonzini wrote:
>>>> On 09/01/2017 18:01, Stefan Hajnoczi wrote:
>>>>> Or use a simpler scheme:
>>>>> 
>>>>> struct CPUState {
>>>>>     ...
>>>>>     uint32_t dstate_update_count;
>>>>> };
>>>>>
>>>>> In trace_event_set_vcpu_state_dynamic():
>>>>>
>>>>>     if (state) {
>>>>>         trace_events_enabled_count++;
>>>>>         set_bit(vcpu_id, vcpu->trace_dstate_delayed);
>>>>>         atomic_inc(&vcpu->dstate_update_count, 1);
>>>>>         (*ev->dstate)++;
>>>>>     } ...
>>>>>
>>>>> In cpu_exec() and friends:
>>>>>
>>>>>     last_dstate_update_count = atomic_read(&vcpu->dstate_update_count);
>>>>>
>>>>>     tb = tb_find(cpu, last_tb, tb_exit);
>>>>>     cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc);
>>>>>
>>>>>     /* apply and disable delayed dstate changes */
>>>>>     if (unlikely(atomic_read(&cpu->dstate_update_count) != last_dstate_update_count)) {
>>>>>         bitmap_copy(cpu->trace_dstate, cpu->trace_dstate_delayed,
>>>>>                     trace_get_vcpu_event_count());
>>>>>     }
>>>>> 
>>>>> (You'll need to adjust the details but the update counter approach
>>>>> should be workable.)
>>>> 
>>>> Would it work to use async_run_on_cpu?
>> 
>>> I think so.
>> 
>> AFAIU we cannot use async_run_on_cpu(), since we need to reset the local
>> variable "last_tb" to avoid chaining TBs with different dstates, and we cannot
>> use cpu_loop_exit() inside the callback.

> async_run_on_cpu would run as soon as the currently executing TB
> finishes, and would leave cpu_exec completely, so there would be no
> chaining.

Aha, I've re-read the internals used by async_run_on_cpu() and that'll be
sufficient.


Thanks,
  Lluis

Patch

diff --git a/cpu-exec.c b/cpu-exec.c
index 4188fed3c6..1b7366efb0 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -33,6 +33,7 @@ 
 #include "hw/i386/apic.h"
 #endif
 #include "sysemu/replay.h"
+#include "trace/control.h"
 
 /* -icount align implementation. */
 
@@ -451,9 +452,21 @@  static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
 #ifndef CONFIG_USER_ONLY
     } else if (replay_has_exception()
                && cpu->icount_decr.u16.low + cpu->icount_extra == 0) {
+        /* delay changes to this vCPU's dstate during translation */
+        atomic_set(&cpu->trace_dstate_delayed_req, false);
+        atomic_set(&cpu->trace_dstate_must_delay, true);
+
         /* try to cause an exception pending in the log */
         cpu_exec_nocache(cpu, 1, tb_find(cpu, NULL, 0), true);
         *ret = -1;
+
+        /* apply and disable delayed dstate changes */
+        atomic_set(&cpu->trace_dstate_must_delay, false);
+        if (unlikely(atomic_read(&cpu->trace_dstate_delayed_req))) {
+            bitmap_copy(cpu->trace_dstate, cpu->trace_dstate_delayed,
+                        trace_get_vcpu_event_count());
+        }
+
         return true;
 #endif
     }
@@ -634,8 +647,21 @@  int cpu_exec(CPUState *cpu)
 
             for(;;) {
                 cpu_handle_interrupt(cpu, &last_tb);
+
+                /* delay changes to this vCPU's dstate during translation */
+                atomic_set(&cpu->trace_dstate_delayed_req, false);
+                atomic_set(&cpu->trace_dstate_must_delay, true);
+
                 tb = tb_find(cpu, last_tb, tb_exit);
                 cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc);
+
+                /* apply and disable delayed dstate changes */
+                atomic_set(&cpu->trace_dstate_must_delay, false);
+                if (unlikely(atomic_read(&cpu->trace_dstate_delayed_req))) {
+                    bitmap_copy(cpu->trace_dstate, cpu->trace_dstate_delayed,
+                                trace_get_vcpu_event_count());
+                }
+
                 /* Try to align the host and virtual clocks
                    if the guest is in advance */
                 align_clocks(&sc, cpu);
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 3f79a8e955..58255d06fa 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -295,6 +295,10 @@  struct qemu_work_item;
  * @kvm_fd: vCPU file descriptor for KVM.
  * @work_mutex: Lock to prevent multiple access to queued_work_*.
  * @queued_work_first: First asynchronous work pending.
+ * @trace_dstate_must_delay: Whether a change to trace_dstate must be delayed.
+ * @trace_dstate_delayed_req: Whether a change to trace_dstate was delayed.
+ * @trace_dstate_delayed: Delayed changes to trace_dstate (includes all changes
+ *                        to @trace_dstate).
  * @trace_dstate: Dynamic tracing state of events for this vCPU (bitmask).
  *
  * State of one CPU core or thread.
@@ -370,6 +374,9 @@  struct CPUState {
      * Dynamically allocated based on bitmap requried to hold up to
      * trace_get_vcpu_event_count() entries.
      */
+    bool trace_dstate_must_delay;
+    bool trace_dstate_delayed_req;
+    unsigned long *trace_dstate_delayed;
     unsigned long *trace_dstate;
 
     /* TODO Move common fields from CPUArchState here. */
diff --git a/qom/cpu.c b/qom/cpu.c
index 03d9190f8c..d56496d28d 100644
--- a/qom/cpu.c
+++ b/qom/cpu.c
@@ -367,6 +367,9 @@  static void cpu_common_initfn(Object *obj)
     QTAILQ_INIT(&cpu->breakpoints);
     QTAILQ_INIT(&cpu->watchpoints);
 
+    cpu->trace_dstate_must_delay = false;
+    cpu->trace_dstate_delayed_req = false;
+    cpu->trace_dstate_delayed = bitmap_new(trace_get_vcpu_event_count());
     cpu->trace_dstate = bitmap_new(trace_get_vcpu_event_count());
 
     cpu_exec_initfn(cpu);
@@ -375,6 +378,7 @@  static void cpu_common_initfn(Object *obj)
 static void cpu_common_finalize(Object *obj)
 {
     CPUState *cpu = CPU(obj);
+    g_free(cpu->trace_dstate_delayed);
     g_free(cpu->trace_dstate);
 }
 
diff --git a/trace/control-target.c b/trace/control-target.c
index 7ebf6e0bcb..aba8db55de 100644
--- a/trace/control-target.c
+++ b/trace/control-target.c
@@ -69,13 +69,20 @@  void trace_event_set_vcpu_state_dynamic(CPUState *vcpu,
     if (state_pre != state) {
         if (state) {
             trace_events_enabled_count++;
-            set_bit(vcpu_id, vcpu->trace_dstate);
+            set_bit(vcpu_id, vcpu->trace_dstate_delayed);
+            if (!atomic_read(&vcpu->trace_dstate_must_delay)) {
+                set_bit(vcpu_id, vcpu->trace_dstate);
+            }
             (*ev->dstate)++;
         } else {
             trace_events_enabled_count--;
-            clear_bit(vcpu_id, vcpu->trace_dstate);
+            clear_bit(vcpu_id, vcpu->trace_dstate_delayed);
+            if (!atomic_read(&vcpu->trace_dstate_must_delay)) {
+                clear_bit(vcpu_id, vcpu->trace_dstate);
+            }
             (*ev->dstate)--;
         }
+        atomic_set(&vcpu->trace_dstate_delayed_req, true);
     }
 }