[RFC,22/48] cpu: hook plugin vcpu events

Message ID: 20181025172057.20414-23-cota@braap.org
State: New
Series: Plugin support

Commit Message

Emilio Cota Oct. 25, 2018, 5:20 p.m. UTC
Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 cpus.c    | 10 ++++++++++
 exec.c    |  2 ++
 qom/cpu.c |  2 ++
 3 files changed, 14 insertions(+)

Comments

Alex Bennée Nov. 23, 2018, 5:10 p.m. UTC | #1
Emilio G. Cota <cota@braap.org> writes:

> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  cpus.c    | 10 ++++++++++
>  exec.c    |  2 ++
>  qom/cpu.c |  2 ++
>  3 files changed, 14 insertions(+)
>
> diff --git a/cpus.c b/cpus.c
> index 28e39f045a..3efe89354d 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -43,6 +43,7 @@
>  #include "exec/exec-all.h"
>
>  #include "qemu/thread.h"
> +#include "qemu/plugin.h"
>  #include "sysemu/cpus.h"
>  #include "sysemu/qtest.h"
>  #include "qemu/main-loop.h"
> @@ -1322,12 +1323,21 @@ static void qemu_tcg_rr_wait_io_event(CPUState *cpu)
>
>  static void qemu_wait_io_event(CPUState *cpu)
>  {
> +    bool asleep = false;
> +

slept?

>      g_assert(cpu_mutex_locked(cpu));
>      g_assert(!qemu_mutex_iothread_locked());
>
>      while (cpu_thread_is_idle(cpu)) {
> +        if (!asleep) {
> +            asleep = true;
> +            qemu_plugin_vcpu_idle_cb(cpu);
> +        }
>          qemu_cond_wait(&cpu->halt_cond, &cpu->lock);
>      }
> +    if (asleep) {
> +        qemu_plugin_vcpu_resume_cb(cpu);
> +    }

I wonder if having two hooks is too much? What might a plugin want to do
before we go into idle sleep?

It feels like we are exposing too much of the guts of TCG to the plugin
here as wait_io could be for any number of internal reasons other than
the actual emulation blocking for IO through a WFI or something. If a
plugin really wants to track such things shouldn't it be hooking to the
guest sleep points?

If idle sleeps really are that important maybe we could just report our
sleep time on resume - so a single hook but passing a bit more
information?
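
Something along these lines, purely as a sketch - the widened callback
signature and the use of QEMU_CLOCK_REALTIME are illustrative, not
something this series proposes:

    static void qemu_wait_io_event(CPUState *cpu)
    {
        int64_t idle_start = 0;

        g_assert(cpu_mutex_locked(cpu));
        g_assert(!qemu_mutex_iothread_locked());

        while (cpu_thread_is_idle(cpu)) {
            if (!idle_start) {
                idle_start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
            }
            qemu_cond_wait(&cpu->halt_cond, &cpu->lock);
        }
        if (idle_start) {
            /* single hook: report how long this vCPU was idle */
            qemu_plugin_vcpu_resume_cb(cpu,
                qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - idle_start);
        }
        /* rest of the function unchanged */
    }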

>
>  #ifdef _WIN32
>      /* Eat dummy APC queued by qemu_cpu_kick_thread.  */
> diff --git a/exec.c b/exec.c
> index cd171adb93..71fc76f55e 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -967,6 +967,8 @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp)
>      }
>      tlb_init(cpu);
>
> +    qemu_plugin_vcpu_init_hook(cpu);
> +
>  #ifndef CONFIG_USER_ONLY
>      if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
>          vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
> diff --git a/qom/cpu.c b/qom/cpu.c
> index d1e6ecae03..062817c03b 100644
> --- a/qom/cpu.c
> +++ b/qom/cpu.c
> @@ -32,6 +32,7 @@
>  #include "hw/boards.h"
>  #include "hw/qdev-properties.h"
>  #include "trace-root.h"
> +#include "qemu/plugin.h"
>
>  CPUInterruptHandler cpu_interrupt_handler;
>
> @@ -353,6 +354,7 @@ static void cpu_common_unrealizefn(DeviceState *dev, Error **errp)
>      CPUState *cpu = CPU(dev);
>      /* NOTE: latest generic point before the cpu is fully unrealized */
>      trace_fini_vcpu(cpu);
> +    qemu_plugin_vcpu_exit_hook(cpu);
>      cpu_exec_unrealizefn(cpu);
>  }


--
Alex Bennée
Emilio Cota Nov. 23, 2018, 11:48 p.m. UTC | #2
On Fri, Nov 23, 2018 at 17:10:53 +0000, Alex Bennée wrote:
> Emilio G. Cota <cota@braap.org> writes:
(snip)
> > @@ -1322,12 +1323,21 @@ static void qemu_tcg_rr_wait_io_event(CPUState *cpu)
> >
> >  static void qemu_wait_io_event(CPUState *cpu)
> >  {
> > +    bool asleep = false;
> > +
> 
> slept?

Changed to slept, thanks.

> >      g_assert(cpu_mutex_locked(cpu));
> >      g_assert(!qemu_mutex_iothread_locked());
> >
> >      while (cpu_thread_is_idle(cpu)) {
> > +        if (!asleep) {
> > +            asleep = true;
> > +            qemu_plugin_vcpu_idle_cb(cpu);
> > +        }
> >          qemu_cond_wait(&cpu->halt_cond, &cpu->lock);
> >      }
> > +    if (asleep) {
> > +        qemu_plugin_vcpu_resume_cb(cpu);
> > +    }
> 
> I wonder if having two hooks is too much? What might a plugin want to do
> before we go into idle sleep?
> 
> It feels like we are exposing too much of the guts of TCG to the plugin
> here as wait_io could be for any number of internal reasons other than
> the actual emulation blocking for IO through a WFI or something. If a
> plugin really wants to track such things shouldn't it be hooking to the
> guest sleep points?
> 
> If idle sleeps really are that important maybe we could just report our
> sleep time on resume - so a single hook but passing a bit more
> information?

I don't have all the use cases for this figured out. For now I'm using
this in plugins as a way to know when a vCPU is for sure idle, which
is used in memory reclamation schemes such as RCU.
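
A minimal sketch of that pattern, plugin-side - the callback signature
and the registration helpers are assumptions based on the API proposed
elsewhere in this series:

    #include <stdatomic.h>
    #include <stdbool.h>

    /* One flag per vCPU; a vCPU sleeping in qemu_wait_io_event() cannot
     * hold references, so a reclaimer may free a retired object once
     * every vCPU has been seen quiescent since the object was retired.
     * 64 is an arbitrary bound for the sketch. */
    static _Atomic bool vcpu_quiescent[64];

    static void vcpu_idle(qemu_plugin_id_t id, unsigned int vcpu_index)
    {
        atomic_store(&vcpu_quiescent[vcpu_index], true);
    }

    static void vcpu_resume(qemu_plugin_id_t id, unsigned int vcpu_index)
    {
        atomic_store(&vcpu_quiescent[vcpu_index], false);
    }

    /* in the plugin's install function:
     *     qemu_plugin_register_vcpu_idle_cb(id, vcpu_idle);
     *     qemu_plugin_register_vcpu_resume_cb(id, vcpu_resume);
     */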

What are the "guest sleep points" you mention? Are they target-agnostic?

Thanks,

		Emilio
Alex Bennée Nov. 26, 2018, 11:17 a.m. UTC | #3
Emilio G. Cota <cota@braap.org> writes:

> On Fri, Nov 23, 2018 at 17:10:53 +0000, Alex Bennée wrote:
>> Emilio G. Cota <cota@braap.org> writes:
> (snip)
>> > @@ -1322,12 +1323,21 @@ static void qemu_tcg_rr_wait_io_event(CPUState *cpu)
>> >
>> >  static void qemu_wait_io_event(CPUState *cpu)
>> >  {
>> > +    bool asleep = false;
>> > +
>>
>> slept?
>
> Changed to slept, thanks.
>
>> >      g_assert(cpu_mutex_locked(cpu));
>> >      g_assert(!qemu_mutex_iothread_locked());
>> >
>> >      while (cpu_thread_is_idle(cpu)) {
>> > +        if (!asleep) {
>> > +            asleep = true;
>> > +            qemu_plugin_vcpu_idle_cb(cpu);
>> > +        }
>> >          qemu_cond_wait(&cpu->halt_cond, &cpu->lock);
>> >      }
>> > +    if (asleep) {
>> > +        qemu_plugin_vcpu_resume_cb(cpu);
>> > +    }
>>
>> I wonder if having two hooks is too much? What might a plugin want to do
>> before we go into idle sleep?
>>
>> It feels like we are exposing too much of the guts of TCG to the plugin
>> here as wait_io could be for any number of internal reasons other than
>> the actual emulation blocking for IO through a WFI or something. If a
>> plugin really wants to track such things shouldn't it be hooking to the
>> guest sleep points?
>>
>> If idle sleeps really are that important maybe we could just report our
>> sleep time on resume - so a single hook but passing a bit more
>> information?
>
> I don't have all the use cases for this figured out. For now I'm using
> this in plugins as a way to know when a vCPU is for sure idle, which
> is used in memory reclamation schemes such as RCU.

Couldn't we achieve the same by scheduling async or safe async work on
the vCPU? Maybe we would expose this to the plugin as a "run callback
when idle" function.
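
For reference, the existing mechanism looks roughly like this (QEMU
internals, not yet plugin-facing; how to expose it is the open
question, and retired_obj is just a stand-in):

    #include "qom/cpu.h"   /* async_safe_run_on_cpu(), RUN_ON_CPU_HOST_PTR */

    /* Runs on the vCPU thread at a safe point; the "safe" variant also
     * holds every other vCPU outside the execution loop while it runs. */
    static void do_reclaim(CPUState *cpu, run_on_cpu_data data)
    {
        g_free(data.host_ptr);
    }

    /* queued from anywhere: */
    async_safe_run_on_cpu(cpu, do_reclaim, RUN_ON_CPU_HOST_PTR(retired_obj));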

> What are the "guest sleep points" you mention? Are they
> target-agnostic?

I mean we can arrive here for a variety of reasons - not all of which
are triggered by the guest going to sleep. But that isn't your current
use case.

They aren't target-agnostic, as not all guests try to fully model their
"sleeping" instructions. Some just end up busy-spinning until the event
they were meant to sleep for finally happens.

>
> Thanks,
>
> 		Emilio


--
Alex Bennée
Emilio Cota Nov. 27, 2018, 1:25 a.m. UTC | #4
On Mon, Nov 26, 2018 at 11:17:27 +0000, Alex Bennée wrote:
> 
> Emilio G. Cota <cota@braap.org> writes:
> 
> > On Fri, Nov 23, 2018 at 17:10:53 +0000, Alex Bennée wrote:
> >> Emilio G. Cota <cota@braap.org> writes:
> > (snip)
> >> > @@ -1322,12 +1323,21 @@ static void qemu_tcg_rr_wait_io_event(CPUState *cpu)
> >> >
> >> >  static void qemu_wait_io_event(CPUState *cpu)
> >> >  {
> >> > +    bool asleep = false;
> >> > +
> >>
> >> slept?
> >
> > Changed to slept, thanks.
> >
> >> >      g_assert(cpu_mutex_locked(cpu));
> >> >      g_assert(!qemu_mutex_iothread_locked());
> >> >
> >> >      while (cpu_thread_is_idle(cpu)) {
> >> > +        if (!asleep) {
> >> > +            asleep = true;
> >> > +            qemu_plugin_vcpu_idle_cb(cpu);
> >> > +        }
> >> >          qemu_cond_wait(&cpu->halt_cond, &cpu->lock);
> >> >      }
> >> > +    if (asleep) {
> >> > +        qemu_plugin_vcpu_resume_cb(cpu);
> >> > +    }
> >>
> >> I wonder if having two hooks is too much? What might a plugin want to do
> >> before we go into idle sleep?
> >>
> >> It feels like we are exposing too much of the guts of TCG to the plugin
> >> here as wait_io could be for any number of internal reasons other than
> >> the actual emulation blocking for IO through a WFI or something. If a
> >> plugin really wants to track such things shouldn't it be hooking to the
> >> guest sleep points?
> >>
> >> If idle sleeps really are that important maybe we could just report our
> >> sleep time on resume - so a single hook but passing a bit more
> >> information?
> >
> > I don't have all the use cases for this figured out. For now I'm using
> > this in plugins as a way to know when a vCPU is for sure idle, which
> > is used in memory reclamation schemes such as RCU.
> 
> Couldn't we achieve the same by scheduling async or safe async work on
> the vCPU? Maybe we would expose this to the plugin as a "run callback
> when idle" function.

I'm not sure I understand the first question. Do you mean to schedule
regular work just to make the CPU idle every now and then?
Letting the CPU idle whenever it wants to is fine, so I don't see
the need to force those transitions.

BTW I just thought of a different use case for this. Imagine a plugin
is estimating how much power a CPU is consuming; if the CPU is idle,
the power model would bring down the voltage/freq and account for that.
Of course the model would also want to track the instructions being
executed, which is a nice segue to your point below.
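
Roughly, as hypothetical plugin-side code (the two callbacks map onto
the hooks in this patch; g_get_monotonic_time() is plain GLib,
returning microseconds):

    #include <glib.h>

    static gint64 idle_start_us[64];  /* arbitrary vCPU bound for the sketch */
    static gint64 total_idle_us[64];

    static void vcpu_idle(qemu_plugin_id_t id, unsigned int vcpu_index)
    {
        idle_start_us[vcpu_index] = g_get_monotonic_time();
    }

    static void vcpu_resume(qemu_plugin_id_t id, unsigned int vcpu_index)
    {
        /* charge this interval to the low-power state in the model */
        total_idle_us[vcpu_index] += g_get_monotonic_time()
                                     - idle_start_us[vcpu_index];
    }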

> > What are the "guest sleep points" you mention? Are they
> > target-agnostic?
> 
> I mean we can arrive here for a variety of reasons - not all of which
> are triggered by the guest going to sleep. But that isn't your current
> use case.
> 
> They aren't target-agnostic, as not all guests try to fully model their
> "sleeping" instructions. Some just end up busy-spinning until the event
> they were meant to sleep for finally happens.

I see, so you mean something like a "pause" instruction in the guest.
Plugins that needed such level of detail (like the imaginary power
model plugin I mentioned above) would have to track these
instructions as well. IOW, this would be orthogonal to the "idle"
callback as implemented in this series.

		Emilio

Patch

diff --git a/cpus.c b/cpus.c
index 28e39f045a..3efe89354d 100644
--- a/cpus.c
+++ b/cpus.c
@@ -43,6 +43,7 @@ 
 #include "exec/exec-all.h"
 
 #include "qemu/thread.h"
+#include "qemu/plugin.h"
 #include "sysemu/cpus.h"
 #include "sysemu/qtest.h"
 #include "qemu/main-loop.h"
@@ -1322,12 +1323,21 @@ static void qemu_tcg_rr_wait_io_event(CPUState *cpu)
 
 static void qemu_wait_io_event(CPUState *cpu)
 {
+    bool asleep = false;
+
     g_assert(cpu_mutex_locked(cpu));
     g_assert(!qemu_mutex_iothread_locked());
 
     while (cpu_thread_is_idle(cpu)) {
+        if (!asleep) {
+            asleep = true;
+            qemu_plugin_vcpu_idle_cb(cpu);
+        }
         qemu_cond_wait(&cpu->halt_cond, &cpu->lock);
     }
+    if (asleep) {
+        qemu_plugin_vcpu_resume_cb(cpu);
+    }
 
 #ifdef _WIN32
     /* Eat dummy APC queued by qemu_cpu_kick_thread.  */
diff --git a/exec.c b/exec.c
index cd171adb93..71fc76f55e 100644
--- a/exec.c
+++ b/exec.c
@@ -967,6 +967,8 @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp)
     }
     tlb_init(cpu);
 
+    qemu_plugin_vcpu_init_hook(cpu);
+
 #ifndef CONFIG_USER_ONLY
     if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
         vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
diff --git a/qom/cpu.c b/qom/cpu.c
index d1e6ecae03..062817c03b 100644
--- a/qom/cpu.c
+++ b/qom/cpu.c
@@ -32,6 +32,7 @@ 
 #include "hw/boards.h"
 #include "hw/qdev-properties.h"
 #include "trace-root.h"
+#include "qemu/plugin.h"
 
 CPUInterruptHandler cpu_interrupt_handler;
 
@@ -353,6 +354,7 @@ static void cpu_common_unrealizefn(DeviceState *dev, Error **errp)
     CPUState *cpu = CPU(dev);
     /* NOTE: latest generic point before the cpu is fully unrealized */
     trace_fini_vcpu(cpu);
+    qemu_plugin_vcpu_exit_hook(cpu);
     cpu_exec_unrealizefn(cpu);
 }