diff mbox series

[v6,09/11] arm/hvf: Add a WFI handler

Message ID 20210120224444.71840-10-agraf@csgraf.de
State New
Headers show
Series hvf: Implement Apple Silicon Support | expand

Commit Message

Alexander Graf Jan. 20, 2021, 10:44 p.m. UTC
From: Peter Collingbourne <pcc@google.com>

Sleep on WFI until the VTIMER is due but allow ourselves to be woken
up on IPI.

In this implementation IPI is blocked on the CPU thread at startup and
pselect() is used to atomically unblock the signal and begin sleeping.
The signal is sent unconditionally so there's no need to worry about
races between actually sleeping and the "we think we're sleeping"
state. It may lead to an extra wakeup but that's better than missing
it entirely.

Signed-off-by: Peter Collingbourne <pcc@google.com>
[agraf: Remove unused 'set' variable, always advance PC on WFX trap]
Signed-off-by: Alexander Graf <agraf@csgraf.de>
Acked-by: Roman Bolshakov <r.bolshakov@yadro.com>
---
 accel/hvf/hvf-cpus.c     |  5 ++--
 include/sysemu/hvf_int.h |  1 +
 target/arm/hvf/hvf.c     | 56 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 59 insertions(+), 3 deletions(-)

Comments

Peter Maydell Jan. 28, 2021, 4:25 p.m. UTC | #1
On Wed, 20 Jan 2021 at 22:44, Alexander Graf <agraf@csgraf.de> wrote:
>
> From: Peter Collingbourne <pcc@google.com>
>
> Sleep on WFI until the VTIMER is due but allow ourselves to be woken
> up on IPI.
>
> In this implementation IPI is blocked on the CPU thread at startup and
> pselect() is used to atomically unblock the signal and begin sleeping.
> The signal is sent unconditionally so there's no need to worry about
> races between actually sleeping and the "we think we're sleeping"
> state. It may lead to an extra wakeup but that's better than missing
> it entirely.
>
> Signed-off-by: Peter Collingbourne <pcc@google.com>
> [agraf: Remove unused 'set' variable, always advance PC on WFX trap]
> Signed-off-by: Alexander Graf <agraf@csgraf.de>
> Acked-by: Roman Bolshakov <r.bolshakov@yadro.com>
> ---
>  accel/hvf/hvf-cpus.c     |  5 ++--
>  include/sysemu/hvf_int.h |  1 +
>  target/arm/hvf/hvf.c     | 56 ++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 59 insertions(+), 3 deletions(-)
>
> diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
> index 6d70ee742e..abef6a58f7 100644
> --- a/accel/hvf/hvf-cpus.c
> +++ b/accel/hvf/hvf-cpus.c
> @@ -322,15 +322,14 @@ static int hvf_init_vcpu(CPUState *cpu)
>      cpu->hvf = g_malloc0(sizeof(*cpu->hvf));
>
>      /* init cpu signals */
> -    sigset_t set;
>      struct sigaction sigact;
>
>      memset(&sigact, 0, sizeof(sigact));
>      sigact.sa_handler = dummy_signal;
>      sigaction(SIG_IPI, &sigact, NULL);
>
> -    pthread_sigmask(SIG_BLOCK, NULL, &set);
> -    sigdelset(&set, SIG_IPI);
> +    pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->unblock_ipi_mask);
> +    sigdelset(&cpu->hvf->unblock_ipi_mask, SIG_IPI);
>
>  #ifdef __aarch64__
>      r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t **)&cpu->hvf->exit, NULL);
> diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
> index c2ac6c8f97..7a397fe85a 100644
> --- a/include/sysemu/hvf_int.h
> +++ b/include/sysemu/hvf_int.h
> @@ -51,6 +51,7 @@ extern HVFState *hvf_state;
>  struct hvf_vcpu_state {
>      uint64_t fd;
>      void *exit;
> +    sigset_t unblock_ipi_mask;
>  };
>
>  void assert_hvf_ok(hv_return_t ret);
> diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
> index 8f18efe856..f0850ab14a 100644
> --- a/target/arm/hvf/hvf.c
> +++ b/target/arm/hvf/hvf.c
> @@ -2,6 +2,7 @@
>   * QEMU Hypervisor.framework support for Apple Silicon
>
>   * Copyright 2020 Alexander Graf <agraf@csgraf.de>
> + * Copyright 2020 Google LLC
>   *
>   * This work is licensed under the terms of the GNU GPL, version 2 or later.
>   * See the COPYING file in the top-level directory.
> @@ -17,6 +18,8 @@
>  #include "sysemu/hvf_int.h"
>  #include "sysemu/hw_accel.h"
>
> +#include <mach/mach_time.h>
> +
>  #include "exec/address-spaces.h"
>  #include "hw/irq.h"
>  #include "qemu/main-loop.h"
> @@ -411,6 +414,7 @@ int hvf_arch_init_vcpu(CPUState *cpu)
>
>  void hvf_kick_vcpu_thread(CPUState *cpu)
>  {
> +    cpus_kick_thread(cpu);
>      hv_vcpus_exit(&cpu->hvf->fd, 1);
>  }
>
> @@ -466,6 +470,18 @@ static int hvf_inject_interrupts(CPUState *cpu)
>      return 0;
>  }
>
> +static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts)
> +{
> +    /*
> +     * Use pselect to sleep so that other threads can IPI us while we're
> +     * sleeping.
> +     */
> +    qatomic_mb_set(&cpu->thread_kicked, false);
> +    qemu_mutex_unlock_iothread();
> +    pselect(0, 0, 0, 0, ts, &cpu->hvf->unblock_ipi_mask);
> +    qemu_mutex_lock_iothread();
> +}

It seems a bit odd that this is specific to Arm hvf.
Doesn't x86 hvf need "pause until interrupt" functionality ?

> +
>  int hvf_vcpu_exec(CPUState *cpu)
>  {
>      ARMCPU *arm_cpu = ARM_CPU(cpu);
> @@ -577,6 +593,46 @@ int hvf_vcpu_exec(CPUState *cpu)
>          }
>          case EC_WFX_TRAP:
>              advance_pc = true;
> +            if (!(syndrome & WFX_IS_WFE) && !(cpu->interrupt_request &
> +                (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
> +
> +                uint64_t ctl;
> +                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0,
> +                                        &ctl);
> +                assert_hvf_ok(r);
> +
> +                if (!(ctl & 1) || (ctl & 2)) {
> +                    /* Timer disabled or masked, just wait for an IPI. */
> +                    hvf_wait_for_ipi(cpu, NULL);
> +                    break;
> +                }
> +
> +                uint64_t cval;
> +                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CVAL_EL0,
> +                                        &cval);
> +                assert_hvf_ok(r);
> +
> +                int64_t ticks_to_sleep = cval - mach_absolute_time();
> +                if (ticks_to_sleep < 0) {
> +                    break;
> +                }
> +
> +                uint64_t seconds = ticks_to_sleep / arm_cpu->gt_cntfrq_hz;
> +                uint64_t nanos =
> +                    (ticks_to_sleep - arm_cpu->gt_cntfrq_hz * seconds) *
> +                    1000000000 / arm_cpu->gt_cntfrq_hz;
> +
> +                /*
> +                 * Don't sleep for less than 2ms. This is believed to improve
> +                 * latency of message passing workloads.
> +                 */

Believed by who ?

> +                if (!seconds && nanos < 2000000) {
> +                    break;
> +                }
> +
> +                struct timespec ts = { seconds, nanos };
> +                hvf_wait_for_ipi(cpu, &ts);
> +            }

Why doesn't the timer timeout manifest as an IPI ? (Put another way,
why is the timer interrupt special?)

thanks
-- PMM
Peter Collingbourne Feb. 10, 2021, 8:25 p.m. UTC | #2
On Thu, Jan 28, 2021 at 8:25 AM Peter Maydell <peter.maydell@linaro.org> wrote:
>
> On Wed, 20 Jan 2021 at 22:44, Alexander Graf <agraf@csgraf.de> wrote:
> >
> > From: Peter Collingbourne <pcc@google.com>
> >
> > Sleep on WFI until the VTIMER is due but allow ourselves to be woken
> > up on IPI.
> >
> > In this implementation IPI is blocked on the CPU thread at startup and
> > pselect() is used to atomically unblock the signal and begin sleeping.
> > The signal is sent unconditionally so there's no need to worry about
> > races between actually sleeping and the "we think we're sleeping"
> > state. It may lead to an extra wakeup but that's better than missing
> > it entirely.
> >
> > Signed-off-by: Peter Collingbourne <pcc@google.com>
> > [agraf: Remove unused 'set' variable, always advance PC on WFX trap]
> > Signed-off-by: Alexander Graf <agraf@csgraf.de>
> > Acked-by: Roman Bolshakov <r.bolshakov@yadro.com>
> > ---
> >  accel/hvf/hvf-cpus.c     |  5 ++--
> >  include/sysemu/hvf_int.h |  1 +
> >  target/arm/hvf/hvf.c     | 56 ++++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 59 insertions(+), 3 deletions(-)
> >
> > diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
> > index 6d70ee742e..abef6a58f7 100644
> > --- a/accel/hvf/hvf-cpus.c
> > +++ b/accel/hvf/hvf-cpus.c
> > @@ -322,15 +322,14 @@ static int hvf_init_vcpu(CPUState *cpu)
> >      cpu->hvf = g_malloc0(sizeof(*cpu->hvf));
> >
> >      /* init cpu signals */
> > -    sigset_t set;
> >      struct sigaction sigact;
> >
> >      memset(&sigact, 0, sizeof(sigact));
> >      sigact.sa_handler = dummy_signal;
> >      sigaction(SIG_IPI, &sigact, NULL);
> >
> > -    pthread_sigmask(SIG_BLOCK, NULL, &set);
> > -    sigdelset(&set, SIG_IPI);
> > +    pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->unblock_ipi_mask);
> > +    sigdelset(&cpu->hvf->unblock_ipi_mask, SIG_IPI);
> >
> >  #ifdef __aarch64__
> >      r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t **)&cpu->hvf->exit, NULL);
> > diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
> > index c2ac6c8f97..7a397fe85a 100644
> > --- a/include/sysemu/hvf_int.h
> > +++ b/include/sysemu/hvf_int.h
> > @@ -51,6 +51,7 @@ extern HVFState *hvf_state;
> >  struct hvf_vcpu_state {
> >      uint64_t fd;
> >      void *exit;
> > +    sigset_t unblock_ipi_mask;
> >  };
> >
> >  void assert_hvf_ok(hv_return_t ret);
> > diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
> > index 8f18efe856..f0850ab14a 100644
> > --- a/target/arm/hvf/hvf.c
> > +++ b/target/arm/hvf/hvf.c
> > @@ -2,6 +2,7 @@
> >   * QEMU Hypervisor.framework support for Apple Silicon
> >
> >   * Copyright 2020 Alexander Graf <agraf@csgraf.de>
> > + * Copyright 2020 Google LLC
> >   *
> >   * This work is licensed under the terms of the GNU GPL, version 2 or later.
> >   * See the COPYING file in the top-level directory.
> > @@ -17,6 +18,8 @@
> >  #include "sysemu/hvf_int.h"
> >  #include "sysemu/hw_accel.h"
> >
> > +#include <mach/mach_time.h>
> > +
> >  #include "exec/address-spaces.h"
> >  #include "hw/irq.h"
> >  #include "qemu/main-loop.h"
> > @@ -411,6 +414,7 @@ int hvf_arch_init_vcpu(CPUState *cpu)
> >
> >  void hvf_kick_vcpu_thread(CPUState *cpu)
> >  {
> > +    cpus_kick_thread(cpu);
> >      hv_vcpus_exit(&cpu->hvf->fd, 1);
> >  }
> >
> > @@ -466,6 +470,18 @@ static int hvf_inject_interrupts(CPUState *cpu)
> >      return 0;
> >  }
> >
> > +static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts)
> > +{
> > +    /*
> > +     * Use pselect to sleep so that other threads can IPI us while we're
> > +     * sleeping.
> > +     */
> > +    qatomic_mb_set(&cpu->thread_kicked, false);
> > +    qemu_mutex_unlock_iothread();
> > +    pselect(0, 0, 0, 0, ts, &cpu->hvf->unblock_ipi_mask);
> > +    qemu_mutex_lock_iothread();
> > +}
>
> It seems a bit odd that this is specific to Arm hvf.
> Doesn't x86 hvf need "pause until interrupt" functionality ?

I'm not very familiar with x86 HVF (and I don't have an x86 Mac to
test with), but my reading of the x86 HVF code is that there appear to
be no exits that put the system to sleep (not even MWAIT which I think
is the x86 equivalent of WFI -- the code just appears to busy loop). I
think that implies that either we actually busy loop on x86 (seems
unlikely to me since I guess someone would have noticed by now) or
MWAIT does not actually result in a VM exit, and HVF itself goes to
sleep inside hv_vcpu_run(), unlike ARM HVF where WFI results in an
exit (and immediate re-entering would otherwise busy loop).

> > +
> >  int hvf_vcpu_exec(CPUState *cpu)
> >  {
> >      ARMCPU *arm_cpu = ARM_CPU(cpu);
> > @@ -577,6 +593,46 @@ int hvf_vcpu_exec(CPUState *cpu)
> >          }
> >          case EC_WFX_TRAP:
> >              advance_pc = true;
> > +            if (!(syndrome & WFX_IS_WFE) && !(cpu->interrupt_request &
> > +                (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
> > +
> > +                uint64_t ctl;
> > +                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0,
> > +                                        &ctl);
> > +                assert_hvf_ok(r);
> > +
> > +                if (!(ctl & 1) || (ctl & 2)) {
> > +                    /* Timer disabled or masked, just wait for an IPI. */
> > +                    hvf_wait_for_ipi(cpu, NULL);
> > +                    break;
> > +                }
> > +
> > +                uint64_t cval;
> > +                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CVAL_EL0,
> > +                                        &cval);
> > +                assert_hvf_ok(r);
> > +
> > +                int64_t ticks_to_sleep = cval - mach_absolute_time();
> > +                if (ticks_to_sleep < 0) {
> > +                    break;
> > +                }
> > +
> > +                uint64_t seconds = ticks_to_sleep / arm_cpu->gt_cntfrq_hz;
> > +                uint64_t nanos =
> > +                    (ticks_to_sleep - arm_cpu->gt_cntfrq_hz * seconds) *
> > +                    1000000000 / arm_cpu->gt_cntfrq_hz;
> > +
> > +                /*
> > +                 * Don't sleep for less than 2ms. This is believed to improve
> > +                 * latency of message passing workloads.
> > +                 */
>
> Believed by who ?

Alexander asked me to add this [1], so I'll defer to him. As I
mentioned on the thread I personally would prefer not to have this
logic without data specifically collected on the M1, but I don't have
a strong opinion.

> > +                if (!seconds && nanos < 2000000) {
> > +                    break;
> > +                }
> > +
> > +                struct timespec ts = { seconds, nanos };
> > +                hvf_wait_for_ipi(cpu, &ts);
> > +            }
>
> Why doesn't the timer timeout manifest as an IPI ? (Put another way,
> why is the timer interrupt special?)

Timer timeouts result in an IPI (via HV_EXIT_REASON_VTIMER_ACTIVATED)
if they become due while in hv_vcpu_run(). But at this point we are
not in hv_vcpu_run() (due to the aforementioned difference in wait
behavior between x86 and ARM) so we need to "manually" wait for the
timer to become due, re-enter the guest, let it exit with
HV_EXIT_REASON_VTIMER_ACTIVATED and then trigger the IPI.

Peter

[1] https://lists.gnu.org/archive/html/qemu-arm/2020-12/msg00056.html
Peter Maydell Feb. 10, 2021, 10:17 p.m. UTC | #3
On Wed, 10 Feb 2021 at 20:25, Peter Collingbourne <pcc@google.com> wrote:
>
> On Thu, Jan 28, 2021 at 8:25 AM Peter Maydell <peter.maydell@linaro.org> wrote:
> >
> > On Wed, 20 Jan 2021 at 22:44, Alexander Graf <agraf@csgraf.de> wrote:
> > > +                if (!seconds && nanos < 2000000) {
> > > +                    break;
> > > +                }
> > > +
> > > +                struct timespec ts = { seconds, nanos };
> > > +                hvf_wait_for_ipi(cpu, &ts);
> > > +            }
> >
> > Why doesn't the timer timeout manifest as an IPI ? (Put another way,
> > why is the timer interrupt special?)
>
> Timer timeouts result in an IPI (via HV_EXIT_REASON_VTIMER_ACTIVATED)
> if they become due while in hv_vcpu_run(). But at this point we are
> not in hv_vcpu_run() (due to the aforementioned difference in wait
> behavior between x86 and ARM) so we need to "manually" wait for the
> timer to become due, re-enter the guest, let it exit with
> HV_EXIT_REASON_VTIMER_ACTIVATED and then trigger the IPI.

But WFI doesn't just wait for a timer interrupt, it waits for
any interrupt. So it doesn't seem right that the timer interrupt
in particular is being handled specially here.

thanks
-- PMM
Alexander Graf Feb. 11, 2021, 12:33 a.m. UTC | #4
On 10.02.21 23:17, Peter Maydell wrote:
> On Wed, 10 Feb 2021 at 20:25, Peter Collingbourne <pcc@google.com> wrote:
>> On Thu, Jan 28, 2021 at 8:25 AM Peter Maydell <peter.maydell@linaro.org> wrote:
>>> On Wed, 20 Jan 2021 at 22:44, Alexander Graf <agraf@csgraf.de> wrote:
>>>> +                if (!seconds && nanos < 2000000) {
>>>> +                    break;
>>>> +                }
>>>> +
>>>> +                struct timespec ts = { seconds, nanos };
>>>> +                hvf_wait_for_ipi(cpu, &ts);
>>>> +            }
>>> Why doesn't the timer timeout manifest as an IPI ? (Put another way,
>>> why is the timer interrupt special?)
>> Timer timeouts result in an IPI (via HV_EXIT_REASON_VTIMER_ACTIVATED)
>> if they become due while in hv_vcpu_run(). But at this point we are
>> not in hv_vcpu_run() (due to the aforementioned difference in wait
>> behavior between x86 and ARM) so we need to "manually" wait for the
>> timer to become due, re-enter the guest, let it exit with
>> HV_EXIT_REASON_VTIMER_ACTIVATED and then trigger the IPI.
> But WFI doesn't just wait for a timer interrupt, it waits for
> any interrupt. So it doesn't seem right that the timer interrupt
> in particular is being handled specially here.


The vtimer is handled by hvf itself and results in a #vmexit when 
triggered. On wfi, we're switching from an hvf owned vtimer to a QEMU 
owned one. The only events that can happen to wake us from wfi are kicks 
(IRQs, maintenance events) or expiry of the vtimer. The select() logic 
in this patch handles both, as it wakes up on an IPI (kick because of 
IRQ/maint event) and exits gracefully after the timer deadline.


Alex
Alexander Graf March 21, 2021, 4:28 p.m. UTC | #5
On 10.02.21 23:17, Peter Maydell wrote:
> On Wed, 10 Feb 2021 at 20:25, Peter Collingbourne <pcc@google.com> wrote:
>> On Thu, Jan 28, 2021 at 8:25 AM Peter Maydell <peter.maydell@linaro.org> wrote:
>>> On Wed, 20 Jan 2021 at 22:44, Alexander Graf <agraf@csgraf.de> wrote:
>>>> +                if (!seconds && nanos < 2000000) {
>>>> +                    break;
>>>> +                }
>>>> +
>>>> +                struct timespec ts = { seconds, nanos };
>>>> +                hvf_wait_for_ipi(cpu, &ts);
>>>> +            }
>>> Why doesn't the timer timeout manifest as an IPI ? (Put another way,
>>> why is the timer interrupt special?)
>> Timer timeouts result in an IPI (via HV_EXIT_REASON_VTIMER_ACTIVATED)
>> if they become due while in hv_vcpu_run(). But at this point we are
>> not in hv_vcpu_run() (due to the aforementioned difference in wait
>> behavior between x86 and ARM) so we need to "manually" wait for the
>> timer to become due, re-enter the guest, let it exit with
>> HV_EXIT_REASON_VTIMER_ACTIVATED and then trigger the IPI.
> But WFI doesn't just wait for a timer interrupt, it waits for
> any interrupt. So it doesn't seem right that the timer interrupt
> in particular is being handled specially here.


It waits for either an external interrupt (vcpu_kick() -> IPI -> signal 
-> pselect exits) or a vtimer (kept in the CPU thread, handled by hvf 
natively when vCPU is running, handled through the pselect timeout when 
in WFI mode).

In hvf on ARM, the vtimer is handled specially. It is owned by the 
kernel code when we're inside the CPU loop. We don't even get vtimer MSR 
exits. However when user space decides to not return to the kernel CPU 
loop, it needs to handle the vtimer expiry itself. We really only have 
that case on WFI.


Alex
diff mbox series

Patch

diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
index 6d70ee742e..abef6a58f7 100644
--- a/accel/hvf/hvf-cpus.c
+++ b/accel/hvf/hvf-cpus.c
@@ -322,15 +322,14 @@  static int hvf_init_vcpu(CPUState *cpu)
     cpu->hvf = g_malloc0(sizeof(*cpu->hvf));
 
     /* init cpu signals */
-    sigset_t set;
     struct sigaction sigact;
 
     memset(&sigact, 0, sizeof(sigact));
     sigact.sa_handler = dummy_signal;
     sigaction(SIG_IPI, &sigact, NULL);
 
-    pthread_sigmask(SIG_BLOCK, NULL, &set);
-    sigdelset(&set, SIG_IPI);
+    pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->unblock_ipi_mask);
+    sigdelset(&cpu->hvf->unblock_ipi_mask, SIG_IPI);
 
 #ifdef __aarch64__
     r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t **)&cpu->hvf->exit, NULL);
diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
index c2ac6c8f97..7a397fe85a 100644
--- a/include/sysemu/hvf_int.h
+++ b/include/sysemu/hvf_int.h
@@ -51,6 +51,7 @@  extern HVFState *hvf_state;
 struct hvf_vcpu_state {
     uint64_t fd;
     void *exit;
+    sigset_t unblock_ipi_mask;
 };
 
 void assert_hvf_ok(hv_return_t ret);
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index 8f18efe856..f0850ab14a 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -2,6 +2,7 @@ 
  * QEMU Hypervisor.framework support for Apple Silicon
 
  * Copyright 2020 Alexander Graf <agraf@csgraf.de>
+ * Copyright 2020 Google LLC
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
@@ -17,6 +18,8 @@ 
 #include "sysemu/hvf_int.h"
 #include "sysemu/hw_accel.h"
 
+#include <mach/mach_time.h>
+
 #include "exec/address-spaces.h"
 #include "hw/irq.h"
 #include "qemu/main-loop.h"
@@ -411,6 +414,7 @@  int hvf_arch_init_vcpu(CPUState *cpu)
 
 void hvf_kick_vcpu_thread(CPUState *cpu)
 {
+    cpus_kick_thread(cpu);
     hv_vcpus_exit(&cpu->hvf->fd, 1);
 }
 
@@ -466,6 +470,18 @@  static int hvf_inject_interrupts(CPUState *cpu)
     return 0;
 }
 
+static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts)
+{
+    /*
+     * Use pselect to sleep so that other threads can IPI us while we're
+     * sleeping.
+     */
+    qatomic_mb_set(&cpu->thread_kicked, false);
+    qemu_mutex_unlock_iothread();
+    pselect(0, 0, 0, 0, ts, &cpu->hvf->unblock_ipi_mask);
+    qemu_mutex_lock_iothread();
+}
+
 int hvf_vcpu_exec(CPUState *cpu)
 {
     ARMCPU *arm_cpu = ARM_CPU(cpu);
@@ -577,6 +593,46 @@  int hvf_vcpu_exec(CPUState *cpu)
         }
         case EC_WFX_TRAP:
             advance_pc = true;
+            if (!(syndrome & WFX_IS_WFE) && !(cpu->interrupt_request &
+                (CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
+
+                uint64_t ctl;
+                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0,
+                                        &ctl);
+                assert_hvf_ok(r);
+
+                if (!(ctl & 1) || (ctl & 2)) {
+                    /* Timer disabled or masked, just wait for an IPI. */
+                    hvf_wait_for_ipi(cpu, NULL);
+                    break;
+                }
+
+                uint64_t cval;
+                r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CVAL_EL0,
+                                        &cval);
+                assert_hvf_ok(r);
+
+                int64_t ticks_to_sleep = cval - mach_absolute_time();
+                if (ticks_to_sleep < 0) {
+                    break;
+                }
+
+                uint64_t seconds = ticks_to_sleep / arm_cpu->gt_cntfrq_hz;
+                uint64_t nanos =
+                    (ticks_to_sleep - arm_cpu->gt_cntfrq_hz * seconds) *
+                    1000000000 / arm_cpu->gt_cntfrq_hz;
+
+                /*
+                 * Don't sleep for less than 2ms. This is believed to improve
+                 * latency of message passing workloads.
+                 */
+                if (!seconds && nanos < 2000000) {
+                    break;
+                }
+
+                struct timespec ts = { seconds, nanos };
+                hvf_wait_for_ipi(cpu, &ts);
+            }
             break;
         case EC_AA64_HVC:
             cpu_synchronize_state(cpu);