diff mbox

[3/3] Plumb the HAXM-based hardware acceleration support

Message ID 3d8be595c30d0009464ce16e90ee8fb5451e5605.1478619442.git.vpalatin@chromium.org
State New
Headers show

Commit Message

Vincent Palatin Nov. 8, 2016, 3:39 p.m. UTC
Use the Intel HAX kernel-based hardware acceleration module for
Windows and MacOSX (similar to KVM on Linux).

Based on the "target-i386: Add Intel HAX to android emulator" patch
from David Chou <david.j.chou@intel.com>

Signed-off-by: Vincent Palatin <vpalatin@chromium.org>
---
 Makefile.target           |   1 +
 configure                 |  18 +++++++
 cpu-exec.c                |  23 ++++++++-
 cpus.c                    | 125 ++++++++++++++++++++++++++++++++++++++++------
 exec.c                    |  16 ++++++
 hw/intc/apic_common.c     |   3 +-
 include/qom/cpu.h         |   5 ++
 include/sysemu/hw_accel.h |   9 ++++
 qemu-options.hx           |  11 ++++
 target-i386/Makefile.objs |   7 +++
 target-i386/seg_helper.c  |   5 ++
 target-i386/translate.c   |   8 +++
 vl.c                      |  17 +++++--
 13 files changed, 229 insertions(+), 19 deletions(-)

Comments

Paolo Bonzini Nov. 8, 2016, 8:37 p.m. UTC | #1
> diff --git a/cpu-exec.c b/cpu-exec.c
> index 4188fed..4bd238b 100644
> --- a/cpu-exec.c
> +++ b/cpu-exec.c

All this should not be needed anymore with unrestricted guest support.

> diff --git a/cpus.c b/cpus.c
> index fc78502..6e0f572 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -35,6 +35,7 @@
>  #include "sysemu/dma.h"
>  #include "sysemu/hw_accel.h"
>  #include "sysemu/kvm.h"
> +#include "sysemu/hax.h"
>  #include "qmp-commands.h"
>  #include "exec/exec-all.h"
>  
> @@ -1221,6 +1222,52 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
>      return NULL;
>  }
>  
> +static void *qemu_hax_cpu_thread_fn(void *arg)
> +{
> +    CPUState *cpu = arg;
> +    int r;
> +    qemu_thread_get_self(cpu->thread);
> +    qemu_mutex_lock(&qemu_global_mutex);
> +
> +    cpu->thread_id = qemu_get_thread_id();
> +    cpu->created = true;
> +    cpu->halted = 0;
> +    current_cpu = cpu;
> +
> +    hax_init_vcpu(cpu);
> +    qemu_cond_signal(&qemu_cpu_cond);
> +
> +    while (1) {
> +        if (cpu_can_run(cpu)) {
> +            r = hax_smp_cpu_exec(cpu);
> +            if (r == EXCP_DEBUG) {
> +                cpu_handle_guest_debug(cpu);
> +            }
> +        }
> +
> +        while (cpu_thread_is_idle(cpu)) {
> +            qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
> +        }
> +
> +        qemu_wait_io_event_common(cpu);
> +    }
> +    return NULL;
> +}
> +
> +
> +static void qemu_cpu_kick_no_halt(void)
> +{
> +    CPUState *cpu;
> +    /* Ensure whatever caused the exit has reached the CPU threads before
> +     * writing exit_request.
> +     */
> +    atomic_mb_set(&exit_request, 1);
> +    cpu = atomic_mb_read(&tcg_current_cpu);
> +    if (cpu) {
> +        cpu_exit(cpu);
> +    }
> +}
> +
>  static void qemu_cpu_kick_thread(CPUState *cpu)
>  {
>  #ifndef _WIN32
> @@ -1235,28 +1282,52 @@ static void qemu_cpu_kick_thread(CPUState *cpu)
>          fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
>          exit(1);
>      }
> -#else /* _WIN32 */
> -    abort();
> -#endif
> -}
>  
> -static void qemu_cpu_kick_no_halt(void)
> -{
> -    CPUState *cpu;
> -    /* Ensure whatever caused the exit has reached the CPU threads before
> -     * writing exit_request.
> +#ifdef CONFIG_DARWIN
> +    /* The cpu thread cannot catch it reliably when shutdown the guest on Mac.
> +     * We can double check it and resend it
>       */
> -    atomic_mb_set(&exit_request, 1);
> -    cpu = atomic_mb_read(&tcg_current_cpu);
> -    if (cpu) {
> -        cpu_exit(cpu);
> +    if (!exit_request)
> +        qemu_cpu_kick_no_halt();

This must be a conflict resolved wrong.  exit_request is never read by
the HAX code.

> +    if (hax_enabled() && hax_ug_platform())
> +        cpu->exit_request = 1;
> +#endif
> +#else /* _WIN32 */
> +    if (!qemu_cpu_is_self(cpu)) {
> +        CONTEXT tcgContext;
> +
> +        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
> +            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
> +                    GetLastError());
> +            exit(1);
> +        }
> +
> +        /* On multi-core systems, we are not sure that the thread is actually
> +         * suspended until we can get the context.
> +         */
> +        tcgContext.ContextFlags = CONTEXT_CONTROL;
> +        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
> +            continue;
> +        }
> +
> +        qemu_cpu_kick_no_halt();
> +        if (hax_enabled() && hax_ug_platform())
> +            cpu->exit_request = 1;
> +
> +        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
> +            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
> +                    GetLastError());
> +            exit(1);
> +        }

This is weird too.  The SuspendThread/ResumeThread dance comes from an
old version of QEMU.  It is not needed anymore and, again,
qemu_cpu_kick_no_halt would only be useful if hax_ug_platform() is false.

Here, Linux/KVM uses a signal and pthread_kill.  It's probably good for
HAX on Darwin too, but not on Windows.  It's possible that
SuspendThread/ResumeThread just does the right thing (sort of by
chance), in which case you can just keep it (removing
qemu_cpu_kick_no_halt).  However, there is a hax_raise_event in patch 2
that is unused.  If you can figure out how to use it it would be better.



> @@ -1617,6 +1618,21 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
>          } else {
>              new_block->host = phys_mem_alloc(new_block->max_length,
>                                               &new_block->mr->align);
> +            /*
> +             * In Hax, the qemu allocate the virtual address, and HAX kernel
> +             * populate the memory with physical memory. Currently we have no
> +             * paging, so user should make sure enough free memory in advance
> +             */
> +            if (hax_enabled()) {
> +                int ret;
> +                ret = hax_populate_ram((uint64_t)(uintptr_t)new_block->host,
> +                                       new_block->max_length);
> +                if (ret < 0) {
> +                    error_setg(errp, "Hax failed to populate ram");
> +                    return;
> +                }
> +            }

Please try removing this block and instead starting QEMU with
-mem-prealloc.  If it works, remove hax_populate_ram and just set
mem_prealloc to 1 in hax_accel_init.

>              if (!new_block->host) {
>                  error_setg_errno(errp, errno,
>                                   "cannot set up guest memory '%s'",
> diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
> index d78c885..dd4cdc8 100644
> --- a/hw/intc/apic_common.c
> +++ b/hw/intc/apic_common.c
> @@ -316,9 +316,10 @@ static void apic_common_realize(DeviceState *dev, Error **errp)
>  
>      /* Note: We need at least 1M to map the VAPIC option ROM */
>      if (!vapic && s->vapic_control & VAPIC_ENABLE_MASK &&
> -        ram_size >= 1024 * 1024) {
> +        kvm_enabled() && ram_size >= 1024 * 1024) {

This should rather be !hax_enabled(), even though the historical name
mentions KVM.

>          vapic = sysbus_create_simple("kvmvapic", -1, NULL);
>      }
> +
>      s->vapic = vapic;
>      if (apic_report_tpr_access && info->enable_tpr_reporting) {
>          info->enable_tpr_reporting(s, true);
> diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c
> index fb79f31..25b6003 100644
> --- a/target-i386/seg_helper.c
> +++ b/target-i386/seg_helper.c
> @@ -25,6 +25,7 @@
>  #include "exec/exec-all.h"
>  #include "exec/cpu_ldst.h"
>  #include "exec/log.h"
> +#include "sysemu/hax.h"
>  
>  //#define DEBUG_PCALL
>  
> @@ -1336,6 +1337,10 @@ bool x86_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
>              !(env->hflags & HF_SMM_MASK)) {
>              cpu_svm_check_intercept_param(env, SVM_EXIT_SMI, 0);
>              cs->interrupt_request &= ~CPU_INTERRUPT_SMI;
> +#ifdef CONFIG_HAX
> +            if (hax_enabled())
> +                cs->hax_vcpu->resync = 1;
> +#endif

Not needed for UG mode.

>              do_smm_enter(cpu);
>              ret = true;
>          } else if ((interrupt_request & CPU_INTERRUPT_NMI) &&
> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index 324103c..e027896 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c

Same.

> @@ -4060,6 +4066,7 @@ int main(int argc, char **argv, char **envp)
>      machine_class = select_machine();
>  
>      set_memory_options(&ram_slots, &maxram_size, machine_class);
> +    hax_pre_init(ram_size);

It should be possible to merge this with hax_accel_init.

>      os_daemonize();
>  
> @@ -4418,8 +4425,8 @@ int main(int argc, char **argv, char **envp)
>  
>      cpu_ticks_init();
>      if (icount_opts) {
> -        if (kvm_enabled() || xen_enabled()) {
> -            error_report("-icount is not allowed with kvm or xen");
> +        if (kvm_enabled() || xen_enabled() || hax_enabled()) {
> +            error_report("-icount is not allowed with kvm or xen or hax");

Let's say it's only allowed with TCG. :)  Again, thanks to UG mode if
hax_enabled() you'll have !tcg_enabled().

Paolo

>              exit(1);
>          }
>          configure_icount(icount_opts, &error_abort);
> @@ -4555,6 +4562,10 @@ int main(int argc, char **argv, char **envp)
>  
>      numa_post_machine_init();
>  
> +    if (hax_enabled()) {
> +        hax_sync_vcpus();
> +    }
> +
>      if (qemu_opts_foreach(qemu_find_opts("fw_cfg"),
>                            parse_fw_cfg, fw_cfg_find(), NULL) != 0) {
>          exit(1);
>
Vincent Palatin Nov. 9, 2016, 5:19 p.m. UTC | #2
On Tue, Nov 8, 2016 at 9:37 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>
>> diff --git a/cpu-exec.c b/cpu-exec.c
>> index 4188fed..4bd238b 100644
>> --- a/cpu-exec.c
>> +++ b/cpu-exec.c
>
> All this should not be needed anymore with unrestricted guest support.

Removed in v2

>
>> diff --git a/cpus.c b/cpus.c
>> index fc78502..6e0f572 100644
>> --- a/cpus.c
>> +++ b/cpus.c
>> @@ -35,6 +35,7 @@
>>  #include "sysemu/dma.h"
>>  #include "sysemu/hw_accel.h"
>>  #include "sysemu/kvm.h"
>> +#include "sysemu/hax.h"
>>  #include "qmp-commands.h"
>>  #include "exec/exec-all.h"
>>
>> @@ -1221,6 +1222,52 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
>>      return NULL;
>>  }
>>
>> +static void *qemu_hax_cpu_thread_fn(void *arg)
>> +{
>> +    CPUState *cpu = arg;
>> +    int r;
>> +    qemu_thread_get_self(cpu->thread);
>> +    qemu_mutex_lock(&qemu_global_mutex);
>> +
>> +    cpu->thread_id = qemu_get_thread_id();
>> +    cpu->created = true;
>> +    cpu->halted = 0;
>> +    current_cpu = cpu;
>> +
>> +    hax_init_vcpu(cpu);
>> +    qemu_cond_signal(&qemu_cpu_cond);
>> +
>> +    while (1) {
>> +        if (cpu_can_run(cpu)) {
>> +            r = hax_smp_cpu_exec(cpu);
>> +            if (r == EXCP_DEBUG) {
>> +                cpu_handle_guest_debug(cpu);
>> +            }
>> +        }
>> +
>> +        while (cpu_thread_is_idle(cpu)) {
>> +            qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
>> +        }
>> +
>> +        qemu_wait_io_event_common(cpu);
>> +    }
>> +    return NULL;
>> +}
>> +
>> +
>> +static void qemu_cpu_kick_no_halt(void)
>> +{
>> +    CPUState *cpu;
>> +    /* Ensure whatever caused the exit has reached the CPU threads before
>> +     * writing exit_request.
>> +     */
>> +    atomic_mb_set(&exit_request, 1);
>> +    cpu = atomic_mb_read(&tcg_current_cpu);
>> +    if (cpu) {
>> +        cpu_exit(cpu);
>> +    }
>> +}
>> +
>>  static void qemu_cpu_kick_thread(CPUState *cpu)
>>  {
>>  #ifndef _WIN32
>> @@ -1235,28 +1282,52 @@ static void qemu_cpu_kick_thread(CPUState *cpu)
>>          fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
>>          exit(1);
>>      }
>> -#else /* _WIN32 */
>> -    abort();
>> -#endif
>> -}
>>
>> -static void qemu_cpu_kick_no_halt(void)
>> -{
>> -    CPUState *cpu;
>> -    /* Ensure whatever caused the exit has reached the CPU threads before
>> -     * writing exit_request.
>> +#ifdef CONFIG_DARWIN
>> +    /* The cpu thread cannot catch it reliably when shutdown the guest on Mac.
>> +     * We can double check it and resend it
>>       */
>> -    atomic_mb_set(&exit_request, 1);
>> -    cpu = atomic_mb_read(&tcg_current_cpu);
>> -    if (cpu) {
>> -        cpu_exit(cpu);
>> +    if (!exit_request)
>> +        qemu_cpu_kick_no_halt();
>
> This must be a conflict resolved wrong.  exit_request is never read by
> the HAX code.

Maybe; it already exists in the Android branch that predates this patch.
I will need to sort this out.

>
>> +    if (hax_enabled() && hax_ug_platform())
>> +        cpu->exit_request = 1;
>> +#endif
>> +#else /* _WIN32 */
>> +    if (!qemu_cpu_is_self(cpu)) {
>> +        CONTEXT tcgContext;
>> +
>> +        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
>> +            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
>> +                    GetLastError());
>> +            exit(1);
>> +        }
>> +
>> +        /* On multi-core systems, we are not sure that the thread is actually
>> +         * suspended until we can get the context.
>> +         */
>> +        tcgContext.ContextFlags = CONTEXT_CONTROL;
>> +        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
>> +            continue;
>> +        }
>> +
>> +        qemu_cpu_kick_no_halt();
>> +        if (hax_enabled() && hax_ug_platform())
>> +            cpu->exit_request = 1;
>> +
>> +        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
>> +            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
>> +                    GetLastError());
>> +            exit(1);
>> +        }
>
> This is weird too.  The SuspendThread/ResumeThread dance comes from an
> old version of QEMU.  It is not needed anymore and,

Yes, I knew I was re-introducing removed code; that's why my original
message read "I'm not so happy with the qemu_cpu_kick_thread
mess in cpus.c, if somebody can help/advise."
To be fair, your original commit message removing it said that
this code was no longer useful for TCG, as there is no working support
for KVM on Windows, I was not sure whether it might be useful in this
case.

> again,
> qemu_cpu_kick_no_halt would only be useful if hax_ug_platform() is false.
>
> Here, Linux/KVM uses a signal and pthread_kill.  It's probably good for
> HAX on Darwin too, but not on Windows.  It's possible that
> SuspendThread/ResumeThread just does the right thing (sort of by
> chance), in which case you can just keep it (removing
> qemu_cpu_kick_no_halt).  However, there is a hax_raise_event in patch 2
> that is unused.  If you can figure out how to use it it would be better.

I still need to figure out this mess, I haven't made a working solution yet.

>
>
>> @@ -1617,6 +1618,21 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
>>          } else {
>>              new_block->host = phys_mem_alloc(new_block->max_length,
>>                                               &new_block->mr->align);
>> +            /*
>> +             * In Hax, the qemu allocate the virtual address, and HAX kernel
>> +             * populate the memory with physical memory. Currently we have no
>> +             * paging, so user should make sure enough free memory in advance
>> +             */
>> +            if (hax_enabled()) {
>> +                int ret;
>> +                ret = hax_populate_ram((uint64_t)(uintptr_t)new_block->host,
>> +                                       new_block->max_length);
>> +                if (ret < 0) {
>> +                    error_setg(errp, "Hax failed to populate ram");
>> +                    return;
>> +                }
>> +            }
>
> Please try removing this block and instead starting QEMU with
> -mem-prealloc.  If it works, remove hax_populate_ram and just set
> mem_prealloc to 1 in hax_accel_init.

It's not working: later, hax_set_ram() is unhappy about what it
finds in the mappings.
By the way, even if it had worked at startup, under memory pressure,
Windows might have evicted the physical pages (which is not supported
by the HAXM kernel module)

I can try to move this in os_mem_prealloc() if you feel it's cleaner.


>
>>              if (!new_block->host) {
>>                  error_setg_errno(errp, errno,
>>                                   "cannot set up guest memory '%s'",
>> diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
>> index d78c885..dd4cdc8 100644
>> --- a/hw/intc/apic_common.c
>> +++ b/hw/intc/apic_common.c
>> @@ -316,9 +316,10 @@ static void apic_common_realize(DeviceState *dev, Error **errp)
>>
>>      /* Note: We need at least 1M to map the VAPIC option ROM */
>>      if (!vapic && s->vapic_control & VAPIC_ENABLE_MASK &&
>> -        ram_size >= 1024 * 1024) {
>> +        kvm_enabled() && ram_size >= 1024 * 1024) {
>
> This should rather be !hax_enabled(), despite the historical name
> mentions KVM.

Updated in v2, thanks for the hint.

>
>>          vapic = sysbus_create_simple("kvmvapic", -1, NULL);
>>      }
>> +
>>      s->vapic = vapic;
>>      if (apic_report_tpr_access && info->enable_tpr_reporting) {
>>          info->enable_tpr_reporting(s, true);
>> diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c
>> index fb79f31..25b6003 100644
>> --- a/target-i386/seg_helper.c
>> +++ b/target-i386/seg_helper.c
>> @@ -25,6 +25,7 @@
>>  #include "exec/exec-all.h"
>>  #include "exec/cpu_ldst.h"
>>  #include "exec/log.h"
>> +#include "sysemu/hax.h"
>>
>>  //#define DEBUG_PCALL
>>
>> @@ -1336,6 +1337,10 @@ bool x86_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
>>              !(env->hflags & HF_SMM_MASK)) {
>>              cpu_svm_check_intercept_param(env, SVM_EXIT_SMI, 0);
>>              cs->interrupt_request &= ~CPU_INTERRUPT_SMI;
>> +#ifdef CONFIG_HAX
>> +            if (hax_enabled())
>> +                cs->hax_vcpu->resync = 1;
>> +#endif
>
> Not needed for UG mode.

Removed in v2.

>
>>              do_smm_enter(cpu);
>>              ret = true;
>>          } else if ((interrupt_request & CPU_INTERRUPT_NMI) &&
>> diff --git a/target-i386/translate.c b/target-i386/translate.c
>> index 324103c..e027896 100644
>> --- a/target-i386/translate.c
>> +++ b/target-i386/translate.c
>
> Same.
>
>> @@ -4060,6 +4066,7 @@ int main(int argc, char **argv, char **envp)
>>      machine_class = select_machine();
>>
>>      set_memory_options(&ram_slots, &maxram_size, machine_class);
>> +    hax_pre_init(ram_size);
>
> It should be possible to merge this with hax_accel_init.

Done in v2. I have added a new patch simplifying the init
(I have also realized it contained Android-specific code leftovers and
was misusing the 'allowed' property of the AccelClass).

>
>>      os_daemonize();
>>
>> @@ -4418,8 +4425,8 @@ int main(int argc, char **argv, char **envp)
>>
>>      cpu_ticks_init();
>>      if (icount_opts) {
>> -        if (kvm_enabled() || xen_enabled()) {
>> -            error_report("-icount is not allowed with kvm or xen");
>> +        if (kvm_enabled() || xen_enabled() || hax_enabled()) {
>> +            error_report("-icount is not allowed with kvm or xen or hax");
>
> Let's say it's only allowed with TCG. :)  Again, thanks to UG mode if
> hax_enabled() you'll have !tcg_enabled().

Updated in v2.
I actually did not realize earlier that the interpreter mode was
running with tcg_enabled

>
> Paolo
>
>>              exit(1);
>>          }
>>          configure_icount(icount_opts, &error_abort);
>> @@ -4555,6 +4562,10 @@ int main(int argc, char **argv, char **envp)
>>
>>      numa_post_machine_init();
>>
>> +    if (hax_enabled()) {
>> +        hax_sync_vcpus();
>> +    }
>> +
>>      if (qemu_opts_foreach(qemu_find_opts("fw_cfg"),
>>                            parse_fw_cfg, fw_cfg_find(), NULL) != 0) {
>>          exit(1);
>>
Paolo Bonzini Nov. 9, 2016, 5:32 p.m. UTC | #3
On 09/11/2016 18:19, Vincent Palatin wrote:
>> > Please try removing this block and instead starting QEMU with
>> > -mem-prealloc.  If it works, remove hax_populate_ram and just set
>> > mem_prealloc to 1 in hax_accel_init.
> it's not working, later hax_set_ram() is unhappy about what it is
> finding the mappings.
> By the way, even if it had worked at startup, under memory pressure,
> Windows might have evicted the physical pages (which is not supported
> by the HAXM kernel module)
> 
> I can try to move this in os_mem_prealloc() if you feel it's cleaner.

Yes, that would work!

Paolo
Vincent Palatin Nov. 11, 2016, 11:25 a.m. UTC | #4
On Wed, Nov 9, 2016 at 6:32 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>
>
> On 09/11/2016 18:19, Vincent Palatin wrote:
>>> > Please try removing this block and instead starting QEMU with
>>> > -mem-prealloc.  If it works, remove hax_populate_ram and just set
>>> > mem_prealloc to 1 in hax_accel_init.
>> it's not working, later hax_set_ram() is unhappy about what it is
>> finding the mappings.
>> By the way, even if it had worked at startup, under memory pressure,
>> Windows might have evicted the physical pages (which is not supported
>> by the HAXM kernel module)
>>
>> I can try to move this in os_mem_prealloc() if you feel it's cleaner.
>
> Yes, that would work!

Actually, os_mem_prealloc seems to be mostly a no-op in the win32 version,
so I have postponed this change until after my v2 patchset.
Paolo Bonzini Nov. 11, 2016, 11:26 a.m. UTC | #5
On 11/11/2016 12:25, Vincent Palatin wrote:
> On Wed, Nov 9, 2016 at 6:32 PM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>>
>>
>> On 09/11/2016 18:19, Vincent Palatin wrote:
>>>>> Please try removing this block and instead starting QEMU with
>>>>> -mem-prealloc.  If it works, remove hax_populate_ram and just set
>>>>> mem_prealloc to 1 in hax_accel_init.
>>> it's not working, later hax_set_ram() is unhappy about what it is
>>> finding the mappings.
>>> By the way, even if it had worked at startup, under memory pressure,
>>> Windows might have evicted the physical pages (which is not supported
>>> by the HAXM kernel module)
>>>
>>> I can try to move this in os_mem_prealloc() if you feel it's cleaner.
>>
>> Yes, that would work!
> 
> Actually, os_mem_prealloc seems mostly no-op in the win32 version,
> I have postponed this change for after my v2 patchset.

Good, we'll sort it out during review.

Paolo
diff mbox

Patch

diff --git a/Makefile.target b/Makefile.target
index 7a5080e..dab81e7 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -96,6 +96,7 @@  obj-y += target-$(TARGET_BASE_ARCH)/
 obj-y += disas.o
 obj-y += tcg-runtime.o
 obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o
+obj-$(call lnot,$(CONFIG_HAX)) += hax-stub.o
 obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o
 
 obj-$(CONFIG_LIBDECNUMBER) += libdecnumber/decContext.o
diff --git a/configure b/configure
index fd6f898..424453b 100755
--- a/configure
+++ b/configure
@@ -230,6 +230,7 @@  vhost_net="no"
 vhost_scsi="no"
 vhost_vsock="no"
 kvm="no"
+hax="no"
 colo="yes"
 rdma=""
 gprof="no"
@@ -563,6 +564,7 @@  CYGWIN*)
 ;;
 MINGW32*)
   mingw32="yes"
+  hax="yes"
   audio_possible_drivers="dsound sdl"
   if check_include dsound.h; then
     audio_drv_list="dsound"
@@ -610,6 +612,7 @@  OpenBSD)
 Darwin)
   bsd="yes"
   darwin="yes"
+  hax="yes"
   LDFLAGS_SHARED="-bundle -undefined dynamic_lookup"
   if [ "$cpu" = "x86_64" ] ; then
     QEMU_CFLAGS="-arch x86_64 $QEMU_CFLAGS"
@@ -919,6 +922,10 @@  for opt do
   ;;
   --enable-kvm) kvm="yes"
   ;;
+  --disable-hax) hax="no"
+  ;;
+  --enable-hax) hax="yes"
+  ;;
   --disable-colo) colo="no"
   ;;
   --enable-colo) colo="yes"
@@ -1371,6 +1378,7 @@  disabled with --disable-FEATURE, default is enabled if available:
   fdt             fdt device tree
   bluez           bluez stack connectivity
   kvm             KVM acceleration support
+  hax             HAX acceleration support
   colo            COarse-grain LOck-stepping VM for Non-stop Service
   rdma            RDMA-based migration support
   vde             support for vde network
@@ -5038,6 +5046,7 @@  echo "ATTR/XATTR support $attr"
 echo "Install blobs     $blobs"
 echo "KVM support       $kvm"
 echo "COLO support      $colo"
+echo "HAX support       $hax"
 echo "RDMA support      $rdma"
 echo "TCG interpreter   $tcg_interpreter"
 echo "fdt support       $fdt"
@@ -6022,6 +6031,15 @@  case "$target_name" in
       fi
     fi
 esac
+if test "$hax" = "yes" ; then
+  if test "$target_softmmu" = "yes" ; then
+    case "$target_name" in
+    i386|x86_64)
+      echo "CONFIG_HAX=y" >> $config_target_mak
+    ;;
+    esac
+  fi
+fi
 if test "$target_bigendian" = "yes" ; then
   echo "TARGET_WORDS_BIGENDIAN=y" >> $config_target_mak
 fi
diff --git a/cpu-exec.c b/cpu-exec.c
index 4188fed..4bd238b 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -25,6 +25,7 @@ 
 #include "qemu/atomic.h"
 #include "sysemu/qtest.h"
 #include "qemu/timer.h"
+#include "sysemu/hax.h"
 #include "exec/address-spaces.h"
 #include "qemu/rcu.h"
 #include "exec/tb-hash.h"
@@ -461,11 +462,24 @@  static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
     return false;
 }
 
+/*
+ * QEMU emulation can happen because of MMIO or emulation mode (i.e. non-PG
+ * mode). When it is because of MMIO, the interrupt should not be emulated,
+ * because MMIO is emulated for only one instruction before returning to the
+ * HAX kernel.
+ */
+static int need_handle_intr_request(CPUState *cpu)
+{
+    if (!hax_enabled() || hax_vcpu_emulation_mode(cpu))
+        return cpu->interrupt_request;
+    return 0;
+}
+
 static inline void cpu_handle_interrupt(CPUState *cpu,
                                         TranslationBlock **last_tb)
 {
     CPUClass *cc = CPU_GET_CLASS(cpu);
-    int interrupt_request = cpu->interrupt_request;
+    int interrupt_request = need_handle_intr_request(cpu);
 
     if (unlikely(interrupt_request)) {
         if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
@@ -632,10 +646,17 @@  int cpu_exec(CPUState *cpu)
                 break;
             }
 
+            if (hax_enabled() && !hax_vcpu_exec(cpu))
+                longjmp(cpu->jmp_env, 1);
+
             for(;;) {
                 cpu_handle_interrupt(cpu, &last_tb);
                 tb = tb_find(cpu, last_tb, tb_exit);
                 cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc);
+
+                if (hax_enabled() && hax_stop_emulation(cpu))
+                    cpu_loop_exit(cpu);
+
                 /* Try to align the host and virtual clocks
                    if the guest is in advance */
                 align_clocks(&sc, cpu);
diff --git a/cpus.c b/cpus.c
index fc78502..6e0f572 100644
--- a/cpus.c
+++ b/cpus.c
@@ -35,6 +35,7 @@ 
 #include "sysemu/dma.h"
 #include "sysemu/hw_accel.h"
 #include "sysemu/kvm.h"
+#include "sysemu/hax.h"
 #include "qmp-commands.h"
 #include "exec/exec-all.h"
 
@@ -1221,6 +1222,52 @@  static void *qemu_tcg_cpu_thread_fn(void *arg)
     return NULL;
 }
 
+static void *qemu_hax_cpu_thread_fn(void *arg)
+{
+    CPUState *cpu = arg;
+    int r;
+    qemu_thread_get_self(cpu->thread);
+    qemu_mutex_lock(&qemu_global_mutex);
+
+    cpu->thread_id = qemu_get_thread_id();
+    cpu->created = true;
+    cpu->halted = 0;
+    current_cpu = cpu;
+
+    hax_init_vcpu(cpu);
+    qemu_cond_signal(&qemu_cpu_cond);
+
+    while (1) {
+        if (cpu_can_run(cpu)) {
+            r = hax_smp_cpu_exec(cpu);
+            if (r == EXCP_DEBUG) {
+                cpu_handle_guest_debug(cpu);
+            }
+        }
+
+        while (cpu_thread_is_idle(cpu)) {
+            qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
+        }
+
+        qemu_wait_io_event_common(cpu);
+    }
+    return NULL;
+}
+
+
+static void qemu_cpu_kick_no_halt(void)
+{
+    CPUState *cpu;
+    /* Ensure whatever caused the exit has reached the CPU threads before
+     * writing exit_request.
+     */
+    atomic_mb_set(&exit_request, 1);
+    cpu = atomic_mb_read(&tcg_current_cpu);
+    if (cpu) {
+        cpu_exit(cpu);
+    }
+}
+
 static void qemu_cpu_kick_thread(CPUState *cpu)
 {
 #ifndef _WIN32
@@ -1235,28 +1282,52 @@  static void qemu_cpu_kick_thread(CPUState *cpu)
         fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
         exit(1);
     }
-#else /* _WIN32 */
-    abort();
-#endif
-}
 
-static void qemu_cpu_kick_no_halt(void)
-{
-    CPUState *cpu;
-    /* Ensure whatever caused the exit has reached the CPU threads before
-     * writing exit_request.
+#ifdef CONFIG_DARWIN
+    /* The cpu thread cannot catch it reliably when shutting down the guest on
+     * Mac, so we double-check and resend it.
      */
-    atomic_mb_set(&exit_request, 1);
-    cpu = atomic_mb_read(&tcg_current_cpu);
-    if (cpu) {
-        cpu_exit(cpu);
+    if (!exit_request)
+        qemu_cpu_kick_no_halt();
+
+    if (hax_enabled() && hax_ug_platform())
+        cpu->exit_request = 1;
+#endif
+#else /* _WIN32 */
+    if (!qemu_cpu_is_self(cpu)) {
+        CONTEXT tcgContext;
+
+        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
+            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
+                    GetLastError());
+            exit(1);
+        }
+
+        /* On multi-core systems, we are not sure that the thread is actually
+         * suspended until we can get the context.
+         */
+        tcgContext.ContextFlags = CONTEXT_CONTROL;
+        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
+            continue;
+        }
+
+        qemu_cpu_kick_no_halt();
+        if (hax_enabled() && hax_ug_platform())
+            cpu->exit_request = 1;
+
+        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
+            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
+                    GetLastError());
+            exit(1);
+        }
     }
+#endif
 }
 
 void qemu_cpu_kick(CPUState *cpu)
 {
     qemu_cond_broadcast(cpu->halt_cond);
-    if (tcg_enabled()) {
+    if (tcg_enabled() && !(hax_enabled() && hax_ug_platform())) {
         qemu_cpu_kick_no_halt();
     } else {
         qemu_cpu_kick_thread(cpu);
@@ -1293,6 +1364,7 @@  void qemu_mutex_lock_iothread(void)
      * TCG code execution.
      */
     if (!tcg_enabled() || qemu_in_vcpu_thread() ||
+       (hax_enabled() && hax_ug_platform()) ||
         !first_cpu || !first_cpu->created) {
         qemu_mutex_lock(&qemu_global_mutex);
         atomic_dec(&iothread_requesting_mutex);
@@ -1396,6 +1468,9 @@  static void qemu_tcg_init_vcpu(CPUState *cpu)
     static QemuCond *tcg_halt_cond;
     static QemuThread *tcg_cpu_thread;
 
+    if (hax_enabled())
+        hax_init_vcpu(cpu);
+
     /* share a single thread for all cpus with TCG */
     if (!tcg_cpu_thread) {
         cpu->thread = g_malloc0(sizeof(QemuThread));
@@ -1419,6 +1494,26 @@  static void qemu_tcg_init_vcpu(CPUState *cpu)
     }
 }
 
+static void qemu_hax_start_vcpu(CPUState *cpu)
+{
+    char thread_name[VCPU_THREAD_NAME_SIZE];
+
+    cpu->thread = g_malloc0(sizeof(QemuThread));
+    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
+    qemu_cond_init(cpu->halt_cond);
+
+    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
+             cpu->cpu_index);
+    qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
+                       cpu, QEMU_THREAD_JOINABLE);
+#ifdef _WIN32
+    cpu->hThread = qemu_thread_get_handle(cpu->thread);
+#endif
+    while (!cpu->created) {
+        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
+    }
+}
+
 static void qemu_kvm_start_vcpu(CPUState *cpu)
 {
     char thread_name[VCPU_THREAD_NAME_SIZE];
@@ -1469,6 +1564,8 @@  void qemu_init_vcpu(CPUState *cpu)
 
     if (kvm_enabled()) {
         qemu_kvm_start_vcpu(cpu);
+    } else if (hax_enabled() && hax_ug_platform()) {
+        qemu_hax_start_vcpu(cpu);
     } else if (tcg_enabled()) {
         qemu_tcg_init_vcpu(cpu);
     } else {
diff --git a/exec.c b/exec.c
index 3d867f1..c46fabc 100644
--- a/exec.c
+++ b/exec.c
@@ -31,6 +31,7 @@ 
 #include "hw/xen/xen.h"
 #endif
 #include "sysemu/kvm.h"
+#include "sysemu/hax.h"
 #include "sysemu/sysemu.h"
 #include "qemu/timer.h"
 #include "qemu/config-file.h"
@@ -1617,6 +1618,21 @@  static void ram_block_add(RAMBlock *new_block, Error **errp)
         } else {
             new_block->host = phys_mem_alloc(new_block->max_length,
                                              &new_block->mr->align);
+            /*
+             * With HAX, QEMU allocates the virtual address range and the HAX
+             * kernel populates it with physical memory. Currently there is no
+             * paging, so the user must ensure enough free memory in advance.
+             */
+            if (hax_enabled()) {
+                int ret;
+                ret = hax_populate_ram((uint64_t)(uintptr_t)new_block->host,
+                                       new_block->max_length);
+                if (ret < 0) {
+                    error_setg(errp, "Hax failed to populate ram");
+                    return;
+                }
+            }
+
             if (!new_block->host) {
                 error_setg_errno(errp, errno,
                                  "cannot set up guest memory '%s'",
diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index d78c885..dd4cdc8 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@@ -316,9 +316,10 @@  static void apic_common_realize(DeviceState *dev, Error **errp)
 
     /* Note: We need at least 1M to map the VAPIC option ROM */
     if (!vapic && s->vapic_control & VAPIC_ENABLE_MASK &&
-        ram_size >= 1024 * 1024) {
+        kvm_enabled() && ram_size >= 1024 * 1024) {
         vapic = sysbus_create_simple("kvmvapic", -1, NULL);
     }
+
     s->vapic = vapic;
     if (apic_report_tpr_access && info->enable_tpr_reporting) {
         info->enable_tpr_reporting(s, true);
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 3f79a8e..ca4d0fb 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -227,6 +227,8 @@  struct CPUWatchpoint {
 struct KVMState;
 struct kvm_run;
 
+struct hax_vcpu_state;
+
 #define TB_JMP_CACHE_BITS 12
 #define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)
 
@@ -392,6 +394,9 @@  struct CPUState {
        (absolute value) offset as small as possible.  This reduces code
        size, especially for hosts without large memory offsets.  */
     uint32_t tcg_exit_req;
+
+    bool hax_vcpu_dirty;
+    struct hax_vcpu_state *hax_vcpu;
 };
 
 QTAILQ_HEAD(CPUTailQ, CPUState);
diff --git a/include/sysemu/hw_accel.h b/include/sysemu/hw_accel.h
index 03812cf..a0d5a9e 100644
--- a/include/sysemu/hw_accel.h
+++ b/include/sysemu/hw_accel.h
@@ -20,6 +20,9 @@  static inline void cpu_synchronize_state(CPUState *cpu)
     if (kvm_enabled()) {
         kvm_cpu_synchronize_state(cpu);
     }
+    if (hax_enabled() && hax_ug_platform()) {
+        hax_cpu_synchronize_state(cpu);
+    }
 }
 
 static inline void cpu_synchronize_post_reset(CPUState *cpu)
@@ -27,6 +30,9 @@  static inline void cpu_synchronize_post_reset(CPUState *cpu)
     if (kvm_enabled()) {
         kvm_cpu_synchronize_post_reset(cpu);
     }
+    if (hax_enabled() && hax_ug_platform()) {
+        hax_cpu_synchronize_post_reset(cpu);
+    }
 }
 
 static inline void cpu_synchronize_post_init(CPUState *cpu)
@@ -34,6 +40,9 @@  static inline void cpu_synchronize_post_init(CPUState *cpu)
     if (kvm_enabled()) {
         kvm_cpu_synchronize_post_init(cpu);
     }
+    if (hax_enabled() && hax_ug_platform()) {
+        hax_cpu_synchronize_post_init(cpu);
+    }
 }
 
 #endif /* QEMU_HW_ACCEL_H */
diff --git a/qemu-options.hx b/qemu-options.hx
index 4536e18..bd28219 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -3259,6 +3259,17 @@  Enable KVM full virtualization support. This option is only available
 if KVM support is enabled when compiling.
 ETEXI
 
+DEF("enable-hax", 0, QEMU_OPTION_enable_hax, \
+    "-enable-hax     enable HAX virtualization support\n", QEMU_ARCH_I386)
+STEXI
+@item -enable-hax
+@findex -enable-hax
+Enable HAX (Hardware-based Acceleration eXecution) support. This option
+is only available if HAX support is enabled when compiling. HAX is only
+applicable to Mac and Windows platforms, and thus does not conflict with
+KVM.
+ETEXI
+
 DEF("xen-domid", HAS_ARG, QEMU_OPTION_xen_domid,
     "-xen-domid id   specify xen guest domain id\n", QEMU_ARCH_ALL)
 DEF("xen-create", 0, QEMU_OPTION_xen_create,
diff --git a/target-i386/Makefile.objs b/target-i386/Makefile.objs
index b223d79..8c53209 100644
--- a/target-i386/Makefile.objs
+++ b/target-i386/Makefile.objs
@@ -5,3 +5,10 @@  obj-y += gdbstub.o
 obj-$(CONFIG_SOFTMMU) += machine.o arch_memory_mapping.o arch_dump.o monitor.o
 obj-$(CONFIG_KVM) += kvm.o hyperv.o
 obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o
+# HAX support
+ifdef CONFIG_WIN32
+obj-$(CONFIG_HAX) += hax-all.o hax-slot.o hax-windows.o
+endif
+ifdef CONFIG_DARWIN
+obj-$(CONFIG_HAX) += hax-all.o hax-slot.o hax-darwin.o
+endif
diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c
index fb79f31..25b6003 100644
--- a/target-i386/seg_helper.c
+++ b/target-i386/seg_helper.c
@@ -25,6 +25,7 @@ 
 #include "exec/exec-all.h"
 #include "exec/cpu_ldst.h"
 #include "exec/log.h"
+#include "sysemu/hax.h"
 
 //#define DEBUG_PCALL
 
@@ -1336,6 +1337,10 @@  bool x86_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
             !(env->hflags & HF_SMM_MASK)) {
             cpu_svm_check_intercept_param(env, SVM_EXIT_SMI, 0);
             cs->interrupt_request &= ~CPU_INTERRUPT_SMI;
+#ifdef CONFIG_HAX
+            if (hax_enabled())
+                cs->hax_vcpu->resync = 1;
+#endif
             do_smm_enter(cpu);
             ret = true;
         } else if ((interrupt_request & CPU_INTERRUPT_NMI) &&
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 324103c..e027896 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -30,6 +30,7 @@ 
 
 #include "trace-tcg.h"
 #include "exec/log.h"
+#include "sysemu/hax.h"
 
 
 #define PREFIX_REPZ   0x01
@@ -8421,6 +8422,13 @@  void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb)
         }
 
         pc_ptr = disas_insn(env, dc, pc_ptr);
+
+        if (hax_enabled() && hax_stop_translate(cs)) {
+            gen_jmp_im(pc_ptr - dc->cs_base);
+            gen_eob(dc);
+            break;
+        }
+
         /* stop translation if indicated */
         if (dc->is_jmp)
             break;
diff --git a/vl.c b/vl.c
index 319f641..3804b9b 100644
--- a/vl.c
+++ b/vl.c
@@ -92,6 +92,7 @@  int main(int argc, char **argv)
 #include "sysemu/cpus.h"
 #include "migration/colo.h"
 #include "sysemu/kvm.h"
+#include "sysemu/hax.h"
 #include "qapi/qmp/qjson.h"
 #include "qemu/option.h"
 #include "qemu/config-file.h"
@@ -1959,7 +1960,7 @@  static void main_loop(void)
     int64_t ti;
 #endif
     do {
-        nonblocking = !kvm_enabled() && !xen_enabled() && last_io > 0;
+        nonblocking = !kvm_enabled() && !xen_enabled() && !hax_enabled() && last_io > 0;
 #ifdef CONFIG_PROFILER
         ti = profile_getclock();
 #endif
@@ -3724,6 +3725,11 @@  int main(int argc, char **argv, char **envp)
                 olist = qemu_find_opts("machine");
                 qemu_opts_parse_noisily(olist, "accel=kvm", false);
                 break;
+            case QEMU_OPTION_enable_hax:
+                olist = qemu_find_opts("machine");
+                qemu_opts_parse_noisily(olist, "accel=hax", false);
+                hax_disable(0);
+                break;
             case QEMU_OPTION_M:
             case QEMU_OPTION_machine:
                 olist = qemu_find_opts("machine");
@@ -4060,6 +4066,7 @@  int main(int argc, char **argv, char **envp)
     machine_class = select_machine();
 
     set_memory_options(&ram_slots, &maxram_size, machine_class);
+    hax_pre_init(ram_size);
 
     os_daemonize();
 
@@ -4418,8 +4425,8 @@  int main(int argc, char **argv, char **envp)
 
     cpu_ticks_init();
     if (icount_opts) {
-        if (kvm_enabled() || xen_enabled()) {
-            error_report("-icount is not allowed with kvm or xen");
+        if (kvm_enabled() || xen_enabled() || hax_enabled()) {
+            error_report("-icount is not allowed with kvm or xen or hax");
             exit(1);
         }
         configure_icount(icount_opts, &error_abort);
@@ -4555,6 +4562,10 @@  int main(int argc, char **argv, char **envp)
 
     numa_post_machine_init();
 
+    if (hax_enabled()) {
+        hax_sync_vcpus();
+    }
+
     if (qemu_opts_foreach(qemu_find_opts("fw_cfg"),
                           parse_fw_cfg, fw_cfg_find(), NULL) != 0) {
         exit(1);