diff mbox

[v2,2/5] target-i386: Add Intel HAX files

Message ID 08625798334d3ea3ccead1b40d1068982b40f1d1.1478863621.git.vpalatin@chromium.org
State New
Headers show

Commit Message

Vincent Palatin Nov. 11, 2016, 11:28 a.m. UTC
That's a forward port of the core HAX interface code mostly unmodified from
emu-2.2-release branch in the external/qemu-android repository as used by
the Android emulator.

The original commit was "target-i386: Add Intel HAX to android emulator"
saying:
"""
  Backport of 2b3098ff27bab079caab9b46b58546b5036f5c0c
  from studio-1.4-dev into emu-master-dev

    Intel HAX (harware acceleration) will enhance android emulator performance
    in Windows and Mac OS X in the systems powered by Intel processors with
    "Intel Hardware Accelerated Execution Manager" package installed when
    user runs android emulator with Intel target.

    Signed-off-by: David Chou <david.j.chou@intel.com>
"""

It has minor modifications to build and run along with the current
code base.
The formatting has been fixed to go through scripts/checkpatch.pl.
Update the DPRINTF macros to get the instantiations checked by the
compiler.
Does not include the Darwin support.

Signed-off-by: Vincent Palatin <vpalatin@chromium.org>
---
 hax-stub.c                  |   74 +++
 include/sysemu/hax.h        |   66 ++
 target-i386/hax-all.c       | 1490 +++++++++++++++++++++++++++++++++++++++++++
 target-i386/hax-i386.h      |   91 +++
 target-i386/hax-interface.h |  357 +++++++++++
 target-i386/hax-slot.c      |  333 ++++++++++
 target-i386/hax-slot.h      |   58 ++
 target-i386/hax-windows.c   |  509 +++++++++++++++
 target-i386/hax-windows.h   |   89 +++
 9 files changed, 3067 insertions(+)
 create mode 100644 hax-stub.c
 create mode 100644 include/sysemu/hax.h
 create mode 100644 target-i386/hax-all.c
 create mode 100644 target-i386/hax-i386.h
 create mode 100644 target-i386/hax-interface.h
 create mode 100644 target-i386/hax-slot.c
 create mode 100644 target-i386/hax-slot.h
 create mode 100644 target-i386/hax-windows.c
 create mode 100644 target-i386/hax-windows.h

Comments

Stefan Weil Nov. 14, 2016, 9:29 a.m. UTC | #1
Am 11.11.2016 um 12:28 schrieb Vincent Palatin:
[...]
> 
> Signed-off-by: Vincent Palatin <vpalatin@chromium.org>
> ---
>  hax-stub.c                  |   74 +++
>  include/sysemu/hax.h        |   66 ++
>  target-i386/hax-all.c       | 1490 +++++++++++++++++++++++++++++++++++++++++++

Git warns about a whitespace issue:
The empty last line of target-i386/hax-all.c should be removed.

Stefan
Vincent Palatin Nov. 14, 2016, 9:38 a.m. UTC | #2
On Mon, Nov 14, 2016 at 10:29 AM, Stefan Weil <sw@weilnetz.de> wrote:
> Am 11.11.2016 um 12:28 schrieb Vincent Palatin:
> [...]
>>
>> Signed-off-by: Vincent Palatin <vpalatin@chromium.org>
>> ---
>>  hax-stub.c                  |   74 +++
>>  include/sysemu/hax.h        |   66 ++
>>  target-i386/hax-all.c       | 1490 +++++++++++++++++++++++++++++++++++++++++++
>
> Git warns about a whitespace issue:
> The empty last line of target-i386/hax-all.c should be removed.

Done, I will send it with the v3 series.
Paolo Bonzini Nov. 14, 2016, 10:15 a.m. UTC | #3
On 11/11/2016 12:28, Vincent Palatin wrote:
> +
> +    memcpy(env->xmm_regs, fpu.mmx_1, sizeof(fpu.mmx_1));
> +    memcpy((ZMMReg *) (env->xmm_regs) + 8, fpu.mmx_2, sizeof(fpu.mmx_2));

HAX will only support SSE (128-bit) registers, while env->xmm_regs
supports AVX512 (512-bit) so you have to copy registers one by one.

Is there documentation for HAX?  In particular I'm curious as to what
the CPUID information looks like in the guest, and whether there are
ioctls to change it.

> +
> +static int hax_handle_fastmmio(CPUArchState *env, struct hax_fastmmio *hft)
> +{
> +    uint64_t buf = 0;
> +    /*
> +     * With fast MMIO, QEMU need not sync vCPU state with HAXM
> +     * driver because it will only invoke MMIO handler
> +     * However, some MMIO operations utilize virtual address like qemu_pipe
> +     * Thus we need to sync the CR0, CR3 and CR4 so that QEMU
> +     * can translate the guest virtual address to guest physical
> +     * address
> +     */
> +    env->cr[0] = hft->_cr0;
> +    env->cr[2] = hft->_cr2;
> +    env->cr[3] = hft->_cr3;
> +    env->cr[4] = hft->_cr4;

These seem to apply only to some parts of the Android emulator that are
not upstream, so you can remove them.

> +    buf = hft->value;
> +
> +    cpu_physical_memory_rw(hft->gpa, (uint8_t *) &buf, hft->size,
> +                           hft->direction);
> +    if (hft->direction == 0) {
> +        hft->value = buf;
> +    }

No need to use "buf", you can use &hft->value directly.

> +    return 0;
> +}
> +
> +static int hax_handle_io(CPUArchState *env, uint32_t df, uint16_t port,
> +                         int direction, int size, int count, void *buffer)
> +{
> +    uint8_t *ptr;
> +    int i;
> +
> +    if (!df) {
> +        ptr = (uint8_t *) buffer;
> +    } else {
> +        ptr = buffer + size * count - size;
> +    }
> +    for (i = 0; i < count; i++) {
> +        if (direction == HAX_EXIT_IO_IN) {
> +            switch (size) {
> +            case 1:
> +                stb_p(ptr, cpu_inb(port));
> +                break;
> +            case 2:
> +                stw_p(ptr, cpu_inw(port));
> +                break;
> +            case 4:
> +                stl_p(ptr, cpu_inl(port));
> +                break;
> +            }
> +        } else {
> +            switch (size) {
> +            case 1:
> +                cpu_outb(port, ldub_p(ptr));
> +                break;
> +            case 2:
> +                cpu_outw(port, lduw_p(ptr));
> +                break;
> +            case 4:
> +                cpu_outl(port, ldl_p(ptr));
> +                break;
> +            }
> +        }

The whole "if" can be replaced by

    MemTxAttrs attrs = { 0 };
    ...

        address_space_rw(&address_space_io, port, attrs,
                         ptr, size, direction == HAX_EXIT_IO_OUT);

Thanks,

Paolo

> +        if (!df) {
> +            ptr += size;
> +        } else {
> +            ptr -= size;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
Paolo Bonzini Nov. 14, 2016, 11:55 a.m. UTC | #4
On 11/11/2016 12:28, Vincent Palatin wrote:
> +
> +    memcpy(env->xmm_regs, fpu.mmx_1, sizeof(fpu.mmx_1));
> +    memcpy((ZMMReg *) (env->xmm_regs) + 8, fpu.mmx_2, sizeof(fpu.mmx_2));

HAX will only support SSE (128-bit) registers, while env->xmm_regs
supports AVX512 (512-bit) so you have to copy registers one by one.

Is there documentation for HAX?  In particular I'm curious as to what
the CPUID information looks like in the guest, and whether there are
ioctls to change it.  In particular I would expect XSAVE to be disabled.

Paolo
Vincent Palatin Nov. 14, 2016, 12:07 p.m. UTC | #5
On Mon, Nov 14, 2016 at 11:15 AM, Paolo Bonzini <pbonzini@redhat.com> wrote:
>
>
> On 11/11/2016 12:28, Vincent Palatin wrote:
>> +
>> +    memcpy(env->xmm_regs, fpu.mmx_1, sizeof(fpu.mmx_1));
>> +    memcpy((ZMMReg *) (env->xmm_regs) + 8, fpu.mmx_2, sizeof(fpu.mmx_2));
>
> HAX will only support SSE (128-bit) registers, while env->xmm_regs
> supports AVX512 (512-bit) so you have to copy registers one by one.


Good point,
I will fix this


>
> Is there documentation for HAX?

No developer doc I know of,
both Intel website and the download packages contain only installation
documentations as far as I can tell.
I will ask Intel when I have the chance.

>  In particular I'm curious as to what
> the CPUID information looks like in the guest, and whether there are
> ioctls to change it.

No idea for the interface, but I have put an example below if you are
interested.

> In particular I would expect XSAVE to be disabled.

For EAX=1  I'm seeing ECX = 00d82201 => [26] = 0 && [27] = 0.
We should be fine for XSAVE.


On the Intel Core i5-6200U CPU I was running my tests on, I have
dumped the CPUID inside the emulator with HAX and on the Windows host:

========== emulation with HAX ==========
 eax in    eax      ebx      ecx      edx
00000000 00000004 756e6547 6c65746e 49656e69
00000001 000106f1 00010400 00d82201 1f88fbff
00000002 03020101 00000000 00000000 0c040844
00000003 00000000 00000000 00000000 00000000
00000004 00000000 00000000 00000000 00000000
00000005 00000040 00000040 00000003 11142120
00000006 000027f7 00000002 00000009 00000000
00000007 00000000 029c67af 00000000 00000000
00000008 00000000 00000000 00000000 00000000
00000009 00000000 00000000 00000000 00000000
0000000a 07300404 00000000 00000000 00000603
0000000b 00000001 00000002 00000100 00000001
0000000c 00000000 00000000 00000000 00000000
0000000d 0000001f 00000440 00000440 00000000
0000000e 00000000 00000000 00000000 00000000
0000000f 00000000 00000000 00000000 00000000
00000010 00000000 00000000 00000000 00000000
00000011 00000000 00000000 00000000 00000000
00000012 00000000 00000000 00000000 00000000
00000013 00000000 00000000 00000000 00000000
00000014 00000001 0000000f 00000007 00000000
00000015 00000002 000000c8 00000000 00000000
00000016 00000960 00000af0 00000064 00000000
80000000 80000008 00000000 00000000 00000000
80000001 00000000 00000000 00000000 20000800
80000002 74726956 206c6175 20555043 00000000
80000003 00000000 00000000 00000000 00000000
80000004 00000000 00000000 00000000 00000000
80000005 00000000 00000000 00000000 00000000
80000006 00000000 00000000 04008040 00000000
80000007 00000000 00000000 00000000 00000000
80000008 00003027 00000000 00000000 00000000

========== Windows host  ==========
 eax in    eax      ebx      ecx      edx
00000000 00000016 756e6547 6c65746e 49656e69
00000001 000406e3 00100800 7ffafbbf bfebfbff
00000002 76036301 00f0b5ff 00000000 00c30000
00000003 00000000 00000000 00000000 00000000
00000004 00000000 00000000 00000000 00000000
00000005 00000040 00000040 00000003 11142120
00000006 000027f7 00000002 00000009 00000000
00000007 00000000 00000000 00000000 00000000
00000008 00000000 00000000 00000000 00000000
00000009 00000000 00000000 00000000 00000000
0000000a 07300404 00000000 00000000 00000603
0000000b 00000000 00000000 000000c3 00000000
0000000c 00000000 00000000 00000000 00000000
0000000d 00000000 00000000 00000000 00000000
0000000e 00000000 00000000 00000000 00000000
0000000f 00000000 00000000 00000000 00000000
00000010 00000000 00000000 00000000 00000000
00000011 00000000 00000000 00000000 00000000
00000012 00000000 00000000 00000000 00000000
00000013 00000000 00000000 00000000 00000000
00000014 00000000 00000000 00000000 00000000
00000015 00000002 000000c8 00000000 00000000
00000016 00000960 00000af0 00000064 00000000
80000000 80000008 00000000 00000000 00000000
80000001 00000000 00000000 00000121 2c100000
80000002 65746e49 2952286c 726f4320 4d542865
80000003 35692029 3032362d 43205530 40205550
80000004 332e3220 7a484730 00000000 00000000
80000005 00000000 00000000 00000000 00000000
80000006 00000000 00000000 01006040 00000000
80000007 00000000 00000000 00000000 00000100
80000008 00003027 00000000 00000000 00000000

>
>> +
>> +static int hax_handle_fastmmio(CPUArchState *env, struct hax_fastmmio *hft)
>> +{
>> +    uint64_t buf = 0;
>> +    /*
>> +     * With fast MMIO, QEMU need not sync vCPU state with HAXM
>> +     * driver because it will only invoke MMIO handler
>> +     * However, some MMIO operations utilize virtual address like qemu_pipe
>> +     * Thus we need to sync the CR0, CR3 and CR4 so that QEMU
>> +     * can translate the guest virtual address to guest physical
>> +     * address
>> +     */
>> +    env->cr[0] = hft->_cr0;
>> +    env->cr[2] = hft->_cr2;
>> +    env->cr[3] = hft->_cr3;
>> +    env->cr[4] = hft->_cr4;
>
> These seem to apply only to some parts of the Android emulator that are
> not upstream, so you can remove them.

Ok, removed.
Re-tested my own image still works ...


>
>> +    buf = hft->value;
>> +
>> +    cpu_physical_memory_rw(hft->gpa, (uint8_t *) &buf, hft->size,
>> +                           hft->direction);
>> +    if (hft->direction == 0) {
>> +        hft->value = buf;
>> +    }
>
> No need to use "buf", you can use &hft->value directly.

Updated.


>
>> +    return 0;
>> +}
>> +
>> +static int hax_handle_io(CPUArchState *env, uint32_t df, uint16_t port,
>> +                         int direction, int size, int count, void *buffer)
>> +{
>> +    uint8_t *ptr;
>> +    int i;
>> +
>> +    if (!df) {
>> +        ptr = (uint8_t *) buffer;
>> +    } else {
>> +        ptr = buffer + size * count - size;
>> +    }
>> +    for (i = 0; i < count; i++) {
>> +        if (direction == HAX_EXIT_IO_IN) {
>> +            switch (size) {
>> +            case 1:
>> +                stb_p(ptr, cpu_inb(port));
>> +                break;
>> +            case 2:
>> +                stw_p(ptr, cpu_inw(port));
>> +                break;
>> +            case 4:
>> +                stl_p(ptr, cpu_inl(port));
>> +                break;
>> +            }
>> +        } else {
>> +            switch (size) {
>> +            case 1:
>> +                cpu_outb(port, ldub_p(ptr));
>> +                break;
>> +            case 2:
>> +                cpu_outw(port, lduw_p(ptr));
>> +                break;
>> +            case 4:
>> +                cpu_outl(port, ldl_p(ptr));
>> +                break;
>> +            }
>> +        }
>
> The whole "if" can be replaced by
>
>     MemTxAttrs = { 0 };
>     ...
>
>         address_space_rw(&address_space_io, port, attrs,
>                          ptr, size, direction == HAX_EXIT_IO_OUT);
>

Nice, updated and queued for my V3 series.

>
>> +        if (!df) {
>> +            ptr += size;
>> +        } else {
>> +            ptr -= size;
>> +        }
>> +    }
>> +
>> +    return 0;
>> +}
>> +
diff mbox

Patch

diff --git a/hax-stub.c b/hax-stub.c
new file mode 100644
index 0000000..11cd626
--- /dev/null
+++ b/hax-stub.c
@@ -0,0 +1,74 @@ 
+/*
+ * QEMU HAXM support
+ *
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Copyright 2016 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "cpu.h"
+#include "sysemu/hax.h"
+
+/*
+ * No-op HAX entry points.  Each stub either reports success without doing
+ * anything (returns 0) or reports the operation as unimplemented (-ENOSYS).
+ * NOTE(review): presumably linked in when CONFIG_HAX is off -- confirm
+ * against the build rules, which are not part of this file.
+ */
+
+/* Nothing to synchronize; always returns 0. */
+int hax_sync_vcpus(void)
+{
+    return 0;
+}
+
+/* The request is ignored. */
+void hax_disable(int disable)
+{
+}
+
+/* @ram_size is ignored; always returns 0. */
+int hax_pre_init(uint64_t ram_size)
+{
+    return 0;
+}
+
+/* Returns 0 and leaves *@max_ram untouched. */
+int hax_get_max_ram(uint64_t *max_ram)
+{
+    return 0;
+}
+
+/* Not implemented: always returns -ENOSYS. */
+int hax_populate_ram(uint64_t va, uint32_t size)
+{
+    return -ENOSYS;
+}
+
+/* Not implemented: always returns -ENOSYS. */
+int hax_init_vcpu(CPUState *cpu)
+{
+    return -ENOSYS;
+}
+
+/* Not implemented: always returns -ENOSYS. */
+int hax_smp_cpu_exec(CPUState *cpu)
+{
+    return -ENOSYS;
+}
+
+/* Not implemented: always returns -ENOSYS. */
+int hax_vcpu_exec(CPUState *cpu)
+{
+    return -ENOSYS;
+}
+
+/* Always returns 0. */
+int hax_vcpu_emulation_mode(CPUState *cpu)
+{
+    return 0;
+}
+
+/* Always returns 0. */
+int hax_stop_emulation(CPUState *cpu)
+{
+    return 0;
+}
+
+/* Always returns 0. */
+int hax_stop_translate(CPUState *cpu)
+{
+    return 0;
+}
diff --git a/include/sysemu/hax.h b/include/sysemu/hax.h
new file mode 100644
index 0000000..159e20f
--- /dev/null
+++ b/include/sysemu/hax.h
@@ -0,0 +1,66 @@ 
+/*
+ * QEMU HAXM support
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *
+ * Copyright (c) 2011 Intel Corporation
+ *  Written by:
+ *  Jiang Yunhong<yunhong.jiang@intel.com>
+ *  Xin Xiaohui<xiaohui.xin@intel.com>
+ *  Zhang Xiantao<xiantao.zhang@intel.com>
+ *
+ * Copyright 2016 Google, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_HAX_H
+#define QEMU_HAX_H
+
+#include "config-host.h"
+#include "qemu-common.h"
+
+/* Declarations available whether or not CONFIG_HAX is defined. */
+
+/* Accelerator-wide entry points */
+void hax_disable(int disable);
+int hax_pre_init(uint64_t ram_size);
+/* get the max haxm ram even before haxm library is initialized */
+int hax_get_max_ram(uint64_t *max_ram);
+int hax_populate_ram(uint64_t va, uint32_t size);
+int hax_sync_vcpus(void);
+
+/* Per-vCPU entry points */
+int hax_init_vcpu(CPUState *cpu);
+int hax_smp_cpu_exec(CPUState *cpu);
+int hax_vcpu_exec(CPUState *cpu);
+int hax_vcpu_emulation_mode(CPUState *cpu);
+int hax_stop_emulation(CPUState *cpu);
+int hax_stop_translate(CPUState *cpu);
+
+/* Per-vCPU state synchronization hooks */
+void hax_cpu_synchronize_state(CPUState *cpu);
+void hax_cpu_synchronize_post_reset(CPUState *cpu);
+void hax_cpu_synchronize_post_init(CPUState *cpu);
+
+#ifdef CONFIG_HAX
+
+/* Real query functions; the #else branch replaces them with constant 0. */
+int hax_enabled(void);
+int hax_ug_platform(void);
+
+#include "hw/hw.h"
+#include "qemu/bitops.h"
+#include "exec/memory.h"
+void hax_reset_vcpu_state(void *opaque);
+void hax_raise_event(CPUState *cpu);
+int hax_vcpu_destroy(CPUState *cpu);
+#include "target-i386/hax-interface.h"
+#include "target-i386/hax-i386.h"
+
+#else /* CONFIG_HAX */
+
+/* Without CONFIG_HAX both queries are compile-time false. */
+#define hax_enabled() (0)
+#define hax_ug_platform() (0)
+
+#endif /* CONFIG_HAX */
+
+#endif /* QEMU_HAX_H */
diff --git a/target-i386/hax-all.c b/target-i386/hax-all.c
new file mode 100644
index 0000000..86d16ae
--- /dev/null
+++ b/target-i386/hax-all.c
@@ -0,0 +1,1490 @@ 
+/*
+ * QEMU HAX support
+ *
+ * Copyright IBM, Corp. 2008
+ *           Red Hat, Inc. 2008
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *  Glauber Costa     <gcosta@redhat.com>
+ *
+ * Copyright (c) 2011 Intel Corporation
+ *  Written by:
+ *  Jiang Yunhong<yunhong.jiang@intel.com>
+ *  Xin Xiaohui<xiaohui.xin@intel.com>
+ *  Zhang Xiantao<xiantao.zhang@intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+/*
+ * HAX common code for both windows and darwin
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "exec/exec-all.h"
+#include "exec/ioport.h"
+
+#include "qemu-common.h"
+#include "strings.h"
+#include "hax-i386.h"
+#include "sysemu/accel.h"
+#include "sysemu/sysemu.h"
+#include "exec/address-spaces.h"
+#include "qemu/main-loop.h"
+#include "hax-slot.h"
+
+static const char kHaxVcpuSyncFailed[] = "Failed to sync HAX vcpu context";
+/*
+ * Print @msg on stderr.  The text is passed through "%s" so that a '%'
+ * inside the message is never interpreted as a conversion specification.
+ */
+#define derror(msg) do { fprintf(stderr, "%s", (msg)); } while (0)
+
+/* Set to 1 to turn the DPRINTF() calls below into real output. */
+#define DEBUG_HAX 0
+
+/*
+ * Debug trace on stdout.  The call sits behind a plain "if" rather than an
+ * #ifdef so the compiler still checks the format string and arguments when
+ * DEBUG_HAX is 0.
+ */
+#define DPRINTF(fmt, ...) \
+    do { \
+        if (DEBUG_HAX) { \
+            fprintf(stdout, fmt, ## __VA_ARGS__); \
+        } \
+    } while (0)
+
+/* Current version */
+const uint32_t hax_cur_version = 0x3;    /* ver 2.0: support fast mmio */
+/* Minimum  HAX kernel version */
+const uint32_t hax_min_version = 0x3;
+
+#define TYPE_HAX_ACCEL ACCEL_CLASS_NAME("hax")
+
+#define HAX_EMUL_ONE    0x1
+#define HAX_EMUL_REAL   0x2
+#define HAX_EMUL_HLT    0x4
+#define HAX_EMUL_EXITLOOP    0x5
+
+/* Values stored in hax_vcpu_state.emulation_state */
+#define HAX_EMULATE_STATE_MMIO  0x1
+#define HAX_EMULATE_STATE_REAL  0x2
+#define HAX_EMULATE_STATE_NONE  0x3
+#define HAX_EMULATE_STATE_INITIAL       0x4
+
+#define HAX_NON_UG_PLATFORM 0x0
+#define HAX_UG_PLATFORM     0x1
+
+bool hax_allowed;
+
+static void hax_vcpu_sync_state(CPUArchState *env, int modified);
+static int hax_arch_get_registers(CPUArchState *env);
+static int hax_handle_io(CPUArchState *env, uint32_t df, uint16_t port,
+                         int direction, int size, int count, void *buffer);
+static int hax_handle_fastmmio(CPUArchState *env, struct hax_fastmmio *hft);
+
+struct hax_state hax_global;
+/* Result of the last hax_init() call, recorded by hax_accel_init(). */
+int ret_hax_init;
+/* Non-zero until hax_disable(0) is called. */
+static int hax_disabled = 1;
+
+/* -1 until hax_init() runs; hax_init() sets it to 0, then 1 on success. */
+int hax_support = -1;
+/* Set to 1 by hax_get_capability() when the driver reports HAX_CAP_UG. */
+int ug_support;
+
+/*
+ * Called after hax_init.
+ *
+ * Returns 1 when HAX has not been disabled and hax_support is non-zero,
+ * 0 otherwise.  hax_support starts out as -1, so the answer is only
+ * meaningful once hax_init() has assigned it.
+ */
+int hax_enabled(void)
+{
+    if (hax_disabled) {
+        return 0;
+    }
+    return hax_support != 0;
+}
+
+/* Record the disable flag: non-zero turns HAX off, zero turns it on. */
+void hax_disable(int disable)
+{
+    hax_disabled = disable;
+}
+
+/*
+ * Called after hax_init.
+ *
+ * Returns the ug_support flag, which hax_get_capability() sets to 1 when
+ * the driver reports HAX_CAP_UG.
+ */
+int hax_ug_platform(void)
+{
+    return ug_support;
+}
+
+/*
+ * Currently non-PG modes are emulated by QEMU.
+ *
+ * Returns 1 when CR0.PG is clear for @cpu, 0 when it is set.
+ */
+int hax_vcpu_emulation_mode(CPUState *cpu)
+{
+    CPUArchState *env = (CPUArchState *) (cpu->env_ptr);
+
+    if (env->cr[0] & CR0_PG_MASK) {
+        return 0;
+    }
+    return 1;
+}
+
+/*
+ * Get QEMU ready to emulate @env: drop the cached TLB entries and
+ * translation blocks, then pull the vcpu state from the HAX kernel module.
+ * Always returns 0.
+ */
+static int hax_prepare_emulation(CPUArchState *env)
+{
+    CPUState *cs = ENV_GET_CPU(env);
+
+    /* Flush all emulation states */
+    tlb_flush(cs, 1);
+    tb_flush(cs);
+
+    /* Sync the vcpu state from hax kernel module */
+    hax_vcpu_sync_state(env, 0);
+
+    return 0;
+}
+
+/*
+ * Check whether to break the translation block loop
+ * break tbloop after one MMIO emulation, or after finish emulation mode
+ *
+ * Returns 1 when the loop should stop and 0 when it should keep going.
+ * In the MMIO state a pending resync is serviced first (state is re-read
+ * via hax_prepare_emulation()) and the loop continues once more.  Any
+ * state other than MMIO, INITIAL or REAL is reported on stderr and treated
+ * as "keep going".
+ */
+static int hax_stop_tbloop(CPUArchState *env)
+{
+    CPUState *cpu = ENV_GET_CPU(env);
+
+    switch (cpu->hax_vcpu->emulation_state) {
+    case HAX_EMULATE_STATE_MMIO:
+        if (cpu->hax_vcpu->resync) {
+            hax_prepare_emulation(env);
+            cpu->hax_vcpu->resync = 0;
+            return 0;
+        }
+        return 1;
+    case HAX_EMULATE_STATE_INITIAL:
+    case HAX_EMULATE_STATE_REAL:
+        /* Stop as soon as the vcpu no longer needs QEMU emulation. */
+        if (!hax_vcpu_emulation_mode(cpu)) {
+            return 1;
+        }
+        break;
+    default:
+        fprintf(stderr,
+                "Invalid emulation state in hax_stop_tbloop state %x\n",
+                cpu->hax_vcpu->emulation_state);
+        break;
+    }
+
+    return 0;
+}
+
+/*
+ * Decide whether QEMU emulation of @cpu is finished.
+ *
+ * When hax_stop_tbloop() asks to stop, the emulation state is reset to
+ * HAX_EMULATE_STATE_NONE, the vcpu state is pushed to the HAX kernel
+ * module and 1 is returned.  Otherwise returns 0 and changes nothing.
+ */
+int hax_stop_emulation(CPUState *cpu)
+{
+    CPUArchState *env = (CPUArchState *) (cpu->env_ptr);
+
+    if (!hax_stop_tbloop(env)) {
+        return 0;
+    }
+
+    cpu->hax_vcpu->emulation_state = HAX_EMULATE_STATE_NONE;
+    /*
+     * QEMU emulation changes vcpu state,
+     * Sync the vcpu state to HAX kernel module
+     */
+    hax_vcpu_sync_state(env, 1);
+    return 1;
+}
+
+/*
+ * Returns 1 when @cpu is in the MMIO emulation state, 0 for any other
+ * state.  The emulation state must be non-zero (asserted).
+ */
+int hax_stop_translate(CPUState *cpu)
+{
+    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
+
+    assert(vcpu->emulation_state);
+
+    return vcpu->emulation_state == HAX_EMULATE_STATE_MMIO;
+}
+
+/* Returns 1 when @size can hold a struct hax_tunnel, 0 when it is smaller. */
+int valid_hax_tunnel_size(uint16_t size)
+{
+    if (size < sizeof(struct hax_tunnel)) {
+        return 0;
+    }
+    return 1;
+}
+
+/*
+ * Returns the descriptor of the HAX vcpu attached to @env, or
+ * HAX_INVALID_FD when no HAX vcpu is attached.
+ */
+hax_fd hax_vcpu_get_fd(CPUArchState *env)
+{
+    struct hax_vcpu_state *state = ENV_GET_CPU(env)->hax_vcpu;
+
+    return state ? state->fd : HAX_INVALID_FD;
+}
+
+/*
+ * Query the driver capabilities and check them against what this VM needs.
+ *
+ * Returns 0 when the driver is usable, the hax_capability() error when the
+ * query itself fails, -ENXIO when the driver reports it is not working and
+ * -ENOSPC when the driver memory quota is below hax->mem_quota.  Sets
+ * ug_support when the driver advertises HAX_CAP_UG.
+ */
+static int hax_get_capability(struct hax_state *hax)
+{
+    struct hax_capabilityinfo caps;
+    int err;
+
+    err = hax_capability(hax, &caps);
+    if (err) {
+        return err;
+    }
+
+    if ((caps.wstatus & HAX_CAP_WORKSTATUS_MASK) == HAX_CAP_STATUS_NOTWORKING) {
+        /* The reason is only traced; DPRINTF is silent unless DEBUG_HAX. */
+        if (caps.winfo & HAX_CAP_FAILREASON_VT) {
+            DPRINTF("VTX feature is not enabled, HAX driver will not work.\n");
+        } else if (caps.winfo & HAX_CAP_FAILREASON_NX) {
+            DPRINTF("NX feature is not enabled, HAX driver will not work.\n");
+        }
+        return -ENXIO;
+    }
+
+    if (caps.winfo & HAX_CAP_UG) {
+        ug_support = 1;
+    }
+
+    /* The quota is only enforced when the driver says it applies one. */
+    if ((caps.wstatus & HAX_CAP_MEMQUOTA) && caps.mem_quota < hax->mem_quota) {
+        fprintf(stderr, "The VM memory needed exceeds the driver limit.\n");
+        return -ENOSPC;
+    }
+
+    return 0;
+}
+
+/*
+ * Returns 1 when the driver version range overlaps the one QEMU speaks:
+ * the driver's current version is at least hax_min_version and its
+ * compat version is no newer than hax_cur_version.  Returns 0 otherwise,
+ * including when the version query fails.
+ */
+static int hax_version_support(struct hax_state *hax)
+{
+    struct hax_module_version drv;
+
+    if (hax_mod_version(hax, &drv) < 0) {
+        return 0;
+    }
+
+    return hax_min_version <= drv.cur_version &&
+           hax_cur_version >= drv.compat_version;
+}
+
+/*
+ * Create host vcpu @id for the global VM, open it and set up its channel.
+ * On success the new state is stored in hax_global.vm->vcpus[@id].
+ *
+ * Returns 0 on success or when vcpu @id already exists, -EINVAL when @id
+ * is outside the vcpus[] table and -1 for every other failure.
+ */
+int hax_vcpu_create(int id)
+{
+    struct hax_vcpu_state *vcpu = NULL;
+    int ret;
+
+    if (!hax_global.vm) {
+        fprintf(stderr, "vcpu %x created failed, vm is null\n", id);
+        return -1;
+    }
+
+    /* vcpus[] is walked up to HAX_MAX_VCPU elsewhere; stay inside it. */
+    if (id < 0 || id >= HAX_MAX_VCPU) {
+        fprintf(stderr, "vcpu %x is out of range\n", id);
+        return -EINVAL;
+    }
+
+    if (hax_global.vm->vcpus[id]) {
+        fprintf(stderr, "vcpu %x allocated already\n", id);
+        return 0;
+    }
+
+    /* g_malloc0() aborts on allocation failure, so it never returns NULL. */
+    vcpu = g_malloc0(sizeof(*vcpu));
+    /*
+     * Mark the descriptor as not opened yet so the error path below does
+     * not close whatever descriptor the zero-filled field would name.
+     */
+    vcpu->fd = HAX_INVALID_FD;
+
+    ret = hax_host_create_vcpu(hax_global.vm->fd, id);
+    if (ret) {
+        fprintf(stderr, "Failed to create vcpu %x\n", id);
+        goto error;
+    }
+
+    vcpu->vcpu_id = id;
+    vcpu->fd = hax_host_open_vcpu(hax_global.vm->id, id);
+    if (hax_invalid_fd(vcpu->fd)) {
+        fprintf(stderr, "Failed to open the vcpu\n");
+        goto error;
+    }
+
+    hax_global.vm->vcpus[id] = vcpu;
+
+    ret = hax_host_setup_vcpu_channel(vcpu);
+    if (ret) {
+        fprintf(stderr, "Invalid hax tunnel size\n");
+        goto error;
+    }
+    return 0;
+
+  error:
+    /* vcpu and tunnel will be closed automatically */
+    if (!hax_invalid_fd(vcpu->fd)) {
+        hax_close_fd(vcpu->fd);
+    }
+
+    hax_global.vm->vcpus[id] = NULL;
+    g_free(vcpu);
+    return -1;
+}
+
+/*
+ * Tear down the HAX vcpu attached to @cpu.
+ *
+ * Returns 0 when the vcpu was destroyed or when @cpu has no HAX vcpu, and
+ * -1 when a vcpu exists but the global VM is gone.
+ */
+int hax_vcpu_destroy(CPUState *cpu)
+{
+    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
+
+    /* Must come first: the diagnostic below dereferences vcpu. */
+    if (!vcpu) {
+        return 0;
+    }
+
+    if (!hax_global.vm) {
+        fprintf(stderr, "vcpu %x destroy failed, vm is null\n", vcpu->vcpu_id);
+        return -1;
+    }
+
+    /*
+     * 1. The hax_tunnel is also destroyed when the vcpu is destroyed
+     * 2. closing the fd makes the hax module clean up its vcpu
+     */
+    hax_close_fd(vcpu->fd);
+    hax_global.vm->vcpus[vcpu->vcpu_id] = NULL;
+    g_free(vcpu);
+    /* Do not leave a pointer to freed memory behind. */
+    cpu->hax_vcpu = NULL;
+    return 0;
+}
+
+/*
+ * Attach a HAX vcpu to @cpu: create it, put it in the INITIAL emulation
+ * state, mark it dirty and register hax_reset_vcpu_state() as its reset
+ * handler.  Exits the process when the vcpu cannot be created; otherwise
+ * returns the (non-negative) result of hax_vcpu_create().
+ */
+int hax_init_vcpu(CPUState *cpu)
+{
+    int ret = hax_vcpu_create(cpu->cpu_index);
+
+    if (ret < 0) {
+        fprintf(stderr, "Failed to create HAX vcpu\n");
+        exit(-1);
+    }
+
+    cpu->hax_vcpu = hax_global.vm->vcpus[cpu->cpu_index];
+    cpu->hax_vcpu->emulation_state = HAX_EMULATE_STATE_INITIAL;
+    cpu->hax_vcpu_dirty = true;
+
+    qemu_register_reset(hax_reset_vcpu_state, (CPUArchState *) (cpu->env_ptr));
+
+    return ret;
+}
+
+/*
+ * Create the host VM for @hax, open it and initialize the slot registry.
+ *
+ * Returns the existing VM when @hax already has one, the new VM on
+ * success (also stored in hax->vm), and NULL when the module descriptor
+ * is invalid or the host refuses to create or open the VM.
+ */
+struct hax_vm *hax_vm_create(struct hax_state *hax)
+{
+    struct hax_vm *vm;
+    int vm_id = 0, ret;
+
+    if (hax_invalid_fd(hax->fd)) {
+        return NULL;
+    }
+
+    if (hax->vm) {
+        return hax->vm;
+    }
+
+    /* g_malloc0() aborts on allocation failure, so it never returns NULL. */
+    vm = g_malloc0(sizeof(*vm));
+
+    ret = hax_host_create_vm(hax, &vm_id);
+    if (ret) {
+        fprintf(stderr, "Failed to create vm %x\n", ret);
+        goto error;
+    }
+    vm->id = vm_id;
+    vm->fd = hax_host_open_vm(hax, vm_id);
+    if (hax_invalid_fd(vm->fd)) {
+        fprintf(stderr, "Failed to open vm %d\n", vm_id);
+        goto error;
+    }
+
+    hax->vm = vm;
+    hax_slot_init_registry();
+    return vm;
+
+  error:
+    g_free(vm);
+    hax->vm = NULL;
+    return NULL;
+}
+
+/*
+ * Destroy @vm: free the slot registry, close the VM descriptor, release
+ * the structure and clear hax_global.vm.
+ *
+ * Returns 0 on success.  Returns -1, leaving everything untouched, when
+ * any vcpu of @vm still exists.
+ */
+int hax_vm_destroy(struct hax_vm *vm)
+{
+    int i;
+
+    /* Refuse before releasing anything, so a failed call has no effect. */
+    for (i = 0; i < HAX_MAX_VCPU; i++) {
+        if (vm->vcpus[i]) {
+            fprintf(stderr, "VCPU should be cleaned before vm clean\n");
+            return -1;
+        }
+    }
+
+    hax_slot_free_registry();
+    hax_close_fd(vm->fd);
+    g_free(vm);
+    hax_global.vm = NULL;
+    return 0;
+}
+
+/*
+ * Register the RAM or ROM backing @section with the HAX slot registry.
+ *
+ * The guest-physical range is first shrunk to whole target pages: the start
+ * is rounded up and the length rounded down.  Sections that are not RAM, or
+ * that contain no complete page after trimming, are ignored.
+ */
+static void hax_set_phys_mem(MemoryRegionSection *section)
+{
+    MemoryRegion *region = section->mr;
+    hwaddr gpa = section->offset_within_address_space;
+    ram_addr_t len = int128_get64(section->size);
+    unsigned int pad;
+    void *hva;
+    int flags;
+
+    /* We only care about RAM and ROM */
+    if (!memory_region_is_ram(region)) {
+        return;
+    }
+
+    /*
+     * pad is the number of bytes from gpa up to the next page boundary
+     * (0 when gpa is already aligned).  Cf kvm_set_phys_mem() in kvm-all.c.
+     */
+    pad = TARGET_PAGE_SIZE - (gpa & ~TARGET_PAGE_MASK);
+    pad &= ~TARGET_PAGE_MASK;
+    if (pad > len) {
+        return;
+    }
+
+    gpa += pad;
+    len = (len - pad) & TARGET_PAGE_MASK;
+    if (len == 0 || (gpa & ~TARGET_PAGE_MASK) != 0) {
+        return;
+    }
+
+    /* The host pointer skips the same leading bytes as gpa did. */
+    hva = memory_region_get_ram_ptr(region) + section->offset_within_region
+          + pad;
+    if (memory_region_is_rom(region)) {
+        flags = 1;
+    } else {
+        flags = 0;
+    }
+    hax_slot_register(gpa, len, (uintptr_t) hva, flags);
+}
+
+/* MemoryListener.region_add hook: register the new section with HAX. */
+static void hax_region_add(MemoryListener *listener,
+                           MemoryRegionSection *section)
+{
+    hax_set_phys_mem(section);
+}
+
+/* MemoryListener.region_del hook: intentionally does nothing. */
+static void hax_region_del(MemoryListener *listener,
+                           MemoryRegionSection *section)
+{
+    /* Memory mappings will be removed at VM close. */
+}
+
+/*
+ * MemoryListener.log_sync hook.
+ *
+ * Dirty pages are not tracked individually: every sync reports the whole
+ * RAM section as dirty.  Sections that are not RAM are skipped.
+ */
+static void hax_log_sync(MemoryListener *listener,
+                         MemoryRegionSection *section)
+{
+    MemoryRegion *mr = section->mr;
+
+    if (!memory_region_is_ram(mr)) {
+        /* Skip MMIO regions */
+        return;
+    }
+
+    /*
+     * memory_region_set_dirty() takes an offset relative to the start of
+     * the region, so the range starts at the section's offset within it.
+     */
+    memory_region_set_dirty(mr, section->offset_within_region,
+                            int128_get64(section->size));
+}
+
+/*
+ * The remaining MemoryListener hooks are deliberately empty; they exist
+ * only so that every callback slot of hax_memory_listener is populated.
+ */
+
+static void hax_log_global_start(struct MemoryListener *listener)
+{
+    /* nothing to do */
+}
+
+static void hax_log_global_stop(struct MemoryListener *listener)
+{
+    /* nothing to do */
+}
+
+static void hax_log_start(MemoryListener *listener,
+                          MemoryRegionSection *section, int old, int new)
+{
+    /* nothing to do */
+}
+
+static void hax_log_stop(MemoryListener *listener,
+                         MemoryRegionSection *section, int old, int new)
+{
+    /* nothing to do */
+}
+
+static void hax_begin(MemoryListener *listener)
+{
+    /* nothing to do */
+}
+
+static void hax_commit(MemoryListener *listener)
+{
+    /* nothing to do */
+}
+
+static void hax_region_nop(MemoryListener *listener,
+                           MemoryRegionSection *section)
+{
+    /* nothing to do */
+}
+
+/* Listener that hax_init() registers on address_space_memory. */
+static MemoryListener hax_memory_listener = {
+    /* transaction boundaries */
+    .begin = hax_begin,
+    .commit = hax_commit,
+    /* topology changes */
+    .region_add = hax_region_add,
+    .region_del = hax_region_del,
+    .region_nop = hax_region_nop,
+    /* dirty logging */
+    .log_global_start = hax_log_global_start,
+    .log_global_stop = hax_log_global_stop,
+    .log_start = hax_log_start,
+    .log_stop = hax_log_stop,
+    .log_sync = hax_log_sync,
+};
+
+/*
+ * Interrupt handler installed by hax_init(): record @mask in the pending
+ * interrupt requests of @cpu and kick the vcpu unless the caller is
+ * already running on it.
+ */
+static void hax_handle_interrupt(CPUState *cpu, int mask)
+{
+    cpu->interrupt_request |= mask;
+
+    if (qemu_cpu_is_self(cpu)) {
+        return;
+    }
+    qemu_cpu_kick(cpu);
+}
+
+/*
+ * Early setup: report on stdout whether HAX is enabled and, when it is,
+ * clear the global HAX state and record @ram_size as the memory quota the
+ * VM needs.  Always returns 0.
+ */
+int hax_pre_init(uint64_t ram_size)
+{
+    struct hax_state *hax = NULL;
+
+    fprintf(stdout, "Hax is %s\n", hax_disabled ? "disabled" : "enabled");
+    if (hax_disabled) {
+        return 0;
+    }
+    hax = &hax_global;
+    memset(hax, 0, sizeof(struct hax_state));
+    hax->mem_quota = ram_size;
+    /* PRIx64 matches uint64_t on every host; "%llx" does not. */
+    fprintf(stdout, "Hax ram_size 0x%" PRIx64 "\n", ram_size);
+
+    return 0;
+}
+
+/*
+ * Bring up HAX: open the module, check its capabilities and version,
+ * create the VM, register the memory listener, tell the driver which
+ * versions QEMU speaks and install the interrupt handler.
+ *
+ * Returns 0 on success with hax_support set to 1.  On failure returns
+ * -ENODEV (module cannot be opened), -ENOSPC (memory quota too small) or
+ * -EINVAL (anything else), with hax_support left at 0 and whatever had
+ * been set up torn down again.
+ */
+static int hax_init(void)
+{
+    struct hax_state *hax = &hax_global;
+    struct hax_qemu_version qversion;
+    int ret;
+
+    hax_support = 0;
+
+    hax->fd = hax_mod_open();
+    if (hax_invalid_fd(hax->fd)) {
+        /* A zero fd tells the error path there is no module to close. */
+        hax->fd = 0;
+        ret = -ENODEV;
+        goto error;
+    }
+
+    ret = hax_get_capability(hax);
+    if (ret) {
+        /* Only the quota failure keeps its code; the rest become -EINVAL. */
+        ret = (ret == -ENOSPC) ? -ENOSPC : -EINVAL;
+        goto error;
+    }
+
+    if (!hax_version_support(hax)) {
+        fprintf(stderr, "Incompat HAX version. QEMU current version %x ",
+                hax_cur_version);
+        fprintf(stderr, "requires minimum HAX version %x\n", hax_min_version);
+        ret = -EINVAL;
+        goto error;
+    }
+
+    hax->vm = hax_vm_create(hax);
+    if (!hax->vm) {
+        fprintf(stderr, "Failed to create HAX VM\n");
+        ret = -EINVAL;
+        goto error;
+    }
+
+    memory_listener_register(&hax_memory_listener, &address_space_memory);
+
+    qversion.min_version = hax_min_version;
+    qversion.cur_version = hax_cur_version;
+    hax_notify_qemu_version(hax->vm->fd, &qversion);
+
+    cpu_interrupt_handler = hax_handle_interrupt;
+    hax_support = 1;
+
+    return ret;
+
+  error:
+    if (hax->vm) {
+        hax_vm_destroy(hax->vm);
+    }
+    if (hax->fd) {
+        hax_mod_close(hax);
+    }
+
+    return ret;
+}
+
+/*
+ * init_machine hook of the HAX accelerator class.
+ *
+ * Runs hax_init() and keeps its result in ret_hax_init.  A failure other
+ * than -ENOSPC is reported and returned to the caller.  -ENOSPC is
+ * tolerated: the function still returns 0 and the status line then says
+ * "not working" / "emulation".
+ */
+static int hax_accel_init(MachineState *ms)
+{
+    int hax_ok;
+
+    ret_hax_init = hax_init();
+    if (ret_hax_init && ret_hax_init != -ENOSPC) {
+        fprintf(stderr, "No accelerator found.\n");
+        return ret_hax_init;
+    }
+
+    /* need tcg for non-UG platform in real mode */
+    if (!hax_ug_platform()) {
+        tcg_exec_init(tcg_tb_size * 1024 * 1024);
+    }
+
+    hax_ok = !ret_hax_init;
+    fprintf(stdout, "HAX is %s and emulator runs in %s mode.\n",
+            hax_ok ? "working" : "not working",
+            hax_ok ? "fast virt" : "emulation");
+    return 0;
+}
+
+/*
+ * Service a fast-MMIO exit described by @hft.
+ *
+ * With fast MMIO QEMU does not sync the whole vCPU state with the HAXM
+ * driver, it only runs the MMIO handler.  Some MMIO operations (qemu_pipe
+ * for instance) work on guest virtual addresses, so CR0, CR2, CR3 and CR4
+ * carried in @hft are copied into @env first, which lets QEMU translate
+ * guest virtual addresses to guest physical ones.
+ *
+ * The access itself targets guest-physical address hft->gpa.  When
+ * hft->direction is 0 the data is stored back into hft->value.
+ *
+ * Always returns 0.
+ */
+static int hax_handle_fastmmio(CPUArchState *env, struct hax_fastmmio *hft)
+{
+    uint64_t data;
+
+    env->cr[0] = hft->_cr0;
+    env->cr[2] = hft->_cr2;
+    env->cr[3] = hft->_cr3;
+    env->cr[4] = hft->_cr4;
+
+    /* bounce the value through a local 64-bit buffer */
+    data = hft->value;
+    cpu_physical_memory_rw(hft->gpa, (uint8_t *) &data, hft->size,
+                           hft->direction);
+    if (!hft->direction) {
+        hft->value = data;
+    }
+
+    return 0;
+}
+
+/*
+ * Carry out a port I/O exit: @count accesses of @size bytes (1, 2 or 4)
+ * on @port, moving data between the port and @buffer.
+ *
+ * A @direction equal to HAX_EXIT_IO_IN reads the port into the buffer,
+ * any other value writes the buffer to the port.  When @df is non-zero
+ * the buffer is walked from its last element down to the first one.
+ * Any other @size transfers nothing.
+ *
+ * Always returns 0.
+ */
+static int hax_handle_io(CPUArchState *env, uint32_t df, uint16_t port,
+                         int direction, int size, int count, void *buffer)
+{
+    const int step = df ? -size : size;
+    uint8_t *cursor = (uint8_t *) buffer;
+    int n;
+
+    if (df) {
+        /* start on the last element */
+        cursor += size * count - size;
+    }
+
+    for (n = 0; n < count; n++, cursor += step) {
+        const int is_in = (direction == HAX_EXIT_IO_IN);
+
+        switch (size) {
+        case 1:
+            if (is_in) {
+                stb_p(cursor, cpu_inb(port));
+            } else {
+                cpu_outb(port, ldub_p(cursor));
+            }
+            break;
+        case 2:
+            if (is_in) {
+                stw_p(cursor, cpu_inw(port));
+            } else {
+                cpu_outw(port, lduw_p(cursor));
+            }
+            break;
+        case 4:
+            if (is_in) {
+                stl_p(cursor, cpu_inl(port));
+            } else {
+                cpu_outl(port, ldl_p(cursor));
+            }
+            break;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Interrupt handling done before every hax_vcpu_run().
+ *
+ * If the tunnel reports the guest as ready for injection and a hard
+ * interrupt is pending, fetch the vector from the PIC and inject it.
+ * Unlike KVM, the HAX kernel module checks eflags itself instead of QEMU.
+ *
+ * If a hard interrupt is still pending afterwards, an interrupt-window
+ * exit is requested so that control returns to QEMU as soon as the guest
+ * can take it; otherwise the request is cleared.
+ *
+ * Always returns 0.
+ */
+static int hax_vcpu_interrupt(CPUArchState *env)
+{
+    CPUState *cs = ENV_GET_CPU(env);
+    struct hax_vcpu_state *vcpu = cs->hax_vcpu;
+    struct hax_tunnel *tunnel = vcpu->tunnel;
+
+    if (tunnel->ready_for_interrupt_injection &&
+        (cs->interrupt_request & CPU_INTERRUPT_HARD)) {
+        int vector = cpu_get_pic_interrupt(env);
+
+        if (vector >= 0) {
+            hax_inject_interrupt(env, vector);
+            cs->interrupt_request &= ~CPU_INTERRUPT_HARD;
+        }
+    }
+
+    tunnel->request_interrupt_window =
+        (cs->interrupt_request & CPU_INTERRUPT_HARD) ? 1 : 0;
+    return 0;
+}
+
+/*
+ * Flag a pending user event in the tunnel of @cpu.
+ * Does nothing when @cpu has no HAX vCPU state attached.
+ */
+void hax_raise_event(CPUState *cpu)
+{
+    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
+
+    if (vcpu != NULL) {
+        vcpu->tunnel->user_event_pending = 1;
+    }
+}
+
+/*
+ * Common tail of the exits that stop guest execution: request a system
+ * reset, call hax_prepare_emulation(), dump the CPU state on stderr and
+ * return the code that makes the run loop exit.
+ */
+static int hax_vcpu_stop_and_dump(CPUArchState *env, CPUState *cpu)
+{
+    qemu_system_reset_request();
+    hax_prepare_emulation(env);
+    cpu_dump_state(cpu, stderr, fprintf, 0);
+    return HAX_EMUL_EXITLOOP;
+}
+
+/*
+ * Ask the hax kernel module to run the CPU for us until:
+ * 1. The guest crashes or shuts down
+ * 2. QEMU's emulation is needed, e.g. the guest executes an MMIO
+ *    instruction or enters emulation mode (non-PG mode)
+ * 3. The guest executes HLT
+ * 4. QEMU has a signal/event pending
+ * 5. An unknown VMX exit happens
+ *
+ * @ug_platform selects the flavour of the loop.  When it is non-zero,
+ * pending INIT and SIPI requests are handled here before the guest is
+ * entered and the iothread lock is dropped around hax_vcpu_run().
+ *
+ * Returns the first non-zero result produced by an exit handler, normally
+ * one of the HAX_EMUL_* codes.
+ */
+static int hax_vcpu_hax_exec(CPUArchState *env, int ug_platform)
+{
+    int ret = 0;
+    CPUState *cpu = ENV_GET_CPU(env);
+    X86CPU *x86_cpu = X86_CPU(cpu);
+    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
+    struct hax_tunnel *ht = vcpu->tunnel;
+
+    /* Bail out early when this vCPU must not be run by HAX right now */
+    if (!ug_platform) {
+        if (hax_vcpu_emulation_mode(cpu)) {
+            DPRINTF("Trying to execute vcpu at eip:" TARGET_FMT_lx "\n",
+                    env->eip);
+            return HAX_EMUL_EXITLOOP;
+        }
+    } else if (!hax_enabled()) {
+        DPRINTF("Trying to vcpu execute at eip:" TARGET_FMT_lx "\n",
+                env->eip);
+        return HAX_EMUL_EXITLOOP;
+    }
+
+    /* Preparation shared by both flavours */
+    cpu->halted = 0;
+
+    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
+        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
+        apic_poll_irq(x86_cpu->apic_state);
+    }
+
+    if (ug_platform) {
+        if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
+            DPRINTF("\nUG hax_vcpu_hax_exec: handling INIT for %d\n",
+                    cpu->cpu_index);
+            do_cpu_init(x86_cpu);
+            hax_vcpu_sync_state(env, 1);
+        }
+
+        if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
+            DPRINTF("UG hax_vcpu_hax_exec: handling SIPI for %d\n",
+                    cpu->cpu_index);
+            /* fetch the state from HAX, apply the SIPI, push it back */
+            hax_vcpu_sync_state(env, 0);
+            do_cpu_sipi(x86_cpu);
+            hax_vcpu_sync_state(env, 1);
+        }
+    }
+
+    do {
+        int hax_ret;
+
+        if (cpu->exit_request) {
+            ret = HAX_EMUL_EXITLOOP;
+            break;
+        }
+
+        hax_vcpu_interrupt(env);
+        if (!ug_platform) {
+            hax_ret = hax_vcpu_run(vcpu);
+        } else {                /* UG platform */
+            qemu_mutex_unlock_iothread();
+            hax_ret = hax_vcpu_run(vcpu);
+            qemu_mutex_lock_iothread();
+            current_cpu = cpu;
+        }
+
+        /* Simply continue the vcpu_run if system call interrupted */
+        if (hax_ret == -EINTR || hax_ret == -EAGAIN) {
+            DPRINTF("io window interrupted\n");
+            continue;
+        }
+
+        if (hax_ret < 0) {
+            fprintf(stderr, "vcpu run failed for vcpu  %x\n", vcpu->vcpu_id);
+            abort();
+        }
+        switch (ht->_exit_status) {
+        case HAX_EXIT_IO:
+            ret = hax_handle_io(env, ht->pio._df, ht->pio._port,
+                            ht->pio._direction,
+                            ht->pio._size, ht->pio._count, vcpu->iobuf);
+            break;
+        case HAX_EXIT_MMIO:
+            ret = HAX_EMUL_ONE;
+            break;
+        case HAX_EXIT_FAST_MMIO:
+            ret = hax_handle_fastmmio(env, (struct hax_fastmmio *) vcpu->iobuf);
+            break;
+        case HAX_EXIT_REAL:
+            ret = HAX_EMUL_REAL;
+            break;
+        /* Guest state changed, currently only for shutdown */
+        case HAX_EXIT_STATECHANGE:
+            fprintf(stdout, "VCPU shutdown request\n");
+            ret = hax_vcpu_stop_and_dump(env, cpu);
+            break;
+        case HAX_EXIT_UNKNOWN_VMEXIT:
+            fprintf(stderr, "Unknown VMX exit %x from guest\n",
+                    ht->_exit_reason);
+            ret = hax_vcpu_stop_and_dump(env, cpu);
+            break;
+        case HAX_EXIT_HLT:
+            /*
+             * Halt only when neither a hard interrupt nor an NMI is
+             * pending; otherwise ret stays 0 and the guest is re-entered.
+             */
+            if (!(cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
+                !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
+                /* hlt instruction with interrupt disabled is shutdown */
+                env->eflags |= IF_MASK;
+                cpu->halted = 1;
+                cpu->exception_index = EXCP_HLT;
+                ret = HAX_EMUL_HLT;
+            }
+            break;
+        /* these situations simply continue into the hax module */
+        case HAX_EXIT_INTERRUPT:
+        case HAX_EXIT_PAUSED:
+            break;
+        default:
+            fprintf(stderr, "Unknown exit %x from hax\n", ht->_exit_status);
+            ret = hax_vcpu_stop_and_dump(env, cpu);
+            break;
+        }
+    } while (!ret);
+
+    if (cpu->exit_request) {
+        cpu->exit_request = 0;
+        cpu->exception_index = EXCP_INTERRUPT;
+    }
+    return ret;
+}
+
+/*
+ * run_on_cpu() worker: fetch the registers of @cpu from HAX and mark
+ * QEMU's copy dirty.  @arg is unused.
+ */
+static void do_hax_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
+{
+    hax_arch_get_registers(cpu->env_ptr);
+    cpu->hax_vcpu_dirty = true;
+}
+
+/*
+ * Refresh QEMU's register copy of @cpu from HAX through run_on_cpu().
+ * Nothing is done when the copy is already flagged dirty.
+ */
+void hax_cpu_synchronize_state(CPUState *cpu)
+{
+    if (cpu->hax_vcpu_dirty) {
+        return;
+    }
+    run_on_cpu(cpu, do_hax_cpu_synchronize_state, RUN_ON_CPU_NULL);
+}
+
+/*
+ * run_on_cpu() worker: push QEMU's registers of @cpu to HAX and clear the
+ * dirty flag.  @arg is unused.
+ */
+static void do_hax_cpu_synchronize_post_reset(CPUState *cpu,
+                                              run_on_cpu_data arg)
+{
+    hax_vcpu_sync_state(cpu->env_ptr, 1);
+    cpu->hax_vcpu_dirty = false;
+}
+
+/* Push the register state of @cpu to HAX through run_on_cpu(). */
+void hax_cpu_synchronize_post_reset(CPUState *cpu)
+{
+    run_on_cpu(cpu, do_hax_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
+}
+
+/*
+ * run_on_cpu() worker: push QEMU's registers of @cpu to HAX and clear the
+ * dirty flag.  @arg is unused.
+ */
+static void do_hax_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
+{
+    hax_vcpu_sync_state(cpu->env_ptr, 1);
+    cpu->hax_vcpu_dirty = false;
+}
+
+/* Push the register state of @cpu to HAX through run_on_cpu(). */
+void hax_cpu_synchronize_post_init(CPUState *cpu)
+{
+    run_on_cpu(cpu, do_hax_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
+}
+
+/*
+ * Run @cpu through hax_vcpu_hax_exec() in its non-UG flavour.
+ *
+ * Returns 1 when the caller has to emulate (the vCPU is already in an
+ * emulation state, or HAX asked for MMIO / real-mode emulation) and 0
+ * when the caller has to leave its loop.  An unexpected result aborts.
+ */
+int hax_vcpu_exec(CPUState *cpu)
+{
+    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
+    CPUArchState *env = (CPUArchState *) (cpu->env_ptr);
+    int reason;
+
+    if (vcpu->emulation_state != HAX_EMULATE_STATE_NONE) {
+        return 1;
+    }
+
+    reason = hax_vcpu_hax_exec(env, HAX_NON_UG_PLATFORM);
+    if (reason == HAX_EMUL_HLT || reason == HAX_EMUL_EXITLOOP) {
+        return 0;
+    }
+
+    if (reason == HAX_EMUL_ONE) {
+        vcpu->emulation_state = HAX_EMULATE_STATE_MMIO;
+    } else if (reason == HAX_EMUL_REAL) {
+        vcpu->emulation_state = HAX_EMULATE_STATE_REAL;
+    } else {
+        fprintf(stderr, "Unknown hax vcpu exec return %x\n", reason);
+        abort();
+    }
+
+    hax_prepare_emulation(env);
+    return 1;
+}
+
+/*
+ * Run @cpu through hax_vcpu_hax_exec() in its UG flavour until an
+ * exception index of at least EXCP_INTERRUPT is pending.
+ *
+ * That index is returned and reset to -1 on the vCPU.  Any result of
+ * hax_vcpu_hax_exec() other than HAX_EMUL_HLT or HAX_EMUL_EXITLOOP aborts.
+ */
+int hax_smp_cpu_exec(CPUState *cpu)
+{
+    CPUArchState *env = (CPUArchState *) (cpu->env_ptr);
+
+    for (;;) {
+        int exit_code;
+
+        if (cpu->exception_index >= EXCP_INTERRUPT) {
+            int pending = cpu->exception_index;
+
+            cpu->exception_index = -1;
+            return pending;
+        }
+
+        exit_code = hax_vcpu_hax_exec(env, HAX_UG_PLATFORM);
+        if (exit_code == HAX_EMUL_HLT || exit_code == HAX_EMUL_EXITLOOP) {
+            continue;
+        }
+
+        fprintf(stderr, "Unknown hax vcpu return %x\n", exit_code);
+        abort();
+    }
+}
+
+#define HAX_RAM_INFO_ROM 0x1
+
+/*
+ * Fill HAX segment descriptor @lhs from QEMU's @rhs for a vCPU running
+ * with VM_MASK set in eflags: selector, base and limit are copied, the
+ * attributes are fixed values (type 3, present, DPL 3, desc 1, all other
+ * attributes 0).
+ */
+static void set_v8086_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
+{
+    memset(lhs, 0, sizeof(*lhs));
+
+    lhs->selector = rhs->selector;
+    lhs->limit = rhs->limit;
+    lhs->base = rhs->base;
+
+    /* fixed attributes */
+    lhs->type = 3;
+    lhs->present = 1;
+    lhs->dpl = 3;
+    lhs->operand_size = 0;
+    lhs->desc = 1;
+    lhs->long_mode = 0;
+    lhs->granularity = 0;
+    lhs->available = 0;
+}
+
+/*
+ * Convert HAX segment descriptor @rhs into QEMU's SegmentCache @lhs.
+ * The separate attribute fields of @rhs are packed into lhs->flags at
+ * their DESC_* positions.
+ */
+static void get_seg(SegmentCache *lhs, const struct segment_desc_t *rhs)
+{
+    lhs->selector = rhs->selector;
+    lhs->base = rhs->base;
+    lhs->limit = rhs->limit;
+
+    lhs->flags = rhs->type << DESC_TYPE_SHIFT;
+    lhs->flags |= rhs->present * DESC_P_MASK;
+    lhs->flags |= rhs->dpl << DESC_DPL_SHIFT;
+    lhs->flags |= rhs->operand_size << DESC_B_SHIFT;
+    lhs->flags |= rhs->desc * DESC_S_MASK;
+    lhs->flags |= rhs->long_mode << DESC_L_SHIFT;
+    lhs->flags |= rhs->granularity * DESC_G_MASK;
+    lhs->flags |= rhs->available * DESC_AVL_MASK;
+}
+
+/*
+ * Convert QEMU's SegmentCache @rhs into HAX segment descriptor @lhs.
+ * The attributes are unpacked from rhs->flags, except for the DPL which
+ * is taken from the low two bits of the selector.
+ */
+static void set_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
+{
+    const unsigned attr = rhs->flags;
+
+    memset(lhs, 0, sizeof(*lhs));
+
+    lhs->selector = rhs->selector;
+    lhs->base = rhs->base;
+    lhs->limit = rhs->limit;
+
+    lhs->type = (attr >> DESC_TYPE_SHIFT) & 0xf;
+    lhs->present = !!(attr & DESC_P_MASK);
+    /* NOTE(review): DPL comes from the selector, not from the flags */
+    lhs->dpl = rhs->selector & 3;
+    lhs->operand_size = (attr >> DESC_B_SHIFT) & 1;
+    lhs->desc = !!(attr & DESC_S_MASK);
+    lhs->long_mode = (attr >> DESC_L_SHIFT) & 1;
+    lhs->granularity = !!(attr & DESC_G_MASK);
+    lhs->available = !!(attr & DESC_AVL_MASK);
+}
+
+/*
+ * Move one register between the HAX state and QEMU.
+ *
+ * With @set non-zero *@qemu_reg is stored into *@hax_reg, otherwise
+ * *@hax_reg is converted to target_ulong and stored into *@qemu_reg.
+ */
+static void hax_getput_reg(uint64_t *hax_reg, target_ulong *qemu_reg, int set)
+{
+    if (!set) {
+        *qemu_reg = (target_ulong) *hax_reg;
+        return;
+    }
+    *hax_reg = *qemu_reg;
+}
+
+/*
+ * Copy the segment registers, TR, LDT and the base/limit of IDT and GDT
+ * from @sregs into @env.  @sregs must already have been synced with the
+ * HAX kernel module before this call.
+ *
+ * Always returns 0.
+ */
+static int hax_get_segments(CPUArchState *env, struct vcpu_state_t *sregs)
+{
+    /* descriptor tables: only base and limit are carried over */
+    env->gdt.base = sregs->_gdt.base;
+    env->gdt.limit = sregs->_gdt.limit;
+    env->idt.base = sregs->_idt.base;
+    env->idt.limit = sregs->_idt.limit;
+
+    get_seg(&env->ldt, &sregs->_ldt);
+    get_seg(&env->tr, &sregs->_tr);
+
+    get_seg(&env->segs[R_ES], &sregs->_es);
+    get_seg(&env->segs[R_CS], &sregs->_cs);
+    get_seg(&env->segs[R_SS], &sregs->_ss);
+    get_seg(&env->segs[R_DS], &sregs->_ds);
+    get_seg(&env->segs[R_FS], &sregs->_fs);
+    get_seg(&env->segs[R_GS], &sregs->_gs);
+
+    return 0;
+}
+
+/*
+ * Fill the segment part of @sregs from @env.
+ *
+ * The six segment registers are converted with set_v8086_seg() when
+ * VM_MASK is set in eflags and with set_seg() otherwise.  In the latter
+ * case, if CR0.PE is set, the RPL and DPL of SS are forced to the low two
+ * bits of the CS selector.  TR and LDT always go through set_seg(); IDT
+ * and GDT contribute base and limit only.
+ *
+ * Always returns 0.
+ */
+static int hax_set_segments(CPUArchState *env, struct vcpu_state_t *sregs)
+{
+    const int vm86 = (env->eflags & VM_MASK) != 0;
+    void (*fill)(struct segment_desc_t *, const SegmentCache *) =
+        vm86 ? set_v8086_seg : set_seg;
+
+    fill(&sregs->_cs, &env->segs[R_CS]);
+    fill(&sregs->_ds, &env->segs[R_DS]);
+    fill(&sregs->_es, &env->segs[R_ES]);
+    fill(&sregs->_fs, &env->segs[R_FS]);
+    fill(&sregs->_gs, &env->segs[R_GS]);
+    fill(&sregs->_ss, &env->segs[R_SS]);
+
+    if (!vm86 && (env->cr[0] & CR0_PE_MASK)) {
+        /* force ss cpl to cs cpl */
+        sregs->_ss.selector = (sregs->_ss.selector & ~3) |
+                              (sregs->_cs.selector & 3);
+        sregs->_ss.dpl = sregs->_ss.selector & 3;
+    }
+
+    set_seg(&sregs->_tr, &env->tr);
+    set_seg(&sregs->_ldt, &env->ldt);
+
+    sregs->_idt.base = env->idt.base;
+    sregs->_idt.limit = env->idt.limit;
+    sregs->_gdt.base = env->gdt.base;
+    sregs->_gdt.limit = env->gdt.limit;
+
+    return 0;
+}
+
+/*
+ * After getting the state from the kernel module, some QEMU emulator
+ * state needs to be updated as well.
+ *
+ * This recomputes the part of env->hflags that is derived from other
+ * registers: the CS descriptor flags, CR0, CR4, eflags, EFER and the
+ * segment bases.  HFLAG_COPY_MASK is the complement of the recomputed
+ * bits; every bit it covers is taken over unchanged from the previous
+ * env->hflags.
+ *
+ * Always returns 0.
+ */
+static int hax_setup_qemu_emulator(CPUArchState *env)
+{
+
+#define HFLAG_COPY_MASK (~( \
+  HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
+  HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
+  HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
+  HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK))
+
+    uint32_t hflags;    /* the freshly derived bits */
+
+    hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
+    hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
+    hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
+        (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
+    hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
+    hflags |= (env->cr[4] & CR4_OSFXSR_MASK) <<
+              (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);
+
+    if (env->efer & MSR_EFER_LMA) {
+        hflags |= HF_LMA_MASK;
+    }
+
+    if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) {
+        hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;  /* LMA + CS.L */
+    } else {
+        hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >>
+            (DESC_B_SHIFT - HF_CS32_SHIFT);
+        hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >>
+            (DESC_B_SHIFT - HF_SS32_SHIFT);
+        if (!(env->cr[0] & CR0_PE_MASK) ||
+            (env->eflags & VM_MASK) || !(hflags & HF_CS32_MASK)) {
+            hflags |= HF_ADDSEG_MASK;
+        } else {
+            hflags |= ((env->segs[R_DS].base |
+                        env->segs[R_ES].base |
+                        env->segs[R_SS].base) != 0) << HF_ADDSEG_SHIFT;
+        }
+    }
+
+    hflags &= ~HF_SMM_MASK;  /* local bits only; env's own SMM bit is kept */
+
+    env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;
+    return 0;
+}
+
+/*
+ * Transfer the general purpose registers, rflags, rip, CR0/2/3/4 and the
+ * segment state between @env and the HAX module.
+ *
+ * With @set non-zero QEMU's values are written to HAX.  Otherwise the
+ * state is fetched from HAX, copied into @env and the derived emulator
+ * state is refreshed through hax_setup_qemu_emulator().
+ *
+ * Returns 0 on success and -1 when hax_sync_vcpu_state() fails.
+ */
+static int hax_sync_vcpu_register(CPUArchState *env, int set)
+{
+    struct vcpu_state_t state;
+
+    memset(&state, 0, sizeof(state));
+
+    /* when reading, start from what the HAX module currently holds */
+    if (!set && hax_sync_vcpu_state(env, &state, 0) < 0) {
+        return -1;
+    }
+
+    /* generic register */
+    hax_getput_reg(&state._rax, &env->regs[R_EAX], set);
+    hax_getput_reg(&state._rbx, &env->regs[R_EBX], set);
+    hax_getput_reg(&state._rcx, &env->regs[R_ECX], set);
+    hax_getput_reg(&state._rdx, &env->regs[R_EDX], set);
+    hax_getput_reg(&state._rsi, &env->regs[R_ESI], set);
+    hax_getput_reg(&state._rdi, &env->regs[R_EDI], set);
+    hax_getput_reg(&state._rsp, &env->regs[R_ESP], set);
+    hax_getput_reg(&state._rbp, &env->regs[R_EBP], set);
+#ifdef TARGET_X86_64
+    hax_getput_reg(&state._r8, &env->regs[8], set);
+    hax_getput_reg(&state._r9, &env->regs[9], set);
+    hax_getput_reg(&state._r10, &env->regs[10], set);
+    hax_getput_reg(&state._r11, &env->regs[11], set);
+    hax_getput_reg(&state._r12, &env->regs[12], set);
+    hax_getput_reg(&state._r13, &env->regs[13], set);
+    hax_getput_reg(&state._r14, &env->regs[14], set);
+    hax_getput_reg(&state._r15, &env->regs[15], set);
+#endif
+    hax_getput_reg(&state._rflags, &env->eflags, set);
+    hax_getput_reg(&state._rip, &env->eip, set);
+
+    if (!set) {
+        env->cr[0] = state._cr0;
+        env->cr[2] = state._cr2;
+        env->cr[3] = state._cr3;
+        env->cr[4] = state._cr4;
+        hax_get_segments(env, &state);
+        hax_setup_qemu_emulator(env);
+        return 0;
+    }
+
+    state._cr0 = env->cr[0];
+    state._cr2 = env->cr[2];
+    state._cr3 = env->cr[3];
+    state._cr4 = env->cr[4];
+    hax_set_segments(env, &state);
+
+    if (hax_sync_vcpu_state(env, &state, 1) < 0) {
+        return -1;
+    }
+    return 0;
+}
+
+/* Store MSR number @index and its @value into the MSR slot @item. */
+static void hax_msr_entry_set(struct vmx_msr *item, uint32_t index,
+                              uint64_t value)
+{
+    item->value = value;
+    item->entry = index;
+}
+
+/*
+ * Read the MSRs QEMU tracks (SYSENTER_CS/ESP/EIP and TSC, plus EFER, STAR,
+ * LSTAR, CSTAR, FMASK and KERNELGSBASE for 64-bit targets) from HAX into
+ * @env.
+ *
+ * Only the first md.done entries of the reply are consumed.
+ *
+ * Returns 0 on success or the negative value from hax_sync_msr().
+ */
+static int hax_get_msrs(CPUArchState *env)
+{
+    struct hax_msr_data md;
+    struct vmx_msr *msrs = md.entries;
+    int ret, i, n;
+
+    /*
+     * Start from a fully zeroed request, like hax_set_msrs() does, so that
+     * 'done', the padding and the unused entries never carry stack garbage.
+     */
+    memset(&md, 0, sizeof(md));
+
+    n = 0;
+    msrs[n++].entry = MSR_IA32_SYSENTER_CS;
+    msrs[n++].entry = MSR_IA32_SYSENTER_ESP;
+    msrs[n++].entry = MSR_IA32_SYSENTER_EIP;
+    msrs[n++].entry = MSR_IA32_TSC;
+#ifdef TARGET_X86_64
+    msrs[n++].entry = MSR_EFER;
+    msrs[n++].entry = MSR_STAR;
+    msrs[n++].entry = MSR_LSTAR;
+    msrs[n++].entry = MSR_CSTAR;
+    msrs[n++].entry = MSR_FMASK;
+    msrs[n++].entry = MSR_KERNELGSBASE;
+#endif
+    md.nr_msr = n;
+    ret = hax_sync_msr(env, &md, 0);
+    if (ret < 0) {
+        return ret;
+    }
+
+    for (i = 0; i < md.done; i++) {
+        switch (msrs[i].entry) {
+        case MSR_IA32_SYSENTER_CS:
+            env->sysenter_cs = msrs[i].value;
+            break;
+        case MSR_IA32_SYSENTER_ESP:
+            env->sysenter_esp = msrs[i].value;
+            break;
+        case MSR_IA32_SYSENTER_EIP:
+            env->sysenter_eip = msrs[i].value;
+            break;
+        case MSR_IA32_TSC:
+            env->tsc = msrs[i].value;
+            break;
+#ifdef TARGET_X86_64
+        case MSR_EFER:
+            env->efer = msrs[i].value;
+            break;
+        case MSR_STAR:
+            env->star = msrs[i].value;
+            break;
+        case MSR_LSTAR:
+            env->lstar = msrs[i].value;
+            break;
+        case MSR_CSTAR:
+            env->cstar = msrs[i].value;
+            break;
+        case MSR_FMASK:
+            env->fmask = msrs[i].value;
+            break;
+        case MSR_KERNELGSBASE:
+            env->kernelgsbase = msrs[i].value;
+            break;
+#endif
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Write the MSRs QEMU tracks (SYSENTER_CS/ESP/EIP and TSC, plus EFER,
+ * STAR, LSTAR, CSTAR, FMASK and KERNELGSBASE for 64-bit targets) from
+ * @env to HAX.
+ *
+ * Returns the result of hax_sync_msr().
+ */
+static int hax_set_msrs(CPUArchState *env)
+{
+    struct hax_msr_data md;
+    struct vmx_msr *slot = md.entries;
+
+    memset(&md, 0, sizeof(md));
+
+    hax_msr_entry_set(slot++, MSR_IA32_SYSENTER_CS, env->sysenter_cs);
+    hax_msr_entry_set(slot++, MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
+    hax_msr_entry_set(slot++, MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
+    hax_msr_entry_set(slot++, MSR_IA32_TSC, env->tsc);
+#ifdef TARGET_X86_64
+    hax_msr_entry_set(slot++, MSR_EFER, env->efer);
+    hax_msr_entry_set(slot++, MSR_STAR, env->star);
+    hax_msr_entry_set(slot++, MSR_LSTAR, env->lstar);
+    hax_msr_entry_set(slot++, MSR_CSTAR, env->cstar);
+    hax_msr_entry_set(slot++, MSR_FMASK, env->fmask);
+    hax_msr_entry_set(slot++, MSR_KERNELGSBASE, env->kernelgsbase);
+#endif
+    /* number of slots filled above */
+    md.nr_msr = slot - md.entries;
+    md.done = 0;
+
+    return hax_sync_msr(env, &md, 1);
+}
+
+static int hax_get_fpu(CPUArchState *env)
+{
+    struct fx_layout fpu;
+    int i, ret;
+
+    ret = hax_sync_fpu(env, &fpu, 0);
+    if (ret < 0) {
+        return ret;
+    }
+
+    env->fpstt = (fpu.fsw >> 11) & 7;
+    env->fpus = fpu.fsw;
+    env->fpuc = fpu.fcw;
+    for (i = 0; i < 8; ++i) {
+        env->fptags[i] = !((fpu.ftw >> i) & 1);
+    }
+    memcpy(env->fpregs, fpu.st_mm, sizeof(env->fpregs));
+
+    memcpy(env->xmm_regs, fpu.mmx_1, sizeof(fpu.mmx_1));
+    memcpy((ZMMReg *) (env->xmm_regs) + 8, fpu.mmx_2, sizeof(fpu.mmx_2));
+    env->mxcsr = fpu.mxcsr;
+
+    return 0;
+}
+
+static int hax_set_fpu(CPUArchState *env)
+{
+    struct fx_layout fpu;
+    int i;
+
+    memset(&fpu, 0, sizeof(fpu));
+    fpu.fsw = env->fpus & ~(7 << 11);
+    fpu.fsw |= (env->fpstt & 7) << 11;
+    fpu.fcw = env->fpuc;
+
+    for (i = 0; i < 8; ++i) {
+        fpu.ftw |= (!env->fptags[i]) << i;
+    }
+
+    memcpy(fpu.st_mm, env->fpregs, sizeof(env->fpregs));
+    memcpy(fpu.mmx_1, env->xmm_regs, sizeof(fpu.mmx_1));
+    memcpy(fpu.mmx_2, (ZMMReg *) (env->xmm_regs) + 8, sizeof(fpu.mmx_2));
+
+    fpu.mxcsr = env->mxcsr;
+
+    return hax_sync_fpu(env, &fpu, 1);
+}
+
+/*
+ * Fetch the complete vCPU state from HAX into @env: registers first, then
+ * FPU, then MSRs.  Stops at the first step that fails.
+ *
+ * Returns 0 on success or the negative value of the failing step.
+ */
+static int hax_arch_get_registers(CPUArchState *env)
+{
+    int ret = hax_sync_vcpu_register(env, 0);
+
+    if (ret >= 0) {
+        ret = hax_get_fpu(env);
+    }
+    if (ret >= 0) {
+        ret = hax_get_msrs(env);
+    }
+
+    return ret < 0 ? ret : 0;
+}
+
+/*
+ * Write the complete vCPU state held in @env to HAX: registers first,
+ * then FPU, then MSRs.  Stops at the first step that fails and reports it
+ * on stderr.
+ *
+ * Returns 0 on success or the negative value of the failing step.
+ */
+static int hax_arch_set_registers(CPUArchState *env)
+{
+    const char *what;
+    int ret;
+
+    what = "Failed to sync vcpu reg\n";
+    ret = hax_sync_vcpu_register(env, 1);
+    if (ret < 0) {
+        goto failed;
+    }
+
+    what = "FPU failed\n";
+    ret = hax_set_fpu(env);
+    if (ret < 0) {
+        goto failed;
+    }
+
+    what = "MSR failed\n";
+    ret = hax_set_msrs(env);
+    if (ret < 0) {
+        goto failed;
+    }
+
+    return 0;
+
+  failed:
+    fputs(what, stderr);
+    return ret;
+}
+
+/*
+ * Synchronise the vCPU state between QEMU and HAX, if HAX is enabled.
+ *
+ * A non-zero @modified writes QEMU's state to HAX, zero fetches the state
+ * from HAX.  The result of the transfer is not reported.
+ */
+static void hax_vcpu_sync_state(CPUArchState *env, int modified)
+{
+    if (!hax_enabled()) {
+        return;
+    }
+
+    if (!modified) {
+        hax_arch_get_registers(env);
+        return;
+    }
+    hax_arch_set_registers(env);
+}
+
+/*
+ * Write the register state of every CPU to HAX, if HAX is enabled.
+ *
+ * Much simpler than kvm, at least in the first stage: there is no need to
+ * consider device pass-through or the framebuffer, and the bios may even
+ * be removed altogether.
+ *
+ * Returns 0 on success or the negative value from the first CPU whose
+ * hax_arch_set_registers() fails.
+ */
+int hax_sync_vcpus(void)
+{
+    CPUState *cpu;
+
+    if (!hax_enabled()) {
+        return 0;
+    }
+
+    for (cpu = first_cpu; cpu != NULL; cpu = CPU_NEXT(cpu)) {
+        int ret = hax_arch_set_registers(cpu->env_ptr);
+
+        if (ret < 0) {
+            derror(kHaxVcpuSyncFailed);
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Put the HAX bookkeeping of every CPU back to its initial state: the
+ * emulation state becomes HAX_EMULATE_STATE_INITIAL and the pending-event
+ * and ready-for-injection flags of the tunnel are cleared.
+ * @opaque is unused.
+ */
+void hax_reset_vcpu_state(void *opaque)
+{
+    CPUState *cpu = first_cpu;
+
+    while (cpu != NULL) {
+        struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
+
+        DPRINTF("*********ReSet hax_vcpu->emulation_state\n");
+        vcpu->emulation_state = HAX_EMULATE_STATE_INITIAL;
+        vcpu->tunnel->user_event_pending = 0;
+        vcpu->tunnel->ready_for_interrupt_injection = 0;
+
+        cpu = CPU_NEXT(cpu);
+    }
+}
+
+/*
+ * Class initialiser of the HAX accelerator type: sets the accelerator
+ * name, its init_machine hook and the flag it is allowed through.
+ * @data is unused.
+ */
+static void hax_accel_class_init(ObjectClass *oc, void *data)
+{
+    AccelClass *accel = ACCEL_CLASS(oc);
+
+    accel->allowed = &hax_allowed;
+    accel->init_machine = hax_accel_init;
+    accel->name = "HAX";
+}
+
+/* Type description of the HAX accelerator, a child of TYPE_ACCEL */
+static const TypeInfo hax_accel_type = {
+    .parent = TYPE_ACCEL,
+    .name = TYPE_HAX_ACCEL,
+    .class_init = hax_accel_class_init,
+};
+
+/* Register the HAX accelerator type; hooked up through type_init() */
+static void hax_type_init(void)
+{
+    type_register_static(&hax_accel_type);
+}
+
+type_init(hax_type_init);
+
diff --git a/target-i386/hax-i386.h b/target-i386/hax-i386.h
new file mode 100644
index 0000000..c98f801
--- /dev/null
+++ b/target-i386/hax-i386.h
@@ -0,0 +1,91 @@ 
+/*
+ * QEMU HAXM support
+ *
+ * Copyright (c) 2011 Intel Corporation
+ *  Written by:
+ *  Jiang Yunhong<yunhong.jiang@intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef _HAX_I386_H
+#define _HAX_I386_H
+
+#include "cpu.h"
+#include "sysemu/hax.h"
+
+#ifdef CONFIG_DARWIN
+typedef int hax_fd;
+#endif
+
+#ifdef CONFIG_WIN32
+typedef HANDLE hax_fd;
+#endif
+
+extern struct hax_state hax_global;
+struct hax_vcpu_state {    /* per-vCPU HAX state, reached via cpu->hax_vcpu */
+    hax_fd fd;    /* presumably the handle of the vCPU device - TODO confirm */
+    int vcpu_id;    /* printed when running the vCPU fails */
+    int resync;
+    int emulation_state;    /* one of the HAX_EMULATE_STATE_* values */
+    struct hax_tunnel *tunnel;    /* exit status and interrupt handshake */
+    unsigned char *iobuf;    /* data of port I/O and fast MMIO exits */
+};
+
+struct hax_state {    /* global HAX state, instantiated as hax_global */
+    hax_fd fd; /* the global hax device interface */
+    uint32_t version;
+    struct hax_vm *vm;    /* the VM created on top of fd */
+    uint64_t mem_quota;    /* RAM size the state was initialised with */
+};
+
+#define HAX_MAX_VCPU 0x10
+#define MAX_VM_ID 0x40
+#define MAX_VCPU_ID 0x40
+
+struct hax_vm {    /* one HAX virtual machine */
+    hax_fd fd;    /* handle of the VM device */
+    int id;    /* presumably below MAX_VM_ID - TODO confirm */
+    struct hax_vcpu_state *vcpus[HAX_MAX_VCPU];    /* at most HAX_MAX_VCPU */
+};
+
+#ifdef NEED_CPU_H
+/* Functions exported to host specific mode */
+hax_fd hax_vcpu_get_fd(CPUArchState *env);
+int valid_hax_tunnel_size(uint16_t size);
+
+/* Host specific functions */
+int hax_mod_version(struct hax_state *hax, struct hax_module_version *version);
+int hax_inject_interrupt(CPUArchState *env, int vector);
+struct hax_vm *hax_vm_create(struct hax_state *hax);
+int hax_vcpu_run(struct hax_vcpu_state *vcpu);
+int hax_vcpu_create(int id);
+int hax_sync_vcpu_state(CPUArchState *env, struct vcpu_state_t *state,
+                        int set);
+int hax_sync_msr(CPUArchState *env, struct hax_msr_data *msrs, int set);
+int hax_sync_fpu(CPUArchState *env, struct fx_layout *fl, int set);
+#endif
+
+int hax_vm_destroy(struct hax_vm *vm);
+int hax_capability(struct hax_state *hax, struct hax_capabilityinfo *cap);
+int hax_notify_qemu_version(hax_fd vm_fd, struct hax_qemu_version *qversion);
+int hax_set_ram(uint64_t start_pa, uint32_t size, uint64_t host_va, int flags);
+
+/* Common host function */
+int hax_host_create_vm(struct hax_state *hax, int *vm_id);
+hax_fd hax_host_open_vm(struct hax_state *hax, int vm_id);
+int hax_host_create_vcpu(hax_fd vm_fd, int vcpuid);
+hax_fd hax_host_open_vcpu(int vmid, int vcpuid);
+int hax_host_setup_vcpu_channel(struct hax_vcpu_state *vcpu);
+hax_fd hax_mod_open(void);
+
+
+#ifdef CONFIG_WIN32
+#include "target-i386/hax-windows.h"
+#endif
+
+#include "target-i386/hax-interface.h"
+
+#endif
diff --git a/target-i386/hax-interface.h b/target-i386/hax-interface.h
new file mode 100644
index 0000000..2bc7f1a
--- /dev/null
+++ b/target-i386/hax-interface.h
@@ -0,0 +1,357 @@ 
+/*
+ * QEMU HAXM support
+ *
+ * Copyright (c) 2011 Intel Corporation
+ *  Written by:
+ *  Jiang Yunhong<yunhong.jiang@intel.com>
+ *  Xin Xiaohui<xiaohui.xin@intel.com>
+ *  Zhang Xiantao<xiantao.zhang@intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+/* Interface with HAX kernel module */
+
+#ifndef _HAX_INTERFACE_H
+#define _HAX_INTERFACE_H
+
+/*
+ * FPU/SSE state image exchanged with the HAX module by the vCPU GET_FPU and
+ * SET_FPU ioctls. The members add up to 512 bytes: 32 bytes of control and
+ * pointer fields, three 128-byte register areas and 96 bytes of padding.
+ *
+ * The original note reads "fx_layout has 3 formats table 3-56"; presumably a
+ * reference to the FXSAVE area layouts in the Intel SDM - TODO confirm.
+ */
+struct fx_layout {
+    uint16_t fcw;
+    uint16_t fsw;
+    uint8_t ftw;
+    uint8_t res1;
+    uint16_t fop;
+    /* Instruction pointer: split 32-bit offset + selector, or one 64-bit value */
+    union {
+        struct {
+            uint32_t fip;
+            uint16_t fcs;
+            uint16_t res2;
+        };
+        uint64_t fpu_ip;
+    };
+    /* Data pointer: split 32-bit offset + selector, or one 64-bit value */
+    union {
+        struct {
+            uint32_t fdp;
+            uint16_t fds;
+            uint16_t res3;
+        };
+        uint64_t fpu_dp;
+    };
+    uint32_t mxcsr;
+    uint32_t mxcsr_mask;
+    uint8_t st_mm[8][16];
+    uint8_t mmx_1[8][16];
+    uint8_t mmx_2[8][16];
+    uint8_t pad[96];
+} __attribute__ ((aligned(8)));
+
+/*
+ * One element of hax_msr_data.entries.
+ * NOTE(review): 'entry' is presumably the MSR index and 'value' its
+ * content - confirm against the users of hax_sync_msr().
+ */
+struct vmx_msr {
+    uint64_t entry;
+    uint64_t value;
+} __attribute__ ((__packed__));
+
+/*
+ * A fixed-size array is not ideal, but it makes Mac support a bit easier by
+ * avoiding memory mapping or copyin stuff.
+ *
+ * The same buffer is passed as both input and output of the MSR ioctls.
+ * NOTE(review): 'nr_msr' is presumably the number of valid entries and
+ * 'done' the number the module processed - confirm against the driver.
+ */
+#define HAX_MAX_MSR_ARRAY 0x20
+struct hax_msr_data {
+    uint16_t nr_msr;
+    uint16_t done;
+    uint16_t pad[2];
+    struct vmx_msr entries[HAX_MAX_MSR_ARRAY];
+} __attribute__ ((__packed__));
+
+/*
+ * Interrupt-blocking flags of a vCPU, accessible either as a raw 32-bit word
+ * or as individual bits. The 64-bit 'pad' member makes the union 8 bytes
+ * wide.
+ */
+union interruptibility_state_t {
+    uint32_t raw;
+    struct {
+        uint32_t sti_blocking:1;
+        uint32_t movss_blocking:1;
+        uint32_t smi_blocking:1;
+        uint32_t nmi_blocking:1;
+        uint32_t reserved:28;
+    };
+    uint64_t pad;
+};
+
+typedef union interruptibility_state_t interruptibility_state_t;
+
+/*
+ * Segment descriptor as exchanged with the HAX module.
+ *
+ * The attribute bits can be accessed one by one through the anonymous
+ * bit-field struct or all at once through 'ar'.
+ */
+struct segment_desc_t {
+    uint16_t selector;
+    uint16_t _dummy;
+    uint32_t limit;
+    uint64_t base;
+    union {
+        struct {
+            uint32_t type:4;
+            uint32_t desc:1;
+            uint32_t dpl:2;
+            uint32_t present:1;
+            uint32_t:4;
+            uint32_t available:1;
+            uint32_t long_mode:1;
+            uint32_t operand_size:1;
+            uint32_t granularity:1;
+            uint32_t null:1;
+            uint32_t:15;
+        };
+        uint32_t ar;
+    };
+    uint32_t ipad;
+};
+
+typedef struct segment_desc_t segment_desc_t;
+
+/*
+ * Register state of one vCPU, transferred as a whole by the GET_REGS and
+ * SET_REGS vCPU ioctls (see hax_sync_vcpu_state()).
+ *
+ * The sixteen general purpose registers can be addressed either through the
+ * _regs[] array or by name; each named register is a union exposing its
+ * 8/16/32/64-bit views.
+ */
+struct vcpu_state_t {
+    union {
+        uint64_t _regs[16];
+        struct {
+            union {
+                struct {
+                    uint8_t _al, _ah;
+                };
+                uint16_t _ax;
+                uint32_t _eax;
+                uint64_t _rax;
+            };
+            union {
+                struct {
+                    uint8_t _cl, _ch;
+                };
+                uint16_t _cx;
+                uint32_t _ecx;
+                uint64_t _rcx;
+            };
+            union {
+                struct {
+                    uint8_t _dl, _dh;
+                };
+                uint16_t _dx;
+                uint32_t _edx;
+                uint64_t _rdx;
+            };
+            union {
+                struct {
+                    uint8_t _bl, _bh;
+                };
+                uint16_t _bx;
+                uint32_t _ebx;
+                uint64_t _rbx;
+            };
+            union {
+                uint16_t _sp;
+                uint32_t _esp;
+                uint64_t _rsp;
+            };
+            union {
+                uint16_t _bp;
+                uint32_t _ebp;
+                uint64_t _rbp;
+            };
+            union {
+                uint16_t _si;
+                uint32_t _esi;
+                uint64_t _rsi;
+            };
+            union {
+                uint16_t _di;
+                uint32_t _edi;
+                uint64_t _rdi;
+            };
+
+            uint64_t _r8;
+            uint64_t _r9;
+            uint64_t _r10;
+            uint64_t _r11;
+            uint64_t _r12;
+            uint64_t _r13;
+            uint64_t _r14;
+            uint64_t _r15;
+        };
+    };
+
+    /* Instruction pointer, 32-bit and 64-bit views */
+    union {
+        uint32_t _eip;
+        uint64_t _rip;
+    };
+
+    /* Flags register, 32-bit and 64-bit views */
+    union {
+        uint32_t _eflags;
+        uint64_t _rflags;
+    };
+
+    segment_desc_t _cs;
+    segment_desc_t _ss;
+    segment_desc_t _ds;
+    segment_desc_t _es;
+    segment_desc_t _fs;
+    segment_desc_t _gs;
+    segment_desc_t _ldt;
+    segment_desc_t _tr;
+
+    segment_desc_t _gdt;
+    segment_desc_t _idt;
+
+    uint64_t _cr0;
+    uint64_t _cr2;
+    uint64_t _cr3;
+    uint64_t _cr4;
+
+    uint64_t _dr0;
+    uint64_t _dr1;
+    uint64_t _dr2;
+    uint64_t _dr3;
+    uint64_t _dr6;
+    uint64_t _dr7;
+    uint64_t _pde;
+
+    uint32_t _efer;
+
+    uint32_t _sysenter_cs;
+    uint64_t _sysenter_eip;
+    uint64_t _sysenter_esp;
+
+    uint32_t _activity_state;
+    uint32_t pad;
+    interruptibility_state_t _interruptibility_state;
+};
+
+/*
+ * HAX exit status: the reason why the module handed control back to QEMU
+ * (see the _exit_status field of struct hax_tunnel).
+ */
+enum exit_status {
+    /* IO port request */
+    HAX_EXIT_IO = 1,
+    /* MMIO instruction emulation */
+    HAX_EXIT_MMIO,
+    /*
+     * QEMU emulation mode request; currently means the guest entered
+     * non-PG mode
+     */
+    HAX_EXIT_REAL,
+    /*
+     * Interrupt window open, QEMU can inject an interrupt now.
+     * Also used when a signal is pending, since at that time QEMU usually
+     * needs to check for interrupts.
+     */
+    HAX_EXIT_INTERRUPT,
+    /* Unknown vmexit, mostly triggers a reboot */
+    HAX_EXIT_UNKNOWN_VMEXIT,
+    /* HALT from guest */
+    HAX_EXIT_HLT,
+    /* Reboot request, e.g. because of a triple fault in the guest */
+    HAX_EXIT_STATECHANGE,
+    /* The vcpu is only paused when being destroyed, so simply return to hax */
+    HAX_EXIT_PAUSED,
+    HAX_EXIT_FAST_MMIO,
+};
+
+/*
+ * The interface definition:
+ * 1. Executing vcpu_run returns 0 on success; any other value means failure
+ * 2. _exit_status holds the exit reason, as listed in enum exit_status
+ * 3. _exit_reason is the VMX exit reason
+ *
+ * The anonymous union carries the details of the exit: 'pio' for port I/O
+ * and 'mmio' for MMIO; 'state' is an empty placeholder.
+ */
+struct hax_tunnel {
+    uint32_t _exit_reason;
+    uint32_t _exit_flag;
+    uint32_t _exit_status;
+    uint32_t user_event_pending;
+    int ready_for_interrupt_injection;
+    int request_interrupt_window;
+    union {
+        struct {
+            /*
+             * Direction of the port access; compare with HAX_EXIT_IO_IN and
+             * HAX_EXIT_IO_OUT.
+             * NOTE(review): the earlier comment said "0: read, 1: write",
+             * which does not obviously agree with IN == 1 and OUT == 0 -
+             * confirm against the driver.
+             */
+#define HAX_EXIT_IO_IN  1
+#define HAX_EXIT_IO_OUT 0
+            uint8_t _direction;
+            uint8_t _df;
+            uint16_t _size;
+            uint16_t _port;
+            uint16_t _count;
+            uint8_t _flags;
+            uint8_t _pad0;
+            uint16_t _pad1;
+            uint32_t _pad2;
+            uint64_t _vaddr;
+        } pio;
+        struct {
+            uint64_t gla;
+        } mmio;
+        struct {
+        } state;
+    };
+} __attribute__ ((__packed__));
+
+/*
+ * Output of the HAX_IOCTL_VERSION device ioctl (see hax_mod_version()).
+ * NOTE(review): presumably the current API version of the module and the
+ * oldest version it stays compatible with - confirm against the driver.
+ */
+struct hax_module_version {
+    uint32_t compat_version;
+    uint32_t cur_version;
+} __attribute__ ((__packed__));
+
+/*
+ * This interface is supported only from API version 2 onwards.
+ * It is the input of the NOTIFY_QEMU_VERSION VM ioctl
+ * (see hax_notify_qemu_version()).
+ */
+struct hax_qemu_version {
+    /* Current API version in QEMU */
+    uint32_t cur_version;
+    /* The minimum API version supported by QEMU */
+    uint32_t min_version;
+} __attribute__ ((__packed__));
+
+/*
+ * Output of the SETUP_TUNNEL vCPU ioctl. Originally described as the Mac
+ * specific interface to QEMU (mostly ioctl related), but the Windows host
+ * code fills vcpu->tunnel and vcpu->iobuf from 'va' and 'io_va' as well.
+ */
+struct hax_tunnel_info {
+    uint64_t va;    /* address of the struct hax_tunnel shared with QEMU */
+    uint64_t io_va; /* address of the I/O buffer shared with QEMU */
+    uint16_t size;  /* checked with valid_hax_tunnel_size() */
+    uint16_t pad[3];
+} __attribute__ ((__packed__));
+
+/* Input of the ALLOC_RAM VM ioctl (see hax_populate_ram()). */
+struct hax_alloc_ram_info {
+    uint32_t size;
+    uint32_t pad;
+    uint64_t va;
+} __attribute__ ((__packed__));
+/*
+ * Input of the SET_RAM VM ioctl (see hax_set_ram()): maps 'size' bytes of
+ * guest physical memory starting at 'pa_start' to the host virtual address
+ * 'va'.
+ * NOTE(review): HAX_RAM_INFO_ROM is presumably a 'flags' bit marking the
+ * region as ROM - confirm against the driver.
+ */
+#define HAX_RAM_INFO_ROM 0x1
+struct hax_set_ram_info {
+    uint64_t pa_start;
+    uint32_t size;
+    uint8_t flags;
+    uint8_t pad[3];
+    uint64_t va;
+} __attribute__ ((__packed__));
+
+/* Bit 0 of hax_capabilityinfo.wstatus: is HAX usable? */
+#define HAX_CAP_STATUS_WORKING     0x1
+#define HAX_CAP_STATUS_NOTWORKING  0x0
+#define HAX_CAP_WORKSTATUS_MASK    0x1
+
+/* Bits of hax_capabilityinfo.winfo, valid when HAX is not working */
+#define HAX_CAP_FAILREASON_VT      0x1
+#define HAX_CAP_FAILREASON_NX      0x2
+
+/*
+ * Further capability bits.
+ * NOTE(review): presumably tested against wstatus (0x2 matches its
+ * documented "memory limitation" bit) - confirm for HAX_CAP_UG.
+ */
+#define HAX_CAP_MEMQUOTA           0x2
+#define HAX_CAP_UG                 0x4
+
+/* Output of the HAX_IOCTL_CAPABILITY device ioctl (see hax_capability()). */
+struct hax_capabilityinfo {
+    /* bit 0: 1 - working
+     *        0 - not working, possibly because VT/NX is disabled
+     * bit 1: 1 - memory limitation working
+     *        0 - no memory limitation
+     */
+    uint16_t wstatus;
+    /* valid when not working
+     * bit 0: VT not enabled
+     * bit 1: NX not enabled
+     */
+    uint16_t winfo;
+    uint32_t pad;
+    uint64_t mem_quota;
+} __attribute__ ((__packed__));
+
+/*
+ * Description of one fast MMIO access.
+ * NOTE(review): presumably the payload that accompanies HAX_EXIT_FAST_MMIO;
+ * the meaning of 'direction' and 'reg_index' is not visible here - confirm
+ * against the code that consumes it.
+ */
+struct hax_fastmmio {
+    uint64_t gpa;
+    uint64_t value;
+    uint8_t size;
+    uint8_t direction;
+    uint16_t reg_index;
+    uint32_t pad0;
+    uint64_t _cr0;
+    uint64_t _cr2;
+    uint64_t _cr3;
+    uint64_t _cr4;
+} __attribute__ ((__packed__));
+#endif
diff --git a/target-i386/hax-slot.c b/target-i386/hax-slot.c
new file mode 100644
index 0000000..a3d8e8b
--- /dev/null
+++ b/target-i386/hax-slot.c
@@ -0,0 +1,333 @@ 
+/*
+** HAX memory slot operations
+**
+** Copyright (c) 2015-16 Intel Corporation
+**
+** This software is licensed under the terms of the GNU General Public
+** License version 2, as published by the Free Software Foundation, and
+** may be copied, distributed, and modified under those terms.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*/
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "exec/exec-all.h"
+
+#include "target-i386/hax-slot.h"
+#include "target-i386/hax-i386.h"
+#include "qemu/queue.h"
+
+/* Set to 1 to get debug traces from this file on stdout */
+#define DEBUG_HAX_SLOT 0
+
+/*
+ * Debug printf. The call is always compiled (so the format string and its
+ * arguments are checked) but only executed when DEBUG_HAX_SLOT is non-zero.
+ */
+#define DPRINTF(fmt, ...) \
+    do { \
+        if (DEBUG_HAX_SLOT) { \
+            fprintf(stdout, fmt, ## __VA_ARGS__); \
+        } \
+    } while (0)
+
+/**
+ * HAXSlot: describes a guest physical memory region and its mapping
+ *
+ * @start_pa: a guest physical address marking the start of the region; must be
+ *            page-aligned
+ * @end_pa: a guest physical address marking the end (exclusive) of the region;
+ *          must be page-aligned
+ * @hva_pa_delta: the host virtual address to which guest physical address 0 is
+ *                mapped; in other words, for any guest physical address within
+ *                the region (start_pa <= pa < end_pa), the corresponding host
+ *                virtual address is calculated by host_va = pa + hva_pa_delta.
+ *                hax_slot_register() computes it as host_va - start_pa.
+ * @flags: parameters for the mapping; must be non-negative for registered
+ *         slots (the initial catch-all slot uses -1 as an invalid marker)
+ * @entry: additional fields for linking #HAXSlot instances together
+ */
+typedef struct HAXSlot {
+    uint64_t start_pa;
+    uint64_t end_pa;
+    uint64_t hva_pa_delta;
+    int flags;
+    QTAILQ_ENTRY(HAXSlot) entry;
+} HAXSlot;
+
+/*
+ * A doubly-linked list (actually a tail queue) of all registered slots.
+ * hax_slot_find() and hax_slot_insert() rely on it being sorted by guest
+ * physical address and free of overlaps.
+ */
+static QTAILQ_HEAD(HAXSlotListHead, HAXSlot) slot_list =
+    QTAILQ_HEAD_INITIALIZER(slot_list);
+
+/*
+ * Seeds the (empty) registry with a single catch-all slot so that every
+ * valid guest physical address is always covered by exactly one slot.
+ */
+void hax_slot_init_registry(void)
+{
+    HAXSlot *catch_all;
+
+    /* The registry must not have been populated yet */
+    g_assert(QTAILQ_EMPTY(&slot_list));
+
+    /* g_new0() zero-fills the slot, which leaves start_pa at 0 */
+    catch_all = g_new0(HAXSlot, 1);
+
+    /* Deliberately invalid mapping attributes: no real slot can match them */
+    catch_all->flags = -1;
+    catch_all->hva_pa_delta = ~TARGET_PAGE_MASK;
+
+    /* 2^64 does not fit in uint64_t, so the upper bound is set to
+     * (2^64 - TARGET_PAGE_SIZE), which is more than enough guest physical
+     * address space.
+     */
+    catch_all->end_pa = TARGET_PAGE_MASK;
+
+    QTAILQ_INSERT_TAIL(&slot_list, catch_all, entry);
+}
+
+/* Unlinks and frees every slot, leaving the registry empty. */
+void hax_slot_free_registry(void)
+{
+    HAXSlot *head;
+
+    DPRINTF("%s: Deleting all registered slots\n", __func__);
+    /* Keep popping the first element until none is left */
+    while ((head = QTAILQ_FIRST(&slot_list)) != NULL) {
+        QTAILQ_REMOVE(&slot_list, head, entry);
+        g_free(head);
+    }
+}
+
+/**
+ * hax_slot_dump: prints the fields of one slot on a single line (debug only;
+ * nothing is printed unless DEBUG_HAX_SLOT is non-zero)
+ *
+ * @slot: the slot to print
+ */
+static void hax_slot_dump(HAXSlot *slot)
+{
+    DPRINTF("[ start_pa=0x%016" PRIx64 ", end_pa=0x%016" PRIx64
+            ", hva_pa_delta=0x%016" PRIx64 ", flags=%d ]\n",
+            slot->start_pa, slot->end_pa, slot->hva_pa_delta, slot->flags);
+}
+
+/**
+ * hax_slot_dump_list: prints every slot of @slot_list, in list order, framed
+ * by BEGIN/END markers (debug only)
+ */
+static void hax_slot_dump_list(void)
+{
+    HAXSlot *cur;
+    int index = 0;
+
+    DPRINTF("**** BEGIN HAX SLOT LIST DUMP ****\n");
+    for (cur = QTAILQ_FIRST(&slot_list); cur; cur = QTAILQ_NEXT(cur, entry)) {
+        DPRINTF("Slot %d:\n\t", index);
+        index++;
+        hax_slot_dump(cur);
+    }
+    DPRINTF("**** END HAX SLOT LIST DUMP ****\n");
+}
+
+/**
+ * hax_slot_find: returns the slot that contains a guest physical address
+ *
+ * Walks @slot_list forward from @start_slot and returns the first slot whose
+ * upper bound lies above @pa. Exactly one slot is expected to match, because
+ * the list is seeded with a slot covering all valid addresses, insertions
+ * preserve that coverage, and slots never overlap.
+ *
+ * @start_slot: the slot at which the search starts; must not be %NULL
+ * @pa: the guest physical address to look up; must not be below the lower
+ *      bound of @start_slot
+ */
+static HAXSlot *hax_slot_find(HAXSlot *start_slot, uint64_t pa)
+{
+    HAXSlot *cur;
+
+    g_assert(start_slot);
+    g_assert(start_slot->start_pa <= pa);
+
+    for (cur = start_slot; cur; cur = QTAILQ_NEXT(cur, entry)) {
+        if (pa < cur->end_pa) {
+            return cur;
+        }
+    }
+
+    /* Running off the end of the list means the coverage invariant broke */
+    g_assert_not_reached();
+    return NULL;
+}
+
+/**
+ * hax_slot_split: cuts a slot in two at a given address
+ *
+ * @slot keeps the lower part [start_pa, @pa); a newly allocated slot with the
+ * same mapping attributes takes the upper part [@pa, end_pa) and is linked
+ * right after @slot. Returns the new (upper) slot.
+ *
+ * @slot: the slot to shrink
+ * @pa: the splitting point; must be page-aligned and strictly inside @slot
+ */
+static HAXSlot *hax_slot_split(HAXSlot *slot, uint64_t pa)
+{
+    HAXSlot *upper;
+
+    g_assert(slot);
+    g_assert(pa > slot->start_pa && pa < slot->end_pa);
+    g_assert(!(pa & ~TARGET_PAGE_MASK));
+
+    upper = g_new0(HAXSlot, 1);
+
+    /* The upper half inherits the mapping attributes unchanged */
+    upper->flags = slot->flags;
+    upper->hva_pa_delta = slot->hva_pa_delta;
+
+    /* Hand over the range above pa, then shrink the original */
+    upper->start_pa = pa;
+    upper->end_pa = slot->end_pa;
+    slot->end_pa = pa;
+
+    QTAILQ_INSERT_AFTER(&slot_list, slot, upper, entry);
+    return upper;
+}
+
+/**
+ * hax_slot_can_merge: tests whether two slots share their mapping attributes
+ *
+ * Slots are compatible when both their flags and their hva_pa_delta are
+ * equal; compatible slots that overlap or touch can be merged into one.
+ *
+ * Returns %true if @slot1 and @slot2 are compatible.
+ *
+ * @slot1: one of the slots to compare; must not be %NULL
+ * @slot2: the other slot to compare; must not be %NULL
+ */
+static bool hax_slot_can_merge(HAXSlot *slot1, HAXSlot *slot2)
+{
+    g_assert(slot1 && slot2);
+
+    if (slot1->flags != slot2->flags) {
+        return false;
+    }
+    return slot1->hva_pa_delta == slot2->hva_pa_delta;
+}
+
+/**
+ * hax_slot_insert: inserts a slot into @slot_list, with the potential side
+ *                  effect of creating/updating memory mappings
+ *
+ * Causes memory mapping attributes of @slot to override those of overlapping
+ * slots (including partial slots) in @slot_list. For any slot whose mapping
+ * attributes have changed, performs an ioctl to enforce the new mapping.
+ *
+ * @slot may be widened in the process: when a neighbouring slot has the same
+ * mapping attributes, the two are merged and @slot takes over its range.
+ * Ownership of @slot passes to @slot_list; every slot it replaces is freed.
+ *
+ * Aborts QEMU on error.
+ *
+ * @slot: the slot to be inserted
+ */
+static void hax_slot_insert(HAXSlot *slot)
+{
+    HAXSlot *low_slot, *high_slot;
+    HAXSlot *low_slot_prev, *high_slot_next;
+    HAXSlot *old_slot;
+    bool done = false;
+
+    g_assert(!QTAILQ_EMPTY(&slot_list));
+
+    low_slot = hax_slot_find(QTAILQ_FIRST(&slot_list), slot->start_pa);
+    g_assert(low_slot);
+    low_slot_prev = QTAILQ_PREV(low_slot, HAXSlotListHead, entry);
+
+    /* Adjust slot and/or low_slot such that their lower bounds (start_pa)
+     * align.
+     */
+    if (hax_slot_can_merge(low_slot, slot)) {
+        slot->start_pa = low_slot->start_pa;
+    } else if (slot->start_pa == low_slot->start_pa && low_slot_prev
+               && hax_slot_can_merge(low_slot_prev, slot)) {
+        low_slot = low_slot_prev;
+        slot->start_pa = low_slot->start_pa;
+    } else if (slot->start_pa != low_slot->start_pa) {
+        /* low_slot->start_pa < slot->start_pa < low_slot->end_pa */
+        low_slot = hax_slot_split(low_slot, slot->start_pa);
+        g_assert(low_slot);
+    }
+    /* Now we have slot->start_pa == low_slot->start_pa */
+
+    high_slot = hax_slot_find(low_slot, slot->end_pa - 1);
+    g_assert(high_slot);
+    high_slot_next = QTAILQ_NEXT(high_slot, entry);
+
+    /* Adjust slot and/or high_slot such that their upper bounds (end_pa)
+     * align.
+     */
+    if (hax_slot_can_merge(slot, high_slot)) {
+        slot->end_pa = high_slot->end_pa;
+    } else if (slot->end_pa == high_slot->end_pa && high_slot_next
+               && hax_slot_can_merge(slot, high_slot_next)) {
+        high_slot = high_slot_next;
+        slot->end_pa = high_slot->end_pa;
+    } else if (slot->end_pa != high_slot->end_pa) {
+        /* high_slot->start_pa < slot->end_pa < high_slot->end_pa */
+        high_slot_next = hax_slot_split(high_slot, slot->end_pa);
+        g_assert(high_slot_next);
+    }
+    /* Now we have slot->end_pa == high_slot->end_pa */
+
+    /* We are ready for substitution: replace all slots between low_slot and
+     * high_slot (inclusive) with slot. */
+
+    /* Step 1: insert slot into the list, before low_slot */
+    QTAILQ_INSERT_BEFORE(low_slot, slot, entry);
+
+    /* Step 2: remove low_slot..high_slot, one by one */
+    old_slot = low_slot;
+    while (!done) {
+        HAXSlot *old_slot_next;
+
+        g_assert(old_slot);
+
+        /* Fetch the successor and the termination test while old_slot is
+         * still alive; it must not be inspected once it has been freed.
+         */
+        old_slot_next = QTAILQ_NEXT(old_slot, entry);
+        done = (old_slot == high_slot);
+
+        QTAILQ_REMOVE(&slot_list, old_slot, entry);
+        if (!hax_slot_can_merge(slot, old_slot)) {
+            /* Mapping for guest memory region [old_slot->start_pa,
+             * old_slot->end_pa) has changed - must do ioctl. */
+            /* TODO: Further reduce the number of ioctl calls by preprocessing
+             * the low_slot..high_slot sublist and combining any two adjacent
+             * slots that are both incompatible with slot.
+             */
+            uint32_t size = old_slot->end_pa - old_slot->start_pa;
+            uint64_t host_va = old_slot->start_pa + slot->hva_pa_delta;
+            int err;
+
+            DPRINTF("%s: Doing ioctl (size=0x%08" PRIx32 ")\n", __func__, size);
+            /* Use the new host_va and flags */
+            err = hax_set_ram(old_slot->start_pa, size, host_va, slot->flags);
+            if (err) {
+                fprintf(stderr, "%s: Failed to set memory mapping (err=%d)\n",
+                        __func__, err);
+                abort();
+            }
+        }
+        g_free(old_slot);
+
+        old_slot = old_slot_next;
+    }
+}
+
+/*
+ * Registers the guest physical range [start_pa, start_pa + size) as mapped
+ * to host_va with the given flags. The arguments are validated with
+ * assertions; the actual list update (and any ioctl) is done by
+ * hax_slot_insert().
+ */
+void hax_slot_register(uint64_t start_pa, uint32_t size, uint64_t host_va,
+                       int flags)
+{
+    const uint64_t end_pa = start_pa + size;
+    HAXSlot *new_slot;
+
+    g_assert(!(start_pa & ~TARGET_PAGE_MASK));
+    g_assert(!(end_pa & ~TARGET_PAGE_MASK));
+    /* Rejects an empty range as well as a wrapped-around end address */
+    g_assert(start_pa < end_pa);
+    g_assert(host_va);
+    g_assert(flags >= 0);
+
+    new_slot = g_new0(HAXSlot, 1);
+    new_slot->flags = flags;
+    new_slot->hva_pa_delta = host_va - start_pa;
+    new_slot->start_pa = start_pa;
+    new_slot->end_pa = end_pa;
+
+    DPRINTF("%s: Inserting slot:\n\t", __func__);
+    hax_slot_dump(new_slot);
+    if (DEBUG_HAX_SLOT) {
+        hax_slot_dump_list();
+    }
+
+    hax_slot_insert(new_slot);
+
+    DPRINTF("%s: Done\n", __func__);
+    if (DEBUG_HAX_SLOT) {
+        hax_slot_dump_list();
+    }
+}
diff --git a/target-i386/hax-slot.h b/target-i386/hax-slot.h
new file mode 100644
index 0000000..d991c53
--- /dev/null
+++ b/target-i386/hax-slot.h
@@ -0,0 +1,58 @@ 
+/*
+** HAX memory slot operations
+**
+** Copyright (c) 2015-16 Intel Corporation
+**
+** This software is licensed under the terms of the GNU General Public
+** License version 2, as published by the Free Software Foundation, and
+** may be copied, distributed, and modified under those terms.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+** GNU General Public License for more details.
+*/
+
+#ifndef _HAX_SLOT_H
+#define _HAX_SLOT_H
+
+#include <inttypes.h>
+
+/**
+ * hax_slot_init_registry: initializes the registry of memory slots.
+ *
+ * Should be called during HAX initialization, before any call to
+ * hax_slot_register(). The registry must be empty when this is called (this
+ * is asserted).
+ */
+void hax_slot_init_registry(void);
+
+/**
+ * hax_slot_free_registry: destroys the registry of memory slots.
+ *
+ * Should be called during HAX cleanup to free up resources used by the
+ * registry of memory slots. Only the bookkeeping is released; no request is
+ * sent to the HAXM driver.
+ */
+void hax_slot_free_registry(void);
+
+/**
+ * hax_slot_register: registers a memory slot, updating HAX memory mappings if
+ * necessary.
+ *
+ * Must be called after hax_slot_init_registry(). Can be called multiple times
+ * to create new memory mappings or update existing ones. Parts of the range
+ * that are already mapped with the same attributes are not sent to the HAXM
+ * driver again.
+ *
+ * Violations of the argument constraints below trigger an assertion failure.
+ * Aborts QEMU on error.
+ *
+ * @start_pa: a guest physical address marking the start of the slot to
+ *            register; must be page-aligned
+ * @size: size of the slot to register; must be page-aligned and positive
+ * @host_va: a host virtual address to which @start_pa should be mapped; must
+ *           not be 0
+ * @flags: parameters for the mapping, passed verbatim to the HAXM driver if
+ *         necessary; must be non-negative
+ */
+void hax_slot_register(uint64_t start_pa, uint32_t size, uint64_t host_va,
+                       int flags);
+
+#endif
diff --git a/target-i386/hax-windows.c b/target-i386/hax-windows.c
new file mode 100644
index 0000000..194ac1c
--- /dev/null
+++ b/target-i386/hax-windows.c
@@ -0,0 +1,509 @@ 
+/*
+ * QEMU HAXM support
+ *
+ * Copyright (c) 2011 Intel Corporation
+ *  Written by:
+ *  Jiang Yunhong<yunhong.jiang@intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "exec/exec-all.h"
+#include "hax-i386.h"
+
+/* Set to 1 to get debug traces from this file on stdout */
+#define DEBUG_HAX 0
+
+/*
+ * Debug printf. The call is always compiled (so the format string and its
+ * arguments are checked) but only executed when DEBUG_HAX is non-zero.
+ */
+#define DPRINTF(fmt, ...) \
+    do { \
+        if (DEBUG_HAX) { \
+            fprintf(stdout, fmt, ## __VA_ARGS__); \
+        } \
+    } while (0)
+
+/*
+ * Opens the global HAX device ("\\.\HAX") and stores its handle in *fd.
+ *
+ * Returns 0 on success, -1 when the driver is not loaded (the device does
+ * not exist), -2 for any other failure (including fd == NULL). *fd is left
+ * untouched on failure.
+ */
+static int hax_open_device(hax_fd *fd)
+{
+    HANDLE hDevice;
+
+    if (!fd) {
+        return -2;
+    }
+
+    hDevice = CreateFile("\\\\.\\HAX",
+                         GENERIC_READ | GENERIC_WRITE,
+                         0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
+
+    if (hDevice == INVALID_HANDLE_VALUE) {
+        /* Read the error code before doing anything else: the last-error
+         * value is per-thread state that later calls (such as the I/O done
+         * by fprintf) are allowed to overwrite.
+         */
+        DWORD errNum = GetLastError();
+
+        fprintf(stderr, "Failed to open the HAX device!\n");
+        if (errNum == ERROR_FILE_NOT_FOUND) {
+            return -1;
+        }
+        return -2;
+    }
+    *fd = hDevice;
+    DPRINTF("HAX device fd:%p\n", *fd);
+    return 0;
+}
+
+/*
+ * Queries the HAX module for its memory quota.
+ *
+ * Opens the HAX device just for the duration of the capability ioctl and
+ * stores the reported mem_quota in *max_ram.
+ *
+ * Returns 0 on success, -1 if the device cannot be opened, -2 if the ioctl
+ * fails.
+ */
+int hax_get_max_ram(uint64_t *max_ram)
+{
+    struct hax_capabilityinfo cap;
+    DWORD dSize = 0;
+    int ok;
+    hax_fd fd;
+
+    /* hax_mod_open() hands back NULL when the device could not be opened */
+    fd = hax_mod_open();
+    if (!fd) {
+        return -1;
+    }
+
+    ok = DeviceIoControl(fd, HAX_IOCTL_CAPABILITY, NULL, 0, &cap,
+                         sizeof(cap), &dSize, (LPOVERLAPPED) NULL);
+    /* The handle was only needed for this one request */
+    CloseHandle(fd);
+    if (!ok) {
+        return -2;
+    }
+
+    *max_ram = cap.mem_quota;
+    return 0;
+}
+
+/*
+ * Opens the global HAX device and returns its handle.
+ *
+ * On failure a message is printed and NULL is returned: the handle starts
+ * out as NULL and hax_open_device() leaves it untouched when it fails.
+ * NOTE(review): hax_invalid_fd() compares against INVALID_HANDLE_VALUE and
+ * therefore does not recognise this NULL - callers must test for NULL.
+ */
+hax_fd hax_mod_open(void)
+{
+    hax_fd fd = NULL;
+
+    if (hax_open_device(&fd) != 0) {
+        fprintf(stderr, "Open HAX device failed\n");
+    }
+
+    return fd;
+}
+
+/*
+ * Asks the HAX module to allocate guest RAM backed by the host buffer at
+ * 'va' ('size' bytes), using the ALLOC_RAM ioctl of the current VM.
+ *
+ * Returns 0 on success, -EINVAL if no VM has been created yet, -EFAULT if
+ * the ioctl fails.
+ */
+int hax_populate_ram(uint64_t va, uint32_t size)
+{
+    /* Zero-initialise so that the padding sent to the driver is defined */
+    struct hax_alloc_ram_info info = { 0 };
+    HANDLE hDeviceVM;
+    DWORD dSize = 0;
+    int ret;
+
+    if (!hax_global.vm || !hax_global.vm->fd) {
+        fprintf(stderr, "Allocate memory before vm create?\n");
+        return -EINVAL;
+    }
+
+    info.size = size;
+    info.va = va;
+
+    hDeviceVM = hax_global.vm->fd;
+
+    ret = DeviceIoControl(hDeviceVM,
+                          HAX_VM_IOCTL_ALLOC_RAM,
+                          &info, sizeof(info), NULL, 0, &dSize,
+                          (LPOVERLAPPED) NULL);
+
+    if (!ret) {
+        fprintf(stderr, "Failed to allocate %x memory\n", size);
+        /* DeviceIoControl() signals failure with 0, which must not be passed
+         * on as-is: 0 means success to the callers of this function.
+         */
+        return -EFAULT;
+    }
+
+    return 0;
+}
+
+/*
+ * Maps the guest physical range [start_pa, start_pa + size) to the host
+ * virtual address host_va, using the SET_RAM ioctl of the current VM.
+ * Only the low 8 bits of 'flags' are passed to the driver.
+ *
+ * Returns 0 on success, -EINVAL if no VM has been created yet, -EFAULT if
+ * the ioctl fails.
+ */
+int hax_set_ram(uint64_t start_pa, uint32_t size, uint64_t host_va, int flags)
+{
+    /* Zero-initialise so that the padding sent to the driver is defined */
+    struct hax_set_ram_info info = { 0 };
+    HANDLE hDeviceVM;
+    DWORD dSize = 0;
+    int ret;
+
+    /* Same precondition as hax_populate_ram(): a VM must exist already */
+    if (!hax_global.vm || !hax_global.vm->fd) {
+        fprintf(stderr, "Set memory mapping before vm create?\n");
+        return -EINVAL;
+    }
+    hDeviceVM = hax_global.vm->fd;
+
+    info.pa_start = start_pa;
+    info.size = size;
+    info.va = host_va;
+    info.flags = (uint8_t) flags;
+
+    ret = DeviceIoControl(hDeviceVM, HAX_VM_IOCTL_SET_RAM,
+                          &info, sizeof(info), NULL, 0, &dSize,
+                          (LPOVERLAPPED) NULL);
+
+    if (!ret) {
+        return -EFAULT;
+    } else {
+        return 0;
+    }
+}
+
+/*
+ * Reads the capability information of the HAX module into *cap, using the
+ * CAPABILITY ioctl of the global HAX device.
+ *
+ * Returns 0 on success, -ENODEV if the device handle is invalid, -EFAULT if
+ * the ioctl fails.
+ */
+int hax_capability(struct hax_state *hax, struct hax_capabilityinfo *cap)
+{
+    int ret;
+    HANDLE hDevice = hax->fd;        /* handle to hax module */
+    DWORD dSize = 0;
+    DWORD err = 0;
+
+    if (hax_invalid_fd(hDevice)) {
+        fprintf(stderr, "Invalid fd for hax device!\n");
+        return -ENODEV;
+    }
+
+    ret = DeviceIoControl(hDevice, HAX_IOCTL_CAPABILITY, NULL, 0, cap,
+                          sizeof(*cap), &dSize, (LPOVERLAPPED) NULL);
+
+    if (!ret) {
+        err = GetLastError();
+        /* These two codes mean the driver's answer did not fit into *cap */
+        if (err == ERROR_INSUFFICIENT_BUFFER || err == ERROR_MORE_DATA) {
+            fprintf(stderr, "hax capability is too long to hold.\n");
+        }
+        fprintf(stderr, "Failed to get Hax capability:%lu\n", err);
+        return -EFAULT;
+    } else {
+        return 0;
+    }
+}
+
+/*
+ * Reads the version information of the HAX module into *version, using the
+ * VERSION ioctl of the global HAX device.
+ *
+ * Returns 0 on success, -ENODEV if the device handle is invalid, -EFAULT if
+ * the ioctl fails.
+ */
+int hax_mod_version(struct hax_state *hax, struct hax_module_version *version)
+{
+    HANDLE module = hax->fd;
+    DWORD dSize = 0;
+    DWORD err;
+
+    if (hax_invalid_fd(module)) {
+        fprintf(stderr, "Invalid fd for hax device!\n");
+        return -ENODEV;
+    }
+
+    if (DeviceIoControl(module, HAX_IOCTL_VERSION, NULL, 0,
+                        version, sizeof(*version), &dSize,
+                        (LPOVERLAPPED) NULL)) {
+        return 0;
+    }
+
+    err = GetLastError();
+    /* These two codes mean the driver's answer did not fit into *version */
+    if (err == ERROR_INSUFFICIENT_BUFFER || err == ERROR_MORE_DATA) {
+        fprintf(stderr, "hax module verion is too long to hold.\n");
+    }
+    fprintf(stderr, "Failed to get Hax module version:%lu\n", err);
+    return -EFAULT;
+}
+
+/*
+ * Builds the device name of a VM: "\\.\hax_vmNN", NN being the two-digit
+ * decimal VM id.
+ *
+ * Returns a newly allocated string that the caller must release with
+ * g_free(), or NULL if vm_id is negative or greater than MAX_VM_ID.
+ */
+static char *hax_vm_devfs_string(int vm_id)
+{
+    /* A negative id would not fit the two-digit field of the device name */
+    if (vm_id < 0) {
+        fprintf(stderr, "Invalid VM id %d\n", vm_id);
+        return NULL;
+    }
+
+    if (vm_id > MAX_VM_ID) {
+        fprintf(stderr, "Too big VM id\n");
+        return NULL;
+    }
+
+    /* g_strdup_printf() sizes the buffer itself and aborts on OOM, so no
+     * template string or NULL check is needed.
+     */
+    return g_strdup_printf("\\\\.\\hax_vm%02d", vm_id);
+}
+
+/*
+ * Builds the device name of a vCPU: "\\.\hax_vmNN_vcpuMM", NN and MM being
+ * the two-digit decimal VM id and vCPU id.
+ *
+ * Returns a newly allocated string that the caller must release with
+ * g_free(), or NULL if an id is negative or greater than its limit
+ * (MAX_VM_ID / MAX_VCPU_ID).
+ */
+static char *hax_vcpu_devfs_string(int vm_id, int vcpu_id)
+{
+    /* Negative ids would not fit the two-digit fields of the device name */
+    if (vm_id < 0 || vcpu_id < 0) {
+        fprintf(stderr, "Invalid vm id %d or vcpu id %d\n", vm_id, vcpu_id);
+        return NULL;
+    }
+
+    if (vm_id > MAX_VM_ID || vcpu_id > MAX_VCPU_ID) {
+        fprintf(stderr, "Too big vm id %x or vcpu id %x\n", vm_id, vcpu_id);
+        return NULL;
+    }
+
+    /* g_strdup_printf() sizes the buffer itself and aborts on OOM, so no
+     * template string or NULL check is needed.
+     */
+    return g_strdup_printf("\\\\.\\hax_vm%02d_vcpu%02d", vm_id, vcpu_id);
+}
+
+/*
+ * Asks the HAX module to create a VM and stores the id of the new VM in
+ * *vmid.
+ *
+ * Returns 0 on success, -EINVAL if the device handle is invalid, -1 if the
+ * ioctl fails. If hax->vm is already set, returns 0 right away without
+ * touching *vmid.
+ */
+int hax_host_create_vm(struct hax_state *hax, int *vmid)
+{
+    DWORD dSize = 0;
+    int new_id = 0;
+
+    if (hax_invalid_fd(hax->fd)) {
+        return -EINVAL;
+    }
+
+    /* A VM exists already: nothing to create */
+    if (hax->vm) {
+        return 0;
+    }
+
+    if (!DeviceIoControl(hax->fd, HAX_IOCTL_CREATE_VM,
+                         NULL, 0, &new_id, sizeof(new_id), &dSize,
+                         (LPOVERLAPPED) NULL)) {
+        fprintf(stderr, "Failed to create VM. Error code: %lu\n",
+                GetLastError());
+        return -1;
+    }
+
+    *vmid = new_id;
+    return 0;
+}
+
+/*
+ * Opens the device of the VM with the given id.
+ *
+ * Returns the device handle, or INVALID_HANDLE_VALUE if the device name
+ * cannot be built or the device cannot be opened. 'hax' is not used.
+ */
+hax_fd hax_host_open_vm(struct hax_state *hax, int vm_id)
+{
+    hax_fd vm_handle;
+    char *path = hax_vm_devfs_string(vm_id);
+
+    if (path == NULL) {
+        fprintf(stderr, "Failed to open VM. VM name is null\n");
+        return INVALID_HANDLE_VALUE;
+    }
+
+    vm_handle = CreateFile(path,
+                           GENERIC_READ | GENERIC_WRITE,
+                           0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
+    if (vm_handle == INVALID_HANDLE_VALUE) {
+        fprintf(stderr, "Open the vm device error:%s, ec:%lu\n",
+                path, GetLastError());
+    }
+
+    /* The name was only needed to open the device */
+    g_free(path);
+    return vm_handle;
+}
+
+/*
+ * Sends *qversion to the HAX module through the NOTIFY_QEMU_VERSION ioctl of
+ * the VM device.
+ *
+ * Returns 0 on success, -EINVAL if vm_fd is invalid, -1 if the ioctl fails.
+ */
+int hax_notify_qemu_version(hax_fd vm_fd, struct hax_qemu_version *qversion)
+{
+    DWORD dSize = 0;
+    int ok;
+
+    if (hax_invalid_fd(vm_fd)) {
+        return -EINVAL;
+    }
+
+    ok = DeviceIoControl(vm_fd, HAX_VM_IOCTL_NOTIFY_QEMU_VERSION,
+                         qversion, sizeof(*qversion),
+                         NULL, 0, &dSize, (LPOVERLAPPED) NULL);
+    if (ok) {
+        return 0;
+    }
+
+    fprintf(stderr, "Failed to notify qemu API version\n");
+    return -1;
+}
+
+/*
+ * Asks the HAX module to create the vCPU 'vcpuid' in the VM behind vm_fd.
+ *
+ * Returns 0 on success, -1 if the ioctl fails.
+ */
+int hax_host_create_vcpu(hax_fd vm_fd, int vcpuid)
+{
+    DWORD dSize = 0;
+
+    if (DeviceIoControl(vm_fd, HAX_VM_IOCTL_VCPU_CREATE,
+                        &vcpuid, sizeof(vcpuid), NULL, 0, &dSize,
+                        (LPOVERLAPPED) NULL)) {
+        return 0;
+    }
+
+    fprintf(stderr, "Failed to create vcpu %x\n", vcpuid);
+    return -1;
+}
+
+/*
+ * Opens the device of vCPU 'vcpuid' of VM 'vmid'.
+ *
+ * Returns the device handle, or INVALID_HANDLE_VALUE if the device name
+ * cannot be built or the device cannot be opened.
+ */
+hax_fd hax_host_open_vcpu(int vmid, int vcpuid)
+{
+    hax_fd vcpu_handle;
+    char *path = hax_vcpu_devfs_string(vmid, vcpuid);
+
+    if (path == NULL) {
+        fprintf(stderr, "Failed to get the devfs\n");
+        return INVALID_HANDLE_VALUE;
+    }
+
+    vcpu_handle = CreateFile(path,
+                             GENERIC_READ | GENERIC_WRITE,
+                             0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL,
+                             NULL);
+    if (vcpu_handle == INVALID_HANDLE_VALUE) {
+        fprintf(stderr, "Failed to open the vcpu devfs\n");
+    }
+
+    /* The name was only needed to open the device */
+    g_free(path);
+    return vcpu_handle;
+}
+
+/*
+ * Sets up the communication channel of a vCPU: issues the SETUP_TUNNEL ioctl
+ * and stores the tunnel and I/O buffer addresses it reports in vcpu->tunnel
+ * and vcpu->iobuf.
+ *
+ * Returns 0 on success, -1 if the ioctl fails, -EINVAL if the reported
+ * tunnel size is rejected by valid_hax_tunnel_size().
+ */
+int hax_host_setup_vcpu_channel(struct hax_vcpu_state *vcpu)
+{
+    struct hax_tunnel_info info;
+    DWORD dSize = 0;
+
+    if (!DeviceIoControl(vcpu->fd, HAX_VCPU_IOCTL_SETUP_TUNNEL,
+                         NULL, 0, &info, sizeof(info), &dSize,
+                         (LPOVERLAPPED) NULL)) {
+        fprintf(stderr, "Failed to setup the hax tunnel\n");
+        return -1;
+    }
+
+    if (!valid_hax_tunnel_size(info.size)) {
+        fprintf(stderr, "Invalid hax tunnel size %x\n", info.size);
+        return -EINVAL;
+    }
+
+    /* The addresses arrive as 64-bit integers; go through intptr_t to turn
+     * them into pointers.
+     */
+    vcpu->tunnel = (struct hax_tunnel *) (intptr_t) (info.va);
+    vcpu->iobuf = (unsigned char *) (intptr_t) (info.io_va);
+    return 0;
+}
+
+/*
+ * Issues the RUN ioctl on the vCPU device.
+ *
+ * Returns 0 on success, -EFAULT if the ioctl fails.
+ */
+int hax_vcpu_run(struct hax_vcpu_state *vcpu)
+{
+    DWORD dSize = 0;
+    int ok;
+
+    ok = DeviceIoControl(vcpu->fd, HAX_VCPU_IOCTL_RUN,
+                         NULL, 0, NULL, 0, &dSize, (LPOVERLAPPED) NULL);
+    return ok ? 0 : -EFAULT;
+}
+
+/*
+ * Transfers the FPU state of a vCPU: *fl is written to the vCPU when 'set'
+ * is non-zero and filled from the vCPU otherwise.
+ *
+ * Returns 0 on success, -1 if the vCPU has no valid device handle, -EFAULT
+ * if the ioctl fails.
+ */
+int hax_sync_fpu(CPUArchState *env, struct fx_layout *fl, int set)
+{
+    DWORD dSize = 0;
+    int ok;
+    hax_fd vcpu_fd = hax_vcpu_get_fd(env);
+
+    if (hax_invalid_fd(vcpu_fd)) {
+        return -1;
+    }
+
+    if (set) {
+        /* *fl is the input buffer */
+        ok = DeviceIoControl(vcpu_fd, HAX_VCPU_IOCTL_SET_FPU,
+                             fl, sizeof(*fl), NULL, 0, &dSize,
+                             (LPOVERLAPPED) NULL);
+    } else {
+        /* *fl is the output buffer */
+        ok = DeviceIoControl(vcpu_fd, HAX_VCPU_IOCTL_GET_FPU,
+                             NULL, 0, fl, sizeof(*fl), &dSize,
+                             (LPOVERLAPPED) NULL);
+    }
+
+    return ok ? 0 : -EFAULT;
+}
+
+/*
+ * Transfers MSRs of a vCPU, using the SET_MSRS ioctl when 'set' is non-zero
+ * and the GET_MSRS ioctl otherwise. In both directions *msrs serves as the
+ * input and the output buffer of the request.
+ *
+ * Returns 0 on success, -1 if the vCPU has no valid device handle, -EFAULT
+ * if the ioctl fails.
+ */
+int hax_sync_msr(CPUArchState *env, struct hax_msr_data *msrs, int set)
+{
+    DWORD dSize = 0;
+    DWORD request;
+    hax_fd vcpu_fd = hax_vcpu_get_fd(env);
+
+    if (hax_invalid_fd(vcpu_fd)) {
+        return -1;
+    }
+
+    /* The two directions only differ in the ioctl code */
+    request = set ? HAX_VCPU_IOCTL_SET_MSRS : HAX_VCPU_IOCTL_GET_MSRS;
+
+    if (!DeviceIoControl(vcpu_fd, request,
+                         msrs, sizeof(*msrs),
+                         msrs, sizeof(*msrs), &dSize, (LPOVERLAPPED) NULL)) {
+        return -EFAULT;
+    }
+    return 0;
+}
+
+/*
+ * Transfers the register state of a vCPU: *state is written to the vCPU
+ * when 'set' is non-zero and filled from the vCPU otherwise.
+ *
+ * Returns 0 on success, -1 if the vCPU has no valid device handle, -EFAULT
+ * if the ioctl fails.
+ */
+int hax_sync_vcpu_state(CPUArchState *env, struct vcpu_state_t *state, int set)
+{
+    DWORD dSize;
+    int ok;
+    hax_fd vcpu_fd = hax_vcpu_get_fd(env);
+
+    if (hax_invalid_fd(vcpu_fd)) {
+        return -1;
+    }
+
+    if (set) {
+        /* *state is the input buffer */
+        ok = DeviceIoControl(vcpu_fd, HAX_VCPU_SET_REGS,
+                             state, sizeof(*state),
+                             NULL, 0, &dSize, (LPOVERLAPPED) NULL);
+    } else {
+        /* *state is the output buffer */
+        ok = DeviceIoControl(vcpu_fd, HAX_VCPU_GET_REGS,
+                             NULL, 0,
+                             state, sizeof(*state), &dSize,
+                             (LPOVERLAPPED) NULL);
+    }
+
+    return ok ? 0 : -EFAULT;
+}
+
+/*
+ * Passes the interrupt vector 'vector' to the vCPU through the INTERRUPT
+ * ioctl.
+ *
+ * Returns 0 on success, -1 if the vCPU has no valid device handle, -EFAULT
+ * if the ioctl fails.
+ */
+int hax_inject_interrupt(CPUArchState *env, int vector)
+{
+    DWORD dSize;
+    hax_fd vcpu_fd = hax_vcpu_get_fd(env);
+
+    if (hax_invalid_fd(vcpu_fd)) {
+        return -1;
+    }
+
+    if (!DeviceIoControl(vcpu_fd, HAX_VCPU_IOCTL_INTERRUPT,
+                         &vector, sizeof(vector), NULL, 0, &dSize,
+                         (LPOVERLAPPED) NULL)) {
+        return -EFAULT;
+    }
+    return 0;
+}
diff --git a/target-i386/hax-windows.h b/target-i386/hax-windows.h
new file mode 100644
index 0000000..1d8f68d
--- /dev/null
+++ b/target-i386/hax-windows.h
@@ -0,0 +1,89 @@ 
+/*
+ * QEMU HAXM support
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *
+ * Copyright (c) 2011 Intel Corporation
+ *  Written by:
+ *  Jiang Yunhong<yunhong.jiang@intel.com>
+ *  Xin Xiaohui<xiaohui.xin@intel.com>
+ *  Zhang Xiantao<xiantao.zhang@intel.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef TARGET_I386_HAX_WINDOWS_H
+#define TARGET_I386_HAX_WINDOWS_H
+
+#include <windows.h>
+#include <memory.h>
+#include <malloc.h>
+#include <winioctl.h>
+#include <string.h>
+#include <stdio.h>
+#include <windef.h>
+
+#define HAX_INVALID_FD INVALID_HANDLE_VALUE
+
+static inline void hax_close_fd(hax_fd fd)
+{
+    CloseHandle(fd); /* the result of CloseHandle() is not checked */
+}
+
+static inline void hax_mod_close(struct hax_state *hax)
+{
+    hax_close_fd(hax->fd); /* hax->fd itself is not reset afterwards */
+}
+
+static inline int hax_invalid_fd(hax_fd fd)
+{
+    return fd == HAX_INVALID_FD; /* non-zero for the invalid-handle value */
+}
+
+#define HAX_DEVICE_TYPE 0x4000 /* DeviceType for every CTL_CODE() below */
+/* HAX_IOCTL_*: presumably for the HAX module device handle -- confirm. */
+#define HAX_IOCTL_VERSION       CTL_CODE(HAX_DEVICE_TYPE, 0x900, \
+                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_IOCTL_CREATE_VM     CTL_CODE(HAX_DEVICE_TYPE, 0x901, \
+                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_IOCTL_CAPABILITY    CTL_CODE(HAX_DEVICE_TYPE, 0x910, \
+                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
+/* HAX_VM_IOCTL_*: presumably for a per-VM handle -- confirm. */
+#define HAX_VM_IOCTL_VCPU_CREATE   CTL_CODE(HAX_DEVICE_TYPE, 0x902, \
+                                            METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VM_IOCTL_ALLOC_RAM     CTL_CODE(HAX_DEVICE_TYPE, 0x903, \
+                                            METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VM_IOCTL_SET_RAM       CTL_CODE(HAX_DEVICE_TYPE, 0x904, \
+                                            METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VM_IOCTL_VCPU_DESTROY  CTL_CODE(HAX_DEVICE_TYPE, 0x905, \
+                                            METHOD_BUFFERED, FILE_ANY_ACCESS)
+/* vCPU codes; hax-windows.c sends MSRS/REGS/INTERRUPT on a vCPU handle. */
+#define HAX_VCPU_IOCTL_RUN      CTL_CODE(HAX_DEVICE_TYPE, 0x906, \
+                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VCPU_IOCTL_SET_MSRS CTL_CODE(HAX_DEVICE_TYPE, 0x907, \
+                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VCPU_IOCTL_GET_MSRS CTL_CODE(HAX_DEVICE_TYPE, 0x908, \
+                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VCPU_IOCTL_SET_FPU  CTL_CODE(HAX_DEVICE_TYPE, 0x909, \
+                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VCPU_IOCTL_GET_FPU  CTL_CODE(HAX_DEVICE_TYPE, 0x90a, \
+                                         METHOD_BUFFERED, FILE_ANY_ACCESS)
+
+#define HAX_VCPU_IOCTL_SETUP_TUNNEL  CTL_CODE(HAX_DEVICE_TYPE, 0x90b, \
+                                              METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VCPU_IOCTL_INTERRUPT     CTL_CODE(HAX_DEVICE_TYPE, 0x90c, \
+                                              METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VCPU_SET_REGS            CTL_CODE(HAX_DEVICE_TYPE, 0x90d, \
+                                              METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define HAX_VCPU_GET_REGS            CTL_CODE(HAX_DEVICE_TYPE, 0x90e, \
+                                              METHOD_BUFFERED, FILE_ANY_ACCESS)
+/* NOTE(review): shares function code 0x910 with HAX_IOCTL_CAPABILITY. */
+#define HAX_VM_IOCTL_NOTIFY_QEMU_VERSION CTL_CODE(HAX_DEVICE_TYPE, 0x910, \
+                                                  METHOD_BUFFERED,        \
+                                                  FILE_ANY_ACCESS)
+#endif /* TARGET_I386_HAX_WINDOWS_H */