diff mbox series

[RFC,v2,03/22] i386/xen: Add xen-version machine property and init KVM Xen support

Message ID 20221209095612.689243-4-dwmw2@infradead.org
State New
Headers show
Series Xen HVM support under KVM | expand

Commit Message

David Woodhouse Dec. 9, 2022, 9:55 a.m. UTC
From: David Woodhouse <dwmw@amazon.co.uk>

This is a machine property for two main reasons. One is that it allows
us to set it in default_machine_opts for the xenfv platform when not
running on actual Xen. The other is that theoretically we *could* do
this with TCG too; we'd just have to implement a bunch of the stuff that
KVM already does for us.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 hw/i386/pc.c            | 32 +++++++++++++++++++++++++++
 hw/i386/pc_piix.c       | 10 +++++++--
 include/hw/i386/pc.h    |  3 +++
 target/i386/kvm/kvm.c   | 26 ++++++++++++++++++++++
 target/i386/meson.build |  1 +
 target/i386/xen.c       | 49 +++++++++++++++++++++++++++++++++++++++++
 target/i386/xen.h       | 19 ++++++++++++++++
 7 files changed, 138 insertions(+), 2 deletions(-)
 create mode 100644 target/i386/xen.c
 create mode 100644 target/i386/xen.h

Comments

Paul Durrant Dec. 12, 2022, 12:48 p.m. UTC | #1
On 09/12/2022 09:55, David Woodhouse wrote:
> From: David Woodhouse <dwmw@amazon.co.uk>
> 
> This is a machine property for two main reasons. One is that it allows
> us to set it in default_machine_opts for the xenfv platform when not
> running on actual Xen. The other is that theoretically we *could* do
> this with TCG too; we'd just have to implement a bunch of the stuff that
> KVM already does for us.
> 
> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> ---
>   hw/i386/pc.c            | 32 +++++++++++++++++++++++++++
>   hw/i386/pc_piix.c       | 10 +++++++--
>   include/hw/i386/pc.h    |  3 +++
>   target/i386/kvm/kvm.c   | 26 ++++++++++++++++++++++
>   target/i386/meson.build |  1 +
>   target/i386/xen.c       | 49 +++++++++++++++++++++++++++++++++++++++++
>   target/i386/xen.h       | 19 ++++++++++++++++
>   7 files changed, 138 insertions(+), 2 deletions(-)
>   create mode 100644 target/i386/xen.c
>   create mode 100644 target/i386/xen.h
> 
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 546b703cb4..9bada1a8ff 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -1811,6 +1811,32 @@ static void pc_machine_set_max_fw_size(Object *obj, Visitor *v,
>       pcms->max_fw_size = value;
>   }
>   
> +static void pc_machine_get_xen_version(Object *obj, Visitor *v,
> +                                       const char *name, void *opaque,
> +                                       Error **errp)
> +{
> +    PCMachineState *pcms = PC_MACHINE(obj);
> +    uint32_t value = pcms->xen_version;
> +
> +    visit_type_uint32(v, name, &value, errp);
> +}
> +
> +static void pc_machine_set_xen_version(Object *obj, Visitor *v,
> +                                       const char *name, void *opaque,
> +                                       Error **errp)
> +{
> +    PCMachineState *pcms = PC_MACHINE(obj);
> +    Error *error = NULL;
> +    uint32_t value;
> +
> +    visit_type_uint32(v, name, &value, &error);
> +    if (error) {
> +        error_propagate(errp, error);
> +        return;
> +    }
> +
> +    pcms->xen_version = value;
> +}
>   
>   static void pc_machine_initfn(Object *obj)
>   {
> @@ -1978,6 +2004,12 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
>           NULL, NULL);
>       object_class_property_set_description(oc, PC_MACHINE_SMBIOS_EP,
>           "SMBIOS Entry Point type [32, 64]");
> +
> +    object_class_property_add(oc, "xen-version", "uint32",
> +        pc_machine_get_xen_version, pc_machine_set_xen_version,
> +        NULL, NULL);
> +    object_class_property_set_description(oc, "xen-version",
> +        "Xen version to be emulated (in XENVER_version form e.g. 0x4000a for 4.10)");
>   }

Since this is a property of the general pc machine class, could it be 
made to report the actual version if running on real Xen and be 
read-only? AFAICT I could specify "accel=xen,xen-version=<blah>" and that 
feels like it should be an error.

>   
>   static const TypeInfo pc_machine_info = {
> diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
> index 0ad0ed1603..13286d0739 100644
> --- a/hw/i386/pc_piix.c
> +++ b/hw/i386/pc_piix.c
> @@ -876,7 +876,10 @@ static void xenfv_4_2_machine_options(MachineClass *m)
>       pc_i440fx_4_2_machine_options(m);
>       m->desc = "Xen Fully-virtualized PC";
>       m->max_cpus = HVM_MAX_VCPUS;
> -    m->default_machine_opts = "accel=xen,suppress-vmdesc=on";
> +    if (xen_enabled())
> +            m->default_machine_opts = "accel=xen,suppress-vmdesc=on";
> +    else
> +            m->default_machine_opts = "accel=kvm,xen-version=0x40002";
>   }
>   
>   DEFINE_PC_MACHINE(xenfv_4_2, "xenfv-4.2", pc_xen_hvm_init,
> @@ -888,7 +891,10 @@ static void xenfv_3_1_machine_options(MachineClass *m)
>       m->desc = "Xen Fully-virtualized PC";
>       m->alias = "xenfv";
>       m->max_cpus = HVM_MAX_VCPUS;
> -    m->default_machine_opts = "accel=xen,suppress-vmdesc=on";
> +    if (xen_enabled())
> +            m->default_machine_opts = "accel=xen,suppress-vmdesc=on";
> +    else
> +            m->default_machine_opts = "accel=kvm,xen-version=0x30001";
>   }
>   
>   DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init,
> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> index c95333514e..9b14b18836 100644
> --- a/include/hw/i386/pc.h
> +++ b/include/hw/i386/pc.h
> @@ -52,6 +52,9 @@ typedef struct PCMachineState {
>       bool default_bus_bypass_iommu;
>       uint64_t max_fw_size;
>   
> +    /* Xen HVM emulation */
> +    uint32_t xen_version;
> +
>       /* ACPI Memory hotplug IO base address */
>       hwaddr memhp_io_base;
>   
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index a213209379..0a2069b117 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -31,6 +31,7 @@
>   #include "sysemu/runstate.h"
>   #include "kvm_i386.h"
>   #include "sev.h"
> +#include "xen.h"
>   #include "hyperv.h"
>   #include "hyperv-proto.h"
>   
> @@ -774,6 +775,17 @@ static inline bool freq_within_bounds(int freq, int target_freq)
>           return false;
>   }
>   
> +static uint32_t kvm_arch_xen_version(MachineState *ms)
> +{
> +    uint32_t v = object_property_get_int(OBJECT(ms), "xen-version", NULL);
> +
> +    /* If it was unset, return zero */
> +    if (v == (uint32_t) -1)
> +            return 0;
> +
> +    return v;
> +}
> +
>   static int kvm_arch_set_tsc_khz(CPUState *cs)
>   {
>       X86CPU *cpu = X86_CPU(cs);
> @@ -2459,6 +2471,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
>   {
>       uint64_t identity_base = 0xfffbc000;
>       uint64_t shadow_mem;
> +    uint32_t xen_version;
>       int ret;
>       struct utsname utsname;
>       Error *local_err = NULL;
> @@ -2513,6 +2526,19 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
>           }
>       }
>   
> +    xen_version = kvm_arch_xen_version(ms);
> +    if (xen_version) {
> +#ifdef CONFIG_XEN_EMU
> +            ret = kvm_xen_init(s, xen_version);
> +            if (ret < 0) {
> +                    return ret;
> +            }
> +#else
> +            error_report("kvm: Xen support not enabled in qemu");
> +            return -ENOTSUP;
> +#endif
> +    }
> +
>       ret = kvm_get_supported_msrs(s);
>       if (ret < 0) {
>           return ret;
> diff --git a/target/i386/meson.build b/target/i386/meson.build
> index ae38dc9563..9f3ef246b8 100644
> --- a/target/i386/meson.build
> +++ b/target/i386/meson.build
> @@ -7,6 +7,7 @@ i386_ss.add(files(
>     'cpu-dump.c',
>   ))
>   i386_ss.add(when: 'CONFIG_SEV', if_true: files('host-cpu.c'))
> +i386_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen.c'))
>   
>   # x86 cpu type
>   i386_ss.add(when: 'CONFIG_KVM', if_true: files('host-cpu.c'))
> diff --git a/target/i386/xen.c b/target/i386/xen.c
> new file mode 100644
> index 0000000000..bc183dce4e
> --- /dev/null
> +++ b/target/i386/xen.c
> @@ -0,0 +1,49 @@
> +/*
> + * Xen HVM emulation support in KVM
> + *
> + * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
> + * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "qemu/osdep.h"
> +#include "kvm/kvm_i386.h"
> +#include "xen.h"
> +
> +int kvm_xen_init(KVMState *s, uint32_t xen_version)
> +{
> +    const int required_caps = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
> +        KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | KVM_XEN_HVM_CONFIG_SHARED_INFO;
> +    struct kvm_xen_hvm_config cfg = {
> +        .msr = XEN_HYPERCALL_MSR,
> +        .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
> +    };
> +    int xen_caps, ret;
> +
> +    xen_caps = kvm_check_extension(s, KVM_CAP_XEN_HVM);
> +    if (required_caps & ~xen_caps) {
> +        error_report("kvm: Xen HVM guest support not present or insufficient");
> +        return -ENOSYS;
> +    }
> +
> +    if (xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND) {
> +        struct kvm_xen_hvm_attr ha = {
> +            .type = KVM_XEN_ATTR_TYPE_XEN_VERSION,
> +            .u.xen_version = xen_version,
> +        };
> +        (void)kvm_vm_ioctl(s, KVM_XEN_HVM_SET_ATTR, &ha);

Should you not handle the error here? If the cap is present then surely 
it ought to work.

> +
> +        cfg.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
> +    }
> +
> +    ret = kvm_vm_ioctl(s, KVM_XEN_HVM_CONFIG, &cfg);
> +    if (ret < 0) {
> +        error_report("kvm: Failed to enable Xen HVM support: %s", strerror(-ret));
> +        return ret;
> +    }
> +
> +    return 0;
> +}
> diff --git a/target/i386/xen.h b/target/i386/xen.h
> new file mode 100644
> index 0000000000..6c4f3b7822
> --- /dev/null
> +++ b/target/i386/xen.h
> @@ -0,0 +1,19 @@
> +/*
> + * Xen HVM emulation support in KVM
> + *
> + * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
> + * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#ifndef QEMU_I386_XEN_H
> +#define QEMU_I386_XEN_H
> +
> +#define XEN_HYPERCALL_MSR 0x40000000

This is a moveable MSR if Hyper-V is also enabled. Is that configuration 
being explicitly denied?

   Paul

> +
> +int kvm_xen_init(KVMState *s, uint32_t xen_version);
> +
> +#endif /* QEMU_I386_XEN_H */
Paolo Bonzini Dec. 12, 2022, 5:30 p.m. UTC | #2
On 12/9/22 10:55, David Woodhouse wrote:
> -    m->default_machine_opts = "accel=xen,suppress-vmdesc=on";
> +    if (xen_enabled())
> +            m->default_machine_opts = "accel=xen,suppress-vmdesc=on";
> +    else
> +            m->default_machine_opts = "accel=kvm,xen-version=0x30001";

Please do not modify pc_xen_hvm_init().

"-M xenfv" should be the same as "-M pc-i440fx-...,suppress-vmdesc=on 
-accel xen -device xen-platform".  It must work *without* "-accel xen", 
while here you're basically requiring it.  For now, please make 
KVM-emulated Xen use "-M pc -device xen-platform".  We can figure out 
"-M xenfv" later.

You can instead have:

- a check in xen_init() that checks that xen_mode is XEN_ATTACH.  If 
not, fail.

- an extra enum value for xen_mode, XEN_DISABLED, which is the default 
instead of XEN_EMULATE;

- an accelerator property "-accel kvm,xen-version=...", added in 
kvm_accel_class_init() instead of the machine property.  The property, 
when set to a nonzero value, flips xen_mode from XEN_DISABLED to 
XEN_EMULATE.

The Xen overlay device can be created using the mc->kvm_type function 
(which you can set in DEFINE_PC_MACHINE); at that point, xen_mode has 
switched from XEN_DISABLED to XEN_EMULATE.  Those xen_enabled() checks 
that apply to KVM then become xen_mode != XEN_DISABLED, as long as they 
run during mc->kvm_type or afterwards.

The platform device can be created either in mc->kvm_type or manually 
(not sure if it makes sense to have a "XenVMMXenVMM" CPUID + emulated 
hypercalls but no platform device---would it still use pvclock for 
example?).

Paolo
Paul Durrant Dec. 12, 2022, 5:55 p.m. UTC | #3
On 12/12/2022 17:30, Paolo Bonzini wrote:
[snip]
> 
> The platform device can be created either in mc->kvm_type or manually 
> (not sure if it makes sense to have a "XenVMMXenVMM" CPUID + emulated 
> hypercalls but no platform device---would it still use pvclock for 
> example?).
> 

Not sure it's wise but the platform device is certainly optional in 
xl.cfg so you can easily bring up a VM without it.

   Paul
David Woodhouse Dec. 13, 2022, 12:13 a.m. UTC | #4
On Mon, 2022-12-12 at 18:30 +0100, Paolo Bonzini wrote:
> On 12/9/22 10:55, David Woodhouse wrote:
> > -    m->default_machine_opts = "accel=xen,suppress-vmdesc=on";
> > +    if (xen_enabled())
> > +            m->default_machine_opts = "accel=xen,suppress-vmdesc=on";
> > +    else
> > +            m->default_machine_opts = "accel=kvm,xen-version=0x30001";
> 
> Please do not modify pc_xen_hvm_init().
> 
> "-M xenfv" should be the same as "-M pc-i440fx-...,suppress-vmdesc=on 
> -accel xen -device xen-platform".  It must work *without* "-accel xen", 
> while here you're basically requiring it.  For now, please make 
> KVM-emulated Xen use "-M pc -device xen-platform".  We can figure out 
> "-M xenfv" later.
> 
> You can instead have:
> 
> - a check in xen_init() that checks that xen_mode is XEN_ATTACH.  If 
> not, fail.
> 
> - an extra enum value for xen_mode, XEN_DISABLED, which is the default 
> instead of XEN_EMULATE;
> 
> - an accelerator property "-accel kvm,xen-version=...", added in 
> kvm_accel_class_init() instead of the machine property.  The property, 
> when set to a nonzero value, flips xen_mode from XEN_DISABLED to 
> XEN_EMULATE.
> 
> The Xen overlay device can be created using the mc->kvm_type function 
> (which you can set in DEFINE_PC_MACHINE); at that point, xen_mode has 
> switched from XEN_DISABLED to XEN_EMULATE.  Those xen_enabled() checks 
> that apply to KVM then become xen_mode != XEN_DISABLED, as long as they 
> run during mc->kvm_type or afterwards.
> 
> The platform device can be created either in mc->kvm_type or manually 
> (not sure if it makes sense to have a "XenVMMXenVMM" CPUID + emulated 
> hypercalls but no platform device---would it still use pvclock for 
> example?).

That works; thanks. I won't spam the list with another round just yet,
but have pushed it to
https://git.infradead.org/users/dwmw2/qemu.git/shortlog/refs/heads/xenfv

The guest now correctly panics because I haven't implemented event
channel hypercalls yet (got to fix up a bit more of the 32-bit compat
first, and some other parts of Paul's feedback I haven't yet got to).

$ ./build/qemu-system-x86_64 -serial mon:stdio -M pc -accel kvm,xen-version=0x4000a  -cpu host  -display none -kernel vmlinuz-5.17.8-200.fc35.x86_64 -append "console=ttyS0 earlyprintk=ttyS0 panic=10000" --trace "kvm_xen*" -d unimp -m 1G -smp 4 -device xen-platform
Probing EDD (edd=off to disable)... ok
[    0.000000] Linux version 5.17.8-200.fc35.x86_64 (mockbuild@bkernel02.iad2.fedoraproject.org) (gcc (GCC) 11.3.1 20220421 (Red Hat 11.3.1-2), GNU ld version 2.37-17.fc35) #1 SMP PREEMPT Mon May 16 01:01:02 UTC 2022
[    0.000000] Command line: console=ttyS0 earlyprintk=ttyS0 panic=10000
[    0.000000] x86/fpu: Supporting XSAVE feature 0x001: 'x87 floating point registers'
[    0.000000] x86/fpu: Supporting XSAVE feature 0x002: 'SSE registers'
[    0.000000] x86/fpu: Supporting XSAVE feature 0x004: 'AVX registers'
[    0.000000] x86/fpu: Supporting XSAVE feature 0x008: 'MPX bounds registers'
[    0.000000] x86/fpu: Supporting XSAVE feature 0x010: 'MPX CSR'
[    0.000000] x86/fpu: xstate_offset[2]:  576, xstate_sizes[2]:  256
[    0.000000] x86/fpu: xstate_offset[3]:  832, xstate_sizes[3]:   64
[    0.000000] x86/fpu: xstate_offset[4]:  896, xstate_sizes[4]:   64
[    0.000000] x86/fpu: Enabled xstate features 0x1f, context size is 960 bytes, using 'compacted' format.
[    0.000000] signal: max sigframe size: 2032
[    0.000000] BIOS-provided physical RAM map:
[    0.000000] BIOS-e820: [mem 0x0000000000000000-0x000000000009fbff] usable
[    0.000000] BIOS-e820: [mem 0x000000000009fc00-0x000000000009ffff] reserved
[    0.000000] BIOS-e820: [mem 0x00000000000f0000-0x00000000000fffff] reserved
[    0.000000] BIOS-e820: [mem 0x0000000000100000-0x000000003ffdffff] usable
[    0.000000] BIOS-e820: [mem 0x000000003ffe0000-0x000000003fffffff] reserved
[    0.000000] BIOS-e820: [mem 0x00000000feffc000-0x00000000feffffff] reserved
[    0.000000] BIOS-e820: [mem 0x00000000fffc0000-0x00000000ffffffff] reserved
[    0.000000] printk: bootconsole [earlyser0] enabled
[    0.000000] NX (Execute Disable) protection: active
[    0.000000] extended physical RAM map:
[    0.000000] reserve setup_data: [mem 0x0000000000000000-0x000000000009fbff] usable
[    0.000000] reserve setup_data: [mem 0x000000000009fc00-0x000000000009ffff] reserved
[    0.000000] reserve setup_data: [mem 0x00000000000f0000-0x00000000000fffff] reserved
[    0.000000] reserve setup_data: [mem 0x0000000000100000-0x0000000000bf98ef] usable
[    0.000000] reserve setup_data: [mem 0x0000000000bf98f0-0x0000000000bf991f] usable
[    0.000000] reserve setup_data: [mem 0x0000000000bf9920-0x000000003ffdffff] usable
[    0.000000] reserve setup_data: [mem 0x000000003ffe0000-0x000000003fffffff] reserved
[    0.000000] reserve setup_data: [mem 0x00000000feffc000-0x00000000feffffff] reserved
[    0.000000] reserve setup_data: [mem 0x00000000fffc0000-0x00000000ffffffff] reserved
[    0.000000] SMBIOS 2.8 present.
[    0.000000] DMI: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.1-0-g3208b098f51a-prebuilt.qemu.org 04/01/2014
[    0.000000] Hypervisor detected: Xen HVM
[    0.000000] Xen version 4.10.
kvm_xen_hypercall xen_hypercall: cpu 0 cpl 0 input 17 a0 0x6 a1 0xffffffffb8e03e70 a2 0x0 ret 0x0
kvm_xen_set_shared_info shared info at gfn 0x10
kvm_xen_hypercall xen_hypercall: cpu 0 cpl 0 input 12 a0 0x7 a1 0xffffffffb8e03e60 a2 0x8000000000000163 ret 0x0
kvm_xen_set_vcpu_attr vcpu attr cpu 1 type 0 gpa 0x10040
kvm_xen_set_vcpu_attr vcpu attr cpu 2 type 0 gpa 0x10080
kvm_xen_set_vcpu_attr vcpu attr cpu 0 type 0 gpa 0x10000
kvm_xen_set_vcpu_attr vcpu attr cpu 3 type 0 gpa 0x100c0
[    0.000000] platform_pci_unplug: Netfront and the Xen platform PCI driver have been compiled for this kernel: unplug emulated NICs.
[    0.000000] platform_pci_unplug: Blkfront and the Xen platform PCI driver have been compiled for this kernel: unplug emulated disks.
[    0.000000] You might have to change the root device
[    0.000000] from /dev/hd[a-d] to /dev/xvd[a-d]
[    0.000000] in your root= kernel command line option
kvm_xen_hypercall xen_hypercall: cpu 0 cpl 0 input 34 a0 0x9 a1 0xffffffffb8e03e68 a2 0x2 ret 0xffffffffffffffda
[    0.021222] tsc: Fast TSC calibration using PIT
[    0.022715] tsc: Detected 2112.208 MHz processor
[    0.024194] tsc: Detected 2112.000 MHz TSC
[    0.027112] last_pfn = 0x3ffe0 max_arch_pfn = 0x400000000
[    0.028983] x86/PAT: Configuration [0-7]: WB  WC  UC- UC  WB  WP  UC- WT  
Memory KASLR using RDRAND RDTSC...
[    0.040593] found SMP MP-table at [mem 0x000f5bf0-0x000f5bff]
[    0.042372] Using GB pages for direct mapping
[    0.044274] ACPI: Early table checksum verification disabled
[    0.045818] ACPI: RSDP 0x00000000000F5A10 000014 (v00 BOCHS )
[    0.047436] ACPI: RSDT 0x000000003FFE1BDD 000034 (v01 BOCHS  BXPC     00000001 BXPC 00000001)
[    0.049981] ACPI: FACP 0x000000003FFE1A79 000074 (v01 BOCHS  BXPC     00000001 BXPC 00000001)
[    0.052664] ACPI: DSDT 0x000000003FFE0040 001A39 (v01 BOCHS  BXPC     00000001 BXPC 00000001)
[    0.055603] ACPI: FACS 0x000000003FFE0000 000040
[    0.057391] ACPI: APIC 0x000000003FFE1AED 000090 (v01 BOCHS  BXPC     00000001 BXPC 00000001)
[    0.060233] ACPI: HPET 0x000000003FFE1B7D 000038 (v01 BOCHS  BXPC     00000001 BXPC 00000001)
[    0.063058] ACPI: WAET 0x000000003FFE1BB5 000028 (v01 BOCHS  BXPC     00000001 BXPC 00000001)
[    0.065812] ACPI: Reserving FACP table memory at [mem 0x3ffe1a79-0x3ffe1aec]
[    0.068290] ACPI: Reserving DSDT table memory at [mem 0x3ffe0040-0x3ffe1a78]
[    0.070623] ACPI: Reserving FACS table memory at [mem 0x3ffe0000-0x3ffe003f]
[    0.072976] ACPI: Reserving APIC table memory at [mem 0x3ffe1aed-0x3ffe1b7c]
[    0.075241] ACPI: Reserving HPET table memory at [mem 0x3ffe1b7d-0x3ffe1bb4]
[    0.077313] ACPI: Reserving WAET table memory at [mem 0x3ffe1bb5-0x3ffe1bdc]
[    0.080178] No NUMA configuration found
[    0.081189] Faking a node at [mem 0x0000000000000000-0x000000003ffdffff]
[    0.083037] NODE_DATA(0) allocated [mem 0x3ffb5000-0x3ffdffff]
[    0.158885] Zone ranges:
[    0.159679]   DMA      [mem 0x0000000000001000-0x0000000000ffffff]
[    0.161442]   DMA32    [mem 0x0000000001000000-0x000000003ffdffff]
[    0.163104]   Normal   empty
[    0.163924]   Device   empty
[    0.164732] Movable zone start for each node
[    0.165899] Early memory node ranges
[    0.166843]   node   0: [mem 0x0000000000001000-0x000000000009efff]
[    0.168562]   node   0: [mem 0x0000000000100000-0x000000003ffdffff]
[    0.170032] Initmem setup node 0 [mem 0x0000000000001000-0x000000003ffdffff]
[    0.171611] On node 0, zone DMA: 1 pages in unavailable ranges
[    0.171668] On node 0, zone DMA: 97 pages in unavailable ranges
[    0.175214] On node 0, zone DMA32: 32 pages in unavailable ranges
[    0.177220] ACPI: PM-Timer IO Port: 0x608
[    0.179928] ACPI: LAPIC_NMI (acpi_id[0xff] dfl dfl lint[0x1])
[    0.181520] IOAPIC[0]: apic_id 0, version 17, address 0xfec00000, GSI 0-23
[    0.183448] ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 dfl dfl)
[    0.185083] ACPI: INT_SRC_OVR (bus 0 bus_irq 5 global_irq 5 high level)
[    0.186633] ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 high level)
[    0.188303] ACPI: INT_SRC_OVR (bus 0 bus_irq 10 global_irq 10 high level)
[    0.190365] ACPI: INT_SRC_OVR (bus 0 bus_irq 11 global_irq 11 high level)
[    0.192528] ACPI: Using ACPI (MADT) for SMP configuration information
[    0.194438] ACPI: HPET id: 0x8086a201 base: 0xfed00000
[    0.196053] TSC deadline timer available
[    0.197268] smpboot: Allowing 4 CPUs, 0 hotplug CPUs
[    0.198877] PM: hibernation: Registered nosave memory: [mem 0x00000000-0x00000fff]
[    0.201200] PM: hibernation: Registered nosave memory: [mem 0x0009f000-0x0009ffff]
[    0.203531] PM: hibernation: Registered nosave memory: [mem 0x000a0000-0x000effff]
[    0.205839] PM: hibernation: Registered nosave memory: [mem 0x000f0000-0x000fffff]
[    0.208122] PM: hibernation: Registered nosave memory: [mem 0x00bf9000-0x00bf9fff]
[    0.210303] PM: hibernation: Registered nosave memory: [mem 0x00bf9000-0x00bf9fff]
[    0.212595] [mem 0x40000000-0xfeffbfff] available for PCI devices
[    0.214447] Booting paravirtualized kernel on Xen HVM
[    0.216050] clocksource: refined-jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 1910969940391419 ns
[    0.226546] setup_percpu: NR_CPUS:8192 nr_cpumask_bits:4 nr_cpu_ids:4 nr_node_ids:1
[    0.232545] percpu: Embedded 61 pages/cpu s212992 r8192 d28672 u524288
kvm_xen_hypercall xen_hypercall: cpu 0 cpl 0 input 24 a0 0xa a1 0x0 a2 0xffffffffb8e03ed0 ret 0x0
kvm_xen_set_vcpu_attr vcpu attr cpu 0 type 0 gpa 0x3ec1e0e0
[    0.234134] PV qspinlock hash table entries: 256 (order: 0, 4096 bytes, linear)
[    0.235651] Fallback order for Node 0: 0 
[    0.236618] Built 1 zonelists, mobility grouping on.  Total pages: 257759
[    0.238432] Policy zone: DMA32
[    0.239373] Kernel command line: console=ttyS0 earlyprintk=ttyS0 panic=10000
[    0.242530] Dentry cache hash table entries: 131072 (order: 8, 1048576 bytes, linear)
[    0.245280] Inode-cache hash table entries: 65536 (order: 7, 524288 bytes, linear)
[    0.247676] mem auto-init: stack:off, heap alloc:off, heap free:off
[    0.253165] Memory: 978800K/1048056K available (16393K kernel code, 3607K rwdata, 10784K rodata, 2704K init, 6288K bss, 68996K reserved, 0K cma-reserved)
[    0.256519] random: get_random_u64 called from __kmem_cache_create+0x2a/0x530 with crng_init=0
[    0.256838] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=4, Nodes=1
[    0.260445] Kernel/User page tables isolation: enabled
[    0.261691] ftrace: allocating 48342 entries in 189 pages
[    0.278046] ftrace: allocated 189 pages with 6 groups
[    0.280372] Dynamic Preempt: voluntary
[    0.281783] rcu: Preemptible hierarchical RCU implementation.
[    0.282905] rcu: 	RCU restricting CPUs from NR_CPUS=8192 to nr_cpu_ids=4.
[    0.284217] 	Trampoline variant of Tasks RCU enabled.
[    0.285323] 	Rude variant of Tasks RCU enabled.
[    0.286304] 	Tracing variant of Tasks RCU enabled.
[    0.287385] rcu: RCU calculated value of scheduler-enlistment delay is 100 jiffies.
[    0.289049] rcu: Adjusting geometry for rcu_fanout_leaf=16, nr_cpu_ids=4
[    0.297553] NR_IRQS: 524544, nr_irqs: 456, preallocated irqs: 16
kvm_xen_hypercall xen_hypercall: cpu 0 cpl 0 input 32 a0 0xb a1 0xffffffffb8e03e98 a2 0xffff9316bec00000 ret 0xffffffffffffffda
[    0.298817] xen:events: Using 2-level ABI
kvm_xen_hypercall xen_hypercall: cpu 0 cpl 0 input 34 a0 0x0 a1 0xffffffffb8e03ec8 a2 0x100 ret 0x0
[    0.299811] xen:events: Xen HVM callback vector for event delivery is enabled
[    0.301653] random: crng init done (trusting CPU's manufacturer)
[    0.318080] Console: colour VGA+ 80x25
Unimplemented Xen hypercall 34 (0x1 0xffffffffb8e03eb8 0xffffffffb8e03eb8)
kvm_xen_hypercall xen_hypercall: cpu 0 cpl 0 input 34 a0 0x1 a1 0xffffffffb8e03eb8 a2 0x7ff0 ret 0xffffffffffffffda
[    0.319300] Cannot get hvm parameter CONSOLE_EVTCHN (18): -38!
[    0.321118] printk: console [ttyS0] enabled
[    0.321118] printk: console [ttyS0] enabled
[    0.323780] printk: bootconsole [earlyser0] disabled
[    0.323780] printk: bootconsole [earlyser0] disabled
[    0.326482] ACPI: Core revision 20211217
[    0.328222] clocksource: hpet: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 19112604467 ns
[    0.330092] APIC: Switch to symmetric I/O mode setup
[    0.331324] x2apic enabled
[    0.332145] Switched APIC routing to physical x2apic.
[    0.334323] ..TIMER: vector=0x30 apic1=0 pin1=2 apic2=-1 pin2=-1
[    0.340113] clocksource: tsc-early: mask: 0xffffffffffffffff max_cycles: 0x1e71785e5dd, max_idle_ns: 440795244814 ns
[    0.341973] Calibrating delay loop (skipped), value calculated using timer frequency.. 4224.00 BogoMIPS (lpj=2112000)
[    0.342962] pid_max: default: 32768 minimum: 301
[    0.344027] LSM: Security Framework initializing
[    0.344995] Yama: becoming mindful.
[    0.345971] SELinux:  Initializing.
[    0.347003] LSM support for eBPF active
[    0.347710] landlock: Up and running.
[    0.348070] Mount-cache hash table entries: 2048 (order: 2, 16384 bytes, linear)
[    0.348982] Mountpoint-cache hash table entries: 2048 (order: 2, 16384 bytes, linear)
Poking KASLR using RDRAND RDTSC...
[    0.352097] x86/cpu: User Mode Instruction Prevention (UMIP) activated
[    0.353243] Last level iTLB entries: 4KB 0, 2MB 0, 4MB 0
[    0.353963] Last level dTLB entries: 4KB 0, 2MB 0, 4MB 0, 1GB 0
[    0.355022] Spectre V1 : Mitigation: usercopy/swapgs barriers and __user pointer sanitization
[    0.355969] Spectre V2 : Mitigation: Retpolines
[    0.356962] Spectre V2 : Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch
[    0.357965] Spectre V2 : Enabling Restricted Speculation for firmware calls
[    0.358972] Spectre V2 : mitigation: Enabling conditional Indirect Branch Prediction Barrier
[    0.359962] Speculative Store Bypass: Mitigation: Speculative Store Bypass disabled via prctl
[    0.360984] SRBDS: Unknown: Dependent on hypervisor status
[    0.361966] MDS: Mitigation: Clear CPU buffers
[    0.370697] Freeing SMP alternatives memory: 44K
[    0.371365] clocksource: xen: mask: 0xffffffffffffffff max_cycles: 0x1cd42e4dffb, max_idle_ns: 881590591483 ns
Unimplemented Xen hypercall 24 (0x7 0x0 0x0)
kvm_xen_hypercall xen_hypercall: cpu 0 cpl 0 input 24 a0 0x7 a1 0x0 a2 0x0 ret 0xffffffffffffffda
kvm_xen_hypercall xen_hypercall: cpu 0 cpl 0 input 24 a0 0xd a1 0x0 a2 0xffffa734c0013e58 ret 0x0
kvm_xen_set_vcpu_attr vcpu attr cpu 0 type 1 gpa 0x123e000
kvm_xen_hypercall xen_hypercall: cpu 0 cpl 0 input 24 a0 0x5 a1 0x0 a2 0xffffa734c0013e48 ret 0x0
kvm_xen_set_vcpu_attr vcpu attr cpu 0 type 2 gpa 0x3ec2ec40
[    0.373086] installing Xen timer for CPU 0
Unimplemented Xen hypercall 32 (0x1 0xffffa734c0013da4 0xffffa734c0013da4)
kvm_xen_hypercall xen_hypercall: cpu 0 cpl 0 input 32 a0 0x1 a1 0xffffa734c0013da4 a2 0x1 ret 0xffffffffffffffda
[    0.374108] ------------[ cut here ]------------
[    0.374962] kernel BUG at drivers/xen/events/events_base.c:1397!
[    0.375979] invalid opcode: 0000 [#1] PREEMPT SMP PTI
[    0.376960] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.17.8-200.fc35.x86_64 #1
[    0.376960] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.1-0-g3208b098f51a-prebuilt.qemu.org 04/01/2014
[    0.376960] RIP: 0010:bind_virq_to_irq+0x1b7/0x2d0
[    0.376960] Code: c7 c2 e0 f1 15 b7 48 c7 c6 20 05 18 b9 44 89 ff e8 ee 5a 98 ff e9 50 ff ff ff 45 31 db 83 f8 ef 74 41 85 d2 0f 89 77 ff ff ff <0f> 0b 44 89 ff 44 89 5c 24 08 e8 fa 4a 98 ff 44 8b 5c 24 08 48 85
[    0.376960] RSP: 0000:ffffa734c0013d80 EFLAGS: 00010282
[    0.376960] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 000000000001e120
[    0.376960] RDX: 00000000ffffffda RSI: ffffa734c0013da4 RDI: 0000000000000001
[    0.376960] RBP: 0000000000000000 R08: ffff9316811e4200 R09: 0000000000000000
[    0.376960] R10: 0000000000000000 R11: 0000000000000000 R12: 000000000002ed20
[    0.376960] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000018
[    0.376960] FS:  0000000000000000(0000) GS:ffff9316bec00000(0000) knlGS:0000000000000000
[    0.376960] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    0.376960] CR2: ffff93168f001000 CR3: 000000000de10001 CR4: 0000000000370ef0
[    0.376960] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[    0.376960] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[    0.376960] Call Trace:
[    0.376960]  <TASK>
[    0.376960]  ? xen_timerop_shutdown+0x10/0x10
[    0.376960]  bind_virq_to_irqhandler+0x28/0x90
[    0.376960]  xen_setup_timer.cold+0x47/0xc5
[    0.376960]  xen_time_init+0x173/0x1a4
[    0.376960]  native_smp_prepare_cpus+0xdd/0x17e
[    0.376960]  xen_hvm_smp_prepare_cpus+0xc/0x67
[    0.376960]  kernel_init_freeable+0xe8/0x251
[    0.376960]  ? rest_init+0xd0/0xd0
[    0.376960]  kernel_init+0x16/0x130
[    0.376960]  ret_from_fork+0x22/0x30
[    0.376960]  </TASK>
[    0.376960] Modules linked in:
[    0.376965] ---[ end trace 0000000000000000 ]---
[    0.377964] RIP: 0010:bind_virq_to_irq+0x1b7/0x2d0
[    0.378874] Code: c7 c2 e0 f1 15 b7 48 c7 c6 20 05 18 b9 44 89 ff e8 ee 5a 98 ff e9 50 ff ff ff 45 31 db 83 f8 ef 74 41 85 d2 0f 89 77 ff ff ff <0f> 0b 44 89 ff 44 89 5c 24 08 e8 fa 4a 98 ff 44 8b 5c 24 08 48 85
[    0.378965] RSP: 0000:ffffa734c0013d80 EFLAGS: 00010282
[    0.379962] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 000000000001e120
[    0.380965] RDX: 00000000ffffffda RSI: ffffa734c0013da4 RDI: 0000000000000001
[    0.381962] RBP: 0000000000000000 R08: ffff9316811e4200 R09: 0000000000000000
[    0.382964] R10: 0000000000000000 R11: 0000000000000000 R12: 000000000002ed20
[    0.383963] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000018
[    0.384964] FS:  0000000000000000(0000) GS:ffff9316bec00000(0000) knlGS:0000000000000000
[    0.385963] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    0.386962] CR2: ffff93168f001000 CR3: 000000000de10001 CR4: 0000000000370ef0
[    0.387964] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[    0.388962] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[    0.389975] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
[    0.390960] Rebooting in 10000 seconds..
David Woodhouse Jan. 17, 2023, 1:49 p.m. UTC | #5
On Mon, 2022-12-12 at 18:30 +0100, Paolo Bonzini wrote:
> On 12/9/22 10:55, David Woodhouse wrote:
> > -    m->default_machine_opts = "accel=xen,suppress-vmdesc=on";
> > +    if (xen_enabled())
> > +            m->default_machine_opts = "accel=xen,suppress-vmdesc=on";
> > +    else
> > +            m->default_machine_opts = "accel=kvm,xen-version=0x30001";
> 
> Please do not modify pc_xen_hvm_init().
> 
> "-M xenfv" should be the same as "-M pc-i440fx-...,suppress-vmdesc=on
> -accel xen -device xen-platform".  It must work *without* "-accel xen", 
> while here you're basically requiring it.  For now, please make 
> KVM-emulated Xen use "-M pc -device xen-platform". 

I did that (well, you also need -accel kvm,xen-version=xxx).

>  We can figure out "-M xenfv" later.

I don't know that we care about doing this. Sure, I could change the if
(xen_enabled()) to something which isn't recursive, and works out which
of Xen or KVM mode is *possible* right now.

But having moved the xen-version from a machine property to a kvm-accel
property, using "accel=kvm,xen-version=xxx" doesn't even work; not
without the hackery to 'forward' that from the machine to the
accelerator, like we do in qemu_apply_legacy_machine_options() for
properties like kvm-shadow-mem. I don't think we want that.

So I think I'm most inclined to rename the CONFIG_XENFV_MACHINE option
to CONFIG_XEN_BUS, since that's basically what it ended up being once
the rest of the patches took shape on top of the basic platform
support. And drop the idea of making '-M xenfv' work altogether.

I'll add some documentation on how to launch it using
-accel kvm,xen-version=.

> You can instead have:
> 
> - a check in xen_init() that checks that xen_mode is XEN_ATTACH.  If 
> not, fail.
> 
> - an extra enum value for xen_mode, XEN_DISABLED, which is the default 
> instead of XEN_EMULATE;
> 
> - an accelerator property "-accel kvm,xen-version=...", added in 
> kvm_accel_class_init() instead of the machine property.  The property, 
> when set to a nonzero value, flips xen_mode from XEN_DISABLED to 
> XEN_EMULATE.
> 
> The Xen overlay device can be created using the mc->kvm_type function
> (which you can set in DEFINE_PC_MACHINE); at that point, xen_mode has
> switched from XEN_DISABLED to XEN_EMULATE.  Those xen_enabled() checks 
> that apply to KVM then become xen_mode != XEN_DISABLED, as long as they 
> run during mc->kvm_type or afterwards.
> 
> The platform device can be created either in mc->kvm_type or manually
> (not sure if it makes sense to have a "XenVMMXenVMM" CPUID + emulated
> hypercalls but no platform device---would it still use pvclock for 
> example?).

Yeah, I think fairly much everything *can* work without the platform
device. It's only really the hook for the legacy event channel
interrupt, and the unplug protocol. Linux does use its BAR to map grant
tables over, but that's just a crutch to find a free bit of guest
physical address space.

But still, I think it makes sense to add it unconditionally as you
suggest. In mc->kvm_type, pcms->bus isn't set yet but I can do it like
this:

--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1313,6 +1313,9 @@ void pc_basic_device_init(struct PCMachineState *pcms,
 #ifdef CONFIG_XEN_EMU
     if (xen_mode == XEN_EMULATE) {
         xen_evtchn_connect_gsis(gsi);
+        if (pcms->bus) {
+            pci_create_simple(pcms->bus, -1, "xen-platform");
+        }
     }
 #endif
diff mbox series

Patch

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 546b703cb4..9bada1a8ff 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1811,6 +1811,32 @@  static void pc_machine_set_max_fw_size(Object *obj, Visitor *v,
     pcms->max_fw_size = value;
 }
 
+static void pc_machine_get_xen_version(Object *obj, Visitor *v,
+                                       const char *name, void *opaque,
+                                       Error **errp)
+{
+    /* QOM getter for "xen-version": visit a copy of the stored value. */
+    uint32_t xen_version = PC_MACHINE(obj)->xen_version;
+
+    visit_type_uint32(v, name, &xen_version, errp);
+}
+
+/*
+ * QOM setter for "xen-version": store the visited uint32 in the machine
+ * state. If the visit fails, errp is set and the old value is kept.
+ */
+static void pc_machine_set_xen_version(Object *obj, Visitor *v,
+                                       const char *name, void *opaque,
+                                       Error **errp)
+{
+    uint32_t value;
+
+    if (!visit_type_uint32(v, name, &value, errp)) {
+        return;
+    }
+
+    PC_MACHINE(obj)->xen_version = value;
+}
 
 static void pc_machine_initfn(Object *obj)
 {
@@ -1978,6 +2004,12 @@  static void pc_machine_class_init(ObjectClass *oc, void *data)
         NULL, NULL);
     object_class_property_set_description(oc, PC_MACHINE_SMBIOS_EP,
         "SMBIOS Entry Point type [32, 64]");
+
+    object_class_property_add(oc, "xen-version", "uint32",
+        pc_machine_get_xen_version, pc_machine_set_xen_version,
+        NULL, NULL);
+    object_class_property_set_description(oc, "xen-version",
+        "Xen version to be emulated (in XENVER_version form e.g. 0x4000a for 4.10)");
 }
 
 static const TypeInfo pc_machine_info = {
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 0ad0ed1603..13286d0739 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -876,7 +876,10 @@  static void xenfv_4_2_machine_options(MachineClass *m)
     pc_i440fx_4_2_machine_options(m);
     m->desc = "Xen Fully-virtualized PC";
     m->max_cpus = HVM_MAX_VCPUS;
-    m->default_machine_opts = "accel=xen,suppress-vmdesc=on";
+    if (xen_enabled())
+            m->default_machine_opts = "accel=xen,suppress-vmdesc=on";
+    else
+            m->default_machine_opts = "accel=kvm,xen-version=0x40002";
 }
 
 DEFINE_PC_MACHINE(xenfv_4_2, "xenfv-4.2", pc_xen_hvm_init,
@@ -888,7 +891,10 @@  static void xenfv_3_1_machine_options(MachineClass *m)
     m->desc = "Xen Fully-virtualized PC";
     m->alias = "xenfv";
     m->max_cpus = HVM_MAX_VCPUS;
-    m->default_machine_opts = "accel=xen,suppress-vmdesc=on";
+    if (xen_enabled())
+            m->default_machine_opts = "accel=xen,suppress-vmdesc=on";
+    else
+            m->default_machine_opts = "accel=kvm,xen-version=0x30001";
 }
 
 DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init,
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index c95333514e..9b14b18836 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -52,6 +52,9 @@  typedef struct PCMachineState {
     bool default_bus_bypass_iommu;
     uint64_t max_fw_size;
 
+    /* Xen HVM emulation */
+    uint32_t xen_version;
+
     /* ACPI Memory hotplug IO base address */
     hwaddr memhp_io_base;
 
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index a213209379..0a2069b117 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -31,6 +31,7 @@ 
 #include "sysemu/runstate.h"
 #include "kvm_i386.h"
 #include "sev.h"
+#include "xen.h"
 #include "hyperv.h"
 #include "hyperv-proto.h"
 
@@ -774,6 +775,17 @@  static inline bool freq_within_bounds(int freq, int target_freq)
         return false;
 }
 
+static uint32_t kvm_arch_xen_version(MachineState *ms)
+{
+    uint32_t v = object_property_get_int(OBJECT(ms), "xen-version", NULL);
+
+    /* -1 is what a failed property lookup yields; treat it as unset (0) */
+    if (v == (uint32_t) -1) {
+        return 0;
+    }
+    return v;
+}
+
 static int kvm_arch_set_tsc_khz(CPUState *cs)
 {
     X86CPU *cpu = X86_CPU(cs);
@@ -2459,6 +2471,7 @@  int kvm_arch_init(MachineState *ms, KVMState *s)
 {
     uint64_t identity_base = 0xfffbc000;
     uint64_t shadow_mem;
+    uint32_t xen_version;
     int ret;
     struct utsname utsname;
     Error *local_err = NULL;
@@ -2513,6 +2526,19 @@  int kvm_arch_init(MachineState *ms, KVMState *s)
         }
     }
 
+    xen_version = kvm_arch_xen_version(ms);
+    if (xen_version) {
+#ifdef CONFIG_XEN_EMU
+            ret = kvm_xen_init(s, xen_version);
+            if (ret < 0) {
+                    return ret;
+            }
+#else
+            error_report("kvm: Xen support not enabled in qemu");
+            return -ENOTSUP;
+#endif
+    }
+
     ret = kvm_get_supported_msrs(s);
     if (ret < 0) {
         return ret;
diff --git a/target/i386/meson.build b/target/i386/meson.build
index ae38dc9563..9f3ef246b8 100644
--- a/target/i386/meson.build
+++ b/target/i386/meson.build
@@ -7,6 +7,7 @@  i386_ss.add(files(
   'cpu-dump.c',
 ))
 i386_ss.add(when: 'CONFIG_SEV', if_true: files('host-cpu.c'))
+i386_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen.c'))
 
 # x86 cpu type
 i386_ss.add(when: 'CONFIG_KVM', if_true: files('host-cpu.c'))
diff --git a/target/i386/xen.c b/target/i386/xen.c
new file mode 100644
index 0000000000..bc183dce4e
--- /dev/null
+++ b/target/i386/xen.c
@@ -0,0 +1,49 @@ 
+/*
+ * Xen HVM emulation support in KVM
+ *
+ * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
+ * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "kvm/kvm_i386.h"
+#include "xen.h"
+
+/* Enable KVM's Xen HVM support; returns 0, or a negative value on failure. */
+int kvm_xen_init(KVMState *s, uint32_t xen_version)
+{
+    const int needed = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
+        KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | KVM_XEN_HVM_CONFIG_SHARED_INFO;
+    int caps = kvm_check_extension(s, KVM_CAP_XEN_HVM);
+    struct kvm_xen_hvm_config cfg = {
+        .msr = XEN_HYPERCALL_MSR,
+        .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
+    };
+    int ret;
+
+    if (needed & ~caps) {
+        error_report("kvm: Xen HVM guest support not present or insufficient");
+        return -ENOSYS;
+    }
+
+    if (caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND) {
+        struct kvm_xen_hvm_attr ha = {
+            .type = KVM_XEN_ATTR_TYPE_XEN_VERSION,
+            .u.xen_version = xen_version,
+        };
+        /* The result of setting the version attribute is ignored. */
+        (void)kvm_vm_ioctl(s, KVM_XEN_HVM_SET_ATTR, &ha);
+        cfg.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
+    }
+
+    ret = kvm_vm_ioctl(s, KVM_XEN_HVM_CONFIG, &cfg);
+    if (ret < 0) {
+        error_report("kvm: Failed to enable Xen HVM support: %s", strerror(-ret));
+        return ret;
+    }
+    return 0;
+}
diff --git a/target/i386/xen.h b/target/i386/xen.h
new file mode 100644
index 0000000000..6c4f3b7822
--- /dev/null
+++ b/target/i386/xen.h
@@ -0,0 +1,19 @@ 
+/*
+ * Xen HVM emulation support in KVM
+ *
+ * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
+ * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_I386_XEN_H
+#define QEMU_I386_XEN_H
+
+#define XEN_HYPERCALL_MSR 0x40000000 /* .msr for KVM_XEN_HVM_CONFIG */
+
+int kvm_xen_init(KVMState *s, uint32_t xen_version); /* 0, or < 0 on error */
+
+#endif /* QEMU_I386_XEN_H */