diff mbox series

[RFC,12/21] i386/xen: set shared_info page

Message ID 20221205173137.607044-13-dwmw2@infradead.org
State New
Headers show
Series Xen HVM support under KVM | expand

Commit Message

David Woodhouse Dec. 5, 2022, 5:31 p.m. UTC
From: Joao Martins <joao.m.martins@oracle.com>

This is done by implementing HYPERVISOR_memory_op, specifically
XENMEM_add_to_physmap with space XENMAPSPACE_shared_info. While
Xen replaces the page with its own, we instead use the gfn passed
by the guest.

Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 accel/kvm/kvm-all.c      |  6 ++++
 include/hw/core/cpu.h    |  2 ++
 include/sysemu/kvm.h     |  2 ++
 include/sysemu/kvm_int.h |  3 ++
 target/i386/cpu.h        |  8 ++++++
 target/i386/trace-events |  1 +
 target/i386/xen-proto.h  | 19 +++++++++++++
 target/i386/xen.c        | 61 ++++++++++++++++++++++++++++++++++++++++
 8 files changed, 102 insertions(+)
 create mode 100644 target/i386/xen-proto.h

Comments

Philippe Mathieu-Daudé Dec. 5, 2022, 10:17 p.m. UTC | #1
On 5/12/22 18:31, David Woodhouse wrote:
> From: Joao Martins <joao.m.martins@oracle.com>
> 
> This is done by implementing HYPERVISOR_memory_op specifically
> XENMEM_add_to_physmap with space XENMAPSPACE_shared_info. While
> Xen removes the page with its own, we instead use the gfn passed
> by the guest.
> 
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> ---
>   accel/kvm/kvm-all.c      |  6 ++++
>   include/hw/core/cpu.h    |  2 ++
>   include/sysemu/kvm.h     |  2 ++
>   include/sysemu/kvm_int.h |  3 ++
>   target/i386/cpu.h        |  8 ++++++
>   target/i386/trace-events |  1 +
>   target/i386/xen-proto.h  | 19 +++++++++++++
>   target/i386/xen.c        | 61 ++++++++++++++++++++++++++++++++++++++++
>   8 files changed, 102 insertions(+)
>   create mode 100644 target/i386/xen-proto.h


> diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
> index 8830546121..e57b693528 100644
> --- a/include/hw/core/cpu.h
> +++ b/include/hw/core/cpu.h
> @@ -443,6 +443,8 @@ struct CPUState {
>   
>       /* track IOMMUs whose translations we've cached in the TCG TLB */
>       GArray *iommu_notifiers;
> +
> +    struct XenState *xen_state;

Since you define a typedef below, use it.

>   };
>   
>   typedef QTAILQ_HEAD(CPUTailQ, CPUState) CPUTailQ;
> diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
> index e9a97eda8c..8e882fbe96 100644
> --- a/include/sysemu/kvm.h
> +++ b/include/sysemu/kvm.h
> @@ -582,4 +582,6 @@ bool kvm_arch_cpu_check_are_resettable(void);
>   bool kvm_dirty_ring_enabled(void);
>   
>   uint32_t kvm_dirty_ring_size(void);
> +
> +struct XenState *kvm_get_xen_state(KVMState *s);

Ditto.

>   #endif
> diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
> index 3b4adcdc10..0d89cfe273 100644
> --- a/include/sysemu/kvm_int.h
> +++ b/include/sysemu/kvm_int.h
> @@ -110,6 +110,9 @@ struct KVMState
>       struct KVMDirtyRingReaper reaper;
>       NotifyVmexitOption notify_vmexit;
>       uint32_t notify_window;
> +
> +    /* xen guest state */
> +    struct XenState xen;

Ditto.

>   };
>   
>   void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index 5ddd14467e..09c0281b8b 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -23,6 +23,14 @@
>   #include "sysemu/tcg.h"
>   #include "cpu-qom.h"
>   #include "kvm/hyperv-proto.h"
> +#include "xen-proto.h"
> +
> +#ifdef TARGET_X86_64
> +#define TARGET_LONG_BITS 64
> +#else
> +#define TARGET_LONG_BITS 32
> +#endif


How come you don't have access to the definitions from "cpu-param.h" here?

Regards,

Phil.
David Woodhouse Dec. 6, 2022, 2:20 a.m. UTC | #2
On Mon, 2022-12-05 at 23:17 +0100, Philippe Mathieu-Daudé wrote:
> On 5/12/22 18:31, David Woodhouse wrote:
> > From: Joao Martins <joao.m.martins@oracle.com>
> > 
> > This is done by implementing HYPERVISOR_memory_op specifically
> > XENMEM_add_to_physmap with space XENMAPSPACE_shared_info. While
> > Xen removes the page with its own, we instead use the gfn passed
> > by the guest.
> > 
> > Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> > Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> > ---
> >   accel/kvm/kvm-all.c      |  6 ++++
> >   include/hw/core/cpu.h    |  2 ++
> >   include/sysemu/kvm.h     |  2 ++
> >   include/sysemu/kvm_int.h |  3 ++
> >   target/i386/cpu.h        |  8 ++++++
> >   target/i386/trace-events |  1 +
> >   target/i386/xen-proto.h  | 19 +++++++++++++
> >   target/i386/xen.c        | 61 ++++++++++++++++++++++++++++++++++++++++
> >   8 files changed, 102 insertions(+)
> >   create mode 100644 target/i386/xen-proto.h
> 
> 
> > diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
> > index 8830546121..e57b693528 100644
> > --- a/include/hw/core/cpu.h
> > +++ b/include/hw/core/cpu.h
> > @@ -443,6 +443,8 @@ struct CPUState {
> >   
> >       /* track IOMMUs whose translations we've cached in the TCG TLB */
> >       GArray *iommu_notifiers;
> > +
> > +    struct XenState *xen_state;
> 
> Since you define a type definition below, use it.

Ack.

More importantly though, some of that state needs to be persisted
across live migration / live update. 

There is per-vCPU state (the GPAs for vcpu_info etc., upcall vector,
timer info). I think I see how I could add that to the vmstate_x86_cpu
defined in target/i386/machine.c.

For the machine-wide state, where do I add that? Should I just
instantiate a dummy device (a bit like TYPE_KVM_CLOCK, AFAICT) to hang
that state off?
Philippe Mathieu-Daudé Dec. 6, 2022, 8:26 a.m. UTC | #3
+Juan/David/Claudio.

On 6/12/22 03:20, David Woodhouse wrote:
> On Mon, 2022-12-05 at 23:17 +0100, Philippe Mathieu-Daudé wrote:
>> On 5/12/22 18:31, David Woodhouse wrote:
>>> From: Joao Martins <joao.m.martins@oracle.com>
>>>
>>> This is done by implementing HYPERVISOR_memory_op specifically
>>> XENMEM_add_to_physmap with space XENMAPSPACE_shared_info. While
>>> Xen removes the page with its own, we instead use the gfn passed
>>> by the guest.
>>>
>>> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
>>> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
>>> ---
>>>    accel/kvm/kvm-all.c      |  6 ++++
>>>    include/hw/core/cpu.h    |  2 ++
>>>    include/sysemu/kvm.h     |  2 ++
>>>    include/sysemu/kvm_int.h |  3 ++
>>>    target/i386/cpu.h        |  8 ++++++
>>>    target/i386/trace-events |  1 +
>>>    target/i386/xen-proto.h  | 19 +++++++++++++
>>>    target/i386/xen.c        | 61 ++++++++++++++++++++++++++++++++++++++++
>>>    8 files changed, 102 insertions(+)
>>>    create mode 100644 target/i386/xen-proto.h
>>
>>
>>> diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
>>> index 8830546121..e57b693528 100644
>>> --- a/include/hw/core/cpu.h
>>> +++ b/include/hw/core/cpu.h
>>> @@ -443,6 +443,8 @@ struct CPUState {
>>>    
>>>        /* track IOMMUs whose translations we've cached in the TCG TLB */
>>>        GArray *iommu_notifiers;
>>> +
>>> +    struct XenState *xen_state;
>>
>> Since you define a type definition below, use it.
> 
> Ack.
> 
> More importantly though, some of that state needs to be persisted
> across live migration / live update.
> 
> There is per-vCPU state (the GPAs for vcpu_info etc., upcall vector,
> timer info). I think I see how I could add that to the vmstate_x86_cpu
> defined in target/i386/machine.c.
> 
> For the machine-wide state, where do I add that? Should I just
> instantiate a dummy device (a bit like TYPE_KVM_CLOCK, AFAICT) to hang
> that state off?

XenState in CPUState indeed is an anti-pattern.

As you said elsewhere (patch 2 maybe) your use is not a new accelerator
but a machine, so new state should go in a derived MachineState.

Migration is not my area of expertise, but since only the xenfv machine
will use this configuration, it seems simpler to store the vCPUs
migration states there...

Regards,

Phil.
Dr. David Alan Gilbert Dec. 6, 2022, 10 a.m. UTC | #4
* Philippe Mathieu-Daudé (philmd@linaro.org) wrote:
> +Juan/David/Claudio.
> 
> On 6/12/22 03:20, David Woodhouse wrote:
> > On Mon, 2022-12-05 at 23:17 +0100, Philippe Mathieu-Daudé wrote:
> > > On 5/12/22 18:31, David Woodhouse wrote:
> > > > From: Joao Martins <joao.m.martins@oracle.com>
> > > > 
> > > > This is done by implementing HYPERVISOR_memory_op specifically
> > > > XENMEM_add_to_physmap with space XENMAPSPACE_shared_info. While
> > > > Xen removes the page with its own, we instead use the gfn passed
> > > > by the guest.
> > > > 
> > > > Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> > > > Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> > > > ---
> > > >    accel/kvm/kvm-all.c      |  6 ++++
> > > >    include/hw/core/cpu.h    |  2 ++
> > > >    include/sysemu/kvm.h     |  2 ++
> > > >    include/sysemu/kvm_int.h |  3 ++
> > > >    target/i386/cpu.h        |  8 ++++++
> > > >    target/i386/trace-events |  1 +
> > > >    target/i386/xen-proto.h  | 19 +++++++++++++
> > > >    target/i386/xen.c        | 61 ++++++++++++++++++++++++++++++++++++++++
> > > >    8 files changed, 102 insertions(+)
> > > >    create mode 100644 target/i386/xen-proto.h
> > > 
> > > 
> > > > diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
> > > > index 8830546121..e57b693528 100644
> > > > --- a/include/hw/core/cpu.h
> > > > +++ b/include/hw/core/cpu.h
> > > > @@ -443,6 +443,8 @@ struct CPUState {
> > > >        /* track IOMMUs whose translations we've cached in the TCG TLB */
> > > >        GArray *iommu_notifiers;
> > > > +
> > > > +    struct XenState *xen_state;
> > > 
> > > Since you define a type definition below, use it.
> > 
> > Ack.
> > 
> > More importantly though, some of that state needs to be persisted
> > across live migration / live update.
> > 
> > There is per-vCPU state (the GPAs for vcpu_info etc., upcall vector,
> > timer info). I think I see how I could add that to the vmstate_x86_cpu
> > defined in target/i386/machine.c.
> > 
> > For the machine-wide state, where do I add that? Should I just
> > instantiate a dummy device (a bit like TYPE_KVM_CLOCK, AFAICT) to hang
> > that state off?
> 
> XenState in CPUState indeed is an anti-pattern.
> 
> As you said elsewhere (patch 2 maybe) your use is not a new accelerator
> but a machine, so new state should go in a derived MachineState.

I *think* the vmstate tends to be attached to a device rather than the
machinetype itself; eg a PCIe bridge that the Machine instantiates.
But yes, a 'dummy' type is fine for hanging vmstate off.

> Migration is not my area of expertise, but since only the xenfv machine
> will use this configuration, it seems simpler to store the vCPUs
> migration states there...

As long as ordering issues don't bite; i.e. between loading the
new Xen specific stuff and loading the main cpu;  you can force
order using the MIG_PRI_ constants on the .priority field.

I was going to suggest maybe you could add it to vmstate_cpu_common
as a subsection; but I don't *think* that's used for x86
(I think that's vmstate_x86_cpu instead???)

Dave

> Regards,
> 
> Phil.
>
David Woodhouse Dec. 7, 2022, 11:15 a.m. UTC | #5
On Tue, 2022-12-06 at 10:00 +0000, Dr. David Alan Gilbert wrote:
> * Philippe Mathieu-Daudé (
> philmd@linaro.org
> ) wrote:
> > +Juan/David/Claudio.
> > 
> > On 6/12/22 03:20, David Woodhouse wrote:
> > > On Mon, 2022-12-05 at 23:17 +0100, Philippe Mathieu-Daudé wrote:
> > > > On 5/12/22 18:31, David Woodhouse wrote:
> > > > > From: Joao Martins <
> > > > > joao.m.martins@oracle.com
> > > > > >
> > > > > 
> > > > > This is done by implementing HYPERVISOR_memory_op specifically
> > > > > XENMEM_add_to_physmap with space XENMAPSPACE_shared_info. While
> > > > > Xen removes the page with its own, we instead use the gfn passed
> > > > > by the guest.
> > > > > 
> > > > > Signed-off-by: Joao Martins <
> > > > > joao.m.martins@oracle.com
> > > > > >
> > > > > Signed-off-by: David Woodhouse <
> > > > > dwmw@amazon.co.uk
> > > > > >
> > > > > ---
> > > > >    accel/kvm/kvm-all.c      |  6 ++++
> > > > >    include/hw/core/cpu.h    |  2 ++
> > > > >    include/sysemu/kvm.h     |  2 ++
> > > > >    include/sysemu/kvm_int.h |  3 ++
> > > > >    target/i386/cpu.h        |  8 ++++++
> > > > >    target/i386/trace-events |  1 +
> > > > >    target/i386/xen-proto.h  | 19 +++++++++++++
> > > > >    target/i386/xen.c        | 61 ++++++++++++++++++++++++++++++++++++++++
> > > > >    8 files changed, 102 insertions(+)
> > > > >    create mode 100644 target/i386/xen-proto.h
> > > > 
> > > > 
> > > > > diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
> > > > > index 8830546121..e57b693528 100644
> > > > > --- a/include/hw/core/cpu.h
> > > > > +++ b/include/hw/core/cpu.h
> > > > > @@ -443,6 +443,8 @@ struct CPUState {
> > > > >        /* track IOMMUs whose translations we've cached in the TCG TLB */
> > > > >        GArray *iommu_notifiers;
> > > > > +
> > > > > +    struct XenState *xen_state;
> > > > 
> > > > Since you define a type definition below, use it.
> > > 
> > > Ack.
> > > 
> > > More importantly though, some of that state needs to be persisted
> > > across live migration / live update.
> > > 
> > > There is per-vCPU state (the GPAs for vcpu_info etc., upcall vector,
> > > timer info). I think I see how I could add that to the vmstate_x86_cpu
> > > defined in target/i386/machine.c.
> > > 
> > > For the machine-wide state, where do I add that? Should I just
> > > instantiate a dummy device (a bit like TYPE_KVM_CLOCK, AFAICT) to hang
> > > that state off?
> > 
> > XenState in CPUState indeed is an anti-pattern.
> > 
> > As you said elsewhere (patch 2 maybe) your use is not a new accelerator
> > but a machine, so new state should go in a derived MachineState.
> 
> I *think* the vmstate tends to be attached to a device rather than the
> machinetype itself; eg a PCIe bridge that the Machine instantiates.
> But yes, a 'dummy' type is fine for hanging vmstate off.

Below is an attempt at that. It adds a 'xen-overlay' device which hosts
the memory regions corresponding to "xenheap" pages, which need to be
mapped over guest GPAs on demand.

There's plenty to heckle here, but it basically seems to be working.
I've dumped the state (migrate "exec:cat>foo") and I can see the
correct shinfo_gpa there when the guest was running.

I added the device under hw/xen covered by CONFIG_XEN_EMU, and will
amend the existing shinfo patch to call xen_overlay_map_page() instead
of just *assuming* that there'll already be RAM there... which is true
for Linux guests but Windows uses an empty GFN instead of wasting a
page of real RAM.

There are some target-specific things to be migrated too, so if this
approach is sane then I'll probably add a similar dummy device in
target/i386/xen.c for the system-wide state in *addition* to...

> > Migration is not my area of expertise, but since only the xenfv machine
> > will use this configuration, it seems simpler to store the vCPUs
> > migration states there...
> 
> As long as ordering issues don't bite; i.e. between loading the
> new Xen specific stuff and loading the main cpu;  you can force
> order using the MIG_PRI_ constants on the .priority field.
> 
> I was going to suggest maybe you could add it to vmstate_cpu_common
> as a subsection; but I don't *think* that's used for x86
> (I think that's vmstate_x86_cpu instead???)

... using vmstate_x86_cpu for the per-vCPU state, which seems fairly
straightforward.

-------
From 6ac40ff7731bc2144aa7fa4015b9308c2eea8f3d Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Wed, 7 Dec 2022 09:19:31 +0000
Subject: [PATCH] hw/xen: Add xen_overlay device for emulating shared xenheap
 pages

For the shared info page and for grant tables, Xen shares its own pages
from the "Xen heap" to the guest. The guest requests that a given page
from a certain address space (XENMAPSPACE_shared_info, etc.) be mapped
to a given GPA using the XENMEM_add_to_physmap hypercall.

To support that in qemu when *emulating* Xen, create a memory region
(migratable) and allow it to be mapped as an overlay when requested.

Xen theoretically allows the same page to be mapped multiple times
into the guest, but that's hard to track and reinstate over migration,
so we automatically *unmap* any previous mapping when creating a new
one. This approach has been used in production with.... a non-trivial
number of guests expecting true Xen, without any problems yet being
noticed.

This adds just the shared info page for now. The grant tables will be
a larger region, and will need to be overlaid one page at a time. I
think that means I need to create separate aliases for each page of
the overall grant_frames region, so that they can be mapped individually.

Expecting some heckling at the use of xen_overlay_singleton. What is
the best way to do that? Using qdev_find_recursive() every time seemed
a bit wrong. But I suppose mapping it into the *guest* isn't a fast
path, and if the actual grant table code is allowed to just stash the
pointer it gets from xen_overlay_page_ptr() for later use then that
isn't a fast path for device I/O either.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 hw/i386/pc_piix.c    |   6 ++
 hw/xen/meson.build   |   4 ++
 hw/xen/xen_overlay.c | 158 +++++++++++++++++++++++++++++++++++++++++++
 hw/xen/xen_overlay.h |  14 ++++
 4 files changed, 182 insertions(+)
 create mode 100644 hw/xen/xen_overlay.c
 create mode 100644 hw/xen/xen_overlay.h

diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index d1127adde0..322232cce1 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -57,6 +57,7 @@
 #ifdef CONFIG_XEN
 #include <xen/hvm/hvm_info_table.h>
 #include "hw/xen/xen_pt.h"
+#include "hw/xen/xen_overlay.h"
 #endif
 #include "migration/global_state.h"
 #include "migration/misc.h"
@@ -411,6 +412,11 @@ static void pc_xen_hvm_init(MachineState *machine)
 
     pc_xen_hvm_init_pci(machine);
     pci_create_simple(pcms->bus, -1, "xen-platform");
+#ifdef CONFIG_XEN_EMU
+    if (xen_mode == XEN_EMULATE) {
+            xen_overlay_create();
+    }
+#endif
 }
 #endif
 
diff --git a/hw/xen/meson.build b/hw/xen/meson.build
index ae0ace3046..74b1b60afe 100644
--- a/hw/xen/meson.build
+++ b/hw/xen/meson.build
@@ -22,3 +22,7 @@ else
 endif
 
 specific_ss.add_all(when: ['CONFIG_XEN', xen], if_true: xen_specific_ss)
+
+softmmu_ss.add(when: ['CONFIG_XEN_EMU'], if_true: files(
+  'xen_overlay.c',
+))
diff --git a/hw/xen/xen_overlay.c b/hw/xen/xen_overlay.c
new file mode 100644
index 0000000000..86865082cf
--- /dev/null
+++ b/hw/xen/xen_overlay.c
@@ -0,0 +1,158 @@
+/*
+ * QEMU Xen emulation: Shared/overlay pages support
+ *
+ * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Authors: David Woodhouse <dwmw2@infradead.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/host-utils.h"
+#include "qemu/module.h"
+#include "qemu/main-loop.h"
+#include "qapi/error.h"
+#include "qom/object.h"
+#include "exec/target_page.h"
+#include "exec/address-spaces.h"
+#include "migration/vmstate.h"
+
+#include "hw/sysbus.h"
+#include "hw/xen/xen.h"
+#include "xen_overlay.h"
+
+#include "standard-headers/xen/memory.h"
+
+#define INVALID_GPA UINT64_MAX
+
+#define TYPE_XEN_OVERLAY "xenoverlay"
+OBJECT_DECLARE_SIMPLE_TYPE(XenOverlayState, XEN_OVERLAY)
+
+struct XenOverlayState {
+    /*< private >*/
+    SysBusDevice busdev;
+    /*< public >*/
+
+    MemoryRegion shinfo_mem;
+    void *shinfo_ptr;
+    uint64_t shinfo_gpa;
+};
+
+struct XenOverlayState *xen_overlay_singleton;
+
+static void xen_overlay_realize(DeviceState *dev, Error **errp)
+{
+    XenOverlayState *s = XEN_OVERLAY(dev);
+    size_t pg_sz = qemu_target_page_size();
+
+    if (xen_mode != XEN_EMULATE) {
+        error_setg(errp, "Xen overlay page support is for Xen emulation");
+        return;
+    }
+
+    memory_region_init_ram(&s->shinfo_mem, OBJECT(dev), "xen:shared_info", pg_sz, &error_abort);
+    memory_region_set_enabled(&s->shinfo_mem, true);
+    s->shinfo_ptr = memory_region_get_ram_ptr(&s->shinfo_mem);
+    s->shinfo_gpa = INVALID_GPA;
+    memset(s->shinfo_ptr, 0, pg_sz);
+}
+
+static int xen_overlay_post_load(void *opaque, int version_id)
+{
+    XenOverlayState *s = opaque;
+
+    if (s->shinfo_gpa != INVALID_GPA) {
+            xen_overlay_map_page(XENMAPSPACE_shared_info, 0, s->shinfo_gpa);
+    }
+
+    return 0;
+}
+
+static bool xen_overlay_is_needed(void *opaque)
+{
+    return xen_mode == XEN_EMULATE;
+}
+
+static const VMStateDescription xen_overlay_vmstate = {
+    .name = "xen_overlay",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = xen_overlay_is_needed,
+    .post_load = xen_overlay_post_load,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(shinfo_gpa, XenOverlayState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void xen_overlay_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->realize = xen_overlay_realize;
+    dc->vmsd = &xen_overlay_vmstate;
+}
+
+static const TypeInfo xen_overlay_info = {
+    .name          = TYPE_XEN_OVERLAY,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(XenOverlayState),
+    .class_init    = xen_overlay_class_init,
+};
+
+void xen_overlay_create(void)
+{
+    xen_overlay_singleton = XEN_OVERLAY(sysbus_create_simple(TYPE_XEN_OVERLAY, -1, NULL));
+}
+
+static void xen_overlay_register_types(void)
+{
+    type_register_static(&xen_overlay_info);
+}
+
+type_init(xen_overlay_register_types)
+
+int xen_overlay_map_page(uint32_t space, uint64_t idx, uint64_t gpa)
+{
+    MemoryRegion *ovl_page;
+
+    if (space != XENMAPSPACE_shared_info || idx != 0)
+        return -EINVAL;
+
+    if (!xen_overlay_singleton)
+        return -ENOENT;
+
+    qemu_mutex_lock_iothread();
+
+    ovl_page = &xen_overlay_singleton->shinfo_mem;
+
+    if (memory_region_is_mapped(ovl_page)) {
+        /* Xen allows guests to map the same page as many times as they like
+         * into guest physical frames. We don't, because it would be hard
+         * to track and restore them all. One mapping of each page is
+         * perfectly sufficient for all known guests... and we've tested
+         * that theory on a few now in other implementations. dwmw2. */
+        memory_region_del_subregion(get_system_memory(), ovl_page);
+    }
+
+    if (gpa != INVALID_GPA) {
+        memory_region_add_subregion_overlap(get_system_memory(), gpa, ovl_page, 0);
+    }
+    xen_overlay_singleton->shinfo_gpa = gpa;
+
+    qemu_mutex_unlock_iothread();
+    return 0;
+}
+
+void *xen_overlay_page_ptr(uint32_t space, uint64_t idx)
+{
+    if (space != XENMAPSPACE_shared_info || idx != 0)
+        return NULL;
+
+    if (!xen_overlay_singleton)
+        return NULL;
+
+    return xen_overlay_singleton->shinfo_ptr;
+}
diff --git a/hw/xen/xen_overlay.h b/hw/xen/xen_overlay.h
new file mode 100644
index 0000000000..afc63991ea
--- /dev/null
+++ b/hw/xen/xen_overlay.h
@@ -0,0 +1,14 @@
+/*
+ * QEMU Xen emulation: Shared/overlay pages support
+ *
+ * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Authors: David Woodhouse <dwmw2@infradead.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+void xen_overlay_create(void);
+int xen_overlay_map_page(uint32_t space, uint64_t idx, uint64_t gpa);
+void *xen_overlay_page_ptr(uint32_t space, uint64_t idx);
diff mbox series

Patch

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index f99b0becd8..8a227515b7 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -172,6 +172,11 @@  void kvm_resample_fd_notify(int gsi)
     }
 }
 
+struct XenState *kvm_get_xen_state(KVMState *s)
+{
+    return &s->xen;
+}
+
 int kvm_get_max_memslots(void)
 {
     KVMState *s = KVM_STATE(current_accel());
@@ -405,6 +410,7 @@  int kvm_init_vcpu(CPUState *cpu, Error **errp)
     cpu->vcpu_dirty = true;
     cpu->dirty_pages = 0;
     cpu->throttle_us_per_full = 0;
+    cpu->xen_state = &s->xen;
 
     mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
     if (mmap_size < 0) {
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 8830546121..e57b693528 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -443,6 +443,8 @@  struct CPUState {
 
     /* track IOMMUs whose translations we've cached in the TCG TLB */
     GArray *iommu_notifiers;
+
+    struct XenState *xen_state;
 };
 
 typedef QTAILQ_HEAD(CPUTailQ, CPUState) CPUTailQ;
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index e9a97eda8c..8e882fbe96 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -582,4 +582,6 @@  bool kvm_arch_cpu_check_are_resettable(void);
 bool kvm_dirty_ring_enabled(void);
 
 uint32_t kvm_dirty_ring_size(void);
+
+struct XenState *kvm_get_xen_state(KVMState *s);
 #endif
diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
index 3b4adcdc10..0d89cfe273 100644
--- a/include/sysemu/kvm_int.h
+++ b/include/sysemu/kvm_int.h
@@ -110,6 +110,9 @@  struct KVMState
     struct KVMDirtyRingReaper reaper;
     NotifyVmexitOption notify_vmexit;
     uint32_t notify_window;
+
+    /* xen guest state */
+    struct XenState xen;
 };
 
 void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 5ddd14467e..09c0281b8b 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -23,6 +23,14 @@ 
 #include "sysemu/tcg.h"
 #include "cpu-qom.h"
 #include "kvm/hyperv-proto.h"
+#include "xen-proto.h"
+
+#ifdef TARGET_X86_64
+#define TARGET_LONG_BITS 64
+#else
+#define TARGET_LONG_BITS 32
+#endif
+
 #include "exec/cpu-defs.h"
 #include "qapi/qapi-types-common.h"
 #include "qemu/cpu-float.h"
diff --git a/target/i386/trace-events b/target/i386/trace-events
index 3fb9ee3add..2bf732ee07 100644
--- a/target/i386/trace-events
+++ b/target/i386/trace-events
@@ -14,3 +14,4 @@  kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data
 
 # target/i386/xen.c
 kvm_xen_hypercall(int cpu, uint8_t cpl, uint64_t input, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t ret) "xen_hypercall: cpu %d cpl %d input %" PRIu64 " a0 0x%" PRIx64 " a1 0x%" PRIx64 " a2 0x%" PRIx64" ret 0x%" PRIu64
+kvm_xen_set_shared_info(uint64_t gfn) "shared info at gfn 0x%" PRIx64
diff --git a/target/i386/xen-proto.h b/target/i386/xen-proto.h
new file mode 100644
index 0000000000..c394909f54
--- /dev/null
+++ b/target/i386/xen-proto.h
@@ -0,0 +1,19 @@ 
+/*
+ * Definitions for Xen guest/hypervisor interaction - x86-specific part
+ *
+ * Copyright (c) 2019 Oracle and/or its affiliates. All rights reserved.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef TARGET_I386_XEN_PROTO_H
+#define TARGET_I386_XEN_PROTO_H
+
+typedef struct XenState {
+    struct shared_info *shared_info;
+} XenState;
+
+#endif
+
diff --git a/target/i386/xen.c b/target/i386/xen.c
index ee6f99523d..5d2d8a7e00 100644
--- a/target/i386/xen.c
+++ b/target/i386/xen.c
@@ -16,8 +16,10 @@ 
 #include "trace.h"
 
 #include "standard-headers/xen/version.h"
+#include "standard-headers/xen/memory.h"
 
 #define PAGE_OFFSET    0xffffffff80000000UL
+#define PAGE_SHIFT     12
 
 /*
  * Unhandled hypercalls error:
@@ -123,6 +125,62 @@  static int kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu,
     return err ? HCALL_ERR : 0;
 }
 
+static int xen_set_shared_info(CPUState *cs, struct shared_info *shi,
+                               uint64_t gfn)
+{
+    struct kvm_xen_hvm_attr xhsi;
+    XenState *xen = cs->xen_state;
+    KVMState *s = cs->kvm_state;
+    int err;
+
+    xhsi.type = KVM_XEN_ATTR_TYPE_SHARED_INFO;
+    xhsi.u.shared_info.gfn = gfn;
+    err = kvm_vm_ioctl(s, KVM_XEN_HVM_SET_ATTR, &xhsi);
+    trace_kvm_xen_set_shared_info(gfn);
+    xen->shared_info = shi;
+    return err;
+}
+
+static int kvm_xen_hcall_memory_op(struct kvm_xen_exit *exit,
+                                   int cmd, uint64_t arg, X86CPU *cpu)
+{
+    CPUState *cs = CPU(cpu);
+    int err = 0;
+
+    switch (cmd) {
+    case XENMEM_add_to_physmap: {
+            struct xen_add_to_physmap *xatp;
+            struct shared_info *shi;
+
+            xatp = gva_to_hva(cs, arg);
+            if (!xatp) {
+                err = -EFAULT;
+                break;
+            }
+
+            switch (xatp->space) {
+            case XENMAPSPACE_shared_info:
+                break;
+            default:
+                err = -ENOSYS;
+                break;
+            }
+
+            shi = gpa_to_hva(xatp->gpfn << PAGE_SHIFT);
+            if (!shi) {
+                err = -EFAULT;
+                break;
+            }
+
+            err = xen_set_shared_info(cs, shi, xatp->gpfn);
+            break;
+         }
+    }
+
+    exit->u.hcall.result = err;
+    return err ? HCALL_ERR : 0;
+}
+
 static int __kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
 {
     uint16_t code = exit->u.hcall.input;
@@ -133,6 +191,9 @@  static int __kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
     }
 
     switch (code) {
+    case __HYPERVISOR_memory_op:
+        return kvm_xen_hcall_memory_op(exit, exit->u.hcall.params[0],
+                                       exit->u.hcall.params[1], cpu);
     case __HYPERVISOR_xen_version:
         return kvm_xen_hcall_xen_version(exit, cpu, exit->u.hcall.params[0],
                                          exit->u.hcall.params[1]);