diff mbox series

[RFC,v2,16/22] i386/xen: handle VCPUOP_register_vcpu_info

Message ID 20221209095612.689243-17-dwmw2@infradead.org
State New
Headers show
Series Xen HVM support under KVM | expand

Commit Message

David Woodhouse Dec. 9, 2022, 9:56 a.m. UTC
From: Joao Martins <joao.m.martins@oracle.com>

Handle the hypercall to set a per-vCPU vcpu_info, and also wire up the
default vcpu_info in the shared_info page for the first 32 vCPUs.

To avoid deadlock within KVM a vCPU thread must set its *own* vcpu_info
rather than it being set from the context in which the hypercall is
invoked.

Add the vcpu_info (and default) GPA to the vmstate_x86_cpu for migration,
and restore it in kvm_arch_put_registers() appropriately.

Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 target/i386/cpu.h        |  2 ++
 target/i386/kvm/kvm.c    | 19 +++++++++++
 target/i386/machine.c    | 21 ++++++++++++
 target/i386/trace-events |  1 +
 target/i386/xen.c        | 74 +++++++++++++++++++++++++++++++++++++---
 target/i386/xen.h        |  1 +
 6 files changed, 113 insertions(+), 5 deletions(-)

Comments

Durrant, Paul Dec. 12, 2022, 2:58 p.m. UTC | #1
On 09/12/2022 09:56, David Woodhouse wrote:
> From: Joao Martins <joao.m.martins@oracle.com>
> 
> Handle the hypercall to set a per vcpu info, and also wire up the default
> vcpu_info in the shared_info page for the first 32 vCPUs.
> 
> To avoid deadlock within KVM a vCPU thread must set its *own* vcpu_info
> rather than it being set from the context in which the hypercall is
> invoked.
> 
> Add the vcpu_info (and default) GPA to the vmstate_x86_cpu for migration,
> and restore it in kvm_arch_put_registers() appropriately.
> 
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> ---
>   target/i386/cpu.h        |  2 ++
>   target/i386/kvm/kvm.c    | 19 +++++++++++
>   target/i386/machine.c    | 21 ++++++++++++
>   target/i386/trace-events |  1 +
>   target/i386/xen.c        | 74 +++++++++++++++++++++++++++++++++++++---
>   target/i386/xen.h        |  1 +
>   6 files changed, 113 insertions(+), 5 deletions(-)
> 
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index c6c57baed5..109b2e5669 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -1788,6 +1788,8 @@ typedef struct CPUArchState {
>   #endif
>   #if defined(CONFIG_KVM)
>       struct kvm_nested_state *nested_state;
> +    uint64_t xen_vcpu_info_gpa;
> +    uint64_t xen_vcpu_info_default_gpa;
>   #endif
>   #if defined(CONFIG_HVF)
>       HVFX86LazyFlags hvf_lflags;
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index ebde6bc204..fa45e2f99a 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -1811,6 +1811,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
>           has_msr_hv_hypercall = true;
>       }
>   
> +    env->xen_vcpu_info_gpa = UINT64_MAX;
> +    env->xen_vcpu_info_default_gpa = UINT64_MAX;


There was an INVALID_GPA definition for shared info. Looks like we could 
use it here too.

> +
>       xen_version = kvm_arch_xen_version(MACHINE(qdev_get_machine()));
>       if (xen_version) {
>   #ifdef CONFIG_XEN_EMU
> @@ -4728,6 +4731,22 @@ int kvm_arch_put_registers(CPUState *cpu, int level)
>           kvm_arch_set_tsc_khz(cpu);
>       }
>   
> +#ifdef CONFIG_XEN_EMU
> +    if (level == KVM_PUT_FULL_STATE) {
> +        uint64_t gpa = x86_cpu->env.xen_vcpu_info_gpa;
> +        if (gpa == UINT64_MAX) {
> +            gpa = x86_cpu->env.xen_vcpu_info_default_gpa;
> +        }
> +
> +        if (gpa != UINT64_MAX) {
> +            ret = kvm_xen_set_vcpu_attr(cpu, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, gpa);
> +            if (ret < 0) {
> +                return ret;
> +            }
> +        }
> +    }
> +#endif
> +
>       ret = kvm_getput_regs(x86_cpu, 1);
>       if (ret < 0) {
>           return ret;
[snip]
> @@ -195,19 +240,38 @@ static bool kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit,
>       }
>   }
>   
> +static int vcpuop_register_vcpu_info(CPUState *cs, CPUState *target,
> +                                     uint64_t arg)
> +{
> +    struct vcpu_register_vcpu_info rvi;
> +    uint64_t gpa;
> +
> +    if (!target)
> +            return -ENOENT;
> +
> +    if (kvm_copy_from_gva(cs, arg, &rvi, sizeof(rvi))) {
> +        return -EFAULT;
> +    }
> +
> +    gpa = ((rvi.mfn << TARGET_PAGE_BITS) + rvi.offset);

Some sanity checks wouldn't go amiss here...

rvi.offset should:
a) be < TARGET_PAGE_SIZE, and
b) be aligned to vcpu_info_t size

   Paul

> +    async_run_on_cpu(target, do_set_vcpu_info_gpa, RUN_ON_CPU_HOST_ULONG(gpa));
> +    return 0;
> +}
> +
David Woodhouse Dec. 13, 2022, 12:13 a.m. UTC | #2
On Mon, 2022-12-12 at 14:58 +0000, Paul Durrant wrote:
> On 09/12/2022 09:56, David Woodhouse wrote:
> > From: Joao Martins <
> > joao.m.martins@oracle.com
> > >
> > 
> > Handle the hypercall to set a per vcpu info, and also wire up the
> > default
> > vcpu_info in the shared_info page for the first 32 vCPUs.
> > 
> > To avoid deadlock within KVM a vCPU thread must set its *own*
> > vcpu_info
> > rather than it being set from the context in which the hypercall is
> > invoked.
> > 
> > Add the vcpu_info (and default) GPA to the vmstate_x86_cpu for
> > migration,
> > and restore it in kvm_arch_put_registers() appropriately.
> > 
> > Signed-off-by: Joao Martins <
> > joao.m.martins@oracle.com
> > >
> > Signed-off-by: David Woodhouse <
> > dwmw@amazon.co.uk
> > >
> > ---
> >   target/i386/cpu.h        |  2 ++
> >   target/i386/kvm/kvm.c    | 19 +++++++++++
> >   target/i386/machine.c    | 21 ++++++++++++
> >   target/i386/trace-events |  1 +
> >   target/i386/xen.c        | 74
> > +++++++++++++++++++++++++++++++++++++---
> >   target/i386/xen.h        |  1 +
> >   6 files changed, 113 insertions(+), 5 deletions(-)
> > 
> > diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> > index c6c57baed5..109b2e5669 100644
> > --- a/target/i386/cpu.h
> > +++ b/target/i386/cpu.h
> > @@ -1788,6 +1788,8 @@ typedef struct CPUArchState {
> >   #endif
> >   #if defined(CONFIG_KVM)
> >       struct kvm_nested_state *nested_state;
> > +    uint64_t xen_vcpu_info_gpa;
> > +    uint64_t xen_vcpu_info_default_gpa;
> >   #endif
> >   #if defined(CONFIG_HVF)
> >       HVFX86LazyFlags hvf_lflags;
> > diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> > index ebde6bc204..fa45e2f99a 100644
> > --- a/target/i386/kvm/kvm.c
> > +++ b/target/i386/kvm/kvm.c
> > @@ -1811,6 +1811,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
> >           has_msr_hv_hypercall = true;
> >       }
> >   
> > +    env->xen_vcpu_info_gpa = UINT64_MAX;
> > +    env->xen_vcpu_info_default_gpa = UINT64_MAX;
> 
> 
> There was an INVALID_GPA definition for shared info. Looks like we
> could use it here too.

There was, and I started trying to use it, but it fell foul of the "is
this going to live in target/ or hw/ and who can include what from
where?" and I decided to just use UINT64_MAX for now and keep typing.

That will work out in the end, I'm sure.

> Some sanity checks wouldn't go amiss here...
> 
> rvi.offset should:
> a) be < TARGET_PAGE_SIZE, and
> b) be aligned to vcpu_info_t size

Ack.
Durrant, Paul Dec. 14, 2022, 10:28 a.m. UTC | #3
On 13/12/2022 00:13, David Woodhouse wrote:
> On Mon, 2022-12-12 at 14:58 +0000, Paul Durrant wrote:
>> On 09/12/2022 09:56, David Woodhouse wrote:
>>> From: Joao Martins <
>>> joao.m.martins@oracle.com
>>>>
>>>
>>> Handle the hypercall to set a per vcpu info, and also wire up the
>>> default
>>> vcpu_info in the shared_info page for the first 32 vCPUs.
>>>
>>> To avoid deadlock within KVM a vCPU thread must set its *own*
>>> vcpu_info
>>> rather than it being set from the context in which the hypercall is
>>> invoked.
>>>
>>> Add the vcpu_info (and default) GPA to the vmstate_x86_cpu for
>>> migration,
>>> and restore it in kvm_arch_put_registers() appropriately.
>>>
>>> Signed-off-by: Joao Martins <
>>> joao.m.martins@oracle.com
>>>>
>>> Signed-off-by: David Woodhouse <
>>> dwmw@amazon.co.uk
>>>>
>>> ---
>>>    target/i386/cpu.h        |  2 ++
>>>    target/i386/kvm/kvm.c    | 19 +++++++++++
>>>    target/i386/machine.c    | 21 ++++++++++++
>>>    target/i386/trace-events |  1 +
>>>    target/i386/xen.c        | 74
>>> +++++++++++++++++++++++++++++++++++++---
>>>    target/i386/xen.h        |  1 +
>>>    6 files changed, 113 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
>>> index c6c57baed5..109b2e5669 100644
>>> --- a/target/i386/cpu.h
>>> +++ b/target/i386/cpu.h
>>> @@ -1788,6 +1788,8 @@ typedef struct CPUArchState {
>>>    #endif
>>>    #if defined(CONFIG_KVM)
>>>        struct kvm_nested_state *nested_state;
>>> +    uint64_t xen_vcpu_info_gpa;
>>> +    uint64_t xen_vcpu_info_default_gpa;
>>>    #endif
>>>    #if defined(CONFIG_HVF)
>>>        HVFX86LazyFlags hvf_lflags;
>>> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
>>> index ebde6bc204..fa45e2f99a 100644
>>> --- a/target/i386/kvm/kvm.c
>>> +++ b/target/i386/kvm/kvm.c
>>> @@ -1811,6 +1811,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
>>>            has_msr_hv_hypercall = true;
>>>        }
>>>    
>>> +    env->xen_vcpu_info_gpa = UINT64_MAX;
>>> +    env->xen_vcpu_info_default_gpa = UINT64_MAX;
>>
>>
>> There was an INVALID_GPA definition for shared info. Looks like we
>> could use it here too.
> 
> There was, and I started trying to use it, but it fell foul of the "is
> this going to live in target/ or hw/ and who can include what from
> where?" and I decided to just use UINT64_MAX for now and keep typing.
> 
> That will work out in the end, I'm sure.

Hopefully 
https://lore.kernel.org/lkml/20221209023622.274715-1-yu.c.zhang@linux.intel.com/ 
will help.

> 
>> Some sanity checks wouldn't go amiss here...
>>
>> rvi.offset should:
>> a) be < TARGET_PAGE_SIZE, and
>> b) be aligned to vcpu_info_t size
> 
> Ack.
David Woodhouse Dec. 14, 2022, 11:04 a.m. UTC | #4
On Wed, 2022-12-14 at 10:28 +0000, Paul Durrant wrote:
> On 13/12/2022 00:13, David Woodhouse wrote:
> > On Mon, 2022-12-12 at 14:58 +0000, Paul Durrant wrote:
> > > On 09/12/2022 09:56, David Woodhouse wrote:
> > > > 
> > > > @@ -1811,6 +1811,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
> > > >            has_msr_hv_hypercall = true;
> > > >        }
> > > >    
> > > > +    env->xen_vcpu_info_gpa = UINT64_MAX;
> > > > +    env->xen_vcpu_info_default_gpa = UINT64_MAX;
> > > 
> > > 
> > > There was an INVALID_GPA definition for shared info. Looks like we
> > > could use it here too.
> > 
> > There was, and I started trying to use it, but it fell foul of the "is
> > this going to live in target/ or hw/ and who can include what from
> > where?" and I decided to just use UINT64_MAX for now and keep typing.
> > 
> > That will work out in the end, I'm sure.
> 
> Hopefully 
> https://lore.kernel.org/lkml/20221209023622.274715-1-yu.c.zhang@linux.intel.com/
>  
> will help.

Those are kernel-internal; not in uapi headers. Although maybe they
*should* be uapi, at least for the KVM/Xen support because they are
actually part of the userspace ABI.

The kernel returns GFN_INVALID when queried about the shared info page
if it isn't set, or GPA_INVALID when queried about vcpu_info etc.

(Those are the same numerically but semantically subtly different, and
it hurts my brain that GFN_INVALID != GPA_INVALID >> PAGE_SHIFT.)

Userspace can also *set* those fields to Gxx_INVALID. Unlike the Xen
APIs which don't allow them to be turned off, we implement
SHUTDOWN_soft_reset in the userspace VMM so it needs to be able to turn
the shinfo areas off.
diff mbox series

Patch

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index c6c57baed5..109b2e5669 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1788,6 +1788,8 @@  typedef struct CPUArchState {
 #endif
 #if defined(CONFIG_KVM)
     struct kvm_nested_state *nested_state;
+    uint64_t xen_vcpu_info_gpa;
+    uint64_t xen_vcpu_info_default_gpa;
 #endif
 #if defined(CONFIG_HVF)
     HVFX86LazyFlags hvf_lflags;
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index ebde6bc204..fa45e2f99a 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -1811,6 +1811,9 @@  int kvm_arch_init_vcpu(CPUState *cs)
         has_msr_hv_hypercall = true;
     }
 
+    env->xen_vcpu_info_gpa = UINT64_MAX;
+    env->xen_vcpu_info_default_gpa = UINT64_MAX;
+
     xen_version = kvm_arch_xen_version(MACHINE(qdev_get_machine()));
     if (xen_version) {
 #ifdef CONFIG_XEN_EMU
@@ -4728,6 +4731,22 @@  int kvm_arch_put_registers(CPUState *cpu, int level)
         kvm_arch_set_tsc_khz(cpu);
     }
 
+#ifdef CONFIG_XEN_EMU
+    if (level == KVM_PUT_FULL_STATE) {
+        uint64_t gpa = x86_cpu->env.xen_vcpu_info_gpa;
+        if (gpa == UINT64_MAX) {
+            gpa = x86_cpu->env.xen_vcpu_info_default_gpa;
+        }
+
+        if (gpa != UINT64_MAX) {
+            ret = kvm_xen_set_vcpu_attr(cpu, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, gpa);
+            if (ret < 0) {
+                return ret;
+            }
+        }
+    }
+#endif
+
     ret = kvm_getput_regs(x86_cpu, 1);
     if (ret < 0) {
         return ret;
diff --git a/target/i386/machine.c b/target/i386/machine.c
index 310b125235..104cd6047c 100644
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -1257,6 +1257,26 @@  static const VMStateDescription vmstate_nested_state = {
     }
 };
 
+static bool xen_vcpu_needed(void *opaque)
+{
+    X86CPU *cpu = opaque;
+    CPUX86State *env = &cpu->env;
+
+    return (env->xen_vcpu_info_gpa != UINT64_MAX ||
+            env->xen_vcpu_info_default_gpa != UINT64_MAX);
+}
+
+static const VMStateDescription vmstate_xen_vcpu = {
+    .name = "cpu/xen_vcpu",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = xen_vcpu_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(env.xen_vcpu_info_gpa, X86CPU),
+        VMSTATE_UINT64(env.xen_vcpu_info_default_gpa, X86CPU),
+        VMSTATE_END_OF_LIST()
+    }
+};
 #endif
 
 static bool mcg_ext_ctl_needed(void *opaque)
@@ -1716,6 +1736,7 @@  const VMStateDescription vmstate_x86_cpu = {
 #endif
 #ifdef CONFIG_KVM
         &vmstate_nested_state,
+        &vmstate_xen_vcpu,
 #endif
         &vmstate_msr_tsx_ctrl,
         &vmstate_msr_intel_sgx,
diff --git a/target/i386/trace-events b/target/i386/trace-events
index fb999d0052..7118640697 100644
--- a/target/i386/trace-events
+++ b/target/i386/trace-events
@@ -15,3 +15,4 @@  kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data
 # target/i386/xen.c
 kvm_xen_hypercall(int cpu, uint8_t cpl, uint64_t input, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t ret) "xen_hypercall: cpu %d cpl %d input %" PRIu64 " a0 0x%" PRIx64 " a1 0x%" PRIx64 " a2 0x%" PRIx64" ret 0x%" PRIx64
 kvm_xen_set_shared_info(uint64_t gfn) "shared info at gfn 0x%" PRIx64
+kvm_xen_set_vcpu_attr(int cpu, int type, uint64_t gpa) "vcpu attr cpu %d type %d gpa 0x%" PRIx64
diff --git a/target/i386/xen.c b/target/i386/xen.c
index 9d1daadee1..cd816bb711 100644
--- a/target/i386/xen.c
+++ b/target/i386/xen.c
@@ -129,10 +129,47 @@  static bool kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu,
     return true;
 }
 
+int kvm_xen_set_vcpu_attr(CPUState *cs, uint16_t type, uint64_t gpa)
+{
+    struct kvm_xen_vcpu_attr xhsi;
+
+    xhsi.type = type;
+    xhsi.u.gpa = gpa;
+
+    trace_kvm_xen_set_vcpu_attr(cs->cpu_index, type, gpa);
+
+    return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xhsi);
+}
+
+static void do_set_vcpu_info_default_gpa(CPUState *cs, run_on_cpu_data data)
+{
+    X86CPU *cpu = X86_CPU(cs);
+    CPUX86State *env = &cpu->env;
+
+    env->xen_vcpu_info_default_gpa = data.host_ulong;
+
+    /* Changing the default does nothing if a vcpu_info was explicitly set. */
+    if (env->xen_vcpu_info_gpa == UINT64_MAX) {
+            kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
+                                  env->xen_vcpu_info_default_gpa);
+    }
+}
+
+static void do_set_vcpu_info_gpa(CPUState *cs, run_on_cpu_data data)
+{
+    X86CPU *cpu = X86_CPU(cs);
+    CPUX86State *env = &cpu->env;
+
+    env->xen_vcpu_info_gpa = data.host_ulong;
+
+    kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
+                          env->xen_vcpu_info_gpa);
+}
+
 static int xen_set_shared_info(CPUState *cs, uint64_t gfn)
 {
     uint64_t gpa = gfn << TARGET_PAGE_BITS;
-    int err;
+    int i, err;
 
     /* The xen_overlay device tells KVM about it too, since it had to
      * do that on migration load anyway (unless we're going to jump
@@ -144,6 +181,14 @@  static int xen_set_shared_info(CPUState *cs, uint64_t gfn)
 
     trace_kvm_xen_set_shared_info(gfn);
 
+    for (i = 0; i < XEN_LEGACY_MAX_VCPUS; i++) {
+        CPUState *cpu = qemu_get_cpu(i);
+        if (cpu) {
+                async_run_on_cpu(cpu, do_set_vcpu_info_default_gpa, RUN_ON_CPU_HOST_ULONG(gpa));
+        }
+        gpa += sizeof(vcpu_info_t);
+    }
+
     return err;
 }
 
@@ -195,19 +240,38 @@  static bool kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit,
     }
 }
 
+static int vcpuop_register_vcpu_info(CPUState *cs, CPUState *target,
+                                     uint64_t arg)
+{
+    struct vcpu_register_vcpu_info rvi;
+    uint64_t gpa;
+
+    if (!target)
+            return -ENOENT;
+
+    if (kvm_copy_from_gva(cs, arg, &rvi, sizeof(rvi))) {
+        return -EFAULT;
+    }
+
+    gpa = ((rvi.mfn << TARGET_PAGE_BITS) + rvi.offset);
+    async_run_on_cpu(target, do_set_vcpu_info_gpa, RUN_ON_CPU_HOST_ULONG(gpa));
+    return 0;
+}
+
 static bool kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, X86CPU *cpu,
                                   int cmd, int vcpu_id, uint64_t arg)
 {
+    CPUState *dest = qemu_get_cpu(vcpu_id);
+    CPUState *cs = CPU(cpu);
     int err;
 
     switch (cmd) {
     case VCPUOP_register_vcpu_info:
-        /* no vcpu info placement for now */
-        err = -ENOSYS;
-        break;
+            err = vcpuop_register_vcpu_info(cs, dest, arg);
+            break;
 
     default:
-        return false;
+            return false;
     }
 
     exit->u.hcall.result = err;
diff --git a/target/i386/xen.h b/target/i386/xen.h
index 9134d78685..53573e07f8 100644
--- a/target/i386/xen.h
+++ b/target/i386/xen.h
@@ -24,5 +24,6 @@ 
 
 int kvm_xen_init(KVMState *s, uint32_t xen_version);
 int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit);
+int kvm_xen_set_vcpu_attr(CPUState *cs, uint16_t type, uint64_t gpa);
 
 #endif /* QEMU_I386_XEN_H */