Message ID | 20221209095612.689243-17-dwmw2@infradead.org |
---|---|
State | New |
Headers | show |
Series | Xen HVM support under KVM | expand |
On 09/12/2022 09:56, David Woodhouse wrote: > From: Joao Martins <joao.m.martins@oracle.com> > > Handle the hypercall to set a per vcpu info, and also wire up the default > vcpu_info in the shared_info page for the first 32 vCPUs. > > To avoid deadlock within KVM a vCPU thread must set its *own* vcpu_info > rather than it being set from the context in which the hypercall is > invoked. > > Add the vcpu_info (and default) GPA to the vmstate_x86_cpu for migration, > and restore it in kvm_arch_put_registers() appropriately. > > Signed-off-by: Joao Martins <joao.m.martins@oracle.com> > Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> > --- > target/i386/cpu.h | 2 ++ > target/i386/kvm/kvm.c | 19 +++++++++++ > target/i386/machine.c | 21 ++++++++++++ > target/i386/trace-events | 1 + > target/i386/xen.c | 74 +++++++++++++++++++++++++++++++++++++--- > target/i386/xen.h | 1 + > 6 files changed, 113 insertions(+), 5 deletions(-) > > diff --git a/target/i386/cpu.h b/target/i386/cpu.h > index c6c57baed5..109b2e5669 100644 > --- a/target/i386/cpu.h > +++ b/target/i386/cpu.h > @@ -1788,6 +1788,8 @@ typedef struct CPUArchState { > #endif > #if defined(CONFIG_KVM) > struct kvm_nested_state *nested_state; > + uint64_t xen_vcpu_info_gpa; > + uint64_t xen_vcpu_info_default_gpa; > #endif > #if defined(CONFIG_HVF) > HVFX86LazyFlags hvf_lflags; > diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c > index ebde6bc204..fa45e2f99a 100644 > --- a/target/i386/kvm/kvm.c > +++ b/target/i386/kvm/kvm.c > @@ -1811,6 +1811,9 @@ int kvm_arch_init_vcpu(CPUState *cs) > has_msr_hv_hypercall = true; > } > > + env->xen_vcpu_info_gpa = UINT64_MAX; > + env->xen_vcpu_info_default_gpa = UINT64_MAX; There was an INVALID_GPA definition for shared info. Looks like we could use it here too. 
> + > xen_version = kvm_arch_xen_version(MACHINE(qdev_get_machine())); > if (xen_version) { > #ifdef CONFIG_XEN_EMU > @@ -4728,6 +4731,22 @@ int kvm_arch_put_registers(CPUState *cpu, int level) > kvm_arch_set_tsc_khz(cpu); > } > > +#ifdef CONFIG_XEN_EMU > + if (level == KVM_PUT_FULL_STATE) { > + uint64_t gpa = x86_cpu->env.xen_vcpu_info_gpa; > + if (gpa == UINT64_MAX) { > + gpa = x86_cpu->env.xen_vcpu_info_default_gpa; > + } > + > + if (gpa != UINT64_MAX) { > + ret = kvm_xen_set_vcpu_attr(cpu, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, gpa); > + if (ret < 0) { > + return ret; > + } > + } > + } > +#endif > + > ret = kvm_getput_regs(x86_cpu, 1); > if (ret < 0) { > return ret; [snip] > @@ -195,19 +240,38 @@ static bool kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, > } > } > > +static int vcpuop_register_vcpu_info(CPUState *cs, CPUState *target, > + uint64_t arg) > +{ > + struct vcpu_register_vcpu_info rvi; > + uint64_t gpa; > + > + if (!target) > + return -ENOENT; > + > + if (kvm_copy_from_gva(cs, arg, &rvi, sizeof(rvi))) { > + return -EFAULT; > + } > + > + gpa = ((rvi.mfn << TARGET_PAGE_BITS) + rvi.offset); Some sanity checks wouldn't go amiss here... rvi.offset should: a) be < TARGET_PAGE_SIZE, and b) be aligned to vcpu_info_t size Paul > + async_run_on_cpu(target, do_set_vcpu_info_gpa, RUN_ON_CPU_HOST_ULONG(gpa)); > + return 0; > +} > +
On Mon, 2022-12-12 at 14:58 +0000, Paul Durrant wrote: > On 09/12/2022 09:56, David Woodhouse wrote: > > From: Joao Martins < > > joao.m.martins@oracle.com > > > > > > > Handle the hypercall to set a per vcpu info, and also wire up the > > default > > vcpu_info in the shared_info page for the first 32 vCPUs. > > > > To avoid deadlock within KVM a vCPU thread must set its *own* > > vcpu_info > > rather than it being set from the context in which the hypercall is > > invoked. > > > > Add the vcpu_info (and default) GPA to the vmstate_x86_cpu for > > migration, > > and restore it in kvm_arch_put_registers() appropriately. > > > > Signed-off-by: Joao Martins < > > joao.m.martins@oracle.com > > > > > Signed-off-by: David Woodhouse < > > dwmw@amazon.co.uk > > > > > --- > > target/i386/cpu.h | 2 ++ > > target/i386/kvm/kvm.c | 19 +++++++++++ > > target/i386/machine.c | 21 ++++++++++++ > > target/i386/trace-events | 1 + > > target/i386/xen.c | 74 > > +++++++++++++++++++++++++++++++++++++--- > > target/i386/xen.h | 1 + > > 6 files changed, 113 insertions(+), 5 deletions(-) > > > > diff --git a/target/i386/cpu.h b/target/i386/cpu.h > > index c6c57baed5..109b2e5669 100644 > > --- a/target/i386/cpu.h > > +++ b/target/i386/cpu.h > > @@ -1788,6 +1788,8 @@ typedef struct CPUArchState { > > #endif > > #if defined(CONFIG_KVM) > > struct kvm_nested_state *nested_state; > > + uint64_t xen_vcpu_info_gpa; > > + uint64_t xen_vcpu_info_default_gpa; > > #endif > > #if defined(CONFIG_HVF) > > HVFX86LazyFlags hvf_lflags; > > diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c > > index ebde6bc204..fa45e2f99a 100644 > > --- a/target/i386/kvm/kvm.c > > +++ b/target/i386/kvm/kvm.c > > @@ -1811,6 +1811,9 @@ int kvm_arch_init_vcpu(CPUState *cs) > > has_msr_hv_hypercall = true; > > } > > > > + env->xen_vcpu_info_gpa = UINT64_MAX; > > + env->xen_vcpu_info_default_gpa = UINT64_MAX; > > > There was an INVALID_GPA definition for shared info. Looks like we > could use it here too. 
There was, and I started trying to use it, but it fell foul of the "is this going to live in target/ or hw/ and who can include what from where?" and I decided to just use UINT64_MAX for now and keep typing. That will work out in the end, I'm sure. > Some sanity checks wouldn't go amiss here... > > rvi.offset should: > a) be < TARGET_PAGE_SIZE, and > b) be aligned to vcpu_info_t size Ack.
On 13/12/2022 00:13, David Woodhouse wrote: > On Mon, 2022-12-12 at 14:58 +0000, Paul Durrant wrote: >> On 09/12/2022 09:56, David Woodhouse wrote: >>> From: Joao Martins < >>> joao.m.martins@oracle.com >>>> >>> >>> Handle the hypercall to set a per vcpu info, and also wire up the >>> default >>> vcpu_info in the shared_info page for the first 32 vCPUs. >>> >>> To avoid deadlock within KVM a vCPU thread must set its *own* >>> vcpu_info >>> rather than it being set from the context in which the hypercall is >>> invoked. >>> >>> Add the vcpu_info (and default) GPA to the vmstate_x86_cpu for >>> migration, >>> and restore it in kvm_arch_put_registers() appropriately. >>> >>> Signed-off-by: Joao Martins < >>> joao.m.martins@oracle.com >>>> >>> Signed-off-by: David Woodhouse < >>> dwmw@amazon.co.uk >>>> >>> --- >>> target/i386/cpu.h | 2 ++ >>> target/i386/kvm/kvm.c | 19 +++++++++++ >>> target/i386/machine.c | 21 ++++++++++++ >>> target/i386/trace-events | 1 + >>> target/i386/xen.c | 74 >>> +++++++++++++++++++++++++++++++++++++--- >>> target/i386/xen.h | 1 + >>> 6 files changed, 113 insertions(+), 5 deletions(-) >>> >>> diff --git a/target/i386/cpu.h b/target/i386/cpu.h >>> index c6c57baed5..109b2e5669 100644 >>> --- a/target/i386/cpu.h >>> +++ b/target/i386/cpu.h >>> @@ -1788,6 +1788,8 @@ typedef struct CPUArchState { >>> #endif >>> #if defined(CONFIG_KVM) >>> struct kvm_nested_state *nested_state; >>> + uint64_t xen_vcpu_info_gpa; >>> + uint64_t xen_vcpu_info_default_gpa; >>> #endif >>> #if defined(CONFIG_HVF) >>> HVFX86LazyFlags hvf_lflags; >>> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c >>> index ebde6bc204..fa45e2f99a 100644 >>> --- a/target/i386/kvm/kvm.c >>> +++ b/target/i386/kvm/kvm.c >>> @@ -1811,6 +1811,9 @@ int kvm_arch_init_vcpu(CPUState *cs) >>> has_msr_hv_hypercall = true; >>> } >>> >>> + env->xen_vcpu_info_gpa = UINT64_MAX; >>> + env->xen_vcpu_info_default_gpa = UINT64_MAX; >> >> >> There was an INVALID_GPA definition for shared info. 
Looks like we >> could use it here too. > > There was, and I started trying to use it, but it fell foul of the "is > this going to live in target/ or hw/ and who can include what from > where?" and I decided to just use UINT64_MAX for now and keep typing. > > That will work out in the end, I'm sure. Hopefully https://lore.kernel.org/lkml/20221209023622.274715-1-yu.c.zhang@linux.intel.com/ will help. > >> Some sanity checks wouldn't go amiss here... >> >> rvi.offset should: >> a) be < TARGET_PAGE_SIZE, and >> b) be aligned to vcpu_info_t size > > Ack.
On Wed, 2022-12-14 at 10:28 +0000, Paul Durrant wrote: > On 13/12/2022 00:13, David Woodhouse wrote: > > On Mon, 2022-12-12 at 14:58 +0000, Paul Durrant wrote: > > > On 09/12/2022 09:56, David Woodhouse wrote: > > > > > > > > @@ -1811,6 +1811,9 @@ int kvm_arch_init_vcpu(CPUState *cs) > > > > has_msr_hv_hypercall = true; > > > > } > > > > > > > > + env->xen_vcpu_info_gpa = UINT64_MAX; > > > > + env->xen_vcpu_info_default_gpa = UINT64_MAX; > > > > > > > > > There was an INVALID_GPA definition for shared info. Looks like we > > > could use it here too. > > > > There was, and I started trying to use it, but it fell foul of the "is > > this going to live in target/ or hw/ and who can include what from > > where?" and I decided to just use UINT64_MAX for now and keep typing. > > > > That will work out in the end, I'm sure. > > Hopefully > https://lore.kernel.org/lkml/20221209023622.274715-1-yu.c.zhang@linux.intel.com/ > > will help. Those are kernel-internal; not in uapi headers. Although maybe they *should* be uapi, at least for the KVM/Xen support because they are actually part of the userspace ABI. The kernel returns GFN_INVALID when queried about the shared info page if it isn't set, or GPA_INVALID when queried about vcpu_info etc. (Those are the same numerically but semantically subtly different, and it hurts my brain that GFN_INVALID != GPA_INVALID >> PAGE_SHIFT.) Userspace can also *set* those fields to Gxx_INVALID. Unlike the Xen APIs which don't allow them to be turned off, we implement SHUTDOWN_soft_reset in the userspace VMM so it needs to be able to turn the shinfo areas off.
diff --git a/target/i386/cpu.h b/target/i386/cpu.h index c6c57baed5..109b2e5669 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1788,6 +1788,8 @@ typedef struct CPUArchState { #endif #if defined(CONFIG_KVM) struct kvm_nested_state *nested_state; + uint64_t xen_vcpu_info_gpa; + uint64_t xen_vcpu_info_default_gpa; #endif #if defined(CONFIG_HVF) HVFX86LazyFlags hvf_lflags; diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index ebde6bc204..fa45e2f99a 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -1811,6 +1811,9 @@ int kvm_arch_init_vcpu(CPUState *cs) has_msr_hv_hypercall = true; } + env->xen_vcpu_info_gpa = UINT64_MAX; + env->xen_vcpu_info_default_gpa = UINT64_MAX; + xen_version = kvm_arch_xen_version(MACHINE(qdev_get_machine())); if (xen_version) { #ifdef CONFIG_XEN_EMU @@ -4728,6 +4731,22 @@ int kvm_arch_put_registers(CPUState *cpu, int level) kvm_arch_set_tsc_khz(cpu); } +#ifdef CONFIG_XEN_EMU + if (level == KVM_PUT_FULL_STATE) { + uint64_t gpa = x86_cpu->env.xen_vcpu_info_gpa; + if (gpa == UINT64_MAX) { + gpa = x86_cpu->env.xen_vcpu_info_default_gpa; + } + + if (gpa != UINT64_MAX) { + ret = kvm_xen_set_vcpu_attr(cpu, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, gpa); + if (ret < 0) { + return ret; + } + } + } +#endif + ret = kvm_getput_regs(x86_cpu, 1); if (ret < 0) { return ret; diff --git a/target/i386/machine.c b/target/i386/machine.c index 310b125235..104cd6047c 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -1257,6 +1257,26 @@ static const VMStateDescription vmstate_nested_state = { } }; +static bool xen_vcpu_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + return (env->xen_vcpu_info_gpa != UINT64_MAX || + env->xen_vcpu_info_default_gpa != UINT64_MAX); +} + +static const VMStateDescription vmstate_xen_vcpu = { + .name = "cpu/xen_vcpu", + .version_id = 1, + .minimum_version_id = 1, + .needed = xen_vcpu_needed, + .fields = (VMStateField[]) { + 
VMSTATE_UINT64(env.xen_vcpu_info_gpa, X86CPU), + VMSTATE_UINT64(env.xen_vcpu_info_default_gpa, X86CPU), + VMSTATE_END_OF_LIST() + } +}; #endif static bool mcg_ext_ctl_needed(void *opaque) @@ -1716,6 +1736,7 @@ const VMStateDescription vmstate_x86_cpu = { #endif #ifdef CONFIG_KVM &vmstate_nested_state, + &vmstate_xen_vcpu, #endif &vmstate_msr_tsx_ctrl, &vmstate_msr_intel_sgx, diff --git a/target/i386/trace-events b/target/i386/trace-events index fb999d0052..7118640697 100644 --- a/target/i386/trace-events +++ b/target/i386/trace-events @@ -15,3 +15,4 @@ kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data # target/i386/xen.c kvm_xen_hypercall(int cpu, uint8_t cpl, uint64_t input, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t ret) "xen_hypercall: cpu %d cpl %d input %" PRIu64 " a0 0x%" PRIx64 " a1 0x%" PRIx64 " a2 0x%" PRIx64" ret 0x%" PRIx64 kvm_xen_set_shared_info(uint64_t gfn) "shared info at gfn 0x%" PRIx64 +kvm_xen_set_vcpu_attr(int cpu, int type, uint64_t gpa) "vcpu attr cpu %d type %d gpa 0x%" PRIx64 diff --git a/target/i386/xen.c b/target/i386/xen.c index 9d1daadee1..cd816bb711 100644 --- a/target/i386/xen.c +++ b/target/i386/xen.c @@ -129,10 +129,47 @@ static bool kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu, return true; } +int kvm_xen_set_vcpu_attr(CPUState *cs, uint16_t type, uint64_t gpa) +{ + struct kvm_xen_vcpu_attr xhsi; + + xhsi.type = type; + xhsi.u.gpa = gpa; + + trace_kvm_xen_set_vcpu_attr(cs->cpu_index, type, gpa); + + return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xhsi); +} + +static void do_set_vcpu_info_default_gpa(CPUState *cs, run_on_cpu_data data) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + + env->xen_vcpu_info_default_gpa = data.host_ulong; + + /* Changing the default does nothing if a vcpu_info was explicitly set. 
*/ + if (env->xen_vcpu_info_gpa == UINT64_MAX) { + kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, + env->xen_vcpu_info_default_gpa); + } +} + +static void do_set_vcpu_info_gpa(CPUState *cs, run_on_cpu_data data) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + + env->xen_vcpu_info_gpa = data.host_ulong; + + kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, + env->xen_vcpu_info_gpa); +} + static int xen_set_shared_info(CPUState *cs, uint64_t gfn) { uint64_t gpa = gfn << TARGET_PAGE_BITS; - int err; + int i, err; /* The xen_overlay device tells KVM about it too, since it had to * do that on migration load anyway (unless we're going to jump @@ -144,6 +181,14 @@ static int xen_set_shared_info(CPUState *cs, uint64_t gfn) trace_kvm_xen_set_shared_info(gfn); + for (i = 0; i < XEN_LEGACY_MAX_VCPUS; i++) { + CPUState *cpu = qemu_get_cpu(i); + if (cpu) { + async_run_on_cpu(cpu, do_set_vcpu_info_default_gpa, RUN_ON_CPU_HOST_ULONG(gpa)); + } + gpa += sizeof(vcpu_info_t); + } + return err; } @@ -195,19 +240,38 @@ static bool kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, } } +static int vcpuop_register_vcpu_info(CPUState *cs, CPUState *target, + uint64_t arg) +{ + struct vcpu_register_vcpu_info rvi; + uint64_t gpa; + + if (!target) + return -ENOENT; + + if (kvm_copy_from_gva(cs, arg, &rvi, sizeof(rvi))) { + return -EFAULT; + } + + gpa = ((rvi.mfn << TARGET_PAGE_BITS) + rvi.offset); + async_run_on_cpu(target, do_set_vcpu_info_gpa, RUN_ON_CPU_HOST_ULONG(gpa)); + return 0; +} + static bool kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, X86CPU *cpu, int cmd, int vcpu_id, uint64_t arg) { + CPUState *dest = qemu_get_cpu(vcpu_id); + CPUState *cs = CPU(cpu); int err; switch (cmd) { case VCPUOP_register_vcpu_info: - /* no vcpu info placement for now */ - err = -ENOSYS; - break; + err = vcpuop_register_vcpu_info(cs, dest, arg); + break; default: - return false; + return false; } exit->u.hcall.result = err; diff --git a/target/i386/xen.h 
b/target/i386/xen.h index 9134d78685..53573e07f8 100644 --- a/target/i386/xen.h +++ b/target/i386/xen.h @@ -24,5 +24,6 @@ int kvm_xen_init(KVMState *s, uint32_t xen_version); int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit); +int kvm_xen_set_vcpu_attr(CPUState *cs, uint16_t type, uint64_t gpa); #endif /* QEMU_I386_XEN_H */