| Message ID | 1537524123-9578-23-git-send-email-paulus@ozlabs.org |
|---|---|
| State | Superseded |
| Headers | show |
| Series | KVM: PPC: Book3S HV: Nested HV virtualization \| expand |
On Fri, Sep 21, 2018 at 08:01:53PM +1000, Paul Mackerras wrote: > When we are running as a nested hypervisor, we use a hypercall to > enter the guest rather than code in book3s_hv_rmhandlers.S. This means > that the hypercall handlers listed in hcall_real_table never get called. > There are some hypercalls that are handled there and not in > kvmppc_pseries_do_hcall(), which therefore won't get processed for > a nested guest. > > To fix this, we add cases to kvmppc_pseries_do_hcall() to handle those > hypercalls, with the following exceptions: > > - The HPT hypercalls (H_ENTER, H_REMOVE, etc.) are not handled because > we only support radix mode for nested guests. > > - H_CEDE has to be handled specially because the cede logic in > kvmhv_run_single_vcpu assumes that it has been processed by the time > that kvmhv_p9_guest_entry() returns. Therefore we put a special > case for H_CEDE in kvmhv_p9_guest_entry(). > > For the XICS hypercalls, if real-mode processing is enabled, then the > virtual-mode handlers assume that they are being called only to finish > up the operation. Therefore we turn off the real-mode flag in the XICS > code when running as a nested hypervisor. 
> > Signed-off-by: Paul Mackerras <paulus@ozlabs.org> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> > --- > arch/powerpc/include/asm/asm-prototypes.h | 4 +++ > arch/powerpc/kvm/book3s_hv.c | 43 +++++++++++++++++++++++++++++++ > arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2 ++ > arch/powerpc/kvm/book3s_xics.c | 3 ++- > 4 files changed, 51 insertions(+), 1 deletion(-) > > diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h > index 5c9b00c..c55ba3b 100644 > --- a/arch/powerpc/include/asm/asm-prototypes.h > +++ b/arch/powerpc/include/asm/asm-prototypes.h > @@ -167,4 +167,8 @@ void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu); > > int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu); > > +long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr); > +long kvmppc_h_set_xdabr(struct kvm_vcpu *vcpu, unsigned long dabr, > + unsigned long dabrx); > + > #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */ > diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c > index 9052a34..61de6ac 100644 > --- a/arch/powerpc/kvm/book3s_hv.c > +++ b/arch/powerpc/kvm/book3s_hv.c > @@ -50,6 +50,7 @@ > #include <asm/reg.h> > #include <asm/ppc-opcode.h> > #include <asm/asm-prototypes.h> > +#include <asm/archrandom.h> > #include <asm/debug.h> > #include <asm/disassemble.h> > #include <asm/cputable.h> > @@ -915,6 +916,19 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) > break; > } > return RESUME_HOST; > + case H_SET_DABR: > + ret = kvmppc_h_set_dabr(vcpu, kvmppc_get_gpr(vcpu, 4)); > + break; > + case H_SET_XDABR: > + ret = kvmppc_h_set_xdabr(vcpu, kvmppc_get_gpr(vcpu, 4), > + kvmppc_get_gpr(vcpu, 5)); > + break; > + case H_GET_TCE: > + ret = kvmppc_h_get_tce(vcpu, kvmppc_get_gpr(vcpu, 4), > + kvmppc_get_gpr(vcpu, 5)); > + if (ret == H_TOO_HARD) > + return RESUME_HOST; > + break; > case H_PUT_TCE: > ret = kvmppc_h_put_tce(vcpu, kvmppc_get_gpr(vcpu, 4), > kvmppc_get_gpr(vcpu, 5), > @@ -938,6 +952,10 @@ int 
kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) > if (ret == H_TOO_HARD) > return RESUME_HOST; > break; > + case H_RANDOM: > + if (!powernv_get_random_long(&vcpu->arch.regs.gpr[4])) > + ret = H_HARDWARE; > + break; > > case H_SET_PARTITION_TABLE: > ret = H_FUNCTION; > @@ -963,6 +981,24 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) > return RESUME_GUEST; > } > > +/* > + * Handle H_CEDE in the nested virtualization case where we haven't > + * called the real-mode hcall handlers in book3s_hv_rmhandlers.S. > + * This has to be done early, not in kvmppc_pseries_do_hcall(), so > + * that the cede logic in kvmppc_run_single_vcpu() works properly. > + */ > +static void kvmppc_nested_cede(struct kvm_vcpu *vcpu) > +{ > + vcpu->arch.shregs.msr |= MSR_EE; > + vcpu->arch.ceded = 1; > + smp_mb(); > + if (vcpu->arch.prodded) { > + vcpu->arch.prodded = 0; > + smp_mb(); > + vcpu->arch.ceded = 0; > + } > +} > + > static int kvmppc_hcall_impl_hv(unsigned long cmd) > { > switch (cmd) { > @@ -3420,6 +3456,13 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit) > vcpu->arch.shregs.msr = vcpu->arch.regs.msr; > vcpu->arch.shregs.dar = mfspr(SPRN_DAR); > vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR); > + > + /* H_CEDE has to be handled now, not later */ > + if (trap == BOOK3S_INTERRUPT_SYSCALL && > + kvmppc_get_gpr(vcpu, 3) == H_CEDE) { > + kvmppc_nested_cede(vcpu); > + trap = 0; > + } > } else { > trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit); > } > diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S > index 04fcaa4..a4780be 100644 > --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S > +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S > @@ -2462,6 +2462,7 @@ hcall_real_table: > hcall_real_table_end: > > _GLOBAL(kvmppc_h_set_xdabr) > +EXPORT_SYMBOL_GPL(kvmppc_h_set_xdabr) > andi. 
r0, r5, DABRX_USER | DABRX_KERNEL > beq 6f > li r0, DABRX_USER | DABRX_KERNEL | DABRX_BTI > @@ -2471,6 +2472,7 @@ _GLOBAL(kvmppc_h_set_xdabr) > blr > > _GLOBAL(kvmppc_h_set_dabr) > +EXPORT_SYMBOL_GPL(kvmppc_h_set_dabr) > li r5, DABRX_USER | DABRX_KERNEL > 3: > BEGIN_FTR_SECTION > diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c > index d9ba1b0..b0b2bfc 100644 > --- a/arch/powerpc/kvm/book3s_xics.c > +++ b/arch/powerpc/kvm/book3s_xics.c > @@ -1390,7 +1390,8 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type) > } > > #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE > - if (cpu_has_feature(CPU_FTR_ARCH_206)) { > + if (cpu_has_feature(CPU_FTR_ARCH_206) && > + cpu_has_feature(CPU_FTR_HVMODE)) { > /* Enable real mode support */ > xics->real_mode = ENABLE_REALMODE; > xics->real_mode_dbg = DEBUG_REALMODE;
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 5c9b00c..c55ba3b 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -167,4 +167,8 @@ void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu); int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu); +long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr); +long kvmppc_h_set_xdabr(struct kvm_vcpu *vcpu, unsigned long dabr, + unsigned long dabrx); + #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 9052a34..61de6ac 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -50,6 +50,7 @@ #include <asm/reg.h> #include <asm/ppc-opcode.h> #include <asm/asm-prototypes.h> +#include <asm/archrandom.h> #include <asm/debug.h> #include <asm/disassemble.h> #include <asm/cputable.h> @@ -915,6 +916,19 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) break; } return RESUME_HOST; + case H_SET_DABR: + ret = kvmppc_h_set_dabr(vcpu, kvmppc_get_gpr(vcpu, 4)); + break; + case H_SET_XDABR: + ret = kvmppc_h_set_xdabr(vcpu, kvmppc_get_gpr(vcpu, 4), + kvmppc_get_gpr(vcpu, 5)); + break; + case H_GET_TCE: + ret = kvmppc_h_get_tce(vcpu, kvmppc_get_gpr(vcpu, 4), + kvmppc_get_gpr(vcpu, 5)); + if (ret == H_TOO_HARD) + return RESUME_HOST; + break; case H_PUT_TCE: ret = kvmppc_h_put_tce(vcpu, kvmppc_get_gpr(vcpu, 4), kvmppc_get_gpr(vcpu, 5), @@ -938,6 +952,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) if (ret == H_TOO_HARD) return RESUME_HOST; break; + case H_RANDOM: + if (!powernv_get_random_long(&vcpu->arch.regs.gpr[4])) + ret = H_HARDWARE; + break; case H_SET_PARTITION_TABLE: ret = H_FUNCTION; @@ -963,6 +981,24 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) return RESUME_GUEST; } +/* + * Handle H_CEDE in the nested virtualization case where we haven't + * called the real-mode hcall handlers in book3s_hv_rmhandlers.S. 
+ * This has to be done early, not in kvmppc_pseries_do_hcall(), so + * that the cede logic in kvmppc_run_single_vcpu() works properly. + */ +static void kvmppc_nested_cede(struct kvm_vcpu *vcpu) +{ + vcpu->arch.shregs.msr |= MSR_EE; + vcpu->arch.ceded = 1; + smp_mb(); + if (vcpu->arch.prodded) { + vcpu->arch.prodded = 0; + smp_mb(); + vcpu->arch.ceded = 0; + } +} + static int kvmppc_hcall_impl_hv(unsigned long cmd) { switch (cmd) { @@ -3420,6 +3456,13 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit) vcpu->arch.shregs.msr = vcpu->arch.regs.msr; vcpu->arch.shregs.dar = mfspr(SPRN_DAR); vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR); + + /* H_CEDE has to be handled now, not later */ + if (trap == BOOK3S_INTERRUPT_SYSCALL && + kvmppc_get_gpr(vcpu, 3) == H_CEDE) { + kvmppc_nested_cede(vcpu); + trap = 0; + } } else { trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit); } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 04fcaa4..a4780be 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2462,6 +2462,7 @@ hcall_real_table: hcall_real_table_end: _GLOBAL(kvmppc_h_set_xdabr) +EXPORT_SYMBOL_GPL(kvmppc_h_set_xdabr) andi. 
r0, r5, DABRX_USER | DABRX_KERNEL beq 6f li r0, DABRX_USER | DABRX_KERNEL | DABRX_BTI @@ -2471,6 +2472,7 @@ _GLOBAL(kvmppc_h_set_xdabr) blr _GLOBAL(kvmppc_h_set_dabr) +EXPORT_SYMBOL_GPL(kvmppc_h_set_dabr) li r5, DABRX_USER | DABRX_KERNEL 3: BEGIN_FTR_SECTION diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index d9ba1b0..b0b2bfc 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -1390,7 +1390,8 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type) } #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - if (cpu_has_feature(CPU_FTR_ARCH_206)) { + if (cpu_has_feature(CPU_FTR_ARCH_206) && + cpu_has_feature(CPU_FTR_HVMODE)) { /* Enable real mode support */ xics->real_mode = ENABLE_REALMODE; xics->real_mode_dbg = DEBUG_REALMODE;
When we are running as a nested hypervisor, we use a hypercall to enter the guest rather than code in book3s_hv_rmhandlers.S. This means that the hypercall handlers listed in hcall_real_table never get called. There are some hypercalls that are handled there and not in kvmppc_pseries_do_hcall(), which therefore won't get processed for a nested guest. To fix this, we add cases to kvmppc_pseries_do_hcall() to handle those hypercalls, with the following exceptions: - The HPT hypercalls (H_ENTER, H_REMOVE, etc.) are not handled because we only support radix mode for nested guests. - H_CEDE has to be handled specially because the cede logic in kvmhv_run_single_vcpu assumes that it has been processed by the time that kvmhv_p9_guest_entry() returns. Therefore we put a special case for H_CEDE in kvmhv_p9_guest_entry(). For the XICS hypercalls, if real-mode processing is enabled, then the virtual-mode handlers assume that they are being called only to finish up the operation. Therefore we turn off the real-mode flag in the XICS code when running as a nested hypervisor. Signed-off-by: Paul Mackerras <paulus@ozlabs.org> --- arch/powerpc/include/asm/asm-prototypes.h | 4 +++ arch/powerpc/kvm/book3s_hv.c | 43 +++++++++++++++++++++++++++++++ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2 ++ arch/powerpc/kvm/book3s_xics.c | 3 ++- 4 files changed, 51 insertions(+), 1 deletion(-)