diff mbox

[2/2] KVM: PPC: Book3S HV: Better handling of exceptions that happen in real mode

Message ID 1380887104-805-3-git-send-email-paulus@samba.org
State New, archived
Headers show

Commit Message

Paul Mackerras Oct. 4, 2013, 11:45 a.m. UTC
When an interrupt or exception happens in the guest that comes to the
host, the CPU goes to hypervisor real mode (MMU off) to handle the
exception but doesn't change the MMU context.  After saving a few
registers, we then clear the "in guest" flag.  If, for any reason,
we get an exception in the real-mode code, that then gets handled
by the normal kernel exception handlers, which turn the MMU on.  This
is disastrous if the MMU is still set to the guest context, since we
end up executing instructions from random places in the guest kernel
with hypervisor privilege.

In order to catch this situation, we define a new value for the "in guest"
flag, KVM_GUEST_MODE_HOST_HV, to indicate that we are in hypervisor real
mode with guest MMU context.  If the "in guest" flag is set to this value,
we branch off to an emergency handler.  For the moment, this just does
a branch to self to stop the CPU from doing anything further.

While we're here, we define another new flag value to indicate that we
are in a HV guest, as distinct from a PR guest.  This will be useful
when we have a kernel that can support both PR and HV guests concurrently.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/include/asm/kvm_asm.h      |  2 ++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 31 +++++++++++++++++++++++++------
 2 files changed, 27 insertions(+), 6 deletions(-)

Comments

Alexander Graf Oct. 4, 2013, 11:59 a.m. UTC | #1
On 04.10.2013, at 13:45, Paul Mackerras wrote:

> When an interrupt or exception happens in the guest that comes to the
> host, the CPU goes to hypervisor real mode (MMU off) to handle the
> exception but doesn't change the MMU context.  After saving a few
> registers, we then clear the "in guest" flag.  If, for any reason,
> we get an exception in the real-mode code, that then gets handled
> by the normal kernel exception handlers, which turn the MMU on.  This
> is disastrous if the MMU is still set to the guest context, since we
> end up executing instructions from random places in the guest kernel
> with hypervisor privilege.
> 
> In order to catch this situation, we define a new value for the "in guest"
> flag, KVM_GUEST_MODE_HOST_HV, to indicate that we are in hypervisor real
> mode with guest MMU context.  If the "in guest" flag is set to this value,
> we branch off to an emergency handler.  For the moment, this just does
> a branch to self to stop the CPU from doing anything further.

I don't understand how you get there. The only case I can imagine where you'd hit a normal Linux handler while in guest MMU context is a bug in the complex real mode handling code.

So basically what you're doing is you're changing the "guest mode" flag to HOST_HV while you're executing these.

The other change this patch does is it postpones the return to GUEST_MODE_NONE to after fast-path handling of interrupt exits.

What if you simply don't introduce a new mode but instead only postpone the GUEST_MODE_NONE switch to later? Worst case that can happen is that your bug spins the CPU into handling that exit in a tight loop - not much different from your explicit spin, no?


Alex

> 
> While we're here, we define another new flag value to indicate that we
> are in a HV guest, as distinct from a PR guest.  This will be useful
> when we have a kernel that can support both PR and HV guests concurrently.
> 
> Signed-off-by: Paul Mackerras <paulus@samba.org>
> ---
> arch/powerpc/include/asm/kvm_asm.h      |  2 ++
> arch/powerpc/kvm/book3s_hv_rmhandlers.S | 31 +++++++++++++++++++++++++------
> 2 files changed, 27 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
> index e2d4d46..1bd92fd 100644
> --- a/arch/powerpc/include/asm/kvm_asm.h
> +++ b/arch/powerpc/include/asm/kvm_asm.h
> @@ -138,6 +138,8 @@
> #define KVM_GUEST_MODE_NONE	0
> #define KVM_GUEST_MODE_GUEST	1
> #define KVM_GUEST_MODE_SKIP	2
> +#define KVM_GUEST_MODE_GUEST_HV	3
> +#define KVM_GUEST_MODE_HOST_HV	4
> 
> #define KVM_INST_FETCH_FAILED	-1
> 
> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> index 8e0f28f..f1f1bf3 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> @@ -383,6 +383,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
> 	mtspr	SPRN_DAR, r5
> 	mtspr	SPRN_DSISR, r6
> 
> +	li	r6, KVM_GUEST_MODE_HOST_HV
> +	stb	r6, HSTATE_IN_GUEST(r13)
> +
> BEGIN_FTR_SECTION
> 	/* Restore AMR and UAMOR, set AMOR to all 1s */
> 	ld	r5,VCPU_AMR(r4)
> @@ -682,7 +685,7 @@ fast_guest_return:
> 	mtspr	SPRN_HSRR1,r11
> 
> 	/* Activate guest mode, so faults get handled by KVM */
> -	li	r9, KVM_GUEST_MODE_GUEST
> +	li	r9, KVM_GUEST_MODE_GUEST_HV
> 	stb	r9, HSTATE_IN_GUEST(r13)
> 
> 	/* Enter guest */
> @@ -742,6 +745,14 @@ kvmppc_interrupt:
> 	 */
> 	/* abuse host_r2 as third scratch area; we get r2 from PACATOC(r13) */
> 	std	r9, HSTATE_HOST_R2(r13)
> +
> +	lbz	r9, HSTATE_IN_GUEST(r13)
> +	cmpwi	r9, KVM_GUEST_MODE_HOST_HV
> +	beq	kvmppc_bad_host_intr
> +	/* We're now back in the host but in guest MMU context */
> +	li	r9, KVM_GUEST_MODE_HOST_HV
> +	stb	r9, HSTATE_IN_GUEST(r13)
> +
> 	ld	r9, HSTATE_KVM_VCPU(r13)
> 
> 	/* Save registers */
> @@ -793,10 +804,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
> 	std	r3, VCPU_GPR(R13)(r9)
> 	std	r4, VCPU_LR(r9)
> 
> -	/* Unset guest mode */
> -	li	r0, KVM_GUEST_MODE_NONE
> -	stb	r0, HSTATE_IN_GUEST(r13)
> -
> 	stw	r12,VCPU_TRAP(r9)
> 
> 	/* Save HEIR (HV emulation assist reg) in last_inst
> @@ -1186,6 +1193,10 @@ BEGIN_FTR_SECTION
> 	mtspr	SPRN_AMR,r6
> END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
> 
> +	/* Unset guest mode */
> +	li	r0, KVM_GUEST_MODE_NONE
> +	stb	r0, HSTATE_IN_GUEST(r13)
> +
> 	/* Switch DSCR back to host value */
> BEGIN_FTR_SECTION
> 	mfspr	r8, SPRN_DSCR
> @@ -1388,7 +1399,7 @@ fast_interrupt_c_return:
> 	stw	r8, VCPU_LAST_INST(r9)
> 
> 	/* Unset guest mode. */
> -	li	r0, KVM_GUEST_MODE_NONE
> +	li	r0, KVM_GUEST_MODE_HOST_HV
> 	stb	r0, HSTATE_IN_GUEST(r13)
> 	b	guest_exit_cont
> 
> @@ -1937,3 +1948,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
> 	lwz	r7,VCPU_VRSAVE(r4)
> 	mtspr	SPRN_VRSAVE,r7
> 	blr
> +
> +/*
> + * We come here if we get any exception or interrupt while we are
> + * executing host real mode code while in guest MMU context.
> + * For now just spin, but we should do something better.
> + */
> +kvmppc_bad_host_intr:
> +	b	.
> -- 
> 1.8.4.rc3
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul Mackerras Oct. 4, 2013, 12:33 p.m. UTC | #2
On Fri, Oct 04, 2013 at 01:59:25PM +0200, Alexander Graf wrote:
> 
> On 04.10.2013, at 13:45, Paul Mackerras wrote:
> 
> > When an interrupt or exception happens in the guest that comes to the
> > host, the CPU goes to hypervisor real mode (MMU off) to handle the
> > exception but doesn't change the MMU context.  After saving a few
> > registers, we then clear the "in guest" flag.  If, for any reason,
> > we get an exception in the real-mode code, that then gets handled
> > by the normal kernel exception handlers, which turn the MMU on.  This
> > is disastrous if the MMU is still set to the guest context, since we
> > end up executing instructions from random places in the guest kernel
> > with hypervisor privilege.
> > 
> > In order to catch this situation, we define a new value for the "in guest"
> > flag, KVM_GUEST_MODE_HOST_HV, to indicate that we are in hypervisor real
> > mode with guest MMU context.  If the "in guest" flag is set to this value,
> > we branch off to an emergency handler.  For the moment, this just does
> > a branch to self to stop the CPU from doing anything further.
> 
> I don't understand how you get there. The only case I can imagine where you'd hit a normal Linux handler while in guest MMU context is a bug in the complex real mode handling code.

A bug is the usual case.  I think it is also possible (though very
unlikely) to get a machine check interrupt, since they can come at any
time.

> So basically what you're doing is you're changing the "guest mode" flag to HOST_HV while you're executing these.
> 
> The other change this patch does is it postpones the return to GUEST_MODE_NONE to after fast-path handling of interrupt exits.
> 
> What if you simply don't introduce a new mode but instead only postpone the GUEST_MODE_NONE switch to later? Worst case that can happen is that your bug spins the CPU into handling that exit in a tight loop - not much different from your explicit spin, no?

I did it like that so that we have a chance to save away the register
state for the point where the exception happened separately from the
guest state.  It can be very useful for debugging to have both sets.
The other thing of course is that if I did what you suggest and then
happened not to hit the exception on the second time through, we would
end up with corrupted guest state and no indication that it was
corrupted (since the register state for the bad exception would get
saved away in the vcpu struct).

I admit I haven't written the code to save away the register state
when one of these bad exceptions happens; that's partly because in the
lab we have ways of getting the register state directly from the CPU,
but I'm certainly intending to write that code soon.

Paul.
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf Oct. 4, 2013, 12:56 p.m. UTC | #3
On 04.10.2013, at 14:33, Paul Mackerras wrote:

> On Fri, Oct 04, 2013 at 01:59:25PM +0200, Alexander Graf wrote:
>> 
>> On 04.10.2013, at 13:45, Paul Mackerras wrote:
>> 
>>> When an interrupt or exception happens in the guest that comes to the
>>> host, the CPU goes to hypervisor real mode (MMU off) to handle the
>>> exception but doesn't change the MMU context.  After saving a few
>>> registers, we then clear the "in guest" flag.  If, for any reason,
>>> we get an exception in the real-mode code, that then gets handled
>>> by the normal kernel exception handlers, which turn the MMU on.  This
>>> is disastrous if the MMU is still set to the guest context, since we
>>> end up executing instructions from random places in the guest kernel
>>> with hypervisor privilege.
>>> 
>>> In order to catch this situation, we define a new value for the "in guest"
>>> flag, KVM_GUEST_MODE_HOST_HV, to indicate that we are in hypervisor real
>>> mode with guest MMU context.  If the "in guest" flag is set to this value,
>>> we branch off to an emergency handler.  For the moment, this just does
>>> a branch to self to stop the CPU from doing anything further.
>> 
>> I don't understand how you get there. The only case I can imagine where you'd hit a normal Linux handler while in guest MMU context is a bug in the complex real mode handling code.
> 
> A bug is the usual case.  I think it is also possible (though very
> unlikely) to get a machine check interrupt, since they can come at any
> time.
> 
>> So basically what you're doing is you're changing the "guest mode" flag to HOST_HV while you're executing these.
>> 
>> The other change this patch does is it postpones the return to GUEST_MODE_NONE to after fast-path handling of interrupt exits.
>> 
>> What if you simply don't introduce a new mode but instead only postpone the GUEST_MODE_NONE switch to later? Worst case that can happen is that your bug spins the CPU into handling that exit in a tight loop - not much different from your explicit spin, no?
> 
> I did it like that so that we have a chance to save away the register
> state for the point where the exception happened separately from the
> guest state.  It can be very useful for debugging to have both sets.
> The other thing of course is that if I did what you suggest and then
> happened not to hit the exception on the second time through, we would
> end up with corrupted guest state and no indication that it was
> corrupted (since the register state for the bad exception would get
> saved away in the vcpu struct).
> 
> I admit I haven't written the code to save away the register state
> when one of these bad exceptions happens; that's partly because in the
> lab we have ways of getting the register state directly from the CPU,
> but I'm certainly intending to write that code soon.

Fair enough, but I think running that additional code when we only have a single register available, and then even stalling the CPU on a memory write to store away and reload the state, doesn't really help performance.

Either way, applied to ppc-next.


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul Mackerras Oct. 4, 2013, 11:42 p.m. UTC | #4
On Fri, Oct 04, 2013 at 02:56:31PM +0200, Alexander Graf wrote:
> 
> On 04.10.2013, at 14:33, Paul Mackerras wrote:
> 
> > On Fri, Oct 04, 2013 at 01:59:25PM +0200, Alexander Graf wrote:
> >> 
> >> On 04.10.2013, at 13:45, Paul Mackerras wrote:
> >> 
> >>> When an interrupt or exception happens in the guest that comes to the
> >>> host, the CPU goes to hypervisor real mode (MMU off) to handle the
> >>> exception but doesn't change the MMU context.  After saving a few
> >>> registers, we then clear the "in guest" flag.  If, for any reason,
> >>> we get an exception in the real-mode code, that then gets handled
> >>> by the normal kernel exception handlers, which turn the MMU on.  This
> >>> is disastrous if the MMU is still set to the guest context, since we
> >>> end up executing instructions from random places in the guest kernel
> >>> with hypervisor privilege.
> >>> 
> >>> In order to catch this situation, we define a new value for the "in guest"
> >>> flag, KVM_GUEST_MODE_HOST_HV, to indicate that we are in hypervisor real
> >>> mode with guest MMU context.  If the "in guest" flag is set to this value,
> >>> we branch off to an emergency handler.  For the moment, this just does
> >>> a branch to self to stop the CPU from doing anything further.
> >> 
> >> I don't understand how you get there. The only case I can imagine where you'd hit a normal Linux handler while in guest MMU context is a bug in the complex real mode handling code.
> > 
> > A bug is the usual case.  I think it is also possible (though very
> > unlikely) to get a machine check interrupt, since they can come at any
> > time.
> > 
> >> So basically what you're doing is you're changing the "guest mode" flag to HOST_HV while you're executing these.
> >> 
> >> The other change this patch does is it postpones the return to GUEST_MODE_NONE to after fast-path handling of interrupt exits.
> >> 
> >> What if you simply don't introduce a new mode but instead only postpone the GUEST_MODE_NONE switch to later? Worst case that can happen is that your bug spins the CPU into handling that exit in a tight loop - not much different from your explicit spin, no?
> > 
> > I did it like that so that we have a chance to save away the register
> > state for the point where the exception happened separately from the
> > guest state.  It can be very useful for debugging to have both sets.
> > The other thing of course is that if I did what you suggest and then
> > happened not to hit the exception on the second time through, we would
> > end up with corrupted guest state and no indication that it was
> > corrupted (since the register state for the bad exception would get
> > saved away in the vcpu struct).
> > 
> > I admit I haven't written the code to save away the register state
> > when one of these bad exceptions happens; that's partly because in the
> > lab we have ways of getting the register state directly from the CPU,
> > but I'm certainly intending to write that code soon.
> 
> Fair enough, but I think running that additional code when we only have a single register available, and then even stalling the CPU on a memory write to store away and reload the state, doesn't really help performance.

That's what register renaming, branch prediction and speculative
execution are for. :)

> Either way, applied to ppc-next.

Thanks,
Paul.
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index e2d4d46..1bd92fd 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -138,6 +138,8 @@ 
 #define KVM_GUEST_MODE_NONE	0
 #define KVM_GUEST_MODE_GUEST	1
 #define KVM_GUEST_MODE_SKIP	2
+#define KVM_GUEST_MODE_GUEST_HV	3
+#define KVM_GUEST_MODE_HOST_HV	4
 
 #define KVM_INST_FETCH_FAILED	-1
 
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 8e0f28f..f1f1bf3 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -383,6 +383,9 @@  END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	mtspr	SPRN_DAR, r5
 	mtspr	SPRN_DSISR, r6
 
+	li	r6, KVM_GUEST_MODE_HOST_HV
+	stb	r6, HSTATE_IN_GUEST(r13)
+
 BEGIN_FTR_SECTION
 	/* Restore AMR and UAMOR, set AMOR to all 1s */
 	ld	r5,VCPU_AMR(r4)
@@ -682,7 +685,7 @@  fast_guest_return:
 	mtspr	SPRN_HSRR1,r11
 
 	/* Activate guest mode, so faults get handled by KVM */
-	li	r9, KVM_GUEST_MODE_GUEST
+	li	r9, KVM_GUEST_MODE_GUEST_HV
 	stb	r9, HSTATE_IN_GUEST(r13)
 
 	/* Enter guest */
@@ -742,6 +745,14 @@  kvmppc_interrupt:
 	 */
 	/* abuse host_r2 as third scratch area; we get r2 from PACATOC(r13) */
 	std	r9, HSTATE_HOST_R2(r13)
+
+	lbz	r9, HSTATE_IN_GUEST(r13)
+	cmpwi	r9, KVM_GUEST_MODE_HOST_HV
+	beq	kvmppc_bad_host_intr
+	/* We're now back in the host but in guest MMU context */
+	li	r9, KVM_GUEST_MODE_HOST_HV
+	stb	r9, HSTATE_IN_GUEST(r13)
+
 	ld	r9, HSTATE_KVM_VCPU(r13)
 
 	/* Save registers */
@@ -793,10 +804,6 @@  END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	std	r3, VCPU_GPR(R13)(r9)
 	std	r4, VCPU_LR(r9)
 
-	/* Unset guest mode */
-	li	r0, KVM_GUEST_MODE_NONE
-	stb	r0, HSTATE_IN_GUEST(r13)
-
 	stw	r12,VCPU_TRAP(r9)
 
 	/* Save HEIR (HV emulation assist reg) in last_inst
@@ -1186,6 +1193,10 @@  BEGIN_FTR_SECTION
 	mtspr	SPRN_AMR,r6
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
+	/* Unset guest mode */
+	li	r0, KVM_GUEST_MODE_NONE
+	stb	r0, HSTATE_IN_GUEST(r13)
+
 	/* Switch DSCR back to host value */
 BEGIN_FTR_SECTION
 	mfspr	r8, SPRN_DSCR
@@ -1388,7 +1399,7 @@  fast_interrupt_c_return:
 	stw	r8, VCPU_LAST_INST(r9)
 
 	/* Unset guest mode. */
-	li	r0, KVM_GUEST_MODE_NONE
+	li	r0, KVM_GUEST_MODE_HOST_HV
 	stb	r0, HSTATE_IN_GUEST(r13)
 	b	guest_exit_cont
 
@@ -1937,3 +1948,11 @@  END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 	lwz	r7,VCPU_VRSAVE(r4)
 	mtspr	SPRN_VRSAVE,r7
 	blr
+
+/*
+ * We come here if we get any exception or interrupt while we are
+ * executing host real mode code while in guest MMU context.
+ * For now just spin, but we should do something better.
+ */
+kvmppc_bad_host_intr:
+	b	.