Patchwork [2/8] KVM: PPC: Book3S PR: Keep volatile reg values in vcpu rather than shadow_vcpu

login
register
mail settings
Submitter Paul Mackerras
Date July 11, 2013, 11:50 a.m.
Message ID <20130711115046.GD21353@iris.ozlabs.ibm.com>
Download mbox | patch
Permalink /patch/258421/
State New
Headers show

Comments

Paul Mackerras - July 11, 2013, 11:50 a.m.
Currently PR-style KVM keeps the volatile guest register values
(R0 - R13, CR, LR, CTR, XER, PC) in a shadow_vcpu struct rather than
the main kvm_vcpu struct.  For 64-bit, the shadow_vcpu exists in two
places, a kmalloc'd struct and in the PACA, and it gets copied back
and forth in kvmppc_core_vcpu_load/put(), because the real-mode code
can't rely on being able to access the kmalloc'd struct.

This changes the code to copy the volatile values into the shadow_vcpu
as one of the last things done before entering the guest.  Similarly
the values are copied back out of the shadow_vcpu to the kvm_vcpu
immediately after exiting the guest.  We arrange for interrupts to be
still disabled at this point so that we can't get preempted on 64-bit
and end up copying values from the wrong PACA.

This means that the accessor functions in kvm_book3s.h for these
registers are greatly simplified, and are the same between PR and HV KVM.
In places where accesses to shadow_vcpu fields are now replaced by
accesses to the kvm_vcpu, we can also remove the svcpu_get/put pairs.
Finally, on 64-bit, we don't need the kmalloc'd struct at all any more.

With this, the time to read the PVR one million times in a loop went
from 478.2ms to 480.1ms (averages of 4 values), a difference which is
not statistically significant given the variability of the results.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/include/asm/kvm_book3s.h     | 193 +++++-------------------------
 arch/powerpc/include/asm/kvm_book3s_asm.h |   6 +-
 arch/powerpc/include/asm/kvm_host.h       |   1 +
 arch/powerpc/kernel/asm-offsets.c         |   3 +-
 arch/powerpc/kvm/book3s_emulate.c         |   8 +-
 arch/powerpc/kvm/book3s_interrupts.S      | 101 ++++++++++++++++
 arch/powerpc/kvm/book3s_pr.c              |  68 +++++------
 arch/powerpc/kvm/book3s_rmhandlers.S      |   5 -
 arch/powerpc/kvm/trace.h                  |   7 +-
 9 files changed, 175 insertions(+), 217 deletions(-)
Alexander Graf - July 25, 2013, 1:54 p.m.
On 11.07.2013, at 13:50, Paul Mackerras wrote:

> Currently PR-style KVM keeps the volatile guest register values
> (R0 - R13, CR, LR, CTR, XER, PC) in a shadow_vcpu struct rather than
> the main kvm_vcpu struct.  For 64-bit, the shadow_vcpu exists in two
> places, a kmalloc'd struct and in the PACA, and it gets copied back
> and forth in kvmppc_core_vcpu_load/put(), because the real-mode code
> can't rely on being able to access the kmalloc'd struct.
> 
> This changes the code to copy the volatile values into the shadow_vcpu
> as one of the last things done before entering the guest.  Similarly
> the values are copied back out of the shadow_vcpu to the kvm_vcpu
> immediately after exiting the guest.  We arrange for interrupts to be
> still disabled at this point so that we can't get preempted on 64-bit
> and end up copying values from the wrong PACA.
> 
> This means that the accessor functions in kvm_book3s.h for these
> registers are greatly simplified, and are the same between PR and HV KVM.
> In places where accesses to shadow_vcpu fields are now replaced by
> accesses to the kvm_vcpu, we can also remove the svcpu_get/put pairs.
> Finally, on 64-bit, we don't need the kmalloc'd struct at all any more.
> 
> With this, the time to read the PVR one million times in a loop went
> from 478.2ms to 480.1ms (averages of 4 values), a difference which is
> not statistically significant given the variability of the results.
> 
> Signed-off-by: Paul Mackerras <paulus@samba.org>
> ---
> arch/powerpc/include/asm/kvm_book3s.h     | 193 +++++-------------------------
> arch/powerpc/include/asm/kvm_book3s_asm.h |   6 +-
> arch/powerpc/include/asm/kvm_host.h       |   1 +
> arch/powerpc/kernel/asm-offsets.c         |   3 +-
> arch/powerpc/kvm/book3s_emulate.c         |   8 +-
> arch/powerpc/kvm/book3s_interrupts.S      | 101 ++++++++++++++++
> arch/powerpc/kvm/book3s_pr.c              |  68 +++++------
> arch/powerpc/kvm/book3s_rmhandlers.S      |   5 -
> arch/powerpc/kvm/trace.h                  |   7 +-
> 9 files changed, 175 insertions(+), 217 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
> index 08891d0..5d68f6c 100644
> --- a/arch/powerpc/include/asm/kvm_book3s.h
> +++ b/arch/powerpc/include/asm/kvm_book3s.h
> @@ -198,149 +198,97 @@ extern void kvm_return_point(void);
> #include <asm/kvm_book3s_64.h>
> #endif
> 
> -#ifdef CONFIG_KVM_BOOK3S_PR
> -
> -static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
> -{
> -	return to_book3s(vcpu)->hior;
> -}
> -
> -static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
> -			unsigned long pending_now, unsigned long old_pending)
> -{
> -	if (pending_now)
> -		vcpu->arch.shared->int_pending = 1;
> -	else if (old_pending)
> -		vcpu->arch.shared->int_pending = 0;
> -}
> -
> static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
> {
> -	if ( num < 14 ) {
> -		struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
> -		svcpu->gpr[num] = val;
> -		svcpu_put(svcpu);
> -		to_book3s(vcpu)->shadow_vcpu->gpr[num] = val;
> -	} else
> -		vcpu->arch.gpr[num] = val;
> +	vcpu->arch.gpr[num] = val;
> }
> 
> static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
> {
> -	if ( num < 14 ) {
> -		struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
> -		ulong r = svcpu->gpr[num];
> -		svcpu_put(svcpu);
> -		return r;
> -	} else
> -		return vcpu->arch.gpr[num];
> +	return vcpu->arch.gpr[num];
> }
> 
> static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
> {
> -	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
> -	svcpu->cr = val;
> -	svcpu_put(svcpu);
> -	to_book3s(vcpu)->shadow_vcpu->cr = val;
> +	vcpu->arch.cr = val;
> }
> 
> static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
> {
> -	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
> -	u32 r;
> -	r = svcpu->cr;
> -	svcpu_put(svcpu);
> -	return r;
> +	return vcpu->arch.cr;
> }
> 
> static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
> {
> -	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
> -	svcpu->xer = val;
> -	to_book3s(vcpu)->shadow_vcpu->xer = val;
> -	svcpu_put(svcpu);
> +	vcpu->arch.xer = val;
> }
> 
> static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
> {
> -	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
> -	u32 r;
> -	r = svcpu->xer;
> -	svcpu_put(svcpu);
> -	return r;
> +	return vcpu->arch.xer;
> }
> 
> static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val)
> {
> -	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
> -	svcpu->ctr = val;
> -	svcpu_put(svcpu);
> +	vcpu->arch.ctr = val;
> }
> 
> static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu)
> {
> -	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
> -	ulong r;
> -	r = svcpu->ctr;
> -	svcpu_put(svcpu);
> -	return r;
> +	return vcpu->arch.ctr;
> }
> 
> static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val)
> {
> -	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
> -	svcpu->lr = val;
> -	svcpu_put(svcpu);
> +	vcpu->arch.lr = val;
> }
> 
> static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu)
> {
> -	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
> -	ulong r;
> -	r = svcpu->lr;
> -	svcpu_put(svcpu);
> -	return r;
> +	return vcpu->arch.lr;
> }
> 
> static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val)
> {
> -	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
> -	svcpu->pc = val;
> -	svcpu_put(svcpu);
> +	vcpu->arch.pc = val;
> }
> 
> static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu)
> {
> -	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
> -	ulong r;
> -	r = svcpu->pc;
> -	svcpu_put(svcpu);
> -	return r;
> +	return vcpu->arch.pc;
> }
> 
> static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu)
> {
> 	ulong pc = kvmppc_get_pc(vcpu);
> -	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
> -	u32 r;
> 
> 	/* Load the instruction manually if it failed to do so in the
> 	 * exit path */
> -	if (svcpu->last_inst == KVM_INST_FETCH_FAILED)
> -		kvmppc_ld(vcpu, &pc, sizeof(u32), &svcpu->last_inst, false);
> +	if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED)
> +		kvmppc_ld(vcpu, &pc, sizeof(u32), &vcpu->arch.last_inst, false);
> 
> -	r = svcpu->last_inst;
> -	svcpu_put(svcpu);
> -	return r;
> +	return vcpu->arch.last_inst;
> }
> 
> static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
> {
> -	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
> -	ulong r;
> -	r = svcpu->fault_dar;
> -	svcpu_put(svcpu);
> -	return r;
> +	return vcpu->arch.fault_dar;
> +}
> +
> +#ifdef CONFIG_KVM_BOOK3S_PR
> +
> +static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
> +{
> +	return to_book3s(vcpu)->hior;
> +}
> +
> +static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
> +			unsigned long pending_now, unsigned long old_pending)
> +{
> +	if (pending_now)
> +		vcpu->arch.shared->int_pending = 1;
> +	else if (old_pending)
> +		vcpu->arch.shared->int_pending = 0;
> }
> 
> static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
> @@ -374,83 +322,6 @@ static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
> {
> }
> 
> -static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
> -{
> -	vcpu->arch.gpr[num] = val;
> -}
> -
> -static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
> -{
> -	return vcpu->arch.gpr[num];
> -}
> -
> -static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
> -{
> -	vcpu->arch.cr = val;
> -}
> -
> -static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
> -{
> -	return vcpu->arch.cr;
> -}
> -
> -static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
> -{
> -	vcpu->arch.xer = val;
> -}
> -
> -static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
> -{
> -	return vcpu->arch.xer;
> -}
> -
> -static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val)
> -{
> -	vcpu->arch.ctr = val;
> -}
> -
> -static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu)
> -{
> -	return vcpu->arch.ctr;
> -}
> -
> -static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val)
> -{
> -	vcpu->arch.lr = val;
> -}
> -
> -static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu)
> -{
> -	return vcpu->arch.lr;
> -}
> -
> -static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val)
> -{
> -	vcpu->arch.pc = val;
> -}
> -
> -static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu)
> -{
> -	return vcpu->arch.pc;
> -}
> -
> -static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu)
> -{
> -	ulong pc = kvmppc_get_pc(vcpu);
> -
> -	/* Load the instruction manually if it failed to do so in the
> -	 * exit path */
> -	if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED)
> -		kvmppc_ld(vcpu, &pc, sizeof(u32), &vcpu->arch.last_inst, false);
> -
> -	return vcpu->arch.last_inst;
> -}
> -
> -static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
> -{
> -	return vcpu->arch.fault_dar;
> -}
> -
> static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
> {
> 	return false;
> diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
> index 9039d3c..4141409 100644
> --- a/arch/powerpc/include/asm/kvm_book3s_asm.h
> +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
> @@ -108,14 +108,14 @@ struct kvmppc_book3s_shadow_vcpu {
> 	ulong gpr[14];
> 	u32 cr;
> 	u32 xer;
> -
> -	u32 fault_dsisr;
> -	u32 last_inst;
> 	ulong ctr;
> 	ulong lr;
> 	ulong pc;
> +
> 	ulong shadow_srr1;
> 	ulong fault_dar;
> +	u32 fault_dsisr;
> +	u32 last_inst;
> 
> #ifdef CONFIG_PPC_BOOK3S_32
> 	u32     sr[16];			/* Guest SRs */
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index 3328353..7b26395 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -463,6 +463,7 @@ struct kvm_vcpu_arch {
> 	u32 ctrl;
> 	ulong dabr;
> 	ulong cfar;
> +	ulong shadow_srr1;
> #endif
> 	u32 vrsave; /* also USPRG0 */
> 	u32 mmucr;
> diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
> index a67c76e..936d7cf 100644
> --- a/arch/powerpc/kernel/asm-offsets.c
> +++ b/arch/powerpc/kernel/asm-offsets.c
> @@ -462,6 +462,7 @@ int main(void)
> 	DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared));
> 	DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr));
> 	DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr));
> +	DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1));
> 
> 	DEFINE(VCPU_SHARED_MAS0, offsetof(struct kvm_vcpu_arch_shared, mas0));
> 	DEFINE(VCPU_SHARED_MAS1, offsetof(struct kvm_vcpu_arch_shared, mas1));
> @@ -519,8 +520,6 @@ int main(void)
> 	DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count));
> 	DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
> 	DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
> -	DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) -
> -			   offsetof(struct kvmppc_vcpu_book3s, vcpu));
> 	DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige));
> 	DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv));
> 	DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb));
> diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
> index 360ce68..34044b1 100644
> --- a/arch/powerpc/kvm/book3s_emulate.c
> +++ b/arch/powerpc/kvm/book3s_emulate.c
> @@ -267,12 +267,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
> 
> 			r = kvmppc_st(vcpu, &addr, 32, zeros, true);
> 			if ((r == -ENOENT) || (r == -EPERM)) {
> -				struct kvmppc_book3s_shadow_vcpu *svcpu;
> -
> -				svcpu = svcpu_get(vcpu);
> 				*advance = 0;
> 				vcpu->arch.shared->dar = vaddr;
> -				svcpu->fault_dar = vaddr;
> +				vcpu->arch.fault_dar = vaddr;
> 
> 				dsisr = DSISR_ISSTORE;
> 				if (r == -ENOENT)
> @@ -281,8 +278,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
> 					dsisr |= DSISR_PROTFAULT;
> 
> 				vcpu->arch.shared->dsisr = dsisr;
> -				svcpu->fault_dsisr = dsisr;
> -				svcpu_put(svcpu);
> +				vcpu->arch.fault_dsisr = dsisr;
> 
> 				kvmppc_book3s_queue_irqprio(vcpu,
> 					BOOK3S_INTERRUPT_DATA_STORAGE);
> diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
> index 17cfae5..c935195 100644
> --- a/arch/powerpc/kvm/book3s_interrupts.S
> +++ b/arch/powerpc/kvm/book3s_interrupts.S
> @@ -26,8 +26,12 @@
> 
> #if defined(CONFIG_PPC_BOOK3S_64)
> #define FUNC(name) 		GLUE(.,name)
> +#define GET_SHADOW_VCPU(reg)    mr	reg, r13

Is this correct?

> +
> #elif defined(CONFIG_PPC_BOOK3S_32)
> #define FUNC(name)		name
> +#define GET_SHADOW_VCPU(reg)	lwz     reg, (THREAD + THREAD_KVM_SVCPU)(r2)
> +
> #endif /* CONFIG_PPC_BOOK3S_XX */
> 
> #define VCPU_LOAD_NVGPRS(vcpu) \
> @@ -87,8 +91,49 @@ kvm_start_entry:
> 	VCPU_LOAD_NVGPRS(r4)
> 
> kvm_start_lightweight:
> +	/* Copy registers into shadow vcpu so we can access them in real mode */
> +	GET_SHADOW_VCPU(r3)
> +	PPC_LL	r5, VCPU_GPR(R0)(r4)
> +	PPC_LL	r6, VCPU_GPR(R1)(r4)
> +	PPC_LL	r7, VCPU_GPR(R2)(r4)
> +	PPC_LL	r8, VCPU_GPR(R3)(r4)
> +	PPC_STL	r5, SVCPU_R0(r3)
> +	PPC_STL	r6, SVCPU_R1(r3)
> +	PPC_STL	r7, SVCPU_R2(r3)
> +	PPC_STL	r8, SVCPU_R3(r3)
> +	PPC_LL	r5, VCPU_GPR(R4)(r4)
> +	PPC_LL	r6, VCPU_GPR(R5)(r4)
> +	PPC_LL	r7, VCPU_GPR(R6)(r4)
> +	PPC_LL	r8, VCPU_GPR(R7)(r4)
> +	PPC_STL	r5, SVCPU_R4(r3)
> +	PPC_STL	r6, SVCPU_R5(r3)
> +	PPC_STL	r7, SVCPU_R6(r3)
> +	PPC_STL	r8, SVCPU_R7(r3)
> +	PPC_LL	r5, VCPU_GPR(R8)(r4)
> +	PPC_LL	r6, VCPU_GPR(R9)(r4)
> +	PPC_LL	r7, VCPU_GPR(R10)(r4)
> +	PPC_LL	r8, VCPU_GPR(R11)(r4)
> +	PPC_STL	r5, SVCPU_R8(r3)
> +	PPC_STL	r6, SVCPU_R9(r3)
> +	PPC_STL	r7, SVCPU_R10(r3)
> +	PPC_STL	r8, SVCPU_R11(r3)
> +	PPC_LL	r5, VCPU_GPR(R12)(r4)
> +	PPC_LL	r6, VCPU_GPR(R13)(r4)
> +	lwz	r7, VCPU_CR(r4)
> +	PPC_LL	r8, VCPU_XER(r4)
> +	PPC_STL	r5, SVCPU_R12(r3)
> +	PPC_STL	r6, SVCPU_R13(r3)
> +	stw	r7, SVCPU_CR(r3)
> +	stw	r8, SVCPU_XER(r3)
> +	PPC_LL	r5, VCPU_CTR(r4)
> +	PPC_LL	r6, VCPU_LR(r4)
> +	PPC_LL	r7, VCPU_PC(r4)
> +	PPC_STL	r5, SVCPU_CTR(r3)
> +	PPC_STL	r6, SVCPU_LR(r3)
> +	PPC_STL	r7, SVCPU_PC(r3)

Can we put this and the reverse copy into C functions and just call them from here and below? It'd definitely improve readability. We could even skip the whole thing on systems where we're not limited by an RMA.

> 
> #ifdef CONFIG_PPC_BOOK3S_64
> +	/* Get the dcbz32 flag */

Hrm :)

> 	PPC_LL	r3, VCPU_HFLAGS(r4)
> 	rldicl	r3, r3, 0, 63		/* r3 &= 1 */
> 	stb	r3, HSTATE_RESTORE_HID5(r13)
> @@ -128,6 +173,61 @@ kvmppc_handler_highmem:
> 	/* R7 = vcpu */
> 	PPC_LL	r7, GPR4(r1)
> 
> +	/* Transfer reg values from shadow vcpu back to vcpu struct */
> +	/* On 64-bit, interrupts are still off at this point */
> +	GET_SHADOW_VCPU(r4)
> +	PPC_LL	r5, SVCPU_R0(r4)
> +	PPC_LL	r6, SVCPU_R1(r4)
> +	PPC_LL	r3, SVCPU_R2(r4)
> +	PPC_LL	r8, SVCPU_R3(r4)
> +	PPC_STL	r5, VCPU_GPR(R0)(r7)
> +	PPC_STL	r6, VCPU_GPR(R1)(r7)
> +	PPC_STL	r3, VCPU_GPR(R2)(r7)
> +	PPC_STL	r8, VCPU_GPR(R3)(r7)
> +	PPC_LL	r5, SVCPU_R4(r4)
> +	PPC_LL	r6, SVCPU_R5(r4)
> +	PPC_LL	r3, SVCPU_R6(r4)
> +	PPC_LL	r8, SVCPU_R7(r4)
> +	PPC_STL	r5, VCPU_GPR(R4)(r7)
> +	PPC_STL	r6, VCPU_GPR(R5)(r7)
> +	PPC_STL	r3, VCPU_GPR(R6)(r7)
> +	PPC_STL	r8, VCPU_GPR(R7)(r7)
> +	PPC_LL	r5, SVCPU_R8(r4)
> +	PPC_LL	r6, SVCPU_R9(r4)
> +	PPC_LL	r3, SVCPU_R10(r4)
> +	PPC_LL	r8, SVCPU_R11(r4)
> +	PPC_STL	r5, VCPU_GPR(R8)(r7)
> +	PPC_STL	r6, VCPU_GPR(R9)(r7)
> +	PPC_STL	r3, VCPU_GPR(R10)(r7)
> +	PPC_STL	r8, VCPU_GPR(R11)(r7)
> +	PPC_LL	r5, SVCPU_R12(r4)
> +	PPC_LL	r6, SVCPU_R13(r4)
> +	lwz	r3, SVCPU_CR(r4)
> +	lwz	r8, SVCPU_XER(r4)
> +	PPC_STL	r5, VCPU_GPR(R12)(r7)
> +	PPC_STL	r6, VCPU_GPR(R13)(r7)
> +	stw	r3, VCPU_CR(r7)
> +	PPC_STL	r8, VCPU_XER(r7)
> +	PPC_LL	r5, SVCPU_CTR(r4)
> +	PPC_LL	r6, SVCPU_LR(r4)
> +	PPC_LL	r3, SVCPU_PC(r4)
> +	PPC_LL	r8, SVCPU_SHADOW_SRR1(r4)
> +	PPC_STL	r5, VCPU_CTR(r7)
> +	PPC_STL	r6, VCPU_LR(r7)
> +	PPC_STL	r3, VCPU_PC(r7)
> +	PPC_STL	r8, VCPU_SHADOW_SRR1(r7)
> +	PPC_LL	r5, SVCPU_FAULT_DAR(r4)
> +	lwz	r6, SVCPU_FAULT_DSISR(r4)
> +	lwz	r3, SVCPU_LAST_INST(r4)
> +	PPC_STL	r5, VCPU_FAULT_DAR(r7)
> +	stw	r6, VCPU_FAULT_DSISR(r7)
> +	stw	r3, VCPU_LAST_INST(r7)
> +
> +	/* Re-enable interrupts */
> +	mfmsr	r3
> +	ori	r3, r3, MSR_EE
> +	MTMSR_EERI(r3)
> +
> #ifdef CONFIG_PPC_BOOK3S_64
> 	/*
> 	 * Reload kernel SPRG3 value.
> @@ -135,6 +235,7 @@ kvmppc_handler_highmem:
> 	 */
> 	ld	r3, PACA_SPRG3(r13)
> 	mtspr	SPRN_SPRG3, r3
> +
> #endif /* CONFIG_PPC_BOOK3S_64 */
> 
> 	PPC_STL	r14, VCPU_GPR(R14)(r7)
> diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
> index ddfaf56..5aa64e2 100644
> --- a/arch/powerpc/kvm/book3s_pr.c
> +++ b/arch/powerpc/kvm/book3s_pr.c
> @@ -61,8 +61,6 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> #ifdef CONFIG_PPC_BOOK3S_64
> 	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
> 	memcpy(svcpu->slb, to_book3s(vcpu)->slb_shadow, sizeof(svcpu->slb));
> -	memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu,
> -	       sizeof(get_paca()->shadow_vcpu));
> 	svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max;
> 	svcpu_put(svcpu);
> #endif
> @@ -77,8 +75,6 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
> #ifdef CONFIG_PPC_BOOK3S_64
> 	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
> 	memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb));
> -	memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
> -	       sizeof(get_paca()->shadow_vcpu));
> 	to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max;
> 	svcpu_put(svcpu);
> #endif
> @@ -388,22 +384,18 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
> 
> 	if (page_found == -ENOENT) {
> 		/* Page not found in guest PTE entries */
> -		struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
> 		vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
> -		vcpu->arch.shared->dsisr = svcpu->fault_dsisr;
> +		vcpu->arch.shared->dsisr = vcpu->arch.fault_dsisr;
> 		vcpu->arch.shared->msr |=
> -			(svcpu->shadow_srr1 & 0x00000000f8000000ULL);
> -		svcpu_put(svcpu);
> +			vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL;
> 		kvmppc_book3s_queue_irqprio(vcpu, vec);
> 	} else if (page_found == -EPERM) {
> 		/* Storage protection */
> -		struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
> 		vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
> -		vcpu->arch.shared->dsisr = svcpu->fault_dsisr & ~DSISR_NOHPTE;
> +		vcpu->arch.shared->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE;
> 		vcpu->arch.shared->dsisr |= DSISR_PROTFAULT;
> 		vcpu->arch.shared->msr |=
> -			svcpu->shadow_srr1 & 0x00000000f8000000ULL;
> -		svcpu_put(svcpu);
> +			vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL;
> 		kvmppc_book3s_queue_irqprio(vcpu, vec);
> 	} else if (page_found == -EINVAL) {
> 		/* Page not found in guest SLB */
> @@ -623,21 +615,26 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
> 	switch (exit_nr) {
> 	case BOOK3S_INTERRUPT_INST_STORAGE:
> 	{
> -		struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
> -		ulong shadow_srr1 = svcpu->shadow_srr1;
> +		ulong shadow_srr1 = vcpu->arch.shadow_srr1;
> 		vcpu->stat.pf_instruc++;
> 
> #ifdef CONFIG_PPC_BOOK3S_32
> 		/* We set segments as unused segments when invalidating them. So
> 		 * treat the respective fault as segment fault. */
> -		if (svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT] == SR_INVALID) {
> -			kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
> -			r = RESUME_GUEST;
> +		{
> +			struct kvmppc_book3s_shadow_vcpu *svcpu;
> +			u32 sr;
> +
> +			svcpu = svcpu_get(vcpu);
> +			sr = svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT];
> 			svcpu_put(svcpu);
> -			break;
> +			if (sr == SR_INVALID) {
> +				kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
> +				r = RESUME_GUEST;
> +				break;
> +			}
> 		}
> #endif
> -		svcpu_put(svcpu);
> 
> 		/* only care about PTEG not found errors, but leave NX alone */
> 		if (shadow_srr1 & 0x40000000) {
> @@ -662,21 +659,26 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
> 	case BOOK3S_INTERRUPT_DATA_STORAGE:
> 	{
> 		ulong dar = kvmppc_get_fault_dar(vcpu);
> -		struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
> -		u32 fault_dsisr = svcpu->fault_dsisr;
> +		u32 fault_dsisr = vcpu->arch.fault_dsisr;
> 		vcpu->stat.pf_storage++;
> 
> #ifdef CONFIG_PPC_BOOK3S_32
> 		/* We set segments as unused segments when invalidating them. So
> 		 * treat the respective fault as segment fault. */
> -		if ((svcpu->sr[dar >> SID_SHIFT]) == SR_INVALID) {

Why are we even using a shadow vcpu on book3s_32? We can always just access the real vcpu, no? Hrm.

I think it makes sense to get rid of 99% of the svcpu logic. Remove all bits in C code that ever refer to an svcpu. Only use the vcpu as reference for everything. Until you hit the real mode barrier on book3s_64. There explicitly copy the few things we need in real mode into the PACA and only refer to the PACA in rm code.

It'd be nice if we could speed up that code path on systems that don't need to jump through hoops to get to their vcpu data (G5s, PowerStation, etc), but I don't think it's worth the added complexity. We should rather try to make the code easy to understand :).


Alex

> -			kvmppc_mmu_map_segment(vcpu, dar);
> -			r = RESUME_GUEST;
> +		{
> +			struct kvmppc_book3s_shadow_vcpu *svcpu;
> +			u32 sr;
> +
> +			svcpu = svcpu_get(vcpu);
> +			sr = svcpu->sr[dar >> SID_SHIFT];
> 			svcpu_put(svcpu);
> -			break;
> +			if (sr == SR_INVALID) {
> +				kvmppc_mmu_map_segment(vcpu, dar);
> +				r = RESUME_GUEST;
> +				break;
> +			}
> 		}
> #endif
> -		svcpu_put(svcpu);
> 
> 		/* The only case we need to handle is missing shadow PTEs */
> 		if (fault_dsisr & DSISR_NOHPTE) {
> @@ -723,13 +725,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
> 	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
> 	{
> 		enum emulation_result er;
> -		struct kvmppc_book3s_shadow_vcpu *svcpu;
> 		ulong flags;
> 
> program_interrupt:
> -		svcpu = svcpu_get(vcpu);
> -		flags = svcpu->shadow_srr1 & 0x1f0000ull;
> -		svcpu_put(svcpu);
> +		flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
> 
> 		if (vcpu->arch.shared->msr & MSR_PR) {
> #ifdef EXIT_DEBUG
> @@ -861,9 +860,7 @@ program_interrupt:
> 		break;
> 	default:
> 	{
> -		struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
> -		ulong shadow_srr1 = svcpu->shadow_srr1;
> -		svcpu_put(svcpu);
> +		ulong shadow_srr1 = vcpu->arch.shadow_srr1;
> 		/* Ugh - bork here! What did we get? */
> 		printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
> 			exit_nr, kvmppc_get_pc(vcpu), shadow_srr1);
> @@ -1037,11 +1034,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
> 	if (!vcpu_book3s)
> 		goto out;
> 
> +#ifdef CONFIG_KVM_BOOK3S_32
> 	vcpu_book3s->shadow_vcpu =
> 		kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL);
> 	if (!vcpu_book3s->shadow_vcpu)
> 		goto free_vcpu;
> -
> +#endif
> 	vcpu = &vcpu_book3s->vcpu;
> 	err = kvm_vcpu_init(vcpu, kvm, id);
> 	if (err)
> @@ -1074,8 +1072,10 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
> uninit_vcpu:
> 	kvm_vcpu_uninit(vcpu);
> free_shadow_vcpu:
> +#ifdef CONFIG_KVM_BOOK3S_32
> 	kfree(vcpu_book3s->shadow_vcpu);
> free_vcpu:
> +#endif
> 	vfree(vcpu_book3s);
> out:
> 	return ERR_PTR(err);
> diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
> index 8f7633e..b64d7f9 100644
> --- a/arch/powerpc/kvm/book3s_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_rmhandlers.S
> @@ -179,11 +179,6 @@ _GLOBAL(kvmppc_entry_trampoline)
> 
> 	li	r6, MSR_IR | MSR_DR
> 	andc	r6, r5, r6	/* Clear DR and IR in MSR value */
> -	/*
> -	 * Set EE in HOST_MSR so that it's enabled when we get into our
> -	 * C exit handler function
> -	 */
> -	ori	r5, r5, MSR_EE
> 	mtsrr0	r7
> 	mtsrr1	r6
> 	RFI
> diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
> index e326489..a088e9a 100644
> --- a/arch/powerpc/kvm/trace.h
> +++ b/arch/powerpc/kvm/trace.h
> @@ -101,17 +101,12 @@ TRACE_EVENT(kvm_exit,
> 	),
> 
> 	TP_fast_assign(
> -#ifdef CONFIG_KVM_BOOK3S_PR
> -		struct kvmppc_book3s_shadow_vcpu *svcpu;
> -#endif
> 		__entry->exit_nr	= exit_nr;
> 		__entry->pc		= kvmppc_get_pc(vcpu);
> 		__entry->dar		= kvmppc_get_fault_dar(vcpu);
> 		__entry->msr		= vcpu->arch.shared->msr;
> #ifdef CONFIG_KVM_BOOK3S_PR
> -		svcpu = svcpu_get(vcpu);
> -		__entry->srr1		= svcpu->shadow_srr1;
> -		svcpu_put(svcpu);
> +		__entry->srr1		= vcpu->arch.shadow_srr1;
> #endif
> 		__entry->last_inst	= vcpu->arch.last_inst;
> 	),
> -- 
> 1.8.3.1
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul Mackerras - Aug. 3, 2013, 2 a.m.
On Thu, Jul 25, 2013 at 03:54:17PM +0200, Alexander Graf wrote:
> 
> On 11.07.2013, at 13:50, Paul Mackerras wrote:
> 
> > diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
> > index 17cfae5..c935195 100644
> > --- a/arch/powerpc/kvm/book3s_interrupts.S
> > +++ b/arch/powerpc/kvm/book3s_interrupts.S
> > @@ -26,8 +26,12 @@
> > 
> > #if defined(CONFIG_PPC_BOOK3S_64)
> > #define FUNC(name) 		GLUE(.,name)
> > +#define GET_SHADOW_VCPU(reg)    mr	reg, r13
> 
> Is this correct?

Yes, it's just a copy of what's in book3s_segment.S already.  For
64-bit the shadow vcpu is in the PACA (and the offsets defined in
asm-offsets.c are expressed as offsets from the PACA).

> > kvm_start_lightweight:
> > +	/* Copy registers into shadow vcpu so we can access them in real mode */
> > +	GET_SHADOW_VCPU(r3)
> > +	PPC_LL	r5, VCPU_GPR(R0)(r4)
> > +	PPC_LL	r6, VCPU_GPR(R1)(r4)
> > +	PPC_LL	r7, VCPU_GPR(R2)(r4)
> > +	PPC_LL	r8, VCPU_GPR(R3)(r4)
> > +	PPC_STL	r5, SVCPU_R0(r3)
> > +	PPC_STL	r6, SVCPU_R1(r3)
> > +	PPC_STL	r7, SVCPU_R2(r3)
> > +	PPC_STL	r8, SVCPU_R3(r3)
> > +	PPC_LL	r5, VCPU_GPR(R4)(r4)
> > +	PPC_LL	r6, VCPU_GPR(R5)(r4)
> > +	PPC_LL	r7, VCPU_GPR(R6)(r4)
> > +	PPC_LL	r8, VCPU_GPR(R7)(r4)
> > +	PPC_STL	r5, SVCPU_R4(r3)
> > +	PPC_STL	r6, SVCPU_R5(r3)
> > +	PPC_STL	r7, SVCPU_R6(r3)
> > +	PPC_STL	r8, SVCPU_R7(r3)
> > +	PPC_LL	r5, VCPU_GPR(R8)(r4)
> > +	PPC_LL	r6, VCPU_GPR(R9)(r4)
> > +	PPC_LL	r7, VCPU_GPR(R10)(r4)
> > +	PPC_LL	r8, VCPU_GPR(R11)(r4)
> > +	PPC_STL	r5, SVCPU_R8(r3)
> > +	PPC_STL	r6, SVCPU_R9(r3)
> > +	PPC_STL	r7, SVCPU_R10(r3)
> > +	PPC_STL	r8, SVCPU_R11(r3)
> > +	PPC_LL	r5, VCPU_GPR(R12)(r4)
> > +	PPC_LL	r6, VCPU_GPR(R13)(r4)
> > +	lwz	r7, VCPU_CR(r4)
> > +	PPC_LL	r8, VCPU_XER(r4)
> > +	PPC_STL	r5, SVCPU_R12(r3)
> > +	PPC_STL	r6, SVCPU_R13(r3)
> > +	stw	r7, SVCPU_CR(r3)
> > +	stw	r8, SVCPU_XER(r3)
> > +	PPC_LL	r5, VCPU_CTR(r4)
> > +	PPC_LL	r6, VCPU_LR(r4)
> > +	PPC_LL	r7, VCPU_PC(r4)
> > +	PPC_STL	r5, SVCPU_CTR(r3)
> > +	PPC_STL	r6, SVCPU_LR(r3)
> > +	PPC_STL	r7, SVCPU_PC(r3)
> 
> Can we put this and the reverse copy into C functions and just call them from here and below? It'd definitely improve readability. We could even skip the whole thing on systems where we're not limited by an RMA.

Sure, will do.

> Why are we even using a shadow vcpu on book3s_32? We can always just access the real vcpu, no? Hrm.

At this stage the vcpu is still part of the vcpu_book3s struct, which
is vmalloc'd, so no we can't access it in real mode.  However, I have
a patch series which I'm about to post that will make the vcpu be
kmalloc'd, and then 32-bit could access it directly in real mode, as
could 64-bit bare-metal platforms if that made sense.

> I think it makes sense to get rid of 99% of the svcpu logic. Remove all bits in C code that ever refer to an svcpu. Only use the vcpu as reference for everything. Until you hit the real mode barrier on book3s_64. There explicitly copy the few things we need in real mode into the PACA and only refer to the PACA in rm code.
> 
> It'd be nice if we could speed up that code path on systems that don't need to jump through hoops to get to their vcpu data (G5s, PowerStation, etc), but I don't think it's worth the added complexity. We should rather try to make the code easy to understand :).

I agree completely. :)

Paul.
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 08891d0..5d68f6c 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -198,149 +198,97 @@  extern void kvm_return_point(void);
 #include <asm/kvm_book3s_64.h>
 #endif
 
-#ifdef CONFIG_KVM_BOOK3S_PR
-
-static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
-{
-	return to_book3s(vcpu)->hior;
-}
-
-static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
-			unsigned long pending_now, unsigned long old_pending)
-{
-	if (pending_now)
-		vcpu->arch.shared->int_pending = 1;
-	else if (old_pending)
-		vcpu->arch.shared->int_pending = 0;
-}
-
 static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
 {
-	if ( num < 14 ) {
-		struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-		svcpu->gpr[num] = val;
-		svcpu_put(svcpu);
-		to_book3s(vcpu)->shadow_vcpu->gpr[num] = val;
-	} else
-		vcpu->arch.gpr[num] = val;
+	vcpu->arch.gpr[num] = val;
 }
 
 static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
 {
-	if ( num < 14 ) {
-		struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-		ulong r = svcpu->gpr[num];
-		svcpu_put(svcpu);
-		return r;
-	} else
-		return vcpu->arch.gpr[num];
+	return vcpu->arch.gpr[num];
 }
 
 static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
 {
-	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-	svcpu->cr = val;
-	svcpu_put(svcpu);
-	to_book3s(vcpu)->shadow_vcpu->cr = val;
+	vcpu->arch.cr = val;
 }
 
 static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
 {
-	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-	u32 r;
-	r = svcpu->cr;
-	svcpu_put(svcpu);
-	return r;
+	return vcpu->arch.cr;
 }
 
 static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
 {
-	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-	svcpu->xer = val;
-	to_book3s(vcpu)->shadow_vcpu->xer = val;
-	svcpu_put(svcpu);
+	vcpu->arch.xer = val;
 }
 
 static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
 {
-	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-	u32 r;
-	r = svcpu->xer;
-	svcpu_put(svcpu);
-	return r;
+	return vcpu->arch.xer;
 }
 
 static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val)
 {
-	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-	svcpu->ctr = val;
-	svcpu_put(svcpu);
+	vcpu->arch.ctr = val;
 }
 
 static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu)
 {
-	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-	ulong r;
-	r = svcpu->ctr;
-	svcpu_put(svcpu);
-	return r;
+	return vcpu->arch.ctr;
 }
 
 static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val)
 {
-	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-	svcpu->lr = val;
-	svcpu_put(svcpu);
+	vcpu->arch.lr = val;
 }
 
 static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu)
 {
-	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-	ulong r;
-	r = svcpu->lr;
-	svcpu_put(svcpu);
-	return r;
+	return vcpu->arch.lr;
 }
 
 static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val)
 {
-	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-	svcpu->pc = val;
-	svcpu_put(svcpu);
+	vcpu->arch.pc = val;
 }
 
 static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu)
 {
-	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-	ulong r;
-	r = svcpu->pc;
-	svcpu_put(svcpu);
-	return r;
+	return vcpu->arch.pc;
 }
 
 static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu)
 {
 	ulong pc = kvmppc_get_pc(vcpu);
-	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-	u32 r;
 
 	/* Load the instruction manually if it failed to do so in the
 	 * exit path */
-	if (svcpu->last_inst == KVM_INST_FETCH_FAILED)
-		kvmppc_ld(vcpu, &pc, sizeof(u32), &svcpu->last_inst, false);
+	if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED)
+		kvmppc_ld(vcpu, &pc, sizeof(u32), &vcpu->arch.last_inst, false);
 
-	r = svcpu->last_inst;
-	svcpu_put(svcpu);
-	return r;
+	return vcpu->arch.last_inst;
 }
 
 static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
 {
-	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-	ulong r;
-	r = svcpu->fault_dar;
-	svcpu_put(svcpu);
-	return r;
+	return vcpu->arch.fault_dar;
+}
+
+#ifdef CONFIG_KVM_BOOK3S_PR
+
+static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
+{
+	return to_book3s(vcpu)->hior;
+}
+
+static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
+			unsigned long pending_now, unsigned long old_pending)
+{
+	if (pending_now)
+		vcpu->arch.shared->int_pending = 1;
+	else if (old_pending)
+		vcpu->arch.shared->int_pending = 0;
 }
 
 static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
@@ -374,83 +322,6 @@  static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
 {
 }
 
-static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
-{
-	vcpu->arch.gpr[num] = val;
-}
-
-static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
-{
-	return vcpu->arch.gpr[num];
-}
-
-static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
-{
-	vcpu->arch.cr = val;
-}
-
-static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.cr;
-}
-
-static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
-{
-	vcpu->arch.xer = val;
-}
-
-static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.xer;
-}
-
-static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val)
-{
-	vcpu->arch.ctr = val;
-}
-
-static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.ctr;
-}
-
-static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val)
-{
-	vcpu->arch.lr = val;
-}
-
-static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.lr;
-}
-
-static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val)
-{
-	vcpu->arch.pc = val;
-}
-
-static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.pc;
-}
-
-static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu)
-{
-	ulong pc = kvmppc_get_pc(vcpu);
-
-	/* Load the instruction manually if it failed to do so in the
-	 * exit path */
-	if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED)
-		kvmppc_ld(vcpu, &pc, sizeof(u32), &vcpu->arch.last_inst, false);
-
-	return vcpu->arch.last_inst;
-}
-
-static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.fault_dar;
-}
-
 static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
 {
 	return false;
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 9039d3c..4141409 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -108,14 +108,14 @@  struct kvmppc_book3s_shadow_vcpu {
 	ulong gpr[14];
 	u32 cr;
 	u32 xer;
-
-	u32 fault_dsisr;
-	u32 last_inst;
 	ulong ctr;
 	ulong lr;
 	ulong pc;
+
 	ulong shadow_srr1;
 	ulong fault_dar;
+	u32 fault_dsisr;
+	u32 last_inst;
 
 #ifdef CONFIG_PPC_BOOK3S_32
 	u32     sr[16];			/* Guest SRs */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 3328353..7b26395 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -463,6 +463,7 @@  struct kvm_vcpu_arch {
 	u32 ctrl;
 	ulong dabr;
 	ulong cfar;
+	ulong shadow_srr1;
 #endif
 	u32 vrsave; /* also USPRG0 */
 	u32 mmucr;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index a67c76e..936d7cf 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -462,6 +462,7 @@  int main(void)
 	DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared));
 	DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr));
 	DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr));
+	DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1));
 
 	DEFINE(VCPU_SHARED_MAS0, offsetof(struct kvm_vcpu_arch_shared, mas0));
 	DEFINE(VCPU_SHARED_MAS1, offsetof(struct kvm_vcpu_arch_shared, mas1));
@@ -519,8 +520,6 @@  int main(void)
 	DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count));
 	DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
 	DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
-	DEFINE(VCPU_SVCPU, offsetof(struct kvmppc_vcpu_book3s, shadow_vcpu) -
-			   offsetof(struct kvmppc_vcpu_book3s, vcpu));
 	DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige));
 	DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv));
 	DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb));
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 360ce68..34044b1 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -267,12 +267,9 @@  int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 			r = kvmppc_st(vcpu, &addr, 32, zeros, true);
 			if ((r == -ENOENT) || (r == -EPERM)) {
-				struct kvmppc_book3s_shadow_vcpu *svcpu;
-
-				svcpu = svcpu_get(vcpu);
 				*advance = 0;
 				vcpu->arch.shared->dar = vaddr;
-				svcpu->fault_dar = vaddr;
+				vcpu->arch.fault_dar = vaddr;
 
 				dsisr = DSISR_ISSTORE;
 				if (r == -ENOENT)
@@ -281,8 +278,7 @@  int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 					dsisr |= DSISR_PROTFAULT;
 
 				vcpu->arch.shared->dsisr = dsisr;
-				svcpu->fault_dsisr = dsisr;
-				svcpu_put(svcpu);
+				vcpu->arch.fault_dsisr = dsisr;
 
 				kvmppc_book3s_queue_irqprio(vcpu,
 					BOOK3S_INTERRUPT_DATA_STORAGE);
diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
index 17cfae5..c935195 100644
--- a/arch/powerpc/kvm/book3s_interrupts.S
+++ b/arch/powerpc/kvm/book3s_interrupts.S
@@ -26,8 +26,12 @@ 
 
 #if defined(CONFIG_PPC_BOOK3S_64)
 #define FUNC(name) 		GLUE(.,name)
+#define GET_SHADOW_VCPU(reg)    mr	reg, r13
+
 #elif defined(CONFIG_PPC_BOOK3S_32)
 #define FUNC(name)		name
+#define GET_SHADOW_VCPU(reg)	lwz     reg, (THREAD + THREAD_KVM_SVCPU)(r2)
+
 #endif /* CONFIG_PPC_BOOK3S_XX */
 
 #define VCPU_LOAD_NVGPRS(vcpu) \
@@ -87,8 +91,49 @@  kvm_start_entry:
 	VCPU_LOAD_NVGPRS(r4)
 
 kvm_start_lightweight:
+	/* Copy registers into shadow vcpu so we can access them in real mode */
+	GET_SHADOW_VCPU(r3)
+	PPC_LL	r5, VCPU_GPR(R0)(r4)
+	PPC_LL	r6, VCPU_GPR(R1)(r4)
+	PPC_LL	r7, VCPU_GPR(R2)(r4)
+	PPC_LL	r8, VCPU_GPR(R3)(r4)
+	PPC_STL	r5, SVCPU_R0(r3)
+	PPC_STL	r6, SVCPU_R1(r3)
+	PPC_STL	r7, SVCPU_R2(r3)
+	PPC_STL	r8, SVCPU_R3(r3)
+	PPC_LL	r5, VCPU_GPR(R4)(r4)
+	PPC_LL	r6, VCPU_GPR(R5)(r4)
+	PPC_LL	r7, VCPU_GPR(R6)(r4)
+	PPC_LL	r8, VCPU_GPR(R7)(r4)
+	PPC_STL	r5, SVCPU_R4(r3)
+	PPC_STL	r6, SVCPU_R5(r3)
+	PPC_STL	r7, SVCPU_R6(r3)
+	PPC_STL	r8, SVCPU_R7(r3)
+	PPC_LL	r5, VCPU_GPR(R8)(r4)
+	PPC_LL	r6, VCPU_GPR(R9)(r4)
+	PPC_LL	r7, VCPU_GPR(R10)(r4)
+	PPC_LL	r8, VCPU_GPR(R11)(r4)
+	PPC_STL	r5, SVCPU_R8(r3)
+	PPC_STL	r6, SVCPU_R9(r3)
+	PPC_STL	r7, SVCPU_R10(r3)
+	PPC_STL	r8, SVCPU_R11(r3)
+	PPC_LL	r5, VCPU_GPR(R12)(r4)
+	PPC_LL	r6, VCPU_GPR(R13)(r4)
+	lwz	r7, VCPU_CR(r4)
+	PPC_LL	r8, VCPU_XER(r4)
+	PPC_STL	r5, SVCPU_R12(r3)
+	PPC_STL	r6, SVCPU_R13(r3)
+	stw	r7, SVCPU_CR(r3)
+	stw	r8, SVCPU_XER(r3)
+	PPC_LL	r5, VCPU_CTR(r4)
+	PPC_LL	r6, VCPU_LR(r4)
+	PPC_LL	r7, VCPU_PC(r4)
+	PPC_STL	r5, SVCPU_CTR(r3)
+	PPC_STL	r6, SVCPU_LR(r3)
+	PPC_STL	r7, SVCPU_PC(r3)
 
 #ifdef CONFIG_PPC_BOOK3S_64
+	/* Get the dcbz32 flag */
 	PPC_LL	r3, VCPU_HFLAGS(r4)
 	rldicl	r3, r3, 0, 63		/* r3 &= 1 */
 	stb	r3, HSTATE_RESTORE_HID5(r13)
@@ -128,6 +173,61 @@  kvmppc_handler_highmem:
 	/* R7 = vcpu */
 	PPC_LL	r7, GPR4(r1)
 
+	/* Transfer reg values from shadow vcpu back to vcpu struct */
+	/* On 64-bit, interrupts are still off at this point */
+	GET_SHADOW_VCPU(r4)
+	PPC_LL	r5, SVCPU_R0(r4)
+	PPC_LL	r6, SVCPU_R1(r4)
+	PPC_LL	r3, SVCPU_R2(r4)
+	PPC_LL	r8, SVCPU_R3(r4)
+	PPC_STL	r5, VCPU_GPR(R0)(r7)
+	PPC_STL	r6, VCPU_GPR(R1)(r7)
+	PPC_STL	r3, VCPU_GPR(R2)(r7)
+	PPC_STL	r8, VCPU_GPR(R3)(r7)
+	PPC_LL	r5, SVCPU_R4(r4)
+	PPC_LL	r6, SVCPU_R5(r4)
+	PPC_LL	r3, SVCPU_R6(r4)
+	PPC_LL	r8, SVCPU_R7(r4)
+	PPC_STL	r5, VCPU_GPR(R4)(r7)
+	PPC_STL	r6, VCPU_GPR(R5)(r7)
+	PPC_STL	r3, VCPU_GPR(R6)(r7)
+	PPC_STL	r8, VCPU_GPR(R7)(r7)
+	PPC_LL	r5, SVCPU_R8(r4)
+	PPC_LL	r6, SVCPU_R9(r4)
+	PPC_LL	r3, SVCPU_R10(r4)
+	PPC_LL	r8, SVCPU_R11(r4)
+	PPC_STL	r5, VCPU_GPR(R8)(r7)
+	PPC_STL	r6, VCPU_GPR(R9)(r7)
+	PPC_STL	r3, VCPU_GPR(R10)(r7)
+	PPC_STL	r8, VCPU_GPR(R11)(r7)
+	PPC_LL	r5, SVCPU_R12(r4)
+	PPC_LL	r6, SVCPU_R13(r4)
+	lwz	r3, SVCPU_CR(r4)
+	lwz	r8, SVCPU_XER(r4)
+	PPC_STL	r5, VCPU_GPR(R12)(r7)
+	PPC_STL	r6, VCPU_GPR(R13)(r7)
+	stw	r3, VCPU_CR(r7)
+	PPC_STL	r8, VCPU_XER(r7)
+	PPC_LL	r5, SVCPU_CTR(r4)
+	PPC_LL	r6, SVCPU_LR(r4)
+	PPC_LL	r3, SVCPU_PC(r4)
+	PPC_LL	r8, SVCPU_SHADOW_SRR1(r4)
+	PPC_STL	r5, VCPU_CTR(r7)
+	PPC_STL	r6, VCPU_LR(r7)
+	PPC_STL	r3, VCPU_PC(r7)
+	PPC_STL	r8, VCPU_SHADOW_SRR1(r7)
+	PPC_LL	r5, SVCPU_FAULT_DAR(r4)
+	lwz	r6, SVCPU_FAULT_DSISR(r4)
+	lwz	r3, SVCPU_LAST_INST(r4)
+	PPC_STL	r5, VCPU_FAULT_DAR(r7)
+	stw	r6, VCPU_FAULT_DSISR(r7)
+	stw	r3, VCPU_LAST_INST(r7)
+
+	/* Re-enable interrupts */
+	mfmsr	r3
+	ori	r3, r3, MSR_EE
+	MTMSR_EERI(r3)
+
 #ifdef CONFIG_PPC_BOOK3S_64
 	/*
 	 * Reload kernel SPRG3 value.
@@ -135,6 +235,7 @@  kvmppc_handler_highmem:
 	 */
 	ld	r3, PACA_SPRG3(r13)
 	mtspr	SPRN_SPRG3, r3
+
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
 	PPC_STL	r14, VCPU_GPR(R14)(r7)
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index ddfaf56..5aa64e2 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -61,8 +61,6 @@  void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 #ifdef CONFIG_PPC_BOOK3S_64
 	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
 	memcpy(svcpu->slb, to_book3s(vcpu)->slb_shadow, sizeof(svcpu->slb));
-	memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu,
-	       sizeof(get_paca()->shadow_vcpu));
 	svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max;
 	svcpu_put(svcpu);
 #endif
@@ -77,8 +75,6 @@  void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 #ifdef CONFIG_PPC_BOOK3S_64
 	struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
 	memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb));
-	memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
-	       sizeof(get_paca()->shadow_vcpu));
 	to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max;
 	svcpu_put(svcpu);
 #endif
@@ -388,22 +384,18 @@  int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 	if (page_found == -ENOENT) {
 		/* Page not found in guest PTE entries */
-		struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
 		vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
-		vcpu->arch.shared->dsisr = svcpu->fault_dsisr;
+		vcpu->arch.shared->dsisr = vcpu->arch.fault_dsisr;
 		vcpu->arch.shared->msr |=
-			(svcpu->shadow_srr1 & 0x00000000f8000000ULL);
-		svcpu_put(svcpu);
+			vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL;
 		kvmppc_book3s_queue_irqprio(vcpu, vec);
 	} else if (page_found == -EPERM) {
 		/* Storage protection */
-		struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
 		vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
-		vcpu->arch.shared->dsisr = svcpu->fault_dsisr & ~DSISR_NOHPTE;
+		vcpu->arch.shared->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE;
 		vcpu->arch.shared->dsisr |= DSISR_PROTFAULT;
 		vcpu->arch.shared->msr |=
-			svcpu->shadow_srr1 & 0x00000000f8000000ULL;
-		svcpu_put(svcpu);
+			vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL;
 		kvmppc_book3s_queue_irqprio(vcpu, vec);
 	} else if (page_found == -EINVAL) {
 		/* Page not found in guest SLB */
@@ -623,21 +615,26 @@  int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	switch (exit_nr) {
 	case BOOK3S_INTERRUPT_INST_STORAGE:
 	{
-		struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-		ulong shadow_srr1 = svcpu->shadow_srr1;
+		ulong shadow_srr1 = vcpu->arch.shadow_srr1;
 		vcpu->stat.pf_instruc++;
 
 #ifdef CONFIG_PPC_BOOK3S_32
 		/* We set segments as unused segments when invalidating them. So
 		 * treat the respective fault as segment fault. */
-		if (svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT] == SR_INVALID) {
-			kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
-			r = RESUME_GUEST;
+		{
+			struct kvmppc_book3s_shadow_vcpu *svcpu;
+			u32 sr;
+
+			svcpu = svcpu_get(vcpu);
+			sr = svcpu->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT];
 			svcpu_put(svcpu);
-			break;
+			if (sr == SR_INVALID) {
+				kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
+				r = RESUME_GUEST;
+				break;
+			}
 		}
 #endif
-		svcpu_put(svcpu);
 
 		/* only care about PTEG not found errors, but leave NX alone */
 		if (shadow_srr1 & 0x40000000) {
@@ -662,21 +659,26 @@  int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	case BOOK3S_INTERRUPT_DATA_STORAGE:
 	{
 		ulong dar = kvmppc_get_fault_dar(vcpu);
-		struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-		u32 fault_dsisr = svcpu->fault_dsisr;
+		u32 fault_dsisr = vcpu->arch.fault_dsisr;
 		vcpu->stat.pf_storage++;
 
 #ifdef CONFIG_PPC_BOOK3S_32
 		/* We set segments as unused segments when invalidating them. So
 		 * treat the respective fault as segment fault. */
-		if ((svcpu->sr[dar >> SID_SHIFT]) == SR_INVALID) {
-			kvmppc_mmu_map_segment(vcpu, dar);
-			r = RESUME_GUEST;
+		{
+			struct kvmppc_book3s_shadow_vcpu *svcpu;
+			u32 sr;
+
+			svcpu = svcpu_get(vcpu);
+			sr = svcpu->sr[dar >> SID_SHIFT];
 			svcpu_put(svcpu);
-			break;
+			if (sr == SR_INVALID) {
+				kvmppc_mmu_map_segment(vcpu, dar);
+				r = RESUME_GUEST;
+				break;
+			}
 		}
 #endif
-		svcpu_put(svcpu);
 
 		/* The only case we need to handle is missing shadow PTEs */
 		if (fault_dsisr & DSISR_NOHPTE) {
@@ -723,13 +725,10 @@  int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
 	{
 		enum emulation_result er;
-		struct kvmppc_book3s_shadow_vcpu *svcpu;
 		ulong flags;
 
 program_interrupt:
-		svcpu = svcpu_get(vcpu);
-		flags = svcpu->shadow_srr1 & 0x1f0000ull;
-		svcpu_put(svcpu);
+		flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
 
 		if (vcpu->arch.shared->msr & MSR_PR) {
 #ifdef EXIT_DEBUG
@@ -861,9 +860,7 @@  program_interrupt:
 		break;
 	default:
 	{
-		struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
-		ulong shadow_srr1 = svcpu->shadow_srr1;
-		svcpu_put(svcpu);
+		ulong shadow_srr1 = vcpu->arch.shadow_srr1;
 		/* Ugh - bork here! What did we get? */
 		printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
 			exit_nr, kvmppc_get_pc(vcpu), shadow_srr1);
@@ -1037,11 +1034,12 @@  struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 	if (!vcpu_book3s)
 		goto out;
 
+#ifdef CONFIG_KVM_BOOK3S_32
 	vcpu_book3s->shadow_vcpu =
 		kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL);
 	if (!vcpu_book3s->shadow_vcpu)
 		goto free_vcpu;
-
+#endif
 	vcpu = &vcpu_book3s->vcpu;
 	err = kvm_vcpu_init(vcpu, kvm, id);
 	if (err)
@@ -1074,8 +1072,10 @@  struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 uninit_vcpu:
 	kvm_vcpu_uninit(vcpu);
 free_shadow_vcpu:
+#ifdef CONFIG_KVM_BOOK3S_32
 	kfree(vcpu_book3s->shadow_vcpu);
 free_vcpu:
+#endif
 	vfree(vcpu_book3s);
 out:
 	return ERR_PTR(err);
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index 8f7633e..b64d7f9 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -179,11 +179,6 @@  _GLOBAL(kvmppc_entry_trampoline)
 
 	li	r6, MSR_IR | MSR_DR
 	andc	r6, r5, r6	/* Clear DR and IR in MSR value */
-	/*
-	 * Set EE in HOST_MSR so that it's enabled when we get into our
-	 * C exit handler function
-	 */
-	ori	r5, r5, MSR_EE
 	mtsrr0	r7
 	mtsrr1	r6
 	RFI
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
index e326489..a088e9a 100644
--- a/arch/powerpc/kvm/trace.h
+++ b/arch/powerpc/kvm/trace.h
@@ -101,17 +101,12 @@  TRACE_EVENT(kvm_exit,
 	),
 
 	TP_fast_assign(
-#ifdef CONFIG_KVM_BOOK3S_PR
-		struct kvmppc_book3s_shadow_vcpu *svcpu;
-#endif
 		__entry->exit_nr	= exit_nr;
 		__entry->pc		= kvmppc_get_pc(vcpu);
 		__entry->dar		= kvmppc_get_fault_dar(vcpu);
 		__entry->msr		= vcpu->arch.shared->msr;
 #ifdef CONFIG_KVM_BOOK3S_PR
-		svcpu = svcpu_get(vcpu);
-		__entry->srr1		= svcpu->shadow_srr1;
-		svcpu_put(svcpu);
+		__entry->srr1		= vcpu->arch.shadow_srr1;
 #endif
 		__entry->last_inst	= vcpu->arch.last_inst;
 	),