diff mbox series

[v10,2/2] powerpc/64s: KVM update for reimplement book3s idle code in C

Message ID 20190428114515.32683-3-npiggin@gmail.com
State Accepted
Headers show
Series powerpc/64s: reimplement book3s idle code in C | expand

Commit Message

Nicholas Piggin April 28, 2019, 11:45 a.m. UTC
This is the KVM update to the new idle code. A few improvements:

- Idle sleepers now always return to caller rather than branch out
  to KVM first.
- This allows optimisations like very fast return to caller when no
  state has been lost.
- KVM no longer requires nap_state_lost because it controls NVGPR
  save/restore itself on the way in and out.
- The heavy idle wakeup KVM request check can be moved out of the
  normal host idle code and into the not-performance-critical offline
  code.
- KVM nap code now returns from where it is called, which makes the
  flow a bit easier to follow.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/paca.h         |   1 -
 arch/powerpc/kernel/asm-offsets.c       |   1 -
 arch/powerpc/kernel/exceptions-64s.S    |  14 ++-
 arch/powerpc/kernel/idle_book3s.S       |  22 -----
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 118 ++++++++++++++----------
 arch/powerpc/platforms/powernv/idle.c   |  15 +++
 arch/powerpc/xmon/xmon.c                |   3 -
 7 files changed, 92 insertions(+), 82 deletions(-)

Comments

Paul Mackerras May 13, 2019, 6:42 a.m. UTC | #1
On Sun, Apr 28, 2019 at 09:45:15PM +1000, Nicholas Piggin wrote:
> This is the KVM update to the new idle code. A few improvements:
> 
> - Idle sleepers now always return to caller rather than branch out
>   to KVM first.
> - This allows optimisations like very fast return to caller when no
>   state has been lost.
> - KVM no longer requires nap_state_lost because it controls NVGPR
>   save/restore itself on the way in and out.
> - The heavy idle wakeup KVM request check can be moved out of the
>   normal host idle code and into the not-performance-critical offline
>   code.
> - KVM nap code now returns from where it is called, which makes the
>   flow a bit easier to follow.

One question below...

> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> index 58d0f1ba845d..f66191d8f841 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
...
> @@ -2656,6 +2662,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
>  
>  	lis	r3, LPCR_PECEDP@h	/* Do wake on privileged doorbell */
>  
> +	/* Go back to host stack */
> +	ld	r1, HSTATE_HOST_R1(r13)

At this point we are in kvmppc_h_cede, which we branched to from
hcall_try_real_mode, which came from the guest exit path, where we
have already loaded r1 from HSTATE_HOST_R1(r13).  So if there is a
path to get here with r1 not already set to HSTATE_HOST_R1(r13), then
I missed it - please point it out to me.  Otherwise this statement
seems superfluous.

Paul.
Nicholas Piggin May 16, 2019, 4:43 a.m. UTC | #2
Paul Mackerras's on May 13, 2019 4:42 pm:
> On Sun, Apr 28, 2019 at 09:45:15PM +1000, Nicholas Piggin wrote:
>> This is the KVM update to the new idle code. A few improvements:
>> 
>> - Idle sleepers now always return to caller rather than branch out
>>   to KVM first.
>> - This allows optimisations like very fast return to caller when no
>>   state has been lost.
>> - KVM no longer requires nap_state_lost because it controls NVGPR
>>   save/restore itself on the way in and out.
>> - The heavy idle wakeup KVM request check can be moved out of the
>>   normal host idle code and into the not-performance-critical offline
>>   code.
>> - KVM nap code now returns from where it is called, which makes the
>>   flow a bit easier to follow.
> 
> One question below...
> 
>> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
>> index 58d0f1ba845d..f66191d8f841 100644
>> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
>> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> ...
>> @@ -2656,6 +2662,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
>>  
>>  	lis	r3, LPCR_PECEDP@h	/* Do wake on privileged doorbell */
>>  
>> +	/* Go back to host stack */
>> +	ld	r1, HSTATE_HOST_R1(r13)
> 
> At this point we are in kvmppc_h_cede, which we branched to from
> hcall_try_real_mode, which came from the guest exit path, where we
> have already loaded r1 from HSTATE_HOST_R1(r13).  So if there is a
> path to get here with r1 not already set to HSTATE_HOST_R1(r13), then
> I missed it - please point it out to me.  Otherwise this statement
> seems superfluous.

I'm not sure why I put that there. I think you're right it could
be removed.

Thanks,
Nick
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index e55dedd7ee3e..245d11a71784 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -174,7 +174,6 @@  struct paca_struct {
 	u8 io_sync;			/* writel() needs spin_unlock sync */
 	u8 irq_work_pending;		/* IRQ_WORK interrupt while soft-disable */
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	u8 nap_state_lost;		/* NV GPR values lost in power7_idle */
 	u8 pmcregs_in_use;		/* pseries puts this in lppaca */
 #endif
 	u64 sprg_vdso;			/* Saved user-visible sprg */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index a14a77bc4243..8e02444e9d3d 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -271,7 +271,6 @@  int main(void)
 	OFFSET(ACCOUNT_USER_TIME, paca_struct, accounting.utime);
 	OFFSET(ACCOUNT_SYSTEM_TIME, paca_struct, accounting.stime);
 	OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save);
-	OFFSET(PACA_NAPSTATELOST, paca_struct, nap_state_lost);
 	OFFSET(PACA_SPRG_VDSO, paca_struct, sprg_vdso);
 #else /* CONFIG_PPC64 */
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 671c256da167..6b86055e5251 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -122,6 +122,8 @@  EXC_VIRT_NONE(0x4000, 0x100)
 	rlwinm.	r10,r10,47-31,30,31 ;					\
 	beq-	1f ;							\
 	cmpwi	cr1,r10,2 ;						\
+	mfspr	r3,SPRN_SRR1 ;						\
+	bltlr	cr1 ;	/* no state loss, return to idle caller */	\
 	BRANCH_TO_C000(r10, system_reset_idle_common) ;			\
 1:									\
 	KVMTEST_PR(n) ;							\
@@ -145,12 +147,10 @@  TRAMP_KVM(PACA_EXNMI, 0x100)
 
 #ifdef CONFIG_PPC_P7_NAP
 EXC_COMMON_BEGIN(system_reset_idle_common)
-	mfspr	r3,SPRN_SRR1
-#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	/* this bltlr could  be moved before the branch_to, and the
-	 * branch_to could maybe go straight to idle_return */
-	bltlr	cr1	/* no state loss, return to idle caller */
-#endif
+	/*
+	 * This must be a direct branch (without linker branch stub) because
+	 * we can not use TOC at this point as r2 may not be restored yet.
+	 */
 	b	idle_return_gpr_loss
 #endif
 
@@ -443,9 +443,7 @@  EXC_COMMON_BEGIN(machine_check_idle_common)
 	mtlr	r4
 	rlwinm	r10,r3,47-31,30,31
 	cmpwi	cr1,r10,2
-#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	bltlr	cr1	/* no state loss, return to idle caller */
-#endif
 	b	idle_return_gpr_loss
 #endif
 	/*
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 0fb2eb731a29..2dfbd5d5b932 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -19,9 +19,6 @@ 
 #include <asm/asm-offsets.h>
 #include <asm/ppc-opcode.h>
 #include <asm/cpuidle.h>
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-#include <asm/kvm_book3s_asm.h>
-#endif
 
 /*
  * Desired PSSCR in r3
@@ -93,25 +90,6 @@  _GLOBAL(isa300_idle_stop_mayloss)
  * a simple blr instead).
  */
 _GLOBAL(idle_return_gpr_loss)
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	lbz	r0,HSTATE_HWTHREAD_STATE(r13)
-	cmpwi	r0,KVM_HWTHREAD_IN_KERNEL
-	beq	0f
-	li	r0,KVM_HWTHREAD_IN_KERNEL
-	stb	r0,HSTATE_HWTHREAD_STATE(r13)
-	/* Order setting hwthread_state vs. testing hwthread_req */
-	sync
-0:	lbz	r0,HSTATE_HWTHREAD_REQ(r13)
-	cmpwi	r0,0
-	beq	1f
-	b	kvm_start_guest
-1:
-	lbz	r0,PACA_NAPSTATELOST(r13)
-	cmpwi	r0,0
-	bne	2f
-	bltlr	cr1
-2:
-#endif
 	ld	r1,PACAR1(r13)
 	ld	r4,-8*19(r1)
 	ld	r5,-8*20(r1)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 58d0f1ba845d..f66191d8f841 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -35,6 +35,7 @@ 
 #include <asm/thread_info.h>
 #include <asm/asm-compat.h>
 #include <asm/feature-fixups.h>
+#include <asm/cpuidle.h>
 
 /* Sign-extend HDEC if not on POWER9 */
 #define EXTEND_HDEC(reg)			\
@@ -45,6 +46,7 @@  END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 /* Values in HSTATE_NAPPING(r13) */
 #define NAPPING_CEDE	1
 #define NAPPING_NOVCPU	2
+#define NAPPING_UNSPLIT	3
 
 /* Stack frame offsets for kvmppc_hv_entry */
 #define SFS			208
@@ -290,17 +292,19 @@  kvm_novcpu_exit:
 	b	kvmhv_switch_to_host
 
 /*
- * We come in here when wakened from nap mode.
- * Relocation is off and most register values are lost.
- * r13 points to the PACA.
+ * We come in here when wakened from Linux offline idle code.
+ * Relocation is off
  * r3 contains the SRR1 wakeup value, SRR1 is trashed.
  */
-	.globl	kvm_start_guest
-kvm_start_guest:
-	/* Set runlatch bit the minute you wake up from nap */
-	mfspr	r0, SPRN_CTRLF
-	ori 	r0, r0, 1
-	mtspr	SPRN_CTRLT, r0
+_GLOBAL(idle_kvm_start_guest)
+	ld	r4,PACAEMERGSP(r13)
+	mfcr	r5
+	mflr	r0
+	std	r1,0(r4)
+	std	r5,8(r4)
+	std	r0,16(r4)
+	subi	r1,r4,STACK_FRAME_OVERHEAD
+	SAVE_NVGPRS(r1)
 
 	/*
 	 * Could avoid this and pass it through in r3. For now,
@@ -308,27 +312,23 @@  kvm_start_guest:
 	 */
 	mtspr	SPRN_SRR1,r3
 
-	ld	r2,PACATOC(r13)
-
 	li	r0,0
 	stb	r0,PACA_FTRACE_ENABLED(r13)
 
 	li	r0,KVM_HWTHREAD_IN_KVM
 	stb	r0,HSTATE_HWTHREAD_STATE(r13)
 
-	/* NV GPR values from power7_idle() will no longer be valid */
-	li	r0,1
-	stb	r0,PACA_NAPSTATELOST(r13)
-
-	/* were we napping due to cede? */
+	/* kvm cede / napping does not come through here */
 	lbz	r0,HSTATE_NAPPING(r13)
-	cmpwi	r0,NAPPING_CEDE
-	beq	kvm_end_cede
-	cmpwi	r0,NAPPING_NOVCPU
-	beq	kvm_novcpu_wakeup
+	twnei	r0,0
+
+	b	1f
 
-	ld	r1,PACAEMERGSP(r13)
-	subi	r1,r1,STACK_FRAME_OVERHEAD
+kvm_unsplit_wakeup:
+	li	r0, 0
+	stb	r0, HSTATE_NAPPING(r13)
+
+1:
 
 	/*
 	 * We weren't napping due to cede, so this must be a secondary
@@ -437,21 +437,25 @@  kvm_no_guest:
 	lbz	r3, HSTATE_HWTHREAD_REQ(r13)
 	cmpwi	r3, 0
 	bne	54f
-/*
- * We jump to pnv_wakeup_loss, which will return to the caller
- * of power7_nap in the powernv cpu offline loop.  The value we
- * put in r3 becomes the return value for power7_nap. pnv_wakeup_loss
- * requires SRR1 in r12.
- */
+
+	/*
+	 * Jump to idle_return_gpr_loss, which returns to the
+	 * idle_kvm_start_guest caller.
+	 */
 	li	r3, LPCR_PECE0
 	mfspr	r4, SPRN_LPCR
 	rlwimi	r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
 	mtspr	SPRN_LPCR, r4
-	li	r3, 0
-	/* set up cr3 and r3 for return */
-	cmpdi	cr3, r3, 0
+	/* set up r3 for return */
 	mfspr	r3,SPRN_SRR1
-	b	idle_return_gpr_loss
+	REST_NVGPRS(r1)
+	addi	r1, r1, STACK_FRAME_OVERHEAD
+	ld	r0, 16(r1)
+	ld	r5, 8(r1)
+	ld	r1, 0(r1)
+	mtlr	r0
+	mtcr	r5
+	blr
 
 53:	HMT_LOW
 	ld	r5, HSTATE_KVM_VCORE(r13)
@@ -536,6 +540,8 @@  END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	lbz	r0, KVM_SPLIT_DO_NAP(r3)
 	cmpwi	r0, 0
 	beq	57f
+	li	r3, NAPPING_UNSPLIT
+	stb	r3, HSTATE_NAPPING(r13)
 	li	r3, (LPCR_PECEDH | LPCR_PECE0) >> 4
 	mfspr	r5, SPRN_LPCR
 	rlwimi	r5, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1)
@@ -2656,6 +2662,9 @@  END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 
 	lis	r3, LPCR_PECEDP@h	/* Do wake on privileged doorbell */
 
+	/* Go back to host stack */
+	ld	r1, HSTATE_HOST_R1(r13)
+
 	/*
 	 * Take a nap until a decrementer or external or doobell interrupt
 	 * occurs, with PECE1 and PECE0 set in LPCR.
@@ -2684,26 +2693,42 @@  BEGIN_FTR_SECTION
 	 *		requested level = 0 (just stop dispatching)
 	 */
 	lis	r3, (PSSCR_EC | PSSCR_ESL)@h
-	mtspr	SPRN_PSSCR, r3
 	/* Set LPCR_PECE_HVEE bit to enable wakeup by HV interrupts */
 	li	r4, LPCR_PECE_HVEE@higher
 	sldi	r4, r4, 32
 	or	r5, r5, r4
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+FTR_SECTION_ELSE
+	li	r3, PNV_THREAD_NAP
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
 	mtspr	SPRN_LPCR,r5
 	isync
-	li	r0, 0
-	std	r0, HSTATE_SCRATCH0(r13)
-	ptesync
-	ld	r0, HSTATE_SCRATCH0(r13)
-1:	cmpd	r0, r0
-	bne	1b
+
 BEGIN_FTR_SECTION
-	nap
+	bl	isa300_idle_stop_mayloss
 FTR_SECTION_ELSE
-	PPC_STOP
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
-	b	.
+	bl	isa206_idle_insn_mayloss
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
+
+	mfspr	r0, SPRN_CTRLF
+	ori	r0, r0, 1
+	mtspr	SPRN_CTRLT, r0
+
+	mtspr	SPRN_SRR1, r3
+
+	li	r0, 0
+	stb	r0, PACA_FTRACE_ENABLED(r13)
+
+	li	r0, KVM_HWTHREAD_IN_KVM
+	stb	r0, HSTATE_HWTHREAD_STATE(r13)
+
+	lbz	r0, HSTATE_NAPPING(r13)
+	cmpwi	r0, NAPPING_CEDE
+	beq	kvm_end_cede
+	cmpwi	r0, NAPPING_NOVCPU
+	beq	kvm_novcpu_wakeup
+	cmpwi	r0, NAPPING_UNSPLIT
+	beq	kvm_unsplit_wakeup
+	twi	31,0,0 /* Nap state must not be zero */
 
 33:	mr	r4, r3
 	li	r3, 0
@@ -2711,12 +2736,11 @@  ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 	b	34f
 
 kvm_end_cede:
+	/* Woken by external or decrementer interrupt */
+
 	/* get vcpu pointer */
 	ld	r4, HSTATE_KVM_VCPU(r13)
 
-	/* Woken by external or decrementer interrupt */
-	ld	r1, HSTATE_HOST_R1(r13)
-
 #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
 	addi	r3, r4, VCPU_TB_RMINTR
 	bl	kvmhv_accumulate_time
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 182112fd845a..c9133f7908ca 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -505,6 +505,8 @@  static unsigned long power7_idle_insn(unsigned long type)
 	return srr1;
 }
 
+extern unsigned long idle_kvm_start_guest(unsigned long srr1);
+
 #ifdef CONFIG_HOTPLUG_CPU
 static unsigned long power7_offline(void)
 {
@@ -536,6 +538,14 @@  static unsigned long power7_offline(void)
 	srr1 = power7_idle_insn(power7_offline_type);
 	__ppc64_runlatch_on();
 
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
+	/* Order setting hwthread_state vs. testing hwthread_req */
+	smp_mb();
+	if (local_paca->kvm_hstate.hwthread_req)
+		srr1 = idle_kvm_start_guest(srr1);
+#endif
+
 	mtmsr(MSR_KERNEL);
 
 	return srr1;
@@ -823,6 +833,11 @@  static unsigned long power9_offline_stop(unsigned long psscr)
 	srr1 = power9_idle_stop(psscr, false);
 	__ppc64_runlatch_on();
 
+	local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
+	/* Order setting hwthread_state vs. testing hwthread_req */
+	smp_mb();
+	if (local_paca->kvm_hstate.hwthread_req)
+		srr1 = idle_kvm_start_guest(srr1);
 	mtmsr(MSR_KERNEL);
 #endif
 
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 77197110e900..e583ed3f6b93 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -2431,9 +2431,6 @@  static void dump_one_paca(int cpu)
 	DUMP(p, irq_happened, "%#-*x");
 	DUMP(p, io_sync, "%#-*x");
 	DUMP(p, irq_work_pending, "%#-*x");
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	DUMP(p, nap_state_lost, "%#-*x");
-#endif
 	DUMP(p, sprg_vdso, "%#-*llx");
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM