[v3,4/4] powerpc/64s: idle ESL=0 stop can avoid MSR and save/restore overhead

Message ID 20170825043036.18236-5-npiggin@gmail.com
State Rejected
Headers show

Commit Message

Nicholas Piggin Aug. 25, 2017, 4:30 a.m.
When stop is executed with EC=ESL=0, it appears to execute like a
normal instruction (resuming from NIP when woken by interrupt).
So all the save/restore handling can be avoided completely. In
particular NV GPRs do not have to be saved, and MSR does not have
to be switched back to kernel MSR.

So move the test for "lite" sleep states out to power9_idle_stop.

Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/idle_book3s.S | 35 ++++++++++++++++++++++++-----------
 1 file changed, 24 insertions(+), 11 deletions(-)

Comments

Paul Mackerras Aug. 29, 2017, 12:20 a.m. | #1
On Fri, Aug 25, 2017 at 02:30:36PM +1000, Nicholas Piggin wrote:
> When stop is executed with EC=ESL=0, it appears to execute like a
> normal instruction (resuming from NIP when woken by interrupt).
> So all the save/restore handling can be avoided completely. In
> particular NV GPRs do not have to be saved, and MSR does not have
> to be switched back to kernel MSR.
> 
> So move the test for "lite" sleep states out to power9_idle_stop.
> 
> Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>  arch/powerpc/kernel/idle_book3s.S | 35 ++++++++++++++++++++++++-----------
>  1 file changed, 24 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
> index 32d65ee323a0..fa56120bd0bc 100644
> --- a/arch/powerpc/kernel/idle_book3s.S
> +++ b/arch/powerpc/kernel/idle_book3s.S
> @@ -315,9 +315,6 @@ enter_winkle:
>  
>  	ARCH207_IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
>  
> -/*
> - * r3 - PSSCR value corresponding to the requested stop state.
> - */
>  #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
>  power_enter_stop_kvm_rm:
>  	/*
> @@ -330,14 +327,11 @@ power_enter_stop_kvm_rm:
>  	li	r4,KVM_HWTHREAD_IN_IDLE
>  	/* DO THIS IN REAL MODE!  See comment above. */
>  	stb	r4,HSTATE_HWTHREAD_STATE(r13)
> -#endif
> -power_enter_stop:
>  /*
>   * Check if we are executing the lite variant with ESL=EC=0
>   */
> -	andis.   r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
> -	clrldi   r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */
> -	bne	 .Lhandle_esl_ec_set
> +	andis.	r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
> +	bne	power_enter_stop_esl
>  	PPC_STOP
>  	li	r3,0  /* Since we didn't lose state, return 0 */
>  
> @@ -354,8 +348,13 @@ power_enter_stop:
>  	 */
>  	li	r12, 0
>  	b 	pnv_wakeup_noloss
> +#endif
>  
> -.Lhandle_esl_ec_set:
> +/*
> + * r3 - PSSCR value corresponding to the requested stop state.
> + */
> +power_enter_stop_esl:
> +	clrldi   r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */
>  	/*
>  	 * POWER9 DD2 can incorrectly set PMAO when waking up after a
>  	 * state-loss idle. Saving and restoring MMCR0 over idle is a
> @@ -428,9 +427,23 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66);		\
>   * r3 contains desired PSSCR register value.
>   */
>  _GLOBAL(power9_idle_stop)
> -	std	r3, PACA_REQ_PSSCR(r13)
>  	mtspr 	SPRN_PSSCR,r3
> -	LOAD_REG_ADDR(r4,power_enter_stop)
> +
> +	/*
> +	 * Check if we are executing the lite variant with ESL=EC=0
> +	 * This case resumes execution after the stop instruction without
> +	 * losing any state, so nothing has to be saved. The following
> +	 * instructions up to the blr must be skipped if we want to
> +	 * use power_enter_stop_kvm_rm.
> +	 */
> +	andis.	r4,r3,PSSCR_EC_ESL_MASK_SHIFTED

I realize you're just moving existing code, but I think this would be
clearer (to me, anyway) as

	andis.	r4, r3, (PSSCR_EC | PSSCR_ESL)@h

Apart from that very minor nit,

Reviewed-by: Paul Mackerras <paulus@ozlabs.org>
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nicholas Piggin Aug. 29, 2017, 1:39 a.m. | #2
On Tue, 29 Aug 2017 10:20:48 +1000
Paul Mackerras <paulus@ozlabs.org> wrote:

> On Fri, Aug 25, 2017 at 02:30:36PM +1000, Nicholas Piggin wrote:
> > When stop is executed with EC=ESL=0, it appears to execute like a
> > normal instruction (resuming from NIP when woken by interrupt).
> > So all the save/restore handling can be avoided completely. In
> > particular NV GPRs do not have to be saved, and MSR does not have
> > to be switched back to kernel MSR.
> > 
> > So move the test for "lite" sleep states out to power9_idle_stop.
> > 
> > Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> > ---
> >  arch/powerpc/kernel/idle_book3s.S | 35 ++++++++++++++++++++++++-----------
> >  1 file changed, 24 insertions(+), 11 deletions(-)
> > 
> > diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
> > index 32d65ee323a0..fa56120bd0bc 100644
> > --- a/arch/powerpc/kernel/idle_book3s.S
> > +++ b/arch/powerpc/kernel/idle_book3s.S
> > @@ -315,9 +315,6 @@ enter_winkle:
> >  
> >  	ARCH207_IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
> >  
> > -/*
> > - * r3 - PSSCR value corresponding to the requested stop state.
> > - */
> >  #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> >  power_enter_stop_kvm_rm:
> >  	/*
> > @@ -330,14 +327,11 @@ power_enter_stop_kvm_rm:
> >  	li	r4,KVM_HWTHREAD_IN_IDLE
> >  	/* DO THIS IN REAL MODE!  See comment above. */
> >  	stb	r4,HSTATE_HWTHREAD_STATE(r13)
> > -#endif
> > -power_enter_stop:
> >  /*
> >   * Check if we are executing the lite variant with ESL=EC=0
> >   */
> > -	andis.   r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
> > -	clrldi   r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */
> > -	bne	 .Lhandle_esl_ec_set
> > +	andis.	r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
> > +	bne	power_enter_stop_esl
> >  	PPC_STOP
> >  	li	r3,0  /* Since we didn't lose state, return 0 */
> >  
> > @@ -354,8 +348,13 @@ power_enter_stop:
> >  	 */
> >  	li	r12, 0
> >  	b 	pnv_wakeup_noloss
> > +#endif
> >  
> > -.Lhandle_esl_ec_set:
> > +/*
> > + * r3 - PSSCR value corresponding to the requested stop state.
> > + */
> > +power_enter_stop_esl:
> > +	clrldi   r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */
> >  	/*
> >  	 * POWER9 DD2 can incorrectly set PMAO when waking up after a
> >  	 * state-loss idle. Saving and restoring MMCR0 over idle is a
> > @@ -428,9 +427,23 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66);		\
> >   * r3 contains desired PSSCR register value.
> >   */
> >  _GLOBAL(power9_idle_stop)
> > -	std	r3, PACA_REQ_PSSCR(r13)
> >  	mtspr 	SPRN_PSSCR,r3
> > -	LOAD_REG_ADDR(r4,power_enter_stop)
> > +
> > +	/*
> > +	 * Check if we are executing the lite variant with ESL=EC=0
> > +	 * This case resumes execution after the stop instruction without
> > +	 * losing any state, so nothing has to be saved. The following
> > +	 * instructions up to the blr must be skipped if we want to
> > +	 * use power_enter_stop_kvm_rm.
> > +	 */
> > +	andis.	r4,r3,PSSCR_EC_ESL_MASK_SHIFTED  
> 
> I realize you're just moving existing code, but I think this would be
> clearer (to me, anyway) as
> 
> 	andis.	r4, r3, (PSSCR_EC | PSSCR_ESL)@h

Agreed, it's not a very helpful extra indirection. Perhaps we could
do a cleanup patch to fix that (and your other points like the 206/207
confusion).

Thanks for the reviews.

> Apart from that very minor nit,
> 
> Reviewed-by: Paul Mackerras <paulus@ozlabs.org>

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael Ellerman Aug. 30, 2017, 11:25 a.m. | #3
Nicholas Piggin <npiggin@gmail.com> writes:

> When stop is executed with EC=ESL=0, it appears to execute like a
> normal instruction (resuming from NIP when woken by interrupt).
> So all the save/restore handling can be avoided completely. In
> particular NV GPRs do not have to be saved, and MSR does not have
> to be switched back to kernel MSR.
>
> So move the test for "lite" sleep states out to power9_idle_stop.
>
> Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>  arch/powerpc/kernel/idle_book3s.S | 35 ++++++++++++++++++++++++-----------
>  1 file changed, 24 insertions(+), 11 deletions(-)

This is blowing up for me on mambo:

  usbcore: registered new interface driver usb-storage
  Disabling lock debugging due to kernel taint
  Severe Machine check interrupt [Not recovered]
    NIP [c0000000002a0c04]: kmem_cache_free+0x64/0x2c0
    Initiator: CPU
    Error type: Real address [Load/Store (foreign)]
  opal: Hardware platform error: Unrecoverable Machine Check exception
  CPU: 0 PID: 0 Comm: swapper/0 Tainted: G   M            4.13.0-rc2-gcc-6.3.1-00257-g26268bb39bff #543
  task: c0000000016b1200 task.stack: c00000000175c000
  NIP:  c0000000002a0c04 LR: c0000000001128ec CTR: c000000000112800
  REGS: c00000003fff7d80 TRAP: 0200   Tainted: G   M             (4.13.0-rc2-gcc-6.3.1-00257-g26268bb39bff)
  MSR:  9000000000209003 <SF,HV,EE,ME,RI,LE>  CR: 28002828  XER: 20000000
  CFAR: c0000000001128e8 DAR: c00a0000003c3ce0 DSISR: 00000008 SOFTE: 1 
  GPR00: c0000000001128ec c00000000175f7b0 c000000001760500 c0000000f001a000 
  GPR04: c0000000f0f37f00 0000000000000003 00000000000220c3 0000000000000000 
  GPR08: 0000000000000000 00000000003c3cc0 c0000000017e7758 0000000000000000 
  GPR12: c000000000112800 c00000000fff0000 c000000000112800 c0000000f0f37f90 
  GPR16: c00000000175c000 c00000000175c000 0000000000000001 c0000000016d7900 
  GPR20: c00000000179ba98 7fffffffffffffff c00000000175c000 0000000000000000 
  GPR24: c00000000017a8c0 000000000000000a c0000000016d8a00 c00000000175f8d0 
  GPR28: c0000000001128ec c0000000f0f37f00 c00a0000003c3cc0 c0000000f001a000 
  NIP [c0000000002a0c04] kmem_cache_free+0x64/0x2c0
  LR [c0000000001128ec] put_cred_rcu+0xec/0x140
  Call Trace:
  [c00000000175f7b0] [c00000000175f800] init_thread_union+0x3800/0x4000 (unreliable)
  [c00000000175f840] [c0000000001128ec] put_cred_rcu+0xec/0x140
  [c00000000175f8b0] [c00000000017a908] rcu_process_callbacks+0x438/0x6a0
  [c00000000175f980] [c0000000000e5e28] __do_softirq+0x198/0x310
  [c00000000175fa70] [c0000000000e6248] irq_exit+0xf8/0x140
  [c00000000175fa90] [c000000000023710] timer_interrupt+0xa0/0xe0
  [c00000000175fac0] [c000000000008fcc] decrementer_common+0x11c/0x120
  --- interrupt: 901 at replay_interrupt_return+0x0/0x4
      LR = arch_local_irq_restore.part.1+0x84/0xb0
  [c00000000175fdb0] [c00000000175c000] init_thread_union+0x0/0x4000 (unreliable)
  [c00000000175fdd0] [c00000000001cde0] arch_cpu_idle+0xe0/0x140
  [c00000000175fe00] [c0000000007f4f44] default_idle_call+0x44/0x84
  [c00000000175fe20] [c000000000142e54] do_idle+0x254/0x320
  [c00000000175fe90] [c000000000143280] cpu_startup_entry+0x30/0x40
  [c00000000175fec0] [c00000000000d2d8] rest_init+0x2f8/0x320
  [c00000000175ff00] [c000000001000d74] start_kernel+0x510/0x52c
  [c00000000175ff90] [c00000000000ab70] start_here_common+0x1c/0x4ac
  Instruction dump:
  60000000 e9230008 71280100 40820170 2fbf0000 419e013c 3d420008 394a7258 
  7bbe8502 7bc93664 ebca0000 7fde4a14 <e93e0020> 712a0001 40820234 e93f0008 
  [    1.217346467,0] OPAL: Reboot requested due to Platform error.
  [    1.217351582,3] OPAL: failed to log an error
  [    1.217358188,5] OPAL: Reboot request...


I'll just drop it for now.

cheers
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nicholas Piggin Aug. 30, 2017, 12:10 p.m. | #4
On Wed, 30 Aug 2017 21:25:59 +1000
Michael Ellerman <mpe@ellerman.id.au> wrote:

> Nicholas Piggin <npiggin@gmail.com> writes:
> 
> > When stop is executed with EC=ESL=0, it appears to execute like a
> > normal instruction (resuming from NIP when woken by interrupt).
> > So all the save/restore handling can be avoided completely. In
> > particular NV GPRs do not have to be saved, and MSR does not have
> > to be switched back to kernel MSR.
> >
> > So move the test for "lite" sleep states out to power9_idle_stop.
> >
> > Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> > ---
> >  arch/powerpc/kernel/idle_book3s.S | 35 ++++++++++++++++++++++++-----------
> >  1 file changed, 24 insertions(+), 11 deletions(-)  
> 
> This is blowing up for me on mambo:

Oh this is a known bug in mambo that does not match the hardware.

You need >= Mambo.7.8.21, or this firmware patch to work around
the issue for old mambos.

mambo.git 58d3162f4d6204fc077ff4a6ba47e4d1e19d5120

https://lists.ozlabs.org/pipermail/skiboot/2017-August/008768.html

Thanks,
Nick
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael Ellerman Sept. 1, 2017, 9:39 a.m. | #5
Nicholas Piggin <npiggin@gmail.com> writes:

> On Wed, 30 Aug 2017 21:25:59 +1000
> Michael Ellerman <mpe@ellerman.id.au> wrote:
>
>> Nicholas Piggin <npiggin@gmail.com> writes:
>> 
>> > When stop is executed with EC=ESL=0, it appears to execute like a
>> > normal instruction (resuming from NIP when woken by interrupt).
>> > So all the save/restore handling can be avoided completely. In
>> > particular NV GPRs do not have to be saved, and MSR does not have
>> > to be switched back to kernel MSR.
>> >
>> > So move the test for "lite" sleep states out to power9_idle_stop.
>> >
>> > Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
>> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> > ---
>> >  arch/powerpc/kernel/idle_book3s.S | 35 ++++++++++++++++++++++++-----------
>> >  1 file changed, 24 insertions(+), 11 deletions(-)  
>> 
>> This is blowing up for me on mambo:
>
> Oh this is a known bug in mambo that does not match the hardware.
>
> You need >= Mambo.7.8.21, or this firmware patch to work around
> the issue for old mambos.

As discussed elsewhere this still breaks on new mambo with more than one
CPU. So I've dropped it for now.

cheers
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nicholas Piggin Sept. 20, 2017, 1:56 p.m. | #6
On Fri, 01 Sep 2017 19:39:41 +1000
Michael Ellerman <mpe@ellerman.id.au> wrote:

> Nicholas Piggin <npiggin@gmail.com> writes:
> 
> > On Wed, 30 Aug 2017 21:25:59 +1000
> > Michael Ellerman <mpe@ellerman.id.au> wrote:
> >  
> >> Nicholas Piggin <npiggin@gmail.com> writes:
> >>   
> >> > When stop is executed with EC=ESL=0, it appears to execute like a
> >> > normal instruction (resuming from NIP when woken by interrupt).
> >> > So all the save/restore handling can be avoided completely. In
> >> > particular NV GPRs do not have to be saved, and MSR does not have
> >> > to be switched back to kernel MSR.
> >> >
> >> > So move the test for "lite" sleep states out to power9_idle_stop.
> >> >
> >> > Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> >> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> >> > ---
> >> >  arch/powerpc/kernel/idle_book3s.S | 35 ++++++++++++++++++++++++-----------
> >> >  1 file changed, 24 insertions(+), 11 deletions(-)    
> >> 
> >> This is blowing up for me on mambo:  
> >
> > Oh this is a known bug in mambo that does not match the hardware.
> >
> > You need >= Mambo.7.8.21, or this firmware patch to work around
> > the issue for old mambos.  
> 
> As discussed elsewhere this still breaks on new mambo with more than one
> CPU. So I've dropped it for now.

This now seems to be properly fixed in mambo with commit 11783550ee11.

Skiboot also has a patch merged which disables the problematic state on
mambo so older versions won't crash.

d2a24406a49 ("idle: disable stop*_lite POWER9 idle states for Mambo platform")

So this could be re-applied now.

Thanks,
Nick
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 32d65ee323a0..fa56120bd0bc 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -315,9 +315,6 @@  enter_winkle:
 
 	ARCH207_IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
 
-/*
- * r3 - PSSCR value corresponding to the requested stop state.
- */
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 power_enter_stop_kvm_rm:
 	/*
@@ -330,14 +327,11 @@  power_enter_stop_kvm_rm:
 	li	r4,KVM_HWTHREAD_IN_IDLE
 	/* DO THIS IN REAL MODE!  See comment above. */
 	stb	r4,HSTATE_HWTHREAD_STATE(r13)
-#endif
-power_enter_stop:
 /*
  * Check if we are executing the lite variant with ESL=EC=0
  */
-	andis.   r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
-	clrldi   r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */
-	bne	 .Lhandle_esl_ec_set
+	andis.	r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
+	bne	power_enter_stop_esl
 	PPC_STOP
 	li	r3,0  /* Since we didn't lose state, return 0 */
 
@@ -354,8 +348,13 @@  power_enter_stop:
 	 */
 	li	r12, 0
 	b 	pnv_wakeup_noloss
+#endif
 
-.Lhandle_esl_ec_set:
+/*
+ * r3 - PSSCR value corresponding to the requested stop state.
+ */
+power_enter_stop_esl:
+	clrldi   r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */
 	/*
 	 * POWER9 DD2 can incorrectly set PMAO when waking up after a
 	 * state-loss idle. Saving and restoring MMCR0 over idle is a
@@ -428,9 +427,23 @@  ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66);		\
  * r3 contains desired PSSCR register value.
  */
 _GLOBAL(power9_idle_stop)
-	std	r3, PACA_REQ_PSSCR(r13)
 	mtspr 	SPRN_PSSCR,r3
-	LOAD_REG_ADDR(r4,power_enter_stop)
+
+	/*
+	 * Check if we are executing the lite variant with ESL=EC=0
+	 * This case resumes execution after the stop instruction without
+	 * losing any state, so nothing has to be saved. The following
+	 * instructions up to the blr must be skipped if we want to
+	 * use power_enter_stop_kvm_rm.
+	 */
+	andis.	r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
+	bne	1f
+	PPC_STOP
+	li	r3,0  /* Since we didn't lose state, return 0 */
+	blr
+1:	/* state-loss idle */
+	std	r3, PACA_REQ_PSSCR(r13)
+	LOAD_REG_ADDR(r4,power_enter_stop_esl)
 	b	pnv_powersave_common
 	/* No return */