diff mbox series

[3/3] powerpc/64s/idle: POWER9 ESL=0 stop avoid save/restore overhead

Message ID 20171117140807.22105-4-npiggin@gmail.com (mailing list archive)
State Changes Requested
Headers show
Series one more try at idle improvements | expand

Commit Message

Nicholas Piggin Nov. 17, 2017, 2:08 p.m. UTC
When stop is executed with EC=ESL=0, it appears to execute like a
normal instruction (resuming from NIP when woken by interrupt). So all
the save/restore handling can be avoided completely. In particular NV
GPRs do not have to be saved, and MSR does not have to be switched
back to kernel MSR.

So move the test for EC=ESL=0 sleep states out to power9_idle_stop,
and return directly to the caller after stop in that case. The mtspr
to PSSCR is moved to the top of power9_offline_stop just so it matches
power9_idle_stop.

This improves performance for ping-pong benchmark with the stop0_lite
idle state by 2.54% for 2 threads in the same core, and 2.57% for
different cores.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/idle_book3s.S     | 43 +++++++++++------------------------
 arch/powerpc/platforms/powernv/idle.c |  7 +++++-
 2 files changed, 19 insertions(+), 31 deletions(-)

Comments

Vaidyanathan Srinivasan Feb. 28, 2018, 6:34 p.m. UTC | #1
* Nicholas Piggin <npiggin@gmail.com> [2017-11-18 00:08:07]:

> When stop is executed with EC=ESL=0, it appears to execute like a
> normal instruction (resuming from NIP when woken by interrupt). So all
> the save/restore handling can be avoided completely. In particular NV
> GPRs do not have to be saved, and MSR does not have to be switched
> back to kernel MSR.
> 
> So move the test for EC=ESL=0 sleep states out to power9_idle_stop,
> and return directly to the caller after stop in that case. The mtspr
> to PSSCR is moved to the top of power9_offline_stop just so it matches
> power9_idle_stop.
> 
> This improves performance for ping-pong benchmark with the stop0_lite
> idle state by 2.54% for 2 threads in the same core, and 2.57% for
> different cores.
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>

Reviewed-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>

> ---
>  arch/powerpc/kernel/idle_book3s.S     | 43 +++++++++++------------------------
>  arch/powerpc/platforms/powernv/idle.c |  7 +++++-
>  2 files changed, 19 insertions(+), 31 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
> index 07a306173c5a..6243da99b26c 100644
> --- a/arch/powerpc/kernel/idle_book3s.S
> +++ b/arch/powerpc/kernel/idle_book3s.S
> @@ -324,31 +324,8 @@ enter_winkle:
>  /*
>   * r3 - PSSCR value corresponding to the requested stop state.
>   */
> -power_enter_stop:
> -/*
> - * Check if we are executing the lite variant with ESL=EC=0
> - */
> -	andis.   r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
> +power_enter_stop_esl:
>  	clrldi   r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */
> -	bne	 .Lhandle_esl_ec_set
> -	PPC_STOP
> -	li	r3,0  /* Since we didn't lose state, return 0 */
> -
> -	/*
> -	 * pnv_wakeup_noloss() expects r12 to contain the SRR1 value so
> -	 * it can determine if the wakeup reason is an HMI in
> -	 * CHECK_HMI_INTERRUPT.
> -	 *
> -	 * However, when we wakeup with ESL=0, SRR1 will not contain the wakeup
> -	 * reason, so there is no point setting r12 to SRR1.
> -	 *
> -	 * Further, we clear r12 here, so that we don't accidentally enter the
> -	 * HMI in pnv_wakeup_noloss() if the value of r12[42:45] == WAKE_HMI.
> -	 */
> -	li	r12, 0
> -	b 	pnv_wakeup_noloss
> -
> -.Lhandle_esl_ec_set:
>  BEGIN_FTR_SECTION
>  	/*
>  	 * POWER9 DD2.0 or earlier can incorrectly set PMAO when waking up after
> @@ -423,26 +400,32 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66);		\
>   * r3 contains desired PSSCR register value.
>   */
>  _GLOBAL(power9_idle_stop)
> -	std	r3, PACA_REQ_PSSCR(r13)
>  	mtspr 	SPRN_PSSCR,r3
> -	LOAD_REG_ADDR(r4,power_enter_stop)
> +	andis.	r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
> +	bne	1f
> +	PPC_STOP
> +	li	r3,0  /* Since we didn't lose state, return 0 */
> +	blr
> +
> +1:	std	r3, PACA_REQ_PSSCR(r13)
> +	LOAD_REG_ADDR(r4,power_enter_stop_esl)
>  	b	pnv_powersave_common
>  	/* No return */

Good optimization to skip the context save and directly execute stop
for ESL=EC=0 case.

>  /*
> - * Entered with MSR[EE]=0 and no soft-masked interrupts pending.
> - * r3 contains desired PSSCR register value.
> + * This is the same as the above, but it sets KVM state for secondaries,
> + * and it must have PSSCR[EC]=1
>   */
>  _GLOBAL(power9_offline_stop)
> -	std	r3, PACA_REQ_PSSCR(r13)
>  	mtspr 	SPRN_PSSCR,r3
> +	std	r3, PACA_REQ_PSSCR(r13)
>  #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
>  	/* Tell KVM we're entering idle */
>  	li	r4,KVM_HWTHREAD_IN_IDLE
>  	/* DO THIS IN REAL MODE!  See comment above. */
>  	stb	r4,HSTATE_HWTHREAD_STATE(r13)
>  #endif
> -	LOAD_REG_ADDR(r4,power_enter_stop)
> +	LOAD_REG_ADDR(r4,power_enter_stop_esl)
>  	b	pnv_powersave_common
>  	/* No return */
> 
> diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
> index a921d5428d76..610b1637c16f 100644
> --- a/arch/powerpc/platforms/powernv/idle.c
> +++ b/arch/powerpc/platforms/powernv/idle.c
> @@ -621,7 +621,12 @@ static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags,
>  			continue;
>  		}
> 
> -		if (max_residency_ns < residency_ns[i]) {
> +		/*
> +		 * Deepest stop for unplug must be PSSCR[EC]=1 (wakeup at
> +		 * 0x100.
> +		 */
> +		if ((max_residency_ns < residency_ns[i])&&
> +				(psscr_val[i] & PSSCR_EC)) {
>  			max_residency_ns = residency_ns[i];
>  			pnv_deepest_stop_psscr_val = psscr_val[i];
>  			pnv_deepest_stop_psscr_mask = psscr_mask[i];

If firmware did not provide any ESL=EC=1 state, we can still leave
threads in stop ESL=0 state.  This is just a corner case or random
test scenario.  Why do we want to enforce that offline cpus really use
a ESL=0 state or just spin? 

--Vaidy
Nicholas Piggin March 1, 2018, 11:57 a.m. UTC | #2
On Thu, 1 Mar 2018 00:04:39 +0530
Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com> wrote:

> * Nicholas Piggin <npiggin@gmail.com> [2017-11-18 00:08:07]:
> 
> > When stop is executed with EC=ESL=0, it appears to execute like a
> > normal instruction (resuming from NIP when woken by interrupt). So all
> > the save/restore handling can be avoided completely. In particular NV
> > GPRs do not have to be saved, and MSR does not have to be switched
> > back to kernel MSR.
> > 
> > So move the test for EC=ESL=0 sleep states out to power9_idle_stop,
> > and return directly to the caller after stop in that case. The mtspr
> > to PSSCR is moved to the top of power9_offline_stop just so it matches
> > power9_idle_stop.
> > 
> > This improves performance for ping-pong benchmark with the stop0_lite
> > idle state by 2.54% for 2 threads in the same core, and 2.57% for
> > different cores.
> > 
> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>  
> 
> Reviewed-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
> 
> > ---
> >  arch/powerpc/kernel/idle_book3s.S     | 43 +++++++++++------------------------
> >  arch/powerpc/platforms/powernv/idle.c |  7 +++++-
> >  2 files changed, 19 insertions(+), 31 deletions(-)
> > 
> > diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
> > index 07a306173c5a..6243da99b26c 100644
> > --- a/arch/powerpc/kernel/idle_book3s.S
> > +++ b/arch/powerpc/kernel/idle_book3s.S
> > @@ -324,31 +324,8 @@ enter_winkle:
> >  /*
> >   * r3 - PSSCR value corresponding to the requested stop state.
> >   */
> > -power_enter_stop:
> > -/*
> > - * Check if we are executing the lite variant with ESL=EC=0
> > - */
> > -	andis.   r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
> > +power_enter_stop_esl:
> >  	clrldi   r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */
> > -	bne	 .Lhandle_esl_ec_set
> > -	PPC_STOP
> > -	li	r3,0  /* Since we didn't lose state, return 0 */
> > -
> > -	/*
> > -	 * pnv_wakeup_noloss() expects r12 to contain the SRR1 value so
> > -	 * it can determine if the wakeup reason is an HMI in
> > -	 * CHECK_HMI_INTERRUPT.
> > -	 *
> > -	 * However, when we wakeup with ESL=0, SRR1 will not contain the wakeup
> > -	 * reason, so there is no point setting r12 to SRR1.
> > -	 *
> > -	 * Further, we clear r12 here, so that we don't accidentally enter the
> > -	 * HMI in pnv_wakeup_noloss() if the value of r12[42:45] == WAKE_HMI.
> > -	 */
> > -	li	r12, 0
> > -	b 	pnv_wakeup_noloss
> > -
> > -.Lhandle_esl_ec_set:
> >  BEGIN_FTR_SECTION
> >  	/*
> >  	 * POWER9 DD2.0 or earlier can incorrectly set PMAO when waking up after
> > @@ -423,26 +400,32 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66);		\
> >   * r3 contains desired PSSCR register value.
> >   */
> >  _GLOBAL(power9_idle_stop)
> > -	std	r3, PACA_REQ_PSSCR(r13)
> >  	mtspr 	SPRN_PSSCR,r3
> > -	LOAD_REG_ADDR(r4,power_enter_stop)
> > +	andis.	r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
> > +	bne	1f
> > +	PPC_STOP
> > +	li	r3,0  /* Since we didn't lose state, return 0 */
> > +	blr
> > +
> > +1:	std	r3, PACA_REQ_PSSCR(r13)
> > +	LOAD_REG_ADDR(r4,power_enter_stop_esl)
> >  	b	pnv_powersave_common
> >  	/* No return */  
> 
> Good optimization to skip the context save and directly execute stop
> for ESL=EC=0 case.

Yep we should now be getting pretty close to optimal for ESL=EC=0.
About the only other thing we could do is maintain PSSCR state and
only change it when going to a different idle state. May not be
worth worrying about but I haven't benchmarked it.

> 
> >  /*
> > - * Entered with MSR[EE]=0 and no soft-masked interrupts pending.
> > - * r3 contains desired PSSCR register value.
> > + * This is the same as the above, but it sets KVM state for secondaries,
> > + * and it must have PSSCR[EC]=1
> >   */
> >  _GLOBAL(power9_offline_stop)
> > -	std	r3, PACA_REQ_PSSCR(r13)
> >  	mtspr 	SPRN_PSSCR,r3
> > +	std	r3, PACA_REQ_PSSCR(r13)
> >  #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> >  	/* Tell KVM we're entering idle */
> >  	li	r4,KVM_HWTHREAD_IN_IDLE
> >  	/* DO THIS IN REAL MODE!  See comment above. */
> >  	stb	r4,HSTATE_HWTHREAD_STATE(r13)
> >  #endif
> > -	LOAD_REG_ADDR(r4,power_enter_stop)
> > +	LOAD_REG_ADDR(r4,power_enter_stop_esl)
> >  	b	pnv_powersave_common
> >  	/* No return */
> > 
> > diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
> > index a921d5428d76..610b1637c16f 100644
> > --- a/arch/powerpc/platforms/powernv/idle.c
> > +++ b/arch/powerpc/platforms/powernv/idle.c
> > @@ -621,7 +621,12 @@ static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags,
> >  			continue;
> >  		}
> > 
> > -		if (max_residency_ns < residency_ns[i]) {
> > +		/*
> > +		 * Deepest stop for unplug must be PSSCR[EC]=1 (wakeup at
> > +		 * 0x100.
> > +		 */
> > +		if ((max_residency_ns < residency_ns[i])&&
> > +				(psscr_val[i] & PSSCR_EC)) {
> >  			max_residency_ns = residency_ns[i];
> >  			pnv_deepest_stop_psscr_val = psscr_val[i];
> >  			pnv_deepest_stop_psscr_mask = psscr_mask[i];  
> 
> If firmware did not provide any ESL=EC=1 state, we can still leave
> threads in stop ESL=0 state.  This is just a corner case or random
> test scenario.  Why do we want to enforce that offline cpus really use
> a ESL=0 state or just spin? 

It's because power9_offline_stop only has cases for EC=ESL=1
states now.

It actually looks like EC=ESL=0 unplug today is broken KVM, because
the wakeup side does not check HWTHREAD_REQ, and yet they do set
HWTHREAD_IN_IDLE. That would probably hang in KVM if we run with
dependent threads, wouldn't it?

I think banning it for now should be okay.

Thanks,
Nick
Paul Mackerras March 4, 2018, 11:01 p.m. UTC | #3
On Thu, Mar 01, 2018 at 09:57:34PM +1000, Nicholas Piggin wrote:
> On Thu, 1 Mar 2018 00:04:39 +0530
> Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com> wrote:
> 
> > * Nicholas Piggin <npiggin@gmail.com> [2017-11-18 00:08:07]:
[snip]
> > > diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
> > > index a921d5428d76..610b1637c16f 100644
> > > --- a/arch/powerpc/platforms/powernv/idle.c
> > > +++ b/arch/powerpc/platforms/powernv/idle.c
> > > @@ -621,7 +621,12 @@ static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags,
> > >  			continue;
> > >  		}
> > > 
> > > -		if (max_residency_ns < residency_ns[i]) {
> > > +		/*
> > > +		 * Deepest stop for unplug must be PSSCR[EC]=1 (wakeup at
> > > +		 * 0x100.
> > > +		 */
> > > +		if ((max_residency_ns < residency_ns[i])&&
> > > +				(psscr_val[i] & PSSCR_EC)) {
> > >  			max_residency_ns = residency_ns[i];
> > >  			pnv_deepest_stop_psscr_val = psscr_val[i];
> > >  			pnv_deepest_stop_psscr_mask = psscr_mask[i];  
> > 
> > If firmware did not provide any ESL=EC=1 state, we can still leave
> > threads in stop ESL=0 state.  This is just a corner case or random
> > test scenario.  Why do we want to enforce that offline cpus really use
> > a ESL=0 state or just spin? 
> 
> It's because power9_offline_stop only has cases for EC=ESL=1
> states now.
> 
> It actually looks like EC=ESL=0 unplug today is broken KVM, because
> the wakeup side does not check HWTHREAD_REQ, and yet they do set
> HWTHREAD_IN_IDLE. That would probably hang in KVM if we run with
> dependent threads, wouldn't it?

Right.  KVM with indep_threads_mode=N is broken at the moment if you
run with powersave=off or if firmware provides no stop states with
EC=ESL=1.  I'm not sure what's the best way to fix that.

> I think banning it for now should be okay.

Banning what exactly?

Paul.
Nicholas Piggin March 5, 2018, 9:59 a.m. UTC | #4
On Mon, 5 Mar 2018 10:01:01 +1100
Paul Mackerras <paulus@ozlabs.org> wrote:

> On Thu, Mar 01, 2018 at 09:57:34PM +1000, Nicholas Piggin wrote:
> > On Thu, 1 Mar 2018 00:04:39 +0530
> > Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com> wrote:
> >   
> > > * Nicholas Piggin <npiggin@gmail.com> [2017-11-18 00:08:07]:  
> [snip]
> > > > diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
> > > > index a921d5428d76..610b1637c16f 100644
> > > > --- a/arch/powerpc/platforms/powernv/idle.c
> > > > +++ b/arch/powerpc/platforms/powernv/idle.c
> > > > @@ -621,7 +621,12 @@ static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags,
> > > >  			continue;
> > > >  		}
> > > > 
> > > > -		if (max_residency_ns < residency_ns[i]) {
> > > > +		/*
> > > > +		 * Deepest stop for unplug must be PSSCR[EC]=1 (wakeup at
> > > > +		 * 0x100.
> > > > +		 */
> > > > +		if ((max_residency_ns < residency_ns[i])&&
> > > > +				(psscr_val[i] & PSSCR_EC)) {
> > > >  			max_residency_ns = residency_ns[i];
> > > >  			pnv_deepest_stop_psscr_val = psscr_val[i];
> > > >  			pnv_deepest_stop_psscr_mask = psscr_mask[i];    
> > > 
> > > If firmware did not provide any ESL=EC=1 state, we can still leave
> > > threads in stop ESL=0 state.  This is just a corner case or random
> > > test scenario.  Why do we want to enforce that offline cpus really use
> > > a ESL=0 state or just spin?   
> > 
> > It's because power9_offline_stop only has cases for EC=ESL=1
> > states now.
> > 
> > It actually looks like EC=ESL=0 unplug today is broken KVM, because
> > the wakeup side does not check HWTHREAD_REQ, and yet they do set
> > HWTHREAD_IN_IDLE. That would probably hang in KVM if we run with
> > dependent threads, wouldn't it?  
> 
> Right.  KVM with indep_threads_mode=N is broken at the moment if you
> run with powersave=off or if firmware provides no stop states with
> EC=ESL=1.  I'm not sure what's the best way to fix that.

For EC=ESL=1, would it be enough to do a test and branch right
after the stop instruction?

> > I think banning it for now should be okay.  
> 
> Banning what exactly?

power9 CPU unplug using a EC=ESL=0 stop state.

Thanks,
Nick
Michael Ellerman March 31, 2018, 11:46 a.m. UTC | #5
Nicholas Piggin <npiggin@gmail.com> writes:

> When stop is executed with EC=ESL=0, it appears to execute like a
> normal instruction (resuming from NIP when woken by interrupt). So all
> the save/restore handling can be avoided completely. In particular NV
> GPRs do not have to be saved, and MSR does not have to be switched
> back to kernel MSR.
>
> So move the test for EC=ESL=0 sleep states out to power9_idle_stop,
> and return directly to the caller after stop in that case. The mtspr
> to PSSCR is moved to the top of power9_offline_stop just so it matches
> power9_idle_stop.
>
> This improves performance for ping-pong benchmark with the stop0_lite
> idle state by 2.54% for 2 threads in the same core, and 2.57% for
> different cores.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>  arch/powerpc/kernel/idle_book3s.S     | 43 +++++++++++------------------------
>  arch/powerpc/platforms/powernv/idle.c |  7 +++++-
>  2 files changed, 19 insertions(+), 31 deletions(-)
>
> diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
> index 07a306173c5a..6243da99b26c 100644
> --- a/arch/powerpc/kernel/idle_book3s.S
> +++ b/arch/powerpc/kernel/idle_book3s.S
> @@ -324,31 +324,8 @@ enter_winkle:
>  /*
>   * r3 - PSSCR value corresponding to the requested stop state.
>   */
> -power_enter_stop:
> -/*
> - * Check if we are executing the lite variant with ESL=EC=0
> - */
> -	andis.   r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
> +power_enter_stop_esl:
>  	clrldi   r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */
> -	bne	 .Lhandle_esl_ec_set
> -	PPC_STOP
> -	li	r3,0  /* Since we didn't lose state, return 0 */

Sorry this clashed with Paul's force-SMT4 patches which I needed to
merge to enable TM workarounds.

Can you rebase?

cheers
diff mbox series

Patch

diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 07a306173c5a..6243da99b26c 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -324,31 +324,8 @@  enter_winkle:
 /*
  * r3 - PSSCR value corresponding to the requested stop state.
  */
-power_enter_stop:
-/*
- * Check if we are executing the lite variant with ESL=EC=0
- */
-	andis.   r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
+power_enter_stop_esl:
 	clrldi   r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */
-	bne	 .Lhandle_esl_ec_set
-	PPC_STOP
-	li	r3,0  /* Since we didn't lose state, return 0 */
-
-	/*
-	 * pnv_wakeup_noloss() expects r12 to contain the SRR1 value so
-	 * it can determine if the wakeup reason is an HMI in
-	 * CHECK_HMI_INTERRUPT.
-	 *
-	 * However, when we wakeup with ESL=0, SRR1 will not contain the wakeup
-	 * reason, so there is no point setting r12 to SRR1.
-	 *
-	 * Further, we clear r12 here, so that we don't accidentally enter the
-	 * HMI in pnv_wakeup_noloss() if the value of r12[42:45] == WAKE_HMI.
-	 */
-	li	r12, 0
-	b 	pnv_wakeup_noloss
-
-.Lhandle_esl_ec_set:
 BEGIN_FTR_SECTION
 	/*
 	 * POWER9 DD2.0 or earlier can incorrectly set PMAO when waking up after
@@ -423,26 +400,32 @@  ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66);		\
  * r3 contains desired PSSCR register value.
  */
 _GLOBAL(power9_idle_stop)
-	std	r3, PACA_REQ_PSSCR(r13)
 	mtspr 	SPRN_PSSCR,r3
-	LOAD_REG_ADDR(r4,power_enter_stop)
+	andis.	r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
+	bne	1f
+	PPC_STOP
+	li	r3,0  /* Since we didn't lose state, return 0 */
+	blr
+
+1:	std	r3, PACA_REQ_PSSCR(r13)
+	LOAD_REG_ADDR(r4,power_enter_stop_esl)
 	b	pnv_powersave_common
 	/* No return */
 
 /*
- * Entered with MSR[EE]=0 and no soft-masked interrupts pending.
- * r3 contains desired PSSCR register value.
+ * This is the same as the above, but it sets KVM state for secondaries,
+ * and it must have PSSCR[EC]=1
  */
 _GLOBAL(power9_offline_stop)
-	std	r3, PACA_REQ_PSSCR(r13)
 	mtspr 	SPRN_PSSCR,r3
+	std	r3, PACA_REQ_PSSCR(r13)
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	/* Tell KVM we're entering idle */
 	li	r4,KVM_HWTHREAD_IN_IDLE
 	/* DO THIS IN REAL MODE!  See comment above. */
 	stb	r4,HSTATE_HWTHREAD_STATE(r13)
 #endif
-	LOAD_REG_ADDR(r4,power_enter_stop)
+	LOAD_REG_ADDR(r4,power_enter_stop_esl)
 	b	pnv_powersave_common
 	/* No return */
 
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index a921d5428d76..610b1637c16f 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -621,7 +621,12 @@  static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags,
 			continue;
 		}
 
-		if (max_residency_ns < residency_ns[i]) {
+		/*
+		 * Deepest stop for unplug must be PSSCR[EC]=1 (wakeup at
+		 * 0x100.
+		 */
+		if ((max_residency_ns < residency_ns[i])&&
+				(psscr_val[i] & PSSCR_EC)) {
 			max_residency_ns = residency_ns[i];
 			pnv_deepest_stop_psscr_val = psscr_val[i];
 			pnv_deepest_stop_psscr_mask = psscr_mask[i];