diff mbox

[4/8] powerpc/64s: fix POWER9 machine check handler from stop state

Message ID 20170314092349.10981-5-npiggin@gmail.com (mailing list archive)
State Changes Requested
Headers show

Commit Message

Nicholas Piggin March 14, 2017, 9:23 a.m. UTC
The ISA specifies power save wakeup can cause a machine check interrupt.
The machine check handler currently has code to handle that for POWER8,
but POWER9 crashes when trying to execute the P8 style sleep
instructions.

So queue up the machine check, then call into the idle code to wake up
as the system reset interrupt does, rather than attempting to sleep
again without going through the main idle path.

Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/reg.h       |  1 +
 arch/powerpc/kernel/exceptions-64s.S | 69 ++++++++++++++++++------------------
 2 files changed, 35 insertions(+), 35 deletions(-)

Comments

Mahesh J Salgaonkar March 16, 2017, 12:40 p.m. UTC | #1
On 03/14/2017 02:53 PM, Nicholas Piggin wrote:
> The ISA specifies power save wakeup can cause a machine check interrupt.
> The machine check handler currently has code to handle that for POWER8,
> but POWER9 crashes when trying to execute the P8 style sleep
> instructions.
> 
> So queue up the machine check, then call into the idle code to wake up
> as the system reset interrupt does, rather than attempting to sleep
> again without going through the main idle path.
> 
> Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>  arch/powerpc/include/asm/reg.h       |  1 +
>  arch/powerpc/kernel/exceptions-64s.S | 69 ++++++++++++++++++------------------
>  2 files changed, 35 insertions(+), 35 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
> index fc879fd6bdae..8bbdfacce970 100644
> --- a/arch/powerpc/include/asm/reg.h
> +++ b/arch/powerpc/include/asm/reg.h
> @@ -656,6 +656,7 @@
>  #define   SRR1_ISI_PROT		0x08000000 /* ISI: Other protection fault */
>  #define   SRR1_WAKEMASK		0x00380000 /* reason for wakeup */
>  #define   SRR1_WAKEMASK_P8	0x003c0000 /* reason for wakeup on POWER8 and 9 */
> +#define   SRR1_WAKEMCE_RESVD	0x003c0000 /* Unused/reserved value used by MCE wakeup to indicate cause to idle wakeup handler */
>  #define   SRR1_WAKESYSERR	0x00300000 /* System error */
>  #define   SRR1_WAKEEE		0x00200000 /* External interrupt */
>  #define   SRR1_WAKEHVI		0x00240000 /* Hypervisor Virtualization Interrupt (P9) */
> diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
> index e390fcd04bcb..5779d2d6a192 100644
> --- a/arch/powerpc/kernel/exceptions-64s.S
> +++ b/arch/powerpc/kernel/exceptions-64s.S
> @@ -306,6 +306,33 @@ EXC_COMMON_BEGIN(machine_check_common)
>  	/* restore original r1. */			\
>  	ld	r1,GPR1(r1)
> 
> +#ifdef CONFIG_PPC_P7_NAP
> +EXC_COMMON_BEGIN(machine_check_idle_common)
> +	bl	machine_check_queue_event
> +	/*
> +	 * Queue the machine check, then reload SRR1 and use it to set
> +	 * CR3 according to pnv_powersave_wakeup convention.
> +	 */
> +	ld	r12,_MSR(r1)
> +	rlwinm	r11,r12,47-31,30,31
> +	cmpwi	cr3,r11,2
> +
> +	/*
> +	 * Now put SRR1_WAKEMCE_RESVD into SRR1, allows it to follow the
> +	 * system reset wakeup code.
> +	 */
> +	oris	r12,r12,SRR1_WAKEMCE_RESVD@h
> +	mtspr	SPRN_SRR1,r12
> +	std	r12,_MSR(r1)
> +
> +	/*
> +	 * Decrement MCE nesting after finishing with the stack.
> +	 */
> +	lhz	r11,PACA_IN_MCE(r13)
> +	subi	r11,r11,1
> +	sth	r11,PACA_IN_MCE(r13)

Looks like we are not winding up. Shouldn't we? What if we end up in
pnv_wakeup_noloss(), which assumes that no GPRs are lost? Am I missing
anything?

> +	b	pnv_powersave_wakeup
> +#endif
>  	/*

[...]

Rest looks good to me.

Reviewed-by: Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com>

Thanks,
-Mahesh.
Nicholas Piggin March 16, 2017, 1:05 p.m. UTC | #2
On Thu, 16 Mar 2017 18:10:48 +0530
Mahesh Jagannath Salgaonkar <mahesh@linux.vnet.ibm.com> wrote:

> On 03/14/2017 02:53 PM, Nicholas Piggin wrote:
> > The ISA specifies power save wakeup can cause a machine check interrupt.
> > The machine check handler currently has code to handle that for POWER8,
> > but POWER9 crashes when trying to execute the P8 style sleep
> > instructions.
> > 
> > So queue up the machine check, then call into the idle code to wake up
> > as the system reset interrupt does, rather than attempting to sleep
> > again without going through the main idle path.
> > 
> > Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> > ---
> >  arch/powerpc/include/asm/reg.h       |  1 +
> >  arch/powerpc/kernel/exceptions-64s.S | 69 ++++++++++++++++++------------------
> >  2 files changed, 35 insertions(+), 35 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
> > index fc879fd6bdae..8bbdfacce970 100644
> > --- a/arch/powerpc/include/asm/reg.h
> > +++ b/arch/powerpc/include/asm/reg.h
> > @@ -656,6 +656,7 @@
> >  #define   SRR1_ISI_PROT		0x08000000 /* ISI: Other protection fault */
> >  #define   SRR1_WAKEMASK		0x00380000 /* reason for wakeup */
> >  #define   SRR1_WAKEMASK_P8	0x003c0000 /* reason for wakeup on POWER8 and 9 */
> > +#define   SRR1_WAKEMCE_RESVD	0x003c0000 /* Unused/reserved value used by MCE wakeup to indicate cause to idle wakeup handler */
> >  #define   SRR1_WAKESYSERR	0x00300000 /* System error */
> >  #define   SRR1_WAKEEE		0x00200000 /* External interrupt */
> >  #define   SRR1_WAKEHVI		0x00240000 /* Hypervisor Virtualization Interrupt (P9) */
> > diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
> > index e390fcd04bcb..5779d2d6a192 100644
> > --- a/arch/powerpc/kernel/exceptions-64s.S
> > +++ b/arch/powerpc/kernel/exceptions-64s.S
> > @@ -306,6 +306,33 @@ EXC_COMMON_BEGIN(machine_check_common)
> >  	/* restore original r1. */			\
> >  	ld	r1,GPR1(r1)
> > 
> > +#ifdef CONFIG_PPC_P7_NAP
> > +EXC_COMMON_BEGIN(machine_check_idle_common)
> > +	bl	machine_check_queue_event
> > +	/*
> > +	 * Queue the machine check, then reload SRR1 and use it to set
> > +	 * CR3 according to pnv_powersave_wakeup convention.
> > +	 */
> > +	ld	r12,_MSR(r1)
> > +	rlwinm	r11,r12,47-31,30,31
> > +	cmpwi	cr3,r11,2
> > +
> > +	/*
> > +	 * Now put SRR1_WAKEMCE_RESVD into SRR1, allows it to follow the
> > +	 * system reset wakeup code.
> > +	 */
> > +	oris	r12,r12,SRR1_WAKEMCE_RESVD@h
> > +	mtspr	SPRN_SRR1,r12
> > +	std	r12,_MSR(r1)
> > +
> > +	/*
> > +	 * Decrement MCE nesting after finishing with the stack.
> > +	 */
> > +	lhz	r11,PACA_IN_MCE(r13)
> > +	subi	r11,r11,1
> > +	sth	r11,PACA_IN_MCE(r13)  
> 
> Looks like we are not winding up.. Shouldn't we ? What if we may end up
> in pnv_wakeup_noloss() which assumes that no GPRs are lost. Am I missing
> anything ?

Hmm, no I think you're right. Thanks, good catch. But can we do it with
just setting PACA_NAPSTATELOST?

> 
> > +	b	pnv_powersave_wakeup
> > +#endif
> >  	/*  
> 
> [...]
> 
> Rest looks good to me.
> 
> Reviewed-by: Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com>

Thanks,
Nick
Gautham R Shenoy March 16, 2017, 1:19 p.m. UTC | #3
Hi,

On Thu, Mar 16, 2017 at 11:05:20PM +1000, Nicholas Piggin wrote:
> On Thu, 16 Mar 2017 18:10:48 +0530
> Mahesh Jagannath Salgaonkar <mahesh@linux.vnet.ibm.com> wrote:
> 
> > On 03/14/2017 02:53 PM, Nicholas Piggin wrote:
> > > The ISA specifies power save wakeup can cause a machine check interrupt.
> > > The machine check handler currently has code to handle that for POWER8,
> > > but POWER9 crashes when trying to execute the P8 style sleep
> > > instructions.
> > > 
> > > So queue up the machine check, then call into the idle code to wake up
> > > as the system reset interrupt does, rather than attempting to sleep
> > > again without going through the main idle path.
> > > 
> > > Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> > > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> > > ---
> > >  arch/powerpc/include/asm/reg.h       |  1 +
> > >  arch/powerpc/kernel/exceptions-64s.S | 69 ++++++++++++++++++------------------
> > >  2 files changed, 35 insertions(+), 35 deletions(-)
> > > 
> > > diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
> > > index fc879fd6bdae..8bbdfacce970 100644
> > > --- a/arch/powerpc/include/asm/reg.h
> > > +++ b/arch/powerpc/include/asm/reg.h
> > > @@ -656,6 +656,7 @@
> > >  #define   SRR1_ISI_PROT		0x08000000 /* ISI: Other protection fault */
> > >  #define   SRR1_WAKEMASK		0x00380000 /* reason for wakeup */
> > >  #define   SRR1_WAKEMASK_P8	0x003c0000 /* reason for wakeup on POWER8 and 9 */
> > > +#define   SRR1_WAKEMCE_RESVD	0x003c0000 /* Unused/reserved value used by MCE wakeup to indicate cause to idle wakeup handler */
> > >  #define   SRR1_WAKESYSERR	0x00300000 /* System error */
> > >  #define   SRR1_WAKEEE		0x00200000 /* External interrupt */
> > >  #define   SRR1_WAKEHVI		0x00240000 /* Hypervisor Virtualization Interrupt (P9) */
> > > diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
> > > index e390fcd04bcb..5779d2d6a192 100644
> > > --- a/arch/powerpc/kernel/exceptions-64s.S
> > > +++ b/arch/powerpc/kernel/exceptions-64s.S
> > > @@ -306,6 +306,33 @@ EXC_COMMON_BEGIN(machine_check_common)
> > >  	/* restore original r1. */			\
> > >  	ld	r1,GPR1(r1)
> > > 
> > > +#ifdef CONFIG_PPC_P7_NAP
> > > +EXC_COMMON_BEGIN(machine_check_idle_common)
> > > +	bl	machine_check_queue_event
> > > +	/*
> > > +	 * Queue the machine check, then reload SRR1 and use it to set
> > > +	 * CR3 according to pnv_powersave_wakeup convention.
> > > +	 */
> > > +	ld	r12,_MSR(r1)
> > > +	rlwinm	r11,r12,47-31,30,31
> > > +	cmpwi	cr3,r11,2
> > > +
> > > +	/*
> > > +	 * Now put SRR1_WAKEMCE_RESVD into SRR1, allows it to follow the
> > > +	 * system reset wakeup code.
> > > +	 */
> > > +	oris	r12,r12,SRR1_WAKEMCE_RESVD@h
> > > +	mtspr	SPRN_SRR1,r12
> > > +	std	r12,_MSR(r1)
> > > +
> > > +	/*
> > > +	 * Decrement MCE nesting after finishing with the stack.
> > > +	 */
> > > +	lhz	r11,PACA_IN_MCE(r13)
> > > +	subi	r11,r11,1
> > > +	sth	r11,PACA_IN_MCE(r13)  
> > 
> > Looks like we are not winding up.. Shouldn't we ? What if we may end up
> > in pnv_wakeup_noloss() which assumes that no GPRs are lost. Am I missing
> > anything ?

Nice catch! This can occur if SRR1[46:47] == 0b01.

>
> Hmm, no I think you're right. Thanks, good catch. But can we do it with
> just setting PACA_NAPSTATELOST?

Unconditionally setting PACA_NAPSTATELOST should be sufficient.

> 
> > 
> > > +	b	pnv_powersave_wakeup
> > > +#endif
> > >  	/*  
> > 
> > [...]
> > 
> > Rest looks good to me.
> > 
> > Reviewed-by: Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com>
> 
> Thanks,
> Nick
>
Nicholas Piggin March 17, 2017, 2:49 a.m. UTC | #4
On Thu, 16 Mar 2017 18:10:48 +0530
Mahesh Jagannath Salgaonkar <mahesh@linux.vnet.ibm.com> wrote:

> On 03/14/2017 02:53 PM, Nicholas Piggin wrote:
> > The ISA specifies power save wakeup can cause a machine check interrupt.
> > The machine check handler currently has code to handle that for POWER8,
> > but POWER9 crashes when trying to execute the P8 style sleep
> > instructions.
> > 
> > So queue up the machine check, then call into the idle code to wake up
> > as the system reset interrupt does, rather than attempting to sleep
> > again without going through the main idle path.
> > 
> > Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> > ---
> >  arch/powerpc/include/asm/reg.h       |  1 +
> >  arch/powerpc/kernel/exceptions-64s.S | 69 ++++++++++++++++++------------------
> >  2 files changed, 35 insertions(+), 35 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
> > index fc879fd6bdae..8bbdfacce970 100644
> > --- a/arch/powerpc/include/asm/reg.h
> > +++ b/arch/powerpc/include/asm/reg.h
> > @@ -656,6 +656,7 @@
> >  #define   SRR1_ISI_PROT		0x08000000 /* ISI: Other protection fault */
> >  #define   SRR1_WAKEMASK		0x00380000 /* reason for wakeup */
> >  #define   SRR1_WAKEMASK_P8	0x003c0000 /* reason for wakeup on POWER8 and 9 */
> > +#define   SRR1_WAKEMCE_RESVD	0x003c0000 /* Unused/reserved value used by MCE wakeup to indicate cause to idle wakeup handler */
> >  #define   SRR1_WAKESYSERR	0x00300000 /* System error */
> >  #define   SRR1_WAKEEE		0x00200000 /* External interrupt */
> >  #define   SRR1_WAKEHVI		0x00240000 /* Hypervisor Virtualization Interrupt (P9) */
> > diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
> > index e390fcd04bcb..5779d2d6a192 100644
> > --- a/arch/powerpc/kernel/exceptions-64s.S
> > +++ b/arch/powerpc/kernel/exceptions-64s.S
> > @@ -306,6 +306,33 @@ EXC_COMMON_BEGIN(machine_check_common)
> >  	/* restore original r1. */			\
> >  	ld	r1,GPR1(r1)
> > 
> > +#ifdef CONFIG_PPC_P7_NAP
> > +EXC_COMMON_BEGIN(machine_check_idle_common)
> > +	bl	machine_check_queue_event
> > +	/*
> > +	 * Queue the machine check, then reload SRR1 and use it to set
> > +	 * CR3 according to pnv_powersave_wakeup convention.
> > +	 */
> > +	ld	r12,_MSR(r1)
> > +	rlwinm	r11,r12,47-31,30,31
> > +	cmpwi	cr3,r11,2
> > +
> > +	/*
> > +	 * Now put SRR1_WAKEMCE_RESVD into SRR1, allows it to follow the
> > +	 * system reset wakeup code.
> > +	 */
> > +	oris	r12,r12,SRR1_WAKEMCE_RESVD@h
> > +	mtspr	SPRN_SRR1,r12
> > +	std	r12,_MSR(r1)
> > +
> > +	/*
> > +	 * Decrement MCE nesting after finishing with the stack.
> > +	 */
> > +	lhz	r11,PACA_IN_MCE(r13)
> > +	subi	r11,r11,1
> > +	sth	r11,PACA_IN_MCE(r13)  
> 
> Looks like we are not winding up.. Shouldn't we ? What if we may end up
> in pnv_wakeup_noloss() which assumes that no GPRs are lost. Am I missing
> anything ?

Hmm, on second look, I don't think any non-volatile GPRs are overwritten
in this path. But this MCE is a slow path, and it is a much longer path
than the system reset idle wakeup... So I'll add the napstatelost with
a comment.

Thanks,
Nick
Nicholas Piggin March 17, 2017, 5:15 a.m. UTC | #5
On Fri, 17 Mar 2017 12:49:27 +1000
Nicholas Piggin <npiggin@gmail.com> wrote:

> On Thu, 16 Mar 2017 18:10:48 +0530
> Mahesh Jagannath Salgaonkar <mahesh@linux.vnet.ibm.com> wrote:
> 
> > On 03/14/2017 02:53 PM, Nicholas Piggin wrote:  

> > Looks like we are not winding up.. Shouldn't we ? What if we may end up
> > in pnv_wakeup_noloss() which assumes that no GPRs are lost. Am I missing
> > anything ?  
> 
> Hmm, on second look, I don't think any non-volatile GPRs are overwritten
> in this path. But this MCE is a slow path, and it is a much longer path
> than the system reset idle wakeup... So I'll add the napstatelost with
> a comment.

On third look, I'll just add the comment. The windup does not restore
non-volatile GPRs either, and in general we're careful not to use them
in exception handlers. So I think it's okay.

Thanks,
Nick
Mahesh J Salgaonkar March 20, 2017, 5:22 a.m. UTC | #6
On 03/16/2017 06:49 PM, Gautham R Shenoy wrote:
> Hi,
> 
> On Thu, Mar 16, 2017 at 11:05:20PM +1000, Nicholas Piggin wrote:
>> On Thu, 16 Mar 2017 18:10:48 +0530
>> Mahesh Jagannath Salgaonkar <mahesh@linux.vnet.ibm.com> wrote:
>>
>>> On 03/14/2017 02:53 PM, Nicholas Piggin wrote:
>>>> The ISA specifies power save wakeup can cause a machine check interrupt.
>>>> The machine check handler currently has code to handle that for POWER8,
>>>> but POWER9 crashes when trying to execute the P8 style sleep
>>>> instructions.
>>>>
>>>> So queue up the machine check, then call into the idle code to wake up
>>>> as the system reset interrupt does, rather than attempting to sleep
>>>> again without going through the main idle path.
>>>>
>>>> Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
>>>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>>>> ---
>>>>  arch/powerpc/include/asm/reg.h       |  1 +
>>>>  arch/powerpc/kernel/exceptions-64s.S | 69 ++++++++++++++++++------------------
>>>>  2 files changed, 35 insertions(+), 35 deletions(-)
>>>>
>>>> diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
>>>> index fc879fd6bdae..8bbdfacce970 100644
>>>> --- a/arch/powerpc/include/asm/reg.h
>>>> +++ b/arch/powerpc/include/asm/reg.h
>>>> @@ -656,6 +656,7 @@
>>>>  #define   SRR1_ISI_PROT		0x08000000 /* ISI: Other protection fault */
>>>>  #define   SRR1_WAKEMASK		0x00380000 /* reason for wakeup */
>>>>  #define   SRR1_WAKEMASK_P8	0x003c0000 /* reason for wakeup on POWER8 and 9 */
>>>> +#define   SRR1_WAKEMCE_RESVD	0x003c0000 /* Unused/reserved value used by MCE wakeup to indicate cause to idle wakeup handler */
>>>>  #define   SRR1_WAKESYSERR	0x00300000 /* System error */
>>>>  #define   SRR1_WAKEEE		0x00200000 /* External interrupt */
>>>>  #define   SRR1_WAKEHVI		0x00240000 /* Hypervisor Virtualization Interrupt (P9) */
>>>> diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
>>>> index e390fcd04bcb..5779d2d6a192 100644
>>>> --- a/arch/powerpc/kernel/exceptions-64s.S
>>>> +++ b/arch/powerpc/kernel/exceptions-64s.S
>>>> @@ -306,6 +306,33 @@ EXC_COMMON_BEGIN(machine_check_common)
>>>>  	/* restore original r1. */			\
>>>>  	ld	r1,GPR1(r1)
>>>>
>>>> +#ifdef CONFIG_PPC_P7_NAP
>>>> +EXC_COMMON_BEGIN(machine_check_idle_common)
>>>> +	bl	machine_check_queue_event
>>>> +	/*
>>>> +	 * Queue the machine check, then reload SRR1 and use it to set
>>>> +	 * CR3 according to pnv_powersave_wakeup convention.
>>>> +	 */
>>>> +	ld	r12,_MSR(r1)
>>>> +	rlwinm	r11,r12,47-31,30,31
>>>> +	cmpwi	cr3,r11,2
>>>> +
>>>> +	/*
>>>> +	 * Now put SRR1_WAKEMCE_RESVD into SRR1, allows it to follow the
>>>> +	 * system reset wakeup code.
>>>> +	 */
>>>> +	oris	r12,r12,SRR1_WAKEMCE_RESVD@h
>>>> +	mtspr	SPRN_SRR1,r12
>>>> +	std	r12,_MSR(r1)
>>>> +
>>>> +	/*
>>>> +	 * Decrement MCE nesting after finishing with the stack.
>>>> +	 */
>>>> +	lhz	r11,PACA_IN_MCE(r13)
>>>> +	subi	r11,r11,1
>>>> +	sth	r11,PACA_IN_MCE(r13)  
>>>
>>> Looks like we are not winding up.. Shouldn't we ? What if we may end up
>>> in pnv_wakeup_noloss() which assumes that no GPRs are lost. Am I missing
>>> anything ?
> 
> Nice catch! This can occur if SRR1[46:47] == 0b01.
> 
>>
>> Hmm, no I think you're right. Thanks, good catch. But can we do it with
>> just setting PACA_NAPSTATELOST?
> 
> Unconditionally setting PACA_NAPSTATELOST should be sufficient.

Agreed, that should take care of it.

> 
>>
>>>
>>>> +	b	pnv_powersave_wakeup
>>>> +#endif
>>>>  	/*  
>>>
>>> [...]
>>>
>>> Rest looks good to me.
>>>
>>> Reviewed-by: Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com>
>>
>> Thanks,
>> Nick
>>
diff mbox

Patch

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index fc879fd6bdae..8bbdfacce970 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -656,6 +656,7 @@ 
 #define   SRR1_ISI_PROT		0x08000000 /* ISI: Other protection fault */
 #define   SRR1_WAKEMASK		0x00380000 /* reason for wakeup */
 #define   SRR1_WAKEMASK_P8	0x003c0000 /* reason for wakeup on POWER8 and 9 */
+#define   SRR1_WAKEMCE_RESVD	0x003c0000 /* Unused/reserved value used by MCE wakeup to indicate cause to idle wakeup handler */
 #define   SRR1_WAKESYSERR	0x00300000 /* System error */
 #define   SRR1_WAKEEE		0x00200000 /* External interrupt */
 #define   SRR1_WAKEHVI		0x00240000 /* Hypervisor Virtualization Interrupt (P9) */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index e390fcd04bcb..5779d2d6a192 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -306,6 +306,33 @@  EXC_COMMON_BEGIN(machine_check_common)
 	/* restore original r1. */			\
 	ld	r1,GPR1(r1)
 
+#ifdef CONFIG_PPC_P7_NAP
+EXC_COMMON_BEGIN(machine_check_idle_common)
+	bl	machine_check_queue_event
+	/*
+	 * Queue the machine check, then reload SRR1 and use it to set
+	 * CR3 according to pnv_powersave_wakeup convention.
+	 */
+	ld	r12,_MSR(r1)
+	rlwinm	r11,r12,47-31,30,31
+	cmpwi	cr3,r11,2
+
+	/*
+	 * Now put SRR1_WAKEMCE_RESVD into SRR1, allows it to follow the
+	 * system reset wakeup code.
+	 */
+	oris	r12,r12,SRR1_WAKEMCE_RESVD@h
+	mtspr	SPRN_SRR1,r12
+	std	r12,_MSR(r1)
+
+	/*
+	 * Decrement MCE nesting after finishing with the stack.
+	 */
+	lhz	r11,PACA_IN_MCE(r13)
+	subi	r11,r11,1
+	sth	r11,PACA_IN_MCE(r13)
+	b	pnv_powersave_wakeup
+#endif
 	/*
 	 * Handle machine check early in real mode. We come here with
 	 * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack.
@@ -318,6 +345,7 @@  EXC_COMMON_BEGIN(machine_check_handle_early)
 	bl	machine_check_early
 	std	r3,RESULT(r1)	/* Save result */
 	ld	r12,_MSR(r1)
+
 #ifdef	CONFIG_PPC_P7_NAP
 	/*
 	 * Check if thread was in power saving mode. We come here when any
@@ -328,43 +356,14 @@  EXC_COMMON_BEGIN(machine_check_handle_early)
 	 *
 	 * Go back to nap/sleep/winkle mode again if (b) is true.
 	 */
-	rlwinm.	r11,r12,47-31,30,31	/* Was it in power saving mode? */
-	beq	4f			/* No, it wasn't */
-	/* Thread was in power saving mode. Go back to nap again. */
-	cmpwi	r11,2
-	blt	3f
-	/* Supervisor/Hypervisor state loss */
-	li	r0,1
-	stb	r0,PACA_NAPSTATELOST(r13)
-3:	bl	machine_check_queue_event
-	MACHINE_CHECK_HANDLER_WINDUP
-	GET_PACA(r13)
-	ld	r1,PACAR1(r13)
-	/*
-	 * Check what idle state this CPU was in and go back to same mode
-	 * again.
-	 */
-	lbz	r3,PACA_THREAD_IDLE_STATE(r13)
-	cmpwi	r3,PNV_THREAD_NAP
-	bgt	10f
-	IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP)
-	/* No return */
-10:
-	cmpwi	r3,PNV_THREAD_SLEEP
-	bgt	2f
-	IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
-	/* No return */
-
-2:
-	/*
-	 * Go back to winkle. Please note that this thread was woken up in
-	 * machine check from winkle and have not restored the per-subcore
-	 * state.
-	 */
-	IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
-	/* No return */
+	BEGIN_FTR_SECTION
+	rlwinm.	r11,r12,47-31,30,31
+	beq-	4f
+	BRANCH_TO_COMMON(r10, machine_check_idle_common)
 4:
+	END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
 #endif
+
 	/*
 	 * Check if we are coming from hypervisor userspace. If yes then we
 	 * continue in host kernel in V mode to deliver the MC event.