diff mbox series

powerpc64/idle: Fix SP offsets when saving GPRs

Message ID 20210130030430.11369-1-cmr@codefail.de (mailing list archive)
State Changes Requested
Headers show
Series powerpc64/idle: Fix SP offsets when saving GPRs | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch powerpc/merge (44158b256b30415079588d0fcb1bccbdc2ccd009)
snowpatch_ozlabs/build-ppc64le success Build succeeded
snowpatch_ozlabs/build-ppc64be success Build succeeded
snowpatch_ozlabs/build-ppc64e success Build succeeded
snowpatch_ozlabs/build-pmac32 success Build succeeded
snowpatch_ozlabs/checkpatch success total: 0 errors, 0 warnings, 0 checks, 150 lines checked
snowpatch_ozlabs/needsstable success Patch has no Fixes tags

Commit Message

Christopher M. Riedl Jan. 30, 2021, 3:04 a.m. UTC
The idle entry/exit code saves/restores GPRs in the stack "red zone"
(Protected Zone according to PowerPC64 ELF ABI v2). However, the offset
used for the first GPR is incorrect and overwrites the back chain - the
Protected Zone actually starts below the current SP. In practice this is
probably not an issue, but it's still incorrect so fix it.

Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
---
 arch/powerpc/kernel/idle_book3s.S | 126 +++++++++++++++---------------
 1 file changed, 63 insertions(+), 63 deletions(-)

Comments

Michael Ellerman Jan. 30, 2021, 11:32 a.m. UTC | #1
"Christopher M. Riedl" <cmr@codefail.de> writes:
> The idle entry/exit code saves/restores GPRs in the stack "red zone"
> (Protected Zone according to PowerPC64 ELF ABI v2). However, the offset
> used for the first GPR is incorrect and overwrites the back chain - the
> Protected Zone actually starts below the current SP. In practice this is
> probably not an issue, but it's still incorrect so fix it.

Nice catch.

Corrupting the back chain means you can't backtrace from there, which
could be confusing for debugging one day.

It does make me wonder why we don't just create a stack frame and use
the normal macros? It would use a bit more stack space, but we shouldn't
be short of stack space when going idle.

Nick, was there a particular reason for using the red zone?

cheers


> diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
> index 22f249b6f58d..80cf35183e9d 100644
> --- a/arch/powerpc/kernel/idle_book3s.S
> +++ b/arch/powerpc/kernel/idle_book3s.S
> @@ -53,27 +53,27 @@ _GLOBAL(isa300_idle_stop_mayloss)
>  	mflr	r4
>  	mfcr	r5
>  	/* use stack red zone rather than a new frame for saving regs */
> -	std	r2,-8*0(r1)
> -	std	r14,-8*1(r1)
> -	std	r15,-8*2(r1)
> -	std	r16,-8*3(r1)
> -	std	r17,-8*4(r1)
> -	std	r18,-8*5(r1)
> -	std	r19,-8*6(r1)
> -	std	r20,-8*7(r1)
> -	std	r21,-8*8(r1)
> -	std	r22,-8*9(r1)
> -	std	r23,-8*10(r1)
> -	std	r24,-8*11(r1)
> -	std	r25,-8*12(r1)
> -	std	r26,-8*13(r1)
> -	std	r27,-8*14(r1)
> -	std	r28,-8*15(r1)
> -	std	r29,-8*16(r1)
> -	std	r30,-8*17(r1)
> -	std	r31,-8*18(r1)
> -	std	r4,-8*19(r1)
> -	std	r5,-8*20(r1)
> +	std	r2,-8*1(r1)
> +	std	r14,-8*2(r1)
> +	std	r15,-8*3(r1)
> +	std	r16,-8*4(r1)
> +	std	r17,-8*5(r1)
> +	std	r18,-8*6(r1)
> +	std	r19,-8*7(r1)
> +	std	r20,-8*8(r1)
> +	std	r21,-8*9(r1)
> +	std	r22,-8*10(r1)
> +	std	r23,-8*11(r1)
> +	std	r24,-8*12(r1)
> +	std	r25,-8*13(r1)
> +	std	r26,-8*14(r1)
> +	std	r27,-8*15(r1)
> +	std	r28,-8*16(r1)
> +	std	r29,-8*17(r1)
> +	std	r30,-8*18(r1)
> +	std	r31,-8*19(r1)
> +	std	r4,-8*20(r1)
> +	std	r5,-8*21(r1)
>  	/* 168 bytes */
>  	PPC_STOP
>  	b	.	/* catch bugs */
> @@ -89,8 +89,8 @@ _GLOBAL(isa300_idle_stop_mayloss)
>   */
>  _GLOBAL(idle_return_gpr_loss)
>  	ld	r1,PACAR1(r13)
> -	ld	r4,-8*19(r1)
> -	ld	r5,-8*20(r1)
> +	ld	r4,-8*20(r1)
> +	ld	r5,-8*21(r1)
>  	mtlr	r4
>  	mtcr	r5
>  	/*
> @@ -98,25 +98,25 @@ _GLOBAL(idle_return_gpr_loss)
>  	 * from PACATOC. This could be avoided for that less common case
>  	 * if KVM saved its r2.
>  	 */
> -	ld	r2,-8*0(r1)
> -	ld	r14,-8*1(r1)
> -	ld	r15,-8*2(r1)
> -	ld	r16,-8*3(r1)
> -	ld	r17,-8*4(r1)
> -	ld	r18,-8*5(r1)
> -	ld	r19,-8*6(r1)
> -	ld	r20,-8*7(r1)
> -	ld	r21,-8*8(r1)
> -	ld	r22,-8*9(r1)
> -	ld	r23,-8*10(r1)
> -	ld	r24,-8*11(r1)
> -	ld	r25,-8*12(r1)
> -	ld	r26,-8*13(r1)
> -	ld	r27,-8*14(r1)
> -	ld	r28,-8*15(r1)
> -	ld	r29,-8*16(r1)
> -	ld	r30,-8*17(r1)
> -	ld	r31,-8*18(r1)
> +	ld	r2,-8*1(r1)
> +	ld	r14,-8*2(r1)
> +	ld	r15,-8*3(r1)
> +	ld	r16,-8*4(r1)
> +	ld	r17,-8*5(r1)
> +	ld	r18,-8*6(r1)
> +	ld	r19,-8*7(r1)
> +	ld	r20,-8*8(r1)
> +	ld	r21,-8*9(r1)
> +	ld	r22,-8*10(r1)
> +	ld	r23,-8*11(r1)
> +	ld	r24,-8*12(r1)
> +	ld	r25,-8*13(r1)
> +	ld	r26,-8*14(r1)
> +	ld	r27,-8*15(r1)
> +	ld	r28,-8*16(r1)
> +	ld	r29,-8*17(r1)
> +	ld	r30,-8*18(r1)
> +	ld	r31,-8*19(r1)
>  	blr
>  
>  /*
> @@ -155,27 +155,27 @@ _GLOBAL(isa206_idle_insn_mayloss)
>  	mflr	r4
>  	mfcr	r5
>  	/* use stack red zone rather than a new frame for saving regs */
> -	std	r2,-8*0(r1)
> -	std	r14,-8*1(r1)
> -	std	r15,-8*2(r1)
> -	std	r16,-8*3(r1)
> -	std	r17,-8*4(r1)
> -	std	r18,-8*5(r1)
> -	std	r19,-8*6(r1)
> -	std	r20,-8*7(r1)
> -	std	r21,-8*8(r1)
> -	std	r22,-8*9(r1)
> -	std	r23,-8*10(r1)
> -	std	r24,-8*11(r1)
> -	std	r25,-8*12(r1)
> -	std	r26,-8*13(r1)
> -	std	r27,-8*14(r1)
> -	std	r28,-8*15(r1)
> -	std	r29,-8*16(r1)
> -	std	r30,-8*17(r1)
> -	std	r31,-8*18(r1)
> -	std	r4,-8*19(r1)
> -	std	r5,-8*20(r1)
> +	std	r2,-8*1(r1)
> +	std	r14,-8*2(r1)
> +	std	r15,-8*3(r1)
> +	std	r16,-8*4(r1)
> +	std	r17,-8*5(r1)
> +	std	r18,-8*6(r1)
> +	std	r19,-8*7(r1)
> +	std	r20,-8*8(r1)
> +	std	r21,-8*9(r1)
> +	std	r22,-8*10(r1)
> +	std	r23,-8*11(r1)
> +	std	r24,-8*12(r1)
> +	std	r25,-8*13(r1)
> +	std	r26,-8*14(r1)
> +	std	r27,-8*15(r1)
> +	std	r28,-8*16(r1)
> +	std	r29,-8*17(r1)
> +	std	r30,-8*18(r1)
> +	std	r31,-8*19(r1)
> +	std	r4,-8*20(r1)
> +	std	r5,-8*21(r1)
>  	cmpwi	r3,PNV_THREAD_NAP
>  	bne	1f
>  	IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP)
> -- 
> 2.26.1
Nicholas Piggin Jan. 30, 2021, 1:44 p.m. UTC | #2
Excerpts from Michael Ellerman's message of January 30, 2021 9:32 pm:
> "Christopher M. Riedl" <cmr@codefail.de> writes:
>> The idle entry/exit code saves/restores GPRs in the stack "red zone"
>> (Protected Zone according to PowerPC64 ELF ABI v2). However, the offset
>> used for the first GPR is incorrect and overwrites the back chain - the
>> Protected Zone actually starts below the current SP. In practice this is
>> probably not an issue, but it's still incorrect so fix it.
> 
> Nice catch.
> 
> Corrupting the back chain means you can't backtrace from there, which
> could be confusing for debugging one day.

Yeah, we seem to have got away without noticing because the CPU will 
wake up and return out of here before it tries to unwind the stack,
but if you tried to walk it by hand if the CPU got stuck in idle or 
something, then we'd get confused.

> It does make me wonder why we don't just create a stack frame and use
> the normal macros? It would use a bit more stack space, but we shouldn't
> be short of stack space when going idle.
> 
> Nick, was there a particular reason for using the red zone?

I don't recall a particular reason, I think a normal stack frame is 
probably a good idea.

Thanks,
Nick
Christopher M. Riedl Feb. 4, 2021, 6:59 a.m. UTC | #3
On Sat Jan 30, 2021 at 7:44 AM CST, Nicholas Piggin wrote:
> Excerpts from Michael Ellerman's message of January 30, 2021 9:32 pm:
> > "Christopher M. Riedl" <cmr@codefail.de> writes:
> >> The idle entry/exit code saves/restores GPRs in the stack "red zone"
> >> (Protected Zone according to PowerPC64 ELF ABI v2). However, the offset
> >> used for the first GPR is incorrect and overwrites the back chain - the
> >> Protected Zone actually starts below the current SP. In practice this is
> >> probably not an issue, but it's still incorrect so fix it.
> > 
> > Nice catch.
> > 
> > Corrupting the back chain means you can't backtrace from there, which
> > could be confusing for debugging one day.
>
> Yeah, we seem to have got away without noticing because the CPU will
> wake up and return out of here before it tries to unwind the stack,
> but if you tried to walk it by hand if the CPU got stuck in idle or
> something, then we'd get confused.
>
> > It does make me wonder why we don't just create a stack frame and use
> > the normal macros? It would use a bit more stack space, but we shouldn't
> > be short of stack space when going idle.
> > 
> > Nick, was there a particular reason for using the red zone?
>
> I don't recall a particular reason, I think a normal stack frame is
> probably a good idea.

I'll send a version using STACKFRAMESIZE - I assume that's the "normal"
stack frame :)

I admit I am a bit confused when I saw the similar but much smaller
STACK_FRAME_OVERHEAD which is also used in _some_ cases to save/restore
a few registers.

>
> Thanks,
> Nick
Nicholas Piggin Feb. 4, 2021, 9:05 a.m. UTC | #4
Excerpts from Christopher M. Riedl's message of February 4, 2021 4:59 pm:
> On Sat Jan 30, 2021 at 7:44 AM CST, Nicholas Piggin wrote:
>> Excerpts from Michael Ellerman's message of January 30, 2021 9:32 pm:
>> > "Christopher M. Riedl" <cmr@codefail.de> writes:
>> >> The idle entry/exit code saves/restores GPRs in the stack "red zone"
>> >> (Protected Zone according to PowerPC64 ELF ABI v2). However, the offset
>> >> used for the first GPR is incorrect and overwrites the back chain - the
>> >> Protected Zone actually starts below the current SP. In practice this is
>> >> probably not an issue, but it's still incorrect so fix it.
>> > 
>> > Nice catch.
>> > 
>> > Corrupting the back chain means you can't backtrace from there, which
>> > could be confusing for debugging one day.
>>
>> Yeah, we seem to have got away without noticing because the CPU will
>> wake up and return out of here before it tries to unwind the stack,
>> but if you tried to walk it by hand if the CPU got stuck in idle or
>> something, then we'd get confused.
>>
>> > It does make me wonder why we don't just create a stack frame and use
>> > the normal macros? It would use a bit more stack space, but we shouldn't
>> > be short of stack space when going idle.
>> > 
>> > Nick, was there a particular reason for using the red zone?
>>
>> I don't recall a particular reason, I think a normal stack frame is
>> probably a good idea.
> 
> I'll send a version using STACKFRAMESIZE - I assume that's the "normal"
> stack frame :)
> 

I think STACKFRAMESIZE is STACK_FRAME_OVERHEAD + NVGPRs. LR and CR can 
be saved in the caller's frame so that should be okay.

> I admit I am a bit confused when I saw the similar but much smaller
> STACK_FRAME_OVERHEAD which is also used in _some_ cases to save/restore
> a few registers.

Yeah if you don't need to save all nvgprs you can use caller's frame 
plus a few bytes in the minimum frame as volatile storage.

STACK_FRAME_OVERHEAD should be 32 on LE, but I think the problem is a 
lot of asm uses it and hasn't necessarily been audited to make sure it's 
not assuming it's bigger. You could actually use STACK_FRAME_MIN_SIZE
for new code, maybe we add a STACK_FRAME_MIN_NVGPR_SIZE to match and
use that.

Thanks,
Nick
Michael Ellerman Feb. 4, 2021, 11:13 a.m. UTC | #5
Nicholas Piggin <npiggin@gmail.com> writes:
> Excerpts from Christopher M. Riedl's message of February 4, 2021 4:59 pm:
>> On Sat Jan 30, 2021 at 7:44 AM CST, Nicholas Piggin wrote:
>>> Excerpts from Michael Ellerman's message of January 30, 2021 9:32 pm:
>>> > "Christopher M. Riedl" <cmr@codefail.de> writes:
>>> >> The idle entry/exit code saves/restores GPRs in the stack "red zone"
>>> >> (Protected Zone according to PowerPC64 ELF ABI v2). However, the offset
>>> >> used for the first GPR is incorrect and overwrites the back chain - the
>>> >> Protected Zone actually starts below the current SP. In practice this is
>>> >> probably not an issue, but it's still incorrect so fix it.
>>> > 
>>> > Nice catch.
>>> > 
>>> > Corrupting the back chain means you can't backtrace from there, which
>>> > could be confusing for debugging one day.
>>>
>>> Yeah, we seem to have got away without noticing because the CPU will
>>> wake up and return out of here before it tries to unwind the stack,
>>> but if you tried to walk it by hand if the CPU got stuck in idle or
>>> something, then we'd get confused.
>>>
>>> > It does make me wonder why we don't just create a stack frame and use
>>> > the normal macros? It would use a bit more stack space, but we shouldn't
>>> > be short of stack space when going idle.
>>> > 
>>> > Nick, was there a particular reason for using the red zone?
>>>
>>> I don't recall a particular reason, I think a normal stack frame is
>>> probably a good idea.
>> 
>> I'll send a version using STACKFRAMESIZE - I assume that's the "normal"
>> stack frame :)
>> 
>
> I think STACKFRAMESIZE is STACK_FRAME_OVERHEAD + NVGPRs. LR and CR can 
> be saved in the caller's frame so that should be okay.

TBH I didn't know/had forgotten we had STACKFRAMESIZE.

The safest is SWITCH_FRAME_SIZE, because it's calculated based on
pt_regs (which can change size):

	DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs));

But the name is obviously terrible for your usage, and it will allocate
a bit more space than you need, because pt_regs has more than just the GPRs.

>> I admit I am a bit confused when I saw the similar but much smaller
>> STACK_FRAME_OVERHEAD which is also used in _some_ cases to save/restore
>> a few registers.
>
> Yeah if you don't need to save all nvgprs you can use caller's frame 
> plus a few bytes in the minimum frame as volatile storage.
>
> STACK_FRAME_OVERHEAD should be 32 on LE, but I think the problem is a 
> lot of asm uses it and hasn't necessarily been audited to make sure it's 
> not assuming it's bigger.

Yeah it's a total mess. See this ~3.5 year old issue :/

https://github.com/linuxppc/issues/issues/113

> You could actually use STACK_FRAME_MIN_SIZE for new code, maybe we add
> a STACK_FRAME_MIN_NVGPR_SIZE to match and use that.

Something like that makes me nervous because it's so easy to
accidentally use one of the macros that expects a full pt_regs.

I think ideally you have just two options.

Option 1:

You use:
  STACK_FRAME_WITH_PT_REGS = STACK_FRAME_MIN_SIZE + sizeof(struct pt_regs)

And then you can use all the macros in asm-offsets.c generated with
STACK_PT_REGS_OFFSET.


Option 2:

If you want to be fancy you manually allocate your frame with just
the right amount of space, but with the size there in the code, so for
example the idle code that wants 19 GPRs would do:

	stdu	r1, -(STACK_FRAME_MIN_SIZE + 8 * 19)(r1)

And then it's very obvious that you have a non-standard frame size and
need to be more careful.

cheers
Nicholas Piggin Feb. 4, 2021, 11:26 a.m. UTC | #6
Excerpts from Michael Ellerman's message of February 4, 2021 9:13 pm:
> Nicholas Piggin <npiggin@gmail.com> writes:
>> Excerpts from Christopher M. Riedl's message of February 4, 2021 4:59 pm:
>>> On Sat Jan 30, 2021 at 7:44 AM CST, Nicholas Piggin wrote:
>>>> Excerpts from Michael Ellerman's message of January 30, 2021 9:32 pm:
>>>> > "Christopher M. Riedl" <cmr@codefail.de> writes:
>>>> >> The idle entry/exit code saves/restores GPRs in the stack "red zone"
>>>> >> (Protected Zone according to PowerPC64 ELF ABI v2). However, the offset
>>>> >> used for the first GPR is incorrect and overwrites the back chain - the
>>>> >> Protected Zone actually starts below the current SP. In practice this is
>>>> >> probably not an issue, but it's still incorrect so fix it.
>>>> > 
>>>> > Nice catch.
>>>> > 
>>>> > Corrupting the back chain means you can't backtrace from there, which
>>>> > could be confusing for debugging one day.
>>>>
>>>> Yeah, we seem to have got away without noticing because the CPU will
>>>> wake up and return out of here before it tries to unwind the stack,
>>>> but if you tried to walk it by hand if the CPU got stuck in idle or
>>>> something, then we'd get confused.
>>>>
>>>> > It does make me wonder why we don't just create a stack frame and use
>>>> > the normal macros? It would use a bit more stack space, but we shouldn't
>>>> > be short of stack space when going idle.
>>>> > 
>>>> > Nick, was there a particular reason for using the red zone?
>>>>
>>>> I don't recall a particular reason, I think a normal stack frame is
>>>> probably a good idea.
>>> 
>>> I'll send a version using STACKFRAMESIZE - I assume that's the "normal"
>>> stack frame :)
>>> 
>>
>> I think STACKFRAMESIZE is STACK_FRAME_OVERHEAD + NVGPRs. LR and CR can 
>> be saved in the caller's frame so that should be okay.
> 
> TBH I didn't know/had forgotten we had STACKFRAMESIZE.
> 
> The safest is SWITCH_FRAME_SIZE, because it's calculated based on
> pt_regs (which can change size):
> 
> 	DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs));
> 
> But the name is obviously terrible for your usage, and it will allocate
> a bit more space than you need, because pt_regs has more than just the GPRs.

I don't see why that's safer if we're not using pt_regs.

> 
>>> I admit I am a bit confused when I saw the similar but much smaller
>>> STACK_FRAME_OVERHEAD which is also used in _some_ cases to save/restore
>>> a few registers.
>>
>> Yeah if you don't need to save all nvgprs you can use caller's frame 
>> plus a few bytes in the minimum frame as volatile storage.
>>
>> STACK_FRAME_OVERHEAD should be 32 on LE, but I think the problem is a 
>> lot of asm uses it and hasn't necessarily been audited to make sure it's 
>> not assuming it's bigger.
> 
> Yeah it's a total mess. See this ~3.5 year old issue :/
> 
> https://github.com/linuxppc/issues/issues/113
> 
>> You could actually use STACK_FRAME_MIN_SIZE for new code, maybe we add
>> a STACK_FRAME_MIN_NVGPR_SIZE to match and use that.
> 
> Something like that makes me nervous because it's so easy to
> accidentally use one of the macros that expects a full pt_regs.
> 
> I think ideally you have just two options.
> 
> Option 1:
> 
> You use:
>   STACK_FRAME_WITH_PT_REGS = STACK_FRAME_MIN_SIZE + sizeof(struct pt_regs)
> 
> And then you can use all the macros in asm-offsets.c generated with
> STACK_PT_REGS_OFFSET.

I don't see a good reason to use pt_regs here, but in general sure this 
would be good to have and begin using.

> Option 2:
> 
> If you want to be fancy you manually allocate your frame with just
> the right amount of space, but with the size there in the code, so for
> example the idle code that wants 19 GPRs would do:
> 
> 	stdu	r1, -(STACK_FRAME_MIN_SIZE + 8 * 19)(r1)
> 
> And then it's very obvious that you have a non-standard frame size and
> need to be more careful.

I would be happy with this for the idle code.

Thanks,
Nick
diff mbox series

Patch

diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 22f249b6f58d..80cf35183e9d 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -53,27 +53,27 @@  _GLOBAL(isa300_idle_stop_mayloss)
 	mflr	r4
 	mfcr	r5
 	/* use stack red zone rather than a new frame for saving regs */
-	std	r2,-8*0(r1)
-	std	r14,-8*1(r1)
-	std	r15,-8*2(r1)
-	std	r16,-8*3(r1)
-	std	r17,-8*4(r1)
-	std	r18,-8*5(r1)
-	std	r19,-8*6(r1)
-	std	r20,-8*7(r1)
-	std	r21,-8*8(r1)
-	std	r22,-8*9(r1)
-	std	r23,-8*10(r1)
-	std	r24,-8*11(r1)
-	std	r25,-8*12(r1)
-	std	r26,-8*13(r1)
-	std	r27,-8*14(r1)
-	std	r28,-8*15(r1)
-	std	r29,-8*16(r1)
-	std	r30,-8*17(r1)
-	std	r31,-8*18(r1)
-	std	r4,-8*19(r1)
-	std	r5,-8*20(r1)
+	std	r2,-8*1(r1)
+	std	r14,-8*2(r1)
+	std	r15,-8*3(r1)
+	std	r16,-8*4(r1)
+	std	r17,-8*5(r1)
+	std	r18,-8*6(r1)
+	std	r19,-8*7(r1)
+	std	r20,-8*8(r1)
+	std	r21,-8*9(r1)
+	std	r22,-8*10(r1)
+	std	r23,-8*11(r1)
+	std	r24,-8*12(r1)
+	std	r25,-8*13(r1)
+	std	r26,-8*14(r1)
+	std	r27,-8*15(r1)
+	std	r28,-8*16(r1)
+	std	r29,-8*17(r1)
+	std	r30,-8*18(r1)
+	std	r31,-8*19(r1)
+	std	r4,-8*20(r1)
+	std	r5,-8*21(r1)
 	/* 168 bytes */
 	PPC_STOP
 	b	.	/* catch bugs */
@@ -89,8 +89,8 @@  _GLOBAL(isa300_idle_stop_mayloss)
  */
 _GLOBAL(idle_return_gpr_loss)
 	ld	r1,PACAR1(r13)
-	ld	r4,-8*19(r1)
-	ld	r5,-8*20(r1)
+	ld	r4,-8*20(r1)
+	ld	r5,-8*21(r1)
 	mtlr	r4
 	mtcr	r5
 	/*
@@ -98,25 +98,25 @@  _GLOBAL(idle_return_gpr_loss)
 	 * from PACATOC. This could be avoided for that less common case
 	 * if KVM saved its r2.
 	 */
-	ld	r2,-8*0(r1)
-	ld	r14,-8*1(r1)
-	ld	r15,-8*2(r1)
-	ld	r16,-8*3(r1)
-	ld	r17,-8*4(r1)
-	ld	r18,-8*5(r1)
-	ld	r19,-8*6(r1)
-	ld	r20,-8*7(r1)
-	ld	r21,-8*8(r1)
-	ld	r22,-8*9(r1)
-	ld	r23,-8*10(r1)
-	ld	r24,-8*11(r1)
-	ld	r25,-8*12(r1)
-	ld	r26,-8*13(r1)
-	ld	r27,-8*14(r1)
-	ld	r28,-8*15(r1)
-	ld	r29,-8*16(r1)
-	ld	r30,-8*17(r1)
-	ld	r31,-8*18(r1)
+	ld	r2,-8*1(r1)
+	ld	r14,-8*2(r1)
+	ld	r15,-8*3(r1)
+	ld	r16,-8*4(r1)
+	ld	r17,-8*5(r1)
+	ld	r18,-8*6(r1)
+	ld	r19,-8*7(r1)
+	ld	r20,-8*8(r1)
+	ld	r21,-8*9(r1)
+	ld	r22,-8*10(r1)
+	ld	r23,-8*11(r1)
+	ld	r24,-8*12(r1)
+	ld	r25,-8*13(r1)
+	ld	r26,-8*14(r1)
+	ld	r27,-8*15(r1)
+	ld	r28,-8*16(r1)
+	ld	r29,-8*17(r1)
+	ld	r30,-8*18(r1)
+	ld	r31,-8*19(r1)
 	blr
 
 /*
@@ -155,27 +155,27 @@  _GLOBAL(isa206_idle_insn_mayloss)
 	mflr	r4
 	mfcr	r5
 	/* use stack red zone rather than a new frame for saving regs */
-	std	r2,-8*0(r1)
-	std	r14,-8*1(r1)
-	std	r15,-8*2(r1)
-	std	r16,-8*3(r1)
-	std	r17,-8*4(r1)
-	std	r18,-8*5(r1)
-	std	r19,-8*6(r1)
-	std	r20,-8*7(r1)
-	std	r21,-8*8(r1)
-	std	r22,-8*9(r1)
-	std	r23,-8*10(r1)
-	std	r24,-8*11(r1)
-	std	r25,-8*12(r1)
-	std	r26,-8*13(r1)
-	std	r27,-8*14(r1)
-	std	r28,-8*15(r1)
-	std	r29,-8*16(r1)
-	std	r30,-8*17(r1)
-	std	r31,-8*18(r1)
-	std	r4,-8*19(r1)
-	std	r5,-8*20(r1)
+	std	r2,-8*1(r1)
+	std	r14,-8*2(r1)
+	std	r15,-8*3(r1)
+	std	r16,-8*4(r1)
+	std	r17,-8*5(r1)
+	std	r18,-8*6(r1)
+	std	r19,-8*7(r1)
+	std	r20,-8*8(r1)
+	std	r21,-8*9(r1)
+	std	r22,-8*10(r1)
+	std	r23,-8*11(r1)
+	std	r24,-8*12(r1)
+	std	r25,-8*13(r1)
+	std	r26,-8*14(r1)
+	std	r27,-8*15(r1)
+	std	r28,-8*16(r1)
+	std	r29,-8*17(r1)
+	std	r30,-8*18(r1)
+	std	r31,-8*19(r1)
+	std	r4,-8*20(r1)
+	std	r5,-8*21(r1)
 	cmpwi	r3,PNV_THREAD_NAP
 	bne	1f
 	IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP)