diff mbox

powerpc: Don't corrupt user registers on 32-bit

Message ID 20131023084002.GA8325@iris.ozlabs.ibm.com (mailing list archive)
State Accepted, archived
Headers show

Commit Message

Paul Mackerras Oct. 23, 2013, 8:40 a.m. UTC
Commit de79f7b9f6 ("powerpc: Put FP/VSX and VR state into structures")
modified load_up_fpu() and load_up_altivec() in such a way that they
now use r7 and r8.  Unfortunately, the callers of these functions on
32-bit machines then return to userspace via fast_exception_return,
which doesn't restore all of the volatile GPRs, but only r1, r3 -- r6
and r9 -- r12.  This was causing userspace segfaults and other
userspace misbehaviour on 32-bit machines.

This fixes the problem by changing the register usage of load_up_fpu()
and load_up_altivec() to avoid using r7 and r8 and instead use r6 and
r10.  This also adds comments to those functions saying which registers
may be used.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/kernel/fpu.S    | 14 ++++++++------
 arch/powerpc/kernel/vector.S | 15 +++++++++------
 2 files changed, 17 insertions(+), 12 deletions(-)

Comments

Scott Wood Oct. 23, 2013, 10:20 a.m. UTC | #1
On Wed, Oct 23, 2013 at 09:40:02AM +0100, Paul Mackerras wrote:
> Commit de79f7b9f6 ("powerpc: Put FP/VSX and VR state into structures")
> modified load_up_fpu() and load_up_altivec() in such a way that they
> now use r7 and r8.  Unfortunately, the callers of these functions on
> 32-bit machines then return to userspace via fast_exception_return,
> which doesn't restore all of the volatile GPRs, but only r1, r3 -- r6
> and r9 -- r12.  This was causing userspace segfaults and other
> userspace misbehaviour on 32-bit machines.
> 
> This fixes the problem by changing the register usage of load_up_fpu()
> and load_up_altivec() to avoid using r7 and r8 and instead use r6 and
> r10.  This also adds comments to those functions saying which registers
> may be used.
> 
> Signed-off-by: Paul Mackerras <paulus@samba.org>
> 
> ---
> arch/powerpc/kernel/fpu.S    | 14 ++++++++------
>  arch/powerpc/kernel/vector.S | 15 +++++++++------
>  2 files changed, 17 insertions(+), 12 deletions(-)

Tested-by: Scott Wood <scottwood@freescale.com> (on e500mc, so no altivec)

-Scott
Alexander Graf Oct. 23, 2013, 3:07 p.m. UTC | #2
Am 23.10.2013 um 11:20 schrieb Scott Wood <scottwood@freescale.com>:

> On Wed, Oct 23, 2013 at 09:40:02AM +0100, Paul Mackerras wrote:
>> Commit de79f7b9f6 ("powerpc: Put FP/VSX and VR state into structures")
>> modified load_up_fpu() and load_up_altivec() in such a way that they
>> now use r7 and r8.  Unfortunately, the callers of these functions on
>> 32-bit machines then return to userspace via fast_exception_return,
>> which doesn't restore all of the volatile GPRs, but only r1, r3 -- r6
>> and r9 -- r12.  This was causing userspace segfaults and other
>> userspace misbehaviour on 32-bit machines.
>> 
>> This fixes the problem by changing the register usage of load_up_fpu()
>> and load_up_altivec() to avoid using r7 and r8 and instead use r6 and
>> r10.  This also adds comments to those functions saying which registers
>> may be used.
>> 
>> Signed-off-by: Paul Mackerras <paulus@samba.org>
>> 
>> ---
>> arch/powerpc/kernel/fpu.S    | 14 ++++++++------
>> arch/powerpc/kernel/vector.S | 15 +++++++++------
>> 2 files changed, 17 insertions(+), 12 deletions(-)
> 
> Tested-by: Scott Wood <scottwood@freescale.com> (on e500mc, so no altivec)

Tested-by: Alexander Graf <agraf@suse.de> (on a G4 iBook and 970)

> 
> -Scott
>
shiva7 May 22, 2014, 9:51 p.m. UTC | #3
Paul Mackerras wrote
> Commit de79f7b9f6 ("powerpc: Put FP/VSX and VR state into structures")
> modified load_up_fpu() and load_up_altivec() in such a way that they
> now use r7 and r8.  Unfortunately, the callers of these functions on
> 32-bit machines then return to userspace via fast_exception_return,
> which doesn't restore all of the volatile GPRs, but only r1, r3 -- r6
> and r9 -- r12.  This was causing userspace segfaults and other
> userspace misbehaviour on 32-bit machines.
> 
> This fixes the problem by changing the register usage of load_up_fpu()
> and load_up_altivec() to avoid using r7 and r8 and instead use r6 and
> r10.  This also adds comments to those functions saying which registers
> may be used.
> 
> Signed-off-by: Paul Mackerras <paulus@samba.org>
> ---
>  arch/powerpc/kernel/fpu.S    | 14 ++++++++------
>  arch/powerpc/kernel/vector.S | 15 +++++++++------
>  2 files changed, 17 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
> index 4dca05e..f7f5b8b 100644
> --- a/arch/powerpc/kernel/fpu.S
> +++ b/arch/powerpc/kernel/fpu.S
> @@ -106,6 +106,8 @@ _GLOBAL(store_fp_state)
>   * and save its floating-point registers in its thread_struct.
>   * Load up this task's FP registers from its thread_struct,
>   * enable the FPU for the current task and return to the task.
> + * Note that on 32-bit this can only use registers that will be
> + * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
>   */
>  _GLOBAL(load_up_fpu)
>  	mfmsr	r5
> @@ -131,10 +133,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
>  	beq	1f
>  	toreal(r4)
>  	addi	r4,r4,THREAD		/* want last_task_used_math->thread */
> -	addi	r8,r4,THREAD_FPSTATE
> -	SAVE_32FPVSRS(0, R5, R8)
> +	addi	r10,r4,THREAD_FPSTATE
> +	SAVE_32FPVSRS(0, R5, R10)
>  	mffs	fr0
> -	stfd	fr0,FPSTATE_FPSCR(r8)
> +	stfd	fr0,FPSTATE_FPSCR(r10)
>  	PPC_LL	r5,PT_REGS(r4)
>  	toreal(r5)
>  	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
> @@ -157,10 +159,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
>  	or	r12,r12,r4
>  	std	r12,_MSR(r1)
>  #endif
> -	addi	r7,r5,THREAD_FPSTATE
> -	lfd	fr0,FPSTATE_FPSCR(r7)
> +	addi	r10,r5,THREAD_FPSTATE
> +	lfd	fr0,FPSTATE_FPSCR(r10)
>  	MTFSF_L(fr0)
> -	REST_32FPVSRS(0, R4, R7)
> +	REST_32FPVSRS(0, R4, R10)
>  #ifndef CONFIG_SMP
>  	subi	r4,r5,THREAD
>  	fromreal(r4)
> diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
> index eacda4e..0458a9a 100644
> --- a/arch/powerpc/kernel/vector.S
> +++ b/arch/powerpc/kernel/vector.S
> @@ -64,6 +64,9 @@ _GLOBAL(store_vr_state)
>   * Enables the VMX for use in the kernel on return.
>   * On SMP we know the VMX is free, since we give it up every
>   * switch (ie, no lazy save of the vector registers).
> + *
> + * Note that on 32-bit this can only use registers that will be
> + * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
>   */
>  _GLOBAL(load_up_altivec)
>  	mfmsr	r5			/* grab the current MSR */
> @@ -89,11 +92,11 @@ _GLOBAL(load_up_altivec)
>  	/* Save VMX state to last_task_used_altivec's THREAD struct */
>  	toreal(r4)
>  	addi	r4,r4,THREAD
> -	addi	r7,r4,THREAD_VRSTATE
> -	SAVE_32VRS(0,r5,r7)
> +	addi	r6,r4,THREAD_VRSTATE
> +	SAVE_32VRS(0,r5,r6)
>  	mfvscr	vr0
>  	li	r10,VRSTATE_VSCR
> -	stvx	vr0,r10,r7
> +	stvx	vr0,r10,r6
>  	/* Disable VMX for last_task_used_altivec */
>  	PPC_LL	r5,PT_REGS(r4)
>  	toreal(r5)
> @@ -125,13 +128,13 @@ _GLOBAL(load_up_altivec)
>  	oris	r12,r12,MSR_VEC@h
>  	std	r12,_MSR(r1)
>  #endif
> -	addi	r7,r5,THREAD_VRSTATE
> +	addi	r6,r5,THREAD_VRSTATE
>  	li	r4,1
>  	li	r10,VRSTATE_VSCR
>  	stw	r4,THREAD_USED_VR(r5)
> -	lvx	vr0,r10,r7
> +	lvx	vr0,r10,r6
>  	mtvscr	vr0
> -	REST_32VRS(0,r4,r7)
> +	REST_32VRS(0,r4,r6)
>  #ifndef CONFIG_SMP
>  	/* Update last_task_used_altivec to 'current' */
>  	subi	r4,r5,THREAD		/* Back to 'current' */
> -- 
> 1.8.4.rc3
> 
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev


By any chance, is the same corruption also happening for the DEBUG_DEBUG
exception? I ask because I am seeing a similar SEGV, but I don't have any
code/program to prove it :(



--
View this message in context: http://linuxppc.10917.n7.nabble.com/PATCH-powerpc-Don-t-corrupt-user-registers-on-32-bit-tp77443p82590.html
Sent from the linuxppc-dev mailing list archive at Nabble.com.
diff mbox

Patch

diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 4dca05e..f7f5b8b 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -106,6 +106,8 @@  _GLOBAL(store_fp_state)
  * and save its floating-point registers in its thread_struct.
  * Load up this task's FP registers from its thread_struct,
  * enable the FPU for the current task and return to the task.
+ * Note that on 32-bit this can only use registers that will be
+ * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
  */
 _GLOBAL(load_up_fpu)
 	mfmsr	r5
@@ -131,10 +133,10 @@  END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 	beq	1f
 	toreal(r4)
 	addi	r4,r4,THREAD		/* want last_task_used_math->thread */
-	addi	r8,r4,THREAD_FPSTATE
-	SAVE_32FPVSRS(0, R5, R8)
+	addi	r10,r4,THREAD_FPSTATE
+	SAVE_32FPVSRS(0, R5, R10)
 	mffs	fr0
-	stfd	fr0,FPSTATE_FPSCR(r8)
+	stfd	fr0,FPSTATE_FPSCR(r10)
 	PPC_LL	r5,PT_REGS(r4)
 	toreal(r5)
 	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
@@ -157,10 +159,10 @@  END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 	or	r12,r12,r4
 	std	r12,_MSR(r1)
 #endif
-	addi	r7,r5,THREAD_FPSTATE
-	lfd	fr0,FPSTATE_FPSCR(r7)
+	addi	r10,r5,THREAD_FPSTATE
+	lfd	fr0,FPSTATE_FPSCR(r10)
 	MTFSF_L(fr0)
-	REST_32FPVSRS(0, R4, R7)
+	REST_32FPVSRS(0, R4, R10)
 #ifndef CONFIG_SMP
 	subi	r4,r5,THREAD
 	fromreal(r4)
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index eacda4e..0458a9a 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -64,6 +64,9 @@  _GLOBAL(store_vr_state)
  * Enables the VMX for use in the kernel on return.
  * On SMP we know the VMX is free, since we give it up every
  * switch (ie, no lazy save of the vector registers).
+ *
+ * Note that on 32-bit this can only use registers that will be
+ * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
  */
 _GLOBAL(load_up_altivec)
 	mfmsr	r5			/* grab the current MSR */
@@ -89,11 +92,11 @@  _GLOBAL(load_up_altivec)
 	/* Save VMX state to last_task_used_altivec's THREAD struct */
 	toreal(r4)
 	addi	r4,r4,THREAD
-	addi	r7,r4,THREAD_VRSTATE
-	SAVE_32VRS(0,r5,r7)
+	addi	r6,r4,THREAD_VRSTATE
+	SAVE_32VRS(0,r5,r6)
 	mfvscr	vr0
 	li	r10,VRSTATE_VSCR
-	stvx	vr0,r10,r7
+	stvx	vr0,r10,r6
 	/* Disable VMX for last_task_used_altivec */
 	PPC_LL	r5,PT_REGS(r4)
 	toreal(r5)
@@ -125,13 +128,13 @@  _GLOBAL(load_up_altivec)
 	oris	r12,r12,MSR_VEC@h
 	std	r12,_MSR(r1)
 #endif
-	addi	r7,r5,THREAD_VRSTATE
+	addi	r6,r5,THREAD_VRSTATE
 	li	r4,1
 	li	r10,VRSTATE_VSCR
 	stw	r4,THREAD_USED_VR(r5)
-	lvx	vr0,r10,r7
+	lvx	vr0,r10,r6
 	mtvscr	vr0
-	REST_32VRS(0,r4,r7)
+	REST_32VRS(0,r4,r6)
 #ifndef CONFIG_SMP
 	/* Update last_task_used_altivec to 'current' */
 	subi	r4,r5,THREAD		/* Back to 'current' */