Patchwork [v3,2/3] ppc/kprobe: complete kprobe and migrate exception frame

login
register
mail settings
Submitter Tiejun Chen
Date Sept. 11, 2012, 2:20 a.m.
Message ID <1347330053-27039-2-git-send-email-tiejun.chen@windriver.com>
Download mbox | patch
Permalink /patch/182994/
State Superseded
Headers show

Comments

Tiejun Chen - Sept. 11, 2012, 2:20 a.m.
We can't emulate stwu since that may corrupt current exception stack.
So we will have to do real store operation in the exception return code.

Firstly we'll allocate a trampoline exception frame below the kprobed
function stack and copy the current exception frame to the trampoline.
Then we can do this real store operation to implement 'stwu', and reroute
the trampoline frame to r1 to complete this exception migration.

Signed-off-by: Tiejun Chen <tiejun.chen@windriver.com>
---
 arch/powerpc/kernel/entry_32.S |   45 ++++++++++++++++++++++++++++++++++------
 arch/powerpc/kernel/entry_64.S |   32 ++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+), 6 deletions(-)
Benjamin Herrenschmidt - Sept. 11, 2012, 5:51 a.m.
On Tue, 2012-09-11 at 10:20 +0800, Tiejun Chen wrote:
> We can't emulate stwu since that may corrupt current exception stack.
> So we will have to do real store operation in the exception return code.
> 
> Firstly we'll allocate a trampoline exception frame below the kprobed
> function stack and copy the current exception frame to the trampoline.
> Then we can do this real store operation to implement 'stwu', and reroute
> the trampoline frame to r1 to complete this exception migration.

Ok, so not quite there yet :-)

See below:

> Signed-off-by: Tiejun Chen <tiejun.chen@windriver.com>
> ---
>  arch/powerpc/kernel/entry_32.S |   45 ++++++++++++++++++++++++++++++++++------
>  arch/powerpc/kernel/entry_64.S |   32 ++++++++++++++++++++++++++++
>  2 files changed, 71 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
> index ead5016..6cfe12f 100644
> --- a/arch/powerpc/kernel/entry_32.S
> +++ b/arch/powerpc/kernel/entry_32.S
> @@ -831,19 +831,54 @@ restore_user:
>  	bnel-	load_dbcr0
>  #endif
>  
> -#ifdef CONFIG_PREEMPT
>  	b	restore
>  
>  /* N.B. the only way to get here is from the beq following ret_from_except. */
>  resume_kernel:
> +	/* check current_thread_info, _TIF_EMULATE_STACK_STORE */
> +	CURRENT_THREAD_INFO(r9, r1)
> +	lwz	r0,TI_FLAGS(r9)
> +	andis.	r0,r0,_TIF_EMULATE_STACK_STORE@h
> +	beq+	1f

So you used r0 to load the TI_FLAGS and immediately clobbered it in
andis. forcing you to re-load them later down. Instead, put them in r8

	lwz	r8,TI_FLAGS(r9)
	andis.	r0,r8,_TIF_*
	beq+	*

> +	addi	r8,r1,INT_FRAME_SIZE	/* Get the kprobed function entry */

Then you put your entry in r8 ....

> +	lwz	r3,GPR1(r1)
> +	subi	r3,r3,INT_FRAME_SIZE	/* dst: Allocate a trampoline exception frame */
> +	mr	r4,r1			/* src:  current exception frame */
> +	li	r5,INT_FRAME_SIZE	/* size: INT_FRAME_SIZE */
> +	mr	r1,r3			/* Reroute the trampoline frame to r1 */
> +	bl	memcpy			/* Copy from the original to the trampoline */

Which you just clobbered... oops :-)

So you need to store that old r1 somewhere first then retrieve it
after the memcpy call. That or open-code the memcpy to avoid all
the clobbering problems.

> +	CURRENT_THREAD_INFO(r9, r1)
> +	lwz	r0,TI_FLAGS(r9)		/* Restore this clobbered r0 */

Re-load in r8 as suggested above ? Anyway, it doesn't matter you don't
actually need to load it at all because you re-load it in your
lwarx/stwcx. loop further down

> +	/* Do real store operation to complete stwu */
> +	lwz	r5,GPR1(r1)
> +	stw	r8,0(r5)

(Storing a clobbered value.)

> +	/* Clear _TIF_EMULATE_STACK_STORE flag */
> +	CURRENT_THREAD_INFO(r9, r1)

Why re-calculate r9 here ? you just did 4 lines above

> +	lis	r11,_TIF_EMULATE_STACK_STORE@h
> +	addi	r5,r9,TI_FLAGS
> +0:	lwarx	r8,0,r5
>
> +	andc	r8,r8,r11
> +#ifdef CONFIG_IBM405_ERR77
> +	dcbt	0,r5
> +#endif
> +	stwcx.	r8,0,r5
> +	bne-	0b

So here, r8 contains TI_FLAGS

> +1:

And if you do the change I suggested above, here too.

> +#ifdef CONFIG_PREEMPT
>  	/* check current_thread_info->preempt_count */
>  	CURRENT_THREAD_INFO(r9, r1)

r9 already has what you want

> -	lwz	r0,TI_PREEMPT(r9)
> -	cmpwi	0,r0,0		/* if non-zero, just restore regs and return */
> +	lwz	r8,TI_PREEMPT(r9)
> +	cmpwi	0,r8,0		/* if non-zero, just restore regs and return */

Leave that to be r0, r8 has your TI_FLAGS already.

>  	bne	restore
> -	lwz	r0,TI_FLAGS(r9)

See above.

>  	andi.	r0,r0,_TIF_NEED_RESCHED
>  	beq+	restore
> +	lwz	r3,_MSR(r1)
>  	andi.	r0,r3,MSR_EE	/* interrupts off? */
>  	beq	restore		/* don't schedule if so */
>  #ifdef CONFIG_TRACE_IRQFLAGS
> @@ -864,8 +899,6 @@ resume_kernel:
>  	 */
>  	bl	trace_hardirqs_on
>  #endif
> -#else
> -resume_kernel:
>  #endif /* CONFIG_PREEMPT */
>  
>  	/* interrupts are hard-disabled at this point */
> diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> index b40e0b4..b6d7483 100644
> --- a/arch/powerpc/kernel/entry_64.S
> +++ b/arch/powerpc/kernel/entry_64.S
> @@ -593,6 +593,38 @@ _GLOBAL(ret_from_except_lite)
>  	b	.ret_from_except
>  
>  resume_kernel:
> +	/* check current_thread_info, _TIF_EMULATE_STACK_STORE */
> +	CURRENT_THREAD_INFO(r9, r1)
> +	ld	r0,TI_FLAGS(r9)
> +	andis.	r0,r0,_TIF_EMULATE_STACK_STORE@h
> +	beq+	1f

Similar comments to 32-bit

> +	addi	r8,r1,INT_FRAME_SIZE	/* Get the kprobed function entry */

That gets clobbered too.

> +	lwz	r3,GPR1(r1)
> +	subi	r3,r3,INT_FRAME_SIZE	/* dst: Allocate a trampoline exception frame */
> +	mr	r4,r1			/* src:  current exception frame */
> +	li	r5,INT_FRAME_SIZE	/* size: INT_FRAME_SIZE */
> +	mr	r1,r3			/* Reroute the trampoline frame to r1 */
> +	bl	memcpy			/* Copy from the original to the trampoline */
> +
> +	CURRENT_THREAD_INFO(r9, r1)
> +	ld	r4,TI_FLAGS(r9)		/* Restore this clobbered r4 */

Useless reloads

> +	/* Do real store operation to complete stwu */
> +	lwz	r5,GPR1(r1)
> +	std	r8,0(r5)
> +
> +	/* Clear _TIF_EMULATE_STACK_STORE flag */
> +	CURRENT_THREAD_INFO(r9, r1)
> +	lis	r11,_TIF_EMULATE_STACK_STORE@h
> +	addi	r5,r9,TI_FLAGS
> +0:	ldarx	r8,0,r5
> +	andc	r8,r8,r11
> +	stdcx.	r8,0,r5
> +	bne-	0b
> +1:
> +
>  #ifdef CONFIG_PREEMPT
>  	/* Check if we need to preempt */
>  	andi.	r0,r4,_TIF_NEED_RESCHED

Cheers,
Ben.
Tiejun Chen - Sept. 12, 2012, 8:38 a.m.
On 09/11/2012 01:51 PM, Benjamin Herrenschmidt wrote:
> On Tue, 2012-09-11 at 10:20 +0800, Tiejun Chen wrote:
>> We can't emulate stwu since that may corrupt current exception stack.
>> So we will have to do real store operation in the exception return code.
>>
>> Firstly we'll allocate a trampoline exception frame below the kprobed
>> function stack and copy the current exception frame to the trampoline.
>> Then we can do this real store operation to implement 'stwu', and reroute
>> the trampoline frame to r1 to complete this exception migration.
>
> Ok, so not quite there yet :-)
>
> See below:
>
>> Signed-off-by: Tiejun Chen <tiejun.chen@windriver.com>
>> ---
>>   arch/powerpc/kernel/entry_32.S |   45 ++++++++++++++++++++++++++++++++++------
>>   arch/powerpc/kernel/entry_64.S |   32 ++++++++++++++++++++++++++++
>>   2 files changed, 71 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
>> index ead5016..6cfe12f 100644
>> --- a/arch/powerpc/kernel/entry_32.S
>> +++ b/arch/powerpc/kernel/entry_32.S
>> @@ -831,19 +831,54 @@ restore_user:
>>   	bnel-	load_dbcr0
>>   #endif
>>
>> -#ifdef CONFIG_PREEMPT
>>   	b	restore
>>
>>   /* N.B. the only way to get here is from the beq following ret_from_except. */
>>   resume_kernel:
>> +	/* check current_thread_info, _TIF_EMULATE_STACK_STORE */
>> +	CURRENT_THREAD_INFO(r9, r1)
>> +	lwz	r0,TI_FLAGS(r9)
>> +	andis.	r0,r0,_TIF_EMULATE_STACK_STORE@h
>> +	beq+	1f
>
> So you used r0 to load the TI_FLAGS and immediately clobbered it in
> andis. forcing you to re-load them later down. Instead, put them in r8
>
> 	lwz	r8,TI_FLAGS(r9)
> 	andis.	r0,r8,_TIF_*
> 	beq+	*
>
>> +	addi	r8,r1,INT_FRAME_SIZE	/* Get the kprobed function entry */
>
> Then you put your entry in r8 ....

I'll update this for 32b and 64b sections.

>
>> +	lwz	r3,GPR1(r1)
>> +	subi	r3,r3,INT_FRAME_SIZE	/* dst: Allocate a trampoline exception frame */
>> +	mr	r4,r1			/* src:  current exception frame */
>> +	li	r5,INT_FRAME_SIZE	/* size: INT_FRAME_SIZE */
>> +	mr	r1,r3			/* Reroute the trampoline frame to r1 */
>> +	bl	memcpy			/* Copy from the original to the trampoline */
>
> Which you just clobbered... oops :-)
>
> So you need to store that old r1 somewhere fist then retrieve it
> after the memcpy call. That or open-code the memcpy to avoid all
> the clobbering problems.

Maybe we can use copy_and_flush() since it looks like copy_and_flush() only clobbers
r0, r6 and LR explicitly.

I'll resync these comments for v4.

Tiejun
Benjamin Herrenschmidt - Sept. 12, 2012, 8:43 a.m.
On Wed, 2012-09-12 at 16:38 +0800, tiejun.chen wrote:
> > So you need to store that old r1 somewhere first then retrieve it
> > after the memcpy call. That or open-code the memcpy to avoid all
> > the clobbering problems.
> 
> Maybe we can use copy_and_flush() since it looks like copy_and_flush() only
> clobbers r0,
> r6 and LR explicitly.
> 
> I'll resync these comments for v4. 

I'd say just open code it. You already have src and dst in registers,
the length can easily be put in ctr... easier that way, no clobbering
to worry about nor stack frame to create etc...

Cheers,
Ben.
Tiejun Chen - Sept. 12, 2012, 8:55 a.m.
On 09/12/2012 04:43 PM, Benjamin Herrenschmidt wrote:
> On Wed, 2012-09-12 at 16:38 +0800, tiejun.chen wrote:
>>> So you need to store that old r1 somewhere first then retrieve it
>>> after the memcpy call. That or open-code the memcpy to avoid all
>>> the clobbering problems.
>>
>> Maybe we can use copy_and_flush() since it looks like copy_and_flush() only
>> clobbers r0,
>> r6 and LR explicitly.
>>
>> I'll resync these comments for v4.
>
> I'd say just open code it. You already have src and dst in registers,
> the length can easily be put in ctr... easier that way, no clobbering

ctr should be easier :)

> to worry about nor stack frame to create etc...

If you don't like this v4, let me know and then I can go back memcpy for next 
version.

Tiejun
Benjamin Herrenschmidt - Sept. 12, 2012, 10:38 a.m.
On Wed, 2012-09-12 at 16:55 +0800, tiejun.chen wrote:
> > to worry about nor stack frame to create etc...
> 
> If you don't like this v4, let me know and then I can go back memcpy
> for next 
> version. 

Just open code the whole copy. It should be easy really. As I said, you
have the src and dst already in registers and you know they are aligned,
so just put the size of the frame in a register (divided by 4), do an
mtctr and do a little load_update/store_update loop to do the copy, all
in the asm.

Ben.

Patch

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index ead5016..6cfe12f 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -831,19 +831,54 @@  restore_user:
 	bnel-	load_dbcr0
 #endif
 
-#ifdef CONFIG_PREEMPT
 	b	restore
 
 /* N.B. the only way to get here is from the beq following ret_from_except. */
 resume_kernel:
+	/* check current_thread_info, _TIF_EMULATE_STACK_STORE */
+	CURRENT_THREAD_INFO(r9, r1)
+	lwz	r0,TI_FLAGS(r9)
+	andis.	r0,r0,_TIF_EMULATE_STACK_STORE@h
+	beq+	1f
+
+	addi	r8,r1,INT_FRAME_SIZE	/* Get the kprobed function entry */
+
+	lwz	r3,GPR1(r1)
+	subi	r3,r3,INT_FRAME_SIZE	/* dst: Allocate a trampoline exception frame */
+	mr	r4,r1			/* src:  current exception frame */
+	li	r5,INT_FRAME_SIZE	/* size: INT_FRAME_SIZE */
+	mr	r1,r3			/* Reroute the trampoline frame to r1 */
+	bl	memcpy			/* Copy from the original to the trampoline */
+
+	CURRENT_THREAD_INFO(r9, r1)
+	lwz	r0,TI_FLAGS(r9)		/* Restore this clobbered r0 */
+
+	/* Do real store operation to complete stwu */
+	lwz	r5,GPR1(r1)
+	stw	r8,0(r5)
+
+	/* Clear _TIF_EMULATE_STACK_STORE flag */
+	CURRENT_THREAD_INFO(r9, r1)
+	lis	r11,_TIF_EMULATE_STACK_STORE@h
+	addi	r5,r9,TI_FLAGS
+0:	lwarx	r8,0,r5
+	andc	r8,r8,r11
+#ifdef CONFIG_IBM405_ERR77
+	dcbt	0,r5
+#endif
+	stwcx.	r8,0,r5
+	bne-	0b
+1:
+
+#ifdef CONFIG_PREEMPT
 	/* check current_thread_info->preempt_count */
 	CURRENT_THREAD_INFO(r9, r1)
-	lwz	r0,TI_PREEMPT(r9)
-	cmpwi	0,r0,0		/* if non-zero, just restore regs and return */
+	lwz	r8,TI_PREEMPT(r9)
+	cmpwi	0,r8,0		/* if non-zero, just restore regs and return */
 	bne	restore
-	lwz	r0,TI_FLAGS(r9)
 	andi.	r0,r0,_TIF_NEED_RESCHED
 	beq+	restore
+	lwz	r3,_MSR(r1)
 	andi.	r0,r3,MSR_EE	/* interrupts off? */
 	beq	restore		/* don't schedule if so */
 #ifdef CONFIG_TRACE_IRQFLAGS
@@ -864,8 +899,6 @@  resume_kernel:
 	 */
 	bl	trace_hardirqs_on
 #endif
-#else
-resume_kernel:
 #endif /* CONFIG_PREEMPT */
 
 	/* interrupts are hard-disabled at this point */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index b40e0b4..b6d7483 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -593,6 +593,38 @@  _GLOBAL(ret_from_except_lite)
 	b	.ret_from_except
 
 resume_kernel:
+	/* check current_thread_info, _TIF_EMULATE_STACK_STORE */
+	CURRENT_THREAD_INFO(r9, r1)
+	ld	r0,TI_FLAGS(r9)
+	andis.	r0,r0,_TIF_EMULATE_STACK_STORE@h
+	beq+	1f
+
+	addi	r8,r1,INT_FRAME_SIZE	/* Get the kprobed function entry */
+
+	lwz	r3,GPR1(r1)
+	subi	r3,r3,INT_FRAME_SIZE	/* dst: Allocate a trampoline exception frame */
+	mr	r4,r1			/* src:  current exception frame */
+	li	r5,INT_FRAME_SIZE	/* size: INT_FRAME_SIZE */
+	mr	r1,r3			/* Reroute the trampoline frame to r1 */
+	bl	memcpy			/* Copy from the original to the trampoline */
+
+	CURRENT_THREAD_INFO(r9, r1)
+	ld	r4,TI_FLAGS(r9)		/* Restore this clobbered r4 */
+
+	/* Do real store operation to complete stwu */
+	lwz	r5,GPR1(r1)
+	std	r8,0(r5)
+
+	/* Clear _TIF_EMULATE_STACK_STORE flag */
+	CURRENT_THREAD_INFO(r9, r1)
+	lis	r11,_TIF_EMULATE_STACK_STORE@h
+	addi	r5,r9,TI_FLAGS
+0:	ldarx	r8,0,r5
+	andc	r8,r8,r11
+	stdcx.	r8,0,r5
+	bne-	0b
+1:
+
 #ifdef CONFIG_PREEMPT
 	/* Check if we need to preempt */
 	andi.	r0,r4,_TIF_NEED_RESCHED