[v5] powerpc/irq: inline call_do_irq() and call_do_softirq() on PPC32

Message ID 72a6cd86137b2a7ab835213cf5c74df6ed2f6ea7.1575739197.git.christophe.leroy@c-s.fr (mailing list archive)
State Superseded
Series [v5] powerpc/irq: inline call_do_irq() and call_do_softirq() on PPC32

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch powerpc/merge (567dea0e848944848650d7fd27699e2de5d49353)
snowpatch_ozlabs/build-ppc64le success Build succeeded
snowpatch_ozlabs/build-ppc64be success Build succeeded
snowpatch_ozlabs/build-ppc64e success Build succeeded
snowpatch_ozlabs/build-pmac32 success Build succeeded
snowpatch_ozlabs/checkpatch warning total: 0 errors, 0 warnings, 6 checks, 109 lines checked
snowpatch_ozlabs/needsstable success Patch has no Fixes tags

Commit Message

Christophe Leroy Dec. 7, 2019, 5:20 p.m. UTC
call_do_irq() and call_do_softirq() are simple enough to be
worth inlining.

Inlining them avoids an mflr/mtlr pair plus a save/reload on the stack.
It also allows GCC to keep the saved ksp_limit in a nonvolatile register.

This is inspired by the s390 arch. Several other arches do more or
less the same. The way the sparc arch does it seems odd, though.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>

---
v2: no change.
v3: no change.
v4:
- added a comment explaining the purpose of the inline asm block.
- added r2 as a clobbered reg.
v5:
- limited the change to PPC32 for now.
- removed r2 from the clobbered regs list (on PPC32, r2 points to current at all times).
- removed patch 1 and merged the ksp_limit handling in here.
---
 arch/powerpc/include/asm/irq.h |  2 ++
 arch/powerpc/kernel/irq.c      | 48 ++++++++++++++++++++++++++++++++++++++++++
 arch/powerpc/kernel/misc_32.S  | 39 ----------------------------------
 3 files changed, 50 insertions(+), 39 deletions(-)
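
For context, a rough sketch of the (unchanged) PPC32 call sites that end
up using these helpers, following the shape of arch/powerpc/kernel/irq.c
at the time (illustrative only, not part of the diff):

/* Softirq entry: run __do_softirq() on the per-CPU softirq stack. */
void do_softirq_own_stack(void)
{
	call_do_softirq(softirq_ctx[smp_processor_id()]);
}

/* Hard IRQ entry: switch to the per-CPU hard IRQ stack unless we are
 * already running on one of the IRQ stacks.
 */
void do_IRQ(struct pt_regs *regs)
{
	struct pt_regs *old_regs = set_irq_regs(regs);
	void *cursp, *irqsp, *sirqsp;

	cursp = (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1));
	irqsp = hardirq_ctx[raw_smp_processor_id()];
	sirqsp = softirq_ctx[raw_smp_processor_id()];

	if (unlikely(cursp == irqsp || cursp == sirqsp)) {
		__do_irq(regs);
		set_irq_regs(old_regs);
		return;
	}

	call_do_irq(regs, irqsp);
	set_irq_regs(old_regs);
}

With the helpers made static inline, GCC emits the stack switch directly
inside these functions, so the mflr/mtlr pair and the LR save/reload of
the old out-of-line trampolines go away.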

Comments

Christoph Hellwig Dec. 12, 2019, 12:52 p.m. UTC | #1
On Sat, Dec 07, 2019 at 05:20:04PM +0000, Christophe Leroy wrote:
> call_do_irq() and call_do_softirq() are simple enough to be
> worth inlining.
> 
> Inlining them avoids an mflr/mtlr pair plus a save/reload on the stack.
> It also allows GCC to keep the saved ksp_limit in a nonvolatile register.
> 
> This is inspired by the s390 arch. Several other arches do more or
> less the same. The way the sparc arch does it seems odd, though.

Any reason you only do this for 32-bit and not 64-bit as well?
Christophe Leroy Dec. 12, 2019, 4:34 p.m. UTC | #2
On 12/12/2019 at 13:52, Christoph Hellwig wrote:
> On Sat, Dec 07, 2019 at 05:20:04PM +0000, Christophe Leroy wrote:
>> call_do_irq() and call_do_softirq() are simple enough to be
>> worth inlining.
>>
>> Inlining them avoids an mflr/mtlr pair plus a save/reload on the stack.
>> It also allows GCC to keep the saved ksp_limit in a nonvolatile register.
>>
>> This is inspired by the s390 arch. Several other arches do more or
>> less the same. The way the sparc arch does it seems odd, though.
> 
> Any reason you only do this for 32-bit and not 64-bit as well?
> 

Yes ... There has been a long discussion on this in v4, see 
https://patchwork.ozlabs.org/patch/1174288/

The problem is that on PPC64 the r2 register is used as the TOC pointer,
and it is apparently not straightforward to make sure the caller and the
callee are using the same TOC.

On PPC32 it is simpler: r2 points to the current task_struct at all
times, so it never changes.
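
To illustrate (a rough sketch assuming the ELFv2 ABI, not part of this
patch): a compiler-generated external call on PPC64 leaves the linker a
slot to restore the TOC after a call stub,

	bl	__do_softirq	# may be redirected through a linker stub
	nop			# the linker can turn this into "ld 2, 24(1)"
				# to reload the caller's TOC

whereas a bare "bl" inside an asm block provides no such slot, so the
asm would have to save and restore r2 by hand.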

Christophe
Christophe Leroy June 1, 2020, 7:26 a.m. UTC | #3
Hi Michael,

On 07/12/2019 at 18:20, Christophe Leroy wrote:
> call_do_irq() and call_do_softirq() are simple enough to be
> worth inlining.
> 
> Inlining them avoids an mflr/mtlr pair plus a save/reload on the stack.
> It also allows GCC to keep the saved ksp_limit in a nonvolatile register.
> 
> This is inspired by the s390 arch. Several other arches do more or
> less the same. The way the sparc arch does it seems odd, though.
> 
> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
> Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>

Is there anything more I need to do for this patch to get merged?

Thanks
Christophe


> 
> ---
> v2: no change.
> v3: no change.
> v4:
> - added a comment explaining the purpose of the inline asm block.
> - added r2 as a clobbered reg.
> v5:
> - limited the change to PPC32 for now.
> - removed r2 from the clobbered regs list (on PPC32, r2 points to current at all times).
> - removed patch 1 and merged the ksp_limit handling in here.
> ---
>   arch/powerpc/include/asm/irq.h |  2 ++
>   arch/powerpc/kernel/irq.c      | 48 ++++++++++++++++++++++++++++++++++++++++++
>   arch/powerpc/kernel/misc_32.S  | 39 ----------------------------------
>   3 files changed, 50 insertions(+), 39 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
> index 814dfab7e392..e4a92f0b4ad4 100644
> --- a/arch/powerpc/include/asm/irq.h
> +++ b/arch/powerpc/include/asm/irq.h
> @@ -56,8 +56,10 @@ extern void *mcheckirq_ctx[NR_CPUS];
>   extern void *hardirq_ctx[NR_CPUS];
>   extern void *softirq_ctx[NR_CPUS];
>   
> +#ifdef CONFIG_PPC64
>   void call_do_softirq(void *sp);
>   void call_do_irq(struct pt_regs *regs, void *sp);
> +#endif
>   extern void do_IRQ(struct pt_regs *regs);
>   extern void __init init_IRQ(void);
>   extern void __do_irq(struct pt_regs *regs);
> diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
> index 5645bc9cbc09..240eca12c71d 100644
> --- a/arch/powerpc/kernel/irq.c
> +++ b/arch/powerpc/kernel/irq.c
> @@ -611,6 +611,54 @@ static inline void check_stack_overflow(void)
>   #endif
>   }
>   
> +#ifdef CONFIG_PPC32
> +static inline void call_do_softirq(const void *sp)
> +{
> +	register unsigned long ret asm("r3");
> +	unsigned long limit = current->thread.ksp_limit;
> +
> +	/* Adjust the stack limit */
> +	current->thread.ksp_limit = (unsigned long)sp;
> +
> +	/* Temporarily switch r1 to sp, call __do_softirq() then restore r1. */
> +	asm volatile(
> +		"	"PPC_STLU"	1, %2(%1);\n"
> +		"	mr		1, %1;\n"
> +		"	bl		%3;\n"
> +		"	"PPC_LL"	1, 0(1);\n" :
> +		"=r"(ret) :
> +		"b"(sp), "i"(THREAD_SIZE - STACK_FRAME_OVERHEAD), "i"(__do_softirq) :
> +		"lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", "cr7",
> +		"r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12");
> +
> +	/* Restore stack limit */
> +	current->thread.ksp_limit = limit;
> +}
> +
> +static inline void call_do_irq(struct pt_regs *regs, void *sp)
> +{
> +	register unsigned long r3 asm("r3") = (unsigned long)regs;
> +	unsigned long limit = current->thread.ksp_limit;
> +
> +	/* Adjust the stack limit */
> +	current->thread.ksp_limit = (unsigned long)sp;
> +
> +	/* Temporarily switch r1 to sp, call __do_irq() then restore r1 */
> +	asm volatile(
> +		"	"PPC_STLU"	1, %2(%1);\n"
> +		"	mr		1, %1;\n"
> +		"	bl		%3;\n"
> +		"	"PPC_LL"	1, 0(1);\n" :
> +		"+r"(r3) :
> +		"b"(sp), "i"(THREAD_SIZE - STACK_FRAME_OVERHEAD), "i"(__do_irq) :
> +		"lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", "cr7",
> +		"r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12");
> +
> +	/* Restore stack limit */
> +	current->thread.ksp_limit = limit;
> +}
> +#endif
> +
>   void __do_irq(struct pt_regs *regs)
>   {
>   	unsigned int irq;
> diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
> index d80212be8698..341a3cd199cb 100644
> --- a/arch/powerpc/kernel/misc_32.S
> +++ b/arch/powerpc/kernel/misc_32.S
> @@ -28,45 +28,6 @@
>   	.text
>   
>   /*
> - * We store the saved ksp_limit in the unused part
> - * of the STACK_FRAME_OVERHEAD
> - */
> -_GLOBAL(call_do_softirq)
> -	mflr	r0
> -	stw	r0,4(r1)
> -	lwz	r10,THREAD+KSP_LIMIT(r2)
> -	stw	r3, THREAD+KSP_LIMIT(r2)
> -	stwu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
> -	mr	r1,r3
> -	stw	r10,8(r1)
> -	bl	__do_softirq
> -	lwz	r10,8(r1)
> -	lwz	r1,0(r1)
> -	lwz	r0,4(r1)
> -	stw	r10,THREAD+KSP_LIMIT(r2)
> -	mtlr	r0
> -	blr
> -
> -/*
> - * void call_do_irq(struct pt_regs *regs, void *sp);
> - */
> -_GLOBAL(call_do_irq)
> -	mflr	r0
> -	stw	r0,4(r1)
> -	lwz	r10,THREAD+KSP_LIMIT(r2)
> -	stw	r4, THREAD+KSP_LIMIT(r2)
> -	stwu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
> -	mr	r1,r4
> -	stw	r10,8(r1)
> -	bl	__do_irq
> -	lwz	r10,8(r1)
> -	lwz	r1,0(r1)
> -	lwz	r0,4(r1)
> -	stw	r10,THREAD+KSP_LIMIT(r2)
> -	mtlr	r0
> -	blr
> -
> -/*
>    * This returns the high 64 bits of the product of two 64-bit numbers.
>    */
>   _GLOBAL(mulhdu)
>

Patch

diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index 814dfab7e392..e4a92f0b4ad4 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -56,8 +56,10 @@  extern void *mcheckirq_ctx[NR_CPUS];
 extern void *hardirq_ctx[NR_CPUS];
 extern void *softirq_ctx[NR_CPUS];
 
+#ifdef CONFIG_PPC64
 void call_do_softirq(void *sp);
 void call_do_irq(struct pt_regs *regs, void *sp);
+#endif
 extern void do_IRQ(struct pt_regs *regs);
 extern void __init init_IRQ(void);
 extern void __do_irq(struct pt_regs *regs);
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 5645bc9cbc09..240eca12c71d 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -611,6 +611,54 @@  static inline void check_stack_overflow(void)
 #endif
 }
 
+#ifdef CONFIG_PPC32
+static inline void call_do_softirq(const void *sp)
+{
+	register unsigned long ret asm("r3");
+	unsigned long limit = current->thread.ksp_limit;
+
+	/* Adjust the stack limit */
+	current->thread.ksp_limit = (unsigned long)sp;
+
+	/* Temporarily switch r1 to sp, call __do_softirq() then restore r1. */
+	asm volatile(
+		"	"PPC_STLU"	1, %2(%1);\n"
+		"	mr		1, %1;\n"
+		"	bl		%3;\n"
+		"	"PPC_LL"	1, 0(1);\n" :
+		"=r"(ret) :
+		"b"(sp), "i"(THREAD_SIZE - STACK_FRAME_OVERHEAD), "i"(__do_softirq) :
+		"lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", "cr7",
+		"r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12");
+
+	/* Restore stack limit */
+	current->thread.ksp_limit = limit;
+}
+
+static inline void call_do_irq(struct pt_regs *regs, void *sp)
+{
+	register unsigned long r3 asm("r3") = (unsigned long)regs;
+	unsigned long limit = current->thread.ksp_limit;
+
+	/* Adjust the stack limit */
+	current->thread.ksp_limit = (unsigned long)sp;
+
+	/* Temporarily switch r1 to sp, call __do_irq() then restore r1 */
+	asm volatile(
+		"	"PPC_STLU"	1, %2(%1);\n"
+		"	mr		1, %1;\n"
+		"	bl		%3;\n"
+		"	"PPC_LL"	1, 0(1);\n" :
+		"+r"(r3) :
+		"b"(sp), "i"(THREAD_SIZE - STACK_FRAME_OVERHEAD), "i"(__do_irq) :
+		"lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6", "cr7",
+		"r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12");
+
+	/* Restore stack limit */
+	current->thread.ksp_limit = limit;
+}
+#endif
+
 void __do_irq(struct pt_regs *regs)
 {
 	unsigned int irq;
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index d80212be8698..341a3cd199cb 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -28,45 +28,6 @@ 
 	.text
 
 /*
- * We store the saved ksp_limit in the unused part
- * of the STACK_FRAME_OVERHEAD
- */
-_GLOBAL(call_do_softirq)
-	mflr	r0
-	stw	r0,4(r1)
-	lwz	r10,THREAD+KSP_LIMIT(r2)
-	stw	r3, THREAD+KSP_LIMIT(r2)
-	stwu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
-	mr	r1,r3
-	stw	r10,8(r1)
-	bl	__do_softirq
-	lwz	r10,8(r1)
-	lwz	r1,0(r1)
-	lwz	r0,4(r1)
-	stw	r10,THREAD+KSP_LIMIT(r2)
-	mtlr	r0
-	blr
-
-/*
- * void call_do_irq(struct pt_regs *regs, void *sp);
- */
-_GLOBAL(call_do_irq)
-	mflr	r0
-	stw	r0,4(r1)
-	lwz	r10,THREAD+KSP_LIMIT(r2)
-	stw	r4, THREAD+KSP_LIMIT(r2)
-	stwu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
-	mr	r1,r4
-	stw	r10,8(r1)
-	bl	__do_irq
-	lwz	r10,8(r1)
-	lwz	r1,0(r1)
-	lwz	r0,4(r1)
-	stw	r10,THREAD+KSP_LIMIT(r2)
-	mtlr	r0
-	blr
-
-/*
  * This returns the high 64 bits of the product of two 64-bit numbers.
  */
 _GLOBAL(mulhdu)