diff mbox series

[v3,3/3] powerpc/64: optimise LOAD_REG_IMMEDIATE_SYM()

Message ID 92bf50b31f5f78cc76ed055b11a492e8e9e2c731.1566223054.git.christophe.leroy@c-s.fr (mailing list archive)
State Changes Requested
Headers show
Series [v3,1/3] powerpc: rewrite LOAD_REG_IMMEDIATE() as an intelligent macro | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch next (c9633332103e55bc73d80d07ead28b95a22a85a3)
snowpatch_ozlabs/build-ppc64le success Build succeeded
snowpatch_ozlabs/build-ppc64be success Build succeeded
snowpatch_ozlabs/build-ppc64e success Build succeeded
snowpatch_ozlabs/build-pmac32 success Build succeeded
snowpatch_ozlabs/checkpatch fail total: 1 errors, 0 warnings, 3 checks, 70 lines checked

Commit Message

Christophe Leroy Aug. 19, 2019, 1:58 p.m. UTC
Optimise LOAD_REG_IMMEDIATE_SYM() using a temporary register to
parallelise operations.

It reduces the path from 5 to 3 instructions.

Suggested-by: Segher Boessenkool <segher@kernel.crashing.org>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>

---
v3: new
---
 arch/powerpc/include/asm/ppc_asm.h   | 12 ++++++------
 arch/powerpc/kernel/exceptions-64e.S | 22 +++++++++++++---------
 arch/powerpc/kernel/head_64.S        |  2 +-
 3 files changed, 20 insertions(+), 16 deletions(-)

Comments

Segher Boessenkool Aug. 19, 2019, 2:24 p.m. UTC | #1
On Mon, Aug 19, 2019 at 01:58:12PM +0000, Christophe Leroy wrote:
> -#define LOAD_REG_IMMEDIATE_SYM(reg,expr)	\
> -	lis     reg,(expr)@highest;		\
> -	ori     reg,reg,(expr)@higher;	\
> -	rldicr  reg,reg,32,31;		\
> -	oris    reg,reg,(expr)@__AS_ATHIGH;	\
> -	ori     reg,reg,(expr)@l;
> +#define LOAD_REG_IMMEDIATE_SYM(reg, tmp, expr)	\
> +	lis	reg, (expr)@highest;		\
> +	lis	tmp, (expr)@__AS_ATHIGH;	\
> +	ori	reg, reg, (expr)@higher;	\
> +	ori	tmp, reg, (expr)@l;		\
> +	rldimi	reg, tmp, 32, 0

That should be

#define LOAD_REG_IMMEDIATE_SYM(reg, tmp, expr)	\
	lis	tmp, (expr)@highest;		\
	ori	tmp, tmp, (expr)@higher;	\
	lis	reg, (expr)@__AS_ATHIGH;	\
	ori	reg, reg, (expr)@l;		\
	rldimi	reg, tmp, 32, 0

(tmp is the high half, reg is the low half, as inputs to that rldimi).


Segher
Nicholas Piggin Aug. 19, 2019, 3:05 p.m. UTC | #2
Segher Boessenkool's on August 20, 2019 12:24 am:
> On Mon, Aug 19, 2019 at 01:58:12PM +0000, Christophe Leroy wrote:
>> -#define LOAD_REG_IMMEDIATE_SYM(reg,expr)	\
>> -	lis     reg,(expr)@highest;		\
>> -	ori     reg,reg,(expr)@higher;	\
>> -	rldicr  reg,reg,32,31;		\
>> -	oris    reg,reg,(expr)@__AS_ATHIGH;	\
>> -	ori     reg,reg,(expr)@l;
>> +#define LOAD_REG_IMMEDIATE_SYM(reg, tmp, expr)	\
>> +	lis	reg, (expr)@highest;		\
>> +	lis	tmp, (expr)@__AS_ATHIGH;	\
>> +	ori	reg, reg, (expr)@higher;	\
>> +	ori	tmp, reg, (expr)@l;		\
>> +	rldimi	reg, tmp, 32, 0
> 
> That should be
> 
> #define LOAD_REG_IMMEDIATE_SYM(reg, tmp, expr)	\
> 	lis	tmp, (expr)@highest;		\
> 	ori	tmp, tmp, (expr)@higher;	\
> 	lis	reg, (expr)@__AS_ATHIGH;	\
> 	ori	reg, reg, (expr)@l;		\
> 	rldimi	reg, tmp, 32, 0
> 
> (tmp is the high half, reg is the low half, as inputs to that rldimi).

I guess the intention was also to try to fit the independent ops into
the earliest fetch/issue cycle possible.

#define LOAD_REG_IMMEDIATE_SYM(reg, tmp, expr)	\
	lis	tmp, (expr)@highest;		\
	lis	reg, (expr)@__AS_ATHIGH;	\
	ori	tmp, tmp, (expr)@higher;	\
	ori	reg, reg, (expr)@l;		\
	rldimi	reg, tmp, 32, 0

Very cool series though.

Thanks,
Nick
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index aa8717c1571a..9d55bff9a73c 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -347,12 +347,12 @@  GLUE(.,name):
 
 #define LOAD_REG_IMMEDIATE(reg, expr) __LOAD_REG_IMMEDIATE reg, expr
 
-#define LOAD_REG_IMMEDIATE_SYM(reg,expr)	\
-	lis     reg,(expr)@highest;		\
-	ori     reg,reg,(expr)@higher;	\
-	rldicr  reg,reg,32,31;		\
-	oris    reg,reg,(expr)@__AS_ATHIGH;	\
-	ori     reg,reg,(expr)@l;
+#define LOAD_REG_IMMEDIATE_SYM(reg, tmp, expr)	\
+	lis	reg, (expr)@highest;		\
+	lis	tmp, (expr)@__AS_ATHIGH;	\
+	ori	reg, reg, (expr)@higher;	\
+	ori	tmp, reg, (expr)@l;		\
+	rldimi	reg, tmp, 32, 0
 
 #define LOAD_REG_ADDR(reg,name)			\
 	ld	reg,name@got(r2)
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 898aae6da167..829950b96d29 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -750,12 +750,14 @@  END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 	ld	r15,PACATOC(r13)
 	ld	r14,interrupt_base_book3e@got(r15)
 	ld	r15,__end_interrupts@got(r15)
-#else
-	LOAD_REG_IMMEDIATE_SYM(r14,interrupt_base_book3e)
-	LOAD_REG_IMMEDIATE_SYM(r15,__end_interrupts)
-#endif
 	cmpld	cr0,r10,r14
 	cmpld	cr1,r10,r15
+#else
+	LOAD_REG_IMMEDIATE_SYM(r14, r15, interrupt_base_book3e)
+	cmpld	cr0, r10, r14
+	LOAD_REG_IMMEDIATE_SYM(r14, r15, __end_interrupts)
+	cmpld	cr1, r10, r14
+#endif
 	blt+	cr0,1f
 	bge+	cr1,1f
 
@@ -820,12 +822,14 @@  kernel_dbg_exc:
 	ld	r15,PACATOC(r13)
 	ld	r14,interrupt_base_book3e@got(r15)
 	ld	r15,__end_interrupts@got(r15)
-#else
-	LOAD_REG_IMMEDIATE_SYM(r14,interrupt_base_book3e)
-	LOAD_REG_IMMEDIATE_SYM(r15,__end_interrupts)
-#endif
 	cmpld	cr0,r10,r14
 	cmpld	cr1,r10,r15
+#else
+	LOAD_REG_IMMEDIATE_SYM(r14, r15, interrupt_base_book3e)
+	cmpld	cr0, r10, r14
+	LOAD_REG_IMMEDIATE_SYM(r14, r15,__end_interrupts)
+	cmpld	cr1, r10, r14
+#endif
 	blt+	cr0,1f
 	bge+	cr1,1f
 
@@ -1449,7 +1453,7 @@  a2_tlbinit_code_start:
 a2_tlbinit_after_linear_map:
 
 	/* Now we branch the new virtual address mapped by this entry */
-	LOAD_REG_IMMEDIATE_SYM(r3,1f)
+	LOAD_REG_IMMEDIATE_SYM(r3, r5, 1f)
 	mtctr	r3
 	bctr
 
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 1fd44761e997..0f2d61af47cc 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -635,7 +635,7 @@  __after_prom_start:
 	sub	r5,r5,r11
 #else
 	/* just copy interrupts */
-	LOAD_REG_IMMEDIATE_SYM(r5, FIXED_SYMBOL_ABS_ADDR(__end_interrupts))
+	LOAD_REG_IMMEDIATE_SYM(r5, r11, FIXED_SYMBOL_ABS_ADDR(__end_interrupts))
 #endif
 	b	5f
 3: