
[v1,2/2] powerpc/atomics: Use immediate operand when possible

Message ID 9f50b5fadeb090553e5c2fae025052d04d52f3c7.1617896018.git.christophe.leroy@csgroup.eu (mailing list archive)
State Changes Requested
Series [v1,1/2] powerpc/bitops: Use immediate operand when possible

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch powerpc/merge (f2b8ef18c8e0634e176be99dcf242e515cfdb1d3)
snowpatch_ozlabs/build-ppc64le success Build succeeded
snowpatch_ozlabs/build-ppc64be success Build succeeded
snowpatch_ozlabs/build-ppc64e success Build succeeded
snowpatch_ozlabs/build-pmac32 success Build succeeded
snowpatch_ozlabs/checkpatch warning total: 2 errors, 5 warnings, 8 checks, 117 lines checked
snowpatch_ozlabs/needsstable success Patch has no Fixes tags

Commit Message

Christophe Leroy April 8, 2021, 3:33 p.m. UTC
Today we get the following code generation for atomic operations:

	c001bb2c:	39 20 00 01 	li      r9,1
	c001bb30:	7d 40 18 28 	lwarx   r10,0,r3
	c001bb34:	7d 09 50 50 	subf    r8,r9,r10
	c001bb38:	7d 00 19 2d 	stwcx.  r8,0,r3

	c001c7a8:	39 40 00 01 	li      r10,1
	c001c7ac:	7d 00 18 28 	lwarx   r8,0,r3
	c001c7b0:	7c ea 42 14 	add     r7,r10,r8
	c001c7b4:	7c e0 19 2d 	stwcx.  r7,0,r3

By allowing GCC to choose between an immediate and a register
operand, we get:

	c001bb2c:	7d 20 18 28 	lwarx   r9,0,r3
	c001bb30:	39 49 ff ff 	addi    r10,r9,-1
	c001bb34:	7d 40 19 2d 	stwcx.  r10,0,r3
	--
	c001c7a4:	7d 40 18 28 	lwarx   r10,0,r3
	c001c7a8:	39 0a 00 01 	addi    r8,r10,1
	c001c7ac:	7d 00 19 2d 	stwcx.  r8,0,r3

For "and", the dot form has to be used because "andi" doesn't exist.

For logical operations we use an unsigned 16-bit immediate ("K" constraint).
For arithmetic operations we use a signed 16-bit immediate ("I" constraint).
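
For illustration, a minimal standalone sketch of the trick (example only,
not the patch itself): GCC's "%I2" output modifier prints an "i" when
operand 2 is a constant, and the "rI"/"rK" constraints accept a register
or a signed/unsigned 16-bit immediate respectively:

	/* Sketch: "add%I2" becomes "addi" when 'a' is a signed 16-bit
	 * constant, and plain "add" when it lands in a register.  The
	 * "b" constraint keeps 'x' out of r0, which addi would decode
	 * as the literal value 0.
	 */
	static inline int add_imm_or_reg(int x, int a)
	{
		int t;

		__asm__ ("add%I2 %0,%1,%2" : "=r" (t) : "b" (x), "rI" (a));
		return t;
	}

	/* "and%I2." yields "andi." for a constant: only the record form
	 * of andi exists, hence the "dot" macro argument.  CR0 is set,
	 * so "cc" is clobbered.
	 */
	static inline int and_imm_or_reg(int x, int m)
	{
		int t;

		__asm__ ("and%I2. %0,%1,%2" : "=r" (t) : "r" (x), "rK" (m) : "cc");
		return t;
	}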

On pmac32_defconfig, this reduces the text size by approximately another 8 kbytes.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/include/asm/atomic.h | 56 +++++++++++++++----------------
 1 file changed, 28 insertions(+), 28 deletions(-)

Comments

Segher Boessenkool April 12, 2021, 10:08 p.m. UTC | #1
Hi!

On Thu, Apr 08, 2021 at 03:33:45PM +0000, Christophe Leroy wrote:
> +#define ATOMIC_OP(op, asm_op, dot, sign)				\
>  static __inline__ void atomic_##op(int a, atomic_t *v)			\
>  {									\
>  	int t;								\
>  									\
>  	__asm__ __volatile__(						\
>  "1:	lwarx	%0,0,%3		# atomic_" #op "\n"			\
> -	#asm_op " %0,%2,%0\n"						\
> +	#asm_op "%I2" dot " %0,%0,%2\n"					\
>  "	stwcx.	%0,0,%3 \n"						\
>  "	bne-	1b\n"							\
> -	: "=&r" (t), "+m" (v->counter)					\
> -	: "r" (a), "r" (&v->counter)					\
> +	: "=&b" (t), "+m" (v->counter)					\
> +	: "r"#sign (a), "r" (&v->counter)				\
>  	: "cc");							\
>  }									\

You need "b" (instead of "r") only for "addi".  You can use "addic"
instead, which clobbers XER[CA], but *all* inline asm does, so that is
not a downside here (it is also not slower on any CPU that matters).
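
A minimal sketch of the difference (illustration only, not from the
patch): "addi" decodes an RA field of 0 as the literal value 0 rather
than r0, so its input needs the "b" (base register) constraint, while
"addic" always treats RA as an ordinary register:

	static inline int addi_vs_addic(int x)
	{
		int t;

		/* addi: RA must avoid r0, hence "b" (r1-r31). */
		__asm__ ("addi %0,%1,16" : "=r" (t) : "b" (x));
		/* addic: RA is always a register, so plain "r" suffices;
		 * it clobbers XER[CA], which GCC assumes for all asm. */
		__asm__ ("addic %0,%1,16" : "=r" (t) : "r" (t));
		return t;
	}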

> @@ -238,14 +238,14 @@ static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
>  "1:	lwarx	%0,0,%1		# atomic_fetch_add_unless\n\
>  	cmpw	0,%0,%3 \n\
>  	beq	2f \n\
> -	add	%0,%2,%0 \n"
> +	add%I2	%0,%0,%2 \n"
>  "	stwcx.	%0,0,%1 \n\
>  	bne-	1b \n"
>  	PPC_ATOMIC_EXIT_BARRIER
> -"	subf	%0,%2,%0 \n\
> +"	sub%I2	%0,%0,%2 \n\
>  2:"
> -	: "=&r" (t)
> -	: "r" (&v->counter), "r" (a), "r" (u)
> +	: "=&b" (t)
> +	: "r" (&v->counter), "rI" (a), "r" (u)
>  	: "cc", "memory");

Same here.

Nice patches!

Acked-by: Segher Boessenkool <segher@kernel.crashing.org>


Segher
Christophe Leroy April 13, 2021, 4:36 p.m. UTC | #2
On 13/04/2021 at 00:08, Segher Boessenkool wrote:
> Hi!
> 
> On Thu, Apr 08, 2021 at 03:33:45PM +0000, Christophe Leroy wrote:
>> +#define ATOMIC_OP(op, asm_op, dot, sign)				\
>>   static __inline__ void atomic_##op(int a, atomic_t *v)			\
>>   {									\
>>   	int t;								\
>>   									\
>>   	__asm__ __volatile__(						\
>>   "1:	lwarx	%0,0,%3		# atomic_" #op "\n"			\
>> -	#asm_op " %0,%2,%0\n"						\
>> +	#asm_op "%I2" dot " %0,%0,%2\n"					\
>>   "	stwcx.	%0,0,%3 \n"						\
>>   "	bne-	1b\n"							\
>> -	: "=&r" (t), "+m" (v->counter)					\
>> -	: "r" (a), "r" (&v->counter)					\
>> +	: "=&b" (t), "+m" (v->counter)					\
>> +	: "r"#sign (a), "r" (&v->counter)				\
>>   	: "cc");							\
>>   }									\
> 
> You need "b" (instead of "r") only for "addi".  You can use "addic"
> instead, which clobbers XER[CA], but *all* inline asm does, so that is
> not a downside here (it is also not slower on any CPU that matters).
> 
>> @@ -238,14 +238,14 @@ static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
>>   "1:	lwarx	%0,0,%1		# atomic_fetch_add_unless\n\
>>   	cmpw	0,%0,%3 \n\
>>   	beq	2f \n\
>> -	add	%0,%2,%0 \n"
>> +	add%I2	%0,%0,%2 \n"
>>   "	stwcx.	%0,0,%1 \n\
>>   	bne-	1b \n"
>>   	PPC_ATOMIC_EXIT_BARRIER
>> -"	subf	%0,%2,%0 \n\
>> +"	sub%I2	%0,%0,%2 \n\
>>   2:"
>> -	: "=&r" (t)
>> -	: "r" (&v->counter), "r" (a), "r" (u)
>> +	: "=&b" (t)
>> +	: "r" (&v->counter), "rI" (a), "r" (u)
>>   	: "cc", "memory");
> 
> Same here.

Yes, I thought about addic; I didn't find a solution at first because I had forgotten the matching 'addc'.

Now, with the addc/addic pair, it works well.
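
For reference, a rough sketch of the resulting shape (assumed form of
the v2 change, not the patch as posted): "add%I2c" expands to "addic"
for an immediate and "addc" for a register, both of which treat RA as a
true register, so the "b" constraint can be dropped:

	static __inline__ void atomic_add_sketch(int a, atomic_t *v)
	{
		int t;

		__asm__ __volatile__(
	"1:	lwarx	%0,0,%3		# atomic_add (sketch)\n"
	"	add%I2c	%0,%0,%2\n"
	"	stwcx.	%0,0,%3\n"
	"	bne-	1b\n"
		: "=&r" (t), "+m" (v->counter)
		: "rI" (a), "r" (&v->counter)
		: "cc");
	}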

Thanks

> 
> Nice patches!
> 
> Acked-by: Segher Boessenkool <segher@kernel.crashing.org>
> 

Christophe

Patch

diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 61c6e8b200e8..e4b5e2f25ba7 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -37,62 +37,62 @@  static __inline__ void atomic_set(atomic_t *v, int i)
 	__asm__ __volatile__("stw%U0%X0 %1,%0" : "=m"UPD_CONSTR(v->counter) : "r"(i));
 }
 
-#define ATOMIC_OP(op, asm_op)						\
+#define ATOMIC_OP(op, asm_op, dot, sign)				\
 static __inline__ void atomic_##op(int a, atomic_t *v)			\
 {									\
 	int t;								\
 									\
 	__asm__ __volatile__(						\
 "1:	lwarx	%0,0,%3		# atomic_" #op "\n"			\
-	#asm_op " %0,%2,%0\n"						\
+	#asm_op "%I2" dot " %0,%0,%2\n"					\
 "	stwcx.	%0,0,%3 \n"						\
 "	bne-	1b\n"							\
-	: "=&r" (t), "+m" (v->counter)					\
-	: "r" (a), "r" (&v->counter)					\
+	: "=&b" (t), "+m" (v->counter)					\
+	: "r"#sign (a), "r" (&v->counter)				\
 	: "cc");							\
 }									\
 
-#define ATOMIC_OP_RETURN_RELAXED(op, asm_op)				\
+#define ATOMIC_OP_RETURN_RELAXED(op, asm_op, dot, sign)			\
 static inline int atomic_##op##_return_relaxed(int a, atomic_t *v)	\
 {									\
 	int t;								\
 									\
 	__asm__ __volatile__(						\
 "1:	lwarx	%0,0,%3		# atomic_" #op "_return_relaxed\n"	\
-	#asm_op " %0,%2,%0\n"						\
+	#asm_op "%I2" dot " %0,%0,%2\n"					\
 "	stwcx.	%0,0,%3\n"						\
 "	bne-	1b\n"							\
-	: "=&r" (t), "+m" (v->counter)					\
-	: "r" (a), "r" (&v->counter)					\
+	: "=&b" (t), "+m" (v->counter)					\
+	: "r"#sign (a), "r" (&v->counter)				\
 	: "cc");							\
 									\
 	return t;							\
 }
 
-#define ATOMIC_FETCH_OP_RELAXED(op, asm_op)				\
+#define ATOMIC_FETCH_OP_RELAXED(op, asm_op, dot, sign)			\
 static inline int atomic_fetch_##op##_relaxed(int a, atomic_t *v)	\
 {									\
 	int res, t;							\
 									\
 	__asm__ __volatile__(						\
 "1:	lwarx	%0,0,%4		# atomic_fetch_" #op "_relaxed\n"	\
-	#asm_op " %1,%3,%0\n"						\
+	#asm_op "%I3" dot " %1,%0,%3\n"					\
 "	stwcx.	%1,0,%4\n"						\
 "	bne-	1b\n"							\
-	: "=&r" (res), "=&r" (t), "+m" (v->counter)			\
-	: "r" (a), "r" (&v->counter)					\
+	: "=&b" (res), "=&r" (t), "+m" (v->counter)			\
+	: "r"#sign (a), "r" (&v->counter)				\
 	: "cc");							\
 									\
 	return res;							\
 }
 
-#define ATOMIC_OPS(op, asm_op)						\
-	ATOMIC_OP(op, asm_op)						\
-	ATOMIC_OP_RETURN_RELAXED(op, asm_op)				\
-	ATOMIC_FETCH_OP_RELAXED(op, asm_op)
+#define ATOMIC_OPS(op, asm_op, dot, sign)				\
+	ATOMIC_OP(op, asm_op, dot, sign)				\
+	ATOMIC_OP_RETURN_RELAXED(op, asm_op, dot, sign)			\
+	ATOMIC_FETCH_OP_RELAXED(op, asm_op, dot, sign)
 
-ATOMIC_OPS(add, add)
-ATOMIC_OPS(sub, subf)
+ATOMIC_OPS(add, add, "", I)
+ATOMIC_OPS(sub, sub, "", I)
 
 #define atomic_add_return_relaxed atomic_add_return_relaxed
 #define atomic_sub_return_relaxed atomic_sub_return_relaxed
@@ -101,13 +101,13 @@  ATOMIC_OPS(sub, subf)
 #define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed
 
 #undef ATOMIC_OPS
-#define ATOMIC_OPS(op, asm_op)						\
-	ATOMIC_OP(op, asm_op)						\
-	ATOMIC_FETCH_OP_RELAXED(op, asm_op)
+#define ATOMIC_OPS(op, asm_op, dot, sign)				\
+	ATOMIC_OP(op, asm_op, dot, sign)				\
+	ATOMIC_FETCH_OP_RELAXED(op, asm_op, dot, sign)
 
-ATOMIC_OPS(and, and)
-ATOMIC_OPS(or, or)
-ATOMIC_OPS(xor, xor)
+ATOMIC_OPS(and, and, ".", K)
+ATOMIC_OPS(or, or, "", K)
+ATOMIC_OPS(xor, xor, "", K)
 
 #define atomic_fetch_and_relaxed atomic_fetch_and_relaxed
 #define atomic_fetch_or_relaxed  atomic_fetch_or_relaxed
@@ -238,14 +238,14 @@  static __inline__ int atomic_fetch_add_unless(atomic_t *v, int a, int u)
 "1:	lwarx	%0,0,%1		# atomic_fetch_add_unless\n\
 	cmpw	0,%0,%3 \n\
 	beq	2f \n\
-	add	%0,%2,%0 \n"
+	add%I2	%0,%0,%2 \n"
 "	stwcx.	%0,0,%1 \n\
 	bne-	1b \n"
 	PPC_ATOMIC_EXIT_BARRIER
-"	subf	%0,%2,%0 \n\
+"	sub%I2	%0,%0,%2 \n\
 2:"
-	: "=&r" (t)
-	: "r" (&v->counter), "r" (a), "r" (u)
+	: "=&b" (t)
+	: "r" (&v->counter), "rI" (a), "r" (u)
 	: "cc", "memory");
 
 	return t;