powerpc: add compile-time support for lbarx, lwarx

Message ID: 20201107032328.2454582-1-npiggin@gmail.com
State: Changes Requested
Series: powerpc: add compile-time support for lbarx, lwarx

Checks

Context                         Check    Description
snowpatch_ozlabs/apply_patch    success  Successfully applied on branch powerpc/merge (f924bf2a58108816db1c95e08af4b1f99c90afa3)
snowpatch_ozlabs/build-ppc64le  success  Build succeeded
snowpatch_ozlabs/build-ppc64be  success  Build succeeded
snowpatch_ozlabs/build-ppc64e   success  Build succeeded
snowpatch_ozlabs/build-pmac32   success  Build succeeded
snowpatch_ozlabs/checkpatch     warning  total: 0 errors, 30 warnings, 14 checks, 304 lines checked
snowpatch_ozlabs/needsstable    success  Patch has no Fixes tags

Commit Message

Nicholas Piggin Nov. 7, 2020, 3:23 a.m. UTC
ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx.
Add a compile option that allows code to use it, and add support for
8 and 16 bit values in cmpxchg and xchg.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/Kconfig                   |   3 +
 arch/powerpc/include/asm/cmpxchg.h     | 236 ++++++++++++++++++++++++-
 arch/powerpc/platforms/Kconfig.cputype |   5 +
 3 files changed, 243 insertions(+), 1 deletion(-)
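
Without a byte- or halfword-sized larx, the existing XCHG_GEN/CMPXCHG_GEN
fallbacks emulate 8- and 16-bit operations on top of a word-sized
lwarx/stwcx. loop, shifting and masking the value into its containing word.
A rough C sketch of that emulation (illustrative only: the function name and
the little-endian byte numbering are assumptions, and a GCC builtin stands
in for the real lwarx/stwcx. loop):

/*
 * Illustrative sketch, not the kernel's generated code: an 8-bit xchg
 * emulated on top of a word-sized ll/sc loop.  The real XCHG_GEN
 * fallback does the same mask/shift dance with lwarx/stwcx.
 */
static inline unsigned char xchg_u8_emulated(volatile unsigned char *p,
					     unsigned char val)
{
	/* word containing the byte, and the byte's offset within it
	 * (little-endian numbering assumed here) */
	volatile unsigned int *wp =
		(volatile unsigned int *)((unsigned long)p & ~3UL);
	unsigned int shift = ((unsigned long)p & 3UL) * 8;
	unsigned int mask = 0xffu << shift;
	unsigned int old, new;

	do {
		old = *wp;				/* lwarx in real code */
		new = (old & ~mask) | ((unsigned int)val << shift);
	} while (!__sync_bool_compare_and_swap(wp, old, new));	/* stwcx. retry */

	return (old & mask) >> shift;
}

With lbarx/lharx available, the same exchange collapses to the direct
two-instruction larx/stcx. loops added by this patch.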

Comments

Gabriel Paubert Nov. 7, 2020, 7:12 a.m. UTC | #1
On Sat, Nov 07, 2020 at 01:23:28PM +1000, Nicholas Piggin wrote:
> ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx.

Hmm, lwarx has existed since the original POWER AFAIR, s/lwarx/lharx/ perhaps?

Same for the title of the patch and the CONFIG variable.

	Gabriel

Christophe Leroy Nov. 7, 2020, 8:15 a.m. UTC | #2
On 07/11/2020 at 04:23, Nicholas Piggin wrote:
> ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx.
> Add a compile option that allows code to use it, and add support for
> 8 and 16 bit values in cmpxchg and xchg.

Do you mean lharx? Because lwarx exists on all powerpcs, I think.

> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>   arch/powerpc/Kconfig                   |   3 +
>   arch/powerpc/include/asm/cmpxchg.h     | 236 ++++++++++++++++++++++++-
>   arch/powerpc/platforms/Kconfig.cputype |   5 +
>   3 files changed, 243 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index e9f13fe08492..d231af06f75a 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -266,6 +266,9 @@ config PPC_BARRIER_NOSPEC
>   	default y
>   	depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E
>   
> +config PPC_LBARX_LWARX
> +	bool

s/LWARX/LHARX/?

And maybe better with PPC_HAS_LBARX_LWARX?

> +
>   config EARLY_PRINTK
>   	bool
>   	default y
> diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
> index cf091c4c22e5..17fd996dc0d4 100644
> --- a/arch/powerpc/include/asm/cmpxchg.h
> +++ b/arch/powerpc/include/asm/cmpxchg.h
> @@ -77,10 +77,76 @@ u32 __cmpxchg_##type##sfx(volatile void *p, u32 old, u32 new)	\
>    * the previous value stored there.
>    */
>   
> +#ifndef CONFIG_PPC_LBARX_LWARX
>   XCHG_GEN(u8, _local, "memory");
>   XCHG_GEN(u8, _relaxed, "cc");
>   XCHG_GEN(u16, _local, "memory");
>   XCHG_GEN(u16, _relaxed, "cc");
> +#else
> +static __always_inline unsigned long
> +__xchg_u8_local(volatile void *p, unsigned long val)
> +{
> +	unsigned long prev;
> +
> +	__asm__ __volatile__(
> +"1:	lbarx	%0,0,%2 \n"
> +"	stbcx.	%3,0,%2 \n\
> +	bne-	1b"
> +	: "=&r" (prev), "+m" (*(volatile unsigned char *)p)
> +	: "r" (p), "r" (val)
> +	: "cc", "memory");
> +
> +	return prev;
> +}
> +
> [... the u8 _relaxed and u16 _local/_relaxed variants trimmed ...]
> +#endif

That's a lot of code duplication. Could we use some macro, in the same spirit as what is done in 
arch/powerpc/include/asm/io.h for in_be16(), in_be32(), in_be64() and friends?
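
One possible shape for such a generator, as a sketch only: XCHG_LARX_GEN is
an illustrative name, not an existing kernel macro, and the posted _relaxed
variants' typed, non-volatile pointers are unified here for brevity:

/* Sketch: stamp out the larx/stcx. xchg variants from one template.
 * The larx/stcx mnemonics and the clobber list are macro parameters. */
#define XCHG_LARX_GEN(type, sfx, larx, stcx, ...)			\
static __always_inline unsigned long					\
__xchg_##type##sfx(volatile void *p, unsigned long val)			\
{									\
	unsigned long prev;						\
									\
	__asm__ __volatile__(						\
"1:	" larx "	%0,0,%2\n"					\
"	" stcx "	%3,0,%2\n"					\
"	bne-	1b"							\
	: "=&r" (prev), "+m" (*(volatile type *)p)			\
	: "r" (p), "r" (val)						\
	: __VA_ARGS__);							\
									\
	return prev;							\
}

XCHG_LARX_GEN(u8, _local, "lbarx", "stbcx.", "cc", "memory");
XCHG_LARX_GEN(u8, _relaxed, "lbarx", "stbcx.", "cc");
XCHG_LARX_GEN(u16, _local, "lharx", "sthcx.", "cc", "memory");
XCHG_LARX_GEN(u16, _relaxed, "lharx", "sthcx.", "cc");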

>   
>   static __always_inline unsigned long
>   __xchg_u32_local(volatile void *p, unsigned long val)
> @@ -198,11 +264,12 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size)
>   	(__typeof__(*(ptr))) __xchg_relaxed((ptr),			\
>   			(unsigned long)_x_, sizeof(*(ptr)));		\
>   })
> +
>   /*
>    * Compare and exchange - if *p == old, set it to new,
>    * and return the old value of *p.
>    */
> -
> +#ifndef CONFIG_PPC_LBARX_LWARX
>   CMPXCHG_GEN(u8, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory");
>   CMPXCHG_GEN(u8, _local, , , "memory");
>   CMPXCHG_GEN(u8, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
> @@ -211,6 +278,173 @@ CMPXCHG_GEN(u16, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory");
>   CMPXCHG_GEN(u16, _local, , , "memory");
>   CMPXCHG_GEN(u16, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
>   CMPXCHG_GEN(u16, _relaxed, , , "cc");
> +#else
> +static __always_inline unsigned long
> +__cmpxchg_u8(volatile unsigned char *p, unsigned long old, unsigned long new)
> +{
> +	unsigned int prev;
> +
> +	__asm__ __volatile__ (
> +	PPC_ATOMIC_ENTRY_BARRIER
> +"1:	lbarx	%0,0,%2		# __cmpxchg_u8\n\
> +	cmpw	0,%0,%3\n\
> +	bne-	2f\n"
> +"	stbcx.	%4,0,%2\n\
> +	bne-	1b"
> +	PPC_ATOMIC_EXIT_BARRIER
> +	"\n\
> +2:"
> +	: "=&r" (prev), "+m" (*p)
> +	: "r" (p), "r" (old), "r" (new)
> +	: "cc", "memory");
> +
> +	return prev;
> +}
> +
> [... the u8 _local/_relaxed/_acquire and all four u16 variants trimmed ...]
> +#endif

That's a lot of code duplication. Could we use some macro, in the same spirit as what is done in 
arch/powerpc/include/asm/io.h for in_be16(), in_be32(), in_be64() and friends?
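
On the cmpxchg side a generator would also take the entry/exit barriers as
parameters. Again a sketch with an illustrative name (CMPXCHG_LARX_GEN is
not existing kernel code), assuming PPC_ATOMIC_ENTRY_BARRIER,
PPC_ATOMIC_EXIT_BARRIER and PPC_ACQUIRE_BARRIER expand to assembly string
literals as they do today; pointer qualifiers and the type of prev are
unified for brevity where the posted functions vary:

/* Sketch: one template for all larx/stcx. cmpxchg variants, with the
 * entry/exit barriers passed in as assembly string fragments. */
#define CMPXCHG_LARX_GEN(type, sfx, larx, stcx, entry, exit, ...)	\
static __always_inline unsigned long					\
__cmpxchg_##type##sfx(volatile type *p, unsigned long old,		\
		      unsigned long new)				\
{									\
	unsigned long prev;						\
									\
	__asm__ __volatile__ (						\
	entry								\
"1:	" larx "	%0,0,%2\n"					\
"	cmpw	0,%0,%3\n"						\
"	bne-	2f\n"							\
"	" stcx "	%4,0,%2\n"					\
"	bne-	1b\n"							\
	exit								\
"2:"									\
	: "=&r" (prev), "+m" (*p)					\
	: "r" (p), "r" (old), "r" (new)					\
	: __VA_ARGS__);							\
									\
	return prev;							\
}

CMPXCHG_LARX_GEN(u8, , "lbarx", "stbcx.",
		 PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER,
		 "cc", "memory");
CMPXCHG_LARX_GEN(u8, _local, "lbarx", "stbcx.", "", "", "cc", "memory");
CMPXCHG_LARX_GEN(u8, _acquire, "lbarx", "stbcx.",
		 "", PPC_ACQUIRE_BARRIER "\n", "cc", "memory");
CMPXCHG_LARX_GEN(u8, _relaxed, "lbarx", "stbcx.", "", "", "cc");

The u16 invocations would follow the same pattern with "lharx"/"sthcx.".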

>   
>   static __always_inline unsigned long
>   __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
> diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
> index c194c4ae8bc7..2f8c8d61dba4 100644
> --- a/arch/powerpc/platforms/Kconfig.cputype
> +++ b/arch/powerpc/platforms/Kconfig.cputype
> @@ -118,6 +118,7 @@ config GENERIC_CPU
>   	bool "Generic (POWER8 and above)"
>   	depends on PPC64 && CPU_LITTLE_ENDIAN
>   	select ARCH_HAS_FAST_MULTIPLIER
> +	select PPC_LBARX_LWARX

s/LWARX/LHARX/?

> [...]

Christophe
Segher Boessenkool Nov. 7, 2020, 11:42 a.m. UTC | #3
On Sat, Nov 07, 2020 at 08:12:13AM +0100, Gabriel Paubert wrote:
> On Sat, Nov 07, 2020 at 01:23:28PM +1000, Nicholas Piggin wrote:
> > ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx.
> 
> Hmm, lwarx has existed since the original POWER AFAIR,

Almost: it was new on PowerPC.


Segher
Gabriel Paubert Nov. 8, 2020, 8:01 p.m. UTC | #4
On Sat, Nov 07, 2020 at 05:42:57AM -0600, Segher Boessenkool wrote:
> On Sat, Nov 07, 2020 at 08:12:13AM +0100, Gabriel Paubert wrote:
> > On Sat, Nov 07, 2020 at 01:23:28PM +1000, Nicholas Piggin wrote:
> > > ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx.
> > 
> > Hmm, lwarx has existed since the original POWER AFAIR,
> 
> Almost: it was new on PowerPC.

I stand corrected. Does this mean that POWER1 (and 2, I believe) had
no SMP support?

	Gabriel
Segher Boessenkool Nov. 9, 2020, 12:34 p.m. UTC | #5
On Sun, Nov 08, 2020 at 09:01:52PM +0100, Gabriel Paubert wrote:
> On Sat, Nov 07, 2020 at 05:42:57AM -0600, Segher Boessenkool wrote:
> > On Sat, Nov 07, 2020 at 08:12:13AM +0100, Gabriel Paubert wrote:
> > > On Sat, Nov 07, 2020 at 01:23:28PM +1000, Nicholas Piggin wrote:
> > > > ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx.
> > > 
> > > Hmm, lwarx has existed since the original POWER AFAIR,
> > 
> > Almost: it was new on PowerPC.
> 
> I stand corrected. Does this mean that POWER1 (and 2, I believe) had
> no SMP support?

As I understand it, that's correct. Of course you can always do SMP "by
hand" -- you can do all synchronisation via software (perhaps using some
knowledge of the specific hardware you're running on); it's just slow
(and usually not portable). Compare to SMP on the 603, for example.


Segher
Nicholas Piggin Nov. 10, 2020, 8:18 a.m. UTC | #6
Excerpts from Christophe Leroy's message of November 7, 2020 6:15 pm:
> 
> 
> On 07/11/2020 at 04:23, Nicholas Piggin wrote:
>> ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lwarx.
>> Add a compile option that allows code to use it, and add support for
>> 8 and 16 bit values in cmpxchg and xchg.
> 
> Do you mean lharx? Because lwarx exists on all powerpcs, I think.

Thanks to all who pointed out mistakes :) Yes, lharx.

> 
>> 
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>>   arch/powerpc/Kconfig                   |   3 +
>>   arch/powerpc/include/asm/cmpxchg.h     | 236 ++++++++++++++++++++++++-
>>   arch/powerpc/platforms/Kconfig.cputype |   5 +
>>   3 files changed, 243 insertions(+), 1 deletion(-)
>> 
>> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
>> index e9f13fe08492..d231af06f75a 100644
>> --- a/arch/powerpc/Kconfig
>> +++ b/arch/powerpc/Kconfig
>> @@ -266,6 +266,9 @@ config PPC_BARRIER_NOSPEC
>>   	default y
>>   	depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E
>>   
>> +config PPC_LBARX_LWARX
>> +	bool
> 
> s/LWARX/LHARX/?
> 
> And maybe better with PPC_HAS_LBARX_LWARX?

Yes you're right, PPC_HAS_ fits better.
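
A sketch of how the renamed option might read, combining both suggestions
(illustrative only, not posted code):

config PPC_HAS_LBARX_LHARX
	bool

with the cputype selects updated to match.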

[...]

>> +#endif
> 
> That's a lot of code duplication. Could we use some macro, in the same spirit as what is done in 
> arch/powerpc/include/asm/io.h for in_be16(), in_be32(), in_be64() and friends?

For now I don't get too fancy. It's a bit ugly, but I'm working through a
generic atomics conversion patch and also trying to work out a nice form
for larx/stcx operation generation macros; I'll look at tidying this up
some time after that.

Thanks,
Nick
Patch

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index e9f13fe08492..d231af06f75a 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -266,6 +266,9 @@ config PPC_BARRIER_NOSPEC
 	default y
 	depends on PPC_BOOK3S_64 || PPC_FSL_BOOK3E
 
+config PPC_LBARX_LWARX
+	bool
+
 config EARLY_PRINTK
 	bool
 	default y
diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
index cf091c4c22e5..17fd996dc0d4 100644
--- a/arch/powerpc/include/asm/cmpxchg.h
+++ b/arch/powerpc/include/asm/cmpxchg.h
@@ -77,10 +77,76 @@ u32 __cmpxchg_##type##sfx(volatile void *p, u32 old, u32 new)	\
  * the previous value stored there.
  */
 
+#ifndef CONFIG_PPC_LBARX_LWARX
 XCHG_GEN(u8, _local, "memory");
 XCHG_GEN(u8, _relaxed, "cc");
 XCHG_GEN(u16, _local, "memory");
 XCHG_GEN(u16, _relaxed, "cc");
+#else
+static __always_inline unsigned long
+__xchg_u8_local(volatile void *p, unsigned long val)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__(
+"1:	lbarx	%0,0,%2 \n"
+"	stbcx.	%3,0,%2 \n\
+	bne-	1b"
+	: "=&r" (prev), "+m" (*(volatile unsigned char *)p)
+	: "r" (p), "r" (val)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u8_relaxed(u8 *p, unsigned long val)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__(
+"1:	lbarx	%0,0,%2\n"
+"	stbcx.	%3,0,%2\n"
+"	bne-	1b"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (val)
+	: "cc");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u16_local(volatile void *p, unsigned long val)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__(
+"1:	lharx	%0,0,%2 \n"
+"	sthcx.	%3,0,%2 \n\
+	bne-	1b"
+	: "=&r" (prev), "+m" (*(volatile unsigned short *)p)
+	: "r" (p), "r" (val)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u16_relaxed(u16 *p, unsigned long val)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__(
+"1:	lharx	%0,0,%2\n"
+"	sthcx.	%3,0,%2\n"
+"	bne-	1b"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (val)
+	: "cc");
+
+	return prev;
+}
+#endif
 
 static __always_inline unsigned long
 __xchg_u32_local(volatile void *p, unsigned long val)
@@ -198,11 +264,12 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size)
 	(__typeof__(*(ptr))) __xchg_relaxed((ptr),			\
 			(unsigned long)_x_, sizeof(*(ptr)));		\
 })
+
 /*
  * Compare and exchange - if *p == old, set it to new,
  * and return the old value of *p.
  */
-
+#ifndef CONFIG_PPC_LBARX_LWARX
 CMPXCHG_GEN(u8, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory");
 CMPXCHG_GEN(u8, _local, , , "memory");
 CMPXCHG_GEN(u8, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
@@ -211,6 +278,173 @@ CMPXCHG_GEN(u16, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory");
 CMPXCHG_GEN(u16, _local, , , "memory");
 CMPXCHG_GEN(u16, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
 CMPXCHG_GEN(u16, _relaxed, , , "cc");
+#else
+static __always_inline unsigned long
+__cmpxchg_u8(volatile unsigned char *p, unsigned long old, unsigned long new)
+{
+	unsigned int prev;
+
+	__asm__ __volatile__ (
+	PPC_ATOMIC_ENTRY_BARRIER
+"1:	lbarx	%0,0,%2		# __cmpxchg_u8\n\
+	cmpw	0,%0,%3\n\
+	bne-	2f\n"
+"	stbcx.	%4,0,%2\n\
+	bne-	1b"
+	PPC_ATOMIC_EXIT_BARRIER
+	"\n\
+2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u8_local(volatile unsigned char *p, unsigned long old,
+			unsigned long new)
+{
+	unsigned int prev;
+
+	__asm__ __volatile__ (
+"1:	lbarx	%0,0,%2		# __cmpxchg_u8\n\
+	cmpw	0,%0,%3\n\
+	bne-	2f\n"
+"	stbcx.	%4,0,%2\n\
+	bne-	1b"
+	"\n\
+2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u8_relaxed(u8 *p, unsigned long old, unsigned long new)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__ (
+"1:	lbarx	%0,0,%2		# __cmpxchg_u8_relaxed\n"
+"	cmpw	0,%0,%3\n"
+"	bne-	2f\n"
+"	stbcx.	%4,0,%2\n"
+"	bne-	1b\n"
+"2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u8_acquire(u8 *p, unsigned long old, unsigned long new)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__ (
+"1:	lbarx	%0,0,%2		# __cmpxchg_u8_acquire\n"
+"	cmpw	0,%0,%3\n"
+"	bne-	2f\n"
+"	stbcx.	%4,0,%2\n"
+"	bne-	1b\n"
+	PPC_ACQUIRE_BARRIER
+	"\n"
+"2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u16(volatile unsigned short *p, unsigned long old, unsigned long new)
+{
+	unsigned int prev;
+
+	__asm__ __volatile__ (
+	PPC_ATOMIC_ENTRY_BARRIER
+"1:	lharx	%0,0,%2		# __cmpxchg_u16\n\
+	cmpw	0,%0,%3\n\
+	bne-	2f\n"
+"	sthcx.	%4,0,%2\n\
+	bne-	1b"
+	PPC_ATOMIC_EXIT_BARRIER
+	"\n\
+2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u16_local(volatile unsigned short *p, unsigned long old,
+			unsigned long new)
+{
+	unsigned int prev;
+
+	__asm__ __volatile__ (
+"1:	lharx	%0,0,%2		# __cmpxchg_u16\n\
+	cmpw	0,%0,%3\n\
+	bne-	2f\n"
+"	sthcx.	%4,0,%2\n\
+	bne-	1b"
+	"\n\
+2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u16_relaxed(u16 *p, unsigned long old, unsigned long new)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__ (
+"1:	lharx	%0,0,%2		# __cmpxchg_u16_relaxed\n"
+"	cmpw	0,%0,%3\n"
+"	bne-	2f\n"
+"	sthcx.	%4,0,%2\n"
+"	bne-	1b\n"
+"2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u16_acquire(u16 *p, unsigned long old, unsigned long new)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__ (
+"1:	lharx	%0,0,%2		# __cmpxchg_u16_acquire\n"
+"	cmpw	0,%0,%3\n"
+"	bne-	2f\n"
+"	sthcx.	%4,0,%2\n"
+"	bne-	1b\n"
+	PPC_ACQUIRE_BARRIER
+	"\n"
+"2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+#endif
 
 static __always_inline unsigned long
 __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index c194c4ae8bc7..2f8c8d61dba4 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -118,6 +118,7 @@ config GENERIC_CPU
 	bool "Generic (POWER8 and above)"
 	depends on PPC64 && CPU_LITTLE_ENDIAN
 	select ARCH_HAS_FAST_MULTIPLIER
+	select PPC_LBARX_LWARX
 
 config GENERIC_CPU
 	bool "Generic 32 bits powerpc"
@@ -139,16 +140,19 @@ config POWER7_CPU
 	bool "POWER7"
 	depends on PPC_BOOK3S_64
 	select ARCH_HAS_FAST_MULTIPLIER
+	select PPC_LBARX_LWARX
 
 config POWER8_CPU
 	bool "POWER8"
 	depends on PPC_BOOK3S_64
 	select ARCH_HAS_FAST_MULTIPLIER
+	select PPC_LBARX_LWARX
 
 config POWER9_CPU
 	bool "POWER9"
 	depends on PPC_BOOK3S_64
 	select ARCH_HAS_FAST_MULTIPLIER
+	select PPC_LBARX_LWARX
 
 config E5500_CPU
 	bool "Freescale e5500"
@@ -157,6 +161,7 @@ config E5500_CPU
 config E6500_CPU
 	bool "Freescale e6500"
 	depends on E500
+	select PPC_LBARX_LWARX
 
 config 860_CPU
 	bool "8xx family"