diff mbox series

libatomic: Add rcpc3 128-bit atomic operations for AArch64

Message ID 20240516135124.2466005-1-victor.donascimento@arm.com
State New
Headers show
Series libatomic: Add rcpc3 128-bit atomic operations for AArch64 | expand

Commit Message

Victor Do Nascimento May 16, 2024, 1:51 p.m. UTC
The introduction of the optional RCPC3 architectural extension for
Armv8.2-A upwards provides additional support for the release
consistency model, introducing the Load-Acquire RCpc Pair Ordered and
Store-Release Pair Ordered operations in the form of LDIAPP and STILP.

These operations are single-copy atomic on cores which also implement
LSE2 and, as such, support for these operations is added to libatomic
and used when both the LSE2 and RCPC3 features are detected on a
given core at runtime.

libatomic/ChangeLog:

	* configure.ac: Add call to LIBAT_TEST_FEAT_AARCH64_LRCPC3() test.
	* configure: Regenerate.
	* config/linux/aarch64/host-config.h (has_rcpc3): New.
	(HWCAP2_LRCPC3): Likewise.
	(LSE2_LRCPC3_ATOP): Likewise.
	* config/linux/aarch64/atomic_16.S: New +rcpc3 .arch
	directives.
	* config/linux/aarch64/atomic_16.S (libat_load_16): Add LRCPC3
	variant.
	(libat_store_16): Likewise.
	* acinclude.m4 (LIBAT_TEST_FEAT_AARCH64_LRCPC3): New.
	(HAVE_FEAT_LRCPC3): Likewise.
	(ARCH_AARCH64_HAVE_LRCPC3): Likewise.
	* auto-config.h.in (HAVE_FEAT_LRCPC3): New.
---
 libatomic/acinclude.m4                       | 18 +++++++
 libatomic/auto-config.h.in                   |  3 ++
 libatomic/config/linux/aarch64/atomic_16.S   | 55 +++++++++++++++++++-
 libatomic/config/linux/aarch64/host-config.h | 39 ++++++++++++--
 libatomic/configure                          | 41 +++++++++++++++
 libatomic/configure.ac                       |  1 +
 6 files changed, 152 insertions(+), 5 deletions(-)
diff mbox series

Patch

diff --git a/libatomic/acinclude.m4 b/libatomic/acinclude.m4
index 6d2e0b1c355..628275b9945 100644
--- a/libatomic/acinclude.m4
+++ b/libatomic/acinclude.m4
@@ -101,6 +101,24 @@  AC_DEFUN([LIBAT_TEST_FEAT_AARCH64_LSE128],[
 	[Have LSE128 support for 16 byte integers.])
 ])
 
+dnl
+dnl Test if the host assembler supports armv8.2-a RCPC3 insns.
+dnl
+AC_DEFUN([LIBAT_TEST_FEAT_AARCH64_LRCPC3],[
+  AC_CACHE_CHECK([for armv8.2-a LRCPC3 insn support],
+    [libat_cv_have_feat_lrcpc3],[
+    AC_LANG_CONFTEST([AC_LANG_PROGRAM([],[asm(".arch armv8.2-a+rcpc3")])])
+    if AC_TRY_EVAL(ac_link); then
+      eval libat_cv_have_feat_lrcpc3=yes
+    else
+      eval libat_cv_have_feat_lrcpc3=no
+    fi
+    rm -f conftest*
+  ])
+  LIBAT_DEFINE_YESNO([HAVE_FEAT_LRCPC3], [$libat_cv_have_feat_lrcpc3],
+	[Have LRCPC3 support for 16 byte integers.])
+])
+
 dnl
 dnl Test if we have __atomic_load and __atomic_store for mode $1, size $2
 dnl
diff --git a/libatomic/auto-config.h.in b/libatomic/auto-config.h.in
index 7c78933b07d..a925686effa 100644
--- a/libatomic/auto-config.h.in
+++ b/libatomic/auto-config.h.in
@@ -108,6 +108,9 @@ 
 /* Have LSE128 support for 16 byte integers. */
 #undef HAVE_FEAT_LSE128
 
+/* Have LRCPC3 support for 16 byte integers. */
+#undef HAVE_FEAT_LRCPC3
+
 /* Define to 1 if you have the <fenv.h> header file. */
 #undef HAVE_FENV_H
 
diff --git a/libatomic/config/linux/aarch64/atomic_16.S b/libatomic/config/linux/aarch64/atomic_16.S
index 27363f82b75..47ceb7301c9 100644
--- a/libatomic/config/linux/aarch64/atomic_16.S
+++ b/libatomic/config/linux/aarch64/atomic_16.S
@@ -42,7 +42,13 @@ 
 
 #if HAVE_IFUNC
 # if HAVE_FEAT_LSE128
+#  if HAVE_FEAT_LRCPC3
+	.arch	armv9-a+lse128+rcpc3
+#  else
 	.arch	armv9-a+lse128
+#  endif
+# elif HAVE_FEAT_LRCPC3
+	.arch	armv8-a+lse+rcpc3
 # else
 	.arch	armv8-a+lse
 # endif
@@ -50,9 +56,20 @@ 
 	.arch	armv8-a+lse
 #endif
 
+/* Some atomic operations have both an RCPC3 and an LSE2 implementation.  When
+   the assembler supports RCPC3, the RCPC3 variant takes the _i1 suffix and the
+   LSE2 variant takes _i2.  Otherwise, all extension-specific implementations
+   are mapped to _i1.  */
+
+#if HAVE_FEAT_LRCPC3
+# define LRCPC3(NAME)	libat_##NAME##_i1
+# define LSE2(NAME)	libat_##NAME##_i2
+#else
+# define LSE2(NAME)	libat_##NAME##_i1
+#endif
+
 #define LSE128(NAME)	libat_##NAME##_i1
 #define LSE(NAME)	libat_##NAME##_i1
-#define LSE2(NAME)	libat_##NAME##_i1
 #define CORE(NAME)	libat_##NAME
 #define ATOMIC(NAME)	__atomic_##NAME
 
@@ -722,6 +739,42 @@  ENTRY_FEAT (and_fetch_16, LSE128)
 	ret
 END_FEAT (and_fetch_16, LSE128)
 #endif /* HAVE_FEAT_LSE128 */
+
+
+#if HAVE_FEAT_LRCPC3
+ENTRY_FEAT (load_16, LRCPC3)
+	cbnz	w1, 1f
+
+	/* RELAXED.  */
+	ldp	res0, res1, [x0]
+	ret
+1:
+	cmp	w1, SEQ_CST
+	b.eq	2f
+
+	/* ACQUIRE/CONSUME (Load-AcquirePC semantics).  */
+	ldiapp	res0, res1, [x0]
+	ret
+
+	/* SEQ_CST.  */
+2:	ldar	tmp0, [x0]	/* Block reordering with Store-Release instr.  */
+	ldiapp	res0, res1, [x0]
+	ret
+END_FEAT (load_16, LRCPC3)
+
+
+ENTRY_FEAT (store_16, LRCPC3)
+	cbnz	w4, 1f
+
+	/* RELAXED.  */
+	stp	in0, in1, [x0]
+	ret
+
+	/* RELEASE/SEQ_CST.  */
+1:	stilp	in0, in1, [x0]
+	ret
+END_FEAT (store_16, LRCPC3)
+#endif /* HAVE_FEAT_LRCPC3 */
 #endif /* HAVE_IFUNC */
 
 
diff --git a/libatomic/config/linux/aarch64/host-config.h b/libatomic/config/linux/aarch64/host-config.h
index 6e010594a6c..dce472d26d1 100644
--- a/libatomic/config/linux/aarch64/host-config.h
+++ b/libatomic/config/linux/aarch64/host-config.h
@@ -33,6 +33,9 @@ 
 #ifndef HWCAP_USCAT
 # define HWCAP_USCAT	(1 << 25)
 #endif
+#ifndef HWCAP2_LRCPC3
+# define HWCAP2_LRCPC3	(1UL << 46)
+#endif
 #ifndef HWCAP2_LSE128
 # define HWCAP2_LSE128	(1UL << 47)
 #endif
@@ -54,7 +57,7 @@  typedef struct __ifunc_arg_t {
 #if defined (LAT_CAS_N)
 # define LSE_ATOP
 #elif defined (LAT_LOAD_N) || defined (LAT_STORE_N)
-# define LSE2_ATOP
+# define LSE2_LRCPC3_ATOP
 #elif defined (LAT_EXCH_N) || defined (LAT_FIOR_N) || defined (LAT_FAND_N)
 # define LSE128_ATOP
 #endif
@@ -63,9 +66,15 @@  typedef struct __ifunc_arg_t {
 #  if defined (LSE_ATOP)
 #   define IFUNC_NCOND(N)	1
 #   define IFUNC_COND_1	(hwcap & HWCAP_ATOMICS)
-#  elif defined (LSE2_ATOP)
-#   define IFUNC_NCOND(N)	1
-#   define IFUNC_COND_1	(has_lse2 (hwcap, features))
+#  elif defined (LSE2_LRCPC3_ATOP)
+#   if HAVE_FEAT_LRCPC3
+#    define IFUNC_NCOND(N)	2
+#    define IFUNC_COND_1	(has_rcpc3 (hwcap, features))
+#    define IFUNC_COND_2	(has_lse2  (hwcap, features))
+#   else
+#    define IFUNC_NCOND(N)	1
+#    define IFUNC_COND_1	(has_lse2 (hwcap, features))
+#   endif
 #  elif  HAVE_FEAT_LSE128 && defined (LSE128_ATOP)
 #   define IFUNC_NCOND(N)	1
 #   define IFUNC_COND_1	(has_lse128 (hwcap, features))
@@ -131,6 +140,28 @@  has_lse128 (unsigned long hwcap, const __ifunc_arg_t *features)
   return false;
 }
 
+/* LRCPC support is encoded in ID_AA64ISAR1_EL1.LRCPC, bits[23:20].  A value of
+   0b0011 or greater indicates that LRCPC3 is implemented.  Check that.  */
+
+static inline bool
+has_rcpc3 (unsigned long hwcap, const __ifunc_arg_t *features)
+{
+  if (hwcap & _IFUNC_ARG_HWCAP
+      && features->_hwcap2 & HWCAP2_LRCPC3)
+    return true;
+  /* Fall back to the ID register to determine whether LRCPC3 is implemented.
+
+     In the absence of HWCAP_CPUID we are unable to check for RCPC3, so return
+     false.  Also check the LSE2 prerequisite (HWCAP_USCAT) before proceeding.  */
+  if (!(hwcap & HWCAP_CPUID)  || !(hwcap & HWCAP_USCAT))
+    return false;
+  unsigned long isar1;
+  asm volatile ("mrs %0, ID_AA64ISAR1_EL1" : "=r" (isar1));
+  if (AT_FEAT_FIELD (isar1) >= 3)
+    return true;
+  return false;
+}
+
 #endif /* HAVE_IFUNC */
 
 /* All 128-bit atomic functions are defined in aarch64/atomic_16.S.  */
diff --git a/libatomic/configure b/libatomic/configure
index 32cb3ecac26..ce3a1d9ce67 100755
--- a/libatomic/configure
+++ b/libatomic/configure
@@ -14740,6 +14740,47 @@  _ACEOF
 
 
 
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for armv8.2-a LRCPC3 insn support" >&5
+$as_echo_n "checking for armv8.2-a LRCPC3 insn support... " >&6; }
+if ${libat_cv_have_feat_lrcpc3+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+asm(".arch armv8.2-a+rcpc3")
+  ;
+  return 0;
+}
+_ACEOF
+    if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5
+  (eval $ac_link) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+      eval libat_cv_have_feat_lrcpc3=yes
+    else
+      eval libat_cv_have_feat_lrcpc3=no
+    fi
+    rm -f conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libat_cv_have_feat_lrcpc3" >&5
+$as_echo "$libat_cv_have_feat_lrcpc3" >&6; }
+
+  yesno=`echo $libat_cv_have_feat_lrcpc3 | tr 'yesno' '1  0 '`
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_FEAT_LRCPC3 $yesno
+_ACEOF
+
+
+
+
  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether byte ordering is bigendian" >&5
 $as_echo_n "checking whether byte ordering is bigendian... " >&6; }
 if ${ac_cv_c_bigendian+:} false; then :
diff --git a/libatomic/configure.ac b/libatomic/configure.ac
index 85824fa7614..8fd20e183a6 100644
--- a/libatomic/configure.ac
+++ b/libatomic/configure.ac
@@ -208,6 +208,7 @@  LIBAT_FORALL_MODES([LIBAT_HAVE_ATOMIC_FETCH_OP])
 
 # Check for target-specific assembly-level support for atomic operations.
 LIBAT_TEST_FEAT_AARCH64_LSE128()
+LIBAT_TEST_FEAT_AARCH64_LRCPC3()
 
 AC_C_BIGENDIAN
 # I don't like the default behaviour of WORDS_BIGENDIAN undefined for LE.