@@ -101,6 +101,24 @@ AC_DEFUN([LIBAT_TEST_FEAT_AARCH64_LSE128],[
[Have LSE128 support for 16 byte integers.])
])
+dnl
+dnl Test if the host assembler supports armv8.2-a RCPC3 insns.
+dnl
+dnl The check links a test program whose only content is an
+dnl ".arch armv8.2-a+rcpc3" asm directive.  The yes/no outcome is cached in
+dnl libat_cv_have_feat_lrcpc3 and handed to LIBAT_DEFINE_YESNO, which
+dnl defines HAVE_FEAT_LRCPC3 accordingly.
+dnl
+AC_DEFUN([LIBAT_TEST_FEAT_AARCH64_LRCPC3],[
+ AC_CACHE_CHECK([for armv8.2-a LRCPC3 insn support],
+ [libat_cv_have_feat_lrcpc3],[
+ AC_LANG_CONFTEST([AC_LANG_PROGRAM([],[asm(".arch armv8.2-a+rcpc3")])])
+ if AC_TRY_EVAL(ac_link); then
+ eval libat_cv_have_feat_lrcpc3=yes
+ else
+ eval libat_cv_have_feat_lrcpc3=no
+ fi
+ rm -f conftest*
+ ])
+ LIBAT_DEFINE_YESNO([HAVE_FEAT_LRCPC3], [$libat_cv_have_feat_lrcpc3],
+ [Have LRCPC3 support for 16 byte integers.])
+])
+
dnl
dnl Test if we have __atomic_load and __atomic_store for mode $1, size $2
dnl
@@ -108,6 +108,9 @@
/* Have LSE128 support for 16 byte integers. */
#undef HAVE_FEAT_LSE128
+/* Have LRCPC3 support for 16 byte integers. */
+#undef HAVE_FEAT_LRCPC3
+
/* Define to 1 if you have the <fenv.h> header file. */
#undef HAVE_FENV_H
@@ -42,7 +42,13 @@
#if HAVE_IFUNC
# if HAVE_FEAT_LSE128
+/* The rcpc3 extension is appended to the .arch directive only when
+   HAVE_FEAT_LRCPC3 is nonzero; the base architecture and the LSE/LSE128
+   extension choice are otherwise unchanged.  */
+# if HAVE_FEAT_LRCPC3
+ .arch armv9-a+lse128+rcpc3
+# else
.arch armv9-a+lse128
+# endif
+# elif HAVE_FEAT_LRCPC3
+ .arch armv8-a+lse+rcpc3
# else
.arch armv8-a+lse
# endif
@@ -50,9 +56,20 @@
.arch armv8-a+lse
#endif
+/* Some atomic operations have implementations under both the RCPC3 and the
+   LSE2 extensions, so two distinct suffixes are needed for them.  When
+   HAVE_FEAT_LRCPC3 is set, the RCPC3 variant takes the _i1 suffix and the
+   LSE2 variant takes _i2.  Otherwise, all extension-specific
+   implementations are mapped to _i1.  */
+
+#if HAVE_FEAT_LRCPC3
+# define LRCPC3(NAME) libat_##NAME##_i1
+# define LSE2(NAME) libat_##NAME##_i2
+#else
+# define LSE2(NAME) libat_##NAME##_i1
+#endif
+
#define LSE128(NAME) libat_##NAME##_i1
#define LSE(NAME) libat_##NAME##_i1
-#define LSE2(NAME) libat_##NAME##_i1
#define CORE(NAME) libat_##NAME
#define ATOMIC(NAME) __atomic_##NAME
@@ -722,6 +739,42 @@ ENTRY_FEAT (and_fetch_16, LSE128)
ret
END_FEAT (and_fetch_16, LSE128)
#endif /* HAVE_FEAT_LSE128 */
+
+
+#if HAVE_FEAT_LRCPC3
+/* RCPC3 variant of load_16.  The address is taken from x0 and the memory
+   model from w1.  A zero model uses a plain LDP, SEQ_CST issues an LDAR
+   ahead of the LDIAPP, and every other model uses LDIAPP alone.  The loaded
+   pair is left in res0/res1.  */
+ENTRY_FEAT (load_16, LRCPC3)
+ cbnz w1, 1f
+
+ /* RELAXED. */
+ ldp res0, res1, [x0]
+ ret
+1:
+ cmp w1, SEQ_CST
+ b.eq 2f
+
+ /* ACQUIRE/CONSUME (Load-AcquirePC semantics). */
+ ldiapp res0, res1, [x0]
+ ret
+
+ /* SEQ_CST. */
+2: ldar tmp0, [x0] /* Block reordering with Store-Release instr. */
+ ldiapp res0, res1, [x0]
+ ret
+END_FEAT (load_16, LRCPC3)
+
+
+/* RCPC3 variant of store_16.  Stores the in0/in1 pair to the address in x0.
+   A zero value in w4 (the RELAXED case) uses a plain STP; any other value
+   uses STILP.  */
+ENTRY_FEAT (store_16, LRCPC3)
+ cbnz w4, 1f
+
+ /* RELAXED. */
+ stp in0, in1, [x0]
+ ret
+
+ /* RELEASE/SEQ_CST. */
+1: stilp in0, in1, [x0]
+ ret
+END_FEAT (store_16, LRCPC3)
+#endif /* HAVE_FEAT_LRCPC3 */
#endif /* HAVE_IFUNC */
@@ -33,6 +33,9 @@
#ifndef HWCAP_USCAT
# define HWCAP_USCAT (1 << 25)
#endif
+/* Supply HWCAP2_LRCPC3 when no earlier definition is in scope.  */
+#ifndef HWCAP2_LRCPC3
+# define HWCAP2_LRCPC3 (1UL << 46)
+#endif
#ifndef HWCAP2_LSE128
# define HWCAP2_LSE128 (1UL << 47)
#endif
@@ -54,7 +57,7 @@ typedef struct __ifunc_arg_t {
#if defined (LAT_CAS_N)
# define LSE_ATOP
#elif defined (LAT_LOAD_N) || defined (LAT_STORE_N)
-# define LSE2_ATOP
+/* Loads and stores are tagged with a combined name because they may have
+   both an LSE2 and an RCPC3 implementation.  */
+# define LSE2_LRCPC3_ATOP
#elif defined (LAT_EXCH_N) || defined (LAT_FIOR_N) || defined (LAT_FAND_N)
# define LSE128_ATOP
#endif
@@ -63,9 +66,15 @@ typedef struct __ifunc_arg_t {
# if defined (LSE_ATOP)
# define IFUNC_NCOND(N) 1
# define IFUNC_COND_1 (hwcap & HWCAP_ATOMICS)
-# elif defined (LSE2_ATOP)
-# define IFUNC_NCOND(N) 1
-# define IFUNC_COND_1 (has_lse2 (hwcap, features))
+# elif defined (LSE2_LRCPC3_ATOP)
+# if HAVE_FEAT_LRCPC3
+# define IFUNC_NCOND(N) 2
+# define IFUNC_COND_1 (has_rcpc3 (hwcap, features))
+# define IFUNC_COND_2 (has_lse2 (hwcap, features))
+# else
+# define IFUNC_NCOND(N) 1
+# define IFUNC_COND_1 (has_lse2 (hwcap, features))
+# endif
# elif HAVE_FEAT_LSE128 && defined (LSE128_ATOP)
# define IFUNC_NCOND(N) 1
# define IFUNC_COND_1 (has_lse128 (hwcap, features))
@@ -131,6 +140,28 @@ has_lse128 (unsigned long hwcap, const __ifunc_arg_t *features)
return false;
}
+/* LRCPC support is encoded in ID_AA64ISAR1_EL1.LRCPC, bits[23:20].  A value
+   of 0b0011 or greater indicates that LRCPC3 is implemented.
+
+   Return true when LRCPC3 is reported and its LSE2 prerequisite
+   (HWCAP_USCAT) is present.  FEATURES is only dereferenced when HWCAP has
+   _IFUNC_ARG_HWCAP set.  */
+
+static inline bool
+has_rcpc3 (unsigned long hwcap, const __ifunc_arg_t *features)
+{
+  /* LSE2 is a prerequisite, so check HWCAP_USCAT before either detection
+     method: a positive HWCAP2 report must not bypass it.  */
+  if (!(hwcap & HWCAP_USCAT))
+    return false;
+
+  if ((hwcap & _IFUNC_ARG_HWCAP)
+      && (features->_hwcap2 & HWCAP2_LRCPC3))
+    return true;
+
+  /* HWCAP2 did not report LRCPC3.  Fall back to reading the ID register,
+     which is only possible when HWCAP_CPUID is set; without it we are
+     unable to check for RCPC3.  */
+  if (!(hwcap & HWCAP_CPUID))
+    return false;
+
+  unsigned long isar1;
+  asm volatile ("mrs %0, ID_AA64ISAR1_EL1" : "=r" (isar1));
+  return AT_FEAT_FIELD (isar1) >= 3;
+}
+
#endif /* HAVE_IFUNC */
/* All 128-bit atomic functions are defined in aarch64/atomic_16.S. */
@@ -14740,6 +14740,47 @@ _ACEOF
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for armv8.2-a LRCPC3 insn support" >&5
+$as_echo_n "checking for armv8.2-a LRCPC3 insn support... " >&6; }
+if ${libat_cv_have_feat_lrcpc3+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+asm(".arch armv8.2-a+rcpc3")
+ ;
+ return 0;
+}
+_ACEOF
+ if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5
+ (eval $ac_link) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; then
+ eval libat_cv_have_feat_lrcpc3=yes
+ else
+ eval libat_cv_have_feat_lrcpc3=no
+ fi
+ rm -f conftest*
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libat_cv_have_feat_lrcpc3" >&5
+$as_echo "$libat_cv_have_feat_lrcpc3" >&6; }
+
+ yesno=`echo $libat_cv_have_feat_lrcpc3 | tr 'yesno' '1 0 '`
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_FEAT_LRCPC3 $yesno
+_ACEOF
+
+
+
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether byte ordering is bigendian" >&5
$as_echo_n "checking whether byte ordering is bigendian... " >&6; }
if ${ac_cv_c_bigendian+:} false; then :
@@ -208,6 +208,7 @@ LIBAT_FORALL_MODES([LIBAT_HAVE_ATOMIC_FETCH_OP])
# Check for target-specific assembly-level support for atomic operations.
LIBAT_TEST_FEAT_AARCH64_LSE128()
+LIBAT_TEST_FEAT_AARCH64_LRCPC3()
AC_C_BIGENDIAN
# I don't like the default behaviour of WORDS_BIGENDIAN undefined for LE.