@@ -40,15 +40,12 @@
#include "auto-config.h"
-#if !HAVE_IFUNC
-# undef HAVE_FEAT_LSE128
-# define HAVE_FEAT_LSE128 0
-#endif
-
-#define HAVE_FEAT_LSE2 HAVE_IFUNC
-
-#if HAVE_FEAT_LSE128
+#if HAVE_IFUNC
+# if HAVE_FEAT_LSE128
.arch armv9-a+lse128
+# else
+ .arch armv8-a+lse
+# endif
#else
.arch armv8-a+lse
#endif
@@ -124,6 +121,8 @@ NAME: \
#define ACQ_REL 4
#define SEQ_CST 5
+/* Core atomic operation implementations. These are available irrespective of
+ ifunc support or the presence of additional architectural extensions. */
ENTRY (load_16)
mov x5, x0
@@ -143,31 +142,6 @@ ENTRY (load_16)
END (load_16)
-#if HAVE_FEAT_LSE2
-ENTRY_FEAT (load_16, LSE2)
- cbnz w1, 1f
-
- /* RELAXED. */
- ldp res0, res1, [x0]
- ret
-1:
- cmp w1, SEQ_CST
- b.eq 2f
-
- /* ACQUIRE/CONSUME (Load-AcquirePC semantics). */
- ldp res0, res1, [x0]
- dmb ishld
- ret
-
- /* SEQ_CST. */
-2: ldar tmp0, [x0] /* Block reordering with Store-Release instr. */
- ldp res0, res1, [x0]
- dmb ishld
- ret
-END_FEAT (load_16, LSE2)
-#endif
-
-
ENTRY (store_16)
cbnz w4, 2f
@@ -185,23 +159,6 @@ ENTRY (store_16)
END (store_16)
-#if HAVE_FEAT_LSE2
-ENTRY_FEAT (store_16, LSE2)
- cbnz w4, 1f
-
- /* RELAXED. */
- stp in0, in1, [x0]
- ret
-
- /* RELEASE/SEQ_CST. */
-1: ldxp xzr, tmp0, [x0]
- stlxp w4, in0, in1, [x0]
- cbnz w4, 1b
- ret
-END_FEAT (store_16, LSE2)
-#endif
-
-
ENTRY (exchange_16)
mov x5, x0
cbnz w4, 2f
@@ -229,31 +186,6 @@ ENTRY (exchange_16)
END (exchange_16)
-#if HAVE_FEAT_LSE128
-ENTRY_FEAT (exchange_16, LSE128)
- mov tmp0, x0
- mov res0, in0
- mov res1, in1
- cbnz w4, 1f
-
- /* RELAXED. */
- swpp res0, res1, [tmp0]
- ret
-1:
- cmp w4, ACQUIRE
- b.hi 2f
-
- /* ACQUIRE/CONSUME. */
- swppa res0, res1, [tmp0]
- ret
-
- /* RELEASE/ACQ_REL/SEQ_CST. */
-2: swppal res0, res1, [tmp0]
- ret
-END_FEAT (exchange_16, LSE128)
-#endif
-
-
ENTRY (compare_exchange_16)
ldp exp0, exp1, [x1]
cbz w4, 3f
@@ -301,43 +233,97 @@ ENTRY (compare_exchange_16)
END (compare_exchange_16)
-#if HAVE_FEAT_LSE2
-ENTRY_FEAT (compare_exchange_16, LSE)
- ldp exp0, exp1, [x1]
- mov tmp0, exp0
- mov tmp1, exp1
- cbz w4, 2f
- cmp w4, RELEASE
- b.hs 3f
+ENTRY (fetch_or_16)
+ mov x5, x0
+ cbnz w4, 2f
- /* ACQUIRE/CONSUME. */
- caspa exp0, exp1, in0, in1, [x0]
-0:
- cmp exp0, tmp0
- ccmp exp1, tmp1, 0, eq
- bne 1f
- mov x0, 1
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
+ orr tmp0, res0, in0
+ orr tmp1, res1, in1
+ stxp w4, tmp0, tmp1, [x5]
+ cbnz w4, 1b
ret
-1:
- stp exp0, exp1, [x1]
- mov x0, 0
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
+ orr tmp0, res0, in0
+ orr tmp1, res1, in1
+ stlxp w4, tmp0, tmp1, [x5]
+ cbnz w4, 2b
ret
+END (fetch_or_16)
+
+
+ENTRY (or_fetch_16)
+ mov x5, x0
+ cbnz w4, 2f
/* RELAXED. */
-2: casp exp0, exp1, in0, in1, [x0]
- b 0b
+1: ldxp res0, res1, [x5]
+ orr res0, res0, in0
+ orr res1, res1, in1
+ stxp w4, res0, res1, [x5]
+ cbnz w4, 1b
+ ret
- /* RELEASE. */
-3: b.hi 4f
- caspl exp0, exp1, in0, in1, [x0]
- b 0b
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
+ orr res0, res0, in0
+ orr res1, res1, in1
+ stlxp w4, res0, res1, [x5]
+ cbnz w4, 2b
+ ret
+END (or_fetch_16)
+
+
+ENTRY (fetch_and_16)
+ mov x5, x0
+ cbnz w4, 2f
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
+ and tmp0, res0, in0
+ and tmp1, res1, in1
+ stxp w4, tmp0, tmp1, [x5]
+ cbnz w4, 1b
+ ret
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
+ and tmp0, res0, in0
+ and tmp1, res1, in1
+ stlxp w4, tmp0, tmp1, [x5]
+ cbnz w4, 2b
+ ret
+END (fetch_and_16)
+
+
+ENTRY (and_fetch_16)
+ mov x5, x0
+ cbnz w4, 2f
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
+ and res0, res0, in0
+ and res1, res1, in1
+ stxp w4, res0, res1, [x5]
+ cbnz w4, 1b
+ ret
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
+ and res0, res0, in0
+ and res1, res1, in1
+ stlxp w4, res0, res1, [x5]
+ cbnz w4, 2b
+ ret
+END (and_fetch_16)
- /* ACQ_REL/SEQ_CST. */
-4: caspal exp0, exp1, in0, in1, [x0]
- b 0b
-END_FEAT (compare_exchange_16, LSE)
-#endif
+/* The following functions are currently single-implementation operations,
+ so they are never assigned an ifunc selector. As such, they must be
+ reachable from __atomic_* entrypoints. */
ENTRY_ALIASED (fetch_add_16)
mov x5, x0
@@ -427,309 +413,316 @@ ENTRY_ALIASED (sub_fetch_16)
END (sub_fetch_16)
-ENTRY (fetch_or_16)
+ENTRY_ALIASED (fetch_xor_16)
mov x5, x0
cbnz w4, 2f
/* RELAXED. */
1: ldxp res0, res1, [x5]
- orr tmp0, res0, in0
- orr tmp1, res1, in1
+ eor tmp0, res0, in0
+ eor tmp1, res1, in1
stxp w4, tmp0, tmp1, [x5]
cbnz w4, 1b
ret
/* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
2: ldaxp res0, res1, [x5]
- orr tmp0, res0, in0
- orr tmp1, res1, in1
+ eor tmp0, res0, in0
+ eor tmp1, res1, in1
stlxp w4, tmp0, tmp1, [x5]
cbnz w4, 2b
ret
-END (fetch_or_16)
+END (fetch_xor_16)
-#if HAVE_FEAT_LSE128
-ENTRY_FEAT (fetch_or_16, LSE128)
- mov tmp0, x0
- mov res0, in0
- mov res1, in1
- cbnz w4, 1f
+ENTRY_ALIASED (xor_fetch_16)
+ mov x5, x0
+ cbnz w4, 2f
/* RELAXED. */
- ldsetp res0, res1, [tmp0]
- ret
-1:
- cmp w4, ACQUIRE
- b.hi 2f
-
- /* ACQUIRE/CONSUME. */
- ldsetpa res0, res1, [tmp0]
+1: ldxp res0, res1, [x5]
+ eor res0, res0, in0
+ eor res1, res1, in1
+ stxp w4, res0, res1, [x5]
+ cbnz w4, 1b
ret
- /* RELEASE/ACQ_REL/SEQ_CST. */
-2: ldsetpal res0, res1, [tmp0]
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
+ eor res0, res0, in0
+ eor res1, res1, in1
+ stlxp w4, res0, res1, [x5]
+ cbnz w4, 2b
ret
-END_FEAT (fetch_or_16, LSE128)
-#endif
+END (xor_fetch_16)
-ENTRY (or_fetch_16)
+ENTRY_ALIASED (fetch_nand_16)
mov x5, x0
+ mvn in0, in0
+ mvn in1, in1
cbnz w4, 2f
/* RELAXED. */
1: ldxp res0, res1, [x5]
- orr res0, res0, in0
- orr res1, res1, in1
- stxp w4, res0, res1, [x5]
+ orn tmp0, in0, res0
+ orn tmp1, in1, res1
+ stxp w4, tmp0, tmp1, [x5]
cbnz w4, 1b
ret
/* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
2: ldaxp res0, res1, [x5]
- orr res0, res0, in0
- orr res1, res1, in1
- stlxp w4, res0, res1, [x5]
+ orn tmp0, in0, res0
+ orn tmp1, in1, res1
+ stlxp w4, tmp0, tmp1, [x5]
cbnz w4, 2b
ret
-END (or_fetch_16)
+END (fetch_nand_16)
-#if HAVE_FEAT_LSE128
-ENTRY_FEAT (or_fetch_16, LSE128)
- cbnz w4, 1f
- mov tmp0, in0
- mov tmp1, in1
+ENTRY_ALIASED (nand_fetch_16)
+ mov x5, x0
+ mvn in0, in0
+ mvn in1, in1
+ cbnz w4, 2f
/* RELAXED. */
- ldsetp in0, in1, [x0]
- orr res0, in0, tmp0
- orr res1, in1, tmp1
+1: ldxp res0, res1, [x5]
+ orn res0, in0, res0
+ orn res1, in1, res1
+ stxp w4, res0, res1, [x5]
+ cbnz w4, 1b
ret
-1:
- cmp w4, ACQUIRE
- b.hi 2f
- /* ACQUIRE/CONSUME. */
- ldsetpa in0, in1, [x0]
- orr res0, in0, tmp0
- orr res1, in1, tmp1
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
+ orn res0, in0, res0
+ orn res1, in1, res1
+ stlxp w4, res0, res1, [x5]
+ cbnz w4, 2b
ret
+END (nand_fetch_16)
- /* RELEASE/ACQ_REL/SEQ_CST. */
-2: ldsetpal in0, in1, [x0]
- orr res0, in0, tmp0
- orr res1, in1, tmp1
- ret
-END_FEAT (or_fetch_16, LSE128)
-#endif
+/* __atomic_test_and_set is always inlined, so this entry is unused and
+   only required for completeness. */
+ENTRY_ALIASED (test_and_set_16)
-ENTRY (fetch_and_16)
+	mov	w2, 1	/* RELAXED..SEQ_CST share this path; w2 is the "set" value — not an argument, so it must be initialized here. */
	mov	x5, x0
-	cbnz	w4, 2f
-
-	/* RELAXED. */
-1:	ldxp	res0, res1, [x5]
-	and	tmp0, res0, in0
-	and	tmp1, res1, in1
-	stxp	w4, tmp0, tmp1, [x5]
+1:	ldaxrb	w0, [x5]
+	stlxrb	w4, w2, [x5]
	cbnz	w4, 1b
	ret
+END (test_and_set_16)
- /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
-2: ldaxp res0, res1, [x5]
- and tmp0, res0, in0
- and tmp1, res1, in1
- stlxp w4, tmp0, tmp1, [x5]
- cbnz w4, 2b
- ret
-END (fetch_and_16)
-
+/* Ensure extension-specific implementations are not included unless ifunc
+ support is present, along with necessary assembler support. */
-#if HAVE_FEAT_LSE128
-ENTRY_FEAT (fetch_and_16, LSE128)
- mov tmp0, x0
- mvn res0, in0
- mvn res1, in1
- cbnz w4, 1f
+#if HAVE_IFUNC
+ENTRY_FEAT (load_16, LSE2)
+ cbnz w1, 1f
/* RELAXED. */
- ldclrp res0, res1, [tmp0]
+ ldp res0, res1, [x0]
ret
-
1:
- cmp w4, ACQUIRE
- b.hi 2f
+ cmp w1, SEQ_CST
+ b.eq 2f
- /* ACQUIRE/CONSUME. */
- ldclrpa res0, res1, [tmp0]
+ /* ACQUIRE/CONSUME (Load-AcquirePC semantics). */
+ ldp res0, res1, [x0]
+ dmb ishld
ret
- /* RELEASE/ACQ_REL/SEQ_CST. */
-2: ldclrpal res0, res1, [tmp0]
+ /* SEQ_CST. */
+2: ldar tmp0, [x0] /* Block reordering with Store-Release instr. */
+ ldp res0, res1, [x0]
+ dmb ishld
ret
-END_FEAT (fetch_and_16, LSE128)
-#endif
+END_FEAT (load_16, LSE2)
-ENTRY (and_fetch_16)
- mov x5, x0
- cbnz w4, 2f
+ENTRY_FEAT (store_16, LSE2)
+ cbnz w4, 1f
/* RELAXED. */
-1: ldxp res0, res1, [x5]
- and res0, res0, in0
- and res1, res1, in1
- stxp w4, res0, res1, [x5]
+ stp in0, in1, [x0]
+ ret
+
+ /* RELEASE/SEQ_CST. */
+1: ldxp xzr, tmp0, [x0]
+ stlxp w4, in0, in1, [x0]
cbnz w4, 1b
ret
+END_FEAT (store_16, LSE2)
- /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
-2: ldaxp res0, res1, [x5]
- and res0, res0, in0
- and res1, res1, in1
- stlxp w4, res0, res1, [x5]
- cbnz w4, 2b
+
+ENTRY_FEAT (compare_exchange_16, LSE)
+ ldp exp0, exp1, [x1]
+ mov tmp0, exp0
+ mov tmp1, exp1
+ cbz w4, 2f
+ cmp w4, RELEASE
+ b.hs 3f
+
+ /* ACQUIRE/CONSUME. */
+ caspa exp0, exp1, in0, in1, [x0]
+0:
+ cmp exp0, tmp0
+ ccmp exp1, tmp1, 0, eq
+ bne 1f
+ mov x0, 1
ret
-END (and_fetch_16)
+1:
+ stp exp0, exp1, [x1]
+ mov x0, 0
+ ret
+
+ /* RELAXED. */
+2: casp exp0, exp1, in0, in1, [x0]
+ b 0b
+
+ /* RELEASE. */
+3: b.hi 4f
+ caspl exp0, exp1, in0, in1, [x0]
+ b 0b
+
+ /* ACQ_REL/SEQ_CST. */
+4: caspal exp0, exp1, in0, in1, [x0]
+ b 0b
+END_FEAT (compare_exchange_16, LSE)
#if HAVE_FEAT_LSE128
-ENTRY_FEAT (and_fetch_16, LSE128)
- mvn tmp0, in0
- mvn tmp0, in1
+ENTRY_FEAT (exchange_16, LSE128)
+ mov tmp0, x0
+ mov res0, in0
+ mov res1, in1
cbnz w4, 1f
/* RELAXED. */
- ldclrp tmp0, tmp1, [x0]
- and res0, tmp0, in0
- and res1, tmp1, in1
+ swpp res0, res1, [tmp0]
ret
-
1:
cmp w4, ACQUIRE
b.hi 2f
/* ACQUIRE/CONSUME. */
- ldclrpa tmp0, tmp1, [x0]
- and res0, tmp0, in0
- and res1, tmp1, in1
+ swppa res0, res1, [tmp0]
ret
/* RELEASE/ACQ_REL/SEQ_CST. */
-2: ldclrpal tmp0, tmp1, [x5]
- and res0, tmp0, in0
- and res1, tmp1, in1
+2: swppal res0, res1, [tmp0]
ret
-END_FEAT (and_fetch_16, LSE128)
-#endif
+END_FEAT (exchange_16, LSE128)
-ENTRY_ALIASED (fetch_xor_16)
- mov x5, x0
- cbnz w4, 2f
+ENTRY_FEAT (fetch_or_16, LSE128)
+ mov tmp0, x0
+ mov res0, in0
+ mov res1, in1
+ cbnz w4, 1f
/* RELAXED. */
-1: ldxp res0, res1, [x5]
- eor tmp0, res0, in0
- eor tmp1, res1, in1
- stxp w4, tmp0, tmp1, [x5]
- cbnz w4, 1b
+ ldsetp res0, res1, [tmp0]
ret
+1:
+ cmp w4, ACQUIRE
+ b.hi 2f
- /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
-2: ldaxp res0, res1, [x5]
- eor tmp0, res0, in0
- eor tmp1, res1, in1
- stlxp w4, tmp0, tmp1, [x5]
- cbnz w4, 2b
+ /* ACQUIRE/CONSUME. */
+ ldsetpa res0, res1, [tmp0]
ret
-END (fetch_xor_16)
+ /* RELEASE/ACQ_REL/SEQ_CST. */
+2: ldsetpal res0, res1, [tmp0]
+ ret
+END_FEAT (fetch_or_16, LSE128)
-ENTRY_ALIASED (xor_fetch_16)
- mov x5, x0
- cbnz w4, 2f
+
+ENTRY_FEAT (or_fetch_16, LSE128)
+	mov	tmp0, in0	/* Save operands before the branch: every path needs them, and ldsetp* clobbers in0/in1. */
+	mov	tmp1, in1
+	cbnz	w4, 1f
	/* RELAXED. */
-1:	ldxp	res0, res1, [x5]
-	eor	res0, res0, in0
-	eor	res1, res1, in1
-	stxp	w4, res0, res1, [x5]
-	cbnz	w4, 1b
+	ldsetp	in0, in1, [x0]
+	orr	res0, in0, tmp0
+	orr	res1, in1, tmp1
	ret
+1:
+	cmp	w4, ACQUIRE
+	b.hi	2f
-	/* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
-2:	ldaxp	res0, res1, [x5]
-	eor	res0, res0, in0
-	eor	res1, res1, in1
-	stlxp	w4, res0, res1, [x5]
-	cbnz	w4, 2b
+	/* ACQUIRE/CONSUME. */
+	ldsetpa	in0, in1, [x0]
+	orr	res0, in0, tmp0
+	orr	res1, in1, tmp1
	ret
-END (xor_fetch_16)
+	/* RELEASE/ACQ_REL/SEQ_CST. */
+2:	ldsetpal	in0, in1, [x0]
+	orr	res0, in0, tmp0
+	orr	res1, in1, tmp1
+	ret
+END_FEAT (or_fetch_16, LSE128)
-ENTRY_ALIASED (fetch_nand_16)
- mov x5, x0
- mvn in0, in0
- mvn in1, in1
- cbnz w4, 2f
+
+ENTRY_FEAT (fetch_and_16, LSE128)
+ mov tmp0, x0
+ mvn res0, in0
+ mvn res1, in1
+ cbnz w4, 1f
/* RELAXED. */
-1: ldxp res0, res1, [x5]
- orn tmp0, in0, res0
- orn tmp1, in1, res1
- stxp w4, tmp0, tmp1, [x5]
- cbnz w4, 1b
+ ldclrp res0, res1, [tmp0]
ret
- /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
-2: ldaxp res0, res1, [x5]
- orn tmp0, in0, res0
- orn tmp1, in1, res1
- stlxp w4, tmp0, tmp1, [x5]
- cbnz w4, 2b
+1:
+ cmp w4, ACQUIRE
+ b.hi 2f
+
+ /* ACQUIRE/CONSUME. */
+ ldclrpa res0, res1, [tmp0]
ret
-END (fetch_nand_16)
+ /* RELEASE/ACQ_REL/SEQ_CST. */
+2: ldclrpal res0, res1, [tmp0]
+ ret
+END_FEAT (fetch_and_16, LSE128)
-ENTRY_ALIASED (nand_fetch_16)
- mov x5, x0
- mvn in0, in0
- mvn in1, in1
- cbnz w4, 2f
- /* RELAXED. */
-1: ldxp res0, res1, [x5]
- orn res0, in0, res0
- orn res1, in1, res1
- stxp w4, res0, res1, [x5]
- cbnz w4, 1b
- ret
+ENTRY_FEAT (and_fetch_16, LSE128)
+	mvn	tmp0, in0	/* tmp0:tmp1 = ~(in0:in1); ldclrp* implements AND as a bit-clear of these. */
+	mvn	tmp1, in1
+	cbnz	w4, 1f
-	/* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
-2:	ldaxp	res0, res1, [x5]
-	orn	res0, in0, res0
-	orn	res1, in1, res1
-	stlxp	w4, res0, res1, [x5]
-	cbnz	w4, 2b
+	/* RELAXED. */
+	ldclrp	tmp0, tmp1, [x0]
+	and	res0, tmp0, in0
+	and	res1, tmp1, in1
	ret
-END (nand_fetch_16)
+1:
+	cmp	w4, ACQUIRE
+	b.hi	2f
-/* __atomic_test_and_set is always inlined, so this entry is unused and
-   only required for completeness. */
-ENTRY_ALIASED (test_and_set_16)
+	/* ACQUIRE/CONSUME. */
+	ldclrpa	tmp0, tmp1, [x0]
+	and	res0, tmp0, in0
+	and	res1, tmp1, in1
+	ret
-	/* RELAXED/ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
-	mov	x5, x0
-1:	ldaxrb	w0, [x5]
-	stlxrb	w4, w2, [x5]
-	cbnz	w4, 1b
+	/* RELEASE/ACQ_REL/SEQ_CST. */
+2:	ldclrpal	tmp0, tmp1, [x0]
+	and	res0, tmp0, in0
+	and	res1, tmp1, in1
	ret
-END (test_and_set_16)
+END_FEAT (and_fetch_16, LSE128)
+#endif /* HAVE_FEAT_LSE128 */
+#endif /* HAVE_IFUNC */
/* GNU_PROPERTY_AARCH64_* macros from elf.h for use in asm code. */