Message ID | PAWPR08MB8982579B72C4CEE0A3C1BC9983AB9@PAWPR08MB8982.eurprd08.prod.outlook.com |
---|---|
State | New |
Headers | show |
Series | libatomic: Fix SEQ_CST 128-bit atomic load [PR108891] | expand |
ping
From: Wilco Dijkstra
Sent: 23 February 2023 15:11
To: GCC Patches <gcc-patches@gcc.gnu.org>
Cc: Richard Sandiford <Richard.Sandiford@arm.com>; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>
Subject: [PATCH] libatomic: Fix SEQ_CST 128-bit atomic load [PR108891]
The LSE2 ifunc for 16-byte atomic load requires a barrier before the LDP -
without it, it effectively has Load-AcquirePC semantics similar to LDAPR,
which is less restrictive than what __ATOMIC_SEQ_CST requires. This patch
fixes this and adds comments to make it easier to see which sequence is
used for each case. Use a load/store exclusive loop for store to simplify
testing memory ordering is correct (it is slightly faster too).
Passes regress, OK for commit?
libatomic/
PR libgcc/108891
config/linux/aarch64/atomic_16.S: Fix libat_load_16_i1.
Add comments describing the memory order.
---
diff --git a/libatomic/config/linux/aarch64/atomic_16.S b/libatomic/config/linux/aarch64/atomic_16.S
index 732c3534a06678664a252bdbc53652eeab0af506..05439ce394b9653c9bcb582761ff7aaa7c8f9643 100644
--- a/libatomic/config/linux/aarch64/atomic_16.S
+++ b/libatomic/config/linux/aarch64/atomic_16.S
@@ -72,33 +72,38 @@ name: \
ENTRY (libat_load_16_i1)
cbnz w1, 1f
+
+ /* RELAXED. */
ldp res0, res1, [x0]
ret
1:
- cmp w1, ACQUIRE
- b.hi 2f
+ cmp w1, SEQ_CST
+ b.eq 2f
+
+ /* ACQUIRE/CONSUME (Load-AcquirePC semantics). */
ldp res0, res1, [x0]
dmb ishld
ret
-2:
+
+ /* SEQ_CST. */
+2: ldar tmp0, [x0] /* Block reordering with Store-Release instr. */
ldp res0, res1, [x0]
- dmb ish
+ dmb ishld
ret
END (libat_load_16_i1)
ENTRY (libat_store_16_i1)
cbnz w4, 1f
+
+ /* RELAXED. */
stp in0, in1, [x0]
ret
-1:
- dmb ish
- stp in0, in1, [x0]
- cmp w4, SEQ_CST
- beq 2f
- ret
-2:
- dmb ish
+
+ /* RELEASE/SEQ_CST. */
+1: ldaxp xzr, tmp0, [x0]
+ stlxp w4, in0, in1, [x0]
+ cbnz w4, 1b
ret
END (libat_store_16_i1)
@@ -106,29 +111,33 @@ END (libat_store_16_i1)
ENTRY (libat_exchange_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
stxp w4, in0, in1, [x5]
cbnz w4, 1b
ret
2:
cmp w4, ACQUIRE
b.hi 4f
-3:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME. */
+3: ldaxp res0, res1, [x5]
stxp w4, in0, in1, [x5]
cbnz w4, 3b
ret
4:
cmp w4, RELEASE
b.ne 6f
-5:
- ldxp res0, res1, [x5]
+
+ /* RELEASE. */
+5: ldxp res0, res1, [x5]
stlxp w4, in0, in1, [x5]
cbnz w4, 5b
ret
-6:
- ldaxp res0, res1, [x5]
+
+ /* ACQ_REL/SEQ_CST. */
+6: ldaxp res0, res1, [x5]
stlxp w4, in0, in1, [x5]
cbnz w4, 6b
ret
@@ -142,6 +151,8 @@ ENTRY (libat_compare_exchange_16_i1)
cbz w4, 2f
cmp w4, RELEASE
b.hs 3f
+
+ /* ACQUIRE/CONSUME. */
caspa exp0, exp1, in0, in1, [x0]
0:
cmp exp0, tmp0
@@ -153,15 +164,18 @@ ENTRY (libat_compare_exchange_16_i1)
stp exp0, exp1, [x1]
mov x0, 0
ret
-2:
- casp exp0, exp1, in0, in1, [x0]
+
+ /* RELAXED. */
+2: casp exp0, exp1, in0, in1, [x0]
b 0b
-3:
- b.hi 4f
+
+ /* RELEASE. */
+3: b.hi 4f
caspl exp0, exp1, in0, in1, [x0]
b 0b
-4:
- caspal exp0, exp1, in0, in1, [x0]
+
+ /* ACQ_REL/SEQ_CST. */
+4: caspal exp0, exp1, in0, in1, [x0]
b 0b
END (libat_compare_exchange_16_i1)
@@ -169,15 +183,17 @@ END (libat_compare_exchange_16_i1)
ENTRY (libat_fetch_add_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
adds tmplo, reslo, inlo
adc tmphi, reshi, inhi
stxp w4, tmp0, tmp1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
adds tmplo, reslo, inlo
adc tmphi, reshi, inhi
stlxp w4, tmp0, tmp1, [x5]
@@ -189,15 +205,17 @@ END (libat_fetch_add_16_i1)
ENTRY (libat_add_fetch_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
adds reslo, reslo, inlo
adc reshi, reshi, inhi
stxp w4, res0, res1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
adds reslo, reslo, inlo
adc reshi, reshi, inhi
stlxp w4, res0, res1, [x5]
@@ -209,15 +227,17 @@ END (libat_add_fetch_16_i1)
ENTRY (libat_fetch_sub_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
subs tmplo, reslo, inlo
sbc tmphi, reshi, inhi
stxp w4, tmp0, tmp1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
subs tmplo, reslo, inlo
sbc tmphi, reshi, inhi
stlxp w4, tmp0, tmp1, [x5]
@@ -229,15 +249,17 @@ END (libat_fetch_sub_16_i1)
ENTRY (libat_sub_fetch_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
subs reslo, reslo, inlo
sbc reshi, reshi, inhi
stxp w4, res0, res1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
subs reslo, reslo, inlo
sbc reshi, reshi, inhi
stlxp w4, res0, res1, [x5]
@@ -249,15 +271,17 @@ END (libat_sub_fetch_16_i1)
ENTRY (libat_fetch_or_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
orr tmp0, res0, in0
orr tmp1, res1, in1
stxp w4, tmp0, tmp1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
orr tmp0, res0, in0
orr tmp1, res1, in1
stlxp w4, tmp0, tmp1, [x5]
@@ -269,15 +293,17 @@ END (libat_fetch_or_16_i1)
ENTRY (libat_or_fetch_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
orr res0, res0, in0
orr res1, res1, in1
stxp w4, res0, res1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
orr res0, res0, in0
orr res1, res1, in1
stlxp w4, res0, res1, [x5]
@@ -289,15 +315,17 @@ END (libat_or_fetch_16_i1)
ENTRY (libat_fetch_and_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
and tmp0, res0, in0
and tmp1, res1, in1
stxp w4, tmp0, tmp1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
and tmp0, res0, in0
and tmp1, res1, in1
stlxp w4, tmp0, tmp1, [x5]
@@ -309,15 +337,17 @@ END (libat_fetch_and_16_i1)
ENTRY (libat_and_fetch_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
and res0, res0, in0
and res1, res1, in1
stxp w4, res0, res1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
and res0, res0, in0
and res1, res1, in1
stlxp w4, res0, res1, [x5]
@@ -329,15 +359,17 @@ END (libat_and_fetch_16_i1)
ENTRY (libat_fetch_xor_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
eor tmp0, res0, in0
eor tmp1, res1, in1
stxp w4, tmp0, tmp1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
eor tmp0, res0, in0
eor tmp1, res1, in1
stlxp w4, tmp0, tmp1, [x5]
@@ -349,15 +381,17 @@ END (libat_fetch_xor_16_i1)
ENTRY (libat_xor_fetch_16_i1)
mov x5, x0
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
eor res0, res0, in0
eor res1, res1, in1
stxp w4, res0, res1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
eor res0, res0, in0
eor res1, res1, in1
stlxp w4, res0, res1, [x5]
@@ -371,15 +405,17 @@ ENTRY (libat_fetch_nand_16_i1)
mvn in0, in0
mvn in1, in1
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
orn tmp0, in0, res0
orn tmp1, in1, res1
stxp w4, tmp0, tmp1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
orn tmp0, in0, res0
orn tmp1, in1, res1
stlxp w4, tmp0, tmp1, [x5]
@@ -393,15 +429,17 @@ ENTRY (libat_nand_fetch_16_i1)
mvn in0, in0
mvn in1, in1
cbnz w4, 2f
-1:
- ldxp res0, res1, [x5]
+
+ /* RELAXED. */
+1: ldxp res0, res1, [x5]
orn res0, in0, res0
orn res1, in1, res1
stxp w4, res0, res1, [x5]
cbnz w4, 1b
ret
-2:
- ldaxp res0, res1, [x5]
+
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
+2: ldaxp res0, res1, [x5]
orn res0, in0, res0
orn res1, in1, res1
stlxp w4, res0, res1, [x5]
@@ -413,9 +451,12 @@ END (libat_nand_fetch_16_i1)
ENTRY (libat_test_and_set_16_i1)
mov w2, 1
cbnz w1, 2f
+
+ /* RELAXED. */
swpb w0, w2, [x0]
ret
+ /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */
2: swpalb w0, w2, [x0]
ret
END (libat_test_and_set_16_i1)
> -----Original Message----- > From: Wilco Dijkstra <Wilco.Dijkstra@arm.com> > Sent: Thursday, March 16, 2023 6:22 PM > To: GCC Patches <gcc-patches@gcc.gnu.org> > Cc: Richard Sandiford <Richard.Sandiford@arm.com>; Kyrylo Tkachov > <Kyrylo.Tkachov@arm.com> > Subject: Re: [PATCH] libatomic: Fix SEQ_CST 128-bit atomic load [PR108891] > > ping > > > From: Wilco Dijkstra > Sent: 23 February 2023 15:11 > To: GCC Patches <gcc-patches@gcc.gnu.org> > Cc: Richard Sandiford <Richard.Sandiford@arm.com>; Kyrylo Tkachov > <Kyrylo.Tkachov@arm.com> > Subject: [PATCH] libatomic: Fix SEQ_CST 128-bit atomic load [PR108891] > > > The LSE2 ifunc for 16-byte atomic load requires a barrier before the LDP - > without it, it effectively has Load-AcquirePC semantics similar to LDAPR, > which is less restrictive than what __ATOMIC_SEQ_CST requires. This patch > fixes this and adds comments to make it easier to see which sequence is > used for each case. Use a load/store exclusive loop for store to simplify > testing memory ordering is correct (it is slightly faster too). > > Passes regress, OK for commit? > Ok. Thanks, Kyrill > libatomic/ > PR libgcc/108891 > config/linux/aarch64/atomic_16.S: Fix libat_load_16_i1. > Add comments describing the memory order. > > --- > > diff --git a/libatomic/config/linux/aarch64/atomic_16.S > b/libatomic/config/linux/aarch64/atomic_16.S > index > 732c3534a06678664a252bdbc53652eeab0af506..05439ce394b9653c9bcb582 > 761ff7aaa7c8f9643 100644 > --- a/libatomic/config/linux/aarch64/atomic_16.S > +++ b/libatomic/config/linux/aarch64/atomic_16.S > @@ -72,33 +72,38 @@ name: \ > > ENTRY (libat_load_16_i1) > cbnz w1, 1f > + > + /* RELAXED. */ > ldp res0, res1, [x0] > ret > 1: > - cmp w1, ACQUIRE > - b.hi 2f > + cmp w1, SEQ_CST > + b.eq 2f > + > + /* ACQUIRE/CONSUME (Load-AcquirePC semantics). */ > ldp res0, res1, [x0] > dmb ishld > ret > -2: > + > + /* SEQ_CST. */ > +2: ldar tmp0, [x0] /* Block reordering with Store-Release instr. */ > ldp res0, res1, [x0] > - dmb ish > + dmb ishld > ret > END (libat_load_16_i1) > > > ENTRY (libat_store_16_i1) > cbnz w4, 1f > + > + /* RELAXED. */ > stp in0, in1, [x0] > ret > -1: > - dmb ish > - stp in0, in1, [x0] > - cmp w4, SEQ_CST > - beq 2f > - ret > -2: > - dmb ish > + > + /* RELEASE/SEQ_CST. */ > +1: ldaxp xzr, tmp0, [x0] > + stlxp w4, in0, in1, [x0] > + cbnz w4, 1b > ret > END (libat_store_16_i1) > > @@ -106,29 +111,33 @@ END (libat_store_16_i1) > ENTRY (libat_exchange_16_i1) > mov x5, x0 > cbnz w4, 2f > -1: > - ldxp res0, res1, [x5] > + > + /* RELAXED. */ > +1: ldxp res0, res1, [x5] > stxp w4, in0, in1, [x5] > cbnz w4, 1b > ret > 2: > cmp w4, ACQUIRE > b.hi 4f > -3: > - ldaxp res0, res1, [x5] > + > + /* ACQUIRE/CONSUME. */ > +3: ldaxp res0, res1, [x5] > stxp w4, in0, in1, [x5] > cbnz w4, 3b > ret > 4: > cmp w4, RELEASE > b.ne 6f > -5: > - ldxp res0, res1, [x5] > + > + /* RELEASE. */ > +5: ldxp res0, res1, [x5] > stlxp w4, in0, in1, [x5] > cbnz w4, 5b > ret > -6: > - ldaxp res0, res1, [x5] > + > + /* ACQ_REL/SEQ_CST. */ > +6: ldaxp res0, res1, [x5] > stlxp w4, in0, in1, [x5] > cbnz w4, 6b > ret > @@ -142,6 +151,8 @@ ENTRY (libat_compare_exchange_16_i1) > cbz w4, 2f > cmp w4, RELEASE > b.hs 3f > + > + /* ACQUIRE/CONSUME. */ > caspa exp0, exp1, in0, in1, [x0] > 0: > cmp exp0, tmp0 > @@ -153,15 +164,18 @@ ENTRY (libat_compare_exchange_16_i1) > stp exp0, exp1, [x1] > mov x0, 0 > ret > -2: > - casp exp0, exp1, in0, in1, [x0] > + > + /* RELAXED. */ > +2: casp exp0, exp1, in0, in1, [x0] > b 0b > -3: > - b.hi 4f > + > + /* RELEASE. */ > +3: b.hi 4f > caspl exp0, exp1, in0, in1, [x0] > b 0b > -4: > - caspal exp0, exp1, in0, in1, [x0] > + > + /* ACQ_REL/SEQ_CST. */ > +4: caspal exp0, exp1, in0, in1, [x0] > b 0b > END (libat_compare_exchange_16_i1) > > @@ -169,15 +183,17 @@ END (libat_compare_exchange_16_i1) > ENTRY (libat_fetch_add_16_i1) > mov x5, x0 > cbnz w4, 2f > -1: > - ldxp res0, res1, [x5] > + > + /* RELAXED. */ > +1: ldxp res0, res1, [x5] > adds tmplo, reslo, inlo > adc tmphi, reshi, inhi > stxp w4, tmp0, tmp1, [x5] > cbnz w4, 1b > ret > -2: > - ldaxp res0, res1, [x5] > + > + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ > +2: ldaxp res0, res1, [x5] > adds tmplo, reslo, inlo > adc tmphi, reshi, inhi > stlxp w4, tmp0, tmp1, [x5] > @@ -189,15 +205,17 @@ END (libat_fetch_add_16_i1) > ENTRY (libat_add_fetch_16_i1) > mov x5, x0 > cbnz w4, 2f > -1: > - ldxp res0, res1, [x5] > + > + /* RELAXED. */ > +1: ldxp res0, res1, [x5] > adds reslo, reslo, inlo > adc reshi, reshi, inhi > stxp w4, res0, res1, [x5] > cbnz w4, 1b > ret > -2: > - ldaxp res0, res1, [x5] > + > + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ > +2: ldaxp res0, res1, [x5] > adds reslo, reslo, inlo > adc reshi, reshi, inhi > stlxp w4, res0, res1, [x5] > @@ -209,15 +227,17 @@ END (libat_add_fetch_16_i1) > ENTRY (libat_fetch_sub_16_i1) > mov x5, x0 > cbnz w4, 2f > -1: > - ldxp res0, res1, [x5] > + > + /* RELAXED. */ > +1: ldxp res0, res1, [x5] > subs tmplo, reslo, inlo > sbc tmphi, reshi, inhi > stxp w4, tmp0, tmp1, [x5] > cbnz w4, 1b > ret > -2: > - ldaxp res0, res1, [x5] > + > + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ > +2: ldaxp res0, res1, [x5] > subs tmplo, reslo, inlo > sbc tmphi, reshi, inhi > stlxp w4, tmp0, tmp1, [x5] > @@ -229,15 +249,17 @@ END (libat_fetch_sub_16_i1) > ENTRY (libat_sub_fetch_16_i1) > mov x5, x0 > cbnz w4, 2f > -1: > - ldxp res0, res1, [x5] > + > + /* RELAXED. */ > +1: ldxp res0, res1, [x5] > subs reslo, reslo, inlo > sbc reshi, reshi, inhi > stxp w4, res0, res1, [x5] > cbnz w4, 1b > ret > -2: > - ldaxp res0, res1, [x5] > + > + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ > +2: ldaxp res0, res1, [x5] > subs reslo, reslo, inlo > sbc reshi, reshi, inhi > stlxp w4, res0, res1, [x5] > @@ -249,15 +271,17 @@ END (libat_sub_fetch_16_i1) > ENTRY (libat_fetch_or_16_i1) > mov x5, x0 > cbnz w4, 2f > -1: > - ldxp res0, res1, [x5] > + > + /* RELAXED. */ > +1: ldxp res0, res1, [x5] > orr tmp0, res0, in0 > orr tmp1, res1, in1 > stxp w4, tmp0, tmp1, [x5] > cbnz w4, 1b > ret > -2: > - ldaxp res0, res1, [x5] > + > + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ > +2: ldaxp res0, res1, [x5] > orr tmp0, res0, in0 > orr tmp1, res1, in1 > stlxp w4, tmp0, tmp1, [x5] > @@ -269,15 +293,17 @@ END (libat_fetch_or_16_i1) > ENTRY (libat_or_fetch_16_i1) > mov x5, x0 > cbnz w4, 2f > -1: > - ldxp res0, res1, [x5] > + > + /* RELAXED. */ > +1: ldxp res0, res1, [x5] > orr res0, res0, in0 > orr res1, res1, in1 > stxp w4, res0, res1, [x5] > cbnz w4, 1b > ret > -2: > - ldaxp res0, res1, [x5] > + > + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ > +2: ldaxp res0, res1, [x5] > orr res0, res0, in0 > orr res1, res1, in1 > stlxp w4, res0, res1, [x5] > @@ -289,15 +315,17 @@ END (libat_or_fetch_16_i1) > ENTRY (libat_fetch_and_16_i1) > mov x5, x0 > cbnz w4, 2f > -1: > - ldxp res0, res1, [x5] > + > + /* RELAXED. */ > +1: ldxp res0, res1, [x5] > and tmp0, res0, in0 > and tmp1, res1, in1 > stxp w4, tmp0, tmp1, [x5] > cbnz w4, 1b > ret > -2: > - ldaxp res0, res1, [x5] > + > + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ > +2: ldaxp res0, res1, [x5] > and tmp0, res0, in0 > and tmp1, res1, in1 > stlxp w4, tmp0, tmp1, [x5] > @@ -309,15 +337,17 @@ END (libat_fetch_and_16_i1) > ENTRY (libat_and_fetch_16_i1) > mov x5, x0 > cbnz w4, 2f > -1: > - ldxp res0, res1, [x5] > + > + /* RELAXED. */ > +1: ldxp res0, res1, [x5] > and res0, res0, in0 > and res1, res1, in1 > stxp w4, res0, res1, [x5] > cbnz w4, 1b > ret > -2: > - ldaxp res0, res1, [x5] > + > + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ > +2: ldaxp res0, res1, [x5] > and res0, res0, in0 > and res1, res1, in1 > stlxp w4, res0, res1, [x5] > @@ -329,15 +359,17 @@ END (libat_and_fetch_16_i1) > ENTRY (libat_fetch_xor_16_i1) > mov x5, x0 > cbnz w4, 2f > -1: > - ldxp res0, res1, [x5] > + > + /* RELAXED. */ > +1: ldxp res0, res1, [x5] > eor tmp0, res0, in0 > eor tmp1, res1, in1 > stxp w4, tmp0, tmp1, [x5] > cbnz w4, 1b > ret > -2: > - ldaxp res0, res1, [x5] > + > + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ > +2: ldaxp res0, res1, [x5] > eor tmp0, res0, in0 > eor tmp1, res1, in1 > stlxp w4, tmp0, tmp1, [x5] > @@ -349,15 +381,17 @@ END (libat_fetch_xor_16_i1) > ENTRY (libat_xor_fetch_16_i1) > mov x5, x0 > cbnz w4, 2f > -1: > - ldxp res0, res1, [x5] > + > + /* RELAXED. */ > +1: ldxp res0, res1, [x5] > eor res0, res0, in0 > eor res1, res1, in1 > stxp w4, res0, res1, [x5] > cbnz w4, 1b > ret > -2: > - ldaxp res0, res1, [x5] > + > + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ > +2: ldaxp res0, res1, [x5] > eor res0, res0, in0 > eor res1, res1, in1 > stlxp w4, res0, res1, [x5] > @@ -371,15 +405,17 @@ ENTRY (libat_fetch_nand_16_i1) > mvn in0, in0 > mvn in1, in1 > cbnz w4, 2f > -1: > - ldxp res0, res1, [x5] > + > + /* RELAXED. */ > +1: ldxp res0, res1, [x5] > orn tmp0, in0, res0 > orn tmp1, in1, res1 > stxp w4, tmp0, tmp1, [x5] > cbnz w4, 1b > ret > -2: > - ldaxp res0, res1, [x5] > + > + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ > +2: ldaxp res0, res1, [x5] > orn tmp0, in0, res0 > orn tmp1, in1, res1 > stlxp w4, tmp0, tmp1, [x5] > @@ -393,15 +429,17 @@ ENTRY (libat_nand_fetch_16_i1) > mvn in0, in0 > mvn in1, in1 > cbnz w4, 2f > -1: > - ldxp res0, res1, [x5] > + > + /* RELAXED. */ > +1: ldxp res0, res1, [x5] > orn res0, in0, res0 > orn res1, in1, res1 > stxp w4, res0, res1, [x5] > cbnz w4, 1b > ret > -2: > - ldaxp res0, res1, [x5] > + > + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ > +2: ldaxp res0, res1, [x5] > orn res0, in0, res0 > orn res1, in1, res1 > stlxp w4, res0, res1, [x5] > @@ -413,9 +451,12 @@ END (libat_nand_fetch_16_i1) > ENTRY (libat_test_and_set_16_i1) > mov w2, 1 > cbnz w1, 2f > + > + /* RELAXED. */ > swpb w0, w2, [x0] > ret > > + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ > 2: swpalb w0, w2, [x0] > ret > END (libat_test_and_set_16_i1)
diff --git a/libatomic/config/linux/aarch64/atomic_16.S b/libatomic/config/linux/aarch64/atomic_16.S index 732c3534a06678664a252bdbc53652eeab0af506..05439ce394b9653c9bcb582761ff7aaa7c8f9643 100644 --- a/libatomic/config/linux/aarch64/atomic_16.S +++ b/libatomic/config/linux/aarch64/atomic_16.S @@ -72,33 +72,38 @@ name: \ ENTRY (libat_load_16_i1) cbnz w1, 1f + + /* RELAXED. */ ldp res0, res1, [x0] ret 1: - cmp w1, ACQUIRE - b.hi 2f + cmp w1, SEQ_CST + b.eq 2f + + /* ACQUIRE/CONSUME (Load-AcquirePC semantics). */ ldp res0, res1, [x0] dmb ishld ret -2: + + /* SEQ_CST. */ +2: ldar tmp0, [x0] /* Block reordering with Store-Release instr. */ ldp res0, res1, [x0] - dmb ish + dmb ishld ret END (libat_load_16_i1) ENTRY (libat_store_16_i1) cbnz w4, 1f + + /* RELAXED. */ stp in0, in1, [x0] ret -1: - dmb ish - stp in0, in1, [x0] - cmp w4, SEQ_CST - beq 2f - ret -2: - dmb ish + + /* RELEASE/SEQ_CST. */ +1: ldaxp xzr, tmp0, [x0] + stlxp w4, in0, in1, [x0] + cbnz w4, 1b ret END (libat_store_16_i1) @@ -106,29 +111,33 @@ END (libat_store_16_i1) ENTRY (libat_exchange_16_i1) mov x5, x0 cbnz w4, 2f -1: - ldxp res0, res1, [x5] + + /* RELAXED. */ +1: ldxp res0, res1, [x5] stxp w4, in0, in1, [x5] cbnz w4, 1b ret 2: cmp w4, ACQUIRE b.hi 4f -3: - ldaxp res0, res1, [x5] + + /* ACQUIRE/CONSUME. */ +3: ldaxp res0, res1, [x5] stxp w4, in0, in1, [x5] cbnz w4, 3b ret 4: cmp w4, RELEASE b.ne 6f -5: - ldxp res0, res1, [x5] + + /* RELEASE. */ +5: ldxp res0, res1, [x5] stlxp w4, in0, in1, [x5] cbnz w4, 5b ret -6: - ldaxp res0, res1, [x5] + + /* ACQ_REL/SEQ_CST. */ +6: ldaxp res0, res1, [x5] stlxp w4, in0, in1, [x5] cbnz w4, 6b ret @@ -142,6 +151,8 @@ ENTRY (libat_compare_exchange_16_i1) cbz w4, 2f cmp w4, RELEASE b.hs 3f + + /* ACQUIRE/CONSUME. */ caspa exp0, exp1, in0, in1, [x0] 0: cmp exp0, tmp0 @@ -153,15 +164,18 @@ ENTRY (libat_compare_exchange_16_i1) stp exp0, exp1, [x1] mov x0, 0 ret -2: - casp exp0, exp1, in0, in1, [x0] + + /* RELAXED. */ +2: casp exp0, exp1, in0, in1, [x0] b 0b -3: - b.hi 4f + + /* RELEASE. */ +3: b.hi 4f caspl exp0, exp1, in0, in1, [x0] b 0b -4: - caspal exp0, exp1, in0, in1, [x0] + + /* ACQ_REL/SEQ_CST. */ +4: caspal exp0, exp1, in0, in1, [x0] b 0b END (libat_compare_exchange_16_i1) @@ -169,15 +183,17 @@ END (libat_compare_exchange_16_i1) ENTRY (libat_fetch_add_16_i1) mov x5, x0 cbnz w4, 2f -1: - ldxp res0, res1, [x5] + + /* RELAXED. */ +1: ldxp res0, res1, [x5] adds tmplo, reslo, inlo adc tmphi, reshi, inhi stxp w4, tmp0, tmp1, [x5] cbnz w4, 1b ret -2: - ldaxp res0, res1, [x5] + + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ +2: ldaxp res0, res1, [x5] adds tmplo, reslo, inlo adc tmphi, reshi, inhi stlxp w4, tmp0, tmp1, [x5] @@ -189,15 +205,17 @@ END (libat_fetch_add_16_i1) ENTRY (libat_add_fetch_16_i1) mov x5, x0 cbnz w4, 2f -1: - ldxp res0, res1, [x5] + + /* RELAXED. */ +1: ldxp res0, res1, [x5] adds reslo, reslo, inlo adc reshi, reshi, inhi stxp w4, res0, res1, [x5] cbnz w4, 1b ret -2: - ldaxp res0, res1, [x5] + + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ +2: ldaxp res0, res1, [x5] adds reslo, reslo, inlo adc reshi, reshi, inhi stlxp w4, res0, res1, [x5] @@ -209,15 +227,17 @@ END (libat_add_fetch_16_i1) ENTRY (libat_fetch_sub_16_i1) mov x5, x0 cbnz w4, 2f -1: - ldxp res0, res1, [x5] + + /* RELAXED. */ +1: ldxp res0, res1, [x5] subs tmplo, reslo, inlo sbc tmphi, reshi, inhi stxp w4, tmp0, tmp1, [x5] cbnz w4, 1b ret -2: - ldaxp res0, res1, [x5] + + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ +2: ldaxp res0, res1, [x5] subs tmplo, reslo, inlo sbc tmphi, reshi, inhi stlxp w4, tmp0, tmp1, [x5] @@ -229,15 +249,17 @@ END (libat_fetch_sub_16_i1) ENTRY (libat_sub_fetch_16_i1) mov x5, x0 cbnz w4, 2f -1: - ldxp res0, res1, [x5] + + /* RELAXED. */ +1: ldxp res0, res1, [x5] subs reslo, reslo, inlo sbc reshi, reshi, inhi stxp w4, res0, res1, [x5] cbnz w4, 1b ret -2: - ldaxp res0, res1, [x5] + + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ +2: ldaxp res0, res1, [x5] subs reslo, reslo, inlo sbc reshi, reshi, inhi stlxp w4, res0, res1, [x5] @@ -249,15 +271,17 @@ END (libat_sub_fetch_16_i1) ENTRY (libat_fetch_or_16_i1) mov x5, x0 cbnz w4, 2f -1: - ldxp res0, res1, [x5] + + /* RELAXED. */ +1: ldxp res0, res1, [x5] orr tmp0, res0, in0 orr tmp1, res1, in1 stxp w4, tmp0, tmp1, [x5] cbnz w4, 1b ret -2: - ldaxp res0, res1, [x5] + + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ +2: ldaxp res0, res1, [x5] orr tmp0, res0, in0 orr tmp1, res1, in1 stlxp w4, tmp0, tmp1, [x5] @@ -269,15 +293,17 @@ END (libat_fetch_or_16_i1) ENTRY (libat_or_fetch_16_i1) mov x5, x0 cbnz w4, 2f -1: - ldxp res0, res1, [x5] + + /* RELAXED. */ +1: ldxp res0, res1, [x5] orr res0, res0, in0 orr res1, res1, in1 stxp w4, res0, res1, [x5] cbnz w4, 1b ret -2: - ldaxp res0, res1, [x5] + + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ +2: ldaxp res0, res1, [x5] orr res0, res0, in0 orr res1, res1, in1 stlxp w4, res0, res1, [x5] @@ -289,15 +315,17 @@ END (libat_or_fetch_16_i1) ENTRY (libat_fetch_and_16_i1) mov x5, x0 cbnz w4, 2f -1: - ldxp res0, res1, [x5] + + /* RELAXED. */ +1: ldxp res0, res1, [x5] and tmp0, res0, in0 and tmp1, res1, in1 stxp w4, tmp0, tmp1, [x5] cbnz w4, 1b ret -2: - ldaxp res0, res1, [x5] + + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ +2: ldaxp res0, res1, [x5] and tmp0, res0, in0 and tmp1, res1, in1 stlxp w4, tmp0, tmp1, [x5] @@ -309,15 +337,17 @@ END (libat_fetch_and_16_i1) ENTRY (libat_and_fetch_16_i1) mov x5, x0 cbnz w4, 2f -1: - ldxp res0, res1, [x5] + + /* RELAXED. */ +1: ldxp res0, res1, [x5] and res0, res0, in0 and res1, res1, in1 stxp w4, res0, res1, [x5] cbnz w4, 1b ret -2: - ldaxp res0, res1, [x5] + + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ +2: ldaxp res0, res1, [x5] and res0, res0, in0 and res1, res1, in1 stlxp w4, res0, res1, [x5] @@ -329,15 +359,17 @@ END (libat_and_fetch_16_i1) ENTRY (libat_fetch_xor_16_i1) mov x5, x0 cbnz w4, 2f -1: - ldxp res0, res1, [x5] + + /* RELAXED. */ +1: ldxp res0, res1, [x5] eor tmp0, res0, in0 eor tmp1, res1, in1 stxp w4, tmp0, tmp1, [x5] cbnz w4, 1b ret -2: - ldaxp res0, res1, [x5] + + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ +2: ldaxp res0, res1, [x5] eor tmp0, res0, in0 eor tmp1, res1, in1 stlxp w4, tmp0, tmp1, [x5] @@ -349,15 +381,17 @@ END (libat_fetch_xor_16_i1) ENTRY (libat_xor_fetch_16_i1) mov x5, x0 cbnz w4, 2f -1: - ldxp res0, res1, [x5] + + /* RELAXED. */ +1: ldxp res0, res1, [x5] eor res0, res0, in0 eor res1, res1, in1 stxp w4, res0, res1, [x5] cbnz w4, 1b ret -2: - ldaxp res0, res1, [x5] + + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ +2: ldaxp res0, res1, [x5] eor res0, res0, in0 eor res1, res1, in1 stlxp w4, res0, res1, [x5] @@ -371,15 +405,17 @@ ENTRY (libat_fetch_nand_16_i1) mvn in0, in0 mvn in1, in1 cbnz w4, 2f -1: - ldxp res0, res1, [x5] + + /* RELAXED. */ +1: ldxp res0, res1, [x5] orn tmp0, in0, res0 orn tmp1, in1, res1 stxp w4, tmp0, tmp1, [x5] cbnz w4, 1b ret -2: - ldaxp res0, res1, [x5] + + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ +2: ldaxp res0, res1, [x5] orn tmp0, in0, res0 orn tmp1, in1, res1 stlxp w4, tmp0, tmp1, [x5] @@ -393,15 +429,17 @@ ENTRY (libat_nand_fetch_16_i1) mvn in0, in0 mvn in1, in1 cbnz w4, 2f -1: - ldxp res0, res1, [x5] + + /* RELAXED. */ +1: ldxp res0, res1, [x5] orn res0, in0, res0 orn res1, in1, res1 stxp w4, res0, res1, [x5] cbnz w4, 1b ret -2: - ldaxp res0, res1, [x5] + + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ +2: ldaxp res0, res1, [x5] orn res0, in0, res0 orn res1, in1, res1 stlxp w4, res0, res1, [x5] @@ -413,9 +451,12 @@ END (libat_nand_fetch_16_i1) ENTRY (libat_test_and_set_16_i1) mov w2, 1 cbnz w1, 2f + + /* RELAXED. */ swpb w0, w2, [x0] ret + /* ACQUIRE/CONSUME/RELEASE/ACQ_REL/SEQ_CST. */ 2: swpalb w0, w2, [x0] ret END (libat_test_and_set_16_i1)