diff mbox series

[v8,7/8] aarch64: Add rseq_load32_load32_relaxed

Message ID 20240206162801.882585-8-mjeanson@efficios.com
State New
Headers show
Series Extend rseq support | expand

Commit Message

Michael Jeanson Feb. 6, 2024, 4:28 p.m. UTC
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>

Implement rseq_load32_load32_relaxed() for the aarch64 architecture.
This static inline function implements a rseq critical section to load
two 32-bit integer values atomically with respect to preemption and
signal delivery.

This implementation is imported from the librseq project.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Michael Jeanson <mjeanson@efficios.com>
---
 .../unix/sysv/linux/aarch64/rseq-internal.h   | 173 ++++++++++++++++++
 1 file changed, 173 insertions(+)
 create mode 100644 sysdeps/unix/sysv/linux/aarch64/rseq-internal.h

Comments

DJ Delorie Feb. 17, 2024, 3:53 a.m. UTC | #1
Michael Jeanson <mjeanson@efficios.com> writes:
> This implementation is imported from the librseq project.

Same comments as [6/8] wrt origin URL

> diff --git a/sysdeps/unix/sysv/linux/aarch64/rseq-internal.h b/sysdeps/unix/sysv/linux/aarch64/rseq-internal.h
> +   Copyright (C) 2023 Free Software Foundation, Inc.

Year?

> +#define RSEQ_ASM_TMP_REG32	"w15"
> +#define RSEQ_ASM_TMP_REG	"x15"
> +#define RSEQ_ASM_TMP_REG_2	"x14"
> +
> +#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip,		\
> +				post_commit_offset, abort_ip)			\
> +	"	.pushsection	__rseq_cs, \"aw\"\n"				\
> +	"	.balign	32\n"							\
> +	__rseq_str(label) ":\n"							\
> +	"	.long	" __rseq_str(version) ", " __rseq_str(flags) "\n"	\
> +	"	.quad	" __rseq_str(start_ip) ", "				\
> +			  __rseq_str(post_commit_offset) ", "			\
> +			  __rseq_str(abort_ip) "\n"				\
> +	"	.popsection\n\t"						\
> +	"	.pushsection __rseq_cs_ptr_array, \"aw\"\n"				\
> +	"	.quad " __rseq_str(label) "b\n"					\
> +	"	.popsection\n"

Ok.

> +#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip)	\
> +	__RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip,			\
> +				(post_commit_ip - start_ip), abort_ip)

Ok.

> +/*
> + * Exit points of a rseq critical section consist of all instructions outside
> + * of the critical section where a critical section can either branch to or
> + * reach through the normal course of its execution. The abort IP and the
> + * post-commit IP are already part of the __rseq_cs section and should not be
> + * explicitly defined as additional exit points. Knowing all exit points is
> + * useful to assist debuggers stepping over the critical section.
> + */
> +#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip)				\
> +	"	.pushsection __rseq_exit_point_array, \"aw\"\n"			\
> +	"	.quad " __rseq_str(start_ip) ", " __rseq_str(exit_ip) "\n"	\
> +	"	.popsection\n"

Ok.

> +#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs)			\
> +	"	adrp	" RSEQ_ASM_TMP_REG ", " __rseq_str(cs_label) "\n"	\
> +	"	add	" RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG		\
> +			", :lo12:" __rseq_str(cs_label) "\n"			\
> +	"	str	" RSEQ_ASM_TMP_REG ", %[" __rseq_str(rseq_cs) "]\n"	\
> +	__rseq_str(label) ":\n"

Ok.

> +#define RSEQ_ASM_DEFINE_ABORT(label, abort_label)				\
> +	"	b	222f\n"							\
> +	"	.inst 	"	__rseq_str(RSEQ_SIG_CODE) "\n"			\
> +	__rseq_str(label) ":\n"							\
> +	"	b	%l[" __rseq_str(abort_label) "]\n"			\
> +	"222:\n"

Ok.

> +#define RSEQ_ASM_OP_STORE(value, var)						\
> +	"	str	%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n"

Ok.

> +#define RSEQ_ASM_OP_STORE_RELEASE(value, var)					\
> +	"	stlr	%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n"

Ok.

> +#define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label)			\
> +	RSEQ_ASM_OP_STORE(value, var)						\
> +	__rseq_str(post_commit_label) ":\n"

Ok.

> +#define RSEQ_ASM_OP_FINAL_STORE_RELEASE(value, var, post_commit_label)		\
> +	RSEQ_ASM_OP_STORE_RELEASE(value, var)					\
> +	__rseq_str(post_commit_label) ":\n"

Ok.

> +#define RSEQ_ASM_OP_CMPEQ(var, expect, label)					\
> +	"	ldr	" RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n"		\
> +	"	sub	" RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG		\
> +			", %[" __rseq_str(expect) "]\n"				\
> +	"	cbnz	" RSEQ_ASM_TMP_REG ", " __rseq_str(label) "\n"

This is why we need documentation; I would have guessed this was a CMPNE
operation, but it depends on how you define "label"

> +#define RSEQ_ASM_OP_CMPEQ32(var, expect, label)					\
> +	"	ldr	" RSEQ_ASM_TMP_REG32 ", %[" __rseq_str(var) "]\n"	\
> +	"	sub	" RSEQ_ASM_TMP_REG32 ", " RSEQ_ASM_TMP_REG32		\
> +			", %w[" __rseq_str(expect) "]\n"			\
> +	"	cbnz	" RSEQ_ASM_TMP_REG32 ", " __rseq_str(label) "\n"

Ok.

> +#define RSEQ_ASM_OP_CMPNE(var, expect, label)					\
> +	"	ldr	" RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n"		\
> +	"	sub	" RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG		\
> +			", %[" __rseq_str(expect) "]\n"				\
> +	"	cbz	" RSEQ_ASM_TMP_REG ", " __rseq_str(label) "\n"

And of course this one is the opposite way ;-)

> +#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label)			\
> +	RSEQ_ASM_OP_CMPEQ32(current_cpu_id, cpu_id, label)

Ok.

> +#define RSEQ_ASM_OP_R_LOAD(var)							\
> +	"	ldr	" RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n"

Ok.

> +#define RSEQ_ASM_OP_R_STORE(var)						\
> +	"	str	" RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n"

Ok.

> +#define RSEQ_ASM_OP_R_LOAD32(var)						\
> +	"	ldr	" RSEQ_ASM_TMP_REG32 ", %[" __rseq_str(var) "]\n"

Ok.

> +#define RSEQ_ASM_OP_R_STORE32(var)						\
> +	"	str	" RSEQ_ASM_TMP_REG32 ", %[" __rseq_str(var) "]\n"

Ok.

> +#define RSEQ_ASM_OP_R_LOAD_OFF(offset)						\
> +	"	ldr	" RSEQ_ASM_TMP_REG ", [" RSEQ_ASM_TMP_REG		\
> +			", %[" __rseq_str(offset) "]]\n"

Ok.

> +#define RSEQ_ASM_OP_R_ADD(count)						\
> +	"	add	" RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG		\
> +			", %[" __rseq_str(count) "]\n"

Ok.

> +#define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label)			\
> +	"	str	" RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n"		\
> +	__rseq_str(post_commit_label) ":\n"

Ok.

> +#define RSEQ_ASM_OP_R_FINAL_STORE32(var, post_commit_label)			\
> +	"	str	" RSEQ_ASM_TMP_REG32 ", %[" __rseq_str(var) "]\n"	\
> +	__rseq_str(post_commit_label) ":\n"

Ok.

> +#define RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len)					\
> +	"	cbz	%[" __rseq_str(len) "], 333f\n"				\
> +	"	mov	" RSEQ_ASM_TMP_REG_2 ", %[" __rseq_str(len) "]\n"	\
> +	"222:	sub	" RSEQ_ASM_TMP_REG_2 ", " RSEQ_ASM_TMP_REG_2 ", #1\n"	\
> +	"	ldrb	" RSEQ_ASM_TMP_REG32 ", [%[" __rseq_str(src) "]"	\
> +			", " RSEQ_ASM_TMP_REG_2 "]\n"				\
> +	"	strb	" RSEQ_ASM_TMP_REG32 ", [%[" __rseq_str(dst) "]"	\
> +			", " RSEQ_ASM_TMP_REG_2 "]\n"				\
> +	"	cbnz	" RSEQ_ASM_TMP_REG_2 ", 222b\n"				\
> +	"333:\n"

Ok, but WHY?

> +/*
> + * Load @src1 (32-bit) into @dst1 and load @src2 (32-bit) into @dst2.
> + */
> +#define RSEQ_HAS_LOAD32_LOAD32_RELAXED 1
> +static __always_inline int
> +rseq_load32_load32_relaxed(uint32_t *dst1, uint32_t *src1,
> +			       uint32_t *dst2, uint32_t *src2)
> +{
> +	__asm__ __volatile__ goto (
> +		RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
> +		RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
> +		RSEQ_ASM_OP_R_LOAD32(src1)
> +		RSEQ_ASM_OP_R_STORE32(dst1)
> +		RSEQ_ASM_OP_R_LOAD32(src2)
> +		RSEQ_ASM_OP_R_FINAL_STORE32(dst2, 3)
> +		RSEQ_ASM_DEFINE_ABORT(4, abort)
> +		: /* gcc asm goto does not allow outputs */
> +		: [rseq_cs]		"m" (rseq_get_area()->rseq_cs),
> +		  [dst1]		"Qo" (*dst1),
> +		  [dst2]		"Qo" (*dst2),
> +		  [src1]		"Qo" (*src1),
> +		  [src2]		"Qo" (*src2)
> +		: "memory", RSEQ_ASM_TMP_REG
> +		: abort
> +	);
> +	rseq_after_asm_goto();
> +	return 0;
> +abort:
> +	rseq_after_asm_goto();
> +	return -1;
> +}

Ok.
Michael Jeanson Feb. 19, 2024, 8:29 p.m. UTC | #2
On 2024-02-16 22:53, DJ Delorie wrote:
> Michael Jeanson <mjeanson@efficios.com> writes:
>> This implementation is imported from the librseq project.
> 
> Same comments as [6/8] wrt origin URL

Ack.

>> diff --git a/sysdeps/unix/sysv/linux/aarch64/rseq-internal.h b/sysdeps/unix/sysv/linux/aarch64/rseq-internal.h
>> +   Copyright (C) 2023 Free Software Foundation, Inc.
> 
> Year?

Again, will clarify before next patchset.


I'll let Mathieu answer the macros / assembly questions.
Mathieu Desnoyers Feb. 20, 2024, 3:07 p.m. UTC | #3
On 2024-02-16 22:53, DJ Delorie wrote:
[...]
> 
>> +#define RSEQ_ASM_OP_CMPEQ(var, expect, label)					\
>> +	"	ldr	" RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n"		\
>> +	"	sub	" RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG		\
>> +			", %[" __rseq_str(expect) "]\n"				\
>> +	"	cbnz	" RSEQ_ASM_TMP_REG ", " __rseq_str(label) "\n"
> 
> This is why we need documentation; I would have guessed this was a CMPNE
> operation, but it depends on how you define "label"

This comes from the x86 implementation I originally did which has
"RSEQ_ASM_CMP_CPU_ID()" and static inline functions such as
"rseq_cmpeqv_storev()". The meaning of the "cmp" here is that the
critical section does _not_ abort (does not branch) if the comparison
matches.

But I understand how the ASM helpers that were contributed for other
architectures such as "RSEQ_ASM_OP_CMPEQ()", with the same semantic
of "do not branch to abort if the comparison matches" can be misleading
for someone used to reading assembler on pretty much any architecture,
where the conditional branch is expected to be taken if the condition
matches. So what I have here in librseq is backwards.

Fortunately, librseq is still just a master branch (no releases yet),
and the copy in the Linux kernel selftests is internal to that selftest,
so there are no stable API expectations at this stage.

So I don't think the semantic of e.g. "rseq_cmpeqv_storev()" is
misleading: it proceeds to do the store if the comparison matches.

However, the ASM macros would benefit from a logic flip. Even though
the API is not stable, I would like to introduce this in a way that
will allow users of the API to catch the change at compile-time. I
propose the following remapping of the macros for added clarity:

RSEQ_ASM_OP_CMPNE becomes RSEQ_ASM_OP_CBEQ (branch if equal)
RSEQ_ASM_OP_CMPEQ becomes RSEQ_ASM_OP_CBNE (branch if not equal)
RSEQ_ASM_CMP_CPU_ID becomes RSEQ_ASM_CBNE_CPU_ID (branch if cpu id is not equal)

I can do this change across all architectures in librseq to keep things in
sync. What do you think?

[...]

> 
>> +#define RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len)					\
>> +	"	cbz	%[" __rseq_str(len) "], 333f\n"				\
>> +	"	mov	" RSEQ_ASM_TMP_REG_2 ", %[" __rseq_str(len) "]\n"	\
>> +	"222:	sub	" RSEQ_ASM_TMP_REG_2 ", " RSEQ_ASM_TMP_REG_2 ", #1\n"	\
>> +	"	ldrb	" RSEQ_ASM_TMP_REG32 ", [%[" __rseq_str(src) "]"	\
>> +			", " RSEQ_ASM_TMP_REG_2 "]\n"				\
>> +	"	strb	" RSEQ_ASM_TMP_REG32 ", [%[" __rseq_str(dst) "]"	\
>> +			", " RSEQ_ASM_TMP_REG_2 "]\n"				\
>> +	"	cbnz	" RSEQ_ASM_TMP_REG_2 ", 222b\n"				\
>> +	"333:\n"
> 
> Ok, but WHY?

This is a memcpy from src to dst which can be aborted at any point
during the copy. Do you recommend we add documentation about what it does,
or that we remove it for now given that it is not used by the initial static
inline ?

Keeping all those helpers there simplifies the task of keeping librseq and
glibc in sync. But I would also understand if you prefer that we only introduce
what we use.

Thanks,

Mathieu
DJ Delorie Feb. 20, 2024, 5:55 p.m. UTC | #4
Mathieu Desnoyers <mathieu.desnoyers@efficios.com> writes:

> On 2024-02-16 22:53, DJ Delorie wrote:
> [...]
>> 
>>> +#define RSEQ_ASM_OP_CMPEQ(var, expect, label)					\
>>> +	"	ldr	" RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n"		\
>>> +	"	sub	" RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG		\
>>> +			", %[" __rseq_str(expect) "]\n"				\
>>> +	"	cbnz	" RSEQ_ASM_TMP_REG ", " __rseq_str(label) "\n"
>> 
>> This is why we need documentation; I would have guessed this was a CMPNE
>> operation, but it depends on how you define "label"
>
> This comes from the x86 implementation I originally did which has
> "RSEQ_ASM_CMP_CPU_ID()" and static inline functions such as
> "rseq_cmpeqv_storev()". The meaning of the "cmp" here is that the
> critical section does _not_ abort (does not branch) if the comparison
> matches.

Given that I'm looking at it as "someone not familiar with the RSEQ
API[*]", perhaps a one line comment that says "compare VAR and EXPECT
and ensure they're equal, else abort to LABEL" would have made me think
"Oh, that makes sense".  Otherwise I have to read the inline asm, and my
familiarity with x86 asm would make me confused.

> However, the ASM macros would benefit from a logic flip. Even though
> the API is not stable, I would like to introduce this in a way that
> will allow users of the API to catch the change at compile-time.

I'm not pushing for an ABI change at this point.  *Any* undocumented
inline assembler macro is going to be confusing to someone not familiar
with it, and needs a human-understandable comment or documentation for
it.

>>> +#define RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len)					\
>>> +	"	cbz	%[" __rseq_str(len) "], 333f\n"				\
>>> +	"	mov	" RSEQ_ASM_TMP_REG_2 ", %[" __rseq_str(len) "]\n"	\
>>> +	"222:	sub	" RSEQ_ASM_TMP_REG_2 ", " RSEQ_ASM_TMP_REG_2 ", #1\n"	\
>>> +	"	ldrb	" RSEQ_ASM_TMP_REG32 ", [%[" __rseq_str(src) "]"	\
>>> +			", " RSEQ_ASM_TMP_REG_2 "]\n"				\
>>> +	"	strb	" RSEQ_ASM_TMP_REG32 ", [%[" __rseq_str(dst) "]"	\
>>> +			", " RSEQ_ASM_TMP_REG_2 "]\n"				\
>>> +	"	cbnz	" RSEQ_ASM_TMP_REG_2 ", 222b\n"				\
>>> +	"333:\n"
>> 
>> Ok, but WHY?
>
> This is a memcpy from src to dst which can be aborted at any point
> during the copy. Do you recommend we add documentation about what it does,

Yes.  A short comment that says "This is a slow dumb memcpy, but it can
be used in an rseq abortable code sequence." would be sufficient.
That's what I meant by my WHY? comment - there should be something that
explains why it exists and/or why you'd use it.

And for a bit of humorous snark, I'll point out that this mail thread is
already longer than the new documentation would need to be ;-)


[*] And someone old enough to realize that comments aren't just for your
    peers, but also for your future self, to remind you what the heck
    you were thinking when you wrote that ;-)
diff mbox series

Patch

diff --git a/sysdeps/unix/sysv/linux/aarch64/rseq-internal.h b/sysdeps/unix/sysv/linux/aarch64/rseq-internal.h
new file mode 100644
index 0000000000..3c03f67dbe
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/aarch64/rseq-internal.h
@@ -0,0 +1,173 @@ 
+/* Restartable Sequences internal API. aarch64 macros.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdeps/unix/sysv/linux/rseq-internal.h>
+
+#define RSEQ_ASM_TMP_REG32	"w15"
+#define RSEQ_ASM_TMP_REG	"x15"
+#define RSEQ_ASM_TMP_REG_2	"x14"
+
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip,		\
+				post_commit_offset, abort_ip)			\
+	"	.pushsection	__rseq_cs, \"aw\"\n"				\
+	"	.balign	32\n"							\
+	__rseq_str(label) ":\n"							\
+	"	.long	" __rseq_str(version) ", " __rseq_str(flags) "\n"	\
+	"	.quad	" __rseq_str(start_ip) ", "				\
+			  __rseq_str(post_commit_offset) ", "			\
+			  __rseq_str(abort_ip) "\n"				\
+	"	.popsection\n\t"						\
+	"	.pushsection __rseq_cs_ptr_array, \"aw\"\n"				\
+	"	.quad " __rseq_str(label) "b\n"					\
+	"	.popsection\n"
+
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip)	\
+	__RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip,			\
+				(post_commit_ip - start_ip), abort_ip)
+
+/*
+ * Exit points of a rseq critical section consist of all instructions outside
+ * of the critical section where a critical section can either branch to or
+ * reach through the normal course of its execution. The abort IP and the
+ * post-commit IP are already part of the __rseq_cs section and should not be
+ * explicitly defined as additional exit points. Knowing all exit points is
+ * useful to assist debuggers stepping over the critical section.
+ */
+#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip)				\
+	"	.pushsection __rseq_exit_point_array, \"aw\"\n"			\
+	"	.quad " __rseq_str(start_ip) ", " __rseq_str(exit_ip) "\n"	\
+	"	.popsection\n"
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs)			\
+	"	adrp	" RSEQ_ASM_TMP_REG ", " __rseq_str(cs_label) "\n"	\
+	"	add	" RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG		\
+			", :lo12:" __rseq_str(cs_label) "\n"			\
+	"	str	" RSEQ_ASM_TMP_REG ", %[" __rseq_str(rseq_cs) "]\n"	\
+	__rseq_str(label) ":\n"
+
+#define RSEQ_ASM_DEFINE_ABORT(label, abort_label)				\
+	"	b	222f\n"							\
+	"	.inst 	"	__rseq_str(RSEQ_SIG_CODE) "\n"			\
+	__rseq_str(label) ":\n"							\
+	"	b	%l[" __rseq_str(abort_label) "]\n"			\
+	"222:\n"
+
+#define RSEQ_ASM_OP_STORE(value, var)						\
+	"	str	%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_STORE_RELEASE(value, var)					\
+	"	stlr	%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label)			\
+	RSEQ_ASM_OP_STORE(value, var)						\
+	__rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_FINAL_STORE_RELEASE(value, var, post_commit_label)		\
+	RSEQ_ASM_OP_STORE_RELEASE(value, var)					\
+	__rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_CMPEQ(var, expect, label)					\
+	"	ldr	" RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n"		\
+	"	sub	" RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG		\
+			", %[" __rseq_str(expect) "]\n"				\
+	"	cbnz	" RSEQ_ASM_TMP_REG ", " __rseq_str(label) "\n"
+
+#define RSEQ_ASM_OP_CMPEQ32(var, expect, label)					\
+	"	ldr	" RSEQ_ASM_TMP_REG32 ", %[" __rseq_str(var) "]\n"	\
+	"	sub	" RSEQ_ASM_TMP_REG32 ", " RSEQ_ASM_TMP_REG32		\
+			", %w[" __rseq_str(expect) "]\n"			\
+	"	cbnz	" RSEQ_ASM_TMP_REG32 ", " __rseq_str(label) "\n"
+
+#define RSEQ_ASM_OP_CMPNE(var, expect, label)					\
+	"	ldr	" RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n"		\
+	"	sub	" RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG		\
+			", %[" __rseq_str(expect) "]\n"				\
+	"	cbz	" RSEQ_ASM_TMP_REG ", " __rseq_str(label) "\n"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label)			\
+	RSEQ_ASM_OP_CMPEQ32(current_cpu_id, cpu_id, label)
+
+#define RSEQ_ASM_OP_R_LOAD(var)							\
+	"	ldr	" RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_R_STORE(var)						\
+	"	str	" RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_R_LOAD32(var)						\
+	"	ldr	" RSEQ_ASM_TMP_REG32 ", %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_R_STORE32(var)						\
+	"	str	" RSEQ_ASM_TMP_REG32 ", %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_R_LOAD_OFF(offset)						\
+	"	ldr	" RSEQ_ASM_TMP_REG ", [" RSEQ_ASM_TMP_REG		\
+			", %[" __rseq_str(offset) "]]\n"
+
+#define RSEQ_ASM_OP_R_ADD(count)						\
+	"	add	" RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG		\
+			", %[" __rseq_str(count) "]\n"
+
+#define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label)			\
+	"	str	" RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n"		\
+	__rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_R_FINAL_STORE32(var, post_commit_label)			\
+	"	str	" RSEQ_ASM_TMP_REG32 ", %[" __rseq_str(var) "]\n"	\
+	__rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len)					\
+	"	cbz	%[" __rseq_str(len) "], 333f\n"				\
+	"	mov	" RSEQ_ASM_TMP_REG_2 ", %[" __rseq_str(len) "]\n"	\
+	"222:	sub	" RSEQ_ASM_TMP_REG_2 ", " RSEQ_ASM_TMP_REG_2 ", #1\n"	\
+	"	ldrb	" RSEQ_ASM_TMP_REG32 ", [%[" __rseq_str(src) "]"	\
+			", " RSEQ_ASM_TMP_REG_2 "]\n"				\
+	"	strb	" RSEQ_ASM_TMP_REG32 ", [%[" __rseq_str(dst) "]"	\
+			", " RSEQ_ASM_TMP_REG_2 "]\n"				\
+	"	cbnz	" RSEQ_ASM_TMP_REG_2 ", 222b\n"				\
+	"333:\n"
+
+/*
+ * Load @src1 (32-bit) into @dst1 and load @src2 (32-bit) into @dst2.
+ */
+#define RSEQ_HAS_LOAD32_LOAD32_RELAXED 1
+static __always_inline int
+rseq_load32_load32_relaxed(uint32_t *dst1, uint32_t *src1,
+			       uint32_t *dst2, uint32_t *src2)
+{
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+		RSEQ_ASM_OP_R_LOAD32(src1)
+		RSEQ_ASM_OP_R_STORE32(dst1)
+		RSEQ_ASM_OP_R_LOAD32(src2)
+		RSEQ_ASM_OP_R_FINAL_STORE32(dst2, 3)
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [rseq_cs]		"m" (rseq_get_area()->rseq_cs),
+		  [dst1]		"Qo" (*dst1),
+		  [dst2]		"Qo" (*dst2),
+		  [src1]		"Qo" (*src1),
+		  [src2]		"Qo" (*src2)
+		: "memory", RSEQ_ASM_TMP_REG
+		: abort
+	);
+	rseq_after_asm_goto();
+	return 0;
+abort:
+	rseq_after_asm_goto();
+	return -1;
+}