[4/6] aarch64: Improve sysdep-cancel.h
diff mbox

Message ID 1400619378-7262-5-git-send-email-rth@twiddle.net
State New
Headers show

Commit Message

Richard Henderson May 20, 2014, 8:56 p.m. UTC
From: Richard Henderson <rth@redhat.com>

Use a constant frame size, rather than pushing/popping for every saved
register.  Use stp, ldp, cbz.  Share code with the _nocancel path.

	* sysdeps/unix/sysv/linux/aarch64/nptl/sysdep-cancel.h (PSEUDO):
	Use ENTRY for _nocancel entry point.  Reuse pieces of _nocancel
	entry point for implementing the cancel path.  Simplify cancel
	path frame setup.  Use cbz instead of cmp+bne for singlethread path.
	(DOCARGS_2, UNDOCARGS_2): Use stp/ldp.
	(DOCARGS_4, UNDOCARGS_4, DOCARGS_6, UNDOCARGS_6): Likewise.
	(SINGLE_THREAD_P) [ASM]: Take a register number in which to
	return the result.
---
 .../unix/sysv/linux/aarch64/nptl/sysdep-cancel.h   | 186 +++++++--------------
 1 file changed, 64 insertions(+), 122 deletions(-)

Comments

Will Newton May 21, 2014, 8:26 a.m. UTC | #1
On 20 May 2014 21:56, Richard Henderson <rth@twiddle.net> wrote:
> From: Richard Henderson <rth@redhat.com>
>
> Use a constant frame size, rather than pushing/popping for every saved
> register.  Use stp, ldp, cbz.  Share code with the _nocancel path.
>
>         * sysdeps/unix/sysv/linux/aarch64/nptl/sysdep-cancel.h (PSEUDO):
>         Use ENTRY for _nocancel entry point.  Reuse pieces of _nocancel
>         entry point for implementing the cancel path.  Simplify cancel
>         path frame setup.  Use cbz instead of cmp+bne for singlethread path.
>         (DOCARGS_2, UNDOCARGS_2): Use stp/ldp.
>         (DOCARGS_4, UNDOCARGS_4, DOCARGS_6, UNDOCARGS_6): Likewise.
>         (SINGLE_THREAD_P) [ASM]: Take a register number in which to
>         return the result.
> ---
>  .../unix/sysv/linux/aarch64/nptl/sysdep-cancel.h   | 186 +++++++--------------
>  1 file changed, 64 insertions(+), 122 deletions(-)
>
> diff --git a/sysdeps/unix/sysv/linux/aarch64/nptl/sysdep-cancel.h b/sysdeps/unix/sysv/linux/aarch64/nptl/sysdep-cancel.h
> index e3b4b56..5cf3fd5 100644
> --- a/sysdeps/unix/sysv/linux/aarch64/nptl/sysdep-cancel.h
> +++ b/sysdeps/unix/sysv/linux/aarch64/nptl/sysdep-cancel.h
> @@ -26,121 +26,66 @@
>
>  # undef PSEUDO
>  # define PSEUDO(name, syscall_name, args)                              \
> -  .section ".text";                                                    \
> -  .type __##syscall_name##_nocancel,%function;                         \
> -  .globl __##syscall_name##_nocancel;                                  \
> -  __##syscall_name##_nocancel:                                         \
> -    cfi_startproc;                                                     \
> -    DO_CALL (syscall_name, args);                                      \
> -    cmn x0, 4095;                                                      \
> -    b.cs .Lsyscall_error;                                              \
> -    PSEUDO_RET;                                                                \
> -    cfi_endproc;                                                       \
> -    .size __##syscall_name##_nocancel,.-__##syscall_name##_nocancel;   \
> -  ENTRY (name);                                                                \
> -    SINGLE_THREAD_P;                                                   \
> -    bne .Lpseudo_cancel;                                               \
> -    DO_CALL (syscall_name, 0);                                         \
> -    cmn x0, 4095;                                                      \
> -    b.cs .Lsyscall_error;                                              \
> -    PSEUDO_RET;                                                                \
> -  .Lpseudo_cancel:                                                     \
> -    DOCARGS_##args;    /* save syscall args etc. around CENABLE.  */   \
> -    CENABLE;                                                           \
> -    mov x16, x0;       /* put mask in safe place.  */                  \
> -    UNDOCARGS_##args;  /* restore syscall args.  */                    \
> -    mov x8, SYS_ify (syscall_name);    /* do the call.  */             \
> -    svc        0;                                                              \
> -    str x0, [sp, -16]!;        /* save syscall return value.  */               \
> -    cfi_adjust_cfa_offset (16);                                                \
> -    mov x0, x16;        /* get mask back.  */                          \
> -    CDISABLE;                                                          \
> -    ldr x0, [sp], 16;                                                  \
> -    cfi_adjust_cfa_offset (-16);                                       \
> -    ldr x30, [sp], 16;                                                 \
> -    cfi_adjust_cfa_offset (-16);                                       \
> -    cfi_restore (x30);                                                 \
> -    UNDOARGS_##args;                                                   \
> -    cmn x0, 4095;                                                      \
> -    b.cs .Lsyscall_error;
> -
> -# define DOCARGS_0                                                     \
> -       str x30, [sp, -16]!;                                            \
> -       cfi_adjust_cfa_offset (16);                                     \
> -       cfi_rel_offset (x30, 0)
> -
> +       .section ".text";                                               \
> +ENTRY (__##syscall_name##_nocancel);                                   \
> +.Lpseudo_nocancel:                                                     \
> +       DO_CALL (syscall_name, args);                                   \
> +.Lpseudo_ret:                                                          \
> +       cmn x0, 4095;                                                   \
> +       b.cs .Lsyscall_error;                                           \
> +       .subsection 2;                                                  \
> +       .size __##syscall_name##_nocancel,.-__##syscall_name##_nocancel; \
> +ENTRY (name);                                                          \
> +       SINGLE_THREAD_P(16);                                            \
> +       cbz     w16, .Lpseudo_nocancel;                                 \
> +       /* Setup common stack frame no matter the number of args.  */   \
> +       stp     x19, x30, [sp, -64]!;                                   \
> +       cfi_adjust_cfa_offset (64);                                     \
> +       cfi_rel_offset (x19, 0);                                        \
> +       cfi_rel_offset (x30, 8);                                        \
> +       DOCARGS_##args;         /* save syscall args around CENABLE. */ \
> +       CENABLE;                                                        \
> +       mov     x19, x0;        /* save mask around syscall. */         \
> +       UNDOCARGS_##args;       /* restore syscall args.  */            \
> +       DO_CALL (syscall_name, args);                                   \
> +       str     x0, [sp, 16];   /* save syscall return value.  */       \
> +       mov     x0, x19;        /* pass mask to CDISABLE. */            \
> +       CDISABLE;                                                       \
> +       ldr     x0, [sp, 16];                                           \
> +       ldp     x19, x30, [sp], 64;                                     \
> +       cfi_adjust_cfa_offset (-64);                                    \
> +       cfi_restore (x19);                                              \
> +       cfi_restore (x30);                                              \
> +       b       .Lpseudo_ret;                                           \
> +       cfi_endproc;                                                    \
> +       .size name, .-name;                                             \
> +       .previous
> +
> +# undef PSEUDO_END
> +# define PSEUDO_END(name)                                              \
> +       SYSCALL_ERROR_HANDLER;                                          \
> +       cfi_endproc
> +
> +# define DOCARGS_0
>  # define UNDOCARGS_0
>
> -# define DOCARGS_1                                                     \
> -       DOCARGS_0;                                                      \
> -       str x0, [sp, -16]!;                                             \
> -       cfi_adjust_cfa_offset (16);                                     \
> -       cfi_rel_offset (x0, 0)
> -
> -# define UNDOCARGS_1                                                   \
> -       ldr x0, [sp], 16;                                               \
> -       cfi_restore (x0);                                               \
> -       cfi_adjust_cfa_offset (-16);                                    \
> -
> -# define DOCARGS_2                                                     \
> -       DOCARGS_1;                                                      \
> -       str x1, [sp, -16]!;                                             \
> -       cfi_adjust_cfa_offset (16);                                     \
> -       cfi_rel_offset (x1, 0)
> -
> -# define UNDOCARGS_2                                                   \
> -       ldr x1, [sp], 16;                                               \
> -       cfi_restore (x1);                                               \
> -       cfi_adjust_cfa_offset (-16);                                    \
> -       UNDOCARGS_1
> -
> -# define DOCARGS_3                                                     \
> -       DOCARGS_2;                                                      \
> -       str x2, [sp, -16]!;                                             \
> -       cfi_adjust_cfa_offset (16);                                     \
> -       cfi_rel_offset (x2, 0)
> -
> -# define UNDOCARGS_3                                                   \
> -       ldr x2, [sp], 16;                                               \
> -       cfi_restore (x2);                                               \
> -       cfi_adjust_cfa_offset (-16);                                    \
> -       UNDOCARGS_2
> -
> -# define DOCARGS_4                                                     \
> -       DOCARGS_3;                                                      \
> -       str x3, [sp, -16]!;                                             \
> -       cfi_adjust_cfa_offset (16);                                     \
> -       cfi_rel_offset (x3, 0)
> -
> -# define UNDOCARGS_4                                                   \
> -       ldr x3, [sp], 16;                                               \
> -       cfi_restore (x3);                                               \
> -       cfi_adjust_cfa_offset (-16);                                    \
> -       UNDOCARGS_3
> -
> -# define DOCARGS_5                                                     \
> -       DOCARGS_4;                                                      \
> -       str x4, [sp, -16]!;                                             \
> -       cfi_adjust_cfa_offset (16);                                     \
> -       cfi_rel_offset (x4, 0)
> -
> -# define UNDOCARGS_5                                                   \
> -       ldr x4, [sp], 16;                                               \
> -       cfi_restore (x4);                                               \
> -       cfi_adjust_cfa_offset (-16);                                    \
> -       UNDOCARGS_4
> -
> -# define DOCARGS_6                                                     \
> -       DOCARGS_5;                                                      \
> -       str x5, [sp, -16]!;                                             \
> -       cfi_adjust_cfa_offset (16);                                     \
> -       cfi_rel_offset (x5, 0)
> -
> -# define UNDOCARGS_6                                                   \
> -       ldr x5, [sp], 16;                                               \
> -       cfi_restore (x5);                                               \
> -       cfi_adjust_cfa_offset (-16);                                    \
> -       UNDOCARGS_5
> +# define DOCARGS_1     str x0, [sp, 16]
> +# define UNDOCARGS_1   ldr x0, [sp, 16]
> +
> +# define DOCARGS_2     stp x0, x1, [sp, 16]
> +# define UNDOCARGS_2   ldp x0, x1, [sp, 16]
> +
> +# define DOCARGS_3     DOCARGS_2;   str x2, [sp, 32]
> +# define UNDOCARGS_3   UNDOCARGS_2; ldr x2, [sp, 32]
> +
> +# define DOCARGS_4     DOCARGS_2;   stp x2, x3, [sp, 32]
> +# define UNDOCARGS_4   UNDOCARGS_2; ldp x2, x3, [sp, 32]
> +
> +# define DOCARGS_5     DOCARGS_4;   str x4, [sp, 48]
> +# define UNDOCARGS_5   UNDOCARGS_4; ldr x4, [sp, 48]
> +
> +# define DOCARGS_6     DOCARGS_4;   stp x4, x5, [sp, 48]
> +# define UNDOCARGS_6   UNDOCARGS_4; ldp x4, x5, [sp, 48]
>
>  # ifdef IS_IN_libpthread
>  #  define CENABLE      bl __pthread_enable_asynccancel
> @@ -162,10 +107,9 @@
>  extern int __local_multiple_threads attribute_hidden;
>  #   define SINGLE_THREAD_P __builtin_expect (__local_multiple_threads == 0, 1)
>  #  else
> -#   define SINGLE_THREAD_P                                             \
> -  adrp x16, __local_multiple_threads;                                  \
> -  ldr  w16, [x16, #:lo12:__local_multiple_threads];                    \
> -  cmp  w16, 0;
> +#   define SINGLE_THREAD_P(R)                                          \
> +  adrp x##R, __local_multiple_threads;                                 \
> +  ldr  w##R, [x##R, #:lo12:__local_multiple_threads]
>  #  endif
>  # else
>  /*  There is no __local_multiple_threads for librt, so use the TCB.  */
> @@ -174,20 +118,18 @@ extern int __local_multiple_threads attribute_hidden;
>    __builtin_expect (THREAD_GETMEM (THREAD_SELF,                                \
>                                    header.multiple_threads) == 0, 1)
>  #  else
> -#   define SINGLE_THREAD_P                                             \
> +#   define SINGLE_THREAD_P(R)                                          \
>    stp  x0, x30, [sp, -16]!;                                            \
>    cfi_adjust_cfa_offset (16);                                          \
>    cfi_rel_offset (x0, 0);                                              \
>    cfi_rel_offset (x30, 8);                                             \
>    bl   __read_tp;                                                      \
>    sub  x0, x0, PTHREAD_SIZEOF;                                         \
> -  ldr  w16, [x0, PTHREAD_MULTIPLE_THREADS_OFFSET];                     \
> +  ldr  w##R, [x0, PTHREAD_MULTIPLE_THREADS_OFFSET];                    \
>    ldp  x0, x30, [sp], 16;                                              \
>    cfi_restore (x0);                                                    \
>    cfi_restore (x30);                                                   \
> -  cfi_adjust_cfa_offset (-16);                                         \
> -  cmp  w16, 0
> -#   define SINGLE_THREAD_P_PIC(x) SINGLE_THREAD_P

This macro gets removed, which I think probably deserves a mention in
the ChangeLog.

It looks like it can be removed from the ARM port too...
Richard Henderson May 21, 2014, 3:10 p.m. UTC | #2
On 05/20/2014 01:56 PM, Richard Henderson wrote:
> -    mov x16, x0;	/* put mask in safe place.  */			\
> -    UNDOCARGS_##args;	/* restore syscall args.  */			\
> -    mov x8, SYS_ify (syscall_name);	/* do the call.  */		\
> -    svc	0;								\
> -    str x0, [sp, -16]!;	/* save syscall return value.  */		\
> -    cfi_adjust_cfa_offset (16);						\
> -    mov x0, x16;	 /* get mask back.  */				\

Oh, I should have mentioned that there's either a bug or an inconsistency here.

We're saving a value in x16 around the syscall, except that on the C side, for
the inline syscalls, we mark x16 as clobbered.

My patch assumes that we want "normal-ish" calling conventions for
the kernel and puts the value in the normal call-saved register x19.

If there is in fact a set of calling-convention call-clobbered registers that
are not clobbered by syscalls, then we should consider adjusting the inline
syscalls to match.


r~
Will Newton May 21, 2014, 3:19 p.m. UTC | #3
On 21 May 2014 16:10, Richard Henderson <rth@twiddle.net> wrote:
> On 05/20/2014 01:56 PM, Richard Henderson wrote:
>> -    mov x16, x0;     /* put mask in safe place.  */                  \
>> -    UNDOCARGS_##args;        /* restore syscall args.  */                    \
>> -    mov x8, SYS_ify (syscall_name);  /* do the call.  */             \
>> -    svc      0;                                                              \
>> -    str x0, [sp, -16]!;      /* save syscall return value.  */               \
>> -    cfi_adjust_cfa_offset (16);                                              \
>> -    mov x0, x16;      /* get mask back.  */                          \
>
> Oh, I should have mentioned, that there's either a bug or inconsistency here.
>
> We're saving a value in x16 around the syscall.  Except that on the C side, for
> the inline syscalls, we mark x16 as clobbered.
>
> My patch assumes that we want to assume "normal-ish" calling conventions for
> the kernel and puts the value in the normal call saved register x19.
>
> If there are in fact a set of calling-convention call-clobbered registers that
> are not clobbered by syscalls, then we should consider adjusting the inline
> syscalls to match.

When this was discussed previously, it turned out that everything apart
from the return value is preserved:

https://sourceware.org/ml/libc-alpha/2014-03/msg00552.html

I guess it would be better to avoid clobbering so many registers in
the inline case.

Patch
diff mbox

diff --git a/sysdeps/unix/sysv/linux/aarch64/nptl/sysdep-cancel.h b/sysdeps/unix/sysv/linux/aarch64/nptl/sysdep-cancel.h
index e3b4b56..5cf3fd5 100644
--- a/sysdeps/unix/sysv/linux/aarch64/nptl/sysdep-cancel.h
+++ b/sysdeps/unix/sysv/linux/aarch64/nptl/sysdep-cancel.h
@@ -26,121 +26,66 @@ 
 
 # undef PSEUDO
 # define PSEUDO(name, syscall_name, args)				\
-  .section ".text";							\
-  .type __##syscall_name##_nocancel,%function;				\
-  .globl __##syscall_name##_nocancel;					\
-  __##syscall_name##_nocancel:						\
-    cfi_startproc;							\
-    DO_CALL (syscall_name, args);					\
-    cmn x0, 4095;							\
-    b.cs .Lsyscall_error;						\
-    PSEUDO_RET;								\
-    cfi_endproc;							\
-    .size __##syscall_name##_nocancel,.-__##syscall_name##_nocancel;	\
-  ENTRY (name);								\
-    SINGLE_THREAD_P;							\
-    bne .Lpseudo_cancel;						\
-    DO_CALL (syscall_name, 0);						\
-    cmn x0, 4095;							\
-    b.cs .Lsyscall_error;						\
-    PSEUDO_RET;								\
-  .Lpseudo_cancel:							\
-    DOCARGS_##args;	/* save syscall args etc. around CENABLE.  */	\
-    CENABLE;								\
-    mov x16, x0;	/* put mask in safe place.  */			\
-    UNDOCARGS_##args;	/* restore syscall args.  */			\
-    mov x8, SYS_ify (syscall_name);	/* do the call.  */		\
-    svc	0;								\
-    str x0, [sp, -16]!;	/* save syscall return value.  */		\
-    cfi_adjust_cfa_offset (16);						\
-    mov x0, x16;	 /* get mask back.  */				\
-    CDISABLE;								\
-    ldr x0, [sp], 16;							\
-    cfi_adjust_cfa_offset (-16);					\
-    ldr x30, [sp], 16;							\
-    cfi_adjust_cfa_offset (-16);					\
-    cfi_restore (x30);							\
-    UNDOARGS_##args;							\
-    cmn x0, 4095;							\
-    b.cs .Lsyscall_error;
-
-# define DOCARGS_0							\
-	str x30, [sp, -16]!;						\
-	cfi_adjust_cfa_offset (16);					\
-	cfi_rel_offset (x30, 0)
-
+	.section ".text";						\
+ENTRY (__##syscall_name##_nocancel);					\
+.Lpseudo_nocancel:							\
+	DO_CALL (syscall_name, args);					\
+.Lpseudo_ret:								\
+	cmn x0, 4095;							\
+	b.cs .Lsyscall_error;						\
+	.subsection 2;							\
+	.size __##syscall_name##_nocancel,.-__##syscall_name##_nocancel; \
+ENTRY (name);								\
+	SINGLE_THREAD_P(16);						\
+	cbz	w16, .Lpseudo_nocancel;					\
+	/* Setup common stack frame no matter the number of args.  */	\
+	stp	x19, x30, [sp, -64]!;					\
+	cfi_adjust_cfa_offset (64);					\
+	cfi_rel_offset (x19, 0);					\
+	cfi_rel_offset (x30, 8);					\
+	DOCARGS_##args;		/* save syscall args around CENABLE. */	\
+	CENABLE;							\
+	mov	x19, x0;	/* save mask around syscall. */		\
+	UNDOCARGS_##args;	/* restore syscall args.  */		\
+	DO_CALL (syscall_name, args);					\
+	str	x0, [sp, 16];	/* save syscall return value.  */	\
+	mov	x0, x19;	/* pass mask to CDISABLE. */		\
+	CDISABLE;							\
+	ldr	x0, [sp, 16];						\
+	ldp	x19, x30, [sp], 64;					\
+	cfi_adjust_cfa_offset (-64);					\
+	cfi_restore (x19);						\
+	cfi_restore (x30);						\
+	b	.Lpseudo_ret;						\
+	cfi_endproc;							\
+	.size name, .-name;						\
+	.previous
+
+# undef PSEUDO_END
+# define PSEUDO_END(name)						\
+	SYSCALL_ERROR_HANDLER;						\
+	cfi_endproc
+
+# define DOCARGS_0
 # define UNDOCARGS_0
 
-# define DOCARGS_1							\
-	DOCARGS_0;							\
-	str x0, [sp, -16]!;						\
-	cfi_adjust_cfa_offset (16);					\
-	cfi_rel_offset (x0, 0)
-
-# define UNDOCARGS_1							\
-	ldr x0, [sp], 16;						\
-	cfi_restore (x0);						\
-	cfi_adjust_cfa_offset (-16);					\
-
-# define DOCARGS_2							\
-	DOCARGS_1;							\
-	str x1, [sp, -16]!;						\
-	cfi_adjust_cfa_offset (16);					\
-	cfi_rel_offset (x1, 0)
-
-# define UNDOCARGS_2							\
-	ldr x1, [sp], 16;						\
-	cfi_restore (x1);						\
-	cfi_adjust_cfa_offset (-16);					\
-	UNDOCARGS_1
-
-# define DOCARGS_3							\
-	DOCARGS_2;							\
-	str x2, [sp, -16]!;						\
-	cfi_adjust_cfa_offset (16);					\
-	cfi_rel_offset (x2, 0)
-
-# define UNDOCARGS_3							\
-	ldr x2, [sp], 16;						\
-	cfi_restore (x2);						\
-	cfi_adjust_cfa_offset (-16);					\
-	UNDOCARGS_2
-
-# define DOCARGS_4							\
-	DOCARGS_3;							\
-	str x3, [sp, -16]!;						\
-	cfi_adjust_cfa_offset (16);					\
-	cfi_rel_offset (x3, 0)
-
-# define UNDOCARGS_4							\
-	ldr x3, [sp], 16;						\
-	cfi_restore (x3);						\
-	cfi_adjust_cfa_offset (-16);					\
-	UNDOCARGS_3
-
-# define DOCARGS_5							\
-	DOCARGS_4;							\
-	str x4, [sp, -16]!;						\
-	cfi_adjust_cfa_offset (16);					\
-	cfi_rel_offset (x4, 0)
-
-# define UNDOCARGS_5							\
-	ldr x4, [sp], 16;						\
-	cfi_restore (x4);						\
-	cfi_adjust_cfa_offset (-16);					\
-	UNDOCARGS_4
-
-# define DOCARGS_6							\
-	DOCARGS_5;							\
-	str x5, [sp, -16]!;						\
-	cfi_adjust_cfa_offset (16);					\
-	cfi_rel_offset (x5, 0)
-
-# define UNDOCARGS_6							\
-	ldr x5, [sp], 16;						\
-	cfi_restore (x5);						\
-	cfi_adjust_cfa_offset (-16);					\
-	UNDOCARGS_5
+# define DOCARGS_1	str x0, [sp, 16]
+# define UNDOCARGS_1	ldr x0, [sp, 16]
+
+# define DOCARGS_2	stp x0, x1, [sp, 16]
+# define UNDOCARGS_2	ldp x0, x1, [sp, 16]
+
+# define DOCARGS_3	DOCARGS_2;   str x2, [sp, 32]
+# define UNDOCARGS_3	UNDOCARGS_2; ldr x2, [sp, 32]
+
+# define DOCARGS_4	DOCARGS_2;   stp x2, x3, [sp, 32]
+# define UNDOCARGS_4	UNDOCARGS_2; ldp x2, x3, [sp, 32]
+
+# define DOCARGS_5	DOCARGS_4;   str x4, [sp, 48]
+# define UNDOCARGS_5	UNDOCARGS_4; ldr x4, [sp, 48]
+
+# define DOCARGS_6	DOCARGS_4;   stp x4, x5, [sp, 48]
+# define UNDOCARGS_6	UNDOCARGS_4; ldp x4, x5, [sp, 48]
 
 # ifdef IS_IN_libpthread
 #  define CENABLE	bl __pthread_enable_asynccancel
@@ -162,10 +107,9 @@ 
 extern int __local_multiple_threads attribute_hidden;
 #   define SINGLE_THREAD_P __builtin_expect (__local_multiple_threads == 0, 1)
 #  else
-#   define SINGLE_THREAD_P						\
-  adrp	x16, __local_multiple_threads;					\
-  ldr	w16, [x16, #:lo12:__local_multiple_threads];			\
-  cmp	w16, 0;
+#   define SINGLE_THREAD_P(R)						\
+  adrp	x##R, __local_multiple_threads;					\
+  ldr	w##R, [x##R, #:lo12:__local_multiple_threads]
 #  endif
 # else
 /*  There is no __local_multiple_threads for librt, so use the TCB.  */
@@ -174,20 +118,18 @@  extern int __local_multiple_threads attribute_hidden;
   __builtin_expect (THREAD_GETMEM (THREAD_SELF,				\
 				   header.multiple_threads) == 0, 1)
 #  else
-#   define SINGLE_THREAD_P						\
+#   define SINGLE_THREAD_P(R)						\
   stp	x0, x30, [sp, -16]!;						\
   cfi_adjust_cfa_offset (16);						\
   cfi_rel_offset (x0, 0);						\
   cfi_rel_offset (x30, 8);						\
   bl	__read_tp;							\
   sub	x0, x0, PTHREAD_SIZEOF;						\
-  ldr	w16, [x0, PTHREAD_MULTIPLE_THREADS_OFFSET];			\
+  ldr	w##R, [x0, PTHREAD_MULTIPLE_THREADS_OFFSET];			\
   ldp	x0, x30, [sp], 16;						\
   cfi_restore (x0);							\
   cfi_restore (x30);							\
-  cfi_adjust_cfa_offset (-16);						\
-  cmp	w16, 0
-#   define SINGLE_THREAD_P_PIC(x) SINGLE_THREAD_P
+  cfi_adjust_cfa_offset (-16)
 #  endif
 # endif