diff mbox series

[v7,1/2] x86-64: Save APX registers in ld.so trampoline

Message ID 20240216002114.2255406-2-hjl.tools@gmail.com
State New
Headers show
Series x86: Update _dl_tlsdesc_dynamic to preserve caller-saved registers | expand

Commit Message

H.J. Lu Feb. 16, 2024, 12:21 a.m. UTC
Add APX registers to STATE_SAVE_MASK so that APX registers are saved in
ld.so trampoline.  This fixes BZ #31371.

Also update STATE_SAVE_OFFSET and STATE_SAVE_MASK for i386 which will
be used by i386 _dl_tlsdesc_dynamic.
---
 sysdeps/x86/sysdep.h | 52 +++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 46 insertions(+), 6 deletions(-)

Comments

Noah Goldstein Feb. 16, 2024, 7:39 a.m. UTC | #1
On Fri, Feb 16, 2024 at 12:21 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> Add APX registers to STATE_SAVE_MASK so that APX registers are saved in
> ld.so trampoline.  This fixes BZ #31371.
>
> Also update STATE_SAVE_OFFSET and STATE_SAVE_MASK for i386 which will
> be used by i386 _dl_tlsdesc_dynamic.
> ---
>  sysdeps/x86/sysdep.h | 52 +++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 46 insertions(+), 6 deletions(-)
>
> diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
> index 85d0a8c943..837fd28734 100644
> --- a/sysdeps/x86/sysdep.h
> +++ b/sysdeps/x86/sysdep.h
> @@ -21,14 +21,54 @@
>
>  #include <sysdeps/generic/sysdep.h>
>
> +/* The extended state feature IDs in the state component bitmap.  */
> +#define X86_XSTATE_X87_ID      0
> +#define X86_XSTATE_SSE_ID      1
> +#define X86_XSTATE_AVX_ID      2
> +#define X86_XSTATE_BNDREGS_ID  3
> +#define X86_XSTATE_BNDCFG_ID   4
> +#define X86_XSTATE_K_ID                5
> +#define X86_XSTATE_ZMM_H_ID    6
> +#define X86_XSTATE_ZMM_ID      7
> +#define X86_XSTATE_PKRU_ID     9
> +#define X86_XSTATE_TILECFG_ID  17
> +#define X86_XSTATE_TILEDATA_ID 18
> +#define X86_XSTATE_APX_F_ID    19
> +
> +#ifdef __x86_64__
>  /* Offset for fxsave/xsave area used by _dl_runtime_resolve.  Also need
>     space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX.  It must be
> -   aligned to 16 bytes for fxsave and 64 bytes for xsave.  */
> -#define STATE_SAVE_OFFSET (8 * 7 + 8)
> -
> -/* Save SSE, AVX, AVX512, mask and bound registers.  */
> -#define STATE_SAVE_MASK \
> -  ((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7))
> +   aligned to 16 bytes for fxsave and 64 bytes for xsave.
> +
> +   NB: Is is non-zero because of the 128-byte red-zone.  Some registers
> +   are saved on stack without adjusting stack pointer first.  When we
> +   update stack pointer to allocate more space, we need to take the
> +   red-zone into account.  */
> +# define STATE_SAVE_OFFSET (8 * 7 + 8)
> +
> +/* Save SSE, AVX, AVX512, mask, bound and APX registers.  Bound and APX
> +   registers are mutually exclusive.  */
> +# define STATE_SAVE_MASK               \
> +  ((1 << X86_XSTATE_SSE_ID)            \
> +   | (1 << X86_XSTATE_AVX_ID)          \
> +   | (1 << X86_XSTATE_BNDREGS_ID)      \
> +   | (1 << X86_XSTATE_K_ID)            \
> +   | (1 << X86_XSTATE_ZMM_H_ID)        \
> +   | (1 << X86_XSTATE_ZMM_ID)          \
> +   | (1 << X86_XSTATE_APX_F_ID))
Need tile here?
> +#else
> +/* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic.  Since i386
> +   doesn't have red-zone, use 0 here.  */
> +# define STATE_SAVE_OFFSET 0
> +
> +/* Save SSE, AVX, AXV512, mask and bound registers.   */
> +# define STATE_SAVE_MASK               \
> +  ((1 << X86_XSTATE_SSE_ID)            \
> +   | (1 << X86_XSTATE_AVX_ID)          \
> +   | (1 << X86_XSTATE_BNDREGS_ID)      \
> +   | (1 << X86_XSTATE_K_ID)            \
> +   | (1 << X86_XSTATE_ZMM_H_ID))
> +#endif
>
>  /* Constants for bits in __x86_string_control:  */
>
> --
> 2.43.0
>
Florian Weimer Feb. 16, 2024, 11:51 a.m. UTC | #2
* Noah Goldstein:

>> +/* Save SSE, AVX, AVX512, mask, bound and APX registers.  Bound and APX
>> +   registers are mutually exclusive.  */
>> +# define STATE_SAVE_MASK               \
>> +  ((1 << X86_XSTATE_SSE_ID)            \
>> +   | (1 << X86_XSTATE_AVX_ID)          \
>> +   | (1 << X86_XSTATE_BNDREGS_ID)      \
>> +   | (1 << X86_XSTATE_K_ID)            \
>> +   | (1 << X86_XSTATE_ZMM_H_ID)        \
>> +   | (1 << X86_XSTATE_ZMM_ID)          \
>> +   | (1 << X86_XSTATE_APX_F_ID))

> Need tile here?

We can't save the AMX state because it's too big and will overflow
existing stacks.  This change is immediately active on systems with AMX,
even for old applications which do not know about AMX at all.

Thanks,
Florian
H.J. Lu Feb. 16, 2024, 11:53 a.m. UTC | #3
On Fri, Feb 16, 2024 at 3:51 AM Florian Weimer <fweimer@redhat.com> wrote:
>
> * Noah Goldstein:
>
> >> +/* Save SSE, AVX, AVX512, mask, bound and APX registers.  Bound and APX
> >> +   registers are mutually exclusive.  */
> >> +# define STATE_SAVE_MASK               \
> >> +  ((1 << X86_XSTATE_SSE_ID)            \
> >> +   | (1 << X86_XSTATE_AVX_ID)          \
> >> +   | (1 << X86_XSTATE_BNDREGS_ID)      \
> >> +   | (1 << X86_XSTATE_K_ID)            \
> >> +   | (1 << X86_XSTATE_ZMM_H_ID)        \
> >> +   | (1 << X86_XSTATE_ZMM_ID)          \
> >> +   | (1 << X86_XSTATE_APX_F_ID))
>
> > Need tile here?
>
> We can't save the AMX state because it's too big and will overflow
> existing stacks.  This change is immediately active on systems with AMX,
> even for old applications which do not know about AMX at all.
>
>

Another problem is that AMX is enabled by a syscall.  When ld.so checks
the xsave mask, AMX state may not be active at the time.
diff mbox series

Patch

diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
index 85d0a8c943..837fd28734 100644
--- a/sysdeps/x86/sysdep.h
+++ b/sysdeps/x86/sysdep.h
@@ -21,14 +21,54 @@ 
 
 #include <sysdeps/generic/sysdep.h>
 
+/* The extended state feature IDs in the state component bitmap.  */
+#define X86_XSTATE_X87_ID	0
+#define X86_XSTATE_SSE_ID	1
+#define X86_XSTATE_AVX_ID	2
+#define X86_XSTATE_BNDREGS_ID	3
+#define X86_XSTATE_BNDCFG_ID	4
+#define X86_XSTATE_K_ID		5
+#define X86_XSTATE_ZMM_H_ID	6
+#define X86_XSTATE_ZMM_ID	7
+#define X86_XSTATE_PKRU_ID	9
+#define X86_XSTATE_TILECFG_ID	17
+#define X86_XSTATE_TILEDATA_ID	18
+#define X86_XSTATE_APX_F_ID	19
+
+#ifdef __x86_64__
 /* Offset for fxsave/xsave area used by _dl_runtime_resolve.  Also need
    space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX.  It must be
-   aligned to 16 bytes for fxsave and 64 bytes for xsave.  */
-#define STATE_SAVE_OFFSET (8 * 7 + 8)
-
-/* Save SSE, AVX, AVX512, mask and bound registers.  */
-#define STATE_SAVE_MASK \
-  ((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7))
+   aligned to 16 bytes for fxsave and 64 bytes for xsave.
+
+   NB: Is is non-zero because of the 128-byte red-zone.  Some registers
+   are saved on stack without adjusting stack pointer first.  When we
+   update stack pointer to allocate more space, we need to take the
+   red-zone into account.  */
+# define STATE_SAVE_OFFSET (8 * 7 + 8)
+
+/* Save SSE, AVX, AVX512, mask, bound and APX registers.  Bound and APX
+   registers are mutually exclusive.  */
+# define STATE_SAVE_MASK		\
+  ((1 << X86_XSTATE_SSE_ID)		\
+   | (1 << X86_XSTATE_AVX_ID)		\
+   | (1 << X86_XSTATE_BNDREGS_ID)	\
+   | (1 << X86_XSTATE_K_ID)		\
+   | (1 << X86_XSTATE_ZMM_H_ID) 	\
+   | (1 << X86_XSTATE_ZMM_ID)		\
+   | (1 << X86_XSTATE_APX_F_ID))
+#else
+/* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic.  Since i386
+   doesn't have red-zone, use 0 here.  */
+# define STATE_SAVE_OFFSET 0
+
+/* Save SSE, AVX, AXV512, mask and bound registers.   */
+# define STATE_SAVE_MASK		\
+  ((1 << X86_XSTATE_SSE_ID)		\
+   | (1 << X86_XSTATE_AVX_ID)		\
+   | (1 << X86_XSTATE_BNDREGS_ID)	\
+   | (1 << X86_XSTATE_K_ID)		\
+   | (1 << X86_XSTATE_ZMM_H_ID))
+#endif
 
 /* Constants for bits in __x86_string_control:  */