diff mbox series

[v6] x86-64: Allocate state buffer space for RDI, RSI and RBX

Message ID 20240318134016.820218-1-hjl.tools@gmail.com
State New
Headers show
Series [v6] x86-64: Allocate state buffer space for RDI, RSI and RBX | expand

Commit Message

H.J. Lu March 18, 2024, 1:40 p.m. UTC
_dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning stack.
After realigning stack, it saves RCX, RDX, R8, R9, R10 and R11.  Define
TLSDESC_CALL_REGISTER_SAVE_AREA to allocate space for RDI, RSI and RBX
to avoid clobbering saved RDI, RSI and RBX values on stack by xsave to
STATE_SAVE_OFFSET(%rsp).

   +==================+<- stack frame start aligned at 8 or 16 bytes
   |                  |<- RDI saved in the red zone
   |                  |<- RSI saved in the red zone
   |                  |<- RBX saved in the red zone
   |                  |<- paddings for stack realignment of 64 bytes
   |------------------|<- xsave buffer end aligned at 64 bytes
   |                  |<-
   |                  |<-
   |                  |<-
   |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
   |                  |<- 8-byte padding for 64-byte alignment
   |                  |<- 8-byte padding for 64-byte alignment
   |                  |<- R11
   |                  |<- R10
   |                  |<- R9
   |                  |<- R8
   |                  |<- RDX
   |                  |<- RCX
   +==================+<- RSP aligned at 64 bytes

Define TLSDESC_CALL_REGISTER_SAVE_AREA, the total register save area size
for all integer registers by adding 24 to STATE_SAVE_OFFSET since RDI, RSI
and RBX are saved onto stack without adjusting stack pointer first, using
the red-zone.  This fixes BZ #31501.
---
 sysdeps/x86/cpu-features.c         | 11 ++--
 sysdeps/x86/sysdep.h               | 60 ++++++++++++++++++---
 sysdeps/x86_64/tst-gnu2-tls2mod1.S | 87 ++++++++++++++++++++++++++++++
 3 files changed, 147 insertions(+), 11 deletions(-)
 create mode 100644 sysdeps/x86_64/tst-gnu2-tls2mod1.S

Comments

Sunil Pandey March 18, 2024, 11:20 p.m. UTC | #1
On Mon, Mar 18, 2024 at 6:40 AM H.J. Lu <hjl.tools@gmail.com> wrote:

> _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning stack.
> After realigning stack, it saves RCX, RDX, R8, R9, R10 and R11.  Define
> TLSDESC_CALL_REGISTER_SAVE_AREA to allocate space for RDI, RSI and RBX
> to avoid clobbering saved RDI, RSI and RBX values on stack by xsave to
> STATE_SAVE_OFFSET(%rsp).
>
>    +==================+<- stack frame start aligned at 8 or 16 bytes
>    |                  |<- RDI saved in the red zone
>    |                  |<- RSI saved in the red zone
>    |                  |<- RBX saved in the red zone
>    |                  |<- paddings for stack realignment of 64 bytes
>    |------------------|<- xsave buffer end aligned at 64 bytes
>    |                  |<-
>    |                  |<-
>    |                  |<-
>    |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
>    |                  |<- 8-byte padding for 64-byte alignment
>    |                  |<- 8-byte padding for 64-byte alignment
>    |                  |<- R11
>    |                  |<- R10
>    |                  |<- R9
>    |                  |<- R8
>    |                  |<- RDX
>    |                  |<- RCX
>    +==================+<- RSP aligned at 64 bytes
>
> Define TLSDESC_CALL_REGISTER_SAVE_AREA, the total register save area size
> for all integer registers by adding 24 to STATE_SAVE_OFFSET since RDI, RSI
> and RBX are saved onto stack without adjusting stack pointer first, using
> the red-zone.  This fixes BZ #31501.
> ---
>  sysdeps/x86/cpu-features.c         | 11 ++--
>  sysdeps/x86/sysdep.h               | 60 ++++++++++++++++++---
>  sysdeps/x86_64/tst-gnu2-tls2mod1.S | 87 ++++++++++++++++++++++++++++++
>  3 files changed, 147 insertions(+), 11 deletions(-)
>  create mode 100644 sysdeps/x86_64/tst-gnu2-tls2mod1.S
>
> diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
> index 4ea373dffa..3d7c2819d7 100644
> --- a/sysdeps/x86/cpu-features.c
> +++ b/sysdeps/x86/cpu-features.c
> @@ -311,7 +311,7 @@ update_active (struct cpu_features *cpu_features)
>               /* NB: On AMX capable processors, ebx always includes AMX
>                  states.  */
>               unsigned int xsave_state_full_size
> -               = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
> +               = ALIGN_UP (ebx + TLSDESC_CALL_REGISTER_SAVE_AREA, 64);
>
>               cpu_features->xsave_state_size
>                 = xsave_state_full_size;
> @@ -401,8 +401,10 @@ update_active (struct cpu_features *cpu_features)
>                       unsigned int amx_size
>                         = (xstate_amx_comp_offsets[31]
>                            + xstate_amx_comp_sizes[31]);
> -                     amx_size = ALIGN_UP (amx_size + STATE_SAVE_OFFSET,
> -                                          64);
> +                     amx_size
> +                       = ALIGN_UP ((amx_size
> +                                    + TLSDESC_CALL_REGISTER_SAVE_AREA),
> +                                   64);
>                       /* Set xsave_state_full_size to the compact AMX
>                          state size for XSAVEC.  NB: xsave_state_full_size
>                          is only used in _dl_tlsdesc_dynamic_xsave and
> @@ -410,7 +412,8 @@ update_active (struct cpu_features *cpu_features)
>                       cpu_features->xsave_state_full_size = amx_size;
>  #endif
>                       cpu_features->xsave_state_size
> -                       = ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
> +                       = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA,
> +                                   64);
>                       CPU_FEATURE_SET (cpu_features, XSAVEC);
>                     }
>                 }
> diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
> index db8e576e91..7359149e17 100644
> --- a/sysdeps/x86/sysdep.h
> +++ b/sysdeps/x86/sysdep.h
> @@ -38,14 +38,59 @@
>  #ifdef __x86_64__
>  /* Offset for fxsave/xsave area used by _dl_runtime_resolve.  Also need
>     space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX.  It must be
> -   aligned to 16 bytes for fxsave and 64 bytes for xsave.
> -
> -   NB: Is is non-zero because of the 128-byte red-zone.  Some registers
> -   are saved on stack without adjusting stack pointer first.  When we
> -   update stack pointer to allocate more space, we need to take the
> -   red-zone into account.  */
> +   aligned to 16 bytes for fxsave and 64 bytes for xsave.  It is non-zero
> +   because MOV, instead of PUSH, is used to save registers onto stack.
> +
> +   +==================+<- stack frame start aligned at 8 or 16 bytes
> +   |                  |<- paddings for stack realignment of 64 bytes
> +   |------------------|<- xsave buffer end aligned at 64 bytes
> +   |                  |<-
> +   |                  |<-
> +   |                  |<-
> +   |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
> +   |                  |<- 8-byte padding for 64-byte alignment
> +   |                  |<- R9
> +   |                  |<- R8
> +   |                  |<- RDI
> +   |                  |<- RSI
> +   |                  |<- RDX
> +   |                  |<- RCX
> +   |                  |<- RAX
> +   +==================+<- RSP aligned at 64 bytes
> +
> + */
>  # define STATE_SAVE_OFFSET (8 * 7 + 8)
>
> +/* _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning
> +   stack.  After realigning stack, it saves RCX, RDX, R8, R9, R10 and
> +   R11.  Allocate space for RDI, RSI and RBX to avoid clobbering saved
> +   RDI, RSI and RBX values on stack by xsave.
> +
> +   +==================+<- stack frame start aligned at 8 or 16 bytes
> +   |                  |<- RDI saved in the red zone
> +   |                  |<- RSI saved in the red zone
> +   |                  |<- RBX saved in the red zone
> +   |                  |<- paddings for stack realignment of 64 bytes
> +   |------------------|<- xsave buffer end aligned at 64 bytes
> +   |                  |<-
> +   |                  |<-
> +   |                  |<-
> +   |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
> +   |                  |<- 8-byte padding for 64-byte alignment
> +   |                  |<- 8-byte padding for 64-byte alignment
> +   |                  |<- R11
> +   |                  |<- R10
> +   |                  |<- R9
> +   |                  |<- R8
> +   |                  |<- RDX
> +   |                  |<- RCX
> +   +==================+<- RSP aligned at 64 bytes
> +
> +   Define the total register save area size for all integer registers by
> +   adding 24 to STATE_SAVE_OFFSET since RDI, RSI and RBX are saved onto
> +   stack without adjusting stack pointer first, using the red-zone.  */
> +# define TLSDESC_CALL_REGISTER_SAVE_AREA (STATE_SAVE_OFFSET + 24)
> +
>  /* Save SSE, AVX, AVX512, mask, bound and APX registers.  Bound and APX
>     registers are mutually exclusive.  */
>  # define STATE_SAVE_MASK               \
> @@ -66,8 +111,9 @@
>    (STATE_SAVE_MASK | AMX_STATE_SAVE_MASK)
>  #else
>  /* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic.  Since i386
> -   doesn't have red-zone, use 0 here.  */
> +   uses PUSH to save registers onto stack, use 0 here.  */
>  # define STATE_SAVE_OFFSET 0
> +# define TLSDESC_CALL_REGISTER_SAVE_AREA 0
>
>  /* Save SSE, AVX, AXV512, mask and bound registers.   */
>  # define STATE_SAVE_MASK               \
> diff --git a/sysdeps/x86_64/tst-gnu2-tls2mod1.S
> b/sysdeps/x86_64/tst-gnu2-tls2mod1.S
> new file mode 100644
> index 0000000000..1d636669ba
> --- /dev/null
> +++ b/sysdeps/x86_64/tst-gnu2-tls2mod1.S
> @@ -0,0 +1,87 @@
> +/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx.
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>
> +
> +/* On AVX512 machines, OFFSET == 40 caused _dl_tlsdesc_dynamic_xsavec
> +   to clobber %rdi, %rsi and %rbx.  On Intel AVX CPUs, the state size
> +   is 960 bytes and this test didn't fail.  It may be due to the unused
> +   last 128 bytes.  On AMD AVX CPUs, the state size is 832 bytes and
> +   this test might fail without the fix.  */
> +#ifndef OFFSET
> +# define OFFSET 40
> +#endif
> +
> +       .text
> +       .p2align 4
> +       .globl  apply_tls
> +       .type   apply_tls, @function
> +apply_tls:
> +       cfi_startproc
> +       _CET_ENDBR
> +       pushq   %rbp
> +       cfi_def_cfa_offset (16)
> +       cfi_offset (6, -16)
> +       movdqu  (%RDI_LP), %xmm0
> +       lea     tls_var1@TLSDESC(%rip), %RAX_LP
> +       mov     %RSP_LP, %RBP_LP
> +       cfi_def_cfa_register (6)
> +       /* Align stack to 64 bytes.  */
> +       and     $-64, %RSP_LP
> +       sub     $OFFSET, %RSP_LP
> +       pushq   %rbx
> +       /* Set %ebx to 0xbadbeef.  */
> +       movl    $0xbadbeef, %ebx
> +       movl    $0xbadbeef, %esi
> +       movq    %rdi, saved_rdi(%rip)
> +       movq    %rsi, saved_rsi(%rip)
> +       call    *tls_var1@TLSCALL(%RAX_LP)
> +       /* Check if _dl_tlsdesc_dynamic preserves %rdi, %rsi and %rbx.  */
> +       cmpq    saved_rdi(%rip), %rdi
> +       jne     L(hlt)
> +       cmpq    saved_rsi(%rip), %rsi
> +       jne     L(hlt)
> +       cmpl    $0xbadbeef, %ebx
> +       jne     L(hlt)
> +       add     %fs:0, %RAX_LP
> +       movups  %xmm0, 32(%RAX_LP)
> +       movdqu  16(%RDI_LP), %xmm1
> +       mov     %RAX_LP, %RBX_LP
> +       movups  %xmm1, 48(%RAX_LP)
> +       lea     32(%RBX_LP), %RAX_LP
> +       pop     %rbx
> +       leave
> +       cfi_def_cfa (7, 8)
> +       ret
> +L(hlt):
> +       hlt
> +       cfi_endproc
> +       .size   apply_tls, .-apply_tls
> +       .hidden tls_var1
> +       .globl  tls_var1
> +       .section        .tbss,"awT",@nobits
> +       .align 16
> +       .type   tls_var1, @object
> +       .size   tls_var1, 3200
> +tls_var1:
> +       .zero   3200
> +       .local  saved_rdi
> +       .comm   saved_rdi,8,8
> +       .local  saved_rsi
> +       .comm   saved_rsi,8,8
> +       .section        .note.GNU-stack,"",@progbits
> --
> 2.44.0
>
>
LGTM
Reviewed-by: Sunil K Pandey <skpgkp2@gmail.com>
Andreas Schwab March 19, 2024, 8:31 a.m. UTC | #2
On Mär 18 2024, H.J. Lu wrote:

> diff --git a/sysdeps/x86_64/tst-gnu2-tls2mod1.S b/sysdeps/x86_64/tst-gnu2-tls2mod1.S
> new file mode 100644
> index 0000000000..1d636669ba
> --- /dev/null
> +++ b/sysdeps/x86_64/tst-gnu2-tls2mod1.S
> @@ -0,0 +1,87 @@
> +/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx.
> +   Copyright (C) 2024 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>

A testsuite module cannot include non-installed headers.
H.J. Lu March 19, 2024, 11:12 a.m. UTC | #3
On Tue, Mar 19, 2024 at 1:31 AM Andreas Schwab <schwab@suse.de> wrote:
>
> On Mär 18 2024, H.J. Lu wrote:
>
> > diff --git a/sysdeps/x86_64/tst-gnu2-tls2mod1.S b/sysdeps/x86_64/tst-gnu2-tls2mod1.S
> > new file mode 100644
> > index 0000000000..1d636669ba
> > --- /dev/null
> > +++ b/sysdeps/x86_64/tst-gnu2-tls2mod1.S
> > @@ -0,0 +1,87 @@
> > +/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx.
> > +   Copyright (C) 2024 Free Software Foundation, Inc.
> > +   This file is part of the GNU C Library.
> > +
> > +   The GNU C Library is free software; you can redistribute it and/or
> > +   modify it under the terms of the GNU Lesser General Public
> > +   License as published by the Free Software Foundation; either
> > +   version 2.1 of the License, or (at your option) any later version.
> > +
> > +   The GNU C Library is distributed in the hope that it will be useful,
> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +   Lesser General Public License for more details.
> > +
> > +   You should have received a copy of the GNU Lesser General Public
> > +   License along with the GNU C Library; if not, see
> > +   <http://www.gnu.org/licenses/>.  */
> > +
> > +#include <sysdep.h>
>
> A testsuite module cannot include non-installed headers.
>

This is a very special case.  Does it cause any issues?
Andreas Schwab March 19, 2024, 11:21 a.m. UTC | #4
On Mär 19 2024, H.J. Lu wrote:

> On Tue, Mar 19, 2024 at 1:31 AM Andreas Schwab <schwab@suse.de> wrote:
>>
>> On Mär 18 2024, H.J. Lu wrote:
>>
>> > diff --git a/sysdeps/x86_64/tst-gnu2-tls2mod1.S b/sysdeps/x86_64/tst-gnu2-tls2mod1.S
>> > new file mode 100644
>> > index 0000000000..1d636669ba
>> > --- /dev/null
>> > +++ b/sysdeps/x86_64/tst-gnu2-tls2mod1.S
>> > @@ -0,0 +1,87 @@
>> > +/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx.
>> > +   Copyright (C) 2024 Free Software Foundation, Inc.
>> > +   This file is part of the GNU C Library.
>> > +
>> > +   The GNU C Library is free software; you can redistribute it and/or
>> > +   modify it under the terms of the GNU Lesser General Public
>> > +   License as published by the Free Software Foundation; either
>> > +   version 2.1 of the License, or (at your option) any later version.
>> > +
>> > +   The GNU C Library is distributed in the hope that it will be useful,
>> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> > +   Lesser General Public License for more details.
>> > +
>> > +   You should have received a copy of the GNU Lesser General Public
>> > +   License along with the GNU C Library; if not, see
>> > +   <http://www.gnu.org/licenses/>.  */
>> > +
>> > +#include <sysdep.h>
>>
>> A testsuite module cannot include non-installed headers.
>>
>
> This is a very special case.  Does it cause any issues?

sysdep.h generally depends on definitions in config.h.
H.J. Lu March 19, 2024, 11:35 a.m. UTC | #5
On Tue, Mar 19, 2024 at 4:22 AM Andreas Schwab <schwab@suse.de> wrote:
>
> On Mär 19 2024, H.J. Lu wrote:
>
> > On Tue, Mar 19, 2024 at 1:31 AM Andreas Schwab <schwab@suse.de> wrote:
> >>
> >> On Mär 18 2024, H.J. Lu wrote:
> >>
> >> > diff --git a/sysdeps/x86_64/tst-gnu2-tls2mod1.S b/sysdeps/x86_64/tst-gnu2-tls2mod1.S
> >> > new file mode 100644
> >> > index 0000000000..1d636669ba
> >> > --- /dev/null
> >> > +++ b/sysdeps/x86_64/tst-gnu2-tls2mod1.S
> >> > @@ -0,0 +1,87 @@
> >> > +/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx.
> >> > +   Copyright (C) 2024 Free Software Foundation, Inc.
> >> > +   This file is part of the GNU C Library.
> >> > +
> >> > +   The GNU C Library is free software; you can redistribute it and/or
> >> > +   modify it under the terms of the GNU Lesser General Public
> >> > +   License as published by the Free Software Foundation; either
> >> > +   version 2.1 of the License, or (at your option) any later version.
> >> > +
> >> > +   The GNU C Library is distributed in the hope that it will be useful,
> >> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> >> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> >> > +   Lesser General Public License for more details.
> >> > +
> >> > +   You should have received a copy of the GNU Lesser General Public
> >> > +   License along with the GNU C Library; if not, see
> >> > +   <http://www.gnu.org/licenses/>.  */
> >> > +
> >> > +#include <sysdep.h>
> >>
> >> A testsuite module cannot include non-installed headers.
> >>
> >
> > This is a very special case.  Does it cause any issues?
>
> sysdep.h generally depends on definitions in config.h.
>

tst-gnu2-tls2mod1.S uses RXX_LP, cfi_xxx and L from
sysdep.h.  Do they depend on config.h?
Andreas Schwab March 19, 2024, 11:46 a.m. UTC | #6
On Mär 19 2024, H.J. Lu wrote:

> tst-gnu2-tls2mod1.S uses RXX_LP, cfi_xxx and L from
> sysdep.h.  Do they depend on config.h?

No, but other parts may depend on it that can cause compilation errors.
H.J. Lu March 19, 2024, 11:50 a.m. UTC | #7
On Tue, Mar 19, 2024 at 4:46 AM Andreas Schwab <schwab@suse.de> wrote:
>
> On Mär 19 2024, H.J. Lu wrote:
>
> > tst-gnu2-tls2mod1.S uses RXX_LP, cfi_xxx and L from
> > sysdep.h.  Do they depend on config.h?
>
> No, but other parts may depend on it that can cause compilation errors.
>

Which configure options will cause compilation errors?
Andreas Schwab March 19, 2024, 12:02 p.m. UTC | #8
On Mär 19 2024, H.J. Lu wrote:

> On Tue, Mar 19, 2024 at 4:46 AM Andreas Schwab <schwab@suse.de> wrote:
>>
>> On Mär 19 2024, H.J. Lu wrote:
>>
>> > tst-gnu2-tls2mod1.S uses RXX_LP, cfi_xxx and L from
>> > sysdep.h.  Do they depend on config.h?
>>
>> No, but other parts may depend on it that can cause compilation errors.
>>
>
> Which configure options will cause compilation errors?

It's not upstream, you can see it here:

https://build.opensuse.org/package/live_build_log/home:Andreas_Schwab:glibc/glibc:testsuite/f/x86_64
H.J. Lu March 19, 2024, 12:07 p.m. UTC | #9
On Tue, Mar 19, 2024 at 5:03 AM Andreas Schwab <schwab@suse.de> wrote:
>
> On Mär 19 2024, H.J. Lu wrote:
>
> > On Tue, Mar 19, 2024 at 4:46 AM Andreas Schwab <schwab@suse.de> wrote:
> >>
> >> On Mär 19 2024, H.J. Lu wrote:
> >>
> >> > tst-gnu2-tls2mod1.S uses RXX_LP, cfi_xxx and L from
> >> > sysdep.h.  Do they depend on config.h?
> >>
> >> No, but other parts may depend on it that can cause compilation errors.
> >>
> >
> > Which configure options will cause compilation errors?
>
> It's not upstream, you can see it here:
>
> https://build.opensuse.org/package/live_build_log/home:Andreas_Schwab:glibc/glibc:testsuite/f/x86_64
>

Please open a glibc bug report with steps to reproduce the issue.
Florian Weimer March 19, 2024, 12:16 p.m. UTC | #10
* Andreas Schwab:

> On Mär 19 2024, H.J. Lu wrote:
>
>> On Tue, Mar 19, 2024 at 1:31 AM Andreas Schwab <schwab@suse.de> wrote:
>>>
>>> On Mär 18 2024, H.J. Lu wrote:
>>>
>>> > diff --git a/sysdeps/x86_64/tst-gnu2-tls2mod1.S b/sysdeps/x86_64/tst-gnu2-tls2mod1.S
>>> > new file mode 100644
>>> > index 0000000000..1d636669ba
>>> > --- /dev/null
>>> > +++ b/sysdeps/x86_64/tst-gnu2-tls2mod1.S
>>> > @@ -0,0 +1,87 @@
>>> > +/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx.
>>> > +   Copyright (C) 2024 Free Software Foundation, Inc.
>>> > +   This file is part of the GNU C Library.
>>> > +
>>> > +   The GNU C Library is free software; you can redistribute it and/or
>>> > +   modify it under the terms of the GNU Lesser General Public
>>> > +   License as published by the Free Software Foundation; either
>>> > +   version 2.1 of the License, or (at your option) any later version.
>>> > +
>>> > +   The GNU C Library is distributed in the hope that it will be useful,
>>> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>> > +   Lesser General Public License for more details.
>>> > +
>>> > +   You should have received a copy of the GNU Lesser General Public
>>> > +   License along with the GNU C Library; if not, see
>>> > +   <http://www.gnu.org/licenses/>.  */
>>> > +
>>> > +#include <sysdep.h>
>>>
>>> A testsuite module cannot include non-installed headers.
>>>
>>
>> This is a very special case.  Does it cause any issues?
>
> sysdep.h generally depends on definitions in config.h.

It seems to be a downstream-specific patch:

[ 1518s] In file included from ../sysdeps/unix/x86_64/sysdep.h:19,
[ 1518s]                  from ../sysdeps/unix/sysv/linux/x86_64/sysdep.h:23,
[ 1518s]                  from ../sysdeps/x86_64/tst-gnu2-tls2mod1.S:19:
[ 1518s] ../sysdeps/x86_64/sysdep.h:56:5: error: "ENABLE_USERSPACE_LIVEPATCH" is not defined, evaluates to 0 [-Werror=undef]
[ 1518s]    56 | #if ENABLE_USERSPACE_LIVEPATCH
[ 1518s]       |     ^~~~~~~~~~~~~~~~~~~~~~~~~~
[ 1518s] cc1: some warnings being treated as errors

I think the module using <sysdep.h> needs to be added to the
test-internal-extras Makefile variable.

Thanks,
Florian
H.J. Lu March 19, 2024, 12:26 p.m. UTC | #11
On Tue, Mar 19, 2024 at 5:17 AM Florian Weimer <fweimer@redhat.com> wrote:
>
> * Andreas Schwab:
>
> > On Mär 19 2024, H.J. Lu wrote:
> >
> >> On Tue, Mar 19, 2024 at 1:31 AM Andreas Schwab <schwab@suse.de> wrote:
> >>>
> >>> On Mär 18 2024, H.J. Lu wrote:
> >>>
> >>> > diff --git a/sysdeps/x86_64/tst-gnu2-tls2mod1.S b/sysdeps/x86_64/tst-gnu2-tls2mod1.S
> >>> > new file mode 100644
> >>> > index 0000000000..1d636669ba
> >>> > --- /dev/null
> >>> > +++ b/sysdeps/x86_64/tst-gnu2-tls2mod1.S
> >>> > @@ -0,0 +1,87 @@
> >>> > +/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx.
> >>> > +   Copyright (C) 2024 Free Software Foundation, Inc.
> >>> > +   This file is part of the GNU C Library.
> >>> > +
> >>> > +   The GNU C Library is free software; you can redistribute it and/or
> >>> > +   modify it under the terms of the GNU Lesser General Public
> >>> > +   License as published by the Free Software Foundation; either
> >>> > +   version 2.1 of the License, or (at your option) any later version.
> >>> > +
> >>> > +   The GNU C Library is distributed in the hope that it will be useful,
> >>> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> >>> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> >>> > +   Lesser General Public License for more details.
> >>> > +
> >>> > +   You should have received a copy of the GNU Lesser General Public
> >>> > +   License along with the GNU C Library; if not, see
> >>> > +   <http://www.gnu.org/licenses/>.  */
> >>> > +
> >>> > +#include <sysdep.h>
> >>>
> >>> A testsuite module cannot include non-installed headers.
> >>>
> >>
> >> This is a very special case.  Does it cause any issues?
> >
> > sysdep.h generally depends on definitions in config.h.
>
> It seems to be a downstream-specific patch:
>
> [ 1518s] In file included from ../sysdeps/unix/x86_64/sysdep.h:19,
> [ 1518s]                  from ../sysdeps/unix/sysv/linux/x86_64/sysdep.h:23,
> [ 1518s]                  from ../sysdeps/x86_64/tst-gnu2-tls2mod1.S:19:
> [ 1518s] ../sysdeps/x86_64/sysdep.h:56:5: error: "ENABLE_USERSPACE_LIVEPATCH" is not defined, evaluates to 0 [-Werror=undef]
> [ 1518s]    56 | #if ENABLE_USERSPACE_LIVEPATCH
> [ 1518s]       |     ^~~~~~~~~~~~~~~~~~~~~~~~~~
> [ 1518s] cc1: some warnings being treated as errors
>
> I think the module using <sysdep.h> needs to be added to the
> test-internal-extras Makefile variable.

I will fix it if I can reproduce it.  If a glibc patch is needed, I
will need it.
Andreas Schwab March 19, 2024, 12:35 p.m. UTC | #12
You can see that patch here:

https://build.opensuse.org/projects/home:Andreas_Schwab:glibc/packages/glibc/files/ulp-prologue-into-asm-functions.patch
Andreas Schwab March 19, 2024, 12:51 p.m. UTC | #13
This works for me:

diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
index 66b21954f3..39048b12e0 100644
--- a/sysdeps/x86_64/Makefile
+++ b/sysdeps/x86_64/Makefile
@@ -217,6 +217,8 @@ valgrind-suppressions-tst-valgrind-smoke = \
   --suppressions=$(..)sysdeps/x86_64/tst-valgrind-smoke.supp
 endif
 
+test-internal-extras += tst-gnu2-tls2mod1
+
 endif # $(subdir) == elf
 
 ifeq ($(subdir),csu)
H.J. Lu March 19, 2024, 12:53 p.m. UTC | #14
On Tue, Mar 19, 2024 at 5:51 AM Andreas Schwab <schwab@suse.de> wrote:
>
> This works for me:
>
> diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
> index 66b21954f3..39048b12e0 100644
> --- a/sysdeps/x86_64/Makefile
> +++ b/sysdeps/x86_64/Makefile
> @@ -217,6 +217,8 @@ valgrind-suppressions-tst-valgrind-smoke = \
>    --suppressions=$(..)sysdeps/x86_64/tst-valgrind-smoke.supp
>  endif
>
> +test-internal-extras += tst-gnu2-tls2mod1
> +
>  endif # $(subdir) == elf
>
>  ifeq ($(subdir),csu)
> --
> 2.44.0
>

LGTM.

Thanks.
H.J. Lu March 19, 2024, 12:55 p.m. UTC | #15
On Tue, Mar 19, 2024 at 5:53 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Tue, Mar 19, 2024 at 5:51 AM Andreas Schwab <schwab@suse.de> wrote:
> >
> > This works for me:
> >
> > diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
> > index 66b21954f3..39048b12e0 100644
> > --- a/sysdeps/x86_64/Makefile
> > +++ b/sysdeps/x86_64/Makefile
> > @@ -217,6 +217,8 @@ valgrind-suppressions-tst-valgrind-smoke = \
> >    --suppressions=$(..)sysdeps/x86_64/tst-valgrind-smoke.supp
> >  endif
> >
> > +test-internal-extras += tst-gnu2-tls2mod1
> > +
> >  endif # $(subdir) == elf
> >
> >  ifeq ($(subdir),csu)
> > --
> > 2.44.0
> >
>
> LGTM.
>

Please mention BZ #31501 in your commit log.  It should be included
in all BZ #31501 backports.

Thanks.
diff mbox series

Patch

diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 4ea373dffa..3d7c2819d7 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -311,7 +311,7 @@  update_active (struct cpu_features *cpu_features)
 	      /* NB: On AMX capable processors, ebx always includes AMX
 		 states.  */
 	      unsigned int xsave_state_full_size
-		= ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
+		= ALIGN_UP (ebx + TLSDESC_CALL_REGISTER_SAVE_AREA, 64);
 
 	      cpu_features->xsave_state_size
 		= xsave_state_full_size;
@@ -401,8 +401,10 @@  update_active (struct cpu_features *cpu_features)
 		      unsigned int amx_size
 			= (xstate_amx_comp_offsets[31]
 			   + xstate_amx_comp_sizes[31]);
-		      amx_size = ALIGN_UP (amx_size + STATE_SAVE_OFFSET,
-					   64);
+		      amx_size
+			= ALIGN_UP ((amx_size
+				     + TLSDESC_CALL_REGISTER_SAVE_AREA),
+				    64);
 		      /* Set xsave_state_full_size to the compact AMX
 			 state size for XSAVEC.  NB: xsave_state_full_size
 			 is only used in _dl_tlsdesc_dynamic_xsave and
@@ -410,7 +412,8 @@  update_active (struct cpu_features *cpu_features)
 		      cpu_features->xsave_state_full_size = amx_size;
 #endif
 		      cpu_features->xsave_state_size
-			= ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
+			= ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA,
+				    64);
 		      CPU_FEATURE_SET (cpu_features, XSAVEC);
 		    }
 		}
diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
index db8e576e91..7359149e17 100644
--- a/sysdeps/x86/sysdep.h
+++ b/sysdeps/x86/sysdep.h
@@ -38,14 +38,59 @@ 
 #ifdef __x86_64__
 /* Offset for fxsave/xsave area used by _dl_runtime_resolve.  Also need
    space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX.  It must be
-   aligned to 16 bytes for fxsave and 64 bytes for xsave.
-
-   NB: Is is non-zero because of the 128-byte red-zone.  Some registers
-   are saved on stack without adjusting stack pointer first.  When we
-   update stack pointer to allocate more space, we need to take the
-   red-zone into account.  */
+   aligned to 16 bytes for fxsave and 64 bytes for xsave.  It is non-zero
+   because MOV, instead of PUSH, is used to save registers onto stack.
+
+   +==================+<- stack frame start aligned at 8 or 16 bytes
+   |                  |<- paddings for stack realignment of 64 bytes
+   |------------------|<- xsave buffer end aligned at 64 bytes
+   |                  |<-
+   |                  |<-
+   |                  |<-
+   |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
+   |                  |<- 8-byte padding for 64-byte alignment
+   |                  |<- R9
+   |                  |<- R8
+   |                  |<- RDI
+   |                  |<- RSI
+   |                  |<- RDX
+   |                  |<- RCX
+   |                  |<- RAX
+   +==================+<- RSP aligned at 64 bytes
+
+ */
 # define STATE_SAVE_OFFSET (8 * 7 + 8)
 
+/* _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning
+   stack.  After realigning stack, it saves RCX, RDX, R8, R9, R10 and
+   R11.  Allocate space for RDI, RSI and RBX to avoid clobbering saved
+   RDI, RSI and RBX values on stack by xsave.
+
+   +==================+<- stack frame start aligned at 8 or 16 bytes
+   |                  |<- RDI saved in the red zone
+   |                  |<- RSI saved in the red zone
+   |                  |<- RBX saved in the red zone
+   |                  |<- paddings for stack realignment of 64 bytes
+   |------------------|<- xsave buffer end aligned at 64 bytes
+   |                  |<-
+   |                  |<-
+   |                  |<-
+   |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
+   |                  |<- 8-byte padding for 64-byte alignment
+   |                  |<- 8-byte padding for 64-byte alignment
+   |                  |<- R11
+   |                  |<- R10
+   |                  |<- R9
+   |                  |<- R8
+   |                  |<- RDX
+   |                  |<- RCX
+   +==================+<- RSP aligned at 64 bytes
+
+   Define the total register save area size for all integer registers by
+   adding 24 to STATE_SAVE_OFFSET since RDI, RSI and RBX are saved onto
+   stack without adjusting stack pointer first, using the red-zone.  */
+# define TLSDESC_CALL_REGISTER_SAVE_AREA (STATE_SAVE_OFFSET + 24)
+
 /* Save SSE, AVX, AVX512, mask, bound and APX registers.  Bound and APX
    registers are mutually exclusive.  */
 # define STATE_SAVE_MASK		\
@@ -66,8 +111,9 @@ 
   (STATE_SAVE_MASK | AMX_STATE_SAVE_MASK)
 #else
 /* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic.  Since i386
-   doesn't have red-zone, use 0 here.  */
+   uses PUSH to save registers onto stack, use 0 here.  */
 # define STATE_SAVE_OFFSET 0
+# define TLSDESC_CALL_REGISTER_SAVE_AREA 0
 
 /* Save SSE, AVX, AXV512, mask and bound registers.   */
 # define STATE_SAVE_MASK		\
diff --git a/sysdeps/x86_64/tst-gnu2-tls2mod1.S b/sysdeps/x86_64/tst-gnu2-tls2mod1.S
new file mode 100644
index 0000000000..1d636669ba
--- /dev/null
+++ b/sysdeps/x86_64/tst-gnu2-tls2mod1.S
@@ -0,0 +1,87 @@ 
+/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* On AVX512 machines, OFFSET == 40 caused _dl_tlsdesc_dynamic_xsavec
+   to clobber %rdi, %rsi and %rbx.  On Intel AVX CPUs, the state size
+   is 960 bytes and this test didn't fail.  It may be due to the unused
+   last 128 bytes.  On AMD AVX CPUs, the state size is 832 bytes and
+   this test might fail without the fix.  */
+#ifndef OFFSET
+# define OFFSET 40
+#endif
+
+	.text
+	.p2align 4
+	.globl	apply_tls
+	.type	apply_tls, @function
+apply_tls:
+	cfi_startproc
+	_CET_ENDBR
+	pushq	%rbp
+	cfi_def_cfa_offset (16)
+	cfi_offset (6, -16)
+	movdqu	(%RDI_LP), %xmm0
+	lea	tls_var1@TLSDESC(%rip), %RAX_LP
+	mov	%RSP_LP, %RBP_LP
+	cfi_def_cfa_register (6)
+	/* Align stack to 64 bytes.  */
+	and	$-64, %RSP_LP
+	sub	$OFFSET, %RSP_LP
+	pushq	%rbx
+	/* Set %ebx to 0xbadbeef.  */
+	movl	$0xbadbeef, %ebx
+	movl	$0xbadbeef, %esi
+	movq	%rdi, saved_rdi(%rip)
+	movq	%rsi, saved_rsi(%rip)
+	call	*tls_var1@TLSCALL(%RAX_LP)
+	/* Check if _dl_tlsdesc_dynamic preserves %rdi, %rsi and %rbx.  */
+	cmpq	saved_rdi(%rip), %rdi
+	jne	L(hlt)
+	cmpq	saved_rsi(%rip), %rsi
+	jne	L(hlt)
+	cmpl	$0xbadbeef, %ebx
+	jne	L(hlt)
+	add	%fs:0, %RAX_LP
+	movups	%xmm0, 32(%RAX_LP)
+	movdqu	16(%RDI_LP), %xmm1
+	mov	%RAX_LP, %RBX_LP
+	movups	%xmm1, 48(%RAX_LP)
+	lea	32(%RBX_LP), %RAX_LP
+	pop	%rbx
+	leave
+	cfi_def_cfa (7, 8)
+	ret
+L(hlt):
+	hlt
+	cfi_endproc
+	.size	apply_tls, .-apply_tls
+	.hidden	tls_var1
+	.globl	tls_var1
+	.section	.tbss,"awT",@nobits
+	.align 16
+	.type	tls_var1, @object
+	.size	tls_var1, 3200
+tls_var1:
+	.zero	3200
+	.local	saved_rdi
+	.comm	saved_rdi,8,8
+	.local	saved_rsi
+	.comm	saved_rsi,8,8
+	.section	.note.GNU-stack,"",@progbits