diff mbox series

[v2] x86-64/cet: Check the restore token in longjmp

Message ID 20240102150329.3152784-1-hjl.tools@gmail.com
State New
Headers show
Series [v2] x86-64/cet: Check the restore token in longjmp | expand

Commit Message

H.J. Lu Jan. 2, 2024, 3:03 p.m. UTC
Changes in v2:

1. Merge __longjmp.S and __longjmp_chk.S by adding CHECK_INVALID_LONGJMP.
2. Remove a branch in the restore token searching loop.
3. Cache the target shadow stack pointer.

setcontext and swapcontext put a restore token on the old shadow stack
which is used to restore the target shadow stack when switching user
contexts.  When longjmp from a user context, the target shadow stack
can be different from the current shadow stack and INCSSP can't be
used to restore the shadow stack pointer to the target shadow stack.

Update longjmp to search for a restore token.  If found, use the token
to restore the shadow stack pointer before using INCSSP to pop the
shadow stack.  Stop the token search and use INCSSP if the shadow stack
entry value is the same as the current shadow stack pointer.

It is a user error if there is a shadow stack switch without leaving a
restore token on the old shadow stack.

The only difference between __longjmp.S and __longjmp_chk.S is that
__longjmp_chk.S has a check for invalid longjmp usages.  Merge
__longjmp.S and __longjmp_chk.S by adding the CHECK_INVALID_LONGJMP
macro.
---
 .../unix/sysv/linux/x86_64/____longjmp_chk.S  | 179 ++++--------------
 sysdeps/x86/__longjmp_cancel.S                |   3 +
 sysdeps/x86_64/__longjmp.S                    |  47 ++++-
 3 files changed, 84 insertions(+), 145 deletions(-)

Comments

Noah Goldstein Jan. 4, 2024, 7:51 p.m. UTC | #1
On Tue, Jan 2, 2024 at 7:03 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> Changes in v2:
>
> 1. Merge __longjmp.S and __longjmp_chk.S by adding CHECK_INVALID_LONGJMP.
> 2. Remove a branch in the restore token searching loop.
> 3. Cache the target shadow stack pointer.
>
> setcontext and swapcontext put a restore token on the old shadow stack
> which is used to restore the target shadow stack when switching user
> contexts.  When longjmp from a user context, the target shadow stack
> can be different from the current shadow stack and INCSSP can't be
> used to restore the shadow stack pointer to the target shadow stack.
>
> Update longjmp to search for a restore token.  If found, use the token
> to restore the shadow stack pointer before using INCSSP to pop the
> shadow stack.  Stop the token search and use INCSSP if the shadow stack
> entry value is the same as the current shadow stack pointer.
>
> It is a user error if there is a shadow stack switch without leaving a
> restore token on the old shadow stack.
>
> The only difference between __longjmp.S and __longjmp_chk.S is that
> __longjmp_chk.S has a check for invalid longjmp usages.  Merge
> __longjmp.S and __longjmp_chk.S by adding the CHECK_INVALID_LONGJMP
> macro.
> ---
>  .../unix/sysv/linux/x86_64/____longjmp_chk.S  | 179 ++++--------------
>  sysdeps/x86/__longjmp_cancel.S                |   3 +
>  sysdeps/x86_64/__longjmp.S                    |  47 ++++-
>  3 files changed, 84 insertions(+), 145 deletions(-)
>
> diff --git a/sysdeps/unix/sysv/linux/x86_64/____longjmp_chk.S b/sysdeps/unix/sysv/linux/x86_64/____longjmp_chk.S
> index deb6398f43..9aa24620b9 100644
> --- a/sysdeps/unix/sysv/linux/x86_64/____longjmp_chk.S
> +++ b/sysdeps/unix/sysv/linux/x86_64/____longjmp_chk.S
> @@ -15,18 +15,7 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#include <sysdep.h>
> -#include <pointer_guard.h>
> -#include <jmpbuf-offsets.h>
> -#include <asm-syntax.h>
> -#include <stap-probe.h>
>  #include <sigaltstack-offsets.h>
> -#include <jmp_buf-ssp.h>
> -
> -/* Don't restore shadow stack register if shadow stack isn't enabled.  */
> -#if !SHSTK_ENABLED
> -# undef SHADOW_STACK_POINTER_OFFSET
> -#endif
>
>         .section .rodata.str1.1,"aMS",@progbits,1
>         .type   longjmp_msg,@object
> @@ -34,136 +23,48 @@ longjmp_msg:
>         .string "longjmp causes uninitialized stack frame"
>         .size   longjmp_msg, .-longjmp_msg
>
> -
> -//#define __longjmp ____longjmp_chk
> -
>  #ifdef PIC
> -# define CALL_FAIL     sub     $8, %RSP_LP;                                  \
> -                       cfi_remember_state;                                   \
> -                       cfi_def_cfa_offset(16);                               \
> -                       lea     longjmp_msg(%rip), %RDI_LP;                   \
> -                       call    HIDDEN_JUMPTARGET(__fortify_fail);            \
> -                       nop;                                                  \
> -                       cfi_restore_state
> +# define LOAD_MSG      lea     longjmp_msg(%rip), %RDI_LP
>  #else
> -# define CALL_FAIL     sub     $8, %RSP_LP;                                  \
> -                       cfi_remember_state;                                   \
> -                       cfi_def_cfa_offset(16);                               \
> -                       mov     $longjmp_msg, %RDI_LP;                        \
> -                       call    HIDDEN_JUMPTARGET(__fortify_fail);            \
> -                       nop;                                                  \
> -                       cfi_restore_state
> +# define LOAD_MSG      mov     $longjmp_msg, %RDI_LP
>  #endif
>
> -/* Jump to the position specified by ENV, causing the
> -   setjmp call there to return VAL, or 1 if VAL is 0.
> -   void __longjmp (__jmp_buf env, int val).  */
> -       .text
> -ENTRY(____longjmp_chk)
> -       /* Restore registers.  */
> -       mov     (JB_RSP*8)(%rdi), %R8_LP
> -       mov     (JB_RBP*8)(%rdi),%R9_LP
> -       mov     (JB_PC*8)(%rdi), %RDX_LP
> -#ifdef PTR_DEMANGLE
> -       PTR_DEMANGLE (%R8_LP)
> -       PTR_DEMANGLE (%R9_LP)
> -       PTR_DEMANGLE (%RDX_LP)
> -# ifdef __ILP32__
> -       /* We ignored the high bits of the %rbp value because only the low
> -          bits are mangled.  But we cannot presume that %rbp is being used
> -          as a pointer and truncate it, so recover the high bits.  */
> -       movl (JB_RBP*8 + 4)(%rdi), %eax
> -       shlq $32, %rax
> -       orq %rax, %r9
> -# endif
> -#endif
> -
> -       cmp     %R8_LP, %RSP_LP
> -       jbe     .Lok
> -
> -       /* Save function parameters.  */
> -       movq    %rdi, %r10
> -       cfi_register (%rdi, %r10)
> -       movl    %esi, %ebx
> -       cfi_register (%rsi, %rbx)
> -
> -       xorl    %edi, %edi
> -       lea     -sizeSS(%rsp), %RSI_LP
> -       movl    $__NR_sigaltstack, %eax
> -       syscall
> -       /* Without working sigaltstack we cannot perform the test.  */
> -       testl   %eax, %eax
> -       jne     .Lok2
> -       testl   $1, (-sizeSS + oSS_FLAGS)(%rsp)
> -       jz      .Lfail
> -
> -       mov     (-sizeSS + oSS_SP)(%rsp), %RAX_LP
> -       add     (-sizeSS + oSS_SIZE)(%rsp), %RAX_LP
> -       sub     %R8_LP, %RAX_LP
> -       cmp     (-sizeSS + oSS_SIZE)(%rsp), %RAX_LP
> -       jae     .Lok2
> -
> -.Lfail:        CALL_FAIL
> -
> -.Lok2: movq    %r10, %rdi
> -       cfi_restore (%rdi)
> -       movl    %ebx, %esi
> -       cfi_restore (%rsi)
> -
> +#define CHECK_INVALID_LONGJMP \
> +       cmp     %R8_LP, %RSP_LP;                                        \
> +       jbe     .Lok;                                                   \
> +       /* Save function parameters.  */                                \
> +       movq    %rdi, %r10;                                             \
> +       cfi_register (%rdi, %r10);                                      \
> +       movl    %esi, %ebx;                                             \
> +       cfi_register (%rsi, %rbx);                                      \
> +       xorl    %edi, %edi;                                             \
> +       lea     -sizeSS(%rsp), %RSI_LP;                                 \
> +       movl    $__NR_sigaltstack, %eax;                                \
> +       syscall;                                                        \
> +       /* Without working sigaltstack we cannot perform the test.  */  \
> +       testl   %eax, %eax;                                             \
> +       jne     .Lok2;                                                  \
> +       testl   $1, (-sizeSS + oSS_FLAGS)(%rsp);                        \
> +       jz      .Lfail;                                                 \
> +       mov     (-sizeSS + oSS_SP)(%rsp), %RAX_LP;                      \
> +       add     (-sizeSS + oSS_SIZE)(%rsp), %RAX_LP;                    \
> +       sub     %R8_LP, %RAX_LP;                                        \
> +       cmp     (-sizeSS + oSS_SIZE)(%rsp), %RAX_LP;                    \
> +       jae     .Lok2;                                                  \
> +.Lfail:                                                                        \
> +       sub     $8, %RSP_LP;                                            \
> +       cfi_remember_state;                                             \
> +       cfi_def_cfa_offset(16);                                         \
> +       LOAD_MSG;                                                       \
> +       call    HIDDEN_JUMPTARGET(__fortify_fail);                      \
> +       nop;                                                            \
what is this nop for?
If you want `.Lok2` aligned just use `.p2align`?
> +       cfi_restore_state;                                              \
> +.Lok2:                                                                 \
> +       movq    %r10, %rdi;                                             \
> +       cfi_restore (%rdi);                                             \
> +       movl    %ebx, %esi;                                             \
> +       cfi_restore (%rsi);                                             \
>  .Lok:
> -#ifdef SHADOW_STACK_POINTER_OFFSET
> -# if IS_IN (libc) && defined SHARED && defined FEATURE_1_OFFSET
> -       /* Check if Shadow Stack is enabled.  */
> -       testl   $X86_FEATURE_1_SHSTK, %fs:FEATURE_1_OFFSET
> -       jz      L(skip_ssp)
> -# else
> -       xorl    %eax, %eax
> -# endif
> -       /* Check and adjust the Shadow-Stack-Pointer.  */
> -       rdsspq  %rax
> -       /* And compare it with the saved ssp value.  */
> -       subq    SHADOW_STACK_POINTER_OFFSET(%rdi), %rax
> -       je      L(skip_ssp)
> -       /* Count the number of frames to adjust and adjust it
> -          with incssp instruction.  The instruction can adjust
> -          the ssp by [0..255] value only thus use a loop if
> -          the number of frames is bigger than 255.  */
> -       negq    %rax
> -       shrq    $3, %rax
> -       /* NB: We saved Shadow-Stack-Pointer of setjmp.  Since we are
> -              restoring Shadow-Stack-Pointer of setjmp's caller, we
> -              need to unwind shadow stack by one more frame.  */
> -       addq    $1, %rax
> -       movl    $255, %ebx
> -L(loop):
> -       cmpq    %rbx, %rax
> -       cmovb   %rax, %rbx
> -       incsspq %rbx
> -       subq    %rbx, %rax
> -       ja      L(loop)
> -L(skip_ssp):
> -#endif
> -       LIBC_PROBE (longjmp, 3, LP_SIZE@%RDI_LP, -4@%esi, LP_SIZE@%RDX_LP)
> -       /* We add unwind information for the target here.  */
> -       cfi_def_cfa(%rdi, 0)
> -       cfi_register(%rsp,%r8)
> -       cfi_register(%rbp,%r9)
> -       cfi_register(%rip,%rdx)
> -       cfi_offset(%rbx,JB_RBX*8)
> -       cfi_offset(%r12,JB_R12*8)
> -       cfi_offset(%r13,JB_R13*8)
> -       cfi_offset(%r14,JB_R14*8)
> -       cfi_offset(%r15,JB_R15*8)
> -       movq    (JB_RBX*8)(%rdi), %rbx
> -       movq    (JB_R12*8)(%rdi), %r12
> -       movq    (JB_R13*8)(%rdi), %r13
> -       movq    (JB_R14*8)(%rdi), %r14
> -       movq    (JB_R15*8)(%rdi), %r15
> -       /* Set return value for setjmp.  */
> -       movl    %esi, %eax
> -       mov     %R8_LP, %RSP_LP
> -       movq    %r9,%rbp
> -       LIBC_PROBE (longjmp_target, 3,
> -                   LP_SIZE@%RDI_LP, -4@%eax, LP_SIZE@%RDX_LP)
> -       jmpq    *%rdx
> -END (____longjmp_chk)
> +
> +#define __longjmp ____longjmp_chk
> +#include <__longjmp.S>
> diff --git a/sysdeps/x86/__longjmp_cancel.S b/sysdeps/x86/__longjmp_cancel.S
> index e71b304257..b03f52b308 100644
> --- a/sysdeps/x86/__longjmp_cancel.S
> +++ b/sysdeps/x86/__longjmp_cancel.S
> @@ -16,5 +16,8 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> +/* Don't restore shadow stack register for __longjmp_cancel.  */
> +#define DO_NOT_RESTORE_SHADOW_STACK
> +
>  #define __longjmp __longjmp_cancel
>  #include <__longjmp.S>
> diff --git a/sysdeps/x86_64/__longjmp.S b/sysdeps/x86_64/__longjmp.S
> index c9f70f8e2a..22fedc4997 100644
> --- a/sysdeps/x86_64/__longjmp.S
> +++ b/sysdeps/x86_64/__longjmp.S
> @@ -22,14 +22,15 @@
>  #include <asm-syntax.h>
>  #include <stap-probe.h>
>
> -/* Don't restore shadow stack register if
> -   1. Shadow stack isn't enabled.  Or
> -   2. __longjmp is defined for __longjmp_cancel.
> - */
> -#if !SHSTK_ENABLED || defined __longjmp
> +/* Don't restore shadow stack register if shadow stack isn't enabled.  */
> +#if !SHSTK_ENABLED || defined DO_NOT_RESTORE_SHADOW_STACK
>  # undef SHADOW_STACK_POINTER_OFFSET
>  #endif
>
> +#ifndef CHECK_INVALID_LONGJMP
> +# define CHECK_INVALID_LONGJMP
> +#endif
> +
>  /* Jump to the position specified by ENV, causing the
>     setjmp call there to return VAL, or 1 if VAL is 0.
>     void __longjmp (__jmp_buf env, int val).  */
> @@ -52,6 +53,9 @@ ENTRY(__longjmp)
>         orq %rax, %r9
>  # endif
>  #endif
> +
> +       CHECK_INVALID_LONGJMP
This only runs if `__longjmp` is defined `____longjmp_chk` correct?
As far as I can tell its only used once. Maybe have the code here i.e:

#ifdef SHADOW_STACK_POINT_OFFSET
//cur code
#else
//CHECK_INVALID_LONGJMP code
#endif

or am I misunderstanding?
> +
>  #ifdef SHADOW_STACK_POINTER_OFFSET
>  # if IS_IN (libc) && defined SHARED && defined FEATURE_1_OFFSET
>         /* Check if Shadow Stack is enabled.  */
> @@ -63,9 +67,40 @@ ENTRY(__longjmp)
>         /* Check and adjust the Shadow-Stack-Pointer.  */
>         /* Get the current ssp.  */
>         rdsspq %rax
> +       /* Save the current ssp.  */
> +       movq %rax, %r10
>         /* And compare it with the saved ssp value.  */
> -       subq SHADOW_STACK_POINTER_OFFSET(%rdi), %rax
> +       movq SHADOW_STACK_POINTER_OFFSET(%rdi), %rcx
> +       subq %rcx, %rax
>         je L(skip_ssp)
> +
> +       /* Save the target ssp.  */
> +       movq %rcx, %r11
> +
> +L(find_restore_token_loop):
> +       /* Look for a restore token.  */
> +       movq -8(%rcx), %rbx
> +       andq $-8, %rbx
> +       cmpq %rcx, %rbx
> +       /* Find the restore token.  */
> +       je L(restore_shadow_stack)
> +
> +       /* Try the next slot.  */
> +       subq $8, %rcx
> +       /* Stop if the current ssp is found.  */
> +       cmpq %rcx, %r10
> +       jne L(find_restore_token_loop)
> +       jmp L(no_shadow_stack_token)
> +
> +L(restore_shadow_stack):
> +       /* Restore the target shadow stack.  */
> +       rstorssp -8(%rcx)
> +       /* Save the restore token on the old shadow stack.  */
> +       saveprevssp
> +       rdsspq %rax
> +       subq %r11, %rax
> +
> +L(no_shadow_stack_token):
>         /* Count the number of frames to adjust and adjust it
>            with incssp instruction.  The instruction can adjust
>            the ssp by [0..255] value only thus use a loop if
> --
> 2.43.0
>
H.J. Lu Jan. 4, 2024, 8:11 p.m. UTC | #2
On Thu, Jan 4, 2024 at 11:51 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Tue, Jan 2, 2024 at 7:03 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > Changes in v2:
> >
> > 1. Merge __longjmp.S and __longjmp_chk.S by adding CHECK_INVALID_LONGJMP.
> > 2. Remove a branch in the restore token searching loop.
> > 3. Cache the target shadow stack pointer.
> >
> > setcontext and swapcontext put a restore token on the old shadow stack
> > which is used to restore the target shadow stack when switching user
> > contexts.  When longjmp from a user context, the target shadow stack
> > can be different from the current shadow stack and INCSSP can't be
> > used to restore the shadow stack pointer to the target shadow stack.
> >
> > Update longjmp to search for a restore token.  If found, use the token
> > to restore the shadow stack pointer before using INCSSP to pop the
> > shadow stack.  Stop the token search and use INCSSP if the shadow stack
> > entry value is the same as the current shadow stack pointer.
> >
> > It is a user error if there is a shadow stack switch without leaving a
> > restore token on the old shadow stack.
> >
> > The only difference between __longjmp.S and __longjmp_chk.S is that
> > __longjmp_chk.S has a check for invalid longjmp usages.  Merge
> > __longjmp.S and __longjmp_chk.S by adding the CHECK_INVALID_LONGJMP
> > macro.
> > ---
> >  .../unix/sysv/linux/x86_64/____longjmp_chk.S  | 179 ++++--------------
> >  sysdeps/x86/__longjmp_cancel.S                |   3 +
> >  sysdeps/x86_64/__longjmp.S                    |  47 ++++-
> >  3 files changed, 84 insertions(+), 145 deletions(-)
> >
> > diff --git a/sysdeps/unix/sysv/linux/x86_64/____longjmp_chk.S b/sysdeps/unix/sysv/linux/x86_64/____longjmp_chk.S
> > index deb6398f43..9aa24620b9 100644
> > --- a/sysdeps/unix/sysv/linux/x86_64/____longjmp_chk.S
> > +++ b/sysdeps/unix/sysv/linux/x86_64/____longjmp_chk.S
> > @@ -15,18 +15,7 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > -#include <sysdep.h>
> > -#include <pointer_guard.h>
> > -#include <jmpbuf-offsets.h>
> > -#include <asm-syntax.h>
> > -#include <stap-probe.h>
> >  #include <sigaltstack-offsets.h>
> > -#include <jmp_buf-ssp.h>
> > -
> > -/* Don't restore shadow stack register if shadow stack isn't enabled.  */
> > -#if !SHSTK_ENABLED
> > -# undef SHADOW_STACK_POINTER_OFFSET
> > -#endif
> >
> >         .section .rodata.str1.1,"aMS",@progbits,1
> >         .type   longjmp_msg,@object
> > @@ -34,136 +23,48 @@ longjmp_msg:
> >         .string "longjmp causes uninitialized stack frame"
> >         .size   longjmp_msg, .-longjmp_msg
> >
> > -
> > -//#define __longjmp ____longjmp_chk
> > -
> >  #ifdef PIC
> > -# define CALL_FAIL     sub     $8, %RSP_LP;                                  \
> > -                       cfi_remember_state;                                   \
> > -                       cfi_def_cfa_offset(16);                               \
> > -                       lea     longjmp_msg(%rip), %RDI_LP;                   \
> > -                       call    HIDDEN_JUMPTARGET(__fortify_fail);            \
> > -                       nop;                                                  \
> > -                       cfi_restore_state
> > +# define LOAD_MSG      lea     longjmp_msg(%rip), %RDI_LP
> >  #else
> > -# define CALL_FAIL     sub     $8, %RSP_LP;                                  \
> > -                       cfi_remember_state;                                   \
> > -                       cfi_def_cfa_offset(16);                               \
> > -                       mov     $longjmp_msg, %RDI_LP;                        \
> > -                       call    HIDDEN_JUMPTARGET(__fortify_fail);            \
> > -                       nop;                                                  \
> > -                       cfi_restore_state
> > +# define LOAD_MSG      mov     $longjmp_msg, %RDI_LP
> >  #endif
> >
> > -/* Jump to the position specified by ENV, causing the
> > -   setjmp call there to return VAL, or 1 if VAL is 0.
> > -   void __longjmp (__jmp_buf env, int val).  */
> > -       .text
> > -ENTRY(____longjmp_chk)
> > -       /* Restore registers.  */
> > -       mov     (JB_RSP*8)(%rdi), %R8_LP
> > -       mov     (JB_RBP*8)(%rdi),%R9_LP
> > -       mov     (JB_PC*8)(%rdi), %RDX_LP
> > -#ifdef PTR_DEMANGLE
> > -       PTR_DEMANGLE (%R8_LP)
> > -       PTR_DEMANGLE (%R9_LP)
> > -       PTR_DEMANGLE (%RDX_LP)
> > -# ifdef __ILP32__
> > -       /* We ignored the high bits of the %rbp value because only the low
> > -          bits are mangled.  But we cannot presume that %rbp is being used
> > -          as a pointer and truncate it, so recover the high bits.  */
> > -       movl (JB_RBP*8 + 4)(%rdi), %eax
> > -       shlq $32, %rax
> > -       orq %rax, %r9
> > -# endif
> > -#endif
> > -
> > -       cmp     %R8_LP, %RSP_LP
> > -       jbe     .Lok
> > -
> > -       /* Save function parameters.  */
> > -       movq    %rdi, %r10
> > -       cfi_register (%rdi, %r10)
> > -       movl    %esi, %ebx
> > -       cfi_register (%rsi, %rbx)
> > -
> > -       xorl    %edi, %edi
> > -       lea     -sizeSS(%rsp), %RSI_LP
> > -       movl    $__NR_sigaltstack, %eax
> > -       syscall
> > -       /* Without working sigaltstack we cannot perform the test.  */
> > -       testl   %eax, %eax
> > -       jne     .Lok2
> > -       testl   $1, (-sizeSS + oSS_FLAGS)(%rsp)
> > -       jz      .Lfail
> > -
> > -       mov     (-sizeSS + oSS_SP)(%rsp), %RAX_LP
> > -       add     (-sizeSS + oSS_SIZE)(%rsp), %RAX_LP
> > -       sub     %R8_LP, %RAX_LP
> > -       cmp     (-sizeSS + oSS_SIZE)(%rsp), %RAX_LP
> > -       jae     .Lok2
> > -
> > -.Lfail:        CALL_FAIL
> > -
> > -.Lok2: movq    %r10, %rdi
> > -       cfi_restore (%rdi)
> > -       movl    %ebx, %esi
> > -       cfi_restore (%rsi)
> > -
> > +#define CHECK_INVALID_LONGJMP \
> > +       cmp     %R8_LP, %RSP_LP;                                        \
> > +       jbe     .Lok;                                                   \
> > +       /* Save function parameters.  */                                \
> > +       movq    %rdi, %r10;                                             \
> > +       cfi_register (%rdi, %r10);                                      \
> > +       movl    %esi, %ebx;                                             \
> > +       cfi_register (%rsi, %rbx);                                      \
> > +       xorl    %edi, %edi;                                             \
> > +       lea     -sizeSS(%rsp), %RSI_LP;                                 \
> > +       movl    $__NR_sigaltstack, %eax;                                \
> > +       syscall;                                                        \
> > +       /* Without working sigaltstack we cannot perform the test.  */  \
> > +       testl   %eax, %eax;                                             \
> > +       jne     .Lok2;                                                  \
> > +       testl   $1, (-sizeSS + oSS_FLAGS)(%rsp);                        \
> > +       jz      .Lfail;                                                 \
> > +       mov     (-sizeSS + oSS_SP)(%rsp), %RAX_LP;                      \
> > +       add     (-sizeSS + oSS_SIZE)(%rsp), %RAX_LP;                    \
> > +       sub     %R8_LP, %RAX_LP;                                        \
> > +       cmp     (-sizeSS + oSS_SIZE)(%rsp), %RAX_LP;                    \
> > +       jae     .Lok2;                                                  \
> > +.Lfail:                                                                        \
> > +       sub     $8, %RSP_LP;                                            \
> > +       cfi_remember_state;                                             \
> > +       cfi_def_cfa_offset(16);                                         \
> > +       LOAD_MSG;                                                       \
> > +       call    HIDDEN_JUMPTARGET(__fortify_fail);                      \
> > +       nop;                                                            \
> what is this nop for?
> If you want `.Lok2` aligned just use `.p2align`?

This is in the existing code.  We should keep it.  If we
want to change it, it should be done with a separate
patch.

> > +       cfi_restore_state;                                              \
> > +.Lok2:                                                                 \
> > +       movq    %r10, %rdi;                                             \
> > +       cfi_restore (%rdi);                                             \
> > +       movl    %ebx, %esi;                                             \
> > +       cfi_restore (%rsi);                                             \
> >  .Lok:
> > -#ifdef SHADOW_STACK_POINTER_OFFSET
> > -# if IS_IN (libc) && defined SHARED && defined FEATURE_1_OFFSET
> > -       /* Check if Shadow Stack is enabled.  */
> > -       testl   $X86_FEATURE_1_SHSTK, %fs:FEATURE_1_OFFSET
> > -       jz      L(skip_ssp)
> > -# else
> > -       xorl    %eax, %eax
> > -# endif
> > -       /* Check and adjust the Shadow-Stack-Pointer.  */
> > -       rdsspq  %rax
> > -       /* And compare it with the saved ssp value.  */
> > -       subq    SHADOW_STACK_POINTER_OFFSET(%rdi), %rax
> > -       je      L(skip_ssp)
> > -       /* Count the number of frames to adjust and adjust it
> > -          with incssp instruction.  The instruction can adjust
> > -          the ssp by [0..255] value only thus use a loop if
> > -          the number of frames is bigger than 255.  */
> > -       negq    %rax
> > -       shrq    $3, %rax
> > -       /* NB: We saved Shadow-Stack-Pointer of setjmp.  Since we are
> > -              restoring Shadow-Stack-Pointer of setjmp's caller, we
> > -              need to unwind shadow stack by one more frame.  */
> > -       addq    $1, %rax
> > -       movl    $255, %ebx
> > -L(loop):
> > -       cmpq    %rbx, %rax
> > -       cmovb   %rax, %rbx
> > -       incsspq %rbx
> > -       subq    %rbx, %rax
> > -       ja      L(loop)
> > -L(skip_ssp):
> > -#endif
> > -       LIBC_PROBE (longjmp, 3, LP_SIZE@%RDI_LP, -4@%esi, LP_SIZE@%RDX_LP)
> > -       /* We add unwind information for the target here.  */
> > -       cfi_def_cfa(%rdi, 0)
> > -       cfi_register(%rsp,%r8)
> > -       cfi_register(%rbp,%r9)
> > -       cfi_register(%rip,%rdx)
> > -       cfi_offset(%rbx,JB_RBX*8)
> > -       cfi_offset(%r12,JB_R12*8)
> > -       cfi_offset(%r13,JB_R13*8)
> > -       cfi_offset(%r14,JB_R14*8)
> > -       cfi_offset(%r15,JB_R15*8)
> > -       movq    (JB_RBX*8)(%rdi), %rbx
> > -       movq    (JB_R12*8)(%rdi), %r12
> > -       movq    (JB_R13*8)(%rdi), %r13
> > -       movq    (JB_R14*8)(%rdi), %r14
> > -       movq    (JB_R15*8)(%rdi), %r15
> > -       /* Set return value for setjmp.  */
> > -       movl    %esi, %eax
> > -       mov     %R8_LP, %RSP_LP
> > -       movq    %r9,%rbp
> > -       LIBC_PROBE (longjmp_target, 3,
> > -                   LP_SIZE@%RDI_LP, -4@%eax, LP_SIZE@%RDX_LP)
> > -       jmpq    *%rdx
> > -END (____longjmp_chk)
> > +
> > +#define __longjmp ____longjmp_chk
> > +#include <__longjmp.S>
> > diff --git a/sysdeps/x86/__longjmp_cancel.S b/sysdeps/x86/__longjmp_cancel.S
> > index e71b304257..b03f52b308 100644
> > --- a/sysdeps/x86/__longjmp_cancel.S
> > +++ b/sysdeps/x86/__longjmp_cancel.S
> > @@ -16,5 +16,8 @@
> >     License along with the GNU C Library; if not, see
> >     <https://www.gnu.org/licenses/>.  */
> >
> > +/* Don't restore shadow stack register for __longjmp_cancel.  */
> > +#define DO_NOT_RESTORE_SHADOW_STACK
> > +
> >  #define __longjmp __longjmp_cancel
> >  #include <__longjmp.S>
> > diff --git a/sysdeps/x86_64/__longjmp.S b/sysdeps/x86_64/__longjmp.S
> > index c9f70f8e2a..22fedc4997 100644
> > --- a/sysdeps/x86_64/__longjmp.S
> > +++ b/sysdeps/x86_64/__longjmp.S
> > @@ -22,14 +22,15 @@
> >  #include <asm-syntax.h>
> >  #include <stap-probe.h>
> >
> > -/* Don't restore shadow stack register if
> > -   1. Shadow stack isn't enabled.  Or
> > -   2. __longjmp is defined for __longjmp_cancel.
> > - */
> > -#if !SHSTK_ENABLED || defined __longjmp
> > +/* Don't restore shadow stack register if shadow stack isn't enabled.  */
> > +#if !SHSTK_ENABLED || defined DO_NOT_RESTORE_SHADOW_STACK
> >  # undef SHADOW_STACK_POINTER_OFFSET
> >  #endif
> >
> > +#ifndef CHECK_INVALID_LONGJMP
> > +# define CHECK_INVALID_LONGJMP
> > +#endif
> > +
> >  /* Jump to the position specified by ENV, causing the
> >     setjmp call there to return VAL, or 1 if VAL is 0.
> >     void __longjmp (__jmp_buf env, int val).  */
> > @@ -52,6 +53,9 @@ ENTRY(__longjmp)
> >         orq %rax, %r9
> >  # endif
> >  #endif
> > +
> > +       CHECK_INVALID_LONGJMP
> This only runs if `__longjmp` is defined `____longjmp_chk` correct?

Correct.

> As far as I can tell its only used once. Maybe have the code here i.e:
>
> #ifdef SHADOW_STACK_POINT_OFFSET
> //cur code
> #else
> //CHECK_INVALID_LONGJMP code
> #endif
>
> or am I misunderstanding?

1. ____longjmp_chk also defines SHADOW_STACK_POINT_OFFSET
and needs CHECK_INVALID_LONGJMP.
2. CHECK_INVALID_LONGJMP is OS specific and
sysdeps/x86_64/__longjmp.S is OS indenpendent.

> > +
> >  #ifdef SHADOW_STACK_POINTER_OFFSET
> >  # if IS_IN (libc) && defined SHARED && defined FEATURE_1_OFFSET
> >         /* Check if Shadow Stack is enabled.  */
> > @@ -63,9 +67,40 @@ ENTRY(__longjmp)
> >         /* Check and adjust the Shadow-Stack-Pointer.  */
> >         /* Get the current ssp.  */
> >         rdsspq %rax
> > +       /* Save the current ssp.  */
> > +       movq %rax, %r10
> >         /* And compare it with the saved ssp value.  */
> > -       subq SHADOW_STACK_POINTER_OFFSET(%rdi), %rax
> > +       movq SHADOW_STACK_POINTER_OFFSET(%rdi), %rcx
> > +       subq %rcx, %rax
> >         je L(skip_ssp)
> > +
> > +       /* Save the target ssp.  */
> > +       movq %rcx, %r11
> > +
> > +L(find_restore_token_loop):
> > +       /* Look for a restore token.  */
> > +       movq -8(%rcx), %rbx
> > +       andq $-8, %rbx
> > +       cmpq %rcx, %rbx
> > +       /* Find the restore token.  */
> > +       je L(restore_shadow_stack)
> > +
> > +       /* Try the next slot.  */
> > +       subq $8, %rcx
> > +       /* Stop if the current ssp is found.  */
> > +       cmpq %rcx, %r10
> > +       jne L(find_restore_token_loop)
> > +       jmp L(no_shadow_stack_token)
> > +
> > +L(restore_shadow_stack):
> > +       /* Restore the target shadow stack.  */
> > +       rstorssp -8(%rcx)
> > +       /* Save the restore token on the old shadow stack.  */
> > +       saveprevssp
> > +       rdsspq %rax
> > +       subq %r11, %rax
> > +
> > +L(no_shadow_stack_token):
> >         /* Count the number of frames to adjust and adjust it
> >            with incssp instruction.  The instruction can adjust
> >            the ssp by [0..255] value only thus use a loop if
> > --
> > 2.43.0
> >
Noah Goldstein Jan. 4, 2024, 9:11 p.m. UTC | #3
On Tue, Jan 2, 2024 at 7:03 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> Changes in v2:
>
> 1. Merge __longjmp.S and __longjmp_chk.S by adding CHECK_INVALID_LONGJMP.
> 2. Remove a branch in the restore token searching loop.
> 3. Cache the target shadow stack pointer.
>
> setcontext and swapcontext put a restore token on the old shadow stack
> which is used to restore the target shadow stack when switching user
> contexts.  When longjmp from a user context, the target shadow stack
> can be different from the current shadow stack and INCSSP can't be
> used to restore the shadow stack pointer to the target shadow stack.
>
> Update longjmp to search for a restore token.  If found, use the token
> to restore the shadow stack pointer before using INCSSP to pop the
> shadow stack.  Stop the token search and use INCSSP if the shadow stack
> entry value is the same as the current shadow stack pointer.
>
> It is a user error if there is a shadow stack switch without leaving a
> restore token on the old shadow stack.
>
> The only difference between __longjmp.S and __longjmp_chk.S is that
> __longjmp_chk.S has a check for invalid longjmp usages.  Merge
> __longjmp.S and __longjmp_chk.S by adding the CHECK_INVALID_LONGJMP
> macro.
> ---
>  .../unix/sysv/linux/x86_64/____longjmp_chk.S  | 179 ++++--------------
>  sysdeps/x86/__longjmp_cancel.S                |   3 +
>  sysdeps/x86_64/__longjmp.S                    |  47 ++++-
>  3 files changed, 84 insertions(+), 145 deletions(-)
>
> diff --git a/sysdeps/unix/sysv/linux/x86_64/____longjmp_chk.S b/sysdeps/unix/sysv/linux/x86_64/____longjmp_chk.S
> index deb6398f43..9aa24620b9 100644
> --- a/sysdeps/unix/sysv/linux/x86_64/____longjmp_chk.S
> +++ b/sysdeps/unix/sysv/linux/x86_64/____longjmp_chk.S
> @@ -15,18 +15,7 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> -#include <sysdep.h>
> -#include <pointer_guard.h>
> -#include <jmpbuf-offsets.h>
> -#include <asm-syntax.h>
> -#include <stap-probe.h>
>  #include <sigaltstack-offsets.h>
> -#include <jmp_buf-ssp.h>
> -
> -/* Don't restore shadow stack register if shadow stack isn't enabled.  */
> -#if !SHSTK_ENABLED
> -# undef SHADOW_STACK_POINTER_OFFSET
> -#endif
>
>         .section .rodata.str1.1,"aMS",@progbits,1
>         .type   longjmp_msg,@object
> @@ -34,136 +23,48 @@ longjmp_msg:
>         .string "longjmp causes uninitialized stack frame"
>         .size   longjmp_msg, .-longjmp_msg
>
> -
> -//#define __longjmp ____longjmp_chk
> -
>  #ifdef PIC
> -# define CALL_FAIL     sub     $8, %RSP_LP;                                  \
> -                       cfi_remember_state;                                   \
> -                       cfi_def_cfa_offset(16);                               \
> -                       lea     longjmp_msg(%rip), %RDI_LP;                   \
> -                       call    HIDDEN_JUMPTARGET(__fortify_fail);            \
> -                       nop;                                                  \
> -                       cfi_restore_state
> +# define LOAD_MSG      lea     longjmp_msg(%rip), %RDI_LP
>  #else
> -# define CALL_FAIL     sub     $8, %RSP_LP;                                  \
> -                       cfi_remember_state;                                   \
> -                       cfi_def_cfa_offset(16);                               \
> -                       mov     $longjmp_msg, %RDI_LP;                        \
> -                       call    HIDDEN_JUMPTARGET(__fortify_fail);            \
> -                       nop;                                                  \
> -                       cfi_restore_state
> +# define LOAD_MSG      mov     $longjmp_msg, %RDI_LP
>  #endif
>
> -/* Jump to the position specified by ENV, causing the
> -   setjmp call there to return VAL, or 1 if VAL is 0.
> -   void __longjmp (__jmp_buf env, int val).  */
> -       .text
> -ENTRY(____longjmp_chk)
> -       /* Restore registers.  */
> -       mov     (JB_RSP*8)(%rdi), %R8_LP
> -       mov     (JB_RBP*8)(%rdi),%R9_LP
> -       mov     (JB_PC*8)(%rdi), %RDX_LP
> -#ifdef PTR_DEMANGLE
> -       PTR_DEMANGLE (%R8_LP)
> -       PTR_DEMANGLE (%R9_LP)
> -       PTR_DEMANGLE (%RDX_LP)
> -# ifdef __ILP32__
> -       /* We ignored the high bits of the %rbp value because only the low
> -          bits are mangled.  But we cannot presume that %rbp is being used
> -          as a pointer and truncate it, so recover the high bits.  */
> -       movl (JB_RBP*8 + 4)(%rdi), %eax
> -       shlq $32, %rax
> -       orq %rax, %r9
> -# endif
> -#endif
> -
> -       cmp     %R8_LP, %RSP_LP
> -       jbe     .Lok
> -
> -       /* Save function parameters.  */
> -       movq    %rdi, %r10
> -       cfi_register (%rdi, %r10)
> -       movl    %esi, %ebx
> -       cfi_register (%rsi, %rbx)
> -
> -       xorl    %edi, %edi
> -       lea     -sizeSS(%rsp), %RSI_LP
> -       movl    $__NR_sigaltstack, %eax
> -       syscall
> -       /* Without working sigaltstack we cannot perform the test.  */
> -       testl   %eax, %eax
> -       jne     .Lok2
> -       testl   $1, (-sizeSS + oSS_FLAGS)(%rsp)
> -       jz      .Lfail
> -
> -       mov     (-sizeSS + oSS_SP)(%rsp), %RAX_LP
> -       add     (-sizeSS + oSS_SIZE)(%rsp), %RAX_LP
> -       sub     %R8_LP, %RAX_LP
> -       cmp     (-sizeSS + oSS_SIZE)(%rsp), %RAX_LP
> -       jae     .Lok2
> -
> -.Lfail:        CALL_FAIL
> -
> -.Lok2: movq    %r10, %rdi
> -       cfi_restore (%rdi)
> -       movl    %ebx, %esi
> -       cfi_restore (%rsi)
> -
> +#define CHECK_INVALID_LONGJMP \
> +       cmp     %R8_LP, %RSP_LP;                                        \
> +       jbe     .Lok;                                                   \
> +       /* Save function parameters.  */                                \
> +       movq    %rdi, %r10;                                             \
> +       cfi_register (%rdi, %r10);                                      \
> +       movl    %esi, %ebx;                                             \
> +       cfi_register (%rsi, %rbx);                                      \
> +       xorl    %edi, %edi;                                             \
> +       lea     -sizeSS(%rsp), %RSI_LP;                                 \
> +       movl    $__NR_sigaltstack, %eax;                                \
> +       syscall;                                                        \
> +       /* Without working sigaltstack we cannot perform the test.  */  \
> +       testl   %eax, %eax;                                             \
> +       jne     .Lok2;                                                  \
> +       testl   $1, (-sizeSS + oSS_FLAGS)(%rsp);                        \
> +       jz      .Lfail;                                                 \
> +       mov     (-sizeSS + oSS_SP)(%rsp), %RAX_LP;                      \
> +       add     (-sizeSS + oSS_SIZE)(%rsp), %RAX_LP;                    \
> +       sub     %R8_LP, %RAX_LP;                                        \
> +       cmp     (-sizeSS + oSS_SIZE)(%rsp), %RAX_LP;                    \
> +       jae     .Lok2;                                                  \
> +.Lfail:                                                                        \
> +       sub     $8, %RSP_LP;                                            \
> +       cfi_remember_state;                                             \
> +       cfi_def_cfa_offset(16);                                         \
> +       LOAD_MSG;                                                       \
> +       call    HIDDEN_JUMPTARGET(__fortify_fail);                      \
> +       nop;                                                            \
> +       cfi_restore_state;                                              \
> +.Lok2:                                                                 \
> +       movq    %r10, %rdi;                                             \
> +       cfi_restore (%rdi);                                             \
> +       movl    %ebx, %esi;                                             \
> +       cfi_restore (%rsi);                                             \
>  .Lok:
> -#ifdef SHADOW_STACK_POINTER_OFFSET
> -# if IS_IN (libc) && defined SHARED && defined FEATURE_1_OFFSET
> -       /* Check if Shadow Stack is enabled.  */
> -       testl   $X86_FEATURE_1_SHSTK, %fs:FEATURE_1_OFFSET
> -       jz      L(skip_ssp)
> -# else
> -       xorl    %eax, %eax
> -# endif
> -       /* Check and adjust the Shadow-Stack-Pointer.  */
> -       rdsspq  %rax
> -       /* And compare it with the saved ssp value.  */
> -       subq    SHADOW_STACK_POINTER_OFFSET(%rdi), %rax
> -       je      L(skip_ssp)
> -       /* Count the number of frames to adjust and adjust it
> -          with incssp instruction.  The instruction can adjust
> -          the ssp by [0..255] value only thus use a loop if
> -          the number of frames is bigger than 255.  */
> -       negq    %rax
> -       shrq    $3, %rax
> -       /* NB: We saved Shadow-Stack-Pointer of setjmp.  Since we are
> -              restoring Shadow-Stack-Pointer of setjmp's caller, we
> -              need to unwind shadow stack by one more frame.  */
> -       addq    $1, %rax
> -       movl    $255, %ebx
> -L(loop):
> -       cmpq    %rbx, %rax
> -       cmovb   %rax, %rbx
> -       incsspq %rbx
> -       subq    %rbx, %rax
> -       ja      L(loop)
> -L(skip_ssp):
> -#endif
> -       LIBC_PROBE (longjmp, 3, LP_SIZE@%RDI_LP, -4@%esi, LP_SIZE@%RDX_LP)
> -       /* We add unwind information for the target here.  */
> -       cfi_def_cfa(%rdi, 0)
> -       cfi_register(%rsp,%r8)
> -       cfi_register(%rbp,%r9)
> -       cfi_register(%rip,%rdx)
> -       cfi_offset(%rbx,JB_RBX*8)
> -       cfi_offset(%r12,JB_R12*8)
> -       cfi_offset(%r13,JB_R13*8)
> -       cfi_offset(%r14,JB_R14*8)
> -       cfi_offset(%r15,JB_R15*8)
> -       movq    (JB_RBX*8)(%rdi), %rbx
> -       movq    (JB_R12*8)(%rdi), %r12
> -       movq    (JB_R13*8)(%rdi), %r13
> -       movq    (JB_R14*8)(%rdi), %r14
> -       movq    (JB_R15*8)(%rdi), %r15
> -       /* Set return value for setjmp.  */
> -       movl    %esi, %eax
> -       mov     %R8_LP, %RSP_LP
> -       movq    %r9,%rbp
> -       LIBC_PROBE (longjmp_target, 3,
> -                   LP_SIZE@%RDI_LP, -4@%eax, LP_SIZE@%RDX_LP)
> -       jmpq    *%rdx
> -END (____longjmp_chk)
> +
> +#define __longjmp ____longjmp_chk
> +#include <__longjmp.S>
> diff --git a/sysdeps/x86/__longjmp_cancel.S b/sysdeps/x86/__longjmp_cancel.S
> index e71b304257..b03f52b308 100644
> --- a/sysdeps/x86/__longjmp_cancel.S
> +++ b/sysdeps/x86/__longjmp_cancel.S
> @@ -16,5 +16,8 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>
> +/* Don't restore shadow stack register for __longjmp_cancel.  */
> +#define DO_NOT_RESTORE_SHADOW_STACK
> +
>  #define __longjmp __longjmp_cancel
>  #include <__longjmp.S>
> diff --git a/sysdeps/x86_64/__longjmp.S b/sysdeps/x86_64/__longjmp.S
> index c9f70f8e2a..22fedc4997 100644
> --- a/sysdeps/x86_64/__longjmp.S
> +++ b/sysdeps/x86_64/__longjmp.S
> @@ -22,14 +22,15 @@
>  #include <asm-syntax.h>
>  #include <stap-probe.h>
>
> -/* Don't restore shadow stack register if
> -   1. Shadow stack isn't enabled.  Or
> -   2. __longjmp is defined for __longjmp_cancel.
> - */
> -#if !SHSTK_ENABLED || defined __longjmp
> +/* Don't restore shadow stack register if shadow stack isn't enabled.  */
> +#if !SHSTK_ENABLED || defined DO_NOT_RESTORE_SHADOW_STACK
>  # undef SHADOW_STACK_POINTER_OFFSET
>  #endif
>
> +#ifndef CHECK_INVALID_LONGJMP
> +# define CHECK_INVALID_LONGJMP
> +#endif
> +
>  /* Jump to the position specified by ENV, causing the
>     setjmp call there to return VAL, or 1 if VAL is 0.
>     void __longjmp (__jmp_buf env, int val).  */
> @@ -52,6 +53,9 @@ ENTRY(__longjmp)
>         orq %rax, %r9
>  # endif
>  #endif
> +
> +       CHECK_INVALID_LONGJMP
> +
>  #ifdef SHADOW_STACK_POINTER_OFFSET
>  # if IS_IN (libc) && defined SHARED && defined FEATURE_1_OFFSET
>         /* Check if Shadow Stack is enabled.  */
> @@ -63,9 +67,40 @@ ENTRY(__longjmp)
>         /* Check and adjust the Shadow-Stack-Pointer.  */
>         /* Get the current ssp.  */
>         rdsspq %rax
> +       /* Save the current ssp.  */
> +       movq %rax, %r10
>         /* And compare it with the saved ssp value.  */
> -       subq SHADOW_STACK_POINTER_OFFSET(%rdi), %rax
> +       movq SHADOW_STACK_POINTER_OFFSET(%rdi), %rcx
> +       subq %rcx, %rax
>         je L(skip_ssp)
> +
> +       /* Save the target ssp.  */
> +       movq %rcx, %r11
> +
> +L(find_restore_token_loop):
> +       /* Look for a restore token.  */
> +       movq -8(%rcx), %rbx
> +       andq $-8, %rbx
> +       cmpq %rcx, %rbx
> +       /* Find the restore token.  */
> +       je L(restore_shadow_stack)
> +
> +       /* Try the next slot.  */
> +       subq $8, %rcx
> +       /* Stop if the current ssp is found.  */
> +       cmpq %rcx, %r10
> +       jne L(find_restore_token_loop)
nit: Would probably prefer `jb` here to avoid alignment issues.
> +       jmp L(no_shadow_stack_token)
> +
> +L(restore_shadow_stack):
> +       /* Restore the target shadow stack.  */
> +       rstorssp -8(%rcx)
> +       /* Save the restore token on the old shadow stack.  */
> +       saveprevssp
> +       rdsspq %rax
> +       subq %r11, %rax
> +
> +L(no_shadow_stack_token):
>         /* Count the number of frames to adjust and adjust it
>            with incssp instruction.  The instruction can adjust
>            the ssp by [0..255] value only thus use a loop if
> --
> 2.43.0
>

LGTM.

Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
Edgecombe, Rick P Jan. 9, 2024, 3:06 a.m. UTC | #4
+Shadow stack folks

Context:
https://inbox.sourceware.org/libc-alpha/20240102150329.3152784-1-hjl.tools@gmail.com/

On Tue, 2024-01-02 at 07:03 -0800, H.J. Lu wrote:
> Changes in v2:
> 
> 1. Merge __longjmp.S and __longjmp_chk.S by adding
> CHECK_INVALID_LONGJMP.
> 2. Remove a branch in the restore token searching loop.
> 3. Cache the target shadow stack pointer.
> 
> setcontext and swapcontext put a restore token on the old shadow
> stack
> which is used to restore the target shadow stack when switching user
> contexts.  When longjmp from a user context, the target shadow stack
> can be different from the current shadow stack and INCSSP can't be
> used to restore the shadow stack pointer to the target shadow stack.
> 
> Update longjmp to search for a restore token.  If found, use the
> token
> to restore the shadow stack pointer before using INCSSP to pop the
> shadow stack.  Stop the token search and use INCSSP if the shadow
> stack
> entry value is the same as the current shadow stack pointer.
> 
> It is a user error if there is a shadow stack switch without leaving
> a
> restore token on the old shadow stack.
> 
> The only difference between __longjmp.S and __longjmp_chk.S is that
> __longjmp_chk.S has a check for invalid longjmp usages.  Merge
> __longjmp.S and __longjmp_chk.S by adding the CHECK_INVALID_LONGJMP
> macro.

Hi,

I realize this is upstream already, but just to follow up from our
earlier conversation... It indeed seems a different algorithm than you
proposed earlier - you got rid of the zero frame checking. I can't see
any problems with this one as far as running off the shadow stack goes.
(except custom stack switching scenario, as discussed)

I think we also discussed in the past that it is ok to consume the
token that might have been left for ucontext usage, because once a
program longjmp()'s, it shouldn't assume anything up the stack is still
there.

Is there anyway in the x86 ABI docs we could codify that programs doing
their own shadow stack switching should take care to always leave a
token? Otherwise it seems the requirement would be hidden in the glibc
commit logs.

And I guess this means alt shadow stacks would really stick out as
requiring WRSS for longjmp() and we should try to make it work. And if
WRSS will be uncommon we should try to make clone3 consume a token, to
sort of match the security level.

Sound good everyone?
H.J. Lu Jan. 9, 2024, 3:41 a.m. UTC | #5
On Mon, Jan 8, 2024 at 7:06 PM Edgecombe, Rick P
<rick.p.edgecombe@intel.com> wrote:
>
> +Shadow stack folks
>
> Context:
> https://inbox.sourceware.org/libc-alpha/20240102150329.3152784-1-hjl.tools@gmail.com/
>
> On Tue, 2024-01-02 at 07:03 -0800, H.J. Lu wrote:
> > Changes in v2:
> >
> > 1. Merge __longjmp.S and __longjmp_chk.S by adding
> > CHECK_INVALID_LONGJMP.
> > 2. Remove a branch in the restore token searching loop.
> > 3. Cache the target shadow stack pointer.
> >
> > setcontext and swapcontext put a restore token on the old shadow
> > stack
> > which is used to restore the target shadow stack when switching user
> > contexts.  When longjmp from a user context, the target shadow stack
> > can be different from the current shadow stack and INCSSP can't be
> > used to restore the shadow stack pointer to the target shadow stack.
> >
> > Update longjmp to search for a restore token.  If found, use the
> > token
> > to restore the shadow stack pointer before using INCSSP to pop the
> > shadow stack.  Stop the token search and use INCSSP if the shadow
> > stack
> > entry value is the same as the current shadow stack pointer.
> >
> > It is a user error if there is a shadow stack switch without leaving
> > a
> > restore token on the old shadow stack.
> >
> > The only difference between __longjmp.S and __longjmp_chk.S is that
> > __longjmp_chk.S has a check for invalid longjmp usages.  Merge
> > __longjmp.S and __longjmp_chk.S by adding the CHECK_INVALID_LONGJMP
> > macro.
>
> Hi,
>
> I realize this is upstream already, but just to follow up from our
> earlier conversation... It indeed seems a different algorithm than you
> proposed earlier - you got rid of the zero frame checking. I can't see
> any problems with this one as far as running off the shadow stack goes.
> (except custom stack switching scenario, as discussed)
>
> I think we also discussed in the past that it is ok to consume the
> token that might have been left for ucontext usage, because once a
> program longjmp()'s, it shouldn't assume anything up the stack is still
> there.
>
> Is there anyway in the x86 ABI docs we could codify that programs doing
> their own shadow stack switching should take care to always leave a
> token? Otherwise it seems the requirement would be hidden in the glibc
> commit logs.

This applies to all shadow stack implementations, not just Linux.   I
could add a paragraph to x86-64 psABI.  But I think this belongs to
a white paper for shadow stack.

> And I guess this means alt shadow stacks would really stick out as
> requiring WRSS for longjmp() and we should try to make it work. And if
> WRSS will be uncommon we should try to make clone3 consume a token, to
> sort of match the security level.

As I mentioned in the shadow stack meeting today,  if we overwrote the last
shadow entry with a restore token, rstorssp couldn't fully restore the shadow
stack and longjmp might not work in some cases.  We should avoid WRSS
if possible.

> Sound good everyone?
Edgecombe, Rick P Jan. 9, 2024, 6:01 p.m. UTC | #6
On Mon, 2024-01-08 at 19:41 -0800, H.J. Lu wrote:
> I can't see
> > any problems with this one as far as running off the shadow stack
> > goes.
> > (except custom stack switching scenario, as discussed)

What is the expectation if a signal is delivered during an INCSSP loop?
I guess it will write the signal frame at an intermediate point on the
shadow stack.

If could be possible to overflow the shadow stack during the longjmp()
operation. A scenario would be a signal handler was setup from a
ucontext stack, and the swapcontext() to that stack was right at the
end of the thread shadow stack. If the signal was delivered immediately
after the RSTORSSP during a longjmp() operation back to the thread
stack, it could overflow the shadow stack when trying to deliver the
signal.

It is different then conventional longjmp() where the proximity of the
swapcontext() to the end of the stack is irrelevant. However we can
make up different rules of course for shadow stack. Not suggesting it
invalidates the design, but it's an odd corner to maybe think about
documenting.
diff mbox series

Patch

diff --git a/sysdeps/unix/sysv/linux/x86_64/____longjmp_chk.S b/sysdeps/unix/sysv/linux/x86_64/____longjmp_chk.S
index deb6398f43..9aa24620b9 100644
--- a/sysdeps/unix/sysv/linux/x86_64/____longjmp_chk.S
+++ b/sysdeps/unix/sysv/linux/x86_64/____longjmp_chk.S
@@ -15,18 +15,7 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <sysdep.h>
-#include <pointer_guard.h>
-#include <jmpbuf-offsets.h>
-#include <asm-syntax.h>
-#include <stap-probe.h>
 #include <sigaltstack-offsets.h>
-#include <jmp_buf-ssp.h>
-
-/* Don't restore shadow stack register if shadow stack isn't enabled.  */
-#if !SHSTK_ENABLED
-# undef SHADOW_STACK_POINTER_OFFSET
-#endif
 
 	.section .rodata.str1.1,"aMS",@progbits,1
 	.type	longjmp_msg,@object
@@ -34,136 +23,48 @@  longjmp_msg:
 	.string "longjmp causes uninitialized stack frame"
 	.size	longjmp_msg, .-longjmp_msg
 
-
-//#define __longjmp ____longjmp_chk
-
 #ifdef PIC
-# define CALL_FAIL	sub	$8, %RSP_LP;				      \
-			cfi_remember_state;				      \
-			cfi_def_cfa_offset(16);				      \
-			lea	longjmp_msg(%rip), %RDI_LP;		      \
-			call	HIDDEN_JUMPTARGET(__fortify_fail);	      \
-			nop;						      \
-			cfi_restore_state
+# define LOAD_MSG	lea	longjmp_msg(%rip), %RDI_LP
 #else
-# define CALL_FAIL	sub	$8, %RSP_LP;				      \
-			cfi_remember_state;				      \
-			cfi_def_cfa_offset(16);				      \
-			mov	$longjmp_msg, %RDI_LP;			      \
-			call	HIDDEN_JUMPTARGET(__fortify_fail);	      \
-			nop;						      \
-			cfi_restore_state
+# define LOAD_MSG	mov	$longjmp_msg, %RDI_LP
 #endif
 
-/* Jump to the position specified by ENV, causing the
-   setjmp call there to return VAL, or 1 if VAL is 0.
-   void __longjmp (__jmp_buf env, int val).  */
-	.text
-ENTRY(____longjmp_chk)
-	/* Restore registers.  */
-	mov	(JB_RSP*8)(%rdi), %R8_LP
-	mov	(JB_RBP*8)(%rdi),%R9_LP
-	mov	(JB_PC*8)(%rdi), %RDX_LP
-#ifdef PTR_DEMANGLE
-	PTR_DEMANGLE (%R8_LP)
-	PTR_DEMANGLE (%R9_LP)
-	PTR_DEMANGLE (%RDX_LP)
-# ifdef __ILP32__
-	/* We ignored the high bits of the %rbp value because only the low
-	   bits are mangled.  But we cannot presume that %rbp is being used
-	   as a pointer and truncate it, so recover the high bits.  */
-	movl (JB_RBP*8 + 4)(%rdi), %eax
-	shlq $32, %rax
-	orq %rax, %r9
-# endif
-#endif
-
-	cmp	%R8_LP, %RSP_LP
-	jbe	.Lok
-
-	/* Save function parameters.  */
-	movq	%rdi, %r10
-	cfi_register (%rdi, %r10)
-	movl	%esi, %ebx
-	cfi_register (%rsi, %rbx)
-
-	xorl	%edi, %edi
-	lea	-sizeSS(%rsp), %RSI_LP
-	movl	$__NR_sigaltstack, %eax
-	syscall
-	/* Without working sigaltstack we cannot perform the test.  */
-	testl	%eax, %eax
-	jne	.Lok2
-	testl	$1, (-sizeSS + oSS_FLAGS)(%rsp)
-	jz	.Lfail
-
-	mov	(-sizeSS + oSS_SP)(%rsp), %RAX_LP
-	add	(-sizeSS + oSS_SIZE)(%rsp), %RAX_LP
-	sub	%R8_LP, %RAX_LP
-	cmp	(-sizeSS + oSS_SIZE)(%rsp), %RAX_LP
-	jae	.Lok2
-
-.Lfail:	CALL_FAIL
-
-.Lok2:	movq	%r10, %rdi
-	cfi_restore (%rdi)
-	movl	%ebx, %esi
-	cfi_restore (%rsi)
-
+#define CHECK_INVALID_LONGJMP \
+	cmp	%R8_LP, %RSP_LP;					\
+	jbe	.Lok;							\
+	/* Save function parameters.  */				\
+	movq	%rdi, %r10;						\
+	cfi_register (%rdi, %r10);					\
+	movl	%esi, %ebx;						\
+	cfi_register (%rsi, %rbx);					\
+	xorl	%edi, %edi;						\
+	lea	-sizeSS(%rsp), %RSI_LP;					\
+	movl	$__NR_sigaltstack, %eax;				\
+	syscall;							\
+	/* Without working sigaltstack we cannot perform the test.  */	\
+	testl	%eax, %eax;						\
+	jne	.Lok2;							\
+	testl	$1, (-sizeSS + oSS_FLAGS)(%rsp);			\
+	jz	.Lfail;							\
+	mov	(-sizeSS + oSS_SP)(%rsp), %RAX_LP;			\
+	add	(-sizeSS + oSS_SIZE)(%rsp), %RAX_LP;			\
+	sub	%R8_LP, %RAX_LP;					\
+	cmp	(-sizeSS + oSS_SIZE)(%rsp), %RAX_LP;			\
+	jae	.Lok2;							\
+.Lfail:									\
+	sub	$8, %RSP_LP;						\
+	cfi_remember_state;						\
+	cfi_def_cfa_offset(16);						\
+	LOAD_MSG;							\
+	call	HIDDEN_JUMPTARGET(__fortify_fail);			\
+	nop;								\
+	cfi_restore_state;						\
+.Lok2:									\
+	movq	%r10, %rdi;						\
+	cfi_restore (%rdi);						\
+	movl	%ebx, %esi;						\
+	cfi_restore (%rsi);						\
 .Lok:
-#ifdef SHADOW_STACK_POINTER_OFFSET
-# if IS_IN (libc) && defined SHARED && defined FEATURE_1_OFFSET
-	/* Check if Shadow Stack is enabled.  */
-	testl	$X86_FEATURE_1_SHSTK, %fs:FEATURE_1_OFFSET
-	jz	L(skip_ssp)
-# else
-	xorl	%eax, %eax
-# endif
-	/* Check and adjust the Shadow-Stack-Pointer.  */
-	rdsspq	%rax
-	/* And compare it with the saved ssp value.  */
-	subq	SHADOW_STACK_POINTER_OFFSET(%rdi), %rax
-	je	L(skip_ssp)
-	/* Count the number of frames to adjust and adjust it
-	   with incssp instruction.  The instruction can adjust
-	   the ssp by [0..255] value only thus use a loop if
-	   the number of frames is bigger than 255.  */
-	negq	%rax
-	shrq	$3, %rax
-	/* NB: We saved Shadow-Stack-Pointer of setjmp.  Since we are
-	       restoring Shadow-Stack-Pointer of setjmp's caller, we
-	       need to unwind shadow stack by one more frame.  */
-	addq	$1, %rax
-	movl	$255, %ebx
-L(loop):
-	cmpq	%rbx, %rax
-	cmovb	%rax, %rbx
-	incsspq	%rbx
-	subq	%rbx, %rax
-	ja	L(loop)
-L(skip_ssp):
-#endif
-	LIBC_PROBE (longjmp, 3, LP_SIZE@%RDI_LP, -4@%esi, LP_SIZE@%RDX_LP)
-	/* We add unwind information for the target here.  */
-	cfi_def_cfa(%rdi, 0)
-	cfi_register(%rsp,%r8)
-	cfi_register(%rbp,%r9)
-	cfi_register(%rip,%rdx)
-	cfi_offset(%rbx,JB_RBX*8)
-	cfi_offset(%r12,JB_R12*8)
-	cfi_offset(%r13,JB_R13*8)
-	cfi_offset(%r14,JB_R14*8)
-	cfi_offset(%r15,JB_R15*8)
-	movq	(JB_RBX*8)(%rdi), %rbx
-	movq	(JB_R12*8)(%rdi), %r12
-	movq	(JB_R13*8)(%rdi), %r13
-	movq	(JB_R14*8)(%rdi), %r14
-	movq	(JB_R15*8)(%rdi), %r15
-	/* Set return value for setjmp.  */
-	movl	%esi, %eax
-	mov	%R8_LP, %RSP_LP
-	movq	%r9,%rbp
-	LIBC_PROBE (longjmp_target, 3,
-		    LP_SIZE@%RDI_LP, -4@%eax, LP_SIZE@%RDX_LP)
-	jmpq	*%rdx
-END (____longjmp_chk)
+
+#define __longjmp ____longjmp_chk
+#include <__longjmp.S>
diff --git a/sysdeps/x86/__longjmp_cancel.S b/sysdeps/x86/__longjmp_cancel.S
index e71b304257..b03f52b308 100644
--- a/sysdeps/x86/__longjmp_cancel.S
+++ b/sysdeps/x86/__longjmp_cancel.S
@@ -16,5 +16,8 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
+/* Don't restore shadow stack register for __longjmp_cancel.  */
+#define DO_NOT_RESTORE_SHADOW_STACK
+
 #define __longjmp __longjmp_cancel
 #include <__longjmp.S>
diff --git a/sysdeps/x86_64/__longjmp.S b/sysdeps/x86_64/__longjmp.S
index c9f70f8e2a..22fedc4997 100644
--- a/sysdeps/x86_64/__longjmp.S
+++ b/sysdeps/x86_64/__longjmp.S
@@ -22,14 +22,15 @@ 
 #include <asm-syntax.h>
 #include <stap-probe.h>
 
-/* Don't restore shadow stack register if
-   1. Shadow stack isn't enabled.  Or
-   2. __longjmp is defined for __longjmp_cancel.
- */
-#if !SHSTK_ENABLED || defined __longjmp
+/* Don't restore shadow stack register if shadow stack isn't enabled.  */
+#if !SHSTK_ENABLED || defined DO_NOT_RESTORE_SHADOW_STACK
 # undef SHADOW_STACK_POINTER_OFFSET
 #endif
 
+#ifndef CHECK_INVALID_LONGJMP
+# define CHECK_INVALID_LONGJMP
+#endif
+
 /* Jump to the position specified by ENV, causing the
    setjmp call there to return VAL, or 1 if VAL is 0.
    void __longjmp (__jmp_buf env, int val).  */
@@ -52,6 +53,9 @@  ENTRY(__longjmp)
 	orq %rax, %r9
 # endif
 #endif
+
+	CHECK_INVALID_LONGJMP
+
 #ifdef SHADOW_STACK_POINTER_OFFSET
 # if IS_IN (libc) && defined SHARED && defined FEATURE_1_OFFSET
 	/* Check if Shadow Stack is enabled.  */
@@ -63,9 +67,40 @@  ENTRY(__longjmp)
 	/* Check and adjust the Shadow-Stack-Pointer.  */
 	/* Get the current ssp.  */
 	rdsspq %rax
+	/* Save the current ssp.  */
+	movq %rax, %r10
 	/* And compare it with the saved ssp value.  */
-	subq SHADOW_STACK_POINTER_OFFSET(%rdi), %rax
+	movq SHADOW_STACK_POINTER_OFFSET(%rdi), %rcx
+	subq %rcx, %rax
 	je L(skip_ssp)
+
+	/* Save the target ssp.  */
+	movq %rcx, %r11
+
+L(find_restore_token_loop):
+	/* Look for a restore token.  */
+	movq -8(%rcx), %rbx
+	andq $-8, %rbx
+	cmpq %rcx, %rbx
+	/* Find the restore token.  */
+	je L(restore_shadow_stack)
+
+	/* Try the next slot.  */
+	subq $8, %rcx
+	/* Stop if the current ssp is found.  */
+	cmpq %rcx, %r10
+	jne L(find_restore_token_loop)
+	jmp L(no_shadow_stack_token)
+
+L(restore_shadow_stack):
+	/* Restore the target shadow stack.  */
+	rstorssp -8(%rcx)
+	/* Save the restore token on the old shadow stack.  */
+	saveprevssp
+	rdsspq %rax
+	subq %r11, %rax
+
+L(no_shadow_stack_token):
 	/* Count the number of frames to adjust and adjust it
 	   with incssp instruction.  The instruction can adjust
 	   the ssp by [0..255] value only thus use a loop if