diff mbox series

[v6,3/6] x86/cet: Enable shadow stack during startup

Message ID 20231229164354.182594-4-hjl.tools@gmail.com
State New
Headers show
Series x86/cet: Update CET kernel interface | expand

Commit Message

H.J. Lu Dec. 29, 2023, 4:43 p.m. UTC
Previously, CET was enabled by kernel before passing control to user
space and the startup code must disable CET if applications or shared
libraries aren't CET enabled.  Since the current kernel only supports
shadow stack and won't enable shadow stack before passing control to
user space, we need to enable shadow stack during startup if the
application and all shared libraries are shadow stack enabled.  There
is no need to disable shadow stack at startup.  Shadow stack can only
be enabled in a function which will never return.  Otherwise, shadow
stack will underflow at the function return.

1. GL(dl_x86_feature_1) is set to the CET features which are supported
by the processor and are not disabled by the tunable.  Only non-zero
features in GL(dl_x86_feature_1) should be enabled.  After enabling
shadow stack with ARCH_SHSTK_ENABLE, ARCH_SHSTK_STATUS is used to check
if shadow stack is really enabled.
2. Use ARCH_SHSTK_ENABLE in RTLD_START in dynamic executable.  It is
safe since RTLD_START never returns.
3. Call arch_prctl (ARCH_SHSTK_ENABLE) from ARCH_SETUP_TLS in static
executable.  Since the start function using ARCH_SETUP_TLS never returns,
it is safe to enable shadow stack in ARCH_SETUP_TLS.
---
 sysdeps/unix/sysv/linux/x86/cpu-features.c | 49 --------------
 sysdeps/unix/sysv/linux/x86/dl-cet.h       | 27 +++++++-
 sysdeps/unix/sysv/linux/x86_64/dl-cet.h    | 47 +++++++++++++
 sysdeps/x86/cpu-features-offsets.sym       |  1 +
 sysdeps/x86/cpu-features.c                 | 51 ---------------
 sysdeps/x86/dl-cet.c                       | 76 +++++++++++-----------
 sysdeps/x86/get-cpuid-feature-leaf.c       |  2 +-
 sysdeps/x86/include/cpu-features.h         |  3 +
 sysdeps/x86/libc-start.h                   | 53 ++++++++++++++-
 sysdeps/x86_64/dl-machine.h                | 12 +++-
 10 files changed, 175 insertions(+), 146 deletions(-)
 delete mode 100644 sysdeps/unix/sysv/linux/x86/cpu-features.c
 create mode 100644 sysdeps/unix/sysv/linux/x86_64/dl-cet.h

Comments

Szabolcs Nagy Jan. 8, 2024, 7:50 p.m. UTC | #1
The 12/29/2023 08:43, H.J. Lu wrote:
> Previously, CET was enabled by kernel before passing control to user
> space and the startup code must disable CET if applications or shared
> libraries aren't CET enabled.  Since the current kernel only supports
> shadow stack and won't enable shadow stack before passing control to
> user space, we need to enable shadow stack during startup if the
> application and all shared library are shadow stack enabled.

not all shared libraries are checked for shadow stack compat: ld.so
and vdso are not checked (i.e. kernel mapped dsos other than the
main.exe) and now static linking is another special case that is
handled differently.


_dl_process_property_note is called for property notes in dsos that
are loaded via _dl_map_object_from_fd and for the main.exe. this
happens early at phdr processing time:

- it is not called for ld.so or vdso.
- x86 libc start code calls it for static main.exe.
- it is called twice for main.exe when explicitly loaded via ld.so.
- there may be special link maps (e.g. fake ld.so one for dlmopen)
  those are not processed.

_rtld_main_check is called for main.exe and _dl_open_check is called
for a dlopened library after deps are loaded, but before relocation:

- these callbacks decide if loading the module is ok or not, so they
  have to iterate over dependencies and check the result of the
  previous property note processing. it's not clear what's the right
  way to iterate over deps (l_initfini has redundant entries and it is
  missing in case of static linking, what's wrong with using
  l_searchlist.r_list instead?).
- deps include ld.so which is not processed (x86 currently special
  cases it during checks, but that means ld.so may be missing the
  property note so e.g. we would not catch toolchain inconsistency
  and one cannot force cet off by removing the marking from ld.so.)
- is it guaranteed that other than ld.so all deps are processed?
  if not then map->l_x86_feature_1_and may be wrong when checked.
- x86 libc start code does not call these for static main.exe
  instead replicates the logic (see my comment below).

i think cleaning this up would be useful: on aarch64 the note
processing has to do syscalls (to change BTI mappings) so if it
is called redundantly or missing that has significant impact.
and obviously it is useful to know which dsos are checked when
looking for security / reliability guarantees.


> --- a/sysdeps/x86/libc-start.h
> +++ b/sysdeps/x86/libc-start.h
> @@ -19,7 +19,56 @@
>  #ifndef SHARED
>  # define ARCH_SETUP_IREL() apply_irel ()
>  # define ARCH_APPLY_IREL()
> -# ifndef ARCH_SETUP_TLS
> -#  define ARCH_SETUP_TLS() __libc_setup_tls ()
> +# ifdef __CET__
> +/* Get CET features enabled in the static executable.  */
> +
> +static inline unsigned int
> +get_cet_feature (void)
> +{
> +  /* Check if CET is supported and not disabled by tunables.  */
> +  const struct cpu_features *cpu_features = __get_cpu_features ();
> +  unsigned int cet_feature = 0;
> +  if (CPU_FEATURE_USABLE_P (cpu_features, IBT))
> +    cet_feature |= GNU_PROPERTY_X86_FEATURE_1_IBT;
> +  if (CPU_FEATURE_USABLE_P (cpu_features, SHSTK))
> +    cet_feature |= GNU_PROPERTY_X86_FEATURE_1_SHSTK;
> +  if (!cet_feature)
> +    return cet_feature;
> +
> +  struct link_map *main_map = _dl_get_dl_main_map ();
> +
> +  /* Scan program headers backward to check PT_GNU_PROPERTY early for
> +     x86 feature bits on static executable.  */
> +  const ElfW(Phdr) *phdr = GL(dl_phdr);
> +  const ElfW(Phdr) *ph;
> +  for (ph = phdr + GL(dl_phnum); ph != phdr; ph--)
> +    if (ph[-1].p_type == PT_GNU_PROPERTY)
> +      {
> +	_dl_process_pt_gnu_property (main_map, -1, &ph[-1]);
> +	/* Enable IBT and SHSTK only if they are enabled on static
> +	   executable.  */
> +	cet_feature &= (main_map->l_x86_feature_1_and
> +			& (GNU_PROPERTY_X86_FEATURE_1_IBT
> +			   | GNU_PROPERTY_X86_FEATURE_1_SHSTK));
> +	/* Set GL(dl_x86_feature_1) to the enabled CET features.  */
> +	GL(dl_x86_feature_1) = cet_feature;

in theory this logic can be

_rtld_main_check (main_map, _dl_argv[0]);

except l_initfini is currently not set up for static main.exe
which may be an oversight.

> +	break;
> +      }
> +
> +  return cet_feature;
> +}
H.J. Lu Jan. 8, 2024, 10:29 p.m. UTC | #2
On Mon, Jan 8, 2024 at 11:50 AM Szabolcs Nagy <szabolcs.nagy@arm.com> wrote:
>
> The 12/29/2023 08:43, H.J. Lu wrote:
> > Previously, CET was enabled by kernel before passing control to user
> > space and the startup code must disable CET if applications or shared
> > libraries aren't CET enabled.  Since the current kernel only supports
> > shadow stack and won't enable shadow stack before passing control to
> > user space, we need to enable shadow stack during startup if the
> > application and all shared library are shadow stack enabled.
>
> not all shared libraries are checked for shadow stack compat: ld.so
> and vdso are not checked (i.e. kernel mapped dsos other than the

Shadow stack enabled glibc must have shadow stack enabled
ld.so and shadow stack enabled kernel must have shadow stack
enabled vdso.

> main.exe) and now static linking is another special case that is
> handled differently.
>
>
> _dl_process_property_note is called for property notes in dsos that
> are loaded via _dl_map_object_from_fd and for the main.exe. this
> happens early at phdr processing time:
>
> - it is not called for ld.so or vdso.
> - x86 libc start code calls it for static main.exe.
> - it is called twice for main.exe when explicitly loaded via ld.so.
> - there may be special link maps (e.g. fake ld.so one for dlmopen)
>   those are not processed.
>
> _rtld_main_check is called for main.exe and _dl_open_check is called
> for a dlopened library after deps are loaded, but before relocation:
>
> - these callbacks decide if loading the module is ok or not, so they
>   have to iterate over dependencies and check the result of the
>   previous property note processing. it's not clear what's the right
>   way iterate over deps (l_initfini have redundant entries and it is
>   missing in case of static linking, what's wrong with using
>   l_searchlist.r_list instead?).

The current logic for dynamic executables is

1. Load everything first.
2. Call _dl_process_property_note on each object and set
GL(dl_x86_feature_1) accordingly.
3. Enable features in GL(dl_x86_feature_1) in _dl_start_user.
4. Check if shadow stack is enabled and update
GL(dl_x86_feature_1) accordingly.

> - deps include ld.so which is not processed (x86 currently special
>   cases it during checks, but that means ld.so may be missing the
>   property note so e.g. we would not catch toolchain inconsistency
>   and one cannot force cet off by removing the marking from ld.so.)

ld.so must have shadow stack enabled.   If not, it is a glibc or toolchain
bug.

> - is it guaranteed that other than ld.so all deps are processed?

Yes.

>   if not then map->l_x86_feature_1_and may be wrong when checked.
> - x86 libc start code does not call these for static main.exe
>   instead replicates the logic (see my comment below).

Since sysdeps/x86/libc-start.h is only for static executables,
it only needs to check PT_GNU_PROPERTY in executables.

> i think cleaning this up would be useful: on aarch64 the note
> processing has to do syscalls (to change BTI mappings) so if it
> is called redundantly or missing that has significant impact.
> and obviously it is useful to know which dsos are checked when
> looking for security / reliability guarantees.
>
>
> > --- a/sysdeps/x86/libc-start.h
> > +++ b/sysdeps/x86/libc-start.h
> > @@ -19,7 +19,56 @@
> >  #ifndef SHARED
> >  # define ARCH_SETUP_IREL() apply_irel ()
> >  # define ARCH_APPLY_IREL()
> > -# ifndef ARCH_SETUP_TLS
> > -#  define ARCH_SETUP_TLS() __libc_setup_tls ()
> > +# ifdef __CET__
> > +/* Get CET features enabled in the static executable.  */
> > +
> > +static inline unsigned int
> > +get_cet_feature (void)
> > +{
> > +  /* Check if CET is supported and not disabled by tunables.  */
> > +  const struct cpu_features *cpu_features = __get_cpu_features ();
> > +  unsigned int cet_feature = 0;
> > +  if (CPU_FEATURE_USABLE_P (cpu_features, IBT))
> > +    cet_feature |= GNU_PROPERTY_X86_FEATURE_1_IBT;
> > +  if (CPU_FEATURE_USABLE_P (cpu_features, SHSTK))
> > +    cet_feature |= GNU_PROPERTY_X86_FEATURE_1_SHSTK;
> > +  if (!cet_feature)
> > +    return cet_feature;
> > +
> > +  struct link_map *main_map = _dl_get_dl_main_map ();
> > +
> > +  /* Scan program headers backward to check PT_GNU_PROPERTY early for
> > +     x86 feature bits on static executable.  */
> > +  const ElfW(Phdr) *phdr = GL(dl_phdr);
> > +  const ElfW(Phdr) *ph;
> > +  for (ph = phdr + GL(dl_phnum); ph != phdr; ph--)
> > +    if (ph[-1].p_type == PT_GNU_PROPERTY)
> > +      {
> > +     _dl_process_pt_gnu_property (main_map, -1, &ph[-1]);
> > +     /* Enable IBT and SHSTK only if they are enabled on static
> > +        executable.  */
> > +     cet_feature &= (main_map->l_x86_feature_1_and
> > +                     & (GNU_PROPERTY_X86_FEATURE_1_IBT
> > +                        | GNU_PROPERTY_X86_FEATURE_1_SHSTK));
> > +     /* Set GL(dl_x86_feature_1) to the enabled CET features.  */
> > +     GL(dl_x86_feature_1) = cet_feature;
>
> in theory this logic can be
>
> _rtld_main_check (main_map, _dl_argv[0]);

Does _rtld_main_check work on static executables?

> except l_initfini is currently not set up for static main.exe
> which may be an oversight.
>
> > +     break;
> > +      }
> > +
> > +  return cet_feature;
> > +}
diff mbox series

Patch

diff --git a/sysdeps/unix/sysv/linux/x86/cpu-features.c b/sysdeps/unix/sysv/linux/x86/cpu-features.c
deleted file mode 100644
index 0e6e2bf855..0000000000
--- a/sysdeps/unix/sysv/linux/x86/cpu-features.c
+++ /dev/null
@@ -1,49 +0,0 @@ 
-/* Initialize CPU feature data for Linux/x86.
-   This file is part of the GNU C Library.
-   Copyright (C) 2018-2023 Free Software Foundation, Inc.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#if CET_ENABLED
-# include <sys/prctl.h>
-# include <asm/prctl.h>
-
-static inline int __attribute__ ((always_inline))
-get_cet_status (void)
-{
-  unsigned long long kernel_feature;
-  unsigned int status = 0;
-  if (INTERNAL_SYSCALL_CALL (arch_prctl, ARCH_SHSTK_STATUS,
-			     &kernel_feature) == 0)
-    {
-      if ((kernel_feature & ARCH_SHSTK_SHSTK) != 0)
-	status = GNU_PROPERTY_X86_FEATURE_1_SHSTK;
-    }
-  return status;
-}
-
-# ifndef SHARED
-static inline void
-x86_setup_tls (void)
-{
-  __libc_setup_tls ();
-  THREAD_SETMEM (THREAD_SELF, header.feature_1, GL(dl_x86_feature_1));
-}
-
-#  define ARCH_SETUP_TLS() x86_setup_tls ()
-# endif
-#endif
-
-#include <sysdeps/x86/cpu-features.c>
diff --git a/sysdeps/unix/sysv/linux/x86/dl-cet.h b/sysdeps/unix/sysv/linux/x86/dl-cet.h
index da220ac627..6ad5e03f69 100644
--- a/sysdeps/unix/sysv/linux/x86/dl-cet.h
+++ b/sysdeps/unix/sysv/linux/x86/dl-cet.h
@@ -18,7 +18,7 @@ 
 #include <sys/prctl.h>
 #include <asm/prctl.h>
 
-static inline int __attribute__ ((always_inline))
+static __always_inline int
 dl_cet_disable_cet (unsigned int cet_feature)
 {
   if (cet_feature != GNU_PROPERTY_X86_FEATURE_1_SHSTK)
@@ -28,7 +28,7 @@  dl_cet_disable_cet (unsigned int cet_feature)
 				      kernel_feature);
 }
 
-static inline int __attribute__ ((always_inline))
+static __always_inline int
 dl_cet_lock_cet (unsigned int cet_feature)
 {
   if (cet_feature != GNU_PROPERTY_X86_FEATURE_1_SHSTK)
@@ -38,3 +38,26 @@  dl_cet_lock_cet (unsigned int cet_feature)
   return (int) INTERNAL_SYSCALL_CALL (arch_prctl, ARCH_SHSTK_LOCK,
 				      kernel_feature);
 }
+
+static __always_inline unsigned int
+dl_cet_get_cet_status (void)
+{
+  unsigned long long kernel_feature;
+  unsigned int status = 0;
+  if (INTERNAL_SYSCALL_CALL (arch_prctl, ARCH_SHSTK_STATUS,
+			     &kernel_feature) == 0)
+    {
+      if ((kernel_feature & ARCH_SHSTK_SHSTK) != 0)
+	status = GNU_PROPERTY_X86_FEATURE_1_SHSTK;
+    }
+  return status;
+}
+
+/* Enable shadow stack with a macro to avoid shadow stack underflow.  */
+#define ENABLE_X86_CET(cet_feature)				\
+  if ((cet_feature & GNU_PROPERTY_X86_FEATURE_1_SHSTK))		\
+    {								\
+      long long int kernel_feature = ARCH_SHSTK_SHSTK;		\
+      INTERNAL_SYSCALL_CALL (arch_prctl, ARCH_SHSTK_ENABLE,	\
+			     kernel_feature);			\
+    }
diff --git a/sysdeps/unix/sysv/linux/x86_64/dl-cet.h b/sysdeps/unix/sysv/linux/x86_64/dl-cet.h
new file mode 100644
index 0000000000..e23e05c6b8
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86_64/dl-cet.h
@@ -0,0 +1,47 @@ 
+/* Linux/x86-64 CET initializers function.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <cpu-features-offsets.h>
+#include_next <dl-cet.h>
+
+#define X86_STRINGIFY_1(x)	#x
+#define X86_STRINGIFY(x)	X86_STRINGIFY_1 (x)
+
+/* Enable shadow stack before calling _dl_init if it is enabled in
+   GL(dl_x86_feature_1).  Call _dl_cet_setup_features to set up shadow
+   stack.  */
+#define RTLD_START_ENABLE_X86_FEATURES \
+"\
+	# Check if shadow stack is enabled in GL(dl_x86_feature_1).\n\
+	movl _rtld_local+" X86_STRINGIFY (RTLD_GLOBAL_DL_X86_FEATURE_1_OFFSET) "(%rip), %edx\n\
+	testl $" X86_STRINGIFY (X86_FEATURE_1_SHSTK) ", %edx\n\
+	jz 1f\n\
+	# Enable shadow stack if enabled in GL(dl_x86_feature_1).\n\
+	movl $" X86_STRINGIFY (ARCH_SHSTK_SHSTK) ", %esi\n\
+	movl $" X86_STRINGIFY (ARCH_SHSTK_ENABLE) ", %edi\n\
+	movl $" X86_STRINGIFY (__NR_arch_prctl) ", %eax\n\
+	syscall\n\
+1:\n\
+	# Pass GL(dl_x86_feature_1) to _dl_cet_setup_features.\n\
+	movl %edx, %edi\n\
+	# Align stack for the _dl_cet_setup_features call.\n\
+	andq $-16, %rsp\n\
+	call _dl_cet_setup_features\n\
+	# Restore %rax and %rsp from %r12 and %r13.\n\
+	movq %r12, %rax\n\
+	movq %r13, %rsp\n\
+"
diff --git a/sysdeps/x86/cpu-features-offsets.sym b/sysdeps/x86/cpu-features-offsets.sym
index 6d03cea8e8..5429f60632 100644
--- a/sysdeps/x86/cpu-features-offsets.sym
+++ b/sysdeps/x86/cpu-features-offsets.sym
@@ -4,3 +4,4 @@ 
 
 RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET offsetof (struct rtld_global_ro, _dl_x86_cpu_features)
 XSAVE_STATE_SIZE_OFFSET	offsetof (struct cpu_features, xsave_state_size)
+RTLD_GLOBAL_DL_X86_FEATURE_1_OFFSET offsetof (struct rtld_global, _dl_x86_feature_1)
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index f180f0d9a4..097868c1d9 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -1106,57 +1106,6 @@  no_cpuid:
 	       TUNABLE_CALLBACK (set_x86_ibt));
   TUNABLE_GET (x86_shstk, tunable_val_t *,
 	       TUNABLE_CALLBACK (set_x86_shstk));
-
-  /* Check CET status.  */
-  unsigned int cet_status = get_cet_status ();
-
-  if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_IBT) == 0)
-    CPU_FEATURE_UNSET (cpu_features, IBT)
-  if ((cet_status & GNU_PROPERTY_X86_FEATURE_1_SHSTK) == 0)
-    CPU_FEATURE_UNSET (cpu_features, SHSTK)
-
-  if (cet_status)
-    {
-      GL(dl_x86_feature_1) = cet_status;
-
-# ifndef SHARED
-      /* Check if IBT and SHSTK are enabled by kernel.  */
-      if ((cet_status
-	   & (GNU_PROPERTY_X86_FEATURE_1_IBT
-	      | GNU_PROPERTY_X86_FEATURE_1_SHSTK)))
-	{
-	  /* Disable IBT and/or SHSTK if they are enabled by kernel, but
-	     disabled by environment variable:
-
-	     GLIBC_TUNABLES=glibc.cpu.hwcaps=-IBT,-SHSTK
-	   */
-	  unsigned int cet_feature = 0;
-	  if (!CPU_FEATURE_USABLE (IBT))
-	    cet_feature |= (cet_status
-			    & GNU_PROPERTY_X86_FEATURE_1_IBT);
-	  if (!CPU_FEATURE_USABLE (SHSTK))
-	    cet_feature |= (cet_status
-			    & GNU_PROPERTY_X86_FEATURE_1_SHSTK);
-
-	  if (cet_feature)
-	    {
-	      int res = dl_cet_disable_cet (cet_feature);
-
-	      /* Clear the disabled bits in dl_x86_feature_1.  */
-	      if (res == 0)
-		GL(dl_x86_feature_1) &= ~cet_feature;
-	    }
-
-	  /* Lock CET if IBT or SHSTK is enabled in executable.  Don't
-	     lock CET if IBT or SHSTK is enabled permissively.  */
-	  if (GL(dl_x86_feature_control).ibt != cet_permissive
-	      && GL(dl_x86_feature_control).shstk != cet_permissive)
-	    dl_cet_lock_cet (GL(dl_x86_feature_1)
-			     & (GNU_PROPERTY_X86_FEATURE_1_IBT
-				| GNU_PROPERTY_X86_FEATURE_1_SHSTK));
-	}
-# endif
-    }
 #endif
 
 #ifndef SHARED
diff --git a/sysdeps/x86/dl-cet.c b/sysdeps/x86/dl-cet.c
index 66a78244d4..82167ebf50 100644
--- a/sysdeps/x86/dl-cet.c
+++ b/sysdeps/x86/dl-cet.c
@@ -173,40 +173,11 @@  dl_cet_check_startup (struct link_map *m, struct dl_cet_info *info)
     = info->enable_feature_1 ^ info->feature_1_enabled;
   if (disable_feature_1 != 0)
     {
-      /* Disable features in the kernel because of legacy objects or
-	 cet_always_off.  */
-      if (dl_cet_disable_cet (disable_feature_1) != 0)
-	_dl_fatal_printf ("%s: can't disable x86 Features\n",
-			  info->program);
-
       /* Clear the disabled bits.  Sync dl_x86_feature_1 and
          info->feature_1_enabled with info->enable_feature_1.  */
       info->feature_1_enabled = info->enable_feature_1;
       GL(dl_x86_feature_1) = info->enable_feature_1;
     }
-
-  if (HAS_CPU_FEATURE (IBT) || HAS_CPU_FEATURE (SHSTK))
-    {
-      /* Lock CET features only if IBT or SHSTK are enabled and are not
-         enabled permissively.  */
-      unsigned int feature_1_lock = 0;
-
-      if (((info->feature_1_enabled & GNU_PROPERTY_X86_FEATURE_1_IBT)
-	   != 0)
-	  && info->enable_ibt_type != cet_permissive)
-	feature_1_lock |= GNU_PROPERTY_X86_FEATURE_1_IBT;
-
-      if (((info->feature_1_enabled & GNU_PROPERTY_X86_FEATURE_1_SHSTK)
-	   != 0)
-	  && info->enable_shstk_type != cet_permissive)
-	feature_1_lock |= GNU_PROPERTY_X86_FEATURE_1_SHSTK;
-
-      if (feature_1_lock != 0
-	  && dl_cet_lock_cet (feature_1_lock) != 0)
-	_dl_fatal_printf ("%s: can't lock CET\n", info->program);
-    }
-
-  THREAD_SETMEM (THREAD_SELF, header.feature_1, GL(dl_x86_feature_1));
 }
 #endif
 
@@ -298,6 +269,15 @@  dl_cet_check (struct link_map *m, const char *program)
 {
   struct dl_cet_info info;
 
+  /* CET is enabled only if RTLD_START_ENABLE_X86_FEATURES is defined.  */
+#if defined SHARED && defined RTLD_START_ENABLE_X86_FEATURES
+  /* Set dl_x86_feature_1 to features enabled in the executable.  */
+  if (program != NULL)
+    GL(dl_x86_feature_1) = (m->l_x86_feature_1_and
+			    & (X86_FEATURE_1_IBT
+			       | X86_FEATURE_1_SHSTK));
+#endif
+
   /* Check how IBT and SHSTK should be enabled. */
   info.enable_ibt_type = GL(dl_x86_feature_control).ibt;
   info.enable_shstk_type = GL(dl_x86_feature_control).shstk;
@@ -307,17 +287,9 @@  dl_cet_check (struct link_map *m, const char *program)
   /* No legacy object check if IBT and SHSTK are always on.  */
   if (info.enable_ibt_type == cet_always_on
       && info.enable_shstk_type == cet_always_on)
-    {
-#ifdef SHARED
-      /* Set it only during startup.  */
-      if (program != NULL)
-	THREAD_SETMEM (THREAD_SELF, header.feature_1,
-		       info.feature_1_enabled);
-#endif
-      return;
-    }
+    return;
 
-  /* Check if IBT and SHSTK were enabled by kernel.  */
+  /* Check if IBT and SHSTK were enabled.  */
   if (info.feature_1_enabled == 0)
     return;
 
@@ -351,6 +323,32 @@  _dl_cet_open_check (struct link_map *l)
   dl_cet_check (l, NULL);
 }
 
+/* Set GL(dl_x86_feature_1) to the enabled features and clear the
+   active bits of the disabled features.  */
+
+attribute_hidden void
+_dl_cet_setup_features (unsigned int cet_feature)
+{
+  /* NB: cet_feature == GL(dl_x86_feature_1) which is set to features
+     enabled from executable, not necessarily supported by kernel.  */
+  if (cet_feature != 0)
+    {
+      cet_feature = dl_cet_get_cet_status ();
+      if (cet_feature != 0)
+	{
+	  THREAD_SETMEM (THREAD_SELF, header.feature_1, cet_feature);
+
+	  /* Lock CET if IBT or SHSTK is enabled in executable.  Don't
+	     lock CET if IBT or SHSTK is enabled permissively.  */
+	  if (GL(dl_x86_feature_control).ibt != cet_permissive
+	      && (GL(dl_x86_feature_control).shstk != cet_permissive))
+	    dl_cet_lock_cet (cet_feature);
+	}
+      /* Sync GL(dl_x86_feature_1) with kernel.  */
+      GL(dl_x86_feature_1) = cet_feature;
+    }
+}
+
 #ifdef SHARED
 
 # ifndef LINKAGE
diff --git a/sysdeps/x86/get-cpuid-feature-leaf.c b/sysdeps/x86/get-cpuid-feature-leaf.c
index 40a46cc79c..9317a6b494 100644
--- a/sysdeps/x86/get-cpuid-feature-leaf.c
+++ b/sysdeps/x86/get-cpuid-feature-leaf.c
@@ -24,7 +24,7 @@  __x86_get_cpuid_feature_leaf (unsigned int leaf)
   static const struct cpuid_feature feature = {};
   if (leaf < CPUID_INDEX_MAX)
     return ((const struct cpuid_feature *)
-	      &GLRO(dl_x86_cpu_features).features[leaf]);
+	    &GLRO(dl_x86_cpu_features).features[leaf]);
   else
     return &feature;
 }
diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h
index 2d7427a6c0..23bd8146a2 100644
--- a/sysdeps/x86/include/cpu-features.h
+++ b/sysdeps/x86/include/cpu-features.h
@@ -990,6 +990,9 @@  extern const struct cpu_features *_dl_x86_get_cpu_features (void)
 # define INIT_ARCH()
 # define _dl_x86_get_cpu_features() (&GLRO(dl_x86_cpu_features))
 extern void _dl_x86_init_cpu_features (void) attribute_hidden;
+
+extern void _dl_cet_setup_features (unsigned int)
+    attribute_hidden;
 #endif
 
 #ifdef __x86_64__
diff --git a/sysdeps/x86/libc-start.h b/sysdeps/x86/libc-start.h
index e93da6ef3d..7a91dd192a 100644
--- a/sysdeps/x86/libc-start.h
+++ b/sysdeps/x86/libc-start.h
@@ -19,7 +19,56 @@ 
 #ifndef SHARED
 # define ARCH_SETUP_IREL() apply_irel ()
 # define ARCH_APPLY_IREL()
-# ifndef ARCH_SETUP_TLS
-#  define ARCH_SETUP_TLS() __libc_setup_tls ()
+# ifdef __CET__
+/* Get CET features enabled in the static executable.  */
+
+static inline unsigned int
+get_cet_feature (void)
+{
+  /* Check if CET is supported and not disabled by tunables.  */
+  const struct cpu_features *cpu_features = __get_cpu_features ();
+  unsigned int cet_feature = 0;
+  if (CPU_FEATURE_USABLE_P (cpu_features, IBT))
+    cet_feature |= GNU_PROPERTY_X86_FEATURE_1_IBT;
+  if (CPU_FEATURE_USABLE_P (cpu_features, SHSTK))
+    cet_feature |= GNU_PROPERTY_X86_FEATURE_1_SHSTK;
+  if (!cet_feature)
+    return cet_feature;
+
+  struct link_map *main_map = _dl_get_dl_main_map ();
+
+  /* Scan program headers backward to check PT_GNU_PROPERTY early for
+     x86 feature bits on static executable.  */
+  const ElfW(Phdr) *phdr = GL(dl_phdr);
+  const ElfW(Phdr) *ph;
+  for (ph = phdr + GL(dl_phnum); ph != phdr; ph--)
+    if (ph[-1].p_type == PT_GNU_PROPERTY)
+      {
+	_dl_process_pt_gnu_property (main_map, -1, &ph[-1]);
+	/* Enable IBT and SHSTK only if they are enabled on static
+	   executable.  */
+	cet_feature &= (main_map->l_x86_feature_1_and
+			& (GNU_PROPERTY_X86_FEATURE_1_IBT
+			   | GNU_PROPERTY_X86_FEATURE_1_SHSTK));
+	/* Set GL(dl_x86_feature_1) to the enabled CET features.  */
+	GL(dl_x86_feature_1) = cet_feature;
+	break;
+      }
+
+  return cet_feature;
+}
+
+/* The function using this macro to enable shadow stack must not return
+   to avoid shadow stack underflow.  */
+#  define ARCH_SETUP_TLS()						\
+  {									\
+    __libc_setup_tls ();						\
+									\
+    unsigned int cet_feature = get_cet_feature ();			\
+    ENABLE_X86_CET (cet_feature);					\
+    _dl_cet_setup_features (cet_feature);				\
+  }
+# else
+#  define ARCH_SETUP_TLS()	__libc_setup_tls ()
 # endif
 #endif /* !SHARED */
diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
index 581a2f1a9e..faeae723cb 100644
--- a/sysdeps/x86_64/dl-machine.h
+++ b/sysdeps/x86_64/dl-machine.h
@@ -29,6 +29,11 @@ 
 #include <dl-static-tls.h>
 #include <dl-machine-rel.h>
 #include <isa-level.h>
+#ifdef __CET__
+# include <dl-cet.h>
+#else
+# define RTLD_START_ENABLE_X86_FEATURES
+#endif
 
 /* Return nonzero iff ELF header is compatible with the running host.  */
 static inline int __attribute__ ((unused))
@@ -146,13 +151,16 @@  _start:\n\
 _dl_start_user:\n\
 	# Save the user entry point address in %r12.\n\
 	movq %rax, %r12\n\
+	# Save %rsp value in %r13.\n\
+	movq %rsp, %r13\n\
+"\
+	RTLD_START_ENABLE_X86_FEATURES \
+"\
 	# Read the original argument count.\n\
 	movq (%rsp), %rdx\n\
 	# Call _dl_init (struct link_map *main_map, int argc, char **argv, char **env)\n\
 	# argc -> rsi\n\
 	movq %rdx, %rsi\n\
-	# Save %rsp value in %r13.\n\
-	movq %rsp, %r13\n\
 	# And align stack for the _dl_init call. \n\
 	andq $-16, %rsp\n\
 	# _dl_loaded -> rdi\n\