diff mbox series

[v2] elf: Add ELF_DYNAMIC_AFTER_RELOC to rewrite PLT

Message ID 20231221150402.2610268-1-hjl.tools@gmail.com
State New
Headers show
Series [v2] elf: Add ELF_DYNAMIC_AFTER_RELOC to rewrite PLT | expand

Commit Message

H.J. Lu Dec. 21, 2023, 3:04 p.m. UTC
Changes in v2:

1. Changed tunables to glibc.cpu.plt_rewrite.
2. Added documentation.
3. Added a testcase.

H.J.
--
Add ELF_DYNAMIC_AFTER_RELOC to allow target specific processing after
relocation.

For x86-64, add

 #define DT_X86_64_PLT     (DT_LOPROC + 0)
 #define DT_X86_64_PLTSZ   (DT_LOPROC + 1)
 #define DT_X86_64_PLTENT  (DT_LOPROC + 3)

1. DT_X86_64_PLT: The address of the procedure linkage table.
2. DT_X86_64_PLTSZ: The total size, in bytes, of the procedure linkage
table.
3. DT_X86_64_PLTENT: The size, in bytes, of a procedure linkage table
entry.

With the r_addend field of the R_X86_64_JUMP_SLOT relocation set to the
memory offset of the indirect branch instruction.

Define ELF_DYNAMIC_AFTER_RELOC for x86-64 to rewrite the PLT section
with direct branch after relocation when the lazy binding is disabled.
PLT rewrite is disabled by default.  Add

$ GLIBC_TUNABLES=glibc.cpu.plt_rewrite=1

to enable PLT rewrite at run-time.
---
 elf/dynamic-link.h                   |   5 +
 elf/elf.h                            |   5 +
 elf/tst-glibcelf.py                  |   1 +
 manual/tunables.texi                 |   9 +
 scripts/glibcelf.py                  |   4 +
 sysdeps/x86/cet-control.h            |  14 ++
 sysdeps/x86/cpu-features.c           |  10 ++
 sysdeps/x86/dl-procruntime.c         |   1 +
 sysdeps/x86/dl-tunables.list         |   5 +
 sysdeps/x86_64/Makefile              |  19 +++
 sysdeps/x86_64/configure             |  35 ++++
 sysdeps/x86_64/configure.ac          |   4 +
 sysdeps/x86_64/dl-dtprocnum.h        |  21 +++
 sysdeps/x86_64/dl-machine.h          | 237 ++++++++++++++++++++++++++-
 sysdeps/x86_64/link_map.h            |  22 +++
 sysdeps/x86_64/tst-plt-rewrite1.c    |  31 ++++
 sysdeps/x86_64/tst-plt-rewritemod1.c |  32 ++++
 17 files changed, 454 insertions(+), 1 deletion(-)
 create mode 100644 sysdeps/x86_64/dl-dtprocnum.h
 create mode 100644 sysdeps/x86_64/link_map.h
 create mode 100644 sysdeps/x86_64/tst-plt-rewrite1.c
 create mode 100644 sysdeps/x86_64/tst-plt-rewritemod1.c

Comments

H.J. Lu Jan. 1, 2024, 2:13 p.m. UTC | #1
On Thu, Dec 21, 2023 at 7:04 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> Changes in v2:
>
> 1. Changed tunables to glibc.cpu.plt_rewrite.
> 2. Added documentation.
> 3. Added a testcase.
>
> H.J.
> --
> Add ELF_DYNAMIC_AFTER_RELOC to allow target specific processing after
> relocation.
>
> For x86-64, add
>
>  #define DT_X86_64_PLT     (DT_LOPROC + 0)
>  #define DT_X86_64_PLTSZ   (DT_LOPROC + 1)
>  #define DT_X86_64_PLTENT  (DT_LOPROC + 3)
>
> 1. DT_X86_64_PLT: The address of the procedure linkage table.
> 2. DT_X86_64_PLTSZ: The total size, in bytes, of the procedure linkage
> table.
> 3. DT_X86_64_PLTENT: The size, in bytes, of a procedure linkage table
> entry.
>
> With the r_addend field of the R_X86_64_JUMP_SLOT relocation set to the
> memory offset of the indirect branch instruction.
>
> Define ELF_DYNAMIC_AFTER_RELOC for x86-64 to rewrite the PLT section
> with direct branch after relocation when the lazy binding is disabled.
> PLT rewrite is disabled by default.  Add
>
> $ GLIBC_TUNABLES=glibc.cpu.plt_rewrite=1
>
> to enable PLT rewrite at run-time.
> ---
>  elf/dynamic-link.h                   |   5 +
>  elf/elf.h                            |   5 +
>  elf/tst-glibcelf.py                  |   1 +
>  manual/tunables.texi                 |   9 +
>  scripts/glibcelf.py                  |   4 +
>  sysdeps/x86/cet-control.h            |  14 ++
>  sysdeps/x86/cpu-features.c           |  10 ++
>  sysdeps/x86/dl-procruntime.c         |   1 +
>  sysdeps/x86/dl-tunables.list         |   5 +
>  sysdeps/x86_64/Makefile              |  19 +++
>  sysdeps/x86_64/configure             |  35 ++++
>  sysdeps/x86_64/configure.ac          |   4 +
>  sysdeps/x86_64/dl-dtprocnum.h        |  21 +++
>  sysdeps/x86_64/dl-machine.h          | 237 ++++++++++++++++++++++++++-
>  sysdeps/x86_64/link_map.h            |  22 +++
>  sysdeps/x86_64/tst-plt-rewrite1.c    |  31 ++++
>  sysdeps/x86_64/tst-plt-rewritemod1.c |  32 ++++
>  17 files changed, 454 insertions(+), 1 deletion(-)
>  create mode 100644 sysdeps/x86_64/dl-dtprocnum.h
>  create mode 100644 sysdeps/x86_64/link_map.h
>  create mode 100644 sysdeps/x86_64/tst-plt-rewrite1.c
>  create mode 100644 sysdeps/x86_64/tst-plt-rewritemod1.c
>
> diff --git a/elf/dynamic-link.h b/elf/dynamic-link.h
> index e7f755fc75..5351671044 100644
> --- a/elf/dynamic-link.h
> +++ b/elf/dynamic-link.h
> @@ -177,6 +177,10 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
>        }                                                                              \
>    } while (0);
>
> +# ifndef ELF_DYNAMIC_AFTER_RELOC
> +#  define ELF_DYNAMIC_AFTER_RELOC(map, lazy)
> +# endif
> +
>  /* This can't just be an inline function because GCC is too dumb
>     to inline functions containing inlines themselves.  */
>  # ifdef RTLD_BOOTSTRAP
> @@ -192,6 +196,7 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
>        ELF_DYNAMIC_DO_RELR (map);                                             \
>      ELF_DYNAMIC_DO_REL ((map), (scope), edr_lazy, skip_ifunc);               \
>      ELF_DYNAMIC_DO_RELA ((map), (scope), edr_lazy, skip_ifunc);                      \
> +    ELF_DYNAMIC_AFTER_RELOC ((map), (edr_lazy));                             \
>    } while (0)
>
>  #endif
> diff --git a/elf/elf.h b/elf/elf.h
> index 5c1c1972d1..eda4802f56 100644
> --- a/elf/elf.h
> +++ b/elf/elf.h
> @@ -3639,6 +3639,11 @@ enum
>  /* x86-64 sh_type values.  */
>  #define SHT_X86_64_UNWIND      0x70000001 /* Unwind information.  */
>
> +/* x86-64 d_tag values.  */
> +#define DT_X86_64_PLT          (DT_LOPROC + 0)
> +#define DT_X86_64_PLTSZ                (DT_LOPROC + 1)
> +#define DT_X86_64_PLTENT       (DT_LOPROC + 3)
> +#define DT_X86_64_NUM          4
>
>  /* AM33 relocations.  */
>  #define R_MN10300_NONE         0       /* No reloc.  */
> diff --git a/elf/tst-glibcelf.py b/elf/tst-glibcelf.py
> index 6142ca28ae..52293f4adf 100644
> --- a/elf/tst-glibcelf.py
> +++ b/elf/tst-glibcelf.py
> @@ -187,6 +187,7 @@ DT_VALNUM
>  DT_VALRNGHI
>  DT_VALRNGLO
>  DT_VERSIONTAGNUM
> +DT_X86_64_NUM
>  ELFCLASSNUM
>  ELFDATANUM
>  EM_NUM
> diff --git a/manual/tunables.texi b/manual/tunables.texi
> index b31f16da84..f9bd83622e 100644
> --- a/manual/tunables.texi
> +++ b/manual/tunables.texi
> @@ -57,6 +57,7 @@ glibc.pthread.stack_cache_size: 0x2800000 (min: 0x0, max: 0xffffffffffffffff)
>  glibc.cpu.hwcap_mask: 0x6 (min: 0x0, max: 0xffffffffffffffff)
>  glibc.malloc.mmap_max: 0 (min: 0, max: 2147483647)
>  glibc.elision.skip_trylock_internal_abort: 3 (min: 0, max: 2147483647)
> +glibc.cpu.plt_rewrite: 0 (min: 0, max: 1)
>  glibc.malloc.tcache_unsorted_limit: 0x0 (min: 0x0, max: 0xffffffffffffffff)
>  glibc.cpu.x86_ibt:
>  glibc.cpu.hwcaps:
> @@ -614,6 +615,14 @@ this tunable.
>  This tunable is specific to 64-bit x86-64.
>  @end deftp
>
> +@deftp Tunable glibc.cpu.plt_rewrite
> +When this tunable is set to @code{1}, the dynamic linker will rewrite
> +the PLT section with direct branch after relocation if possible when
> +the lazy binding is disabled.
> +
> +This tunable is specific to x86-64.
> +@end deftp
> +
>  @node Memory Related Tunables
>  @section Memory Related Tunables
>  @cindex memory related tunables
> diff --git a/scripts/glibcelf.py b/scripts/glibcelf.py
> index b52e83d613..3a21e25201 100644
> --- a/scripts/glibcelf.py
> +++ b/scripts/glibcelf.py
> @@ -439,6 +439,8 @@ class DtRISCV(Dt):
>      """Supplemental DT_* constants for EM_RISCV."""
>  class DtSPARC(Dt):
>      """Supplemental DT_* constants for EM_SPARC."""
> +class DtX86_64(Dt):
> +    """Supplemental DT_* constants for EM_X86_64."""
>  _dt_skip = '''
>  DT_ENCODING DT_PROCNUM
>  DT_ADDRRNGLO DT_ADDRRNGHI DT_ADDRNUM
> @@ -451,6 +453,7 @@ DT_MIPS_NUM
>  DT_PPC_NUM
>  DT_PPC64_NUM
>  DT_SPARC_NUM
> +DT_X86_64_NUM
>  '''.strip().split()
>  _register_elf_h(DtAARCH64, prefix='DT_AARCH64_', skip=_dt_skip, parent=Dt)
>  _register_elf_h(DtALPHA, prefix='DT_ALPHA_', skip=_dt_skip, parent=Dt)
> @@ -461,6 +464,7 @@ _register_elf_h(DtPPC, prefix='DT_PPC_', skip=_dt_skip, parent=Dt)
>  _register_elf_h(DtPPC64, prefix='DT_PPC64_', skip=_dt_skip, parent=Dt)
>  _register_elf_h(DtRISCV, prefix='DT_RISCV_', skip=_dt_skip, parent=Dt)
>  _register_elf_h(DtSPARC, prefix='DT_SPARC_', skip=_dt_skip, parent=Dt)
> +_register_elf_h(DtX86_64, prefix='DT_X86_64_', skip=_dt_skip, parent=Dt)
>  _register_elf_h(Dt, skip=_dt_skip, ranges=True)
>  del _dt_skip
>
> diff --git a/sysdeps/x86/cet-control.h b/sysdeps/x86/cet-control.h
> index 3bd00019e8..216d8cc576 100644
> --- a/sysdeps/x86/cet-control.h
> +++ b/sysdeps/x86/cet-control.h
> @@ -32,10 +32,24 @@ enum dl_x86_cet_control
>    cet_permissive
>  };
>
> +/* PLT rewrite control.  */
> +enum dl_plt_rewrite_control
> +{
> +  /* No PLT rewrite.  */
> +  plt_rewrite_none,
> +  /* PLT rewrite is enabled at run-time.  */
> +  plt_rewrite_enabled,
> +  /* Rewrite PLT with JMP at run-time.  */
> +  plt_rewrite_jmp,
> +  /* Rewrite PLT with JMPABS at run-time.  */
> +  plt_rewrite_jmpabs
> +};
> +
>  struct dl_x86_feature_control
>  {
>    enum dl_x86_cet_control ibt : 2;
>    enum dl_x86_cet_control shstk : 2;
> +  enum dl_plt_rewrite_control plt_rewrite : 2;
>  };
>
>  #endif /* cet-control.h */
> diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
> index 0bf923d48b..5f2962bbe5 100644
> --- a/sysdeps/x86/cpu-features.c
> +++ b/sysdeps/x86/cpu-features.c
> @@ -27,6 +27,13 @@
>  extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
>    attribute_hidden;
>
> +static void
> +TUNABLE_CALLBACK (set_plt_rewrite) (tunable_val_t *valp)
> +{
> +  if (valp->numval)
> +    GL(dl_x86_feature_control).plt_rewrite = plt_rewrite_enabled;
> +}
> +
>  #ifdef __LP64__
>  static void
>  TUNABLE_CALLBACK (set_prefer_map_32bit_exec) (tunable_val_t *valp)
> @@ -996,6 +1003,9 @@ no_cpuid:
>
>    TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
>
> +  TUNABLE_GET (plt_rewrite, tunable_val_t *,
> +              TUNABLE_CALLBACK (set_plt_rewrite));
> +
>  #ifdef __LP64__
>    TUNABLE_GET (prefer_map_32bit_exec, tunable_val_t *,
>                TUNABLE_CALLBACK (set_prefer_map_32bit_exec));
> diff --git a/sysdeps/x86/dl-procruntime.c b/sysdeps/x86/dl-procruntime.c
> index 2fb682ded3..03a612f3f3 100644
> --- a/sysdeps/x86/dl-procruntime.c
> +++ b/sysdeps/x86/dl-procruntime.c
> @@ -67,6 +67,7 @@ PROCINFO_CLASS struct dl_x86_feature_control _dl_x86_feature_control
>  = {
>      .ibt = DEFAULT_DL_X86_CET_CONTROL,
>      .shstk = DEFAULT_DL_X86_CET_CONTROL,
> +    .plt_rewrite = plt_rewrite_none,
>    }
>  # endif
>  # if !defined SHARED || defined PROCINFO_DECL
> diff --git a/sysdeps/x86/dl-tunables.list b/sysdeps/x86/dl-tunables.list
> index feb7004036..49da67fd74 100644
> --- a/sysdeps/x86/dl-tunables.list
> +++ b/sysdeps/x86/dl-tunables.list
> @@ -66,5 +66,10 @@ glibc {
>      x86_shared_cache_size {
>        type: SIZE_T
>      }
> +    plt_rewrite {
> +      type: INT_32
> +      minval: 0
> +      maxval: 1
> +    }
>    }
>  }
> diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
> index 00120ca9ca..106f1d0f55 100644
> --- a/sysdeps/x86_64/Makefile
> +++ b/sysdeps/x86_64/Makefile
> @@ -175,6 +175,25 @@ ifeq (no,$(build-hardcoded-path-in-tests))
>  tests-container += tst-glibc-hwcaps-cache
>  endif
>
> +ifeq (yes,$(have-z-mark-plt))
> +tests += \
> +  tst-plt-rewrite1 \
> +# tests
> +modules-names += \
> +  tst-plt-rewritemod1 \
> +# modules-names
> +
> +tst-plt-rewrite1-no-pie = yes
> +LDFLAGS-tst-plt-rewrite1 = -Wl,-z,mark-plt,-z,now
> +LDFLAGS-tst-plt-rewritemod1.so = -Wl,-z,mark-plt,-z,now
> +tst-plt-rewrite1-ENV = GLIBC_TUNABLES=glibc.cpu.plt_rewrite=1 LD_DEBUG=files:bindings
> +$(objpfx)tst-plt-rewrite1: $(objpfx)tst-plt-rewritemod1.so
> +$(objpfx)tst-plt-rewrite1.out: /dev/null $(objpfx)tst-plt-rewrite1
> +       $(tst-plt-rewrite1-ENV) $(make-test-out) > $@ 2>&1; \
> +       grep -q -E "changing 'bar' PLT entry in .*/elf/tst-plt-rewritemod1.so' to direct branch" $@; \
> +       $(evaluate-test)
> +endif
> +
>  endif # $(subdir) == elf
>
>  ifeq ($(subdir),csu)
> diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure
> index e307467afa..a15eaa349d 100755
> --- a/sysdeps/x86_64/configure
> +++ b/sysdeps/x86_64/configure
> @@ -25,6 +25,41 @@ printf "%s\n" "$libc_cv_cc_mprefer_vector_width" >&6; }
>  config_vars="$config_vars
>  config-cflags-mprefer-vector-width = $libc_cv_cc_mprefer_vector_width"
>
> +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for linker that supports -z mark-plt" >&5
> +printf %s "checking for linker that supports -z mark-plt... " >&6; }
> +libc_linker_feature=no
> +cat > conftest.c <<EOF
> +int _start (void) { return 42; }
> +EOF
> +if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS $no_ssp
> +                 -Wl,-z,mark-plt -nostdlib -nostartfiles
> +                 -fPIC -shared -o conftest.so conftest.c
> +                 1>&5'
> +  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
> +  (eval $ac_try) 2>&5
> +  ac_status=$?
> +  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
> +  test $ac_status = 0; }; }
> +then
> +  if ${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS $no_ssp -Wl,-z,mark-plt -nostdlib \
> +      -nostartfiles -fPIC -shared -o conftest.so conftest.c 2>&1 \
> +      | grep "warning: -z mark-plt ignored" > /dev/null 2>&1; then
> +    true
> +  else
> +    libc_linker_feature=yes
> +  fi
> +fi
> +rm -f conftest*
> +if test $libc_linker_feature = yes; then
> +  libc_cv_z_mark_plt=yes
> +else
> +  libc_cv_z_mark_plt=no
> +fi
> +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_linker_feature" >&5
> +printf "%s\n" "$libc_linker_feature" >&6; }
> +config_vars="$config_vars
> +have-z-mark-plt = $libc_cv_z_mark_plt"
> +
>  if test x"$build_mathvec" = xnotset; then
>    build_mathvec=yes
>  fi
> diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac
> index 1215dcb1e4..40f642db31 100644
> --- a/sysdeps/x86_64/configure.ac
> +++ b/sysdeps/x86_64/configure.ac
> @@ -10,6 +10,10 @@ LIBC_TRY_CC_OPTION([-mprefer-vector-width=128],
>  LIBC_CONFIG_VAR([config-cflags-mprefer-vector-width],
>                 [$libc_cv_cc_mprefer_vector_width])
>
> +LIBC_LINKER_FEATURE([-z mark-plt], [-Wl,-z,mark-plt],
> +                    [libc_cv_z_mark_plt=yes], [libc_cv_z_mark_plt=no])
> +LIBC_CONFIG_VAR([have-z-mark-plt], [$libc_cv_z_mark_plt])
> +
>  if test x"$build_mathvec" = xnotset; then
>    build_mathvec=yes
>  fi
> diff --git a/sysdeps/x86_64/dl-dtprocnum.h b/sysdeps/x86_64/dl-dtprocnum.h
> new file mode 100644
> index 0000000000..dd41d95b88
> --- /dev/null
> +++ b/sysdeps/x86_64/dl-dtprocnum.h
> @@ -0,0 +1,21 @@
> +/* Configuration of lookup functions.  x64-64 version.
> +   Copyright (C) 2023 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +/* Number of extra dynamic section entries for this architecture.  By
> +   default there are none.  */
> +#define DT_THISPROCNUM DT_X86_64_NUM
> diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
> index 581a2f1a9e..973042f9e6 100644
> --- a/sysdeps/x86_64/dl-machine.h
> +++ b/sysdeps/x86_64/dl-machine.h
> @@ -30,6 +30,9 @@
>  #include <dl-machine-rel.h>
>  #include <isa-level.h>
>
> +/* Translate a processor specific dynamic tag to the index in l_info array.  */
> +#define DT_X86_64(x) (DT_X86_64_##x - DT_LOPROC + DT_NUM)
> +
>  /* Return nonzero iff ELF header is compatible with the running host.  */
>  static inline int __attribute__ ((unused))
>  elf_machine_matches_host (const ElfW(Ehdr) *ehdr)
> @@ -304,8 +307,9 @@ and creates an unsatisfiable circular dependency.\n",
>
>        switch (r_type)
>         {
> -       case R_X86_64_GLOB_DAT:
>         case R_X86_64_JUMP_SLOT:
> +         map->l_has_jump_slot_reloc = true;
> +       case R_X86_64_GLOB_DAT:
>           *reloc_addr = value;
>           break;
>
> @@ -541,3 +545,234 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
>  }
>
>  #endif /* RESOLVE_MAP */
> +
> +#if !defined ELF_DYNAMIC_AFTER_RELOC && !defined RTLD_BOOTSTRAP \
> +    && defined SHARED
> +# define ELF_DYNAMIC_AFTER_RELOC(map, lazy) \
> +  x86_64_dynamic_after_reloc (map, (lazy))
> +
> +static const char *
> +x86_64_reloc_symbol_name (struct link_map *map, const ElfW(Rela) *reloc)
> +{
> +  const ElfW(Sym) *const symtab
> +    = (const void *) map->l_info[DT_SYMTAB]->d_un.d_ptr;
> +  const ElfW(Sym) *const refsym = &symtab[ELFW (R_SYM) (reloc->r_info)];
> +  const char *strtab = (const char *) map->l_info[DT_STRTAB]->d_un.d_ptr;
> +  return strtab + refsym->st_name;
> +}
> +
> +static void
> +x86_64_rewrite_plt (struct link_map *map, ElfW(Addr) plt_rewrite,
> +                   ElfW(Addr) plt_aligned)
> +{
> +  ElfW(Addr) plt_rewrite_bias = plt_rewrite - plt_aligned;
> +  ElfW(Addr) l_addr = map->l_addr;
> +  ElfW(Addr) pltent = map->l_info[DT_X86_64 (PLTENT)]->d_un.d_val;
> +  ElfW(Addr) start = map->l_info[DT_JMPREL]->d_un.d_ptr;
> +  ElfW(Addr) size = map->l_info[DT_PLTRELSZ]->d_un.d_val;
> +  const ElfW(Rela) *reloc = (const void *) start;
> +  const ElfW(Rela) *reloc_end = (const void *) (start + size);
> +
> +  unsigned int feature_1 = THREAD_GETMEM (THREAD_SELF,
> +                                         header.feature_1);
> +  bool ibt_enabled_p
> +    = (feature_1 & GNU_PROPERTY_X86_FEATURE_1_IBT) != 0;
> +
> +  if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
> +    _dl_debug_printf ("\nchanging PLT in '%s' to direct branch\n",
> +                     DSO_FILENAME (map->l_name));
> +
> +  for (; reloc < reloc_end; reloc++)
> +    if (ELFW(R_TYPE) (reloc->r_info) == R_X86_64_JUMP_SLOT)
> +      {
> +       /* Get the value from the GOT entry.  */
> +       ElfW(Addr) value = *(ElfW(Addr) *) (l_addr + reloc->r_offset);
> +
> +       /* Get the corresponding PLT entry from r_addend.  */
> +       ElfW(Addr) branch_start = l_addr + reloc->r_addend;
> +       /* Skip ENDBR64 if IBT isn't enabled.  */
> +       if (!ibt_enabled_p)
> +         branch_start = ALIGN_DOWN (branch_start, pltent);
> +       /* Get the displacement from the branch target.  */
> +       ElfW(Addr) disp = value - branch_start - 5;
> +       ElfW(Addr) plt_end;
> +       ElfW(Addr) pad;
> +
> +       branch_start += plt_rewrite_bias;
> +       plt_end = (branch_start & -pltent) + pltent;
> +
> +       /* Update the PLT entry.  */
> +       if ((disp + 0x80000000ULL) <= 0xffffffffULL)
> +         {
> +           /* If the target branch can be reached with a direct branch,
> +              rewrite the PLT entry with a direct branch.  */
> +           if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_BINDINGS))
> +             {
> +               const char *sym_name = x86_64_reloc_symbol_name (map,
> +                                                                reloc);
> +               _dl_debug_printf ("changing '%s' PLT entry in '%s' to "
> +                                 "direct branch\n", sym_name,
> +                                 DSO_FILENAME (map->l_name));
> +             }
> +
> +           pad = branch_start + 5;
> +
> +           if (__glibc_unlikely (pad > plt_end))
> +             {
> +               if (__glibc_unlikely (GLRO(dl_debug_mask)
> +                                     & DL_DEBUG_BINDINGS))
> +                 {
> +                   const char *sym_name
> +                     = x86_64_reloc_symbol_name (map, reloc);
> +                   _dl_debug_printf ("\ninvalid r_addend of "
> +                                     "R_X86_64_JUMP_SLOT against '%s' "
> +                                     "in '%s'\n", sym_name,
> +                                     DSO_FILENAME (map->l_name));
> +                 }
> +
> +               continue;
> +             }
> +
> +           /* Write out direct branch.  */
> +           *(uint8_t *) branch_start = 0xe9;
> +           *((uint32_t *) (branch_start + 1)) = disp;
> +         }
> +       else
> +         {
> +           if (GL(dl_x86_feature_control).plt_rewrite
> +               != plt_rewrite_jmpabs)
> +             continue;
> +
> +           pad = branch_start + 11;
> +
> +           if (pad > plt_end)
> +             continue;
> +
> +           /* Rewrite the PLT entry with JMPABS.  */
> +           if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_BINDINGS))
> +             {
> +               const char *sym_name = x86_64_reloc_symbol_name (map,
> +                                                                reloc);
> +               _dl_debug_printf ("changing '%s' PLT entry in '%s' to "
> +                                 "JMPABS\n", sym_name,
> +                                 DSO_FILENAME (map->l_name));
> +             }
> +
> +           /* "jmpabs $target" for 64-bit displacement.  */
> +           *(uint8_t *) (branch_start + 0) = 0xd5;
> +           *(uint8_t *) (branch_start + 1) = 0x0;
> +           *(uint8_t *) (branch_start + 2) = 0xa1;
> +           *(uint64_t *) (branch_start + 3) = value;
> +         }
> +
> +       /* Fill the unused part of the PLT entry with INT3.  */
> +       for (; pad < plt_end; pad++)
> +         *(uint8_t *) pad = 0xcc;
> +      }
> +}
> +
> +static inline void
> +x86_64_rewrite_plt_in_place (struct link_map *map)
> +{
> +  /* Adjust DT_X86_64_PLT address and DT_X86_64_PLTSZ values.  */
> +  ElfW(Addr) plt = (map->l_info[DT_X86_64 (PLT)]->d_un.d_ptr
> +                   + map->l_addr);
> +  size_t pagesize = GLRO(dl_pagesize);
> +  ElfW(Addr) plt_aligned = ALIGN_DOWN (plt, pagesize);
> +  size_t pltsz = (map->l_info[DT_X86_64 (PLTSZ)]->d_un.d_val
> +                 + plt - plt_aligned);
> +
> +  if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
> +    _dl_debug_printf ("\nchanging PLT in '%s' to writable\n",
> +                     DSO_FILENAME (map->l_name));
> +
> +  if (__glibc_unlikely (__mprotect ((void *) plt_aligned, pltsz,
> +                                   PROT_WRITE | PROT_READ) < 0))
> +    {
> +      if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
> +       _dl_debug_printf ("\nfailed to change PLT in '%s' to writable\n",
> +                         DSO_FILENAME (map->l_name));
> +      return;
> +    }
> +
> +  x86_64_rewrite_plt (map, plt_aligned, plt_aligned);
> +
> +  if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
> +   _dl_debug_printf ("\nchanging PLT in '%s' back to read-only\n",
> +          DSO_FILENAME (map->l_name));
> +
> +  if (__glibc_unlikely (__mprotect ((void *) plt_aligned, pltsz,
> +                                   PROT_EXEC | PROT_READ) < 0))
> +    _dl_signal_error (0, DSO_FILENAME (map->l_name), NULL,
> +                     "failed to change PLT back to read-only");
> +}
> +
> +/* Rewrite PLT entries to direct branch if possible.  */
> +
> +static inline void
> +x86_64_dynamic_after_reloc (struct link_map *map, int lazy)
> +{
> +  /* Ignore DT_X86_64_PLT if the lazy binding is enabled.  */
> +  if (lazy)
> +    return;
> +
> +  if (__glibc_likely (map->l_info[DT_X86_64 (PLT)] == NULL))
> +    return;
> +
> +  /* Ignore DT_X86_64_PLT if there is no R_X86_64_JUMP_SLOT.  */
> +  if (!map->l_has_jump_slot_reloc)
> +    return;
> +
> +  /* Ignore DT_X86_64_PLT on ld.so to avoid changing its own PLT.  */
> +  if (map == &GL(dl_rtld_map) || map->l_real == &GL(dl_rtld_map))
> +    return;
> +
> +  /* Ignore DT_X86_64_PLT if
> +     1. DT_JMPREL isn't available or its value is 0.
> +     2. DT_PLTRELSZ is 0.
> +     3. DT_X86_64_PLTSZ isn't available or its value is 0.
> +     4. DT_X86_64_PLTENT isn't available or its value is smaller
> +     than 16 bytes.  */
> +  if (map->l_info[DT_JMPREL] == NULL
> +      || map->l_info[DT_JMPREL]->d_un.d_ptr == 0
> +      || map->l_info[DT_PLTRELSZ]->d_un.d_val == 0
> +      || map->l_info[DT_X86_64 (PLTSZ)] == NULL
> +      || map->l_info[DT_X86_64 (PLTSZ)]->d_un.d_val == 0
> +      || map->l_info[DT_X86_64 (PLTENT)] == NULL
> +      || map->l_info[DT_X86_64 (PLTENT)]->d_un.d_val < 16)
> +    return;
> +
> +  if (GL(dl_x86_feature_control).plt_rewrite == plt_rewrite_enabled)
> +    {
> +      /* PLT rewrite is enabled.  Check if mprotect works.  */
> +      void *plt = __mmap (NULL, 4096, PROT_READ | PROT_WRITE,
> +                         MAP_PRIVATE | MAP_ANONYMOUS,
> +                         -1, 0);
> +      if (__glibc_unlikely (plt == MAP_FAILED))
> +       GL(dl_x86_feature_control).plt_rewrite = plt_rewrite_none;
> +      else
> +       {
> +         *(int32_t *) plt = -1;
> +
> +         /* If the memory can be changed to PROT_EXEC | PROT_READ,
> +            rewrite PLT.  */
> +         if (__mprotect (plt, 4096, PROT_EXEC | PROT_READ) == 0)
> +           /* Use JMPABS on APX processors.  */
> +           GL(dl_x86_feature_control).plt_rewrite
> +             = (CPU_FEATURE_PRESENT_P (__get_cpu_features (), APX_F)
> +                ? plt_rewrite_jmpabs
> +                : plt_rewrite_jmp);
> +         else
> +           GL(dl_x86_feature_control).plt_rewrite = plt_rewrite_none;
> +
> +         __munmap (plt, 4096);
> +       }
> +    }
> +
> +  /* Ignore DT_X86_64_PLT if PLT rewrite isn't enabled.  */
> +  if (GL(dl_x86_feature_control).plt_rewrite == plt_rewrite_none)
> +    return;
> +
> +  x86_64_rewrite_plt_in_place (map);
> +}
> +#endif
> diff --git a/sysdeps/x86_64/link_map.h b/sysdeps/x86_64/link_map.h
> new file mode 100644
> index 0000000000..ddb8e78077
> --- /dev/null
> +++ b/sysdeps/x86_64/link_map.h
> @@ -0,0 +1,22 @@
> +/* Additional fields in struct link_map.  x86-64 version.
> +   Copyright (C) 2023 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +/* Has R_X86_64_JUMP_SLOT relocation.  */
> +bool l_has_jump_slot_reloc;
> +
> +#include <sysdeps/x86/link_map.h>
> diff --git a/sysdeps/x86_64/tst-plt-rewrite1.c b/sysdeps/x86_64/tst-plt-rewrite1.c
> new file mode 100644
> index 0000000000..0dd4724a5a
> --- /dev/null
> +++ b/sysdeps/x86_64/tst-plt-rewrite1.c
> @@ -0,0 +1,31 @@
> +/* Test PLT rewrite.
> +   Copyright (C) 2023 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <string.h>
> +#include <support/check.h>
> +
> +extern const char *foo (void);
> +
> +static int
> +do_test (void)
> +{
> +  TEST_COMPARE (strcmp (foo (), "PLT rewrite works"), 0);
> +  return 0;
> +}
> +
> +#include <support/test-driver.c>
> diff --git a/sysdeps/x86_64/tst-plt-rewritemod1.c b/sysdeps/x86_64/tst-plt-rewritemod1.c
> new file mode 100644
> index 0000000000..361b907790
> --- /dev/null
> +++ b/sysdeps/x86_64/tst-plt-rewritemod1.c
> @@ -0,0 +1,32 @@
> +/* Check PLT rewrite works correctly.
> +   Copyright (C) 2023 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +/* foo calls bar with indirect branch via PLT.  PLT rewrite should
> +   change it to direct branch.  */
> +
> +const char *
> +bar (void)
> +{
> +  return "PLT rewrite works";
> +}
> +
> +const char *
> +foo (void)
> +{
> +  return bar ();
> +}
> --
> 2.43.0
>

PING.
Noah Goldstein Jan. 1, 2024, 6:36 p.m. UTC | #2
On Mon, Jan 1, 2024 at 6:13 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Thu, Dec 21, 2023 at 7:04 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > Changes in v2:
> >
> > 1. Changed tunables to glibc.cpu.plt_rewrite.
> > 2. Added documentation.
> > 3. Added a testcase.
> >
> > H.J.
> > --
> > Add ELF_DYNAMIC_AFTER_RELOC to allow target specific processing after
> > relocation.
> >
> > For x86-64, add
> >
> >  #define DT_X86_64_PLT     (DT_LOPROC + 0)
> >  #define DT_X86_64_PLTSZ   (DT_LOPROC + 1)
> >  #define DT_X86_64_PLTENT  (DT_LOPROC + 3)
> >
> > 1. DT_X86_64_PLT: The address of the procedure linkage table.
> > 2. DT_X86_64_PLTSZ: The total size, in bytes, of the procedure linkage
> > table.
> > 3. DT_X86_64_PLTENT: The size, in bytes, of a procedure linkage table
> > entry.
> >
> > With the r_addend field of the R_X86_64_JUMP_SLOT relocation set to the
> > memory offset of the indirect branch instruction.
> >
> > Define ELF_DYNAMIC_AFTER_RELOC for x86-64 to rewrite the PLT section
> > with direct branch after relocation when the lazy binding is disabled.
> > PLT rewrite is disabled by default.  Add
> >
> > $ GLIBC_TUNABLES=glibc.cpu.plt_rewrite=1
> >
> > to enable PLT rewrite at run-time.
> > ---
> >  elf/dynamic-link.h                   |   5 +
> >  elf/elf.h                            |   5 +
> >  elf/tst-glibcelf.py                  |   1 +
> >  manual/tunables.texi                 |   9 +
> >  scripts/glibcelf.py                  |   4 +
> >  sysdeps/x86/cet-control.h            |  14 ++
> >  sysdeps/x86/cpu-features.c           |  10 ++
> >  sysdeps/x86/dl-procruntime.c         |   1 +
> >  sysdeps/x86/dl-tunables.list         |   5 +
> >  sysdeps/x86_64/Makefile              |  19 +++
> >  sysdeps/x86_64/configure             |  35 ++++
> >  sysdeps/x86_64/configure.ac          |   4 +
> >  sysdeps/x86_64/dl-dtprocnum.h        |  21 +++
> >  sysdeps/x86_64/dl-machine.h          | 237 ++++++++++++++++++++++++++-
> >  sysdeps/x86_64/link_map.h            |  22 +++
> >  sysdeps/x86_64/tst-plt-rewrite1.c    |  31 ++++
> >  sysdeps/x86_64/tst-plt-rewritemod1.c |  32 ++++
> >  17 files changed, 454 insertions(+), 1 deletion(-)
> >  create mode 100644 sysdeps/x86_64/dl-dtprocnum.h
> >  create mode 100644 sysdeps/x86_64/link_map.h
> >  create mode 100644 sysdeps/x86_64/tst-plt-rewrite1.c
> >  create mode 100644 sysdeps/x86_64/tst-plt-rewritemod1.c
> >
> > diff --git a/elf/dynamic-link.h b/elf/dynamic-link.h
> > index e7f755fc75..5351671044 100644
> > --- a/elf/dynamic-link.h
> > +++ b/elf/dynamic-link.h
> > @@ -177,6 +177,10 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
> >        }                                                                              \
> >    } while (0);
> >
> > +# ifndef ELF_DYNAMIC_AFTER_RELOC
> > +#  define ELF_DYNAMIC_AFTER_RELOC(map, lazy)
> > +# endif
> > +
> >  /* This can't just be an inline function because GCC is too dumb
> >     to inline functions containing inlines themselves.  */
> >  # ifdef RTLD_BOOTSTRAP
> > @@ -192,6 +196,7 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
> >        ELF_DYNAMIC_DO_RELR (map);                                             \
> >      ELF_DYNAMIC_DO_REL ((map), (scope), edr_lazy, skip_ifunc);               \
> >      ELF_DYNAMIC_DO_RELA ((map), (scope), edr_lazy, skip_ifunc);                      \
> > +    ELF_DYNAMIC_AFTER_RELOC ((map), (edr_lazy));                             \
> >    } while (0)
> >
> >  #endif
> > diff --git a/elf/elf.h b/elf/elf.h
> > index 5c1c1972d1..eda4802f56 100644
> > --- a/elf/elf.h
> > +++ b/elf/elf.h
> > @@ -3639,6 +3639,11 @@ enum
> >  /* x86-64 sh_type values.  */
> >  #define SHT_X86_64_UNWIND      0x70000001 /* Unwind information.  */
> >
> > +/* x86-64 d_tag values.  */
> > +#define DT_X86_64_PLT          (DT_LOPROC + 0)
> > +#define DT_X86_64_PLTSZ                (DT_LOPROC + 1)
> > +#define DT_X86_64_PLTENT       (DT_LOPROC + 3)
Why not +2?
Also, to me at least, `PLTENT` reads as "number of PLT entries". Maybe
`PLTENTSZ`?
> > +#define DT_X86_64_NUM          4
> >
> >  /* AM33 relocations.  */
> >  #define R_MN10300_NONE         0       /* No reloc.  */
> > diff --git a/elf/tst-glibcelf.py b/elf/tst-glibcelf.py
> > index 6142ca28ae..52293f4adf 100644
> > --- a/elf/tst-glibcelf.py
> > +++ b/elf/tst-glibcelf.py
> > @@ -187,6 +187,7 @@ DT_VALNUM
> >  DT_VALRNGHI
> >  DT_VALRNGLO
> >  DT_VERSIONTAGNUM
> > +DT_X86_64_NUM
> >  ELFCLASSNUM
> >  ELFDATANUM
> >  EM_NUM
> > diff --git a/manual/tunables.texi b/manual/tunables.texi
> > index b31f16da84..f9bd83622e 100644
> > --- a/manual/tunables.texi
> > +++ b/manual/tunables.texi
> > @@ -57,6 +57,7 @@ glibc.pthread.stack_cache_size: 0x2800000 (min: 0x0, max: 0xffffffffffffffff)
> >  glibc.cpu.hwcap_mask: 0x6 (min: 0x0, max: 0xffffffffffffffff)
> >  glibc.malloc.mmap_max: 0 (min: 0, max: 2147483647)
> >  glibc.elision.skip_trylock_internal_abort: 3 (min: 0, max: 2147483647)
> > +glibc.cpu.plt_rewrite: 0 (min: 0, max: 1)
> >  glibc.malloc.tcache_unsorted_limit: 0x0 (min: 0x0, max: 0xffffffffffffffff)
> >  glibc.cpu.x86_ibt:
> >  glibc.cpu.hwcaps:
> > @@ -614,6 +615,14 @@ this tunable.
> >  This tunable is specific to 64-bit x86-64.
> >  @end deftp
> >
> > +@deftp Tunable glibc.cpu.plt_rewrite
> > +When this tunable is set to @code{1}, the dynamic linker will rewrite
> > +the PLT section with direct branch after relocation if possible when
> > +the lazy binding is disabled.
> > +
> > +This tunable is specific to x86-64.
> > +@end deftp
> > +
> >  @node Memory Related Tunables
> >  @section Memory Related Tunables
> >  @cindex memory related tunables
> > diff --git a/scripts/glibcelf.py b/scripts/glibcelf.py
> > index b52e83d613..3a21e25201 100644
> > --- a/scripts/glibcelf.py
> > +++ b/scripts/glibcelf.py
> > @@ -439,6 +439,8 @@ class DtRISCV(Dt):
> >      """Supplemental DT_* constants for EM_RISCV."""
> >  class DtSPARC(Dt):
> >      """Supplemental DT_* constants for EM_SPARC."""
> > +class DtX86_64(Dt):
> > +    """Supplemental DT_* constants for EM_X86_64."""
> >  _dt_skip = '''
> >  DT_ENCODING DT_PROCNUM
> >  DT_ADDRRNGLO DT_ADDRRNGHI DT_ADDRNUM
> > @@ -451,6 +453,7 @@ DT_MIPS_NUM
> >  DT_PPC_NUM
> >  DT_PPC64_NUM
> >  DT_SPARC_NUM
> > +DT_X86_64_NUM
> >  '''.strip().split()
> >  _register_elf_h(DtAARCH64, prefix='DT_AARCH64_', skip=_dt_skip, parent=Dt)
> >  _register_elf_h(DtALPHA, prefix='DT_ALPHA_', skip=_dt_skip, parent=Dt)
> > @@ -461,6 +464,7 @@ _register_elf_h(DtPPC, prefix='DT_PPC_', skip=_dt_skip, parent=Dt)
> >  _register_elf_h(DtPPC64, prefix='DT_PPC64_', skip=_dt_skip, parent=Dt)
> >  _register_elf_h(DtRISCV, prefix='DT_RISCV_', skip=_dt_skip, parent=Dt)
> >  _register_elf_h(DtSPARC, prefix='DT_SPARC_', skip=_dt_skip, parent=Dt)
> > +_register_elf_h(DtX86_64, prefix='DT_X86_64_', skip=_dt_skip, parent=Dt)
> >  _register_elf_h(Dt, skip=_dt_skip, ranges=True)
> >  del _dt_skip
> >
> > diff --git a/sysdeps/x86/cet-control.h b/sysdeps/x86/cet-control.h
> > index 3bd00019e8..216d8cc576 100644
> > --- a/sysdeps/x86/cet-control.h
> > +++ b/sysdeps/x86/cet-control.h
> > @@ -32,10 +32,24 @@ enum dl_x86_cet_control
> >    cet_permissive
> >  };
> >
> > +/* PLT rewrite control.  */
> > +enum dl_plt_rewrite_control
> > +{
> > +  /* No PLT rewrite.  */
> > +  plt_rewrite_none,
> > +  /* PLT rewrite is enabled at run-time.  */
> > +  plt_rewrite_enabled,
> > +  /* Rewrite PLT with JMP at run-time.  */
> > +  plt_rewrite_jmp,
> > +  /* Rewrite PLT with JMPABS at run-time.  */
> > +  plt_rewrite_jmpabs
> > +};
> > +
> >  struct dl_x86_feature_control
> >  {
> >    enum dl_x86_cet_control ibt : 2;
> >    enum dl_x86_cet_control shstk : 2;
> > +  enum dl_plt_rewrite_control plt_rewrite : 2;
> >  };
> >
> >  #endif /* cet-control.h */
> > diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
> > index 0bf923d48b..5f2962bbe5 100644
> > --- a/sysdeps/x86/cpu-features.c
> > +++ b/sysdeps/x86/cpu-features.c
> > @@ -27,6 +27,13 @@
> >  extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
> >    attribute_hidden;
> >
> > +static void
> > +TUNABLE_CALLBACK (set_plt_rewrite) (tunable_val_t *valp)
> > +{
> > +  if (valp->numval)
> > +    GL(dl_x86_feature_control).plt_rewrite = plt_rewrite_enabled;
> > +}
> > +
> >  #ifdef __LP64__
> >  static void
> >  TUNABLE_CALLBACK (set_prefer_map_32bit_exec) (tunable_val_t *valp)
> > @@ -996,6 +1003,9 @@ no_cpuid:
> >
> >    TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
> >
> > +  TUNABLE_GET (plt_rewrite, tunable_val_t *,
> > +              TUNABLE_CALLBACK (set_plt_rewrite));
> > +
> >  #ifdef __LP64__
> >    TUNABLE_GET (prefer_map_32bit_exec, tunable_val_t *,
> >                TUNABLE_CALLBACK (set_prefer_map_32bit_exec));
> > diff --git a/sysdeps/x86/dl-procruntime.c b/sysdeps/x86/dl-procruntime.c
> > index 2fb682ded3..03a612f3f3 100644
> > --- a/sysdeps/x86/dl-procruntime.c
> > +++ b/sysdeps/x86/dl-procruntime.c
> > @@ -67,6 +67,7 @@ PROCINFO_CLASS struct dl_x86_feature_control _dl_x86_feature_control
> >  = {
> >      .ibt = DEFAULT_DL_X86_CET_CONTROL,
> >      .shstk = DEFAULT_DL_X86_CET_CONTROL,
> > +    .plt_rewrite = plt_rewrite_none,
> >    }
> >  # endif
> >  # if !defined SHARED || defined PROCINFO_DECL
> > diff --git a/sysdeps/x86/dl-tunables.list b/sysdeps/x86/dl-tunables.list
> > index feb7004036..49da67fd74 100644
> > --- a/sysdeps/x86/dl-tunables.list
> > +++ b/sysdeps/x86/dl-tunables.list
> > @@ -66,5 +66,10 @@ glibc {
> >      x86_shared_cache_size {
> >        type: SIZE_T
> >      }
> > +    plt_rewrite {
> > +      type: INT_32
> > +      minval: 0
> > +      maxval: 1
> > +    }
> >    }
> >  }
> > diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
> > index 00120ca9ca..106f1d0f55 100644
> > --- a/sysdeps/x86_64/Makefile
> > +++ b/sysdeps/x86_64/Makefile
> > @@ -175,6 +175,25 @@ ifeq (no,$(build-hardcoded-path-in-tests))
> >  tests-container += tst-glibc-hwcaps-cache
> >  endif
> >
> > +ifeq (yes,$(have-z-mark-plt))
> > +tests += \
> > +  tst-plt-rewrite1 \
> > +# tests
> > +modules-names += \
> > +  tst-plt-rewritemod1 \
> > +# modules-names
> > +
> > +tst-plt-rewrite1-no-pie = yes
> > +LDFLAGS-tst-plt-rewrite1 = -Wl,-z,mark-plt,-z,now
> > +LDFLAGS-tst-plt-rewritemod1.so = -Wl,-z,mark-plt,-z,now
> > +tst-plt-rewrite1-ENV = GLIBC_TUNABLES=glibc.cpu.plt_rewrite=1 LD_DEBUG=files:bindings
> > +$(objpfx)tst-plt-rewrite1: $(objpfx)tst-plt-rewritemod1.so
> > +$(objpfx)tst-plt-rewrite1.out: /dev/null $(objpfx)tst-plt-rewrite1
> > +       $(tst-plt-rewrite1-ENV) $(make-test-out) > $@ 2>&1; \
> > +       grep -q -E "changing 'bar' PLT entry in .*/elf/tst-plt-rewritemod1.so' to direct branch" $@; \
> > +       $(evaluate-test)
> > +endif
> > +
> >  endif # $(subdir) == elf
> >
> >  ifeq ($(subdir),csu)
> > diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure
> > index e307467afa..a15eaa349d 100755
> > --- a/sysdeps/x86_64/configure
> > +++ b/sysdeps/x86_64/configure
> > @@ -25,6 +25,41 @@ printf "%s\n" "$libc_cv_cc_mprefer_vector_width" >&6; }
> >  config_vars="$config_vars
> >  config-cflags-mprefer-vector-width = $libc_cv_cc_mprefer_vector_width"
> >
> > +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for linker that supports -z mark-plt" >&5
> > +printf %s "checking for linker that supports -z mark-plt... " >&6; }
> > +libc_linker_feature=no
> > +cat > conftest.c <<EOF
> > +int _start (void) { return 42; }
> > +EOF
> > +if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS $no_ssp
> > +                 -Wl,-z,mark-plt -nostdlib -nostartfiles
> > +                 -fPIC -shared -o conftest.so conftest.c
> > +                 1>&5'
> > +  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
> > +  (eval $ac_try) 2>&5
> > +  ac_status=$?
> > +  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
> > +  test $ac_status = 0; }; }
> > +then
> > +  if ${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS $no_ssp -Wl,-z,mark-plt -nostdlib \
> > +      -nostartfiles -fPIC -shared -o conftest.so conftest.c 2>&1 \
> > +      | grep "warning: -z mark-plt ignored" > /dev/null 2>&1; then
> > +    true
> > +  else
> > +    libc_linker_feature=yes
> > +  fi
> > +fi
> > +rm -f conftest*
> > +if test $libc_linker_feature = yes; then
> > +  libc_cv_z_mark_plt=yes
> > +else
> > +  libc_cv_z_mark_plt=no
> > +fi
> > +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_linker_feature" >&5
> > +printf "%s\n" "$libc_linker_feature" >&6; }
> > +config_vars="$config_vars
> > +have-z-mark-plt = $libc_cv_z_mark_plt"
> > +
> >  if test x"$build_mathvec" = xnotset; then
> >    build_mathvec=yes
> >  fi
> > diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac
> > index 1215dcb1e4..40f642db31 100644
> > --- a/sysdeps/x86_64/configure.ac
> > +++ b/sysdeps/x86_64/configure.ac
> > @@ -10,6 +10,10 @@ LIBC_TRY_CC_OPTION([-mprefer-vector-width=128],
> >  LIBC_CONFIG_VAR([config-cflags-mprefer-vector-width],
> >                 [$libc_cv_cc_mprefer_vector_width])
> >
> > +LIBC_LINKER_FEATURE([-z mark-plt], [-Wl,-z,mark-plt],
> > +                    [libc_cv_z_mark_plt=yes], [libc_cv_z_mark_plt=no])
> > +LIBC_CONFIG_VAR([have-z-mark-plt], [$libc_cv_z_mark_plt])
> > +
> >  if test x"$build_mathvec" = xnotset; then
> >    build_mathvec=yes
> >  fi
> > diff --git a/sysdeps/x86_64/dl-dtprocnum.h b/sysdeps/x86_64/dl-dtprocnum.h
> > new file mode 100644
> > index 0000000000..dd41d95b88
> > --- /dev/null
> > +++ b/sysdeps/x86_64/dl-dtprocnum.h
> > @@ -0,0 +1,21 @@
> > +/* Configuration of lookup functions.  x64-64 version.
> > +   Copyright (C) 2023 Free Software Foundation, Inc.
> > +   This file is part of the GNU C Library.
> > +
> > +   The GNU C Library is free software; you can redistribute it and/or
> > +   modify it under the terms of the GNU Lesser General Public
> > +   License as published by the Free Software Foundation; either
> > +   version 2.1 of the License, or (at your option) any later version.
> > +
> > +   The GNU C Library is distributed in the hope that it will be useful,
> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +   Lesser General Public License for more details.
> > +
> > +   You should have received a copy of the GNU Lesser General Public
> > +   License along with the GNU C Library; if not, see
> > +   <https://www.gnu.org/licenses/>.  */
> > +
> > +/* Number of extra dynamic section entries for this architecture.  By
> > +   default there are none.  */
> > +#define DT_THISPROCNUM DT_X86_64_NUM
> > diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
> > index 581a2f1a9e..973042f9e6 100644
> > --- a/sysdeps/x86_64/dl-machine.h
> > +++ b/sysdeps/x86_64/dl-machine.h
> > @@ -30,6 +30,9 @@
> >  #include <dl-machine-rel.h>
> >  #include <isa-level.h>
> >
> > +/* Translate a processor specific dynamic tag to the index in l_info array.  */
> > +#define DT_X86_64(x) (DT_X86_64_##x - DT_LOPROC + DT_NUM)
> > +
> >  /* Return nonzero iff ELF header is compatible with the running host.  */
> >  static inline int __attribute__ ((unused))
> >  elf_machine_matches_host (const ElfW(Ehdr) *ehdr)
> > @@ -304,8 +307,9 @@ and creates an unsatisfiable circular dependency.\n",
> >
> >        switch (r_type)
> >         {
> > -       case R_X86_64_GLOB_DAT:
> >         case R_X86_64_JUMP_SLOT:
> > +         map->l_has_jump_slot_reloc = true;
> > +       case R_X86_64_GLOB_DAT:
> >           *reloc_addr = value;
> >           break;
> >
> > @@ -541,3 +545,234 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
> >  }
> >
> >  #endif /* RESOLVE_MAP */
> > +
> > +#if !defined ELF_DYNAMIC_AFTER_RELOC && !defined RTLD_BOOTSTRAP \
> > +    && defined SHARED
> > +# define ELF_DYNAMIC_AFTER_RELOC(map, lazy) \
> > +  x86_64_dynamic_after_reloc (map, (lazy))
> > +
> > +static const char *
> > +x86_64_reloc_symbol_name (struct link_map *map, const ElfW(Rela) *reloc)
> > +{
> > +  const ElfW(Sym) *const symtab
> > +    = (const void *) map->l_info[DT_SYMTAB]->d_un.d_ptr;
> > +  const ElfW(Sym) *const refsym = &symtab[ELFW (R_SYM) (reloc->r_info)];
> > +  const char *strtab = (const char *) map->l_info[DT_STRTAB]->d_un.d_ptr;
> > +  return strtab + refsym->st_name;
> > +}
> > +
> > +static void
> > +x86_64_rewrite_plt (struct link_map *map, ElfW(Addr) plt_rewrite,
> > +                   ElfW(Addr) plt_aligned)
> > +{
> > +  ElfW(Addr) plt_rewrite_bias = plt_rewrite - plt_aligned;
> > +  ElfW(Addr) l_addr = map->l_addr;
> > +  ElfW(Addr) pltent = map->l_info[DT_X86_64 (PLTENT)]->d_un.d_val;
> > +  ElfW(Addr) start = map->l_info[DT_JMPREL]->d_un.d_ptr;
> > +  ElfW(Addr) size = map->l_info[DT_PLTRELSZ]->d_un.d_val;
> > +  const ElfW(Rela) *reloc = (const void *) start;
> > +  const ElfW(Rela) *reloc_end = (const void *) (start + size);
> > +
> > +  unsigned int feature_1 = THREAD_GETMEM (THREAD_SELF,
> > +                                         header.feature_1);
> > +  bool ibt_enabled_p
> > +    = (feature_1 & GNU_PROPERTY_X86_FEATURE_1_IBT) != 0;
> > +
> > +  if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
> > +    _dl_debug_printf ("\nchanging PLT in '%s' to direct branch\n",
> > +                     DSO_FILENAME (map->l_name));
> > +
> > +  for (; reloc < reloc_end; reloc++)
> > +    if (ELFW(R_TYPE) (reloc->r_info) == R_X86_64_JUMP_SLOT)
> > +      {
> > +       /* Get the value from the GOT entry.  */
> > +       ElfW(Addr) value = *(ElfW(Addr) *) (l_addr + reloc->r_offset);
> > +
> > +       /* Get the corresponding PLT entry from r_addend.  */
> > +       ElfW(Addr) branch_start = l_addr + reloc->r_addend;
> > +       /* Skip ENDBR64 if IBT isn't enabled.  */
> > +       if (!ibt_enabled_p)
> > +         branch_start = ALIGN_DOWN (branch_start, pltent);
> > +       /* Get the displacement from the branch target.  */
> > +       ElfW(Addr) disp = value - branch_start - 5;
IMO 5 should be a usefully named define. Maybe "JMP_INSN_SIZE" or something.

> > +       ElfW(Addr) plt_end;
> > +       ElfW(Addr) pad;
> > +
> > +       branch_start += plt_rewrite_bias;
> > +       plt_end = (branch_start & -pltent) + pltent;
> > +
> > +       /* Update the PLT entry.  */
> > +       if ((disp + 0x80000000ULL) <= 0xffffffffULL)
0x80000000 -> (unsigned long long)I32_MIN?
0xffffffff -> (unsigned long long)U32_MAX?
Or at least a define.
Its easy to get the number of hex characters wrong.
> > +         {
> > +           /* If the target branch can be reached with a direct branch,
> > +              rewrite the PLT entry with a direct branch.  */
> > +           if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_BINDINGS))
> > +             {
> > +               const char *sym_name = x86_64_reloc_symbol_name (map,
> > +                                                                reloc);
> > +               _dl_debug_printf ("changing '%s' PLT entry in '%s' to "
> > +                                 "direct branch\n", sym_name,
> > +                                 DSO_FILENAME (map->l_name));
> > +             }
> > +
> > +           pad = branch_start + 5;
again a define.
> > +
> > +           if (__glibc_unlikely (pad > plt_end))
> > +             {
> > +               if (__glibc_unlikely (GLRO(dl_debug_mask)
> > +                                     & DL_DEBUG_BINDINGS))
> > +                 {
> > +                   const char *sym_name
> > +                     = x86_64_reloc_symbol_name (map, reloc);
> > +                   _dl_debug_printf ("\ninvalid r_addend of "
> > +                                     "R_X86_64_JUMP_SLOT against '%s' "
> > +                                     "in '%s'\n", sym_name,
> > +                                     DSO_FILENAME (map->l_name));
> > +                 }
> > +
> > +               continue;
> > +             }
> > +
> > +           /* Write out direct branch.  */
> > +           *(uint8_t *) branch_start = 0xe9;
will it ever be disp8?
But again, imo `0xe9` should be defined as something along the lines of
JMP_DISP32_OPCODE
> > +           *((uint32_t *) (branch_start + 1)) = disp;
Does it need to be a u32 with aliasing attribute? Maybe just memcpy?
> > +         }
> > +       else
> > +         {
> > +           if (GL(dl_x86_feature_control).plt_rewrite
> > +               != plt_rewrite_jmpabs)
> > +             continue;
> > +
> > +           pad = branch_start + 11;
> > +
I assume this is 16 (plt entry size) - 5 (jmp insn size)?
Should this use the `DT_X86_64_PLTENT` field?
> > +           if (pad > plt_end)
> > +             continue;
> > +
> > +           /* Rewrite the PLT entry with JMPABS.  */
> > +           if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_BINDINGS))
> > +             {
> > +               const char *sym_name = x86_64_reloc_symbol_name (map,
> > +                                                                reloc);
> > +               _dl_debug_printf ("changing '%s' PLT entry in '%s' to "
> > +                                 "JMPABS\n", sym_name,
> > +                                 DSO_FILENAME (map->l_name));
> > +             }
> > +
> > +           /* "jmpabs $target" for 64-bit displacement.  */
> > +           *(uint8_t *) (branch_start + 0) = 0xd5;
> > +           *(uint8_t *) (branch_start + 1) = 0x0;
> > +           *(uint8_t *) (branch_start + 2) = 0xa1;
> > +           *(uint64_t *) (branch_start + 3) = value;
again this is not a clear way to encode instructions...
What instruction is meant to be?
```
.byte 0xd5, 0x0, 0xa1, 0x7f, 0x12, 0x34, 0x00, 0x00, 0x00, 0x00, 0x00
```
->
```
0000000000000000 <.text>:
       0: d5                            <unknown>
       1: 00 a1 7f 12 34 00            addb %ah, 3412607(%rcx)
       7: 00 00                        addb %al, (%rax)
       9: 00 00                        addb %al, (%rax)

```
> > +         }
> > +
> > +       /* Fill the unused part of the PLT entry with INT3.  */
> > +       for (; pad < plt_end; pad++)
> > +         *(uint8_t *) pad = 0xcc;
maybe `INT3_OPCODE`?
> > +      }
> > +}
> > +
> > +static inline void
> > +x86_64_rewrite_plt_in_place (struct link_map *map)
> > +{
> > +  /* Adjust DT_X86_64_PLT address and DT_X86_64_PLTSZ values.  */
> > +  ElfW(Addr) plt = (map->l_info[DT_X86_64 (PLT)]->d_un.d_ptr
> > +                   + map->l_addr);
> > +  size_t pagesize = GLRO(dl_pagesize);
> > +  ElfW(Addr) plt_aligned = ALIGN_DOWN (plt, pagesize);
> > +  size_t pltsz = (map->l_info[DT_X86_64 (PLTSZ)]->d_un.d_val
> > +                 + plt - plt_aligned);
> > +
> > +  if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
> > +    _dl_debug_printf ("\nchanging PLT in '%s' to writable\n",
> > +                     DSO_FILENAME (map->l_name));
> > +
> > +  if (__glibc_unlikely (__mprotect ((void *) plt_aligned, pltsz,
> > +                                   PROT_WRITE | PROT_READ) < 0))
> > +    {
> > +      if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
> > +       _dl_debug_printf ("\nfailed to change PLT in '%s' to writable\n",
> > +                         DSO_FILENAME (map->l_name));
> > +      return;
> > +    }
> > +
> > +  x86_64_rewrite_plt (map, plt_aligned, plt_aligned);
> > +
> > +  if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
> > +   _dl_debug_printf ("\nchanging PLT in '%s' back to read-only\n",
> > +          DSO_FILENAME (map->l_name));
> > +
> > +  if (__glibc_unlikely (__mprotect ((void *) plt_aligned, pltsz,
> > +                                   PROT_EXEC | PROT_READ) < 0))
> > +    _dl_signal_error (0, DSO_FILENAME (map->l_name), NULL,
> > +                     "failed to change PLT back to read-only");
> > +}
> > +
> > +/* Rewrite PLT entries to direct branch if possible.  */
> > +
> > +static inline void
> > +x86_64_dynamic_after_reloc (struct link_map *map, int lazy)
> > +{
> > +  /* Ignore DT_X86_64_PLT if the lazy binding is enabled.  */
> > +  if (lazy)
> > +    return;
> > +
> > +  if (__glibc_likely (map->l_info[DT_X86_64 (PLT)] == NULL))
> > +    return;
> > +
> > +  /* Ignore DT_X86_64_PLT if there is no R_X86_64_JUMP_SLOT.  */
> > +  if (!map->l_has_jump_slot_reloc)
> > +    return;
> > +
> > +  /* Ignore DT_X86_64_PLT on ld.so to avoid changing its own PLT.  */
> > +  if (map == &GL(dl_rtld_map) || map->l_real == &GL(dl_rtld_map))
> > +    return;
> > +
> > +  /* Ignore DT_X86_64_PLT if
> > +     1. DT_JMPREL isn't available or its value is 0.
> > +     2. DT_PLTRELSZ is 0.
> > +     3. DT_X86_64_PLTSZ isn't available or its value is 0.
> > +     4. DT_X86_64_PLTENT isn't available or its value is smaller
> > +     than 16 bytes.  */
> > +  if (map->l_info[DT_JMPREL] == NULL
> > +      || map->l_info[DT_JMPREL]->d_un.d_ptr == 0
> > +      || map->l_info[DT_PLTRELSZ]->d_un.d_val == 0
> > +      || map->l_info[DT_X86_64 (PLTSZ)] == NULL
> > +      || map->l_info[DT_X86_64 (PLTSZ)]->d_un.d_val == 0
> > +      || map->l_info[DT_X86_64 (PLTENT)] == NULL
> > +      || map->l_info[DT_X86_64 (PLTENT)]->d_un.d_val < 16)
> > +    return;
Another sanity check might be `PLTSZ % PLTENT == 0`
> > +
> > +  if (GL(dl_x86_feature_control).plt_rewrite == plt_rewrite_enabled)
> > +    {
> > +      /* PLT rewrite is enabled.  Check if mprotect works.  */
> > +      void *plt = __mmap (NULL, 4096, PROT_READ | PROT_WRITE,
> > +                         MAP_PRIVATE | MAP_ANONYMOUS,
> > +                         -1, 0);
> > +      if (__glibc_unlikely (plt == MAP_FAILED))
> > +       GL(dl_x86_feature_control).plt_rewrite = plt_rewrite_none;
imo clearer would be to just initialize:
GL(dl_x86_feature_control).plt_rewrite = plt_rewrite_none;
then make all the failure cases just return (or a goto if these is cleanup).
> > +      else
> > +       {
> > +         *(int32_t *) plt = -1;
What is this meant to be doing?
> > +
> > +         /* If the memory can be changed to PROT_EXEC | PROT_READ,
> > +            rewrite PLT.  */
> > +         if (__mprotect (plt, 4096, PROT_EXEC | PROT_READ) == 0)
> > +           /* Use JMPABS on APX processors.  */
> > +           GL(dl_x86_feature_control).plt_rewrite
> > +             = (CPU_FEATURE_PRESENT_P (__get_cpu_features (), APX_F)
> > +                ? plt_rewrite_jmpabs
> > +                : plt_rewrite_jmp);
> > +         else
> > +           GL(dl_x86_feature_control).plt_rewrite = plt_rewrite_none;
> > +
> > +         __munmap (plt, 4096);
is the mmap just to see if we can get r/w/e permissions? If so is there a better
way to check memory permissions than mmap (which in the case of w->e IIRC
will be expensive?
Is it possible for us to have the permissions now but a short while
later to not?
> > +       }
> > +    }
> > +
> > +  /* Ignore DT_X86_64_PLT if PLT rewrite isn't enabled.  */
> > +  if (GL(dl_x86_feature_control).plt_rewrite == plt_rewrite_none)
> > +    return;
> > +
> > +  x86_64_rewrite_plt_in_place (map);
> > +}
> > +#endif
> > diff --git a/sysdeps/x86_64/link_map.h b/sysdeps/x86_64/link_map.h
> > new file mode 100644
> > index 0000000000..ddb8e78077
> > --- /dev/null
> > +++ b/sysdeps/x86_64/link_map.h
> > @@ -0,0 +1,22 @@
> > +/* Additional fields in struct link_map.  x86-64 version.
> > +   Copyright (C) 2023 Free Software Foundation, Inc.
> > +   This file is part of the GNU C Library.
> > +
> > +   The GNU C Library is free software; you can redistribute it and/or
> > +   modify it under the terms of the GNU Lesser General Public
> > +   License as published by the Free Software Foundation; either
> > +   version 2.1 of the License, or (at your option) any later version.
> > +
> > +   The GNU C Library is distributed in the hope that it will be useful,
> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +   Lesser General Public License for more details.
> > +
> > +   You should have received a copy of the GNU Lesser General Public
> > +   License along with the GNU C Library; if not, see
> > +   <https://www.gnu.org/licenses/>.  */
> > +
> > +/* Has R_X86_64_JUMP_SLOT relocation.  */
> > +bool l_has_jump_slot_reloc;
> > +
> > +#include <sysdeps/x86/link_map.h>
> > diff --git a/sysdeps/x86_64/tst-plt-rewrite1.c b/sysdeps/x86_64/tst-plt-rewrite1.c
> > new file mode 100644
> > index 0000000000..0dd4724a5a
> > --- /dev/null
> > +++ b/sysdeps/x86_64/tst-plt-rewrite1.c
> > @@ -0,0 +1,31 @@
> > +/* Test PLT rewrite.
> > +   Copyright (C) 2023 Free Software Foundation, Inc.
> > +   This file is part of the GNU C Library.
> > +
> > +   The GNU C Library is free software; you can redistribute it and/or
> > +   modify it under the terms of the GNU Lesser General Public
> > +   License as published by the Free Software Foundation; either
> > +   version 2.1 of the License, or (at your option) any later version.
> > +
> > +   The GNU C Library is distributed in the hope that it will be useful,
> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +   Lesser General Public License for more details.
> > +
> > +   You should have received a copy of the GNU Lesser General Public
> > +   License along with the GNU C Library; if not, see
> > +   <https://www.gnu.org/licenses/>.  */
> > +
> > +#include <string.h>
> > +#include <support/check.h>
> > +
> > +extern const char *foo (void);
> > +
> > +static int
> > +do_test (void)
> > +{
> > +  TEST_COMPARE (strcmp (foo (), "PLT rewrite works"), 0);
> > +  return 0;
> > +}
> > +
> > +#include <support/test-driver.c>
> > diff --git a/sysdeps/x86_64/tst-plt-rewritemod1.c b/sysdeps/x86_64/tst-plt-rewritemod1.c
> > new file mode 100644
> > index 0000000000..361b907790
> > --- /dev/null
> > +++ b/sysdeps/x86_64/tst-plt-rewritemod1.c
> > @@ -0,0 +1,32 @@
> > +/* Check PLT rewrite works correctly.
> > +   Copyright (C) 2023 Free Software Foundation, Inc.
> > +   This file is part of the GNU C Library.
> > +
> > +   The GNU C Library is free software; you can redistribute it and/or
> > +   modify it under the terms of the GNU Lesser General Public
> > +   License as published by the Free Software Foundation; either
> > +   version 2.1 of the License, or (at your option) any later version.
> > +
> > +   The GNU C Library is distributed in the hope that it will be useful,
> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +   Lesser General Public License for more details.
> > +
> > +   You should have received a copy of the GNU Lesser General Public
> > +   License along with the GNU C Library; if not, see
> > +   <https://www.gnu.org/licenses/>.  */
> > +
> > +/* foo calls bar with indirect branch via PLT.  PLT rewrite should
> > +   change it to direct branch.  */
> > +
> > +const char *
> > +bar (void)
> > +{
> > +  return "PLT rewrite works";
> > +}
> > +
> > +const char *
> > +foo (void)
> > +{
> > +  return bar ();
> > +}
> > --
> > 2.43.0
> >
>
> PING.
>
> --
> H.J.
Cristian Rodríguez Jan. 1, 2024, 11:28 p.m. UTC | #3
On Mon, Jan 1, 2024 at 11:13 AM H.J. Lu <hjl.tools@gmail.com> wrote:

> On Thu, Dec 21, 2023 at 7:04 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > Changes in v2:
> >
> > 1. Changed tunables to glibc.cpu.plt_rewrite.
> > 2. Added documentation.
> > 3. Added a testcase.
> >
> > H.J.
> > --
> > Add ELF_DYNAMIC_AFTER_RELOC to allow target specific processing after
> > relocation.
> >
>
> > $ GLIBC_TUNABLES=glibc.cpu.plt_rewrite=1
>
>
Why does it default to off ? I'm pretty confident that the vast majority of
users do not know what the heck the PLT even is, but are paying the price
of it.. the only things that come to mind that could break are rather
fragile debugging tools.


>
>
H.J. Lu Jan. 2, 2024, 1:01 a.m. UTC | #4
On Mon, Jan 1, 2024 at 10:36 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Mon, Jan 1, 2024 at 6:13 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Thu, Dec 21, 2023 at 7:04 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > Changes in v2:
> > >
> > > 1. Changed tunables to glibc.cpu.plt_rewrite.
> > > 2. Added documentation.
> > > 3. Added a testcase.
> > >
> > > H.J.
> > > --
> > > Add ELF_DYNAMIC_AFTER_RELOC to allow target specific processing after
> > > relocation.
> > >
> > > For x86-64, add
> > >
> > >  #define DT_X86_64_PLT     (DT_LOPROC + 0)
> > >  #define DT_X86_64_PLTSZ   (DT_LOPROC + 1)
> > >  #define DT_X86_64_PLTENT  (DT_LOPROC + 3)
> > >
> > > 1. DT_X86_64_PLT: The address of the procedure linkage table.
> > > 2. DT_X86_64_PLTSZ: The total size, in bytes, of the procedure linkage
> > > table.
> > > 3. DT_X86_64_PLTENT: The size, in bytes, of a procedure linkage table
> > > entry.
> > >
> > > With the r_addend field of the R_X86_64_JUMP_SLOT relocation set to the
> > > memory offset of the indirect branch instruction.
> > >
> > > Define ELF_DYNAMIC_AFTER_RELOC for x86-64 to rewrite the PLT section
> > > with direct branch after relocation when the lazy binding is disabled.
> > > PLT rewrite is disabled by default.  Add
> > >
> > > $ GLIBC_TUNABLES=glibc.cpu.plt_rewrite=1
> > >
> > > to enable PLT rewrite at run-time.
> > > ---
> > >  elf/dynamic-link.h                   |   5 +
> > >  elf/elf.h                            |   5 +
> > >  elf/tst-glibcelf.py                  |   1 +
> > >  manual/tunables.texi                 |   9 +
> > >  scripts/glibcelf.py                  |   4 +
> > >  sysdeps/x86/cet-control.h            |  14 ++
> > >  sysdeps/x86/cpu-features.c           |  10 ++
> > >  sysdeps/x86/dl-procruntime.c         |   1 +
> > >  sysdeps/x86/dl-tunables.list         |   5 +
> > >  sysdeps/x86_64/Makefile              |  19 +++
> > >  sysdeps/x86_64/configure             |  35 ++++
> > >  sysdeps/x86_64/configure.ac          |   4 +
> > >  sysdeps/x86_64/dl-dtprocnum.h        |  21 +++
> > >  sysdeps/x86_64/dl-machine.h          | 237 ++++++++++++++++++++++++++-
> > >  sysdeps/x86_64/link_map.h            |  22 +++
> > >  sysdeps/x86_64/tst-plt-rewrite1.c    |  31 ++++
> > >  sysdeps/x86_64/tst-plt-rewritemod1.c |  32 ++++
> > >  17 files changed, 454 insertions(+), 1 deletion(-)
> > >  create mode 100644 sysdeps/x86_64/dl-dtprocnum.h
> > >  create mode 100644 sysdeps/x86_64/link_map.h
> > >  create mode 100644 sysdeps/x86_64/tst-plt-rewrite1.c
> > >  create mode 100644 sysdeps/x86_64/tst-plt-rewritemod1.c
> > >
> > > diff --git a/elf/dynamic-link.h b/elf/dynamic-link.h
> > > index e7f755fc75..5351671044 100644
> > > --- a/elf/dynamic-link.h
> > > +++ b/elf/dynamic-link.h
> > > @@ -177,6 +177,10 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
> > >        }                                                                              \
> > >    } while (0);
> > >
> > > +# ifndef ELF_DYNAMIC_AFTER_RELOC
> > > +#  define ELF_DYNAMIC_AFTER_RELOC(map, lazy)
> > > +# endif
> > > +
> > >  /* This can't just be an inline function because GCC is too dumb
> > >     to inline functions containing inlines themselves.  */
> > >  # ifdef RTLD_BOOTSTRAP
> > > @@ -192,6 +196,7 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
> > >        ELF_DYNAMIC_DO_RELR (map);                                             \
> > >      ELF_DYNAMIC_DO_REL ((map), (scope), edr_lazy, skip_ifunc);               \
> > >      ELF_DYNAMIC_DO_RELA ((map), (scope), edr_lazy, skip_ifunc);                      \
> > > +    ELF_DYNAMIC_AFTER_RELOC ((map), (edr_lazy));                             \
> > >    } while (0)
> > >
> > >  #endif
> > > diff --git a/elf/elf.h b/elf/elf.h
> > > index 5c1c1972d1..eda4802f56 100644
> > > --- a/elf/elf.h
> > > +++ b/elf/elf.h
> > > @@ -3639,6 +3639,11 @@ enum
> > >  /* x86-64 sh_type values.  */
> > >  #define SHT_X86_64_UNWIND      0x70000001 /* Unwind information.  */
> > >
> > > +/* x86-64 d_tag values.  */
> > > +#define DT_X86_64_PLT          (DT_LOPROC + 0)
> > > +#define DT_X86_64_PLTSZ                (DT_LOPROC + 1)
> > > +#define DT_X86_64_PLTENT       (DT_LOPROC + 3)
> Why not +2?

From

https://www.sco.com/developers/gabi/latest/ch5.dynamic.html#tag_encodings

To make it simpler for tools to interpret the contents of dynamic
section entries,
the value of each tag, except for those in two special compatibility
ranges, will
determine the interpretation of the d_un union. A tag whose value is an even
number indicates a dynamic section entry that uses d_ptr. A tag whose value
is an odd number indicates a dynamic section entry that uses d_val or that
uses neither d_ptr nor d_val.

> Also, to me at least, `PLTENT` reads as "number of PLT entries". Maybe
> `PLTENTSZ`?

They are defined in x86-64 psABI:


https://gitlab.com/x86-psABIs/x86-64-ABI/-/commit/6d824a52a42d173eb838b879616c1be5870b593e

The typical dynamic tag name for entry size is DT_XXX_YYYENT:

#define DT_RELAENT 9 /* Size of one Rela reloc */
#define DT_SYMENT 11 /* Size of one symbol table entry */
#define DT_RELENT 19 /* Size of one Rel reloc */
#define DT_RELRENT 37 /* Size of one RELR relative relocaction

> > > +#define DT_X86_64_NUM          4
> > >
> > >  /* AM33 relocations.  */
> > >  #define R_MN10300_NONE         0       /* No reloc.  */
> > > diff --git a/elf/tst-glibcelf.py b/elf/tst-glibcelf.py
> > > index 6142ca28ae..52293f4adf 100644
> > > --- a/elf/tst-glibcelf.py
> > > +++ b/elf/tst-glibcelf.py
> > > @@ -187,6 +187,7 @@ DT_VALNUM
> > >  DT_VALRNGHI
> > >  DT_VALRNGLO
> > >  DT_VERSIONTAGNUM
> > > +DT_X86_64_NUM
> > >  ELFCLASSNUM
> > >  ELFDATANUM
> > >  EM_NUM
> > > diff --git a/manual/tunables.texi b/manual/tunables.texi
> > > index b31f16da84..f9bd83622e 100644
> > > --- a/manual/tunables.texi
> > > +++ b/manual/tunables.texi
> > > @@ -57,6 +57,7 @@ glibc.pthread.stack_cache_size: 0x2800000 (min: 0x0, max: 0xffffffffffffffff)
> > >  glibc.cpu.hwcap_mask: 0x6 (min: 0x0, max: 0xffffffffffffffff)
> > >  glibc.malloc.mmap_max: 0 (min: 0, max: 2147483647)
> > >  glibc.elision.skip_trylock_internal_abort: 3 (min: 0, max: 2147483647)
> > > +glibc.cpu.plt_rewrite: 0 (min: 0, max: 1)
> > >  glibc.malloc.tcache_unsorted_limit: 0x0 (min: 0x0, max: 0xffffffffffffffff)
> > >  glibc.cpu.x86_ibt:
> > >  glibc.cpu.hwcaps:
> > > @@ -614,6 +615,14 @@ this tunable.
> > >  This tunable is specific to 64-bit x86-64.
> > >  @end deftp
> > >
> > > +@deftp Tunable glibc.cpu.plt_rewrite
> > > +When this tunable is set to @code{1}, the dynamic linker will rewrite
> > > +the PLT section with direct branch after relocation if possible when
> > > +the lazy binding is disabled.
> > > +
> > > +This tunable is specific to x86-64.
> > > +@end deftp
> > > +
> > >  @node Memory Related Tunables
> > >  @section Memory Related Tunables
> > >  @cindex memory related tunables
> > > diff --git a/scripts/glibcelf.py b/scripts/glibcelf.py
> > > index b52e83d613..3a21e25201 100644
> > > --- a/scripts/glibcelf.py
> > > +++ b/scripts/glibcelf.py
> > > @@ -439,6 +439,8 @@ class DtRISCV(Dt):
> > >      """Supplemental DT_* constants for EM_RISCV."""
> > >  class DtSPARC(Dt):
> > >      """Supplemental DT_* constants for EM_SPARC."""
> > > +class DtX86_64(Dt):
> > > +    """Supplemental DT_* constants for EM_X86_64."""
> > >  _dt_skip = '''
> > >  DT_ENCODING DT_PROCNUM
> > >  DT_ADDRRNGLO DT_ADDRRNGHI DT_ADDRNUM
> > > @@ -451,6 +453,7 @@ DT_MIPS_NUM
> > >  DT_PPC_NUM
> > >  DT_PPC64_NUM
> > >  DT_SPARC_NUM
> > > +DT_X86_64_NUM
> > >  '''.strip().split()
> > >  _register_elf_h(DtAARCH64, prefix='DT_AARCH64_', skip=_dt_skip, parent=Dt)
> > >  _register_elf_h(DtALPHA, prefix='DT_ALPHA_', skip=_dt_skip, parent=Dt)
> > > @@ -461,6 +464,7 @@ _register_elf_h(DtPPC, prefix='DT_PPC_', skip=_dt_skip, parent=Dt)
> > >  _register_elf_h(DtPPC64, prefix='DT_PPC64_', skip=_dt_skip, parent=Dt)
> > >  _register_elf_h(DtRISCV, prefix='DT_RISCV_', skip=_dt_skip, parent=Dt)
> > >  _register_elf_h(DtSPARC, prefix='DT_SPARC_', skip=_dt_skip, parent=Dt)
> > > +_register_elf_h(DtX86_64, prefix='DT_X86_64_', skip=_dt_skip, parent=Dt)
> > >  _register_elf_h(Dt, skip=_dt_skip, ranges=True)
> > >  del _dt_skip
> > >
> > > diff --git a/sysdeps/x86/cet-control.h b/sysdeps/x86/cet-control.h
> > > index 3bd00019e8..216d8cc576 100644
> > > --- a/sysdeps/x86/cet-control.h
> > > +++ b/sysdeps/x86/cet-control.h
> > > @@ -32,10 +32,24 @@ enum dl_x86_cet_control
> > >    cet_permissive
> > >  };
> > >
> > > +/* PLT rewrite control.  */
> > > +enum dl_plt_rewrite_control
> > > +{
> > > +  /* No PLT rewrite.  */
> > > +  plt_rewrite_none,
> > > +  /* PLT rewrite is enabled at run-time.  */
> > > +  plt_rewrite_enabled,
> > > +  /* Rewrite PLT with JMP at run-time.  */
> > > +  plt_rewrite_jmp,
> > > +  /* Rewrite PLT with JMPABS at run-time.  */
> > > +  plt_rewrite_jmpabs
> > > +};
> > > +
> > >  struct dl_x86_feature_control
> > >  {
> > >    enum dl_x86_cet_control ibt : 2;
> > >    enum dl_x86_cet_control shstk : 2;
> > > +  enum dl_plt_rewrite_control plt_rewrite : 2;
> > >  };
> > >
> > >  #endif /* cet-control.h */
> > > diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
> > > index 0bf923d48b..5f2962bbe5 100644
> > > --- a/sysdeps/x86/cpu-features.c
> > > +++ b/sysdeps/x86/cpu-features.c
> > > @@ -27,6 +27,13 @@
> > >  extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
> > >    attribute_hidden;
> > >
> > > +static void
> > > +TUNABLE_CALLBACK (set_plt_rewrite) (tunable_val_t *valp)
> > > +{
> > > +  if (valp->numval)
> > > +    GL(dl_x86_feature_control).plt_rewrite = plt_rewrite_enabled;
> > > +}
> > > +
> > >  #ifdef __LP64__
> > >  static void
> > >  TUNABLE_CALLBACK (set_prefer_map_32bit_exec) (tunable_val_t *valp)
> > > @@ -996,6 +1003,9 @@ no_cpuid:
> > >
> > >    TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
> > >
> > > +  TUNABLE_GET (plt_rewrite, tunable_val_t *,
> > > +              TUNABLE_CALLBACK (set_plt_rewrite));
> > > +
> > >  #ifdef __LP64__
> > >    TUNABLE_GET (prefer_map_32bit_exec, tunable_val_t *,
> > >                TUNABLE_CALLBACK (set_prefer_map_32bit_exec));
> > > diff --git a/sysdeps/x86/dl-procruntime.c b/sysdeps/x86/dl-procruntime.c
> > > index 2fb682ded3..03a612f3f3 100644
> > > --- a/sysdeps/x86/dl-procruntime.c
> > > +++ b/sysdeps/x86/dl-procruntime.c
> > > @@ -67,6 +67,7 @@ PROCINFO_CLASS struct dl_x86_feature_control _dl_x86_feature_control
> > >  = {
> > >      .ibt = DEFAULT_DL_X86_CET_CONTROL,
> > >      .shstk = DEFAULT_DL_X86_CET_CONTROL,
> > > +    .plt_rewrite = plt_rewrite_none,
> > >    }
> > >  # endif
> > >  # if !defined SHARED || defined PROCINFO_DECL
> > > diff --git a/sysdeps/x86/dl-tunables.list b/sysdeps/x86/dl-tunables.list
> > > index feb7004036..49da67fd74 100644
> > > --- a/sysdeps/x86/dl-tunables.list
> > > +++ b/sysdeps/x86/dl-tunables.list
> > > @@ -66,5 +66,10 @@ glibc {
> > >      x86_shared_cache_size {
> > >        type: SIZE_T
> > >      }
> > > +    plt_rewrite {
> > > +      type: INT_32
> > > +      minval: 0
> > > +      maxval: 1
> > > +    }
> > >    }
> > >  }
> > > diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
> > > index 00120ca9ca..106f1d0f55 100644
> > > --- a/sysdeps/x86_64/Makefile
> > > +++ b/sysdeps/x86_64/Makefile
> > > @@ -175,6 +175,25 @@ ifeq (no,$(build-hardcoded-path-in-tests))
> > >  tests-container += tst-glibc-hwcaps-cache
> > >  endif
> > >
> > > +ifeq (yes,$(have-z-mark-plt))
> > > +tests += \
> > > +  tst-plt-rewrite1 \
> > > +# tests
> > > +modules-names += \
> > > +  tst-plt-rewritemod1 \
> > > +# modules-names
> > > +
> > > +tst-plt-rewrite1-no-pie = yes
> > > +LDFLAGS-tst-plt-rewrite1 = -Wl,-z,mark-plt,-z,now
> > > +LDFLAGS-tst-plt-rewritemod1.so = -Wl,-z,mark-plt,-z,now
> > > +tst-plt-rewrite1-ENV = GLIBC_TUNABLES=glibc.cpu.plt_rewrite=1 LD_DEBUG=files:bindings
> > > +$(objpfx)tst-plt-rewrite1: $(objpfx)tst-plt-rewritemod1.so
> > > +$(objpfx)tst-plt-rewrite1.out: /dev/null $(objpfx)tst-plt-rewrite1
> > > +       $(tst-plt-rewrite1-ENV) $(make-test-out) > $@ 2>&1; \
> > > +       grep -q -E "changing 'bar' PLT entry in .*/elf/tst-plt-rewritemod1.so' to direct branch" $@; \
> > > +       $(evaluate-test)
> > > +endif
> > > +
> > >  endif # $(subdir) == elf
> > >
> > >  ifeq ($(subdir),csu)
> > > diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure
> > > index e307467afa..a15eaa349d 100755
> > > --- a/sysdeps/x86_64/configure
> > > +++ b/sysdeps/x86_64/configure
> > > @@ -25,6 +25,41 @@ printf "%s\n" "$libc_cv_cc_mprefer_vector_width" >&6; }
> > >  config_vars="$config_vars
> > >  config-cflags-mprefer-vector-width = $libc_cv_cc_mprefer_vector_width"
> > >
> > > +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for linker that supports -z mark-plt" >&5
> > > +printf %s "checking for linker that supports -z mark-plt... " >&6; }
> > > +libc_linker_feature=no
> > > +cat > conftest.c <<EOF
> > > +int _start (void) { return 42; }
> > > +EOF
> > > +if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS $no_ssp
> > > +                 -Wl,-z,mark-plt -nostdlib -nostartfiles
> > > +                 -fPIC -shared -o conftest.so conftest.c
> > > +                 1>&5'
> > > +  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
> > > +  (eval $ac_try) 2>&5
> > > +  ac_status=$?
> > > +  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
> > > +  test $ac_status = 0; }; }
> > > +then
> > > +  if ${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS $no_ssp -Wl,-z,mark-plt -nostdlib \
> > > +      -nostartfiles -fPIC -shared -o conftest.so conftest.c 2>&1 \
> > > +      | grep "warning: -z mark-plt ignored" > /dev/null 2>&1; then
> > > +    true
> > > +  else
> > > +    libc_linker_feature=yes
> > > +  fi
> > > +fi
> > > +rm -f conftest*
> > > +if test $libc_linker_feature = yes; then
> > > +  libc_cv_z_mark_plt=yes
> > > +else
> > > +  libc_cv_z_mark_plt=no
> > > +fi
> > > +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_linker_feature" >&5
> > > +printf "%s\n" "$libc_linker_feature" >&6; }
> > > +config_vars="$config_vars
> > > +have-z-mark-plt = $libc_cv_z_mark_plt"
> > > +
> > >  if test x"$build_mathvec" = xnotset; then
> > >    build_mathvec=yes
> > >  fi
> > > diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac
> > > index 1215dcb1e4..40f642db31 100644
> > > --- a/sysdeps/x86_64/configure.ac
> > > +++ b/sysdeps/x86_64/configure.ac
> > > @@ -10,6 +10,10 @@ LIBC_TRY_CC_OPTION([-mprefer-vector-width=128],
> > >  LIBC_CONFIG_VAR([config-cflags-mprefer-vector-width],
> > >                 [$libc_cv_cc_mprefer_vector_width])
> > >
> > > +LIBC_LINKER_FEATURE([-z mark-plt], [-Wl,-z,mark-plt],
> > > +                    [libc_cv_z_mark_plt=yes], [libc_cv_z_mark_plt=no])
> > > +LIBC_CONFIG_VAR([have-z-mark-plt], [$libc_cv_z_mark_plt])
> > > +
> > >  if test x"$build_mathvec" = xnotset; then
> > >    build_mathvec=yes
> > >  fi
> > > diff --git a/sysdeps/x86_64/dl-dtprocnum.h b/sysdeps/x86_64/dl-dtprocnum.h
> > > new file mode 100644
> > > index 0000000000..dd41d95b88
> > > --- /dev/null
> > > +++ b/sysdeps/x86_64/dl-dtprocnum.h
> > > @@ -0,0 +1,21 @@
> > > +/* Configuration of lookup functions.  x64-64 version.
> > > +   Copyright (C) 2023 Free Software Foundation, Inc.
> > > +   This file is part of the GNU C Library.
> > > +
> > > +   The GNU C Library is free software; you can redistribute it and/or
> > > +   modify it under the terms of the GNU Lesser General Public
> > > +   License as published by the Free Software Foundation; either
> > > +   version 2.1 of the License, or (at your option) any later version.
> > > +
> > > +   The GNU C Library is distributed in the hope that it will be useful,
> > > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > > +   Lesser General Public License for more details.
> > > +
> > > +   You should have received a copy of the GNU Lesser General Public
> > > +   License along with the GNU C Library; if not, see
> > > +   <https://www.gnu.org/licenses/>.  */
> > > +
> > > +/* Number of extra dynamic section entries for this architecture.  By
> > > +   default there are none.  */
> > > +#define DT_THISPROCNUM DT_X86_64_NUM
> > > diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
> > > index 581a2f1a9e..973042f9e6 100644
> > > --- a/sysdeps/x86_64/dl-machine.h
> > > +++ b/sysdeps/x86_64/dl-machine.h
> > > @@ -30,6 +30,9 @@
> > >  #include <dl-machine-rel.h>
> > >  #include <isa-level.h>
> > >
> > > +/* Translate a processor specific dynamic tag to the index in l_info array.  */
> > > +#define DT_X86_64(x) (DT_X86_64_##x - DT_LOPROC + DT_NUM)
> > > +
> > >  /* Return nonzero iff ELF header is compatible with the running host.  */
> > >  static inline int __attribute__ ((unused))
> > >  elf_machine_matches_host (const ElfW(Ehdr) *ehdr)
> > > @@ -304,8 +307,9 @@ and creates an unsatisfiable circular dependency.\n",
> > >
> > >        switch (r_type)
> > >         {
> > > -       case R_X86_64_GLOB_DAT:
> > >         case R_X86_64_JUMP_SLOT:
> > > +         map->l_has_jump_slot_reloc = true;
> > > +       case R_X86_64_GLOB_DAT:
> > >           *reloc_addr = value;
> > >           break;
> > >
> > > @@ -541,3 +545,234 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
> > >  }
> > >
> > >  #endif /* RESOLVE_MAP */
> > > +
> > > +#if !defined ELF_DYNAMIC_AFTER_RELOC && !defined RTLD_BOOTSTRAP \
> > > +    && defined SHARED
> > > +# define ELF_DYNAMIC_AFTER_RELOC(map, lazy) \
> > > +  x86_64_dynamic_after_reloc (map, (lazy))
> > > +
> > > +static const char *
> > > +x86_64_reloc_symbol_name (struct link_map *map, const ElfW(Rela) *reloc)
> > > +{
> > > +  const ElfW(Sym) *const symtab
> > > +    = (const void *) map->l_info[DT_SYMTAB]->d_un.d_ptr;
> > > +  const ElfW(Sym) *const refsym = &symtab[ELFW (R_SYM) (reloc->r_info)];
> > > +  const char *strtab = (const char *) map->l_info[DT_STRTAB]->d_un.d_ptr;
> > > +  return strtab + refsym->st_name;
> > > +}
> > > +
> > > +static void
> > > +x86_64_rewrite_plt (struct link_map *map, ElfW(Addr) plt_rewrite,
> > > +                   ElfW(Addr) plt_aligned)
> > > +{
> > > +  ElfW(Addr) plt_rewrite_bias = plt_rewrite - plt_aligned;
> > > +  ElfW(Addr) l_addr = map->l_addr;
> > > +  ElfW(Addr) pltent = map->l_info[DT_X86_64 (PLTENT)]->d_un.d_val;
> > > +  ElfW(Addr) start = map->l_info[DT_JMPREL]->d_un.d_ptr;
> > > +  ElfW(Addr) size = map->l_info[DT_PLTRELSZ]->d_un.d_val;
> > > +  const ElfW(Rela) *reloc = (const void *) start;
> > > +  const ElfW(Rela) *reloc_end = (const void *) (start + size);
> > > +
> > > +  unsigned int feature_1 = THREAD_GETMEM (THREAD_SELF,
> > > +                                         header.feature_1);
> > > +  bool ibt_enabled_p
> > > +    = (feature_1 & GNU_PROPERTY_X86_FEATURE_1_IBT) != 0;
> > > +
> > > +  if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
> > > +    _dl_debug_printf ("\nchanging PLT in '%s' to direct branch\n",
> > > +                     DSO_FILENAME (map->l_name));
> > > +
> > > +  for (; reloc < reloc_end; reloc++)
> > > +    if (ELFW(R_TYPE) (reloc->r_info) == R_X86_64_JUMP_SLOT)
> > > +      {
> > > +       /* Get the value from the GOT entry.  */
> > > +       ElfW(Addr) value = *(ElfW(Addr) *) (l_addr + reloc->r_offset);
> > > +
> > > +       /* Get the corresponding PLT entry from r_addend.  */
> > > +       ElfW(Addr) branch_start = l_addr + reloc->r_addend;
> > > +       /* Skip ENDBR64 if IBT isn't enabled.  */
> > > +       if (!ibt_enabled_p)
> > > +         branch_start = ALIGN_DOWN (branch_start, pltent);
> > > +       /* Get the displacement from the branch target.  */
> > > +       ElfW(Addr) disp = value - branch_start - 5;
> IMO 5 should be a usefully named define. Maybe "JMP_INSN_SIZE" or something.

Will fix it.

> > > +       ElfW(Addr) plt_end;
> > > +       ElfW(Addr) pad;
> > > +
> > > +       branch_start += plt_rewrite_bias;
> > > +       plt_end = (branch_start & -pltent) + pltent;
> > > +
> > > +       /* Update the PLT entry.  */
> > > +       if ((disp + 0x80000000ULL) <= 0xffffffffULL)
> 0x80000000 -> (unsigned long long)I32_MIN?
> 0xffffffff -> (unsigned long long)U32_MAX?

I will change it.

> Or at least a define.
> Its easy to get the number of hex characters wrong.
> > > +         {
> > > +           /* If the target branch can be reached with a direct branch,
> > > +              rewrite the PLT entry with a direct branch.  */
> > > +           if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_BINDINGS))
> > > +             {
> > > +               const char *sym_name = x86_64_reloc_symbol_name (map,
> > > +                                                                reloc);
> > > +               _dl_debug_printf ("changing '%s' PLT entry in '%s' to "
> > > +                                 "direct branch\n", sym_name,
> > > +                                 DSO_FILENAME (map->l_name));
> > > +             }
> > > +
> > > +           pad = branch_start + 5;
> again a define.

Will fix it.

> > > +
> > > +           if (__glibc_unlikely (pad > plt_end))
> > > +             {
> > > +               if (__glibc_unlikely (GLRO(dl_debug_mask)
> > > +                                     & DL_DEBUG_BINDINGS))
> > > +                 {
> > > +                   const char *sym_name
> > > +                     = x86_64_reloc_symbol_name (map, reloc);
> > > +                   _dl_debug_printf ("\ninvalid r_addend of "
> > > +                                     "R_X86_64_JUMP_SLOT against '%s' "
> > > +                                     "in '%s'\n", sym_name,
> > > +                                     DSO_FILENAME (map->l_name));
> > > +                 }
> > > +
> > > +               continue;
> > > +             }
> > > +
> > > +           /* Write out direct branch.  */
> > > +           *(uint8_t *) branch_start = 0xe9;
> will it ever be disp8?

This should be as fast as possible.  Adding an unlikely disp8 check will
slow it down.

> But again, imo `0xe9` should be defined as something along the lines of
> JMP_DISP32_OPCODE

Will fix it.

> > > +           *((uint32_t *) (branch_start + 1)) = disp;
> Does it need to be a u32 with aliasing attribute? Maybe just memcpy?

elf_machine_rela has some similar codes.  It is OK.

> > > +         }
> > > +       else
> > > +         {
> > > +           if (GL(dl_x86_feature_control).plt_rewrite
> > > +               != plt_rewrite_jmpabs)
> > > +             continue;
> > > +
> > > +           pad = branch_start + 11;
> > > +
> I assume this is 16 (plt entry size) - 5 (jmp insn size)?
> Should this use the `DT_X86_64_PLTENT` field?

Good idea.  Will change it.

> > > +           if (pad > plt_end)
> > > +             continue;
> > > +
> > > +           /* Rewrite the PLT entry with JMPABS.  */
> > > +           if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_BINDINGS))
> > > +             {
> > > +               const char *sym_name = x86_64_reloc_symbol_name (map,
> > > +                                                                reloc);
> > > +               _dl_debug_printf ("changing '%s' PLT entry in '%s' to "
> > > +                                 "JMPABS\n", sym_name,
> > > +                                 DSO_FILENAME (map->l_name));
> > > +             }
> > > +
> > > +           /* "jmpabs $target" for 64-bit displacement.  */
> > > +           *(uint8_t *) (branch_start + 0) = 0xd5;
> > > +           *(uint8_t *) (branch_start + 1) = 0x0;
> > > +           *(uint8_t *) (branch_start + 2) = 0xa1;
> > > +           *(uint64_t *) (branch_start + 3) = value;
> again this is not a clear way to encode instructions...

This is JMPABS in Intel APX, which has a 3-byte opcode + 64bit address.

> What instruction is meant to be?
> ```
> .byte 0xd5, 0x0, 0xa1, 0x7f, 0x12, 0x34, 0x00, 0x00, 0x00, 0x00, 0x00
> ```
> ->
> ```
> 0000000000000000 <.text>:
>        0: d5                            <unknown>
>        1: 00 a1 7f 12 34 00            addb %ah, 3412607(%rcx)
>        7: 00 00                        addb %al, (%rax)
>        9: 00 00                        addb %al, (%rax)
>
> ```

You need the latest binutils master branch which support JMPABS in
Intel APX:

0000000000000000 <.text>:
   0: d5 00 a1 7f 12 34 00 00 00 00 00 jmpabs $0x34127f


> > > +         }
> > > +
> > > +       /* Fill the unused part of the PLT entry with INT3.  */
> > > +       for (; pad < plt_end; pad++)
> > > +         *(uint8_t *) pad = 0xcc;
> maybe `INT3_OPCODE`?

Will change it.

> > > +      }
> > > +}
> > > +
> > > +static inline void
> > > +x86_64_rewrite_plt_in_place (struct link_map *map)
> > > +{
> > > +  /* Adjust DT_X86_64_PLT address and DT_X86_64_PLTSZ values.  */
> > > +  ElfW(Addr) plt = (map->l_info[DT_X86_64 (PLT)]->d_un.d_ptr
> > > +                   + map->l_addr);
> > > +  size_t pagesize = GLRO(dl_pagesize);
> > > +  ElfW(Addr) plt_aligned = ALIGN_DOWN (plt, pagesize);
> > > +  size_t pltsz = (map->l_info[DT_X86_64 (PLTSZ)]->d_un.d_val
> > > +                 + plt - plt_aligned);
> > > +
> > > +  if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
> > > +    _dl_debug_printf ("\nchanging PLT in '%s' to writable\n",
> > > +                     DSO_FILENAME (map->l_name));
> > > +
> > > +  if (__glibc_unlikely (__mprotect ((void *) plt_aligned, pltsz,
> > > +                                   PROT_WRITE | PROT_READ) < 0))
> > > +    {
> > > +      if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
> > > +       _dl_debug_printf ("\nfailed to change PLT in '%s' to writable\n",
> > > +                         DSO_FILENAME (map->l_name));
> > > +      return;
> > > +    }
> > > +
> > > +  x86_64_rewrite_plt (map, plt_aligned, plt_aligned);
> > > +
> > > +  if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
> > > +   _dl_debug_printf ("\nchanging PLT in '%s' back to read-only\n",
> > > +          DSO_FILENAME (map->l_name));
> > > +
> > > +  if (__glibc_unlikely (__mprotect ((void *) plt_aligned, pltsz,
> > > +                                   PROT_EXEC | PROT_READ) < 0))
> > > +    _dl_signal_error (0, DSO_FILENAME (map->l_name), NULL,
> > > +                     "failed to change PLT back to read-only");
> > > +}
> > > +
> > > +/* Rewrite PLT entries to direct branch if possible.  */
> > > +
> > > +static inline void
> > > +x86_64_dynamic_after_reloc (struct link_map *map, int lazy)
> > > +{
> > > +  /* Ignore DT_X86_64_PLT if the lazy binding is enabled.  */
> > > +  if (lazy)
> > > +    return;
> > > +
> > > +  if (__glibc_likely (map->l_info[DT_X86_64 (PLT)] == NULL))
> > > +    return;
> > > +
> > > +  /* Ignore DT_X86_64_PLT if there is no R_X86_64_JUMP_SLOT.  */
> > > +  if (!map->l_has_jump_slot_reloc)
> > > +    return;
> > > +
> > > +  /* Ignore DT_X86_64_PLT on ld.so to avoid changing its own PLT.  */
> > > +  if (map == &GL(dl_rtld_map) || map->l_real == &GL(dl_rtld_map))
> > > +    return;
> > > +
> > > +  /* Ignore DT_X86_64_PLT if
> > > +     1. DT_JMPREL isn't available or its value is 0.
> > > +     2. DT_PLTRELSZ is 0.
> > > +     3. DT_X86_64_PLTSZ isn't available or its value is 0.
> > > +     4. DT_X86_64_PLTENT isn't available or its value is smaller
> > > +     than 16 bytes.  */
> > > +  if (map->l_info[DT_JMPREL] == NULL
> > > +      || map->l_info[DT_JMPREL]->d_un.d_ptr == 0
> > > +      || map->l_info[DT_PLTRELSZ]->d_un.d_val == 0
> > > +      || map->l_info[DT_X86_64 (PLTSZ)] == NULL
> > > +      || map->l_info[DT_X86_64 (PLTSZ)]->d_un.d_val == 0
> > > +      || map->l_info[DT_X86_64 (PLTENT)] == NULL
> > > +      || map->l_info[DT_X86_64 (PLTENT)]->d_un.d_val < 16)
> > > +    return;
> Another sanity check might be `PLTSZ % PLTENT == 0`
> > > +
> > > +  if (GL(dl_x86_feature_control).plt_rewrite == plt_rewrite_enabled)
> > > +    {
> > > +      /* PLT rewrite is enabled.  Check if mprotect works.  */
> > > +      void *plt = __mmap (NULL, 4096, PROT_READ | PROT_WRITE,
> > > +                         MAP_PRIVATE | MAP_ANONYMOUS,
> > > +                         -1, 0);
> > > +      if (__glibc_unlikely (plt == MAP_FAILED))
> > > +       GL(dl_x86_feature_control).plt_rewrite = plt_rewrite_none;
> imo clearer would be to just initialize:
> GL(dl_x86_feature_control).plt_rewrite = plt_rewrite_none;
> then make all the failure cases just return (or a goto if these is cleanup).

This function may be called many times.   If the first mprotect check fails,
we shouldn't call it again.   With this approach, the mprotect check will
only be done once.

> > > +      else
> > > +       {
> > > +         *(int32_t *) plt = -1;
> What is this meant to be doing?

Update the read-write page and check if it can be changed to read-exec.

> > > +
> > > +         /* If the memory can be changed to PROT_EXEC | PROT_READ,
> > > +            rewrite PLT.  */
> > > +         if (__mprotect (plt, 4096, PROT_EXEC | PROT_READ) == 0)
> > > +           /* Use JMPABS on APX processors.  */
> > > +           GL(dl_x86_feature_control).plt_rewrite
> > > +             = (CPU_FEATURE_PRESENT_P (__get_cpu_features (), APX_F)
> > > +                ? plt_rewrite_jmpabs
> > > +                : plt_rewrite_jmp);
> > > +         else
> > > +           GL(dl_x86_feature_control).plt_rewrite = plt_rewrite_none;
> > > +
> > > +         __munmap (plt, 4096);
> is the mmap just to see if we can get r/w/e permissions? If so is there a better
> way to check memory permissions than mmap (which in the case of w->e IIRC

I don't know if there is a better way.

> will be expensive?
> Is it possible for us to have the permissions now but a short while
> later to not?

It is very unlikely.  But I don't know for sure.

> > > +       }
> > > +    }
> > > +
> > > +  /* Ignore DT_X86_64_PLT if PLT rewrite isn't enabled.  */
> > > +  if (GL(dl_x86_feature_control).plt_rewrite == plt_rewrite_none)
> > > +    return;
> > > +
> > > +  x86_64_rewrite_plt_in_place (map);
> > > +}
> > > +#endif
> > > diff --git a/sysdeps/x86_64/link_map.h b/sysdeps/x86_64/link_map.h
> > > new file mode 100644
> > > index 0000000000..ddb8e78077
> > > --- /dev/null
> > > +++ b/sysdeps/x86_64/link_map.h
> > > @@ -0,0 +1,22 @@
> > > +/* Additional fields in struct link_map.  x86-64 version.
> > > +   Copyright (C) 2023 Free Software Foundation, Inc.
> > > +   This file is part of the GNU C Library.
> > > +
> > > +   The GNU C Library is free software; you can redistribute it and/or
> > > +   modify it under the terms of the GNU Lesser General Public
> > > +   License as published by the Free Software Foundation; either
> > > +   version 2.1 of the License, or (at your option) any later version.
> > > +
> > > +   The GNU C Library is distributed in the hope that it will be useful,
> > > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > > +   Lesser General Public License for more details.
> > > +
> > > +   You should have received a copy of the GNU Lesser General Public
> > > +   License along with the GNU C Library; if not, see
> > > +   <https://www.gnu.org/licenses/>.  */
> > > +
> > > +/* Has R_X86_64_JUMP_SLOT relocation.  */
> > > +bool l_has_jump_slot_reloc;
> > > +
> > > +#include <sysdeps/x86/link_map.h>
> > > diff --git a/sysdeps/x86_64/tst-plt-rewrite1.c b/sysdeps/x86_64/tst-plt-rewrite1.c
> > > new file mode 100644
> > > index 0000000000..0dd4724a5a
> > > --- /dev/null
> > > +++ b/sysdeps/x86_64/tst-plt-rewrite1.c
> > > @@ -0,0 +1,31 @@
> > > +/* Test PLT rewrite.
> > > +   Copyright (C) 2023 Free Software Foundation, Inc.
> > > +   This file is part of the GNU C Library.
> > > +
> > > +   The GNU C Library is free software; you can redistribute it and/or
> > > +   modify it under the terms of the GNU Lesser General Public
> > > +   License as published by the Free Software Foundation; either
> > > +   version 2.1 of the License, or (at your option) any later version.
> > > +
> > > +   The GNU C Library is distributed in the hope that it will be useful,
> > > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > > +   Lesser General Public License for more details.
> > > +
> > > +   You should have received a copy of the GNU Lesser General Public
> > > +   License along with the GNU C Library; if not, see
> > > +   <https://www.gnu.org/licenses/>.  */
> > > +
> > > +#include <string.h>
> > > +#include <support/check.h>
> > > +
> > > +extern const char *foo (void);
> > > +
> > > +static int
> > > +do_test (void)
> > > +{
> > > +  TEST_COMPARE (strcmp (foo (), "PLT rewrite works"), 0);
> > > +  return 0;
> > > +}
> > > +
> > > +#include <support/test-driver.c>
> > > diff --git a/sysdeps/x86_64/tst-plt-rewritemod1.c b/sysdeps/x86_64/tst-plt-rewritemod1.c
> > > new file mode 100644
> > > index 0000000000..361b907790
> > > --- /dev/null
> > > +++ b/sysdeps/x86_64/tst-plt-rewritemod1.c
> > > @@ -0,0 +1,32 @@
> > > +/* Check PLT rewrite works correctly.
> > > +   Copyright (C) 2023 Free Software Foundation, Inc.
> > > +   This file is part of the GNU C Library.
> > > +
> > > +   The GNU C Library is free software; you can redistribute it and/or
> > > +   modify it under the terms of the GNU Lesser General Public
> > > +   License as published by the Free Software Foundation; either
> > > +   version 2.1 of the License, or (at your option) any later version.
> > > +
> > > +   The GNU C Library is distributed in the hope that it will be useful,
> > > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > > +   Lesser General Public License for more details.
> > > +
> > > +   You should have received a copy of the GNU Lesser General Public
> > > +   License along with the GNU C Library; if not, see
> > > +   <https://www.gnu.org/licenses/>.  */
> > > +
> > > +/* foo calls bar with indirect branch via PLT.  PLT rewrite should
> > > +   change it to direct branch.  */
> > > +
> > > +const char *
> > > +bar (void)
> > > +{
> > > +  return "PLT rewrite works";
> > > +}
> > > +
> > > +const char *
> > > +foo (void)
> > > +{
> > > +  return bar ();
> > > +}
> > > --
> > > 2.43.0
> > >
> >
> > PING.
> >
> > --
> > H.J.
H.J. Lu Jan. 2, 2024, 1:02 a.m. UTC | #5
On Mon, Jan 1, 2024 at 3:28 PM Cristian Rodríguez <cristian@rodriguez.im> wrote:
>
>
>
> On Mon, Jan 1, 2024 at 11:13 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>>
>> On Thu, Dec 21, 2023 at 7:04 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>> >
>> > Changes in v2:
>> >
>> > 1. Changed tunables to glibc.cpu.plt_rewrite.
>> > 2. Added documentation.
>> > 3. Added a testcase.
>> >
>> > H.J.
>> > --
>> > Add ELF_DYNAMIC_AFTER_RELOC to allow target specific processing after
>> > relocation.
>> >
>>
>> > $ GLIBC_TUNABLES=glibc.cpu.plt_rewrite=1
>>
>
> Why does it default to off ? I'm pretty confident that the vast majority of users do not know what the heck the PLT even is, but are paying the price of it.. the only things that come to mind that could break are rather fragile debugging tools.
>

SELinux may disallow it.
Noah Goldstein Jan. 2, 2024, 2:18 a.m. UTC | #6
On Mon, Jan 1, 2024 at 5:01 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Mon, Jan 1, 2024 at 10:36 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > On Mon, Jan 1, 2024 at 6:13 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > On Thu, Dec 21, 2023 at 7:04 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > >
> > > > Changes in v2:
> > > >
> > > > 1. Changed tunables to glibc.cpu.plt_rewrite.
> > > > 2. Added documentation.
> > > > 3. Added a testcase.
> > > >
> > > > H.J.
> > > > --
> > > > Add ELF_DYNAMIC_AFTER_RELOC to allow target specific processing after
> > > > relocation.
> > > >
> > > > For x86-64, add
> > > >
> > > >  #define DT_X86_64_PLT     (DT_LOPROC + 0)
> > > >  #define DT_X86_64_PLTSZ   (DT_LOPROC + 1)
> > > >  #define DT_X86_64_PLTENT  (DT_LOPROC + 3)
> > > >
> > > > 1. DT_X86_64_PLT: The address of the procedure linkage table.
> > > > 2. DT_X86_64_PLTSZ: The total size, in bytes, of the procedure linkage
> > > > table.
> > > > 3. DT_X86_64_PLTENT: The size, in bytes, of a procedure linkage table
> > > > entry.
> > > >
> > > > With the r_addend field of the R_X86_64_JUMP_SLOT relocation set to the
> > > > memory offset of the indirect branch instruction.
> > > >
> > > > Define ELF_DYNAMIC_AFTER_RELOC for x86-64 to rewrite the PLT section
> > > > with direct branch after relocation when the lazy binding is disabled.
> > > > PLT rewrite is disabled by default.  Add
> > > >
> > > > $ GLIBC_TUNABLES=glibc.cpu.plt_rewrite=1
> > > >
> > > > to enable PLT rewrite at run-time.
> > > > ---
> > > >  elf/dynamic-link.h                   |   5 +
> > > >  elf/elf.h                            |   5 +
> > > >  elf/tst-glibcelf.py                  |   1 +
> > > >  manual/tunables.texi                 |   9 +
> > > >  scripts/glibcelf.py                  |   4 +
> > > >  sysdeps/x86/cet-control.h            |  14 ++
> > > >  sysdeps/x86/cpu-features.c           |  10 ++
> > > >  sysdeps/x86/dl-procruntime.c         |   1 +
> > > >  sysdeps/x86/dl-tunables.list         |   5 +
> > > >  sysdeps/x86_64/Makefile              |  19 +++
> > > >  sysdeps/x86_64/configure             |  35 ++++
> > > >  sysdeps/x86_64/configure.ac          |   4 +
> > > >  sysdeps/x86_64/dl-dtprocnum.h        |  21 +++
> > > >  sysdeps/x86_64/dl-machine.h          | 237 ++++++++++++++++++++++++++-
> > > >  sysdeps/x86_64/link_map.h            |  22 +++
> > > >  sysdeps/x86_64/tst-plt-rewrite1.c    |  31 ++++
> > > >  sysdeps/x86_64/tst-plt-rewritemod1.c |  32 ++++
> > > >  17 files changed, 454 insertions(+), 1 deletion(-)
> > > >  create mode 100644 sysdeps/x86_64/dl-dtprocnum.h
> > > >  create mode 100644 sysdeps/x86_64/link_map.h
> > > >  create mode 100644 sysdeps/x86_64/tst-plt-rewrite1.c
> > > >  create mode 100644 sysdeps/x86_64/tst-plt-rewritemod1.c
> > > >
> > > > diff --git a/elf/dynamic-link.h b/elf/dynamic-link.h
> > > > index e7f755fc75..5351671044 100644
> > > > --- a/elf/dynamic-link.h
> > > > +++ b/elf/dynamic-link.h
> > > > @@ -177,6 +177,10 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
> > > >        }                                                                              \
> > > >    } while (0);
> > > >
> > > > +# ifndef ELF_DYNAMIC_AFTER_RELOC
> > > > +#  define ELF_DYNAMIC_AFTER_RELOC(map, lazy)
> > > > +# endif
> > > > +
> > > >  /* This can't just be an inline function because GCC is too dumb
> > > >     to inline functions containing inlines themselves.  */
> > > >  # ifdef RTLD_BOOTSTRAP
> > > > @@ -192,6 +196,7 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
> > > >        ELF_DYNAMIC_DO_RELR (map);                                             \
> > > >      ELF_DYNAMIC_DO_REL ((map), (scope), edr_lazy, skip_ifunc);               \
> > > >      ELF_DYNAMIC_DO_RELA ((map), (scope), edr_lazy, skip_ifunc);                      \
> > > > +    ELF_DYNAMIC_AFTER_RELOC ((map), (edr_lazy));                             \
> > > >    } while (0)
> > > >
> > > >  #endif
> > > > diff --git a/elf/elf.h b/elf/elf.h
> > > > index 5c1c1972d1..eda4802f56 100644
> > > > --- a/elf/elf.h
> > > > +++ b/elf/elf.h
> > > > @@ -3639,6 +3639,11 @@ enum
> > > >  /* x86-64 sh_type values.  */
> > > >  #define SHT_X86_64_UNWIND      0x70000001 /* Unwind information.  */
> > > >
> > > > +/* x86-64 d_tag values.  */
> > > > +#define DT_X86_64_PLT          (DT_LOPROC + 0)
> > > > +#define DT_X86_64_PLTSZ                (DT_LOPROC + 1)
> > > > +#define DT_X86_64_PLTENT       (DT_LOPROC + 3)
> > Why not +2?
>
> From
>
> https://www.sco.com/developers/gabi/latest/ch5.dynamic.html#tag_encodings
>
> To make it simpler for tools to interpret the contents of dynamic
> section entries,
> the value of each tag, except for those in two special compatibility
> ranges, will
> determine the interpretation of the d_un union. A tag whose value is an even
> number indicates a dynamic section entry that uses d_ptr. A tag whose value
> is an odd number indicates a dynamic section entry that uses d_val or that
> uses neither d_ptr nor d_val.
>
> > Also, to me at least, `PLTENT` reads as "number of PLT entries". Maybe
> > `PLTENTSZ`?
>
> They are defined in x86-64 psABI:
>
>
> https://gitlab.com/x86-psABIs/x86-64-ABI/-/commit/6d824a52a42d173eb838b879616c1be5870b593e
>
> The typical dynamic tag name for entry size is DT_XXX_YYYENT:
>
> #define DT_RELAENT 9 /* Size of one Rela reloc */
> #define DT_SYMENT 11 /* Size of one symbol table entry */
> #define DT_RELENT 19 /* Size of one Rel reloc */
> #define DT_RELRENT 37 /* Size of one RELR relative relocaction
>
> > > > +#define DT_X86_64_NUM          4
> > > >
> > > >  /* AM33 relocations.  */
> > > >  #define R_MN10300_NONE         0       /* No reloc.  */
> > > > diff --git a/elf/tst-glibcelf.py b/elf/tst-glibcelf.py
> > > > index 6142ca28ae..52293f4adf 100644
> > > > --- a/elf/tst-glibcelf.py
> > > > +++ b/elf/tst-glibcelf.py
> > > > @@ -187,6 +187,7 @@ DT_VALNUM
> > > >  DT_VALRNGHI
> > > >  DT_VALRNGLO
> > > >  DT_VERSIONTAGNUM
> > > > +DT_X86_64_NUM
> > > >  ELFCLASSNUM
> > > >  ELFDATANUM
> > > >  EM_NUM
> > > > diff --git a/manual/tunables.texi b/manual/tunables.texi
> > > > index b31f16da84..f9bd83622e 100644
> > > > --- a/manual/tunables.texi
> > > > +++ b/manual/tunables.texi
> > > > @@ -57,6 +57,7 @@ glibc.pthread.stack_cache_size: 0x2800000 (min: 0x0, max: 0xffffffffffffffff)
> > > >  glibc.cpu.hwcap_mask: 0x6 (min: 0x0, max: 0xffffffffffffffff)
> > > >  glibc.malloc.mmap_max: 0 (min: 0, max: 2147483647)
> > > >  glibc.elision.skip_trylock_internal_abort: 3 (min: 0, max: 2147483647)
> > > > +glibc.cpu.plt_rewrite: 0 (min: 0, max: 1)
> > > >  glibc.malloc.tcache_unsorted_limit: 0x0 (min: 0x0, max: 0xffffffffffffffff)
> > > >  glibc.cpu.x86_ibt:
> > > >  glibc.cpu.hwcaps:
> > > > @@ -614,6 +615,14 @@ this tunable.
> > > >  This tunable is specific to 64-bit x86-64.
> > > >  @end deftp
> > > >
> > > > +@deftp Tunable glibc.cpu.plt_rewrite
> > > > +When this tunable is set to @code{1}, the dynamic linker will rewrite
> > > > +the PLT section with direct branch after relocation if possible when
> > > > +the lazy binding is disabled.
> > > > +
> > > > +This tunable is specific to x86-64.
> > > > +@end deftp
> > > > +
> > > >  @node Memory Related Tunables
> > > >  @section Memory Related Tunables
> > > >  @cindex memory related tunables
> > > > diff --git a/scripts/glibcelf.py b/scripts/glibcelf.py
> > > > index b52e83d613..3a21e25201 100644
> > > > --- a/scripts/glibcelf.py
> > > > +++ b/scripts/glibcelf.py
> > > > @@ -439,6 +439,8 @@ class DtRISCV(Dt):
> > > >      """Supplemental DT_* constants for EM_RISCV."""
> > > >  class DtSPARC(Dt):
> > > >      """Supplemental DT_* constants for EM_SPARC."""
> > > > +class DtX86_64(Dt):
> > > > +    """Supplemental DT_* constants for EM_X86_64."""
> > > >  _dt_skip = '''
> > > >  DT_ENCODING DT_PROCNUM
> > > >  DT_ADDRRNGLO DT_ADDRRNGHI DT_ADDRNUM
> > > > @@ -451,6 +453,7 @@ DT_MIPS_NUM
> > > >  DT_PPC_NUM
> > > >  DT_PPC64_NUM
> > > >  DT_SPARC_NUM
> > > > +DT_X86_64_NUM
> > > >  '''.strip().split()
> > > >  _register_elf_h(DtAARCH64, prefix='DT_AARCH64_', skip=_dt_skip, parent=Dt)
> > > >  _register_elf_h(DtALPHA, prefix='DT_ALPHA_', skip=_dt_skip, parent=Dt)
> > > > @@ -461,6 +464,7 @@ _register_elf_h(DtPPC, prefix='DT_PPC_', skip=_dt_skip, parent=Dt)
> > > >  _register_elf_h(DtPPC64, prefix='DT_PPC64_', skip=_dt_skip, parent=Dt)
> > > >  _register_elf_h(DtRISCV, prefix='DT_RISCV_', skip=_dt_skip, parent=Dt)
> > > >  _register_elf_h(DtSPARC, prefix='DT_SPARC_', skip=_dt_skip, parent=Dt)
> > > > +_register_elf_h(DtX86_64, prefix='DT_X86_64_', skip=_dt_skip, parent=Dt)
> > > >  _register_elf_h(Dt, skip=_dt_skip, ranges=True)
> > > >  del _dt_skip
> > > >
> > > > diff --git a/sysdeps/x86/cet-control.h b/sysdeps/x86/cet-control.h
> > > > index 3bd00019e8..216d8cc576 100644
> > > > --- a/sysdeps/x86/cet-control.h
> > > > +++ b/sysdeps/x86/cet-control.h
> > > > @@ -32,10 +32,24 @@ enum dl_x86_cet_control
> > > >    cet_permissive
> > > >  };
> > > >
> > > > +/* PLT rewrite control.  */
> > > > +enum dl_plt_rewrite_control
> > > > +{
> > > > +  /* No PLT rewrite.  */
> > > > +  plt_rewrite_none,
> > > > +  /* PLT rewrite is enabled at run-time.  */
> > > > +  plt_rewrite_enabled,
> > > > +  /* Rewrite PLT with JMP at run-time.  */
> > > > +  plt_rewrite_jmp,
> > > > +  /* Rewrite PLT with JMPABS at run-time.  */
> > > > +  plt_rewrite_jmpabs
> > > > +};
> > > > +
> > > >  struct dl_x86_feature_control
> > > >  {
> > > >    enum dl_x86_cet_control ibt : 2;
> > > >    enum dl_x86_cet_control shstk : 2;
> > > > +  enum dl_plt_rewrite_control plt_rewrite : 2;
> > > >  };
> > > >
> > > >  #endif /* cet-control.h */
> > > > diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
> > > > index 0bf923d48b..5f2962bbe5 100644
> > > > --- a/sysdeps/x86/cpu-features.c
> > > > +++ b/sysdeps/x86/cpu-features.c
> > > > @@ -27,6 +27,13 @@
> > > >  extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
> > > >    attribute_hidden;
> > > >
> > > > +static void
> > > > +TUNABLE_CALLBACK (set_plt_rewrite) (tunable_val_t *valp)
> > > > +{
> > > > +  if (valp->numval)
> > > > +    GL(dl_x86_feature_control).plt_rewrite = plt_rewrite_enabled;
> > > > +}
> > > > +
> > > >  #ifdef __LP64__
> > > >  static void
> > > >  TUNABLE_CALLBACK (set_prefer_map_32bit_exec) (tunable_val_t *valp)
> > > > @@ -996,6 +1003,9 @@ no_cpuid:
> > > >
> > > >    TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
> > > >
> > > > +  TUNABLE_GET (plt_rewrite, tunable_val_t *,
> > > > +              TUNABLE_CALLBACK (set_plt_rewrite));
> > > > +
> > > >  #ifdef __LP64__
> > > >    TUNABLE_GET (prefer_map_32bit_exec, tunable_val_t *,
> > > >                TUNABLE_CALLBACK (set_prefer_map_32bit_exec));
> > > > diff --git a/sysdeps/x86/dl-procruntime.c b/sysdeps/x86/dl-procruntime.c
> > > > index 2fb682ded3..03a612f3f3 100644
> > > > --- a/sysdeps/x86/dl-procruntime.c
> > > > +++ b/sysdeps/x86/dl-procruntime.c
> > > > @@ -67,6 +67,7 @@ PROCINFO_CLASS struct dl_x86_feature_control _dl_x86_feature_control
> > > >  = {
> > > >      .ibt = DEFAULT_DL_X86_CET_CONTROL,
> > > >      .shstk = DEFAULT_DL_X86_CET_CONTROL,
> > > > +    .plt_rewrite = plt_rewrite_none,
> > > >    }
> > > >  # endif
> > > >  # if !defined SHARED || defined PROCINFO_DECL
> > > > diff --git a/sysdeps/x86/dl-tunables.list b/sysdeps/x86/dl-tunables.list
> > > > index feb7004036..49da67fd74 100644
> > > > --- a/sysdeps/x86/dl-tunables.list
> > > > +++ b/sysdeps/x86/dl-tunables.list
> > > > @@ -66,5 +66,10 @@ glibc {
> > > >      x86_shared_cache_size {
> > > >        type: SIZE_T
> > > >      }
> > > > +    plt_rewrite {
> > > > +      type: INT_32
> > > > +      minval: 0
> > > > +      maxval: 1
> > > > +    }
> > > >    }
> > > >  }
> > > > diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
> > > > index 00120ca9ca..106f1d0f55 100644
> > > > --- a/sysdeps/x86_64/Makefile
> > > > +++ b/sysdeps/x86_64/Makefile
> > > > @@ -175,6 +175,25 @@ ifeq (no,$(build-hardcoded-path-in-tests))
> > > >  tests-container += tst-glibc-hwcaps-cache
> > > >  endif
> > > >
> > > > +ifeq (yes,$(have-z-mark-plt))
> > > > +tests += \
> > > > +  tst-plt-rewrite1 \
> > > > +# tests
> > > > +modules-names += \
> > > > +  tst-plt-rewritemod1 \
> > > > +# modules-names
> > > > +
> > > > +tst-plt-rewrite1-no-pie = yes
> > > > +LDFLAGS-tst-plt-rewrite1 = -Wl,-z,mark-plt,-z,now
> > > > +LDFLAGS-tst-plt-rewritemod1.so = -Wl,-z,mark-plt,-z,now
> > > > +tst-plt-rewrite1-ENV = GLIBC_TUNABLES=glibc.cpu.plt_rewrite=1 LD_DEBUG=files:bindings
> > > > +$(objpfx)tst-plt-rewrite1: $(objpfx)tst-plt-rewritemod1.so
> > > > +$(objpfx)tst-plt-rewrite1.out: /dev/null $(objpfx)tst-plt-rewrite1
> > > > +       $(tst-plt-rewrite1-ENV) $(make-test-out) > $@ 2>&1; \
> > > > +       grep -q -E "changing 'bar' PLT entry in .*/elf/tst-plt-rewritemod1.so' to direct branch" $@; \
> > > > +       $(evaluate-test)
> > > > +endif
> > > > +
> > > >  endif # $(subdir) == elf
> > > >
> > > >  ifeq ($(subdir),csu)
> > > > diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure
> > > > index e307467afa..a15eaa349d 100755
> > > > --- a/sysdeps/x86_64/configure
> > > > +++ b/sysdeps/x86_64/configure
> > > > @@ -25,6 +25,41 @@ printf "%s\n" "$libc_cv_cc_mprefer_vector_width" >&6; }
> > > >  config_vars="$config_vars
> > > >  config-cflags-mprefer-vector-width = $libc_cv_cc_mprefer_vector_width"
> > > >
> > > > +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for linker that supports -z mark-plt" >&5
> > > > +printf %s "checking for linker that supports -z mark-plt... " >&6; }
> > > > +libc_linker_feature=no
> > > > +cat > conftest.c <<EOF
> > > > +int _start (void) { return 42; }
> > > > +EOF
> > > > +if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS $no_ssp
> > > > +                 -Wl,-z,mark-plt -nostdlib -nostartfiles
> > > > +                 -fPIC -shared -o conftest.so conftest.c
> > > > +                 1>&5'
> > > > +  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
> > > > +  (eval $ac_try) 2>&5
> > > > +  ac_status=$?
> > > > +  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
> > > > +  test $ac_status = 0; }; }
> > > > +then
> > > > +  if ${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS $no_ssp -Wl,-z,mark-plt -nostdlib \
> > > > +      -nostartfiles -fPIC -shared -o conftest.so conftest.c 2>&1 \
> > > > +      | grep "warning: -z mark-plt ignored" > /dev/null 2>&1; then
> > > > +    true
> > > > +  else
> > > > +    libc_linker_feature=yes
> > > > +  fi
> > > > +fi
> > > > +rm -f conftest*
> > > > +if test $libc_linker_feature = yes; then
> > > > +  libc_cv_z_mark_plt=yes
> > > > +else
> > > > +  libc_cv_z_mark_plt=no
> > > > +fi
> > > > +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_linker_feature" >&5
> > > > +printf "%s\n" "$libc_linker_feature" >&6; }
> > > > +config_vars="$config_vars
> > > > +have-z-mark-plt = $libc_cv_z_mark_plt"
> > > > +
> > > >  if test x"$build_mathvec" = xnotset; then
> > > >    build_mathvec=yes
> > > >  fi
> > > > diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac
> > > > index 1215dcb1e4..40f642db31 100644
> > > > --- a/sysdeps/x86_64/configure.ac
> > > > +++ b/sysdeps/x86_64/configure.ac
> > > > @@ -10,6 +10,10 @@ LIBC_TRY_CC_OPTION([-mprefer-vector-width=128],
> > > >  LIBC_CONFIG_VAR([config-cflags-mprefer-vector-width],
> > > >                 [$libc_cv_cc_mprefer_vector_width])
> > > >
> > > > +LIBC_LINKER_FEATURE([-z mark-plt], [-Wl,-z,mark-plt],
> > > > +                    [libc_cv_z_mark_plt=yes], [libc_cv_z_mark_plt=no])
> > > > +LIBC_CONFIG_VAR([have-z-mark-plt], [$libc_cv_z_mark_plt])
> > > > +
> > > >  if test x"$build_mathvec" = xnotset; then
> > > >    build_mathvec=yes
> > > >  fi
> > > > diff --git a/sysdeps/x86_64/dl-dtprocnum.h b/sysdeps/x86_64/dl-dtprocnum.h
> > > > new file mode 100644
> > > > index 0000000000..dd41d95b88
> > > > --- /dev/null
> > > > +++ b/sysdeps/x86_64/dl-dtprocnum.h
> > > > @@ -0,0 +1,21 @@
> > > > +/* Configuration of lookup functions.  x64-64 version.
> > > > +   Copyright (C) 2023 Free Software Foundation, Inc.
> > > > +   This file is part of the GNU C Library.
> > > > +
> > > > +   The GNU C Library is free software; you can redistribute it and/or
> > > > +   modify it under the terms of the GNU Lesser General Public
> > > > +   License as published by the Free Software Foundation; either
> > > > +   version 2.1 of the License, or (at your option) any later version.
> > > > +
> > > > +   The GNU C Library is distributed in the hope that it will be useful,
> > > > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > > > +   Lesser General Public License for more details.
> > > > +
> > > > +   You should have received a copy of the GNU Lesser General Public
> > > > +   License along with the GNU C Library; if not, see
> > > > +   <https://www.gnu.org/licenses/>.  */
> > > > +
> > > > +/* Number of extra dynamic section entries for this architecture.  By
> > > > +   default there are none.  */
> > > > +#define DT_THISPROCNUM DT_X86_64_NUM
> > > > diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
> > > > index 581a2f1a9e..973042f9e6 100644
> > > > --- a/sysdeps/x86_64/dl-machine.h
> > > > +++ b/sysdeps/x86_64/dl-machine.h
> > > > @@ -30,6 +30,9 @@
> > > >  #include <dl-machine-rel.h>
> > > >  #include <isa-level.h>
> > > >
> > > > +/* Translate a processor specific dynamic tag to the index in l_info array.  */
> > > > +#define DT_X86_64(x) (DT_X86_64_##x - DT_LOPROC + DT_NUM)
> > > > +
> > > >  /* Return nonzero iff ELF header is compatible with the running host.  */
> > > >  static inline int __attribute__ ((unused))
> > > >  elf_machine_matches_host (const ElfW(Ehdr) *ehdr)
> > > > @@ -304,8 +307,9 @@ and creates an unsatisfiable circular dependency.\n",
> > > >
> > > >        switch (r_type)
> > > >         {
> > > > -       case R_X86_64_GLOB_DAT:
> > > >         case R_X86_64_JUMP_SLOT:
> > > > +         map->l_has_jump_slot_reloc = true;
> > > > +       case R_X86_64_GLOB_DAT:
> > > >           *reloc_addr = value;
> > > >           break;
> > > >
> > > > @@ -541,3 +545,234 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
> > > >  }
> > > >
> > > >  #endif /* RESOLVE_MAP */
> > > > +
> > > > +#if !defined ELF_DYNAMIC_AFTER_RELOC && !defined RTLD_BOOTSTRAP \
> > > > +    && defined SHARED
> > > > +# define ELF_DYNAMIC_AFTER_RELOC(map, lazy) \
> > > > +  x86_64_dynamic_after_reloc (map, (lazy))
> > > > +
> > > > +static const char *
> > > > +x86_64_reloc_symbol_name (struct link_map *map, const ElfW(Rela) *reloc)
> > > > +{
> > > > +  const ElfW(Sym) *const symtab
> > > > +    = (const void *) map->l_info[DT_SYMTAB]->d_un.d_ptr;
> > > > +  const ElfW(Sym) *const refsym = &symtab[ELFW (R_SYM) (reloc->r_info)];
> > > > +  const char *strtab = (const char *) map->l_info[DT_STRTAB]->d_un.d_ptr;
> > > > +  return strtab + refsym->st_name;
> > > > +}
> > > > +
> > > > +static void
> > > > +x86_64_rewrite_plt (struct link_map *map, ElfW(Addr) plt_rewrite,
> > > > +                   ElfW(Addr) plt_aligned)
> > > > +{
> > > > +  ElfW(Addr) plt_rewrite_bias = plt_rewrite - plt_aligned;
> > > > +  ElfW(Addr) l_addr = map->l_addr;
> > > > +  ElfW(Addr) pltent = map->l_info[DT_X86_64 (PLTENT)]->d_un.d_val;
> > > > +  ElfW(Addr) start = map->l_info[DT_JMPREL]->d_un.d_ptr;
> > > > +  ElfW(Addr) size = map->l_info[DT_PLTRELSZ]->d_un.d_val;
> > > > +  const ElfW(Rela) *reloc = (const void *) start;
> > > > +  const ElfW(Rela) *reloc_end = (const void *) (start + size);
> > > > +
> > > > +  unsigned int feature_1 = THREAD_GETMEM (THREAD_SELF,
> > > > +                                         header.feature_1);
> > > > +  bool ibt_enabled_p
> > > > +    = (feature_1 & GNU_PROPERTY_X86_FEATURE_1_IBT) != 0;
> > > > +
> > > > +  if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
> > > > +    _dl_debug_printf ("\nchanging PLT in '%s' to direct branch\n",
> > > > +                     DSO_FILENAME (map->l_name));
> > > > +
> > > > +  for (; reloc < reloc_end; reloc++)
> > > > +    if (ELFW(R_TYPE) (reloc->r_info) == R_X86_64_JUMP_SLOT)
> > > > +      {
> > > > +       /* Get the value from the GOT entry.  */
> > > > +       ElfW(Addr) value = *(ElfW(Addr) *) (l_addr + reloc->r_offset);
> > > > +
> > > > +       /* Get the corresponding PLT entry from r_addend.  */
> > > > +       ElfW(Addr) branch_start = l_addr + reloc->r_addend;
> > > > +       /* Skip ENDBR64 if IBT isn't enabled.  */
> > > > +       if (!ibt_enabled_p)
> > > > +         branch_start = ALIGN_DOWN (branch_start, pltent);
> > > > +       /* Get the displacement from the branch target.  */
> > > > +       ElfW(Addr) disp = value - branch_start - 5;
> > IMO 5 should be a usefully named define. Maybe "JMP_INSN_SIZE" or something.
>
> Will fix it.
>
> > > > +       ElfW(Addr) plt_end;
> > > > +       ElfW(Addr) pad;
> > > > +
> > > > +       branch_start += plt_rewrite_bias;
> > > > +       plt_end = (branch_start & -pltent) + pltent;
> > > > +
> > > > +       /* Update the PLT entry.  */
> > > > +       if ((disp + 0x80000000ULL) <= 0xffffffffULL)
> > 0x80000000 -> (unsigned long long)I32_MIN?
> > 0xffffffff -> (unsigned long long)U32_MAX?
>
> I will change it.
>
> > Or at least a define.
> > Its easy to get the number of hex characters wrong.
> > > > +         {
> > > > +           /* If the target branch can be reached with a direct branch,
> > > > +              rewrite the PLT entry with a direct branch.  */
> > > > +           if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_BINDINGS))
> > > > +             {
> > > > +               const char *sym_name = x86_64_reloc_symbol_name (map,
> > > > +                                                                reloc);
> > > > +               _dl_debug_printf ("changing '%s' PLT entry in '%s' to "
> > > > +                                 "direct branch\n", sym_name,
> > > > +                                 DSO_FILENAME (map->l_name));
> > > > +             }
> > > > +
> > > > +           pad = branch_start + 5;
> > again a define.
>
> Will fix it.
>
> > > > +
> > > > +           if (__glibc_unlikely (pad > plt_end))
> > > > +             {
> > > > +               if (__glibc_unlikely (GLRO(dl_debug_mask)
> > > > +                                     & DL_DEBUG_BINDINGS))
> > > > +                 {
> > > > +                   const char *sym_name
> > > > +                     = x86_64_reloc_symbol_name (map, reloc);
> > > > +                   _dl_debug_printf ("\ninvalid r_addend of "
> > > > +                                     "R_X86_64_JUMP_SLOT against '%s' "
> > > > +                                     "in '%s'\n", sym_name,
> > > > +                                     DSO_FILENAME (map->l_name));
> > > > +                 }
> > > > +
> > > > +               continue;
> > > > +             }
> > > > +
> > > > +           /* Write out direct branch.  */
> > > > +           *(uint8_t *) branch_start = 0xe9;
> > will it ever be disp8?
>
> This should be as fast as possible.  Adding an unlikely disp8 check will
> slow it down.
>
> > But again, imo `0xe9` should be defined as something along the lines of
> > JMP_DISP32_OPCODE
>
> Will fix it.
>
> > > > +           *((uint32_t *) (branch_start + 1)) = disp;
> > Does it need to be a u32 with aliasing attribute? Maybe just memcpy?
>
> elf_machine_rela has some similar codes.  It is OK.
>
> > > > +         }
> > > > +       else
> > > > +         {
> > > > +           if (GL(dl_x86_feature_control).plt_rewrite
> > > > +               != plt_rewrite_jmpabs)
> > > > +             continue;
> > > > +
> > > > +           pad = branch_start + 11;
> > > > +
> > I assume this is 16 (plt entry size) - 5 (jmp insn size)?
> > Should this use the `DT_X86_64_PLTENT` field?
>
> Good idea.  Will change it.
>
> > > > +           if (pad > plt_end)
> > > > +             continue;
> > > > +
> > > > +           /* Rewrite the PLT entry with JMPABS.  */
> > > > +           if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_BINDINGS))
> > > > +             {
> > > > +               const char *sym_name = x86_64_reloc_symbol_name (map,
> > > > +                                                                reloc);
> > > > +               _dl_debug_printf ("changing '%s' PLT entry in '%s' to "
> > > > +                                 "JMPABS\n", sym_name,
> > > > +                                 DSO_FILENAME (map->l_name));
> > > > +             }
> > > > +
> > > > +           /* "jmpabs $target" for 64-bit displacement.  */
> > > > +           *(uint8_t *) (branch_start + 0) = 0xd5;
> > > > +           *(uint8_t *) (branch_start + 1) = 0x0;
> > > > +           *(uint8_t *) (branch_start + 2) = 0xa1;
> > > > +           *(uint64_t *) (branch_start + 3) = value;
> > again this is not a clear way to encode instructions...
>
> This is JMPABS in Intel APX, which has a 3-byte opcode + 64bit address.
>
> > What instruction is meant to be?
> > ```
> > .byte 0xd5, 0x0, 0xa1, 0x7f, 0x12, 0x34, 0x00, 0x00, 0x00, 0x00, 0x00
> > ```
> > ->
> > ```
> > 0000000000000000 <.text>:
> >        0: d5                            <unknown>
> >        1: 00 a1 7f 12 34 00            addb %ah, 3412607(%rcx)
> >        7: 00 00                        addb %al, (%rax)
> >        9: 00 00                        addb %al, (%rax)
> >
> > ```
>
> You need the latest binutils master branch which support JMPABS in
> Intel APX:
>
> 0000000000000000 <.text>:
>    0: d5 00 a1 7f 12 34 00 00 00 00 00 jmpabs $0x34127f
>
> > > > +         }
> > > > +
> > > > +       /* Fill the unused part of the PLT entry with INT3.  */
> > > > +       for (; pad < plt_end; pad++)
> > > > +         *(uint8_t *) pad = 0xcc;
> > maybe `INT3_OPCODE`?
>
> Will change it.
>
> > > > +      }
> > > > +}
> > > > +
> > > > +static inline void
> > > > +x86_64_rewrite_plt_in_place (struct link_map *map)
> > > > +{
> > > > +  /* Adjust DT_X86_64_PLT address and DT_X86_64_PLTSZ values.  */
> > > > +  ElfW(Addr) plt = (map->l_info[DT_X86_64 (PLT)]->d_un.d_ptr
> > > > +                   + map->l_addr);
> > > > +  size_t pagesize = GLRO(dl_pagesize);
> > > > +  ElfW(Addr) plt_aligned = ALIGN_DOWN (plt, pagesize);
> > > > +  size_t pltsz = (map->l_info[DT_X86_64 (PLTSZ)]->d_un.d_val
> > > > +                 + plt - plt_aligned);
> > > > +
> > > > +  if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
> > > > +    _dl_debug_printf ("\nchanging PLT in '%s' to writable\n",
> > > > +                     DSO_FILENAME (map->l_name));
> > > > +
> > > > +  if (__glibc_unlikely (__mprotect ((void *) plt_aligned, pltsz,
> > > > +                                   PROT_WRITE | PROT_READ) < 0))
> > > > +    {
> > > > +      if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
> > > > +       _dl_debug_printf ("\nfailed to change PLT in '%s' to writable\n",
> > > > +                         DSO_FILENAME (map->l_name));
> > > > +      return;
> > > > +    }
> > > > +
> > > > +  x86_64_rewrite_plt (map, plt_aligned, plt_aligned);
> > > > +
> > > > +  if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
> > > > +   _dl_debug_printf ("\nchanging PLT in '%s' back to read-only\n",
> > > > +          DSO_FILENAME (map->l_name));
> > > > +
> > > > +  if (__glibc_unlikely (__mprotect ((void *) plt_aligned, pltsz,
> > > > +                                   PROT_EXEC | PROT_READ) < 0))
> > > > +    _dl_signal_error (0, DSO_FILENAME (map->l_name), NULL,
> > > > +                     "failed to change PLT back to read-only");
> > > > +}
> > > > +
> > > > +/* Rewrite PLT entries to direct branch if possible.  */
> > > > +
> > > > +static inline void
> > > > +x86_64_dynamic_after_reloc (struct link_map *map, int lazy)
> > > > +{
> > > > +  /* Ignore DT_X86_64_PLT if the lazy binding is enabled.  */
> > > > +  if (lazy)
> > > > +    return;
> > > > +
> > > > +  if (__glibc_likely (map->l_info[DT_X86_64 (PLT)] == NULL))
> > > > +    return;
> > > > +
> > > > +  /* Ignore DT_X86_64_PLT if there is no R_X86_64_JUMP_SLOT.  */
> > > > +  if (!map->l_has_jump_slot_reloc)
> > > > +    return;
> > > > +
> > > > +  /* Ignore DT_X86_64_PLT on ld.so to avoid changing its own PLT.  */
> > > > +  if (map == &GL(dl_rtld_map) || map->l_real == &GL(dl_rtld_map))
> > > > +    return;
> > > > +
> > > > +  /* Ignore DT_X86_64_PLT if
> > > > +     1. DT_JMPREL isn't available or its value is 0.
> > > > +     2. DT_PLTRELSZ is 0.
> > > > +     3. DT_X86_64_PLTSZ isn't available or its value is 0.
> > > > +     4. DT_X86_64_PLTENT isn't available or its value is smaller
> > > > +     than 16 bytes.  */
> > > > +  if (map->l_info[DT_JMPREL] == NULL
> > > > +      || map->l_info[DT_JMPREL]->d_un.d_ptr == 0
> > > > +      || map->l_info[DT_PLTRELSZ]->d_un.d_val == 0
> > > > +      || map->l_info[DT_X86_64 (PLTSZ)] == NULL
> > > > +      || map->l_info[DT_X86_64 (PLTSZ)]->d_un.d_val == 0
> > > > +      || map->l_info[DT_X86_64 (PLTENT)] == NULL
> > > > +      || map->l_info[DT_X86_64 (PLTENT)]->d_un.d_val < 16)
> > > > +    return;
> > Another sanity check might be `PLTSZ % PLTENT == 0`
> > > > +
> > > > +  if (GL(dl_x86_feature_control).plt_rewrite == plt_rewrite_enabled)
> > > > +    {
> > > > +      /* PLT rewrite is enabled.  Check if mprotect works.  */
> > > > +      void *plt = __mmap (NULL, 4096, PROT_READ | PROT_WRITE,
> > > > +                         MAP_PRIVATE | MAP_ANONYMOUS,
> > > > +                         -1, 0);
> > > > +      if (__glibc_unlikely (plt == MAP_FAILED))
> > > > +       GL(dl_x86_feature_control).plt_rewrite = plt_rewrite_none;
> > imo clearer would be to just initialize:
> > GL(dl_x86_feature_control).plt_rewrite = plt_rewrite_none;
> > then make all the failure cases just return (or a goto if these is cleanup).
>
> This function may be called many times.   If the first mprotect check fails,
> we shouldn't call it again.   With this approach, the mprotect check will
> only be done once.
Yeah, i see that. Just instead of logic being

if(fail0) {
 // set plt_rewrite_none
}
else {
...
if(fail1)
 // set plt_rewrite_none
...
}
if(plt_rewrite_none) {
 // return
}

It might be cleaner to do:

// set plt_rewrite_none
if(fail0) // return
...
if(fail1) // return>
> > > > +      else
> > > > +       {
> > > > +         *(int32_t *) plt = -1;
> > What is this meant to be doing?
>
> Update the read-write page and check if it can be changed to read-exec.
>
How does writing -1 contribute to that?
> > > > +
> > > > +         /* If the memory can be changed to PROT_EXEC | PROT_READ,
> > > > +            rewrite PLT.  */
> > > > +         if (__mprotect (plt, 4096, PROT_EXEC | PROT_READ) == 0)
> > > > +           /* Use JMPABS on APX processors.  */
> > > > +           GL(dl_x86_feature_control).plt_rewrite
> > > > +             = (CPU_FEATURE_PRESENT_P (__get_cpu_features (), APX_F)
> > > > +                ? plt_rewrite_jmpabs
> > > > +                : plt_rewrite_jmp);
> > > > +         else
> > > > +           GL(dl_x86_feature_control).plt_rewrite = plt_rewrite_none;
> > > > +
> > > > +         __munmap (plt, 4096);
> > is the mmap just to see if we can get r/w/e permissions? If so is there a better
> > way to check memory permissions than mmap (which in the case of w->e IIRC
>
> I don't know if there is a better way.
>
you are just checking whether the OS supports such transitions?

> > will be expensive?
> > Is it possible for us to have the permissions now but a short while
> > later to not?
>
> It is very unlikely.  But I don't know for sure.
>
> > > > +       }
> > > > +    }
> > > > +
> > > > +  /* Ignore DT_X86_64_PLT if PLT rewrite isn't enabled.  */
> > > > +  if (GL(dl_x86_feature_control).plt_rewrite == plt_rewrite_none)
> > > > +    return;
> > > > +
> > > > +  x86_64_rewrite_plt_in_place (map);
> > > > +}
> > > > +#endif
> > > > diff --git a/sysdeps/x86_64/link_map.h b/sysdeps/x86_64/link_map.h
> > > > new file mode 100644
> > > > index 0000000000..ddb8e78077
> > > > --- /dev/null
> > > > +++ b/sysdeps/x86_64/link_map.h
> > > > @@ -0,0 +1,22 @@
> > > > +/* Additional fields in struct link_map.  x86-64 version.
> > > > +   Copyright (C) 2023 Free Software Foundation, Inc.
> > > > +   This file is part of the GNU C Library.
> > > > +
> > > > +   The GNU C Library is free software; you can redistribute it and/or
> > > > +   modify it under the terms of the GNU Lesser General Public
> > > > +   License as published by the Free Software Foundation; either
> > > > +   version 2.1 of the License, or (at your option) any later version.
> > > > +
> > > > +   The GNU C Library is distributed in the hope that it will be useful,
> > > > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > > > +   Lesser General Public License for more details.
> > > > +
> > > > +   You should have received a copy of the GNU Lesser General Public
> > > > +   License along with the GNU C Library; if not, see
> > > > +   <https://www.gnu.org/licenses/>.  */
> > > > +
> > > > +/* Has R_X86_64_JUMP_SLOT relocation.  */
> > > > +bool l_has_jump_slot_reloc;
> > > > +
> > > > +#include <sysdeps/x86/link_map.h>
> > > > diff --git a/sysdeps/x86_64/tst-plt-rewrite1.c b/sysdeps/x86_64/tst-plt-rewrite1.c
> > > > new file mode 100644
> > > > index 0000000000..0dd4724a5a
> > > > --- /dev/null
> > > > +++ b/sysdeps/x86_64/tst-plt-rewrite1.c
> > > > @@ -0,0 +1,31 @@
> > > > +/* Test PLT rewrite.
> > > > +   Copyright (C) 2023 Free Software Foundation, Inc.
> > > > +   This file is part of the GNU C Library.
> > > > +
> > > > +   The GNU C Library is free software; you can redistribute it and/or
> > > > +   modify it under the terms of the GNU Lesser General Public
> > > > +   License as published by the Free Software Foundation; either
> > > > +   version 2.1 of the License, or (at your option) any later version.
> > > > +
> > > > +   The GNU C Library is distributed in the hope that it will be useful,
> > > > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > > > +   Lesser General Public License for more details.
> > > > +
> > > > +   You should have received a copy of the GNU Lesser General Public
> > > > +   License along with the GNU C Library; if not, see
> > > > +   <https://www.gnu.org/licenses/>.  */
> > > > +
> > > > +#include <string.h>
> > > > +#include <support/check.h>
> > > > +
> > > > +extern const char *foo (void);
> > > > +
> > > > +static int
> > > > +do_test (void)
> > > > +{
> > > > +  TEST_COMPARE (strcmp (foo (), "PLT rewrite works"), 0);
> > > > +  return 0;
> > > > +}
> > > > +
> > > > +#include <support/test-driver.c>
> > > > diff --git a/sysdeps/x86_64/tst-plt-rewritemod1.c b/sysdeps/x86_64/tst-plt-rewritemod1.c
> > > > new file mode 100644
> > > > index 0000000000..361b907790
> > > > --- /dev/null
> > > > +++ b/sysdeps/x86_64/tst-plt-rewritemod1.c
> > > > @@ -0,0 +1,32 @@
> > > > +/* Check PLT rewrite works correctly.
> > > > +   Copyright (C) 2023 Free Software Foundation, Inc.
> > > > +   This file is part of the GNU C Library.
> > > > +
> > > > +   The GNU C Library is free software; you can redistribute it and/or
> > > > +   modify it under the terms of the GNU Lesser General Public
> > > > +   License as published by the Free Software Foundation; either
> > > > +   version 2.1 of the License, or (at your option) any later version.
> > > > +
> > > > +   The GNU C Library is distributed in the hope that it will be useful,
> > > > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > > > +   Lesser General Public License for more details.
> > > > +
> > > > +   You should have received a copy of the GNU Lesser General Public
> > > > +   License along with the GNU C Library; if not, see
> > > > +   <https://www.gnu.org/licenses/>.  */
> > > > +
> > > > +/* foo calls bar with indirect branch via PLT.  PLT rewrite should
> > > > +   change it to direct branch.  */
> > > > +
> > > > +const char *
> > > > +bar (void)
> > > > +{
> > > > +  return "PLT rewrite works";
> > > > +}
> > > > +
> > > > +const char *
> > > > +foo (void)
> > > > +{
> > > > +  return bar ();
> > > > +}
> > > > --
> > > > 2.43.0
> > > >
> > >
> > > PING.
> > >
> > > --
> > > H.J.
>
>
>
> --
> H.J.
H.J. Lu Jan. 2, 2024, 3:56 a.m. UTC | #7
On Mon, Jan 1, 2024 at 6:18 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Mon, Jan 1, 2024 at 5:01 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Mon, Jan 1, 2024 at 10:36 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > >
> > > On Mon, Jan 1, 2024 at 6:13 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > >
> > > > On Thu, Dec 21, 2023 at 7:04 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > >
> > > > > Changes in v2:
> > > > >
> > > > > 1. Changed tunables to glibc.cpu.plt_rewrite.
> > > > > 2. Added documentation.
> > > > > 3. Added a testcase.
> > > > >
> > > > > H.J.
> > > > > --
> > > > > Add ELF_DYNAMIC_AFTER_RELOC to allow target specific processing after
> > > > > relocation.
> > > > >
> > > > > For x86-64, add
> > > > >
> > > > >  #define DT_X86_64_PLT     (DT_LOPROC + 0)
> > > > >  #define DT_X86_64_PLTSZ   (DT_LOPROC + 1)
> > > > >  #define DT_X86_64_PLTENT  (DT_LOPROC + 3)
> > > > >
> > > > > 1. DT_X86_64_PLT: The address of the procedure linkage table.
> > > > > 2. DT_X86_64_PLTSZ: The total size, in bytes, of the procedure linkage
> > > > > table.
> > > > > 3. DT_X86_64_PLTENT: The size, in bytes, of a procedure linkage table
> > > > > entry.
> > > > >
> > > > > With the r_addend field of the R_X86_64_JUMP_SLOT relocation set to the
> > > > > memory offset of the indirect branch instruction.
> > > > >
> > > > > Define ELF_DYNAMIC_AFTER_RELOC for x86-64 to rewrite the PLT section
> > > > > with direct branch after relocation when the lazy binding is disabled.
> > > > > PLT rewrite is disabled by default.  Add
> > > > >
> > > > > $ GLIBC_TUNABLES=glibc.cpu.plt_rewrite=1
> > > > >
> > > > > to enable PLT rewrite at run-time.
> > > > > ---
> > > > >  elf/dynamic-link.h                   |   5 +
> > > > >  elf/elf.h                            |   5 +
> > > > >  elf/tst-glibcelf.py                  |   1 +
> > > > >  manual/tunables.texi                 |   9 +
> > > > >  scripts/glibcelf.py                  |   4 +
> > > > >  sysdeps/x86/cet-control.h            |  14 ++
> > > > >  sysdeps/x86/cpu-features.c           |  10 ++
> > > > >  sysdeps/x86/dl-procruntime.c         |   1 +
> > > > >  sysdeps/x86/dl-tunables.list         |   5 +
> > > > >  sysdeps/x86_64/Makefile              |  19 +++
> > > > >  sysdeps/x86_64/configure             |  35 ++++
> > > > >  sysdeps/x86_64/configure.ac          |   4 +
> > > > >  sysdeps/x86_64/dl-dtprocnum.h        |  21 +++
> > > > >  sysdeps/x86_64/dl-machine.h          | 237 ++++++++++++++++++++++++++-
> > > > >  sysdeps/x86_64/link_map.h            |  22 +++
> > > > >  sysdeps/x86_64/tst-plt-rewrite1.c    |  31 ++++
> > > > >  sysdeps/x86_64/tst-plt-rewritemod1.c |  32 ++++
> > > > >  17 files changed, 454 insertions(+), 1 deletion(-)
> > > > >  create mode 100644 sysdeps/x86_64/dl-dtprocnum.h
> > > > >  create mode 100644 sysdeps/x86_64/link_map.h
> > > > >  create mode 100644 sysdeps/x86_64/tst-plt-rewrite1.c
> > > > >  create mode 100644 sysdeps/x86_64/tst-plt-rewritemod1.c
> > > > >
> > > > > diff --git a/elf/dynamic-link.h b/elf/dynamic-link.h
> > > > > index e7f755fc75..5351671044 100644
> > > > > --- a/elf/dynamic-link.h
> > > > > +++ b/elf/dynamic-link.h
> > > > > @@ -177,6 +177,10 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
> > > > >        }                                                                              \
> > > > >    } while (0);
> > > > >
> > > > > +# ifndef ELF_DYNAMIC_AFTER_RELOC
> > > > > +#  define ELF_DYNAMIC_AFTER_RELOC(map, lazy)
> > > > > +# endif
> > > > > +
> > > > >  /* This can't just be an inline function because GCC is too dumb
> > > > >     to inline functions containing inlines themselves.  */
> > > > >  # ifdef RTLD_BOOTSTRAP
> > > > > @@ -192,6 +196,7 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
> > > > >        ELF_DYNAMIC_DO_RELR (map);                                             \
> > > > >      ELF_DYNAMIC_DO_REL ((map), (scope), edr_lazy, skip_ifunc);               \
> > > > >      ELF_DYNAMIC_DO_RELA ((map), (scope), edr_lazy, skip_ifunc);                      \
> > > > > +    ELF_DYNAMIC_AFTER_RELOC ((map), (edr_lazy));                             \
> > > > >    } while (0)
> > > > >
> > > > >  #endif
> > > > > diff --git a/elf/elf.h b/elf/elf.h
> > > > > index 5c1c1972d1..eda4802f56 100644
> > > > > --- a/elf/elf.h
> > > > > +++ b/elf/elf.h
> > > > > @@ -3639,6 +3639,11 @@ enum
> > > > >  /* x86-64 sh_type values.  */
> > > > >  #define SHT_X86_64_UNWIND      0x70000001 /* Unwind information.  */
> > > > >
> > > > > +/* x86-64 d_tag values.  */
> > > > > +#define DT_X86_64_PLT          (DT_LOPROC + 0)
> > > > > +#define DT_X86_64_PLTSZ                (DT_LOPROC + 1)
> > > > > +#define DT_X86_64_PLTENT       (DT_LOPROC + 3)
> > > Why not +2?
> >
> > From
> >
> > https://www.sco.com/developers/gabi/latest/ch5.dynamic.html#tag_encodings
> >
> > To make it simpler for tools to interpret the contents of dynamic
> > section entries,
> > the value of each tag, except for those in two special compatibility
> > ranges, will
> > determine the interpretation of the d_un union. A tag whose value is an even
> > number indicates a dynamic section entry that uses d_ptr. A tag whose value
> > is an odd number indicates a dynamic section entry that uses d_val or that
> > uses neither d_ptr nor d_val.
> >
> > > Also, to me at least, `PLTENT` reads as "number of PLT entries". Maybe
> > > `PLTENTSZ`?
> >
> > They are defined in x86-64 psABI:
> >
> >
> > https://gitlab.com/x86-psABIs/x86-64-ABI/-/commit/6d824a52a42d173eb838b879616c1be5870b593e
> >
> > The typical dynamic tag name for entry size is DT_XXX_YYYENT:
> >
> > #define DT_RELAENT 9 /* Size of one Rela reloc */
> > #define DT_SYMENT 11 /* Size of one symbol table entry */
> > #define DT_RELENT 19 /* Size of one Rel reloc */
> > #define DT_RELRENT 37 /* Size of one RELR relative relocaction
> >
> > > > > +#define DT_X86_64_NUM          4
> > > > >
> > > > >  /* AM33 relocations.  */
> > > > >  #define R_MN10300_NONE         0       /* No reloc.  */
> > > > > diff --git a/elf/tst-glibcelf.py b/elf/tst-glibcelf.py
> > > > > index 6142ca28ae..52293f4adf 100644
> > > > > --- a/elf/tst-glibcelf.py
> > > > > +++ b/elf/tst-glibcelf.py
> > > > > @@ -187,6 +187,7 @@ DT_VALNUM
> > > > >  DT_VALRNGHI
> > > > >  DT_VALRNGLO
> > > > >  DT_VERSIONTAGNUM
> > > > > +DT_X86_64_NUM
> > > > >  ELFCLASSNUM
> > > > >  ELFDATANUM
> > > > >  EM_NUM
> > > > > diff --git a/manual/tunables.texi b/manual/tunables.texi
> > > > > index b31f16da84..f9bd83622e 100644
> > > > > --- a/manual/tunables.texi
> > > > > +++ b/manual/tunables.texi
> > > > > @@ -57,6 +57,7 @@ glibc.pthread.stack_cache_size: 0x2800000 (min: 0x0, max: 0xffffffffffffffff)
> > > > >  glibc.cpu.hwcap_mask: 0x6 (min: 0x0, max: 0xffffffffffffffff)
> > > > >  glibc.malloc.mmap_max: 0 (min: 0, max: 2147483647)
> > > > >  glibc.elision.skip_trylock_internal_abort: 3 (min: 0, max: 2147483647)
> > > > > +glibc.cpu.plt_rewrite: 0 (min: 0, max: 1)
> > > > >  glibc.malloc.tcache_unsorted_limit: 0x0 (min: 0x0, max: 0xffffffffffffffff)
> > > > >  glibc.cpu.x86_ibt:
> > > > >  glibc.cpu.hwcaps:
> > > > > @@ -614,6 +615,14 @@ this tunable.
> > > > >  This tunable is specific to 64-bit x86-64.
> > > > >  @end deftp
> > > > >
> > > > > +@deftp Tunable glibc.cpu.plt_rewrite
> > > > > +When this tunable is set to @code{1}, the dynamic linker will rewrite
> > > > > +the PLT section with direct branch after relocation if possible when
> > > > > +the lazy binding is disabled.
> > > > > +
> > > > > +This tunable is specific to x86-64.
> > > > > +@end deftp
> > > > > +
> > > > >  @node Memory Related Tunables
> > > > >  @section Memory Related Tunables
> > > > >  @cindex memory related tunables
> > > > > diff --git a/scripts/glibcelf.py b/scripts/glibcelf.py
> > > > > index b52e83d613..3a21e25201 100644
> > > > > --- a/scripts/glibcelf.py
> > > > > +++ b/scripts/glibcelf.py
> > > > > @@ -439,6 +439,8 @@ class DtRISCV(Dt):
> > > > >      """Supplemental DT_* constants for EM_RISCV."""
> > > > >  class DtSPARC(Dt):
> > > > >      """Supplemental DT_* constants for EM_SPARC."""
> > > > > +class DtX86_64(Dt):
> > > > > +    """Supplemental DT_* constants for EM_X86_64."""
> > > > >  _dt_skip = '''
> > > > >  DT_ENCODING DT_PROCNUM
> > > > >  DT_ADDRRNGLO DT_ADDRRNGHI DT_ADDRNUM
> > > > > @@ -451,6 +453,7 @@ DT_MIPS_NUM
> > > > >  DT_PPC_NUM
> > > > >  DT_PPC64_NUM
> > > > >  DT_SPARC_NUM
> > > > > +DT_X86_64_NUM
> > > > >  '''.strip().split()
> > > > >  _register_elf_h(DtAARCH64, prefix='DT_AARCH64_', skip=_dt_skip, parent=Dt)
> > > > >  _register_elf_h(DtALPHA, prefix='DT_ALPHA_', skip=_dt_skip, parent=Dt)
> > > > > @@ -461,6 +464,7 @@ _register_elf_h(DtPPC, prefix='DT_PPC_', skip=_dt_skip, parent=Dt)
> > > > >  _register_elf_h(DtPPC64, prefix='DT_PPC64_', skip=_dt_skip, parent=Dt)
> > > > >  _register_elf_h(DtRISCV, prefix='DT_RISCV_', skip=_dt_skip, parent=Dt)
> > > > >  _register_elf_h(DtSPARC, prefix='DT_SPARC_', skip=_dt_skip, parent=Dt)
> > > > > +_register_elf_h(DtX86_64, prefix='DT_X86_64_', skip=_dt_skip, parent=Dt)
> > > > >  _register_elf_h(Dt, skip=_dt_skip, ranges=True)
> > > > >  del _dt_skip
> > > > >
> > > > > diff --git a/sysdeps/x86/cet-control.h b/sysdeps/x86/cet-control.h
> > > > > index 3bd00019e8..216d8cc576 100644
> > > > > --- a/sysdeps/x86/cet-control.h
> > > > > +++ b/sysdeps/x86/cet-control.h
> > > > > @@ -32,10 +32,24 @@ enum dl_x86_cet_control
> > > > >    cet_permissive
> > > > >  };
> > > > >
> > > > > +/* PLT rewrite control.  */
> > > > > +enum dl_plt_rewrite_control
> > > > > +{
> > > > > +  /* No PLT rewrite.  */
> > > > > +  plt_rewrite_none,
> > > > > +  /* PLT rewrite is enabled at run-time.  */
> > > > > +  plt_rewrite_enabled,
> > > > > +  /* Rewrite PLT with JMP at run-time.  */
> > > > > +  plt_rewrite_jmp,
> > > > > +  /* Rewrite PLT with JMPABS at run-time.  */
> > > > > +  plt_rewrite_jmpabs
> > > > > +};
> > > > > +
> > > > >  struct dl_x86_feature_control
> > > > >  {
> > > > >    enum dl_x86_cet_control ibt : 2;
> > > > >    enum dl_x86_cet_control shstk : 2;
> > > > > +  enum dl_plt_rewrite_control plt_rewrite : 2;
> > > > >  };
> > > > >
> > > > >  #endif /* cet-control.h */
> > > > > diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
> > > > > index 0bf923d48b..5f2962bbe5 100644
> > > > > --- a/sysdeps/x86/cpu-features.c
> > > > > +++ b/sysdeps/x86/cpu-features.c
> > > > > @@ -27,6 +27,13 @@
> > > > >  extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
> > > > >    attribute_hidden;
> > > > >
> > > > > +static void
> > > > > +TUNABLE_CALLBACK (set_plt_rewrite) (tunable_val_t *valp)
> > > > > +{
> > > > > +  if (valp->numval)
> > > > > +    GL(dl_x86_feature_control).plt_rewrite = plt_rewrite_enabled;
> > > > > +}
> > > > > +
> > > > >  #ifdef __LP64__
> > > > >  static void
> > > > >  TUNABLE_CALLBACK (set_prefer_map_32bit_exec) (tunable_val_t *valp)
> > > > > @@ -996,6 +1003,9 @@ no_cpuid:
> > > > >
> > > > >    TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
> > > > >
> > > > > +  TUNABLE_GET (plt_rewrite, tunable_val_t *,
> > > > > +              TUNABLE_CALLBACK (set_plt_rewrite));
> > > > > +
> > > > >  #ifdef __LP64__
> > > > >    TUNABLE_GET (prefer_map_32bit_exec, tunable_val_t *,
> > > > >                TUNABLE_CALLBACK (set_prefer_map_32bit_exec));
> > > > > diff --git a/sysdeps/x86/dl-procruntime.c b/sysdeps/x86/dl-procruntime.c
> > > > > index 2fb682ded3..03a612f3f3 100644
> > > > > --- a/sysdeps/x86/dl-procruntime.c
> > > > > +++ b/sysdeps/x86/dl-procruntime.c
> > > > > @@ -67,6 +67,7 @@ PROCINFO_CLASS struct dl_x86_feature_control _dl_x86_feature_control
> > > > >  = {
> > > > >      .ibt = DEFAULT_DL_X86_CET_CONTROL,
> > > > >      .shstk = DEFAULT_DL_X86_CET_CONTROL,
> > > > > +    .plt_rewrite = plt_rewrite_none,
> > > > >    }
> > > > >  # endif
> > > > >  # if !defined SHARED || defined PROCINFO_DECL
> > > > > diff --git a/sysdeps/x86/dl-tunables.list b/sysdeps/x86/dl-tunables.list
> > > > > index feb7004036..49da67fd74 100644
> > > > > --- a/sysdeps/x86/dl-tunables.list
> > > > > +++ b/sysdeps/x86/dl-tunables.list
> > > > > @@ -66,5 +66,10 @@ glibc {
> > > > >      x86_shared_cache_size {
> > > > >        type: SIZE_T
> > > > >      }
> > > > > +    plt_rewrite {
> > > > > +      type: INT_32
> > > > > +      minval: 0
> > > > > +      maxval: 1
> > > > > +    }
> > > > >    }
> > > > >  }
> > > > > diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
> > > > > index 00120ca9ca..106f1d0f55 100644
> > > > > --- a/sysdeps/x86_64/Makefile
> > > > > +++ b/sysdeps/x86_64/Makefile
> > > > > @@ -175,6 +175,25 @@ ifeq (no,$(build-hardcoded-path-in-tests))
> > > > >  tests-container += tst-glibc-hwcaps-cache
> > > > >  endif
> > > > >
> > > > > +ifeq (yes,$(have-z-mark-plt))
> > > > > +tests += \
> > > > > +  tst-plt-rewrite1 \
> > > > > +# tests
> > > > > +modules-names += \
> > > > > +  tst-plt-rewritemod1 \
> > > > > +# modules-names
> > > > > +
> > > > > +tst-plt-rewrite1-no-pie = yes
> > > > > +LDFLAGS-tst-plt-rewrite1 = -Wl,-z,mark-plt,-z,now
> > > > > +LDFLAGS-tst-plt-rewritemod1.so = -Wl,-z,mark-plt,-z,now
> > > > > +tst-plt-rewrite1-ENV = GLIBC_TUNABLES=glibc.cpu.plt_rewrite=1 LD_DEBUG=files:bindings
> > > > > +$(objpfx)tst-plt-rewrite1: $(objpfx)tst-plt-rewritemod1.so
> > > > > +$(objpfx)tst-plt-rewrite1.out: /dev/null $(objpfx)tst-plt-rewrite1
> > > > > +       $(tst-plt-rewrite1-ENV) $(make-test-out) > $@ 2>&1; \
> > > > > +       grep -q -E "changing 'bar' PLT entry in .*/elf/tst-plt-rewritemod1.so' to direct branch" $@; \
> > > > > +       $(evaluate-test)
> > > > > +endif
> > > > > +
> > > > >  endif # $(subdir) == elf
> > > > >
> > > > >  ifeq ($(subdir),csu)
> > > > > diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure
> > > > > index e307467afa..a15eaa349d 100755
> > > > > --- a/sysdeps/x86_64/configure
> > > > > +++ b/sysdeps/x86_64/configure
> > > > > @@ -25,6 +25,41 @@ printf "%s\n" "$libc_cv_cc_mprefer_vector_width" >&6; }
> > > > >  config_vars="$config_vars
> > > > >  config-cflags-mprefer-vector-width = $libc_cv_cc_mprefer_vector_width"
> > > > >
> > > > > +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for linker that supports -z mark-plt" >&5
> > > > > +printf %s "checking for linker that supports -z mark-plt... " >&6; }
> > > > > +libc_linker_feature=no
> > > > > +cat > conftest.c <<EOF
> > > > > +int _start (void) { return 42; }
> > > > > +EOF
> > > > > +if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS $no_ssp
> > > > > +                 -Wl,-z,mark-plt -nostdlib -nostartfiles
> > > > > +                 -fPIC -shared -o conftest.so conftest.c
> > > > > +                 1>&5'
> > > > > +  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
> > > > > +  (eval $ac_try) 2>&5
> > > > > +  ac_status=$?
> > > > > +  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
> > > > > +  test $ac_status = 0; }; }
> > > > > +then
> > > > > +  if ${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS $no_ssp -Wl,-z,mark-plt -nostdlib \
> > > > > +      -nostartfiles -fPIC -shared -o conftest.so conftest.c 2>&1 \
> > > > > +      | grep "warning: -z mark-plt ignored" > /dev/null 2>&1; then
> > > > > +    true
> > > > > +  else
> > > > > +    libc_linker_feature=yes
> > > > > +  fi
> > > > > +fi
> > > > > +rm -f conftest*
> > > > > +if test $libc_linker_feature = yes; then
> > > > > +  libc_cv_z_mark_plt=yes
> > > > > +else
> > > > > +  libc_cv_z_mark_plt=no
> > > > > +fi
> > > > > +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_linker_feature" >&5
> > > > > +printf "%s\n" "$libc_linker_feature" >&6; }
> > > > > +config_vars="$config_vars
> > > > > +have-z-mark-plt = $libc_cv_z_mark_plt"
> > > > > +
> > > > >  if test x"$build_mathvec" = xnotset; then
> > > > >    build_mathvec=yes
> > > > >  fi
> > > > > diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac
> > > > > index 1215dcb1e4..40f642db31 100644
> > > > > --- a/sysdeps/x86_64/configure.ac
> > > > > +++ b/sysdeps/x86_64/configure.ac
> > > > > @@ -10,6 +10,10 @@ LIBC_TRY_CC_OPTION([-mprefer-vector-width=128],
> > > > >  LIBC_CONFIG_VAR([config-cflags-mprefer-vector-width],
> > > > >                 [$libc_cv_cc_mprefer_vector_width])
> > > > >
> > > > > +LIBC_LINKER_FEATURE([-z mark-plt], [-Wl,-z,mark-plt],
> > > > > +                    [libc_cv_z_mark_plt=yes], [libc_cv_z_mark_plt=no])
> > > > > +LIBC_CONFIG_VAR([have-z-mark-plt], [$libc_cv_z_mark_plt])
> > > > > +
> > > > >  if test x"$build_mathvec" = xnotset; then
> > > > >    build_mathvec=yes
> > > > >  fi
> > > > > diff --git a/sysdeps/x86_64/dl-dtprocnum.h b/sysdeps/x86_64/dl-dtprocnum.h
> > > > > new file mode 100644
> > > > > index 0000000000..dd41d95b88
> > > > > --- /dev/null
> > > > > +++ b/sysdeps/x86_64/dl-dtprocnum.h
> > > > > @@ -0,0 +1,21 @@
> > > > > +/* Configuration of lookup functions.  x64-64 version.
> > > > > +   Copyright (C) 2023 Free Software Foundation, Inc.
> > > > > +   This file is part of the GNU C Library.
> > > > > +
> > > > > +   The GNU C Library is free software; you can redistribute it and/or
> > > > > +   modify it under the terms of the GNU Lesser General Public
> > > > > +   License as published by the Free Software Foundation; either
> > > > > +   version 2.1 of the License, or (at your option) any later version.
> > > > > +
> > > > > +   The GNU C Library is distributed in the hope that it will be useful,
> > > > > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > > > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > > > > +   Lesser General Public License for more details.
> > > > > +
> > > > > +   You should have received a copy of the GNU Lesser General Public
> > > > > +   License along with the GNU C Library; if not, see
> > > > > +   <https://www.gnu.org/licenses/>.  */
> > > > > +
> > > > > +/* Number of extra dynamic section entries for this architecture.  By
> > > > > +   default there are none.  */
> > > > > +#define DT_THISPROCNUM DT_X86_64_NUM
> > > > > diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
> > > > > index 581a2f1a9e..973042f9e6 100644
> > > > > --- a/sysdeps/x86_64/dl-machine.h
> > > > > +++ b/sysdeps/x86_64/dl-machine.h
> > > > > @@ -30,6 +30,9 @@
> > > > >  #include <dl-machine-rel.h>
> > > > >  #include <isa-level.h>
> > > > >
> > > > > +/* Translate a processor specific dynamic tag to the index in l_info array.  */
> > > > > +#define DT_X86_64(x) (DT_X86_64_##x - DT_LOPROC + DT_NUM)
> > > > > +
> > > > >  /* Return nonzero iff ELF header is compatible with the running host.  */
> > > > >  static inline int __attribute__ ((unused))
> > > > >  elf_machine_matches_host (const ElfW(Ehdr) *ehdr)
> > > > > @@ -304,8 +307,9 @@ and creates an unsatisfiable circular dependency.\n",
> > > > >
> > > > >        switch (r_type)
> > > > >         {
> > > > > -       case R_X86_64_GLOB_DAT:
> > > > >         case R_X86_64_JUMP_SLOT:
> > > > > +         map->l_has_jump_slot_reloc = true;
> > > > > +       case R_X86_64_GLOB_DAT:
> > > > >           *reloc_addr = value;
> > > > >           break;
> > > > >
> > > > > @@ -541,3 +545,234 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
> > > > >  }
> > > > >
> > > > >  #endif /* RESOLVE_MAP */
> > > > > +
> > > > > +#if !defined ELF_DYNAMIC_AFTER_RELOC && !defined RTLD_BOOTSTRAP \
> > > > > +    && defined SHARED
> > > > > +# define ELF_DYNAMIC_AFTER_RELOC(map, lazy) \
> > > > > +  x86_64_dynamic_after_reloc (map, (lazy))
> > > > > +
> > > > > +static const char *
> > > > > +x86_64_reloc_symbol_name (struct link_map *map, const ElfW(Rela) *reloc)
> > > > > +{
> > > > > +  const ElfW(Sym) *const symtab
> > > > > +    = (const void *) map->l_info[DT_SYMTAB]->d_un.d_ptr;
> > > > > +  const ElfW(Sym) *const refsym = &symtab[ELFW (R_SYM) (reloc->r_info)];
> > > > > +  const char *strtab = (const char *) map->l_info[DT_STRTAB]->d_un.d_ptr;
> > > > > +  return strtab + refsym->st_name;
> > > > > +}
> > > > > +
> > > > > +static void
> > > > > +x86_64_rewrite_plt (struct link_map *map, ElfW(Addr) plt_rewrite,
> > > > > +                   ElfW(Addr) plt_aligned)
> > > > > +{
> > > > > +  ElfW(Addr) plt_rewrite_bias = plt_rewrite - plt_aligned;
> > > > > +  ElfW(Addr) l_addr = map->l_addr;
> > > > > +  ElfW(Addr) pltent = map->l_info[DT_X86_64 (PLTENT)]->d_un.d_val;
> > > > > +  ElfW(Addr) start = map->l_info[DT_JMPREL]->d_un.d_ptr;
> > > > > +  ElfW(Addr) size = map->l_info[DT_PLTRELSZ]->d_un.d_val;
> > > > > +  const ElfW(Rela) *reloc = (const void *) start;
> > > > > +  const ElfW(Rela) *reloc_end = (const void *) (start + size);
> > > > > +
> > > > > +  unsigned int feature_1 = THREAD_GETMEM (THREAD_SELF,
> > > > > +                                         header.feature_1);
> > > > > +  bool ibt_enabled_p
> > > > > +    = (feature_1 & GNU_PROPERTY_X86_FEATURE_1_IBT) != 0;
> > > > > +
> > > > > +  if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
> > > > > +    _dl_debug_printf ("\nchanging PLT in '%s' to direct branch\n",
> > > > > +                     DSO_FILENAME (map->l_name));
> > > > > +
> > > > > +  for (; reloc < reloc_end; reloc++)
> > > > > +    if (ELFW(R_TYPE) (reloc->r_info) == R_X86_64_JUMP_SLOT)
> > > > > +      {
> > > > > +       /* Get the value from the GOT entry.  */
> > > > > +       ElfW(Addr) value = *(ElfW(Addr) *) (l_addr + reloc->r_offset);
> > > > > +
> > > > > +       /* Get the corresponding PLT entry from r_addend.  */
> > > > > +       ElfW(Addr) branch_start = l_addr + reloc->r_addend;
> > > > > +       /* Skip ENDBR64 if IBT isn't enabled.  */
> > > > > +       if (!ibt_enabled_p)
> > > > > +         branch_start = ALIGN_DOWN (branch_start, pltent);
> > > > > +       /* Get the displacement from the branch target.  */
> > > > > +       ElfW(Addr) disp = value - branch_start - 5;
> > > IMO 5 should be a usefully named define. Maybe "JMP_INSN_SIZE" or something.
> >
> > Will fix it.
> >
> > > > > +       ElfW(Addr) plt_end;
> > > > > +       ElfW(Addr) pad;
> > > > > +
> > > > > +       branch_start += plt_rewrite_bias;
> > > > > +       plt_end = (branch_start & -pltent) + pltent;
> > > > > +
> > > > > +       /* Update the PLT entry.  */
> > > > > +       if ((disp + 0x80000000ULL) <= 0xffffffffULL)
> > > 0x80000000 -> (unsigned long long)I32_MIN?
> > > 0xffffffff -> (unsigned long long)U32_MAX?
> >
> > I will change it.
> >
> > > Or at least a define.
> > > Its easy to get the number of hex characters wrong.
> > > > > +         {
> > > > > +           /* If the target branch can be reached with a direct branch,
> > > > > +              rewrite the PLT entry with a direct branch.  */
> > > > > +           if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_BINDINGS))
> > > > > +             {
> > > > > +               const char *sym_name = x86_64_reloc_symbol_name (map,
> > > > > +                                                                reloc);
> > > > > +               _dl_debug_printf ("changing '%s' PLT entry in '%s' to "
> > > > > +                                 "direct branch\n", sym_name,
> > > > > +                                 DSO_FILENAME (map->l_name));
> > > > > +             }
> > > > > +
> > > > > +           pad = branch_start + 5;
> > > again a define.
> >
> > Will fix it.
> >
> > > > > +
> > > > > +           if (__glibc_unlikely (pad > plt_end))
> > > > > +             {
> > > > > +               if (__glibc_unlikely (GLRO(dl_debug_mask)
> > > > > +                                     & DL_DEBUG_BINDINGS))
> > > > > +                 {
> > > > > +                   const char *sym_name
> > > > > +                     = x86_64_reloc_symbol_name (map, reloc);
> > > > > +                   _dl_debug_printf ("\ninvalid r_addend of "
> > > > > +                                     "R_X86_64_JUMP_SLOT against '%s' "
> > > > > +                                     "in '%s'\n", sym_name,
> > > > > +                                     DSO_FILENAME (map->l_name));
> > > > > +                 }
> > > > > +
> > > > > +               continue;
> > > > > +             }
> > > > > +
> > > > > +           /* Write out direct branch.  */
> > > > > +           *(uint8_t *) branch_start = 0xe9;
> > > will it ever be disp8?
> >
> > This should be as fast as possible.  Adding an unlikely disp8 check will
> > slow it down.
> >
> > > But again, imo `0xe9` should be defined as something along the lines of
> > > JMP_DISP32_OPCODE
> >
> > Will fix it.
> >
> > > > > +           *((uint32_t *) (branch_start + 1)) = disp;
> > > Does it need to be a u32 with aliasing attribute? Maybe just memcpy?
> >
> > elf_machine_rela has some similar codes.  It is OK.
> >
> > > > > +         }
> > > > > +       else
> > > > > +         {
> > > > > +           if (GL(dl_x86_feature_control).plt_rewrite
> > > > > +               != plt_rewrite_jmpabs)
> > > > > +             continue;
> > > > > +
> > > > > +           pad = branch_start + 11;
> > > > > +
> > > I assume this is 16 (plt entry size) - 5 (jmp insn size)?
> > > Should this use the `DT_X86_64_PLTENT` field?
> >
> > Good idea.  Will change it.
> >
> > > > > +           if (pad > plt_end)
> > > > > +             continue;
> > > > > +
> > > > > +           /* Rewrite the PLT entry with JMPABS.  */
> > > > > +           if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_BINDINGS))
> > > > > +             {
> > > > > +               const char *sym_name = x86_64_reloc_symbol_name (map,
> > > > > +                                                                reloc);
> > > > > +               _dl_debug_printf ("changing '%s' PLT entry in '%s' to "
> > > > > +                                 "JMPABS\n", sym_name,
> > > > > +                                 DSO_FILENAME (map->l_name));
> > > > > +             }
> > > > > +
> > > > > +           /* "jmpabs $target" for 64-bit displacement.  */
> > > > > +           *(uint8_t *) (branch_start + 0) = 0xd5;
> > > > > +           *(uint8_t *) (branch_start + 1) = 0x0;
> > > > > +           *(uint8_t *) (branch_start + 2) = 0xa1;
> > > > > +           *(uint64_t *) (branch_start + 3) = value;
> > > again this is not a clear way to encode instructions...
> >
> > This is JMPABS in Intel APX, which has a 3-byte opcode + 64bit address.
> >
> > > What instruction is meant to be?
> > > ```
> > > .byte 0xd5, 0x0, 0xa1, 0x7f, 0x12, 0x34, 0x00, 0x00, 0x00, 0x00, 0x00
> > > ```
> > > ->
> > > ```
> > > 0000000000000000 <.text>:
> > >        0: d5                            <unknown>
> > >        1: 00 a1 7f 12 34 00            addb %ah, 3412607(%rcx)
> > >        7: 00 00                        addb %al, (%rax)
> > >        9: 00 00                        addb %al, (%rax)
> > >
> > > ```
> >
> > You need the latest binutils master branch which support JMPABS in
> > Intel APX:
> >
> > 0000000000000000 <.text>:
> >    0: d5 00 a1 7f 12 34 00 00 00 00 00 jmpabs $0x34127f
> >
> > > > > +         }
> > > > > +
> > > > > +       /* Fill the unused part of the PLT entry with INT3.  */
> > > > > +       for (; pad < plt_end; pad++)
> > > > > +         *(uint8_t *) pad = 0xcc;
> > > maybe `INT3_OPCODE`?
> >
> > Will change it.
> >
> > > > > +      }
> > > > > +}
> > > > > +
> > > > > +static inline void
> > > > > +x86_64_rewrite_plt_in_place (struct link_map *map)
> > > > > +{
> > > > > +  /* Adjust DT_X86_64_PLT address and DT_X86_64_PLTSZ values.  */
> > > > > +  ElfW(Addr) plt = (map->l_info[DT_X86_64 (PLT)]->d_un.d_ptr
> > > > > +                   + map->l_addr);
> > > > > +  size_t pagesize = GLRO(dl_pagesize);
> > > > > +  ElfW(Addr) plt_aligned = ALIGN_DOWN (plt, pagesize);
> > > > > +  size_t pltsz = (map->l_info[DT_X86_64 (PLTSZ)]->d_un.d_val
> > > > > +                 + plt - plt_aligned);
> > > > > +
> > > > > +  if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
> > > > > +    _dl_debug_printf ("\nchanging PLT in '%s' to writable\n",
> > > > > +                     DSO_FILENAME (map->l_name));
> > > > > +
> > > > > +  if (__glibc_unlikely (__mprotect ((void *) plt_aligned, pltsz,
> > > > > +                                   PROT_WRITE | PROT_READ) < 0))
> > > > > +    {
> > > > > +      if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
> > > > > +       _dl_debug_printf ("\nfailed to change PLT in '%s' to writable\n",
> > > > > +                         DSO_FILENAME (map->l_name));
> > > > > +      return;
> > > > > +    }
> > > > > +
> > > > > +  x86_64_rewrite_plt (map, plt_aligned, plt_aligned);
> > > > > +
> > > > > +  if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
> > > > > +   _dl_debug_printf ("\nchanging PLT in '%s' back to read-only\n",
> > > > > +          DSO_FILENAME (map->l_name));
> > > > > +
> > > > > +  if (__glibc_unlikely (__mprotect ((void *) plt_aligned, pltsz,
> > > > > +                                   PROT_EXEC | PROT_READ) < 0))
> > > > > +    _dl_signal_error (0, DSO_FILENAME (map->l_name), NULL,
> > > > > +                     "failed to change PLT back to read-only");
> > > > > +}
> > > > > +
> > > > > +/* Rewrite PLT entries to direct branch if possible.  */
> > > > > +
> > > > > +static inline void
> > > > > +x86_64_dynamic_after_reloc (struct link_map *map, int lazy)
> > > > > +{
> > > > > +  /* Ignore DT_X86_64_PLT if the lazy binding is enabled.  */
> > > > > +  if (lazy)
> > > > > +    return;
> > > > > +
> > > > > +  if (__glibc_likely (map->l_info[DT_X86_64 (PLT)] == NULL))
> > > > > +    return;
> > > > > +
> > > > > +  /* Ignore DT_X86_64_PLT if there is no R_X86_64_JUMP_SLOT.  */
> > > > > +  if (!map->l_has_jump_slot_reloc)
> > > > > +    return;
> > > > > +
> > > > > +  /* Ignore DT_X86_64_PLT on ld.so to avoid changing its own PLT.  */
> > > > > +  if (map == &GL(dl_rtld_map) || map->l_real == &GL(dl_rtld_map))
> > > > > +    return;
> > > > > +
> > > > > +  /* Ignore DT_X86_64_PLT if
> > > > > +     1. DT_JMPREL isn't available or its value is 0.
> > > > > +     2. DT_PLTRELSZ is 0.
> > > > > +     3. DT_X86_64_PLTSZ isn't available or its value is 0.
> > > > > +     4. DT_X86_64_PLTENT isn't available or its value is smaller
> > > > > +     than 16 bytes.  */
> > > > > +  if (map->l_info[DT_JMPREL] == NULL
> > > > > +      || map->l_info[DT_JMPREL]->d_un.d_ptr == 0
> > > > > +      || map->l_info[DT_PLTRELSZ]->d_un.d_val == 0
> > > > > +      || map->l_info[DT_X86_64 (PLTSZ)] == NULL
> > > > > +      || map->l_info[DT_X86_64 (PLTSZ)]->d_un.d_val == 0
> > > > > +      || map->l_info[DT_X86_64 (PLTENT)] == NULL
> > > > > +      || map->l_info[DT_X86_64 (PLTENT)]->d_un.d_val < 16)
> > > > > +    return;
> > > Another sanity check might be `PLTSZ % PLTENT == 0`
> > > > > +
> > > > > +  if (GL(dl_x86_feature_control).plt_rewrite == plt_rewrite_enabled)
> > > > > +    {
> > > > > +      /* PLT rewrite is enabled.  Check if mprotect works.  */
> > > > > +      void *plt = __mmap (NULL, 4096, PROT_READ | PROT_WRITE,
> > > > > +                         MAP_PRIVATE | MAP_ANONYMOUS,
> > > > > +                         -1, 0);
> > > > > +      if (__glibc_unlikely (plt == MAP_FAILED))
> > > > > +       GL(dl_x86_feature_control).plt_rewrite = plt_rewrite_none;
> > > imo clearer would be to just initialize:
> > > GL(dl_x86_feature_control).plt_rewrite = plt_rewrite_none;
> > > then make all the failure cases just return (or a goto if these is cleanup).
> >
> > This function may be called many times.   If the first mprotect check fails,
> > we shouldn't call it again.   With this approach, the mprotect check will
> > only be done once.
> Yeah, i see that. Just instead of logic being
>
> if(fail0) {
>  // set plt_rewrite_none
> }
> else {
> ...
> if(fail1)
>  // set plt_rewrite_none
> ...
> }
> if(plt_rewrite_none) {
>  // return
> }
>
> It might be cleaner to do:
>
> // set plt_rewrite_none
> if(fail0) // return
> ...
> if(fail1) // return>

We can't return after mprotect failure since we need
to unmap the testing page.   How about this?

 if (GL(dl_x86_feature_control).plt_rewrite == plt_rewrite_enabled)
    {
      enum dl_plt_rewrite_control plt_rewrite = plt_rewrite_none;

      /* PLT rewrite is enabled.  Check if mprotect works.  */
      void *plt = __mmap (NULL, 4096, PROT_READ | PROT_WRITE,
                          MAP_PRIVATE | MAP_ANONYMOUS,
                          -1, 0);
      if (__glibc_unlikely (plt != MAP_FAILED))
        {
          /* Touch the PROT_READ | PROT_WRITE page.  */
          *(int32_t *) plt = -1;

          /* If the updated PROT_READ | PROT_WRITE page can be changed
             to PROT_EXEC | PROT_READ, rewrite PLT.  */
          if (__mprotect (plt, 4096, PROT_EXEC | PROT_READ) == 0)
            /* Use JMPABS on APX processors.  */
            plt_rewrite = (CPU_FEATURE_PRESENT_P (__get_cpu_features (),
                                                  APX_F)
                           ? plt_rewrite_jmpabs : plt_rewrite_jmp);

          __munmap (plt, 4096);
        }

      GL(dl_x86_feature_control).plt_rewrite = plt_rewrite;
    }

  /* Ignore DT_X86_64_PLT if PLT rewrite isn't enabled.  */
  if (GL(dl_x86_feature_control).plt_rewrite == plt_rewrite_none)
    return;


> > > > > +      else
> > > > > +       {
> > > > > +         *(int32_t *) plt = -1;
> > > What is this meant to be doing?
> >
> > Update the read-write page and check if it can be changed to read-exec.
> >
> How does writing -1 contribute to that?

It touches the page before mprotect to PROT_EXEC | PROT_READ.

> > > > > +
> > > > > +         /* If the memory can be changed to PROT_EXEC | PROT_READ,
> > > > > +            rewrite PLT.  */
> > > > > +         if (__mprotect (plt, 4096, PROT_EXEC | PROT_READ) == 0)
> > > > > +           /* Use JMPABS on APX processors.  */
> > > > > +           GL(dl_x86_feature_control).plt_rewrite
> > > > > +             = (CPU_FEATURE_PRESENT_P (__get_cpu_features (), APX_F)
> > > > > +                ? plt_rewrite_jmpabs
> > > > > +                : plt_rewrite_jmp);
> > > > > +         else
> > > > > +           GL(dl_x86_feature_control).plt_rewrite = plt_rewrite_none;
> > > > > +
> > > > > +         __munmap (plt, 4096);
> > > is the mmap just to see if we can get r/w/e permissions? If so is there a better
> > > way to check memory permissions than mmap (which in the case of w->e IIRC
> >
> > I don't know if there is a better way.
> >
> you are just checking whether the OS supports such transitions?

Yes.

> > > will be expensive?
> > > Is it possible for us to have the permissions now but a short while
> > > later to not?
> >
> > It is very unlikely.  But I don't know for sure.
> >
> > > > > +       }
> > > > > +    }
> > > > > +
> > > > > +  /* Ignore DT_X86_64_PLT if PLT rewrite isn't enabled.  */
> > > > > +  if (GL(dl_x86_feature_control).plt_rewrite == plt_rewrite_none)
> > > > > +    return;
> > > > > +
> > > > > +  x86_64_rewrite_plt_in_place (map);
> > > > > +}
> > > > > +#endif
> > > > > diff --git a/sysdeps/x86_64/link_map.h b/sysdeps/x86_64/link_map.h
> > > > > new file mode 100644
> > > > > index 0000000000..ddb8e78077
> > > > > --- /dev/null
> > > > > +++ b/sysdeps/x86_64/link_map.h
> > > > > @@ -0,0 +1,22 @@
> > > > > +/* Additional fields in struct link_map.  x86-64 version.
> > > > > +   Copyright (C) 2023 Free Software Foundation, Inc.
> > > > > +   This file is part of the GNU C Library.
> > > > > +
> > > > > +   The GNU C Library is free software; you can redistribute it and/or
> > > > > +   modify it under the terms of the GNU Lesser General Public
> > > > > +   License as published by the Free Software Foundation; either
> > > > > +   version 2.1 of the License, or (at your option) any later version.
> > > > > +
> > > > > +   The GNU C Library is distributed in the hope that it will be useful,
> > > > > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > > > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > > > > +   Lesser General Public License for more details.
> > > > > +
> > > > > +   You should have received a copy of the GNU Lesser General Public
> > > > > +   License along with the GNU C Library; if not, see
> > > > > +   <https://www.gnu.org/licenses/>.  */
> > > > > +
> > > > > +/* Has R_X86_64_JUMP_SLOT relocation.  */
> > > > > +bool l_has_jump_slot_reloc;
> > > > > +
> > > > > +#include <sysdeps/x86/link_map.h>
> > > > > diff --git a/sysdeps/x86_64/tst-plt-rewrite1.c b/sysdeps/x86_64/tst-plt-rewrite1.c
> > > > > new file mode 100644
> > > > > index 0000000000..0dd4724a5a
> > > > > --- /dev/null
> > > > > +++ b/sysdeps/x86_64/tst-plt-rewrite1.c
> > > > > @@ -0,0 +1,31 @@
> > > > > +/* Test PLT rewrite.
> > > > > +   Copyright (C) 2023 Free Software Foundation, Inc.
> > > > > +   This file is part of the GNU C Library.
> > > > > +
> > > > > +   The GNU C Library is free software; you can redistribute it and/or
> > > > > +   modify it under the terms of the GNU Lesser General Public
> > > > > +   License as published by the Free Software Foundation; either
> > > > > +   version 2.1 of the License, or (at your option) any later version.
> > > > > +
> > > > > +   The GNU C Library is distributed in the hope that it will be useful,
> > > > > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > > > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > > > > +   Lesser General Public License for more details.
> > > > > +
> > > > > +   You should have received a copy of the GNU Lesser General Public
> > > > > +   License along with the GNU C Library; if not, see
> > > > > +   <https://www.gnu.org/licenses/>.  */
> > > > > +
> > > > > +#include <string.h>
> > > > > +#include <support/check.h>
> > > > > +
> > > > > +extern const char *foo (void);
> > > > > +
> > > > > +static int
> > > > > +do_test (void)
> > > > > +{
> > > > > +  TEST_COMPARE (strcmp (foo (), "PLT rewrite works"), 0);
> > > > > +  return 0;
> > > > > +}
> > > > > +
> > > > > +#include <support/test-driver.c>
> > > > > diff --git a/sysdeps/x86_64/tst-plt-rewritemod1.c b/sysdeps/x86_64/tst-plt-rewritemod1.c
> > > > > new file mode 100644
> > > > > index 0000000000..361b907790
> > > > > --- /dev/null
> > > > > +++ b/sysdeps/x86_64/tst-plt-rewritemod1.c
> > > > > @@ -0,0 +1,32 @@
> > > > > +/* Check PLT rewrite works correctly.
> > > > > +   Copyright (C) 2023 Free Software Foundation, Inc.
> > > > > +   This file is part of the GNU C Library.
> > > > > +
> > > > > +   The GNU C Library is free software; you can redistribute it and/or
> > > > > +   modify it under the terms of the GNU Lesser General Public
> > > > > +   License as published by the Free Software Foundation; either
> > > > > +   version 2.1 of the License, or (at your option) any later version.
> > > > > +
> > > > > +   The GNU C Library is distributed in the hope that it will be useful,
> > > > > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > > > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > > > > +   Lesser General Public License for more details.
> > > > > +
> > > > > +   You should have received a copy of the GNU Lesser General Public
> > > > > +   License along with the GNU C Library; if not, see
> > > > > +   <https://www.gnu.org/licenses/>.  */
> > > > > +
> > > > > +/* foo calls bar with indirect branch via PLT.  PLT rewrite should
> > > > > +   change it to direct branch.  */
> > > > > +
> > > > > +const char *
> > > > > +bar (void)
> > > > > +{
> > > > > +  return "PLT rewrite works";
> > > > > +}
> > > > > +
> > > > > +const char *
> > > > > +foo (void)
> > > > > +{
> > > > > +  return bar ();
> > > > > +}
> > > > > --
> > > > > 2.43.0
> > > > >
> > > >
> > > > PING.
> > > >
> > > > --
> > > > H.J.
> >
> >
> >
> > --
> > H.J.
Sam James Jan. 2, 2024, 4:16 a.m. UTC | #8
"H.J. Lu" <hjl.tools@gmail.com> writes:

> On Mon, Jan 1, 2024 at 3:28 PM Cristian Rodríguez <cristian@rodriguez.im> wrote:
>>
>>
>>
>> On Mon, Jan 1, 2024 at 11:13 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>>>
>>> On Thu, Dec 21, 2023 at 7:04 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>>> >
>>> > Changes in v2:
>>> >
>>> > 1. Changed tunables to glibc.cpu.plt_rewrite.
>>> > 2. Added documentation.
>>> > 3. Added a testcase.
>>> >
>>> > H.J.
>>> > --
>>> > Add ELF_DYNAMIC_AFTER_RELOC to allow target specific processing after
>>> > relocation.
>>> >
>>>
>>> > $ GLIBC_TUNABLES=glibc.cpu.plt_rewrite=1
>>>
>>
>> Why does it default to off ? I'm pretty confident that the vast
>> majority of users do not know what the heck the PLT even is, but are
>> paying the price of it.. the only things that come to mind that
>> could break are rather fragile debugging tools.
>>
>
> SELinux may disallow it.

Most people aren't running SELinux and the people who do (and their
vendors) are well-used to adjusting policies for stuff like this,
though.

But we don't have any sort of global configuration file for tunables
(yet), right? Just env vars?
Noah Goldstein Jan. 2, 2024, 4:36 a.m. UTC | #9
On Mon, Jan 1, 2024 at 7:56 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Mon, Jan 1, 2024 at 6:18 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > On Mon, Jan 1, 2024 at 5:01 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > On Mon, Jan 1, 2024 at 10:36 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > > >
> > > > On Mon, Jan 1, 2024 at 6:13 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > >
> > > > > On Thu, Dec 21, 2023 at 7:04 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > > >
> > > > > > Changes in v2:
> > > > > >
> > > > > > 1. Changed tunables to glibc.cpu.plt_rewrite.
> > > > > > 2. Added documentation.
> > > > > > 3. Added a testcase.
> > > > > >
> > > > > > H.J.
> > > > > > --
> > > > > > Add ELF_DYNAMIC_AFTER_RELOC to allow target specific processing after
> > > > > > relocation.
> > > > > >
> > > > > > For x86-64, add
> > > > > >
> > > > > >  #define DT_X86_64_PLT     (DT_LOPROC + 0)
> > > > > >  #define DT_X86_64_PLTSZ   (DT_LOPROC + 1)
> > > > > >  #define DT_X86_64_PLTENT  (DT_LOPROC + 3)
> > > > > >
> > > > > > 1. DT_X86_64_PLT: The address of the procedure linkage table.
> > > > > > 2. DT_X86_64_PLTSZ: The total size, in bytes, of the procedure linkage
> > > > > > table.
> > > > > > 3. DT_X86_64_PLTENT: The size, in bytes, of a procedure linkage table
> > > > > > entry.
> > > > > >
> > > > > > With the r_addend field of the R_X86_64_JUMP_SLOT relocation set to the
> > > > > > memory offset of the indirect branch instruction.
> > > > > >
> > > > > > Define ELF_DYNAMIC_AFTER_RELOC for x86-64 to rewrite the PLT section
> > > > > > with direct branch after relocation when the lazy binding is disabled.
> > > > > > PLT rewrite is disabled by default.  Add
> > > > > >
> > > > > > $ GLIBC_TUNABLES=glibc.cpu.plt_rewrite=1
> > > > > >
> > > > > > to enable PLT rewrite at run-time.
> > > > > > ---
> > > > > >  elf/dynamic-link.h                   |   5 +
> > > > > >  elf/elf.h                            |   5 +
> > > > > >  elf/tst-glibcelf.py                  |   1 +
> > > > > >  manual/tunables.texi                 |   9 +
> > > > > >  scripts/glibcelf.py                  |   4 +
> > > > > >  sysdeps/x86/cet-control.h            |  14 ++
> > > > > >  sysdeps/x86/cpu-features.c           |  10 ++
> > > > > >  sysdeps/x86/dl-procruntime.c         |   1 +
> > > > > >  sysdeps/x86/dl-tunables.list         |   5 +
> > > > > >  sysdeps/x86_64/Makefile              |  19 +++
> > > > > >  sysdeps/x86_64/configure             |  35 ++++
> > > > > >  sysdeps/x86_64/configure.ac          |   4 +
> > > > > >  sysdeps/x86_64/dl-dtprocnum.h        |  21 +++
> > > > > >  sysdeps/x86_64/dl-machine.h          | 237 ++++++++++++++++++++++++++-
> > > > > >  sysdeps/x86_64/link_map.h            |  22 +++
> > > > > >  sysdeps/x86_64/tst-plt-rewrite1.c    |  31 ++++
> > > > > >  sysdeps/x86_64/tst-plt-rewritemod1.c |  32 ++++
> > > > > >  17 files changed, 454 insertions(+), 1 deletion(-)
> > > > > >  create mode 100644 sysdeps/x86_64/dl-dtprocnum.h
> > > > > >  create mode 100644 sysdeps/x86_64/link_map.h
> > > > > >  create mode 100644 sysdeps/x86_64/tst-plt-rewrite1.c
> > > > > >  create mode 100644 sysdeps/x86_64/tst-plt-rewritemod1.c
> > > > > >
> > > > > > diff --git a/elf/dynamic-link.h b/elf/dynamic-link.h
> > > > > > index e7f755fc75..5351671044 100644
> > > > > > --- a/elf/dynamic-link.h
> > > > > > +++ b/elf/dynamic-link.h
> > > > > > @@ -177,6 +177,10 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
> > > > > >        }                                                                              \
> > > > > >    } while (0);
> > > > > >
> > > > > > +# ifndef ELF_DYNAMIC_AFTER_RELOC
> > > > > > +#  define ELF_DYNAMIC_AFTER_RELOC(map, lazy)
> > > > > > +# endif
> > > > > > +
> > > > > >  /* This can't just be an inline function because GCC is too dumb
> > > > > >     to inline functions containing inlines themselves.  */
> > > > > >  # ifdef RTLD_BOOTSTRAP
> > > > > > @@ -192,6 +196,7 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
> > > > > >        ELF_DYNAMIC_DO_RELR (map);                                             \
> > > > > >      ELF_DYNAMIC_DO_REL ((map), (scope), edr_lazy, skip_ifunc);               \
> > > > > >      ELF_DYNAMIC_DO_RELA ((map), (scope), edr_lazy, skip_ifunc);                      \
> > > > > > +    ELF_DYNAMIC_AFTER_RELOC ((map), (edr_lazy));                             \
> > > > > >    } while (0)
> > > > > >
> > > > > >  #endif
> > > > > > diff --git a/elf/elf.h b/elf/elf.h
> > > > > > index 5c1c1972d1..eda4802f56 100644
> > > > > > --- a/elf/elf.h
> > > > > > +++ b/elf/elf.h
> > > > > > @@ -3639,6 +3639,11 @@ enum
> > > > > >  /* x86-64 sh_type values.  */
> > > > > >  #define SHT_X86_64_UNWIND      0x70000001 /* Unwind information.  */
> > > > > >
> > > > > > +/* x86-64 d_tag values.  */
> > > > > > +#define DT_X86_64_PLT          (DT_LOPROC + 0)
> > > > > > +#define DT_X86_64_PLTSZ                (DT_LOPROC + 1)
> > > > > > +#define DT_X86_64_PLTENT       (DT_LOPROC + 3)
> > > > Why not +2?
> > >
> > > From
> > >
> > > https://www.sco.com/developers/gabi/latest/ch5.dynamic.html#tag_encodings
> > >
> > > To make it simpler for tools to interpret the contents of dynamic
> > > section entries,
> > > the value of each tag, except for those in two special compatibility
> > > ranges, will
> > > determine the interpretation of the d_un union. A tag whose value is an even
> > > number indicates a dynamic section entry that uses d_ptr. A tag whose value
> > > is an odd number indicates a dynamic section entry that uses d_val or that
> > > uses neither d_ptr nor d_val.
> > >
> > > > Also, to me at least, `PLTENT` reads as "number of PLT entries". Maybe
> > > > `PLTENTSZ`?
> > >
> > > They are defined in x86-64 psABI:
> > >
> > >
> > > https://gitlab.com/x86-psABIs/x86-64-ABI/-/commit/6d824a52a42d173eb838b879616c1be5870b593e
> > >
> > > The typical dynamic tag name for entry size is DT_XXX_YYYENT:
> > >
> > > #define DT_RELAENT 9 /* Size of one Rela reloc */
> > > #define DT_SYMENT 11 /* Size of one symbol table entry */
> > > #define DT_RELENT 19 /* Size of one Rel reloc */
> > > #define DT_RELRENT 37 /* Size of one RELR relative relocaction
> > >
> > > > > > +#define DT_X86_64_NUM          4
> > > > > >
> > > > > >  /* AM33 relocations.  */
> > > > > >  #define R_MN10300_NONE         0       /* No reloc.  */
> > > > > > diff --git a/elf/tst-glibcelf.py b/elf/tst-glibcelf.py
> > > > > > index 6142ca28ae..52293f4adf 100644
> > > > > > --- a/elf/tst-glibcelf.py
> > > > > > +++ b/elf/tst-glibcelf.py
> > > > > > @@ -187,6 +187,7 @@ DT_VALNUM
> > > > > >  DT_VALRNGHI
> > > > > >  DT_VALRNGLO
> > > > > >  DT_VERSIONTAGNUM
> > > > > > +DT_X86_64_NUM
> > > > > >  ELFCLASSNUM
> > > > > >  ELFDATANUM
> > > > > >  EM_NUM
> > > > > > diff --git a/manual/tunables.texi b/manual/tunables.texi
> > > > > > index b31f16da84..f9bd83622e 100644
> > > > > > --- a/manual/tunables.texi
> > > > > > +++ b/manual/tunables.texi
> > > > > > @@ -57,6 +57,7 @@ glibc.pthread.stack_cache_size: 0x2800000 (min: 0x0, max: 0xffffffffffffffff)
> > > > > >  glibc.cpu.hwcap_mask: 0x6 (min: 0x0, max: 0xffffffffffffffff)
> > > > > >  glibc.malloc.mmap_max: 0 (min: 0, max: 2147483647)
> > > > > >  glibc.elision.skip_trylock_internal_abort: 3 (min: 0, max: 2147483647)
> > > > > > +glibc.cpu.plt_rewrite: 0 (min: 0, max: 1)
> > > > > >  glibc.malloc.tcache_unsorted_limit: 0x0 (min: 0x0, max: 0xffffffffffffffff)
> > > > > >  glibc.cpu.x86_ibt:
> > > > > >  glibc.cpu.hwcaps:
> > > > > > @@ -614,6 +615,14 @@ this tunable.
> > > > > >  This tunable is specific to 64-bit x86-64.
> > > > > >  @end deftp
> > > > > >
> > > > > > +@deftp Tunable glibc.cpu.plt_rewrite
> > > > > > +When this tunable is set to @code{1}, the dynamic linker will rewrite
> > > > > > +the PLT section with direct branch after relocation if possible when
> > > > > > +the lazy binding is disabled.
> > > > > > +
> > > > > > +This tunable is specific to x86-64.
> > > > > > +@end deftp
> > > > > > +
> > > > > >  @node Memory Related Tunables
> > > > > >  @section Memory Related Tunables
> > > > > >  @cindex memory related tunables
> > > > > > diff --git a/scripts/glibcelf.py b/scripts/glibcelf.py
> > > > > > index b52e83d613..3a21e25201 100644
> > > > > > --- a/scripts/glibcelf.py
> > > > > > +++ b/scripts/glibcelf.py
> > > > > > @@ -439,6 +439,8 @@ class DtRISCV(Dt):
> > > > > >      """Supplemental DT_* constants for EM_RISCV."""
> > > > > >  class DtSPARC(Dt):
> > > > > >      """Supplemental DT_* constants for EM_SPARC."""
> > > > > > +class DtX86_64(Dt):
> > > > > > +    """Supplemental DT_* constants for EM_X86_64."""
> > > > > >  _dt_skip = '''
> > > > > >  DT_ENCODING DT_PROCNUM
> > > > > >  DT_ADDRRNGLO DT_ADDRRNGHI DT_ADDRNUM
> > > > > > @@ -451,6 +453,7 @@ DT_MIPS_NUM
> > > > > >  DT_PPC_NUM
> > > > > >  DT_PPC64_NUM
> > > > > >  DT_SPARC_NUM
> > > > > > +DT_X86_64_NUM
> > > > > >  '''.strip().split()
> > > > > >  _register_elf_h(DtAARCH64, prefix='DT_AARCH64_', skip=_dt_skip, parent=Dt)
> > > > > >  _register_elf_h(DtALPHA, prefix='DT_ALPHA_', skip=_dt_skip, parent=Dt)
> > > > > > @@ -461,6 +464,7 @@ _register_elf_h(DtPPC, prefix='DT_PPC_', skip=_dt_skip, parent=Dt)
> > > > > >  _register_elf_h(DtPPC64, prefix='DT_PPC64_', skip=_dt_skip, parent=Dt)
> > > > > >  _register_elf_h(DtRISCV, prefix='DT_RISCV_', skip=_dt_skip, parent=Dt)
> > > > > >  _register_elf_h(DtSPARC, prefix='DT_SPARC_', skip=_dt_skip, parent=Dt)
> > > > > > +_register_elf_h(DtX86_64, prefix='DT_X86_64_', skip=_dt_skip, parent=Dt)
> > > > > >  _register_elf_h(Dt, skip=_dt_skip, ranges=True)
> > > > > >  del _dt_skip
> > > > > >
> > > > > > diff --git a/sysdeps/x86/cet-control.h b/sysdeps/x86/cet-control.h
> > > > > > index 3bd00019e8..216d8cc576 100644
> > > > > > --- a/sysdeps/x86/cet-control.h
> > > > > > +++ b/sysdeps/x86/cet-control.h
> > > > > > @@ -32,10 +32,24 @@ enum dl_x86_cet_control
> > > > > >    cet_permissive
> > > > > >  };
> > > > > >
> > > > > > +/* PLT rewrite control.  */
> > > > > > +enum dl_plt_rewrite_control
> > > > > > +{
> > > > > > +  /* No PLT rewrite.  */
> > > > > > +  plt_rewrite_none,
> > > > > > +  /* PLT rewrite is enabled at run-time.  */
> > > > > > +  plt_rewrite_enabled,
> > > > > > +  /* Rewrite PLT with JMP at run-time.  */
> > > > > > +  plt_rewrite_jmp,
> > > > > > +  /* Rewrite PLT with JMPABS at run-time.  */
> > > > > > +  plt_rewrite_jmpabs
> > > > > > +};
> > > > > > +
> > > > > >  struct dl_x86_feature_control
> > > > > >  {
> > > > > >    enum dl_x86_cet_control ibt : 2;
> > > > > >    enum dl_x86_cet_control shstk : 2;
> > > > > > +  enum dl_plt_rewrite_control plt_rewrite : 2;
> > > > > >  };
> > > > > >
> > > > > >  #endif /* cet-control.h */
> > > > > > diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
> > > > > > index 0bf923d48b..5f2962bbe5 100644
> > > > > > --- a/sysdeps/x86/cpu-features.c
> > > > > > +++ b/sysdeps/x86/cpu-features.c
> > > > > > @@ -27,6 +27,13 @@
> > > > > >  extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
> > > > > >    attribute_hidden;
> > > > > >
> > > > > > +static void
> > > > > > +TUNABLE_CALLBACK (set_plt_rewrite) (tunable_val_t *valp)
> > > > > > +{
> > > > > > +  if (valp->numval)
> > > > > > +    GL(dl_x86_feature_control).plt_rewrite = plt_rewrite_enabled;
> > > > > > +}
> > > > > > +
> > > > > >  #ifdef __LP64__
> > > > > >  static void
> > > > > >  TUNABLE_CALLBACK (set_prefer_map_32bit_exec) (tunable_val_t *valp)
> > > > > > @@ -996,6 +1003,9 @@ no_cpuid:
> > > > > >
> > > > > >    TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
> > > > > >
> > > > > > +  TUNABLE_GET (plt_rewrite, tunable_val_t *,
> > > > > > +              TUNABLE_CALLBACK (set_plt_rewrite));
> > > > > > +
> > > > > >  #ifdef __LP64__
> > > > > >    TUNABLE_GET (prefer_map_32bit_exec, tunable_val_t *,
> > > > > >                TUNABLE_CALLBACK (set_prefer_map_32bit_exec));
> > > > > > diff --git a/sysdeps/x86/dl-procruntime.c b/sysdeps/x86/dl-procruntime.c
> > > > > > index 2fb682ded3..03a612f3f3 100644
> > > > > > --- a/sysdeps/x86/dl-procruntime.c
> > > > > > +++ b/sysdeps/x86/dl-procruntime.c
> > > > > > @@ -67,6 +67,7 @@ PROCINFO_CLASS struct dl_x86_feature_control _dl_x86_feature_control
> > > > > >  = {
> > > > > >      .ibt = DEFAULT_DL_X86_CET_CONTROL,
> > > > > >      .shstk = DEFAULT_DL_X86_CET_CONTROL,
> > > > > > +    .plt_rewrite = plt_rewrite_none,
> > > > > >    }
> > > > > >  # endif
> > > > > >  # if !defined SHARED || defined PROCINFO_DECL
> > > > > > diff --git a/sysdeps/x86/dl-tunables.list b/sysdeps/x86/dl-tunables.list
> > > > > > index feb7004036..49da67fd74 100644
> > > > > > --- a/sysdeps/x86/dl-tunables.list
> > > > > > +++ b/sysdeps/x86/dl-tunables.list
> > > > > > @@ -66,5 +66,10 @@ glibc {
> > > > > >      x86_shared_cache_size {
> > > > > >        type: SIZE_T
> > > > > >      }
> > > > > > +    plt_rewrite {
> > > > > > +      type: INT_32
> > > > > > +      minval: 0
> > > > > > +      maxval: 1
> > > > > > +    }
> > > > > >    }
> > > > > >  }
> > > > > > diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
> > > > > > index 00120ca9ca..106f1d0f55 100644
> > > > > > --- a/sysdeps/x86_64/Makefile
> > > > > > +++ b/sysdeps/x86_64/Makefile
> > > > > > @@ -175,6 +175,25 @@ ifeq (no,$(build-hardcoded-path-in-tests))
> > > > > >  tests-container += tst-glibc-hwcaps-cache
> > > > > >  endif
> > > > > >
> > > > > > +ifeq (yes,$(have-z-mark-plt))
> > > > > > +tests += \
> > > > > > +  tst-plt-rewrite1 \
> > > > > > +# tests
> > > > > > +modules-names += \
> > > > > > +  tst-plt-rewritemod1 \
> > > > > > +# modules-names
> > > > > > +
> > > > > > +tst-plt-rewrite1-no-pie = yes
> > > > > > +LDFLAGS-tst-plt-rewrite1 = -Wl,-z,mark-plt,-z,now
> > > > > > +LDFLAGS-tst-plt-rewritemod1.so = -Wl,-z,mark-plt,-z,now
> > > > > > +tst-plt-rewrite1-ENV = GLIBC_TUNABLES=glibc.cpu.plt_rewrite=1 LD_DEBUG=files:bindings
> > > > > > +$(objpfx)tst-plt-rewrite1: $(objpfx)tst-plt-rewritemod1.so
> > > > > > +$(objpfx)tst-plt-rewrite1.out: /dev/null $(objpfx)tst-plt-rewrite1
> > > > > > +       $(tst-plt-rewrite1-ENV) $(make-test-out) > $@ 2>&1; \
> > > > > > +       grep -q -E "changing 'bar' PLT entry in .*/elf/tst-plt-rewritemod1.so' to direct branch" $@; \
> > > > > > +       $(evaluate-test)
> > > > > > +endif
> > > > > > +
> > > > > >  endif # $(subdir) == elf
> > > > > >
> > > > > >  ifeq ($(subdir),csu)
> > > > > > diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure
> > > > > > index e307467afa..a15eaa349d 100755
> > > > > > --- a/sysdeps/x86_64/configure
> > > > > > +++ b/sysdeps/x86_64/configure
> > > > > > @@ -25,6 +25,41 @@ printf "%s\n" "$libc_cv_cc_mprefer_vector_width" >&6; }
> > > > > >  config_vars="$config_vars
> > > > > >  config-cflags-mprefer-vector-width = $libc_cv_cc_mprefer_vector_width"
> > > > > >
> > > > > > +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for linker that supports -z mark-plt" >&5
> > > > > > +printf %s "checking for linker that supports -z mark-plt... " >&6; }
> > > > > > +libc_linker_feature=no
> > > > > > +cat > conftest.c <<EOF
> > > > > > +int _start (void) { return 42; }
> > > > > > +EOF
> > > > > > +if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS $no_ssp
> > > > > > +                 -Wl,-z,mark-plt -nostdlib -nostartfiles
> > > > > > +                 -fPIC -shared -o conftest.so conftest.c
> > > > > > +                 1>&5'
> > > > > > +  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
> > > > > > +  (eval $ac_try) 2>&5
> > > > > > +  ac_status=$?
> > > > > > +  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
> > > > > > +  test $ac_status = 0; }; }
> > > > > > +then
> > > > > > +  if ${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS $no_ssp -Wl,-z,mark-plt -nostdlib \
> > > > > > +      -nostartfiles -fPIC -shared -o conftest.so conftest.c 2>&1 \
> > > > > > +      | grep "warning: -z mark-plt ignored" > /dev/null 2>&1; then
> > > > > > +    true
> > > > > > +  else
> > > > > > +    libc_linker_feature=yes
> > > > > > +  fi
> > > > > > +fi
> > > > > > +rm -f conftest*
> > > > > > +if test $libc_linker_feature = yes; then
> > > > > > +  libc_cv_z_mark_plt=yes
> > > > > > +else
> > > > > > +  libc_cv_z_mark_plt=no
> > > > > > +fi
> > > > > > +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_linker_feature" >&5
> > > > > > +printf "%s\n" "$libc_linker_feature" >&6; }
> > > > > > +config_vars="$config_vars
> > > > > > +have-z-mark-plt = $libc_cv_z_mark_plt"
> > > > > > +
> > > > > >  if test x"$build_mathvec" = xnotset; then
> > > > > >    build_mathvec=yes
> > > > > >  fi
> > > > > > diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac
> > > > > > index 1215dcb1e4..40f642db31 100644
> > > > > > --- a/sysdeps/x86_64/configure.ac
> > > > > > +++ b/sysdeps/x86_64/configure.ac
> > > > > > @@ -10,6 +10,10 @@ LIBC_TRY_CC_OPTION([-mprefer-vector-width=128],
> > > > > >  LIBC_CONFIG_VAR([config-cflags-mprefer-vector-width],
> > > > > >                 [$libc_cv_cc_mprefer_vector_width])
> > > > > >
> > > > > > +LIBC_LINKER_FEATURE([-z mark-plt], [-Wl,-z,mark-plt],
> > > > > > +                    [libc_cv_z_mark_plt=yes], [libc_cv_z_mark_plt=no])
> > > > > > +LIBC_CONFIG_VAR([have-z-mark-plt], [$libc_cv_z_mark_plt])
> > > > > > +
> > > > > >  if test x"$build_mathvec" = xnotset; then
> > > > > >    build_mathvec=yes
> > > > > >  fi
> > > > > > diff --git a/sysdeps/x86_64/dl-dtprocnum.h b/sysdeps/x86_64/dl-dtprocnum.h
> > > > > > new file mode 100644
> > > > > > index 0000000000..dd41d95b88
> > > > > > --- /dev/null
> > > > > > +++ b/sysdeps/x86_64/dl-dtprocnum.h
> > > > > > @@ -0,0 +1,21 @@
> > > > > > +/* Configuration of lookup functions.  x64-64 version.
> > > > > > +   Copyright (C) 2023 Free Software Foundation, Inc.
> > > > > > +   This file is part of the GNU C Library.
> > > > > > +
> > > > > > +   The GNU C Library is free software; you can redistribute it and/or
> > > > > > +   modify it under the terms of the GNU Lesser General Public
> > > > > > +   License as published by the Free Software Foundation; either
> > > > > > +   version 2.1 of the License, or (at your option) any later version.
> > > > > > +
> > > > > > +   The GNU C Library is distributed in the hope that it will be useful,
> > > > > > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > > > > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > > > > > +   Lesser General Public License for more details.
> > > > > > +
> > > > > > +   You should have received a copy of the GNU Lesser General Public
> > > > > > +   License along with the GNU C Library; if not, see
> > > > > > +   <https://www.gnu.org/licenses/>.  */
> > > > > > +
> > > > > > +/* Number of extra dynamic section entries for this architecture.  By
> > > > > > +   default there are none.  */
> > > > > > +#define DT_THISPROCNUM DT_X86_64_NUM
> > > > > > diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
> > > > > > index 581a2f1a9e..973042f9e6 100644
> > > > > > --- a/sysdeps/x86_64/dl-machine.h
> > > > > > +++ b/sysdeps/x86_64/dl-machine.h
> > > > > > @@ -30,6 +30,9 @@
> > > > > >  #include <dl-machine-rel.h>
> > > > > >  #include <isa-level.h>
> > > > > >
> > > > > > +/* Translate a processor specific dynamic tag to the index in l_info array.  */
> > > > > > +#define DT_X86_64(x) (DT_X86_64_##x - DT_LOPROC + DT_NUM)
> > > > > > +
> > > > > >  /* Return nonzero iff ELF header is compatible with the running host.  */
> > > > > >  static inline int __attribute__ ((unused))
> > > > > >  elf_machine_matches_host (const ElfW(Ehdr) *ehdr)
> > > > > > @@ -304,8 +307,9 @@ and creates an unsatisfiable circular dependency.\n",
> > > > > >
> > > > > >        switch (r_type)
> > > > > >         {
> > > > > > -       case R_X86_64_GLOB_DAT:
> > > > > >         case R_X86_64_JUMP_SLOT:
> > > > > > +         map->l_has_jump_slot_reloc = true;
> > > > > > +       case R_X86_64_GLOB_DAT:
> > > > > >           *reloc_addr = value;
> > > > > >           break;
> > > > > >
> > > > > > @@ -541,3 +545,234 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
> > > > > >  }
> > > > > >
> > > > > >  #endif /* RESOLVE_MAP */
> > > > > > +
> > > > > > +#if !defined ELF_DYNAMIC_AFTER_RELOC && !defined RTLD_BOOTSTRAP \
> > > > > > +    && defined SHARED
> > > > > > +# define ELF_DYNAMIC_AFTER_RELOC(map, lazy) \
> > > > > > +  x86_64_dynamic_after_reloc (map, (lazy))
> > > > > > +
> > > > > > +static const char *
> > > > > > +x86_64_reloc_symbol_name (struct link_map *map, const ElfW(Rela) *reloc)
> > > > > > +{
> > > > > > +  const ElfW(Sym) *const symtab
> > > > > > +    = (const void *) map->l_info[DT_SYMTAB]->d_un.d_ptr;
> > > > > > +  const ElfW(Sym) *const refsym = &symtab[ELFW (R_SYM) (reloc->r_info)];
> > > > > > +  const char *strtab = (const char *) map->l_info[DT_STRTAB]->d_un.d_ptr;
> > > > > > +  return strtab + refsym->st_name;
> > > > > > +}
> > > > > > +
> > > > > > +static void
> > > > > > +x86_64_rewrite_plt (struct link_map *map, ElfW(Addr) plt_rewrite,
> > > > > > +                   ElfW(Addr) plt_aligned)
> > > > > > +{
> > > > > > +  ElfW(Addr) plt_rewrite_bias = plt_rewrite - plt_aligned;
> > > > > > +  ElfW(Addr) l_addr = map->l_addr;
> > > > > > +  ElfW(Addr) pltent = map->l_info[DT_X86_64 (PLTENT)]->d_un.d_val;
> > > > > > +  ElfW(Addr) start = map->l_info[DT_JMPREL]->d_un.d_ptr;
> > > > > > +  ElfW(Addr) size = map->l_info[DT_PLTRELSZ]->d_un.d_val;
> > > > > > +  const ElfW(Rela) *reloc = (const void *) start;
> > > > > > +  const ElfW(Rela) *reloc_end = (const void *) (start + size);
> > > > > > +
> > > > > > +  unsigned int feature_1 = THREAD_GETMEM (THREAD_SELF,
> > > > > > +                                         header.feature_1);
> > > > > > +  bool ibt_enabled_p
> > > > > > +    = (feature_1 & GNU_PROPERTY_X86_FEATURE_1_IBT) != 0;
> > > > > > +
> > > > > > +  if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
> > > > > > +    _dl_debug_printf ("\nchanging PLT in '%s' to direct branch\n",
> > > > > > +                     DSO_FILENAME (map->l_name));
> > > > > > +
> > > > > > +  for (; reloc < reloc_end; reloc++)
> > > > > > +    if (ELFW(R_TYPE) (reloc->r_info) == R_X86_64_JUMP_SLOT)
> > > > > > +      {
> > > > > > +       /* Get the value from the GOT entry.  */
> > > > > > +       ElfW(Addr) value = *(ElfW(Addr) *) (l_addr + reloc->r_offset);
> > > > > > +
> > > > > > +       /* Get the corresponding PLT entry from r_addend.  */
> > > > > > +       ElfW(Addr) branch_start = l_addr + reloc->r_addend;
> > > > > > +       /* Skip ENDBR64 if IBT isn't enabled.  */
> > > > > > +       if (!ibt_enabled_p)
> > > > > > +         branch_start = ALIGN_DOWN (branch_start, pltent);
> > > > > > +       /* Get the displacement from the branch target.  */
> > > > > > +       ElfW(Addr) disp = value - branch_start - 5;
> > > > IMO 5 should be a usefully named define. Maybe "JMP_INSN_SIZE" or something.
> > >
> > > Will fix it.
> > >
> > > > > > +       ElfW(Addr) plt_end;
> > > > > > +       ElfW(Addr) pad;
> > > > > > +
> > > > > > +       branch_start += plt_rewrite_bias;
> > > > > > +       plt_end = (branch_start & -pltent) + pltent;
> > > > > > +
> > > > > > +       /* Update the PLT entry.  */
> > > > > > +       if ((disp + 0x80000000ULL) <= 0xffffffffULL)
> > > > 0x80000000 -> (unsigned long long)I32_MIN?
> > > > 0xffffffff -> (unsigned long long)U32_MAX?
> > >
> > > I will change it.
> > >
> > > > Or at least a define.
> > > > Its easy to get the number of hex characters wrong.
> > > > > > +         {
> > > > > > +           /* If the target branch can be reached with a direct branch,
> > > > > > +              rewrite the PLT entry with a direct branch.  */
> > > > > > +           if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_BINDINGS))
> > > > > > +             {
> > > > > > +               const char *sym_name = x86_64_reloc_symbol_name (map,
> > > > > > +                                                                reloc);
> > > > > > +               _dl_debug_printf ("changing '%s' PLT entry in '%s' to "
> > > > > > +                                 "direct branch\n", sym_name,
> > > > > > +                                 DSO_FILENAME (map->l_name));
> > > > > > +             }
> > > > > > +
> > > > > > +           pad = branch_start + 5;
> > > > again a define.
> > >
> > > Will fix it.
> > >
> > > > > > +
> > > > > > +           if (__glibc_unlikely (pad > plt_end))
> > > > > > +             {
> > > > > > +               if (__glibc_unlikely (GLRO(dl_debug_mask)
> > > > > > +                                     & DL_DEBUG_BINDINGS))
> > > > > > +                 {
> > > > > > +                   const char *sym_name
> > > > > > +                     = x86_64_reloc_symbol_name (map, reloc);
> > > > > > +                   _dl_debug_printf ("\ninvalid r_addend of "
> > > > > > +                                     "R_X86_64_JUMP_SLOT against '%s' "
> > > > > > +                                     "in '%s'\n", sym_name,
> > > > > > +                                     DSO_FILENAME (map->l_name));
> > > > > > +                 }
> > > > > > +
> > > > > > +               continue;
> > > > > > +             }
> > > > > > +
> > > > > > +           /* Write out direct branch.  */
> > > > > > +           *(uint8_t *) branch_start = 0xe9;
> > > > will it ever be disp8?
> > >
> > > This should be as fast as possible.  Adding an unlikely disp8 check will
> > > slow it down.
> > >
> > > > But again, imo `0xe9` should be defined as something along the lines of
> > > > JMP_DISP32_OPCODE
> > >
> > > Will fix it.
> > >
> > > > > > +           *((uint32_t *) (branch_start + 1)) = disp;
> > > > Does it need to be a u32 with aliasing attribute? Maybe just memcpy?
> > >
> > > elf_machine_rela has some similar codes.  It is OK.
> > >
> > > > > > +         }
> > > > > > +       else
> > > > > > +         {
> > > > > > +           if (GL(dl_x86_feature_control).plt_rewrite
> > > > > > +               != plt_rewrite_jmpabs)
> > > > > > +             continue;
> > > > > > +
> > > > > > +           pad = branch_start + 11;
> > > > > > +
> > > > I assume this is 16 (plt entry size) - 5 (jmp insn size)?
> > > > Should this use the `DT_X86_64_PLTENT` field?
> > >
> > > Good idea.  Will change it.
> > >
> > > > > > +           if (pad > plt_end)
> > > > > > +             continue;
> > > > > > +
> > > > > > +           /* Rewrite the PLT entry with JMPABS.  */
> > > > > > +           if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_BINDINGS))
> > > > > > +             {
> > > > > > +               const char *sym_name = x86_64_reloc_symbol_name (map,
> > > > > > +                                                                reloc);
> > > > > > +               _dl_debug_printf ("changing '%s' PLT entry in '%s' to "
> > > > > > +                                 "JMPABS\n", sym_name,
> > > > > > +                                 DSO_FILENAME (map->l_name));
> > > > > > +             }
> > > > > > +
> > > > > > +           /* "jmpabs $target" for 64-bit displacement.  */
> > > > > > +           *(uint8_t *) (branch_start + 0) = 0xd5;
> > > > > > +           *(uint8_t *) (branch_start + 1) = 0x0;
> > > > > > +           *(uint8_t *) (branch_start + 2) = 0xa1;
> > > > > > +           *(uint64_t *) (branch_start + 3) = value;
> > > > again this is not a clear way to encode instructions...
> > >
> > > This is JMPABS in Intel APX, which has a 3-byte opcode + 64bit address.
> > >
> > > > What instruction is meant to be?
> > > > ```
> > > > .byte 0xd5, 0x0, 0xa1, 0x7f, 0x12, 0x34, 0x00, 0x00, 0x00, 0x00, 0x00
> > > > ```
> > > > ->
> > > > ```
> > > > 0000000000000000 <.text>:
> > > >        0: d5                            <unknown>
> > > >        1: 00 a1 7f 12 34 00            addb %ah, 3412607(%rcx)
> > > >        7: 00 00                        addb %al, (%rax)
> > > >        9: 00 00                        addb %al, (%rax)
> > > >
> > > > ```
> > >
> > > You need the latest binutils master branch which support JMPABS in
> > > Intel APX:
> > >
> > > 0000000000000000 <.text>:
> > >    0: d5 00 a1 7f 12 34 00 00 00 00 00 jmpabs $0x34127f
> > >
> > > > > > +         }
> > > > > > +
> > > > > > +       /* Fill the unused part of the PLT entry with INT3.  */
> > > > > > +       for (; pad < plt_end; pad++)
> > > > > > +         *(uint8_t *) pad = 0xcc;
> > > > maybe `INT3_OPCODE`?
> > >
> > > Will change it.
> > >
> > > > > > +      }
> > > > > > +}
> > > > > > +
> > > > > > +static inline void
> > > > > > +x86_64_rewrite_plt_in_place (struct link_map *map)
> > > > > > +{
> > > > > > +  /* Adjust DT_X86_64_PLT address and DT_X86_64_PLTSZ values.  */
> > > > > > +  ElfW(Addr) plt = (map->l_info[DT_X86_64 (PLT)]->d_un.d_ptr
> > > > > > +                   + map->l_addr);
> > > > > > +  size_t pagesize = GLRO(dl_pagesize);
> > > > > > +  ElfW(Addr) plt_aligned = ALIGN_DOWN (plt, pagesize);
> > > > > > +  size_t pltsz = (map->l_info[DT_X86_64 (PLTSZ)]->d_un.d_val
> > > > > > +                 + plt - plt_aligned);
> > > > > > +
> > > > > > +  if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
> > > > > > +    _dl_debug_printf ("\nchanging PLT in '%s' to writable\n",
> > > > > > +                     DSO_FILENAME (map->l_name));
> > > > > > +
> > > > > > +  if (__glibc_unlikely (__mprotect ((void *) plt_aligned, pltsz,
> > > > > > +                                   PROT_WRITE | PROT_READ) < 0))
> > > > > > +    {
> > > > > > +      if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
> > > > > > +       _dl_debug_printf ("\nfailed to change PLT in '%s' to writable\n",
> > > > > > +                         DSO_FILENAME (map->l_name));
> > > > > > +      return;
> > > > > > +    }
> > > > > > +
> > > > > > +  x86_64_rewrite_plt (map, plt_aligned, plt_aligned);
> > > > > > +
> > > > > > +  if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
> > > > > > +   _dl_debug_printf ("\nchanging PLT in '%s' back to read-only\n",
> > > > > > +          DSO_FILENAME (map->l_name));
> > > > > > +
> > > > > > +  if (__glibc_unlikely (__mprotect ((void *) plt_aligned, pltsz,
> > > > > > +                                   PROT_EXEC | PROT_READ) < 0))
> > > > > > +    _dl_signal_error (0, DSO_FILENAME (map->l_name), NULL,
> > > > > > +                     "failed to change PLT back to read-only");
> > > > > > +}
> > > > > > +
> > > > > > +/* Rewrite PLT entries to direct branch if possible.  */
> > > > > > +
> > > > > > +static inline void
> > > > > > +x86_64_dynamic_after_reloc (struct link_map *map, int lazy)
> > > > > > +{
> > > > > > +  /* Ignore DT_X86_64_PLT if the lazy binding is enabled.  */
> > > > > > +  if (lazy)
> > > > > > +    return;
> > > > > > +
> > > > > > +  if (__glibc_likely (map->l_info[DT_X86_64 (PLT)] == NULL))
> > > > > > +    return;
> > > > > > +
> > > > > > +  /* Ignore DT_X86_64_PLT if there is no R_X86_64_JUMP_SLOT.  */
> > > > > > +  if (!map->l_has_jump_slot_reloc)
> > > > > > +    return;
> > > > > > +
> > > > > > +  /* Ignore DT_X86_64_PLT on ld.so to avoid changing its own PLT.  */
> > > > > > +  if (map == &GL(dl_rtld_map) || map->l_real == &GL(dl_rtld_map))
> > > > > > +    return;
> > > > > > +
> > > > > > +  /* Ignore DT_X86_64_PLT if
> > > > > > +     1. DT_JMPREL isn't available or its value is 0.
> > > > > > +     2. DT_PLTRELSZ is 0.
> > > > > > +     3. DT_X86_64_PLTSZ isn't available or its value is 0.
> > > > > > +     4. DT_X86_64_PLTENT isn't available or its value is smaller
> > > > > > +     than 16 bytes.  */
> > > > > > +  if (map->l_info[DT_JMPREL] == NULL
> > > > > > +      || map->l_info[DT_JMPREL]->d_un.d_ptr == 0
> > > > > > +      || map->l_info[DT_PLTRELSZ]->d_un.d_val == 0
> > > > > > +      || map->l_info[DT_X86_64 (PLTSZ)] == NULL
> > > > > > +      || map->l_info[DT_X86_64 (PLTSZ)]->d_un.d_val == 0
> > > > > > +      || map->l_info[DT_X86_64 (PLTENT)] == NULL
> > > > > > +      || map->l_info[DT_X86_64 (PLTENT)]->d_un.d_val < 16)
> > > > > > +    return;
> > > > Another sanity check might be `PLTSZ % PLTENT == 0`
> > > > > > +
> > > > > > +  if (GL(dl_x86_feature_control).plt_rewrite == plt_rewrite_enabled)
> > > > > > +    {
> > > > > > +      /* PLT rewrite is enabled.  Check if mprotect works.  */
> > > > > > +      void *plt = __mmap (NULL, 4096, PROT_READ | PROT_WRITE,
> > > > > > +                         MAP_PRIVATE | MAP_ANONYMOUS,
> > > > > > +                         -1, 0);
> > > > > > +      if (__glibc_unlikely (plt == MAP_FAILED))
> > > > > > +       GL(dl_x86_feature_control).plt_rewrite = plt_rewrite_none;
> > > > imo clearer would be to just initialize:
> > > > GL(dl_x86_feature_control).plt_rewrite = plt_rewrite_none;
> > > > then make all the failure cases just return (or a goto if these is cleanup).
> > >
> > > This function may be called many times.   If the first mprotect check fails,
> > > we shouldn't call it again.   With this approach, the mprotect check will
> > > only be done once.
> > Yeah, i see that. Just instead of logic being
> >
> > if(fail0) {
> >  // set plt_rewrite_none
> > }
> > else {
> > ...
> > if(fail1)
> >  // set plt_rewrite_none
> > ...
> > }
> > if(plt_rewrite_none) {
> >  // return
> > }
> >
> > It might be cleaner to do:
> >
> > // set plt_rewrite_none
> > if(fail0) // return
> > ...
> > if(fail1) // return>
>
> We can't return after mprotect failure since we need
> to unmap the testing page.   How about this?
>
>  if (GL(dl_x86_feature_control).plt_rewrite == plt_rewrite_enabled)
>     {
>       enum dl_plt_rewrite_control plt_rewrite = plt_rewrite_none;
>
>       /* PLT rewrite is enabled.  Check if mprotect works.  */
>       void *plt = __mmap (NULL, 4096, PROT_READ | PROT_WRITE,
>                           MAP_PRIVATE | MAP_ANONYMOUS,
>                           -1, 0);
>       if (__glibc_unlikely (plt != MAP_FAILED))
>         {
>           /* Touch the PROT_READ | PROT_WRITE page.  */
>           *(int32_t *) plt = -1;
>
>           /* If the updated PROT_READ | PROT_WRITE page can be changed
>              to PROT_EXEC | PROT_READ, rewrite PLT.  */
>           if (__mprotect (plt, 4096, PROT_EXEC | PROT_READ) == 0)
>             /* Use JMPABS on APX processors.  */
>             plt_rewrite = (CPU_FEATURE_PRESENT_P (__get_cpu_features (),
>                                                   APX_F)
>                            ? plt_rewrite_jmpabs : plt_rewrite_jmp);
>
>           __munmap (plt, 4096);
>         }
>
>       GL(dl_x86_feature_control).plt_rewrite = plt_rewrite;
>     }
>
>   /* Ignore DT_X86_64_PLT if PLT rewrite isn't enabled.  */
>   if (GL(dl_x86_feature_control).plt_rewrite == plt_rewrite_none)
>     return;
>
imo this is cleaner, but not a big deal either way.
>
> > > > > > +      else
> > > > > > +       {
> > > > > > +         *(int32_t *) plt = -1;
> > > > What is this meant to be doing?
> > >
> > > Update the read-write page and check if it can be changed to read-exec.
> > >
> > How does writing -1 contribute to that?
>
> It touches the page before mprotect to PROT_EXEC | PROT_READ.
oh, you are making sure w->e even after being paged in.
sure.
although imo needs a comment, otherwise just appears to be a dead store.
>
> > > > > > +
> > > > > > +         /* If the memory can be changed to PROT_EXEC | PROT_READ,
> > > > > > +            rewrite PLT.  */
> > > > > > +         if (__mprotect (plt, 4096, PROT_EXEC | PROT_READ) == 0)
> > > > > > +           /* Use JMPABS on APX processors.  */
> > > > > > +           GL(dl_x86_feature_control).plt_rewrite
> > > > > > +             = (CPU_FEATURE_PRESENT_P (__get_cpu_features (), APX_F)
> > > > > > +                ? plt_rewrite_jmpabs
> > > > > > +                : plt_rewrite_jmp);
> > > > > > +         else
> > > > > > +           GL(dl_x86_feature_control).plt_rewrite = plt_rewrite_none;
> > > > > > +
> > > > > > +         __munmap (plt, 4096);
> > > > is the mmap just to see if we can get r/w/e permissions? If so is there a better
> > > > way to check memory permissions than mmap (which in the case of w->e IIRC
> > >
> > > I don't know if there is a better way.
> > >
> > you are just checking whether the OS supports such transitions?
>
> Yes.
>
> > > > will be expensive?
> > > > Is it possible for us to have the permissions now but a short while
> > > > later to not?
> > >
> > > It is very unlikely.  But I don't know for sure.
> > >
> > > > > > +       }
> > > > > > +    }
> > > > > > +
> > > > > > +  /* Ignore DT_X86_64_PLT if PLT rewrite isn't enabled.  */
> > > > > > +  if (GL(dl_x86_feature_control).plt_rewrite == plt_rewrite_none)
> > > > > > +    return;
> > > > > > +
> > > > > > +  x86_64_rewrite_plt_in_place (map);
> > > > > > +}
> > > > > > +#endif
> > > > > > diff --git a/sysdeps/x86_64/link_map.h b/sysdeps/x86_64/link_map.h
> > > > > > new file mode 100644
> > > > > > index 0000000000..ddb8e78077
> > > > > > --- /dev/null
> > > > > > +++ b/sysdeps/x86_64/link_map.h
> > > > > > @@ -0,0 +1,22 @@
> > > > > > +/* Additional fields in struct link_map.  x86-64 version.
> > > > > > +   Copyright (C) 2023 Free Software Foundation, Inc.
> > > > > > +   This file is part of the GNU C Library.
> > > > > > +
> > > > > > +   The GNU C Library is free software; you can redistribute it and/or
> > > > > > +   modify it under the terms of the GNU Lesser General Public
> > > > > > +   License as published by the Free Software Foundation; either
> > > > > > +   version 2.1 of the License, or (at your option) any later version.
> > > > > > +
> > > > > > +   The GNU C Library is distributed in the hope that it will be useful,
> > > > > > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > > > > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > > > > > +   Lesser General Public License for more details.
> > > > > > +
> > > > > > +   You should have received a copy of the GNU Lesser General Public
> > > > > > +   License along with the GNU C Library; if not, see
> > > > > > +   <https://www.gnu.org/licenses/>.  */
> > > > > > +
> > > > > > +/* Has R_X86_64_JUMP_SLOT relocation.  */
> > > > > > +bool l_has_jump_slot_reloc;
> > > > > > +
> > > > > > +#include <sysdeps/x86/link_map.h>
> > > > > > diff --git a/sysdeps/x86_64/tst-plt-rewrite1.c b/sysdeps/x86_64/tst-plt-rewrite1.c
> > > > > > new file mode 100644
> > > > > > index 0000000000..0dd4724a5a
> > > > > > --- /dev/null
> > > > > > +++ b/sysdeps/x86_64/tst-plt-rewrite1.c
> > > > > > @@ -0,0 +1,31 @@
> > > > > > +/* Test PLT rewrite.
> > > > > > +   Copyright (C) 2023 Free Software Foundation, Inc.
> > > > > > +   This file is part of the GNU C Library.
> > > > > > +
> > > > > > +   The GNU C Library is free software; you can redistribute it and/or
> > > > > > +   modify it under the terms of the GNU Lesser General Public
> > > > > > +   License as published by the Free Software Foundation; either
> > > > > > +   version 2.1 of the License, or (at your option) any later version.
> > > > > > +
> > > > > > +   The GNU C Library is distributed in the hope that it will be useful,
> > > > > > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > > > > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > > > > > +   Lesser General Public License for more details.
> > > > > > +
> > > > > > +   You should have received a copy of the GNU Lesser General Public
> > > > > > +   License along with the GNU C Library; if not, see
> > > > > > +   <https://www.gnu.org/licenses/>.  */
> > > > > > +
> > > > > > +#include <string.h>
> > > > > > +#include <support/check.h>
> > > > > > +
> > > > > > +extern const char *foo (void);
> > > > > > +
> > > > > > +static int
> > > > > > +do_test (void)
> > > > > > +{
> > > > > > +  TEST_COMPARE (strcmp (foo (), "PLT rewrite works"), 0);
> > > > > > +  return 0;
> > > > > > +}
> > > > > > +
> > > > > > +#include <support/test-driver.c>
> > > > > > diff --git a/sysdeps/x86_64/tst-plt-rewritemod1.c b/sysdeps/x86_64/tst-plt-rewritemod1.c
> > > > > > new file mode 100644
> > > > > > index 0000000000..361b907790
> > > > > > --- /dev/null
> > > > > > +++ b/sysdeps/x86_64/tst-plt-rewritemod1.c
> > > > > > @@ -0,0 +1,32 @@
> > > > > > +/* Check PLT rewrite works correctly.
> > > > > > +   Copyright (C) 2023 Free Software Foundation, Inc.
> > > > > > +   This file is part of the GNU C Library.
> > > > > > +
> > > > > > +   The GNU C Library is free software; you can redistribute it and/or
> > > > > > +   modify it under the terms of the GNU Lesser General Public
> > > > > > +   License as published by the Free Software Foundation; either
> > > > > > +   version 2.1 of the License, or (at your option) any later version.
> > > > > > +
> > > > > > +   The GNU C Library is distributed in the hope that it will be useful,
> > > > > > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > > > > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > > > > > +   Lesser General Public License for more details.
> > > > > > +
> > > > > > +   You should have received a copy of the GNU Lesser General Public
> > > > > > +   License along with the GNU C Library; if not, see
> > > > > > +   <https://www.gnu.org/licenses/>.  */
> > > > > > +
> > > > > > +/* foo calls bar with indirect branch via PLT.  PLT rewrite should
> > > > > > +   change it to direct branch.  */
> > > > > > +
> > > > > > +const char *
> > > > > > +bar (void)
> > > > > > +{
> > > > > > +  return "PLT rewrite works";
> > > > > > +}
> > > > > > +
> > > > > > +const char *
> > > > > > +foo (void)
> > > > > > +{
> > > > > > +  return bar ();
> > > > > > +}
> > > > > > --
> > > > > > 2.43.0
> > > > > >
> > > > >
> > > > > PING.
> > > > >
> > > > > --
> > > > > H.J.
> > >
> > >
> > >
> > > --
> > > H.J.
>
>
>
> --
> H.J.
H.J. Lu Jan. 2, 2024, 2:52 p.m. UTC | #10
On Mon, Jan 1, 2024 at 8:17 PM Sam James <sam@gentoo.org> wrote:
>
>
> "H.J. Lu" <hjl.tools@gmail.com> writes:
>
> > On Mon, Jan 1, 2024 at 3:28 PM Cristian Rodríguez <cristian@rodriguez.im> wrote:
> >>
> >>
> >>
> >> On Mon, Jan 1, 2024 at 11:13 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> >>>
> >>> On Thu, Dec 21, 2023 at 7:04 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> >>> >
> >>> > Changes in v2:
> >>> >
> >>> > 1. Changed tunables to glibc.cpu.plt_rewrite.
> >>> > 2. Added documentation.
> >>> > 3. Added a testcase.
> >>> >
> >>> > H.J.
> >>> > --
> >>> > Add ELF_DYNAMIC_AFTER_RELOC to allow target specific processing after
> >>> > relocation.
> >>> >
> >>>
> >>> > $ GLIBC_TUNABLES=glibc.cpu.plt_rewrite=1
> >>>
> >>
> >> Why does it default to off ? I'm pretty confident that the vast
> >> majority of users do not know what the heck the PLT even is, but are
> >> paying the price of it.. the only things that come to mind that
> >> could break are rather fragile debugging tools.
> >>
> >
> > SELinux may disallow it.
>
> Most people aren't running SELinux and the people who do (and their
> vendors) are well-used to adjusting policies for stuff like this,
> though.
>
> But we don't have any sort of global configuration file for tunables
> (yet), right? Just env vars?

A global configuration file for tunables can be useful in other cases.
H.J. Lu Jan. 2, 2024, 2:54 p.m. UTC | #11
On Mon, Jan 1, 2024 at 8:37 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Mon, Jan 1, 2024 at 7:56 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Mon, Jan 1, 2024 at 6:18 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > >
> > > On Mon, Jan 1, 2024 at 5:01 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > >
> > > > On Mon, Jan 1, 2024 at 10:36 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > > > >
> > > > > On Mon, Jan 1, 2024 at 6:13 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > > >
> > > > > > On Thu, Dec 21, 2023 at 7:04 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > > > >
> > > > > > > Changes in v2:
> > > > > > >
> > > > > > > 1. Changed tunables to glibc.cpu.plt_rewrite.
> > > > > > > 2. Added documentation.
> > > > > > > 3. Added a testcase.
> > > > > > >
> > > > > > > H.J.
> > > > > > > --
> > > > > > > Add ELF_DYNAMIC_AFTER_RELOC to allow target specific processing after
> > > > > > > relocation.
> > > > > > >
> > > > > > > For x86-64, add
> > > > > > >
> > > > > > >  #define DT_X86_64_PLT     (DT_LOPROC + 0)
> > > > > > >  #define DT_X86_64_PLTSZ   (DT_LOPROC + 1)
> > > > > > >  #define DT_X86_64_PLTENT  (DT_LOPROC + 3)
> > > > > > >
> > > > > > > 1. DT_X86_64_PLT: The address of the procedure linkage table.
> > > > > > > 2. DT_X86_64_PLTSZ: The total size, in bytes, of the procedure linkage
> > > > > > > table.
> > > > > > > 3. DT_X86_64_PLTENT: The size, in bytes, of a procedure linkage table
> > > > > > > entry.
> > > > > > >
> > > > > > > With the r_addend field of the R_X86_64_JUMP_SLOT relocation set to the
> > > > > > > memory offset of the indirect branch instruction.
> > > > > > >
> > > > > > > Define ELF_DYNAMIC_AFTER_RELOC for x86-64 to rewrite the PLT section
> > > > > > > with direct branch after relocation when the lazy binding is disabled.
> > > > > > > PLT rewrite is disabled by default.  Add
> > > > > > >
> > > > > > > $ GLIBC_TUNABLES=glibc.cpu.plt_rewrite=1
> > > > > > >
> > > > > > > to enable PLT rewrite at run-time.
> > > > > > > ---
> > > > > > >  elf/dynamic-link.h                   |   5 +
> > > > > > >  elf/elf.h                            |   5 +
> > > > > > >  elf/tst-glibcelf.py                  |   1 +
> > > > > > >  manual/tunables.texi                 |   9 +
> > > > > > >  scripts/glibcelf.py                  |   4 +
> > > > > > >  sysdeps/x86/cet-control.h            |  14 ++
> > > > > > >  sysdeps/x86/cpu-features.c           |  10 ++
> > > > > > >  sysdeps/x86/dl-procruntime.c         |   1 +
> > > > > > >  sysdeps/x86/dl-tunables.list         |   5 +
> > > > > > >  sysdeps/x86_64/Makefile              |  19 +++
> > > > > > >  sysdeps/x86_64/configure             |  35 ++++
> > > > > > >  sysdeps/x86_64/configure.ac          |   4 +
> > > > > > >  sysdeps/x86_64/dl-dtprocnum.h        |  21 +++
> > > > > > >  sysdeps/x86_64/dl-machine.h          | 237 ++++++++++++++++++++++++++-
> > > > > > >  sysdeps/x86_64/link_map.h            |  22 +++
> > > > > > >  sysdeps/x86_64/tst-plt-rewrite1.c    |  31 ++++
> > > > > > >  sysdeps/x86_64/tst-plt-rewritemod1.c |  32 ++++
> > > > > > >  17 files changed, 454 insertions(+), 1 deletion(-)
> > > > > > >  create mode 100644 sysdeps/x86_64/dl-dtprocnum.h
> > > > > > >  create mode 100644 sysdeps/x86_64/link_map.h
> > > > > > >  create mode 100644 sysdeps/x86_64/tst-plt-rewrite1.c
> > > > > > >  create mode 100644 sysdeps/x86_64/tst-plt-rewritemod1.c
> > > > > > >
> > > > > > > diff --git a/elf/dynamic-link.h b/elf/dynamic-link.h
> > > > > > > index e7f755fc75..5351671044 100644
> > > > > > > --- a/elf/dynamic-link.h
> > > > > > > +++ b/elf/dynamic-link.h
> > > > > > > @@ -177,6 +177,10 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
> > > > > > >        }                                                                              \
> > > > > > >    } while (0);
> > > > > > >
> > > > > > > +# ifndef ELF_DYNAMIC_AFTER_RELOC
> > > > > > > +#  define ELF_DYNAMIC_AFTER_RELOC(map, lazy)
> > > > > > > +# endif
> > > > > > > +
> > > > > > >  /* This can't just be an inline function because GCC is too dumb
> > > > > > >     to inline functions containing inlines themselves.  */
> > > > > > >  # ifdef RTLD_BOOTSTRAP
> > > > > > > @@ -192,6 +196,7 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
> > > > > > >        ELF_DYNAMIC_DO_RELR (map);                                             \
> > > > > > >      ELF_DYNAMIC_DO_REL ((map), (scope), edr_lazy, skip_ifunc);               \
> > > > > > >      ELF_DYNAMIC_DO_RELA ((map), (scope), edr_lazy, skip_ifunc);                      \
> > > > > > > +    ELF_DYNAMIC_AFTER_RELOC ((map), (edr_lazy));                             \
> > > > > > >    } while (0)
> > > > > > >
> > > > > > >  #endif
> > > > > > > diff --git a/elf/elf.h b/elf/elf.h
> > > > > > > index 5c1c1972d1..eda4802f56 100644
> > > > > > > --- a/elf/elf.h
> > > > > > > +++ b/elf/elf.h
> > > > > > > @@ -3639,6 +3639,11 @@ enum
> > > > > > >  /* x86-64 sh_type values.  */
> > > > > > >  #define SHT_X86_64_UNWIND      0x70000001 /* Unwind information.  */
> > > > > > >
> > > > > > > +/* x86-64 d_tag values.  */
> > > > > > > +#define DT_X86_64_PLT          (DT_LOPROC + 0)
> > > > > > > +#define DT_X86_64_PLTSZ                (DT_LOPROC + 1)
> > > > > > > +#define DT_X86_64_PLTENT       (DT_LOPROC + 3)
> > > > > Why not +2?
> > > >
> > > > From
> > > >
> > > > https://www.sco.com/developers/gabi/latest/ch5.dynamic.html#tag_encodings
> > > >
> > > > To make it simpler for tools to interpret the contents of dynamic
> > > > section entries,
> > > > the value of each tag, except for those in two special compatibility
> > > > ranges, will
> > > > determine the interpretation of the d_un union. A tag whose value is an even
> > > > number indicates a dynamic section entry that uses d_ptr. A tag whose value
> > > > is an odd number indicates a dynamic section entry that uses d_val or that
> > > > uses neither d_ptr nor d_val.
> > > >
> > > > > Also, to me at least, `PLTENT` reads as "number of PLT entries". Maybe
> > > > > `PLTENTSZ`?
> > > >
> > > > They are defined in x86-64 psABI:
> > > >
> > > >
> > > > https://gitlab.com/x86-psABIs/x86-64-ABI/-/commit/6d824a52a42d173eb838b879616c1be5870b593e
> > > >
> > > > The typical dynamic tag name for entry size is DT_XXX_YYYENT:
> > > >
> > > > #define DT_RELAENT 9 /* Size of one Rela reloc */
> > > > #define DT_SYMENT 11 /* Size of one symbol table entry */
> > > > #define DT_RELENT 19 /* Size of one Rel reloc */
> > > > #define DT_RELRENT 37 /* Size of one RELR relative relocaction
> > > >
> > > > > > > +#define DT_X86_64_NUM          4
> > > > > > >
> > > > > > >  /* AM33 relocations.  */
> > > > > > >  #define R_MN10300_NONE         0       /* No reloc.  */
> > > > > > > diff --git a/elf/tst-glibcelf.py b/elf/tst-glibcelf.py
> > > > > > > index 6142ca28ae..52293f4adf 100644
> > > > > > > --- a/elf/tst-glibcelf.py
> > > > > > > +++ b/elf/tst-glibcelf.py
> > > > > > > @@ -187,6 +187,7 @@ DT_VALNUM
> > > > > > >  DT_VALRNGHI
> > > > > > >  DT_VALRNGLO
> > > > > > >  DT_VERSIONTAGNUM
> > > > > > > +DT_X86_64_NUM
> > > > > > >  ELFCLASSNUM
> > > > > > >  ELFDATANUM
> > > > > > >  EM_NUM
> > > > > > > diff --git a/manual/tunables.texi b/manual/tunables.texi
> > > > > > > index b31f16da84..f9bd83622e 100644
> > > > > > > --- a/manual/tunables.texi
> > > > > > > +++ b/manual/tunables.texi
> > > > > > > @@ -57,6 +57,7 @@ glibc.pthread.stack_cache_size: 0x2800000 (min: 0x0, max: 0xffffffffffffffff)
> > > > > > >  glibc.cpu.hwcap_mask: 0x6 (min: 0x0, max: 0xffffffffffffffff)
> > > > > > >  glibc.malloc.mmap_max: 0 (min: 0, max: 2147483647)
> > > > > > >  glibc.elision.skip_trylock_internal_abort: 3 (min: 0, max: 2147483647)
> > > > > > > +glibc.cpu.plt_rewrite: 0 (min: 0, max: 1)
> > > > > > >  glibc.malloc.tcache_unsorted_limit: 0x0 (min: 0x0, max: 0xffffffffffffffff)
> > > > > > >  glibc.cpu.x86_ibt:
> > > > > > >  glibc.cpu.hwcaps:
> > > > > > > @@ -614,6 +615,14 @@ this tunable.
> > > > > > >  This tunable is specific to 64-bit x86-64.
> > > > > > >  @end deftp
> > > > > > >
> > > > > > > +@deftp Tunable glibc.cpu.plt_rewrite
> > > > > > > +When this tunable is set to @code{1}, the dynamic linker will rewrite
> > > > > > > +the PLT section with direct branch after relocation if possible when
> > > > > > > +the lazy binding is disabled.
> > > > > > > +
> > > > > > > +This tunable is specific to x86-64.
> > > > > > > +@end deftp
> > > > > > > +
> > > > > > >  @node Memory Related Tunables
> > > > > > >  @section Memory Related Tunables
> > > > > > >  @cindex memory related tunables
> > > > > > > diff --git a/scripts/glibcelf.py b/scripts/glibcelf.py
> > > > > > > index b52e83d613..3a21e25201 100644
> > > > > > > --- a/scripts/glibcelf.py
> > > > > > > +++ b/scripts/glibcelf.py
> > > > > > > @@ -439,6 +439,8 @@ class DtRISCV(Dt):
> > > > > > >      """Supplemental DT_* constants for EM_RISCV."""
> > > > > > >  class DtSPARC(Dt):
> > > > > > >      """Supplemental DT_* constants for EM_SPARC."""
> > > > > > > +class DtX86_64(Dt):
> > > > > > > +    """Supplemental DT_* constants for EM_X86_64."""
> > > > > > >  _dt_skip = '''
> > > > > > >  DT_ENCODING DT_PROCNUM
> > > > > > >  DT_ADDRRNGLO DT_ADDRRNGHI DT_ADDRNUM
> > > > > > > @@ -451,6 +453,7 @@ DT_MIPS_NUM
> > > > > > >  DT_PPC_NUM
> > > > > > >  DT_PPC64_NUM
> > > > > > >  DT_SPARC_NUM
> > > > > > > +DT_X86_64_NUM
> > > > > > >  '''.strip().split()
> > > > > > >  _register_elf_h(DtAARCH64, prefix='DT_AARCH64_', skip=_dt_skip, parent=Dt)
> > > > > > >  _register_elf_h(DtALPHA, prefix='DT_ALPHA_', skip=_dt_skip, parent=Dt)
> > > > > > > @@ -461,6 +464,7 @@ _register_elf_h(DtPPC, prefix='DT_PPC_', skip=_dt_skip, parent=Dt)
> > > > > > >  _register_elf_h(DtPPC64, prefix='DT_PPC64_', skip=_dt_skip, parent=Dt)
> > > > > > >  _register_elf_h(DtRISCV, prefix='DT_RISCV_', skip=_dt_skip, parent=Dt)
> > > > > > >  _register_elf_h(DtSPARC, prefix='DT_SPARC_', skip=_dt_skip, parent=Dt)
> > > > > > > +_register_elf_h(DtX86_64, prefix='DT_X86_64_', skip=_dt_skip, parent=Dt)
> > > > > > >  _register_elf_h(Dt, skip=_dt_skip, ranges=True)
> > > > > > >  del _dt_skip
> > > > > > >
> > > > > > > diff --git a/sysdeps/x86/cet-control.h b/sysdeps/x86/cet-control.h
> > > > > > > index 3bd00019e8..216d8cc576 100644
> > > > > > > --- a/sysdeps/x86/cet-control.h
> > > > > > > +++ b/sysdeps/x86/cet-control.h
> > > > > > > @@ -32,10 +32,24 @@ enum dl_x86_cet_control
> > > > > > >    cet_permissive
> > > > > > >  };
> > > > > > >
> > > > > > > +/* PLT rewrite control.  */
> > > > > > > +enum dl_plt_rewrite_control
> > > > > > > +{
> > > > > > > +  /* No PLT rewrite.  */
> > > > > > > +  plt_rewrite_none,
> > > > > > > +  /* PLT rewrite is enabled at run-time.  */
> > > > > > > +  plt_rewrite_enabled,
> > > > > > > +  /* Rewrite PLT with JMP at run-time.  */
> > > > > > > +  plt_rewrite_jmp,
> > > > > > > +  /* Rewrite PLT with JMPABS at run-time.  */
> > > > > > > +  plt_rewrite_jmpabs
> > > > > > > +};
> > > > > > > +
> > > > > > >  struct dl_x86_feature_control
> > > > > > >  {
> > > > > > >    enum dl_x86_cet_control ibt : 2;
> > > > > > >    enum dl_x86_cet_control shstk : 2;
> > > > > > > +  enum dl_plt_rewrite_control plt_rewrite : 2;
> > > > > > >  };
> > > > > > >
> > > > > > >  #endif /* cet-control.h */
> > > > > > > diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
> > > > > > > index 0bf923d48b..5f2962bbe5 100644
> > > > > > > --- a/sysdeps/x86/cpu-features.c
> > > > > > > +++ b/sysdeps/x86/cpu-features.c
> > > > > > > @@ -27,6 +27,13 @@
> > > > > > >  extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
> > > > > > >    attribute_hidden;
> > > > > > >
> > > > > > > +static void
> > > > > > > +TUNABLE_CALLBACK (set_plt_rewrite) (tunable_val_t *valp)
> > > > > > > +{
> > > > > > > +  if (valp->numval)
> > > > > > > +    GL(dl_x86_feature_control).plt_rewrite = plt_rewrite_enabled;
> > > > > > > +}
> > > > > > > +
> > > > > > >  #ifdef __LP64__
> > > > > > >  static void
> > > > > > >  TUNABLE_CALLBACK (set_prefer_map_32bit_exec) (tunable_val_t *valp)
> > > > > > > @@ -996,6 +1003,9 @@ no_cpuid:
> > > > > > >
> > > > > > >    TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
> > > > > > >
> > > > > > > +  TUNABLE_GET (plt_rewrite, tunable_val_t *,
> > > > > > > +              TUNABLE_CALLBACK (set_plt_rewrite));
> > > > > > > +
> > > > > > >  #ifdef __LP64__
> > > > > > >    TUNABLE_GET (prefer_map_32bit_exec, tunable_val_t *,
> > > > > > >                TUNABLE_CALLBACK (set_prefer_map_32bit_exec));
> > > > > > > diff --git a/sysdeps/x86/dl-procruntime.c b/sysdeps/x86/dl-procruntime.c
> > > > > > > index 2fb682ded3..03a612f3f3 100644
> > > > > > > --- a/sysdeps/x86/dl-procruntime.c
> > > > > > > +++ b/sysdeps/x86/dl-procruntime.c
> > > > > > > @@ -67,6 +67,7 @@ PROCINFO_CLASS struct dl_x86_feature_control _dl_x86_feature_control
> > > > > > >  = {
> > > > > > >      .ibt = DEFAULT_DL_X86_CET_CONTROL,
> > > > > > >      .shstk = DEFAULT_DL_X86_CET_CONTROL,
> > > > > > > +    .plt_rewrite = plt_rewrite_none,
> > > > > > >    }
> > > > > > >  # endif
> > > > > > >  # if !defined SHARED || defined PROCINFO_DECL
> > > > > > > diff --git a/sysdeps/x86/dl-tunables.list b/sysdeps/x86/dl-tunables.list
> > > > > > > index feb7004036..49da67fd74 100644
> > > > > > > --- a/sysdeps/x86/dl-tunables.list
> > > > > > > +++ b/sysdeps/x86/dl-tunables.list
> > > > > > > @@ -66,5 +66,10 @@ glibc {
> > > > > > >      x86_shared_cache_size {
> > > > > > >        type: SIZE_T
> > > > > > >      }
> > > > > > > +    plt_rewrite {
> > > > > > > +      type: INT_32
> > > > > > > +      minval: 0
> > > > > > > +      maxval: 1
> > > > > > > +    }
> > > > > > >    }
> > > > > > >  }
> > > > > > > diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
> > > > > > > index 00120ca9ca..106f1d0f55 100644
> > > > > > > --- a/sysdeps/x86_64/Makefile
> > > > > > > +++ b/sysdeps/x86_64/Makefile
> > > > > > > @@ -175,6 +175,25 @@ ifeq (no,$(build-hardcoded-path-in-tests))
> > > > > > >  tests-container += tst-glibc-hwcaps-cache
> > > > > > >  endif
> > > > > > >
> > > > > > > +ifeq (yes,$(have-z-mark-plt))
> > > > > > > +tests += \
> > > > > > > +  tst-plt-rewrite1 \
> > > > > > > +# tests
> > > > > > > +modules-names += \
> > > > > > > +  tst-plt-rewritemod1 \
> > > > > > > +# modules-names
> > > > > > > +
> > > > > > > +tst-plt-rewrite1-no-pie = yes
> > > > > > > +LDFLAGS-tst-plt-rewrite1 = -Wl,-z,mark-plt,-z,now
> > > > > > > +LDFLAGS-tst-plt-rewritemod1.so = -Wl,-z,mark-plt,-z,now
> > > > > > > +tst-plt-rewrite1-ENV = GLIBC_TUNABLES=glibc.cpu.plt_rewrite=1 LD_DEBUG=files:bindings
> > > > > > > +$(objpfx)tst-plt-rewrite1: $(objpfx)tst-plt-rewritemod1.so
> > > > > > > +$(objpfx)tst-plt-rewrite1.out: /dev/null $(objpfx)tst-plt-rewrite1
> > > > > > > +       $(tst-plt-rewrite1-ENV) $(make-test-out) > $@ 2>&1; \
> > > > > > > +       grep -q -E "changing 'bar' PLT entry in .*/elf/tst-plt-rewritemod1.so' to direct branch" $@; \
> > > > > > > +       $(evaluate-test)
> > > > > > > +endif
> > > > > > > +
> > > > > > >  endif # $(subdir) == elf
> > > > > > >
> > > > > > >  ifeq ($(subdir),csu)
> > > > > > > diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure
> > > > > > > index e307467afa..a15eaa349d 100755
> > > > > > > --- a/sysdeps/x86_64/configure
> > > > > > > +++ b/sysdeps/x86_64/configure
> > > > > > > @@ -25,6 +25,41 @@ printf "%s\n" "$libc_cv_cc_mprefer_vector_width" >&6; }
> > > > > > >  config_vars="$config_vars
> > > > > > >  config-cflags-mprefer-vector-width = $libc_cv_cc_mprefer_vector_width"
> > > > > > >
> > > > > > > +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for linker that supports -z mark-plt" >&5
> > > > > > > +printf %s "checking for linker that supports -z mark-plt... " >&6; }
> > > > > > > +libc_linker_feature=no
> > > > > > > +cat > conftest.c <<EOF
> > > > > > > +int _start (void) { return 42; }
> > > > > > > +EOF
> > > > > > > +if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS $no_ssp
> > > > > > > +                 -Wl,-z,mark-plt -nostdlib -nostartfiles
> > > > > > > +                 -fPIC -shared -o conftest.so conftest.c
> > > > > > > +                 1>&5'
> > > > > > > +  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
> > > > > > > +  (eval $ac_try) 2>&5
> > > > > > > +  ac_status=$?
> > > > > > > +  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
> > > > > > > +  test $ac_status = 0; }; }
> > > > > > > +then
> > > > > > > +  if ${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS $no_ssp -Wl,-z,mark-plt -nostdlib \
> > > > > > > +      -nostartfiles -fPIC -shared -o conftest.so conftest.c 2>&1 \
> > > > > > > +      | grep "warning: -z mark-plt ignored" > /dev/null 2>&1; then
> > > > > > > +    true
> > > > > > > +  else
> > > > > > > +    libc_linker_feature=yes
> > > > > > > +  fi
> > > > > > > +fi
> > > > > > > +rm -f conftest*
> > > > > > > +if test $libc_linker_feature = yes; then
> > > > > > > +  libc_cv_z_mark_plt=yes
> > > > > > > +else
> > > > > > > +  libc_cv_z_mark_plt=no
> > > > > > > +fi
> > > > > > > +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_linker_feature" >&5
> > > > > > > +printf "%s\n" "$libc_linker_feature" >&6; }
> > > > > > > +config_vars="$config_vars
> > > > > > > +have-z-mark-plt = $libc_cv_z_mark_plt"
> > > > > > > +
> > > > > > >  if test x"$build_mathvec" = xnotset; then
> > > > > > >    build_mathvec=yes
> > > > > > >  fi
> > > > > > > diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac
> > > > > > > index 1215dcb1e4..40f642db31 100644
> > > > > > > --- a/sysdeps/x86_64/configure.ac
> > > > > > > +++ b/sysdeps/x86_64/configure.ac
> > > > > > > @@ -10,6 +10,10 @@ LIBC_TRY_CC_OPTION([-mprefer-vector-width=128],
> > > > > > >  LIBC_CONFIG_VAR([config-cflags-mprefer-vector-width],
> > > > > > >                 [$libc_cv_cc_mprefer_vector_width])
> > > > > > >
> > > > > > > +LIBC_LINKER_FEATURE([-z mark-plt], [-Wl,-z,mark-plt],
> > > > > > > +                    [libc_cv_z_mark_plt=yes], [libc_cv_z_mark_plt=no])
> > > > > > > +LIBC_CONFIG_VAR([have-z-mark-plt], [$libc_cv_z_mark_plt])
> > > > > > > +
> > > > > > >  if test x"$build_mathvec" = xnotset; then
> > > > > > >    build_mathvec=yes
> > > > > > >  fi
> > > > > > > diff --git a/sysdeps/x86_64/dl-dtprocnum.h b/sysdeps/x86_64/dl-dtprocnum.h
> > > > > > > new file mode 100644
> > > > > > > index 0000000000..dd41d95b88
> > > > > > > --- /dev/null
> > > > > > > +++ b/sysdeps/x86_64/dl-dtprocnum.h
> > > > > > > @@ -0,0 +1,21 @@
> > > > > > > +/* Configuration of lookup functions.  x64-64 version.
> > > > > > > +   Copyright (C) 2023 Free Software Foundation, Inc.
> > > > > > > +   This file is part of the GNU C Library.
> > > > > > > +
> > > > > > > +   The GNU C Library is free software; you can redistribute it and/or
> > > > > > > +   modify it under the terms of the GNU Lesser General Public
> > > > > > > +   License as published by the Free Software Foundation; either
> > > > > > > +   version 2.1 of the License, or (at your option) any later version.
> > > > > > > +
> > > > > > > +   The GNU C Library is distributed in the hope that it will be useful,
> > > > > > > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > > > > > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > > > > > > +   Lesser General Public License for more details.
> > > > > > > +
> > > > > > > +   You should have received a copy of the GNU Lesser General Public
> > > > > > > +   License along with the GNU C Library; if not, see
> > > > > > > +   <https://www.gnu.org/licenses/>.  */
> > > > > > > +
> > > > > > > +/* Number of extra dynamic section entries for this architecture.  By
> > > > > > > +   default there are none.  */
> > > > > > > +#define DT_THISPROCNUM DT_X86_64_NUM
> > > > > > > diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
> > > > > > > index 581a2f1a9e..973042f9e6 100644
> > > > > > > --- a/sysdeps/x86_64/dl-machine.h
> > > > > > > +++ b/sysdeps/x86_64/dl-machine.h
> > > > > > > @@ -30,6 +30,9 @@
> > > > > > >  #include <dl-machine-rel.h>
> > > > > > >  #include <isa-level.h>
> > > > > > >
> > > > > > > +/* Translate a processor specific dynamic tag to the index in l_info array.  */
> > > > > > > +#define DT_X86_64(x) (DT_X86_64_##x - DT_LOPROC + DT_NUM)
> > > > > > > +
> > > > > > >  /* Return nonzero iff ELF header is compatible with the running host.  */
> > > > > > >  static inline int __attribute__ ((unused))
> > > > > > >  elf_machine_matches_host (const ElfW(Ehdr) *ehdr)
> > > > > > > @@ -304,8 +307,9 @@ and creates an unsatisfiable circular dependency.\n",
> > > > > > >
> > > > > > >        switch (r_type)
> > > > > > >         {
> > > > > > > -       case R_X86_64_GLOB_DAT:
> > > > > > >         case R_X86_64_JUMP_SLOT:
> > > > > > > +         map->l_has_jump_slot_reloc = true;
> > > > > > > +       case R_X86_64_GLOB_DAT:
> > > > > > >           *reloc_addr = value;
> > > > > > >           break;
> > > > > > >
> > > > > > > @@ -541,3 +545,234 @@ elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
> > > > > > >  }
> > > > > > >
> > > > > > >  #endif /* RESOLVE_MAP */
> > > > > > > +
> > > > > > > +#if !defined ELF_DYNAMIC_AFTER_RELOC && !defined RTLD_BOOTSTRAP \
> > > > > > > +    && defined SHARED
> > > > > > > +# define ELF_DYNAMIC_AFTER_RELOC(map, lazy) \
> > > > > > > +  x86_64_dynamic_after_reloc (map, (lazy))
> > > > > > > +
> > > > > > > +static const char *
> > > > > > > +x86_64_reloc_symbol_name (struct link_map *map, const ElfW(Rela) *reloc)
> > > > > > > +{
> > > > > > > +  const ElfW(Sym) *const symtab
> > > > > > > +    = (const void *) map->l_info[DT_SYMTAB]->d_un.d_ptr;
> > > > > > > +  const ElfW(Sym) *const refsym = &symtab[ELFW (R_SYM) (reloc->r_info)];
> > > > > > > +  const char *strtab = (const char *) map->l_info[DT_STRTAB]->d_un.d_ptr;
> > > > > > > +  return strtab + refsym->st_name;
> > > > > > > +}
> > > > > > > +
> > > > > > > +static void
> > > > > > > +x86_64_rewrite_plt (struct link_map *map, ElfW(Addr) plt_rewrite,
> > > > > > > +                   ElfW(Addr) plt_aligned)
> > > > > > > +{
> > > > > > > +  ElfW(Addr) plt_rewrite_bias = plt_rewrite - plt_aligned;
> > > > > > > +  ElfW(Addr) l_addr = map->l_addr;
> > > > > > > +  ElfW(Addr) pltent = map->l_info[DT_X86_64 (PLTENT)]->d_un.d_val;
> > > > > > > +  ElfW(Addr) start = map->l_info[DT_JMPREL]->d_un.d_ptr;
> > > > > > > +  ElfW(Addr) size = map->l_info[DT_PLTRELSZ]->d_un.d_val;
> > > > > > > +  const ElfW(Rela) *reloc = (const void *) start;
> > > > > > > +  const ElfW(Rela) *reloc_end = (const void *) (start + size);
> > > > > > > +
> > > > > > > +  unsigned int feature_1 = THREAD_GETMEM (THREAD_SELF,
> > > > > > > +                                         header.feature_1);
> > > > > > > +  bool ibt_enabled_p
> > > > > > > +    = (feature_1 & GNU_PROPERTY_X86_FEATURE_1_IBT) != 0;
> > > > > > > +
> > > > > > > +  if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
> > > > > > > +    _dl_debug_printf ("\nchanging PLT in '%s' to direct branch\n",
> > > > > > > +                     DSO_FILENAME (map->l_name));
> > > > > > > +
> > > > > > > +  for (; reloc < reloc_end; reloc++)
> > > > > > > +    if (ELFW(R_TYPE) (reloc->r_info) == R_X86_64_JUMP_SLOT)
> > > > > > > +      {
> > > > > > > +       /* Get the value from the GOT entry.  */
> > > > > > > +       ElfW(Addr) value = *(ElfW(Addr) *) (l_addr + reloc->r_offset);
> > > > > > > +
> > > > > > > +       /* Get the corresponding PLT entry from r_addend.  */
> > > > > > > +       ElfW(Addr) branch_start = l_addr + reloc->r_addend;
> > > > > > > +       /* Skip ENDBR64 if IBT isn't enabled.  */
> > > > > > > +       if (!ibt_enabled_p)
> > > > > > > +         branch_start = ALIGN_DOWN (branch_start, pltent);
> > > > > > > +       /* Get the displacement from the branch target.  */
> > > > > > > +       ElfW(Addr) disp = value - branch_start - 5;
> > > > > IMO 5 should be a usefully named define. Maybe "JMP_INSN_SIZE" or something.
> > > >
> > > > Will fix it.
> > > >
> > > > > > > +       ElfW(Addr) plt_end;
> > > > > > > +       ElfW(Addr) pad;
> > > > > > > +
> > > > > > > +       branch_start += plt_rewrite_bias;
> > > > > > > +       plt_end = (branch_start & -pltent) + pltent;
> > > > > > > +
> > > > > > > +       /* Update the PLT entry.  */
> > > > > > > +       if ((disp + 0x80000000ULL) <= 0xffffffffULL)
> > > > > 0x80000000 -> (unsigned long long)I32_MIN?
> > > > > 0xffffffff -> (unsigned long long)U32_MAX?
> > > >
> > > > I will change it.
> > > >
> > > > > Or at least a define.
> > > > > Its easy to get the number of hex characters wrong.
> > > > > > > +         {
> > > > > > > +           /* If the target branch can be reached with a direct branch,
> > > > > > > +              rewrite the PLT entry with a direct branch.  */
> > > > > > > +           if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_BINDINGS))
> > > > > > > +             {
> > > > > > > +               const char *sym_name = x86_64_reloc_symbol_name (map,
> > > > > > > +                                                                reloc);
> > > > > > > +               _dl_debug_printf ("changing '%s' PLT entry in '%s' to "
> > > > > > > +                                 "direct branch\n", sym_name,
> > > > > > > +                                 DSO_FILENAME (map->l_name));
> > > > > > > +             }
> > > > > > > +
> > > > > > > +           pad = branch_start + 5;
> > > > > again a define.
> > > >
> > > > Will fix it.
> > > >
> > > > > > > +
> > > > > > > +           if (__glibc_unlikely (pad > plt_end))
> > > > > > > +             {
> > > > > > > +               if (__glibc_unlikely (GLRO(dl_debug_mask)
> > > > > > > +                                     & DL_DEBUG_BINDINGS))
> > > > > > > +                 {
> > > > > > > +                   const char *sym_name
> > > > > > > +                     = x86_64_reloc_symbol_name (map, reloc);
> > > > > > > +                   _dl_debug_printf ("\ninvalid r_addend of "
> > > > > > > +                                     "R_X86_64_JUMP_SLOT against '%s' "
> > > > > > > +                                     "in '%s'\n", sym_name,
> > > > > > > +                                     DSO_FILENAME (map->l_name));
> > > > > > > +                 }
> > > > > > > +
> > > > > > > +               continue;
> > > > > > > +             }
> > > > > > > +
> > > > > > > +           /* Write out direct branch.  */
> > > > > > > +           *(uint8_t *) branch_start = 0xe9;
> > > > > will it ever be disp8?
> > > >
> > > > This should be as fast as possible.  Adding an unlikely disp8 check will
> > > > slow it down.
> > > >
> > > > > But again, imo `0xe9` should be defined as something along the lines of
> > > > > JMP_DISP32_OPCODE
> > > >
> > > > Will fix it.
> > > >
> > > > > > > +           *((uint32_t *) (branch_start + 1)) = disp;
> > > > > Does it need to be a u32 with aliasing attribute? Maybe just memcpy?
> > > >
> > > > elf_machine_rela has some similar codes.  It is OK.
> > > >
> > > > > > > +         }
> > > > > > > +       else
> > > > > > > +         {
> > > > > > > +           if (GL(dl_x86_feature_control).plt_rewrite
> > > > > > > +               != plt_rewrite_jmpabs)
> > > > > > > +             continue;
> > > > > > > +
> > > > > > > +           pad = branch_start + 11;
> > > > > > > +
> > > > > I assume this is 16 (plt entry size) - 5 (jmp insn size)?
> > > > > Should this use the `DT_X86_64_PLTENT` field?
> > > >
> > > > Good idea.  Will change it.
> > > >
> > > > > > > +           if (pad > plt_end)
> > > > > > > +             continue;
> > > > > > > +
> > > > > > > +           /* Rewrite the PLT entry with JMPABS.  */
> > > > > > > +           if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_BINDINGS))
> > > > > > > +             {
> > > > > > > +               const char *sym_name = x86_64_reloc_symbol_name (map,
> > > > > > > +                                                                reloc);
> > > > > > > +               _dl_debug_printf ("changing '%s' PLT entry in '%s' to "
> > > > > > > +                                 "JMPABS\n", sym_name,
> > > > > > > +                                 DSO_FILENAME (map->l_name));
> > > > > > > +             }
> > > > > > > +
> > > > > > > +           /* "jmpabs $target" for 64-bit displacement.  */
> > > > > > > +           *(uint8_t *) (branch_start + 0) = 0xd5;
> > > > > > > +           *(uint8_t *) (branch_start + 1) = 0x0;
> > > > > > > +           *(uint8_t *) (branch_start + 2) = 0xa1;
> > > > > > > +           *(uint64_t *) (branch_start + 3) = value;
> > > > > again this is not a clear way to encode instructions...
> > > >
> > > > This is JMPABS in Intel APX, which has a 3-byte opcode + 64bit address.
> > > >
> > > > > What instruction is meant to be?
> > > > > ```
> > > > > .byte 0xd5, 0x0, 0xa1, 0x7f, 0x12, 0x34, 0x00, 0x00, 0x00, 0x00, 0x00
> > > > > ```
> > > > > ->
> > > > > ```
> > > > > 0000000000000000 <.text>:
> > > > >        0: d5                            <unknown>
> > > > >        1: 00 a1 7f 12 34 00            addb %ah, 3412607(%rcx)
> > > > >        7: 00 00                        addb %al, (%rax)
> > > > >        9: 00 00                        addb %al, (%rax)
> > > > >
> > > > > ```
> > > >
> > > > You need the latest binutils master branch which support JMPABS in
> > > > Intel APX:
> > > >
> > > > 0000000000000000 <.text>:
> > > >    0: d5 00 a1 7f 12 34 00 00 00 00 00 jmpabs $0x34127f
> > > >
> > > > > > > +         }
> > > > > > > +
> > > > > > > +       /* Fill the unused part of the PLT entry with INT3.  */
> > > > > > > +       for (; pad < plt_end; pad++)
> > > > > > > +         *(uint8_t *) pad = 0xcc;
> > > > > maybe `INT3_OPCODE`?
> > > >
> > > > Will change it.
> > > >
> > > > > > > +      }
> > > > > > > +}
> > > > > > > +
> > > > > > > +static inline void
> > > > > > > +x86_64_rewrite_plt_in_place (struct link_map *map)
> > > > > > > +{
> > > > > > > +  /* Adjust DT_X86_64_PLT address and DT_X86_64_PLTSZ values.  */
> > > > > > > +  ElfW(Addr) plt = (map->l_info[DT_X86_64 (PLT)]->d_un.d_ptr
> > > > > > > +                   + map->l_addr);
> > > > > > > +  size_t pagesize = GLRO(dl_pagesize);
> > > > > > > +  ElfW(Addr) plt_aligned = ALIGN_DOWN (plt, pagesize);
> > > > > > > +  size_t pltsz = (map->l_info[DT_X86_64 (PLTSZ)]->d_un.d_val
> > > > > > > +                 + plt - plt_aligned);
> > > > > > > +
> > > > > > > +  if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
> > > > > > > +    _dl_debug_printf ("\nchanging PLT in '%s' to writable\n",
> > > > > > > +                     DSO_FILENAME (map->l_name));
> > > > > > > +
> > > > > > > +  if (__glibc_unlikely (__mprotect ((void *) plt_aligned, pltsz,
> > > > > > > +                                   PROT_WRITE | PROT_READ) < 0))
> > > > > > > +    {
> > > > > > > +      if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
> > > > > > > +       _dl_debug_printf ("\nfailed to change PLT in '%s' to writable\n",
> > > > > > > +                         DSO_FILENAME (map->l_name));
> > > > > > > +      return;
> > > > > > > +    }
> > > > > > > +
> > > > > > > +  x86_64_rewrite_plt (map, plt_aligned, plt_aligned);
> > > > > > > +
> > > > > > > +  if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
> > > > > > > +   _dl_debug_printf ("\nchanging PLT in '%s' back to read-only\n",
> > > > > > > +          DSO_FILENAME (map->l_name));
> > > > > > > +
> > > > > > > +  if (__glibc_unlikely (__mprotect ((void *) plt_aligned, pltsz,
> > > > > > > +                                   PROT_EXEC | PROT_READ) < 0))
> > > > > > > +    _dl_signal_error (0, DSO_FILENAME (map->l_name), NULL,
> > > > > > > +                     "failed to change PLT back to read-only");
> > > > > > > +}
> > > > > > > +
> > > > > > > +/* Rewrite PLT entries to direct branch if possible.  */
> > > > > > > +
> > > > > > > +static inline void
> > > > > > > +x86_64_dynamic_after_reloc (struct link_map *map, int lazy)
> > > > > > > +{
> > > > > > > +  /* Ignore DT_X86_64_PLT if the lazy binding is enabled.  */
> > > > > > > +  if (lazy)
> > > > > > > +    return;
> > > > > > > +
> > > > > > > +  if (__glibc_likely (map->l_info[DT_X86_64 (PLT)] == NULL))
> > > > > > > +    return;
> > > > > > > +
> > > > > > > +  /* Ignore DT_X86_64_PLT if there is no R_X86_64_JUMP_SLOT.  */
> > > > > > > +  if (!map->l_has_jump_slot_reloc)
> > > > > > > +    return;
> > > > > > > +
> > > > > > > +  /* Ignore DT_X86_64_PLT on ld.so to avoid changing its own PLT.  */
> > > > > > > +  if (map == &GL(dl_rtld_map) || map->l_real == &GL(dl_rtld_map))
> > > > > > > +    return;
> > > > > > > +
> > > > > > > +  /* Ignore DT_X86_64_PLT if
> > > > > > > +     1. DT_JMPREL isn't available or its value is 0.
> > > > > > > +     2. DT_PLTRELSZ is 0.
> > > > > > > +     3. DT_X86_64_PLTSZ isn't available or its value is 0.
> > > > > > > +     4. DT_X86_64_PLTENT isn't available or its value is smaller
> > > > > > > +     than 16 bytes.  */
> > > > > > > +  if (map->l_info[DT_JMPREL] == NULL
> > > > > > > +      || map->l_info[DT_JMPREL]->d_un.d_ptr == 0
> > > > > > > +      || map->l_info[DT_PLTRELSZ]->d_un.d_val == 0
> > > > > > > +      || map->l_info[DT_X86_64 (PLTSZ)] == NULL
> > > > > > > +      || map->l_info[DT_X86_64 (PLTSZ)]->d_un.d_val == 0
> > > > > > > +      || map->l_info[DT_X86_64 (PLTENT)] == NULL
> > > > > > > +      || map->l_info[DT_X86_64 (PLTENT)]->d_un.d_val < 16)
> > > > > > > +    return;
> > > > > Another sanity check might be `PLTSZ % PLTENT == 0`
> > > > > > > +
> > > > > > > +  if (GL(dl_x86_feature_control).plt_rewrite == plt_rewrite_enabled)
> > > > > > > +    {
> > > > > > > +      /* PLT rewrite is enabled.  Check if mprotect works.  */
> > > > > > > +      void *plt = __mmap (NULL, 4096, PROT_READ | PROT_WRITE,
> > > > > > > +                         MAP_PRIVATE | MAP_ANONYMOUS,
> > > > > > > +                         -1, 0);
> > > > > > > +      if (__glibc_unlikely (plt == MAP_FAILED))
> > > > > > > +       GL(dl_x86_feature_control).plt_rewrite = plt_rewrite_none;
> > > > > imo clearer would be to just initialize:
> > > > > GL(dl_x86_feature_control).plt_rewrite = plt_rewrite_none;
> > > > > then make all the failure cases just return (or a goto if these is cleanup).
> > > >
> > > > This function may be called many times.   If the first mprotect check fails,
> > > > we shouldn't call it again.   With this approach, the mprotect check will
> > > > only be done once.
> > > Yeah, i see that. Just instead of logic being
> > >
> > > if(fail0) {
> > >  // set plt_rewrite_none
> > > }
> > > else {
> > > ...
> > > if(fail1)
> > >  // set plt_rewrite_none
> > > ...
> > > }
> > > if(plt_rewrite_none) {
> > >  // return
> > > }
> > >
> > > It might be cleaner to do:
> > >
> > > // set plt_rewrite_none
> > > if(fail0) // return
> > > ...
> > > if(fail1) // return>
> >
> > We can't return after mprotect failure since we need
> > to unmap the testing page.   How about this?
> >
> >  if (GL(dl_x86_feature_control).plt_rewrite == plt_rewrite_enabled)
> >     {
> >       enum dl_plt_rewrite_control plt_rewrite = plt_rewrite_none;
> >
> >       /* PLT rewrite is enabled.  Check if mprotect works.  */
> >       void *plt = __mmap (NULL, 4096, PROT_READ | PROT_WRITE,
> >                           MAP_PRIVATE | MAP_ANONYMOUS,
> >                           -1, 0);
> >       if (__glibc_unlikely (plt != MAP_FAILED))
> >         {
> >           /* Touch the PROT_READ | PROT_WRITE page.  */
> >           *(int32_t *) plt = -1;
> >
> >           /* If the updated PROT_READ | PROT_WRITE page can be changed
> >              to PROT_EXEC | PROT_READ, rewrite PLT.  */
> >           if (__mprotect (plt, 4096, PROT_EXEC | PROT_READ) == 0)
> >             /* Use JMPABS on APX processors.  */
> >             plt_rewrite = (CPU_FEATURE_PRESENT_P (__get_cpu_features (),
> >                                                   APX_F)
> >                            ? plt_rewrite_jmpabs : plt_rewrite_jmp);
> >
> >           __munmap (plt, 4096);
> >         }
> >
> >       GL(dl_x86_feature_control).plt_rewrite = plt_rewrite;
> >     }
> >
> >   /* Ignore DT_X86_64_PLT if PLT rewrite isn't enabled.  */
> >   if (GL(dl_x86_feature_control).plt_rewrite == plt_rewrite_none)
> >     return;
> >
> imo this is cleaner, but not a big deal either way.

I will use it.

> >
> > > > > > > +      else
> > > > > > > +       {
> > > > > > > +         *(int32_t *) plt = -1;
> > > > > What is this meant to be doing?
> > > >
> > > > Update the read-write page and check if it can be changed to read-exec.
> > > >
> > > How does writing -1 contribute to that?
> >
> > It touches the page before mprotect to PROT_EXEC | PROT_READ.
> oh, you are making sure w->e even after being paged in.
> sure.
> although imo needs a comment, otherwise just appears to be a dead store.

Will add some comments.

Thanks.
diff mbox series

Patch

diff --git a/elf/dynamic-link.h b/elf/dynamic-link.h
index e7f755fc75..5351671044 100644
--- a/elf/dynamic-link.h
+++ b/elf/dynamic-link.h
@@ -177,6 +177,10 @@  elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
       }									      \
   } while (0);
 
+# ifndef ELF_DYNAMIC_AFTER_RELOC
+#  define ELF_DYNAMIC_AFTER_RELOC(map, lazy)
+# endif
+
 /* This can't just be an inline function because GCC is too dumb
    to inline functions containing inlines themselves.  */
 # ifdef RTLD_BOOTSTRAP
@@ -192,6 +196,7 @@  elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
       ELF_DYNAMIC_DO_RELR (map);					      \
     ELF_DYNAMIC_DO_REL ((map), (scope), edr_lazy, skip_ifunc);		      \
     ELF_DYNAMIC_DO_RELA ((map), (scope), edr_lazy, skip_ifunc);		      \
+    ELF_DYNAMIC_AFTER_RELOC ((map), (edr_lazy));			      \
   } while (0)
 
 #endif
diff --git a/elf/elf.h b/elf/elf.h
index 5c1c1972d1..eda4802f56 100644
--- a/elf/elf.h
+++ b/elf/elf.h
@@ -3639,6 +3639,11 @@  enum
 /* x86-64 sh_type values.  */
 #define SHT_X86_64_UNWIND	0x70000001 /* Unwind information.  */
 
+/* x86-64 d_tag values.  */
+#define DT_X86_64_PLT		(DT_LOPROC + 0)
+#define DT_X86_64_PLTSZ		(DT_LOPROC + 1)
+#define DT_X86_64_PLTENT	(DT_LOPROC + 3)
+#define DT_X86_64_NUM		4
 
 /* AM33 relocations.  */
 #define R_MN10300_NONE		0	/* No reloc.  */
diff --git a/elf/tst-glibcelf.py b/elf/tst-glibcelf.py
index 6142ca28ae..52293f4adf 100644
--- a/elf/tst-glibcelf.py
+++ b/elf/tst-glibcelf.py
@@ -187,6 +187,7 @@  DT_VALNUM
 DT_VALRNGHI
 DT_VALRNGLO
 DT_VERSIONTAGNUM
+DT_X86_64_NUM
 ELFCLASSNUM
 ELFDATANUM
 EM_NUM
diff --git a/manual/tunables.texi b/manual/tunables.texi
index b31f16da84..f9bd83622e 100644
--- a/manual/tunables.texi
+++ b/manual/tunables.texi
@@ -57,6 +57,7 @@  glibc.pthread.stack_cache_size: 0x2800000 (min: 0x0, max: 0xffffffffffffffff)
 glibc.cpu.hwcap_mask: 0x6 (min: 0x0, max: 0xffffffffffffffff)
 glibc.malloc.mmap_max: 0 (min: 0, max: 2147483647)
 glibc.elision.skip_trylock_internal_abort: 3 (min: 0, max: 2147483647)
+glibc.cpu.plt_rewrite: 0 (min: 0, max: 1)
 glibc.malloc.tcache_unsorted_limit: 0x0 (min: 0x0, max: 0xffffffffffffffff)
 glibc.cpu.x86_ibt:
 glibc.cpu.hwcaps:
@@ -614,6 +615,14 @@  this tunable.
 This tunable is specific to 64-bit x86-64.
 @end deftp
 
+@deftp Tunable glibc.cpu.plt_rewrite
+When this tunable is set to @code{1}, the dynamic linker will rewrite
+the PLT section with direct branch after relocation if possible when
+the lazy binding is disabled.
+
+This tunable is specific to x86-64.
+@end deftp
+
 @node Memory Related Tunables
 @section Memory Related Tunables
 @cindex memory related tunables
diff --git a/scripts/glibcelf.py b/scripts/glibcelf.py
index b52e83d613..3a21e25201 100644
--- a/scripts/glibcelf.py
+++ b/scripts/glibcelf.py
@@ -439,6 +439,8 @@  class DtRISCV(Dt):
     """Supplemental DT_* constants for EM_RISCV."""
 class DtSPARC(Dt):
     """Supplemental DT_* constants for EM_SPARC."""
+class DtX86_64(Dt):
+    """Supplemental DT_* constants for EM_X86_64."""
 _dt_skip = '''
 DT_ENCODING DT_PROCNUM
 DT_ADDRRNGLO DT_ADDRRNGHI DT_ADDRNUM
@@ -451,6 +453,7 @@  DT_MIPS_NUM
 DT_PPC_NUM
 DT_PPC64_NUM
 DT_SPARC_NUM
+DT_X86_64_NUM
 '''.strip().split()
 _register_elf_h(DtAARCH64, prefix='DT_AARCH64_', skip=_dt_skip, parent=Dt)
 _register_elf_h(DtALPHA, prefix='DT_ALPHA_', skip=_dt_skip, parent=Dt)
@@ -461,6 +464,7 @@  _register_elf_h(DtPPC, prefix='DT_PPC_', skip=_dt_skip, parent=Dt)
 _register_elf_h(DtPPC64, prefix='DT_PPC64_', skip=_dt_skip, parent=Dt)
 _register_elf_h(DtRISCV, prefix='DT_RISCV_', skip=_dt_skip, parent=Dt)
 _register_elf_h(DtSPARC, prefix='DT_SPARC_', skip=_dt_skip, parent=Dt)
+_register_elf_h(DtX86_64, prefix='DT_X86_64_', skip=_dt_skip, parent=Dt)
 _register_elf_h(Dt, skip=_dt_skip, ranges=True)
 del _dt_skip
 
diff --git a/sysdeps/x86/cet-control.h b/sysdeps/x86/cet-control.h
index 3bd00019e8..216d8cc576 100644
--- a/sysdeps/x86/cet-control.h
+++ b/sysdeps/x86/cet-control.h
@@ -32,10 +32,24 @@  enum dl_x86_cet_control
   cet_permissive
 };
 
+/* PLT rewrite control.  */
+enum dl_plt_rewrite_control
+{
+  /* No PLT rewrite.  */
+  plt_rewrite_none,
+  /* PLT rewrite is enabled at run-time.  */
+  plt_rewrite_enabled,
+  /* Rewrite PLT with JMP at run-time.  */
+  plt_rewrite_jmp,
+  /* Rewrite PLT with JMPABS at run-time.  */
+  plt_rewrite_jmpabs
+};
+
 struct dl_x86_feature_control
 {
   enum dl_x86_cet_control ibt : 2;
   enum dl_x86_cet_control shstk : 2;
+  enum dl_plt_rewrite_control plt_rewrite : 2;
 };
 
 #endif /* cet-control.h */
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 0bf923d48b..5f2962bbe5 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -27,6 +27,13 @@ 
 extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *)
   attribute_hidden;
 
+static void
+TUNABLE_CALLBACK (set_plt_rewrite) (tunable_val_t *valp)
+{
+  if (valp->numval)
+    GL(dl_x86_feature_control).plt_rewrite = plt_rewrite_enabled;
+}
+
 #ifdef __LP64__
 static void
 TUNABLE_CALLBACK (set_prefer_map_32bit_exec) (tunable_val_t *valp)
@@ -996,6 +1003,9 @@  no_cpuid:
 
   TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
 
+  TUNABLE_GET (plt_rewrite, tunable_val_t *,
+	       TUNABLE_CALLBACK (set_plt_rewrite));
+
 #ifdef __LP64__
   TUNABLE_GET (prefer_map_32bit_exec, tunable_val_t *,
 	       TUNABLE_CALLBACK (set_prefer_map_32bit_exec));
diff --git a/sysdeps/x86/dl-procruntime.c b/sysdeps/x86/dl-procruntime.c
index 2fb682ded3..03a612f3f3 100644
--- a/sysdeps/x86/dl-procruntime.c
+++ b/sysdeps/x86/dl-procruntime.c
@@ -67,6 +67,7 @@  PROCINFO_CLASS struct dl_x86_feature_control _dl_x86_feature_control
 = {
     .ibt = DEFAULT_DL_X86_CET_CONTROL,
     .shstk = DEFAULT_DL_X86_CET_CONTROL,
+    .plt_rewrite = plt_rewrite_none,
   }
 # endif
 # if !defined SHARED || defined PROCINFO_DECL
diff --git a/sysdeps/x86/dl-tunables.list b/sysdeps/x86/dl-tunables.list
index feb7004036..49da67fd74 100644
--- a/sysdeps/x86/dl-tunables.list
+++ b/sysdeps/x86/dl-tunables.list
@@ -66,5 +66,10 @@  glibc {
     x86_shared_cache_size {
       type: SIZE_T
     }
+    plt_rewrite {
+      type: INT_32
+      minval: 0
+      maxval: 1
+    }
   }
 }
diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
index 00120ca9ca..106f1d0f55 100644
--- a/sysdeps/x86_64/Makefile
+++ b/sysdeps/x86_64/Makefile
@@ -175,6 +175,25 @@  ifeq (no,$(build-hardcoded-path-in-tests))
 tests-container += tst-glibc-hwcaps-cache
 endif
 
+ifeq (yes,$(have-z-mark-plt))
+tests += \
+  tst-plt-rewrite1 \
+# tests
+modules-names += \
+  tst-plt-rewritemod1 \
+# modules-names
+
+tst-plt-rewrite1-no-pie = yes
+LDFLAGS-tst-plt-rewrite1 = -Wl,-z,mark-plt,-z,now
+LDFLAGS-tst-plt-rewritemod1.so = -Wl,-z,mark-plt,-z,now
+tst-plt-rewrite1-ENV = GLIBC_TUNABLES=glibc.cpu.plt_rewrite=1 LD_DEBUG=files:bindings
+$(objpfx)tst-plt-rewrite1: $(objpfx)tst-plt-rewritemod1.so
+$(objpfx)tst-plt-rewrite1.out: /dev/null $(objpfx)tst-plt-rewrite1
+	$(tst-plt-rewrite1-ENV) $(make-test-out) > $@ 2>&1; \
+	grep -q -E "changing 'bar' PLT entry in .*/elf/tst-plt-rewritemod1.so' to direct branch" $@; \
+	$(evaluate-test)
+endif
+
 endif # $(subdir) == elf
 
 ifeq ($(subdir),csu)
diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure
index e307467afa..a15eaa349d 100755
--- a/sysdeps/x86_64/configure
+++ b/sysdeps/x86_64/configure
@@ -25,6 +25,41 @@  printf "%s\n" "$libc_cv_cc_mprefer_vector_width" >&6; }
 config_vars="$config_vars
 config-cflags-mprefer-vector-width = $libc_cv_cc_mprefer_vector_width"
 
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for linker that supports -z mark-plt" >&5
+printf %s "checking for linker that supports -z mark-plt... " >&6; }
+libc_linker_feature=no
+cat > conftest.c <<EOF
+int _start (void) { return 42; }
+EOF
+if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS $no_ssp
+		  -Wl,-z,mark-plt -nostdlib -nostartfiles
+		  -fPIC -shared -o conftest.so conftest.c
+		  1>&5'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }
+then
+  if ${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS $no_ssp -Wl,-z,mark-plt -nostdlib \
+      -nostartfiles -fPIC -shared -o conftest.so conftest.c 2>&1 \
+      | grep "warning: -z mark-plt ignored" > /dev/null 2>&1; then
+    true
+  else
+    libc_linker_feature=yes
+  fi
+fi
+rm -f conftest*
+if test $libc_linker_feature = yes; then
+  libc_cv_z_mark_plt=yes
+else
+  libc_cv_z_mark_plt=no
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_linker_feature" >&5
+printf "%s\n" "$libc_linker_feature" >&6; }
+config_vars="$config_vars
+have-z-mark-plt = $libc_cv_z_mark_plt"
+
 if test x"$build_mathvec" = xnotset; then
   build_mathvec=yes
 fi
diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac
index 1215dcb1e4..40f642db31 100644
--- a/sysdeps/x86_64/configure.ac
+++ b/sysdeps/x86_64/configure.ac
@@ -10,6 +10,10 @@  LIBC_TRY_CC_OPTION([-mprefer-vector-width=128],
 LIBC_CONFIG_VAR([config-cflags-mprefer-vector-width],
 		[$libc_cv_cc_mprefer_vector_width])
 
+LIBC_LINKER_FEATURE([-z mark-plt], [-Wl,-z,mark-plt],
+                    [libc_cv_z_mark_plt=yes], [libc_cv_z_mark_plt=no])
+LIBC_CONFIG_VAR([have-z-mark-plt], [$libc_cv_z_mark_plt])
+
 if test x"$build_mathvec" = xnotset; then
   build_mathvec=yes
 fi
diff --git a/sysdeps/x86_64/dl-dtprocnum.h b/sysdeps/x86_64/dl-dtprocnum.h
new file mode 100644
index 0000000000..dd41d95b88
--- /dev/null
+++ b/sysdeps/x86_64/dl-dtprocnum.h
@@ -0,0 +1,21 @@ 
+/* Configuration of lookup functions.  x64-64 version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* Number of extra dynamic section entries for this architecture.  By
+   default there are none.  */
+#define DT_THISPROCNUM	DT_X86_64_NUM
diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
index 581a2f1a9e..973042f9e6 100644
--- a/sysdeps/x86_64/dl-machine.h
+++ b/sysdeps/x86_64/dl-machine.h
@@ -30,6 +30,9 @@ 
 #include <dl-machine-rel.h>
 #include <isa-level.h>
 
+/* Translate a processor specific dynamic tag to the index in l_info array.  */
+#define DT_X86_64(x) (DT_X86_64_##x - DT_LOPROC + DT_NUM)
+
 /* Return nonzero iff ELF header is compatible with the running host.  */
 static inline int __attribute__ ((unused))
 elf_machine_matches_host (const ElfW(Ehdr) *ehdr)
@@ -304,8 +307,9 @@  and creates an unsatisfiable circular dependency.\n",
 
       switch (r_type)
 	{
-	case R_X86_64_GLOB_DAT:
 	case R_X86_64_JUMP_SLOT:
+	  map->l_has_jump_slot_reloc = true;
+	case R_X86_64_GLOB_DAT:
 	  *reloc_addr = value;
 	  break;
 
@@ -541,3 +545,234 @@  elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
 }
 
 #endif /* RESOLVE_MAP */
+
+#if !defined ELF_DYNAMIC_AFTER_RELOC && !defined RTLD_BOOTSTRAP \
+    && defined SHARED
+# define ELF_DYNAMIC_AFTER_RELOC(map, lazy) \
+  x86_64_dynamic_after_reloc (map, (lazy))
+
+static const char *
+x86_64_reloc_symbol_name (struct link_map *map, const ElfW(Rela) *reloc)
+{
+  const ElfW(Sym) *const symtab
+    = (const void *) map->l_info[DT_SYMTAB]->d_un.d_ptr;
+  const ElfW(Sym) *const refsym = &symtab[ELFW (R_SYM) (reloc->r_info)];
+  const char *strtab = (const char *) map->l_info[DT_STRTAB]->d_un.d_ptr;
+  return strtab + refsym->st_name;
+}
+
+static void
+x86_64_rewrite_plt (struct link_map *map, ElfW(Addr) plt_rewrite,
+		    ElfW(Addr) plt_aligned)
+{
+  ElfW(Addr) plt_rewrite_bias = plt_rewrite - plt_aligned;
+  ElfW(Addr) l_addr = map->l_addr;
+  ElfW(Addr) pltent = map->l_info[DT_X86_64 (PLTENT)]->d_un.d_val;
+  ElfW(Addr) start = map->l_info[DT_JMPREL]->d_un.d_ptr;
+  ElfW(Addr) size = map->l_info[DT_PLTRELSZ]->d_un.d_val;
+  const ElfW(Rela) *reloc = (const void *) start;
+  const ElfW(Rela) *reloc_end = (const void *) (start + size);
+
+  unsigned int feature_1 = THREAD_GETMEM (THREAD_SELF,
+					  header.feature_1);
+  bool ibt_enabled_p
+    = (feature_1 & GNU_PROPERTY_X86_FEATURE_1_IBT) != 0;
+
+  if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
+    _dl_debug_printf ("\nchanging PLT in '%s' to direct branch\n",
+		      DSO_FILENAME (map->l_name));
+
+  for (; reloc < reloc_end; reloc++)
+    if (ELFW(R_TYPE) (reloc->r_info) == R_X86_64_JUMP_SLOT)
+      {
+	/* Get the value from the GOT entry.  */
+	ElfW(Addr) value = *(ElfW(Addr) *) (l_addr + reloc->r_offset);
+
+	/* Get the corresponding PLT entry from r_addend.  */
+	ElfW(Addr) branch_start = l_addr + reloc->r_addend;
+	/* Skip ENDBR64 if IBT isn't enabled.  */
+	if (!ibt_enabled_p)
+	  branch_start = ALIGN_DOWN (branch_start, pltent);
+	/* Get the displacement from the branch target.  */
+	ElfW(Addr) disp = value - branch_start - 5;
+	ElfW(Addr) plt_end;
+	ElfW(Addr) pad;
+
+	branch_start += plt_rewrite_bias;
+	plt_end = (branch_start & -pltent) + pltent;
+
+	/* Update the PLT entry.  */
+	if ((disp + 0x80000000ULL) <= 0xffffffffULL)
+	  {
+	    /* If the target branch can be reached with a direct branch,
+	       rewrite the PLT entry with a direct branch.  */
+	    if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_BINDINGS))
+	      {
+		const char *sym_name = x86_64_reloc_symbol_name (map,
+								 reloc);
+		_dl_debug_printf ("changing '%s' PLT entry in '%s' to "
+				  "direct branch\n", sym_name,
+				  DSO_FILENAME (map->l_name));
+	      }
+
+	    pad = branch_start + 5;
+
+	    if (__glibc_unlikely (pad > plt_end))
+	      {
+		if (__glibc_unlikely (GLRO(dl_debug_mask)
+				      & DL_DEBUG_BINDINGS))
+		  {
+		    const char *sym_name
+		      = x86_64_reloc_symbol_name (map, reloc);
+		    _dl_debug_printf ("\ninvalid r_addend of "
+				      "R_X86_64_JUMP_SLOT against '%s' "
+				      "in '%s'\n", sym_name,
+				      DSO_FILENAME (map->l_name));
+		  }
+
+		continue;
+	      }
+
+	    /* Write out direct branch.  */
+	    *(uint8_t *) branch_start = 0xe9;
+	    *((uint32_t *) (branch_start + 1)) = disp;
+	  }
+	else
+	  {
+	    if (GL(dl_x86_feature_control).plt_rewrite
+		!= plt_rewrite_jmpabs)
+	      continue;
+
+	    pad = branch_start + 11;
+
+	    if (pad > plt_end)
+	      continue;
+
+	    /* Rewrite the PLT entry with JMPABS.  */
+	    if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_BINDINGS))
+	      {
+		const char *sym_name = x86_64_reloc_symbol_name (map,
+								 reloc);
+		_dl_debug_printf ("changing '%s' PLT entry in '%s' to "
+				  "JMPABS\n", sym_name,
+				  DSO_FILENAME (map->l_name));
+	      }
+
+	    /* "jmpabs $target" for 64-bit displacement.  */
+	    *(uint8_t *) (branch_start + 0) = 0xd5;
+	    *(uint8_t *) (branch_start + 1) = 0x0;
+	    *(uint8_t *) (branch_start + 2) = 0xa1;
+	    *(uint64_t *) (branch_start + 3) = value;
+	  }
+
+	/* Fill the unused part of the PLT entry with INT3.  */
+	for (; pad < plt_end; pad++)
+	  *(uint8_t *) pad = 0xcc;
+      }
+}
+
+static inline void
+x86_64_rewrite_plt_in_place (struct link_map *map)
+{
+  /* Adjust DT_X86_64_PLT address and DT_X86_64_PLTSZ values.  */
+  ElfW(Addr) plt = (map->l_info[DT_X86_64 (PLT)]->d_un.d_ptr
+		    + map->l_addr);
+  size_t pagesize = GLRO(dl_pagesize);
+  ElfW(Addr) plt_aligned = ALIGN_DOWN (plt, pagesize);
+  size_t pltsz = (map->l_info[DT_X86_64 (PLTSZ)]->d_un.d_val
+		  + plt - plt_aligned);
+
+  if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
+    _dl_debug_printf ("\nchanging PLT in '%s' to writable\n",
+		      DSO_FILENAME (map->l_name));
+
+  if (__glibc_unlikely (__mprotect ((void *) plt_aligned, pltsz,
+				    PROT_WRITE | PROT_READ) < 0))
+    {
+      if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
+	_dl_debug_printf ("\nfailed to change PLT in '%s' to writable\n",
+			  DSO_FILENAME (map->l_name));
+      return;
+    }
+
+  x86_64_rewrite_plt (map, plt_aligned, plt_aligned);
+
+  if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_FILES))
+   _dl_debug_printf ("\nchanging PLT in '%s' back to read-only\n",
+          DSO_FILENAME (map->l_name));
+
+  if (__glibc_unlikely (__mprotect ((void *) plt_aligned, pltsz,
+				    PROT_EXEC | PROT_READ) < 0))
+    _dl_signal_error (0, DSO_FILENAME (map->l_name), NULL,
+		      "failed to change PLT back to read-only");
+}
+
+/* Rewrite PLT entries to direct branch if possible.  */
+
+static inline void
+x86_64_dynamic_after_reloc (struct link_map *map, int lazy)
+{
+  /* Ignore DT_X86_64_PLT if the lazy binding is enabled.  */
+  if (lazy)
+    return;
+
+  if (__glibc_likely (map->l_info[DT_X86_64 (PLT)] == NULL))
+    return;
+
+  /* Ignore DT_X86_64_PLT if there is no R_X86_64_JUMP_SLOT.  */
+  if (!map->l_has_jump_slot_reloc)
+    return;
+
+  /* Ignore DT_X86_64_PLT on ld.so to avoid changing its own PLT.  */
+  if (map == &GL(dl_rtld_map) || map->l_real == &GL(dl_rtld_map))
+    return;
+
+  /* Ignore DT_X86_64_PLT if
+     1. DT_JMPREL isn't available or its value is 0.
+     2. DT_PLTRELSZ is 0.
+     3. DT_X86_64_PLTSZ isn't available or its value is 0.
+     4. DT_X86_64_PLTENT isn't available or its value is smaller
+     than 16 bytes.  */
+  if (map->l_info[DT_JMPREL] == NULL
+      || map->l_info[DT_JMPREL]->d_un.d_ptr == 0
+      || map->l_info[DT_PLTRELSZ]->d_un.d_val == 0
+      || map->l_info[DT_X86_64 (PLTSZ)] == NULL
+      || map->l_info[DT_X86_64 (PLTSZ)]->d_un.d_val == 0
+      || map->l_info[DT_X86_64 (PLTENT)] == NULL
+      || map->l_info[DT_X86_64 (PLTENT)]->d_un.d_val < 16)
+    return;
+
+  if (GL(dl_x86_feature_control).plt_rewrite == plt_rewrite_enabled)
+    {
+      /* PLT rewrite is enabled.  Check if mprotect works.  */
+      void *plt = __mmap (NULL, 4096, PROT_READ | PROT_WRITE,
+			  MAP_PRIVATE | MAP_ANONYMOUS,
+			  -1, 0);
+      if (__glibc_unlikely (plt == MAP_FAILED))
+	GL(dl_x86_feature_control).plt_rewrite = plt_rewrite_none;
+      else
+	{
+	  *(int32_t *) plt = -1;
+
+	  /* If the memory can be changed to PROT_EXEC | PROT_READ,
+	     rewrite PLT.  */
+	  if (__mprotect (plt, 4096, PROT_EXEC | PROT_READ) == 0)
+	    /* Use JMPABS on APX processors.  */
+	    GL(dl_x86_feature_control).plt_rewrite
+	      = (CPU_FEATURE_PRESENT_P (__get_cpu_features (), APX_F)
+		 ? plt_rewrite_jmpabs
+		 : plt_rewrite_jmp);
+	  else
+	    GL(dl_x86_feature_control).plt_rewrite = plt_rewrite_none;
+
+	  __munmap (plt, 4096);
+	}
+    }
+
+  /* Ignore DT_X86_64_PLT if PLT rewrite isn't enabled.  */
+  if (GL(dl_x86_feature_control).plt_rewrite == plt_rewrite_none)
+    return;
+
+  x86_64_rewrite_plt_in_place (map);
+}
+#endif
diff --git a/sysdeps/x86_64/link_map.h b/sysdeps/x86_64/link_map.h
new file mode 100644
index 0000000000..ddb8e78077
--- /dev/null
+++ b/sysdeps/x86_64/link_map.h
@@ -0,0 +1,22 @@ 
+/* Additional fields in struct link_map.  x86-64 version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* Has R_X86_64_JUMP_SLOT relocation.  */
+bool l_has_jump_slot_reloc;
+
+#include <sysdeps/x86/link_map.h>
diff --git a/sysdeps/x86_64/tst-plt-rewrite1.c b/sysdeps/x86_64/tst-plt-rewrite1.c
new file mode 100644
index 0000000000..0dd4724a5a
--- /dev/null
+++ b/sysdeps/x86_64/tst-plt-rewrite1.c
@@ -0,0 +1,31 @@ 
+/* Test PLT rewrite.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <string.h>
+#include <support/check.h>
+
+extern const char *foo (void);
+
+static int
+do_test (void)
+{
+  TEST_COMPARE (strcmp (foo (), "PLT rewrite works"), 0);
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/x86_64/tst-plt-rewritemod1.c b/sysdeps/x86_64/tst-plt-rewritemod1.c
new file mode 100644
index 0000000000..361b907790
--- /dev/null
+++ b/sysdeps/x86_64/tst-plt-rewritemod1.c
@@ -0,0 +1,32 @@ 
+/* Check PLT rewrite works correctly.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* foo calls bar with indirect branch via PLT.  PLT rewrite should
+   change it to direct branch.  */
+
+const char *
+bar (void)
+{
+  return "PLT rewrite works";
+}
+
+const char *
+foo (void)
+{
+  return bar ();
+}