diff mbox series

[v5,1/2] rtld: Add rtld.nns tunable for the number of supported namespaces

Message ID 9e6146dfb8a23058a086b9cef668b6a4c610ec14.1592841472.git.szabolcs.nagy@arm.com
State New
Headers show
Series Improve surplus TLS accounting | expand

Commit Message

Szabolcs Nagy June 22, 2020, 4:21 p.m. UTC
TLS_STATIC_SURPLUS is 1664 bytes currently which is not enough to
support DL_NNS (== 16) number of dynamic link namespaces, if we
assume 192 bytes of TLS are reserved for libc use and 144 bytes
are reserved for other system libraries that use IE TLS.

A new tunable is introduced to control the number of supported
namespaces and to adjust the surplus static TLS size as follows:

surplus_tls = 192 * (rtld.nns-1) + 144 * rtld.nns + 512

The default is rtld.nns == 4 and then the surplus TLS size is the
same as before, so the behaviour is unchanged by default. If an
application creates more namespaces than the rtld.nns setting
allows, then it is not guaranteed to work, but the limit is not
checked. So existing usage will continue to work, but in the
future if an application creates more than 4 dynamic link
namespaces then the tunable will need to be set.

In this patch DL_NNS is a fixed value and provides a maximum to
the rtld.nns setting.

Static linking used fixed 2048 bytes surplus TLS, this is changed
so the same contract is used as for dynamic linking.  With static
linking DL_NNS == 1 so rtld.nns tunable is forced to 1, so by
default the surplus TLS is reduced to 144 + 512 = 656 bytes. This
change is not expected to cause problems.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.

---

v5:
- Split the patch into two: rtld.nns and tls optimization parts.
- Use rtld.nns instead of dl.nns.
- Renamed the init function and moved the calls close to
  the first use of surplus tls size.
- Updated the tunable documentation.
v4:
- Rebased and moved this log out of the commit message.
- Minor commit message wording changes.
v3:
- archived at
  https://sourceware.org/pipermail/libc-alpha/2020-March/111660.html
- Replace TLS_STATIC_SURPLUS with GLRO(dl_tls_static_surplus) and
  simplify related logic.
- In case of static linking, replace GL(dl_tls_static_size) with
  GLRO(dl_tls_static_surplus) in the code paths before the
  GL(dl_tls_static_size) value is actually computed.
- Update comments and the test code.
- Document the new tunables.
- Update description, mention static linking.
v2:
- Add dl.nns tunable.
- Add dl.optional_static_tls tunable.
- New surplus TLS usage contract that works reliably up to dl.nns
  namespaces.
---
 csu/libc-tls.c             | 28 +++++++++----------
 elf/dl-tls.c               | 55 ++++++++++++++++++++++++++++++++++----
 elf/dl-tunables.list       |  9 +++++++
 elf/rtld.c                 |  3 +++
 manual/tunables.texi       | 21 +++++++++++++++
 sysdeps/generic/ldsodefs.h |  8 ++++++
 6 files changed, 105 insertions(+), 19 deletions(-)
diff mbox series

Patch

diff --git a/csu/libc-tls.c b/csu/libc-tls.c
index 73ade0fec5..e2603157e8 100644
--- a/csu/libc-tls.c
+++ b/csu/libc-tls.c
@@ -46,13 +46,16 @@  bool _dl_tls_dtv_gaps;
 struct dtv_slotinfo_list *_dl_tls_dtv_slotinfo_list;
 /* Number of modules in the static TLS block.  */
 size_t _dl_tls_static_nelem;
-/* Size of the static TLS block.  Giving this initialized value
-   preallocates some surplus bytes in the static TLS area.  */
-size_t _dl_tls_static_size = 2048;
+/* Size of the static TLS block.  */
+size_t _dl_tls_static_size;
 /* Size actually allocated in the static TLS block.  */
 size_t _dl_tls_static_used;
 /* Alignment requirement of the static TLS block.  */
 size_t _dl_tls_static_align;
+/* Size of surplus space in the static TLS area for dynamically
+   loaded modules with IE-model TLS or for TLSDESC optimization.
+   See comments in elf/dl-tls.c where it is initialized.  */
+size_t _dl_tls_static_surplus;
 
 /* Generation counter for the dtv.  */
 size_t _dl_tls_generation;
@@ -81,10 +84,8 @@  init_slotinfo (void)
 static void
 init_static_tls (size_t memsz, size_t align)
 {
-  /* That is the size of the TLS memory for this object.  The initialized
-     value of _dl_tls_static_size is provided by dl-open.c to request some
-     surplus that permits dynamic loading of modules with IE-model TLS.  */
-  GL(dl_tls_static_size) = roundup (memsz + GL(dl_tls_static_size),
+  /* That is the size of the TLS memory for this object.  */
+  GL(dl_tls_static_size) = roundup (memsz + GLRO(dl_tls_static_surplus),
 				    TLS_TCB_ALIGN);
 #if TLS_TCB_AT_TP
   GL(dl_tls_static_size) += TLS_TCB_SIZE;
@@ -125,25 +126,24 @@  __libc_setup_tls (void)
 	  break;
 	}
 
+  /* Calculate the size of the static TLS surplus.  */
+  _dl_tls_static_surplus_init ();
+
   /* We have to set up the TCB block which also (possibly) contains
      'errno'.  Therefore we avoid 'malloc' which might touch 'errno'.
      Instead we use 'sbrk' which would only uses 'errno' if it fails.
      In this case we are right away out of memory and the user gets
-     what she/he deserves.
-
-     The initialized value of _dl_tls_static_size is provided by dl-open.c
-     to request some surplus that permits dynamic loading of modules with
-     IE-model TLS.  */
+     what she/he deserves.  */
 #if TLS_TCB_AT_TP
   /* Align the TCB offset to the maximum alignment, as
      _dl_allocate_tls_storage (in elf/dl-tls.c) does using __libc_memalign
      and dl_tls_static_align.  */
-  tcb_offset = roundup (memsz + GL(dl_tls_static_size), max_align);
+  tcb_offset = roundup (memsz + GLRO(dl_tls_static_surplus), max_align);
   tlsblock = __sbrk (tcb_offset + TLS_INIT_TCB_SIZE + max_align);
 #elif TLS_DTV_AT_TP
   tcb_offset = roundup (TLS_INIT_TCB_SIZE, align ?: 1);
   tlsblock = __sbrk (tcb_offset + memsz + max_align
-		     + TLS_PRE_TCB_SIZE + GL(dl_tls_static_size));
+		     + TLS_PRE_TCB_SIZE + GLRO(dl_tls_static_surplus));
   tlsblock += TLS_PRE_TCB_SIZE;
 #else
   /* In case a model with a different layout for the TCB and DTV
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
index fa03234610..2201a1cc1d 100644
--- a/elf/dl-tls.c
+++ b/elf/dl-tls.c
@@ -29,10 +29,54 @@ 
 #include <dl-tls.h>
 #include <ldsodefs.h>
 
-/* Amount of excess space to allocate in the static TLS area
-   to allow dynamic loading of modules defining IE-model TLS data.  */
-#define TLS_STATIC_SURPLUS	64 + DL_NNS * 100
+#define TUNABLE_NAMESPACE rtld
+#include <dl-tunables.h>
+
+/* Surplus static TLS, GLRO(dl_tls_static_surplus), is used for
+
+   - IE TLS in libc.so for all dlmopen namespaces except in the initial
+     one where libc.so is not loaded dynamically but at startup time,
+   - IE TLS in other libraries which may be dynamically loaded even in the
+     initial namespace,
+   - and optionally for optimizing dynamic TLS access.
+
+   The maximum number of namespaces is DL_NNS, but to support that many
+   namespaces correctly the static TLS allocation should be significantly
+   increased, which may cause problems with small thread stacks due to the
+   way static TLS is accounted (bug 11787).
+
+   So there is a rtld.nns tunable limit on the number of supported namespaces
+   that affects the size of the static TLS and by default it's small enough
+   not to cause problems with existing applications. The limit is not
+   enforced or checked: it is the user's responsibility to increase rtld.nns
+   if more dlmopen namespaces are used.  */
+
+/* Size of initial-exec TLS in libc.so.  */
+#define LIBC_IE_TLS 192
+/* Size of initial-exec TLS in libraries other than libc.so.
+   This should be large enough to cover runtime libraries of the
+   compiler such as libgomp and libraries in libc other than libc.so.  */
+#define OTHER_IE_TLS 144
+/* Size of additional surplus TLS, placeholder for TLS optimizations.  */
+#define OPT_SURPLUS_TLS 512
 
+void
+_dl_tls_static_surplus_init (void)
+{
+  size_t nns;
+
+#if HAVE_TUNABLES
+  nns = TUNABLE_GET (nns, size_t, NULL);
+#else
+  /* Default values of the tunables.  */
+  nns = 4;
+#endif
+  if (nns > DL_NNS)
+    nns = DL_NNS;
+  GLRO(dl_tls_static_surplus) = ((nns - 1) * LIBC_IE_TLS
+				 + nns * OTHER_IE_TLS
+				 + OPT_SURPLUS_TLS);
+}
 
 /* Out-of-memory handler.  */
 static void
@@ -218,7 +262,8 @@  _dl_determine_tlsoffset (void)
     }
 
   GL(dl_tls_static_used) = offset;
-  GL(dl_tls_static_size) = (roundup (offset + TLS_STATIC_SURPLUS, max_align)
+  GL(dl_tls_static_size) = (roundup (offset + GLRO(dl_tls_static_surplus),
+				     max_align)
 			    + TLS_TCB_SIZE);
 #elif TLS_DTV_AT_TP
   /* The TLS blocks start right after the TCB.  */
@@ -262,7 +307,7 @@  _dl_determine_tlsoffset (void)
     }
 
   GL(dl_tls_static_used) = offset;
-  GL(dl_tls_static_size) = roundup (offset + TLS_STATIC_SURPLUS,
+  GL(dl_tls_static_size) = roundup (offset + GLRO(dl_tls_static_surplus),
 				    TLS_TCB_ALIGN);
 #else
 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
index 0d398dd251..b07742d7b3 100644
--- a/elf/dl-tunables.list
+++ b/elf/dl-tunables.list
@@ -126,4 +126,13 @@  glibc {
       default: 3
     }
   }
+
+  rtld {
+    nns {
+      type: SIZE_T
+      minval: 1
+      maxval: 16
+      default: 4
+    }
+  }
 }
diff --git a/elf/rtld.c b/elf/rtld.c
index f4c2602d65..f339f6894f 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -780,6 +780,9 @@  init_tls (void)
       }
   assert (i == GL(dl_tls_max_dtv_idx));
 
+  /* Calculate the size of the static TLS surplus.  */
+  _dl_tls_static_surplus_init ();
+
   /* Compute the TLS offsets for the various blocks.  */
   _dl_determine_tlsoffset ();
 
diff --git a/manual/tunables.texi b/manual/tunables.texi
index ec18b10834..978e08f4fb 100644
--- a/manual/tunables.texi
+++ b/manual/tunables.texi
@@ -31,6 +31,7 @@  their own namespace.
 @menu
 * Tunable names::  The structure of a tunable name
 * Memory Allocation Tunables::  Tunables in the memory allocation subsystem
+* Dynamic Linking Tunables:: Tunables in the dynamic linking subsystem
 * Elision Tunables::  Tunables in elision subsystem
 * POSIX Thread Tunables:: Tunables in the POSIX thread subsystem
 * Hardware Capability Tunables::  Tunables that modify the hardware
@@ -226,6 +227,26 @@  pointer, so add 4 on 32-bit systems or 8 on 64-bit systems to the size
 passed to @code{malloc} for the largest bin size to enable.
 @end deftp
 
+@node Dynamic Linking Tunables
+@section Dynamic Linking Tunables
+@cindex dynamic linking tunables
+@cindex rtld tunables
+
+@deftp {Tunable namespace} glibc.rtld
+Dynamic linker behavior can be modified by setting the
+following tunables in the @code{rtld} namespace:
+@end deftp
+
+@deftp Tunable glibc.rtld.nns
+Sets the number of supported dynamic link namespaces (see @code{dlmopen}).
+Currently this limit can be set between 1 and 16 inclusive, the default is 4.
+Each link namespace consumes some memory in all thread, and thus raising the
+limit will increase the amount of memory each thread uses. Raising the limit
+is useful when your application uses more than 4 dynamic linker audit modules
+e.g. LD_AUDIT, or will use more than 4 dynamic link namespaces as created
+by @code{dlmopen} with an lmid argument of @code{LM_ID_NEWLM}.
+@end deftp
+
 @node Elision Tunables
 @section Elision Tunables
 @cindex elision tunables
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index c525ffa12c..3b0c6d9620 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -583,6 +583,11 @@  struct rtld_global_ro
      binaries, don't honor for PIEs).  */
   EXTERN ElfW(Addr) _dl_use_load_bias;
 
+  /* Size of surplus space in the static TLS area for dynamically
+     loaded modules with IE-model TLS or for TLSDESC optimization.
+     See comments in elf/dl-tls.c where it is initialized.  */
+  EXTERN size_t _dl_tls_static_surplus;
+
   /* Name of the shared object to be profiled (if any).  */
   EXTERN const char *_dl_profile;
   /* Filename of the output file.  */
@@ -1101,6 +1106,9 @@  extern size_t _dl_count_modids (void) attribute_hidden;
 /* Calculate offset of the TLS blocks in the static TLS block.  */
 extern void _dl_determine_tlsoffset (void) attribute_hidden;
 
+/* Calculate the size of the static TLS surplus.  */
+void _dl_tls_static_surplus_init (void) attribute_hidden;
+
 #ifndef SHARED
 /* Set up the TCB for statically linked applications.  This is called
    early during startup because we always use TLS (for errno and the