@@ -39,6 +39,25 @@ _dl_tlsdesc_return:
cfi_endproc
.size _dl_tlsdesc_return, .-_dl_tlsdesc_return
+ .hidden _dl_tlsdesc_return_lazy
+ .global _dl_tlsdesc_return_lazy
+ .type _dl_tlsdesc_return_lazy,#function
+ cfi_startproc
+ eabi_fnstart
+ .align 2
+_dl_tlsdesc_return_lazy:
+ /* This DMB synchronizes the load of td->entry ([r0,#4])
+ at the call site with the release store to it in
+ _dl_tlsdesc_lazy_resolver_fixup so the load from [r0]
+ here happens after the initialization of td->argument. */
+ DMB ()
+ sfi_breg r0, \
+ ldr r0, [\B]
+ BX (lr)
+ eabi_fnend
+ cfi_endproc
+ .size _dl_tlsdesc_return_lazy, .-_dl_tlsdesc_return_lazy
+
.hidden _dl_tlsdesc_undefweak
.global _dl_tlsdesc_undefweak
.type _dl_tlsdesc_undefweak,#function
@@ -92,6 +111,11 @@ _dl_tlsdesc_dynamic:
cfi_rel_offset (r3,4)
cfi_rel_offset (r4,8)
cfi_rel_offset (lr,12)
+ /* This DMB synchronizes the load of td->entry ([r0,#4])
+ at the call site with the release store to it in
+ _dl_tlsdesc_lazy_resolver_fixup so the load from [r0]
+ here happens after the initialization of td->argument. */
+ DMB ()
sfi_breg r0, \
ldr r1, [\B] /* td */
GET_TLS (lr)
@@ -48,6 +48,7 @@ struct tlsdesc_dynamic_arg
extern ptrdiff_t attribute_hidden
_dl_tlsdesc_return(struct tlsdesc *),
+ _dl_tlsdesc_return_lazy(struct tlsdesc *),
_dl_tlsdesc_undefweak(struct tlsdesc *),
_dl_tlsdesc_resolve_hold(struct tlsdesc *),
_dl_tlsdesc_lazy_resolver(struct tlsdesc *);
@@ -277,6 +277,15 @@
cfi_restore_state
# endif /* ARCH_HAS_HARD_TP */
+/* Memory barrier for TLSDESC resolvers. */
+#if __ARM_ARCH >= 7
+# define DMB() dmb ish
+#elif __ARM_ARCH >= 6
+# define DMB() mcr p15, 0, r0, c7, c10, 5
+#else
+# define DMB() bl __sync_synchronize
+#endif
+
# ifndef ARM_SFI_MACROS
# define ARM_SFI_MACROS 1
/* This assembly macro is prepended to any load/store instruction,
@@ -23,6 +23,7 @@
#include <dl-tlsdesc.h>
#include <dl-unmap-segments.h>
#include <tlsdeschtab.h>
+#include <atomic.h>
/* This function is used to lazily resolve TLS_DESC REL relocations
Besides the TLS descriptor itself, we get the module's got address
@@ -30,22 +31,26 @@
void
attribute_hidden
-_dl_tlsdesc_lazy_resolver_fixup (struct tlsdesc volatile *td,
- Elf32_Addr *got)
+_dl_tlsdesc_lazy_resolver_fixup (struct tlsdesc *td, Elf32_Addr *got)
{
struct link_map *l = (struct link_map *)got[1];
lookup_t result;
- unsigned long value;
+ unsigned long value = atomic_load_relaxed (&td->argument.value);
+ /* After GL(dl_load_lock) is grabbed only one caller can see td->entry in
+ initial state in _dl_tlsdesc_resolve_early_return_p, other concurrent
+ callers will return and retry calling td->entry. The updated td->entry
+ synchronizes with the single writer so all read accesses here can use
+ relaxed order. */
if (_dl_tlsdesc_resolve_early_return_p
(td, (void*)(D_PTR (l, l_info[ADDRIDX (DT_TLSDESC_PLT)]) + l->l_addr)))
return;
- if (td->argument.value & 0x80000000)
+ if (value & 0x80000000)
{
/* A global symbol, this is the symbol index. */
/* The code below was borrowed from _dl_fixup(). */
- const Elf_Symndx symndx = td->argument.value ^ 0x80000000;
+ const Elf_Symndx symndx = value ^ 0x80000000;
const ElfW(Sym) *const symtab
= (const void *) D_PTR (l, l_info[DT_SYMTAB]);
const char *strtab = (const void *) D_PTR (l, l_info[DT_STRTAB]);
@@ -76,7 +81,9 @@ _dl_tlsdesc_lazy_resolver_fixup (struct tlsdesc volatile *td,
value = sym->st_value;
else
{
- td->entry = _dl_tlsdesc_undefweak;
+ /* _dl_tlsdesc_undefweak does not access td->argument
+ so this store can be relaxed. */
+ atomic_store_relaxed (&td->entry, _dl_tlsdesc_undefweak);
goto done;
}
}
@@ -92,7 +99,6 @@ _dl_tlsdesc_lazy_resolver_fixup (struct tlsdesc volatile *td,
{
/* A local symbol, this is the offset within our tls section.
*/
- value = td->argument.value;
result = l;
}
@@ -101,14 +107,19 @@ _dl_tlsdesc_lazy_resolver_fixup (struct tlsdesc volatile *td,
#else
if (!TRY_STATIC_TLS (l, result))
{
- td->argument.pointer = _dl_make_tlsdesc_dynamic (result, value);
- td->entry = _dl_tlsdesc_dynamic;
+ void *p = _dl_make_tlsdesc_dynamic (result, value);
+ atomic_store_relaxed (&td->argument.pointer, p);
+ /* This release store synchronizes with the DMB in
+ _dl_tlsdesc_dynamic (relying on the arm memory model). */
+ atomic_store_release (&td->entry, _dl_tlsdesc_dynamic);
}
else
#endif
{
- td->argument.value = value + result->l_tls_offset;
- td->entry = _dl_tlsdesc_return;
+ atomic_store_relaxed (&td->argument.value, value + result->l_tls_offset);
+ /* This release store synchronizes with the DMB in
+ _dl_tlsdesc_return_lazy (relying on the arm memory model). */
+ atomic_store_release (&td->entry, _dl_tlsdesc_return_lazy);
}
done:
@@ -123,11 +134,10 @@ _dl_tlsdesc_lazy_resolver_fixup (struct tlsdesc volatile *td,
void
attribute_hidden
-_dl_tlsdesc_resolve_hold_fixup (struct tlsdesc volatile *td,
- void *caller)
+_dl_tlsdesc_resolve_hold_fixup (struct tlsdesc *td, void *caller)
{
/* Maybe we're lucky and can return early. */
- if (caller != td->entry)
+ if (caller != atomic_load_relaxed (&td->entry))
return;
/* Locking here will stop execution until the running resolver runs