ld.so: Introduce delayed relocation processing

Message ID 20180606140223.4D11F439942E1@oldenburg.str.redhat.com
State New
Headers show
Series
  • ld.so: Introduce delayed relocation processing
Related show

Commit Message

Florian Weimer June 6, 2018, 2:02 p.m.
This makes it possible to use IFUNC resolvers which depend
on relocations themselves, as long as these relocations do
not depend on IFUNCs.

So far, delayed relocation processing is only implemented for
x86-64.

2018-06-06  Florian Weimer  <fweimer@redhat.com>

	* elf/Makefile (dl-routines): Add delayed-reloc.
	* elf/rtld.c (dl_main): Call _dl_delayed_reloc_init,
	_dl_delayed_reloc_apply.
	* elf/dl-open.c (dl_open_worker): Likewise.
	(dl_open): Call _dl_delayed_reloc_clear.
	* include/link.h (struct link_map): Add l_delayed_relocations.
	* sysdeps/x86_64/dl-machine.h (elf_machine_rela): Replace warning
	about relocations against STT_GNU_IFUNC symbols in not-fully
	relocated objects with delayed relocation recording.  Delay
	R_X86_64_COPY and R_X86_64_IRELATIVE relocations if necessary.
	* elf/dl-delayed-reloc.c: New file.
	* elf/dl-delayed-reloc.h: Likewise.
	* sysdeps/generic/dl-delayed-reloc-machine.h: Likewise.
	* sysdeps/x86_64/dl-delayed-reloc-machine.h: Likewise.

Patch

diff --git a/elf/Makefile b/elf/Makefile
index 2dcd2b88e0..f54c4657c7 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -33,7 +33,7 @@  dl-routines	= $(addprefix dl-,load lookup object reloc deps hwcaps \
 				  runtime init fini debug misc \
 				  version profile tls origin scope \
 				  execstack open close trampoline \
-				  exception sort-maps)
+				  exception sort-maps delayed-reloc)
 ifeq (yes,$(use-ldconfig))
 dl-routines += dl-cache
 endif
diff --git a/elf/dl-delayed-reloc.c b/elf/dl-delayed-reloc.c
new file mode 100644
index 0000000000..39c864fc64
--- /dev/null
+++ b/elf/dl-delayed-reloc.c
@@ -0,0 +1,247 @@ 
+/* Delayed relocation processing.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if HAVE_IFUNC
+
+# include <assert.h>
+# include <dl-delayed-reloc.h>
+# include <errno.h>
+# include <ldsodefs.h>
+# include <sys/mman.h>
+# include <unistd.h>
+
+/* Machine-specific definitions.  */
+# include <dl-delayed-reloc-machine.h>
+
+/* One page-sized chunk of delayed relocation records.  NEXT links the
+   chunks into a singly linked list; DATA fills the rest of the page
+   and its elements are handed out one by one by allocate_reloc.  */
+struct dl_delayed_reloc_array
+{
+  struct dl_delayed_reloc_array *next;
+  struct dl_delayed_reloc data[];
+};
+
+/* Pointer to the caller-provided global state (NULL while no relocation
+   pass is active).  The indirection keeps the struct out of the BSS.  */
+static struct dl_delayed_reloc_global *global;
+
+/* Map a fresh zero-filled page and append it to the array list in
+   *GLOBAL.  Raises ENOMEM via _dl_signal_error if mmap fails.  */
+static void
+allocate_array (void)
+{
+  const size_t map_size = GLRO(dl_pagesize);
+  struct dl_delayed_reloc_array *fresh
+    = __mmap (NULL, map_size, PROT_READ | PROT_WRITE,
+              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  if (fresh == MAP_FAILED)
+    _dl_signal_error (ENOMEM, NULL, NULL,
+                      "cannot allocate IFUNC resolver information");
+
+  struct dl_delayed_reloc_array *previous = global->array_list_tail;
+  global->array_list_tail = fresh;
+  if (previous != NULL)
+    {
+      /* Chain behind the old tail and restart the fill count.  */
+      previous->next = fresh;
+      global->tail_array_count = 0;
+      return;
+    }
+  /* First allocation: this array is also the list head, and the
+     per-array capacity is derived from the page size.  */
+  global->array_list_head = fresh;
+  global->array_limit
+    = (map_size - offsetof (struct dl_delayed_reloc_array, data))
+        / sizeof (fresh->data[0]);
+}
+
+/* Return the next unused struct dl_delayed_reloc slot of the tail
+   array, adding a new array to the list first when necessary.  */
+static struct dl_delayed_reloc *
+allocate_reloc (void)
+{
+  assert (global != NULL);
+  /* Both values are zero before the first allocation, so this test
+     also covers the case that no array exists yet.  */
+  if (global->array_limit == global->tail_array_count)
+    allocate_array ();
+  assert (global->tail_array_count < global->array_limit);
+  size_t slot = global->tail_array_count++;
+  return global->array_list_tail->data + slot;
+}
+
+/* Unmap every array on the list starting at global->array_list_head
+   and drop the reference to the global state.  */
+static void
+free_allocations (void)
+{
+  const size_t map_size = GLRO(dl_pagesize);
+  struct dl_delayed_reloc_array *next;
+  for (struct dl_delayed_reloc_array *cur = global->array_list_head;
+       cur != NULL; cur = next)
+    {
+      /* Read the link before the page holding it is unmapped.  */
+      next = cur->next;
+      __munmap (cur, map_size);
+    }
+  /* _dl_delayed_reloc_init must run again to start over.  */
+  global = NULL;
+}
+
+/* Debugging aid: log the delayed relocation *DR.  *CURRENT_MAP is the
+   object announced last; it is updated when DR->map differs.  */
+static void
+report_delayed_relocation (struct link_map **current_map,
+                           struct dl_delayed_reloc *dr)
+{
+  struct link_map *reloc_map = dr->map;
+  if (*current_map != reloc_map)
+    {
+      /* Announce the switch to another relocated object.  A missing
+         or empty l_name is reported as the main executable.  */
+      const char *map_name = "<executable>";
+      if (reloc_map->l_name != NULL && reloc_map->l_name[0] != '\0')
+        map_name = reloc_map->l_name;
+
+      *current_map = reloc_map;
+      _dl_debug_printf ("applying delayed relocations for %s\n", map_name);
+    }
+
+  if (dr->sym == NULL)
+    {
+      /* No symbol: report the relocation by its address instead.  */
+      unsigned long int where = (uintptr_t) dr->reloc_addr;
+      _dl_debug_printf ("delayed relative relocation at 0x%lx\n", where);
+      return;
+    }
+
+  /* The symbol name is looked up in the string table of SYM_MAP.  */
+  const char *strtab
+    = (const char *) D_PTR (dr->sym_map, l_info[DT_STRTAB]);
+  const char *sym_name = strtab + dr->sym->st_name;
+  if (dr->sym_map->l_name == NULL)
+    _dl_debug_printf ("delayed relocation of symbol %s\n", sym_name);
+  else
+    _dl_debug_printf ("delayed relocation of symbol %s in %s\n",
+                      sym_name, dr->sym_map->l_name);
+}
+
+/* Apply every recorded delayed relocation, in recording order.  */
+static void
+apply_relocations (void)
+{
+  size_t array_limit = global->array_limit;
+  if (array_limit == 0)
+    /* The limit is only set by the first array allocation, so no
+       delayed relocation was recorded and there is nothing to do.  */
+    return;
+
+  /* Used for debugging output, to report switches from one relocated
+     object to another.  COUNT is only used for that output as well.  */
+  struct link_map *current_map = NULL;
+  unsigned long int count = 0;
+
+  for (struct dl_delayed_reloc_array *list = global->array_list_head;
+       list != NULL; list = list->next)
+    {
+      for (size_t index = 0; index < array_limit; ++index)
+        {
+          struct dl_delayed_reloc *dr = list->data + index;
+          if (dr->reloc == NULL)
+            /* Recorded elements never have a NULL reloc, so this is
+               an unused slot and marks the end of the list.  */
+            goto out;
+
+          if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_BINDINGS))
+            report_delayed_relocation (&current_map, dr);
+          _dl_delayed_reloc_machine (dr);
+
+          /* Mark the object as fully relocated, for subsequent dlopen
+             calls.  This will clear the flag even if there are still
+             pending relocations to process, but we keep executing the
+             loop, so this is not a problem.  */
+          dr->map->l_delayed_relocations = false;
+
+          ++count;
+        }
+
+    }
+
+ out:
+  if (__glibc_unlikely (GLRO(dl_debug_mask) & DL_DEBUG_BINDINGS))
+    _dl_debug_printf ("%lu delayed relocations performed\n", count);
+}
+
+void
+_dl_delayed_reloc_init (struct dl_delayed_reloc_global *new_global)
+{
+  assert (global == NULL);  /* No other pass may be in progress.  */
+  global = new_global;
+  *global = (struct dl_delayed_reloc_global) { };  /* Empty list.  */
+}
+
+void
+_dl_delayed_reloc_record (struct link_map *map,
+                          const ElfW(Sym) *refsym,
+                          const ElfW(Rela) *reloc,
+                          ElfW(Addr) *reloc_addr,
+                          struct link_map *sym_map,
+                          const ElfW(Sym) *sym)
+{
+  /* apply_relocations treats a slot whose reloc is NULL as the end
+     of the recorded relocations, so real records need a value.  */
+  assert (reloc != NULL);
+
+  /* Append the relocation to the global list.  Every field of the
+     new element is assigned explicitly.  */
+  struct dl_delayed_reloc *slot = allocate_reloc ();
+
+  slot->refsym = refsym;
+  slot->map = map;
+  slot->reloc = reloc;
+  slot->reloc_addr = reloc_addr;
+  slot->sym = sym;
+  slot->sym_map = sym_map;
+
+  /* The map containing the relocation will now need special
+     processing for future copy and relative IFUNC relocations.  */
+  map->l_delayed_relocations = true;
+}
+
+void
+_dl_delayed_reloc_apply (void)
+{
+  assert (global != NULL);
+  /* Perform the recorded relocations, then unmap their storage.  */
+  apply_relocations ();
+  free_allocations ();
+}
+
+void
+_dl_delayed_reloc_clear (void)
+{
+  /* This can be called from error handling, where the
+     initialization may not have happened yet, or where the state
+     has already been released.  In both cases GLOBAL is NULL and
+     there is nothing to deallocate.  */
+  if (global != NULL)
+    free_allocations ();
+}
+
+#endif /* HAVE_IFUNC */
diff --git a/elf/dl-delayed-reloc.h b/elf/dl-delayed-reloc.h
new file mode 100644
index 0000000000..11ecb8e318
--- /dev/null
+++ b/elf/dl-delayed-reloc.h
@@ -0,0 +1,179 @@ 
+/* Private declarations for delayed relocation processing.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Delayed relocation processing attempts to address relocation
+   dependencies which are not reflected in DT_NEEDED dependencies
+   because objects are incompletely linked, or the relocation order
+   derived from the dependencies causes objects to refer to objects
+   relocated later due to symbol interposition.  See bugs 20019,
+   21041, 23240.
+
+   Architecture-specific changes are required to implement delayed
+   relocation processing, typically in the implementation of
+   elf_machine_rel or elf_machine_rela.  There are three reasons why a
+   relocation needs to be delayed:
+
+     * The relocation is bound with the help of an IFUNC resolver, and
+       the IFUNC resolver resides in an object which has not been
+       fully relocated yet (i.e., l_relocated is false for its link
+       map, or l_delayed_relocations is true).  The reason for the
+       delay is that the IFUNC resolver may depend on the
+       yet-unprocessed relocations.
+
+     * The relocation is a copy relocation and the source symbol
+       resides in an object which has not been fully relocated yet
+       (l_delayed_relocations is true).  The reason for the delay is
+       that the copy relocation could otherwise copy data which has
+       not been fully initialized yet because some of the delayed
+       relocations affect it.
+
+     * The relocation is a relative IFUNC relocation, and the link map for the
+       object containing the relocation has other delayed relocations
+       (l_delayed_relocations is true).  The reason for the delay is
+       that the resolver for the relocation depends on other delayed
+       relocations for this object which have not yet been performed.
+
+   All delayed relocations are processed in the order they were
+   recorded, which reflects the initial relocation processing order
+   between objects, and the relocation order as determined by the
+   static link editor within objects.
+
+   This does not process correctly all implicit dependencies between
+   IFUNC resolvers (e.g., two IFUNC resolvers calling each other will
+   still not work), but it addresses common cases of implicit
+   dependencies, for example as the result of symbol interposition or
+   due to missing DT_NEEDED entries in objects.  In particular, data
+   relocations used by the IFUNC resolvers in glibc itself will be
+   resolved before these IFUNC resolvers run, so it is safe to call
+   glibc string functions from other (non-glibc) IFUNC resolvers, or
+   use pointers to glibc functions (with IFUNC resolvers) in global
+   data initializers (inside or outside of glibc).
+
+   Note that delayed relocation processing does not address the
+   phenomenon that IFUNC resolvers run before ELF constructors and C++
+   constructors.  This means that IFUNC resolvers cannot assume that
+   glibc itself has been initialized, or that global data structures
+   in other objects have the expected values, particularly if their
+   initializers are not constants.  */
+
+#ifndef _DL_DELAYED_RELOC_H
+#define _DL_DELAYED_RELOC_H
+
+#include <link.h>
+
+/* All functions declared in this file must be called while the global
+   rtld lock is acquired.  */
+
+/* Internal storage for delayed relocations.  */
+struct dl_delayed_reloc_array;
+
+/* The full implementation is only available if IFUNCs are
+   supported.  */
+#if HAVE_IFUNC
+
+struct dl_delayed_reloc_global
+{
+  /* First array on the list; relocations are applied starting here.  */
+  struct dl_delayed_reloc_array *array_list_head;
+
+  /* Last array on the list.  New entries are allocated from it.  */
+  struct dl_delayed_reloc_array *array_list_tail;
+
+  /* Number of entries already used in the tail array.  */
+  size_t tail_array_count;
+
+  /* Capacity of each array; zero until the first array exists.  */
+  size_t array_limit;
+};
+
+/* Prepare for relocation processing by resetting *PGLOBAL, which must
+   stay valid until _dl_delayed_reloc_apply or _dl_delayed_reloc_clear
+   is called.  Must not be called again before one of them has run.  */
+void _dl_delayed_reloc_init (struct dl_delayed_reloc_global *pglobal)
+  attribute_hidden;
+
+/* Apply all pending delayed relocations in the order they were
+   recorded, then deallocate all auxiliary allocations.  */
+void _dl_delayed_reloc_apply (void) attribute_hidden;
+
+/* Discard all recorded delayed relocations without applying them and
+   deallocate all auxiliary allocations.  This function is intended
+   for clearing up after a dlopen failure.  */
+void _dl_delayed_reloc_clear (void) attribute_hidden;
+
+/* Delayed relocation.  These are stored in arrays inside struct
+   dl_delayed_reloc_array.
+
+   Copy and relative IFUNC relocations encountered while the relevant
+   link map still has delayed (IFUNC) relocations are added to the
+   list as well, because the earlier delayed relocations could affect
+   their results.  */
+struct dl_delayed_reloc
+{
+  /* Information about the relocation.  RELOC is never NULL here.  */
+  const ElfW(Sym) *refsym;
+  struct link_map *map;
+  const ElfW(Rela) *reloc;
+  ElfW(Addr) *reloc_addr;
+
+  /* Information about the target symbol (either an IFUNC resolver, or
+     the source of a copy relocation).  NULL for relative IFUNC
+     relocations.  */
+  const ElfW(Sym) *sym;
+
+  /* The link map corresponding to sym.  */
+  struct link_map *sym_map;
+};
+
+/* Record the delayed relocation RELOC (not NULL) for SYM in SYM_MAP at
+   *RELOC_ADDR and mark MAP, the object containing it, accordingly.
+   Can raise an error using _dl_signal_error.  SYM can be NULL if the
+   relocation is a relative IFUNC relocation.  */
+void _dl_delayed_reloc_record (struct link_map *map,
+                               const ElfW(Sym) *refsym,
+                               const ElfW(Rela) *reloc,
+                               ElfW(Addr) *reloc_addr,
+                               struct link_map *sym_map,
+                               const ElfW(Sym) *sym) attribute_hidden;
+
+#else /* !HAVE_IFUNC  */
+
+/* Dummy implementations for targets without IFUNC support.  There is
+   no recording function here, so these have nothing to do.  */
+struct dl_delayed_reloc_global
+{
+};
+
+static inline void
+_dl_delayed_reloc_init (struct dl_delayed_reloc_global *global)
+{
+}
+
+static inline void
+_dl_delayed_reloc_apply (void)
+{
+}
+
+static inline void
+_dl_delayed_reloc_clear (void)
+{
+}
+
+#endif /* !HAVE_IFUNC */
+
+#endif /* _DL_DELAYED_RELOC_H */
diff --git a/elf/dl-open.c b/elf/dl-open.c
index 92b498eb2d..ae385e9c88 100644
--- a/elf/dl-open.c
+++ b/elf/dl-open.c
@@ -35,7 +35,7 @@ 
 #include <libc-internal.h>
 
 #include <dl-dst.h>
-
+#include <dl-delayed-reloc.h>
 
 /* We must be careful not to leave us in an inconsistent state.  Thus we
    catch any error and re-raise it after cleaning up.  */
@@ -324,6 +324,9 @@  dl_open_worker (void *a)
   while (l != NULL);
   _dl_sort_maps (maps, nmaps, NULL, false);
 
+  struct dl_delayed_reloc_global delayed_relocations;
+  _dl_delayed_reloc_init (&delayed_relocations);
+
   int relocation_in_progress = 0;
 
   for (unsigned int i = nmaps; i-- > 0; )
@@ -491,6 +494,7 @@  TLS generation counter wrapped!  Please report this."));
 	}
     }
 
+  _dl_delayed_reloc_apply ();
   _dl_relocate_apply_relro (new);
 
   /* Notify the debugger all new objects have been relocated.  */
@@ -608,6 +612,7 @@  no more namespaces available for dlmopen()"));
 	  if ((mode & __RTLD_AUDIT) == 0)
 	    GL(dl_tls_dtv_gaps) = true;
 
+	  _dl_delayed_reloc_clear ();
 	  _dl_close_worker (args.map, true);
 	}
 
diff --git a/elf/rtld.c b/elf/rtld.c
index 10062e48f6..5b5134ec45 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -41,6 +41,7 @@ 
 #include <tls.h>
 #include <stap-probe.h>
 #include <stackinfo.h>
+#include <dl-delayed-reloc.h>
 
 #include <assert.h>
 
@@ -2110,6 +2111,9 @@  ERROR: ld.so: object '%s' cannot be loaded as audit interface: %s; ignored.\n",
 	_dl_show_scope (l, 0);
     }
 
+  /* Used to keep track of delayed IFUNC relocations.  */
+  struct dl_delayed_reloc_global delayed_relocations;
+
   if (prelinked)
     {
       if (main_map->l_info [ADDRIDX (DT_GNU_CONFLICT)] != NULL)
@@ -2166,6 +2170,9 @@  ERROR: ld.so: object '%s' cannot be loaded as audit interface: %s; ignored.\n",
       GLRO(dl_lazy) |= consider_profiling;
 
       HP_TIMING_NOW (start);
+
+      _dl_delayed_reloc_init (&delayed_relocations);
+
       unsigned i = main_map->l_searchlist.r_nlist;
       while (i-- > 0)
 	{
@@ -2252,10 +2259,11 @@  ERROR: ld.so: object '%s' cannot be loaded as audit interface: %s; ignored.\n",
       HP_TIMING_ACCUM_NT (relocate_time, add);
     }
 
-  /* Activate RELRO protection.  In the prelink case, this was already
-     done earlier.  */
+  /* Perform delayed IFUNC relocations and activate RELRO protection.
+     In the prelink case, this was already done earlier.  */
   if (! prelinked)
     {
+      _dl_delayed_reloc_apply ();
       /* Make sure that this covers the dynamic linker as well.
 	 TODO: rtld_multiple_ref is always true because libc.so needs
 	 the dynamic linker internally.  */
diff --git a/include/link.h b/include/link.h
index 5924594548..fb9c012767 100644
--- a/include/link.h
+++ b/include/link.h
@@ -202,6 +202,9 @@  struct link_map
     unsigned int l_free_initfini:1; /* Nonzero if l_initfini can be
 				       freed, ie. not allocated with
 				       the dummy malloc in ld.so.  */
+    /* Link map has pending delayed (IFUNC) relocations.  Only used
+       during relocation.  */
+    unsigned int l_delayed_relocations:1;
 
 #include <link_map.h>
 
diff --git a/sysdeps/generic/dl-delayed-reloc-machine.h b/sysdeps/generic/dl-delayed-reloc-machine.h
new file mode 100644
index 0000000000..9cd078d043
--- /dev/null
+++ b/sysdeps/generic/dl-delayed-reloc-machine.h
@@ -0,0 +1,30 @@ 
+/* Delayed relocation processing.  Generic version.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _DL_DELAYED_RELOC_MACHINE_H
+#define _DL_DELAYED_RELOC_MACHINE_H
+
+/* Process a delayed relocation.  In the default implementation, there
+   are no delayed relocations, so RELOC is ignored and this function
+   does nothing.  */
+static inline void
+_dl_delayed_reloc_machine (const struct dl_delayed_reloc *reloc)
+{
+}
+
+#endif  /* _DL_DELAYED_RELOC_MACHINE_H */
diff --git a/sysdeps/x86_64/dl-delayed-reloc-machine.h b/sysdeps/x86_64/dl-delayed-reloc-machine.h
new file mode 100644
index 0000000000..d49e9ba27e
--- /dev/null
+++ b/sysdeps/x86_64/dl-delayed-reloc-machine.h
@@ -0,0 +1,97 @@ 
+/* Delayed relocation processing.  x86-64 version.
+   Copyright (C) 2001-2018 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _DL_DELAYED_RELOC_MACHINE_H
+#define _DL_DELAYED_RELOC_MACHINE_H
+
+/* Apply the delayed relocation *DR, running the IFUNC resolver where
+   the target is an IFUNC symbol.  This needs to be kept in sync with
+   elf_machine_rela in dl-machine.h.  */
+static inline void
+_dl_delayed_reloc_machine (const struct dl_delayed_reloc *dr)
+{
+  const ElfW(Rela) *reloc = dr->reloc;
+  const unsigned long int r_type = ELFW(R_TYPE) (reloc->r_info);
+  ElfW(Addr) *const reloc_addr = dr->reloc_addr;
+  const ElfW(Sym) *refsym = dr->refsym;
+  const ElfW(Sym) *sym = dr->sym;
+
+  if (r_type == R_X86_64_IRELATIVE)
+    {
+      /* Special case: IRELATIVE relocations do not have an associated
+         symbol (SYM is NULL), so handle them before SYM is used.  */
+      ElfW(Addr) resolver = dr->map->l_addr + reloc->r_addend;
+      *reloc_addr = ((ElfW(Addr) (*) (void)) resolver) ();
+      return;
+    }
+
+  ElfW(Addr) value = SYMBOL_ADDRESS (dr->sym_map, sym, true);
+  if (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC
+      && sym->st_shndx != SHN_UNDEF)
+    value = ((ElfW(Addr) (*) (void)) value) ();
+
+  /* This switch statement needs to be kept in sync with the switch
+     statement in elf_machine_rela.  */
+  switch (r_type)
+    {
+    case R_X86_64_GLOB_DAT:
+    case R_X86_64_JUMP_SLOT:
+      *reloc_addr = value + reloc->r_addend;
+      break;
+    case R_X86_64_64:
+      /* R_X86_64_64 updates the whole 64-bit entry.  */
+      *(Elf64_Addr *) reloc_addr = (Elf64_Addr) value + reloc->r_addend;
+      break;
+    case R_X86_64_32:
+      value += reloc->r_addend;
+      *(unsigned int *) reloc_addr = value;
+
+      const char *fmt;
+      if (__glibc_unlikely (value > UINT_MAX))
+        {
+          const char *strtab;
+
+          fmt = "\
+%s: Symbol `%s' causes overflow in R_X86_64_32 relocation\n";
+        print_err:
+          strtab = (const char *) D_PTR (dr->map, l_info[DT_STRTAB]);
+
+          _dl_error_printf (fmt, RTLD_PROGNAME, strtab + refsym->st_name);
+        }
+      break;
+    case R_X86_64_PC32:
+      value += reloc->r_addend - (ElfW(Addr)) reloc_addr;
+      *(unsigned int *) reloc_addr = value;
+      if (__glibc_unlikely (value != (int) value))
+        {
+          fmt = "\
+%s: Symbol `%s' causes overflow in R_X86_64_PC32 relocation\n";
+          goto print_err;
+        }
+      /* Do not fall through into the copy relocation below.  */
+      break;
+    case R_X86_64_COPY:
+      memcpy (reloc_addr, (void *) value,
+              MIN (sym->st_size, refsym->st_size));
+      break;
+    default:
+      _dl_reloc_bad_type (dr->map, r_type, 0);
+    }
+}
+
+#endif  /* _DL_DELAYED_RELOC_MACHINE_H */
diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
index 1942ed5061..e88986a429 100644
--- a/sysdeps/x86_64/dl-machine.h
+++ b/sysdeps/x86_64/dl-machine.h
@@ -27,6 +27,7 @@ 
 #include <tls.h>
 #include <dl-tlsdesc.h>
 #include <cpu-features.c>
+#include <dl-delayed-reloc.h>
 
 /* Return nonzero iff ELF header is compatible with the running host.  */
 static inline int __attribute__ ((unused))
@@ -314,22 +315,21 @@  elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc,
 	  && __glibc_likely (!skip_ifunc))
 	{
 # ifndef RTLD_BOOTSTRAP
-	  if (sym_map != map
-	      && sym_map->l_type != lt_executable
-	      && !sym_map->l_relocated)
+	  if (!sym_map->l_relocated || sym_map->l_delayed_relocations)
 	    {
-	      const char *strtab
-		= (const char *) D_PTR (map, l_info[DT_STRTAB]);
-	      _dl_error_printf ("\
-%s: Relink `%s' with `%s' for IFUNC symbol `%s'\n",
-				RTLD_PROGNAME, map->l_name,
-				sym_map->l_name,
-				strtab + refsym->st_name);
+	      /* If the target map has not yet been fully relocated,
+		 delay the processing of the relocation until
+		 later.  */
+	      _dl_delayed_reloc_record
+		(map, refsym, reloc, reloc_addr, sym_map, sym);
+	      return;
 	    }
 # endif
 	  value = ((ElfW(Addr) (*) (void)) value) ();
 	}
 
+      /* This switch statement needs to be kept in sync with the
+	 switch statement in _dl_delayed_reloc_machine.  */
       switch (r_type)
 	{
 # ifndef RTLD_BOOTSTRAP
@@ -496,8 +496,14 @@  elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc,
 	    /* This can happen in trace mode if an object could not be
 	       found.  */
 	    break;
-	  memcpy (reloc_addr_arg, (void *) value,
-		  MIN (sym->st_size, refsym->st_size));
+	  if (sym_map->l_delayed_relocations)
+	    /* The relocation result could depend on earlier delayed
+	       relocations.  */
+	    _dl_delayed_reloc_record
+	      (map, refsym, reloc, reloc_addr, sym_map, sym);
+	  else
+	    memcpy (reloc_addr_arg, (void *) value,
+		    MIN (sym->st_size, refsym->st_size));
 	  if (__glibc_unlikely (sym->st_size > refsym->st_size)
 	      || (__glibc_unlikely (sym->st_size < refsym->st_size)
 		  && GLRO(dl_verbose)))
@@ -509,9 +515,19 @@  elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc,
 	  break;
 #  endif
 	case R_X86_64_IRELATIVE:
-	  value = map->l_addr + reloc->r_addend;
-	  value = ((ElfW(Addr) (*) (void)) value) ();
-	  *reloc_addr = value;
+	  if (map->l_delayed_relocations)
+	    /* We need to delay this IFUNC relocation because the
+	       IFUNC resolver may pick up the address of a symbol
+	       which in turn is determined by a (delayed) IFUNC
+	       relocation.  */
+	    _dl_delayed_reloc_record
+	      (map, refsym, reloc, reloc_addr, map, NULL);
+	  else
+	    {
+	      value = map->l_addr + reloc->r_addend;
+	      value = ((ElfW(Addr) (*) (void)) value) ();
+	      *reloc_addr = value;
+	    }
 	  break;
 	default:
 	  _dl_reloc_bad_type (map, r_type, 0);