
[v3,26/32] elf: Switch to a region-based protected memory allocator

Message ID 0464b6b67dd67197bd9afe735bfc9e900a25b399.1701944612.git.fweimer@redhat.com
State: New
Series: RELRO linkmaps

Commit Message

Florian Weimer Dec. 7, 2023, 10:32 a.m. UTC
The old allocator is retained for debugging purposes.
---
 elf/Makefile               |   1 +
 elf/dl-protmem-internal.h  |  65 +++++-
 elf/dl-protmem.c           | 392 +++++++++++++++++++++++++++++++++++++
 elf/dl-protmem_bootstrap.h |   3 +-
 elf/tst-dl-protmem.c       | 354 +++++++++++++++++++++++++++++++++
 5 files changed, 812 insertions(+), 3 deletions(-)
 create mode 100644 elf/tst-dl-protmem.c

Comments

Joseph Myers March 5, 2024, 11:36 p.m. UTC | #1
On Thu, 7 Dec 2023, Florian Weimer wrote:

> +#if __GNUC_PREREQ (11, 0)
> +          /* Suppress invalid GCC warning with -O3 (GCC PR 110546):
> +             error: '_dl_protmem_free' called on pointer returned from a
> +               mismatched allocation function [-Werror=mismatched-dealloc]
> +             note: returned from '_dl_protmem_allocate.constprop */

This quoted "note:" seems to be missing a closing "'" after the function 
name.  The patch is OK with that fixed.

Patch

diff --git a/elf/Makefile b/elf/Makefile
index 7ababc0fc4..2ebf5d2702 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -503,6 +503,7 @@  tests-internal += \
   tst-audit19a \
   tst-create_format1 \
   tst-dl-hwcaps_split \
+  tst-dl-protmem \
   tst-dl_find_object \
   tst-dl_find_object-threads \
   tst-dlmopen2 \
diff --git a/elf/dl-protmem-internal.h b/elf/dl-protmem-internal.h
index ce50d174a6..00f4639b60 100644
--- a/elf/dl-protmem-internal.h
+++ b/elf/dl-protmem-internal.h
@@ -19,10 +19,24 @@ 
 /* These declarations are needed by <dl-protmem_bootstrap.h>, which
    has to be inlined into _dl_start.  */
 
-/* Header before all protected memory allocations.  */
+#ifndef DL_PROTMEM_INTERNAL_H
+#define DL_PROTMEM_INTERNAL_H
+
+/* Define this to 1 to switch to the debugging allocator.  */
+#ifndef DL_PROTMEM_DEBUG
+# define DL_PROTMEM_DEBUG 0
+#endif
+
+/* Minimum chunk size.  Used to preserve alignment.  */
+enum { _dlpm_chunk_minimal_size = 8 };
+
+#if DL_PROTMEM_DEBUG
+/* The debugging allocator uses mmap directly and offers full size
+   checking.  */
 struct dl_protmem_header
 {
-  struct dl_protmem_header *next;
+  struct dl_protmem_header *next
+    __attribute__ ((__aligned__ (_dlpm_chunk_minimal_size)));
   unsigned int size;
 };
 
@@ -37,3 +51,50 @@  struct dl_protmem_state
      this structure.  */
   struct dl_protmem_header *root;
 };
+
+/* The initial allocation contains just the state singleton.  */
+# define DL_PROTMEM_INITIAL_REGION_SIZE (sizeof (struct dl_protmem_state))
+
+#else  /* Non-debugging allocator.  */
+
+/* The initial allocation covers about 150 link maps, which should be
+   enough for most programs.  */
+# if __WORDSIZE == 32
+#  define DL_PROTMEM_INITIAL_REGION_SIZE 131072
+# else
+#  define DL_PROTMEM_INITIAL_REGION_SIZE 262144
+# endif
+
+# define DL_PROTMEM_REGION_COUNT 12
+
+/* Struct tag denoting freelist entries.  */
+struct dl_protmem_freelist_chunk;
+
+/* Global state for the protected memory allocator.  */
+struct dl_protmem_state
+{
+  /* GLRO (dl_protmem) points to this field.  */
+  struct rtld_protmem protmem
+    __attribute__ ((__aligned__ (_dlpm_chunk_minimal_size)));
+
+  /* Pointers to mmap-allocated regions.  For index i, the size of the
+     allocation is DL_PROTMEM_INITIAL_REGION_SIZE << i.  The space of
+     the combined regions is sufficient for hundreds of thousands of
+     link maps, so the dynamic linker runs into scalability issues
+     well before it is exhausted.  */
+  void *regions[DL_PROTMEM_REGION_COUNT];
+
+  /* List of unused allocations for each region, in increasing address
+     order.  See _dlpm_chunk_size for how the freed chunk size is
+     encoded.  */
+  struct dl_protmem_freelist_chunk *freelist[DL_PROTMEM_REGION_COUNT];
+
+  /* Pending free chunk that can be merged with other deallocations.
+     One pending chunk per region avoids accidental merging across
+     regions.  */
+  struct dl_protmem_freelist_chunk *pending_free[DL_PROTMEM_REGION_COUNT];
+};
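+
+/* Illustrative sizes (not asserted anywhere): with the 64-bit
+   DL_PROTMEM_INITIAL_REGION_SIZE of 262144 bytes and 12 regions,
+   region i is 262144 << i bytes, so the largest region is 512 MiB
+   and all regions together cover just under 1 GiB.  */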
+
+#endif /* Non-debugging allocator.  */
+
+#endif /* DL_PROTMEM_INTERNAL_H */
diff --git a/elf/dl-protmem.c b/elf/dl-protmem.c
index f5a66868e6..cd416e33a5 100644
--- a/elf/dl-protmem.c
+++ b/elf/dl-protmem.c
@@ -21,6 +21,7 @@ 
 #include <dl-protmem.h>
 #include <dl-protmem-internal.h>
 
+#include <array_length.h>
 #include <assert.h>
 #include <sys/mman.h>
 
@@ -38,6 +39,8 @@  _dl_protmem_state (void)
           - offsetof (struct dl_protmem_state, protmem));
 }
 
+/* Debugging allocator.  The real allocator is below.  */
+#if DL_PROTMEM_DEBUG
 void
 _dl_protmem_init (void)
 {
@@ -130,3 +133,392 @@  _dl_protmem_end (void)
     /* If the mapping is left read-write, this is not fatal.  */
     (void) __mprotect (hdr, hdr->size, PROT_READ);
 }
+
+#else /* The non-debugging allocator follows.  */
+
+/* Address of a chunk on the free list.  This is an abstract pointer,
+   never to be dereferenced explicitly.  Use the accessor functions
+   below instead.
+
+   Metadata layout: The first word is the pointer to the next chunk,
+   except that the lowest bit (unused due to alignment) is used as a
+   flag.  If it is 1, the chunk size is the minimal size, and the
+   size is not stored separately.  If the flag is 0, the size is
+   stored in the second metadata word.  */
+typedef struct dl_protmem_freelist_chunk *chunk;
+
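+/* Worked example of the encoding (illustration only, assuming a
+   64-bit target): a freed minimal 8-byte chunk at address A whose
+   freelist successor is N stores (uintptr_t) N | 1 in its single
+   word; a freed 24-byte chunk stores N in word 0 and the size 24 in
+   word 1.  */
+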
+/* Returns the size of a chunk on the free list whose start address is
+   PTR.  The size includes the metadata.  */
+static inline size_t
+_dlpm_chunk_size (chunk ptr)
+{
+  uintptr_t *p = (uintptr_t *)ptr;
+  if (*p & 1)
+    return _dlpm_chunk_minimal_size;
+  else
+    return p[1];
+}
+
+/* Returns the address of the next free list element.  */
+static inline chunk
+_dlpm_chunk_next (chunk ptr)
+{
+  uintptr_t *p = (uintptr_t *)ptr;
+  /* Mask away the size bit.  */
+  return (chunk) (*p & -2);
+}
+
+static inline void
+_dlpm_chunk_set_next (chunk ptr, chunk newnext)
+{
+  /* Preserve the value of the size bit.  */
+  uintptr_t *p = (uintptr_t *)ptr;
+  *p = (uintptr_t) newnext | (*p & 1);
+}
+
+/* Creates a new freelist chunk at PTR, with NEXT as the next chunk,
+   and SIZE as the size of this chunk (which includes the
+   metadata).  Returns PTR.  */
+static inline chunk
+_dlpm_chunk_make (chunk ptr, chunk next, size_t size)
+{
+  uintptr_t *p = (uintptr_t *)ptr;
+  if (size <= _dlpm_chunk_minimal_size)
+    /* Compressed size.  */
+    *p = (uintptr_t) next | 1;
+  else
+    {
+      p[0] = (uintptr_t) next;
+      p[1] = size;
+    }
+  return ptr;
+}
+
+/* Return true if PTR2 comes immediately after PTR1 in memory.  PTR2
+   can be NULL.  */
+static inline bool
+_dlpm_chunk_adjancent (chunk ptr1, chunk ptr2)
+{
+  return (uintptr_t) ptr2 == (uintptr_t) ptr1 + _dlpm_chunk_size (ptr1);
+}
+
+/* Put the pending allocation on the free list.  */
+static void
+_dlpm_free_pending (struct dl_protmem_state *state, unsigned int region)
+{
+  chunk pending = state->pending_free[region];
+  state->pending_free[region] = NULL;
+
+  /* The current chunk pointer.  In the while loop below, coalescing
+     potentially happens at the end of this chunk, so that the chunk
+     address does not change.  */
+  chunk current = state->freelist[region];
+
+  /* Special cases before loop start.  */
+
+  if (current == NULL)
+    {
+      /* The freelist is empty.  Nothing to coalesce.  */
+      state->freelist[region] = pending;
+      return;
+    }
+
+  /* During the loop below, this merge is handled as part of the next
+     chunk processing.  */
+  if (pending < current)
+    {
+      /* The new chunk will be first on the freelist.  */
+      state->freelist[region] = pending;
+
+      /* See if we can coalesce.  */
+      if (_dlpm_chunk_adjancent (pending, current))
+        {
+          chunk new_next = _dlpm_chunk_next (current);
+          size_t new_size = (_dlpm_chunk_size (pending)
+                             + _dlpm_chunk_size (current));
+          _dlpm_chunk_make (pending, new_next, new_size);
+        }
+      else
+        _dlpm_chunk_set_next (pending, current);
+      return;
+    }
+
+  while (true)
+    {
+      chunk next = _dlpm_chunk_next (current);
+      if (_dlpm_chunk_adjancent (current, pending))
+        {
+          /* We can coalesce.  See if this completely fills a gap.  */
+          if (_dlpm_chunk_adjancent (pending, next))
+            {
+              /* Merge three chunks.  */
+              chunk new_next = _dlpm_chunk_next (next);
+              size_t new_size = (_dlpm_chunk_size (current)
+                                 + _dlpm_chunk_size (pending)
+                                 + _dlpm_chunk_size (next));
+              /* The address of the current chunk does not change, so
+                 the next pointer leading to it remains valid.  */
+              _dlpm_chunk_make (current, new_next, new_size);
+            }
+          else
+            {
+              /* Merge two chunks.  */
+              size_t new_size = (_dlpm_chunk_size (current)
+                                 + _dlpm_chunk_size (pending));
+              /* The current chunk pointer remains unchanged.  */
+              _dlpm_chunk_make (current, next, new_size);
+            }
+          break;
+        }
+      if (next == NULL)
+        {
+          /* New last chunk on freelist.  */
+          _dlpm_chunk_set_next (current, pending);
+          break;
+        }
+      if (pending < next)
+        {
+          /* This is the right spot on the freelist.  */
+          _dlpm_chunk_set_next (current, pending);
+
+          /* See if we can coalesce with the next chunk.  */
+          if (_dlpm_chunk_adjancent (pending, next))
+            {
+              chunk new_next = _dlpm_chunk_next (next);
+              size_t new_size = (_dlpm_chunk_size (pending)
+                                 + _dlpm_chunk_size (next));
+              _dlpm_chunk_make (pending, new_next, new_size);
+            }
+          else
+            _dlpm_chunk_set_next (pending, next);
+          break;
+        }
+      current = next;
+    }
+}
+
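+/* Coalescing example (illustration only): if a region's freelist
+   contains [A, A+32) and [A+64, A+96) and the pending chunk is
+   [A+32, A+64), the merge above produces a single chunk [A, A+96);
+   the address A of the head chunk does not change.  */
+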
+/* Returns the region index for the pointer.  Terminates the process
+   if PTR is not on the heap.  */
+static unsigned int
+_dlpm_find_region (struct dl_protmem_state *state, void *ptr)
+{
+  /* Find the region in which the pointer is located.  */
+  size_t region_size = DL_PROTMEM_INITIAL_REGION_SIZE;
+  for (unsigned int i = 0; i < array_length (state->regions); ++i)
+    {
+      if (ptr >= state->regions[i] && ptr < state->regions[i] + region_size)
+        return i;
+      region_size *= 2;
+    }
+
+  _dl_fatal_printf ("\
+Fatal glibc error: Protected memory allocation not found\n");
+}
+
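+/* Bounds example (illustration only): region i, if mapped, spans
+   [regions[i], regions[i] + (DL_PROTMEM_INITIAL_REGION_SIZE << i)),
+   matching the doubling of region_size in the loop above.  */
+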
+void
+_dl_protmem_init (void)
+{
+  struct dl_protmem_state *state = _dl_protmem_state ();
+  state->regions[0] = state;
+  /* The part of the region after the allocator state (with the
+     embedded protected memory area) is unused.  */
+  state->freelist[0] = (chunk) (state + 1);
+  void *initial_region_end = (void *) state + DL_PROTMEM_INITIAL_REGION_SIZE;
+  _dlpm_chunk_make (state->freelist[0], NULL,
+                    initial_region_end - (void *) state->freelist[0]);
+  _dl_protmem_begin_count = 1;
+}
+
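+/* After initialization, region 0 consists of the struct
+   dl_protmem_state header followed by a single free chunk covering
+   the rest of the DL_PROTMEM_INITIAL_REGION_SIZE bytes (a sketch of
+   the layout set up above).  */
+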
+void
+_dl_protmem_begin (void)
+{
+  if (_dl_protmem_begin_count++ != 0)
+    /* Already unprotected.  */
+    return;
+
+  struct dl_protmem_state *state = _dl_protmem_state ();
+  size_t region_size = DL_PROTMEM_INITIAL_REGION_SIZE;
+  for (unsigned int i = 0; i < array_length (state->regions); ++i)
+    if (state->regions[i] != NULL)
+      {
+        if (__mprotect (state->regions[i], region_size,
+                        PROT_READ | PROT_WRITE) != 0)
+          _dl_signal_error (ENOMEM, NULL, NULL,
+                            "Cannot make protected memory writable");
+        region_size *= 2;
+      }
+}
+
+void
+_dl_protmem_end (void)
+{
+  if (--_dl_protmem_begin_count > 0)
+    return;
+
+  struct dl_protmem_state *state = _dl_protmem_state ();
+  size_t region_size = DL_PROTMEM_INITIAL_REGION_SIZE;
+  for (unsigned int i = 0; i < array_length (state->regions); ++i)
+    if (state->regions[i] != NULL)
+      /* Ignore errors here because we can continue running with
+         read-write memory, with reduced hardening.  */
+      (void) __mprotect (state->regions[i], region_size, PROT_READ);
+}
+
+void *
+_dl_protmem_allocate (size_t requested_size)
+{
+  /* Round up the size to the next multiple of 8, to preserve chunk
+     alignment.  */
+  {
+    size_t adjusted_size = roundup (requested_size, _dlpm_chunk_minimal_size);
+    if (adjusted_size < requested_size)
+      return NULL;              /* Overflow.  */
+    requested_size = adjusted_size;
+  }
+
+  struct dl_protmem_state *state = _dl_protmem_state ();
+
+  /* Try to find an exact match among the pending chunks.  */
+  for (unsigned int i = 0; i < array_length (state->regions); ++i)
+    {
+      chunk pending = state->pending_free[i];
+      if (pending == NULL)
+        continue;
+      size_t pending_size = _dlpm_chunk_size (pending);
+      if (pending_size == requested_size)
+        {
+          state->pending_free[i] = NULL;
+          return pending;
+        }
+    }
+
+  /* Flush all pending free chunks to the freelists.  */
+  for (unsigned int i = 0; i < array_length (state->regions); ++i)
+    if (state->pending_free[i] != NULL)
+      _dlpm_free_pending (state, i);
+
+  /* This points to the previous chunk of the best chunk found so far,
+     or the root of the freelist.  This place needs to be updated to
+     remove the best chunk from the freelist.  */
+  chunk best_previous_p = NULL;
+  size_t best_p_size = -1;
+
+  /* Best-fit search along the free lists.  */
+  for (unsigned int i = 0; i < array_length (state->regions); ++i)
+    if (state->freelist[i] != NULL)
+      {
+        /* Use the head pointer of the list as the next pointer.
+           The missing size field is not updated below.  */
+        chunk last_p = (chunk) &state->freelist[i];
+        chunk p = state->freelist[i];
+        while (true)
+          {
+            size_t candidate_size = _dlpm_chunk_size (p);
+            chunk next_p = _dlpm_chunk_next (p);
+            if (candidate_size == requested_size)
+              {
+                /* Perfect fit.  No further search needed.
+                   Remove this chunk from the free list.  */
+                _dlpm_chunk_set_next (last_p, next_p);
+                return p;
+              }
+            if (candidate_size > requested_size
+                && candidate_size < best_p_size)
+              /* Chunk with a better usable size.  */
+              {
+                best_previous_p = last_p;
+                best_p_size = candidate_size;
+              }
+            if (next_p == NULL)
+              break;
+            last_p = p;
+            p = next_p;
+          }
+      }
+
+  if (best_previous_p == NULL)
+    {
+      /* No usable chunk found.  Grow the heap.  */
+      size_t region_size = DL_PROTMEM_INITIAL_REGION_SIZE;
+      for (unsigned int i = 0; i < array_length (state->regions); ++i)
+        {
+          if (state->regions[i] == NULL && region_size >= requested_size)
+            {
+              void *ptr = __mmap (NULL, region_size, PROT_READ | PROT_WRITE,
+                                  MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+              if (ptr == MAP_FAILED)
+                return NULL;
+              state->regions[i] = ptr;
+              if (region_size == requested_size)
+                /* Perfect fit: the entire region serves as the allocation.  */
+                return ptr;
+
+              /* Create a free list with one entry for the entire region.  */
+              state->freelist[i] = _dlpm_chunk_make (ptr, NULL, region_size);
+              best_previous_p = (chunk) &state->freelist[i];
+              best_p_size = region_size;
+
+              /* Chunk is split below.  */
+              break;
+            }
+          region_size *= 2;
+        }
+
+      /* All regions have been exhausted.  */
+      if (best_previous_p == NULL)
+        return NULL;
+    }
+
+  /* Split the chunk.  */
+  chunk p = _dlpm_chunk_next (best_previous_p);
+  void *p_end = (void *) p + best_p_size; /* Memory after this chunk.  */
+  chunk p_next = _dlpm_chunk_next (p); /* Following chunk on freelist.  */
+  void *remaining = (void *) p + requested_size; /* Place of the new chunk.  */
+  /* Replace the chunk on the free list with its remainder.  */
+  _dlpm_chunk_set_next (best_previous_p,
+                        _dlpm_chunk_make (remaining,
+                                          p_next, p_end - remaining));
+  return p;
+}
+
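+/* Split example (illustration only): a request for 40 bytes that
+   best-fits a 64-byte free chunk returns its first 40 bytes and
+   replaces the chunk on the freelist with the 24-byte remainder,
+   which cannot drop below _dlpm_chunk_minimal_size because all
+   sizes are multiples of it.  */
+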
+void
+_dl_protmem_free (void *ptr, size_t requested_size)
+{
+  requested_size = roundup (requested_size, _dlpm_chunk_minimal_size);
+
+  struct dl_protmem_state *state = _dl_protmem_state ();
+  unsigned int region = _dlpm_find_region (state, ptr);
+
+  {
+    chunk pending = state->pending_free[region];
+    if (pending != NULL)
+      {
+        /* First try merging with the old allocation.  */
+        if (_dlpm_chunk_adjancent (pending, ptr))
+          {
+            /* Extend the existing pending chunk.  The start address does
+               not change.  */
+            _dlpm_chunk_make (pending, NULL,
+                              _dlpm_chunk_size (pending) + requested_size);
+            return;
+          }
+        if (_dlpm_chunk_adjancent (ptr, pending))
+          {
+            /* Create a new chunk that has the existing chunk at the end.  */
+            state->pending_free[region]
+              = _dlpm_chunk_make (ptr, NULL,
+                                  requested_size + _dlpm_chunk_size (pending));
+            return;
+          }
+
+        /* Merging did not work out.  Get rid of the old pending
+           allocation.  */
+        _dlpm_free_pending (state, region);
+      }
+  }
+
+  /* No pending allocation at this point.  Create new free chunk.  */
+  state->pending_free[region] = _dlpm_chunk_make (ptr, NULL, requested_size);
+}
+
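+/* Deferred-free example (illustration only): freeing [A, A+16) while
+   the region's pending chunk is [A+16, A+48) merely grows the pending
+   chunk to [A, A+48); the freelist is updated only when a later
+   non-adjacent free or an allocation flushes the pending chunk.  */
+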
+#endif /* Non-debugging allocator.  */
diff --git a/elf/dl-protmem_bootstrap.h b/elf/dl-protmem_bootstrap.h
index a9d763bc7b..b5b44ca7a9 100644
--- a/elf/dl-protmem_bootstrap.h
+++ b/elf/dl-protmem_bootstrap.h
@@ -28,7 +28,8 @@  _dl_protmem_bootstrap (void)
 {
   /* The protected memory area is nested within the bootstrap
      allocation.  */
-  struct dl_protmem_state *ptr = _dl_early_mmap (sizeof (*ptr));
+  struct dl_protmem_state *ptr
+    = _dl_early_mmap (DL_PROTMEM_INITIAL_REGION_SIZE);
   if (ptr == NULL)
     return NULL;
   return &ptr->protmem;
diff --git a/elf/tst-dl-protmem.c b/elf/tst-dl-protmem.c
new file mode 100644
index 0000000000..6061845ca7
--- /dev/null
+++ b/elf/tst-dl-protmem.c
@@ -0,0 +1,354 @@ 
+/* Internal test for the protected memory allocator.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <array_length.h>
+#include <libc-diag.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <support/check.h>
+#include <support/xunistd.h>
+#include <sys/mman.h>
+
+static int do_test (void);
+#include <support/test-driver.c>
+
+/* Tracking allocated memory.  Allocation granularity is assumed to be
+   8 bytes.  */
+
+/* Lowest level.  Covers 65536 * 32 * 8 bytes (24 bits of address space).  */
+struct level3
+{
+  uint32_t bits[1 << 16];
+};
+
+/* Mid-level.  Covers 20 bits of address space.  */
+struct level2
+{
+  struct level3 *level2[1 << 20];
+};
+
+/* Top level.  20 bits of address space.  */
+static struct level2 *level1[1 << 20];
+
+/* Byte address to index in level1.  */
+static inline unsigned int
+level1_index (uintptr_t u)
+{
+#if UINTPTR_WIDTH > 44
+  return u >> 44;
+#else
+  return 0;
+#endif
+}
+
+/* Byte address to index in level1[N]->level2.  */
+static inline unsigned int
+level2_index (uintptr_t u)
+{
+  return (u >> 24) & ((1 << 20) - 1);
+}
+
+/* Byte address to index in level1[N]->level2[M]->level3.  */
+static inline unsigned int
+level3_index (uintptr_t u)
+{
+  unsigned int a = u >> 3;             /* Every 8th byte tracked.  */
+  return (a >> 5) & ((1 << 16) - 1);
+}
+
+/* Mask for the bit in level3_index.  */
+static inline uint32_t
+level3_mask (uintptr_t u)
+{
+  return (uint32_t) 1U << ((u >> 3) & 31);
+}
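+/* Mapping example (illustration only): for a byte address u, the
+   tracking bit is level1[u >> 44]->level2[(u >> 24) & 0xfffff]
+   ->bits[((u >> 3) >> 5) & 0xffff] masked with 1 << ((u >> 3) & 31),
+   so one struct level3 tracks 2^24 bytes of address space at 8-byte
+   granularity.  */
+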
+
+/* Flip a bit from unset to set.  Return false if the bit was already set.  */
+static bool
+set_unset_bit_at (void *p)
+{
+  uintptr_t u = (uintptr_t) p;
+  struct level2 *l2 = level1[level1_index (u)];
+  if (l2 == NULL)
+    {
+      l2 = xmmap (NULL, sizeof (*l2), PROT_READ | PROT_WRITE,
+                  MAP_PRIVATE | MAP_ANONYMOUS, -1);
+      level1[level1_index (u)] = l2;
+    }
+  struct level3 *l3 = l2->level2[level2_index (u)];
+  if (l3 == NULL)
+    {
+      l3 = xmmap (NULL, sizeof (*l3), PROT_READ | PROT_WRITE,
+                  MAP_PRIVATE | MAP_ANONYMOUS, -1);
+      l2->level2[level2_index (u)] = l3;
+    }
+  unsigned int idx = level3_index (u);
+  uint32_t mask = level3_mask (u);
+  if (l3->bits[idx] & mask)
+    return false;
+  l3->bits[idx] |= mask;
+  return true;
+}
+
+/* Flip a bit from set to unset.  Return false if the bit was already
+   cleared.  */
+static bool
+clear_set_bit_at (void *p)
+{
+  uintptr_t u = (uintptr_t) p;
+  struct level2 *l2 = level1[level1_index (u)];
+  if (l2 == NULL)
+    return false;
+  struct level3 *l3 = l2->level2[level2_index (u)];
+  if (l3 == NULL)
+    return false;
+  unsigned int idx = level3_index (u);
+  uint32_t mask = level3_mask (u);
+  if (!(l3->bits[idx] & mask))
+    return false;
+  l3->bits[idx] &= ~mask;
+  return true;
+}
+
+/* Record an allocation in the bitmap.  Errors if the covered bytes
+   are already allocated.  */
+static void
+record_allocate (void *p, size_t size)
+{
+  TEST_VERIFY_EXIT (p != NULL);
+  TEST_VERIFY_EXIT (size > 0);
+  if (((uintptr_t) p & 7) != 0)
+    FAIL_EXIT1 ("unaligned allocation: %p of %zu bytes", p, size);
+  for (size_t i = 0; i < size; i += 8)
+    if (!set_unset_bit_at (p + i))
+      FAIL_EXIT1 ("already allocated byte %p in %zu-byte allocation at %p"
+                  " (offset %zu)", p + i, size, p, i);
+}
+
+/* Record a deallocation in the bitmap.  Errors if the covered bytes
+   are not allocated.  */
+static void
+record_free (void *p, size_t size)
+{
+  TEST_VERIFY_EXIT (p != NULL);
+  TEST_VERIFY_EXIT (size > 0);
+  if (((uintptr_t) p & 7) != 0)
+    FAIL_EXIT1 ("unaligned free: %p of %zu bytes", p, size);
+  for (size_t i = 0; i < size; i += 8)
+    if (!clear_set_bit_at (p + i))
+      FAIL_EXIT1 ("already deallocated byte %p in %zu-byte deallocation at %p"
+                  " (offset %zu)", p + i, size, p, i);
+}
+
+/* This hack results in a definition of struct rtld_global_ro and
+   related data structures.  Do this after all the other header
+   inclusions, to minimize the impact.  */
+#define SHARED
+#include <ldsodefs.h>
+
+/* Create our own version of GLRO (dl_protmem).  */
+static struct rtld_protmem *dl_protmem;
+#undef GLRO
+#define GLRO(x) x
+
+#define SHARED
+#include <dl-protmem.h>
+#include <dl-protmem.c>
+#include <sysdeps/generic/dl-early_mmap.h> /* Avoid direct system call.  */
+#include <dl-protmem_bootstrap.h>
+
+#if !DL_PROTMEM_DEBUG
+/* Return the allocation bit for an address.  */
+static bool
+bit_at (void *p)
+{
+  uintptr_t u = (uintptr_t) p;
+  struct level2 *l2 = level1[level1_index (u)];
+  if (l2 == NULL)
+    return false;
+  struct level3 *l3 = l2->level2[level2_index (u)];
+  if (l3 == NULL)
+    return false;
+  unsigned int idx = level3_index (u);
+  uint32_t mask = level3_mask (u);
+  return l3->bits[idx] & mask;
+}
+
+/* Assert that SIZE bytes at P are unallocated.  */
+static void
+check_free_chunk (void *p, size_t size)
+{
+  if (((uintptr_t) p & 7) != 0)
+    FAIL_EXIT1 ("unaligned free chunk: %p of %zu bytes", p, size);
+  for (size_t i = 0; i < size; i += 8)
+    if (bit_at (p + i))
+      FAIL_EXIT1 ("allocated byte %p in free chunk at %p (%zu bytes,"
+                  " offset %zu)", p + i, p, size, i);
+}
+#endif
+
+/* Dump statistics for the allocator regions (freelist length, maximum
+   free allocation size).  If VERBOSE, log the entire freelist.  */
+static void
+dump_regions (bool verbose)
+{
+#if !DL_PROTMEM_DEBUG
+  struct dl_protmem_state *state = _dl_protmem_state ();
+  for (unsigned int i = 0; i < array_length (state->regions); ++i)
+    {
+      if (verbose && state->regions[i] != NULL)
+        printf ("  region %u at %p\n", i, state->regions[i]);
+
+      chunk pending = state->pending_free[i];
+      unsigned int count;
+      unsigned int max_size;
+      if (pending == NULL)
+        {
+          count = 0;
+          max_size = 0;
+        }
+      else
+        {
+          count = 1;
+          max_size = _dlpm_chunk_size (pending);
+          check_free_chunk (pending, max_size);
+          if (verbose)
+            printf ("    pending free chunk %p, %u\n", pending, max_size);
+        }
+
+      uintptr_t last = 0;
+      for (chunk c = state->freelist[i]; c != NULL; c = _dlpm_chunk_next (c))
+        {
+          ++count;
+          size_t sz = _dlpm_chunk_size (c);
+          if (verbose)
+            printf ("    free chunk %p, %zu\n", c, sz);
+          check_free_chunk (c, sz);
+          if (sz > max_size)
+            max_size = sz;
+          TEST_VERIFY ((uintptr_t) c > last);
+          last = (uintptr_t) c;
+        }
+
+      if (count > 0)
+        {
+          if (verbose)
+            printf ("    ");
+          else
+            printf ("  region %u at %p: ", i, state->regions[i]);
+          printf ("freelist length %u, maximum size %u\n", count, max_size);
+        }
+    }
+#endif
+}
+
+
+static int
+do_test (void)
+{
+  dl_protmem = _dl_protmem_bootstrap ();
+  _dl_protmem_init ();
+
+  /* Perform random allocations and deallocations in a loop.  */
+  srand (1);
+  {
+    struct allocation
+    {
+      void *ptr;
+      size_t size;
+    } allocations[10007] = {};
+    for (unsigned int i = 0; i < 20 * 1000; ++i)
+      {
+        struct allocation *a
+          = &allocations[rand () % array_length (allocations)];
+        if (a->ptr == NULL)
+          {
+            a->size = 8 * ((rand () % 37) + 1);
+            a->ptr = _dl_protmem_allocate (a->size);
+            record_allocate (a->ptr, a->size);
+            /* Clobber the new allocation, in case some metadata still
+               references it.  */
+            memset (a->ptr, 0xcc, a->size);
+          }
+        else
+          {
+            record_free (a->ptr, a->size);
+            _dl_protmem_free (a->ptr, a->size);
+            a->ptr = NULL;
+            a->size = 0;
+          }
+      }
+
+    puts ("info: after running test loop");
+    dump_regions (false);
+
+    for (unsigned int i = 0; i < array_length (allocations); ++i)
+      if (allocations[i].ptr != NULL)
+        {
+          record_free (allocations[i].ptr, allocations[i].size);
+          _dl_protmem_free (allocations[i].ptr, allocations[i].size);
+        }
+    puts ("info: after post-loop deallocations");
+    dump_regions (true);
+  }
+
+  /* Do a few larger allocations to show that coalescing works.  Note
+     that the first allocation has some metadata in it, so the free
+     chunk is not an integral power of two.  */
+  {
+    void *ptrs[50];
+    for (unsigned int i = 0; i < array_length (ptrs); ++i)
+      {
+        ptrs[i] = _dl_protmem_allocate (65536);
+        record_allocate (ptrs[i], 65536);
+      }
+    puts ("info: after large allocations");
+    dump_regions (true);
+    for (unsigned int i = 0; i < array_length (ptrs); ++i)
+      {
+        record_free (ptrs[i], 65536);
+        _dl_protmem_free (ptrs[i], 65536);
+      }
+    puts ("info: after freeing allocations");
+    dump_regions (true);
+
+    ptrs[0] = _dl_protmem_allocate (8);
+    record_allocate (ptrs[0], 8);
+    puts ("info: after dummy allocation");
+    dump_regions (true);
+
+    record_free (ptrs[0], 8);
+#if __GNUC_PREREQ (11, 0)
+          /* Suppress invalid GCC warning with -O3 (GCC PR 110546):
+             error: '_dl_protmem_free' called on pointer returned from a
+               mismatched allocation function [-Werror=mismatched-dealloc]
+             note: returned from '_dl_protmem_allocate.constprop */
+    DIAG_IGNORE_NEEDS_COMMENT (11, "-Wmismatched-dealloc");
+#endif
+    _dl_protmem_free (ptrs[0], 8);
+#if __GNUC_PREREQ (11, 0)
+    DIAG_POP_NEEDS_COMMENT;
+#endif
+    puts ("info: after dummy deallocation");
+    dump_regions (true);
+  }
+
+  return 0;
+}