[2/5] Add single-threaded path to malloc/realloc/calloc/memalign

Message ID DB6PR0801MB20538FF70A42648C916C69C0834B0@DB6PR0801MB2053.eurprd08.prod.outlook.com
State New
Series
  • Add single-threaded fast path to malloc

Commit Message

Wilco Dijkstra Oct. 12, 2017, 9:35 a.m.
This patch adds a single-threaded fast path to malloc, realloc,
calloc and memalign.  When we're single-threaded, we can bypass
arena_get (which always locks the arena it returns) and just use
the main arena.  We also avoid retrying a different arena.

Passes the glibc tests, OK for commit?

ChangeLog:
2017-10-11  Wilco Dijkstra  <wdijkstr@arm.com>

	* malloc/malloc.c (sysdep-cancel.h): Add include.
	(__libc_malloc): Add SINGLE_THREAD_P path.
	(__libc_realloc): Likewise.
	(_mid_memalign): Likewise.
	(__libc_calloc): Likewise.
--

Comments

Wilco Dijkstra Oct. 12, 2017, 10:55 p.m. | #1
DJ Delorie wrote:
> Does SINGLE_THREAD_P return true if a multi-threaded app joins all its
> threads, so that there's only one thread remaining?  If so, there will
> still be more arenas in play that might be usable, and the remaining
> thread might be better served by whatever arena it had been using, due
> to data locality on NUMA machines.

No, that doesn't happen, so it's not a problem today.
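
To make the scenario concrete, here is a hypothetical test program:
even after the only extra thread has been joined, glibc of this era
still treats the process as multi-threaded, so malloc stays on the
locked arena_get path rather than taking the new fast path:

  /* Hypothetical illustration: after pthread_join the process is
     single-threaded again, but SINGLE_THREAD_P inside libc stays
     false, so this malloc does not take the fast path.  */
  #include <pthread.h>
  #include <stdlib.h>

  static void *worker (void *arg) { return arg; }

  int main (void)
  {
    pthread_t t;
    if (pthread_create (&t, NULL, worker, NULL) != 0)
      return 1;
    pthread_join (t, NULL);
    void *p = malloc (64);  /* still uses the locked path */
    free (p);
    return 0;
  }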

> I also wonder if there's an upper limit to the main arena, where we'd
> have to use other arenas to fully use available VM.

Not as far as I can tell.  There are the traditional stack and heap
that grow towards each other via sbrk.  I don't believe you could
reduce that, since mmaps in the middle would not be allowed.
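
A rough way to observe the main arena growing via sbrk (a sketch,
assuming a fresh process and the default 128 KiB mmap threshold):

  /* Sketch: a small allocation in a fresh process typically extends
     the main arena via sbrk, moving the program break upwards.  */
  #include <stdio.h>
  #include <stdlib.h>
  #include <unistd.h>

  int main (void)
  {
    void *before = sbrk (0);
    void *p = malloc (64 * 1024);  /* below the mmap threshold */
    void *after = sbrk (0);
    printf ("break before %p, after %p\n", before, after);
    free (p);
    return 0;
  }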

> If either of these are issues, it might be better to conditionalize the
> lock calls than to duplicate the code, so we keep the alt-arena code in
> play.

That would mean sorting out all the locking in the arena code as well...
If we want to retry a different arena, the code that fails to allocate
more memory should do that (to avoid adding the retry to the fast path).
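
A sketch of that idea, in the style of the __libc_calloc hunk below
(arena_get_retry is the existing glibc helper; the placement next to
the failure check, out of the fast path, is the speculative part):

  /* Sketch: retry another arena only once allocation has actually
     failed, so the single-threaded fast path carries no retry logic.  */
  mem = _int_malloc (av, sz);
  if (mem == NULL && av != NULL && !SINGLE_THREAD_P)
    {
      av = arena_get_retry (av, sz);  /* existing helper */
      mem = _int_malloc (av, sz);
    }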

Wilco

Patch

diff --git a/malloc/malloc.c b/malloc/malloc.c
index 0dd095cb79e2bc2a4e111155bb9cb115e7f92d50..7db4816a358164822988e0e2752714a2643fdb26 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -243,6 +243,10 @@ 
 
 #include <malloc/malloc-internal.h>
 
+/* For SINGLE_THREAD_P.  */
+#include <sysdep-cancel.h>
+
+
 /*
   Debugging:
 
@@ -3034,6 +3038,14 @@  __libc_malloc (size_t bytes)
   DIAG_POP_NEEDS_COMMENT;
 #endif
 
+  if (SINGLE_THREAD_P)
+    {
+      victim = _int_malloc (&main_arena, bytes);
+      assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
+	      &main_arena == arena_for_chunk (mem2chunk (victim)));
+      return victim;
+    }
+
   arena_get (ar_ptr, bytes);
 
   victim = _int_malloc (ar_ptr, bytes);
@@ -3190,6 +3202,15 @@  __libc_realloc (void *oldmem, size_t bytes)
       return newmem;
     }
 
+  if (SINGLE_THREAD_P)
+    {
+      newp = _int_realloc (ar_ptr, oldp, oldsize, nb);
+      assert (!newp || chunk_is_mmapped (mem2chunk (newp)) ||
+	      ar_ptr == arena_for_chunk (mem2chunk (newp)));
+
+      return newp;
+    }
+
   __libc_lock_lock (ar_ptr->mutex);
 
   newp = _int_realloc (ar_ptr, oldp, oldsize, nb);
@@ -3265,6 +3286,15 @@  _mid_memalign (size_t alignment, size_t bytes, void *address)
       alignment = a;
     }
 
+  if (SINGLE_THREAD_P)
+    {
+      p = _int_memalign (&main_arena, alignment, bytes);
+      assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
+	      &main_arena == arena_for_chunk (mem2chunk (p)));
+
+      return p;
+    }
+
   arena_get (ar_ptr, bytes + alignment + MINSIZE);
 
   p = _int_memalign (ar_ptr, alignment, bytes);
@@ -3357,7 +3387,11 @@  __libc_calloc (size_t n, size_t elem_size)
 
   MAYBE_INIT_TCACHE ();
 
-  arena_get (av, sz);
+  if (SINGLE_THREAD_P)
+    av = &main_arena;
+  else
+    arena_get (av, sz);
+
   if (av)
     {
       /* Check if we hand out the top chunk, in which case there may be no
@@ -3387,19 +3421,21 @@  __libc_calloc (size_t n, size_t elem_size)
     }
   mem = _int_malloc (av, sz);
 
-
   assert (!mem || chunk_is_mmapped (mem2chunk (mem)) ||
           av == arena_for_chunk (mem2chunk (mem)));
 
-  if (mem == 0 && av != NULL)
+  if (!SINGLE_THREAD_P)
     {
-      LIBC_PROBE (memory_calloc_retry, 1, sz);
-      av = arena_get_retry (av, sz);
-      mem = _int_malloc (av, sz);
-    }
+      if (mem == 0 && av != NULL)
+	{
+	  LIBC_PROBE (memory_calloc_retry, 1, sz);
+	  av = arena_get_retry (av, sz);
+	  mem = _int_malloc (av, sz);
+	}
 
-  if (av != NULL)
-    __libc_lock_unlock (av->mutex);
+      if (av != NULL)
+	__libc_lock_unlock (av->mutex);
+    }
 
   /* Allocation failed even after a retry.  */
   if (mem == 0)