@@ -302,6 +302,7 @@ static int num_nodes = 0;
#include <sys/types.h>
#include <dirent.h>
#include <string.h>
+#include <sys/syscall.h>
/*
* Wouldn't it be nice to get this with a single syscall instead?
@@ -328,13 +329,26 @@ static int numa_node_count(void)
return ret;
}
+static inline pid_t gettid(void) /* Local helper: issues the raw SYS_gettid syscall to obtain the caller's thread ID. NOTE(review): glibc 2.30+ declares its own gettid() in <unistd.h> under _GNU_SOURCE - confirm this static definition does not clash. */
+{
+ return syscall(SYS_gettid); /* syscall()'s result is returned as pid_t */
+}
+
static void ptmalloc_init(void)
{
const char *s;
int i, secure = 0;
+ static pid_t init_tid;
if (!__sync_bool_compare_and_swap(&__malloc_initialized, -1, 0)) {
do {
+ if (init_tid == gettid()) {
+ /* We have recursed back into malloc()
+ from ptmalloc_init. At this point
+ we can survive by using the main_arena,
+ so just return. */
+ return;
+ }
sched_yield();
} while (__malloc_initialized <= 0);
return;
@@ -356,7 +370,15 @@ static void ptmalloc_init(void)
#endif /* !defined NO_THREADS */
mutex_init(&main_arena.mutex);
main_arena.next = &main_arena;
+ main_arena.local_next = &main_arena;
main_arena.numa_node = -1;
+
+ /* numa_node_count() can recurse into malloc(). Use main_arena
+ for all numa nodes and set init_tid to allow recursion. */
+ for (i = 0; i < MAX_NUMA_NODES; i++) {
+ numa_arena[i] = &main_arena;
+ }
+ init_tid = gettid();
num_nodes = numa_node_count();
for (i = 0; i < num_nodes; i++) {
numa_arena[i] = _int_new_arena(0, i);
@@ -779,7 +801,6 @@ static struct malloc_state *arena_get2(struct malloc_state *a_tsd, size_t size)
* Calling getcpu() for every allocation is too expensive - but we can turn
* the syscall into a pointer dereference to a kernel shared memory page.
*/
-#include <sys/syscall.h>
static inline int getnode(void)
{
int node, ret;