Patchwork Go patch committed: Rework locking code to split stack less

login
register
mail settings
Submitter Ian Taylor
Date Dec. 17, 2010, 6:44 a.m.
Message ID <mcr8vzox5ib.fsf@google.com>
Download mbox | patch
Permalink /patch/75840/
State New
Headers show

Comments

Ian Taylor - Dec. 17, 2010, 6:44 a.m.
This patch changes the locking code used in the libgo runtime to split
the stack much less frequently.  Rather than always calling
pthread_mutex_lock, which requires splitting the stack, it now uses
atomic compare-and-swap to try to acquire or release the lock directly
without using the mutex.  To make this work more easily I actually
changed it from a mutex to a semaphore (on GNU/Linux I could instead use
a futex, which would probably be more efficient).  Bootstrapped and
tested on x86_64-unknown-linux-gnu.  Committed to mainline.

Ian

Patch

Index: libgo/runtime/mfinal.c
===================================================================
--- libgo/runtime/mfinal.c	(revision 167736)
+++ libgo/runtime/mfinal.c	(working copy)
@@ -5,7 +5,13 @@ 
 #include "runtime.h"
 #include "malloc.h"
 
-static Lock finlock = LOCK_INITIALIZER;
+static Lock finlock;
+
+void
+runtime_initfintab()
+{
+	runtime_initlock(&finlock);
+}
 
 // Finalizer hash table.  Direct hash, linear scan, at most 3/4 full.
 // Table size is power of 3 so that hash can be key % max.
Index: libgo/runtime/sigqueue.goc
===================================================================
--- libgo/runtime/sigqueue.goc	(revision 167736)
+++ libgo/runtime/sigqueue.goc	(working copy)
@@ -67,7 +67,7 @@  __go_sigsend(int32 s)
 		mask = sig.mask;
 		if(mask & bit)
 			break;		// signal already in queue
-		if(cas(&sig.mask, mask, mask|bit)) {
+		if(runtime_cas(&sig.mask, mask, mask|bit)) {
 			// Added to queue.
 			// Only send a wakeup for the first signal in each round.
 			if(mask == 0)
@@ -86,7 +86,7 @@  func Sigrecv() (m uint32) {
 	noteclear(&sig);
 	for(;;) {
 		m = sig.mask;
-		if(cas(&sig.mask, m, 0))
+		if(runtime_cas(&sig.mask, m, 0))
 			break;
 	}
 }
Index: libgo/runtime/malloc.goc
===================================================================
--- libgo/runtime/malloc.goc	(revision 167736)
+++ libgo/runtime/malloc.goc	(working copy)
@@ -270,6 +270,9 @@  runtime_allocmcache(void)
 void
 runtime_mallocinit(void)
 {
+	runtime_initfintab();
+	runtime_Mprof_Init();
+
 	runtime_SysMemInit();
 	runtime_InitSizes();
 	runtime_MHeap_Init(&runtime_mheap, runtime_SysAlloc);
Index: libgo/runtime/mprof.goc
===================================================================
--- libgo/runtime/mprof.goc	(revision 167736)
+++ libgo/runtime/mprof.goc	(working copy)
@@ -14,7 +14,7 @@  package runtime
 typedef struct __go_open_array Slice;
 
 // NOTE(rsc): Everything here could use cas if contention became an issue.
-static Lock proflock = LOCK_INITIALIZER;
+static Lock proflock;
 
 // Per-call-stack allocation information.
 // Lookup by hashing call stack into a linked-list hash table.
@@ -185,6 +185,12 @@  found:
 	return nil;
 }
 
+void
+runtime_Mprof_Init()
+{
+	runtime_initlock(&proflock);
+}
+
 // Called by malloc to record a profiled block.
 void
 runtime_MProf_Malloc(void *p, uintptr size)
Index: libgo/runtime/malloc.h
===================================================================
--- libgo/runtime/malloc.h	(revision 167736)
+++ libgo/runtime/malloc.h	(working copy)
@@ -375,6 +375,7 @@  enum
 	RefFlags = 0xFFFF0000U,
 };
 
+void	runtime_Mprof_Init(void);
 void	runtime_MProf_Malloc(void*, uintptr);
 void	runtime_MProf_Free(void*, uintptr);
 void	runtime_MProf_Mark(void (*scan)(byte *, int64));
Index: libgo/runtime/runtime.h
===================================================================
--- libgo/runtime/runtime.h	(revision 167736)
+++ libgo/runtime/runtime.h	(working copy)
@@ -13,6 +13,7 @@ 
 #include <stdlib.h>
 #include <string.h>
 #include <pthread.h>
+#include <semaphore.h>
 
 #ifdef HAVE_SYS_MMAN_H
 #include <sys/mman.h>
@@ -53,7 +54,8 @@  typedef	struct	Lock		Lock;
 
 struct	Lock
 {
-	pthread_mutex_t	mutex;
+	uint32 key;
+	sem_t sem;
 };
 
 /* A Note.  */
@@ -119,6 +121,7 @@  struct	M
 
 void*	runtime_mal(uintptr);
 void	runtime_mallocinit(void);
+void	runtime_initfintab(void);
 void	siginit(void);
 bool	__go_sigsend(int32 sig);
 int64	runtime_nanotime(void);
@@ -138,12 +141,10 @@  void	__go_cachestats(void);
  * as fast as spin locks (just a few user-level instructions),
  * but on the contention path they sleep in the kernel.
  */
-#define	LOCK_INITIALIZER	{ PTHREAD_MUTEX_INITIALIZER }
 void	runtime_initlock(Lock*);
 void	runtime_lock(Lock*);
 void	runtime_unlock(Lock*);
 void	runtime_destroylock(Lock*);
-bool	runtime_trylock(Lock*);
 
 void semacquire (uint32 *) asm ("libgo_runtime.runtime.Semacquire");
 void semrelease (uint32 *) asm ("libgo_runtime.runtime.Semrelease");
@@ -178,7 +179,7 @@  void	runtime_addfinalizer(void*, void(*f
 void	runtime_walkfintab(void (*fn)(void*), void (*scan)(byte *, int64));
 #define runtime_mmap mmap
 #define runtime_munmap(p, s) munmap((p), (s))
-#define cas(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
+#define runtime_cas(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
 
 struct __go_func_type;
 void reflect_call(const struct __go_func_type *, const void *, _Bool, void **,
Index: libgo/runtime/thread.c
===================================================================
--- libgo/runtime/thread.c	(revision 167736)
+++ libgo/runtime/thread.c	(working copy)
@@ -7,32 +7,67 @@ 
 void
 runtime_initlock(Lock *l)
 {
-	if(pthread_mutex_init(&l->mutex, NULL) != 0)
-		runtime_throw("pthread_mutex_init failed");
+	l->key = 0;
+	if(sem_init(&l->sem, 0, 0) != 0)
+		runtime_throw("sem_init failed");
+}
+
+static uint32
+runtime_xadd(uint32 volatile *val, int32 delta)
+{
+	uint32 oval, nval;
+
+	for(;;){
+		oval = *val;
+		nval = oval + delta;
+		if(runtime_cas(val, oval, nval))
+			return nval;
+	}
+}
+
+// noinline so that runtime_lock doesn't have to split the stack.
+static void runtime_lock_full(Lock *l) __attribute__ ((noinline));
+
+static void
+runtime_lock_full(Lock *l)
+{
+	if(sem_wait(&l->sem) != 0)
+		runtime_throw("sem_wait failed");
 }
 
 void
 runtime_lock(Lock *l)
 {
-	if(pthread_mutex_lock(&l->mutex) != 0)
-		runtime_throw("lock failed");
+	if(m->locks < 0)
+		runtime_throw("lock count");
+	m->locks++;
+
+	if(runtime_xadd(&l->key, 1) > 1)	// someone else has it; wait
+		runtime_lock_full(l);
 }
 
-void
-runtime_unlock(Lock *l)
+static void runtime_unlock_full(Lock *l) __attribute__ ((noinline));
+
+static void
+runtime_unlock_full(Lock *l)
 {
-	if(pthread_mutex_unlock(&l->mutex) != 0)
-		runtime_throw("unlock failed");
+	if(sem_post(&l->sem) != 0)
+		runtime_throw("sem_post failed");
 }
 
 void
-runtime_destroylock(Lock *l)
+runtime_unlock(Lock *l)
 {
-	pthread_mutex_destroy(&l->mutex);
+	m->locks--;
+	if(m->locks < 0)
+		runtime_throw("lock count");
+
+	if(runtime_xadd(&l->key, -1) > 0)	// someone else is waiting
+		runtime_unlock_full(l);
 }
 
-bool
-runtime_trylock(Lock *l)
+void
+runtime_destroylock(Lock *l)
 {
-	return pthread_mutex_trylock(&l->mutex) == 0;
+	sem_destroy(&l->sem);
 }
Index: libgo/runtime/mgc0.c
===================================================================
--- libgo/runtime/mgc0.c	(revision 167736)
+++ libgo/runtime/mgc0.c	(working copy)
@@ -27,7 +27,7 @@  struct BlockList
 };
 
 static bool finstarted;
-static Lock finqlock = LOCK_INITIALIZER;
+static pthread_mutex_t finqlock = PTHREAD_MUTEX_INITIALIZER;
 static pthread_cond_t finqcond = PTHREAD_COND_INITIALIZER;
 static Finalizer *finq;
 static int32 fingwait;
@@ -284,7 +284,7 @@  sweep(void)
 			sweepspan(s);
 }
 
-static Lock gcsema = LOCK_INITIALIZER;
+static pthread_mutex_t gcsema = PTHREAD_MUTEX_INITIALIZER;
 
 // Initialized from $GOGC.  GOGC=off means no gc.
 //
@@ -327,8 +327,8 @@  runtime_gc(int32 force __attribute__ ((u
 	if(gcpercent < 0)
 		return;
 
-	runtime_lock(&finqlock);
-	runtime_lock(&gcsema);
+	pthread_mutex_lock(&finqlock);
+	pthread_mutex_lock(&gcsema);
 	m->locks++;	// disable gc during the mallocs in newproc
 	t0 = runtime_nanotime();
 	runtime_stoptheworld();
@@ -345,7 +345,7 @@  runtime_gc(int32 force __attribute__ ((u
 	mstats.pause_ns += t1 - t0;
 	if(mstats.debuggc)
 		runtime_printf("pause %llu\n", (unsigned long long)t1-t0);
-	runtime_unlock(&gcsema);
+	pthread_mutex_unlock(&gcsema);
 	runtime_starttheworld();
 
 	// finqlock is still held.
@@ -362,7 +362,7 @@  runtime_gc(int32 force __attribute__ ((u
 		}
 	}
 	m->locks--;
-	runtime_unlock(&finqlock);
+	pthread_mutex_unlock(&finqlock);
 }
 
 static void
@@ -373,16 +373,16 @@  runfinq(void* dummy)
 	USED(dummy);
 
 	for(;;) {
-		runtime_lock(&finqlock);
+		pthread_mutex_lock(&finqlock);
 		f = finq;
 		finq = nil;
 		if(f == nil) {
 			fingwait = 1;
-			pthread_cond_wait(&finqcond, &finqlock.mutex);
-			runtime_unlock(&finqlock);
+			pthread_cond_wait(&finqcond, &finqlock);
+			pthread_mutex_unlock(&finqlock);
 			continue;
 		}
-		runtime_unlock(&finqlock);
+		pthread_mutex_unlock(&finqlock);
 		for(; f; f=next) {
 			void *params[1];