From patchwork Tue Nov 22 20:25:01 2011
X-Patchwork-Id: 127157
From: Ian Lance Taylor
To: gcc-patches@gcc.gnu.org, gofrontend-dev@googlegroups.com
Subject: Go patch committed: New lock/note implementation
Date: Tue, 22 Nov 2011 12:25:01 -0800

This patch updates the implementations of locks and notes used in libgo
to match the current version in the master Go library.  The new code uses
futexes when running on GNU/Linux, while still using semaphores on other
systems.  This implementation should be faster, and it does not require
explicit initialization: a zeroed Lock or Note is already in its initial
state.

Bootstrapped and ran the Go testsuite on x86_64-unknown-linux-gnu.  I
tested both the futex and the semaphore versions.  Committed to mainline.
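For readers who have not seen the futex protocol before, the core of the
new Note is just an atomic flag paired with FUTEX_WAIT/FUTEX_WAKE.  The
following standalone sketch is illustrative only, not part of the patch;
the note_* names are invented here, and it assumes Linux plus GCC's
__atomic builtins.  It shows the shape of what lock_futex.c and
thread-linux.c implement:

    #include <linux/futex.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <stdint.h>

    typedef struct { uint32_t key; } note;  /* a zeroed note is "cleared" */

    static long futex(uint32_t *addr, int op, uint32_t val)
    {
      return syscall(__NR_futex, addr, op, val, NULL, NULL, 0);
    }

    static void note_wakeup(note *n)
    {
      __atomic_exchange_n(&n->key, 1, __ATOMIC_SEQ_CST);
      futex(&n->key, FUTEX_WAKE, 1);  /* wake at most one sleeper */
    }

    static void note_sleep(note *n)
    {
      /* Sleep only while the flag is still 0; re-checking the flag on
         every iteration is what makes spurious wakeups harmless.  */
      while (__atomic_load_n(&n->key, __ATOMIC_SEQ_CST) == 0)
        futex(&n->key, FUTEX_WAIT, 0);
    }

Because the whole state lives in that one word, a zeroed Note (or Lock)
needs no setup, which is what lets this patch delete runtime_initlock
and all of its callers.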
Ian

diff -r 8b1402d2f0d3 libgo/Makefile.am
--- a/libgo/Makefile.am	Tue Nov 22 12:16:53 2011 -0800
+++ b/libgo/Makefile.am	Tue Nov 22 12:18:39 2011 -0800
@@ -394,6 +394,12 @@
 rtems_task_variable_add_file =
 endif
 
+if LIBGO_IS_LINUX
+runtime_lock_files = runtime/lock_futex.c runtime/thread-linux.c
+else
+runtime_lock_files = runtime/lock_sema.c runtime/thread-sema.c
+endif
+
 runtime_files = \
 	runtime/go-append.c \
 	runtime/go-assert.c \
@@ -432,7 +438,6 @@
 	runtime/go-new-channel.c \
 	runtime/go-new-map.c \
 	runtime/go-new.c \
-	runtime/go-note.c \
 	runtime/go-panic.c \
 	runtime/go-print.c \
 	runtime/go-rec-big.c \
@@ -474,6 +479,7 @@
 	runtime/go-unsafe-pointer.c \
 	runtime/go-unwind.c \
 	runtime/cpuprof.c \
+	$(runtime_lock_files) \
 	runtime/mcache.c \
 	runtime/mcentral.c \
 	$(runtime_mem_file) \
diff -r 8b1402d2f0d3 libgo/runtime/cpuprof.c
--- a/libgo/runtime/cpuprof.c	Tue Nov 22 12:16:53 2011 -0800
+++ b/libgo/runtime/cpuprof.c	Tue Nov 22 12:18:39 2011 -0800
@@ -115,12 +115,6 @@
 static bool evict(Profile*, Entry*);
 static bool flushlog(Profile*);
 
-void
-runtime_cpuprofinit(void)
-{
-	runtime_initlock(&lk);
-}
-
 // LostProfileData is a no-op function used in profiles
 // to mark the number of profiling stack traces that were
 // discarded due to slow data writers.
diff -r 8b1402d2f0d3 libgo/runtime/go-main.c
--- a/libgo/runtime/go-main.c	Tue Nov 22 12:16:53 2011 -0800
+++ b/libgo/runtime/go-main.c	Tue Nov 22 12:18:39 2011 -0800
@@ -47,11 +47,10 @@
   g = &runtime_g0;
   m->curg = g;
   g->m = m;
-  runtime_initpanic ();
   runtime_mallocinit ();
-  runtime_cpuprofinit ();
   __go_gc_goroutine_init (&argc);
 
+  runtime_osinit();
   runtime_goargs();
   runtime_goenvs();
diff -r 8b1402d2f0d3 libgo/runtime/go-nanotime.c
--- a/libgo/runtime/go-nanotime.c	Tue Nov 22 12:16:53 2011 -0800
+++ b/libgo/runtime/go-nanotime.c	Tue Nov 22 12:18:39 2011 -0800
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// Return time in nanoseconds.  This is only used for computing runtime.
+// Return current time in nanoseconds.
 
 #include <sys/time.h>
diff -r 8b1402d2f0d3 libgo/runtime/go-note.c
--- a/libgo/runtime/go-note.c	Tue Nov 22 12:16:53 2011 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,74 +0,0 @@
-/* go-note.c -- implement notesleep, notewakeup and noteclear.
-
-   Copyright 2009 The Go Authors. All rights reserved.
-   Use of this source code is governed by a BSD-style
-   license that can be found in the LICENSE file. */
-
-/* A note is a one-time notification.  noteclear clears the note.
-   notesleep waits for a call to notewakeup.  notewakeup wakes up
-   every thread waiting on the note. */
-
-#include "go-assert.h"
-#include "runtime.h"
-
-/* We use a single global lock and condition variable.  It would be
-   better to use a futex on GNU/Linux. */
-
-static pthread_mutex_t note_lock = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t note_cond = PTHREAD_COND_INITIALIZER;
-
-/* noteclear is called before any calls to notesleep or
-   notewakeup. */
-
-void
-runtime_noteclear (Note* n)
-{
-  int32 i;
-
-  i = pthread_mutex_lock (&note_lock);
-  __go_assert (i == 0);
-
-  n->woken = 0;
-
-  i = pthread_mutex_unlock (&note_lock);
-  __go_assert (i == 0);
-}
-
-/* Wait until notewakeup is called.  */
-
-void
-runtime_notesleep (Note* n)
-{
-  int32 i;
-
-  i = pthread_mutex_lock (&note_lock);
-  __go_assert (i == 0);
-
-  while (!n->woken)
-    {
-      i = pthread_cond_wait (&note_cond, &note_lock);
-      __go_assert (i == 0);
-    }
-
-  i = pthread_mutex_unlock (&note_lock);
-  __go_assert (i == 0);
-}
-
-/* Wake up every thread sleeping on the note. */
-
-void
-runtime_notewakeup (Note *n)
-{
-  int32 i;
-
-  i = pthread_mutex_lock (&note_lock);
-  __go_assert (i == 0);
-
-  n->woken = 1;
-
-  i = pthread_cond_broadcast (&note_cond);
-  __go_assert (i == 0);
-
-  i = pthread_mutex_unlock (&note_lock);
-  __go_assert (i == 0);
-}
diff -r 8b1402d2f0d3 libgo/runtime/go-semacquire.c
--- a/libgo/runtime/go-semacquire.c	Tue Nov 22 12:16:53 2011 -0800
+++ b/libgo/runtime/go-semacquire.c	Tue Nov 22 12:18:39 2011 -0800
@@ -117,35 +117,3 @@
       __go_assert (i == 0);
     }
 }
-
-
-#ifndef HAVE_SYNC_FETCH_AND_ADD_4
-
-/* For targets which don't have the required sync support.  Really
-   this should be provided by gcc itself.  FIXME. */
-
-static pthread_mutex_t sync_lock = PTHREAD_MUTEX_INITIALIZER;
-
-uint32
-__sync_fetch_and_add_4(uint32*, uint32)
-  __attribute__((visibility("hidden")));
-
-uint32
-__sync_fetch_and_add_4(uint32* ptr, uint32 add)
-{
-  int i;
-  uint32 ret;
-
-  i = pthread_mutex_lock(&sync_lock);
-  __go_assert(i == 0);
-
-  ret = *ptr;
-  *ptr += add;
-
-  i = pthread_mutex_unlock(&sync_lock);
-  __go_assert(i == 0);
-
-  return ret;
-}
-
-#endif
diff -r 8b1402d2f0d3 libgo/runtime/lock_futex.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgo/runtime/lock_futex.c	Tue Nov 22 12:18:39 2011 -0800
@@ -0,0 +1,146 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+
+// This implementation depends on OS-specific implementations of
+//
+//	runtime.futexsleep(uint32 *addr, uint32 val, int64 ns)
+//		Atomically,
+//			if(*addr == val) sleep
+//		Might be woken up spuriously; that's allowed.
+//		Don't sleep longer than ns; ns < 0 means forever.
+//
+//	runtime.futexwakeup(uint32 *addr, uint32 cnt)
+//		If any procs are sleeping on addr, wake up at most cnt.
+
+enum
+{
+	MUTEX_UNLOCKED = 0,
+	MUTEX_LOCKED = 1,
+	MUTEX_SLEEPING = 2,
+
+	ACTIVE_SPIN = 4,
+	ACTIVE_SPIN_CNT = 30,
+	PASSIVE_SPIN = 1,
+};
+
+// Possible lock states are MUTEX_UNLOCKED, MUTEX_LOCKED and MUTEX_SLEEPING.
+// MUTEX_SLEEPING means that there is presumably at least one sleeping thread.
+// Note that there can be spinning threads during all states - they do not
+// affect mutex's state.
+void
+runtime_lock(Lock *l)
+{
+	uint32 i, v, wait, spin;
+
+	if(m->locks++ < 0)
+		runtime_throw("runtime_lock: lock count");
+
+	// Speculative grab for lock.
+	v = runtime_xchg(&l->key, MUTEX_LOCKED);
+	if(v == MUTEX_UNLOCKED)
+		return;
+
+	// wait is either MUTEX_LOCKED or MUTEX_SLEEPING
+	// depending on whether there is a thread sleeping
+	// on this mutex.  If we ever change l->key from
+	// MUTEX_SLEEPING to some other value, we must be
+	// careful to change it back to MUTEX_SLEEPING before
+	// returning, to ensure that the sleeping thread gets
+	// its wakeup call.
+	wait = v;
+
+	// On uniprocessors, no point spinning.
+	// On multiprocessors, spin for ACTIVE_SPIN attempts.
+	spin = 0;
+	if(runtime_ncpu > 1)
+		spin = ACTIVE_SPIN;
+
+	for(;;) {
+		// Try for lock, spinning.
+		for(i = 0; i < spin; i++) {
+			while(l->key == MUTEX_UNLOCKED)
+				if(runtime_cas(&l->key, MUTEX_UNLOCKED, wait))
+					return;
+			runtime_procyield(ACTIVE_SPIN_CNT);
+		}
+
+		// Try for lock, rescheduling.
+		for(i=0; i < PASSIVE_SPIN; i++) {
+			while(l->key == MUTEX_UNLOCKED)
+				if(runtime_cas(&l->key, MUTEX_UNLOCKED, wait))
+					return;
+			runtime_osyield();
+		}
+
+		// Sleep.
+		v = runtime_xchg(&l->key, MUTEX_SLEEPING);
+		if(v == MUTEX_UNLOCKED)
+			return;
+		wait = MUTEX_SLEEPING;
+		runtime_futexsleep(&l->key, MUTEX_SLEEPING, -1);
+	}
+}
+
+void
+runtime_unlock(Lock *l)
+{
+	uint32 v;
+
+	if(--m->locks < 0)
+		runtime_throw("runtime_unlock: lock count");
+
+	v = runtime_xchg(&l->key, MUTEX_UNLOCKED);
+	if(v == MUTEX_UNLOCKED)
+		runtime_throw("unlock of unlocked lock");
+	if(v == MUTEX_SLEEPING)
+		runtime_futexwakeup(&l->key, 1);
+}
+
+// One-time notifications.
+void
+runtime_noteclear(Note *n)
+{
+	n->key = 0;
+}
+
+void
+runtime_notewakeup(Note *n)
+{
+	runtime_xchg(&n->key, 1);
+	runtime_futexwakeup(&n->key, 1);
+}
+
+void
+runtime_notesleep(Note *n)
+{
+	while(runtime_atomicload(&n->key) == 0)
+		runtime_futexsleep(&n->key, 0, -1);
+}
+
+void
+runtime_notetsleep(Note *n, int64 ns)
+{
+	int64 deadline, now;
+
+	if(ns < 0) {
+		runtime_notesleep(n);
+		return;
+	}
+
+	if(runtime_atomicload(&n->key) != 0)
+		return;
+
+	deadline = runtime_nanotime() + ns;
+	for(;;) {
+		runtime_futexsleep(&n->key, 0, ns);
+		if(runtime_atomicload(&n->key) != 0)
+			return;
+		now = runtime_nanotime();
+		if(now >= deadline)
+			return;
+		ns = deadline - now;
+	}
+}
diff -r 8b1402d2f0d3 libgo/runtime/lock_sema.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgo/runtime/lock_sema.c	Tue Nov 22 12:18:39 2011 -0800
@@ -0,0 +1,217 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+
+// This implementation depends on OS-specific implementations of
+//
+//	uintptr runtime.semacreate(void)
+//		Create a semaphore, which will be assigned to m->waitsema.
+//		The zero value is treated as absence of any semaphore,
+//		so be sure to return a non-zero value.
+//
+//	int32 runtime.semasleep(int64 ns)
+//		If ns < 0, acquire m->waitsema and return 0.
+//		If ns >= 0, try to acquire m->waitsema for at most ns nanoseconds.
+//		Return 0 if the semaphore was acquired, -1 if interrupted or timed out.
+//
+//	int32 runtime.semawakeup(M *mp)
+//		Wake up mp, which is or will soon be sleeping on mp->waitsema.
+//
+
+enum
+{
+	LOCKED = 1,
+
+	ACTIVE_SPIN = 4,
+	ACTIVE_SPIN_CNT = 30,
+	PASSIVE_SPIN = 1,
+};
+
+void
+runtime_lock(Lock *l)
+{
+	uintptr v;
+	uint32 i, spin;
+
+	if(m->locks++ < 0)
+		runtime_throw("runtime_lock: lock count");
+
+	// Speculative grab for lock.
+	if(runtime_casp(&l->waitm, nil, (void*)LOCKED))
+		return;
+
+	if(m->waitsema == 0)
+		m->waitsema = runtime_semacreate();
+
+	// On uniprocessors, no point spinning.
+	// On multiprocessors, spin for ACTIVE_SPIN attempts.
+	spin = 0;
+	if(runtime_ncpu > 1)
+		spin = ACTIVE_SPIN;
+
+	for(i=0;; i++) {
+		v = (uintptr)runtime_atomicloadp(&l->waitm);
+		if((v&LOCKED) == 0) {
+unlocked:
+			if(runtime_casp(&l->waitm, (void*)v, (void*)(v|LOCKED)))
+				return;
+			i = 0;
+		}
+		if(i < spin)
+			runtime_procyield(ACTIVE_SPIN_CNT);
+		else if(i < spin+PASSIVE_SPIN)
+			runtime_osyield();
+		else {
+			// Someone else has it.
+			// l->waitm points to a linked list of M's waiting
+			// for this lock, chained through m->nextwaitm.
+			// Queue this M.
+			for(;;) {
+				m->nextwaitm = (void*)(v&~LOCKED);
+				if(runtime_casp(&l->waitm, (void*)v, (void*)((uintptr)m|LOCKED)))
+					break;
+				v = (uintptr)runtime_atomicloadp(&l->waitm);
+				if((v&LOCKED) == 0)
+					goto unlocked;
+			}
+			if(v&LOCKED) {
+				// Queued.  Wait.
+				runtime_semasleep(-1);
+				i = 0;
+			}
+		}
+	}
+}
+
+void
+runtime_unlock(Lock *l)
+{
+	uintptr v;
+	M *mp;
+
+	if(--m->locks < 0)
+		runtime_throw("runtime_unlock: lock count");
+
+	for(;;) {
+		v = (uintptr)runtime_atomicloadp(&l->waitm);
+		if(v == LOCKED) {
+			if(runtime_casp(&l->waitm, (void*)LOCKED, nil))
+				break;
+		} else {
+			// Other M's are waiting for the lock.
+			// Dequeue an M.
+			mp = (void*)(v&~LOCKED);
+			if(runtime_casp(&l->waitm, (void*)v, mp->nextwaitm)) {
+				// Dequeued an M.  Wake it.
+				runtime_semawakeup(mp);
+				break;
+			}
+		}
+	}
+}
+
+// One-time notifications.
+void
+runtime_noteclear(Note *n)
+{
+	n->waitm = nil;
+}
+
+void
+runtime_notewakeup(Note *n)
+{
+	M *mp;
+
+	do
+		mp = runtime_atomicloadp(&n->waitm);
+	while(!runtime_casp(&n->waitm, mp, (void*)LOCKED));
+
+	// Successfully set waitm to LOCKED.
+	// What was it before?
+	if(mp == nil) {
+		// Nothing was waiting.  Done.
+	} else if(mp == (M*)LOCKED) {
+		// Two notewakeups!  Not allowed.
+		runtime_throw("notewakeup - double wakeup");
+	} else {
+		// Must be the waiting m.  Wake it up.
+		runtime_semawakeup(mp);
+	}
+}
+
+void
+runtime_notesleep(Note *n)
+{
+	if(m->waitsema == 0)
+		m->waitsema = runtime_semacreate();
+	if(!runtime_casp(&n->waitm, nil, m)) {  // must be LOCKED (got wakeup)
+		if(n->waitm != (void*)LOCKED)
+			runtime_throw("notesleep - waitm out of sync");
+		return;
+	}
+	// Queued.  Sleep.
+	runtime_semasleep(-1);
+}
+
+void
+runtime_notetsleep(Note *n, int64 ns)
+{
+	M *mp;
+	int64 deadline, now;
+
+	if(ns < 0) {
+		runtime_notesleep(n);
+		return;
+	}
+
+	if(m->waitsema == 0)
+		m->waitsema = runtime_semacreate();
+
+	// Register for wakeup on n->waitm.
+	if(!runtime_casp(&n->waitm, nil, m)) {  // must be LOCKED (got wakeup already)
+		if(n->waitm != (void*)LOCKED)
+			runtime_throw("notetsleep - waitm out of sync");
+		return;
+	}
+
+	deadline = runtime_nanotime() + ns;
+	for(;;) {
+		// Registered.  Sleep.
+		if(runtime_semasleep(ns) >= 0) {
+			// Acquired semaphore, semawakeup unregistered us.
+			// Done.
+			return;
+		}
+
+		// Interrupted or timed out.  Still registered.  Semaphore not acquired.
+		now = runtime_nanotime();
+		if(now >= deadline)
+			break;
+
+		// Deadline hasn't arrived.  Keep sleeping.
+		ns = deadline - now;
+	}
+
+	// Deadline arrived.  Still registered.  Semaphore not acquired.
+	// Want to give up and return, but have to unregister first,
+	// so that any notewakeup racing with the return does not
+	// try to grant us the semaphore when we don't expect it.
+	for(;;) {
+		mp = runtime_atomicloadp(&n->waitm);
+		if(mp == m) {
+			// No wakeup yet; unregister if possible.
+			if(runtime_casp(&n->waitm, mp, nil))
+				return;
+		} else if(mp == (M*)LOCKED) {
+			// Wakeup happened so semaphore is available.
+			// Grab it to avoid getting out of sync.
+			if(runtime_semasleep(-1) < 0)
+				runtime_throw("runtime: unable to acquire - semaphore out of sync");
+			return;
+		} else {
+			runtime_throw("runtime: unexpected waitm - semaphore out of sync");
+		}
+	}
+}
diff -r 8b1402d2f0d3 libgo/runtime/malloc.goc
--- a/libgo/runtime/malloc.goc	Tue Nov 22 12:16:53 2011 -0800
+++ b/libgo/runtime/malloc.goc	Tue Nov 22 12:18:39 2011 -0800
@@ -376,12 +376,6 @@
 	runtime_MHeap_Init(&runtime_mheap, runtime_SysAlloc);
 	m->mcache = runtime_allocmcache();
 
-	// Initialize malloc profiling.
-	runtime_Mprof_Init();
-
-	// Initialize finalizer.
-	runtime_initfintab();
-
 	// See if it works.
 	runtime_free(runtime_malloc(1));
 }
diff -r 8b1402d2f0d3 libgo/runtime/malloc.h
--- a/libgo/runtime/malloc.h	Tue Nov 22 12:16:53 2011 -0800
+++ b/libgo/runtime/malloc.h	Tue Nov 22 12:18:39 2011 -0800
@@ -405,7 +405,6 @@
 	FlagNoGC = 1<<2,	// must not free or scan for pointers
 };
 
-void	runtime_Mprof_Init(void);
 void	runtime_MProf_Malloc(void*, uintptr);
 void	runtime_MProf_Free(void*, uintptr);
 void	runtime_MProf_Mark(void (*scan)(byte *, int64));
diff -r 8b1402d2f0d3 libgo/runtime/mcentral.c
--- a/libgo/runtime/mcentral.c	Tue Nov 22 12:16:53 2011 -0800
+++ b/libgo/runtime/mcentral.c	Tue Nov 22 12:18:39 2011 -0800
@@ -26,7 +26,6 @@
 void
 runtime_MCentral_Init(MCentral *c, int32 sizeclass)
 {
-	runtime_initlock(c);
 	c->sizeclass = sizeclass;
 	runtime_MSpanList_Init(&c->nonempty);
 	runtime_MSpanList_Init(&c->empty);
diff -r 8b1402d2f0d3 libgo/runtime/mfinal.c
--- a/libgo/runtime/mfinal.c	Tue Nov 22 12:16:53 2011 -0800
+++ b/libgo/runtime/mfinal.c	Tue Nov 22 12:18:39 2011 -0800
@@ -41,15 +41,6 @@
 	uint8 pad[0 /* CacheLineSize - sizeof(Fintab) */];
 } fintab[TABSZ];
 
-void
-runtime_initfintab()
-{
-	int32 i;
-
-	for(i=0; i<TABSZ; i++)
-		runtime_initlock(&fintab[i]);
-}
-
diff -r 8b1402d2f0d3 libgo/runtime/mheap.c
--- a/libgo/runtime/mheap.c	Tue Nov 22 12:16:53 2011 -0800
+++ b/libgo/runtime/mheap.c	Tue Nov 22 12:18:39 2011 -0800
@@ ... @@
-	runtime_initlock(h);
 	runtime_FixAlloc_Init(&h->spanalloc, sizeof(MSpan), alloc, RecordSpan, h);
 	runtime_FixAlloc_Init(&h->cachealloc, sizeof(MCache), alloc, nil, nil);
 	// h->mapcache needs no init
diff -r 8b1402d2f0d3 libgo/runtime/mprof.goc
--- a/libgo/runtime/mprof.goc	Tue Nov 22 12:16:53 2011 -0800
+++ b/libgo/runtime/mprof.goc	Tue Nov 22 12:18:39 2011 -0800
@@ -186,12 +186,6 @@
 	return nil;
 }
 
-void
-runtime_Mprof_Init()
-{
-	runtime_initlock(&proflock);
-}
-
 // Called by malloc to record a profiled block.
 void
 runtime_MProf_Malloc(void *p, uintptr size)
diff -r 8b1402d2f0d3 libgo/runtime/runtime.c
--- a/libgo/runtime/runtime.c	Tue Nov 22 12:16:53 2011 -0800
+++ b/libgo/runtime/runtime.c	Tue Nov 22 12:18:39 2011 -0800
@@ -14,12 +14,6 @@
 static Lock paniclk;
 
 void
-runtime_initpanic(void)
-{
-	runtime_initlock(&paniclk);
-}
-
-void
 runtime_startpanic(void)
 {
 	if(m->dying) {
@@ -56,7 +50,6 @@
 		// Wait forever without chewing up cpu.
 		// It will exit when it's done.
 		static Lock deadlock;
-		runtime_initlock(&deadlock);
 		runtime_lock(&deadlock);
 		runtime_lock(&deadlock);
 	}
diff -r 8b1402d2f0d3 libgo/runtime/runtime.h
--- a/libgo/runtime/runtime.h	Tue Nov 22 12:16:53 2011 -0800
+++ b/libgo/runtime/runtime.h	Tue Nov 22 12:18:39 2011 -0800
@@ -48,33 +48,17 @@
 typedef uint8 bool;
 typedef uint8 byte;
 typedef struct G G;
+typedef union Lock Lock;
 typedef struct M M;
+typedef union Note Note;
 typedef struct MCache MCache;
 typedef struct FixAlloc FixAlloc;
-typedef struct Lock Lock;
 typedef struct __go_defer_stack Defer;
 typedef struct __go_panic_stack Panic;
 typedef struct __go_open_array Slice;
 typedef struct __go_string String;
 
-/* We use mutexes for locks.  6g uses futexes directly, and perhaps
-   someday we will do that too. */
-
-struct Lock
-{
-  uint32 key;
-  sem_t sem;
-};
-
-/* A Note. */
-
-typedef struct Note Note;
-
-struct Note {
-  int32 woken;
-};
-
 /* Per CPU declarations. */
 
 #ifdef __rtems__
@@ -99,8 +83,19 @@
   false = 0,
 };
 
-/* Structures.  */
-
+/*
+ * structures
+ */
+union	Lock
+{
+	uint32	key;	// futex-based impl
+	M*	waitm;	// linked list of waiting M's (sema-based impl)
+};
+union	Note
+{
+	uint32	key;	// futex-based impl
+	M*	waitm;	// waiting M (sema-based impl)
+};
 struct G
 {
   Defer* defer;
@@ -141,6 +136,10 @@
   int32 profilehz;
   uint32 fastrand;
   MCache *mcache;
+  M* nextwaitm;		// next M waiting for lock
+  uintptr waitsema;	// semaphore for parking on locks
+  uint32 waitsemacount;
+  uint32 waitsemalock;
 
   /* For the list of all threads. */
   struct __go_thread_id *list_entry;
@@ -173,6 +172,7 @@
  * external data
  */
 extern	uint32	runtime_panicking;
+int32	runtime_ncpu;
 
 /*
  * common functions and data
@@ -183,13 +183,13 @@
  * very low level c-called
 */
 void	runtime_args(int32, byte**);
+void	runtime_osinit();
 void	runtime_goargs(void);
 void	runtime_goenvs(void);
 void	runtime_throw(const char*);
 void*	runtime_mal(uintptr);
 String	runtime_gostringnocopy(byte*);
 void	runtime_mallocinit(void);
-void	runtime_initfintab(void);
 void	siginit(void);
 bool	__go_sigsend(int32 sig);
 int64	runtime_nanotime(void);
@@ -208,27 +208,45 @@
  * mutual exclusion locks.  in the uncontended case,
 * as fast as spin locks (just a few user-level instructions),
 * but on the contention path they sleep in the kernel.
+ * a zeroed Lock is unlocked (no need to initialize each lock).
 */
-void	runtime_initlock(Lock*);
 void	runtime_lock(Lock*);
 void	runtime_unlock(Lock*);
-void	runtime_destroylock(Lock*);
-
-void	runtime_semacquire (uint32 *) asm ("libgo_runtime.runtime.Semacquire");
-void	runtime_semrelease (uint32 *) asm ("libgo_runtime.runtime.Semrelease");
 
 /*
 * sleep and wakeup on one-time events.
 * before any calls to notesleep or notewakeup,
 * must call noteclear to initialize the Note.
- * then, any number of threads can call notesleep
+ * then, exactly one thread can call notesleep
 * and exactly one thread can call notewakeup (once).
- * once notewakeup has been called, all the notesleeps
- * will return.  future notesleeps will return immediately.
+ * once notewakeup has been called, the notesleep
+ * will return.  future notesleep will return immediately.
+ * subsequent noteclear must be called only after
+ * previous notesleep has returned, e.g. it's disallowed
+ * to call noteclear straight after notewakeup.
+ *
+ * notetsleep is like notesleep but wakes up after
+ * a given number of nanoseconds even if the event
+ * has not yet happened.  if a goroutine uses notetsleep to
+ * wake up early, it must wait to call noteclear until it
+ * can be sure that no other goroutine is calling
+ * notewakeup.
 */
 void	runtime_noteclear(Note*);
 void	runtime_notesleep(Note*);
 void	runtime_notewakeup(Note*);
+void	runtime_notetsleep(Note*, int64);
+
+/*
+ * low-level synchronization for implementing the above
+ */
+uintptr	runtime_semacreate(void);
+int32	runtime_semasleep(int64);
+void	runtime_semawakeup(M*);
+// or
+void	runtime_futexsleep(uint32*, uint32, int64);
+void	runtime_futexwakeup(uint32*, uint32);
+
 /* Functions.  */
 #define runtime_printf printf
@@ -248,17 +266,22 @@
 #define runtime_cas(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
 #define runtime_casp(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
 #define runtime_xadd(p, v) __sync_add_and_fetch (p, v)
+#define runtime_xchg(p, v) __atomic_exchange_n (p, v, __ATOMIC_SEQ_CST)
+#define runtime_atomicload(p) __atomic_load_n (p, __ATOMIC_SEQ_CST)
+#define runtime_atomicstore(p, v) __atomic_store_n (p, v, __ATOMIC_SEQ_CST)
+#define runtime_atomicloadp(p) __atomic_load_n (p, __ATOMIC_SEQ_CST)
+#define runtime_atomicstorep(p, v) __atomic_store_n (p, v, __ATOMIC_SEQ_CST)
 
-void	runtime_initpanic(void);
 void	runtime_dopanic(int32) __attribute__ ((noreturn));
 void	runtime_startpanic(void);
 const byte*	runtime_getenv(const char*);
 int32	runtime_atoi(const byte*);
 void	runtime_sigprof(uint8 *pc, uint8 *sp, uint8 *lr);
-void	runtime_cpuprofinit(void);
 void	runtime_resetcpuprofiler(int32);
 void	runtime_setcpuprofilerate(void(*)(uintptr*, int32), int32);
 uint32	runtime_fastrand1(void);
+void	runtime_semacquire (uint32 *) asm ("libgo_runtime.runtime.Semacquire");
+void	runtime_semrelease (uint32 *) asm ("libgo_runtime.runtime.Semrelease");
 void	runtime_procyield(uint32);
 void	runtime_osyield(void);
 void	runtime_usleep(uint32);
diff -r 8b1402d2f0d3 libgo/runtime/thread-linux.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgo/runtime/thread-linux.c	Tue Nov 22 12:18:39 2011 -0800
@@ -0,0 +1,100 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+
+#include <errno.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <time.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#include <syscall.h>
+#include <linux/futex.h>
+
+typedef struct timespec Timespec;
+
+// Atomically,
+//	if(*addr == val) sleep
+// Might be woken up spuriously; that's allowed.
+// Don't sleep longer than ns; ns < 0 means forever.
+void
+runtime_futexsleep(uint32 *addr, uint32 val, int64 ns)
+{
+	Timespec ts, *tsp;
+
+	if(ns < 0)
+		tsp = nil;
+	else {
+		ts.tv_sec = ns/1000000000LL;
+		ts.tv_nsec = ns%1000000000LL;
+		// Avoid overflow
+		if(ts.tv_sec > 1<<30)
+			ts.tv_sec = 1<<30;
+		tsp = &ts;
+	}
+
+	// Some Linux kernels have a bug where futex of
+	// FUTEX_WAIT returns an internal error code
+	// as an errno.  Libpthread ignores the return value
+	// here, and so can we: as it says a few lines up,
+	// spurious wakeups are allowed.
+	syscall(__NR_futex, addr, FUTEX_WAIT, val, tsp, nil, 0);
+}
+
+// If any procs are sleeping on addr, wake up at most cnt.
+void
+runtime_futexwakeup(uint32 *addr, uint32 cnt)
+{
+	int64 ret;
+
+	ret = syscall(__NR_futex, addr, FUTEX_WAKE, cnt, nil, nil, 0);
+
+	if(ret >= 0)
+		return;
+
+	// I don't know that futex wakeup can return
+	// EAGAIN or EINTR, but if it does, it would be
+	// safe to loop and call futex again.
+ runtime_printf("futexwakeup addr=%p returned %lld\n", addr, (long long)ret); + *(int32*)0x1006 = 0x1006; +} + +static int32 +getproccount(void) +{ + int32 fd, rd, cnt, cpustrlen; + const byte *cpustr, *pos; + byte *bufpos; + byte buf[256]; + + fd = open("/proc/stat", O_RDONLY|O_CLOEXEC, 0); + if(fd == -1) + return 1; + cnt = 0; + bufpos = buf; + cpustr = (const byte*)"\ncpu"; + cpustrlen = runtime_findnull((const byte*)cpustr); + for(;;) { + rd = read(fd, bufpos, sizeof(buf)-cpustrlen); + if(rd == -1) + break; + bufpos[rd] = 0; + for(pos=buf; (pos=(const byte*)strstr((const char*)pos, (const char*)cpustr)) != nil; cnt++, pos++) { + } + if(rd < cpustrlen) + break; + memmove(buf, bufpos+rd-cpustrlen+1, cpustrlen-1); + bufpos = buf+cpustrlen-1; + } + close(fd); + return cnt ? cnt : 1; +} + +void +runtime_osinit(void) +{ + runtime_ncpu = getproccount(); +} diff -r 8b1402d2f0d3 libgo/runtime/thread-sema.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/libgo/runtime/thread-sema.c Tue Nov 22 12:18:39 2011 -0800 @@ -0,0 +1,74 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "runtime.h" + +#include +#include +#include +#include + +/* Create a semaphore. */ + +uintptr +runtime_semacreate(void) +{ + sem_t *p; + + /* Call malloc rather than runtime_malloc. This will allocate space + on the C heap. We can't call runtime_malloc here because it + could cause a deadlock. */ + p = malloc (sizeof (sem_t)); + if (sem_init (p, 0, 0) != 0) + runtime_throw ("sem_init"); + return (uintptr) p; +} + +/* Acquire m->waitsema. */ + +int32 +runtime_semasleep (int64 ns) +{ + int r; + + if (ns >= 0) + { + struct timespec ts; + + ns += runtime_nanotime (); + ts.tv_sec = ns / 1000000000LL; + ts.tv_nsec = ns % 1000000000LL; + r = sem_timedwait ((sem_t *) m->waitsema, &ts); + if (r != 0) + { + if (errno == ETIMEDOUT || errno == EINTR) + return -1; + runtime_throw ("sema_timedwait"); + } + return 0; + } + + while (sem_wait ((sem_t *) m->waitsema) != 0) + { + if (errno == EINTR) + continue; + runtime_throw ("sem_wait"); + } + + return 0; +} + +/* Wake up mp->waitsema. */ + +void +runtime_semawakeup (M *mp) +{ + if (sem_post ((sem_t *) mp->waitsema) != 0) + runtime_throw ("sem_post"); +} + +void +runtime_osinit(void) +{ +} diff -r 8b1402d2f0d3 libgo/runtime/thread.c --- a/libgo/runtime/thread.c Tue Nov 22 12:16:53 2011 -0800 +++ b/libgo/runtime/thread.c Tue Nov 22 12:18:39 2011 -0800 @@ -6,100 +6,68 @@ #include "runtime.h" #include "go-assert.h" -void -runtime_initlock(Lock *l) -{ - l->key = 0; - if(sem_init(&l->sem, 0, 0) != 0) - runtime_throw("sem_init failed"); -} +/* For targets which don't have the required sync support. Really + these should be provided by gcc itself. FIXME. */ -// noinline so that runtime_lock doesn't have to split the stack. 
-static void runtime_lock_full(Lock *l) __attribute__ ((noinline));
+#if !defined (HAVE_SYNC_BOOL_COMPARE_AND_SWAP_4) || !defined (HAVE_SYNC_FETCH_AND_ADD_4)
 
-static void
-runtime_lock_full(Lock *l)
-{
-	for(;;){
-		if(sem_wait(&l->sem) == 0)
-			return;
-		if(errno != EINTR)
-			runtime_throw("sem_wait failed");
-	}
-}
+static pthread_mutex_t sync_lock = PTHREAD_MUTEX_INITIALIZER;
 
-void
-runtime_lock(Lock *l)
-{
-	if(m != nil) {
-		if(m->locks < 0)
-			runtime_throw("lock count");
-		m->locks++;
-	}
-
-	if(runtime_xadd(&l->key, 1) > 1)	// someone else has it; wait
-		runtime_lock_full(l);
-}
-
-static void runtime_unlock_full(Lock *l) __attribute__ ((noinline));
-
-static void
-runtime_unlock_full(Lock *l)
-{
-	if(sem_post(&l->sem) != 0)
-		runtime_throw("sem_post failed");
-}
-
-void
-runtime_unlock(Lock *l)
-{
-	if(m != nil) {
-		m->locks--;
-		if(m->locks < 0)
-			runtime_throw("lock count");
-	}
-
-	if(runtime_xadd(&l->key, -1) > 0)	// someone else is waiting
-		runtime_unlock_full(l);
-}
-
-void
-runtime_destroylock(Lock *l)
-{
-	sem_destroy(&l->sem);
-}
+#endif
 
 #ifndef HAVE_SYNC_BOOL_COMPARE_AND_SWAP_4
 
-// For targets which don't have the required sync support.  Really
-// this should be provided by gcc itself.  FIXME.
-
-static pthread_mutex_t sync_lock = PTHREAD_MUTEX_INITIALIZER;
+_Bool
+__sync_bool_compare_and_swap_4 (uint32*, uint32, uint32)
+  __attribute__ ((visibility ("hidden")));
 
 _Bool
-__sync_bool_compare_and_swap_4(uint32*, uint32, uint32)
-  __attribute__((visibility("hidden")));
-
-_Bool
-__sync_bool_compare_and_swap_4(uint32* ptr, uint32 old, uint32 new)
+__sync_bool_compare_and_swap_4 (uint32* ptr, uint32 old, uint32 new)
 {
   int i;
   _Bool ret;
 
-  i = pthread_mutex_lock(&sync_lock);
-  __go_assert(i == 0);
+  i = pthread_mutex_lock (&sync_lock);
+  __go_assert (i == 0);
 
-  if(*ptr != old) {
+  if (*ptr != old)
     ret = 0;
-  } else {
-    *ptr = new;
-    ret = 1;
-  }
+  else
+    {
+      *ptr = new;
+      ret = 1;
+    }
 
-  i = pthread_mutex_unlock(&sync_lock);
-  __go_assert(i == 0);
+  i = pthread_mutex_unlock (&sync_lock);
+  __go_assert (i == 0);
 
   return ret;
 }
 
 #endif
+
+#ifndef HAVE_SYNC_FETCH_AND_ADD_4
+
+uint32
+__sync_fetch_and_add_4 (uint32*, uint32)
+  __attribute__ ((visibility ("hidden")));
+
+uint32
+__sync_fetch_and_add_4 (uint32* ptr, uint32 add)
+{
+  int i;
+  uint32 ret;
+
+  i = pthread_mutex_lock (&sync_lock);
+  __go_assert (i == 0);
+
+  ret = *ptr;
+  *ptr += add;
+
+  i = pthread_mutex_unlock (&sync_lock);
+  __go_assert (i == 0);
+
+  return ret;
+}
+
+#endif
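
The mutex-protected fallbacks at the end of thread.c are drop-in
stand-ins for GCC's __sync builtins, so they must preserve the builtins'
return conventions: fetch-and-add returns the old value, and
bool-compare-and-swap reports whether the store happened.  A hypothetical
self-check of those semantics (not part of the tree; on targets that have
the builtins it exercises the compiler's versions instead):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
      uint32_t v = 40;

      /* fetch_and_add returns the value *before* the addition */
      assert(__sync_fetch_and_add(&v, 2) == 40);
      assert(v == 42);

      /* CAS stores only when the current value matches 'old' */
      assert(__sync_bool_compare_and_swap(&v, 42, 7));
      assert(!__sync_bool_compare_and_swap(&v, 42, 9));
      assert(v == 7);
      return 0;
    }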