libgo patch committed: Copy/rewrite cgo support code from Go 1.7 runtime

Message ID: CAOyqgcUAwiPe58Yw4JiHR2jGd9+e6C3cXKDwa=A=Dj1oT5JLTg@mail.gmail.com
State: New

Commit Message

Ian Lance Taylor Dec. 19, 2016, 6 p.m. UTC
This patch copies the cgo support code from the Go 1.7 runtime to
libgo.  The cgo support in gccgo is rather different, so all the code
in cgo_gccgo.go is gccgo-specific.  The rest of the code is similar
but slightly different.  This drops _cgo_allocate, which was removed
from the gc toolchain back in 1.5.  Bootstrapped and ran Go testsuite
on x86_64-pc-linux-gnu.  Committed to mainline.

Ian
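For readers skimming the patch, the calling convention these entry points implement is the one spelled out in the doc comments of cgo_gccgo.go: cgo-generated wrappers bracket each cross-language call so the goroutine enters and leaves syscall mode around it. A rough sketch follows; the wrapper names and cfunction/gofunction are placeholders, not part of the patch:

    // Go code calling into C: take the goroutine out of the Go
    // scheduler around the C call, as the Cgocall comment describes.
    func callIntoC() {
            syscall.Cgocall()
            defer syscall.CgocallDone()
            cfunction() // the C/C++ function being wrapped
    }

    // C code calling back into Go via an exported Go function.
    func callbackFromC() {
            syscall.CgocallBack()
            defer syscall.CgocallBackDone()
            gofunction() // the Go callback
    }

Cgocall/CgocallDone amount to entersyscall/exitsyscall plus locking the OS thread, while CgocallBack/CgocallBackDone additionally acquire and release an extra M when the calling thread was not created by Go.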

Patch

Index: gcc/go/gofrontend/MERGE
===================================================================
--- gcc/go/gofrontend/MERGE	(revision 243766)
+++ gcc/go/gofrontend/MERGE	(working copy)
@@ -1,4 +1,4 @@ 
-e6fb629c5b246bceab5fc8e8613cf2cf82b1e98f
+4a0bb435bbb1d1516b486d1998e8dc184576db61
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
Index: libgo/go/runtime/cgo_gccgo.go
===================================================================
--- libgo/go/runtime/cgo_gccgo.go	(revision 0)
+++ libgo/go/runtime/cgo_gccgo.go	(working copy)
@@ -0,0 +1,110 @@ 
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+	"runtime/internal/atomic"
+	_ "unsafe"
+)
+
+// For historical reasons these functions are called as though they
+// were in the syscall package.
+//go:linkname Cgocall syscall.Cgocall
+//go:linkname CgocallDone syscall.CgocallDone
+//go:linkname CgocallBack syscall.CgocallBack
+//go:linkname CgocallBackDone syscall.CgocallBackDone
+
+// A routine that may be called by SWIG.
+//go:linkname _cgo_panic _cgo_panic
+
+// iscgo is set to true if the cgo tool sets the C variable runtime_iscgo
+// to true.
+var iscgo bool
+
+// cgoHasExtraM is set on startup when an extra M is created for cgo.
+// The extra M must be created before any C/C++ code calls cgocallback.
+var cgoHasExtraM bool
+
+// Cgocall prepares to call from code written in Go to code written in
+// C/C++. This takes the current goroutine out of the Go scheduler, as
+// though it were making a system call. Otherwise the program can
+// lock up if the C code blocks. The idea is to call this function,
+// then immediately call the C/C++ function. After the C/C++ function
+// returns, call cgocalldone. The usual Go code would look like
+//     syscall.Cgocall()
+//     defer syscall.Cgocalldone()
+//     cfunction()
+func Cgocall() {
+	lockOSThread()
+	mp := getg().m
+	mp.ncgocall++
+	mp.ncgo++
+	entersyscall(0)
+}
+
+// CgocallDone prepares to return to Go code from C/C++ code.
+func CgocallDone() {
+	gp := getg()
+	if gp == nil {
+		throw("no g in CgocallDone")
+	}
+	gp.m.ncgo--
+
+	// If we are invoked because the C function called _cgo_panic,
+	// then _cgo_panic will already have exited syscall mode.
+	if gp.atomicstatus == _Gsyscall {
+		exitsyscall(0)
+	}
+
+	unlockOSThread()
+}
+
+// CgocallBack is used when calling from C/C++ code into Go code.
+// The usual approach is
+//     syscall.CgocallBack()
+//     defer syscall.CgocallBackDone()
+//     gofunction()
+//go:nosplit
+func CgocallBack() {
+	if getg() == nil || getg().m == nil {
+		needm(0)
+		mp := getg().m
+		mp.dropextram = true
+	}
+
+	exitsyscall(0)
+
+	if getg().m.ncgo == 0 {
+		// The C call to Go came from a thread created by C.
+		// The C call to Go came from a thread not currently running
+		// any Go. In the case of -buildmode=c-archive or c-shared,
+		// this call may be coming in before package initialization
+		// is complete. Wait until it is.
+		<-main_init_done
+	}
+
+	mp := getg().m
+	if mp.needextram || atomic.Load(&extraMWaiters) > 0 {
+		mp.needextram = false
+		newextram()
+	}
+}
+
+// CgocallBackDone prepares to return to C/C++ code that has called
+// into Go code.
+func CgocallBackDone() {
+	entersyscall(0)
+	mp := getg().m
+	if mp.dropextram && mp.ncgo == 0 {
+		mp.dropextram = false
+		dropm()
+	}
+}
+
+// _cgo_panic may be called by SWIG code to panic.
+func _cgo_panic(p *byte) {
+	exitsyscall(0)
+	panic(gostringnocopy(p))
+}
Index: libgo/go/runtime/cgo_mmap.go
===================================================================
--- libgo/go/runtime/cgo_mmap.go	(revision 243084)
+++ libgo/go/runtime/cgo_mmap.go	(working copy)
@@ -1,43 +0,0 @@ 
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build ignore
-
-// Support for memory sanitizer. See runtime/cgo/mmap.go.
-
-// +build linux,amd64
-
-package runtime
-
-import "unsafe"
-
-// _cgo_mmap is filled in by runtime/cgo when it is linked into the
-// program, so it is only non-nil when using cgo.
-//go:linkname _cgo_mmap _cgo_mmap
-var _cgo_mmap unsafe.Pointer
-
-func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) unsafe.Pointer {
-	if _cgo_mmap != nil {
-		// Make ret a uintptr so that writing to it in the
-		// function literal does not trigger a write barrier.
-		// A write barrier here could break because of the way
-		// that mmap uses the same value both as a pointer and
-		// an errno value.
-		// TODO: Fix mmap to return two values.
-		var ret uintptr
-		systemstack(func() {
-			ret = callCgoMmap(addr, n, prot, flags, fd, off)
-		})
-		return unsafe.Pointer(ret)
-	}
-	return sysMmap(addr, n, prot, flags, fd, off)
-}
-
-// sysMmap calls the mmap system call. It is implemented in assembly.
-func sysMmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) unsafe.Pointer
-
-// cgoMmap calls the mmap function in the runtime/cgo package on the
-// callCgoMmap calls the mmap function in the runtime/cgo package
-// using the GCC calling convention. It is implemented in assembly.
-func callCgoMmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) uintptr
Index: libgo/go/runtime/os_gccgo.go
===================================================================
--- libgo/go/runtime/os_gccgo.go	(revision 243445)
+++ libgo/go/runtime/os_gccgo.go	(working copy)
@@ -8,6 +8,44 @@  import (
 	"unsafe"
 )
 
+// Temporary for C code to call:
+//go:linkname minit runtime.minit
+
+// minit is called to initialize a new m (including the bootstrap m).
+// Called on the new thread, cannot allocate memory.
+func minit() {
+	// Initialize signal handling.
+	_g_ := getg()
+
+	var st _stack_t
+	sigaltstack(nil, &st)
+	if st.ss_flags&_SS_DISABLE != 0 {
+		signalstack(_g_.m.gsignalstack, _g_.m.gsignalstacksize)
+		_g_.m.newSigstack = true
+	} else {
+		_g_.m.newSigstack = false
+	}
+
+	// FIXME: We should set _g_.m.procid here.
+
+	// restore signal mask from m.sigmask and unblock essential signals
+	nmask := _g_.m.sigmask
+	for i := range sigtable {
+		if sigtable[i].flags&_SigUnblock != 0 {
+			sigdelset(&nmask, int32(i))
+		}
+	}
+	sigprocmask(_SIG_SETMASK, &nmask, nil)
+}
+
+// Called from dropm to undo the effect of an minit.
+//go:nosplit
+func unminit() {
+	if getg().m.newSigstack {
+		signalstack(nil, 0)
+	}
+}
+
 var urandom_dev = []byte("/dev/urandom\x00")
 
 func getRandomData(r []byte) {
Index: libgo/go/runtime/proc.go
===================================================================
--- libgo/go/runtime/proc.go	(revision 0)
+++ libgo/go/runtime/proc.go	(working copy)
@@ -0,0 +1,330 @@ 
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+	"runtime/internal/atomic"
+	"unsafe"
+)
+
+// Functions temporarily called by C code.
+//go:linkname newextram runtime.newextram
+
+// Functions temporarily in C that have not yet been ported.
+func allocm(*p, bool, *unsafe.Pointer, *uintptr) *m
+func malg(bool, bool, *unsafe.Pointer, *uintptr) *g
+func allgadd(*g)
+
+// C functions for ucontext management.
+func setGContext()
+func makeGContext(*g, unsafe.Pointer, uintptr)
+
+// main_init_done is a signal used by cgocallbackg that initialization
+// has been completed. It is made before _cgo_notify_runtime_init_done,
+// so all cgo calls can rely on it existing. When main_init is complete,
+// it is closed, meaning cgocallbackg can reliably receive from it.
+var main_init_done chan bool
+
+// If asked to move to or from a Gscanstatus this will throw. Use the castogscanstatus
+// and casfrom_Gscanstatus instead.
+// casgstatus will loop if the g->atomicstatus is in a Gscan status until the routine that
+// put it in the Gscan state is finished.
+//go:nosplit
+func casgstatus(gp *g, oldval, newval uint32) {
+	if (oldval&_Gscan != 0) || (newval&_Gscan != 0) || oldval == newval {
+		systemstack(func() {
+			print("runtime: casgstatus: oldval=", hex(oldval), " newval=", hex(newval), "\n")
+			throw("casgstatus: bad incoming values")
+		})
+	}
+
+	if oldval == _Grunning && gp.gcscanvalid {
+	// If oldval == _Grunning, then the actual status must be
+		// _Grunning or _Grunning|_Gscan; either way,
+		// we own gp.gcscanvalid, so it's safe to read.
+		// gp.gcscanvalid must not be true when we are running.
+		print("runtime: casgstatus ", hex(oldval), "->", hex(newval), " gp.status=", hex(gp.atomicstatus), " gp.gcscanvalid=true\n")
+		throw("casgstatus")
+	}
+
+	// See http://golang.org/cl/21503 for justification of the yield delay.
+	const yieldDelay = 5 * 1000
+	var nextYield int64
+
+	// loop if gp->atomicstatus is in a scan state giving
+	// GC time to finish and change the state to oldval.
+	for i := 0; !atomic.Cas(&gp.atomicstatus, oldval, newval); i++ {
+		if oldval == _Gwaiting && gp.atomicstatus == _Grunnable {
+			systemstack(func() {
+				throw("casgstatus: waiting for Gwaiting but is Grunnable")
+			})
+		}
+		// Help GC if needed.
+		// if gp.preemptscan && !gp.gcworkdone && (oldval == _Grunning || oldval == _Gsyscall) {
+		// 	gp.preemptscan = false
+		// 	systemstack(func() {
+		// 		gcphasework(gp)
+		// 	})
+		// }
+		// But meanwhile just yield.
+		if i == 0 {
+			nextYield = nanotime() + yieldDelay
+		}
+		if nanotime() < nextYield {
+			for x := 0; x < 10 && gp.atomicstatus != oldval; x++ {
+				procyield(1)
+			}
+		} else {
+			osyield()
+			nextYield = nanotime() + yieldDelay/2
+		}
+	}
+	if newval == _Grunning && gp.gcscanvalid {
+		// Run queueRescan on the system stack so it has more space.
+		systemstack(func() { queueRescan(gp) })
+	}
+}
+
+// needm is called when a cgo callback happens on a
+// thread without an m (a thread not created by Go).
+// In this case, needm is expected to find an m to use
+// and return with m, g initialized correctly.
+// Since m and g are not set now (likely nil, but see below)
+// needm is limited in what routines it can call. In particular
+// it can only call nosplit functions (textflag 7) and cannot
+// do any scheduling that requires an m.
+//
+// In order to avoid needing heavy lifting here, we adopt
+// the following strategy: there is a stack of available m's
+// that can be stolen. Using compare-and-swap
+// to pop from the stack has ABA races, so we simulate
+// a lock by doing an exchange (via casp) to steal the stack
+// head and replace the top pointer with MLOCKED (1).
+// This serves as a simple spin lock that we can use even
+// without an m. The thread that locks the stack in this way
+// unlocks the stack by storing a valid stack head pointer.
+//
+// In order to make sure that there is always an m structure
+// available to be stolen, we maintain the invariant that there
+// is always one more than needed. At the beginning of the
+// program (if cgo is in use) the list is seeded with a single m.
+// If needm finds that it has taken the last m off the list, its job
+// is - once it has installed its own m so that it can do things like
+// allocate memory - to create a spare m and put it on the list.
+//
+// Each of these extra m's also has a g0 and a curg that are
+// pressed into service as the scheduling stack and current
+// goroutine for the duration of the cgo callback.
+//
+// When the callback is done with the m, it calls dropm to
+// put the m back on the list.
+//go:nosplit
+func needm(x byte) {
+	if iscgo && !cgoHasExtraM {
+		// Can happen if C/C++ code calls Go from a global ctor.
+		// Can not throw, because scheduler is not initialized yet.
+		write(2, unsafe.Pointer(&earlycgocallback[0]), int32(len(earlycgocallback)))
+		exit(1)
+	}
+
+	// Lock extra list, take head, unlock popped list.
+	// nilokay=false is safe here because of the invariant above,
+	// that the extra list always contains or will soon contain
+	// at least one m.
+	mp := lockextra(false)
+
+	// Set needextram when we've just emptied the list,
+	// so that the eventual call into cgocallbackg will
+	// allocate a new m for the extra list. We delay the
+	// allocation until then so that it can be done
+	// after exitsyscall makes sure it is okay to be
+	// running at all (that is, there's no garbage collection
+	// running right now).
+	mp.needextram = mp.schedlink == 0
+	unlockextra(mp.schedlink.ptr())
+
+	// Save and block signals before installing g.
+	// Once g is installed, any incoming signals will try to execute,
+	// but we won't have the sigaltstack settings and other data
+	// set up appropriately until the end of minit, which will
+	// unblock the signals. This is the same dance as when
+	// starting a new m to run Go code via newosproc.
+	msigsave(mp)
+	sigblock()
+
+	// Install g (= m->curg).
+	setg(mp.curg)
+	atomic.Store(&mp.curg.atomicstatus, _Gsyscall)
+	setGContext()
+
+	// Initialize this thread to use the m.
+	minit()
+}
+
+var earlycgocallback = []byte("fatal error: cgo callback before cgo call\n")
+
+// newextram allocates m's and puts them on the extra list.
+// It is called with a working local m, so that it can do things
+// like call schedlock and allocate.
+func newextram() {
+	c := atomic.Xchg(&extraMWaiters, 0)
+	if c > 0 {
+		for i := uint32(0); i < c; i++ {
+			oneNewExtraM()
+		}
+	} else {
+		// Make sure there is at least one extra M.
+		mp := lockextra(true)
+		unlockextra(mp)
+		if mp == nil {
+			oneNewExtraM()
+		}
+	}
+}
+
+// oneNewExtraM allocates an m and puts it on the extra list.
+func oneNewExtraM() {
+	// Create extra goroutine locked to extra m.
+	// The goroutine is the context in which the cgo callback will run.
+	// The sched.pc will never be returned to, but setting it to
+	// goexit makes clear to the traceback routines where
+	// the goroutine stack ends.
+	var g0SP unsafe.Pointer
+	var g0SPSize uintptr
+	mp := allocm(nil, true, &g0SP, &g0SPSize)
+	gp := malg(true, false, nil, nil)
+	gp.gcscanvalid = true // fresh G, so no dequeueRescan necessary
+	gp.gcRescan = -1
+
+	// malg returns status as Gidle, change to Gdead before adding to allg
+	// where GC will see it.
+	// gccgo uses Gdead here, not Gsyscall, because the split
+	// stack context is not initialized.
+	casgstatus(gp, _Gidle, _Gdead)
+	gp.m = mp
+	mp.curg = gp
+	mp.locked = _LockInternal
+	mp.lockedg = gp
+	gp.lockedm = mp
+	gp.goid = int64(atomic.Xadd64(&sched.goidgen, 1))
+	if raceenabled {
+		gp.racectx = racegostart(funcPC(newextram))
+	}
+	// put on allg for garbage collector
+	allgadd(gp)
+
+	// The context for gp will be set up in needm.
+	// Here we need to set the context for g0.
+	makeGContext(mp.g0, g0SP, g0SPSize)
+
+	// Add m to the extra list.
+	mnext := lockextra(true)
+	mp.schedlink.set(mnext)
+	unlockextra(mp)
+}
+
+// dropm is called when a cgo callback has called needm but is now
+// done with the callback and returning back into the non-Go thread.
+// It puts the current m back onto the extra list.
+//
+// The main expense here is the call to signalstack to release the
+// m's signal stack, and then the call to needm on the next callback
+// from this thread. It is tempting to try to save the m for next time,
+// which would eliminate both these costs, but there might not be
+// a next time: the current thread (which Go does not control) might exit.
+// If we saved the m for that thread, there would be an m leak each time
+// such a thread exited. Instead, we acquire and release an m on each
+// call. These should typically not be scheduling operations, just a few
+// atomics, so the cost should be small.
+//
+// TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
+// variable using pthread_key_create. Unlike the pthread keys we already use
+// on OS X, this dummy key would never be read by Go code. It would exist
+// only so that we could register at thread-exit-time destructor.
+// That destructor would put the m back onto the extra list.
+// This is purely a performance optimization. The current version,
+// in which dropm happens on each cgo call, is still correct too.
+// We may have to keep the current version on systems with cgo
+// but without pthreads, like Windows.
+func dropm() {
+	// Clear m and g, and return m to the extra list.
+	// After the call to setg we can only call nosplit functions
+	// with no pointer manipulation.
+	mp := getg().m
+
+	// Block signals before unminit.
+	// Unminit unregisters the signal handling stack (but needs g on some systems).
+	// Setg(nil) clears g, which is the signal handler's cue not to run Go handlers.
+	// It's important not to try to handle a signal between those two steps.
+	sigmask := mp.sigmask
+	sigblock()
+	unminit()
+
+	// gccgo sets the stack to Gdead here, because the splitstack
+	// context is not initialized.
+	mp.curg.atomicstatus = _Gdead
+	mp.curg.gcstack = nil
+	mp.curg.gcnextsp = nil
+
+	mnext := lockextra(true)
+	mp.schedlink.set(mnext)
+
+	setg(nil)
+
+	// Commit the release of mp.
+	unlockextra(mp)
+
+	msigrestore(sigmask)
+}
+
+// A helper function for EnsureDropM.
+func getm() uintptr {
+	return uintptr(unsafe.Pointer(getg().m))
+}
+
+var extram uintptr
+var extraMWaiters uint32
+
+// lockextra locks the extra list and returns the list head.
+// The caller must unlock the list by storing a new list head
+// to extram. If nilokay is true, then lockextra will
+// return a nil list head if that's what it finds. If nilokay is false,
+// lockextra will keep waiting until the list head is no longer nil.
+//go:nosplit
+func lockextra(nilokay bool) *m {
+	const locked = 1
+
+	incr := false
+	for {
+		old := atomic.Loaduintptr(&extram)
+		if old == locked {
+			yield := osyield
+			yield()
+			continue
+		}
+		if old == 0 && !nilokay {
+			if !incr {
+				// Add 1 to the number of threads
+				// waiting for an M.
+				// This is cleared by newextram.
+				atomic.Xadd(&extraMWaiters, 1)
+				incr = true
+			}
+			usleep(1)
+			continue
+		}
+		if atomic.Casuintptr(&extram, old, locked) {
+			return (*m)(unsafe.Pointer(old))
+		}
+		yield := osyield
+		yield()
+		continue
+	}
+}
+
+//go:nosplit
+func unlockextra(mp *m) {
+	atomic.Storeuintptr(&extram, uintptr(unsafe.Pointer(mp)))
+}
Index: libgo/go/runtime/runtime2.go
===================================================================
--- libgo/go/runtime/runtime2.go	(revision 243445)
+++ libgo/go/runtime/runtime2.go	(working copy)
@@ -479,8 +479,6 @@  type m struct {
 	dropextram bool // drop after call is done
 
 	gcing int32
-
-	cgomal *cgoMal // allocations via _cgo_allocate
 }
 
 type p struct {
@@ -801,14 +799,6 @@  var (
 // array.
 type g_ucontext_t [(_sizeof_ucontext_t + 15) / unsafe.Sizeof(unsafe.Pointer(nil))]unsafe.Pointer
 
-// cgoMal tracks allocations made by _cgo_allocate
-// FIXME: _cgo_allocate has been removed from gc and can probably be
-// removed from gccgo too.
-type cgoMal struct {
-	next  *cgoMal
-	alloc unsafe.Pointer
-}
-
 // sigset is the Go version of the C type sigset_t.
 // _sigset_t is defined by the Makefile from <signal.h>.
 type sigset _sigset_t
Index: libgo/go/runtime/signal1_unix.go
===================================================================
--- libgo/go/runtime/signal1_unix.go	(revision 243094)
+++ libgo/go/runtime/signal1_unix.go	(working copy)
@@ -327,7 +327,7 @@  func ensureSigM() {
 //go:norace
 //go:nowritebarrierrec
 func badsignal(sig uintptr, c *sigctxt) {
-	needm()
+	needm(0)
 	if !sigsend(uint32(sig)) {
 		// A foreign thread received the signal sig, and the
 		// Go code does not want to handle it.
Index: libgo/go/runtime/signal_gccgo.go
===================================================================
--- libgo/go/runtime/signal_gccgo.go	(revision 243084)
+++ libgo/go/runtime/signal_gccgo.go	(working copy)
@@ -17,18 +17,19 @@  import (
 func sigaction(signum int32, act *_sigaction, oact *_sigaction) int32
 
 //extern sigprocmask
-func sigprocmask(how int32, set *_sigset_t, oldset *_sigset_t) int32
+func sigprocmask(how int32, set *sigset, oldset *sigset) int32
 
-// The argument should be simply *_sigset_t, but that fails on GNU/Linux
-// which sometimes uses _sigset_t and sometimes uses ___sigset_t.
 //extern sigfillset
-func sigfillset(set unsafe.Pointer) int32
+func sigfillset(set *sigset) int32
 
 //extern sigemptyset
-func sigemptyset(set *_sigset_t) int32
+func sigemptyset(set *sigset) int32
 
 //extern sigaddset
-func sigaddset(set *_sigset_t, signum int32) int32
+func sigaddset(set *sigset, signum int32) int32
+
+//extern sigdelset
+func sigdelset(set *sigset, signum int32) int32
 
 //extern sigaltstack
 func sigaltstack(ss *_stack_t, oss *_stack_t) int32
@@ -57,9 +58,19 @@  func (c *sigctxt) sigcode() uint64 {
 }
 
 //go:nosplit
+func msigsave(mp *m) {
+	sigprocmask(_SIG_SETMASK, nil, &mp.sigmask)
+}
+
+//go:nosplit
+func msigrestore(sigmask sigset) {
+	sigprocmask(_SIG_SETMASK, &sigmask, nil)
+}
+
+//go:nosplit
 func sigblock() {
-	var set _sigset_t
-	sigfillset(unsafe.Pointer(&set))
+	var set sigset
+	sigfillset(&set)
 	sigprocmask(_SIG_SETMASK, &set, nil)
 }
 
@@ -81,7 +92,7 @@  func setsig(i int32, fn uintptr, restart
 	if restart {
 		sa.sa_flags |= _SA_RESTART
 	}
-	sigfillset(unsafe.Pointer(&sa.sa_mask))
+	sigfillset((*sigset)(unsafe.Pointer(&sa.sa_mask)))
 	setSigactionHandler(&sa, fn)
 	sigaction(i, &sa, nil)
 }
@@ -117,10 +128,12 @@  func getsig(i int32) uintptr {
 	return getSigactionHandler(&sa)
 }
 
+func signalstack(p unsafe.Pointer, n uintptr)
+
 //go:nosplit
 //go:nowritebarrierrec
 func updatesigmask(m sigmask) {
-	var mask _sigset_t
+	var mask sigset
 	sigemptyset(&mask)
 	for i := int32(0); i < _NSIG; i++ {
 		if m[(i-1)/32]&(1<<((uint(i)-1)&31)) != 0 {
@@ -131,7 +144,7 @@  func updatesigmask(m sigmask) {
 }
 
 func unblocksig(sig int32) {
-	var mask _sigset_t
+	var mask sigset
 	sigemptyset(&mask)
 	sigaddset(&mask, sig)
 	sigprocmask(_SIG_UNBLOCK, &mask, nil)
Index: libgo/go/runtime/signal_sighandler.go
===================================================================
--- libgo/go/runtime/signal_sighandler.go	(revision 243084)
+++ libgo/go/runtime/signal_sighandler.go	(working copy)
@@ -52,8 +52,8 @@  func sighandler(sig uint32, info *_sigin
 
 		// All signals were blocked due to the sigaction mask;
 		// unblock them.
-		var set _sigset_t
-		sigfillset(unsafe.Pointer(&set))
+		var set sigset
+		sigfillset(&set)
 		sigprocmask(_SIG_UNBLOCK, &set, nil)
 
 		sigpanic()
Index: libgo/go/runtime/stubs.go
===================================================================
--- libgo/go/runtime/stubs.go	(revision 243445)
+++ libgo/go/runtime/stubs.go	(working copy)
@@ -249,6 +249,24 @@  func funcPC(f interface{}) uintptr {
 }
 
 // For gccgo, to communicate from the C code to the Go code.
+//go:linkname setIsCgo runtime.setIsCgo
+func setIsCgo() {
+	iscgo = true
+}
+
+// Temporary for gccgo until we port proc.go.
+//go:linkname makeMainInitDone runtime.makeMainInitDone
+func makeMainInitDone() {
+	main_init_done = make(chan bool)
+}
+
+// Temporary for gccgo until we port proc.go.
+//go:linkname closeMainInitDone runtime.closeMainInitDone
+func closeMainInitDone() {
+	close(main_init_done)
+}
+
+// For gccgo, to communicate from the C code to the Go code.
 //go:linkname setCpuidECX runtime.setCpuidECX
 func setCpuidECX(v uint32) {
 	cpuid_ecx = v
@@ -301,6 +319,9 @@  var writeBarrier struct {
 	alignme uint64 // guarantee alignment so that compiler can use a 32 or 64-bit load
 }
 
+func queueRescan(*g) {
+}
+
 // Here for gccgo until we port atomic_pointer.go and mgc.go.
 //go:nosplit
 func casp(ptr *unsafe.Pointer, old, new unsafe.Pointer) bool {
@@ -446,6 +467,8 @@  func cpuprofAdd(stk []uintptr) {
 func Breakpoint()
 func LockOSThread()
 func UnlockOSThread()
+func lockOSThread()
+func unlockOSThread()
 func allm() *m
 func allgs() []*g
 
@@ -499,8 +522,6 @@  func getZerobase() *uintptr {
 }
 
 // Temporary for gccgo until we port proc.go.
-func needm()
-func dropm()
 func sigprof()
 func mcount() int32
 func gcount() int32
@@ -529,6 +550,12 @@  func getsched() *schedt {
 	return &sched
 }
 
+// Temporary for gccgo until we port proc.go.
+//go:linkname getCgoHasExtraM runtime.getCgoHasExtraM
+func getCgoHasExtraM() *bool {
+	return &cgoHasExtraM
+}
+
 // Throw and rethrow an exception.
 func throwException()
 func rethrowException()
Index: libgo/runtime/go-cgo.c
===================================================================
--- libgo/runtime/go-cgo.c	(revision 243084)
+++ libgo/runtime/go-cgo.c	(working copy)
@@ -5,193 +5,6 @@ 
    license that can be found in the LICENSE file.  */
 
 #include "runtime.h"
-#include "go-alloc.h"
-#include "go-type.h"
-
-extern void chanrecv1 (ChanType *, Hchan *, void *)
-  __asm__ (GOSYM_PREFIX "runtime.chanrecv1");
-
-/* Prepare to call from code written in Go to code written in C or
-   C++.  This takes the current goroutine out of the Go scheduler, as
-   though it were making a system call.  Otherwise the program can
-   lock up if the C code goes to sleep on a mutex or for some other
-   reason.  This idea is to call this function, then immediately call
-   the C/C++ function.  After the C/C++ function returns, call
-   syscall_cgocalldone.  The usual Go code would look like
-
-       syscall.Cgocall()
-       defer syscall.Cgocalldone()
-       cfunction()
-
-   */
-
-/* We let Go code call these via the syscall package.  */
-void syscall_cgocall(void) __asm__ (GOSYM_PREFIX "syscall.Cgocall");
-void syscall_cgocalldone(void) __asm__ (GOSYM_PREFIX "syscall.CgocallDone");
-void syscall_cgocallback(void) __asm__ (GOSYM_PREFIX "syscall.CgocallBack");
-void syscall_cgocallbackdone(void) __asm__ (GOSYM_PREFIX "syscall.CgocallBackDone");
-
-void
-syscall_cgocall ()
-{
-  M* m;
-
-  if (runtime_needextram && runtime_cas (&runtime_needextram, 1, 0))
-    runtime_newextram ();
-
-  runtime_lockOSThread();
-
-  m = runtime_m ();
-  ++m->ncgocall;
-  ++m->ncgo;
-  runtime_entersyscall (0);
-}
-
-/* Prepare to return to Go code from C/C++ code.  */
-
-void
-syscall_cgocalldone ()
-{
-  G* g;
-
-  g = runtime_g ();
-  __go_assert (g != NULL);
-  --g->m->ncgo;
-  if (g->m->ncgo == 0)
-    {
-      /* We are going back to Go, and we are not in a recursive call.
-	 Let the garbage collector clean up any unreferenced
-	 memory.  */
-      g->m->cgomal = NULL;
-    }
-
-  /* If we are invoked because the C function called _cgo_panic, then
-     _cgo_panic will already have exited syscall mode.  */
-  if (g->atomicstatus == _Gsyscall)
-    runtime_exitsyscall (0);
-
-  runtime_unlockOSThread();
-}
-
-/* Call back from C/C++ code to Go code.  */
-
-void
-syscall_cgocallback ()
-{
-  M *mp;
-
-  mp = runtime_m ();
-  if (mp == NULL)
-    {
-      runtime_needm ();
-      mp = runtime_m ();
-      mp->dropextram = true;
-    }
-
-  runtime_exitsyscall (0);
-
-  if (runtime_m ()->ncgo == 0)
-    {
-      /* The C call to Go came from a thread not currently running any
-	 Go.  In the case of -buildmode=c-archive or c-shared, this
-	 call may be coming in before package initialization is
-	 complete.  Wait until it is.  */
-      chanrecv1 (NULL, runtime_main_init_done, NULL);
-    }
-
-  mp = runtime_m ();
-  if (mp->needextram)
-    {
-      mp->needextram = 0;
-      runtime_newextram ();
-    }
-}
-
-/* Prepare to return to C/C++ code from a callback to Go code.  */
-
-void
-syscall_cgocallbackdone ()
-{
-  M *mp;
-
-  runtime_entersyscall (0);
-  mp = runtime_m ();
-  if (mp->dropextram && mp->ncgo == 0)
-    {
-      mp->dropextram = false;
-      runtime_dropm ();
-    }
-}
-
-/* Allocate memory and save it in a list visible to the Go garbage
-   collector.  */
-
-void *
-alloc_saved (size_t n)
-{
-  void *ret;
-  M *m;
-  CgoMal *c;
-
-  ret = __go_alloc (n);
-
-  m = runtime_m ();
-  c = (CgoMal *) __go_alloc (sizeof (CgoMal));
-  c->next = m->cgomal;
-  c->alloc = ret;
-  m->cgomal = c;
-
-  return ret;
-}
-
-/* These are routines used by SWIG.  The gc runtime library provides
-   the same routines under the same name, though in that case the code
-   is required to import runtime/cgo.  */
-
-void *
-_cgo_allocate (size_t n)
-{
-  void *ret;
-
-  runtime_exitsyscall (0);
-  ret = alloc_saved (n);
-  runtime_entersyscall (0);
-  return ret;
-}
-
-extern const struct __go_type_descriptor string_type_descriptor
-  __asm__ (GOSYM_PREFIX "__go_tdn_string");
-
-void
-_cgo_panic (const char *p)
-{
-  intgo len;
-  unsigned char *data;
-  String *ps;
-  Eface e;
-  const struct __go_type_descriptor *td;
-
-  runtime_exitsyscall (0);
-  len = __builtin_strlen (p);
-  data = alloc_saved (len);
-  __builtin_memcpy (data, p, len);
-  ps = alloc_saved (sizeof *ps);
-  ps->str = data;
-  ps->len = len;
-  td = &string_type_descriptor;
-  memcpy(&e._type, &td, sizeof td); /* This is a const_cast.  */
-  e.data = ps;
-
-  /* We don't call runtime_entersyscall here, because normally what
-     will happen is that we will walk up the stack to a Go deferred
-     function that calls recover.  However, this will do the wrong
-     thing if this panic is recovered and the stack unwinding is
-     caught by a C++ exception handler.  It might be possible to
-     handle this by calling runtime_entersyscall in the personality
-     function in go-unwind.c.  FIXME.  */
-
-  runtime_panic (e);
-}
 
 /* Used for _cgo_wait_runtime_init_done.  This is based on code in
    runtime/cgo/gcc_libinit.c in the master library.  */
@@ -249,8 +62,3 @@  _cgo_notify_runtime_init_done (void)
 // runtime_iscgo is set to true if some cgo code is linked in.
 // This is done by a constructor in the cgo generated code.
 _Bool runtime_iscgo;
-
-// runtime_cgoHasExtraM is set on startup when an extra M is created
-// for cgo.  The extra M must be created before any C/C++ code calls
-// cgocallback.
-_Bool runtime_cgoHasExtraM;
Index: libgo/runtime/go-libmain.c
===================================================================
--- libgo/runtime/go-libmain.c	(revision 243445)
+++ libgo/runtime/go-libmain.c	(working copy)
@@ -61,6 +61,7 @@  initfn (int argc, char **argv, char** en
 
   runtime_isarchive = true;
 
+  setIsCgo ();
   runtime_cpuinit ();
   runtime_initsig(true);
 
Index: libgo/runtime/go-main.c
===================================================================
--- libgo/runtime/go-main.c	(revision 243445)
+++ libgo/runtime/go-main.c	(working copy)
@@ -46,6 +46,9 @@  main (int argc, char **argv)
     return 0;
   runtime_isstarted = true;
 
+  if (runtime_iscgo)
+    setIsCgo ();
+
   __go_end = (uintptr)_end;
   runtime_cpuinit ();
   runtime_check ();
Index: libgo/runtime/malloc.h
===================================================================
--- libgo/runtime/malloc.h	(revision 243084)
+++ libgo/runtime/malloc.h	(working copy)
@@ -543,4 +543,3 @@  int32	runtime_setgcpercent(int32)
 #define PoisonStack ((uintptr)0x6868686868686868ULL)
 
 struct Workbuf;
-void	runtime_proc_scan(struct Workbuf**, void (*)(struct Workbuf**, Obj));
Index: libgo/runtime/mgc0.c
===================================================================
--- libgo/runtime/mgc0.c	(revision 243084)
+++ libgo/runtime/mgc0.c	(working copy)
@@ -1283,7 +1283,6 @@  markroot(ParFor *desc, uint32 i)
 		enqueue1(&wbuf, (Obj){(byte*)&runtime_allm, sizeof runtime_allm, 0});
 		enqueue1(&wbuf, (Obj){(byte*)&runtime_allp, sizeof runtime_allp, 0});
 		enqueue1(&wbuf, (Obj){(byte*)&work, sizeof work, 0});
-		runtime_proc_scan(&wbuf, enqueue1);
 		break;
 
 	case RootFinalizers:
Index: libgo/runtime/proc.c
===================================================================
--- libgo/runtime/proc.c	(revision 243445)
+++ libgo/runtime/proc.c	(working copy)
@@ -359,16 +359,16 @@  enum
 };
 
 extern Sched* runtime_getsched() __asm__ (GOSYM_PREFIX "runtime.getsched");
+extern bool* runtime_getCgoHasExtraM()
+  __asm__ (GOSYM_PREFIX "runtime.getCgoHasExtraM");
 
 Sched*	runtime_sched;
 int32	runtime_gomaxprocs;
-uint32	runtime_needextram = 1;
 M	runtime_m0;
 G	runtime_g0;	// idle goroutine for m0
 G*	runtime_lastg;
 M*	runtime_allm;
 P**	runtime_allp;
-M*	runtime_extram;
 int8*	runtime_goos;
 int32	runtime_ncpu;
 bool	runtime_precisestack;
@@ -418,7 +418,9 @@  static void pidleput(P*);
 static void injectglist(G*);
 static bool preemptall(void);
 static bool exitsyscallfast(void);
-static void allgadd(G*);
+
+void allgadd(G*)
+  __asm__(GOSYM_PREFIX "runtime.allgadd");
 
 bool runtime_isstarted;
 
@@ -498,55 +500,6 @@  struct field_align
   Hchan *p;
 };
 
-// main_init_done is a signal used by cgocallbackg that initialization
-// has been completed.  It is made before _cgo_notify_runtime_init_done,
-// so all cgo calls can rely on it existing.  When main_init is
-// complete, it is closed, meaning cgocallbackg can reliably receive
-// from it.
-Hchan *runtime_main_init_done;
-
-// The chan bool type, for runtime_main_init_done.
-
-extern const struct __go_type_descriptor bool_type_descriptor
-  __asm__ (GOSYM_PREFIX "__go_tdn_bool");
-
-static struct __go_channel_type chan_bool_type_descriptor =
-  {
-    /* __common */
-    {
-      /* __code */
-      GO_CHAN,
-      /* __align */
-      __alignof (Hchan *),
-      /* __field_align */
-      offsetof (struct field_align, p) - 1,
-      /* __size */
-      sizeof (Hchan *),
-      /* __hash */
-      0, /* This value doesn't matter.  */
-      /* __hashfn */
-      NULL,
-      /* __equalfn */
-      NULL,
-      /* __gc */
-      NULL, /* This value doesn't matter */
-      /* __reflection */
-      NULL, /* This value doesn't matter */
-      /* __uncommon */
-      NULL,
-      /* __pointer_to_this */
-      NULL
-    },
-    /* __element_type */
-    &bool_type_descriptor,
-    /* __dir */
-    CHANNEL_BOTH_DIR
-  };
-
-extern Hchan *makechan (ChanType *, int64)
-  __asm__ (GOSYM_PREFIX "runtime.makechan");
-extern void closechan(Hchan *) __asm__ (GOSYM_PREFIX "runtime.closechan");
-
 static void
 initDone(void *arg __attribute__ ((unused))) {
 	runtime_unlockOSThread();
@@ -593,13 +546,13 @@  runtime_main(void* dummy __attribute__((
 		runtime_throw("runtime_main not on m0");
 	__go_go(runtime_MHeap_Scavenger, nil);
 
-	runtime_main_init_done = makechan(&chan_bool_type_descriptor, 0);
+	makeMainInitDone();
 
 	_cgo_notify_runtime_init_done();
 
 	main_init();
 
-	closechan(runtime_main_init_done);
+	closeMainInitDone();
 
 	if(g->_defer != &d || (void*)d.pfn != initDone)
 		runtime_throw("runtime: bad defer entry after init");
@@ -1043,10 +996,12 @@  runtime_mstart(void* mp)
 	// Install signal handlers; after minit so that minit can
 	// prepare the thread to be able to handle the signals.
 	if(m == &runtime_m0) {
-		if(runtime_iscgo && !runtime_cgoHasExtraM) {
-			runtime_cgoHasExtraM = true;
-			runtime_newextram();
-			runtime_needextram = 0;
+		if(runtime_iscgo) {
+			bool* cgoHasExtraM = runtime_getCgoHasExtraM();
+			if(!*cgoHasExtraM) {
+				*cgoHasExtraM = true;
+				runtime_newextram();
+			}
 		}
 		runtime_initsig(false);
 	}
@@ -1079,10 +1034,13 @@  struct CgoThreadStart
 	void (*fn)(void);
 };
 
+M* runtime_allocm(P*, bool, byte**, uintptr*)
+	__asm__(GOSYM_PREFIX "runtime.allocm");
+
 // Allocate a new m unassociated with any thread.
 // Can use p for allocation context if needed.
 M*
-runtime_allocm(P *p, int32 stacksize, byte** ret_g0_stack, uintptr* ret_g0_stacksize)
+runtime_allocm(P *p, bool allocatestack, byte** ret_g0_stack, uintptr* ret_g0_stacksize)
 {
 	M *mp;
 
@@ -1099,7 +1057,7 @@  runtime_allocm(P *p, int32 stacksize, by
 
 	mp = runtime_mal(sizeof *mp);
 	mcommoninit(mp);
-	mp->g0 = runtime_malg(stacksize, ret_g0_stack, ret_g0_stacksize);
+	mp->g0 = runtime_malg(allocatestack, false, ret_g0_stack, ret_g0_stacksize);
 	mp->g0->m = mp;
 
 	if(p == (P*)g->m->p)
@@ -1125,90 +1083,26 @@  allocg(void)
 	return gp;
 }
 
-static M* lockextra(bool nilokay);
-static void unlockextra(M*);
+void setGContext(void) __asm__ (GOSYM_PREFIX "runtime.setGContext");
 
-// needm is called when a cgo callback happens on a
-// thread without an m (a thread not created by Go).
-// In this case, needm is expected to find an m to use
-// and return with m, g initialized correctly.
-// Since m and g are not set now (likely nil, but see below)
-// needm is limited in what routines it can call. In particular
-// it can only call nosplit functions (textflag 7) and cannot
-// do any scheduling that requires an m.
-//
-// In order to avoid needing heavy lifting here, we adopt
-// the following strategy: there is a stack of available m's
-// that can be stolen. Using compare-and-swap
-// to pop from the stack has ABA races, so we simulate
-// a lock by doing an exchange (via casp) to steal the stack
-// head and replace the top pointer with MLOCKED (1).
-// This serves as a simple spin lock that we can use even
-// without an m. The thread that locks the stack in this way
-// unlocks the stack by storing a valid stack head pointer.
-//
-// In order to make sure that there is always an m structure
-// available to be stolen, we maintain the invariant that there
-// is always one more than needed. At the beginning of the
-// program (if cgo is in use) the list is seeded with a single m.
-// If needm finds that it has taken the last m off the list, its job
-// is - once it has installed its own m so that it can do things like
-// allocate memory - to create a spare m and put it on the list.
-//
-// Each of these extra m's also has a g0 and a curg that are
-// pressed into service as the scheduling stack and current
-// goroutine for the duration of the cgo callback.
-//
-// When the callback is done with the m, it calls dropm to
-// put the m back on the list.
-//
-// Unlike the gc toolchain, we start running on curg, since we are
-// just going to return and let the caller continue.
+// setGContext sets up a new goroutine context for the current g.
 void
-runtime_needm(void)
+setGContext()
 {
-	M *mp;
-
-	if(runtime_needextram) {
-		// Can happen if C/C++ code calls Go from a global ctor.
-		// Can not throw, because scheduler is not initialized yet.
-		int rv __attribute__((unused));
-		rv = runtime_write(2, "fatal error: cgo callback before cgo call\n",
-			sizeof("fatal error: cgo callback before cgo call\n")-1);
-		runtime_exit(1);
-	}
-
-	// Lock extra list, take head, unlock popped list.
-	// nilokay=false is safe here because of the invariant above,
-	// that the extra list always contains or will soon contain
-	// at least one m.
-	mp = lockextra(false);
-
-	// Set needextram when we've just emptied the list,
-	// so that the eventual call into cgocallbackg will
-	// allocate a new m for the extra list. We delay the
-	// allocation until then so that it can be done
-	// after exitsyscall makes sure it is okay to be
-	// running at all (that is, there's no garbage collection
-	// running right now).
-	mp->needextram = mp->schedlink == 0;
-	unlockextra((M*)mp->schedlink);
+	int val;
 
-	// Install g (= m->curg).
-	runtime_setg(mp->curg);
-
-	// Initialize g's context as in mstart.
 	initcontext();
-	g->atomicstatus = _Gsyscall;
 	g->entry = nil;
 	g->param = nil;
 #ifdef USING_SPLIT_STACK
 	__splitstack_getcontext(&g->stackcontext[0]);
+	val = 0;
+	__splitstack_block_signals(&val, nil);
 #else
-	g->gcinitialsp = &mp;
+	g->gcinitialsp = &val;
 	g->gcstack = nil;
 	g->gcstacksize = 0;
-	g->gcnextsp = &mp;
+	g->gcnextsp = &val;
 #endif
 	getcontext(ucontext_arg(&g->context[0]));
 
@@ -1219,168 +1113,21 @@  runtime_needm(void)
 		pfn(gp);
 		*(int*)0x22 = 0x22;
 	}
-
-	// Initialize this thread to use the m.
-	runtime_minit();
-
-#ifdef USING_SPLIT_STACK
-	{
-		int dont_block_signals = 0;
-		__splitstack_block_signals(&dont_block_signals, nil);
-	}
-#endif
 }
 
-// newextram allocates an m and puts it on the extra list.
-// It is called with a working local m, so that it can do things
-// like call schedlock and allocate.
+void makeGContext(G*, byte*, uintptr)
+	__asm__(GOSYM_PREFIX "runtime.makeGContext");
+
+// makeGContext makes a new context for a g.
 void
-runtime_newextram(void)
-{
-	M *mp, *mnext;
-	G *gp;
-	byte *g0_sp, *sp;
-	uintptr g0_spsize, spsize;
+makeGContext(G* gp, byte* sp, uintptr spsize) {
 	ucontext_t *uc;
 
-	// Create extra goroutine locked to extra m.
-	// The goroutine is the context in which the cgo callback will run.
-	// The sched.pc will never be returned to, but setting it to
-	// runtime.goexit makes clear to the traceback routines where
-	// the goroutine stack ends.
-	mp = runtime_allocm(nil, StackMin, &g0_sp, &g0_spsize);
-	gp = runtime_malg(StackMin, &sp, &spsize);
-	gp->atomicstatus = _Gdead;
-	gp->m = mp;
-	mp->curg = gp;
-	mp->locked = _LockInternal;
-	mp->lockedg = gp;
-	gp->lockedm = mp;
-	gp->goid = runtime_xadd64(&runtime_sched->goidgen, 1);
-	// put on allg for garbage collector
-	allgadd(gp);
-
-	// The context for gp will be set up in runtime_needm.  But
-	// here we need to set up the context for g0.
-	uc = ucontext_arg(&mp->g0->context[0]);
+	uc = ucontext_arg(&gp->context[0]);
 	getcontext(uc);
-	uc->uc_stack.ss_sp = g0_sp;
-	uc->uc_stack.ss_size = (size_t)g0_spsize;
+	uc->uc_stack.ss_sp = sp;
+	uc->uc_stack.ss_size = (size_t)spsize;
 	makecontext(uc, kickoff, 0);
-
-	// Add m to the extra list.
-	mnext = lockextra(true);
-	mp->schedlink = (uintptr)mnext;
-	unlockextra(mp);
-}
-
-// dropm is called when a cgo callback has called needm but is now
-// done with the callback and returning back into the non-Go thread.
-// It puts the current m back onto the extra list.
-//
-// The main expense here is the call to signalstack to release the
-// m's signal stack, and then the call to needm on the next callback
-// from this thread. It is tempting to try to save the m for next time,
-// which would eliminate both these costs, but there might not be
-// a next time: the current thread (which Go does not control) might exit.
-// If we saved the m for that thread, there would be an m leak each time
-// such a thread exited. Instead, we acquire and release an m on each
-// call. These should typically not be scheduling operations, just a few
-// atomics, so the cost should be small.
-//
-// TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
-// variable using pthread_key_create. Unlike the pthread keys we already use
-// on OS X, this dummy key would never be read by Go code. It would exist
-// only so that we could register at thread-exit-time destructor.
-// That destructor would put the m back onto the extra list.
-// This is purely a performance optimization. The current version,
-// in which dropm happens on each cgo call, is still correct too.
-// We may have to keep the current version on systems with cgo
-// but without pthreads, like Windows.
-void
-runtime_dropm(void)
-{
-	M *mp, *mnext;
-
-	// Undo whatever initialization minit did during needm.
-	runtime_unminit();
-
-	// Clear m and g, and return m to the extra list.
-	// After the call to setg we can only call nosplit functions.
-	mp = g->m;
-	runtime_setg(nil);
-
-	mp->curg->atomicstatus = _Gdead;
-	mp->curg->gcstack = nil;
-	mp->curg->gcnextsp = nil;
-
-	mnext = lockextra(true);
-	mp->schedlink = (uintptr)mnext;
-	unlockextra(mp);
-}
-
-#define MLOCKED ((M*)1)
-
-// lockextra locks the extra list and returns the list head.
-// The caller must unlock the list by storing a new list head
-// to runtime.extram. If nilokay is true, then lockextra will
-// return a nil list head if that's what it finds. If nilokay is false,
-// lockextra will keep waiting until the list head is no longer nil.
-static M*
-lockextra(bool nilokay)
-{
-	M *mp;
-	void (*yield)(void);
-
-	for(;;) {
-		mp = runtime_atomicloadp(&runtime_extram);
-		if(mp == MLOCKED) {
-			yield = runtime_osyield;
-			yield();
-			continue;
-		}
-		if(mp == nil && !nilokay) {
-			runtime_usleep(1);
-			continue;
-		}
-		if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
-			yield = runtime_osyield;
-			yield();
-			continue;
-		}
-		break;
-	}
-	return mp;
-}
-
-static void
-unlockextra(M *mp)
-{
-	runtime_atomicstorep(&runtime_extram, mp);
-}
-
-static int32
-countextra()
-{
-	M *mp, *mc;
-	int32 c;
-
-	for(;;) {
-		mp = runtime_atomicloadp(&runtime_extram);
-		if(mp == MLOCKED) {
-			runtime_osyield();
-			continue;
-		}
-		if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
-			runtime_osyield();
-			continue;
-		}
-		c = 0;
-		for(mc = mp; mc != nil; mc = (M*)mc->schedlink)
-			c++;
-		runtime_atomicstorep(&runtime_extram, mp);
-		return c;
-	}
 }
 
 // Create a new m.  It will start off with a call to fn, or else the scheduler.
@@ -1389,7 +1136,7 @@  newm(void(*fn)(void), P *p)
 {
 	M *mp;
 
-	mp = runtime_allocm(p, -1, nil, nil);
+	mp = runtime_allocm(p, false, nil, nil);
 	mp->nextp = (uintptr)p;
 	mp->mstartfn = (uintptr)(void*)fn;
 
@@ -2287,16 +2034,35 @@  syscall_runtime_AfterFork(void)
 
 // Allocate a new g, with a stack big enough for stacksize bytes.
 G*
-runtime_malg(int32 stacksize, byte** ret_stack, uintptr* ret_stacksize)
+runtime_malg(bool allocatestack, bool signalstack, byte** ret_stack, uintptr* ret_stacksize)
 {
+	uintptr stacksize;
 	G *newg;
+	byte* unused_stack;
+	uintptr unused_stacksize;
+#if USING_SPLIT_STACK
+	int dont_block_signals = 0;
+	size_t ss_stacksize;
+#endif
 
+	if (ret_stack == nil) {
+		ret_stack = &unused_stack;
+	}
+	if (ret_stacksize == nil) {
+		ret_stacksize = &unused_stacksize;
+	}
 	newg = allocg();
-	if(stacksize >= 0) {
-#if USING_SPLIT_STACK
-		int dont_block_signals = 0;
-		size_t ss_stacksize;
+	if(allocatestack) {
+		stacksize = StackMin;
+		if(signalstack) {
+			stacksize = 32 * 1024; // OS X wants >= 8K, GNU/Linux >= 2K
+#ifdef SIGSTKSZ
+			if(stacksize < SIGSTKSZ)
+				stacksize = SIGSTKSZ;
+#endif
+		}
 
+#if USING_SPLIT_STACK
 		*ret_stack = __splitstack_makecontext(stacksize,
 						      &newg->stackcontext[0],
 						      &ss_stacksize);
@@ -2361,7 +2127,7 @@  __go_go(void (*fn)(void*), void* arg)
 	} else {
 		uintptr malsize;
 
-		newg = runtime_malg(StackMin, &sp, &malsize);
+		newg = runtime_malg(true, false, &sp, &malsize);
 		spsize = (size_t)malsize;
 		allgadd(newg);
 	}
@@ -2376,30 +2142,17 @@  __go_go(void (*fn)(void*), void* arg)
 	}
 	newg->goid = p->goidcache++;
 
-	{
-		// Avoid warnings about variables clobbered by
-		// longjmp.
-		byte * volatile vsp = sp;
-		size_t volatile vspsize = spsize;
-		G * volatile vnewg = newg;
-		ucontext_t * volatile uc;
-
-		uc = ucontext_arg(&vnewg->context[0]);
-		getcontext(uc);
-		uc->uc_stack.ss_sp = vsp;
-		uc->uc_stack.ss_size = vspsize;
-		makecontext(uc, kickoff, 0);
-
-		runqput(p, vnewg);
-
-		if(runtime_atomicload(&runtime_sched->npidle) != 0 && runtime_atomicload(&runtime_sched->nmspinning) == 0 && fn != runtime_main)  // TODO: fast atomic
-			wakep();
-		g->m->locks--;
-		return vnewg;
-	}
+	makeGContext(newg, sp, (uintptr)spsize);
+
+	runqput(p, newg);
+
+	if(runtime_atomicload(&runtime_sched->npidle) != 0 && runtime_atomicload(&runtime_sched->nmspinning) == 0 && fn != runtime_main)  // TODO: fast atomic
+		wakep();
+	g->m->locks--;
+	return newg;
 }
 
-static void
+void
 allgadd(G *gp)
 {
 	G **new;
@@ -2902,7 +2655,7 @@  checkdead(void)
 	}
 
 	// -1 for sysmon
-	run = runtime_sched->mcount - runtime_sched->nmidle - runtime_sched->nmidlelocked - 1 - countextra();
+	run = runtime_sched->mcount - runtime_sched->nmidle - runtime_sched->nmidlelocked - 1;
 	if(run > 0)
 		return;
 	// If we are dying because of a signal caught on an already idle thread,
@@ -3534,12 +3287,6 @@  sync_atomic_runtime_procUnpin()
 	procUnpin();
 }
 
-void
-runtime_proc_scan(struct Workbuf** wbufp, void (*enqueue1)(struct Workbuf**, Obj))
-{
-	enqueue1(wbufp, (Obj){(byte*)&runtime_main_init_done, sizeof runtime_main_init_done, 0});
-}
-
 // Return whether we are waiting for a GC.  This gc toolchain uses
 // preemption instead.
 bool
Index: libgo/runtime/runtime.h
===================================================================
--- libgo/runtime/runtime.h	(revision 243445)
+++ libgo/runtime/runtime.h	(working copy)
@@ -52,7 +52,7 @@  typedef uintptr		uintreg;
 
 /* Defined types.  */
 
-typedef	uint8			bool;
+typedef	_Bool			bool;
 typedef	uint8			byte;
 typedef	struct	g		G;
 typedef	struct	mutex		Lock;
@@ -240,7 +240,6 @@  extern	M*	runtime_allm;
 extern	P**	runtime_allp;
 extern	Sched*  runtime_sched;
 extern	int32	runtime_gomaxprocs;
-extern	uint32	runtime_needextram;
 extern	uint32	runtime_panicking(void)
   __asm__ (GOSYM_PREFIX "runtime.getPanicking");
 extern	int8*	runtime_goos;
@@ -298,15 +297,13 @@  void	runtime_ready(G*);
 String	runtime_getenv(const char*);
 int32	runtime_atoi(const byte*, intgo);
 void*	runtime_mstart(void*);
-G*	runtime_malg(int32, byte**, uintptr*);
+G*	runtime_malg(bool, bool, byte**, uintptr*)
+	__asm__(GOSYM_PREFIX "runtime.malg");
 void	runtime_mpreinit(M*);
-void	runtime_minit(void);
-void	runtime_unminit(void);
-void	runtime_needm(void)
-  __asm__ (GOSYM_PREFIX "runtime.needm");
-void	runtime_dropm(void)
-  __asm__ (GOSYM_PREFIX "runtime.dropm");
-void	runtime_signalstack(byte*, int32);
+void	runtime_minit(void)
+  __asm__ (GOSYM_PREFIX "runtime.minit");
+void	runtime_signalstack(byte*, uintptr)
+  __asm__ (GOSYM_PREFIX "runtime.signalstack");
 MCache*	runtime_allocmcache(void)
   __asm__ (GOSYM_PREFIX "runtime.allocmcache");
 void	runtime_freemcache(MCache*);
@@ -345,7 +342,8 @@  int32	runtime_round2(int32 x); // round
 
 void runtime_setg(G*)
   __asm__ (GOSYM_PREFIX "runtime.setg");
-void runtime_newextram(void);
+void runtime_newextram(void)
+  __asm__ (GOSYM_PREFIX "runtime.newextram");
 #define runtime_exit(s) exit(s)
 #define runtime_breakpoint() __builtin_trap()
 void	runtime_gosched(void);
@@ -523,9 +521,12 @@  void	runtime_procyield(uint32)
   __asm__(GOSYM_PREFIX "runtime.procyield");
 void	runtime_osyield(void)
   __asm__(GOSYM_PREFIX "runtime.osyield");
-void	runtime_lockOSThread(void);
-void	runtime_unlockOSThread(void);
-bool	runtime_lockedOSThread(void);
+void	runtime_lockOSThread(void)
+  __asm__(GOSYM_PREFIX "runtime.lockOSThread");
+void	runtime_unlockOSThread(void)
+  __asm__(GOSYM_PREFIX "runtime.unlockOSThread");
+bool	runtime_lockedOSThread(void)
+  __asm__(GOSYM_PREFIX "runtime.lockedOSThread");
 
 void	runtime_printcreatedby(G*)
   __asm__(GOSYM_PREFIX "runtime.printcreatedby");
@@ -587,8 +588,6 @@  struct time_now_ret now() __asm__ (GOSYM
 extern void _cgo_wait_runtime_init_done (void);
 extern void _cgo_notify_runtime_init_done (void);
 extern _Bool runtime_iscgo;
-extern _Bool runtime_cgoHasExtraM;
-extern Hchan *runtime_main_init_done;
 extern uintptr __go_end __attribute__ ((weak));
 extern void *getitab(const struct __go_type_descriptor *,
 		     const struct __go_type_descriptor *,
@@ -596,5 +595,11 @@  extern void *getitab(const struct __go_t
   __asm__ (GOSYM_PREFIX "runtime.getitab");
 
 extern void runtime_cpuinit(void);
+extern void setIsCgo(void)
+  __asm__ (GOSYM_PREFIX "runtime.setIsCgo");
 extern void setCpuidECX(uint32)
   __asm__ (GOSYM_PREFIX "runtime.setCpuidECX");
+extern void makeMainInitDone(void)
+  __asm__ (GOSYM_PREFIX "runtime.makeMainInitDone");
+extern void closeMainInitDone(void)
+  __asm__ (GOSYM_PREFIX "runtime.closeMainInitDone");
Index: libgo/runtime/runtime_c.c
===================================================================
--- libgo/runtime/runtime_c.c	(revision 243445)
+++ libgo/runtime/runtime_c.c	(working copy)
@@ -99,43 +99,12 @@  runtime_cputicks(void)
 void
 runtime_mpreinit(M *mp)
 {
-	int32 stacksize = 32 * 1024;	// OS X wants >=8K, Linux >=2K
-
-#ifdef SIGSTKSZ
-	if(stacksize < SIGSTKSZ)
-		stacksize = SIGSTKSZ;
-#endif
-
-	mp->gsignal = runtime_malg(stacksize, (byte**)&mp->gsignalstack, &mp->gsignalstacksize);
+	mp->gsignal = runtime_malg(true, true, (byte**)&mp->gsignalstack, &mp->gsignalstacksize);
 	mp->gsignal->m = mp;
 }
 
-// Called to initialize a new m (including the bootstrap m).
-// Called on the new thread, can not allocate memory.
-void
-runtime_minit(void)
-{
-	M* m;
-	sigset_t sigs;
-
-	// Initialize signal handling.
-	m = runtime_m();
-	runtime_signalstack(m->gsignalstack, m->gsignalstacksize);
-	if (sigemptyset(&sigs) != 0)
-		runtime_throw("sigemptyset");
-	pthread_sigmask(SIG_SETMASK, &sigs, nil);
-}
-
-// Called from dropm to undo the effect of an minit.
-void
-runtime_unminit(void)
-{
-	runtime_signalstack(nil, 0);
-}
-
-
 void
-runtime_signalstack(byte *p, int32 n)
+runtime_signalstack(byte *p, uintptr n)
 {
 	stack_t st;