
libgo patch committed: copy more scheduler code from Go 1.7 runtime

Message ID: CAOyqgcUvjzyevyAnX6B+QFmnWU3S9eNvbfmWQHnkw_dd9otf6A@mail.gmail.com
State: New

Commit Message

Ian Lance Taylor Jan. 10, 2017, 9:09 p.m. UTC
I looked at a diff of libgo/go/runtime/proc.go between Go 1.7 and
gccgo, and copied over all the easy stuff.  Bootstrapped and ran Go
testsuite on x86_64-pc-linux-gnu.  Committed to mainline.

Ian
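
The recurring mechanism in this patch is a //go:linkname directive on the
Go side paired with a matching __asm__(GOSYM_PREFIX "...") declaration on
the C side, so the parts of the runtime still written in C can keep
calling the newly ported Go functions (mcommoninit, gfput, gfget, and so
on).  A minimal sketch of that pattern, using a hypothetical exampleInit
that is not part of this patch:

package runtime

// Sketch only: //go:linkname gives the Go function a stable symbol name.
// The C runtime would then declare
//   extern void exampleInit(int32)
//     __asm__(GOSYM_PREFIX "runtime.exampleInit");
// and call exampleInit() directly, as proc.c does for the functions
// ported here.

//go:linkname exampleInit runtime.exampleInit
func exampleInit(n int32) {
	// ported scheduler logic would live here
}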

Patch

Index: gcc/go/gofrontend/MERGE
===================================================================
--- gcc/go/gofrontend/MERGE	(revision 244256)
+++ gcc/go/gofrontend/MERGE	(working copy)
@@ -1,4 +1,4 @@ 
-f439989e483b7c2eada6ddcf6e730a791cce603f
+d3725d876496f2cca3d6ce538e98b58c85d90bfb
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
Index: libgo/go/runtime/debug.go
===================================================================
--- libgo/go/runtime/debug.go	(revision 244236)
+++ libgo/go/runtime/debug.go	(working copy)
@@ -44,7 +44,7 @@  func NumCPU() int
 // NumCgoCall returns the number of cgo calls made by the current process.
 func NumCgoCall() int64 {
 	var n int64
-	for mp := (*m)(atomic.Loadp(unsafe.Pointer(allm()))); mp != nil; mp = mp.alllink {
+	for mp := (*m)(atomic.Loadp(unsafe.Pointer(&allm))); mp != nil; mp = mp.alllink {
 		n += int64(mp.ncgocall)
 	}
 	return n
Index: libgo/go/runtime/export_test.go
===================================================================
--- libgo/go/runtime/export_test.go	(revision 244236)
+++ libgo/go/runtime/export_test.go	(working copy)
@@ -24,8 +24,7 @@  import (
 
 var Entersyscall = entersyscall
 var Exitsyscall = exitsyscall
-
-// var LockedOSThread = lockedOSThread
+var LockedOSThread = lockedOSThread
 
 // var Xadduintptr = xadduintptr
 
Index: libgo/go/runtime/mprof.go
===================================================================
--- libgo/go/runtime/mprof.go	(revision 244166)
+++ libgo/go/runtime/mprof.go	(working copy)
@@ -521,7 +521,7 @@  func BlockProfile(p []BlockProfileRecord
 // Most clients should use the runtime/pprof package instead
 // of calling ThreadCreateProfile directly.
 func ThreadCreateProfile(p []StackRecord) (n int, ok bool) {
-	first := (*m)(atomic.Loadp(unsafe.Pointer(allm())))
+	first := (*m)(atomic.Loadp(unsafe.Pointer(&allm)))
 	for mp := first; mp != nil; mp = mp.alllink {
 		n++
 	}
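
The ThreadCreateProfile hunk above only changes how the list of M's is
read (the old allm() accessor function is replaced by the new allm
package variable); the exported API is unchanged.  For reference, a
minimal usage sketch of that API, not part of the patch:

package main

import (
	"fmt"
	"runtime"
)

func main() {
	// First call with nil to learn how many records exist, then call
	// again with a slice that is large enough (plus a little slack).
	n, _ := runtime.ThreadCreateProfile(nil)
	p := make([]runtime.StackRecord, n+8)
	if n, ok := runtime.ThreadCreateProfile(p); ok {
		fmt.Println("thread creation profile records:", n)
	}
}
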
Index: libgo/go/runtime/os_gccgo.go
===================================================================
--- libgo/go/runtime/os_gccgo.go	(revision 244166)
+++ libgo/go/runtime/os_gccgo.go	(working copy)
@@ -11,6 +11,13 @@  import (
 // Temporary for C code to call:
 //go:linkname minit runtime.minit
 
+// Called to initialize a new m (including the bootstrap m).
+// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
+func mpreinit(mp *m) {
+	mp.gsignal = malg(true, true, &mp.gsignalstack, &mp.gsignalstacksize)
+	mp.gsignal.m = mp
+}
+
 // minit is called to initialize a new m (including the bootstrap m).
 // Called on the new thread, cannot allocate memory.
 func minit() {
Index: libgo/go/runtime/proc.go
===================================================================
--- libgo/go/runtime/proc.go	(revision 244236)
+++ libgo/go/runtime/proc.go	(working copy)
@@ -18,6 +18,7 @@  import (
 //go:linkname sysmon runtime.sysmon
 //go:linkname schedtrace runtime.schedtrace
 //go:linkname allgadd runtime.allgadd
+//go:linkname mcommoninit runtime.mcommoninit
 //go:linkname ready runtime.ready
 //go:linkname gcprocs runtime.gcprocs
 //go:linkname needaddgcproc runtime.needaddgcproc
@@ -27,6 +28,10 @@  import (
 //go:linkname stoplockedm runtime.stoplockedm
 //go:linkname schedule runtime.schedule
 //go:linkname execute runtime.execute
+//go:linkname gfput runtime.gfput
+//go:linkname gfget runtime.gfget
+//go:linkname lockOSThread runtime.lockOSThread
+//go:linkname unlockOSThread runtime.unlockOSThread
 //go:linkname procresize runtime.procresize
 //go:linkname helpgc runtime.helpgc
 //go:linkname stopTheWorldWithSema runtime.stopTheWorldWithSema
@@ -66,6 +71,113 @@  func goready(gp *g, traceskip int) {
 	})
 }
 
+//go:nosplit
+func acquireSudog() *sudog {
+	// Delicate dance: the semaphore implementation calls
+	// acquireSudog, acquireSudog calls new(sudog),
+	// new calls malloc, malloc can call the garbage collector,
+	// and the garbage collector calls the semaphore implementation
+	// in stopTheWorld.
+	// Break the cycle by doing acquirem/releasem around new(sudog).
+	// The acquirem/releasem increments m.locks during new(sudog),
+	// which keeps the garbage collector from being invoked.
+	mp := acquirem()
+	pp := mp.p.ptr()
+	if len(pp.sudogcache) == 0 {
+		lock(&sched.sudoglock)
+		// First, try to grab a batch from central cache.
+		for len(pp.sudogcache) < cap(pp.sudogcache)/2 && sched.sudogcache != nil {
+			s := sched.sudogcache
+			sched.sudogcache = s.next
+			s.next = nil
+			pp.sudogcache = append(pp.sudogcache, s)
+		}
+		unlock(&sched.sudoglock)
+		// If the central cache is empty, allocate a new one.
+		if len(pp.sudogcache) == 0 {
+			pp.sudogcache = append(pp.sudogcache, new(sudog))
+		}
+	}
+	n := len(pp.sudogcache)
+	s := pp.sudogcache[n-1]
+	pp.sudogcache[n-1] = nil
+	pp.sudogcache = pp.sudogcache[:n-1]
+	if s.elem != nil {
+		throw("acquireSudog: found s.elem != nil in cache")
+	}
+	releasem(mp)
+	return s
+}
+
+//go:nosplit
+func releaseSudog(s *sudog) {
+	if s.elem != nil {
+		throw("runtime: sudog with non-nil elem")
+	}
+	if s.selectdone != nil {
+		throw("runtime: sudog with non-nil selectdone")
+	}
+	if s.next != nil {
+		throw("runtime: sudog with non-nil next")
+	}
+	if s.prev != nil {
+		throw("runtime: sudog with non-nil prev")
+	}
+	if s.waitlink != nil {
+		throw("runtime: sudog with non-nil waitlink")
+	}
+	if s.c != nil {
+		throw("runtime: sudog with non-nil c")
+	}
+	gp := getg()
+	if gp.param != nil {
+		throw("runtime: releaseSudog with non-nil gp.param")
+	}
+	mp := acquirem() // avoid rescheduling to another P
+	pp := mp.p.ptr()
+	if len(pp.sudogcache) == cap(pp.sudogcache) {
+		// Transfer half of local cache to the central cache.
+		var first, last *sudog
+		for len(pp.sudogcache) > cap(pp.sudogcache)/2 {
+			n := len(pp.sudogcache)
+			p := pp.sudogcache[n-1]
+			pp.sudogcache[n-1] = nil
+			pp.sudogcache = pp.sudogcache[:n-1]
+			if first == nil {
+				first = p
+			} else {
+				last.next = p
+			}
+			last = p
+		}
+		lock(&sched.sudoglock)
+		last.next = sched.sudogcache
+		sched.sudogcache = first
+		unlock(&sched.sudoglock)
+	}
+	pp.sudogcache = append(pp.sudogcache, s)
+	releasem(mp)
+}
+
+// funcPC returns the entry PC of the function f.
+// It assumes that f is a func value. Otherwise the behavior is undefined.
+// For gccgo here unless and until we port proc.go.
+// Note that this differs from the gc implementation; the gc implementation
+// adds sys.PtrSize to the address of the interface value, but GCC's
+// alias analysis decides that that can not be a reference to the second
+// field of the interface, and in some cases it drops the initialization
+// of the second field as a dead store.
+//go:nosplit
+func funcPC(f interface{}) uintptr {
+	i := (*iface)(unsafe.Pointer(&f))
+	return **(**uintptr)(i.data)
+}
+
+func lockedOSThread() bool {
+	gp := getg()
+	return gp.lockedm != nil && gp.m.lockedg != nil
+}
+
 var (
 	allgs    []*g
 	allglock mutex
@@ -98,6 +210,43 @@  func dumpgstatus(gp *g) {
 	print("runtime:  g:  g=", _g_, ", goid=", _g_.goid, ",  g->atomicstatus=", readgstatus(_g_), "\n")
 }
 
+func checkmcount() {
+	// sched lock is held
+	if sched.mcount > sched.maxmcount {
+		print("runtime: program exceeds ", sched.maxmcount, "-thread limit\n")
+		throw("thread exhaustion")
+	}
+}
+
+func mcommoninit(mp *m) {
+	_g_ := getg()
+
+	// g0 stack won't make sense for user (and is not necessarily unwindable).
+	if _g_ != _g_.m.g0 {
+		callers(1, mp.createstack[:])
+	}
+
+	mp.fastrand = 0x49f6428a + uint32(mp.id) + uint32(cputicks())
+	if mp.fastrand == 0 {
+		mp.fastrand = 0x49f6428a
+	}
+
+	lock(&sched.lock)
+	mp.id = sched.mcount
+	sched.mcount++
+	checkmcount()
+	mpreinit(mp)
+
+	// Add to allm so garbage collector doesn't free g->m
+	// when it is just in a register or thread-local storage.
+	mp.alllink = allm
+
+	// NumCgoCall() iterates over allm w/o schedlock,
+	// so we need to publish it safely.
+	atomicstorep(unsafe.Pointer(&allm), unsafe.Pointer(mp))
+	unlock(&sched.lock)
+}
+
 // Mark gp ready to run.
 func ready(gp *g, traceskip int, next bool) {
 	if trace.enabled {
@@ -203,6 +352,13 @@  func freezetheworld() {
 	usleep(1000)
 }
 
+func isscanstatus(status uint32) bool {
+	if status == _Gscan {
+		throw("isscanstatus: Bad status Gscan")
+	}
+	return status&_Gscan == _Gscan
+}
+
 // All reads and writes of g's status go through readgstatus, casgstatus
 // castogscanstatus, casfrom_Gscanstatus.
 //go:nosplit
@@ -210,6 +366,63 @@  func readgstatus(gp *g) uint32 {
 	return atomic.Load(&gp.atomicstatus)
 }
 
+// Ownership of gcscanvalid:
+//
+// If gp is running (meaning status == _Grunning or _Grunning|_Gscan),
+// then gp owns gp.gcscanvalid, and other goroutines must not modify it.
+//
+// Otherwise, a second goroutine can lock the scan state by setting _Gscan
+// in the status bit and then modify gcscanvalid, and then unlock the scan state.
+//
+// Note that the first condition implies an exception to the second:
+// if a second goroutine changes gp's status to _Grunning|_Gscan,
+// that second goroutine still does not have the right to modify gcscanvalid.
+
+// The Gscanstatuses are acting like locks and this releases them.
+// If it proves to be a performance hit we should be able to make these
+// simple atomic stores but for now we are going to throw if
+// we see an inconsistent state.
+func casfrom_Gscanstatus(gp *g, oldval, newval uint32) {
+	success := false
+
+	// Check that transition is valid.
+	switch oldval {
+	default:
+		print("runtime: casfrom_Gscanstatus bad oldval gp=", gp, ", oldval=", hex(oldval), ", newval=", hex(newval), "\n")
+		dumpgstatus(gp)
+		throw("casfrom_Gscanstatus:top gp->status is not in scan state")
+	case _Gscanrunnable,
+		_Gscanwaiting,
+		_Gscanrunning,
+		_Gscansyscall:
+		if newval == oldval&^_Gscan {
+			success = atomic.Cas(&gp.atomicstatus, oldval, newval)
+		}
+	}
+	if !success {
+		print("runtime: casfrom_Gscanstatus failed gp=", gp, ", oldval=", hex(oldval), ", newval=", hex(newval), "\n")
+		dumpgstatus(gp)
+		throw("casfrom_Gscanstatus: gp->status is not in scan state")
+	}
+}
+
+// This will return false if the gp is not in the expected status and the cas fails.
+// This acts like a lock acquire while the casfromgstatus acts like a lock release.
+func castogscanstatus(gp *g, oldval, newval uint32) bool {
+	switch oldval {
+	case _Grunnable,
+		_Grunning,
+		_Gwaiting,
+		_Gsyscall:
+		if newval == oldval|_Gscan {
+			return atomic.Cas(&gp.atomicstatus, oldval, newval)
+		}
+	}
+	print("runtime: castogscanstatus oldval=", hex(oldval), " newval=", hex(newval), "\n")
+	throw("castogscanstatus")
+	panic("not reached")
+}
+
 // If asked to move to or from a Gscanstatus this will throw. Use the castogscanstatus
 // and casfrom_Gscanstatus instead.
 // casgstatus will loop if the g->atomicstatus is in a Gscan status until the routine that
@@ -453,6 +666,100 @@  func startTheWorldWithSema() {
 	_g_.m.locks--
 }
 
+// forEachP calls fn(p) for every P p when p reaches a GC safe point.
+// If a P is currently executing code, this will bring the P to a GC
+// safe point and execute fn on that P. If the P is not executing code
+// (it is idle or in a syscall), this will call fn(p) directly while
+// preventing the P from exiting its state. This does not ensure that
+// fn will run on every CPU executing Go code, but it acts as a global
+// memory barrier. GC uses this as a "ragged barrier."
+//
+// The caller must hold worldsema.
+//
+//go:systemstack
+func forEachP(fn func(*p)) {
+	mp := acquirem()
+	_p_ := getg().m.p.ptr()
+
+	lock(&sched.lock)
+	if sched.safePointWait != 0 {
+		throw("forEachP: sched.safePointWait != 0")
+	}
+	sched.safePointWait = gomaxprocs - 1
+	sched.safePointFn = fn
+
+	// Ask all Ps to run the safe point function.
+	for _, p := range allp[:gomaxprocs] {
+		if p != _p_ {
+			atomic.Store(&p.runSafePointFn, 1)
+		}
+	}
+	preemptall()
+
+	// Any P entering _Pidle or _Psyscall from now on will observe
+	// p.runSafePointFn == 1 and will call runSafePointFn when
+	// changing its status to _Pidle/_Psyscall.
+
+	// Run safe point function for all idle Ps. sched.pidle will
+	// not change because we hold sched.lock.
+	for p := sched.pidle.ptr(); p != nil; p = p.link.ptr() {
+		if atomic.Cas(&p.runSafePointFn, 1, 0) {
+			fn(p)
+			sched.safePointWait--
+		}
+	}
+
+	wait := sched.safePointWait > 0
+	unlock(&sched.lock)
+
+	// Run fn for the current P.
+	fn(_p_)
+
+	// Force Ps currently in _Psyscall into _Pidle and hand them
+	// off to induce safe point function execution.
+	for i := 0; i < int(gomaxprocs); i++ {
+		p := allp[i]
+		s := p.status
+		if s == _Psyscall && p.runSafePointFn == 1 && atomic.Cas(&p.status, s, _Pidle) {
+			if trace.enabled {
+				traceGoSysBlock(p)
+				traceProcStop(p)
+			}
+			p.syscalltick++
+			handoffp(p)
+		}
+	}
+
+	// Wait for remaining Ps to run fn.
+	if wait {
+		for {
+			// Wait for 100us, then try to re-preempt in
+			// case of any races.
+			//
+			// Requires system stack.
+			if notetsleep(&sched.safePointNote, 100*1000) {
+				noteclear(&sched.safePointNote)
+				break
+			}
+			preemptall()
+		}
+	}
+	if sched.safePointWait != 0 {
+		throw("forEachP: not done")
+	}
+	for i := 0; i < int(gomaxprocs); i++ {
+		p := allp[i]
+		if p.runSafePointFn != 0 {
+			throw("forEachP: P did not run fn")
+		}
+	}
+
+	lock(&sched.lock)
+	sched.safePointFn = nil
+	unlock(&sched.lock)
+	releasem(mp)
+}
+
 // runSafePointFn runs the safe point function, if any, for this P.
 // This should be called like
 //
@@ -1245,6 +1552,108 @@  top:
 	execute(gp, inheritTime)
 }
 
+// dropg removes the association between m and the current goroutine m->curg (gp for short).
+// Typically a caller sets gp's status away from Grunning and then
+// immediately calls dropg to finish the job. The caller is also responsible
+// for arranging that gp will be restarted using ready at an
+// appropriate time. After calling dropg and arranging for gp to be
+// readied later, the caller can do other work but eventually should
+// call schedule to restart the scheduling of goroutines on this m.
+func dropg() {
+	_g_ := getg()
+
+	_g_.m.curg.m = nil
+	_g_.m.curg = nil
+}
+
+func beforefork() {
+	gp := getg().m.curg
+
+	// Fork can hang if preempted with signals frequently enough (see issue 5517).
+	// Ensure that we stay on the same M where we disable profiling.
+	gp.m.locks++
+	if gp.m.profilehz != 0 {
+		resetcpuprofiler(0)
+	}
+}
+
+// Called from syscall package before fork.
+//go:linkname syscall_runtime_BeforeFork syscall.runtime_BeforeFork
+//go:nosplit
+func syscall_runtime_BeforeFork() {
+	systemstack(beforefork)
+}
+
+func afterfork() {
+	gp := getg().m.curg
+
+	hz := sched.profilehz
+	if hz != 0 {
+		resetcpuprofiler(hz)
+	}
+	gp.m.locks--
+}
+
+// Called from syscall package after fork in parent.
+//go:linkname syscall_runtime_AfterFork syscall.runtime_AfterFork
+//go:nosplit
+func syscall_runtime_AfterFork() {
+	systemstack(afterfork)
+}
+
+// Put on gfree list.
+// If local list is too long, transfer a batch to the global list.
+func gfput(_p_ *p, gp *g) {
+	if readgstatus(gp) != _Gdead {
+		throw("gfput: bad status (not Gdead)")
+	}
+
+	gp.schedlink.set(_p_.gfree)
+	_p_.gfree = gp
+	_p_.gfreecnt++
+	if _p_.gfreecnt >= 64 {
+		lock(&sched.gflock)
+		for _p_.gfreecnt >= 32 {
+			_p_.gfreecnt--
+			gp = _p_.gfree
+			_p_.gfree = gp.schedlink.ptr()
+			gp.schedlink.set(sched.gfree)
+			sched.gfree = gp
+			sched.ngfree++
+		}
+		unlock(&sched.gflock)
+	}
+}
+
+// Get from gfree list.
+// If local list is empty, grab a batch from global list.
+func gfget(_p_ *p) *g {
+retry:
+	gp := _p_.gfree
+	if gp == nil && sched.gfree != nil {
+		lock(&sched.gflock)
+		for _p_.gfreecnt < 32 {
+			if sched.gfree != nil {
+				gp = sched.gfree
+				sched.gfree = gp.schedlink.ptr()
+			} else {
+				break
+			}
+			_p_.gfreecnt++
+			sched.ngfree--
+			gp.schedlink.set(_p_.gfree)
+			_p_.gfree = gp
+		}
+		unlock(&sched.gflock)
+		goto retry
+	}
+	if gp != nil {
+		_p_.gfree = gp.schedlink.ptr()
+		_p_.gfreecnt--
+	}
+	return gp
+}
+
 // Purge all cached G's from gfree list to the global list.
 func gfpurge(_p_ *p) {
 	lock(&sched.gflock)
@@ -1259,6 +1668,90 @@  func gfpurge(_p_ *p) {
 	unlock(&sched.gflock)
 }
 
+// dolockOSThread is called by LockOSThread and lockOSThread below
+// after they modify m.locked. Do not allow preemption during this call,
+// or else the m might be different in this function than in the caller.
+//go:nosplit
+func dolockOSThread() {
+	_g_ := getg()
+	_g_.m.lockedg = _g_
+	_g_.lockedm = _g_.m
+}
+
+//go:nosplit
+
+// LockOSThread wires the calling goroutine to its current operating system thread.
+// Until the calling goroutine exits or calls UnlockOSThread, it will always
+// execute in that thread, and no other goroutine can.
+func LockOSThread() {
+	getg().m.locked |= _LockExternal
+	dolockOSThread()
+}
+
+//go:nosplit
+func lockOSThread() {
+	getg().m.locked += _LockInternal
+	dolockOSThread()
+}
+
+// dounlockOSThread is called by UnlockOSThread and unlockOSThread below
+// after they update m->locked. Do not allow preemption during this call,
+// or else the m might be different in this function than in the caller.
+//go:nosplit
+func dounlockOSThread() {
+	_g_ := getg()
+	if _g_.m.locked != 0 {
+		return
+	}
+	_g_.m.lockedg = nil
+	_g_.lockedm = nil
+}
+
+//go:nosplit
+
+// UnlockOSThread unwires the calling goroutine from its fixed operating system thread.
+// If the calling goroutine has not called LockOSThread, UnlockOSThread is a no-op.
+func UnlockOSThread() {
+	getg().m.locked &^= _LockExternal
+	dounlockOSThread()
+}
+
+//go:nosplit
+func unlockOSThread() {
+	_g_ := getg()
+	if _g_.m.locked < _LockInternal {
+		systemstack(badunlockosthread)
+	}
+	_g_.m.locked -= _LockInternal
+	dounlockOSThread()
+}
+
+func badunlockosthread() {
+	throw("runtime: internal error: misuse of lockOSThread/unlockOSThread")
+}
+
+func gcount() int32 {
+	n := int32(allglen) - sched.ngfree - int32(atomic.Load(&sched.ngsys))
+	for i := 0; ; i++ {
+		_p_ := allp[i]
+		if _p_ == nil {
+			break
+		}
+		n -= _p_.gfreecnt
+	}
+
+	// All these variables can be changed concurrently, so the result can be inconsistent.
+	// But at least the current goroutine is running.
+	if n < 1 {
+		n = 1
+	}
+	return n
+}
+
+func mcount() int32 {
+	return sched.mcount
+}
+
 // Change number of processors. The world is stopped, sched is locked.
 // gcworkbufs are not being modified by either the GC or
 // the write barrier code.
@@ -1513,23 +2006,21 @@  func checkdead() {
 	// Maybe jump time forward for playground.
 	gp := timejump()
 	if gp != nil {
-		// Temporarily commented out for gccgo.
-		// For gccgo this code will never run anyhow.
-		// casgstatus(gp, _Gwaiting, _Grunnable)
-		// globrunqput(gp)
-		// _p_ := pidleget()
-		// if _p_ == nil {
-		// 	throw("checkdead: no p for timer")
-		// }
-		// mp := mget()
-		// if mp == nil {
-		// 	// There should always be a free M since
-		// 	// nothing is running.
-		// 	throw("checkdead: no m for timer")
-		// }
-		// nmp.nextp.set(_p_)
-		// notewakeup(&mp.park)
-		// return
+		casgstatus(gp, _Gwaiting, _Grunnable)
+		globrunqput(gp)
+		_p_ := pidleget()
+		if _p_ == nil {
+			throw("checkdead: no p for timer")
+		}
+		mp := mget()
+		if mp == nil {
+			// There should always be a free M since
+			// nothing is running.
+			throw("checkdead: no m for timer")
+		}
+		mp.nextp.set(_p_)
+		notewakeup(&mp.park)
+		return
 	}
 
 	getg().m.throwing = -1 // do not dump full stacks
@@ -1815,7 +2306,7 @@  func schedtrace(detailed bool) {
 		return
 	}
 
-	for mp := allm(); mp != nil; mp = mp.alllink {
+	for mp := allm; mp != nil; mp = mp.alllink {
 		_p_ := mp.p.ptr()
 		gp := mp.curg
 		lockedg := mp.lockedg
@@ -2186,6 +2677,55 @@  func runqsteal(_p_, p2 *p, stealRunNextG
 	return gp
 }
 
+//go:linkname setMaxThreads runtime_debug.setMaxThreads
+func setMaxThreads(in int) (out int) {
+	lock(&sched.lock)
+	out = int(sched.maxmcount)
+	sched.maxmcount = int32(in)
+	checkmcount()
+	unlock(&sched.lock)
+	return
+}
+
+//go:nosplit
+func procPin() int {
+	_g_ := getg()
+	mp := _g_.m
+
+	mp.locks++
+	return int(mp.p.ptr().id)
+}
+
+//go:nosplit
+func procUnpin() {
+	_g_ := getg()
+	_g_.m.locks--
+}
+
+//go:linkname sync_runtime_procPin sync.runtime_procPin
+//go:nosplit
+func sync_runtime_procPin() int {
+	return procPin()
+}
+
+//go:linkname sync_runtime_procUnpin sync.runtime_procUnpin
+//go:nosplit
+func sync_runtime_procUnpin() {
+	procUnpin()
+}
+
+//go:linkname sync_atomic_runtime_procPin sync_atomic.runtime_procPin
+//go:nosplit
+func sync_atomic_runtime_procPin() int {
+	return procPin()
+}
+
+//go:linkname sync_atomic_runtime_procUnpin sync_atomic.runtime_procUnpin
+//go:nosplit
+func sync_atomic_runtime_procUnpin() {
+	procUnpin()
+}
+
 // Active spinning for sync.Mutex.
 //go:linkname sync_runtime_canSpin sync.runtime_canSpin
 //go:nosplit
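
Among other things, this proc.go section ports LockOSThread/UnlockOSThread
and their internal counterparts from the C runtime.  The exported behavior
is unchanged; a minimal usage sketch of the public pair, not part of the
patch:

package main

import (
	"fmt"
	"runtime"
)

func main() {
	done := make(chan struct{})
	go func() {
		// Wire this goroutine to its current OS thread, e.g. for a
		// C library that keeps per-thread state.
		runtime.LockOSThread()
		defer runtime.UnlockOSThread()
		fmt.Println("running on a locked OS thread")
		close(done)
	}()
	<-done
}
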
Index: libgo/go/runtime/runtime2.go
===================================================================
--- libgo/go/runtime/runtime2.go	(revision 244236)
+++ libgo/go/runtime/runtime2.go	(working copy)
@@ -755,10 +755,8 @@  const _TracebackMaxFrames = 100
 var (
 	//	emptystring string
 
-	allglen uintptr
-
-	//	allm        *m
-
+	allglen    uintptr
+	allm       *m
 	allp       [_MaxGomaxprocs + 1]*p
 	gomaxprocs int32
 	panicking  uint32
Index: libgo/go/runtime/stubs.go
===================================================================
--- libgo/go/runtime/stubs.go	(revision 244236)
+++ libgo/go/runtime/stubs.go	(working copy)
@@ -234,20 +234,6 @@  func newobject(*_type) unsafe.Pointer
 // For gccgo unless and until we port malloc.go.
 func newarray(*_type, int) unsafe.Pointer
 
-// funcPC returns the entry PC of the function f.
-// It assumes that f is a func value. Otherwise the behavior is undefined.
-// For gccgo here unless and until we port proc.go.
-// Note that this differs from the gc implementation; the gc implementation
-// adds sys.PtrSize to the address of the interface value, but GCC's
-// alias analysis decides that that can not be a reference to the second
-// field of the interface, and in some cases it drops the initialization
-// of the second field as a dead store.
-//go:nosplit
-func funcPC(f interface{}) uintptr {
-	i := (*iface)(unsafe.Pointer(&f))
-	return **(**uintptr)(i.data)
-}
-
 // For gccgo, to communicate from the C code to the Go code.
 //go:linkname setIsCgo runtime.setIsCgo
 func setIsCgo() {
@@ -352,56 +338,6 @@  func exitsyscall(int32)
 func gopark(func(*g, unsafe.Pointer) bool, unsafe.Pointer, string, byte, int)
 func goparkunlock(*mutex, string, byte, int)
 
-// Temporary hack for gccgo until we port proc.go.
-//go:nosplit
-func acquireSudog() *sudog {
-	mp := acquirem()
-	pp := mp.p.ptr()
-	if len(pp.sudogcache) == 0 {
-		pp.sudogcache = append(pp.sudogcache, new(sudog))
-	}
-	n := len(pp.sudogcache)
-	s := pp.sudogcache[n-1]
-	pp.sudogcache[n-1] = nil
-	pp.sudogcache = pp.sudogcache[:n-1]
-	if s.elem != nil {
-		throw("acquireSudog: found s.elem != nil in cache")
-	}
-	releasem(mp)
-	return s
-}
-
-// Temporary hack for gccgo until we port proc.go.
-//go:nosplit
-func releaseSudog(s *sudog) {
-	if s.elem != nil {
-		throw("runtime: sudog with non-nil elem")
-	}
-	if s.selectdone != nil {
-		throw("runtime: sudog with non-nil selectdone")
-	}
-	if s.next != nil {
-		throw("runtime: sudog with non-nil next")
-	}
-	if s.prev != nil {
-		throw("runtime: sudog with non-nil prev")
-	}
-	if s.waitlink != nil {
-		throw("runtime: sudog with non-nil waitlink")
-	}
-	if s.c != nil {
-		throw("runtime: sudog with non-nil c")
-	}
-	gp := getg()
-	if gp.param != nil {
-		throw("runtime: releaseSudog with non-nil gp.param")
-	}
-	mp := acquirem() // avoid rescheduling to another P
-	pp := mp.p.ptr()
-	pp.sudogcache = append(pp.sudogcache, s)
-	releasem(mp)
-}
-
 // Temporary hack for gccgo until we port the garbage collector.
 func typeBitsBulkBarrier(typ *_type, p, size uintptr) {}
 
@@ -450,7 +386,6 @@  func LockOSThread()
 func UnlockOSThread()
 func lockOSThread()
 func unlockOSThread()
-func allm() *m
 
 // Temporary for gccgo until we port malloc.go
 func persistentalloc(size, align uintptr, sysStat *uint64) unsafe.Pointer
@@ -466,14 +401,6 @@  func setGCPercent(in int32) (out int32)
 	return setgcpercent(in)
 }
 
-// Temporary for gccgo until we port proc.go.
-func setmaxthreads(int) int
-
-//go:linkname setMaxThreads runtime_debug.setMaxThreads
-func setMaxThreads(in int) (out int) {
-	return setmaxthreads(in)
-}
-
 // Temporary for gccgo until we port atomic_pointer.go.
 //go:nosplit
 func atomicstorep(ptr unsafe.Pointer, new unsafe.Pointer) {
@@ -495,7 +422,6 @@  func getZerobase() *uintptr {
 
 // Temporary for gccgo until we port proc.go.
 func sigprof()
-func mcount() int32
 func goexit1()
 
 // Get signal trampoline, written in C.
@@ -549,6 +475,12 @@  func getallg(i int) *g {
 	return allgs[i]
 }
 
+// Temporary for gccgo until we port the garbage collector.
+//go:linkname getallm runtime.getallm
+func getallm() *m {
+	return allm
+}
+
 // Throw and rethrow an exception.
 func throwException()
 func rethrowException()
@@ -577,21 +509,6 @@  var work struct {
 	}
 }
 
-// gcount is temporary for gccgo until more of proc.go is ported.
-// This is a copy of the C function we used to use.
-func gcount() int32 {
-	n := int32(0)
-	lock(&allglock)
-	for _, gp := range allgs {
-		s := readgstatus(gp)
-		if s == _Grunnable || s == _Grunning || s == _Gsyscall || s == _Gwaiting {
-			n++
-		}
-	}
-	unlock(&allglock)
-	return n
-}
-
 // Temporary for gccgo until we port mgc.go.
 var gcBlackenEnabled uint32
 
Index: libgo/runtime/heapdump.c
===================================================================
--- libgo/runtime/heapdump.c	(revision 244236)
+++ libgo/runtime/heapdump.c	(working copy)
@@ -474,7 +474,7 @@  dumpms(void)
 {
 	M *mp;
 
-	for(mp = runtime_allm; mp != nil; mp = mp->alllink) {
+	for(mp = runtime_getallm(); mp != nil; mp = mp->alllink) {
 		dumpint(TagOSThread);
 		dumpint((uintptr)mp);
 		dumpint(mp->id);
Index: libgo/runtime/mgc0.c
===================================================================
--- libgo/runtime/mgc0.c	(revision 244236)
+++ libgo/runtime/mgc0.c	(working copy)
@@ -1279,7 +1279,6 @@  markroot(ParFor *desc, uint32 i)
 		// For gccgo we use this for all the other global roots.
 		enqueue1(&wbuf, (Obj){(byte*)&runtime_m0, sizeof runtime_m0, 0});
 		enqueue1(&wbuf, (Obj){(byte*)&runtime_g0, sizeof runtime_g0, 0});
-		enqueue1(&wbuf, (Obj){(byte*)&runtime_allm, sizeof runtime_allm, 0});
 		enqueue1(&wbuf, (Obj){(byte*)&runtime_allp, sizeof runtime_allp, 0});
 		enqueue1(&wbuf, (Obj){(byte*)&work, sizeof work, 0});
 		break;
@@ -2002,7 +2001,7 @@  runtime_updatememstats(GCStats *stats)
 	if(stats)
 		runtime_memclr((byte*)stats, sizeof(*stats));
 	stacks_inuse = 0;
-	for(mp=runtime_allm; mp; mp=mp->alllink) {
+	for(mp=runtime_getallm(); mp; mp=mp->alllink) {
 		//stacks_inuse += mp->stackinuse*FixedStack;
 		if(stats) {
 			src = (uint64*)&mp->gcstats;
Index: libgo/runtime/proc.c
===================================================================
--- libgo/runtime/proc.c	(revision 244236)
+++ libgo/runtime/proc.c	(working copy)
@@ -376,7 +376,6 @@  Sched*	runtime_sched;
 M	runtime_m0;
 G	runtime_g0;	// idle goroutine for m0
 G*	runtime_lastg;
-M*	runtime_allm;
 P**	runtime_allp;
 int8*	runtime_goos;
 int32	runtime_ncpu;
@@ -385,18 +384,17 @@  bool	runtime_precisestack;
 bool	runtime_isarchive;
 
 void* runtime_mstart(void*);
-static void mcommoninit(M*);
 static void exitsyscall0(G*);
 static void park0(G*);
 static void goexit0(G*);
-static void gfput(P*, G*);
-static G* gfget(P*);
 static bool exitsyscallfast(void);
 
 extern void setncpu(int32)
   __asm__(GOSYM_PREFIX "runtime.setncpu");
 extern void allgadd(G*)
   __asm__(GOSYM_PREFIX "runtime.allgadd");
+extern void mcommoninit(M*)
+  __asm__(GOSYM_PREFIX "runtime.mcommoninit");
 extern void stopm(void)
   __asm__(GOSYM_PREFIX "runtime.stopm");
 extern void handoffp(P*)
@@ -409,6 +407,10 @@  extern void schedule(void)
   __asm__(GOSYM_PREFIX "runtime.schedule");
 extern void execute(G*, bool)
   __asm__(GOSYM_PREFIX "runtime.execute");
+extern void gfput(P*, G*)
+  __asm__(GOSYM_PREFIX "runtime.gfput");
+extern G* gfget(P*)
+  __asm__(GOSYM_PREFIX "runtime.gfget");
 extern void procresize(int32)
   __asm__(GOSYM_PREFIX "runtime.procresize");
 extern void acquirep(P*)
@@ -620,16 +622,6 @@  void getTraceback(G* me, G* gp)
 	}
 }
 
-static void
-checkmcount(void)
-{
-	// sched lock is held
-	if(runtime_sched->mcount > runtime_sched->maxmcount) {
-		runtime_printf("runtime: program exceeds %d-thread limit\n", runtime_sched->maxmcount);
-		runtime_throw("thread exhaustion");
-	}
-}
-
 // Do a stack trace of gp, and then restore the context to
 // gp->dotraceback.
 
@@ -649,30 +641,6 @@  gtraceback(G* gp)
 	runtime_gogo(traceback->gp);
 }
 
-static void
-mcommoninit(M *mp)
-{
-	// If there is no mcache runtime_callers() will crash,
-	// and we are most likely in sysmon thread so the stack is senseless anyway.
-	if(g->m->mcache)
-		runtime_callers(1, mp->createstack, nelem(mp->createstack), false);
-
-	mp->fastrand = 0x49f6428aUL + mp->id + runtime_cputicks();
-
-	runtime_lock(&runtime_sched->lock);
-	mp->id = runtime_sched->mcount++;
-	checkmcount();
-	runtime_mpreinit(mp);
-
-	// Add to runtime_allm so garbage collector doesn't free m
-	// when it is just in a register or thread-local storage.
-	mp->alllink = runtime_allm;
-	// runtime_NumCgoCall() iterates over allm w/o schedlock,
-	// so we need to publish it safely.
-	runtime_atomicstorep(&runtime_allm, mp);
-	runtime_unlock(&runtime_sched->lock);
-}
-
 // Called to start an M.
 void*
 runtime_mstart(void* mp)
@@ -1332,33 +1300,6 @@  syscall_exitsyscall()
   runtime_exitsyscall(0);
 }
 
-// Called from syscall package before fork.
-void syscall_runtime_BeforeFork(void)
-  __asm__(GOSYM_PREFIX "syscall.runtime_BeforeFork");
-void
-syscall_runtime_BeforeFork(void)
-{
-	// Fork can hang if preempted with signals frequently enough (see issue 5517).
-	// Ensure that we stay on the same M where we disable profiling.
-	runtime_m()->locks++;
-	if(runtime_m()->profilehz != 0)
-		runtime_resetcpuprofiler(0);
-}
-
-// Called from syscall package after fork in parent.
-void syscall_runtime_AfterFork(void)
-  __asm__(GOSYM_PREFIX "syscall.runtime_AfterFork");
-void
-syscall_runtime_AfterFork(void)
-{
-	int32 hz;
-
-	hz = runtime_sched->profilehz;
-	if(hz != 0)
-		runtime_resetcpuprofiler(hz);
-	runtime_m()->locks--;
-}
-
 // Allocate a new g, with a stack big enough for stacksize bytes.
 G*
 runtime_malg(bool allocatestack, bool signalstack, byte** ret_stack, uintptr* ret_stacksize)
@@ -1480,55 +1421,6 @@  __go_go(void (*fn)(void*), void* arg)
 	return newg;
 }
 
-// Put on gfree list.
-// If local list is too long, transfer a batch to the global list.
-static void
-gfput(P *p, G *gp)
-{
-	gp->schedlink = (uintptr)p->gfree;
-	p->gfree = gp;
-	p->gfreecnt++;
-	if(p->gfreecnt >= 64) {
-		runtime_lock(&runtime_sched->gflock);
-		while(p->gfreecnt >= 32) {
-			p->gfreecnt--;
-			gp = p->gfree;
-			p->gfree = (G*)gp->schedlink;
-			gp->schedlink = (uintptr)runtime_sched->gfree;
-			runtime_sched->gfree = gp;
-		}
-		runtime_unlock(&runtime_sched->gflock);
-	}
-}
-
-// Get from gfree list.
-// If local list is empty, grab a batch from global list.
-static G*
-gfget(P *p)
-{
-	G *gp;
-
-retry:
-	gp = p->gfree;
-	if(gp == nil && runtime_sched->gfree) {
-		runtime_lock(&runtime_sched->gflock);
-		while(p->gfreecnt < 32 && runtime_sched->gfree) {
-			p->gfreecnt++;
-			gp = runtime_sched->gfree;
-			runtime_sched->gfree = (G*)gp->schedlink;
-			gp->schedlink = (uintptr)p->gfree;
-			p->gfree = gp;
-		}
-		runtime_unlock(&runtime_sched->gflock);
-		goto retry;
-	}
-	if(gp) {
-		p->gfree = (G*)gp->schedlink;
-		p->gfreecnt--;
-	}
-	return gp;
-}
-
 void
 runtime_Breakpoint(void)
 {
@@ -1543,74 +1435,6 @@  runtime_Gosched(void)
 	runtime_gosched();
 }
 
-// lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below
-// after they modify m->locked. Do not allow preemption during this call,
-// or else the m might be different in this function than in the caller.
-static void
-lockOSThread(void)
-{
-	g->m->lockedg = g;
-	g->lockedm = g->m;
-}
-
-void	runtime_LockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.LockOSThread");
-void
-runtime_LockOSThread(void)
-{
-	g->m->locked |= _LockExternal;
-	lockOSThread();
-}
-
-void
-runtime_lockOSThread(void)
-{
-	g->m->locked += _LockInternal;
-	lockOSThread();
-}
-
-
-// unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below
-// after they update m->locked. Do not allow preemption during this call,
-// or else the m might be in different in this function than in the caller.
-static void
-unlockOSThread(void)
-{
-	if(g->m->locked != 0)
-		return;
-	g->m->lockedg = nil;
-	g->lockedm = nil;
-}
-
-void	runtime_UnlockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.UnlockOSThread");
-
-void
-runtime_UnlockOSThread(void)
-{
-	g->m->locked &= ~_LockExternal;
-	unlockOSThread();
-}
-
-void
-runtime_unlockOSThread(void)
-{
-	if(g->m->locked < _LockInternal)
-		runtime_throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
-	g->m->locked -= _LockInternal;
-	unlockOSThread();
-}
-
-bool
-runtime_lockedOSThread(void)
-{
-	return g->lockedm != nil && g->m->lockedg != nil;
-}
-
-int32
-runtime_mcount(void)
-{
-	return runtime_sched->mcount;
-}
-
 static struct {
 	uint32 lock;
 	int32 hz;
@@ -1719,71 +1543,6 @@  runtime_setcpuprofilerate_m(int32 hz)
 	g->m->locks--;
 }
 
-intgo
-runtime_setmaxthreads(intgo in)
-{
-	intgo out;
-
-	runtime_lock(&runtime_sched->lock);
-	out = (intgo)runtime_sched->maxmcount;
-	runtime_sched->maxmcount = (int32)in;
-	checkmcount();
-	runtime_unlock(&runtime_sched->lock);
-	return out;
-}
-
-static intgo
-procPin()
-{
-	M *mp;
-
-	mp = runtime_m();
-	mp->locks++;
-	return (intgo)(((P*)mp->p)->id);
-}
-
-static void
-procUnpin()
-{
-	runtime_m()->locks--;
-}
-
-intgo sync_runtime_procPin(void)
-  __asm__ (GOSYM_PREFIX "sync.runtime_procPin");
-
-intgo
-sync_runtime_procPin()
-{
-	return procPin();
-}
-
-void sync_runtime_procUnpin(void)
-  __asm__ (GOSYM_PREFIX  "sync.runtime_procUnpin");
-
-void
-sync_runtime_procUnpin()
-{
-	procUnpin();
-}
-
-intgo sync_atomic_runtime_procPin(void)
-  __asm__ (GOSYM_PREFIX "sync_atomic.runtime_procPin");
-
-intgo
-sync_atomic_runtime_procPin()
-{
-	return procPin();
-}
-
-void sync_atomic_runtime_procUnpin(void)
-  __asm__ (GOSYM_PREFIX  "sync_atomic.runtime_procUnpin");
-
-void
-sync_atomic_runtime_procUnpin()
-{
-	procUnpin();
-}
-
 // Return whether we are waiting for a GC.  This gc toolchain uses
 // preemption instead.
 bool
@@ -1802,17 +1561,6 @@  os_beforeExit()
 {
 }
 
-// For Go code to look at variables, until we port proc.go.
-
-extern M* runtime_go_allm(void)
-  __asm__ (GOSYM_PREFIX "runtime.allm");
-
-M*
-runtime_go_allm()
-{
-	return runtime_allm;
-}
-
 intgo NumCPU(void) __asm__ (GOSYM_PREFIX "runtime.NumCPU");
 
 intgo
Index: libgo/runtime/runtime.h
===================================================================
--- libgo/runtime/runtime.h	(revision 244236)
+++ libgo/runtime/runtime.h	(working copy)
@@ -237,7 +237,8 @@  extern G* runtime_getallg(intgo)
 extern uintptr runtime_getallglen(void)
   __asm__(GOSYM_PREFIX "runtime.getallglen");
 extern	G*	runtime_lastg;
-extern	M*	runtime_allm;
+extern	M*	runtime_getallm(void)
+  __asm__(GOSYM_PREFIX "runtime.getallm");
 extern	P**	runtime_allp;
 extern	Sched*  runtime_sched;
 extern	uint32	runtime_panicking(void)
@@ -301,7 +302,6 @@  int32	runtime_atoi(const byte*, intgo);
 void*	runtime_mstart(void*);
 G*	runtime_malg(bool, bool, byte**, uintptr*)
 	__asm__(GOSYM_PREFIX "runtime.malg");
-void	runtime_mpreinit(M*);
 void	runtime_minit(void)
   __asm__ (GOSYM_PREFIX "runtime.minit");
 void	runtime_signalstack(byte*, uintptr)
@@ -313,8 +313,6 @@  void	runtime_freemcache(MCache*)
 void	runtime_mallocinit(void);
 void	runtime_mprofinit(void);
 #define runtime_getcallersp(p) __builtin_frame_address(0)
-int32	runtime_mcount(void)
-  __asm__ (GOSYM_PREFIX "runtime.mcount");
 void	runtime_mcall(void(*)(G*));
 uint32	runtime_fastrand1(void) __asm__ (GOSYM_PREFIX "runtime.fastrand1");
 int32	runtime_timediv(int64, int32, int32*)
@@ -394,8 +392,6 @@  void	runtime_crash(void)
 void	runtime_parsedebugvars(void)
   __asm__(GOSYM_PREFIX "runtime.parsedebugvars");
 void	_rt0_go(void);
-intgo	runtime_setmaxthreads(intgo)
-  __asm__ (GOSYM_PREFIX "runtime.setmaxthreads");
 G*	runtime_timejump(void);
 void	runtime_iterate_finq(void (*callback)(FuncVal*, void*, const FuncType*, const PtrType*));
 
@@ -522,8 +518,6 @@  void	runtime_lockOSThread(void)
   __asm__(GOSYM_PREFIX "runtime.lockOSThread");
 void	runtime_unlockOSThread(void)
   __asm__(GOSYM_PREFIX "runtime.unlockOSThread");
-bool	runtime_lockedOSThread(void)
-  __asm__(GOSYM_PREFIX "runtime.lockedOSThread");
 
 void	runtime_printcreatedby(G*)
   __asm__(GOSYM_PREFIX "runtime.printcreatedby");
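
With runtime_setmaxthreads and runtime_mcount dropped from the C header,
the thread limit is handled entirely by the Go setMaxThreads, which is
linknamed to runtime_debug.setMaxThreads.  The user-visible entry point is
still debug.SetMaxThreads; a minimal usage sketch, not part of the patch:

package main

import (
	"fmt"
	"runtime/debug"
)

func main() {
	// SetMaxThreads returns the previous limit; restore it when done.
	prev := debug.SetMaxThreads(20000)
	defer debug.SetMaxThreads(prev)
	fmt.Println("previous OS thread limit was", prev)
}
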
Index: libgo/runtime/runtime_c.c
===================================================================
--- libgo/runtime/runtime_c.c	(revision 244166)
+++ libgo/runtime/runtime_c.c	(working copy)
@@ -95,15 +95,6 @@  runtime_cputicks(void)
 #endif
 }
 
-// Called to initialize a new m (including the bootstrap m).
-// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
-void
-runtime_mpreinit(M *mp)
-{
-	mp->gsignal = runtime_malg(true, true, (byte**)&mp->gsignalstack, &mp->gsignalstacksize);
-	mp->gsignal->m = mp;
-}
-
 void
 runtime_signalstack(byte *p, uintptr n)
 {