Patchwork libgo patch committed: Runtime profiling support (not working yet)

login
register
mail settings
Submitter Ian Taylor
Date March 27, 2011, 7:14 p.m.
Message ID <mcrzkoge5ot.fsf@google.com>
Download mbox | patch
Permalink /patch/88513/
State New
Headers show

Comments

Ian Taylor - March 27, 2011, 7:14 p.m.
This patch to libgo adds the runtime profiling infrastructure from the
master Go library.  This does not yet work for gccgo, because I haven't
written the code to get a stack traceback.  However, that should come in
time, and adding the infrastructure provides some symbols which code in
the Go library expects to exist.  Bootstrapped and ran Go testsuite on
x86_64-unknown-linux-gnu.  Committed to mainline.

Ian

Patch

diff -r d71c93d01702 libgo/Makefile.am
--- a/libgo/Makefile.am	Sun Mar 27 11:06:07 2011 -0700
+++ b/libgo/Makefile.am	Sun Mar 27 12:07:37 2011 -0700
@@ -423,6 +423,7 @@ 
 	runtime/go-unsafe-newarray.c \
 	runtime/go-unsafe-pointer.c \
 	runtime/go-unwind.c \
+	runtime/cpuprof.c \
 	runtime/mcache.c \
 	runtime/mcentral.c \
 	$(runtime_mem_file) \
diff -r d71c93d01702 libgo/runtime/cpuprof.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgo/runtime/cpuprof.c	Sun Mar 27 12:07:37 2011 -0700
@@ -0,0 +1,432 @@ 
+// Copyright 2011 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// CPU profiling.
+// Based on algorithms and data structures used in
+// http://code.google.com/p/google-perftools/.
+//
+// The main difference between this code and the google-perftools
+// code is that this code is written to allow copying the profile data
+// to an arbitrary io.Writer, while the google-perftools code always
+// writes to an operating system file.
+//
+// The signal handler for the profiling clock tick adds a new stack trace
+// to a hash table tracking counts for recent traces.  Most clock ticks
+// hit in the cache.  In the event of a cache miss, an entry must be 
+// evicted from the hash table, copied to a log that will eventually be
+// written as profile data.  The google-perftools code flushed the
+// log itself during the signal handler.  This code cannot do that, because
+// the io.Writer might block or need system calls or locks that are not
+// safe to use from within the signal handler.  Instead, we split the log
+// into two halves and let the signal handler fill one half while a goroutine
+// is writing out the other half.  When the signal handler fills its half, it
+// offers to swap with the goroutine.  If the writer is not done with its half,
+// we lose the stack trace for this clock tick (and record that loss).
+// The goroutine interacts with the signal handler by calling getprofile() to
+// get the next log piece to write, implicitly handing back the last log
+// piece it obtained.
+//
+// The state of this dance between the signal handler and the goroutine
+// is encoded in the Profile.handoff field.  If handoff == 0, then the goroutine
+// is not using either log half and is waiting (or will soon be waiting) for
+// a new piece by calling notesleep(&p->wait).  If the signal handler
+// changes handoff from 0 to non-zero, it must call notewakeup(&p->wait)
+// to wake the goroutine.  The value indicates the number of entries in the
+// log half being handed off.  The goroutine leaves the non-zero value in
+// place until it has finished processing the log half and then flips the number
+// back to zero.  Setting the high bit in handoff means that the profiling is over, 
+// and the goroutine is now in charge of flushing the data left in the hash table
+// to the log and returning that data.  
+//
+// The handoff field is manipulated using atomic operations.
+// For the most part, the manipulation of handoff is orderly: if handoff == 0
+// then the signal handler owns it and can change it to non-zero.  
+// If handoff != 0 then the goroutine owns it and can change it to zero.
+// If that were the end of the story then we would not need to manipulate
+// handoff using atomic operations.  The operations are needed, however,
+// in order to let the log closer set the high bit to indicate "EOF" safely
+// in the situation when normally the goroutine "owns" handoff.
+
+#include "runtime.h"
+#include "malloc.h"
+
+#include "array.h"
+typedef struct __go_open_array Slice;
+#define array __values
+#define len __count
+#define cap __capacity
+
+enum
+{
+	HashSize = 1<<10,
+	LogSize = 1<<17,
+	Assoc = 4,
+	MaxStack = 64,
+};
+
+typedef struct Profile Profile;
+typedef struct Bucket Bucket;
+typedef struct Entry Entry;
+
+struct Entry {
+	uintptr count;
+	uintptr depth;
+	uintptr stack[MaxStack];
+};
+
+struct Bucket {
+	Entry entry[Assoc];
+};
+
+struct Profile {
+	bool on;		// profiling is on
+	Note wait;		// goroutine waits here
+	uintptr count;		// tick count
+	uintptr evicts;		// eviction count
+	uintptr lost;		// lost ticks that need to be logged
+	uintptr totallost;	// total lost ticks
+
+	// Active recent stack traces.
+	Bucket hash[HashSize];
+
+	// Log of traces evicted from hash.
+	// Signal handler has filled log[toggle][:nlog].
+	// Goroutine is writing log[1-toggle][:handoff].
+	uintptr log[2][LogSize/2];
+	uintptr nlog;
+	int32 toggle;
+	uint32 handoff;
+	
+	// Writer state.
+	// Writer maintains its own toggle to avoid races
+	// looking at signal handler's toggle.
+	uint32 wtoggle;
+	bool wholding;	// holding & need to release a log half
+	bool flushing;	// flushing hash table - profile is over
+};
+
+static Lock lk;
+static Profile *prof;
+
+static void tick(uintptr*, int32);
+static void add(Profile*, uintptr*, int32);
+static bool evict(Profile*, Entry*);
+static bool flushlog(Profile*);
+
+// LostProfileData is a no-op function used in profiles
+// to mark the number of profiling stack traces that were
+// discarded due to slow data writers.
+static void LostProfileData(void) {
+}
+
+extern void runtime_SetCPUProfileRate(int32)
+     __asm__("libgo_runtime.runtime.SetCPUProfileRate");
+
+// SetCPUProfileRate sets the CPU profiling rate.
+// The user documentation is in debug.go.
+void
+runtime_SetCPUProfileRate(int32 hz)
+{
+	uintptr *p;
+	uintptr n;
+
+	// Clamp hz to something reasonable.
+	if(hz < 0)
+		hz = 0;
+	if(hz > 1000000)
+		hz = 1000000;
+
+	runtime_lock(&lk);
+	if(hz > 0) {
+		if(prof == nil) {
+			prof = runtime_SysAlloc(sizeof *prof);
+			if(prof == nil) {
+				runtime_printf("runtime: cpu profiling cannot allocate memory\n");
+				runtime_unlock(&lk);
+				return;
+			}
+		}
+		if(prof->on || prof->handoff != 0) {
+			runtime_printf("runtime: cannot set cpu profile rate until previous profile has finished.\n");
+			runtime_unlock(&lk);
+			return;
+		}
+
+		prof->on = true;
+		p = prof->log[0];
+		// pprof binary header format.
+		// http://code.google.com/p/google-perftools/source/browse/trunk/src/profiledata.cc#117
+		*p++ = 0;  // count for header
+		*p++ = 3;  // depth for header
+		*p++ = 0;  // version number
+		*p++ = 1000000 / hz;  // period (microseconds)
+		*p++ = 0;
+		prof->nlog = p - prof->log[0];
+		prof->toggle = 0;
+		prof->wholding = false;
+		prof->wtoggle = 0;
+		prof->flushing = false;
+		runtime_noteclear(&prof->wait);
+
+		runtime_setcpuprofilerate(tick, hz);
+	} else if(prof->on) {
+		runtime_setcpuprofilerate(nil, 0);
+		prof->on = false;
+
+		// Now add is not running anymore, and getprofile owns the entire log.
+		// Set the high bit in prof->handoff to tell getprofile.
+		for(;;) {
+			n = prof->handoff;
+			if(n&0x80000000)
+				runtime_printf("runtime: setcpuprofile(off) twice");
+			if(runtime_cas(&prof->handoff, n, n|0x80000000))
+				break;
+		}
+		if(n == 0) {
+			// we did the transition from 0 -> nonzero so we wake getprofile
+			runtime_notewakeup(&prof->wait);
+		}
+	}
+	runtime_unlock(&lk);
+}
+
+static void
+tick(uintptr *pc, int32 n)
+{
+	add(prof, pc, n);
+}
+
+// add adds the stack trace to the profile.
+// It is called from signal handlers and other limited environments
+// and cannot allocate memory or acquire locks that might be
+// held at the time of the signal, nor can it use substantial amounts
+// of stack.  It is allowed to call evict.
+static void
+add(Profile *p, uintptr *pc, int32 n)
+{
+	int32 i, j;
+	uintptr h, x;
+	Bucket *b;
+	Entry *e;
+
+	if(n > MaxStack)
+		n = MaxStack;
+	
+	// Compute hash.
+	h = 0;
+	for(i=0; i<n; i++) {
+		h = h<<8 | (h>>(8*(sizeof(h)-1)));
+		x = pc[i];
+		h += x*31 + x*7 + x*3;
+	}
+	p->count++;
+
+	// Add to entry count if already present in table.
+	b = &p->hash[h%HashSize];
+	for(i=0; i<Assoc; i++) {
+		e = &b->entry[i];
+		if(e->depth != (uintptr)n)	
+			continue;
+		for(j=0; j<n; j++)
+			if(e->stack[j] != pc[j])
+				goto ContinueAssoc;
+		e->count++;
+		return;
+	ContinueAssoc:;
+	}
+
+	// Evict entry with smallest count.
+	e = &b->entry[0];
+	for(i=1; i<Assoc; i++)
+		if(b->entry[i].count < e->count)
+			e = &b->entry[i];
+	if(e->count > 0) {
+		if(!evict(p, e)) {
+			// Could not evict entry.  Record lost stack.
+			p->lost++;
+			p->totallost++;
+			return;
+		}
+		p->evicts++;
+	}
+	
+	// Reuse the newly evicted entry.
+	e->depth = n;
+	e->count = 1;
+	for(i=0; i<n; i++)
+		e->stack[i] = pc[i];
+}
+
+// evict copies the given entry's data into the log, so that
+// the entry can be reused.  evict is called from add, which
+// is called from the profiling signal handler, so it must not
+// allocate memory or block.  It is safe to call flushLog.
+// evict returns true if the entry was copied to the log,
+// false if there was no room available.
+static bool
+evict(Profile *p, Entry *e)
+{
+	int32 i, d, nslot;
+	uintptr *log, *q;
+	
+	d = e->depth;
+	nslot = d+2;
+	log = p->log[p->toggle];
+	if(p->nlog+nslot > nelem(p->log[0])) {
+		if(!flushlog(p))
+			return false;
+		log = p->log[p->toggle];
+	}
+	
+	q = log+p->nlog;
+	*q++ = e->count;
+	*q++ = d;
+	for(i=0; i<d; i++)
+		*q++ = e->stack[i];
+	p->nlog = q - log;
+	e->count = 0;
+	return true;
+}
+
+// flushlog tries to flush the current log and switch to the other one.
+// flushlog is called from evict, called from add, called from the signal handler,
+// so it cannot allocate memory or block.  It can try to swap logs with
+// the writing goroutine, as explained in the comment at the top of this file.
+static bool
+flushlog(Profile *p)
+{
+	uintptr *log, *q;
+
+	if(!runtime_cas(&p->handoff, 0, p->nlog))
+		return false;
+	runtime_notewakeup(&p->wait);
+
+	p->toggle = 1 - p->toggle;
+	log = p->log[p->toggle];
+	q = log;
+	if(p->lost > 0) {
+		*q++ = p->lost;
+		*q++ = 1;
+		*q++ = (uintptr)LostProfileData;
+	}
+	p->nlog = q - log;
+	return true;
+}
+
+// getprofile blocks until the next block of profiling data is available
+// and returns it as a []byte.  It is called from the writing goroutine.
+Slice
+getprofile(Profile *p)
+{
+	uint32 i, j, n;
+	Slice ret;
+	Bucket *b;
+	Entry *e;
+
+	ret.array = nil;
+	ret.len = 0;
+	ret.cap = 0;
+	
+	if(p == nil)	
+		return ret;
+
+	if(p->wholding) {
+		// Release previous log to signal handling side.
+		// Loop because we are racing against setprofile(off).
+		for(;;) {
+			n = p->handoff;
+			if(n == 0) {
+				runtime_printf("runtime: phase error during cpu profile handoff\n");
+				return ret;
+			}
+			if(n & 0x80000000) {
+				p->wtoggle = 1 - p->wtoggle;
+				p->wholding = false;
+				p->flushing = true;
+				goto flush;
+			}
+			if(runtime_cas(&p->handoff, n, 0))
+				break;
+		}
+		p->wtoggle = 1 - p->wtoggle;
+		p->wholding = false;
+	}
+	
+	if(p->flushing)
+		goto flush;
+	
+	if(!p->on && p->handoff == 0)
+		return ret;
+
+	// Wait for new log.
+	// runtime·entersyscall();
+	runtime_notesleep(&p->wait);
+	// runtime·exitsyscall();
+	runtime_noteclear(&p->wait);
+
+	n = p->handoff;
+	if(n == 0) {
+		runtime_printf("runtime: phase error during cpu profile wait\n");
+		return ret;
+	}
+	if(n == 0x80000000) {
+		p->flushing = true;
+		goto flush;
+	}
+	n &= ~0x80000000;
+
+	// Return new log to caller.
+	p->wholding = true;
+
+	ret.array = (byte*)p->log[p->wtoggle];
+	ret.len = n*sizeof(uintptr);
+	ret.cap = ret.len;
+	return ret;
+
+flush:
+	// In flush mode.
+	// Add is no longer being called.  We own the log.
+	// Also, p->handoff is non-zero, so flushlog will return false.
+	// Evict the hash table into the log and return it.
+	for(i=0; i<HashSize; i++) {
+		b = &p->hash[i];
+		for(j=0; j<Assoc; j++) {
+			e = &b->entry[j];
+			if(e->count > 0 && !evict(p, e)) {
+				// Filled the log.  Stop the loop and return what we've got.
+				goto breakflush;
+			}
+		}
+	}
+breakflush:
+
+	// Return pending log data.
+	if(p->nlog > 0) {
+		// Note that we're using toggle now, not wtoggle,
+		// because we're working on the log directly.
+		ret.array = (byte*)p->log[p->toggle];
+		ret.len = p->nlog*sizeof(uintptr);
+		ret.cap = ret.len;
+		p->nlog = 0;
+		return ret;
+	}
+
+	// Made it through the table without finding anything to log.
+	// Finally done.  Clean up and return nil.
+	p->flushing = false;
+	if(!runtime_cas(&p->handoff, p->handoff, 0))
+		runtime_printf("runtime: profile flush racing with something\n");
+	return ret;  // set to nil at top of function
+}
+
+extern Slice runtime_CPUProfile(void)
+     __asm__("libgo_runtime.runtime.CPUProfile");
+
+// CPUProfile returns the next cpu profile block as a []byte.
+// The user documentation is in debug.go.
+Slice
+runtime_CPUProfile(void)
+{
+	return getprofile(prof);
+}
diff -r d71c93d01702 libgo/runtime/go-note.c
--- a/libgo/runtime/go-note.c	Sun Mar 27 11:06:07 2011 -0700
+++ b/libgo/runtime/go-note.c	Sun Mar 27 12:07:37 2011 -0700
@@ -21,7 +21,7 @@ 
    notewakeup.  */
 
 void
-noteclear (Note* n)
+runtime_noteclear (Note* n)
 {
   int32 i;
 
@@ -37,7 +37,7 @@ 
 /* Wait until notewakeup is called.  */
 
 void
-notesleep (Note* n)
+runtime_notesleep (Note* n)
 {
   int32 i;
 
@@ -57,7 +57,7 @@ 
 /* Wake up every thread sleeping on the note.  */
 
 void
-notewakeup (Note *n)
+runtime_notewakeup (Note *n)
 {
   int32 i;
 
diff -r d71c93d01702 libgo/runtime/go-signal.c
--- a/libgo/runtime/go-signal.c	Sun Mar 27 11:06:07 2011 -0700
+++ b/libgo/runtime/go-signal.c	Sun Mar 27 12:07:37 2011 -0700
@@ -6,6 +6,7 @@ 
 
 #include <signal.h>
 #include <stdlib.h>
+#include <sys/time.h>
 
 #include "go-assert.h"
 #include "go-panic.h"
@@ -13,10 +14,8 @@ 
 
 #include "runtime.h"
 
-#undef int 
-
-#ifndef SA_ONSTACK
-#define SA_ONSTACK 0
+#ifndef SA_RESTART
+  #define SA_RESTART 0
 #endif
 
 /* What to do for a signal.  */
@@ -27,68 +26,70 @@ 
   int sig;
   /* Nonzero if the signal should be ignored.  */
   _Bool ignore;
+  /* Nonzero if we should restart system calls.  */
+  _Bool restart;
 };
 
 /* What to do for signals.  */
 
 static struct sigtab signals[] =
 {
-  { SIGHUP, 0 },
-  { SIGINT, 0 },
-  { SIGALRM, 1 },
-  { SIGTERM, 0 },
+  { SIGHUP, 0, 1 },
+  { SIGINT, 0, 1 },
+  { SIGALRM, 1, 1 },
+  { SIGTERM, 0, 1 },
 #ifdef SIGBUS
-  { SIGBUS, 0 },
+  { SIGBUS, 0, 0 },
 #endif
 #ifdef SIGFPE
-  { SIGFPE, 0 },
+  { SIGFPE, 0, 0 },
 #endif
 #ifdef SIGUSR1
-  { SIGUSR1, 1 },
+  { SIGUSR1, 1, 1 },
 #endif
 #ifdef SIGSEGV
-  { SIGSEGV, 0 },
+  { SIGSEGV, 0, 0 },
 #endif
 #ifdef SIGUSR2
-  { SIGUSR2, 1 },
+  { SIGUSR2, 1, 1 },
 #endif
 #ifdef SIGPIPE
-  { SIGPIPE, 1 },
+  { SIGPIPE, 1, 0 },
 #endif
 #ifdef SIGCHLD
-  { SIGCHLD, 1 },
+  { SIGCHLD, 1, 1 },
 #endif
 #ifdef SIGTSTP
-  { SIGTSTP, 1 },
+  { SIGTSTP, 1, 1 },
 #endif
 #ifdef SIGTTIN
-  { SIGTTIN, 1 },
+  { SIGTTIN, 1, 1 },
 #endif
 #ifdef SIGTTOU
-  { SIGTTOU, 1 },
+  { SIGTTOU, 1, 1 },
 #endif
 #ifdef SIGURG
-  { SIGURG, 1 },
+  { SIGURG, 1, 1 },
 #endif
 #ifdef SIGXCPU
-  { SIGXCPU, 1 },
+  { SIGXCPU, 1, 1 },
 #endif
 #ifdef SIGXFSZ
-  { SIGXFSZ, 1 },
+  { SIGXFSZ, 1, 1 },
 #endif
 #ifdef SIGVTARLM
-  { SIGVTALRM, 1 },
+  { SIGVTALRM, 1, 1 },
 #endif
 #ifdef SIGWINCH
-  { SIGWINCH, 1 },
+  { SIGWINCH, 1, 1 },
 #endif
 #ifdef SIGIO
-  { SIGIO, 1 },
+  { SIGIO, 1, 1 },
 #endif
 #ifdef SIGPWR
-  { SIGPWR, 1 },
+  { SIGPWR, 1, 1 },
 #endif
-  { -1, 0 }
+  { -1, 0, 0 }
 };
 
 /* The Go signal handler.  */
@@ -99,6 +100,13 @@ 
   const char *msg;
   int i;
 
+  if (sig == SIGPROF)
+    {
+      /* FIXME.  */
+      runtime_sigprof (0, 0, nil);
+      return;
+    }
+
   /* FIXME: Should check siginfo for more information when
      available.  */
   msg = NULL;
@@ -192,6 +200,48 @@ 
   __go_assert (i == 0);
 
   for (i = 0; signals[i].sig != -1; ++i)
-    if (sigaction (signals[i].sig, &sa, NULL) != 0)
-      __go_assert (0);
+    {
+      sa.sa_flags = signals[i].restart ? SA_RESTART : 0;
+      if (sigaction (signals[i].sig, &sa, NULL) != 0)
+	__go_assert (0);
+    }
 }
+
+void
+runtime_resetcpuprofiler(int32 hz)
+{
+  struct itimerval it;
+  struct sigaction sa;
+  int i;
+
+  memset (&it, 0, sizeof it);
+
+  memset (&sa, 0, sizeof sa);
+  i = sigfillset (&sa.sa_mask);
+  __go_assert (i == 0);
+
+  if (hz == 0)
+    {
+      i = setitimer (ITIMER_PROF, &it, NULL);
+      __go_assert (i == 0);
+
+      sa.sa_handler = SIG_IGN;
+      i = sigaction (SIGPROF, &sa, NULL);
+      __go_assert (i == 0);
+    }
+  else
+    {
+      sa.sa_handler = sighandler;
+      sa.sa_flags = SA_RESTART;
+      i = sigaction (SIGPROF, &sa, NULL);
+      __go_assert (i == 0);
+
+      it.it_interval.tv_sec = 0;
+      it.it_interval.tv_usec = 1000000 / hz;
+      it.it_value = it.it_interval;
+      i = setitimer (ITIMER_PROF, &it, NULL);
+      __go_assert (i == 0);
+    }
+
+  m->profilehz = hz;
+}
diff -r d71c93d01702 libgo/runtime/proc.c
--- a/libgo/runtime/proc.c	Sun Mar 27 11:06:07 2011 -0700
+++ b/libgo/runtime/proc.c	Sun Mar 27 12:07:37 2011 -0700
@@ -14,3 +14,60 @@ 
 #endif
 
 __thread M *m = &m0;
+
+static struct {
+	Lock;
+	void (*fn)(uintptr*, int32);
+	int32 hz;
+	uintptr pcbuf[100];
+} prof;
+
+void
+runtime_sigprof(uint8 *pc __attribute__ ((unused)),
+		uint8 *sp __attribute__ ((unused)),
+		uint8 *lr __attribute__ ((unused)))
+{
+	int32 n;
+	
+	if(prof.fn == nil || prof.hz == 0)
+		return;
+	
+	runtime_lock(&prof);
+	if(prof.fn == nil) {
+		runtime_unlock(&prof);
+		return;
+	}
+	n = 0;
+	// n = runtime·gentraceback(pc, sp, lr, gp, 0, prof.pcbuf, nelem(prof.pcbuf));
+	if(n > 0)
+		prof.fn(prof.pcbuf, n);
+	runtime_unlock(&prof);
+}
+
+void
+runtime_setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
+{
+	// Force sane arguments.
+	if(hz < 0)
+		hz = 0;
+	if(hz == 0)
+		fn = nil;
+	if(fn == nil)
+		hz = 0;
+
+	// Stop profiler on this cpu so that it is safe to lock prof.
+	// if a profiling signal came in while we had prof locked,
+	// it would deadlock.
+	runtime_resetcpuprofiler(0);
+
+	runtime_lock(&prof);
+	prof.fn = fn;
+	prof.hz = hz;
+	runtime_unlock(&prof);
+	// runtime_lock(&runtime_sched);
+	// runtime_sched.profilehz = hz;
+	// runtime_unlock(&runtime_sched);
+	
+	if(hz != 0)
+		runtime_resetcpuprofiler(hz);
+}
diff -r d71c93d01702 libgo/runtime/runtime.h
--- a/libgo/runtime/runtime.h	Sun Mar 27 11:06:07 2011 -0700
+++ b/libgo/runtime/runtime.h	Sun Mar 27 12:07:37 2011 -0700
@@ -102,6 +102,7 @@ 
 	int32	gcing_for_prof;
 	int32	holds_finlock;
 	int32	gcing_for_finlock;
+	int32	profilehz;
 	MCache	*mcache;
 
 	/* For the list of all threads.  */
@@ -163,9 +164,9 @@ 
  * once notewakeup has been called, all the notesleeps
  * will return.  future notesleeps will return immediately.
  */
-void	noteclear(Note*);
-void	notesleep(Note*);
-void	notewakeup(Note*);
+void	runtime_noteclear(Note*);
+void	runtime_notesleep(Note*);
+void	runtime_notewakeup(Note*);
 
 /* Functions.  */
 #define runtime_printf printf
@@ -187,6 +188,10 @@ 
 #define runtime_cas(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
 #define runtime_casp(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
 
+void	runtime_sigprof(uint8 *pc, uint8 *sp, uint8 *lr);
+void	runtime_resetcpuprofiler(int32);
+void	runtime_setcpuprofilerate(void(*)(uintptr*, int32), int32);
+
 struct __go_func_type;
 void reflect_call(const struct __go_func_type *, const void *, _Bool, void **,
 		  void **)
diff -r d71c93d01702 libgo/runtime/sigqueue.goc
--- a/libgo/runtime/sigqueue.goc	Sun Mar 27 11:06:07 2011 -0700
+++ b/libgo/runtime/sigqueue.goc	Sun Mar 27 12:07:37 2011 -0700
@@ -51,7 +51,7 @@ 
 void
 siginit(void)
 {
-	noteclear(&sig);
+	runtime_noteclear(&sig);
 }
 
 // Called from sighandler to send a signal back out of the signal handling thread.
@@ -71,7 +71,7 @@ 
 			// Added to queue.
 			// Only send a wakeup for the first signal in each round.
 			if(mask == 0)
-				notewakeup(&sig);
+				runtime_notewakeup(&sig);
 			break;
 		}
 	}
@@ -81,9 +81,9 @@ 
 // Called to receive a bitmask of queued signals.
 func Sigrecv() (m uint32) {
 	// runtime·entersyscall();
-	notesleep(&sig);
+	runtime_notesleep(&sig);
 	// runtime·exitsyscall();
-	noteclear(&sig);
+	runtime_noteclear(&sig);
 	for(;;) {
 		m = sig.mask;
 		if(runtime_cas(&sig.mask, m, 0))