Patchwork [gccgo] Turn on garbage collector

login
register
mail settings
Submitter Ian Taylor
Date July 23, 2010, 4:51 p.m.
Message ID <mcr7hkmywej.fsf@google.com>
Download mbox | patch
Permalink /patch/59823/
State New
Headers show

Comments

Ian Taylor - July 23, 2010, 4:51 p.m.
This patch finally turns on the garbage collector for gccgo.  This just
uses the same garbage collector as the other Go compiler; it's a simple
stop-the-world mark-and-sweep collector.  It works but it is far too
slow.  I will be committing some straightforward patches to speed it up
for gccgo, then investigating it in more detail.  Committed to gccgo
branch.

Ian
Richard Henderson - July 23, 2010, 4:54 p.m.
On 07/23/2010 09:51 AM, Ian Lance Taylor wrote:
> +#elif defined(__x86_64__)
> + #ifndef __PIC__
> +  #define SAVE_REGS asm ("" : : : "r12", "r13", "r14", "r15", "rbp", "rbx")
> + #else
> +  #define SAVE_REGS asm ("" : : : "r12", "r13", "r14", "r15", "rbp")
> + #endif
> +#else

rbx is not special for x86_64 with or without -fpic.


r~
Ian Taylor - July 23, 2010, 5:26 p.m.
Richard Henderson <rth@redhat.com> writes:

> On 07/23/2010 09:51 AM, Ian Lance Taylor wrote:
>> +#elif defined(__x86_64__)
>> + #ifndef __PIC__
>> +  #define SAVE_REGS asm ("" : : : "r12", "r13", "r14", "r15", "rbp", "rbx")
>> + #else
>> +  #define SAVE_REGS asm ("" : : : "r12", "r13", "r14", "r15", "rbp")
>> + #endif
>> +#else
>
> rbx is not special for x86_64 with or without -fpic.

Good point, will fix.

Ian

Patch

diff -r c7120a8d41cc go/gogo-tree.cc
--- a/go/gogo-tree.cc	Fri Jul 23 09:44:07 2010 -0700
+++ b/go/gogo-tree.cc	Fri Jul 23 09:45:52 2010 -0700
@@ -4218,16 +4218,21 @@ 
   tree trampoline_type = Gogo::trampoline_type_tree();
   tree trampoline_size = TYPE_SIZE_UNIT(trampoline_type);
 
+  closure = save_expr(closure);
+
   // We allocate the trampoline using a special function which will
   // mark it as executable.
   static tree trampoline_fndecl;
   tree x = Gogo::call_builtin(&trampoline_fndecl,
 			      location,
 			      "__go_allocate_trampoline",
-			      1,
+			      2,
 			      ptr_type_node,
 			      size_type_node,
-			      trampoline_size);
+			      trampoline_size,
+			      ptr_type_node,
+			      fold_convert_loc(location, ptr_type_node,
+					       closure));
 
   x = save_expr(x);
 
diff -r c7120a8d41cc libgo/Makefile.am
--- a/libgo/Makefile.am	Fri Jul 23 09:44:07 2010 -0700
+++ b/libgo/Makefile.am	Fri Jul 23 09:45:52 2010 -0700
@@ -31,6 +31,10 @@ 
 AM_CFLAGS = -fexceptions -fplan9-extensions $(SPLIT_STACK) $(WARN_CFLAGS) \
 	-I $(srcdir)/../gcc -I $(MULTIBUILDTOP)../../gcc/include
 
+if USING_SPLIT_STACK
+AM_LDFLAGS = -XCClinker $(SPLIT_STACK)
+endif
+
 # Multilib support.
 MAKEOVERRIDES=
 
@@ -285,7 +289,6 @@ 
 	runtime/go-deferred-recover.c \
 	runtime/go-getgoroot.c \
 	runtime/go-go.c \
-	runtime/go-goexit.c \
 	runtime/go-gomaxprocs.c \
 	runtime/go-int-array-to-string.c \
 	runtime/go-int-to-string.c \
@@ -297,6 +300,7 @@ 
 	runtime/go-map-index.c \
 	runtime/go-map-len.c \
 	runtime/go-map-range.c \
+	runtime/go-nanotime.c \
 	runtime/go-new-channel.c \
 	runtime/go-new-interface-object.c \
 	runtime/go-new-interface-pointer.c \
diff -r c7120a8d41cc libgo/runtime/go-go.c
--- a/libgo/runtime/go-go.c	Fri Jul 23 09:44:07 2010 -0700
+++ b/libgo/runtime/go-go.c	Fri Jul 23 09:45:52 2010 -0700
@@ -5,58 +5,185 @@ 
    license that can be found in the LICENSE file.  */
 
 #include <assert.h>
+#include <errno.h>
 #include <limits.h>
+#include <signal.h>
+#include <stdint.h>
 #include <stdlib.h>
 #include <pthread.h>
+#include <semaphore.h>
 
 #include "config.h"
 #include "go-panic.h"
 #include "go-alloc.h"
 #include "runtime.h"
+#include "malloc.h"
 
-#undef int /* FIXME */
+#ifdef USING_SPLIT_STACK
+/* FIXME: This is not declared anywhere.  */
+extern void *__splitstack_find (void *, void *, size_t *, void **, void **,
+				void **);
+#endif
 
-/* What to call.  */
+/* We need to ensure that all callee-saved registers are stored on the
+   stack, in case they hold pointers.  */
 
-struct call
+#if defined(__i386__)
+ #ifndef __PIC__
+  #define SAVE_REGS asm ("" : : : "esi", "edi", "ebx")
+ #else
+  #define SAVE_REGS asm ("" : : : "esi", "edi")
+ #endif
+#elif defined(__x86_64__)
+ #ifndef __PIC__
+  #define SAVE_REGS asm ("" : : : "r12", "r13", "r14", "r15", "rbp", "rbx")
+ #else
+  #define SAVE_REGS asm ("" : : : "r12", "r13", "r14", "r15", "rbp")
+ #endif
+#else
+ #error must define SAVE_REGS
+#endif
+
+/* We stop the threads by sending them the signal GO_SIG_STOP and we
+   start them by sending them the signal GO_SIG_START.  */
+
+#define GO_SIG_START (SIGRTMIN + 1)
+#define GO_SIG_STOP (SIGRTMIN + 2)
+
+/* A doubly linked list of the threads we have started.  */
+
+struct __go_thread_id
 {
+  /* Links.  */
+  struct __go_thread_id *prev;
+  struct __go_thread_id *next;
+  /* True if the thread ID has not yet been filled in.  */
+  _Bool tentative;
+  /* Thread ID.  */
+  pthread_t id;
+  /* Thread's M structure.  */
+  struct M *m;
+  /* If the thread ID has not been filled in, the function we are
+     running.  */
   void (*pfn) (void *);
+  /* If the thread ID has not been filled in, the argument to the
+     function.  */
   void *arg;
-  struct M *m;
 };
 
+static struct __go_thread_id *__go_all_thread_ids;
+
+/* A lock to control access to ALL_THREAD_IDS.  */
+
+static pthread_mutex_t __go_thread_ids_lock = PTHREAD_MUTEX_INITIALIZER;
+
+/* A semaphore used to wait until all the threads have stopped.  */
+
+static sem_t __go_thread_ready_sem;
+
+/* A signal set used to wait until garbage collection is complete.  */
+
+static sigset_t __go_thread_wait_sigset;
+
+/* Remove the current thread from the list of threads.  */
+
+static void
+remove_current_thread (void)
+{
+  struct __go_thread_id *list_entry;
+  int i;
+  
+  list_entry = m->list_entry;
+
+  i = pthread_mutex_lock (&__go_thread_ids_lock);
+  assert (i == 0);
+
+  if (list_entry->prev != NULL)
+    list_entry->prev->next = list_entry->next;
+  else
+    __go_all_thread_ids = list_entry->next;
+  if (list_entry->next != NULL)
+    list_entry->next->prev = list_entry->prev;
+
+  i = pthread_mutex_unlock (&__go_thread_ids_lock);
+  assert (i == 0);
+
+  free (list_entry);
+  m->list_entry = NULL;
+}
+
 /* Start the thread.  */
 
 static void *
 start_go_thread (void *thread_arg)
 {
-  struct call *pc = (struct call *) thread_arg;
+  struct M *newm = (struct M *) thread_arg;
   void (*pfn) (void *);
   void *arg;
+  struct __go_thread_id *list_entry;
+  int i;
 
 #ifdef __rtems__
   __wrap_rtems_task_variable_add ((void **) &m);
   __wrap_rtems_task_variable_add ((void **) &__go_panic_defer);
 #endif
 
-  pfn = pc->pfn;
-  arg = pc->arg;
-  m = pc->m;
-  free (pc);
+  m = newm;
+
+  list_entry = newm->list_entry;
+
+  pfn = list_entry->pfn;
+  arg = list_entry->arg;
+
+#ifndef USING_SPLIT_STACK
+  /* If we don't support split stack, record the current stack as the
+     top of the stack.  There shouldn't be anything relevant to the
+     garbage collector above this point.  */
+  m->gc_sp = (void *) &arg;
+#endif
+
+  /* Finish up the entry on the thread list.  */
+
+  i = pthread_mutex_lock (&__go_thread_ids_lock);
+  assert (i == 0);
+
+  list_entry->id = pthread_self ();
+  list_entry->pfn = NULL;
+  list_entry->arg = NULL;
+  list_entry->tentative = 0;
+
+  i = pthread_mutex_unlock (&__go_thread_ids_lock);
+  assert (i == 0);
 
   (*pfn) (arg);
 
+  remove_current_thread ();
+
   return NULL;
 }
 
+/* The runtime.Goexit function.  */
+
+void Goexit (void) asm ("libgo_runtime.runtime.Goexit");
+
+void
+Goexit (void)
+{
+  remove_current_thread ();
+  pthread_exit (NULL);
+  abort ();
+}
+
 /* Implement the go statement.  */
 
 void
-__go_go (void (*pfn) (void*), void* arg)
+__go_go (void (*pfn) (void*), void *arg)
 {
   int i;
   pthread_attr_t attr;
-  struct call *pc;
+  struct M *newm;
+  struct __go_thread_id *list_entry;
+  pthread_t tid;
 
   i = pthread_attr_init (&attr);
   assert (i == 0);
@@ -76,17 +203,419 @@ 
   assert (i == 0);
 #endif
 
-  pc = malloc (sizeof (struct call));
-  pc->pfn = pfn;
-  pc->arg = arg;
-  pc->m = __go_alloc (sizeof (M));
-  __builtin_memset (pc->m, 0, sizeof (M));
-  pc->m->mcache = allocmcache ();
+  newm = __go_alloc (sizeof (M));
+  newm->mcache = allocmcache ();
 
-  pthread_t tid;
-  i = pthread_create (&tid, &attr, start_go_thread, pc);
+  list_entry = malloc (sizeof (struct __go_thread_id));
+  list_entry->prev = NULL;
+  list_entry->next = NULL;
+  list_entry->tentative = 1;
+  list_entry->m = newm;
+  list_entry->pfn = pfn;
+  list_entry->arg = arg;
+
+  newm->list_entry = list_entry;
+
+  /* Add the thread to the list of all threads, marked as tentative
+     since it is not yet ready to go.  */
+  i = pthread_mutex_lock (&__go_thread_ids_lock);
+  assert (i == 0);
+
+  if (__go_all_thread_ids != NULL)
+    __go_all_thread_ids->prev = list_entry;
+  list_entry->next = __go_all_thread_ids;
+  __go_all_thread_ids = list_entry;
+
+  i = pthread_mutex_unlock (&__go_thread_ids_lock);
+  assert (i == 0);
+
+  /* Start the thread.  */
+  i = pthread_create (&tid, &attr, start_go_thread, newm);
   assert (i == 0);
 
   i = pthread_attr_destroy (&attr);
   assert (i == 0);
 }
+
+/* This is the signal handler for GO_SIG_START.  The garbage collector
+   will send this signal to a thread when it wants the thread to
+   start.  We don't have to actually do anything here, but we need a
+   signal handler since ignoring the signal will mean that the
+   sigsuspend will never see it.  */
+
+static void
+gc_start_handler (int sig __attribute__ ((unused)))
+{
+}
+
+/* Tell the garbage collector that we are ready, and wait for the
+   garbage collector to tell us that it is done.  This may be called
+   by a signal handler, so it is restricted to using functions which
+   are async cancel safe.  */
+
+static void
+stop_for_gc (void)
+{
+  int i;
+
+  /* Tell the garbage collector about our stack.  */
+#ifdef USING_SPLIT_STACK
+  m->gc_sp = __splitstack_find (NULL, NULL, &m->gc_len,
+				&m->gc_next_segment, &m->gc_next_sp,
+				&m->gc_initial_sp);
+#else
+  {
+    uintptr_t top = (uintptr_t) m->gc_sp;
+    uintptr_t bottom = (uintptr_t) &top;
+    if (top < bottom)
+      {
+	m->gc_next_sp = m->gc_sp;
+	m->gc_len = bottom - top;
+      }
+    else
+      {
+	m->gc_next_sp = (void *) bottom;
+	m->gc_len = top - bottom;
+      }
+  }
+#endif
+
+  /* FIXME: Perhaps we should just move __go_panic_defer into M.  */
+  m->gc_panic_defer = __go_panic_defer;
+
+  /* Tell the garbage collector that we are ready by posting to the
+     semaphore.  */
+  i = sem_post (&__go_thread_ready_sem);
+  assert (i == 0);
+
+  /* Wait for the garbage collector to tell us to continue.  */
+  sigsuspend (&__go_thread_wait_sigset);
+}
+
+/* This is the signal handler for GO_SIG_STOP.  The garbage collector
+   will send this signal to a thread when it wants the thread to
+   stop.  */
+
+static void
+gc_stop_handler (int sig __attribute__ ((unused)))
+{
+  struct M *pm = m;
+
+  if (__sync_bool_compare_and_swap (&pm->mallocing, 1, 1))
+    {
+      /* m->mallocing was already non-zero.  We can't interrupt the
+	 thread while it is running a malloc.  Instead, tell it to
+	 call back to us when done.  */
+      __sync_bool_compare_and_swap (&pm->gcing, 0, 1);
+      return;
+    }
+
+  if (__sync_bool_compare_and_swap (&pm->nomemprof, 1, 1))
+    {
+      /* Similarly, we can't interrupt the thread while it is building
+	 profiling information.  Otherwise we can get into a deadlock
+	 when sweepspan calls MProf_Free.  */
+      __sync_bool_compare_and_swap (&pm->gcing, 0, 1);
+      return;
+    }
+
+  stop_for_gc ();
+}
+
+/* This is called by malloc when it gets a signal during the malloc
+   call itself.  */
+
+int
+__go_run_goroutine_gc (int r)
+{
+  /* Force callee-saved registers to be saved on the stack.  This is
+     not needed if we are invoked from the signal handler, but it is
+     needed if we are called directly, since otherwise we might miss
+     something that a function somewhere up the call stack is holding
+     in a register.  */
+  SAVE_REGS;
+
+  stop_for_gc ();
+
+  /* This avoids tail recursion, to make sure that the saved registers
+     are on the stack.  */
+  return r;
+}
+
+/* Stop all the other threads for garbage collection.  */
+
+void
+stoptheworld (void)
+{
+  int i;
+  pthread_t me;
+  int c;
+  struct __go_thread_id *p;
+
+  i = pthread_mutex_lock (&__go_thread_ids_lock);
+  assert (i == 0);
+
+  me = pthread_self ();
+  c = 0;
+  p = __go_all_thread_ids;
+  while (p != NULL)
+    {
+      if (p->tentative || pthread_equal (me, p->id))
+	p = p->next;
+      else
+	{
+	  i = pthread_kill (p->id, GO_SIG_STOP);
+	  if (i == 0)
+	    {
+	      ++c;
+	      p = p->next;
+	    }
+	  else if (i == ESRCH)
+	    {
+	      struct __go_thread_id *next;
+
+	      /* This thread died somehow.  Remove it from the
+		 list.  */
+	      next = p->next;
+	      if (p->prev != NULL)
+		p->prev->next = next;
+	      else
+		__go_all_thread_ids = next;
+	      if (next != NULL)
+		next->prev = p->prev;
+	      free (p);
+	      p = next;
+	    }
+	  else
+	    abort ();
+	}
+    }
+
+  /* Wait for each thread to receive the signal and post to the
+     semaphore.  If a thread receives the signal but contrives to die
+     before it posts to the semaphore, then we will hang forever
+     here.  */
+
+  while (c > 0)
+    {
+      i = sem_wait (&__go_thread_ready_sem);
+      if (i < 0 && errno == EINTR)
+	continue;
+      assert (i == 0);
+      --c;
+    }
+
+  /* The gc_panic_defer field should now be set for all M's except the
+     one in this thread.  Set this one now.  */
+  m->gc_panic_defer = __go_panic_defer;
+
+  /* Leave with __go_thread_ids_lock held.  */
+}
+
+/* Scan all the stacks for garbage collection.  This should be called
+   with __go_thread_ids_lock held.  */
+
+void
+__go_scanstacks (void (*scan) (int32_t, unsigned char *, int64_t))
+{
+  pthread_t me;
+  struct __go_thread_id *p;
+
+  /* Make sure all the registers for this thread are on the stack.  */
+  SAVE_REGS;
+
+  me = pthread_self ();
+  for (p = __go_all_thread_ids; p != NULL; p = p->next)
+    {
+      _Bool isme = 0;
+
+      if (p->tentative)
+	{
+	  /* The goroutine function and argument can be allocated on
+	     the heap, so we have to scan them for a thread that has
+	     not yet started.  */
+	  scan (0, (void *) &p->pfn, sizeof (void *));
+	  scan (0, (void *) &p->arg, sizeof (void *));
+	  scan (0, (void *) &p->m, sizeof (void *));
+	  continue;
+	}
+
+#ifdef USING_SPLIT_STACK
+
+      void *sp;
+      size_t len;
+      void *next_segment;
+      void *next_sp;
+      void *initial_sp;
+
+      if (pthread_equal (me, p->id))
+	{
+	  isme = 1;
+	  next_segment = NULL;
+	  next_sp = NULL;
+	  initial_sp = NULL;
+	  sp = __splitstack_find (NULL, NULL, &len, &next_segment,
+				  &next_sp, &initial_sp);
+	}
+      else
+	{
+	  sp = p->m->gc_sp;
+	  len = p->m->gc_len;
+	  next_segment = p->m->gc_next_segment;
+	  next_sp = p->m->gc_next_sp;
+	  initial_sp = p->m->gc_initial_sp;
+	}
+
+      while (sp != NULL)
+	{
+	  scan (0, sp, len);
+	  sp = __splitstack_find (next_segment, next_sp, &len,
+				  &next_segment, &next_sp, &initial_sp);
+	}
+
+#else /* !defined(USING_SPLIT_STACK) */
+
+      if (pthread_equal (me, p->id))
+	{
+	  isme = 1;
+	  uintptr_t top = (uintptr_t) m->gc_sp;
+	  uintptr_t bottom = (uintptr_t) &top;
+	  if (top < bottom)
+	    scan (0, m->gc_sp, bottom - top);
+	  else
+	    scan (0, (void *) bottom, top - bottom);
+	}
+      else
+	{
+	  scan (0, p->m->gc_next_sp, p->m->gc_len);
+	}
+	
+#endif /* !defined(USING_SPLIT_STACK) */
+
+      /* Also scan the M structure while we're at it.  */
+
+      scan (0, (void *) &p->m, sizeof (void *));
+    }
+}
+
+/* Release all the memory caches.  This is called with
+   __go_thread_ids_lock held.  */
+
+void
+__go_stealcache(void)
+{
+  struct __go_thread_id *p;
+
+  for (p = __go_all_thread_ids; p != NULL; p = p->next)
+    MCache_ReleaseAll(p->m->mcache);
+}
+
+/* Start the other threads after garbage collection.  */
+
+void
+starttheworld (void)
+{
+  int i;
+  pthread_t me;
+  struct __go_thread_id *p;
+
+  /* Here __go_thread_ids_lock should be held.  */
+
+  me = pthread_self ();
+  p = __go_all_thread_ids;
+  while (p != NULL)
+    {
+      if (p->tentative || pthread_equal (me, p->id))
+	p = p->next;
+      else
+	{
+	  i = pthread_kill (p->id, GO_SIG_START);
+	  if (i == 0)
+	    p = p->next;
+	  else
+	    abort ();
+	}
+    }
+
+  i = pthread_mutex_unlock (&__go_thread_ids_lock);
+  assert (i == 0);
+}
+
+/* Initialize the interaction between goroutines and the garbage
+   collector.  */
+
+void
+__go_gc_goroutine_init (void *sp __attribute__ ((unused)))
+{
+  struct __go_thread_id *list_entry;
+  int i;
+  sigset_t sset;
+  struct sigaction act;
+
+  /* Add the initial thread to the list of all threads.  */
+
+  list_entry = malloc (sizeof (struct __go_thread_id));
+  list_entry->prev = NULL;
+  list_entry->next = NULL;
+  list_entry->tentative = 0;
+  list_entry->id = pthread_self ();
+  list_entry->m = m;
+  list_entry->pfn = NULL;
+  list_entry->arg = NULL;
+  __go_all_thread_ids = list_entry;
+
+  /* Initialize the semaphore which signals when threads are ready for
+     GC.  */
+
+  i = sem_init (&__go_thread_ready_sem, 0, 0);
+  assert (i == 0);
+
+  /* Fetch the current signal mask.  */
+
+  i = sigemptyset (&sset);
+  assert (i == 0);
+  i = sigprocmask (SIG_BLOCK, NULL, &sset);
+  assert (i == 0);
+
+  /* Make sure that GO_SIG_START is not blocked and GO_SIG_STOP is
+     blocked, and save that set for use with later calls to sigsuspend
+     while waiting for GC to complete.  */
+
+  i = sigdelset (&sset, GO_SIG_START);
+  assert (i == 0);
+  i = sigaddset (&sset, GO_SIG_STOP);
+  assert (i == 0);
+  __go_thread_wait_sigset = sset;
+
+  /* Block GO_SIG_START and unblock GO_SIG_STOP, and use that for
+     the process signal mask.  */
+
+  i = sigaddset (&sset, GO_SIG_START);
+  assert (i == 0);
+  i = sigdelset (&sset, GO_SIG_STOP);
+  assert (i == 0);
+  i = sigprocmask (SIG_SETMASK, &sset, NULL);
+  assert (i == 0);
+
+  /* Install the signal handlers.  */
+  memset (&act, 0, sizeof act);
+  i = sigemptyset (&act.sa_mask);
+  assert (i == 0);
+
+  act.sa_handler = gc_start_handler;
+  act.sa_flags = SA_RESTART;
+  i = sigaction (GO_SIG_START, &act, NULL);
+  assert (i == 0);
+
+  /* We could consider using an alternate signal stack for this.  The
+     function does not use much stack space, so it may be OK.  */
+  act.sa_handler = gc_stop_handler;
+  i = sigaction (GO_SIG_STOP, &act, NULL);
+  assert (i == 0);
+
+#ifndef USING_SPLIT_STACK
+  /* If we don't support split stack, record the current stack as the
+     top of the stack.  */
+  m->gc_sp = sp;
+#endif
+}
diff -r c7120a8d41cc libgo/runtime/go-goexit.c
--- a/libgo/runtime/go-goexit.c	Fri Jul 23 09:44:07 2010 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,17 +0,0 @@ 
-/* go-goexit.c -- the runtime.Goexit function.
-
-   Copyright 2009 The Go Authors. All rights reserved.
-   Use of this source code is governed by a BSD-style
-   license that can be found in the LICENSE file.  */
-
-#include <assert.h>
-#include <pthread.h>
-
-void Goexit (void) asm ("libgo_runtime.runtime.Goexit");
-
-void
-Goexit (void)
-{
-  pthread_exit (NULL);
-  assert (0);
-}
diff -r c7120a8d41cc libgo/runtime/go-main.c
--- a/libgo/runtime/go-main.c	Fri Jul 23 09:44:07 2010 -0700
+++ b/libgo/runtime/go-main.c	Fri Jul 23 09:45:52 2010 -0700
@@ -19,6 +19,7 @@ 
 #include "go-string.h"
 
 #include "runtime.h"
+#include "malloc.h"
 
 #undef int
 #undef char
@@ -47,6 +48,7 @@ 
   struct __go_string *values;
 
   mallocinit ();
+  __go_gc_goroutine_init (&argc);
 
   Args.__count = argc;
   Args.__capacity = argc;
@@ -78,6 +80,9 @@ 
   srand ((unsigned int) time (NULL));
 #endif
   __go_init_main ();
+
+  __go_enable_gc ();
+
   real_main ();
 
   return 0;
diff -r c7120a8d41cc libgo/runtime/go-nanotime.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libgo/runtime/go-nanotime.c	Fri Jul 23 09:45:52 2010 -0700
@@ -0,0 +1,22 @@ 
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Return time in nanoseconds.  This is only used for computing runtime.
+
+#include <assert.h>
+#include <sys/time.h>
+
+#include "runtime.h"
+
+int64
+nanotime (void)
+{
+  int i;
+  struct timeval tv;
+
+  i = gettimeofday (&tv, NULL);
+  assert (i == 0);
+
+  return (int64) tv.tv_sec * 1000000000 + (int64) tv.tv_usec * 1000;
+}
diff -r c7120a8d41cc libgo/runtime/go-reflect-call.c
--- a/libgo/runtime/go-reflect-call.c	Fri Jul 23 09:44:07 2010 -0700
+++ b/libgo/runtime/go-reflect-call.c	Fri Jul 23 09:45:52 2010 -0700
@@ -12,6 +12,7 @@ 
 
 #include "go-alloc.h"
 #include "go-type.h"
+#include "runtime.h"
 
 /* Forward declaration.  */
 
@@ -316,13 +317,9 @@ 
    address is FUNC_ADDR.  PARAMS is an array of parameter addresses.
    RESULTS is an array of result addresses.  */
 
-extern void call (const struct __go_func_type *, const void *, _Bool, void **,
-		  void **)
-  asm ("libgo_reflect.reflect.call");
-
 void
-call (const struct __go_func_type *func_type, const void *func_addr,
-      _Bool is_interface, void **params, void **results)
+reflect_call (const struct __go_func_type *func_type, const void *func_addr,
+	      _Bool is_interface, void **params, void **results)
 {
   ffi_cif cif;
   unsigned char *call_result;
@@ -334,7 +331,10 @@ 
 
   ffi_call (&cif, func_addr, call_result, params);
 
-  go_set_results (func_type, call_result, results);
+  /* Some day we may need to free result values if RESULTS is
+     NULL.  */
+  if (results != NULL)
+    go_set_results (func_type, call_result, results);
 
   free (call_result);
 }
diff -r c7120a8d41cc libgo/runtime/go-semacquire.c
--- a/libgo/runtime/go-semacquire.c	Fri Jul 23 09:44:07 2010 -0700
+++ b/libgo/runtime/go-semacquire.c	Fri Jul 23 09:45:52 2010 -0700
@@ -9,6 +9,8 @@ 
 
 #include <pthread.h>
 
+#include "runtime.h"
+
 /* We use a single global lock and condition variable.  This is
    painful, since it will cause unnecessary contention, but is hard to
    avoid in a portable manner.  On Linux we can use futexes, but they
@@ -23,14 +25,14 @@ 
    false.  */
 
 static _Bool
-acquire (int32_t *addr)
+acquire (uint32 *addr)
 {
   while (1)
     {
-      int32_t val;
+      uint32 val;
 
       val = *addr;
-      if (val <= 0)
+      if (val == 0)
 	return 0;
       if (__sync_bool_compare_and_swap (addr, val, val - 1))
 	return 1;
@@ -41,10 +43,8 @@ 
    We have acquired the semaphore when we have decremented the count
    and it remains nonnegative.  */
 
-void Semacquire (int32_t *) asm ("libgo_runtime.runtime.Semacquire");
-
 void
-Semacquire (int32_t *addr)
+semacquire (uint32 *addr)
 {
   while (1)
     {
@@ -85,10 +85,8 @@ 
    positive, we signal the condition variable to wake up another
    process.  */
 
-void Semrelease (int32_t *) asm ("libgo_runtime.runtime.Semrelease");
-
 void
-Semrelease (int32_t *addr)
+semrelease (uint32 *addr)
 {
   int32_t val;
 
diff -r c7120a8d41cc libgo/runtime/go-trampoline.c
--- a/libgo/runtime/go-trampoline.c	Fri Jul 23 09:44:07 2010 -0700
+++ b/libgo/runtime/go-trampoline.c	Fri Jul 23 09:45:52 2010 -0700
@@ -22,10 +22,11 @@ 
    needs to be more system dependent.  */
 
 void *
-__go_allocate_trampoline (size_t size)
+__go_allocate_trampoline (size_t size, void *closure)
 {
   unsigned int page_size;
   void *ret;
+  size_t off;
 
   page_size = getpagesize ();
   assert (page_size >= size);
@@ -33,6 +34,13 @@ 
   ret = (void *) (((uintptr_t) ret + page_size - 1)
 		  & ~ ((uintptr_t) page_size - 1));
 
+  /* Because the garbage collector only looks at correct address
+     offsets, we need to ensure that it will see the closure
+     address.  */
+  off = ((size + sizeof (void *) - 1) / sizeof (void *)) * sizeof (void *);
+  assert (size + off + sizeof (void *) <= page_size);
+  __builtin_memcpy (ret + off, &closure, sizeof (void *));
+
 #ifdef HAVE_SYS_MMAN_H
   {
     int i;
diff -r c7120a8d41cc libgo/runtime/go-unwind.c
--- a/libgo/runtime/go-unwind.c	Fri Jul 23 09:44:07 2010 -0700
+++ b/libgo/runtime/go-unwind.c	Fri Jul 23 09:45:52 2010 -0700
@@ -7,6 +7,7 @@ 
 #include "config.h"
 
 #include <stdlib.h>
+#include <unistd.h>
 
 #include "unwind.h"
 #define NO_SIZE_OF_ENCODED_VALUE
diff -r c7120a8d41cc libgo/runtime/malloc.goc
--- a/libgo/runtime/malloc.goc	Fri Jul 23 09:44:07 2010 -0700
+++ b/libgo/runtime/malloc.goc	Fri Jul 23 09:45:52 2010 -0700
@@ -54,9 +54,8 @@ 
 	void *v;
 	uint32 *ref;
 
-	if(m->mallocing)
+	if(!__sync_bool_compare_and_swap(&m->mallocing, 0, 1))
 		throw("malloc/free - deadlock");
-	m->mallocing = 1;
 	if(size == 0)
 		size = 1;
 
@@ -98,7 +97,10 @@ 
 		ref = &s->gcref0;
 	}
 
-	m->mallocing = 0;
+	__sync_bool_compare_and_swap(&m->mallocing, 1, 0);
+
+	if(__sync_bool_compare_and_swap(&m->gcing, 1, 0))
+		__go_run_goroutine_gc(0);
 
 	if(!(refflag & RefNoProfiling) && (rate = MemProfileRate) > 0) {
 		if(size >= (uint32) rate)
@@ -139,9 +141,8 @@ 
 	if(v == nil)
 		return;
 
-	if(m->mallocing)
+	if(!__sync_bool_compare_and_swap(&m->mallocing, 0, 1))
 		throw("malloc/free - deadlock");
-	m->mallocing = 1;
 
 	if(!mlookup(v, nil, nil, &s, &ref)) {
 		printf("free %p: not an allocated block\n", v);
@@ -171,7 +172,10 @@ 
 		mstats.by_size[sizeclass].nfree++;
 		MCache_Free(c, v, sizeclass, size);
 	}
-	m->mallocing = 0;
+	__sync_bool_compare_and_swap(&m->mallocing, 1, 0);
+
+	if(__sync_bool_compare_and_swap(&m->gcing, 1, 0))
+		__go_run_goroutine_gc(1);
 }
 
 int32
@@ -268,59 +272,6 @@ 
 	return mallocgc(n, 0, 1, 1);
 }
 
-// Stack allocator uses malloc/free most of the time,
-// but if we're in the middle of malloc and need stack,
-// we have to do something else to avoid deadlock.
-// In that case, we fall back on a fixed-size free-list
-// allocator, assuming that inside malloc all the stack
-// frames are small, so that all the stack allocations
-// will be a single size, the minimum (right now, 5k).
-struct {
-	Lock;
-	FixAlloc;
-} stacks;
-
-void*
-stackalloc(uint32 n)
-{
-	void *v;
-	uint32 *ref;
-
-	if(m->mallocing || m->gcing) {
-		lock(&stacks);
-		if(stacks.size == 0)
-			FixAlloc_Init(&stacks, n, SysAlloc, nil, nil);
-		if(stacks.size != n) {
-			printf("stackalloc: in malloc, size=%zu want %d", (size_t)stacks.size, n);
-			throw("stackalloc");
-		}
-		v = FixAlloc_Alloc(&stacks);
-		mstats.stacks_inuse = stacks.inuse;
-		mstats.stacks_sys = stacks.sys;
-		unlock(&stacks);
-		return v;
-	}
-	v = mallocgc(n, RefNoProfiling, 0, 0);
-	if(!mlookup(v, nil, nil, nil, &ref))
-		throw("stackalloc mlookup");
-	*ref = RefStack;
-	return v;
-}
-
-void
-stackfree(void *v)
-{
-	if(m->mallocing || m->gcing) {
-		lock(&stacks);
-		FixAlloc_Free(&stacks, v);
-		mstats.stacks_inuse = stacks.inuse;
-		mstats.stacks_sys = stacks.sys;
-		unlock(&stacks);
-		return;
-	}
-	__go_free(v);
-}
-
 func Alloc(n uintptr) (p *byte) {
 	p = __go_alloc(n);
 }
@@ -341,8 +292,6 @@ 
 	byte *base;
 	uintptr size;
 	const FuncType *ft;
-	int32 i, nret;
-	Type *t;
 
 	if(obj == nil) {
 		printf("runtime.SetFinalizer: first argument is nil interface\n");
@@ -357,7 +306,7 @@ 
 		printf("runtime.SetFinalizer: pointer not at beginning of allocated block\n");
 		goto throw;
 	}
-	nret = 0;
+	ft = nil;
 	if(finalizer != nil) {
 		if(finalizer->__type_descriptor->__code != GO_FUNC) {
 		badfunc:
@@ -368,18 +317,10 @@ 
 		if(ft->__dotdotdot || ft->__in.__count != 1 || !__go_type_descriptors_equal(*(Type**)ft->__in.__values, obj->__type_descriptor))
 			goto badfunc;
 
-		// compute size needed for return parameters
-		for(i=0; i<ft->__out.__count; i++) {
-			t = ((Type**)ft->__out.__values)[i];
-			nret = (nret + t->__align - 1) & ~(t->__align - 1);
-			nret += t->__size;
-		}
-		nret = (nret + sizeof(void*)-1) & ~(sizeof(void*)-1);
-
 		if(getfinalizer(obj->__object, 0)) {
 			printf("runtime.SetFinalizer: finalizer already set");
 			goto throw;
 		}
 	}
-	addfinalizer(obj->__object, finalizer ? finalizer->__object : nil, nret);
+	addfinalizer(obj->__object, finalizer ? *(void**)finalizer->__object : nil, ft);
 }
diff -r c7120a8d41cc libgo/runtime/malloc.h
--- a/libgo/runtime/malloc.h	Fri Jul 23 09:44:07 2010 -0700
+++ b/libgo/runtime/malloc.h	Fri Jul 23 09:45:52 2010 -0700
@@ -391,7 +391,7 @@ 
 	Finalizer *next;	// for use by caller of getfinalizer
 	void (*fn)(void*);
 	void *arg;
-	int32 nret;
+	const struct __go_func_type *ft;
 };
 
 Finalizer*	getfinalizer(void*, bool);
diff -r c7120a8d41cc libgo/runtime/mfinal.c
--- a/libgo/runtime/mfinal.c	Fri Jul 23 09:44:07 2010 -0700
+++ b/libgo/runtime/mfinal.c	Fri Jul 23 09:45:52 2010 -0700
@@ -85,7 +85,7 @@ 
 
 // add finalizer; caller is responsible for making sure not already in table
 void
-addfinalizer(void *p, void (*f)(void*), int32 nret)
+addfinalizer(void *p, void (*f)(void*), const struct __go_func_type *ft)
 {
 	Fintab newtab;
 	int32 i;
@@ -97,7 +97,7 @@ 
 	if(f != nil) {
 		e = mal(sizeof *e);
 		e->fn = f;
-		e->nret = nret;
+		e->ft = ft;
 	}
 
 	lock(&finlock);
diff -r c7120a8d41cc libgo/runtime/mgc0.c
--- a/libgo/runtime/mgc0.c	Fri Jul 23 09:44:07 2010 -0700
+++ b/libgo/runtime/mgc0.c	Fri Jul 23 09:45:52 2010 -0700
@@ -19,26 +19,18 @@ 
 	Debug = 0
 };
 
-extern byte data[];
-extern byte etext[];
-extern byte end[];
-
-#if 0
-static G *fing;
+static bool finstarted;
+static Lock finlock = LOCK_INITIALIZER;
+static pthread_cond_t fincond = PTHREAD_COND_INITIALIZER;
 static Finalizer *finq;
 static int32 fingwait;
-#endif
 
-#if 0
-static void sweepblock(byte*, int64, uint32*, int32);
-static void runfinq(void);
-#endif
+static void runfinq(void*);
 
 enum {
 	PtrSize = sizeof(void*)
 };
 
-#if 0
 static void
 scanblock(int32 depth, byte *b, int64 n)
 {
@@ -50,7 +42,7 @@ 
 	int64 i;
 
 	if(Debug > 1)
-		printf("%d scanblock %p %lld\n", depth, b, n);
+		printf("%d scanblock %p %lld\n", depth, b, (long long) n);
 	off = (uint32)(uintptr)b & (PtrSize-1);
 	if(off) {
 		b += PtrSize - off;
@@ -80,7 +72,7 @@ 
 			obj = *pp;
 		}
 		if(mheap.min <= (byte*)obj && (byte*)obj < mheap.max) {
-			if(mlookup(obj, &obj, &size, nil, &refp)) {
+			if(mlookup(obj, (byte**)&obj, &size, nil, &refp)) {
 				ref = *refp;
 				switch(ref & ~RefFlags) {
 				case RefNone:
@@ -97,26 +89,6 @@ 
 }
 
 static void
-scanstack(G *gp)
-{
-	Stktop *stk;
-	byte *sp;
-
-	if(gp == g)
-		sp = (byte*)&gp;
-	else
-		sp = gp->sched.sp;
-	if(Debug > 1)
-		printf("scanstack %d %p\n", gp->goid, sp);
-	stk = (Stktop*)gp->stackbase;
-	while(stk) {
-		scanblock(0, sp, (byte*)stk - sp);
-		sp = stk->gobuf.sp;
-		stk = (Stktop*)stk->stackbase;
-	}
-}
-
-static void
 markfin(void *v)
 {
 	uintptr size;
@@ -124,45 +96,84 @@ 
 
 	size = 0;
 	refp = nil;
-	if(!mlookup(v, &v, &size, nil, &refp) || !(*refp & RefHasFinalizer))
+	if(!mlookup(v, (byte**)&v, &size, nil, &refp) || !(*refp & RefHasFinalizer))
 		throw("mark - finalizer inconsistency");
 	
 	// do not mark the finalizer block itself.  just mark the things it points at.
 	scanblock(1, v, size);
 }
 
+struct globals {
+	byte *start;
+	uintptr size;
+};
+
+// FIXME: This needs to grow as needed.
+#define GLOBALS_ENTRIES 16
+
+static struct globals globals[GLOBALS_ENTRIES];
+
+// Called by runtime.
+void
+__go_register_mem(void *start, void *end)
+{
+	int i;
+
+	if(start == nil || end == nil)
+		throw("__go_register_mem");
+	if(start == end)
+		return;
+	for(i = 0; i < GLOBALS_ENTRIES; ++i) {
+		if(globals[i].start == nil) {
+			globals[i].start = (byte*)start;
+			globals[i].size = (byte*)end - (byte*)start;
+			return;
+		}
+	}
+	throw("__go_register_mem out of space");
+}
+
+// Called by runtime for dlclose.
+void
+__go_deregister_mem(void *start, void *end)
+{
+	int i;
+
+	if(start == end)
+		return;
+	for(i = 0; i < GLOBALS_ENTRIES; ++i) {
+		if(globals[i].start == (byte*)start
+		   && globals[i].size == (size_t)((byte*)end - (byte*)start)) {
+			globals[i].start = nil;
+			return;
+		}
+	}
+	throw("__go_deregister_mem not found");
+}
+
 static void
 mark(void)
 {
-	G *gp;
+	int i;
 
 	// mark data+bss.
 	// skip mheap itself, which has no interesting pointers
 	// and is mostly zeroed and would not otherwise be paged in.
-	scanblock(0, data, (byte*)&mheap - data);
-	scanblock(0, (byte*)(&mheap+1), end - (byte*)(&mheap+1));
+	for(i = 0; i < GLOBALS_ENTRIES; ++i) {
+		if (globals[i].start == nil)
+			continue;
+		if ((byte*)&mheap >= globals[i].start
+		    && (byte*)&mheap < globals[i].start + globals[i].size) {
+			scanblock(0, globals[i].start, (byte*)&mheap - globals[i].start);
+			scanblock(0, (byte*)(&mheap+1),
+				  globals[i].start + globals[i].size - (byte*)(&mheap+1));
+		}
+		else
+			scanblock(0, globals[i].start, globals[i].size);
+	}
 
 	// mark stacks
-	for(gp=allg; gp!=nil; gp=gp->alllink) {
-		switch(gp->status){
-		default:
-			printf("unexpected G.status %d\n", gp->status);
-			throw("mark - bad status");
-		case Gdead:
-			break;
-		case Grunning:
-		case Grecovery:
-			if(gp != g)
-				throw("mark - world not stopped");
-			scanstack(gp);
-			break;
-		case Grunnable:
-		case Gsyscall:
-		case Gwaiting:
-			scanstack(gp);
-			break;
-		}
-	}
+	__go_scanstacks(scanblock);
 
 	// mark things pointed at by objects with finalizers
 	walkfintab(markfin);
@@ -257,14 +268,7 @@ 
 			sweepspan(s);
 }
 
-#endif
-
-#if 0
-// Semaphore, not Lock, so that the goroutine
-// reschedules when there is contention rather
-// than spinning.
-static uint32 gcsema = 1;
-#endif
+static Lock gcsema = LOCK_INITIALIZER;
 
 // Initialized from $GOGC.  GOGC=off means no gc.
 //
@@ -277,27 +281,12 @@ 
 // extra memory used).
 static int32 gcpercent = -2;
 
-#if 0
-static void
-stealcache(void)
-{
-	M *m;
-	
-	for(m=allm; m; m=m->alllink)
-		MCache_ReleaseAll(m->mcache);
-}
-#endif
-
 void
 gc(int32 force __attribute__ ((unused)))
 {
-#if 0
 	int64 t0, t1;
-#endif
 	char *p;
-#if 0
 	Finalizer *fp;
-#endif
 
 	// The gc is turned off (via enablegc) until
 	// the bootstrap has completed.
@@ -307,7 +296,7 @@ 
 	// problems, don't bother trying to run gc
 	// while holding a lock.  The next mallocgc
 	// without a lock will do the gc instead.
-	if(!mstats.enablegc /* || m->locks > 0 || panicking */)
+	if(!mstats.enablegc || m->locks > 0 /* || panicking */)
 		return;
 
 	if(gcpercent == -2) {	// first time through
@@ -322,82 +311,78 @@ 
 	if(gcpercent < 0)
 		return;
 
-#if 0
-	semacquire(&gcsema);
+	lock(&finlock);
+	lock(&gcsema);
+	m->locks++;	// disable gc during the mallocs in newproc
 	t0 = nanotime();
-	m->gcing = 1;
 	stoptheworld();
-	if(mheap.Lock.key != 0)
-		throw("mheap locked during gc");
 	if(force || mstats.heap_alloc >= mstats.next_gc) {
 		mark();
 		sweep();
-		stealcache();
+		__go_stealcache();
 		mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
 	}
-	m->gcing = 0;
-
-	m->locks++;	// disable gc during the mallocs in newproc
-	fp = finq;
-	if(fp != nil) {
-		// kick off or wake up goroutine to run queued finalizers
-		if(fing == nil)
-			fing = newproc1((byte*)runfinq, nil, 0, 0);
-		else if(fingwait) {
-			fingwait = 0;
-			ready(fing);
-		}
-	}
-	m->locks--;
 
 	t1 = nanotime();
 	mstats.numgc++;
 	mstats.pause_ns += t1 - t0;
 	if(mstats.debuggc)
-		printf("pause %D\n", t1-t0);
-	semrelease(&gcsema);
+		printf("pause %llu\n", (unsigned long long)(t1-t0));
+	unlock(&gcsema);
 	starttheworld();
-	
-	// give the queued finalizers, if any, a chance to run
-	if(fp != nil)
-		gosched();
-#endif
+
+	// finlock is still held.
+	fp = finq;
+	if(fp != nil) {
+		// kick off or wake up goroutine to run queued finalizers
+		if(!finstarted) {
+			__go_go(runfinq, nil);
+			finstarted = 1;
+		}
+		else if(fingwait) {
+			fingwait = 0;
+			pthread_cond_signal(&fincond);
+		}
+	}
+	m->locks--;
+	unlock(&finlock);
 }
 
-#if 0
 static void
-runfinq(void)
+runfinq(void* dummy)
 {
 	Finalizer *f, *next;
-	byte *frame;
+
+	USED(dummy);
 
 	for(;;) {
-		// There's no need for a lock in this section
-		// because it only conflicts with the garbage
-		// collector, and the garbage collector only
-		// runs when everyone else is stopped, and
-		// runfinq only stops at the gosched() or
-		// during the calls in the for loop.
+		lock(&finlock);
 		f = finq;
 		finq = nil;
 		if(f == nil) {
 			fingwait = 1;
-			g->status = Gwaiting;
-			gosched();
+			pthread_cond_wait(&fincond, &finlock.mutex);
+			unlock(&finlock);
 			continue;
 		}
+		unlock(&finlock);
 		for(; f; f=next) {
+			void *params[1];
+
 			next = f->next;
-			frame = mal(sizeof(uintptr) + f->nret);
-			*(void**)frame = f->arg;
-			reflect·call((byte*)f->fn, frame, sizeof(uintptr) + f->nret);
-			free(frame);
+			params[0] = &f->arg;
+			reflect_call(f->ft, (void*)f->fn, 0, params, nil);
 			f->fn = nil;
 			f->arg = nil;
 			f->next = nil;
-			free(f);
+			__go_free(f);
 		}
 		gc(1);	// trigger another gc to clean up the finalized objects, if possible
 	}
 }
-#endif
+
+void
+__go_enable_gc(void)
+{
+	mstats.enablegc = 1;
+}
diff -r c7120a8d41cc libgo/runtime/mprof.goc
--- a/libgo/runtime/mprof.goc	Fri Jul 23 09:44:07 2010 -0700
+++ b/libgo/runtime/mprof.goc	Fri Jul 23 09:45:52 2010 -0700
@@ -193,10 +193,8 @@ 
 	uintptr stk[32];
 	Bucket *b;
 
-	if(m->nomemprof > 0)
+	if(!__sync_bool_compare_and_swap(&m->nomemprof, 0, 1))
 		return;
-
-	m->nomemprof++;
 #if 0
 	nstk = callers(1, stk, 32);
 #else
@@ -208,7 +206,10 @@ 
 	b->alloc_bytes += size;
 	setaddrbucket((uintptr)p, b);
 	unlock(&proflock);
-	m->nomemprof--;
+	__sync_bool_compare_and_swap(&m->nomemprof, 1, 0);
+
+	if(__sync_bool_compare_and_swap(&m->gcing, 1, 0))
+		__go_run_goroutine_gc(100);
 }
 
 // Called when freeing a profiled block.
@@ -217,10 +218,9 @@ 
 {
 	Bucket *b;
 
-	if(m->nomemprof > 0)
+	if(!__sync_bool_compare_and_swap(&m->nomemprof, 0, 1))
 		return;
 
-	m->nomemprof++;
 	lock(&proflock);
 	b = getaddrbucket((uintptr)p);
 	if(b != nil) {
@@ -228,7 +228,10 @@ 
 		b->free_bytes += size;
 	}
 	unlock(&proflock);
-	m->nomemprof--;
+	__sync_bool_compare_and_swap(&m->nomemprof, 1, 0);
+
+	if(__sync_bool_compare_and_swap(&m->gcing, 1, 0))
+		__go_run_goroutine_gc(101);
 }
 
 
@@ -263,6 +266,8 @@ 
 	Bucket *b;
 	Record *r;
 
+	__sync_bool_compare_and_swap(&m->nomemprof, 0, 1);
+
 	lock(&proflock);
 	n = 0;
 	for(b=buckets; b; b=b->allnext)
@@ -277,4 +282,9 @@ 
 				record(r++, b);
 	}
 	unlock(&proflock);
+
+	__sync_bool_compare_and_swap(&m->nomemprof, 1, 0);
+
+	if(__sync_bool_compare_and_swap(&m->gcing, 1, 0))
+		__go_run_goroutine_gc(102);
 }
diff -r c7120a8d41cc libgo/runtime/runtime.h
--- a/libgo/runtime/runtime.h	Fri Jul 23 09:44:07 2010 -0700
+++ b/libgo/runtime/runtime.h	Fri Jul 23 09:45:52 2010 -0700
@@ -90,8 +90,20 @@ 
 {
 	int32	mallocing;
 	int32	gcing;
+	int32	locks;
+	int32	nomemprof;
 	MCache	*mcache;
-	int32	nomemprof;
+
+	/* For the list of all threads.  */
+	struct __go_thread_id *list_entry;
+
+	/* For the garbage collector.  */
+	void	*gc_sp;
+	size_t	gc_len;
+	void	*gc_next_segment;
+	void	*gc_next_sp;
+	void	*gc_initial_sp;
+	struct __go_panic_defer_struct *gc_panic_defer;
 };
 
 /* Macros.  */
@@ -106,6 +118,16 @@ 
 void	mallocinit(void);
 void	siginit(void);
 bool	__go_sigsend(int32 sig);
+int64	nanotime(void);
+
+void	stoptheworld(void);
+void	starttheworld(void);
+void	__go_go(void (*pfn)(void*), void*);
+void	__go_gc_goroutine_init(void*);
+void	__go_enable_gc(void);
+int	__go_run_goroutine_gc(int);
+void	__go_scanstacks(void (*scan)(int32, byte *, int64));
+void	__go_stealcache(void);
 
 /*
  * mutual exclusion locks.  in the uncontended case,
@@ -117,6 +139,10 @@ 
 void	lock(Lock*);
 void	unlock(Lock*);
 void	destroylock(Lock*);
+bool	trylock(Lock*);
+
+void semacquire (uint32 *) asm ("libgo_runtime.runtime.Semacquire");
+void semrelease (uint32 *) asm ("libgo_runtime.runtime.Semrelease");
 
 /*
  * sleep and wakeup on one-time events.
@@ -136,10 +162,17 @@ 
 #define mcmp(a, b, s) __builtin_memcmp((a), (b), (s))
 MCache*	allocmcache(void);
 void	free(void *v);
-void	addfinalizer(void*, void(*fn)(void*), int32);
+struct __go_func_type;
+void	addfinalizer(void*, void(*fn)(void*), const struct __go_func_type *);
+void	walkfintab(void (*fn)(void*));
 #define runtime_mmap mmap
 #define cas(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
 
+struct __go_func_type;
+void reflect_call(const struct __go_func_type *, const void *, _Bool, void **,
+		  void **)
+  asm ("libgo_reflect.reflect.call");
+
 #ifdef __rtems__
 void __wrap_rtems_task_variable_add(void **);
 #endif
diff -r c7120a8d41cc libgo/runtime/thread.c
--- a/libgo/runtime/thread.c	Fri Jul 23 09:44:07 2010 -0700
+++ b/libgo/runtime/thread.c	Fri Jul 23 09:45:52 2010 -0700
@@ -30,3 +30,9 @@ 
 {
 	pthread_mutex_destroy(&l->mutex);
 }
+
+bool
+trylock(Lock *l)
+{
+	return pthread_mutex_trylock(&l->mutex) == 0;
+}