From patchwork Fri Jul 23 16:51:00 2010
X-Patchwork-Submitter: Ian Lance Taylor
X-Patchwork-Id: 59823
From: Ian Lance Taylor
To: gcc-patches@gcc.gnu.org, gofrontend-dev@googlegroups.com
Subject: [gccgo] Turn on garbage collector
Date: Fri, 23 Jul 2010 09:51:00 -0700
User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/23.1 (gnu/linux)

This patch finally turns on the garbage collector for gccgo.  It uses
the same garbage collector as the other Go compiler: a simple
stop-the-world mark-and-sweep collector.  It works, but it is far too
slow.  I will be committing some straightforward patches to speed it
up for gccgo, and then investigating it in more detail.

Committed to gccgo branch.

Ian

diff -r c7120a8d41cc go/gogo-tree.cc
--- a/go/gogo-tree.cc Fri Jul 23 09:44:07 2010 -0700
+++ b/go/gogo-tree.cc Fri Jul 23 09:45:52 2010 -0700
@@ -4218,16 +4218,21 @@
   tree trampoline_type = Gogo::trampoline_type_tree();
   tree trampoline_size = TYPE_SIZE_UNIT(trampoline_type);
 
+  closure = save_expr(closure);
+
   // We allocate the trampoline using a special function which will
   // mark it as executable.
   static tree trampoline_fndecl;
   tree x = Gogo::call_builtin(&trampoline_fndecl,
                               location,
                               "__go_allocate_trampoline",
-                              1,
+                              2,
                               ptr_type_node,
                               size_type_node,
-                              trampoline_size);
+                              trampoline_size,
+                              ptr_type_node,
+                              fold_convert_loc(location, ptr_type_node,
+                                               closure));
 
   x = save_expr(x);
diff -r c7120a8d41cc libgo/Makefile.am
--- a/libgo/Makefile.am Fri Jul 23 09:44:07 2010 -0700
+++ b/libgo/Makefile.am Fri Jul 23 09:45:52 2010 -0700
@@ -31,6 +31,10 @@
 AM_CFLAGS = -fexceptions -fplan9-extensions $(SPLIT_STACK) $(WARN_CFLAGS) \
     -I $(srcdir)/../gcc -I $(MULTIBUILDTOP)../../gcc/include
 
+if USING_SPLIT_STACK
+AM_LDFLAGS = -XCClinker $(SPLIT_STACK)
+endif
+
 # Multilib support.
 MAKEOVERRIDES=
 
@@ -285,7 +289,6 @@
     runtime/go-deferred-recover.c \
     runtime/go-getgoroot.c \
     runtime/go-go.c \
-    runtime/go-goexit.c \
     runtime/go-gomaxprocs.c \
     runtime/go-int-array-to-string.c \
     runtime/go-int-to-string.c \
@@ -297,6 +300,7 @@
     runtime/go-map-index.c \
     runtime/go-map-len.c \
     runtime/go-map-range.c \
+    runtime/go-nanotime.c \
     runtime/go-new-channel.c \
     runtime/go-new-interface-object.c \
     runtime/go-new-interface-pointer.c \
diff -r c7120a8d41cc libgo/runtime/go-go.c
--- a/libgo/runtime/go-go.c Fri Jul 23 09:44:07 2010 -0700
+++ b/libgo/runtime/go-go.c Fri Jul 23 09:45:52 2010 -0700
@@ -5,58 +5,185 @@
    license that can be found in the LICENSE file.  */
 
 #include
+#include
 #include
+#include
+#include
 #include
 #include
+#include
 
 #include "config.h"
 #include "go-panic.h"
 #include "go-alloc.h"
 #include "runtime.h"
+#include "malloc.h"
 
-#undef int /* FIXME */
+#ifdef USING_SPLIT_STACK
+/* FIXME: This is not declared anywhere.  */
+extern void *__splitstack_find (void *, void *, size_t *, void **, void **,
+                                void **);
+#endif
 
-/* What to call.  */
+/* We need to ensure that all callee-saved registers are stored on the
+   stack, in case they hold pointers.  */
 
-struct call
+#if defined(__i386__)
+  #ifndef __PIC__
+    #define SAVE_REGS asm ("" : : : "esi", "edi", "ebx")
+  #else
+    #define SAVE_REGS asm ("" : : : "esi", "edi")
+  #endif
+#elif defined(__x86_64__)
+  #ifndef __PIC__
+    #define SAVE_REGS asm ("" : : : "r12", "r13", "r14", "r15", "rbp", "rbx")
+  #else
+    #define SAVE_REGS asm ("" : : : "r12", "r13", "r14", "r15", "rbp")
+  #endif
+#else
+  #error must define SAVE_REGS
+#endif
+
+/* We stop the threads by sending them the signal GO_SIG_STOP and we
+   start them by sending them the signal GO_SIG_START.  */
+
+#define GO_SIG_START (SIGRTMIN + 1)
+#define GO_SIG_STOP (SIGRTMIN + 2)
+
+/* A doubly linked list of the threads we have started.  */
+
+struct __go_thread_id
 {
+  /* Links.  */
+  struct __go_thread_id *prev;
+  struct __go_thread_id *next;
+  /* True if the thread ID has not yet been filled in.  */
+  _Bool tentative;
+  /* Thread ID.  */
+  pthread_t id;
+  /* Thread's M structure.  */
+  struct M *m;
+  /* If the thread ID has not been filled in, the function we are
+     running.  */
   void (*pfn) (void *);
+  /* If the thread ID has not been filled in, the argument to the
+     function.  */
   void *arg;
-  struct M *m;
 };
 
+static struct __go_thread_id *__go_all_thread_ids;
+
+/* A lock to control access to ALL_THREAD_IDS.  */
+
+static pthread_mutex_t __go_thread_ids_lock = PTHREAD_MUTEX_INITIALIZER;
+
+/* A semaphore used to wait until all the threads have stopped.  */
+
+static sem_t __go_thread_ready_sem;
+
+/* A signal set used to wait until garbage collection is complete.  */
+
+static sigset_t __go_thread_wait_sigset;
+
+/* Remove the current thread from the list of threads.  */
+
+static void
+remove_current_thread (void)
+{
+  struct __go_thread_id *list_entry;
+  int i;
+
+  list_entry = m->list_entry;
+
+  i = pthread_mutex_lock (&__go_thread_ids_lock);
+  assert (i == 0);
+
+  if (list_entry->prev != NULL)
+    list_entry->prev->next = list_entry->next;
+  else
+    __go_all_thread_ids = list_entry->next;
+  if (list_entry->next != NULL)
+    list_entry->next->prev = list_entry->prev;
+
+  i = pthread_mutex_unlock (&__go_thread_ids_lock);
+  assert (i == 0);
+
+  free (list_entry);
+  m->list_entry = NULL;
+}
+
 /* Start the thread.  */
 
 static void *
 start_go_thread (void *thread_arg)
 {
-  struct call *pc = (struct call *) thread_arg;
+  struct M *newm = (struct M *) thread_arg;
   void (*pfn) (void *);
   void *arg;
+  struct __go_thread_id *list_entry;
+  int i;
 
 #ifdef __rtems__
   __wrap_rtems_task_variable_add ((void **) &m);
   __wrap_rtems_task_variable_add ((void **) &__go_panic_defer);
 #endif
 
-  pfn = pc->pfn;
-  arg = pc->arg;
-  m = pc->m;
-  free (pc);
+  m = newm;
+
+  list_entry = newm->list_entry;
+
+  pfn = list_entry->pfn;
+  arg = list_entry->arg;
+
+#ifndef USING_SPLIT_STACK
+  /* If we don't support split stack, record the current stack as the
+     top of the stack.  There shouldn't be anything relevant to the
+     garbage collector above this point.  */
+  m->gc_sp = (void *) &arg;
+#endif
+
+  /* Finish up the entry on the thread list.  */
+
+  i = pthread_mutex_lock (&__go_thread_ids_lock);
+  assert (i == 0);
+
+  list_entry->id = pthread_self ();
+  list_entry->pfn = NULL;
+  list_entry->arg = NULL;
+  list_entry->tentative = 0;
+
+  i = pthread_mutex_unlock (&__go_thread_ids_lock);
+  assert (i == 0);
 
   (*pfn) (arg);
 
+  remove_current_thread ();
+
   return NULL;
 }
 
+/* The runtime.Goexit function.  */
+
+void Goexit (void) asm ("libgo_runtime.runtime.Goexit");
+
+void
+Goexit (void)
+{
+  remove_current_thread ();
+  pthread_exit (NULL);
+  abort ();
+}
+
 /* Implement the go statement.  */
 
 void
-__go_go (void (*pfn) (void*), void* arg)
+__go_go (void (*pfn) (void*), void *arg)
 {
   int i;
   pthread_attr_t attr;
-  struct call *pc;
+  struct M *newm;
+  struct __go_thread_id *list_entry;
+  pthread_t tid;
 
   i = pthread_attr_init (&attr);
   assert (i == 0);
@@ -76,17 +203,419 @@
   assert (i == 0);
 #endif
 
-  pc = malloc (sizeof (struct call));
-  pc->pfn = pfn;
-  pc->arg = arg;
-  pc->m = __go_alloc (sizeof (M));
-  __builtin_memset (pc->m, 0, sizeof (M));
-  pc->m->mcache = allocmcache ();
+  newm = __go_alloc (sizeof (M));
+  newm->mcache = allocmcache ();
 
-  pthread_t tid;
-  i = pthread_create (&tid, &attr, start_go_thread, pc);
+  list_entry = malloc (sizeof (struct __go_thread_id));
+  list_entry->prev = NULL;
+  list_entry->next = NULL;
+  list_entry->tentative = 1;
+  list_entry->m = newm;
+  list_entry->pfn = pfn;
+  list_entry->arg = arg;
+
+  newm->list_entry = list_entry;
+
+  /* Add the thread to the list of all threads, marked as tentative
+     since it is not yet ready to go.  */
+  i = pthread_mutex_lock (&__go_thread_ids_lock);
+  assert (i == 0);
+
+  if (__go_all_thread_ids != NULL)
+    __go_all_thread_ids->prev = list_entry;
+  list_entry->next = __go_all_thread_ids;
+  __go_all_thread_ids = list_entry;
+
+  i = pthread_mutex_unlock (&__go_thread_ids_lock);
+  assert (i == 0);
+
+  /* Start the thread.  */
+  i = pthread_create (&tid, &attr, start_go_thread, newm);
   assert (i == 0);
 
   i = pthread_attr_destroy (&attr);
   assert (i == 0);
 }
+
+/* This is the signal handler for GO_SIG_START.  The garbage collector
+   will send this signal to a thread when it wants the thread to
+   start.  We don't have to actually do anything here, but we need a
+   signal handler since ignoring the signal will mean that the
+   sigsuspend will never see it.  */
+
+static void
+gc_start_handler (int sig __attribute__ ((unused)))
+{
+}
+
+/* Tell the garbage collector that we are ready, and wait for the
+   garbage collector to tell us that it is done.  This may be called
+   by a signal handler, so it is restricted to using functions which
+   are async cancel safe.  */
+
+static void
+stop_for_gc (void)
+{
+  int i;
+
+  /* Tell the garbage collector about our stack.  */
+#ifdef USING_SPLIT_STACK
+  m->gc_sp = __splitstack_find (NULL, NULL, &m->gc_len,
+                                &m->gc_next_segment, &m->gc_next_sp,
+                                &m->gc_initial_sp);
+#else
+  {
+    uintptr_t top = (uintptr_t) m->gc_sp;
+    uintptr_t bottom = (uintptr_t) &top;
+    if (top < bottom)
+      {
+        m->gc_next_sp = m->gc_sp;
+        m->gc_len = bottom - top;
+      }
+    else
+      {
+        m->gc_next_sp = (void *) bottom;
+        m->gc_len = top - bottom;
+      }
+  }
+#endif
+
+  /* FIXME: Perhaps we should just move __go_panic_defer into M.  */
+  m->gc_panic_defer = __go_panic_defer;
+
+  /* Tell the garbage collector that we are ready by posting to the
+     semaphore.  */
+  i = sem_post (&__go_thread_ready_sem);
+  assert (i == 0);
+
+  /* Wait for the garbage collector to tell us to continue.  */
+  sigsuspend (&__go_thread_wait_sigset);
+}
+
+/* This is the signal handler for GO_SIG_STOP.  The garbage collector
+   will send this signal to a thread when it wants the thread to
+   stop.  */
+
+static void
+gc_stop_handler (int sig __attribute__ ((unused)))
+{
+  struct M *pm = m;
+
+  if (__sync_bool_compare_and_swap (&pm->mallocing, 1, 1))
+    {
+      /* m->mallocing was already non-zero.  We can't interrupt the
+         thread while it is running a malloc.  Instead, tell it to
+         call back to us when done.  */
+      __sync_bool_compare_and_swap (&pm->gcing, 0, 1);
+      return;
+    }
+
+  if (__sync_bool_compare_and_swap (&pm->nomemprof, 1, 1))
+    {
+      /* Similarly, we can't interrupt the thread while it is building
+         profiling information.  Otherwise we can get into a deadlock
+         when sweepspan calls MProf_Free.  */
+      __sync_bool_compare_and_swap (&pm->gcing, 0, 1);
+      return;
+    }
+
+  stop_for_gc ();
+}
+
+/* This is called by malloc when it gets a signal during the malloc
+   call itself.  */
+
+int
+__go_run_goroutine_gc (int r)
+{
+  /* Force callee-saved registers to be saved on the stack.  This is
+     not needed if we are invoked from the signal handler, but it is
+     needed if we are called directly, since otherwise we might miss
+     something that a function somewhere up the call stack is holding
+     in a register.  */
+  SAVE_REGS;
+
+  stop_for_gc ();
+
+  /* This avoids tail recursion, to make sure that the saved registers
+     are on the stack.  */
+  return r;
+}
+
+/* Stop all the other threads for garbage collection.  */
+
+void
+stoptheworld (void)
+{
+  int i;
+  pthread_t me;
+  int c;
+  struct __go_thread_id *p;
+
+  i = pthread_mutex_lock (&__go_thread_ids_lock);
+  assert (i == 0);
+
+  me = pthread_self ();
+  c = 0;
+  p = __go_all_thread_ids;
+  while (p != NULL)
+    {
+      if (p->tentative || pthread_equal (me, p->id))
+        p = p->next;
+      else
+        {
+          i = pthread_kill (p->id, GO_SIG_STOP);
+          if (i == 0)
+            {
+              ++c;
+              p = p->next;
+            }
+          else if (i == ESRCH)
+            {
+              struct __go_thread_id *next;
+
+              /* This thread died somehow.  Remove it from the
+                 list.  */
+              next = p->next;
+              if (p->prev != NULL)
+                p->prev->next = next;
+              else
+                __go_all_thread_ids = next;
+              if (next != NULL)
+                next->prev = p->prev;
+              free (p);
+              p = next;
+            }
+          else
+            abort ();
+        }
+    }
+
+  /* Wait for each thread to receive the signal and post to the
+     semaphore.  If a thread receives the signal but contrives to die
+     before it posts to the semaphore, then we will hang forever
+     here.  */
+
+  while (c > 0)
+    {
+      i = sem_wait (&__go_thread_ready_sem);
+      if (i < 0 && errno == EINTR)
+        continue;
+      assert (i == 0);
+      --c;
+    }
+
+  /* The gc_panic_defer field should now be set for all M's except the
+     one in this thread.  Set this one now.  */
+  m->gc_panic_defer = __go_panic_defer;
+
+  /* Leave with __go_thread_ids_lock held.  */
+}
+
+/* Scan all the stacks for garbage collection.  This should be called
+   with __go_thread_ids_lock held.  */
+
+void
+__go_scanstacks (void (*scan) (int32_t, unsigned char *, int64_t))
+{
+  pthread_t me;
+  struct __go_thread_id *p;
+
+  /* Make sure all the registers for this thread are on the stack.  */
+  SAVE_REGS;
+
+  me = pthread_self ();
+  for (p = __go_all_thread_ids; p != NULL; p = p->next)
+    {
+      _Bool isme = 0;
+
+      if (p->tentative)
+        {
+          /* The goroutine function and argument can be allocated on
+             the heap, so we have to scan them for a thread that has
+             not yet started.  */
+          scan (0, (void *) &p->pfn, sizeof (void *));
+          scan (0, (void *) &p->arg, sizeof (void *));
+          scan (0, (void *) &p->m, sizeof (void *));
+          continue;
+        }
+
+#ifdef USING_SPLIT_STACK
+
+      void *sp;
+      size_t len;
+      void *next_segment;
+      void *next_sp;
+      void *initial_sp;
+
+      if (pthread_equal (me, p->id))
+        {
+          isme = 1;
+          next_segment = NULL;
+          next_sp = NULL;
+          initial_sp = NULL;
+          sp = __splitstack_find (NULL, NULL, &len, &next_segment,
+                                  &next_sp, &initial_sp);
+        }
+      else
+        {
+          sp = p->m->gc_sp;
+          len = p->m->gc_len;
+          next_segment = p->m->gc_next_segment;
+          next_sp = p->m->gc_next_sp;
+          initial_sp = p->m->gc_initial_sp;
+        }
+
+      while (sp != NULL)
+        {
+          scan (0, sp, len);
+          sp = __splitstack_find (next_segment, next_sp, &len,
+                                  &next_segment, &next_sp, &initial_sp);
+        }
+
+#else /* !defined(USING_SPLIT_STACK) */
+
+      if (pthread_equal (me, p->id))
+        {
+          isme = 1;
+          uintptr_t top = (uintptr_t) m->gc_sp;
+          uintptr_t bottom = (uintptr_t) &top;
+          if (top < bottom)
+            scan (0, m->gc_sp, bottom - top);
+          else
+            scan (0, (void *) bottom, top - bottom);
+        }
+      else
+        {
+          scan (0, p->m->gc_next_sp, p->m->gc_len);
+        }
+
+#endif /* !defined(USING_SPLIT_STACK) */
+
+      /* Also scan the M structure while we're at it.  */
+
+      scan (0, (void *) &p->m, sizeof (void *));
+    }
+}
+
+/* Release all the memory caches.  This is called with
+   __go_thread_ids_lock held.  */
+
+void
+__go_stealcache(void)
+{
+  struct __go_thread_id *p;
+
+  for (p = __go_all_thread_ids; p != NULL; p = p->next)
+    MCache_ReleaseAll(p->m->mcache);
+}
+
+/* Start the other threads after garbage collection.  */
+
+void
+starttheworld (void)
+{
+  int i;
+  pthread_t me;
+  struct __go_thread_id *p;
+
+  /* Here __go_thread_ids_lock should be held.  */
+
+  me = pthread_self ();
+  p = __go_all_thread_ids;
+  while (p != NULL)
+    {
+      if (p->tentative || pthread_equal (me, p->id))
+        p = p->next;
+      else
+        {
+          i = pthread_kill (p->id, GO_SIG_START);
+          if (i == 0)
+            p = p->next;
+          else
+            abort ();
+        }
+    }
+
+  i = pthread_mutex_unlock (&__go_thread_ids_lock);
+  assert (i == 0);
+}
+
+/* Initialize the interaction between goroutines and the garbage
+   collector.  */
+
+void
+__go_gc_goroutine_init (void *sp __attribute__ ((unused)))
+{
+  struct __go_thread_id *list_entry;
+  int i;
+  sigset_t sset;
+  struct sigaction act;
+
+  /* Add the initial thread to the list of all threads.  */
+
+  list_entry = malloc (sizeof (struct __go_thread_id));
+  list_entry->prev = NULL;
+  list_entry->next = NULL;
+  list_entry->tentative = 0;
+  list_entry->id = pthread_self ();
+  list_entry->m = m;
+  list_entry->pfn = NULL;
+  list_entry->arg = NULL;
+  __go_all_thread_ids = list_entry;
+
+  /* Initialize the semaphore which signals when threads are ready for
+     GC.  */
+
+  i = sem_init (&__go_thread_ready_sem, 0, 0);
+  assert (i == 0);
+
+  /* Fetch the current signal mask.  */
+
+  i = sigemptyset (&sset);
+  assert (i == 0);
+  i = sigprocmask (SIG_BLOCK, NULL, &sset);
+  assert (i == 0);
+
+  /* Make sure that GO_SIG_START is not blocked and GO_SIG_STOP is
+     blocked, and save that set for use with later calls to sigsuspend
+     while waiting for GC to complete.  */
+
+  i = sigdelset (&sset, GO_SIG_START);
+  assert (i == 0);
+  i = sigaddset (&sset, GO_SIG_STOP);
+  assert (i == 0);
+  __go_thread_wait_sigset = sset;
+
+  /* Block GO_SIG_START and unblock GO_SIG_STOP, and use that for
+     the process signal mask.  */
+
+  i = sigaddset (&sset, GO_SIG_START);
+  assert (i == 0);
+  i = sigdelset (&sset, GO_SIG_STOP);
+  assert (i == 0);
+  i = sigprocmask (SIG_SETMASK, &sset, NULL);
+  assert (i == 0);
+
+  /* Install the signal handlers.  */
+  memset (&act, 0, sizeof act);
+  i = sigemptyset (&act.sa_mask);
+  assert (i == 0);
+
+  act.sa_handler = gc_start_handler;
+  act.sa_flags = SA_RESTART;
+  i = sigaction (GO_SIG_START, &act, NULL);
+  assert (i == 0);
+
+  /* We could consider using an alternate signal stack for this.  The
+     function does not use much stack space, so it may be OK.  */
+  act.sa_handler = gc_stop_handler;
+  i = sigaction (GO_SIG_STOP, &act, NULL);
+  assert (i == 0);
+
+#ifndef USING_SPLIT_STACK
+  /* If we don't support split stack, record the current stack as the
+     top of the stack.  */
+  m->gc_sp = sp;
+#endif
+}
diff -r c7120a8d41cc libgo/runtime/go-goexit.c
--- a/libgo/runtime/go-goexit.c Fri Jul 23 09:44:07 2010 -0700
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,17 +0,0 @@
-/* go-goexit.c -- the runtime.Goexit function.
-
-   Copyright 2009 The Go Authors. All rights reserved.
-   Use of this source code is governed by a BSD-style
-   license that can be found in the LICENSE file.  */
-
-#include
-#include
-
-void Goexit (void) asm ("libgo_runtime.runtime.Goexit");
-
-void
-Goexit (void)
-{
-  pthread_exit (NULL);
-  assert (0);
-}
diff -r c7120a8d41cc libgo/runtime/go-main.c
--- a/libgo/runtime/go-main.c Fri Jul 23 09:44:07 2010 -0700
+++ b/libgo/runtime/go-main.c Fri Jul 23 09:45:52 2010 -0700
@@ -19,6 +19,7 @@
 
 #include "go-string.h"
 #include "runtime.h"
+#include "malloc.h"
 
 #undef int
 #undef char
@@ -47,6 +48,7 @@
   struct __go_string *values;
 
   mallocinit ();
+  __go_gc_goroutine_init (&argc);
 
   Args.__count = argc;
   Args.__capacity = argc;
@@ -78,6 +80,9 @@
   srand ((unsigned int) time (NULL));
 #endif
   __go_init_main ();
+
+  __go_enable_gc ();
+
   real_main ();
 
   return 0;
diff -r c7120a8d41cc libgo/runtime/go-nanotime.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/libgo/runtime/go-nanotime.c Fri Jul 23 09:45:52 2010 -0700
@@ -0,0 +1,22 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Return time in nanoseconds.  This is only used for computing runtime.
+
+#include
+#include
+
+#include "runtime.h"
+
+int64
+nanotime (void)
+{
+  int i;
+  struct timeval tv;
+
+  i = gettimeofday (&tv, NULL);
+  assert (i == 0);
+
+  return (int64) tv.tv_sec * 1000000000 + (int64) tv.tv_usec * 1000;
+}
diff -r c7120a8d41cc libgo/runtime/go-reflect-call.c
--- a/libgo/runtime/go-reflect-call.c Fri Jul 23 09:44:07 2010 -0700
+++ b/libgo/runtime/go-reflect-call.c Fri Jul 23 09:45:52 2010 -0700
@@ -12,6 +12,7 @@
 
 #include "go-alloc.h"
 #include "go-type.h"
+#include "runtime.h"
 
 /* Forward declaration.  */
 
@@ -316,13 +317,9 @@
    address is FUNC_ADDR.  PARAMS is an array of parameter addresses.
    RESULTS is an array of result addresses.  */
 
-extern void call (const struct __go_func_type *, const void *, _Bool, void **,
-                  void **)
-  asm ("libgo_reflect.reflect.call");
-
 void
-call (const struct __go_func_type *func_type, const void *func_addr,
-      _Bool is_interface, void **params, void **results)
+reflect_call (const struct __go_func_type *func_type, const void *func_addr,
+              _Bool is_interface, void **params, void **results)
 {
   ffi_cif cif;
   unsigned char *call_result;
@@ -334,7 +331,10 @@
 
   ffi_call (&cif, func_addr, call_result, params);
 
-  go_set_results (func_type, call_result, results);
+  /* Some day we may need to free result values if RESULTS is
+     NULL.  */
+  if (results != NULL)
+    go_set_results (func_type, call_result, results);
 
   free (call_result);
 }
diff -r c7120a8d41cc libgo/runtime/go-semacquire.c
--- a/libgo/runtime/go-semacquire.c Fri Jul 23 09:44:07 2010 -0700
+++ b/libgo/runtime/go-semacquire.c Fri Jul 23 09:45:52 2010 -0700
@@ -9,6 +9,8 @@
 
 #include
 
+#include "runtime.h"
+
 /* We use a single global lock and condition variable.  This is
    painful, since it will cause unnecessary contention, but is hard to
    avoid in a portable manner.  On Linux we can use futexes, but they
@@ -23,14 +25,14 @@
    false.  */
 
 static _Bool
-acquire (int32_t *addr)
+acquire (uint32 *addr)
 {
   while (1)
     {
-      int32_t val;
+      uint32 val;
 
       val = *addr;
-      if (val <= 0)
+      if (val == 0)
         return 0;
       if (__sync_bool_compare_and_swap (addr, val, val - 1))
         return 1;
@@ -41,10 +43,8 @@
    We have acquired the semaphore when we have decremented the count
    and it remains nonnegative.  */
 
-void Semacquire (int32_t *) asm ("libgo_runtime.runtime.Semacquire");
-
 void
-Semacquire (int32_t *addr)
+semacquire (uint32 *addr)
 {
   while (1)
     {
@@ -85,10 +85,8 @@
    positive, we signal the condition variable to wake up another
    process.  */
 
-void Semrelease (int32_t *) asm ("libgo_runtime.runtime.Semrelease");
-
 void
-Semrelease (int32_t *addr)
+semrelease (uint32 *addr)
 {
   int32_t val;
 
diff -r c7120a8d41cc libgo/runtime/go-trampoline.c
--- a/libgo/runtime/go-trampoline.c Fri Jul 23 09:44:07 2010 -0700
+++ b/libgo/runtime/go-trampoline.c Fri Jul 23 09:45:52 2010 -0700
@@ -22,10 +22,11 @@
    needs to be more system dependent.  */
 
 void *
-__go_allocate_trampoline (size_t size)
+__go_allocate_trampoline (size_t size, void *closure)
 {
   unsigned int page_size;
   void *ret;
+  size_t off;
 
   page_size = getpagesize ();
   assert (page_size >= size);
@@ -33,6 +34,13 @@
   ret = (void *) (((uintptr_t) ret + page_size - 1)
                   & ~ ((uintptr_t) page_size - 1));
 
+  /* Because the garbage collector only looks at correct address
+     offsets, we need to ensure that it will see the closure
+     address.  */
+  off = ((size + sizeof (void *) - 1) / sizeof (void *)) * sizeof (void *);
+  assert (size + off + sizeof (void *) <= page_size);
+  __builtin_memcpy (ret + off, &closure, sizeof (void *));
+
 #ifdef HAVE_SYS_MMAN_H
   {
     int i;
diff -r c7120a8d41cc libgo/runtime/go-unwind.c
--- a/libgo/runtime/go-unwind.c Fri Jul 23 09:44:07 2010 -0700
+++ b/libgo/runtime/go-unwind.c Fri Jul 23 09:45:52 2010 -0700
@@ -7,6 +7,7 @@
 #include "config.h"
 
 #include
+#include
 
 #include "unwind.h"
 #define NO_SIZE_OF_ENCODED_VALUE
diff -r c7120a8d41cc libgo/runtime/malloc.goc
--- a/libgo/runtime/malloc.goc Fri Jul 23 09:44:07 2010 -0700
+++ b/libgo/runtime/malloc.goc Fri Jul 23 09:45:52 2010 -0700
@@ -54,9 +54,8 @@
     void *v;
     uint32 *ref;
 
-    if(m->mallocing)
+    if(!__sync_bool_compare_and_swap(&m->mallocing, 0, 1))
         throw("malloc/free - deadlock");
-    m->mallocing = 1;
 
     if(size == 0)
         size = 1;
@@ -98,7 +97,10 @@
         ref = &s->gcref0;
     }
 
-    m->mallocing = 0;
+    __sync_bool_compare_and_swap(&m->mallocing, 1, 0);
+
+    if(__sync_bool_compare_and_swap(&m->gcing, 1, 0))
+        __go_run_goroutine_gc(0);
 
     if(!(refflag & RefNoProfiling) && (rate = MemProfileRate) > 0) {
         if(size >= (uint32) rate)
@@ -139,9 +141,8 @@
     if(v == nil)
         return;
 
-    if(m->mallocing)
+    if(!__sync_bool_compare_and_swap(&m->mallocing, 0, 1))
         throw("malloc/free - deadlock");
-    m->mallocing = 1;
 
     if(!mlookup(v, nil, nil, &s, &ref)) {
         printf("free %p: not an allocated block\n", v);
@@ -171,7 +172,10 @@
         mstats.by_size[sizeclass].nfree++;
         MCache_Free(c, v, sizeclass, size);
     }
-    m->mallocing = 0;
+    __sync_bool_compare_and_swap(&m->mallocing, 1, 0);
+
+    if(__sync_bool_compare_and_swap(&m->gcing, 1, 0))
+        __go_run_goroutine_gc(1);
 }
 
 int32
@@ -268,59 +272,6 @@
     return mallocgc(n, 0, 1, 1);
 }
 
-// Stack allocator uses malloc/free most of the time,
-// but if we're in the middle of malloc and need stack,
-// we have to do something else to avoid deadlock.
-// In that case, we fall back on a fixed-size free-list
-// allocator, assuming that inside malloc all the stack
-// frames are small, so that all the stack allocations
-// will be a single size, the minimum (right now, 5k).
-struct {
-    Lock;
-    FixAlloc;
-} stacks;
-
-void*
-stackalloc(uint32 n)
-{
-    void *v;
-    uint32 *ref;
-
-    if(m->mallocing || m->gcing) {
-        lock(&stacks);
-        if(stacks.size == 0)
-            FixAlloc_Init(&stacks, n, SysAlloc, nil, nil);
-        if(stacks.size != n) {
-            printf("stackalloc: in malloc, size=%zu want %d", (size_t)stacks.size, n);
-            throw("stackalloc");
-        }
-        v = FixAlloc_Alloc(&stacks);
-        mstats.stacks_inuse = stacks.inuse;
-        mstats.stacks_sys = stacks.sys;
-        unlock(&stacks);
-        return v;
-    }
-    v = mallocgc(n, RefNoProfiling, 0, 0);
-    if(!mlookup(v, nil, nil, nil, &ref))
-        throw("stackalloc mlookup");
-    *ref = RefStack;
-    return v;
-}
-
-void
-stackfree(void *v)
-{
-    if(m->mallocing || m->gcing) {
-        lock(&stacks);
-        FixAlloc_Free(&stacks, v);
-        mstats.stacks_inuse = stacks.inuse;
-        mstats.stacks_sys = stacks.sys;
-        unlock(&stacks);
-        return;
-    }
-    __go_free(v);
-}
-
 func Alloc(n uintptr) (p *byte) {
     p = __go_alloc(n);
 }
@@ -341,8 +292,6 @@
     byte *base;
     uintptr size;
     const FuncType *ft;
-    int32 i, nret;
-    Type *t;
 
     if(obj == nil) {
         printf("runtime.SetFinalizer: first argument is nil interface\n");
@@ -357,7 +306,7 @@
         printf("runtime.SetFinalizer: pointer not at beginning of allocated block\n");
         goto throw;
     }
-    nret = 0;
+    ft = nil;
     if(finalizer != nil) {
         if(finalizer->__type_descriptor->__code != GO_FUNC) {
         badfunc:
@@ -368,18 +317,10 @@
         if(ft->__dotdotdot || ft->__in.__count != 1 || !__go_type_descriptors_equal(*(Type**)ft->__in.__values, obj->__type_descriptor))
             goto badfunc;
 
-        // compute size needed for return parameters
-        for(i=0; i<ft->__out.__count; i++) {
-            t = ((Type**)ft->__out.__values)[i];
-            nret = (nret + t->__align - 1) & ~(t->__align - 1);
-            nret += t->__size;
-        }
-        nret = (nret + sizeof(void*)-1) & ~(sizeof(void*)-1);
-
         if(getfinalizer(obj->__object, 0)) {
             printf("runtime.SetFinalizer: finalizer already set");
             goto throw;
         }
     }
-    addfinalizer(obj->__object, finalizer ? finalizer->__object : nil, nret);
+    addfinalizer(obj->__object, finalizer ? *(void**)finalizer->__object : nil, ft);
 }
diff -r c7120a8d41cc libgo/runtime/malloc.h
--- a/libgo/runtime/malloc.h Fri Jul 23 09:44:07 2010 -0700
+++ b/libgo/runtime/malloc.h Fri Jul 23 09:45:52 2010 -0700
@@ -391,7 +391,7 @@
     Finalizer *next;  // for use by caller of getfinalizer
     void (*fn)(void*);
     void *arg;
-    int32 nret;
+    const struct __go_func_type *ft;
 };
 
 Finalizer* getfinalizer(void*, bool);
diff -r c7120a8d41cc libgo/runtime/mfinal.c
--- a/libgo/runtime/mfinal.c Fri Jul 23 09:44:07 2010 -0700
+++ b/libgo/runtime/mfinal.c Fri Jul 23 09:45:52 2010 -0700
@@ -85,7 +85,7 @@
 
 // add finalizer; caller is responsible for making sure not already in table
 void
-addfinalizer(void *p, void (*f)(void*), int32 nret)
+addfinalizer(void *p, void (*f)(void*), const struct __go_func_type *ft)
 {
     Fintab newtab;
     int32 i;
@@ -97,7 +97,7 @@
     if(f != nil) {
         e = mal(sizeof *e);
         e->fn = f;
-        e->nret = nret;
+        e->ft = ft;
     }
 
     lock(&finlock);
diff -r c7120a8d41cc libgo/runtime/mgc0.c
--- a/libgo/runtime/mgc0.c Fri Jul 23 09:44:07 2010 -0700
+++ b/libgo/runtime/mgc0.c Fri Jul 23 09:45:52 2010 -0700
@@ -19,26 +19,18 @@
     Debug = 0
 };
 
-extern byte data[];
-extern byte etext[];
-extern byte end[];
-
-#if 0
-static G *fing;
+static bool finstarted;
+static Lock finlock = LOCK_INITIALIZER;
+static pthread_cond_t fincond = PTHREAD_COND_INITIALIZER;
 static Finalizer *finq;
 static int32 fingwait;
-#endif
 
-#if 0
-static void sweepblock(byte*, int64, uint32*, int32);
-static void runfinq(void);
-#endif
+static void runfinq(void*);
 
 enum {
     PtrSize = sizeof(void*)
 };
 
-#if 0
 static void
 scanblock(int32 depth, byte *b, int64 n)
 {
@@ -50,7 +42,7 @@
     int64 i;
 
     if(Debug > 1)
-        printf("%d scanblock %p %lld\n", depth, b, n);
+        printf("%d scanblock %p %lld\n", depth, b, (long long) n);
     off = (uint32)(uintptr)b & (PtrSize-1);
     if(off) {
         b += PtrSize - off;
@@ -80,7 +72,7 @@
             obj = *pp;
         }
         if(mheap.min <= (byte*)obj && (byte*)obj < mheap.max) {
-            if(mlookup(obj, &obj, &size, nil, &refp)) {
+            if(mlookup(obj, (byte**)&obj, &size, nil, &refp)) {
                 ref = *refp;
                 switch(ref & ~RefFlags) {
                 case RefNone:
@@ -97,26 +89,6 @@
 }
 
 static void
-scanstack(G *gp)
-{
-    Stktop *stk;
-    byte *sp;
-
-    if(gp == g)
-        sp = (byte*)&gp;
-    else
-        sp = gp->sched.sp;
-    if(Debug > 1)
-        printf("scanstack %d %p\n", gp->goid, sp);
-    stk = (Stktop*)gp->stackbase;
-    while(stk) {
-        scanblock(0, sp, (byte*)stk - sp);
-        sp = stk->gobuf.sp;
-        stk = (Stktop*)stk->stackbase;
-    }
-}
-
-static void
 markfin(void *v)
 {
     uintptr size;
@@ -124,45 +96,84 @@
 
     size = 0;
     refp = nil;
-    if(!mlookup(v, &v, &size, nil, &refp) || !(*refp & RefHasFinalizer))
+    if(!mlookup(v, (byte**)&v, &size, nil, &refp) || !(*refp & RefHasFinalizer))
         throw("mark - finalizer inconsistency");
 
     // do not mark the finalizer block itself.  just mark the things it points at.
     scanblock(1, v, size);
 }
 
+struct globals {
+    byte *start;
+    uintptr size;
+};
+
+// FIXME: This needs to grow as needed.
+#define GLOBALS_ENTRIES 16
+
+static struct globals globals[GLOBALS_ENTRIES];
+
+// Called by runtime.
+void
+__go_register_mem(void *start, void *end)
+{
+    int i;
+
+    if(start == nil || end == nil)
+        throw("__go_register_mem");
+    if(start == end)
+        return;
+    for(i = 0; i < GLOBALS_ENTRIES; ++i) {
+        if(globals[i].start == nil) {
+            globals[i].start = (byte*)start;
+            globals[i].size = (byte*)end - (byte*)start;
+            return;
+        }
+    }
+    throw("__go_register_mem out of space");
+}
+
+// Called by runtime for dlclose.
+void
+__go_deregister_mem(void *start, void *end)
+{
+    int i;
+
+    if(start == end)
+        return;
+    for(i = 0; i < GLOBALS_ENTRIES; ++i) {
+        if(globals[i].start == (byte*)start
+           && globals[i].size == (size_t)((byte*)end - (byte*)start)) {
+            globals[i].start = nil;
+            return;
+        }
+    }
+    throw("__go_deregister_mem not found");
+}
+
 static void
 mark(void)
 {
-    G *gp;
+    int i;
 
     // mark data+bss.
     // skip mheap itself, which has no interesting pointers
     // and is mostly zeroed and would not otherwise be paged in.
-    scanblock(0, data, (byte*)&mheap - data);
-    scanblock(0, (byte*)(&mheap+1), end - (byte*)(&mheap+1));
+    for(i = 0; i < GLOBALS_ENTRIES; ++i) {
+        if (globals[i].start == nil)
+            continue;
+        if ((byte*)&mheap >= globals[i].start
+            && (byte*)&mheap < globals[i].start + globals[i].size) {
+            scanblock(0, globals[i].start, (byte*)&mheap - globals[i].start);
+            scanblock(0, (byte*)(&mheap+1),
+                      globals[i].start + globals[i].size - (byte*)(&mheap+1));
+        }
+        else
+            scanblock(0, globals[i].start, globals[i].size);
+    }
 
     // mark stacks
-    for(gp=allg; gp!=nil; gp=gp->alllink) {
-        switch(gp->status){
-        default:
-            printf("unexpected G.status %d\n", gp->status);
-            throw("mark - bad status");
-        case Gdead:
-            break;
-        case Grunning:
-        case Grecovery:
-            if(gp != g)
-                throw("mark - world not stopped");
-            scanstack(gp);
-            break;
-        case Grunnable:
-        case Gsyscall:
-        case Gwaiting:
-            scanstack(gp);
-            break;
-        }
-    }
+    __go_scanstacks(scanblock);
 
     // mark things pointed at by objects with finalizers
     walkfintab(markfin);
@@ -257,14 +268,7 @@
         sweepspan(s);
 }
 
-#endif
-
-#if 0
-// Semaphore, not Lock, so that the goroutine
-// reschedules when there is contention rather
-// than spinning.
-static uint32 gcsema = 1;
-#endif
+static Lock gcsema = LOCK_INITIALIZER;
 
 // Initialized from $GOGC.  GOGC=off means no gc.
 //
@@ -277,27 +281,12 @@
 // extra memory used).
 static int32 gcpercent = -2;
 
-#if 0
-static void
-stealcache(void)
-{
-    M *m;
-
-    for(m=allm; m; m=m->alllink)
-        MCache_ReleaseAll(m->mcache);
-}
-#endif
-
 void
 gc(int32 force __attribute__ ((unused)))
 {
-#if 0
     int64 t0, t1;
-#endif
     char *p;
-#if 0
     Finalizer *fp;
-#endif
 
     // The gc is turned off (via enablegc) until
    // the bootstrap has completed.
@@ -307,7 +296,7 @@
     // problems, don't bother trying to run gc
     // while holding a lock.  The next mallocgc
     // without a lock will do the gc instead.
-    if(!mstats.enablegc /* || m->locks > 0 || panicking */)
+    if(!mstats.enablegc || m->locks > 0 /* || panicking */)
         return;
 
     if(gcpercent == -2) {  // first time through
@@ -322,82 +311,78 @@
     if(gcpercent < 0)
         return;
 
-#if 0
-    semacquire(&gcsema);
+    lock(&finlock);
+    lock(&gcsema);
+    m->locks++;  // disable gc during the mallocs in newproc
     t0 = nanotime();
-    m->gcing = 1;
     stoptheworld();
-    if(mheap.Lock.key != 0)
-        throw("mheap locked during gc");
     if(force || mstats.heap_alloc >= mstats.next_gc) {
         mark();
         sweep();
-        stealcache();
+        __go_stealcache();
        mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100;
     }
-    m->gcing = 0;
-
-    m->locks++;  // disable gc during the mallocs in newproc
-    fp = finq;
-    if(fp != nil) {
-        // kick off or wake up goroutine to run queued finalizers
-        if(fing == nil)
-            fing = newproc1((byte*)runfinq, nil, 0, 0);
-        else if(fingwait) {
-            fingwait = 0;
-            ready(fing);
-        }
-    }
-    m->locks--;
 
     t1 = nanotime();
     mstats.numgc++;
     mstats.pause_ns += t1 - t0;
     if(mstats.debuggc)
-        printf("pause %D\n", t1-t0);
-    semrelease(&gcsema);
+        printf("pause %llu\n", (unsigned long long)t1-t0);
+    unlock(&gcsema);
     starttheworld();
-
-    // give the queued finalizers, if any, a chance to run
-    if(fp != nil)
-        gosched();
-#endif
+
+    // finlock is still held.
+    fp = finq;
+    if(fp != nil) {
+        // kick off or wake up goroutine to run queued finalizers
+        if(!finstarted) {
+            __go_go(runfinq, nil);
+            finstarted = 1;
+        }
+        else if(fingwait) {
+            fingwait = 0;
+            pthread_cond_signal(&fincond);
+        }
+    }
+    m->locks--;
+    unlock(&finlock);
 }
 
-#if 0
 static void
-runfinq(void)
+runfinq(void* dummy)
 {
     Finalizer *f, *next;
-    byte *frame;
+
+    USED(dummy);
 
     for(;;) {
-        // There's no need for a lock in this section
-        // because it only conflicts with the garbage
-        // collector, and the garbage collector only
-        // runs when everyone else is stopped, and
-        // runfinq only stops at the gosched() or
-        // during the calls in the for loop.
+        lock(&finlock);
         f = finq;
         finq = nil;
        if(f == nil) {
            fingwait = 1;
-            g->status = Gwaiting;
-            gosched();
+            pthread_cond_wait(&fincond, &finlock.mutex);
+            unlock(&finlock);
            continue;
        }
+        unlock(&finlock);
        for(; f; f=next) {
+            void *params[1];
+
            next = f->next;
-            frame = mal(sizeof(uintptr) + f->nret);
-            *(void**)frame = f->arg;
-            reflect·call((byte*)f->fn, frame, sizeof(uintptr) + f->nret);
-            free(frame);
+            params[0] = &f->arg;
+            reflect_call(f->ft, (void*)f->fn, 0, params, nil);
            f->fn = nil;
            f->arg = nil;
            f->next = nil;
-            free(f);
+            __go_free(f);
        }
        gc(1);  // trigger another gc to clean up the finalized objects, if possible
     }
 }
-#endif
+
+void
+__go_enable_gc()
+{
+    mstats.enablegc = 1;
+}
diff -r c7120a8d41cc libgo/runtime/mprof.goc
--- a/libgo/runtime/mprof.goc Fri Jul 23 09:44:07 2010 -0700
+++ b/libgo/runtime/mprof.goc Fri Jul 23 09:45:52 2010 -0700
@@ -193,10 +193,8 @@
     uintptr stk[32];
     Bucket *b;
 
-    if(m->nomemprof > 0)
+    if(!__sync_bool_compare_and_swap(&m->nomemprof, 0, 1))
         return;
-
-    m->nomemprof++;
 #if 0
     nstk = callers(1, stk, 32);
 #else
@@ -208,7 +206,10 @@
     b->alloc_bytes += size;
     setaddrbucket((uintptr)p, b);
     unlock(&proflock);
-    m->nomemprof--;
+    __sync_bool_compare_and_swap(&m->nomemprof, 1, 0);
+
+    if(__sync_bool_compare_and_swap(&m->gcing, 1, 0))
+        __go_run_goroutine_gc(100);
 }
 
 // Called when freeing a profiled block.
@@ -217,10 +218,9 @@
 {
     Bucket *b;
 
-    if(m->nomemprof > 0)
+    if(!__sync_bool_compare_and_swap(&m->nomemprof, 0, 1))
         return;
 
-    m->nomemprof++;
     lock(&proflock);
     b = getaddrbucket((uintptr)p);
     if(b != nil) {
@@ -228,7 +228,10 @@
         b->free_bytes += size;
     }
     unlock(&proflock);
-    m->nomemprof--;
+    __sync_bool_compare_and_swap(&m->nomemprof, 1, 0);
+
+    if(__sync_bool_compare_and_swap(&m->gcing, 1, 0))
+        __go_run_goroutine_gc(101);
 }
 
@@ -263,6 +266,8 @@
     Bucket *b;
     Record *r;
 
+    __sync_bool_compare_and_swap(&m->nomemprof, 0, 1);
+
     lock(&proflock);
     n = 0;
     for(b=buckets; b; b=b->allnext)
@@ -277,4 +282,9 @@
             record(r++, b);
     }
     unlock(&proflock);
+
+    __sync_bool_compare_and_swap(&m->nomemprof, 1, 0);
+
+    if(__sync_bool_compare_and_swap(&m->gcing, 1, 0))
+        __go_run_goroutine_gc(102);
 }
diff -r c7120a8d41cc libgo/runtime/runtime.h
--- a/libgo/runtime/runtime.h Fri Jul 23 09:44:07 2010 -0700
+++ b/libgo/runtime/runtime.h Fri Jul 23 09:45:52 2010 -0700
@@ -90,8 +90,20 @@
 {
     int32 mallocing;
     int32 gcing;
+    int32 locks;
+    int32 nomemprof;
     MCache *mcache;
-    int32 nomemprof;
+
+    /* For the list of all threads.  */
+    struct __go_thread_id *list_entry;
+
+    /* For the garbage collector.  */
+    void *gc_sp;
+    size_t gc_len;
+    void *gc_next_segment;
+    void *gc_next_sp;
+    void *gc_initial_sp;
+    struct __go_panic_defer_struct *gc_panic_defer;
 };
 
 /* Macros.  */
@@ -106,6 +118,16 @@
 void mallocinit(void);
 void siginit(void);
 bool __go_sigsend(int32 sig);
+int64 nanotime(void);
+
+void stoptheworld(void);
+void starttheworld(void);
+void __go_go(void (*pfn)(void*), void*);
+void __go_gc_goroutine_init(void*);
+void __go_enable_gc(void);
+int __go_run_goroutine_gc(int);
+void __go_scanstacks(void (*scan)(int32, byte *, int64));
+void __go_stealcache(void);
 
 /*
  * mutual exclusion locks.  in the uncontended case,
@@ -117,6 +139,10 @@
 void lock(Lock*);
 void unlock(Lock*);
 void destroylock(Lock*);
+bool trylock(Lock*);
+
+void semacquire (uint32 *) asm ("libgo_runtime.runtime.Semacquire");
+void semrelease (uint32 *) asm ("libgo_runtime.runtime.Semrelease");
 
 /*
  * sleep and wakeup on one-time events.
@@ -136,10 +162,17 @@
 #define mcmp(a, b, s) __builtin_memcmp((a), (b), (s))
 
 MCache* allocmcache(void);
 void free(void *v);
-void addfinalizer(void*, void(*fn)(void*), int32);
+struct __go_func_type;
+void addfinalizer(void*, void(*fn)(void*), const struct __go_func_type *);
+void walkfintab(void (*fn)(void*));
 
 #define runtime_mmap mmap
 #define cas(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
 
+struct __go_func_type;
+void reflect_call(const struct __go_func_type *, const void *, _Bool, void **,
+                  void **)
+  asm ("libgo_reflect.reflect.call");
+
 #ifdef __rtems__
 void __wrap_rtems_task_variable_add(void **);
 #endif
diff -r c7120a8d41cc libgo/runtime/thread.c
--- a/libgo/runtime/thread.c Fri Jul 23 09:44:07 2010 -0700
+++ b/libgo/runtime/thread.c Fri Jul 23 09:45:52 2010 -0700
@@ -30,3 +30,9 @@
 {
     pthread_mutex_destroy(&l->mutex);
 }
+
+bool
+trylock(Lock *l)
+{
+    return pthread_mutex_trylock(&l->mutex) == 0;
+}
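
For readers who want to see the shape of the stop-the-world handshake in
isolation, here is a minimal, self-contained sketch.  It is not part of
the patch: the names (worker, ready_sem, GC_SIG_STOP/GC_SIG_START) are
invented, and it models only a single thread being stopped and
restarted.  Note how the start signal is blocked in the normal signal
mask, just as __go_gc_goroutine_init arranges, so a start signal sent
before the thread reaches sigsuspend stays pending instead of being
lost.  Compile with something like "gcc sketch.c -pthread".

/* sketch.c: toy model of the stop/start protocol in go-go.c above.
   Assumes POSIX real-time signals and semaphores.  */

#include <pthread.h>
#include <semaphore.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define GC_SIG_START (SIGRTMIN + 1)
#define GC_SIG_STOP  (SIGRTMIN + 2)

static sem_t ready_sem;       /* posted by a thread once it has stopped */
static sigset_t wait_sigset;  /* everything blocked except GC_SIG_START */

static void
start_handler (int sig __attribute__ ((unused)))
{
  /* Nothing to do: its only job is to make sigsuspend return.  */
}

static void
stop_handler (int sig __attribute__ ((unused)))
{
  /* In libgo this is where the thread records its stack bounds.  */
  sem_post (&ready_sem);      /* tell the collector we are parked */
  sigsuspend (&wait_sigset);  /* sleep until GC_SIG_START arrives */
}

static void *
worker (void *arg __attribute__ ((unused)))
{
  for (;;)
    pause ();                 /* stand-in for real goroutine work */
  return NULL;
}

int
main (void)
{
  pthread_t tid;
  sigset_t block;
  struct sigaction act;

  sem_init (&ready_sem, 0, 0);
  sigfillset (&wait_sigset);
  sigdelset (&wait_sigset, GC_SIG_START);

  /* Keep GC_SIG_START blocked normally so it cannot be lost; it is
     only unblocked inside sigsuspend.  The worker inherits this mask.  */
  sigemptyset (&block);
  sigaddset (&block, GC_SIG_START);
  pthread_sigmask (SIG_BLOCK, &block, NULL);

  memset (&act, 0, sizeof act);
  sigemptyset (&act.sa_mask);
  act.sa_handler = stop_handler;
  sigaction (GC_SIG_STOP, &act, NULL);
  act.sa_handler = start_handler;
  sigaction (GC_SIG_START, &act, NULL);

  pthread_create (&tid, NULL, worker, NULL);
  sleep (1);                      /* let the worker get going */

  pthread_kill (tid, GC_SIG_STOP);   /* "stoptheworld", one thread */
  sem_wait (&ready_sem);             /* wait until it is parked */
  puts ("world stopped; a real collector would mark and sweep here");
  pthread_kill (tid, GC_SIG_START);  /* "starttheworld" */

  sleep (1);
  return 0;                     /* exiting main ends the worker too */
}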
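
The collector itself is conservative: scanblock treats any word that
might be a pointer as one.  A compact sketch of what that test reduces
to follows; heap_min, heap_max and mark_object are hypothetical
stand-ins for mheap.min, mheap.max and the reference-flag bookkeeping
in mgc0.c.

#include <stdint.h>
#include <stddef.h>

/* Hypothetical stand-ins for the real heap bounds and mark step.  */
extern unsigned char *heap_min, *heap_max;
extern void mark_object (void *obj);

/* Conservatively scan the n bytes at b: any properly aligned word
   whose value points into the heap is treated as a live pointer.  */
void
scan_region (unsigned char *b, size_t n)
{
  size_t off = (uintptr_t) b & (sizeof (void *) - 1);

  if (n < sizeof (void *))
    return;
  if (off != 0)
    {
      /* Round up to a word boundary, as scanblock does.  */
      b += sizeof (void *) - off;
      n -= sizeof (void *) - off;
    }

  void **words = (void **) b;
  size_t nwords = n / sizeof (void *);
  for (size_t i = 0; i < nwords; i++)
    {
      unsigned char *obj = (unsigned char *) words[i];
      if (obj >= heap_min && obj < heap_max)
        mark_object (obj);  /* a real collector also recurses */
    }
}

This is why the patch works so hard to get everything into the scan's
view: every stack (including split-stack segments), every callee-saved
register via SAVE_REGS, the registered data/bss regions, and the
closure word stored after each trampoline.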