diff mbox

[gccgo] Split stack improvements

Message ID mcrbp9ye03y.fsf@google.com
State New
Headers show

Commit Message

Ian Lance Taylor July 23, 2010, 2:36 p.m. UTC
This patch incorporates several different improvements to the split stack
code.

* Block signals while splitting the stack.  This permits a signal
  handler to use split stack code itself.

* Add __splitstack_find function.  This gives a program a way to find
  its stack space, which is needed to implement some types of garbage
  collection.

* Add guard pages, controlled by SPLIT_STACK_GUARD environment variable.
  This is for debugging.

* Record the initial value of the stack pointer for each thread.  This
  is needed for __splitstack_find, and permits more use of the initial
  stack.

* Increase the backoff so there is room to run a signal handler.

* Make x86_64 unwinding behave like i386: the unwinder skips the single
  return instruction.  This makes backtraces nicer.

* Fix i386 unwinding when PIC.  A bug fix--since the main stack split
  function never sets %ebx, it may not be set as required in the
  exception handler.

Committed to gccgo branch.

Ian
diff mbox

Patch

Index: libgcc/generic-morestack.c
===================================================================
--- libgcc/generic-morestack.c	(revision 161680)
+++ libgcc/generic-morestack.c	(working copy)
@@ -39,6 +39,7 @@  see the files COPYING3 and COPYING.RUNTI
 #include <assert.h>
 #include <errno.h>
 #include <signal.h>
+#include <stdlib.h>
 #include <unistd.h>
 #include <sys/mman.h>
 #include <sys/uio.h>
@@ -54,6 +55,10 @@  see the files COPYING3 and COPYING.RUNTI
    minimize stack usage even at the cost of code size, and in general
    inlining everything will do that.  */
 
+extern void
+__generic_morestack_set_initial_sp (void *sp, size_t len)
+  __attribute__ ((no_split_stack, flatten, visibility ("hidden")));
+
 extern void *
 __generic_morestack (size_t *frame_size, void *old_stack, size_t param_size)
   __attribute__ ((no_split_stack, flatten, visibility ("hidden")));
@@ -62,6 +67,14 @@  extern void *
 __generic_releasestack (size_t *pavailable)
   __attribute__ ((no_split_stack, flatten, visibility ("hidden")));
 
+extern void
+__morestack_block_signals (void)
+  __attribute__ ((no_split_stack, flatten, visibility ("hidden")));
+
+extern void
+__morestack_unblock_signals (void)
+  __attribute__ ((no_split_stack, flatten, visibility ("hidden")));
+
 extern size_t
 __generic_findstack (void *stack)
   __attribute__ ((no_split_stack, flatten, visibility ("hidden")));
@@ -70,6 +83,10 @@  extern void
 __morestack_load_mmap (void)
   __attribute__ ((no_split_stack));
 
+extern void *
+__splitstack_find (void *, void *, size_t *, void **, void **, void **)
+  __attribute__ ((visibility ("default")));
+
 /* When we allocate a stack segment we put this header at the
    start.  */
 
@@ -102,6 +119,26 @@  __thread struct stack_segment *__moresta
 __thread struct stack_segment *__morestack_current_segment
   __attribute__ ((visibility ("default")));
 
+/* The (approximate) initial stack pointer and size for this thread on
+   the system supplied stack.  This is set when the thread is created.
+   We also store a sigset_t here to hold the signal mask while
+   splitting the stack, since we don't want to store that on the
+   stack.  */
+
+struct initial_sp
+{
+  void *sp;		/* Recorded stack pointer, on a 512 byte boundary.  */
+  size_t len;		/* Estimated space available on that stack.  */
+  sigset_t mask;	/* Signal mask saved while splitting the stack.  */
+};
+
+__thread struct initial_sp __morestack_initial_sp
+  __attribute__ ((visibility ("default")));
+
+/* A static signal mask, to avoid taking up stack space.  */
+
+static sigset_t __morestack_fullmask;
+
 /* Convert an integer to a decimal string without using much stack
    space.  Return a pointer to the part of the buffer to use.  We this
    instead of sprintf because sprintf will require too much stack
@@ -180,6 +217,7 @@  static struct stack_segment *
 allocate_segment (size_t frame_size)
 {
   static unsigned int static_pagesize;
+  static int use_guard_page;
   unsigned int pagesize;
   unsigned int overhead;
   unsigned int allocate;
@@ -201,6 +239,8 @@  allocate_segment (size_t frame_size)
       p = 0;
 #endif
 
+      use_guard_page = getenv ("SPLIT_STACK_GUARD") != 0;
+
       /* FIXME: I'm not sure this assert should be in the released
 	 code.  */
       assert (p == 0 || p == pagesize);
@@ -216,6 +256,9 @@  allocate_segment (size_t frame_size)
     allocate = ((frame_size + overhead + pagesize - 1)
 		& ~ (pagesize - 1));
 
+  if (use_guard_page)
+    allocate += pagesize;
+
   /* FIXME: If this binary requires an executable stack, then we need
      to set PROT_EXEC.  Unfortunately figuring that out is complicated
      and target dependent.  We would need to use dl_iterate_phdr to
@@ -230,6 +273,21 @@  allocate_segment (size_t frame_size)
       __morestack_fail (msg, sizeof msg - 1, errno);
     }
 
+  if (use_guard_page)
+    {
+      void *guard;
+
+#ifdef STACK_GROWS_DOWNWARD
+      guard = space;
+      space = (char *) space + pagesize;
+#else
+      guard = space + allocate - pagesize;
+#endif
+
+      mprotect (guard, pagesize, PROT_NONE);
+      allocate -= pagesize;
+    }
+
   pss = (struct stack_segment *) space;
 
   pss->prev = __morestack_current_segment;
@@ -271,6 +329,41 @@  __morestack_release_segments (struct sta
   *pp = NULL;
 }
 
+/* This function is called by a processor specific function to set the
+   initial stack pointer for a thread.  The operating system will
+   always create a stack for a thread.  Here we record a stack pointer
+   near the base of that stack.  The size argument lets the processor
+   specific code estimate how much stack space is available on this
+   initial stack.  */
+
+void
+__generic_morestack_set_initial_sp (void *sp, size_t len)
+{
+  /* The stack pointer most likely starts on a page boundary.  Round
+     it toward the base of the stack to a 512 byte boundary.  It's not
+     essential that we be precise here; getting it wrong will just
+     leave some stack space unused.  */
+#ifdef STACK_GROWS_DOWNWARD
+  sp = (void *) ((((__UINTPTR_TYPE__) sp + 511U) / 512U) * 512U);
+#else
+  sp = (void *) ((((__UINTPTR_TYPE__) sp - 511U) / 512U) * 512U);
+#endif
+
+  __morestack_initial_sp.sp = sp;
+  __morestack_initial_sp.len = len;
+  sigemptyset (&__morestack_initial_sp.mask);
+
+  sigfillset (&__morestack_fullmask);
+#ifdef __linux__
+  /* On Linux, the first two real time signals are used by the NPTL
+     threading library.  By taking them out of the set of signals, we
+     avoid copying the signal mask in pthread_sigmask.  More
+     importantly, pthread_sigmask uses less stack space on x86_64.  */
+  sigdelset (&__morestack_fullmask, __SIGRTMIN);
+  sigdelset (&__morestack_fullmask, __SIGRTMIN + 1);
+#endif
+}
+
 /* This function is called by a processor specific function which is
    run in the prologue when more stack is needed.  The processor
    specific function handles the details of saving registers and
@@ -370,14 +463,59 @@  __generic_releasestack (size_t *pavailab
     }
   else
     {
-      /* We have popped back to the original stack.  We don't know how
-	 large it is.  */
-      *pavailable = 512;
+      size_t used;
+
+      /* We have popped back to the original stack.  */
+#ifdef STACK_GROWS_DOWNWARD
+      if ((char *) old_stack >= (char *) __morestack_initial_sp.sp)
+	used = 0;
+      else
+	used = (char *) __morestack_initial_sp.sp - (char *) old_stack;
+#else
+      if ((char *) old_stack <= (char *) __morestack_initial_sp.sp)
+	used = 0;
+      else
+	used = (char *) old_stack - (char *) __morestack_initial_sp.sp;
+#endif
+
+      if (used > __morestack_initial_sp.len)
+	*pavailable = 0;
+      else
+	*pavailable = __morestack_initial_sp.len - used;
     }
 
   return old_stack;
 }
 
+/* Block signals while splitting the stack, saving the previous mask
+   in __morestack_initial_sp.mask.  This avoids trouble if we try to
+   invoke a signal handler which itself wants to split the stack.  */
+
+extern int pthread_sigmask (int, const sigset_t *, sigset_t *)
+  __attribute__ ((weak));
+
+void
+__morestack_block_signals (void)
+{
+  if (pthread_sigmask)	/* Weak symbol: use it only if it is present.  */
+    pthread_sigmask (SIG_BLOCK, &__morestack_fullmask,
+		     &__morestack_initial_sp.mask);
+  else
+    sigprocmask (SIG_BLOCK, &__morestack_fullmask,
+		 &__morestack_initial_sp.mask);
+}
+
+/* Restore the signal mask saved in __morestack_initial_sp.mask.  */
+
+void
+__morestack_unblock_signals (void)
+{
+  if (pthread_sigmask)	/* Weak symbol: use it only if it is present.  */
+    pthread_sigmask (SIG_SETMASK, &__morestack_initial_sp.mask, NULL);
+  else
+    sigprocmask (SIG_SETMASK, &__morestack_initial_sp.mask, NULL);
+}
+
 /* Find the stack segment for STACK and return the amount of space
    available.  This is used when unwinding the stack because of an
    exception, in order to reset the stack guard correctly.  */
@@ -386,6 +524,7 @@  size_t
 __generic_findstack (void *stack)
 {
   struct stack_segment *pss;
+  size_t used;
 
   for (pss = __morestack_current_segment; pss != NULL; pss = pss->prev)
     {
@@ -401,8 +540,23 @@  __generic_findstack (void *stack)
 	}
     }
 
-  // We don't know where we are on the stack.
-  return 512;
+  /* We have popped back to the original stack.  */
+#ifdef STACK_GROWS_DOWNWARD
+  if ((char *) stack >= (char *) __morestack_initial_sp.sp)
+    used = 0;
+  else
+    used = (char *) __morestack_initial_sp.sp - (char *) stack;
+#else
+  if ((char *) stack <= (char *) __morestack_initial_sp.sp)
+    used = 0;
+  else
+    used = (char *) stack - (char *) __morestack_initial_sp.sp;
+#endif
+
+  if (used > __morestack_initial_sp.len)
+    return 0;
+  else
+    return __morestack_initial_sp.len - used;
 }
 
 /* This function is called at program startup time to make sure that
@@ -417,7 +571,121 @@  __morestack_load_mmap (void)
      fails.  Pass __MORESTACK_CURRENT_SEGMENT to make sure that any
      TLS accessor function is resolved.  */
   mmap (__morestack_current_segment, 0, PROT_READ, MAP_ANONYMOUS, -1, 0);
+  mprotect (NULL, 0, 0);
   munmap (0, getpagesize ());
 }
 
+/* This function may be used to iterate over the stack segments.
+   This can be called like this.
+     void *next_segment = NULL;
+     void *next_sp = NULL;
+     void *initial_sp = NULL;
+     void *stack;
+     size_t stack_size;
+     while ((stack = __splitstack_find (next_segment, next_sp, &stack_size,
+                                        &next_segment, &next_sp,
+					&initial_sp)) != NULL)
+       {
+         // Stack segment starts at stack and is stack_size bytes long.
+       }
+
+   There is no way to iterate over the stack segments of a different
+   thread.  However, what is permitted is for one thread to call this
+   with the first two values NULL, to pass next_segment, next_sp, and
+   initial_sp to a different thread, and then to suspend one way or
+   another.  A different thread may run the subsequent
+   __splitstack_find iterations.  Of course, this will only work if the
+   first thread is suspended during the __splitstack_find iterations.
+   If not, the second thread will be looking at the stack while it is
+   changing, and anything could happen.
+
+   FIXME: This should be declared in some header file, but where?  */
+
+void *
+__splitstack_find (void *segment_arg, void *sp, size_t *len,
+		   void **next_segment, void **next_sp,
+		   void **initial_sp)
+{
+  struct stack_segment *segment;
+  void *ret;
+  char *nsp;
+
+  if (segment_arg == (void *) 1)  /* Sentinel: report the initial stack.  */
+    {
+      char *isp = (char *) *initial_sp;
+
+      *next_segment = (void *) 2;
+      *next_sp = NULL;
+#ifdef STACK_GROWS_DOWNWARD
+      if ((char *) sp >= isp)
+	return NULL;
+      *len = (char *) isp - (char *) sp;
+      return sp;
+#else
+      if ((char *) sp <= (char *) isp)
+	return NULL;
+      *len = (char *) sp - (char *) isp;
+      return (void *) isp;
+#endif
+    }
+  else if (segment_arg == (void *) 2)  /* Sentinel: iteration is done.  */
+    return NULL;
+  else if (segment_arg != NULL)
+    segment = (struct stack_segment *) segment_arg;
+  else
+    {
+      *initial_sp = __morestack_initial_sp.sp;
+      segment = __morestack_current_segment;
+      sp = (void *) &segment;  /* Address of a local stands in for sp.  */
+      while (1)
+	{
+	  if (segment == NULL)
+	    return __splitstack_find ((void *) 1, sp, len, next_segment,
+				      next_sp, initial_sp);
+	  if ((char *) sp >= (char *) (segment + 1)
+	      && (char *) sp <= (char *) (segment + 1) + segment->size)
+	    break;
+	  segment = segment->prev;
+	}
+    }
+
+  if (segment->prev == NULL)
+    *next_segment = (void *) 1;
+  else
+    *next_segment = segment->prev;
+
+  /* The old_stack value is the address of the function parameters of
+     the function which called __morestack.  So if f1 called f2 which
+     called __morestack, the stack looks like this:
+
+         parameters       <- old_stack
+         return in f1
+	 return in f2
+	 data pushed by __morestack
+
+     On x86, the data pushed by __morestack includes the saved value
+     of the ebp/rbp register.  We want our caller to be able to see
+     that value, which can not be found on any other stack.  So we
+     adjust accordingly.  This may need to be tweaked for other
+     targets.  */
+
+  nsp = (char *) segment->old_stack;
+#ifdef STACK_GROWS_DOWNWARD
+  nsp -= 3 * sizeof (void *);
+#else
+  nsp += 3 * sizeof (void *);
+#endif
+  *next_sp = (void *) nsp;
+
+#ifdef STACK_GROWS_DOWNWARD
+  *len = (char *) (segment + 1) + segment->size - (char *) sp;
+  ret = (void *) sp;
+#else
+  *len = (char *) sp - (char *) (segment + 1);
+  ret = (void *) (segment + 1);
+#endif
+
+  return ret;
+}
+
 #endif /* !defined (inhibit_libc) */
Index: libgcc/config/i386/morestack.S
===================================================================
--- libgcc/config/i386/morestack.S	(revision 161680)
+++ libgcc/config/i386/morestack.S	(working copy)
@@ -77,12 +77,13 @@ 
 # requested stack space by 16K.
 
 # The amount of extra space we ask for.  In general this has to be
-# enough for the dynamic loader to find a symbol.
+# enough for the dynamic loader to find a symbol and for a signal
+# handler to run.
 	
 #ifndef __x86_64__
-#define BACKOFF (512)
-#else
 #define BACKOFF (1024)
+#else
+#define BACKOFF (1536)
 #endif
 
 	.global __morestack_non_split
@@ -153,6 +154,8 @@  __morestack:
 	pushl	%edx
 	pushl	%ecx
 
+	call	__morestack_block_signals
+
 	pushl	12(%ebp)		# The size of the parameters.
 	leal	20(%ebp),%eax		# Address of caller's parameters.
 	pushl	%eax
@@ -175,6 +178,8 @@  __morestack:
 .LEHB0:
 	movl	%eax,%gs:0x30		# Save the new stack boundary.
 
+	call	__morestack_unblock_signals
+
 	movl	-8(%ebp),%edx		# Restore registers.
 	movl	-12(%ebp),%ecx
 
@@ -199,6 +204,8 @@  __morestack:
 	pushl	%eax
 	pushl	%edx
 
+	call	__morestack_block_signals
+
 	pushl	$0			# Where the available space is returned.
 	leal	0(%esp),%eax		# Push its address.
 	push	%eax
@@ -212,12 +219,27 @@  __morestack:
 
 	addl	$8,%esp			# Remove values from stack.
 
-	popl	%edx			# Restore possible return value
+	# We need to restore the old stack pointer, which is in %ebp,
+	# before we unblock signals.  We also need to restore %eax and
+	# %edx after we unblock signals but before we return.  Do this
+	# by moving %eax and %edx from the current stack to the old
+	# stack.
+
+	popl	%edx			# Pop return value from current stack.
+	popl	%eax
+
+	movl	%ebp,%esp		# Restore stack pointer.
+
+	pushl	%eax			# Push return value on old stack.
+	pushl	%edx
+
+	call	__morestack_unblock_signals
+
+	popl	%edx			# Restore return value.
 	popl	%eax
 
-	# Switch back to the old stack via copy back from %ebp.
-	leave
 	.cfi_remember_state
+	popl	%ebp
 	.cfi_restore %ebp
 	.cfi_def_cfa %esp, 12
 	ret	$8			# Return to caller, which will
@@ -240,6 +262,9 @@  __morestack:
 	movl	4(%esp),%eax		# Function argument.
 	movl	%eax,(%esp)
 #ifdef __PIC__
+#undef __i686
+	call	__i686.get_pc_thunk.bx	# %ebx may not be set up for us.
+	addl	$_GLOBAL_OFFSET_TABLE_, %ebx
 	call	_Unwind_Resume@PLT	# Resume unwinding.
 #else
 	call	_Unwind_Resume
@@ -264,6 +289,13 @@  __morestack:
 	movq	%rsp, %rbp
 	.cfi_def_cfa_register %rbp
 
+	# We will return to a single return instruction, which will
+	# return to the caller of our caller.  Let the unwinder skip
+	# that single return instruction, and just return to the real
+	# caller.
+	.cfi_offset 16, 0
+	.cfi_escape 0x15, 7, 0x7f	# DW_CFA_val_offset_sf, %rsp, 8/-8
+
 	# In 64-bit mode the new stack frame size is passed in r10
         # and the argument size is passed in r11.
 
@@ -282,22 +314,17 @@  __morestack:
 	pushq	%r8
 	pushq	%r9
 
+	pushq	%r11
+
+	call	__morestack_block_signals
+
 	leaq	-8(%rbp),%rdi		# Address of new frame size.
 	leaq	24(%rbp),%rsi		# The caller's parameters.
-	movq	%r11,%rdx		# The size of the parameters.
+	popq	%rdx			# The size of the parameters.
 
 	call	__generic_morestack
 
-	popq	%r9			# Restore registers
-	popq	%r8
-	popq	%rcx
-	popq	%rdx
-	popq	%rsi
-	popq	%rdi
-
-	popq	%r11			# Pop old %rax into %r11.
-
-	popq	%r10			# Reload modified frame size
+	movq	-8(%rbp),%r10		# Reload modified frame size
 	movq	%rax,%rsp		# Switch to the new stack.
 	subq	%r10,%rax		# The end of the stack space.
 	addq	$BACKOFF,%rax		# Back off 1024 bytes.
@@ -308,11 +335,20 @@  __morestack:
 .LEHB0:
 	movq	%rax,%fs:0x70		# Save the new stack boundary.
 
+	call	__morestack_unblock_signals
+
+	movq	-24(%rbp),%rdi		# Restore registers.
+	movq	-32(%rbp),%rsi
+	movq	-40(%rbp),%rdx
+	movq	-48(%rbp),%rcx
+	movq	-56(%rbp),%r8
+	movq	-64(%rbp),%r9
+
 	movq	8(%rbp),%r10		# Increment the return address
 	incq	%r10			# to skip the ret instruction;
 					# see above.
 
-	movq	%r11,%rax		# Restore caller's %rax.
+	movq	-16(%rbp),%rax		# Restore caller's %rax.
 
 	call	*%r10			# Call our caller!
 
@@ -324,6 +360,8 @@  __morestack:
 	pushq	%rax
 	pushq	%rdx
 
+	call	__morestack_block_signals
+
 	pushq	$0			# Where the available space is returned.
 	leaq	0(%rsp),%rdi		# Pass its address.
 
@@ -336,12 +374,27 @@  __morestack:
 
 	addq	$8,%rsp			# Remove value from stack.
 
-	popq	%rdx			# Restore possible return value
+	# We need to restore the old stack pointer, which is in %rbp,
+	# before we unblock signals.  We also need to restore %rax and
+	# %rdx after we unblock signals but before we return.  Do this
+	# by moving %rax and %rdx from the current stack to the old
+	# stack.
+
+	popq	%rdx			# Pop return value from current stack.
+	popq	%rax
+
+	movq	%rbp,%rsp		# Restore stack pointer.
+
+	pushq	%rax			# Push return value on old stack.
+	pushq	%rdx
+
+	call	__morestack_unblock_signals
+
+	popq	%rdx			# Restore return value.
 	popq	%rax
 
-	# Switch back to the old stack via copy back from %rbp.
 	.cfi_remember_state
-	leave
+	popq	%rbp
 	.cfi_restore %rbp
 	.cfi_def_cfa %rsp, 8
 	ret				# Return to caller, which will
@@ -425,12 +478,27 @@  __stack_split_initialize:
 
 	leal	-16000(%esp),%eax	# We should have at least 16K.
 	movl	%eax,%gs:0x30
+	pushl	$16000
+	pushl	%esp
+#ifdef __PIC__
+	call	__generic_morestack_set_initial_sp@PLT
+#else
+	call	__generic_morestack_set_initial_sp
+#endif
+	addl	$8,%esp
 	ret
 
 #else /* defined(__x86_64__) */
 
 	leaq	-16000(%rsp),%rax	# We should have at least 16K.
 	movq	%rax,%fs:0x70
+	movq	%rsp,%rdi
+	movq	$16000,%rsi
+#ifdef __PIC__
+	call	__generic_morestack_set_initial_sp@PLT
+#else
+	call	__generic_morestack_set_initial_sp
+#endif
 	ret
 
 #endif /* defined(__x86_64__) */