Patchwork PATCH RFA: Split stack [6/7]: libgcc

login
register
mail settings
Submitter Ian Taylor
Date Sept. 22, 2010, 10:04 p.m.
Message ID <mcrk4md77uj.fsf@google.com>
Download mbox | patch
Permalink /patch/65467/
State New
Headers show

Comments

Ian Taylor - Sept. 22, 2010, 10:04 p.m.
This is the sixth of the -fsplit-stack patches.  This contains the
changes to libgcc.  This adds the runtime support, the code which
actually splits the stack.  The code is separated into the generic code,
written in C, which should work for any target, and the x86 specific
code, written in assembler.

I'm a libgcc maintainer, so no formal approval is required, but as
always I would like to hear any comments on the code.

Ian


libgcc/ChangeLog:

2010-09-21  Ian Lance Taylor  <iant@google.com>

	* generic-morestack.h: New file.
	* generic-morestack.c: New file.
	* generic-morestack-thread.c: New file.
	* config/i386/morestack.S: New file.
	* config/t-stack: New file.
	* config/i386/t-stack-i386: New file.
	* config.host (i[34567]86-*-linux* and friends): Add t-stack and
	i386/t-stack-i386 to tmake_file.

gcc/ChangeLog:

2010-09-21  Ian Lance Taylor  <iant@google.com>

	* libgcc-std.ver (GCC_4.6.0): New version, for -fsplit-stack
	support routines.
Joseph S. Myers - Sept. 22, 2010, 10:48 p.m.
On Wed, 22 Sep 2010, Ian Lance Taylor wrote:

> 2010-09-21  Ian Lance Taylor  <iant@google.com>
> 
> 	* libgcc-std.ver (GCC_4.6.0): New version, for -fstack-split
> 	support routines.

The new version already exists, inheriting from GCC_4.5.0 (but is 
currently empty in the generic file); you should put the functions inside 
the existing GCC_4.6.0 block rather than adding a second such block.  The 
new functions should be documented in libgcc.texi.

Patch

Index: libgcc/config.host
===================================================================
--- libgcc/config.host	(revision 164490)
+++ libgcc/config.host	(working copy)
@@ -602,7 +602,7 @@  case ${host} in
 i[34567]86-*-linux* | x86_64-*-linux* | \
   i[34567]86-*-kfreebsd*-gnu | i[34567]86-*-knetbsd*-gnu | \
   i[34567]86-*-gnu*)
-	tmake_file="${tmake_file} t-tls"
+	tmake_file="${tmake_file} t-tls t-stack i386/t-stack-i386"
 	;;
 esac
 
Index: libgcc/generic-morestack.c
===================================================================
--- libgcc/generic-morestack.c	(revision 0)
+++ libgcc/generic-morestack.c	(revision 0)
@@ -0,0 +1,876 @@ 
+/* Library support for -fsplit-stack.  */
+/* Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+   Contributed by Ian Lance Taylor <iant@google.com>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#include "tconfig.h"
+#include "tsystem.h"
+#include "coretypes.h"
+#include "tm.h"
+
+/* If inhibit_libc is defined, we can not compile this file.  The
+   effect is that people will not be able to use -fsplit-stack.  That
+   is much better than failing the build particularly since people
+   will want to define inhibit_libc while building a compiler which
+   can build glibc.  */
+
+#ifndef inhibit_libc
+
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/uio.h>
+
+#include "generic-morestack.h"
+
+/* This file contains subroutines that are used by code compiled with
+   -fsplit-stack.  */
+
+/* Declare functions to avoid warnings--there is no header file for
+   these internal functions.  We give most of these functions the
+   flatten attribute in order to minimize their stack usage--here we
+   must minimize stack usage even at the cost of code size, and in
+   general inlining everything will do that.  */
+
+extern void
+__generic_morestack_set_initial_sp (void *sp, size_t len)
+  __attribute__ ((no_split_stack, flatten, visibility ("hidden")));
+
+extern void *
+__generic_morestack (size_t *frame_size, void *old_stack, size_t param_size)
+  __attribute__ ((no_split_stack, flatten, visibility ("hidden")));
+
+extern void *
+__generic_releasestack (size_t *pavailable)
+  __attribute__ ((no_split_stack, flatten, visibility ("hidden")));
+
+extern void
+__morestack_block_signals (void)
+  __attribute__ ((no_split_stack, flatten, visibility ("hidden")));
+
+extern void
+__morestack_unblock_signals (void)
+  __attribute__ ((no_split_stack, flatten, visibility ("hidden")));
+
+extern size_t
+__generic_findstack (void *stack)
+  __attribute__ ((no_split_stack, flatten, visibility ("hidden")));
+
+extern void
+__morestack_load_mmap (void)
+  __attribute__ ((no_split_stack, visibility ("hidden")));
+
+extern void *
+__morestack_allocate_stack_space (size_t size)
+  __attribute__ ((visibility ("hidden")));
+
+/* This is a function which -fsplit-stack code can call to get a list
+   of the stacks.  Since it is not called only by the compiler, it is
+   not hidden.  */
+
+extern void *
+__splitstack_find (void *, void *, size_t *, void **, void **, void **)
+  __attribute__ ((visibility ("default")));
+
+/* When we allocate a stack segment we put this header at the
+   start.  */
+
+struct stack_segment
+{
+  /* The previous stack segment--when a function running on this stack
+     segment returns, it will run on the previous one.  */
+  struct stack_segment *prev;
+  /* The next stack segment, if it has been allocated--when a function
+     is running on this stack segment, the next one is not being
+     used.  */
+  struct stack_segment *next;
+  /* The total size of this stack segment.  */
+  size_t size;
+  /* The stack address when this stack was created.  This is used when
+     popping the stack.  */
+  void *old_stack;
+  /* A list of memory blocks allocated by dynamic stack
+     allocation.  */
+  struct dynamic_allocation_blocks *dynamic_allocation;
+  /* A list of dynamic memory blocks no longer needed.  */
+  struct dynamic_allocation_blocks *free_dynamic_allocation;
+  /* An extra pointer in case we need some more information some
+     day.  */
+  void *extra;
+};
+
+/* This structure holds the (approximate) initial stack pointer and
+   size for the system supplied stack for a thread.  This is set when
+   the thread is created.  We also store a sigset_t here to hold the
+   signal mask while splitting the stack, since we don't want to store
+   that on the stack.  */
+
+struct initial_sp
+{
+  /* The initial stack pointer.  */
+  void *sp;
+  /* The stack length.  */
+  size_t len;
+  /* A signal mask, put here so that the thread can use it without
+     needing stack space.  */
+  sigset_t mask;
+  /* Some extra space for later extensibility.  */
+  void *extra[5];
+};
+
+/* A list of memory blocks allocated by dynamic stack allocation.
+   This is used for code that calls alloca or uses variably sized
+   arrays.  */
+
+struct dynamic_allocation_blocks
+{
+  /* The next block in the list.  */
+  struct dynamic_allocation_blocks *next;
+  /* The size of the allocated memory.  */
+  size_t size;
+  /* The allocated memory.  */
+  void *block;
+};
+
+/* These thread local global variables must be shared by all split
+   stack code across shared library boundaries.  Therefore, they have
+   default visibility.  They have extensibility fields if needed for
+   new versions.  If more radical changes are needed, new code can be
+   written using new variable names, while still using the existing
+   variables in a backward compatible manner.  Symbol versioning is
+   also used, although, since these variables are only referenced by
+   code in this file and generic-morestack-thread.c, it is likely that
+   simply using new names will suffice.  */
+
+/* The first stack segment allocated for this thread.  */
+
+__thread struct stack_segment *__morestack_segments
+  __attribute__ ((visibility ("default")));
+
+/* The stack segment that we think we are currently using.  This will
+   be correct in normal usage, but will be incorrect if an exception
+   unwinds into a different stack segment or if longjmp jumps to a
+   different stack segment.  */
+
+__thread struct stack_segment *__morestack_current_segment
+  __attribute__ ((visibility ("default")));
+
+/* The initial stack pointer and size for this thread.  */
+
+__thread struct initial_sp __morestack_initial_sp
+  __attribute__ ((visibility ("default")));
+
+/* A static signal mask, to avoid taking up stack space.  */
+
+static sigset_t __morestack_fullmask;
+
+/* Convert an integer to a decimal string without using much stack
+   space.  Return a pointer to the part of the buffer to use.  We use
+   this instead of sprintf because sprintf will require too much stack
+   space.  */
+
+static char *
+print_int (int val, char *buf, int buflen, size_t *print_len)
+{
+  int is_negative;
+  int i;
+  unsigned int uval;
+
+  uval = (unsigned int) val;
+  if (val >= 0)
+    is_negative = 0;
+  else
+    {
+      is_negative = 1;
+      uval = - uval;
+    }
+
+  i = buflen;
+  do
+    {
+      --i;
+      buf[i] = '0' + (uval % 10);
+      uval /= 10;
+    }
+  while (uval != 0 && i > 0);
+
+  if (is_negative)
+    {
+      if (i > 0)
+	--i;
+      buf[i] = '-';
+    }
+
+  *print_len = buflen - i;
+  return buf + i;
+}
+
+/* Print the string MSG/LEN, the errno number ERR, and a newline on
+   stderr.  Then crash.  */
+
+void
+__morestack_fail (const char *, size_t, int) __attribute__ ((noreturn));
+
+void
+__morestack_fail (const char *msg, size_t len, int err)
+{
+  char buf[24];
+  static const char nl[] = "\n";
+  struct iovec iov[3];
+  union { char *p; const char *cp; } const_cast;
+
+  const_cast.cp = msg;
+  iov[0].iov_base = const_cast.p;
+  iov[0].iov_len = len;
+  /* We can't call strerror, because it may try to translate the error
+     message, and that would use too much stack space.  */
+  iov[1].iov_base = print_int (err, buf, sizeof buf, &iov[1].iov_len);
+  const_cast.cp = &nl[0];
+  iov[2].iov_base = const_cast.p;
+  iov[2].iov_len = sizeof nl - 1;
+  /* FIXME: On systems without writev we need to issue three write
+     calls, or punt on printing errno.  For now this is irrelevant
+     since stack splitting only works on GNU/Linux anyhow.  */
+  writev (2, iov, 3);
+  abort ();
+}
+
+/* Allocate a new stack segment with at least FRAME_SIZE usable bytes
+   after the header, and link it after the current segment.  */
+
+static struct stack_segment *
+allocate_segment (size_t frame_size)
+{
+  static unsigned int static_pagesize;
+  static int use_guard_page;
+  unsigned int pagesize;
+  size_t overhead;
+  size_t allocate;
+  void *space;
+  struct stack_segment *pss;
+
+  pagesize = static_pagesize;
+  if (pagesize == 0)
+    {
+      unsigned int p;
+
+      pagesize = getpagesize ();
+
+#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
+      p = __sync_val_compare_and_swap (&static_pagesize, 0, pagesize);
+#else
+      /* Just hope this assignment is atomic.  */
+      static_pagesize = pagesize;
+      p = 0;
+#endif
+
+      use_guard_page = getenv ("SPLIT_STACK_GUARD") != 0;
+
+      /* FIXME: I'm not sure this assert should be in the released
+	 code.  */
+      assert (p == 0 || p == pagesize);
+    }
+
+  overhead = sizeof (struct stack_segment);
+  /* The header shares the mapping, so size checks must include it.  */
+  allocate = pagesize;
+  if (allocate < MINSIGSTKSZ + overhead)
+    allocate = ((MINSIGSTKSZ + overhead + pagesize - 1)
+		& ~ ((size_t) pagesize - 1));
+  if (allocate < frame_size + overhead)
+    allocate = ((frame_size + overhead + pagesize - 1)
+		& ~ ((size_t) pagesize - 1));
+
+  if (use_guard_page)
+    allocate += pagesize;
+
+  /* FIXME: If this binary requires an executable stack, then we need
+     to set PROT_EXEC.  Unfortunately figuring that out is complicated
+     and target dependent.  We would need to use dl_iterate_phdr to
+     see if there is any object which does not have a PT_GNU_STACK
+     phdr, though only for architectures which use that mechanism.  */
+  space = mmap (NULL, allocate, PROT_READ | PROT_WRITE,
+		MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+  if (space == MAP_FAILED)
+    {
+      static const char msg[] =
+	"unable to allocate additional stack space: errno ";
+      __morestack_fail (msg, sizeof msg - 1, errno);
+    }
+  /* Make the page at the far end of the stack inaccessible.  */
+  if (use_guard_page)
+    {
+      void *guard;
+
+#ifdef STACK_GROWS_DOWNWARD
+      guard = space;
+      space = (char *) space + pagesize;
+#else
+      guard = (char *) space + allocate - pagesize;
+#endif
+
+      mprotect (guard, pagesize, PROT_NONE);
+      allocate -= pagesize;
+    }
+
+  pss = (struct stack_segment *) space;
+
+  pss->prev = __morestack_current_segment;
+  pss->next = NULL;
+  pss->size = allocate - overhead;
+  pss->dynamic_allocation = NULL;
+  pss->free_dynamic_allocation = NULL;
+  pss->extra = NULL;
+
+  if (__morestack_current_segment != NULL)
+    __morestack_current_segment->next = pss;
+  else
+    __morestack_segments = pss;
+
+  return pss;
+}
+
+/* Free a list of dynamic blocks.  */
+
+static void
+free_dynamic_blocks (struct dynamic_allocation_blocks *p)
+{
+  while (p != NULL)
+    {
+      struct dynamic_allocation_blocks *next;
+
+      next = p->next;
+      free (p->block);
+      free (p);
+      p = next;
+    }
+}
+
+/* Merge two lists of dynamic blocks.  */
+
+static struct dynamic_allocation_blocks *
+merge_dynamic_blocks (struct dynamic_allocation_blocks *a,
+		      struct dynamic_allocation_blocks *b)
+{
+  struct dynamic_allocation_blocks **pp;
+
+  if (a == NULL)
+    return b;
+  if (b == NULL)
+    return a;
+  for (pp = &a->next; *pp != NULL; pp = &(*pp)->next)
+    ;
+  *pp = b;
+  return a;
+}
+
+/* Release stack segments.  If FREE_DYNAMIC is non-zero, we also free
+   any dynamic blocks.  Otherwise we return them.  */
+
+struct dynamic_allocation_blocks *
+__morestack_release_segments (struct stack_segment **pp, int free_dynamic)
+{
+  struct dynamic_allocation_blocks *ret;
+  struct stack_segment *pss;
+
+  ret = NULL;
+  pss = *pp;
+  while (pss != NULL)
+    {
+      struct stack_segment *next;
+      unsigned int allocate;
+
+      next = pss->next;
+
+      if (pss->dynamic_allocation != NULL
+	  || pss->free_dynamic_allocation != NULL)
+	{
+	  if (free_dynamic)
+	    {
+	      free_dynamic_blocks (pss->dynamic_allocation);
+	      free_dynamic_blocks (pss->free_dynamic_allocation);
+	    }
+	  else
+	    {
+	      ret = merge_dynamic_blocks (pss->dynamic_allocation, ret);
+	      ret = merge_dynamic_blocks (pss->free_dynamic_allocation, ret);
+	    }
+	}
+
+      allocate = pss->size + sizeof (struct stack_segment);
+      if (munmap (pss, allocate) < 0)
+	{
+	  static const char msg[] = "munmap of stack space failed: errno ";
+	  __morestack_fail (msg, sizeof msg - 1, errno);
+	}
+
+      pss = next;
+    }
+  *pp = NULL;
+
+  return ret;
+}
+
+/* This function is called by a processor specific function to set the
+   initial stack pointer for a thread.  The operating system will
+   always create a stack for a thread.  Here we record a stack pointer
+   near the base of that stack.  The size argument lets the processor
+   specific code estimate how much stack space is available on this
+   initial stack.  */
+
+void
+__generic_morestack_set_initial_sp (void *sp, size_t len)
+{
+  /* The stack pointer most likely starts on a page boundary.  Adjust
+     to the nearest 512 byte boundary.  It's not essential that we be
+     precise here; getting it wrong will just leave some stack space
+     unused.  */
+#ifdef STACK_GROWS_DOWNWARD
+  sp = (void *) ((((__UINTPTR_TYPE__) sp + 511U) / 512U) * 512U);
+#else
+  sp = (void *) ((((__UINTPTR_TYPE__) sp - 511U) / 512U) * 512U);
+#endif
+
+  __morestack_initial_sp.sp = sp;
+  __morestack_initial_sp.len = len;
+  sigemptyset (&__morestack_initial_sp.mask);
+
+  sigfillset (&__morestack_fullmask);
+#ifdef __linux__
+  /* On Linux, the first two real time signals are used by the NPTL
+     threading library.  By taking them out of the set of signals, we
+     avoid copying the signal mask in pthread_sigmask.  More
+     importantly, pthread_sigmask uses less stack space on x86_64.  */
+  sigdelset (&__morestack_fullmask, __SIGRTMIN);
+  sigdelset (&__morestack_fullmask, __SIGRTMIN + 1);
+#endif
+}
+
+/* This function is called by a processor specific function which is
+   run in the prologue when more stack is needed.  The processor
+   specific function handles the details of saving registers and
+   frobbing the actual stack pointer.  This function is responsible
+   for allocating a new stack segment and for copying a parameter
+   block from the old stack to the new one.  On function entry
+   *PFRAME_SIZE is the size of the required stack frame--the returned
+   stack must be at least this large.  On function exit *PFRAME_SIZE
+   is the amount of space remaining on the allocated stack.  OLD_STACK
+   points at the parameters on the old stack (really the current one
+   while this function is running).  OLD_STACK is saved so that it can
+   be returned by a later call to __generic_releasestack.  PARAM_SIZE
+   is the size in bytes of parameters to copy to the new stack.  This
+   function returns a pointer to the new stack segment, pointing to
+   the memory after the parameters have been copied.  The returned
+   value minus the returned *PFRAME_SIZE (or plus if the stack grows
+   upward) is the first address on the stack which should not be used.
+
+   This function is running on the old stack and has only a limited
+   amount of stack space available.  */
+
+void *
+__generic_morestack (size_t *pframe_size, void *old_stack, size_t param_size)
+{
+  size_t frame_size = *pframe_size;
+  struct stack_segment *current;
+  struct stack_segment **pp;
+  struct dynamic_allocation_blocks *dynamic;
+  char *from;
+  char *to;
+  void *ret;
+  size_t i;
+
+  current = __morestack_current_segment;
+  /* PP is the link to the segment after the current one, if any.  */
+  pp = current != NULL ? &current->next : &__morestack_segments;
+  if (*pp != NULL && (*pp)->size < frame_size)
+    dynamic = __morestack_release_segments (pp, 0);
+  else
+    dynamic = NULL;
+  current = *pp;
+
+  if (current == NULL)
+    current = allocate_segment (frame_size);
+
+  current->old_stack = old_stack;
+
+  __morestack_current_segment = current;
+
+  if (dynamic != NULL)
+    {
+      /* Move the free blocks onto our list.  We don't want to call
+	 free here, as we are short on stack space.  */
+      current->free_dynamic_allocation =
+	merge_dynamic_blocks (dynamic, current->free_dynamic_allocation);
+    }
+
+  *pframe_size = current->size - param_size;
+  /* TO is where the parameters are copied; RET is the value returned.  */
+#ifdef STACK_GROWS_DOWNWARD
+  {
+    char *bottom = (char *) (current + 1) + current->size;
+    to = bottom - param_size;
+    ret = bottom - param_size;
+  }
+#else
+  to = (char *) (current + 1);
+  ret = (char *) (current + 1) + param_size;
+#endif
+
+  /* We don't call memcpy to avoid worrying about the dynamic linker
+     trying to resolve it.  */
+  from = (char *) old_stack;
+  for (i = 0; i < param_size; i++)
+    *to++ = *from++;
+
+  return ret;
+}
+
+/* This function is called by a processor specific function when it is
+   ready to release a stack segment.  We don't actually release the
+   stack segment, we just move back to the previous one.  The current
+   stack segment will still be available if we need it in
+   __generic_morestack.  This returns a pointer to the new stack
+   segment to use, which is the one saved by a previous call to
+   __generic_morestack.  The processor specific function is then
+   responsible for actually updating the stack pointer.  This sets
+   *PAVAILABLE to the amount of stack space now available.  */
+
+void *
+__generic_releasestack (size_t *pavailable)
+{
+  struct stack_segment *current;
+  void *old_stack;
+
+  current = __morestack_current_segment;
+  old_stack = current->old_stack;
+  current = current->prev;
+  __morestack_current_segment = current;
+
+  if (current != NULL)
+    {
+#ifdef STACK_GROWS_DOWNWARD
+      *pavailable = (char *) old_stack - (char *) (current + 1);
+#else
+      *pavailable = (char *) (current + 1) + current->size - (char *) old_stack;
+#endif
+    }
+  else
+    {
+      size_t used;
+
+      /* We have popped back to the original stack.  */
+#ifdef STACK_GROWS_DOWNWARD
+      if ((char *) old_stack >= (char *) __morestack_initial_sp.sp)
+	used = 0;
+      else
+	used = (char *) __morestack_initial_sp.sp - (char *) old_stack;
+#else
+      if ((char *) old_stack <= (char *) __morestack_initial_sp.sp)
+	used = 0;
+      else
+	used = (char *) old_stack - (char *) __morestack_initial_sp.sp;
+#endif
+
+      if (used > __morestack_initial_sp.len)
+	*pavailable = 0;
+      else
+	*pavailable = __morestack_initial_sp.len - used;
+    }
+
+  return old_stack;
+}
+
+/* Block signals while splitting the stack.  This avoids trouble if we
+   try to invoke a signal handler which itself wants to split the
+   stack.  */
+
+extern int pthread_sigmask (int, const sigset_t *, sigset_t *)
+  __attribute__ ((weak));
+
+void
+__morestack_block_signals (void)
+{
+  if (pthread_sigmask)
+    pthread_sigmask (SIG_BLOCK, &__morestack_fullmask,
+		     &__morestack_initial_sp.mask);
+  else
+    sigprocmask (SIG_BLOCK, &__morestack_fullmask,
+		 &__morestack_initial_sp.mask);
+}
+
+/* Unblock signals while splitting the stack.  */
+
+void
+__morestack_unblock_signals (void)
+{
+  if (pthread_sigmask)
+    pthread_sigmask (SIG_SETMASK, &__morestack_initial_sp.mask, NULL);
+  else
+    sigprocmask (SIG_SETMASK, &__morestack_initial_sp.mask, NULL);
+}
+
+/* This function is called to allocate dynamic stack space, for alloca
+   or a variably sized array.  This is a regular function with
+   sufficient stack space, so we just use malloc to allocate the
+   space.  We attach the allocated blocks to the current stack
+   segment, so that they will eventually be reused or freed.  */
+
+void *
+__morestack_allocate_stack_space (size_t size)
+{
+  struct stack_segment *seg, *current;
+  struct dynamic_allocation_blocks *p;
+
+  /* We have to block signals to avoid getting confused if we get
+     interrupted by a signal whose handler itself uses alloca or a
+     variably sized array.  */
+  __morestack_block_signals ();
+
+  /* Since we don't want to call free while we are low on stack space,
+     we may have a list of already allocated blocks waiting to be
+     freed.  Release them all, unless we find one that is large
+     enough.  We don't look at every block to see if one is large
+     enough, just the first one, because we aren't trying to build a
+     memory allocator here, we're just trying to speed up common
+     cases.  */
+
+  current = __morestack_current_segment;
+  p = NULL;
+  for (seg = __morestack_segments; seg != NULL; seg = seg->next)
+    {
+      p = seg->free_dynamic_allocation;
+      if (p != NULL)
+	{
+	  if (p->size >= size)
+	    {
+	      seg->free_dynamic_allocation = p->next;
+	      break;
+	    }
+
+	  free_dynamic_blocks (p);
+	  seg->free_dynamic_allocation = NULL;
+	  p = NULL;
+	}
+    }
+
+  if (p == NULL)
+    {
+      /* We need to allocate additional memory.  */
+      p = malloc (sizeof (*p));
+      if (p == NULL)
+	abort ();
+      p->size = size;
+      p->block = malloc (size);
+      if (p->block == NULL)
+	abort ();
+    }
+
+  /* If we are still on the initial stack, then we have a space leak.
+     FIXME.  */
+  if (current != NULL)
+    {
+      p->next = current->dynamic_allocation;
+      current->dynamic_allocation = p;
+    }
+
+  __morestack_unblock_signals ();
+
+  return p->block;
+}
+
+/* Find the stack segment containing STACK, make it current, and return
+   the space still available in it (PSS->size excludes the header).
+   Used when unwinding for an exception, to reset the stack guard.  */
+
+size_t
+__generic_findstack (void *stack)
+{
+  struct stack_segment *pss;
+  size_t used;
+
+  for (pss = __morestack_current_segment; pss != NULL; pss = pss->prev)
+    {
+      if ((char *) pss < (char *) stack
+	  && (char *) (pss + 1) + pss->size >= (char *) stack)
+	{
+	  __morestack_current_segment = pss;
+#ifdef STACK_GROWS_DOWNWARD
+	  return (char *) stack - (char *) (pss + 1);
+#else
+	  return (char *) (pss + 1) + pss->size - (char *) stack;
+#endif
+	}
+    }
+
+  /* We have popped back to the original stack.  */
+#ifdef STACK_GROWS_DOWNWARD
+  if ((char *) stack >= (char *) __morestack_initial_sp.sp)
+    used = 0;
+  else
+    used = (char *) __morestack_initial_sp.sp - (char *) stack;
+#else
+  if ((char *) stack <= (char *) __morestack_initial_sp.sp)
+    used = 0;
+  else
+    used = (char *) stack - (char *) __morestack_initial_sp.sp;
+#endif
+
+  if (used > __morestack_initial_sp.len)
+    return 0;
+  else
+    return __morestack_initial_sp.len - used;
+}
+
+/* This function is called at program startup time to make sure that
+   mmap, mprotect, munmap, and getpagesize are resolved if linking
+   dynamically.  We want to resolve them while we have enough stack,
+   not call into the dynamic linker while low on stack space.  */
+
+void
+__morestack_load_mmap (void)
+{
+  /* Call with bogus values to run faster.  We don't care if the call
+     fails.  Pass __MORESTACK_CURRENT_SEGMENT to make sure that any
+     TLS accessor function is resolved.  */
+  mmap (__morestack_current_segment, 0, PROT_READ, MAP_ANONYMOUS, -1, 0);
+  mprotect (NULL, 0, 0);
+  munmap (0, getpagesize ());
+}
+
+/* This function may be used to iterate over the stack segments.
+   This can be called like this.
+     void *next_segment = NULL;
+     void *next_sp = NULL;
+     void *initial_sp = NULL;
+     void *stack;
+     size_t stack_size;
+     while ((stack = __splitstack_find (next_segment, next_sp, &stack_size,
+                                        &next_segment, &next_sp,
+					&initial_sp)) != NULL)
+       {
+         // Stack segment starts at stack and is stack_size bytes long.
+       }
+
+   There is no way to iterate over the stack segments of a different
+   thread.  However, what is permitted is for one thread to call this
+   with the first two values NULL, to pass next_segment, next_sp, and
+   initial_sp to a different thread, and then to suspend one way or
+   another.  A different thread may run the subsequent
+   __splitstack_find iterations.  Of course, this will only work if the
+   first thread is suspended during the __splitstack_find iterations.
+   If not, the second thread will be looking at the stack while it is
+   changing, and anything could happen.
+
+   FIXME: This should be declared in some header file, but where?  */
+
+void *
+__splitstack_find (void *segment_arg, void *sp, size_t *len,
+		   void **next_segment, void **next_sp,
+		   void **initial_sp)
+{
+  struct stack_segment *segment;
+  void *ret;
+  char *nsp;
+
+  if (segment_arg == (void *) 1)
+    {
+      char *isp = (char *) *initial_sp;
+
+      *next_segment = (void *) 2;
+      *next_sp = NULL;
+#ifdef STACK_GROWS_DOWNWARD
+      if ((char *) sp >= isp)
+	return NULL;
+      *len = (char *) isp - (char *) sp;
+      return sp;
+#else
+      if ((char *) sp <= (char *) isp)
+	return NULL;
+      *len = (char *) sp - (char *) isp;
+      return (void *) isp;
+#endif
+    }
+  else if (segment_arg == (void *) 2)
+    return NULL;
+  else if (segment_arg != NULL)
+    segment = (struct stack_segment *) segment_arg;
+  else
+    {
+      *initial_sp = __morestack_initial_sp.sp;
+      segment = __morestack_current_segment;
+      sp = (void *) &segment;
+      while (1)
+	{
+	  if (segment == NULL)
+	    return __splitstack_find ((void *) 1, sp, len, next_segment,
+				      next_sp, initial_sp);
+	  if ((char *) sp >= (char *) (segment + 1)
+	      && (char *) sp <= (char *) (segment + 1) + segment->size)
+	    break;
+	  segment = segment->prev;
+	}
+    }
+
+  if (segment->prev == NULL)
+    *next_segment = (void *) 1;
+  else
+    *next_segment = segment->prev;
+
+  /* The old_stack value is the address of the function parameters of
+     the function which called __morestack.  So if f1 called f2 which
+     called __morestack, the stack looks like this:
+
+         parameters       <- old_stack
+         return in f1
+	 return in f2
+	 data pushed by __morestack
+
+     On x86, the data pushed by __morestack includes the saved value
+     of the ebp/rbp register.  We want our caller to be able to see
+     that value, which can not be found on any other stack.  So we
+     adjust accordingly.  This may need to be tweaked for other
+     targets.  */
+
+  nsp = (char *) segment->old_stack;
+#ifdef STACK_GROWS_DOWNWARD
+  nsp -= 3 * sizeof (void *);
+#else
+  nsp += 3 * sizeof (void *);
+#endif
+  *next_sp = (void *) nsp;
+
+#ifdef STACK_GROWS_DOWNWARD
+  *len = (char *) (segment + 1) + segment->size - (char *) sp;
+  ret = (void *) sp;
+#else
+  *len = (char *) sp - (char *) (segment + 1);
+  ret = (void *) (segment + 1);
+#endif
+
+  return ret;
+}
+
+#endif /* !defined (inhibit_libc) */
Index: libgcc/generic-morestack.h
===================================================================
--- libgcc/generic-morestack.h	(revision 0)
+++ libgcc/generic-morestack.h	(revision 0)
@@ -0,0 +1,53 @@ 
+/* Library support for -fsplit-stack.  */
+/* Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+   Contributed by Ian Lance Taylor <iant@google.com>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+/* This is a libgcc internal header file for functions shared between
+   generic-morestack.c and generic-morestack-thread.c.  The latter
+   file is only used when linking with the pthread library.  */
+
+/* The stack segment structure, defined in generic-morestack.c.  */
+
+struct stack_segment;
+
+/* The list of stack segments for this thread.  */
+
+extern __thread struct stack_segment *__morestack_segments;
+
+/* Print the string MSG/LEN, the errno number ERR, and a newline on
+   stderr, without using printf.  Then crash.  */
+
+extern void __morestack_fail (const char *msg, size_t len, int err)
+  __attribute__ ((noreturn, visibility ("hidden")));
+
+/* Release stack segments.  */
+
+extern struct dynamic_allocation_blocks *
+  __morestack_release_segments (struct stack_segment **, int)
+  __attribute__ ((visibility ("hidden")));
+
+/* Store the stack information in a processor dependent manner.  */
+
+extern void __stack_split_initialize (void)
+  __attribute__ ((visibility ("hidden")));
Index: libgcc/generic-morestack-thread.c
===================================================================
--- libgcc/generic-morestack-thread.c	(revision 0)
+++ libgcc/generic-morestack-thread.c	(revision 0)
@@ -0,0 +1,162 @@ 
+/* Thread library support for -fsplit-stack.  */
+/* Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+   Contributed by Ian Lance Taylor <iant@google.com>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#include "tconfig.h"
+#include "tsystem.h"
+#include "coretypes.h"
+#include "tm.h"
+
+/* If inhibit_libc is defined, we can not compile this file.  The
+   effect is that people will not be able to use -fsplit-stack.  That
+   is much better than failing the build particularly since people
+   will want to define inhibit_libc while building a compiler which
+   can build glibc.  */
+
+#ifndef inhibit_libc
+
+#include <pthread.h>
+
+#include "generic-morestack.h"
+
+/* We declare the pthread functions we need as weak, so that
+   libgcc_s.so does not need to be linked against -lpthread.  */
+
+extern int pthread_once (pthread_once_t *, void (*) (void))
+  __attribute__ ((weak));
+
+extern int pthread_key_create (pthread_key_t *, void (*) (void *))
+  __attribute__ ((weak));
+
+extern int pthread_setspecific (pthread_key_t, const void *)
+  __attribute__ ((weak));
+
+/* The key for the list of stack segments to free when the thread
+   exits.  This is created by pthread_key_create.  */
+
+static pthread_key_t segment_list_key;
+
+/* Used to only run create_key once.  */
+
+static pthread_once_t create_key_once = PTHREAD_ONCE_INIT;
+
+/* pthread key destructor: release the exiting thread's stack segments.  */
+
+static void
+free_segments (void *arg)
+{
+  struct stack_segment **segments = (struct stack_segment **) arg;
+
+  __morestack_release_segments (segments, 1);
+}
+
+/* Set up the key for the list of segments.  This is called via
+   pthread_once.  */
+
+static void
+create_key (void)
+{
+  static const char msg[] = "pthread_key_create failed: errno ";
+  int rc = pthread_key_create (&segment_list_key, free_segments);
+
+  /* Nothing more to do on success; any failure is fatal.  */
+  if (rc == 0)
+    return;
+
+  __morestack_fail (msg, sizeof msg - 1, rc);
+}
+
+/* Pass information from the pthread_create wrapper to
+   stack_split_initialize_thread.  */
+
+struct pthread_create_args
+{
+  void *(*start_routine) (void *);	/* The caller's thread function.  */
+  void *arg;				/* Argument passed to START_ROUTINE.  */
+};
+
+/* Entry point of every thread started through __wrap_pthread_create.
+   Sets up the target dependent stack guard, then runs the user's routine.  */
+
+static void *stack_split_initialize_thread (void *)
+  __attribute__ ((no_split_stack));
+
+static void *
+stack_split_initialize_thread (void *varg)
+{
+  struct pthread_create_args *boxed = varg;
+  struct pthread_create_args call;
+  int rc;
+
+  __stack_split_initialize ();
+
+  /* Register this thread's segment list so free_segments sees it at exit.  */
+  rc = pthread_setspecific (segment_list_key, (void *) &__morestack_segments);
+  if (rc != 0)
+    {
+      static const char msg[] = "pthread_setspecific failed: errno ";
+      __morestack_fail (msg, sizeof msg - 1, rc);
+    }
+
+  /* Copy the request out of the heap block before releasing it.  */
+  call = *boxed;
+  free (boxed);
+  return call.start_routine (call.arg);
+}
+
+/* This function wraps calls to pthread_create to make sure that the
+   stack guard is initialized for new threads.  FIXME: This hack will
+   not be necessary if glibc supports -fsplit-stack directly.  */
+
+int __wrap_pthread_create (pthread_t *, const pthread_attr_t *,
+			   void *(*start_routine) (void *), void *)
+  __attribute__ ((visibility ("hidden")));
+
+extern int __real_pthread_create (pthread_t *, const pthread_attr_t *,
+				  void *(*start_routine) (void *), void *)
+  __attribute__ ((weak));
+
+int
+__wrap_pthread_create (pthread_t *tid, const pthread_attr_t *attr,
+		       void *(*start_routine) (void *), void *arg)
+{
+  struct pthread_create_args *args;
+  int err = pthread_once (&create_key_once, create_key);
+  if (err != 0)
+    {
+      static const char msg[] = "pthread_once failed: errno ";
+      __morestack_fail (msg, sizeof msg - 1, err);
+    }
+  args = malloc (sizeof *args);	/* Freed by the new thread.  */
+  if (args == NULL)
+    return EAGAIN;
+  args->start_routine = start_routine;
+  args->arg = arg;
+  err = __real_pthread_create (tid, attr, stack_split_initialize_thread, args);
+  if (err != 0)
+    free (args);		/* No thread was started, so free ARGS here.  */
+  return err;
+}
+
+#endif /* !defined (inhibit_libc) */
Index: libgcc/config/i386/morestack.S
===================================================================
--- libgcc/config/i386/morestack.S	(revision 0)
+++ libgcc/config/i386/morestack.S	(revision 0)
@@ -0,0 +1,559 @@ 
+# x86/x86_64 support for -fsplit-stack.
+# Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+# Contributed by Ian Lance Taylor <iant@google.com>.
+
+# This file is part of GCC.
+
+# GCC is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 3, or (at your option) any later
+# version.
+
+# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# for more details.
+
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+# <http://www.gnu.org/licenses/>.
+
+
+# Support for allocating more stack space when using -fsplit-stack.
+# When a function discovers that it needs more stack space, it will
+# call __morestack with the size of the stack frame and the size of
+# the parameters to copy from the old stack frame to the new one.
+# The __morestack function preserves the parameter registers and
+# calls __generic_morestack to actually allocate the stack space.
+
+# When this is called stack space is very low, but we ensure that
+# there is enough space to push the parameter registers and to call
+# __generic_morestack.
+
+# When calling __generic_morestack, FRAME_SIZE points to the size of
+# the desired frame when the function is called, and the function
+# sets it to the size of the allocated stack.  OLD_STACK points to
+# the parameters on the old stack and PARAM_SIZE is the number of
+# bytes of parameters to copy to the new stack.  These are the
+# parameters of the function that called __morestack.  The
+# __generic_morestack function returns the new stack pointer,
+# pointing to the address of the first copied parameter.  The return
+# value minus the returned *FRAME_SIZE will be the first address on
+# the stack which we should not use.
+
+# void *__generic_morestack (size_t *frame_size, void *old_stack,
+#			     size_t param_size);
+
+# The __morestack routine has to arrange for the caller to return to a
+# stub on the new stack.  The stub is responsible for restoring the
+# old stack pointer and returning to the caller's caller.  This calls
+# __generic_releasestack to retrieve the old stack pointer and release
+# the newly allocated stack.
+
+# void *__generic_releasestack (size_t *available);
+
+# We do a little dance so that the processor's call/return return
+# address prediction works out.  The compiler arranges for the caller
+# to look like this:
+#   call __morestack
+#   ret
+#  L:
+#   // carry on with function
+# After we allocate more stack, we call L, which is in our caller.
+# When that returns (to the predicted instruction), we release the
+# stack segment and reset the stack pointer.  We then return to the
+# predicted instruction, namely the ret instruction immediately after
+# the call to __morestack.  That then returns to the caller of
+# the original caller.
+
+
+# The amount of extra space we ask for.  In general this has to be
+# enough for the dynamic loader to find a symbol and for a signal
+# handler to run.
+	
+#ifndef __x86_64__
+#define BACKOFF (1024)
+#else
+#define BACKOFF (1536)
+#endif
+
+
+# This entry point is for split-stack code which calls non-split-stack
+# code.  When the linker sees this case, it converts the call to
+# __morestack to call __morestack_non_split instead.  We just bump the
+# requested stack space by 16K.
+
+	.global __morestack_non_split
+	.hidden	__morestack_non_split
+
+#ifdef __ELF__
+       .type	__morestack_non_split,@function
+#endif
+
+__morestack_non_split:
+
+#ifndef __x86_64__
+	addl	$0x4000,4(%esp)		# Frame size is on the stack.
+#else
+	addq	$0x4000,%r10		# Frame size is in %r10.
+#endif
+
+#ifdef __ELF__
+	.size	__morestack_non_split, . - __morestack_non_split
+#endif
+
+# __morestack_non_split falls through into __morestack.
+
+
+# The __morestack function.
+
+	.global	__morestack
+	.hidden	__morestack
+
+#ifdef __ELF__
+	.type	__morestack,@function
+#endif
+
+__morestack:
+.LFB1:
+	.cfi_startproc
+
+
+#ifndef __x86_64__
+
+
+# The 32-bit __morestack function.
+
+	# We use a cleanup to restore the stack guard if an exception
+	# is thrown through this code.
+#ifndef __PIC__
+	.cfi_personality 0,__gcc_personality_v0
+	.cfi_lsda 0,.LLSDA1
+#else
+	.cfi_personality 0x9b,DW.ref.__gcc_personality_v0
+	.cfi_lsda 0x1b,.LLSDA1
+#endif
+
+	# Set up a normal backtrace.
+	pushl	%ebp
+	.cfi_def_cfa_offset 8
+	.cfi_offset %ebp, -8
+	movl	%esp, %ebp
+	.cfi_def_cfa_register %ebp
+
+	# We return below with a ret $8.  We will return to a single
+	# return instruction, which will return to the caller of our
+	# caller.  We let the unwinder skip that single return
+	# instruction, and just return to the real caller.
+	.cfi_offset 8, 8
+	.cfi_escape 0x15, 4, 0x7d	# DW_CFA_val_offset_sf, %esp, 12/-4
+
+	# In 32-bit mode the parameters are pushed on the stack.  The
+	# argument size is pushed then the new stack frame size is
+	# pushed.
+
+	# In 32-bit mode the registers %eax, %edx, and %ecx may be
+	# used for parameters, depending on the regparm and fastcall
+	# attributes.
+
+	pushl	%eax
+	pushl	%edx
+	pushl	%ecx
+
+	call	__morestack_block_signals
+
+	pushl	12(%ebp)		# The size of the parameters.
+	leal	20(%ebp),%eax		# Address of caller's parameters.
+	pushl	%eax
+	addl	$BACKOFF,8(%ebp)	# Ask for BACKOFF extra bytes.
+	leal	8(%ebp),%eax		# The address of the new frame size.
+	pushl	%eax
+
+	# Note that %esp is exactly 32 bytes below the CFA -- perfect for
+	# a 16-byte aligned stack.  That said, we still ought to compile
+	# generic-morestack.c with -mpreferred-stack-boundary=2.  FIXME.
+	call	__generic_morestack
+
+	movl	%eax,%esp		# Switch to the new stack.
+	subl	8(%ebp),%eax		# The end of the stack space.
+	addl	$BACKOFF,%eax		# Back off BACKOFF bytes.
+
+.LEHB0:
+	# FIXME: The offset must match
+	# TARGET_THREAD_SPLIT_STACK_OFFSET in
+	# gcc/config/i386/linux.h.
+	movl	%eax,%gs:0x30		# Save the new stack boundary.
+
+	call	__morestack_unblock_signals
+
+	movl	-8(%ebp),%edx		# Restore registers.
+	movl	-12(%ebp),%ecx
+
+	movl	4(%ebp),%eax		# Increment the return address
+	cmpb	$0xc3,(%eax)		# to skip the ret instruction;
+	je	1f			# see above.
+	addl	$2,%eax			# Not a plain ret (0xc3): 2 more bytes.
+1:	inc	%eax
+
+	movl	%eax,-8(%ebp)		# Store return address in an
+					# unused slot.
+
+	movl	-4(%ebp),%eax		# Restore the last register.
+
+	call	*-8(%ebp)		# Call our caller!
+
+	# The caller will return here, as predicted.
+
+	# Save the registers which may hold a return value.  We
+	# assume that __generic_releasestack does not touch any
+	# floating point or vector registers.
+	pushl	%eax
+	pushl	%edx
+
+	# Push the arguments to __generic_releasestack now so that the
+	# stack is at a 16-byte boundary for
+	# __morestack_block_signals.
+	pushl	$0			# Where the available space is returned.
+	leal	0(%esp),%eax		# Push its address.
+	push	%eax
+
+	call	__morestack_block_signals
+
+	call	__generic_releasestack
+
+	subl	4(%esp),%eax		# Subtract available space.
+	addl	$BACKOFF,%eax		# Back off BACKOFF bytes.
+.LEHE0:
+	movl	%eax,%gs:0x30		# Save the new stack boundary.
+
+	addl	$8,%esp			# Remove values from stack.
+
+	# We need to restore the old stack pointer, which is in %ebp,
+	# before we unblock signals.  We also need to restore %eax and
+	# %edx after we unblock signals but before we return.  Do this
+	# by moving %eax and %edx from the current stack to the old
+	# stack.
+
+	popl	%edx			# Pop return value from current stack.
+	popl	%eax
+
+	movl	%ebp,%esp		# Restore stack pointer.
+
+	pushl	%eax			# Push return value on old stack.
+	pushl	%edx
+	subl	$8,%esp			# Align stack to 16-byte boundary.
+
+	call	__morestack_unblock_signals
+
+	addl	$8,%esp
+	popl	%edx			# Restore return value.
+	popl	%eax
+
+	.cfi_remember_state
+	popl	%ebp
+	.cfi_restore %ebp
+	.cfi_def_cfa %esp, 12
+	ret	$8			# Return to caller, which will
+					# immediately return.  Pop
+					# arguments as we go.
+
+# This is the cleanup code called by the stack unwinder when unwinding
+# through the code between .LEHB0 and .LEHE0 above.
+	
+.L1:
+	.cfi_restore_state
+	subl	$16,%esp		# Maintain 16 byte alignment.
+	movl	%eax,4(%esp)		# Save exception header.
+	movl	%ebp,(%esp)		# Stack pointer after resume.
+	call	__generic_findstack
+	movl	%ebp,%ecx		# Get the stack pointer.
+	subl	%eax,%ecx		# Subtract available space.
+	addl	$BACKOFF,%ecx		# Back off BACKOFF bytes.
+	movl	%ecx,%gs:0x30		# Save new stack boundary.
+	movl	4(%esp),%eax		# Function argument.
+	movl	%eax,(%esp)
+#ifdef __PIC__
+#undef __i686
+	call	__i686.get_pc_thunk.bx	# %ebx may not be set up for us.
+	addl	$_GLOBAL_OFFSET_TABLE_, %ebx
+	call	_Unwind_Resume@PLT	# Resume unwinding.
+#else
+	call	_Unwind_Resume
+#endif
+
+#else /* defined(__x86_64__) */
+
+
+# The 64-bit __morestack function.
+
+	# We use a cleanup to restore the stack guard if an exception
+	# is thrown through this code.
+#ifndef __PIC__
+	.cfi_personality 0x3,__gcc_personality_v0
+	.cfi_lsda 0x3,.LLSDA1
+#else
+	.cfi_personality 0x9b,DW.ref.__gcc_personality_v0
+	.cfi_lsda 0x1b,.LLSDA1
+#endif
+
+	# Set up a normal backtrace.
+	pushq	%rbp
+	.cfi_def_cfa_offset 16
+	.cfi_offset %rbp, -16
+	movq	%rsp, %rbp
+	.cfi_def_cfa_register %rbp
+
+	# We will return to a single return instruction, which will
+	# return to the caller of our caller.  Let the unwinder skip
+	# that single return instruction, and just return to the real
+	# caller.
+	.cfi_offset 16, 0
+	.cfi_escape 0x15, 7, 0x7f	# DW_CFA_val_offset_sf, %rsp, 8/-8
+
+	# In 64-bit mode the new stack frame size is passed in r10
+        # and the argument size is passed in r11.
+
+	addq	$BACKOFF,%r10		# Ask for BACKOFF extra bytes.
+	pushq	%r10			# Save new frame size.
+
+	# In 64-bit mode the registers %rdi, %rsi, %rdx, %rcx, %r8,
+	# and %r9 may be used for parameters.  We also preserve %rax
+	# which the caller may use to hold %r10.
+
+	pushq	%rax
+	pushq	%rdi
+	pushq	%rsi
+	pushq	%rdx
+	pushq	%rcx
+	pushq	%r8
+	pushq	%r9
+
+	pushq	%r11
+	pushq	$0			# For alignment.
+
+	call	__morestack_block_signals
+
+	leaq	-8(%rbp),%rdi		# Address of new frame size.
+	leaq	24(%rbp),%rsi		# The caller's parameters.
+	addq	$8,%rsp			# Drop the alignment slot.
+	popq	%rdx			# The size of the parameters.
+
+	call	__generic_morestack
+
+	movq	-8(%rbp),%r10		# Reload modified frame size
+	movq	%rax,%rsp		# Switch to the new stack.
+	subq	%r10,%rax		# The end of the stack space.
+	addq	$BACKOFF,%rax		# Back off BACKOFF bytes.
+
+.LEHB0:
+	# FIXME: The offset must match
+	# TARGET_THREAD_SPLIT_STACK_OFFSET in
+	# gcc/config/i386/linux64.h.
+	movq	%rax,%fs:0x70		# Save the new stack boundary.
+
+	call	__morestack_unblock_signals
+
+	movq	-24(%rbp),%rdi		# Restore registers.
+	movq	-32(%rbp),%rsi
+	movq	-40(%rbp),%rdx
+	movq	-48(%rbp),%rcx
+	movq	-56(%rbp),%r8
+	movq	-64(%rbp),%r9
+
+	movq	8(%rbp),%r10		# Increment the return address
+	incq	%r10			# to skip the ret instruction;
+					# see above.
+
+	movq	-16(%rbp),%rax		# Restore caller's %rax.
+
+	call	*%r10			# Call our caller!
+
+	# The caller will return here, as predicted.
+
+	# Save the registers which may hold a return value.  We
+	# assume that __generic_releasestack does not touch any
+	# floating point or vector registers.
+	pushq	%rax
+	pushq	%rdx
+
+	call	__morestack_block_signals
+
+	pushq	$0			# For alignment.
+	pushq	$0			# Where the available space is returned.
+	leaq	0(%rsp),%rdi		# Pass its address.
+
+	call	__generic_releasestack
+
+	subq	0(%rsp),%rax		# Subtract available space.
+	addq	$BACKOFF,%rax		# Back off BACKOFF bytes.
+.LEHE0:
+	movq	%rax,%fs:0x70		# Save the new stack boundary.
+
+	addq	$16,%rsp		# Remove values from stack.
+
+	# We need to restore the old stack pointer, which is in %rbp,
+	# before we unblock signals.  We also need to restore %rax and
+	# %rdx after we unblock signals but before we return.  Do this
+	# by moving %rax and %rdx from the current stack to the old
+	# stack.
+
+	popq	%rdx			# Pop return value from current stack.
+	popq	%rax
+
+	movq	%rbp,%rsp		# Restore stack pointer.
+
+	pushq	%rax			# Push return value on old stack.
+	pushq	%rdx
+
+	call	__morestack_unblock_signals
+
+	popq	%rdx			# Restore return value.
+	popq	%rax
+
+	.cfi_remember_state
+	popq	%rbp
+	.cfi_restore %rbp
+	.cfi_def_cfa %rsp, 8
+	ret				# Return to caller, which will
+					# immediately return.
+
+# This is the cleanup code called by the stack unwinder when unwinding
+# through the code between .LEHB0 and .LEHE0 above.
+	
+.L1:
+	.cfi_restore_state
+	subq	$16,%rsp		# Maintain 16 byte alignment.
+	movq	%rax,(%rsp)		# Save exception header.
+	movq	%rbp,%rdi		# Stack pointer after resume.
+	call	__generic_findstack
+	movq	%rbp,%rcx		# Get the stack pointer.
+	subq	%rax,%rcx		# Subtract available space.
+	addq	$BACKOFF,%rcx		# Back off BACKOFF bytes.
+	movq	%rcx,%fs:0x70		# Save new stack boundary.
+	movq	(%rsp),%rdi		# Restore exception data for call.
+#ifdef __PIC__
+	call	_Unwind_Resume@PLT	# Resume unwinding.
+#else
+	call	_Unwind_Resume		# Resume unwinding.
+#endif
+
+#endif /* defined(__x86_64__) */
+
+	.cfi_endproc
+#ifdef __ELF__
+	.size	__morestack, . - __morestack
+#endif
+
+
+# The exception table.  This tells the personality routine to execute
+# the exception handler.
+
+	.section	.gcc_except_table,"a",@progbits
+	.align	4
+.LLSDA1:
+	.byte	0xff	# @LPStart format (omit)
+	.byte	0xff	# @TType format (omit)
+	.byte	0x1	# call-site format (uleb128)
+	.uleb128 .LLSDACSE1-.LLSDACSB1	# Call-site table length
+.LLSDACSB1:
+	.uleb128 .LEHB0-.LFB1	# region 0 start
+	.uleb128 .LEHE0-.LEHB0	# length
+	.uleb128 .L1-.LFB1	# landing pad
+	.uleb128 0		# action
+.LLSDACSE1:
+
+
+	.global __gcc_personality_v0
+#ifdef __PIC__
+	# Build a position independent reference to the basic
+	# personality function: a pointer-sized slot holding its address.
+	.hidden DW.ref.__gcc_personality_v0
+	.weak   DW.ref.__gcc_personality_v0
+	.section .data.DW.ref.__gcc_personality_v0,"awG",@progbits,DW.ref.__gcc_personality_v0,comdat
+	.type	DW.ref.__gcc_personality_v0, @object
+DW.ref.__gcc_personality_v0:
+#ifndef __x86_64__
+	.align 4
+	.size	DW.ref.__gcc_personality_v0, 4
+	.long	__gcc_personality_v0
+#else
+	.align 8
+	.size	DW.ref.__gcc_personality_v0, 8
+	.quad	__gcc_personality_v0
+#endif
+#endif
+
+
+# Initialize the stack test value when the program starts or when a
+# new thread starts.  We don't know how large the main stack is, so we
+# guess conservatively.  We might be able to use getrlimit here.
+
+	.text
+	.global	__stack_split_initialize
+	.hidden	__stack_split_initialize
+
+#ifdef __ELF__
+	.type	__stack_split_initialize, @function
+#endif
+
+__stack_split_initialize:
+
+#ifndef __x86_64__
+
+	leal	-16000(%esp),%eax	# We should have at least 16K.
+	movl	%eax,%gs:0x30		# Store the guessed stack boundary.
+	pushl	$16000			# Second argument: the assumed size.
+	pushl	%esp			# First argument: the stack pointer.
+#ifdef __PIC__
+	call	__generic_morestack_set_initial_sp@PLT
+#else
+	call	__generic_morestack_set_initial_sp
+#endif
+	addl	$8,%esp			# Pop the two arguments.
+	ret
+
+#else /* defined(__x86_64__) */
+
+	leaq	-16000(%rsp),%rax	# We should have at least 16K.
+	movq	%rax,%fs:0x70		# Store the guessed stack boundary.
+	movq	%rsp,%rdi		# First argument: the stack pointer.
+	movq	$16000,%rsi		# Second argument: the assumed size.
+#ifdef __PIC__
+	call	__generic_morestack_set_initial_sp@PLT
+#else
+	call	__generic_morestack_set_initial_sp
+#endif
+	ret
+
+#endif /* defined(__x86_64__) */
+
+#ifdef __ELF__
+	.size	__stack_split_initialize, . - __stack_split_initialize
+#endif
+
+
+# Make __stack_split_initialize and __morestack_load_mmap high priority
+# constructors.  FIXME: This is ELF specific.
+
+	.section	.ctors.65535,"aw",@progbits
+
+#ifndef __x86_64__
+	.align	4
+	.long	__stack_split_initialize
+	.long	__morestack_load_mmap
+#else
+	.align	8
+	.quad	__stack_split_initialize
+	.quad	__morestack_load_mmap
+#endif
+
+#ifdef __ELF__
+	.section	.note.GNU-stack,"",@progbits
+	.section	.note.GNU-split-stack,"",@progbits
+	.section	.note.GNU-no-split-stack,"",@progbits
+#endif
Index: libgcc/config/i386/t-stack-i386
===================================================================
--- libgcc/config/i386/t-stack-i386	(revision 0)
+++ libgcc/config/i386/t-stack-i386	(revision 0)
@@ -0,0 +1,2 @@ 
+# Makefile fragment to support -fsplit-stack for x86.
+LIB2ADD += $(srcdir)/config/i386/morestack.S
Index: libgcc/config/t-stack
===================================================================
--- libgcc/config/t-stack	(revision 0)
+++ libgcc/config/t-stack	(revision 0)
@@ -0,0 +1,4 @@ 
+# Makefile fragment to provide generic support for -fsplit-stack.
+# This should be used in config.host for any host which supports
+# -fsplit-stack.
+LIB2ADD += $(srcdir)/generic-morestack.c $(srcdir)/generic-morestack-thread.c
Index: gcc/libgcc-std.ver
===================================================================
--- gcc/libgcc-std.ver	(revision 164490)
+++ gcc/libgcc-std.ver	(working copy)
@@ -1915,3 +1915,8 @@  GCC_4.5.0 {
 %inherit GCC_4.6.0 GCC_4.5.0
 GCC_4.6.0 {
+  # -fsplit-stack support routines
+  __morestack_segments
+  __morestack_current_segment
+  __morestack_initial_sp
+  __splitstack_find
 }