diff mbox

libgcc patch committed: Fix split-stack stack alignment

Message ID mcr390ms58h.fsf@google.com
State New
Headers show

Commit Message

Ian Lance Taylor Nov. 6, 2012, 11:04 p.m. UTC
This patch fixes two stack alignment bugs in the split-stack support.

The first is that I managed to miscalculate the stack alignment in the
assembly code.  I was treating that code as though it were a normal
function.  That is wrong, because __morestack is actually called without
any stack adjustment in the caller, which means that when __morestack is
entered the stack % 16 will == 8 in 32-bit mode and 0 in 64-bit mode,
unlike the usual case of 12 and 8, respectively.  This patch corrects
the code to align the stack correctly when calling the C split-stack
functions.  Previously the stack was misaligned when calling those
functions.  Since those functions don't happen to use any vector
registers, this did not matter except for performance.  In any case,
this patch fixes it.

The second bug is that the C functions were not aligning the returned
stack.  The result was coming back to an alignment determined by the
parameter size.  This is simply wrong.  Since the C code doesn't know
the required stack alignment, I simply made it always align to a 32-byte
boundary.  If split stack support is added for more processors, this may
need to become processor dependent.

Along the way I noticed that the 32-bit __morestack_non_split support
was mishandling the return address when called by a varargs function,
and I fixed that too.

Bootstrapped and ran split-stack and Go tests on
x86_64-unknown-linux-gnu, both 64-bit and 32-bit mode.  Committed to
mainline.

Ian


2012-11-06  Ian Lance Taylor  <iant@google.com>

	* generic-morestack.c (__generic_morestack): Align the returned
	stack pointer to a 32 byte boundary.
	* config/i386/morestack.S (__morestack_non_split) [32-bit]: Don't
	increment the return address until we have decided that we don't
	have a varargs function.
	(__morestack) [32-bit]: Align stack correctly when calling C
	functions.
	(__morestack) [64-bit]: Likewise.
diff mbox

Patch

Index: generic-morestack.c
===================================================================
--- generic-morestack.c	(revision 193263)
+++ generic-morestack.c	(working copy)
@@ -549,6 +549,7 @@  __generic_morestack (size_t *pframe_size
   char *to;
   void *ret;
   size_t i;
+  size_t aligned;
 
   current = __morestack_current_segment;
 
@@ -580,15 +581,19 @@  __generic_morestack (size_t *pframe_size
 
   *pframe_size = current->size - param_size;
 
+  /* Align the returned stack to a 32-byte boundary.  */
+  aligned = (param_size + 31) & ~ (size_t) 31;
+
 #ifdef STACK_GROWS_DOWNWARD
   {
     char *bottom = (char *) (current + 1) + current->size;
-    to = bottom - param_size;
-    ret = bottom - param_size;
+    to = bottom - aligned;
+    ret = bottom - aligned;
   }
 #else
   to = current + 1;
-  ret = (char *) (current + 1) + param_size;
+  to += aligned - param_size;
+  ret = (char *) (current + 1) + aligned;
 #endif
 
   /* We don't call memcpy to avoid worrying about the dynamic linker
Index: config/i386/morestack.S
===================================================================
--- config/i386/morestack.S	(revision 193263)
+++ config/i386/morestack.S	(working copy)
@@ -200,18 +200,19 @@  __morestack_non_split:
 
 	jb	2f			# Get more space if we need it.
 
-	# This breaks call/return prediction, as described above.
-	incq	8(%rsp)			# Increment the return address.
-
 	# If the instruction that we return to is
 	#   leaq  24(%rbp), %r11n
 	# then we have been called by a varargs function that expects
 	# %ebp to hold a real value.  That can only work if we do the
 	# full stack split routine.  FIXME: This is fragile.
 	movq	8(%rsp),%rax
+	incq	%rax			# Skip ret instruction in caller.
 	cmpl	$0x185d8d4c,(%rax)
 	je	2f
 
+	# This breaks call/return prediction, as described above.
+	incq	8(%rsp)			# Increment the return address.
+
 	popq	%rax			# Restore register.
 
 	.cfi_adjust_cfa_offset -8	# Adjust for popped register.
@@ -296,9 +297,13 @@  __morestack:
 	# argument size is pushed then the new stack frame size is
 	# pushed.
 
-	# Align stack to 16-byte boundary with enough space for saving
-	# registers and passing parameters to functions we call.
-	subl	$40,%esp
+	# In the body of a non-leaf function, the stack pointer will
+	# be aligned to a 16-byte boundary.  That is CFA + 12 in the
+	# stack picture above: (CFA + 12) % 16 == 0.  At this point we
+	# have %esp == CFA - 8, so %esp % 16 == 12.  We need some
+	# space for saving registers and passing parameters, and we
+	# need to wind up with %esp % 16 == 0.
+	subl	$44,%esp
 
 	# Because our cleanup code may need to clobber %ebx, we need
 	# to save it here so the unwinder can restore the value used
@@ -393,13 +398,15 @@  __morestack:
 
 	movl	%ebp,%esp		# Restore stack pointer.
 
+	# As before, we now have %esp % 16 == 12.
+
 	pushl	%eax			# Push return value on old stack.
 	pushl	%edx
-	subl	$8,%esp			# Align stack to 16-byte boundary.
+	subl	$4,%esp			# Align stack to 16-byte boundary.
 
 	call	__morestack_unblock_signals
 
-	addl	$8,%esp
+	addl	$4,%esp
 	popl	%edx			# Restore return value.
 	popl	%eax
 
@@ -485,15 +492,21 @@  __morestack:
 	pushq	%r9
 
 	pushq	%r11
-	pushq	$0			# For alignment.
+
+	# We entered morestack with the stack pointer aligned to a
+	# 16-byte boundary (the call to morestack's caller used 8
+	# bytes, and the call to morestack used 8 bytes).  We have now
+	# pushed 10 registers, so we are still aligned to a 16-byte
+	# boundary.
 
 	call	__morestack_block_signals
 
 	leaq	-8(%rbp),%rdi		# Address of new frame size.
 	leaq	24(%rbp),%rsi		# The caller's parameters.
-	addq	$8,%rsp
 	popq	%rdx			# The size of the parameters.
 
+	subq	$8,%rsp			# Align stack.
+
 	call	__generic_morestack
 
 	movq	-8(%rbp),%r10		# Reload modified frame size
@@ -564,6 +577,9 @@  __morestack:
 
 	movq	%rbp,%rsp		# Restore stack pointer.
 
+	# Now (%rsp & 16) == 8.
+
+	subq	$8,%rsp			# For alignment.
 	pushq	%rax			# Push return value on old stack.
 	pushq	%rdx
 
@@ -571,6 +587,7 @@  __morestack:
 
 	popq	%rdx			# Restore return value.
 	popq	%rax
+	addq	$8,%rsp
 
 	.cfi_remember_state
 	popq	%rbp