Patchwork [i386] Simplify stack_realign_fp handling

login
register
mail settings
Submitter Richard Henderson
Date Aug. 10, 2010, 3:09 p.m.
Message ID <4C616B9C.80605@redhat.com>
Download mbox | patch
Permalink /patch/61395/
State New
Headers show

Comments

Richard Henderson - Aug. 10, 2010, 3:09 p.m.
Previously, a stack_realign_fp frame would look like

	[return addr]
	[saved fp]		_ EBP
	- stack realign -
	[saved regs]
	[local frame]		_ ESP

This patch moves the stack realignment down such that we get

	[return addr]
	[saved fp]		_ EBP
	[saved regs]
	- stack realign -
	[local frame]		_ ESP

The benefits are several:

(1) When saving registers via push, we get to do all of them
all at once.  I know that on most micro-architectures, there's
a special bypass so that the ESP value from one push feeds into
the next push.  This ought to avoid a stall between the AND
that realigns the stack and a subsequent push.

(2) When saving registers via move, we get to base the store
off of EBP, so the stack re-alignment ought to be able to take
place in parallel.

(3) The epilogue is simpler.  In the end it's exactly the same
as a non-realigned stack for a function that uses alloca.
Previously, if we wanted to use pops we'd get

	add	$ofs, %esp
	pop	...
	mov	%ebp, %esp
	pop	%ebp
	ret

Now we can combine the add and mov to undo the stack realign
in one go:

	lea	ofs(%ebp), %esp
	pop	...
	pop	%ebp
	ret

(4) The registers are saved at a simple offset from the CFA,
which eliminates the need for a complex DW_CFA_expression, 
which makes the unwind info smaller.

(5) Win64 SEH cannot represent anything as complex as
DW_CFA_expression; simple offsets are the only thing supported.

Tested on x86_64 and i686-linux.  Committed.


r~
* config/i386/i386.c (ix86_compute_frame_layout): Re-align stack
	after saving registers.  Assert that SSE registers are only saved
	with a sufficiently aligned frame.
	(ix86_emit_save_reg_using_mov): Assert realigned only with DRAP;
	remove stack_realign_fp handling.
	(ix86_expand_prologue): Save int registers before stack_realign_fp,
	and do not mark the stack alignment as frame related.
	(ix86_expand_epilogue): SP is now invalid with stack_realign_fp.

	* dwarf2out.c (dwarf2out_frame_debug_expr): Flush queued register
	saves when re-aligning the stack.

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 1877730..3f37066 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -8404,10 +8404,6 @@  ix86_compute_frame_layout (struct ix86_frame *frame)
 
   frame->hard_frame_pointer_offset = offset;
 
-  /* Set offset to aligned because the realigned frame starts from here.  */
-  if (stack_realign_fp)
-    offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
-
   /* Register save area */
   offset += frame->nregs * UNITS_PER_WORD;
   frame->reg_save_offset = offset;
@@ -8415,11 +8411,22 @@  ix86_compute_frame_layout (struct ix86_frame *frame)
   /* Align and set SSE register save area.  */
   if (frame->nsseregs)
     {
+      /* The only ABI that has saved SSE registers (Win64) also has a
+         16-byte aligned default stack, and thus we don't need to be
+	 within the re-aligned local stack frame to save them.  */
+      gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
       offset = (offset + 16 - 1) & -16;
       offset += frame->nsseregs * 16;
     }
   frame->sse_reg_save_offset = offset;
 
+  /* The re-aligned stack starts here.  Values before this point are not
+     directly comparable with values below this point.  In order to make
+     sure that no value happens to be the same before and after, force
+     the alignment computation below to add a non-zero value.  */
+  if (stack_realign_fp)
+    offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
+
   /* Va-arg area */
   frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
   offset += frame->va_arg_size;
@@ -8621,7 +8628,9 @@  ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
      any tricky guessing by dwarf2out.  */
   if (m->fs.realigned)
     {
-      if (stack_realign_drap && regno == REGNO (crtl->drap_reg))
+      gcc_checking_assert (stack_realign_drap);
+
+      if (regno == REGNO (crtl->drap_reg))
 	{
 	  /* A bit of a hack.  We force the DRAP register to be saved in
 	     the re-aligned stack frame, which provides us with a copy
@@ -8632,21 +8641,6 @@  ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
 	  mem = gen_rtx_MEM (mode, addr);
 	  add_reg_note (insn, REG_CFA_DEF_CFA, mem);
 	}
-      else if (stack_realign_fp)
-	{
-	  /* The stack pointer may or may not be varying within the
-	     function.  If it is, then we can't use it as a stable
-	     reference to the locations within the frame.  Instead,
-	     simply compute the location of the aligned frame from
-	     the frame pointer.  */
-	  addr = GEN_INT (-(HOST_WIDE_INT)crtl->stack_alignment_needed
-			  / BITS_PER_UNIT);
-	  addr = gen_rtx_AND (Pmode, hard_frame_pointer_rtx, addr);
-	  addr = plus_constant (addr, -cfa_offset);
-	  mem = gen_rtx_MEM (mode, addr);
-	  add_reg_note (insn, REG_CFA_EXPRESSION,
-			gen_rtx_SET (VOIDmode, mem, reg));
-	}
       else
 	{
 	  /* The frame pointer is a stable reference within the
@@ -9391,7 +9385,7 @@  ix86_expand_prologue (void)
   bool pic_reg_used;
   struct ix86_frame frame;
   HOST_WIDE_INT allocate;
-  bool int_registers_saved = false;
+  bool int_registers_saved;
 
   ix86_finalize_stack_realign_flags ();
 
@@ -9560,33 +9554,59 @@  ix86_expand_prologue (void)
       m->fs.fp_valid = true;
     }
 
+  int_registers_saved = (frame.nregs == 0);
+
+  if (!int_registers_saved)
+    {
+      /* If saving registers via PUSH, do so now.  */
+      if (!frame.save_regs_using_mov)
+	{
+	  ix86_emit_save_regs ();
+	  int_registers_saved = true;
+	  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
+	}
+
+      /* When using red zone we may start register saving before allocating
+	 the stack frame saving one cycle of the prologue.  However, avoid
+	 doing this if we have to probe the stack; at least on x86_64 the
+	 stack probe can turn into a call that clobbers a red zone location. */
+      else if (ix86_using_red_zone ()
+	       && (! TARGET_STACK_PROBE
+		   || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
+	{
+	  ix86_emit_save_regs_using_mov (frame.reg_save_offset);
+	  int_registers_saved = true;
+	}
+    }
+
   if (stack_realign_fp)
     {
       int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
       gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
 
+      /* The computation of the size of the re-aligned stack frame means
+	 that we must allocate the size of the register save area before
+	 performing the actual alignment.  Otherwise we cannot guarantee
+	 that there's enough storage above the realignment point.  */
+      if (m->fs.sp_offset != frame.sse_reg_save_offset)
+        pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+				   GEN_INT (m->fs.sp_offset
+					    - frame.sse_reg_save_offset),
+				   -1, false);
+
       /* Align the stack.  */
       insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
 					stack_pointer_rtx,
 					GEN_INT (-align_bytes)));
-      RTX_FRAME_RELATED_P (insn) = 1;
 
-      /* For the purposes of register save area addressing, the frame
-         pointer is no longer valid.  */
-      /* ??? There's no need to place the register save area into the
-	 aligned local stack frame.  We should do this later, after
-	 the register saves.  */
-      m->fs.sp_offset = (m->fs.sp_offset + align_bytes - 1) & -align_bytes;
-      m->fs.fp_valid = false;
-      m->fs.realigned = true;
+      /* For the purposes of register save area addressing, the stack
+         pointer is no longer valid.  As for the value of sp_offset,
+	 see ix86_compute_frame_layout, which we need to match in order
+	 to pass verification of stack_pointer_offset at the end.  */
+      m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
+      m->fs.sp_valid = false;
     }
 
-  if (!frame.save_regs_using_mov)
-    {
-      ix86_emit_save_regs ();
-      int_registers_saved = true;
-      gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
-    }
   allocate = frame.stack_pointer_offset - m->fs.sp_offset;
 
   /* The stack has already been decremented by the instruction calling us
@@ -9615,21 +9635,10 @@  ix86_expand_prologue (void)
 	}
     }
 
-  /* When using red zone we may start register saving before allocating the
-     stack frame saving one cycle of the prologue.  However, avoid doing this
-     if we have to probe the stack; at least on x86_64 the stack probe can
-     turn into a call that clobbers a red zone location.  */
-  if (!int_registers_saved
-      && ix86_using_red_zone ()
-      && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
-    {
-      ix86_emit_save_regs_using_mov (frame.reg_save_offset);
-      int_registers_saved = true;
-    }
-
   if (allocate == 0)
     ;
-  else if (!ix86_target_stack_probe () || allocate < CHECK_STACK_LIMIT)
+  else if (!ix86_target_stack_probe ()
+	   || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
     {
       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
 			         GEN_INT (-allocate), -1,
@@ -9918,19 +9927,14 @@  ix86_expand_epilogue (int style)
   ix86_finalize_stack_realign_flags ();
   ix86_compute_frame_layout (&frame);
 
-  /* When stack is realigned, SP must be valid.  */
   m->fs.sp_valid = (!frame_pointer_needed
-		    || current_function_sp_is_unchanging
-		    || stack_realign_fp);
+		    || (current_function_sp_is_unchanging
+			&& !stack_realign_fp));
   gcc_assert (!m->fs.sp_valid
 	      || m->fs.sp_offset == frame.stack_pointer_offset);
 
-  /* The FP must be valid if the frame pointer is present, but not
-     if the register save area is in the re-aligned local frame and
-     the FP points to the unaligned argument frame.  */
-  gcc_assert (frame_pointer_needed
-	      ? stack_realign_fp != m->fs.fp_valid
-	      : !m->fs.fp_valid);
+  /* The FP must be valid if the frame pointer is present.  */
+  gcc_assert (frame_pointer_needed == m->fs.fp_valid);
   gcc_assert (!m->fs.fp_valid
 	      || m->fs.fp_offset == frame.hard_frame_pointer_offset);
 
@@ -9955,10 +9959,10 @@  ix86_expand_epilogue (int style)
       /* The red-zone begins below the return address.  */
       m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
 
-      /* Since the register save area is in the aligned portion of
+      /* When the register save area is in the aligned portion of
          the stack, determine the maximum runtime displacement that
 	 matches up with the aligned frame.  */
-      if (crtl->stack_realign_needed)
+      if (stack_realign_drap)
 	m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
 				  + UNITS_PER_WORD);
     }
@@ -10030,7 +10034,7 @@  ix86_expand_epilogue (int style)
 	  rtx insn, sa = EH_RETURN_STACKADJ_RTX;
 
 	  /* Stack align doesn't work with eh_return.  */
-	  gcc_assert (!crtl->stack_realign_needed);
+	  gcc_assert (!stack_realign_drap);
 	  /* Neither does regparm nested functions.  */
 	  gcc_assert (!ix86_static_chain_on_stack);
 
@@ -10109,17 +10113,8 @@  ix86_expand_epilogue (int style)
 
   /* If we used a stack pointer and haven't already got rid of it,
      then do so now.  */
-  if (m->fs.fp_valid || stack_realign_fp)
+  if (m->fs.fp_valid)
     {
-      if (stack_realign_fp)
-	{
-	  /* We're re-defining what it means to be the local stack
-	     frame.  Thus the FP is suddenly valid and the SP isn't.  */
-	  m->fs.fp_valid = true;
-	  m->fs.sp_valid = false;
-	  m->fs.realigned = false;
-	}
-
       /* If the stack pointer is valid and pointing at the frame
 	 pointer store address, then we only need a pop.  */
       if (m->fs.sp_valid && m->fs.sp_offset == frame.hard_frame_pointer_offset)
diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
index ddecfc4..4b4042a 100644
--- a/gcc/dwarf2out.c
+++ b/gcc/dwarf2out.c
@@ -2456,6 +2456,10 @@  dwarf2out_frame_debug_expr (rtx expr, const char *label)
 	     alignment.  */
           if (fde && XEXP (src, 0) == stack_pointer_rtx)
             {
+	      /* We interpret reg_save differently with stack_realign set.
+		 Thus we must flush whatever we have queued first.  */
+	      flush_queued_reg_saves ();
+
               gcc_assert (cfa_store.reg == REGNO (XEXP (src, 0)));
               fde->stack_realign = 1;
               fde->stack_realignment = INTVAL (XEXP (src, 1));