Patchwork [RFC] Fix pr34548 -- unnecessary alignment from alloca

login
register
mail settings
Submitter Richard Henderson
Date Aug. 1, 2012, 2:38 a.m.
Message ID <501896BD.4000402@redhat.com>
Download mbox | patch
Permalink /patch/174360/
State New
Headers show

Comments

Richard Henderson - Aug. 1, 2012, 2:38 a.m.
I've bootstrapped this on both ppc64 and x86_64.  I'll leave some time
for comment, but I can't immediately see how this could go wrong anywhere.


r~
* function.h (struct rtl_data): Add max_dynamic_stack_alignment.
	* cfgexpand.c (gimple_expand_cfg): Initialise it.
	* explow.c (allocate_dynamic_stack_space): Set it.  Simplify
	alignment requirements given the known alignment of dynamic_offset.
	* function.c (instantiate_virtual_regs): Align dtnamic_offset.

Patch

diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index 96c2e2e..1f16534 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -4363,6 +4363,7 @@  gimple_expand_cfg (void)
   crtl->max_used_stack_slot_alignment = STACK_BOUNDARY;
   crtl->stack_alignment_estimated = 0;
   crtl->preferred_stack_boundary = STACK_BOUNDARY;
+  crtl->max_dynamic_stack_alignment = 0;
   cfun->cfg->max_jumptable_ents = 0;
 
   /* Resovle the function section.  Some targets, like ARM EABI rely on knowledge
diff --git a/gcc/explow.c b/gcc/explow.c
index 1cfe93b..c7581b0 100644
--- a/gcc/explow.c
+++ b/gcc/explow.c
@@ -1173,7 +1173,6 @@  allocate_dynamic_stack_space (rtx size, unsigned size_align,
 {
   HOST_WIDE_INT stack_usage_size = -1;
   rtx final_label, final_target, target;
-  unsigned extra_align = 0;
   bool must_align;
 
   /* If we're asking for zero bytes, it doesn't matter what we point
@@ -1237,58 +1236,40 @@  allocate_dynamic_stack_space (rtx size, unsigned size_align,
   else if (size_align < BITS_PER_UNIT)
     size_align = BITS_PER_UNIT;
 
-  /* We can't attempt to minimize alignment necessary, because we don't
-     know the final value of preferred_stack_boundary yet while executing
-     this code.  */
-  if (crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
-    crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
-
   /* We will need to ensure that the address we return is aligned to
-     REQUIRED_ALIGN.  If STACK_DYNAMIC_OFFSET is defined, we don't
-     always know its final value at this point in the compilation (it
-     might depend on the size of the outgoing parameter lists, for
-     example), so we must align the value to be returned in that case.
-     (Note that STACK_DYNAMIC_OFFSET will have a default nonzero value if
-     STACK_POINTER_OFFSET or ACCUMULATE_OUTGOING_ARGS are defined).
-     We must also do an alignment operation on the returned value if
-     the stack pointer alignment is less strict than REQUIRED_ALIGN.
-
-     If we have to align, we must leave space in SIZE for the hole
-     that might result from the alignment operation.  */
-
-  must_align = (crtl->preferred_stack_boundary < required_align);
-  if (must_align)
+     REQUIRED_ALIGN.  If that alignment is no larger than
+     PREFERRED_STACK_BOUNDARY, we can handle everything without an
+     explicit alignment.  */
+  if (required_align <= PREFERRED_STACK_BOUNDARY)
     {
-      if (required_align > PREFERRED_STACK_BOUNDARY)
-	extra_align = PREFERRED_STACK_BOUNDARY;
-      else if (required_align > STACK_BOUNDARY)
-	extra_align = STACK_BOUNDARY;
-      else
-	extra_align = BITS_PER_UNIT;
+      if (crtl->preferred_stack_boundary < required_align)
+	crtl->preferred_stack_boundary = required_align;
+      if (crtl->max_dynamic_stack_alignment < required_align)
+	crtl->max_dynamic_stack_alignment = required_align;
+      must_align = false;
     }
+  else
+    {
+      unsigned extra, extra_align;
 
-  /* ??? STACK_POINTER_OFFSET is always defined now.  */
-#if defined (STACK_DYNAMIC_OFFSET) || defined (STACK_POINTER_OFFSET)
-  must_align = true;
-  extra_align = BITS_PER_UNIT;
-#endif
+      crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
+      crtl->max_dynamic_stack_alignment = PREFERRED_STACK_BOUNDARY;
 
-  if (must_align)
-    {
-      unsigned extra = (required_align - extra_align) / BITS_PER_UNIT;
+      extra_align = PREFERRED_STACK_BOUNDARY;
+      extra = (required_align - extra_align) / BITS_PER_UNIT;
 
       size = plus_constant (Pmode, size, extra);
       size = force_operand (size, NULL_RTX);
 
       if (flag_stack_usage_info)
 	stack_usage_size += extra;
-
       if (extra && size_align > extra_align)
 	size_align = extra_align;
+      must_align = true;
     }
 
   /* Round the size to a multiple of the required stack alignment.
-     Since the stack if presumed to be rounded before this allocation,
+     Since the stack is presumed to be rounded before this allocation,
      this will maintain the required alignment.
 
      If the stack grows downward, we could save an insn by subtracting
diff --git a/gcc/function.c b/gcc/function.c
index f1e0b2d..827f687 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -1892,7 +1892,14 @@  instantiate_virtual_regs (void)
   /* Compute the offsets to use for this function.  */
   in_arg_offset = FIRST_PARM_OFFSET (current_function_decl);
   var_offset = STARTING_FRAME_OFFSET;
+
   dynamic_offset = STACK_DYNAMIC_OFFSET (current_function_decl);
+  if (crtl->max_dynamic_stack_alignment)
+    {
+      int align = crtl->max_dynamic_stack_alignment / BITS_PER_UNIT;
+      dynamic_offset = (dynamic_offset + align - 1) & -align;
+    }
+
   out_arg_offset = STACK_POINTER_OFFSET;
 #ifdef FRAME_POINTER_CFA_OFFSET
   cfa_offset = FRAME_POINTER_CFA_OFFSET (current_function_decl);
diff --git a/gcc/function.h b/gcc/function.h
index 3d3313f..ed6fcb1 100644
--- a/gcc/function.h
+++ b/gcc/function.h
@@ -347,6 +347,10 @@  struct GTY(()) rtl_data {
         local stack.  */
   unsigned int stack_alignment_estimated;
 
+  /* The (capped) maximum alignment of dynamic stack space, and thus the
+     required alignment of STACK_DYNAMIC_OFFSET.  */
+  unsigned int max_dynamic_stack_alignment;
+
   /* For reorg.  */
 
   /* If some insns can be deferred to the delay slots of the epilogue, the