Patchwork Fix alloca with SUPPORTS_STACK_ALIGNMENT (PR middle-end/45234)

login
register
mail settings
Submitter Jakub Jelinek
Date Sept. 23, 2010, 10:15 p.m.
Message ID <20100923221548.GI1269@tyan-ft48-01.lab.bos.redhat.com>
Download mbox | patch
Permalink /patch/65601/
State New
Headers show

Comments

Jakub Jelinek - Sept. 23, 2010, 10:15 p.m.
On Thu, Sep 23, 2010 at 01:58:23PM +0200, Jakub Jelinek wrote:
> I have looked into it some more and I believe the PR45234 "fix" is just
> completely wrong, the problem is elsewhere (allocate_dynamic_stack_space)
> and there are actually many issues in there.

This patch implements 1) through 5), I've spent some time looking also
into the precompute alignment boundaries for arguments approach, but gave
up, because there is just way too much code needed to setup things
to be able to use pass_by_reference and FUNCTION_ARG_BOUNDARY on the
arguments and I didn't want to duplicate large parts of expand_call
and initialize_argument_information.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

For 4.5/4.4, I'd prefer to just revert the calls.c change and wait a little
bit if there aren't any regressions on the trunk caused by this on other
architectures.

2010-09-24  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/45234
	* rtl.h (enum global_rtl_index): Add
	GR_VIRTUAL_PREFERRED_STACK_BOUNDARY.
	(LAST_VIRTUAL_POINTER_REGISTER): Define.
	(virtual_preferred_stack_boundary_rtx,
	VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM): Define.
	(LAST_VIRTUAL_REGISTER): Increase by one.
	(REGNO_PTR_FRAME_P): Use LAST_VIRTUAL_POINTER_REGISTER
	instead of LAST_VIRTUAL_REGISTER.
	* function.c (instantiate_new_reg): Handle
	virtual_preferred_stack_boundary_rtx.
	* emit-rtl.c (init_virtual_regs): Handle
	VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM.
	(init_emit_regs): Initialize virtual_preferred_stack_boundary_rtx.
	* explow.c (round_push): If crtl->preferred_stack_boundary
	is smaller than MAX_SUPPORTED_STACK_ALIGNMENT, use
	virtual_preferred_stack_boundary_rtx alignment instead of
	crtl->preferred_stack_boundary alignment.
	(allocate_dynamic_stack_space): Use CONST_INT_P and REG_P
	macros.  Never decrease crtl->preferred_stack_boundary,
	use crtl->preferred_stack_boundary or MAX_SUPPORTED_STACK_ALIGNMENT
	instead of PREFERRED_STACK_BOUNDARY.  Don't modify
	stack_pointer_delta in dynamic allocation, even when size
	is constant.
	(probe_stack_range, anti_adjust_stack_and_probe): Use CONST_INT_P
	macro.
	* print-rtl.c (print_rtx): Handle
	VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM.
	* config/alpha/alpha.h (NONSTRICT_REG_OK_FP_BASE_P): Use
	LAST_VIRTUAL_POINTER_REGISTER instead of LAST_VIRTUAL_REGISTER.
	* config/frv/frv.c (frv_emit_movsi): Likewise.
	* config/arm/arm.c (thumb1_legitimate_address_p): Likewise.
	* config/rs6000/rs6000.c (virtual_stack_registers_memory_p):
	Likewise.

	* gcc.dg/torture/stackalign/alloca-6.c: New test.
	* gcc.target/i386/pr45234.c: New test.

	Revert:
	2010-09-17  H.J. Lu  <hongjiu.lu@intel.com>

	PR middle-end/45234
	* calls.c (expand_call): Make sure that all variable sized
	adjustments are multiple of preferred stack boundary after
	stack alignment.



	Jakub
Richard Henderson - Sept. 23, 2010, 10:30 p.m.
On 09/23/2010 03:15 PM, Jakub Jelinek wrote:
>  round_push (rtx size)
>  {
> -  int align = PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT;
> +  int align;
> +
> +  if (crtl->preferred_stack_boundary < MAX_SUPPORTED_STACK_ALIGNMENT)
> +    {
> +      /* CEIL_DIV_EXPR needs to worry about the addition overflowing,
> +	 but we know it can't.  So add ourselves and then do
> +	 TRUNC_DIV_EXPR.  */
> +      rtx alignm1 = plus_constant (virtual_preferred_stack_boundary_rtx, -1);
> +      alignm1 = force_operand (alignm1, NULL_RTX);
> +      size = expand_binop (Pmode, add_optab, size, alignm1,
> +			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
> +      size = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, size,
> +			    virtual_preferred_stack_boundary_rtx,
> +			    NULL_RTX, 1);
> +      size = expand_mult (Pmode, size, virtual_preferred_stack_boundary_rtx,
> +			  NULL_RTX, 1);
> +      return size;
> +    }

It seems like it would be better to share code with the existing
expand_binop path here.

Otherwise this looks good.


r~

Patch

--- gcc/rtl.h.jj	2010-09-09 10:17:41.000000000 +0200
+++ gcc/rtl.h	2010-09-23 18:34:42.000000000 +0200
@@ -2010,6 +2010,7 @@  enum global_rtl_index
   GR_VIRTUAL_STACK_DYNAMIC,
   GR_VIRTUAL_OUTGOING_ARGS,
   GR_VIRTUAL_CFA,
+  GR_VIRTUAL_PREFERRED_STACK_BOUNDARY,
 
   GR_MAX
 };
@@ -2157,7 +2158,18 @@  extern rtx gen_rtx_MEM (enum machine_mod
 
 #define VIRTUAL_CFA_REGNUM		((FIRST_VIRTUAL_REGISTER) + 4)
 
-#define LAST_VIRTUAL_REGISTER		((FIRST_VIRTUAL_REGISTER) + 4)
+#define LAST_VIRTUAL_POINTER_REGISTER	((FIRST_VIRTUAL_REGISTER) + 4)
+
+/* This is replaced by crtl->preferred_stack_boundary / BITS_PER_UNIT
+   when finalized.  */
+
+#define virtual_preferred_stack_boundary_rtx \
+	(global_rtl[GR_VIRTUAL_PREFERRED_STACK_BOUNDARY])
+
+#define VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM \
+					((FIRST_VIRTUAL_REGISTER) + 5)
+
+#define LAST_VIRTUAL_REGISTER		((FIRST_VIRTUAL_REGISTER) + 5)
 
 /* Nonzero if REGNUM is a pointer into the stack frame.  */
 #define REGNO_PTR_FRAME_P(REGNUM)		\
@@ -2166,7 +2178,7 @@  extern rtx gen_rtx_MEM (enum machine_mod
    || (REGNUM) == HARD_FRAME_POINTER_REGNUM	\
    || (REGNUM) == ARG_POINTER_REGNUM		\
    || ((REGNUM) >= FIRST_VIRTUAL_REGISTER	\
-       && (REGNUM) <= LAST_VIRTUAL_REGISTER))
+       && (REGNUM) <= LAST_VIRTUAL_POINTER_REGISTER))
 
 /* REGNUM never really appearing in the INSN stream.  */
 #define INVALID_REGNUM			(~(unsigned int) 0)
--- gcc/function.c.jj	2010-09-17 14:11:11.000000000 +0200
+++ gcc/function.c	2010-09-23 18:34:53.000000000 +0200
@@ -1405,6 +1405,11 @@  instantiate_new_reg (rtx x, HOST_WIDE_IN
 #endif
       offset = cfa_offset;
     }
+  else if (x == virtual_preferred_stack_boundary_rtx)
+    {
+      new_rtx = GEN_INT (crtl->preferred_stack_boundary / BITS_PER_UNIT);
+      offset = 0;
+    }
   else
     return NULL_RTX;
 
--- gcc/emit-rtl.c.jj	2010-09-09 10:17:41.000000000 +0200
+++ gcc/emit-rtl.c	2010-09-23 19:11:04.000000000 +0200
@@ -5376,6 +5376,8 @@  init_virtual_regs (void)
   regno_reg_rtx[VIRTUAL_STACK_DYNAMIC_REGNUM] = virtual_stack_dynamic_rtx;
   regno_reg_rtx[VIRTUAL_OUTGOING_ARGS_REGNUM] = virtual_outgoing_args_rtx;
   regno_reg_rtx[VIRTUAL_CFA_REGNUM] = virtual_cfa_rtx;
+  regno_reg_rtx[VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM]
+    = virtual_preferred_stack_boundary_rtx;
 }
 
 
@@ -5698,6 +5700,8 @@  init_emit_regs (void)
   virtual_outgoing_args_rtx =
     gen_raw_REG (Pmode, VIRTUAL_OUTGOING_ARGS_REGNUM);
   virtual_cfa_rtx = gen_raw_REG (Pmode, VIRTUAL_CFA_REGNUM);
+  virtual_preferred_stack_boundary_rtx =
+    gen_raw_REG (Pmode, VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM);
 
   /* Initialize RTL for commonly used hard registers.  These are
      copied into regno_reg_rtx as we begin to compile each function.  */
--- gcc/explow.c.jj	2010-09-06 08:42:00.000000000 +0200
+++ gcc/explow.c	2010-09-23 18:40:40.000000000 +0200
@@ -915,7 +915,26 @@  anti_adjust_stack (rtx adjust)
 static rtx
 round_push (rtx size)
 {
-  int align = PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT;
+  int align;
+
+  if (crtl->preferred_stack_boundary < MAX_SUPPORTED_STACK_ALIGNMENT)
+    {
+      /* CEIL_DIV_EXPR needs to worry about the addition overflowing,
+	 but we know it can't.  So add ourselves and then do
+	 TRUNC_DIV_EXPR.  */
+      rtx alignm1 = plus_constant (virtual_preferred_stack_boundary_rtx, -1);
+      alignm1 = force_operand (alignm1, NULL_RTX);
+      size = expand_binop (Pmode, add_optab, size, alignm1,
+			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
+      size = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, size,
+			    virtual_preferred_stack_boundary_rtx,
+			    NULL_RTX, 1);
+      size = expand_mult (Pmode, size, virtual_preferred_stack_boundary_rtx,
+			  NULL_RTX, 1);
+      return size;
+    }
+
+  align = crtl->preferred_stack_boundary / BITS_PER_UNIT;
 
   if (align == 1)
     return size;
@@ -1144,9 +1164,9 @@  allocate_dynamic_stack_space (rtx size, 
      introduced later by the various alignment operations.  */
   if (flag_stack_usage)
     {
-      if (GET_CODE (size) == CONST_INT)
+      if (CONST_INT_P (size))
 	stack_usage_size = INTVAL (size);
-      else if (GET_CODE (size) == REG)
+      else if (REG_P (size))
         {
 	  /* Look into the last emitted insn and see if we can deduce
 	     something for the register.  */
@@ -1154,10 +1174,10 @@  allocate_dynamic_stack_space (rtx size, 
 	  insn = get_last_insn ();
 	  if ((set = single_set (insn)) && rtx_equal_p (SET_DEST (set), size))
 	    {
-	      if (GET_CODE (SET_SRC (set)) == CONST_INT)
+	      if (CONST_INT_P (SET_SRC (set)))
 		stack_usage_size = INTVAL (SET_SRC (set));
 	      else if ((note = find_reg_equal_equiv_note (insn))
-		       && GET_CODE (XEXP (note, 0)) == CONST_INT)
+		       && CONST_INT_P (XEXP (note, 0)))
 		stack_usage_size = INTVAL (XEXP (note, 0));
 	    }
 	}
@@ -1177,7 +1197,8 @@  allocate_dynamic_stack_space (rtx size, 
   /* We can't attempt to minimize alignment necessary, because we don't
      know the final value of preferred_stack_boundary yet while executing
      this code.  */
-  crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
+  if (crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
+    crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
 
   /* We will need to ensure that the address we return is aligned to
      BIGGEST_ALIGNMENT.  If STACK_DYNAMIC_OFFSET is defined, we don't
@@ -1195,7 +1216,7 @@  allocate_dynamic_stack_space (rtx size, 
 #if defined (STACK_DYNAMIC_OFFSET) || defined (STACK_POINTER_OFFSET)
 #define MUST_ALIGN 1
 #else
-#define MUST_ALIGN (PREFERRED_STACK_BOUNDARY < BIGGEST_ALIGNMENT)
+#define MUST_ALIGN (crtl->preferred_stack_boundary < BIGGEST_ALIGNMENT)
 #endif
 
   if (MUST_ALIGN)
@@ -1255,13 +1276,13 @@  allocate_dynamic_stack_space (rtx size, 
      insns.  Since this is an extremely rare event, we have no reliable
      way of knowing which systems have this problem.  So we avoid even
      momentarily mis-aligning the stack.  */
-  if (!known_align_valid || known_align % PREFERRED_STACK_BOUNDARY != 0)
+  if (!known_align_valid || known_align % MAX_SUPPORTED_STACK_ALIGNMENT != 0)
     {
       size = round_push (size);
 
       if (flag_stack_usage)
 	{
-	  int align = PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT;
+	  int align = crtl->preferred_stack_boundary / BITS_PER_UNIT;
 	  stack_usage_size = (stack_usage_size + align - 1) / align * align;
 	}
     }
@@ -1328,6 +1349,8 @@  allocate_dynamic_stack_space (rtx size, 
   else
 #endif
     {
+      int saved_stack_pointer_delta;
+
 #ifndef STACK_GROWS_DOWNWARD
       emit_move_insn (target, virtual_stack_dynamic_rtx);
 #endif
@@ -1358,10 +1381,15 @@  allocate_dynamic_stack_space (rtx size, 
 	  emit_label (space_available);
 	}
 
+      saved_stack_pointer_delta = stack_pointer_delta;
       if (flag_stack_check && STACK_CHECK_MOVING_SP)
 	anti_adjust_stack_and_probe (size, false);
       else
 	anti_adjust_stack (size);
+      /* Even if size is constant, don't modify stack_pointer_delta.
+	 The constant size alloca should preserve
+	 crtl->preferred_stack_boundary alignment.  */
+      stack_pointer_delta = saved_stack_pointer_delta;
 
 #ifdef STACK_GROWS_DOWNWARD
       emit_move_insn (target, virtual_stack_dynamic_rtx);
@@ -1572,7 +1600,7 @@  probe_stack_range (HOST_WIDE_INT first, 
 	{
 	  rtx addr;
 
-	  if (GET_CODE (temp) == CONST_INT)
+	  if (CONST_INT_P (temp))
 	    {
 	      /* Use [base + disp} addressing mode if supported.  */
 	      HOST_WIDE_INT offset = INTVAL (temp);
@@ -1613,7 +1641,7 @@  anti_adjust_stack_and_probe (rtx size, b
 
   /* If we have a constant small number of probes to generate, that's the
      easy case.  */
-  if (GET_CODE (size) == CONST_INT && INTVAL (size) < 7 * PROBE_INTERVAL)
+  if (CONST_INT_P (size) && INTVAL (size) < 7 * PROBE_INTERVAL)
     {
       HOST_WIDE_INT isize = INTVAL (size), i;
       bool first_probe = true;
--- gcc/calls.c.jj	2010-09-22 17:15:41.000000000 +0200
+++ gcc/calls.c	2010-09-23 19:18:54.000000000 +0200
@@ -2385,19 +2385,6 @@  expand_call (tree exp, rtx target, int i
 
   preferred_unit_stack_boundary = preferred_stack_boundary / BITS_PER_UNIT;
 
-  if (SUPPORTS_STACK_ALIGNMENT)
-    {
-      /* All variable sized adjustments must be multiple of preferred
-	 stack boundary.  Stack alignment may change preferred stack
-	 boundary after variable sized adjustments have been made.  We
-	 need to compensate it here.  */
-      unsigned HOST_WIDE_INT delta
-	= ((stack_pointer_delta - pending_stack_adjust)
-	   % preferred_unit_stack_boundary);
-      if (delta)
-	anti_adjust_stack (GEN_INT (preferred_unit_stack_boundary - delta));
-    }
-
   /* We want to make two insn chains; one for a sibling call, the other
      for a normal call.  We will select one of the two chains after
      initial RTL generation is complete.  */
--- gcc/print-rtl.c.jj	2010-09-22 17:15:41.000000000 +0200
+++ gcc/print-rtl.c	2010-09-23 19:19:49.000000000 +0200
@@ -457,6 +457,9 @@  print_rtx (const_rtx in_rtx)
 		  fprintf (outfile, " %d virtual-outgoing-args", value);
 		else if (value == VIRTUAL_CFA_REGNUM)
 		  fprintf (outfile, " %d virtual-cfa", value);
+		else if (value == VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM)
+		  fprintf (outfile, " %d virtual-preferred-stack-boundary",
+			   value);
 		else
 		  fprintf (outfile, " %d virtual-reg-%d", value,
 			   value-FIRST_VIRTUAL_REGISTER);
--- gcc/config/alpha/alpha.h.jj	2010-09-17 14:10:59.000000000 +0200
+++ gcc/config/alpha/alpha.h	2010-09-23 18:15:30.000000000 +0200
@@ -916,7 +916,7 @@  extern int alpha_memory_latency;
 #define NONSTRICT_REG_OK_FP_BASE_P(X)		\
   (REGNO (X) == 31 || REGNO (X) == 63		\
    || (REGNO (X) >= FIRST_PSEUDO_REGISTER	\
-       && REGNO (X) < LAST_VIRTUAL_REGISTER))
+       && REGNO (X) < LAST_VIRTUAL_POINTER_REGISTER))
 
 /* Nonzero if X is a hard reg that can be used as a base reg.  */
 #define STRICT_REG_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_P (REGNO (X))
--- gcc/config/frv/frv.c.jj	2010-09-17 14:10:59.000000000 +0200
+++ gcc/config/frv/frv.c	2010-09-23 18:16:46.000000000 +0200
@@ -4067,7 +4067,7 @@  frv_emit_movsi (rtx dest, rtx src)
 	  || (GET_CODE (src) == REG
 	      && IN_RANGE_P (REGNO (src),
 			     FIRST_VIRTUAL_REGISTER,
-			     LAST_VIRTUAL_REGISTER))))
+			     LAST_VIRTUAL_POINTER_REGISTER))))
     {
       emit_insn (gen_rtx_SET (VOIDmode, dest, copy_to_mode_reg (SImode, src)));
       return TRUE;
--- gcc/config/arm/arm.c.jj	2010-09-22 17:15:41.000000000 +0200
+++ gcc/config/arm/arm.c	2010-09-23 18:20:09.000000000 +0200
@@ -5851,7 +5851,8 @@  thumb1_legitimate_address_p (enum machin
 	       && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
 		   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
 		   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
-		       && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
+		       && REGNO (XEXP (x, 0))
+			  <= LAST_VIRTUAL_POINTER_REGISTER))
 	       && GET_MODE_SIZE (mode) >= 4
 	       && GET_CODE (XEXP (x, 1)) == CONST_INT
 	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
--- gcc/config/rs6000/rs6000.c.jj	2010-09-22 17:15:40.000000000 +0200
+++ gcc/config/rs6000/rs6000.c	2010-09-23 18:21:02.000000000 +0200
@@ -5489,7 +5489,7 @@  virtual_stack_registers_memory_p (rtx op
     return false;
 
   return (regnum >= FIRST_VIRTUAL_REGISTER
-	  && regnum <= LAST_VIRTUAL_REGISTER);
+	  && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
 }
 
 static bool
--- gcc/testsuite/gcc.dg/torture/stackalign/alloca-6.c.jj	2010-09-23 19:22:57.000000000 +0200
+++ gcc/testsuite/gcc.dg/torture/stackalign/alloca-6.c	2010-09-23 19:14:54.000000000 +0200
@@ -0,0 +1,34 @@ 
+/* PR middle-end/45234 */
+/* { dg-do run { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
+/* { dg-options "-mincoming-stack-boundary=2 -mpreferred-stack-boundary=2" } */
+
+#include "check.h"
+
+void
+__attribute__ ((noinline))
+bar (__float128 f)
+{
+  check (&f, __alignof__(f));
+}
+
+volatile int z = 6;
+
+int
+main (void)
+{
+  char *p = __builtin_alloca (z);
+
+  bar (0);
+
+  __builtin_strncpy (p, "good", 5);
+  if (__builtin_strncmp (p, "good", 5) != 0)
+    {
+#ifdef DEBUG
+      p[z - 1] = '\0';
+      printf ("Failed: %s != good\n", p);
+#endif
+     abort ();
+    }
+
+  return 0;
+}
--- gcc/testsuite/gcc.target/i386/pr45234.c.jj	2010-09-23 19:26:03.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/pr45234.c	2010-09-23 19:25:57.000000000 +0200
@@ -0,0 +1,18 @@ 
+/* PR middle-end/45234 */
+/* { dg-do compile } */
+/* { dg-options "-march=i586" { target ilp32 } } */
+
+struct S { union { double b[4]; } a[18]; } s, a[5];
+void foo (struct S);
+struct S bar (struct S, struct S *, struct S);
+
+void
+foo (struct S arg)
+{
+}
+
+void
+baz (void)
+{
+ foo (bar (s, &a[1], a[2]));
+}