Patchwork [PR51491] add CLOBBER before __builtin_stack_restore when converting vla alloca_with_align to array decl

login
register
mail settings
Submitter Tom de Vries
Date Dec. 13, 2011, 12:26 p.m.
Message ID <4EE74482.3070503@codesourcery.com>
Download mbox | patch
Permalink /patch/131062/
State New
Headers show

Comments

Tom de Vries - Dec. 13, 2011, 12:26 p.m.
Jakub,

I have a patch for PR51491.

Consider the following test-case, compiled with gcc -O2 -S f.c -fdump-tree-all:
...
int f(void)
{
  int tt = 0;
  int t = 4;
  {
    int a[t];
    tt = g(a);
    tt += a[0];
  }
  {
    int a[4];
    tt += g(a);
    tt += a[0];
  }
  return tt;
}
...

The vla a[t] is gimplified to an alloca_with_align, with a
__builtin_stack_save/__builtin_stack_restore pair inserted at the scope
boundaries. This alloca_with_align is folded by ccp into an array declaration,
resulting in this representation at f.c.149t.optimized:
...
f ()
{
  <unnamed-unsigned:8> D.1726[16];
  int a[4];
  int tt;
  int D.1722;
  int D.1721;
  void * saved_stack.2;
  int D.1719;

<bb 2>:
  saved_stack.2_3 = __builtin_stack_save ();
  tt_18 = g (&D.1726);
  D.1719_19 = MEM[(int[0:D.1713] *)&D.1726][0];
  tt_20 = D.1719_19 + tt_18;
  __builtin_stack_restore (saved_stack.2_3);
  D.1721_21 = g (&a);
  tt_22 = D.1721_21 + tt_20;
  D.1722_23 = a[0];
  tt_24 = D.1722_23 + tt_22;
  a ={v} {CLOBBER};
  return tt_24;
}
...

The patch inserts this clobber of D.1726 when alloca_with_align is folded:
...
@@ -13,6 +13,7 @@
   tt_18 = g (&D.1726);
   D.1719_19 = MEM[(int[0:D.1713] *)&D.1726][0];
   tt_20 = D.1719_19 + tt_18;
+  D.1726 ={v} {CLOBBER};
   __builtin_stack_restore (saved_stack.2_3);
   D.1721_21 = g (&a);
   tt_22 = D.1721_21 + tt_20;
...

This allows D.1726 and a[4] to be mapped onto the same stack space, as can be
seen in expand dump:
...
Partition 0: size 16 align 16
	D.1726	a
...

Bootstrapped and reg-tested (ada inclusive) on x86_64.

OK for stage3?

Thanks,
- Tom

2011-12-13  Tom de Vries  <tom@codesourcery.com>

	PR tree-optimization/51491
	* tree-ssa-ccp.c (insert_clobber_before_stack_restore): New function.
	(ccp_fold_stmt): Use insert_clobber_before_stack_restore after a
	successful fold_builtin_alloca_with_align.

	* gcc.dg/pr51491.c: New test.
Jakub Jelinek - Dec. 13, 2011, 1:13 p.m.
On Tue, Dec 13, 2011 at 01:26:42PM +0100, Tom de Vries wrote:
> 2011-12-13  Tom de Vries  <tom@codesourcery.com>
> 
> 	PR tree-optimization/51491
> 	* tree-ssa-ccp.c (insert_clobber_before_stack_restore): New function.
> 	(ccp_fold_stmt): Use insert_clobber_before_stack_restore after a
> 	successful fold_builtin_alloca_with_align.

I don't think this is safe.  You don't want to look for any following
__builtin_stack_restore, but for the matching one.  Consider:

int g (int *);

int
f (int n)
{
  int tt = 0;
  int t = 4;
  {
    int a[t
#ifdef DIFFERENT_BB2
          + (tt != 0 ? 6 : 0)
#endif
         ];
    tt = g (a);
    {
      int b[n];
      tt += g (b);
#ifdef DIFFERENT_BB
      if (n > 20)
	tt += 148 * g (b);
#endif      
      tt += b[0];
    }
    tt += a[0];
  }
  {
    int a[4];
    tt += g (a);
    tt += a[0];
  }
  return tt;
}

Without any defines, this shows that looking for the first
BUILT_IN_STACK_RESTORE is wrong if you ignore BUILT_IN_STACK_SAVE calls.
And with the various defines it shows that neither the corresponding
__builtin_stack_save nor __builtin_stack_restore have to be in the same
bb as __builtin_alloca_with_align that is being folded.
Perhaps you want to look for the closest enclosing __builtin_stack_save
(search backwards in current basic block, its immediate dominator etc.?),
remember what SSA_NAME it stores its result into and then just look at
where is the (single?) user of that, which ought to be
__builtin_stack_restore.

	Jakub

Patch

Index: gcc/tree-ssa-ccp.c
===================================================================
--- gcc/tree-ssa-ccp.c (revision 182098)
+++ gcc/tree-ssa-ccp.c (working copy)
@@ -1690,6 +1690,36 @@  evaluate_stmt (gimple stmt)
   return val;
 }
 
+/* Try to find a BUILT_IN_STACK_RESTORE after gsi_stmt (I) and insert a clobber
+   of T before it. */
+
+static void
+insert_clobber_before_stack_restore (gimple_stmt_iterator i, tree t)
+{
+  bool found = false;
+  tree clobber;
+
+  for (; !gsi_end_p (i); gsi_next (&i))
+    {
+      gimple stmt = gsi_stmt (i);
+
+      if (!gimple_call_builtin_p (stmt, BUILT_IN_STACK_RESTORE))
+	continue;
+
+      found = true;
+      break;
+    }
+
+  /* We should try harder, but for now, just return.  */
+  if (!found)
+    return;
+
+  clobber = build_constructor (TREE_TYPE (t), NULL);
+  TREE_THIS_VOLATILE (clobber) = 1;
+  gsi_insert_before (&i, gimple_build_assign (t, clobber),
+		     GSI_SAME_STMT);
+}
+
 /* Detects a __builtin_alloca_with_align with constant size argument.  Declares
    fixed-size array and returns the address, if found, otherwise returns
    NULL_TREE.  */
@@ -1824,7 +1854,9 @@  ccp_fold_stmt (gimple_stmt_iterator *gsi
             if (new_rhs)
 	      {
 		bool res = update_call_from_tree (gsi, new_rhs);
+		tree var = TREE_OPERAND (TREE_OPERAND (new_rhs, 0),0);
 		gcc_assert (res);
+		insert_clobber_before_stack_restore (*gsi, var);
 		return true;
 	      }
           }
Index: gcc/testsuite/gcc.dg/pr51491.c
===================================================================
--- /dev/null (new file)
+++ gcc/testsuite/gcc.dg/pr51491.c (revision 0)
@@ -0,0 +1,25 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-expand" } */
+
+
+int g(int*);
+
+int f(void)
+{
+  int tt = 0;
+  int t = 4;
+  {
+    int a[t];
+    tt = g(a);
+    tt += a[0];
+  }
+  {
+    int a[4];
+    tt += g(a);
+    tt += a[0];
+  }
+  return tt;
+}
+
+/* { dg-final { scan-rtl-dump-times "Partition" 1 "expand"} } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */