diff mbox

Fix PRs 65063 and 63593

Message ID alpine.LSU.2.11.1502161520120.27763@zhemvz.fhfr.qr
State New
Headers show

Commit Message

Richard Biener Feb. 16, 2015, 2:21 p.m. UTC
Predictive commoning happens to re-use SSA names it released while
there are still uses of them (oops), confusing the hell out of
other code (expected).  Fixed thus.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Richard.

2015-02-16  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/63593
	PR tree-optimization/65063
	* tree-predcom.c (execute_pred_commoning_chain): Delay removing
	stmts and releasing SSA names until...
	(execute_pred_commoning): ... after processing all chains.

	* gcc.dg/pr63593.c: New testcase.
	* gcc.dg/pr65063.c: Likewise.

Comments

Richard Biener Feb. 17, 2015, 3:05 p.m. UTC | #1
On Mon, 16 Feb 2015, Richard Biener wrote:

> 
> Predictive commoning happens to re-use SSA names it released while
> there are still uses of them (oops), confusing the hell out of
> other code (expected).  Fixed thus.
> 
> Bootstrap and regtest running on x86_64-unknown-linux-gnu.

So I was wrong: this doesn't fix PR65063, but it did point at
a similar issue.  The loop transform code doesn't handle the case
where we replace looparound PHIs and need an epilogue loop (thus
we use unrolling).  The following patch disables unrolling in that
case.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2015-02-17  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/65063
	* tree-predcom.c (determine_unroll_factor): Return 1 if we
	have replaced looparound PHIs.

	* gcc.dg/pr65063.c: New testcase.

Index: gcc/tree-predcom.c
===================================================================
*** gcc/tree-predcom.c	(revision 220755)
--- gcc/tree-predcom.c	(working copy)
*************** determine_unroll_factor (vec<chain_p> ch
*** 1775,1783 ****
  
    FOR_EACH_VEC_ELT (chains, i, chain)
      {
!       if (chain->type == CT_INVARIANT || chain->combined)
  	continue;
  
        /* The best unroll factor for this chain is equal to the number of
  	 temporary variables that we create for it.  */
        af = chain->length;
--- 1775,1794 ----
  
    FOR_EACH_VEC_ELT (chains, i, chain)
      {
!       if (chain->type == CT_INVARIANT)
  	continue;
  
+       if (chain->combined)
+ 	{
+ 	  /* For combined chains, we can't handle unrolling if we replace
+ 	     looparound PHIs.  */
+ 	  dref a;
+ 	  unsigned j;
+ 	  for (j = 1; chain->refs.iterate (j, &a); j++)
+ 	    if (gimple_code (a->stmt) == GIMPLE_PHI)
+ 	      return 1;
+ 	}
+ 
        /* The best unroll factor for this chain is equal to the number of
  	 temporary variables that we create for it.  */
        af = chain->length;
Index: gcc/testsuite/gcc.dg/pr65063.c
===================================================================
*** gcc/testsuite/gcc.dg/pr65063.c	(revision 0)
--- gcc/testsuite/gcc.dg/pr65063.c	(working copy)
***************
*** 0 ****
--- 1,33 ----
+ /* { dg-do run } */
+ /* { dg-options "-O3 -fno-tree-loop-ivcanon -fno-tree-vectorize" } */
+ 
+ static int in[8][4];
+ static int out[4];
+ static const int check_result[] = {0, 16, 256, 4096};
+ 
+ static inline void foo ()
+ {
+   int sum;
+   int i, j, k;
+   for (k = 0; k < 4; k++)
+     {
+       sum = 1;
+       for (j = 0; j < 4; j++)
+ 	for (i = 0; i < 4; i++)
+ 	  sum *= in[i + k][j];
+       out[k] = sum;
+     }
+ }
+ 
+ int main ()
+ {
+   int i, j, k;
+   for (i = 0; i < 8; i++)
+     for (j = 0; j < 4; j++)
+       in[i][j] = (i + 2) / 3;
+   foo ();
+   for (k = 0; k < 4; k++)
+     if (out[k] != check_result[k])
+       __builtin_abort ();
+   return 0;
+ }
diff mbox

Patch

Index: gcc/tree-predcom.c
===================================================================
--- gcc/tree-predcom.c	(revision 220731)
+++ gcc/tree-predcom.c	(working copy)
@@ -1745,9 +1745,8 @@  execute_pred_commoning_chain (struct loo
   if (chain->combined)
     {
       /* For combined chains, just remove the statements that are used to
-	 compute the values of the expression (except for the root one).  */
-      for (i = 1; chain->refs.iterate (i, &a); i++)
-	remove_stmt (a->stmt);
+	 compute the values of the expression (except for the root one).
+	 We delay this until after all chains are processed.  */
     }
   else
     {
@@ -1811,6 +1810,21 @@  execute_pred_commoning (struct loop *loo
 	execute_pred_commoning_chain (loop, chain, tmp_vars);
     }
 
+  FOR_EACH_VEC_ELT (chains, i, chain)
+    {
+      if (chain->type == CT_INVARIANT)
+	;
+      else if (chain->combined)
+	{
+	  /* For combined chains, just remove the statements that are used to
+	     compute the values of the expression (except for the root one).  */
+	  dref a;
+	  unsigned j;
+	  for (j = 1; chain->refs.iterate (j, &a); j++)
+	    remove_stmt (a->stmt);
+	}
+    }
+
   update_ssa (TODO_update_ssa_only_virtuals);
 }
 
Index: gcc/testsuite/gcc.dg/pr63593.c
===================================================================
--- gcc/testsuite/gcc.dg/pr63593.c	(revision 0)
+++ gcc/testsuite/gcc.dg/pr63593.c	(working copy)
@@ -0,0 +1,20 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-vectorize" } */
+
+int in[2 * 4][4];
+int out[4];
+
+void
+foo (void)
+{
+  int sum;
+  int i, j, k;
+  for (k = 0; k < 4; k++)
+    {
+      sum = 1;
+      for (j = 0; j < 4; j++)
+	for (i = 0; i < 4; i++)
+	  sum *= in[i + k][j];
+      out[k] = sum;
+    }
+}
Index: gcc/testsuite/gcc.dg/pr65063.c
===================================================================
--- gcc/testsuite/gcc.dg/pr65063.c	(revision 0)
+++ gcc/testsuite/gcc.dg/pr65063.c	(working copy)
@@ -0,0 +1,33 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3 -fno-tree-loop-ivcanon -fno-tree-vectorize" } */
+
+static int in[8][4];
+static int out[4];
+static const int check_result[] = {0, 16, 256, 4096};
+
+static inline void foo ()
+{
+  int sum;
+  int i, j, k;
+  for (k = 0; k < 4; k++)
+    {
+      sum = 1;
+      for (j = 0; j < 4; j++)
+	for (i = 0; i < 4; i++)
+	  sum *= in[i + k][j];
+      out[k] = sum;
+    }
+}
+
+int main ()
+{
+  int i, j, k;
+  for (i = 0; i < 8; i++)
+    for (j = 0; j < 4; j++)
+      in[i][j] = (i + 2) / 3;
+  foo ();
+  for (k = 0; k < 4; k++)
+    if (out[k] != check_result[k])
+      __builtin_abort ();
+  return 0;
+}