[GRAPHITE] Fix SSA update

Message ID alpine.LSU.2.20.1710131256500.6597@zhemvz.fhfr.qr
State New
Headers show
Series
  • [GRAPHITE] Fix SSA update
Related show

Commit Message

Richard Biener Oct. 13, 2017, 11 a.m.
This is something I wanted to do later just as compile-time optimization
but it turns out it is necessary for correctness if we want to keep
the current order of creating SCOPs and analyzing data references and
parameters and only after that code-generating SCOPs that were optimized.

This is because what SSA names are the parameters really depens on the
IL and liveout PHIs for transformed SESE regions changes the situation
enough that use "stale" information.

Of course the issue isn't one if we do the transform in "one step"
because update-SSA can just cope with that.

In the process I simplified the main graphite function to inline
the initialize/finalize stuff, removing a weird parameter that
ended up PASSing gcc.dg/graphite/pr35356-3.c (with just one loop)
so I XFAILed that again.

I've added gcc.dg/graphite/pr81373-2.c which ICEs before this patch.

Bootstrapped and tested on x86_64-unknown-linux-gnu, SPEC is happy,
applied.

Richard.

2017-10-13  Richard Biener  <rguenther@suse.de>

	* graphite-isl-ast-to-gimple.c
	(translate_isl_ast_to_gimple::get_rename_from_scev): Remove unused
	parameters and dominance check.
	(translate_isl_ast_to_gimple::graphite_copy_stmts_from_block): Adjust.
	(translate_isl_ast_to_gimple::copy_bb_and_scalar_dependences): Likewise.
	(translate_isl_ast_to_gimple::graphite_regenerate_ast_isl):
	Do not update SSA form here or do intermediate IL verification.
	* graphite.c: Include tree-ssa.h and tree-into-ssa.h.
	(graphite_initialize): Remove check on the number of loops in
	the function and inline into graphite_transform_loops.
	(graphite_finalize): Inline into graphite_transform_loops.
	(graphite_transform_loops): Perform SSA update and IL verification
	here.
	* params.def (PARAM_GRAPHITE_MIN_LOOPS_PER_FUNCTION): Remove.

	* gcc.dg/graphite/pr35356-3.c: XFAIL again.
	* gcc.dg/graphite/pr81373-2.c: Copy from gcc.dg/graphite/pr81373.c
	with alternate flags.

Patch

Index: gcc/graphite-isl-ast-to-gimple.c
===================================================================
--- gcc/graphite-isl-ast-to-gimple.c	(revision 253719)
+++ gcc/graphite-isl-ast-to-gimple.c	(working copy)
@@ -189,7 +189,6 @@  class translate_isl_ast_to_gimple
   __isl_give isl_ast_node * scop_to_isl_ast (scop_p scop);
 
   tree get_rename_from_scev (tree old_name, gimple_seq *stmts, loop_p loop,
-			     basic_block new_bb, basic_block old_bb,
 			     vec<tree> iv_map);
   bool graphite_copy_stmts_from_block (basic_block bb, basic_block new_bb,
 				       vec<tree> iv_map);
@@ -1084,7 +1083,6 @@  gsi_insert_earliest (gimple_seq seq)
 
 tree translate_isl_ast_to_gimple::
 get_rename_from_scev (tree old_name, gimple_seq *stmts, loop_p loop,
-		      basic_block new_bb, basic_block,
 		      vec<tree> iv_map)
 {
   tree scev = scalar_evolution_in_region (region->region, loop, old_name);
@@ -1113,16 +1111,6 @@  get_rename_from_scev (tree old_name, gim
       return build_zero_cst (TREE_TYPE (old_name));
     }
 
-  if (TREE_CODE (new_expr) == SSA_NAME)
-    {
-      basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (new_expr));
-      if (bb && !dominated_by_p (CDI_DOMINATORS, new_bb, bb))
-	{
-	  set_codegen_error ();
-	  return build_zero_cst (TREE_TYPE (old_name));
-	}
-    }
-
   /* Replace the old_name with the new_expr.  */
   return force_gimple_operand (unshare_expr (new_expr), stmts,
 			       true, NULL_TREE);
@@ -1245,8 +1233,7 @@  graphite_copy_stmts_from_block (basic_bl
 	      {
 		gimple_seq stmts = NULL;
 		new_name = get_rename_from_scev (old_name, &stmts,
-						 bb->loop_father,
-						 new_bb, bb, iv_map);
+						 bb->loop_father, iv_map);
 		if (! codegen_error_p ())
 		  gsi_insert_earliest (stmts);
 		new_expr = &new_name;
@@ -1361,7 +1348,7 @@  copy_bb_and_scalar_dependences (basic_bl
 		  gimple_seq stmts = NULL;
 		  tree new_name = get_rename_from_scev (arg, &stmts,
 							bb->loop_father,
-							new_bb, bb, iv_map);
+							iv_map);
 		  if (! codegen_error_p ())
 		    gsi_insert_earliest (stmts);
 		  arg = new_name;
@@ -1567,17 +1554,6 @@  graphite_regenerate_ast_isl (scop_p scop
 				     if_region->true_region->region.exit);
       if (dump_file)
 	fprintf (dump_file, "[codegen] isl AST to Gimple succeeded.\n");
-
-      mark_virtual_operands_for_renaming (cfun);
-      update_ssa (TODO_update_ssa);
-      checking_verify_ssa (true, true);
-      rewrite_into_loop_closed_ssa (NULL, 0);
-      /* We analyzed evolutions of all SCOPs during SCOP detection
-         which cached evolutions.  Now we've introduced PHIs for
-	 liveouts which causes those cached solutions to be invalid
-	 for code-generation purposes given we'd insert references
-	 to SSA names not dominating their new use.  */
-      scev_reset ();
     }
 
   if (t.codegen_error_p ())
@@ -1587,9 +1563,6 @@  graphite_regenerate_ast_isl (scop_p scop
 		 "reverting back to the original code.\n");
       set_ifsese_condition (if_region, integer_zero_node);
 
-      /* We registered new names, scrap that.  */
-      if (need_ssa_update_p (cfun))
-	delete_update_ssa ();
       /* Remove the unreachable region.  */
       remove_edge_and_dominated_blocks (if_region->true_region->region.entry);
       basic_block ifb = if_region->false_region->region.entry->src;
@@ -1605,9 +1578,11 @@  graphite_regenerate_ast_isl (scop_p scop
 	  delete_loop (loop);
     }
 
-  /* Verifies properties that GRAPHITE should maintain during translation.  */
-  checking_verify_loop_structure ();
-  checking_verify_loop_closed_ssa (true);
+  /* We are delaying SSA update to after code-generating all SCOPs.
+     This is because we analyzed DRs and parameters on the unmodified
+     IL and thus rely on SSA update to pick up new dominating definitions
+     from for example SESE liveout PHIs.  This is also for efficiency
+     as SSA update does work depending on the size of the function.  */
 
   free (if_region->true_region);
   free (if_region->region);
Index: gcc/graphite.c
===================================================================
--- gcc/graphite.c	(revision 253719)
+++ gcc/graphite.c	(working copy)
@@ -55,6 +55,8 @@  along with GCC; see the file COPYING3.
 #include "tree-cfgcleanup.h"
 #include "tree-vectorizer.h"
 #include "tree-ssa-loop-manip.h"
+#include "tree-ssa.h"
+#include "tree-into-ssa.h"
 #include "graphite.h"
 
 /* Print global statistics to FILE.  */
@@ -212,64 +214,6 @@  print_graphite_statistics (FILE* file, v
   print_loops (file, 3);
 }
 
-/* Initialize graphite: when there are no loops returns false.  */
-
-static bool
-graphite_initialize (void)
-{
-  int min_loops = PARAM_VALUE (PARAM_GRAPHITE_MIN_LOOPS_PER_FUNCTION);
-  int nloops = number_of_loops (cfun);
-
-  if (nloops <= min_loops)
-    {
-      if (dump_file && (dump_flags & TDF_DETAILS))
-	{
-	  if (nloops <= min_loops)
-	    fprintf (dump_file, "\nFunction does not have enough loops: "
-		     "PARAM_GRAPHITE_MIN_LOOPS_PER_FUNCTION = %d.\n",
-		     min_loops);
-
-	  fprintf (dump_file, "\nnumber of SCoPs: 0\n");
-	  print_global_statistics (dump_file);
-	}
-
-      return false;
-    }
-
-  calculate_dominance_info (CDI_DOMINATORS);
-  initialize_original_copy_tables ();
-
-  if (dump_file && dump_flags)
-    {
-      dump_function_to_file (current_function_decl, dump_file, dump_flags);
-      print_loops (dump_file, 3);
-    }
-
-  return true;
-}
-
-/* Finalize graphite: perform CFG cleanup when NEED_CFG_CLEANUP_P is
-   true.  */
-
-static void
-graphite_finalize (bool need_cfg_cleanup_p)
-{
-  if (need_cfg_cleanup_p)
-    {
-      free_dominance_info (CDI_DOMINATORS);
-      scev_reset ();
-      cleanup_tree_cfg ();
-      profile_status_for_fn (cfun) = PROFILE_ABSENT;
-      release_recorded_exits (cfun);
-      tree_estimate_probability (false);
-    }
-
-  free_original_copy_tables ();
-
-  if (dump_file && dump_flags)
-    print_loops (dump_file, 3);
-}
-
 /* Deletes all scops in SCOPS.  */
 
 static void
@@ -396,7 +340,7 @@  graphite_transform_loops (void)
 {
   int i;
   scop_p scop;
-  bool need_cfg_cleanup_p = false;
+  bool changed = false;
   vec<scop_p> scops = vNULL;
   isl_ctx *ctx;
 
@@ -405,8 +349,7 @@  graphite_transform_loops (void)
   if (parallelized_function_p (cfun->decl))
     return;
 
-  if (!graphite_initialize ())
-    return;
+  calculate_dominance_info (CDI_DOMINATORS);
 
   ctx = isl_ctx_alloc ();
   isl_options_set_on_error (ctx, ISL_ON_ERROR_ABORT);
@@ -438,7 +381,7 @@  graphite_transform_loops (void)
 	location_t loc = find_loop_location
 	  (scops[i]->scop_info->region.entry->dest->loop_father);
 
-	need_cfg_cleanup_p = true;
+	changed = true;
 	if (!graphite_regenerate_ast_isl (scop))
 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, loc,
 			   "loop nest not optimized, code generation error\n");
@@ -447,6 +390,16 @@  graphite_transform_loops (void)
 			   "loop nest optimized\n");
       }
 
+  if (changed)
+    {
+      mark_virtual_operands_for_renaming (cfun);
+      update_ssa (TODO_update_ssa);
+      checking_verify_ssa (true, true);
+      rewrite_into_loop_closed_ssa (NULL, 0);
+      scev_reset ();
+      checking_verify_loop_structure ();
+    }
+
   if (dump_file && (dump_flags & TDF_DETAILS))
     {
       loop_p loop;
@@ -461,9 +414,17 @@  graphite_transform_loops (void)
     }
 
   free_scops (scops);
-  graphite_finalize (need_cfg_cleanup_p);
   the_isl_ctx = NULL;
   isl_ctx_free (ctx);
+
+  if (changed)
+    {
+      cleanup_tree_cfg ();
+      profile_status_for_fn (cfun) = PROFILE_ABSENT;
+      release_recorded_exits (cfun);
+      tree_estimate_probability (false);
+    }
+
 }
 
 #else /* If isl is not available: #ifndef HAVE_isl.  */
Index: gcc/params.def
===================================================================
--- gcc/params.def	(revision 253719)
+++ gcc/params.def	(working copy)
@@ -882,13 +882,6 @@  DEFPARAM (PARAM_GRAPHITE_MAX_ARRAYS_PER_
 	  "maximum number of arrays per scop.",
 	  100, 0, 0)
 
-/* Maximal number of basic blocks in the functions analyzed by Graphite.  */
-
-DEFPARAM (PARAM_GRAPHITE_MIN_LOOPS_PER_FUNCTION,
-	  "graphite-min-loops-per-function",
-	  "minimal number of loops per function to be analyzed by Graphite.",
-	  2, 0, 0)
-
 DEFPARAM (PARAM_MAX_ISL_OPERATIONS,
 	  "max-isl-operations",
 	  "maximum number of isl operations, 0 means unlimited",
Index: gcc/testsuite/gcc.dg/graphite/pr35356-3.c
===================================================================
--- gcc/testsuite/gcc.dg/graphite/pr35356-3.c	(revision 253719)
+++ gcc/testsuite/gcc.dg/graphite/pr35356-3.c	(working copy)
@@ -36,4 +36,5 @@  match (void)
    "Y[winner].y > 0".  This could be fixed when we will use predicates
    for such cases.  */
 
-/* { dg-final { scan-tree-dump-times "loop_1" 0 "graphite" } } */
+/* { dg-final { scan-tree-dump-times "loop_1" 0 "graphite" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "number of SCoPs: 0" "graphite" } } */
Index: gcc/testsuite/gcc.dg/graphite/pr81373-2.c
===================================================================
--- gcc/testsuite/gcc.dg/graphite/pr81373-2.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/graphite/pr81373-2.c	(working copy)
@@ -0,0 +1,40 @@ 
+/* { dg-options "-fno-tree-scev-cprop -floop-nest-optimize -fgraphite-identity -O -fdump-tree-graphite-all" } */
+
+void bar (void);
+
+int toto()
+{
+  int i, j, k;
+  int a[101][100];
+  int b[100];
+
+  for (i = 1; i < 100; i++)
+    {
+      for (j = 1; j < 100; j++)
+	for (k = 1; k < 100; k++)
+	  a[j][k] = a[j+1][i-1] + 2;
+
+      b[i] = b[i-1] + 2;
+
+      bar ();
+
+      for (j = 1; j < 100; j++)
+	a[j][i] = a[j+1][i-1] + 2;
+
+      b[i] = b[i-1] + 2;
+
+      bar ();
+
+      for (j = 1; j < 100; j++)
+	a[j][i] = a[j+1][i-1] + 2;
+
+      b[i] = a[i-1][i] + 2;
+
+      for (j = 1; j < 100; j++)
+	a[j][i] = a[j+1][i-1] + 2;
+    }
+
+  return a[3][5] + b[1];
+}
+
+/* { dg-final { scan-tree-dump-times "number of SCoPs: 2" 1 "graphite"} } */