tree-optimization/97626 - handle SCCs properly in SLP stmt analysis

Message ID nycvar.YFH.7.76.2010301228240.3814@elmra.sevgm.obk
State New
Series tree-optimization/97626 - handle SCCs properly in SLP stmt analysis

Commit Message

Richard Biener Oct. 30, 2020, 11:28 a.m. UTC
This makes sure to roll back the whole SCC when stmt analysis fails,
otherwise the optimistic visited treatment breaks down when the SCC
is entered again from a different node.  Rollback is easy when
tracking additions to visited in a vector, which also makes the
whole thing cheaper than the two hash-sets used before.
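
For illustration, below is a minimal stand-alone sketch of that idiom,
using std::unordered_set and std::vector in place of GCC's hash_set and
vec.  Node, its analyzable flag and analyze_node are hypothetical
stand-ins for slp_tree and vect_slp_analyze_node_operations; only the
visited bookkeeping mirrors the patch.

#include <cassert>
#include <unordered_set>
#include <vector>

/* Hypothetical graph node; 'analyzable' stands in for the result of
   per-node analysis (vect_slp_analyze_node_operations_1 in the patch).  */
struct Node
{
  std::vector<Node *> children;
  bool analyzable = true;
};

static bool
analyze_node (Node *node,
              std::unordered_set<Node *> &visited_set,
              std::vector<Node *> &visited_vec)
{
  /* Optimistically mark the node visited so a back-edge within the
     SCC terminates the recursion.  */
  if (!visited_set.insert (node).second)
    return true;
  visited_vec.push_back (node);

  /* Remember how much the recursion adds so a failure can undo it.  */
  size_t visited_rec_start = visited_vec.size ();

  bool res = true;
  for (Node *child : node->children)
    if (!(res = analyze_node (child, visited_set, visited_vec)))
      break;
  if (res)
    res = node->analyzable;

  /* On failure pop all recursively added nodes plus the node itself so
     the optimistic visited state is not seen from other SCC entries.  */
  if (!res)
    while (visited_vec.size () >= visited_rec_start)
      {
        visited_set.erase (visited_vec.back ());
        visited_vec.pop_back ();
      }
  return res;
}

int
main ()
{
  /* Two nodes forming a cycle (an SCC); analysis of b fails.  */
  Node a, b;
  a.children.push_back (&b);
  b.children.push_back (&a);
  b.analyzable = false;

  std::unordered_set<Node *> visited_set;
  std::vector<Node *> visited_vec;

  /* Entering the SCC at a fails and must leave no trace, otherwise a
     later entry at b would wrongly appear already analyzed.  */
  assert (!analyze_node (&a, visited_set, visited_vec));
  assert (visited_set.empty () && visited_vec.empty ());
  return 0;
}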

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2020-10-30  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/97626
	* tree-vect-slp.c (vect_slp_analyze_node_operations):
	Exchange the lvisited hash-set for a vector, roll back
	recursive adds to visited when analysis failed.
	(vect_slp_analyze_operations): Likewise.

	* gcc.dg/vect/bb-slp-pr97626.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/bb-slp-pr97626.c | 34 ++++++++++++++++++++++
 gcc/tree-vect-slp.c                        | 34 +++++++++++++---------
 2 files changed, 55 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-pr97626.c

Patch

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr97626.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr97626.c
new file mode 100644
index 00000000000..943d8a62de7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr97626.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+
+struct {
+  int x;
+  int y;
+} do_plasma_rect;
+
+int do_plasma_context_0, do_plasma_x2, do_plasma_y2, do_plasma_plasma_depth,
+    do_plasma_xm, do_plasma_ym;
+void gegl_buffer_set();
+
+void do_plasma(int x1, int y1) {
+  if (__builtin_expect(({
+                         int _g_boolean_var_;
+                         if (do_plasma_context_0)
+                           _g_boolean_var_ = 1;
+                         else
+                           _g_boolean_var_ = 0;
+                         _g_boolean_var_;
+                       }),
+                       0)) {
+    do_plasma_rect.x = x1;
+    do_plasma_rect.y = y1;
+    gegl_buffer_set();
+  }
+  do_plasma_xm = (x1 + do_plasma_x2) / 2;
+  do_plasma_ym = (y1 + do_plasma_y2) / 2;
+  if (do_plasma_plasma_depth) {
+    do_plasma_rect.x = do_plasma_xm;
+    do_plasma_rect.y = do_plasma_ym;
+    return;
+  }
+  do_plasma(do_plasma_xm, do_plasma_ym);
+}
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 714e50697bd..56dc59e11a6 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -3487,8 +3487,8 @@  vect_prologue_cost_for_slp (slp_tree node,
 static bool
 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
 				  slp_instance node_instance,
-				  hash_set<slp_tree> &visited,
-				  hash_set<slp_tree> &lvisited,
+				  hash_set<slp_tree> &visited_set,
+				  vec<slp_tree> &visited_vec,
 				  stmt_vector_for_cost *cost_vec)
 {
   int i, j;
@@ -3511,15 +3511,18 @@  vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
 
   /* If we already analyzed the exact same set of scalar stmts we're done.
      We share the generated vector stmts for those.  */
-  if (visited.contains (node)
-      || lvisited.add (node))
+  if (visited_set.add (node))
     return true;
+  visited_vec.safe_push (node);
 
   bool res = true;
+  unsigned visited_rec_start = visited_vec.length ();
+  unsigned cost_vec_rec_start = cost_vec->length ();
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
     {
       res = vect_slp_analyze_node_operations (vinfo, child, node_instance,
-					      visited, lvisited, cost_vec);
+					      visited_set, visited_vec,
+					      cost_vec);
       if (!res)
 	break;
     }
@@ -3527,8 +3530,14 @@  vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
   if (res)
     res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
 					      cost_vec);
+  /* If analysis failed we have to pop all recursive visited nodes
+     plus ourselves.  */
   if (!res)
-    lvisited.remove (node);
+    {
+      while (visited_vec.length () >= visited_rec_start)
+	visited_set.remove (visited_vec.pop ());
+      cost_vec->truncate (cost_vec_rec_start);
+    }
 
   /* When the node can be vectorized cost invariant nodes it references.
      This is not done in DFS order to allow the refering node
@@ -3543,9 +3552,9 @@  vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
 	  /* Perform usual caching, note code-generation still
 	     code-gens these nodes multiple times but we expect
 	     to CSE them later.  */
-	  && !visited.contains (child)
-	  && !lvisited.add (child))
+	  && !visited_set.add (child))
 	{
+	  visited_vec.safe_push (child);
 	  /* ???  After auditing more code paths make a "default"
 	     and push the vector type from NODE to all children
 	     if it is not already set.  */
@@ -3705,14 +3714,14 @@  vect_slp_analyze_operations (vec_info *vinfo)
   hash_set<slp_tree> visited;
   for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
     {
-      hash_set<slp_tree> lvisited;
+      auto_vec<slp_tree> visited_vec;
       stmt_vector_for_cost cost_vec;
       cost_vec.create (2);
       if (is_a <bb_vec_info> (vinfo))
 	vect_location = instance->location ();
       if (!vect_slp_analyze_node_operations (vinfo,
 					     SLP_INSTANCE_TREE (instance),
-					     instance, visited, lvisited,
+					     instance, visited, visited_vec,
 					     &cost_vec)
 	  /* Instances with a root stmt require vectorized defs for the
 	     SLP tree root.  */
@@ -3729,12 +3738,11 @@  vect_slp_analyze_operations (vec_info *vinfo)
 	  vect_free_slp_instance (instance);
           vinfo->slp_instances.ordered_remove (i);
 	  cost_vec.release ();
+	  while (!visited_vec.is_empty ())
+	    visited.remove (visited_vec.pop ());
 	}
       else
 	{
-	  for (hash_set<slp_tree>::iterator x = lvisited.begin();
-	       x != lvisited.end(); ++x)
-	    visited.add (*x);
 	  i++;
 
 	  /* For BB vectorization remember the SLP graph entry