diff mbox series

Fix quadraticnesses in release_defs_bitset and split_constant_offset

Message ID alpine.LSU.2.20.1907171211330.2976@zhemvz.fhfr.qr
State New
Headers show
Series Fix quadraticnesses in release_defs_bitset and split_constant_offset | expand

Commit Message

Richard Biener July 17, 2019, 10:18 a.m. UTC
The testcase in PR91178 runs into these because of vectorizer
code-gen stupidities (I'm going to fix that as well).

For split_constant_offset we should simply limit walking the
SSA def chain, now there's a convenient --param we can use for that.

For release_defs_bitset it's current implementation falls into the
trap of making it too easy to run into quadraticness for the
natural SSA name allocation of a chain of increments like

  _2 = _1 + 1;
  _3 = _2 + 1;
...

which happens here.  The fix is to (heuristically) iterate from
SSA names with higher version to ones with lower version.
Unfortunately there's no backwards bitmap iterator so the following
patch rewrites the iteration to use a vector which also allows us
to switch the bitmap to tree form for the actual iteration then
only doing bit tests/clears.  It's still horrible and as the
comment mentions a topological sort is the correct thing to do
(but we don't have the tooling for that - well, I deleted the
closest match recently).  Until we run into the next testcase ;)

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2019-07-17  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/91178
	* tree-ssa.c (release_defs_bitset): Iterate from higher to
	lower SSA names to avoid quadratic behavior in the common case.
	* tree-data-ref.c (split_constant_offset): Add limit argument
	and pass it down.  Initialize it from PARAM_SSA_NAME_DEF_CHAIN_LIMIT.
	(split_constant_offset_1): Add limit argument and use it to
	limit SSA def walking.  Optimize the common plus/minus case.
diff mbox series

Patch

Index: gcc/tree-ssa.c
===================================================================
--- gcc/tree-ssa.c	(revision 273542)
+++ gcc/tree-ssa.c	(working copy)
@@ -559,20 +559,25 @@  release_defs_bitset (bitmap toremove)
 
   /* Performing a topological sort is probably overkill, this will
      most likely run in slightly superlinear time, rather than the
-     pathological quadratic worst case.  */
+     pathological quadratic worst case.
+     But iterate from max SSA name version to min one because
+     that mimics allocation order during code generation behavior best.
+     Use an array for this which we compact on-the-fly with a NULL
+     marker moving towards the end of the vector.  */
+  auto_vec<tree, 16> names;
+  names.reserve (bitmap_count_bits (toremove) + 1);
+  names.quick_push (NULL_TREE);
+  EXECUTE_IF_SET_IN_BITMAP (toremove, 0, j, bi)
+    names.quick_push (ssa_name (j));
+
+  bitmap_tree_view (toremove);
   while (!bitmap_empty_p (toremove))
     {
-      unsigned to_remove_bit = -1U;
-      EXECUTE_IF_SET_IN_BITMAP (toremove, 0, j, bi)
+      j = names.length () - 1;
+      for (unsigned i = names.length () - 1; names[i];)
 	{
-	  if (to_remove_bit != -1U)
-	    {
-	      bitmap_clear_bit (toremove, to_remove_bit);
-	      to_remove_bit = -1U;
-	    }
-
 	  bool remove_now = true;
-	  tree var = ssa_name (j);
+	  tree var = names[i];
 	  gimple *stmt;
 	  imm_use_iterator uit;
 
@@ -617,14 +622,15 @@  release_defs_bitset (bitmap toremove)
 		  gsi_remove (&gsi, true);
 		  release_defs (def);
 		}
-
-	      to_remove_bit = j;
+	      bitmap_clear_bit (toremove, SSA_NAME_VERSION (var));
 	    }
+	  else
+	    --i;
+	  if (--j != i)
+	    names[i] = names[j];
 	}
-      if (to_remove_bit != -1U)
-	bitmap_clear_bit (toremove, to_remove_bit);
     }
-
+  bitmap_list_view (toremove);
 }
 
 /* Disable warnings about missing quoting in GCC diagnostics for
Index: gcc/tree-data-ref.c
===================================================================
--- gcc/tree-data-ref.c	(revision 273542)
+++ gcc/tree-data-ref.c	(working copy)
@@ -583,7 +583,8 @@  debug_ddrs (vec<ddr_p> ddrs)
 
 static void
 split_constant_offset (tree exp, tree *var, tree *off,
-		       hash_map<tree, std::pair<tree, tree> > &cache);
+		       hash_map<tree, std::pair<tree, tree> > &cache,
+		       unsigned *limit);
 
 /* Helper function for split_constant_offset.  Expresses OP0 CODE OP1
    (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
@@ -594,7 +595,8 @@  split_constant_offset (tree exp, tree *v
 static bool
 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
 			 tree *var, tree *off,
-			 hash_map<tree, std::pair<tree, tree> > &cache)
+			 hash_map<tree, std::pair<tree, tree> > &cache,
+			 unsigned *limit)
 {
   tree var0, var1;
   tree off0, off1;
@@ -615,8 +617,15 @@  split_constant_offset_1 (tree type, tree
       /* FALLTHROUGH */
     case PLUS_EXPR:
     case MINUS_EXPR:
-      split_constant_offset (op0, &var0, &off0, cache);
-      split_constant_offset (op1, &var1, &off1, cache);
+      if (TREE_CODE (op1) == INTEGER_CST)
+	{
+	  split_constant_offset (op0, &var0, &off0, cache, limit);
+	  *var = var0;
+	  *off = size_binop (ocode, off0, fold_convert (ssizetype, op1));
+	  return true;
+	}
+      split_constant_offset (op0, &var0, &off0, cache, limit);
+      split_constant_offset (op1, &var1, &off1, cache, limit);
       *var = fold_build2 (code, type, var0, var1);
       *off = size_binop (ocode, off0, off1);
       return true;
@@ -625,7 +634,7 @@  split_constant_offset_1 (tree type, tree
       if (TREE_CODE (op1) != INTEGER_CST)
 	return false;
 
-      split_constant_offset (op0, &var0, &off0, cache);
+      split_constant_offset (op0, &var0, &off0, cache, limit);
       *var = fold_build2 (MULT_EXPR, type, var0, op1);
       *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
       return true;
@@ -649,7 +658,7 @@  split_constant_offset_1 (tree type, tree
 
 	if (poffset)
 	  {
-	    split_constant_offset (poffset, &poffset, &off1, cache);
+	    split_constant_offset (poffset, &poffset, &off1, cache, limit);
 	    off0 = size_binop (PLUS_EXPR, off0, off1);
 	    if (POINTER_TYPE_P (TREE_TYPE (base)))
 	      base = fold_build_pointer_plus (base, poffset);
@@ -719,11 +728,15 @@  split_constant_offset_1 (tree type, tree
 	    e = std::make_pair (op0, ssize_int (0));
 	  }
 
+	if (*limit == 0)
+	  return false;
+	--*limit;
+
 	var0 = gimple_assign_rhs1 (def_stmt);
 	var1 = gimple_assign_rhs2 (def_stmt);
 
 	bool res = split_constant_offset_1 (type, var0, subcode, var1,
-					    var, off, cache);
+					    var, off, cache, limit);
 	if (res && use_cache)
 	  *cache.get (op0) = std::make_pair (*var, *off);
 	return res;
@@ -746,7 +759,7 @@  split_constant_offset_1 (tree type, tree
 		/* Split the unconverted operand and try to prove that
 		   wrapping isn't a problem.  */
 		tree tmp_var, tmp_off;
-		split_constant_offset (op0, &tmp_var, &tmp_off, cache);
+		split_constant_offset (op0, &tmp_var, &tmp_off, cache, limit);
 
 		/* See whether we have an SSA_NAME whose range is known
 		   to be [A, B].  */
@@ -781,7 +794,7 @@  split_constant_offset_1 (tree type, tree
 		*off = wide_int_to_tree (ssizetype, diff);
 	      }
 	    else
-	      split_constant_offset (op0, &var0, off, cache);
+	      split_constant_offset (op0, &var0, off, cache, limit);
 	    *var = fold_convert (type, var0);
 	    return true;
 	  }
@@ -798,7 +811,8 @@  split_constant_offset_1 (tree type, tree
 
 static void
 split_constant_offset (tree exp, tree *var, tree *off,
-		       hash_map<tree, std::pair<tree, tree> > &cache)
+		       hash_map<tree, std::pair<tree, tree> > &cache,
+		       unsigned *limit)
 {
   tree type = TREE_TYPE (exp), op0, op1, e, o;
   enum tree_code code;
@@ -812,7 +826,7 @@  split_constant_offset (tree exp, tree *v
 
   code = TREE_CODE (exp);
   extract_ops_from_tree (exp, &code, &op0, &op1);
-  if (split_constant_offset_1 (type, op0, code, op1, &e, &o, cache))
+  if (split_constant_offset_1 (type, op0, code, op1, &e, &o, cache, limit))
     {
       *var = e;
       *off = o;
@@ -822,10 +836,11 @@  split_constant_offset (tree exp, tree *v
 void
 split_constant_offset (tree exp, tree *var, tree *off)
 {
+  unsigned limit = PARAM_VALUE (PARAM_SSA_NAME_DEF_CHAIN_LIMIT);
   static hash_map<tree, std::pair<tree, tree> > *cache;
   if (!cache)
     cache = new hash_map<tree, std::pair<tree, tree> > (37);
-  split_constant_offset (exp, var, off, *cache);
+  split_constant_offset (exp, var, off, *cache, &limit);
   cache->empty ();
 }