Switch conversion: support any ax + b transformation (PR tree-optimization/84436).

Message ID c1f21703-b516-b7f2-b128-84dd7a939c50@suse.cz
State New
Headers show
Series
  • Switch conversion: support any ax + b transformation (PR tree-optimization/84436).
Related show

Commit Message

Martin Liška Oct. 11, 2018, 12:56 p.m.
Hi.

As seen in the PR, switch conversion can do better when we return equal numbers
based on index value. I implemented more than that, more precisely I support all linear
transformation based on index value. It's the same what clang is capable of.

Patch survives testing on x86_64-linux-gnu. I added various tests that should
benefit of the transformation.

Thanks,
Martin

gcc/ChangeLog:

2018-10-11  Martin Liska  <mliska@suse.cz>

	PR tree-optimization/84436
	* tree-switch-conversion.c (switch_conversion::contains_same_values_p):
	Remove.
	(switch_conversion::contains_linear_function_p): New.
	(switch_conversion::build_one_array): Support linear
	transformation on input.
	* tree-switch-conversion.h (struct switch_conversion): Add
	contains_linear_function_p declaration.

gcc/testsuite/ChangeLog:

2018-10-11  Martin Liska  <mliska@suse.cz>

	PR tree-optimization/84436
	* gcc.dg/tree-ssa/pr84436-1.c: New test.
	* gcc.dg/tree-ssa/pr84436-2.c: New test.
	* gcc.dg/tree-ssa/pr84436-3.c: New test.
	* gcc.dg/tree-ssa/pr84436-4.c: New test.
	* gcc.dg/tree-ssa/pr84436-5.c: New test.
---
 gcc/testsuite/gcc.dg/tree-ssa/pr84436-1.c | 36 +++++++++
 gcc/testsuite/gcc.dg/tree-ssa/pr84436-2.c | 67 +++++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/pr84436-3.c | 24 ++++++
 gcc/testsuite/gcc.dg/tree-ssa/pr84436-4.c | 38 ++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/pr84436-5.c | 38 ++++++++++
 gcc/tree-switch-conversion.c              | 89 +++++++++++++++++++----
 gcc/tree-switch-conversion.h              | 10 ++-
 7 files changed, 283 insertions(+), 19 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr84436-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr84436-2.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr84436-3.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr84436-4.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr84436-5.c

Comments

Jakub Jelinek Oct. 11, 2018, 1:03 p.m. | #1
On Thu, Oct 11, 2018 at 02:56:14PM +0200, Martin Liška wrote:
> As seen in the PR, switch conversion can do better when we return equal numbers
> based on index value. I implemented more than that, more precisely I support all linear
> transformation based on index value. It's the same what clang is capable of.

Not a review, just a question, do you check for overflows while computing
it?  Or force the arithmetics to be performed in a type with defined
overflow.  It would be bad to introduced UB...

	Jakub
Alexander Monakov Oct. 11, 2018, 1:18 p.m. | #2
On Thu, 11 Oct 2018, Martin Liška wrote:

> Hi.
> 
> As seen in the PR, switch conversion can do better when we return equal numbers
> based on index value. I implemented more than that, more precisely I support all linear
> transformation based on index value. It's the same what clang is capable of.
> 
> Patch survives testing on x86_64-linux-gnu. I added various tests that should
> benefit of the transformation.

Nice!  I'd like to point out that __attribute__((noipa)) might be more
appropriate for the new tests, instead of the "noinline,noclone" combo.

(I was also wondering about overflow behavior that Jakub asked about)

Alexander

Patch

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr84436-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr84436-1.c
new file mode 100644
index 00000000000..5e69eb55dab
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr84436-1.c
@@ -0,0 +1,36 @@ 
+/* PR tree-optimization/84436 */
+/* { dg-options "-O2 -fdump-tree-switchconv -fdump-tree-optimized" } */
+/* { dg-do run } */
+
+int
+__attribute__ ((noinline, noclone))
+foo (int how)
+{
+  switch (how) {
+    case 2: how = 205; break; /* how = 100 * index + 5 */
+    case 3: how = 305; break;
+    case 4: how = 405; break;
+    case 5: how = 505; break;
+    case 6: how = 605; break;
+  }
+  return how;
+}
+
+int main()
+{
+  if (foo (2) != 205)
+  __builtin_abort ();
+
+  if (foo (6) != 605)
+  __builtin_abort ();
+
+  if (foo (123) != 123)
+  __builtin_abort ();
+
+  return 0;
+}
+
+
+/* { dg-final { scan-tree-dump-times "how.*\\* 100" 1 "switchconv" } } */
+/* { dg-final { scan-tree-dump-times "how.* = .* \\+ 5" 1 "switchconv" } } */
+/* { dg-final { scan-tree-dump-not "switch" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr84436-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr84436-2.c
new file mode 100644
index 00000000000..c34027a08b9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr84436-2.c
@@ -0,0 +1,67 @@ 
+/* PR tree-optimization/84436 */
+/* { dg-options "-O2 -fdump-tree-switchconv -fdump-tree-optimized" } */
+
+char
+lowerit(char a)
+{
+  switch (a)
+    {
+    default:
+      return a;
+    case 'A':
+      return 'a';
+    case 'B':
+      return 'b';
+    case 'C':
+      return 'c';
+    case 'D':
+      return 'd';
+    case 'E':
+      return 'e';
+    case 'F':
+      return 'f';
+    case 'G':
+      return 'g';
+    case 'H':
+      return 'h';
+    case 'I':
+      return 'i';
+    case 'J':
+      return 'j';
+    case 'K':
+      return 'k';
+    case 'L':
+      return 'l';
+    case 'M':
+      return 'm';
+    case 'N':
+      return 'n';
+    case 'O':
+      return 'o';
+    case 'P':
+      return 'p';
+    case 'Q':
+      return 'q';
+    case 'R':
+      return 'r';
+    case 'S':
+      return 's';
+    case 'T':
+      return 't';
+    case 'U':
+      return 'u';
+    case 'V':
+      return 'v';
+    case 'W':
+      return 'w';
+    case 'X':
+      return 'x';
+    case 'Y':
+      return 'y';
+    case 'Z':
+      return 'z';
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "a_.*\\+ 32" 1 "switchconv" } } */
+/* { dg-final { scan-tree-dump-not "switch" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr84436-3.c b/gcc/testsuite/gcc.dg/tree-ssa/pr84436-3.c
new file mode 100644
index 00000000000..261bd39aba6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr84436-3.c
@@ -0,0 +1,24 @@ 
+/* PR tree-optimization/84436 */
+/* { dg-options "-O2 -fdump-tree-switchconv -fdump-tree-optimized" } */
+
+enum a { b, c, d };
+int e;
+void h(enum a);
+
+void f() {
+  enum a g;
+  switch (e) {
+  case '1':
+    g = b;
+    break;
+  case '2':
+    g = c;
+    break;
+  case '3':
+    g = d;
+  }
+  h(g);
+}
+
+/* { dg-final { scan-tree-dump-times ".* \\+ \\-49" 1 "switchconv" } } */
+/* { dg-final { scan-tree-dump-not "switch" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr84436-4.c b/gcc/testsuite/gcc.dg/tree-ssa/pr84436-4.c
new file mode 100644
index 00000000000..92b1dd6a701
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr84436-4.c
@@ -0,0 +1,38 @@ 
+/* PR tree-optimization/84436 */
+/* { dg-options "-O2 -fdump-tree-switchconv -fdump-tree-optimized" } */
+/* { dg-do run } */
+
+char
+__attribute__ ((noinline, noclone))
+foo (char how)
+{
+  switch (how) {
+    case -4: how = 96; break;
+    case -3: how = -120; break;
+    case -2: how = -80; break;
+    case -1: how = -40; break;
+    case 0: how = 0; break;
+    case 1: how = 40; break;
+  }
+  return how;
+}
+
+int main()
+{
+  if (foo (-4) != 96)
+  __builtin_abort ();
+
+  if (foo (-3) != -120)
+  __builtin_abort ();
+
+  if (foo (0) != 0)
+  __builtin_abort ();
+
+  if (foo (123) != 123)
+  __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "how.*\\* 40" 1 "switchconv" } } */
+/* { dg-final { scan-tree-dump-not "switch" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr84436-5.c b/gcc/testsuite/gcc.dg/tree-ssa/pr84436-5.c
new file mode 100644
index 00000000000..6a6df81aa8e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr84436-5.c
@@ -0,0 +1,38 @@ 
+/* PR tree-optimization/84436 */
+/* { dg-options "-O2 -fdump-tree-switchconv -fdump-tree-optimized" } */
+/* { dg-do run } */
+
+enum E
+{
+  A, B, C,
+};
+
+int
+__attribute__ ((noinline, noclone))
+foo(enum E e)
+{
+  switch (e)
+    {
+    case A: return 0;
+    case B: return 1;
+    case C: return 2;
+    }
+
+  return -1;
+}
+
+int main()
+{
+  if (foo (A) != 0)
+  __builtin_abort ();
+
+  if (foo (B) != 1)
+  __builtin_abort ();
+
+  if (foo (C) != 2)
+  __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-not "switch" "optimized" } } */
diff --git a/gcc/tree-switch-conversion.c b/gcc/tree-switch-conversion.c
index 64169a6cd3d..8f34ab45cb9 100644
--- a/gcc/tree-switch-conversion.c
+++ b/gcc/tree-switch-conversion.c
@@ -439,25 +439,63 @@  switch_conversion::build_constructors ()
     }
 }
 
-/* If all values in the constructor vector are the same, return the value.
-   Otherwise return NULL_TREE.  Not supposed to be called for empty
-   vectors.  */
+/* If all values in the constructor vector are products of a linear function
+   a * x + b, then return true.  When true, COEFF_A and COEFF_B and
+   coefficients of the linear function.  Note that equal values are special
+   case of a linear function with a and b equal to zero.  */
 
-tree
-switch_conversion::contains_same_values_p (vec<constructor_elt, va_gc> *vec)
+bool
+switch_conversion::contains_linear_function_p (vec<constructor_elt, va_gc> *vec,
+					       wide_int *coeff_a,
+					       wide_int *coeff_b)
 {
   unsigned int i;
-  tree prev = NULL_TREE;
   constructor_elt *elt;
 
+  gcc_assert (vec->length () >= 2);
+
+  /* Let's try to find any linear function a.x + y that can apply to
+     given values. 'a' can be calculated as follows:
+
+     a = (y2 - y1) / (x2 - x1) where x2 - x1 = 1 (consecutive case indices)
+     a = y2 - y1
+
+     and
+
+     b = y2 - a * x2
+
+  */
+
+  tree elt0 = (*vec)[0].value;
+  tree elt1 = (*vec)[1].value;
+
+  if (TREE_CODE (elt0) != INTEGER_CST || TREE_CODE (elt1) != INTEGER_CST)
+    return false;
+
+  wide_int range_min = wi::to_wide (fold_convert (TREE_TYPE (elt0),
+						  m_range_min));
+  wide_int y1 = wi::to_wide (elt0);
+  wide_int y2 = wi::to_wide (elt1);
+  wide_int a = y2 - y1;
+  wide_int b = y2 - a * (range_min + 1);
+
+  /* Verify that all values fulfill the linear function.  */
   FOR_EACH_VEC_SAFE_ELT (vec, i, elt)
     {
-      if (!prev)
-	prev = elt->value;
-      else if (!operand_equal_p (elt->value, prev, OEP_ONLY_CONST))
-	return NULL_TREE;
+      if (TREE_CODE (elt->value) != INTEGER_CST)
+	return false;
+
+      wide_int value = wi::to_wide (elt->value);
+      if (a * range_min + b != value)
+	return false;
+
+      ++range_min;
     }
-  return prev;
+
+  *coeff_a = a;
+  *coeff_b = b;
+
+  return true;
 }
 
 /* Return type which should be used for array elements, either TYPE's
@@ -551,7 +589,7 @@  void
 switch_conversion::build_one_array (int num, tree arr_index_type,
 				    gphi *phi, tree tidx)
 {
-  tree name, cst;
+  tree name;
   gimple *load;
   gimple_stmt_iterator gsi = gsi_for_stmt (m_switch);
   location_t loc = gimple_location (m_switch);
@@ -561,9 +599,30 @@  switch_conversion::build_one_array (int num, tree arr_index_type,
   name = copy_ssa_name (PHI_RESULT (phi));
   m_target_inbound_names[num] = name;
 
-  cst = contains_same_values_p (m_constructors[num]);
-  if (cst)
-    load = gimple_build_assign (name, cst);
+  wide_int coeff_a, coeff_b;
+  bool linear_p = contains_linear_function_p (m_constructors[num], &coeff_a,
+					      &coeff_b);
+  if (linear_p)
+    {
+      if (dump_file && coeff_a.to_uhwi () > 0)
+	fprintf (dump_file, "Linear transformation with A = %" PRId64
+		 "and B = %" PRId64 "\n", coeff_a.to_shwi (),
+		 coeff_b.to_shwi ());
+
+      tree t = TREE_TYPE (m_index_expr);
+      tree tmp = make_ssa_name (t);
+      tree value = fold_build2_loc (loc, MULT_EXPR, t,
+				    wide_int_to_tree (t, coeff_a),
+				    m_index_expr);
+
+      gsi_insert_before (&gsi, gimple_build_assign (tmp, value), GSI_SAME_STMT);
+      value = fold_build2_loc (loc, PLUS_EXPR, t,
+			       tmp, wide_int_to_tree (t, coeff_b));
+      tree tmp2 = make_ssa_name (t);
+      gsi_insert_before (&gsi, gimple_build_assign (tmp2, value),
+			 GSI_SAME_STMT);
+      load = gimple_build_assign (name, NOP_EXPR, fold_convert (t, tmp2));
+    }
   else
     {
       tree array_type, ctor, decl, value_type, fetch, default_type;
diff --git a/gcc/tree-switch-conversion.h b/gcc/tree-switch-conversion.h
index 37ed2193724..ceee75017d9 100644
--- a/gcc/tree-switch-conversion.h
+++ b/gcc/tree-switch-conversion.h
@@ -733,10 +733,12 @@  struct switch_conversion
      order of phi nodes.  */
   void build_constructors ();
 
-  /* If all values in the constructor vector are the same, return the value.
-     Otherwise return NULL_TREE.  Not supposed to be called for empty
-     vectors.  */
-  tree contains_same_values_p (vec<constructor_elt, va_gc> *vec);
+  /* If all values in the constructor vector are products of a linear function
+     a * x + b, then return true.  When true, COEFF_A and COEFF_B and
+     coefficients of the linear function.  Note that equal values are special
+     case of a linear function with a and b equal to zero.  */
+  bool contains_linear_function_p (vec<constructor_elt, va_gc> *vec,
+				   wide_int *coeff_a, wide_int *coeff_b);
 
   /* Return type which should be used for array elements, either TYPE's
      main variant or, for integral types, some smaller integral type