Patchwork [3/3] Improve induction variable elimination

login
register
mail settings
Submitter Bin.Cheng
Date July 21, 2014, 9:47 a.m.
Message ID <CAHFci2_-WU4CMNYBXgzEcO8TH74cuN2xsiSCjHvU6zsS6Hp8kA@mail.gmail.com>
Download mbox | patch
Permalink /patch/372018/
State New
Headers show

Comments

Bin.Cheng - July 21, 2014, 9:47 a.m.
Hi, forward to Zdenek for the review.

Thanks,
bin



---------- Forwarded message ----------
From: Bin Cheng <bin.cheng@arm.com>
Date: Thu, Jul 17, 2014 at 10:09 AM
Subject: [PATCH 3/3]Improve induction variable elimination
To: gcc-patches@gcc.gnu.org


Hi,
Function iv_elimination_compare_lt is used to eliminate induction variable
when the loop's latch could run for zero time (i.e., may_be_zero in loop
niter information evaluates to true).  As stated in the first message, it
only handles very specific case that rarely happens for either GCC bootstrap
or spec2k/spec2k6 compilation.  The function has two restrictions which
could be improved:
  a) When checking that candidate iv doesn't overflow, it only handles
candidates that are computed in a type that guarantees no overflows.  More
complex analysis can be used to prove the non-overflow ness,  as in this
patch.
  b) The function only handles the original form of may_be_zero like "a + 1
> b", but that expression could have been folded into other forms.  This
patch handles three folded forms and does iv elimination as well.  I think
this isn't a very corner case, because for many loops iterating from "0"
(i.e., we have "a == 0"), the expression will be folded.

I also refactored period check from may_eliminate_iv into a single function
so that it can be reused.

Thanks,
bin


2014-07-17  Bin Cheng  <bin.cheng@arm.com>

        * tree-ssa-loop-ivopts.c (iv_nowrap_period)
        (nowrap_cand_for_loop_niter_p): New functions.
        (period_greater_niter_exit): New function refactored from
        may_eliminate_iv.
        (iv_elimination_compare_lt): New parameter.  Check wrapping
        behavior for candidate of wrapping type.  Handle folded forms
        of may_be_zero expression.
        (may_eliminate_iv): Call period_greater_niter_exit.  Pass new
        argument for iv_elimination_compare_lt.

gcc/testsuite/ChangeLog
2014-07-17  Bin Cheng  <bin.cheng@arm.com>

        * gcc.dg/tree-ssa/ivopts-lt-3.c: New test.
        * gcc.dg/tree-ssa/ivopts-lt-4.c: New test.

Patch

Index: gcc/tree-ssa-loop-ivopts.c

===================================================================
--- gcc/tree-ssa-loop-ivopts.c	(revision 212387)

+++ gcc/tree-ssa-loop-ivopts.c	(working copy)

@@ -4432,6 +4432,44 @@  iv_period (struct iv *iv)

   return period;
 }
 
+/* Returns no wrapping period of induction variable IV.  For now

+   only unsigned type IV is handled, we could extend it in case

+   of non-overflow for signed ones.  Return zero if it can't be

+   decided.  */

+

+static tree

+iv_nowrap_period (struct iv *iv)

+{

+  bool overflow;

+  tree type;

+  tree base = iv->base, step = iv->step;

+  widest_int base_val, step_val, max_val, span, period;

+

+  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

+

+  type = TREE_TYPE (base);

+  if (!TYPE_UNSIGNED (type) || TREE_CODE (base) != INTEGER_CST)

+    return integer_zero_node;

+

+  base_val = wi::to_widest (base);

+  step_val = wi::to_widest (step);

+  if (!POINTER_TYPE_P (type) && TYPE_MAX_VALUE (type)

+      && TREE_CODE (TYPE_MAX_VALUE (type)) == INTEGER_CST)

+    max_val = wi::to_widest (TYPE_MAX_VALUE (type));

+  else

+    {

+      wide_int max_wi = wi::max_value (TYPE_PRECISION (type), UNSIGNED);

+      max_val = wi::to_widest (wide_int_to_tree (type, max_wi));

+    }

+

+  span = max_val - base_val + step_val - 1;

+  period = wi::div_trunc (span, step_val, UNSIGNED, &overflow);

+  if (overflow)

+    return integer_zero_node;

+

+  return wide_int_to_tree (type, period);

+}

+

 /* Returns the comparison operator used when eliminating the iv USE.  */
 
 static enum tree_code
@@ -4560,7 +4598,84 @@  difference_cannot_overflow_p (tree base, tree offs

     }
 }
 
-/* Tries to replace loop exit by one formulated in terms of a LT_EXPR

+/* Check whether PERIOD of CAND is greater than the number of iterations

+   described by DESC for which the exit condition is true.  The exit

+   condition is comparison against USE.  */

+

+static bool

+period_greater_niter_exit (struct ivopts_data *data,

+			   struct iv_use *use, struct iv_cand *cand,

+			   tree period, struct tree_niter_desc *desc)

+{

+  struct loop *loop = data->current_loop;

+

+  /* If the number of iterations is constant, compare against it directly.  */

+  if (TREE_CODE (desc->niter) == INTEGER_CST)

+    {

+      /* See cand_value_at.  */

+      if (stmt_after_increment (loop, cand, use->stmt))

+        {

+          if (!tree_int_cst_lt (desc->niter, period))

+            return false;

+        }

+      else

+        {

+          if (tree_int_cst_lt (period, desc->niter))

+            return false;

+        }

+    }

+

+  /* If not, and if this is the only possible exit of the loop, see whether

+     we can get a conservative estimate on the number of iterations of the

+     entire loop and compare against that instead.  */

+  else

+    {

+      widest_int period_value, max_niter;

+

+      max_niter = desc->max;

+      if (stmt_after_increment (loop, cand, use->stmt))

+        max_niter += 1;

+      period_value = wi::to_widest (period);

+      if (wi::gtu_p (max_niter, period_value))

+        {

+          /* See if we can take advantage of inferred loop bound information.  */

+          if (data->loop_single_exit_p)

+            {

+              if (!max_loop_iterations (loop, &max_niter))

+                return false;

+              /* The loop bound is already adjusted by adding 1.  */

+              if (wi::gtu_p (max_niter, period_value))

+                return false;

+            }

+          else

+            return false;

+        }

+    }

+

+  return true;

+}

+

+/* Determine whether no wrapping period of CAND is greater than

+   the number of iterations described by DESC for which exit

+   conditionis true.  The exit condition is comparison against USE.  */

+

+static bool

+nowrap_cand_for_loop_niter_p (struct ivopts_data *data,

+			      struct iv_use *use,

+			      struct iv_cand *cand,

+			      struct tree_niter_desc *desc)

+{

+  tree period;

+  

+  period = iv_nowrap_period (cand->iv);

+  if (period != integer_zero_node

+      && period_greater_niter_exit (data, use, cand, period, desc))

+    return true;

+

+  return false;

+}

+

+/* Tries to replace loop exit in USE by one formulated in terms of a LT_EXPR

    comparison with CAND.  NITER describes the number of iterations of
    the loops.  If successful, the comparison in COMP_P is altered accordingly.
 
@@ -4597,11 +4712,18 @@  difference_cannot_overflow_p (tree base, tree offs

    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
       overflow in computing it or the values of p.
    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
-      overflow.  To prove this, we use the fact that p_0 = base + a.  */

+      overflow.  To prove this, we use the fact that p_0 = base + a.

 
+

+   Moreover, with more complex overflow analysis for unsigned type, it is

+   possible to eliminate I with P if P is an induction candidate of unsigned

+   integer type other than pointer if we can prove that P doesn't  overflow

+   during all iterations of current loop.  Also special forms of MAY_BE_ZERO

+   in NITER is checked because "a + 1 > b" could be folded.  */

+

 static bool
-iv_elimination_compare_lt (struct ivopts_data *data,

-                           struct iv_cand *cand, enum tree_code *comp_p,

+iv_elimination_compare_lt (struct ivopts_data *data, struct iv_use *use,

+			   struct iv_cand *cand, enum tree_code *comp_p,

 			   struct tree_niter_desc *niter)
 {
   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
@@ -4610,11 +4732,13 @@  static bool

   HOST_WIDE_INT step;
 
   /* We need to know that the candidate induction variable does not overflow.
-     While more complex analysis may be used to prove this, for now just

-     check that the variable appears in the original program and that it

-     is computed in a type that guarantees no overflows.  */

+     Apart from checking that the variable appears in the original program

+     and that is is computed in a type that guarantees no overflows, we also

+     check no wrapping periord of the variable covers the niter if it has

+     unsigned type.  More complex analysis may be used to prove this.  */

   cand_type = TREE_TYPE (cand->iv->base);
-  if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))

+  if ((cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))

+      && !nowrap_cand_for_loop_niter_p (data, use, cand, niter))

     return false;
 
   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
@@ -4657,6 +4781,67 @@  static bool

       else
 	return false;
     }
+  else if (TREE_CODE (mbz) == EQ_EXPR)

+    {

+      /* Check whether may_be_zero is in below three forms:

+	   1) b' == TYPE_MAX_VALUE (TREE_TYPE (b'))

+	      where b' is of type nit_type.  The expression could be

+	      folded from "b' + 1 < 1".  In this case, A is "0" and B

+	      is "b' + 1".

+	   2) b == 0

+	      where b is of type nit_type.  The expression could be

+	      folded from "b < 1".  In this case, A is "0" and B is

+	      "b".

+	   3) b' == -1

+	      where b' is of signed type which has nit_type as its

+	      unsigned counterpart.  The expression could be folded

+	      from "(nit_type)b' + 1 < 1".  In this case, A is "0"

+	      and "(nit_type)b' + 1".  */

+      tree type;

+      tree op0 = TREE_OPERAND (mbz, 0);

+      tree op1 = TREE_OPERAND (mbz, 1);

+

+      if (TREE_CODE (op1) != INTEGER_CST)

+	{

+	  tree tmp = op0;

+	  op0 = op1;

+	  op1 = tmp;

+	}

+      type = TREE_TYPE (op0);

+      if (TREE_CODE (op1) != INTEGER_CST)

+	return false;

+

+      /* Convert case 3 to case 1.  */

+      if (!TYPE_UNSIGNED (type)

+	  && operand_equal_p (op1, integer_minus_one_node, 0)

+	  && types_compatible_p (unsigned_type_for (type), nit_type))

+	{

+	  type = nit_type;

+	  op0 = fold_convert_loc (UNKNOWN_LOCATION, nit_type, op0);

+	  op1 = fold_convert_loc (UNKNOWN_LOCATION, nit_type, op1);

+	}

+

+      if (!TYPE_UNSIGNED (type))

+	return false;

+

+      /* Case 1.  */

+      if (TYPE_MAX_VALUE (type)

+	  && TREE_CODE (op1) == INTEGER_CST

+	  && operand_equal_p (op1, TYPE_MAX_VALUE (type), 0))

+	{

+	  a = integer_zero_node;

+	  b = fold_build2 (PLUS_EXPR, type, op0, integer_one_node);

+	}

+      /* Case 2.  */

+      else if (TREE_CODE (op1) == INTEGER_CST

+	       && operand_equal_p (op1, integer_zero_node, 0))

+	{

+	  a = integer_zero_node;

+	  b = op0;

+	}

+      else

+	return false;

+    }

   else
     return false;
 
@@ -4673,10 +4858,14 @@  static bool

     return false;
 
   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
-     overflow.  */

-  offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),

-			cand->iv->step,

-			fold_convert (TREE_TYPE (cand->iv->step), a));

+     overflow.  It is uncessary to build offset when A equals to ZERO.  */

+  if (a != integer_zero_node)

+    offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),

+			  cand->iv->step,

+			  fold_convert (TREE_TYPE (cand->iv->step), a));

+  else

+    offset = a;

+

   if (!difference_cannot_overflow_p (cand->iv->base, offset))
     return false;
 
@@ -4733,50 +4922,9 @@  may_eliminate_iv (struct ivopts_data *data,

      This is the case iff the period of the induction variable is greater
      than the number of iterations for which the exit condition is true.  */
   period = iv_period (cand->iv);
+  if (!period_greater_niter_exit (data, use, cand, period, desc))

+    return false;

 
-  /* If the number of iterations is constant, compare against it directly.  */

-  if (TREE_CODE (desc->niter) == INTEGER_CST)

-    {

-      /* See cand_value_at.  */

-      if (stmt_after_increment (loop, cand, use->stmt))

-        {

-          if (!tree_int_cst_lt (desc->niter, period))

-            return false;

-        }

-      else

-        {

-          if (tree_int_cst_lt (period, desc->niter))

-            return false;

-        }

-    }

-

-  /* If not, and if this is the only possible exit of the loop, see whether

-     we can get a conservative estimate on the number of iterations of the

-     entire loop and compare against that instead.  */

-  else

-    {

-      widest_int period_value, max_niter;

-

-      max_niter = desc->max;

-      if (stmt_after_increment (loop, cand, use->stmt))

-        max_niter += 1;

-      period_value = wi::to_widest (period);

-      if (wi::gtu_p (max_niter, period_value))

-        {

-          /* See if we can take advantage of inferred loop bound information.  */

-          if (data->loop_single_exit_p)

-            {

-              if (!max_loop_iterations (loop, &max_niter))

-                return false;

-              /* The loop bound is already adjusted by adding 1.  */

-              if (wi::gtu_p (max_niter, period_value))

-                return false;

-            }

-          else

-            return false;

-        }

-    }

-

   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
 
   *bound = fold_convert (TREE_TYPE (cand->iv->base),
@@ -4796,7 +4944,7 @@  may_eliminate_iv (struct ivopts_data *data,

 	   base the exit condition on it.  However, that is often too
 	   expensive.  */
   if (!integer_zerop (desc->may_be_zero))
-    return iv_elimination_compare_lt (data, cand, comp, desc);

+    return iv_elimination_compare_lt (data, use, cand, comp, desc);

 
   return true;
 }
Index: gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-3.c

===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-3.c	(revision 0)

+++ gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-3.c	(revision 0)

@@ -0,0 +1,38 @@ 

+/* { dg-do compile { target { lp64 } } } */

+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */

+

+typedef long unsigned int size_t;

+extern float a[100], b[100];

+

+int foo (int M, unsigned int l)

+{

+  unsigned ivtmp = 0, niters, _37, _38, bnd;

+  size_t _67, _1;

+  float *vectp_a, *vectp_b, *vectp_a2;

+  float vect__6, vect__7, vect__8;

+

+  _38 = (unsigned int)l;

+  bnd = _38 + 1;

+

+  _1 = (size_t) M;

+  _67 = _1 * 4;

+  vectp_a = a; vectp_b = b; vectp_a2 = a + _67;

+

+  do

+    {

+      vect__6 = *vectp_a;

+      vect__7 = *vectp_b;

+      vect__8 = vect__6 + vect__7;

+      *vectp_a = vect__8;

+      vectp_a = vectp_a + 4;

+      vectp_b = vectp_b + 4;

+      vectp_a2 = vectp_a2 + 4;

+      ivtmp += 1;

+    }

+  while (ivtmp < bnd);

+

+  return 0;

+}

+

+/* { dg-final { scan-tree-dump-times "Replacing exit test: if \\(bnd.*\\)" 1 "ivopts" } } */

+/* { dg-final { cleanup-tree-dump "ivopts" } } */

Index: gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-4.c

===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-4.c	(revision 0)

+++ gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-4.c	(revision 0)

@@ -0,0 +1,38 @@ 

+/* { dg-do compile { target { lp64 } } } */

+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */

+

+typedef long unsigned int size_t;

+extern float a[100], b[100];

+

+int foo (int M, int l)

+{

+  unsigned ivtmp = 0, niters, _37, _38, bnd;

+  size_t _67, _1;

+  float *vectp_a, *vectp_b, *vectp_a2;

+  float vect__6, vect__7, vect__8;

+

+  _38 = (unsigned int)l;

+  bnd = _38 + 1;

+

+  _1 = (size_t) M;

+  _67 = _1 * 4;

+  vectp_a = a; vectp_b = b; vectp_a2 = a + _67;

+

+  do

+    {

+      vect__6 = *vectp_a;

+      vect__7 = *vectp_b;

+      vect__8 = vect__6 + vect__7;

+      *vectp_a = vect__8;

+      vectp_a = vectp_a + 4;

+      vectp_b = vectp_b + 4;

+      vectp_a2 = vectp_a2 + 4;

+      ivtmp += 1;

+    }

+  while (ivtmp < bnd);

+

+  return 0;

+}

+

+/* { dg-final { scan-tree-dump-times "Replacing exit test: if \\(bnd.*\\)" 1 "ivopts" } } */

+/* { dg-final { cleanup-tree-dump "ivopts" } } */