Patchwork ivopts improvement

Submitter Tom de Vries
Date March 2, 2011, 10:01 p.m.
Message ID <4D6EBE2A.10002@codesourcery.com>
Permalink /patch/85173/
State New

Comments

Tom de Vries - March 2, 2011, 10:01 p.m.
Hi Zdenek,

On 02/28/2011 07:12 PM, Zdenek Dvorak wrote:
> Hi,
> 
>>>>>>  I think this should be done unconditionally under
>>>>>>  -funsafe-loop-optimizations.
>>>>
>>>> void
>>>> f (char *base, unsigned long int i, unsigned long int n)
>>>> {
>>>>   char *p = base + i;
>>>>   do
>>>>     {
>>>>       *p = '\0';
>>>>       p++;
>>>>       i++;
>>>>    }
>>>>   while (i<  n);
>>>> }
>>>>
>>>> AFAIU, -funsafe-loop-optimizations allows me to assume that i does not
>>>> overflow.  Are you saying that -funsafe-loop-optimizations also implies
>>>> that p does not overflow?
>>>
>>> I'm wondering if the C standard doesn't allow you to accept this always, at
>>> least if the increment to i (and p) is positive.
>>>
>>> I'm thinking that "undefined behavior for NULL pointer access" implies that
>>> an object cannot be allocated across a NULL pointer.  And since pointer
>>> arithmetic beyond object boundaries is undefined, an overflow of p (which
>>> would cross a NULL pointer) is also undefined.
>>
>> The important thing is that IVOPTs is not allowed to introduce IVs that
>> overflow either.  And it is known to create weird IVs starting from
>> -4 ...
> 
> as long as we do the arithmetic in an unsigned integer type and only cast the
> final result to the pointer type, the C standard (plus the gcc-specific
> interpretation of casts) allows us to do that.
> 
> Nevertheless, pointer arithmetic has defined behavior only within
> a single memory object or at most one array element after it, which more
> or less forbids overflows when the arithmetic is performed in pointer
> type.
> 
> Zdenek
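
For illustration, the shape Zdenek describes, with the arithmetic done in an
unsigned type and only the final result cast to pointer type, would look
roughly like this at the source level (a hand-written sketch, not ivopts
output; the function name is made up):

void
f_unsigned_arith (char *base, unsigned long int i, unsigned long int n)
{
  do
    {
      /* All index arithmetic happens in unsigned long, where wrap-around
         is well defined; only the final value is cast to pointer type.  */
      *(char *) ((unsigned long int) base + i) = '\0';
      i++;
    }
  while (i < n);
}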

Another try with updated patches.

Changes compared to last submission:
iterator.6.1-ml.patch:
* removed the stmt_after_increment related code
  in may_eliminate_iv (following a review comment).
iterator.6.4-ml.patch:
* added a test for loop_only_exit_p before calling
  iv_elimination_compare_lt.  This makes sure the last loop iteration is
  reached, which makes p + n a valid pointer.
* removed cand_valid_pointer_at_use_p and replaced the call
  with a test for (cand->pos == IP_ORIGINAL
  && POINTER_TYPE_P (TREE_TYPE (cand->var_before))
  && POINTER_TYPE_OVERFLOW_UNDEFINED).
  In other words, the iterator is a source-level pointer iterator, which
  is guaranteed not to overflow provided
  POINTER_TYPE_OVERFLOW_UNDEFINED.
* return the bound in pointer type rather than unsigned int type, so that
  the bound matches the pointer iterator (see the sketch after this list).
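
Roughly, the intended effect on the example from the earlier mail looks like
this (a hand-written before/after sketch, not compiler output; it only
illustrates the '<' exit test against a pointer-typed bound):

/* Before: the loop exits on the counter i; both i and p are IVs.  */
void
f_before (char *base, unsigned long int i, unsigned long int n)
{
  char *p = base + i;
  do
    {
      *p = '\0';
      p++;
      i++;
    }
  while (i < n);
}

/* After: the counter is eliminated and the exit test compares the original
   pointer iterator against the pointer bound base + n using '<'.  This is
   only valid when the last iteration is known to be reached (so base + n is
   a valid one-past-the-end pointer) and pointer overflow is undefined.  */
void
f_after (char *base, unsigned long int i, unsigned long int n)
{
  char *p = base + i;
  char *bound = base + n;
  do
    {
      *p = '\0';
      p++;
    }
  while (p < bound);
}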

reg-tested on x86-64.

Ok now?

Thanks,
- Tom
Paolo Bonzini - March 3, 2011, 8:44 a.m.
On 03/02/2011 11:01 PM, Tom de Vries wrote:
> +  if (TREE_CODE (nit) == COND_EXPR)
> +    {
> +      if (!loop_only_exit_p (loop, exit))
> +        return false;
> +
> +      return iv_elimination_compare_lt (use, cand, bound, nit, comp);
> +    }
> +

You probably need a comment on top of iv_elimination_compare_lt, 
otherwise I'm left wondering why this isn't

   if (TREE_CODE (nit) == COND_EXPR
       && loop_only_exit_p (loop, exit)
       && iv_elimination_compare_lt (use, cand, bound, nit, comp))
     return true;

Also, the check on nit is an optimization.  Perhaps you should add a
gcc_checking_assert to iv_elimination_compare_lt and/or remove this from
get_lt_bound:

> +  if (TREE_CODE (nit) != COND_EXPR)
> +    return NULL_TREE;
> +

Or perhaps loop_only_exit_p could be optimized by computing it ahead of 
time, possibly at the same time as loop_body_includes_call.  This way it 
becomes very cheap and the code above can just call 
iv_elimination_compare_lt without any pre-screening.
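
For concreteness, the pre-computation could look roughly like this (a sketch
only; the loop_single_exit_p field is hypothetical and not part of the posted
patch):

  /* Computed once per loop, e.g. next to loop_body_includes_call, instead
     of recomputing loop_only_exit_p for every use/candidate pair.  */
  data->loop_single_exit_p = loop_only_exit_p (loop, exit);

  /* may_eliminate_iv then needs no per-use pre-screening:  */
  if (TREE_CODE (nit) == COND_EXPR
      && data->loop_single_exit_p
      && iv_elimination_compare_lt (use, cand, bound, nit, comp))
    return true;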

Paolo

Patch

diff -u gcc/tree-ssa-loop-ivopts.c gcc/tree-ssa-loop-ivopts.c
--- gcc/tree-ssa-loop-ivopts.c	(working copy)
+++ gcc/tree-ssa-loop-ivopts.c	(working copy)
@@ -814,17 +814,25 @@ 
 
   if (!slot)
     {
-      /* Try to determine number of iterations.  We must know it
-	 unconditionally (i.e., without possibility of # of iterations
-	 being zero).  Also, we cannot safely work with ssa names that
-	 appear in phi nodes on abnormal edges, so that we do not create
-	 overlapping life ranges for them (PR 27283).  */
+      /* Try to determine number of iterations.  We cannot safely work with ssa
+         names that appear in phi nodes on abnormal edges, so that we do not
+         create overlapping life ranges for them (PR 27283).  */
       desc = XNEW (struct tree_niter_desc);
       if (number_of_iterations_exit (data->current_loop,
 				     exit, desc, true)
-	  && integer_zerop (desc->may_be_zero)
      	  && !contains_abnormal_ssa_name_p (desc->niter))
-	niter = desc->niter;
+	{
+	  if (!integer_zerop (desc->may_be_zero))
+            /* Construct COND_EXPR that describes the number of iterations.
+               Either the COND_EXPR is not too expensive, and we can use it as
+               loop bound, or we can deduce a LT_EXPR bound from it.  */
+	    niter
+	      = build3 (COND_EXPR, TREE_TYPE (desc->niter), desc->may_be_zero,
+			build_int_cst_type (TREE_TYPE (desc->niter), 0),
+			desc->niter);
+	  else
+	    niter = desc->niter;
+	}
       else
 	niter = NULL_TREE;
 
@@ -4342,6 +4350,123 @@ 
   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
 }
 
+/* Tries to detect
+     NIT == (use_iv_max < USE->iv->base)
+            ? 0
+            : (use_iv_max - USE->iv->base)
+   where
+     use_iv_real_base == (USE->iv->base - USE->iv->step)
+     && CAND->iv->base == base_ptr + use_iv_real_base
+   and returns the exclusive upper bound for CAND->var_after:
+     base_ptr + use_iv_max.  */
+
+static tree
+get_lt_bound (struct iv_use *use, struct iv_cand *cand, tree nit)
+{
+  tree comp, base_ptr, n, n0, n1;
+  tree use_iv_real_base, cand_iv_base, use_iv_max;
+  gimple def_stmt;
+  int npos, mpos;
+  enum tree_code compare;
+  tree cand_type = TREE_TYPE (cand->var_before);
+
+  if (TREE_CODE (nit) != COND_EXPR)
+    return NULL_TREE;
+
+  /* use_iv_real_base == use->iv->base - use->iv->step.  */
+  use_iv_real_base = fold_build_plus (MINUS_EXPR, use->iv->base, use->iv->step);
+
+  /* cand_iv_base.  */
+  cand_iv_base = cand->iv->base;
+  STRIP_NOPS (cand_iv_base);
+
+  /* cand->iv->base == base_ptr + use_iv_real_base.  */
+  if (TREE_CODE (cand_iv_base) != SSA_NAME)
+    return NULL_TREE;
+  def_stmt = SSA_NAME_DEF_STMT (cand_iv_base);
+  if (!is_gimple_assign (def_stmt)
+      || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR)
+    return NULL_TREE;
+  if (gimple_assign_rhs2 (def_stmt) != use_iv_real_base)
+    return NULL_TREE;
+  base_ptr = gimple_assign_rhs1 (def_stmt);
+
+  /* 0.  */
+  if (tree_int_cst_equal (TREE_OPERAND (nit, 1), integer_zero_node))
+    npos = 2;
+  else if (tree_int_cst_equal (TREE_OPERAND (nit, 2), integer_zero_node))
+    npos = 1;
+  else
+    return NULL_TREE;
+
+  /* n == use_iv_max - use->iv->base.  */
+  n = TREE_OPERAND (nit, npos);
+  if (TREE_CODE (n) != PLUS_EXPR)
+    return NULL_TREE;
+  n0 = TREE_OPERAND (n, 0);
+  n1 = TREE_OPERAND (n, 1);
+  if (tree_int_cst_equal (fold_build_plus (PLUS_EXPR, use->iv->base, n0),
+			  integer_zero_node))
+    use_iv_max = n1;
+  else if (tree_int_cst_equal (fold_build_plus (PLUS_EXPR, use->iv->base, n1),
+			       integer_zero_node))
+    use_iv_max = n0;
+  else
+    return NULL_TREE;
+
+  /* comp == use_iv_max < use->iv->base.  */
+  comp = TREE_OPERAND (nit, 0);
+  compare = TREE_CODE (comp);
+  if ((npos == 2 && compare == LT_EXPR)
+      || (npos == 1 && compare == GE_EXPR))
+    mpos = 0;
+  else if ((npos == 2 && compare == GT_EXPR)
+	   || (npos == 1 && compare == LE_EXPR))
+    mpos = 1;
+  else
+    return NULL_TREE;
+  if (TREE_OPERAND (comp, mpos) != use_iv_max
+      || !tree_int_cst_equal (fold_build_plus (MINUS_EXPR, use->iv->base,
+                                               TREE_OPERAND (comp, 1 - mpos)),
+			      integer_zero_node))
+    return NULL_TREE;
+
+  /* Calculate bound.  */
+  return fold_build_plus (PLUS_EXPR, convert (cand_type, base_ptr), use_iv_max);
+}
+
+/* Tries to replace loop exit test USE, which allows NIT iterations, by one
+   formulated in terms of a LT_EXPR comparison with CAND.  Stores the resulting
+   comparison in COMP_P and bound in BOUND_P.  */
+
+static bool
+iv_elimination_compare_lt (struct iv_use *use, struct iv_cand *cand,
+			   tree *bound_p, tree nit, enum tree_code *comp_p)
+{
+  tree bound;
+
+  if (!(cand->pos == IP_ORIGINAL
+	&& POINTER_TYPE_P (TREE_TYPE (cand->var_before))
+	&& POINTER_TYPE_OVERFLOW_UNDEFINED))
+    return false;
+
+  if (*comp_p != NE_EXPR)
+    return false;
+
+  bound = get_lt_bound (use, cand, nit);
+
+  if (bound == NULL_TREE)
+    return false;
+
+  if (expression_expensive_p (bound))
+    return false;
+
+  *comp_p = LT_EXPR;
+  *bound_p = bound;
+
+  return true;
+}
+
 /* Check whether it is possible to express the condition in USE by comparison
    of candidate CAND.  If so, store the value compared with to BOUND, and the
    comparison operator to COMP.  */
@@ -4420,6 +4545,15 @@ 
   *bound = aff_combination_to_tree (&bnd);
   *comp = iv_elimination_compare (data, use);
 
+  /* Try to implement nit using a '<' instead.  */
+  if (TREE_CODE (nit) == COND_EXPR)
+    {
+      if (!loop_only_exit_p (loop, exit))
+        return false;
+
+      return iv_elimination_compare_lt (use, cand, bound, nit, comp);
+    }
+
   /* It is unlikely that computing the number of iterations using division
      would be more profitable than keeping the original induction variable.  */
   if (expression_expensive_p (*bound))