@@ -419,6 +419,89 @@
return fold_build2 (code, TREE_TYPE (a), a, b);
}
+/* Folds (TREE_TYPE (A))(A CODE B), where CODE is either PLUS_EXPR or
+ MINUS_EXPR. Returns the folded expression if folding is successful.
+ Otherwise, return NULL_TREE. Handles the case that A is a pointer
+ robustly. */
+
+static inline tree
+fold_plus (enum tree_code code, tree a, tree b)
+{
+ tree a_type = TREE_TYPE (a);
+ tree res;
+
+ STRIP_NOPS (a);
+ robust_plus (&code, a, &b);
+
+ res = fold_binary (code, TREE_TYPE (a), a, b);
+ if (res == NULL_TREE)
+ return NULL_TREE;
+
+ return fold_convert (a_type, res);
+}
+
+/* Folds (TREE_TYPE (A))(A - B), possibly using the defining stmt of A. Returns
+ the folded expression if folding is successful and resulted in an ssa_name.
+ Otherwise, return NULL_TREE. */
+
+static inline tree
+fold_diff_to_ssa_name (tree a, tree b)
+{
+ tree a_type = TREE_TYPE (a);
+ tree res, a0, a1;
+ gimple def_stmt;
+
+ res = fold_plus (MINUS_EXPR, a, b);
+ if (res != NULL_TREE)
+ {
+ STRIP_NOPS (res);
+ if (TREE_CODE (res) == SSA_NAME)
+ return fold_convert (a_type, res);
+ }
+
+ STRIP_NOPS (a);
+ STRIP_NOPS (b);
+
+ if (TREE_CODE (a) == PLUS_EXPR && TREE_CODE (b) == PLUS_EXPR
+ && TREE_OPERAND (a, 1) == TREE_OPERAND (b, 1))
+ {
+ a = TREE_OPERAND (a, 0);
+ b = TREE_OPERAND (b, 0);
+
+ STRIP_NOPS (a);
+ STRIP_NOPS (b);
+
+ res = fold_plus (MINUS_EXPR, a, b);
+ if (res != NULL_TREE)
+ {
+ STRIP_NOPS (res);
+ if (TREE_CODE (res) == SSA_NAME)
+ return fold_convert (a_type, res);
+ }
+ }
+
+ if (TREE_CODE (a) != SSA_NAME)
+ return NULL_TREE;
+
+ def_stmt = SSA_NAME_DEF_STMT (a);
+ if (!is_gimple_assign (def_stmt)
+ || (gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
+ && gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR))
+ return NULL_TREE;
+ a0 = gimple_assign_rhs1 (def_stmt);
+ a1 = gimple_assign_rhs2 (def_stmt);
+
+ res = fold_plus (MINUS_EXPR, fold_build_plus (PLUS_EXPR, a0, a1), b);
+ if (res != NULL_TREE)
+ {
+ STRIP_NOPS (res);
+ if (TREE_CODE (res) == SSA_NAME)
+ return fold_convert (a_type, res);
+ }
+
+ return NULL_TREE;
+}
+
/* Number of uses recorded in DATA. */
static inline unsigned
@@ -825,17 +908,25 @@
if (!slot)
{
- /* Try to determine number of iterations. We must know it
- unconditionally (i.e., without possibility of # of iterations
- being zero). Also, we cannot safely work with ssa names that
- appear in phi nodes on abnormal edges, so that we do not create
- overlapping life ranges for them (PR 27283). */
+ /* Try to determine number of iterations. We cannot safely work with ssa
+ names that appear in phi nodes on abnormal edges, so that we do not
+ create overlapping life ranges for them (PR 27283). */
desc = XNEW (struct tree_niter_desc);
if (number_of_iterations_exit (data->current_loop,
exit, desc, true)
- && integer_zerop (desc->may_be_zero)
&& !contains_abnormal_ssa_name_p (desc->niter))
- niter = desc->niter;
+ {
+ if (!integer_zerop (desc->may_be_zero))
+ /* Construct COND_EXPR that describes the number of iterations.
+ Either the COND_EXPR is not too expensive, and we can use it as
+ loop bound, or we can deduce a LT_EXPR bound from it. */
+ niter
+ = build3 (COND_EXPR, TREE_TYPE (desc->niter), desc->may_be_zero,
+ build_int_cst_type (TREE_TYPE (desc->niter), 0),
+ desc->niter);
+ else
+ niter = desc->niter;
+ }
else
niter = NULL_TREE;
@@ -4353,6 +4444,145 @@
return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}
+/* Get the loop bound and comparison operator of USE->iv, and store them in
+ BOUND_P and COMP_P. Returns false if unsuccessful. */
+
+static bool
+get_use_lt_bound (struct iv_use *use, tree use_iv, tree *bound_p,
+ enum tree_code *comp_p)
+{
+ gimple stmt = use->stmt;
+
+ if (gimple_code (stmt) != GIMPLE_COND
+ || gimple_cond_lhs (stmt) != use_iv)
+ return false;
+
+ *comp_p = gimple_cond_code (stmt);
+ *bound_p = gimple_cond_rhs (stmt);
+
+ return true;
+}
+
+/* Tries to replace loop exit test USE, by one formulated in terms of a LT_EXPR
+ comparison with CAND. Stores the resulting comparison in COMP_P and bound in
+ BOUND_P. */
+
+static bool
+iv_elimination_compare_lt (struct ivopts_data *data, struct iv_use *use,
+ struct iv_cand *cand, tree *bound_p,
+ enum tree_code *comp_p)
+{
+ enum tree_code use_comp, canon_comp;
+ tree base_ptr, use_lt_bound, bound, *use_iv, base_cand_at_use;
+ bool ok;
+ tree use_type, cand_type, cand_iv_base = cand->iv->base;
+
+ /* Initialize cand_type, use_iv and use_type. */
+ STRIP_NOPS (cand_iv_base);
+ cand_type = TREE_TYPE (cand_iv_base);
+ ok = extract_cond_operands (data, use->stmt, &use_iv, NULL, NULL, NULL);
+ gcc_assert (ok);
+ use_type = TREE_TYPE (*use_iv);
+
+ /* We're trying to replace 'i < n' with 'p < base + n' in
+
+ void
+ f1 (char *base, unsigned long int s, unsigned long int n)
+ {
+ unsigned long int i = s;
+ char *p = base + s;
+ do
+ {
+ *p = '\0';
+ p++;
+ i++;
+ }
+ while (i < n);
+ }
+
+ Overflow of base + n can't happen because either:
+ - s < n, and i will step to n, and p will step to base + n, or
+ - s >= n, so base + n < base + s, and assuming pointer arithmetic
+ doesn't overflow, base + s doesn't overflow, so base + n won't.
+
+ This transformation is not valid if i and n are signed, because
+ base + n might underflow.
+ */
+
+ /* Use should be an unsigned integral. */
+ if (!INTEGRAL_TYPE_P (use_type) || !TYPE_UNSIGNED (use_type))
+ return false;
+
+ /* Cand should be a pointer, and pointer overflow should be undefined. */
+ if (!POINTER_TYPE_P (cand_type) || !POINTER_TYPE_OVERFLOW_UNDEFINED)
+ return false;
+
+ /* Make sure that the loop iterates till the loop bound is hit. */
+ if (!data->loop_single_exit_p)
+ return false;
+
+ /* We only handle this case for the moment. */
+ if (!tree_int_cst_equal (use->iv->step, cand->iv->step))
+ return false;
+
+ if (cand->pos == IP_ORIGINAL)
+ {
+ /* If cand is a src level ptr iv, we know that
+ cand->iv->base + nit * cand->iv->step won't overflow. */
+
+ /* Now get the base of var_at_stmt (cand, use->stmt). */
+ if (stmt_after_increment (data->current_loop, cand, use->stmt))
+ /* Get the base of var_after. */
+ base_cand_at_use = fold_build_plus (PLUS_EXPR, cand->iv->base,
+ cand->iv->step);
+ else
+ /* Get the base of var_before. */
+ base_cand_at_use = cand->iv->base;
+ }
+ else
+ {
+ /* It is possible to also allow other pointer cands, provided we can prove
+ cand->iv->base + nit * cand->iv->step doesn't overflow. Return false
+ for now. */
+ return false;
+ }
+
+ /* Detect p = base + s. */
+ base_ptr = fold_diff_to_ssa_name (base_cand_at_use,
+ fold_convert (sizetype, use->iv->base));
+ if (base_ptr == NULL_TREE)
+ return false;
+ STRIP_NOPS (base_ptr);
+
+ /* Get the bound of the iv of the use. */
+ if (!get_use_lt_bound (use, *use_iv, &use_lt_bound, &use_comp))
+ return false;
+
+ /* Determine canon_comp. */
+ if (*comp_p == NE_EXPR)
+ canon_comp = use_comp;
+ else if (*comp_p == EQ_EXPR)
+ canon_comp = invert_tree_comparison (use_comp, false);
+ else
+ gcc_unreachable ();
+
+ /* Allow positive and negative step, and inclusive and exclusive bound.
+ To trigger inclusive bound, we need -funsafe-loop-optimizations. */
+ if (canon_comp != LT_EXPR && canon_comp != GT_EXPR
+ && canon_comp != LE_EXPR && canon_comp != GE_EXPR)
+ return false;
+
+ /* Calculate bound. */
+ bound = fold_build_plus (PLUS_EXPR, base_ptr,
+ fold_convert (sizetype, use_lt_bound));
+ if (expression_expensive_p (bound))
+ return false;
+
+ *comp_p = use_comp;
+ *bound_p = bound;
+ return true;
+}
+
/* Check whether it is possible to express the condition in USE by comparison
of candidate CAND. If so, store the value compared with to BOUND, and the
comparison operator to COMP. */
@@ -4431,6 +4661,21 @@
*bound = aff_combination_to_tree (&bnd);
*comp = iv_elimination_compare (data, use);
+ /* Try to implement nit using a '<' instead. */
+ if (TREE_CODE (nit) == COND_EXPR)
+ {
+ if (iv_elimination_compare_lt (data, use, cand, bound, comp))
+ return true;
+
+ /* We could try to see if the non-lt bound is not too expensive, but the
+ cost infrastructure needs tuning for that first. Even though
+ expression_expensive_p always returns true for COND_EXPRs, it happens
+ that the bound is folded into a MAX_EXPR, which is approved by
+ expression_expensive_p, but attributed a too low cost by force_var_cost
+ in case the MAX_EXPR would expand into control flow. */
+ return false;
+ }
+
/* It is unlikely that computing the number of iterations using division
would be more profitable than keeping the original induction variable. */
if (expression_expensive_p (*bound))