From patchwork Wed Mar 2 22:01:14 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: ivopts improvement Date: Wed, 02 Mar 2011 12:01:14 -0000 From: Tom de Vries X-Patchwork-Id: 85173 Message-Id: <4D6EBE2A.10002@codesourcery.com> To: Zdenek Dvorak Cc: Richard Guenther , Paolo Bonzini , gcc-patches@gcc.gnu.org, Bernd Schmidt , Maxim Kuvyrkov Hi Zdenek, On 02/28/2011 07:12 PM, Zdenek Dvorak wrote: > Hi, > >>>>>> I think this should be done unconditionally under >>>>>> -funsafe-loop-optimizations. >>>> >>>> void >>>> f (char *base, unsigned long int i, unsigned long int n) >>>> { >>>> char *p = base + i; >>>> do >>>> { >>>> *p = '\0'; >>>> p++; >>>> i++; >>>> } >>>> while (i< n); >>>> } >>>> >>>> AFAIU, -funsafe-loop-optimizations allows me to assume that i does not >>>> overflow. Are you saying that -funsafe-loop-optimizations also implies >>>> that p does not overflow? >>> >>> I'm wondering if the C standard doesn't allow you to accept this always, at >>> least if the increment to i (and p) is positive. >>> >>> I'm thinking that "undefined behavior for NULL pointer access" implies that >>> an object cannot be allocated across a NULL pointer. And since pointer >>> arithmetic beyond object boundaries is undefined, an overflow of p (which >>> would cross a NULL pointer) is also undefined. >> >> The important thing is that IVOPTs is not allowed to introduce IVs that >> overflow either. And it is known to create weird IVs starting from >> -4 ... > > as long as we do the arithmetics in unsigned integer type and only cast to the > pointer type the final result, C standard (plus the gcc-specific interpretation > of casts) allows us to do that. > > Nevertheless, pointer arithmetics has defined behavior only within > a single memory object or at most one array element after it; which more > or less forbids overflows, when the arithmetics is performed in pointer > type, > > Zdenek Another try with updated patches. Changes compared to last submission: iterator.6.1-ml.patch: * removed the stmt_after_increment related code in may_eliminate_iv (after review comment). iterator.6.4-ml.patch: * added test for loop_only_exit_p before calling iv_elimination_compare_lt. make sure the last loop iteration is reached, which makes p + n a valid pointer. * removed cand_valid_pointer_at_use_p and replaced call with test for (cand->pos == IP_ORIGINAL && POINTER_TYPE_P (TREE_TYPE (cand->var_before)) && POINTER_TYPE_OVERFLOW_UNDEFINED). In other words, the iterator is a source level pointer iterator, which is guaranteed not to overflow provided POINTER_TYPE_OVERFLOW_UNDEFINED. * return bound in pointer type rather than unsigned int type. Make sure the bound matches the pointer iterator. reg-tested on x86-64. Ok now? Thanks, - Tom diff -u gcc/tree-ssa-loop-ivopts.c gcc/tree-ssa-loop-ivopts.c --- gcc/tree-ssa-loop-ivopts.c (working copy) +++ gcc/tree-ssa-loop-ivopts.c (working copy) @@ -814,17 +814,25 @@ if (!slot) { - /* Try to determine number of iterations. We must know it - unconditionally (i.e., without possibility of # of iterations - being zero). Also, we cannot safely work with ssa names that - appear in phi nodes on abnormal edges, so that we do not create - overlapping life ranges for them (PR 27283). */ + /* Try to determine number of iterations. We cannot safely work with ssa + names that appear in phi nodes on abnormal edges, so that we do not + create overlapping life ranges for them (PR 27283). */ desc = XNEW (struct tree_niter_desc); if (number_of_iterations_exit (data->current_loop, exit, desc, true) - && integer_zerop (desc->may_be_zero) && !contains_abnormal_ssa_name_p (desc->niter)) - niter = desc->niter; + { + if (!integer_zerop (desc->may_be_zero)) + /* Construct COND_EXPR that describes the number of iterations. + Either the COND_EXPR is not too expensive, and we can use it as + loop bound, or we can deduce a LT_EXPR bound from it. */ + niter + = build3 (COND_EXPR, TREE_TYPE (desc->niter), desc->may_be_zero, + build_int_cst_type (TREE_TYPE (desc->niter), 0), + desc->niter); + else + niter = desc->niter; + } else niter = NULL_TREE; @@ -4342,6 +4350,123 @@ return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR); } +/* Tries to detect + NIT == (use_iv_max < USE->iv->base) + ? 0 + : (use_iv_max - USE->iv->base) + where + use_iv_real_base == (USE->iv->base - USE->iv->step) + && CAND->iv->base == base_ptr + use_iv_real_base + and returns the exclusive upper bound for CAND->var_after: + base_ptr + use_iv_max. */ + +static tree +get_lt_bound (struct iv_use *use, struct iv_cand *cand, tree nit) +{ + tree comp, base_ptr, n, n0, n1; + tree use_iv_real_base, cand_iv_base, use_iv_max; + gimple def_stmt; + int npos, mpos; + enum tree_code compare; + tree cand_type = TREE_TYPE (cand->var_before); + + if (TREE_CODE (nit) != COND_EXPR) + return NULL_TREE; + + /* use_iv_real_base == use->iv->base - use->iv->step. */ + use_iv_real_base = fold_build_plus (MINUS_EXPR, use->iv->base, use->iv->step); + + /* cand_iv_base. */ + cand_iv_base = cand->iv->base; + STRIP_NOPS (cand_iv_base); + + /* cand->iv->base == base_ptr + use_iv_real_base. */ + if (TREE_CODE (cand_iv_base) != SSA_NAME) + return NULL_TREE; + def_stmt = SSA_NAME_DEF_STMT (cand_iv_base); + if (!is_gimple_assign (def_stmt) + || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR) + return NULL_TREE; + if (gimple_assign_rhs2 (def_stmt) != use_iv_real_base) + return NULL_TREE; + base_ptr = gimple_assign_rhs1 (def_stmt); + + /* 0. */ + if (tree_int_cst_equal (TREE_OPERAND (nit, 1), integer_zero_node)) + npos = 2; + else if (tree_int_cst_equal (TREE_OPERAND (nit, 2), integer_zero_node)) + npos = 1; + else + return NULL_TREE; + + /* n == use_iv_max - use->iv->base. */ + n = TREE_OPERAND (nit, npos); + if (TREE_CODE (n) != PLUS_EXPR) + return NULL_TREE; + n0 = TREE_OPERAND (n, 0); + n1 = TREE_OPERAND (n, 1); + if (tree_int_cst_equal (fold_build_plus (PLUS_EXPR, use->iv->base, n0), + integer_zero_node)) + use_iv_max = n1; + else if (tree_int_cst_equal (fold_build_plus (PLUS_EXPR, use->iv->base, n1), + integer_zero_node)) + use_iv_max = n0; + else + return NULL_TREE; + + /* comp == use_iv_max < use->iv->base. */ + comp = TREE_OPERAND (nit, 0); + compare = TREE_CODE (comp); + if ((npos == 2 && compare == LT_EXPR) + || (npos == 1 && compare == GE_EXPR)) + mpos = 0; + else if ((npos == 2 && compare == GT_EXPR) + || (npos == 1 && compare == LE_EXPR)) + mpos = 1; + else + return NULL_TREE; + if (TREE_OPERAND (comp, mpos) != use_iv_max + || !tree_int_cst_equal (fold_build_plus (MINUS_EXPR, use->iv->base, + TREE_OPERAND (comp, 1 - mpos)), + integer_zero_node)) + return NULL_TREE; + + /* Calculate bound. */ + return fold_build_plus (PLUS_EXPR, convert (cand_type, base_ptr), use_iv_max); +} + +/* Tries to replace loop exit test USE, which allows NIT iterations, by one + formulated in terms of a LT_EXPR comparison with CAND. Stores the resulting + comparison in COMP_P and bound in BOUND_P. */ + +static bool +iv_elimination_compare_lt (struct iv_use *use, struct iv_cand *cand, + tree *bound_p, tree nit, enum tree_code *comp_p) +{ + tree bound; + + if (!(cand->pos == IP_ORIGINAL + && POINTER_TYPE_P (TREE_TYPE (cand->var_before)) + && POINTER_TYPE_OVERFLOW_UNDEFINED)) + return false; + + if (*comp_p != NE_EXPR) + return false; + + bound = get_lt_bound (use, cand, nit); + + if (bound == NULL_TREE) + return false; + + if (expression_expensive_p (bound)) + return false; + + *comp_p = LT_EXPR; + *bound_p = bound; + + return true; +} + /* Check whether it is possible to express the condition in USE by comparison of candidate CAND. If so, store the value compared with to BOUND, and the comparison operator to COMP. */ @@ -4420,6 +4545,15 @@ *bound = aff_combination_to_tree (&bnd); *comp = iv_elimination_compare (data, use); + /* Try to implement nit using a '<' instead. */ + if (TREE_CODE (nit) == COND_EXPR) + { + if (!loop_only_exit_p (loop, exit)) + return false; + + return iv_elimination_compare_lt (use, cand, bound, nit, comp); + } + /* It is unlikely that computing the number of iterations using division would be more profitable than keeping the original induction variable. */ if (expression_expensive_p (*bound))