[PING] Improve induction variable elimination

Message ID	000001cfe2a8$ddf94e00$99ebea00$@arm.com
State	New
Headers	show Return-Path: <gcc-patches-return-379826-incoming=patchwork.ozlabs.org@gcc.gnu.org> DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:date:message-id:mime-version:content-type; q=dns; s=default; b=oUwGxE5NvAR/6p9L9sBZTXfyodxKIR1LW9ejssr+BNOnOOpD5N CqGJcR+uB4Wf43hgvJY1sgKUJ92D20RCz9+0Ezmw1nySbiC62inoOfEIX+ojzWS0 q05sdsGX5JR9/MuCDHZNZp4YwIwMMEHKR5jYHCGiMCNAzRAngv4es3t/s= Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk Sender: gcc-patches-owner@gcc.gnu.org From: "Bin Cheng" <bin.cheng@arm.com> To: <gcc-patches@gcc.gnu.org> Cc: <ook@ucw.cz> Subject: [PATCH PING]Improve induction variable elimination Date: Wed, 8 Oct 2014 11:35:07 +0800 Message-ID: <000001cfe2a8$ddf94e00$99ebea00$@arm.com> MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="----=_NextPart_000_0001_01CFE2EB.EC1E62C0"

Index: gcc/tree-ssa-loop-ivopts.c =================================================================== --- gcc/tree-ssa-loop-ivopts.c (revision 215108) +++ gcc/tree-ssa-loop-ivopts.c (working copy) @@ -4451,6 +4451,44 @@ iv_period (struct iv *iv) return period; } +/* Returns no wrapping period of induction variable IV. For now + only unsigned type IV is handled, we could extend it in case + of non-overflow for signed ones. Return zero if it can't be + decided. */ + +static tree +iv_nowrap_period (struct iv *iv) +{ + bool overflow; + tree type; + tree base = iv->base, step = iv->step; + widest_int base_val, step_val, max_val, span, period; + + gcc_assert (step && TREE_CODE (step) == INTEGER_CST); + + type = TREE_TYPE (base); + if (!TYPE_UNSIGNED (type) || TREE_CODE (base) != INTEGER_CST) + return integer_zero_node; + + base_val = wi::to_widest (base); + step_val = wi::to_widest (step); + if (!POINTER_TYPE_P (type) && TYPE_MAX_VALUE (type) + && TREE_CODE (TYPE_MAX_VALUE (type)) == INTEGER_CST) + max_val = wi::to_widest (TYPE_MAX_VALUE (type)); + else + { + wide_int max_wi = wi::max_value (TYPE_PRECISION (type), UNSIGNED); + max_val = wi::to_widest (wide_int_to_tree (type, max_wi)); + } + + span = max_val - base_val + step_val - 1; + period = wi::div_trunc (span, step_val, UNSIGNED, &overflow); + if (overflow) + return integer_zero_node; + + return wide_int_to_tree (type, period); +} + /* Returns the comparison operator used when eliminating the iv USE. */ static enum tree_code @@ -4483,6 +4521,10 @@ difference_cannot_overflow_p (struct ivopts_data * tree e1, e2; aff_tree aff_e1, aff_e2, aff_offset; + /* No overflow if offset is zero. */ + if (integer_zerop (offset)) + return true; + if (!nowrap_type_p (TREE_TYPE (base))) return false; @@ -4538,7 +4580,84 @@ difference_cannot_overflow_p (struct ivopts_data * } } -/* Tries to replace loop exit by one formulated in terms of a LT_EXPR +/* Check whether PERIOD of CAND is greater than the number of iterations + described by DESC for which the exit condition is true. The exit + condition is comparison against USE. */ + +static bool +period_greater_niter_exit (struct ivopts_data *data, + struct iv_use *use, struct iv_cand *cand, + tree period, struct tree_niter_desc *desc) +{ + struct loop *loop = data->current_loop; + + /* If the number of iterations is constant, compare against it directly. */ + if (TREE_CODE (desc->niter) == INTEGER_CST) + { + /* See cand_value_at. */ + if (stmt_after_increment (loop, cand, use->stmt)) + { + if (!tree_int_cst_lt (desc->niter, period)) + return false; + } + else + { + if (tree_int_cst_lt (period, desc->niter)) + return false; + } + } + + /* If not, and if this is the only possible exit of the loop, see whether + we can get a conservative estimate on the number of iterations of the + entire loop and compare against that instead. */ + else + { + widest_int period_value, max_niter; + + max_niter = desc->max; + if (stmt_after_increment (loop, cand, use->stmt)) + max_niter += 1; + period_value = wi::to_widest (period); + if (wi::gtu_p (max_niter, period_value)) + { + /* See if we can take advantage of inferred loop bound information. */ + if (data->loop_single_exit_p) + { + if (!max_loop_iterations (loop, &max_niter)) + return false; + /* The loop bound is already adjusted by adding 1. */ + if (wi::gtu_p (max_niter, period_value)) + return false; + } + else + return false; + } + } + + return true; +} + +/* Determine whether no wrapping period of CAND is greater than + the number of iterations described by DESC for which exit + conditionis true. The exit condition is comparison against USE. */ + +static bool +nowrap_cand_for_loop_niter_p (struct ivopts_data *data, + struct iv_use *use, + struct iv_cand *cand, + struct tree_niter_desc *desc) +{ + tree period; + + period = iv_nowrap_period (cand->iv); + if (period != integer_zero_node + && period_greater_niter_exit (data, use, cand, period, desc)) + return true; + + return false; +} + +/* Tries to replace loop exit in USE by one formulated in terms of a LT_EXPR comparison with CAND. NITER describes the number of iterations of the loops. If successful, the comparison in COMP_P is altered accordingly. @@ -4575,11 +4694,18 @@ difference_cannot_overflow_p (struct ivopts_data * 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no overflow in computing it or the values of p. 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not - overflow. To prove this, we use the fact that p_0 = base + a. */ + overflow. To prove this, we use the fact that p_0 = base + a. + + Moreover, with more complex overflow analysis for unsigned type, it is + possible to eliminate I with P if P is an induction candidate of unsigned + integer type other than pointer if we can prove that P doesn't overflow + during all iterations of current loop. Also special forms of MAY_BE_ZERO + in NITER is checked because "a + 1 > b" could be folded. */ + static bool -iv_elimination_compare_lt (struct ivopts_data *data, - struct iv_cand *cand, enum tree_code *comp_p, +iv_elimination_compare_lt (struct ivopts_data *data, struct iv_use *use, + struct iv_cand *cand, enum tree_code *comp_p, struct tree_niter_desc *niter) { tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset; @@ -4588,11 +4714,13 @@ static bool HOST_WIDE_INT step; /* We need to know that the candidate induction variable does not overflow. - While more complex analysis may be used to prove this, for now just - check that the variable appears in the original program and that it - is computed in a type that guarantees no overflows. */ + Apart from checking that the variable appears in the original program + and that is is computed in a type that guarantees no overflows, we also + check no wrapping periord of the variable covers the niter if it has + unsigned type. More complex analysis may be used to prove this. */ cand_type = TREE_TYPE (cand->iv->base); - if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type)) + if ((cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type)) + && !nowrap_cand_for_loop_niter_p (data, use, cand, niter)) return false; /* Make sure that the loop iterates till the loop bound is hit, as otherwise @@ -4635,6 +4763,67 @@ static bool else return false; } + else if (TREE_CODE (mbz) == EQ_EXPR) + { + /* Check whether may_be_zero is in below three forms: + 1) b' == TYPE_MAX_VALUE (TREE_TYPE (b')) + where b' is of type nit_type. The expression could be + folded from "b' + 1 < 1". In this case, A is "0" and B + is "b' + 1". + 2) b == 0 + where b is of type nit_type. The expression could be + folded from "b < 1". In this case, A is "0" and B is + "b". + 3) b' == -1 + where b' is of signed type which has nit_type as its + unsigned counterpart. The expression could be folded + from "(nit_type)b' + 1 < 1". In this case, A is "0" + and "(nit_type)b' + 1". */ + tree type; + tree op0 = TREE_OPERAND (mbz, 0); + tree op1 = TREE_OPERAND (mbz, 1); + + if (TREE_CODE (op1) != INTEGER_CST) + { + tree tmp = op0; + op0 = op1; + op1 = tmp; + } + type = TREE_TYPE (op0); + if (TREE_CODE (op1) != INTEGER_CST) + return false; + + /* Convert case 3 to case 1. */ + if (!TYPE_UNSIGNED (type) + && operand_equal_p (op1, integer_minus_one_node, 0) + && types_compatible_p (unsigned_type_for (type), nit_type)) + { + type = nit_type; + op0 = fold_convert_loc (UNKNOWN_LOCATION, nit_type, op0); + op1 = fold_convert_loc (UNKNOWN_LOCATION, nit_type, op1); + } + + if (!TYPE_UNSIGNED (type)) + return false; + + /* Case 1. */ + if (TYPE_MAX_VALUE (type) + && TREE_CODE (op1) == INTEGER_CST + && operand_equal_p (op1, TYPE_MAX_VALUE (type), 0)) + { + a = integer_zero_node; + b = fold_build2 (PLUS_EXPR, type, op0, integer_one_node); + } + /* Case 2. */ + else if (TREE_CODE (op1) == INTEGER_CST + && operand_equal_p (op1, integer_zero_node, 0)) + { + a = integer_zero_node; + b = op0; + } + else + return false; + } else return false; @@ -4651,10 +4840,14 @@ static bool return false; /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not - overflow. */ - offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step), - cand->iv->step, - fold_convert (TREE_TYPE (cand->iv->step), a)); + overflow. It is uncessary to build offset when A equals to ZERO. */ + if (a != integer_zero_node) + offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step), + cand->iv->step, + fold_convert (TREE_TYPE (cand->iv->step), a)); + else + offset = a; + if (!difference_cannot_overflow_p (data, cand->iv->base, offset)) return false; @@ -4711,50 +4904,9 @@ may_eliminate_iv (struct ivopts_data *data, This is the case iff the period of the induction variable is greater than the number of iterations for which the exit condition is true. */ period = iv_period (cand->iv); + if (!period_greater_niter_exit (data, use, cand, period, desc)) + return false; - /* If the number of iterations is constant, compare against it directly. */ - if (TREE_CODE (desc->niter) == INTEGER_CST) - { - /* See cand_value_at. */ - if (stmt_after_increment (loop, cand, use->stmt)) - { - if (!tree_int_cst_lt (desc->niter, period)) - return false; - } - else - { - if (tree_int_cst_lt (period, desc->niter)) - return false; - } - } - - /* If not, and if this is the only possible exit of the loop, see whether - we can get a conservative estimate on the number of iterations of the - entire loop and compare against that instead. */ - else - { - widest_int period_value, max_niter; - - max_niter = desc->max; - if (stmt_after_increment (loop, cand, use->stmt)) - max_niter += 1; - period_value = wi::to_widest (period); - if (wi::gtu_p (max_niter, period_value)) - { - /* See if we can take advantage of inferred loop bound information. */ - if (data->loop_single_exit_p) - { - if (!max_loop_iterations (loop, &max_niter)) - return false; - /* The loop bound is already adjusted by adding 1. */ - if (wi::gtu_p (max_niter, period_value)) - return false; - } - else - return false; - } - } - cand_value_at (loop, cand, use->stmt, desc->niter, &bnd); *bound = fold_convert (TREE_TYPE (cand->iv->base), @@ -4774,7 +4926,7 @@ may_eliminate_iv (struct ivopts_data *data, base the exit condition on it. However, that is often too expensive. */ if (!integer_zerop (desc->may_be_zero)) - return iv_elimination_compare_lt (data, cand, comp, desc); + return iv_elimination_compare_lt (data, use, cand, comp, desc); return true; } Index: gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-3.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-3.c (revision 0) +++ gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-3.c (revision 0) @@ -0,0 +1,38 @@ +/* { dg-do compile { target { lp64 } } } */ +/* { dg-options "-O2 -fdump-tree-ivopts-details" } */ + +typedef long unsigned int size_t; +extern float a[100], b[100]; + +int foo (int M, unsigned int l) +{ + unsigned ivtmp = 0, niters, _37, _38, bnd; + size_t _67, _1; + float *vectp_a, *vectp_b, *vectp_a2; + float vect__6, vect__7, vect__8; + + _38 = (unsigned int)l; + bnd = _38 + 1; + + _1 = (size_t) M; + _67 = _1 * 4; + vectp_a = a; vectp_b = b; vectp_a2 = a + _67; + + do + { + vect__6 = *vectp_a; + vect__7 = *vectp_b; + vect__8 = vect__6 + vect__7; + *vectp_a = vect__8; + vectp_a = vectp_a + 4; + vectp_b = vectp_b + 4; + vectp_a2 = vectp_a2 + 4; + ivtmp += 1; + } + while (ivtmp < bnd); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "Replacing exit test: if \\(bnd.*\\)" 1 "ivopts" } } */ +/* { dg-final { cleanup-tree-dump "ivopts" } } */ Index: gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-4.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-4.c (revision 0) +++ gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt-4.c (revision 0) @@ -0,0 +1,38 @@ +/* { dg-do compile { target { lp64 } } } */ +/* { dg-options "-O2 -fdump-tree-ivopts-details" } */ + +typedef long unsigned int size_t; +extern float a[100], b[100]; + +int foo (int M, int l) +{ + unsigned ivtmp = 0, niters, _37, _38, bnd; + size_t _67, _1; + float *vectp_a, *vectp_b, *vectp_a2; + float vect__6, vect__7, vect__8; + + _38 = (unsigned int)l; + bnd = _38 + 1; + + _1 = (size_t) M; + _67 = _1 * 4; + vectp_a = a; vectp_b = b; vectp_a2 = a + _67; + + do + { + vect__6 = *vectp_a; + vect__7 = *vectp_b; + vect__8 = vect__6 + vect__7; + *vectp_a = vect__8; + vectp_a = vectp_a + 4; + vectp_b = vectp_b + 4; + vectp_a2 = vectp_a2 + 4; + ivtmp += 1; + } + while (ivtmp < bnd); + + return 0; +} + +/* { dg-final { scan-tree-dump-times "Replacing exit test: if \\(bnd.*\\)" 1 "ivopts" } } */ +/* { dg-final { cleanup-tree-dump "ivopts" } } */

[PING] Improve induction variable elimination

Commit Message

Comments

Patch