Message ID | alpine.LSU.2.20.1712041404260.12252@zhemvz.fhfr.qr |
---|---|
State | New |
Headers | show |
Series | [gimple-interchange] Add reduction validity check | expand |
On Mon, Dec 4, 2017 at 1:11 PM, Richard Biener <rguenther@suse.de> wrote: > > I've noticed we perform FP reduction association without the required > checks for associative math. I've added > gcc.dg/tree-ssa/loop-interchange-1b.c to cover this. > > I also noticed we happily interchange a loop with a reduction like > > sum = a[i] - sum; > > where a change in order of elements isn't ok. Unfortunately bwaves > is exactly a case where single_use != next_def (tried to simply remove > that case for now), because reassoc didn't have a chance to fix the > operand order. Thus this patch exports the relevant handling from > the vectorizer (for stage1 having a separate infrastructure gathering / > analyzing of reduction/induction infrastructure would be nice...) > and uses it from interchange. We then don't handle > gcc.dg/tree-ssa/loop-interchange-4.c anymore (similar vectorizer > missed-opt is PR65930). I didn't bother to split up the vectorizer > code further to implement relaxed validity checking but simply XFAILed > this testcase. > > Earlier I simplified allocation stuff in the main loop which is why > this part is included in this patch. > > Bootstrap running on x86_64-unknown-linux-gnu. > > I'll see to craft a testcase with the sum = a[i] - sum; mis-handling. > > Ok? Sure. Just for the record. There is also similar associative check in predcom. As you suggested, a path extraction/checking interface for associative checking would be great, given we have multiple users now. Thanks, bin > > Thanks, > Richard. > > 2017-12-04 Richard Biener <rguenther@suse.de> > > * tree-vectorizer.h (check_reduction_path): Declare. > * tree-vect-loop.c (check_reduction_path): New function, split out > from ... > (vect_is_simple_reduction): ... here. > * gimple-loop-interchange.cc: Include tree-vectorizer.h. > (loop_cand::analyze_iloop_reduction_var): Use single_imm_use. > Properly check for a supported reduction operation and a > valid expression if the reduction covers multiple stmts. > (prepare_perfect_loop_nest): Simpify allocation. > (pass_linterchange::execute): Likewise. > > * gcc.dg/tree-ssa/loop-interchange-1.c: Add fast-math flags. > * gcc.dg/tree-ssa/loop-interchange-1b.c: New test variant. > * gcc.dg/tree-ssa/loop-interchange-4.c: XFAIL. > > > Index: gcc/gimple-loop-interchange.cc > =================================================================== > --- gcc/gimple-loop-interchange.cc (revision 255375) > +++ gcc/gimple-loop-interchange.cc (working copy) > @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3. > #include "tree-ssa-loop-ivopts.h" > #include "tree-ssa-dce.h" > #include "tree-data-ref.h" > +#include "tree-vectorizer.h" > > /* This pass performs loop interchange: for example, the loop nest > > @@ -551,23 +552,29 @@ loop_cand::analyze_iloop_reduction_var ( > in a way that reduction operation is seen as black box. In general, > we can ignore reassociation of reduction operator; we can handle fake > reductions in which VAR is not even used to compute NEXT. */ > - FOR_EACH_IMM_USE_FAST (use_p, iterator, var) > - { > - stmt = USE_STMT (use_p); > - if (is_gimple_debug (stmt)) > - continue; > - > - if (!flow_bb_inside_loop_p (m_loop, gimple_bb (stmt))) > - return false; > - > - if (single_use != NULL) > - return false; > + if (! single_imm_use (var, &use_p, &single_use) > + || ! flow_bb_inside_loop_p (m_loop, gimple_bb (single_use))) > + return false; > > - single_use = stmt; > - } > + /* Check the reduction operation. We require a commutative or > + left-associative operation. For FP math we also need to be allowed > + to associate operations. */ > + if (! is_gimple_assign (single_use) > + || ! (commutative_tree_code (gimple_assign_rhs_code (single_use)) > + || (commutative_ternary_tree_code > + (gimple_assign_rhs_code (single_use)) > + && (use_p->use == gimple_assign_rhs1_ptr (single_use) > + || use_p->use == gimple_assign_rhs2_ptr (single_use))) > + || (gimple_assign_rhs_code (single_use) == MINUS_EXPR > + && use_p->use == gimple_assign_rhs1_ptr (single_use))) > + || (FLOAT_TYPE_P (TREE_TYPE (var)) > + && ! flag_associative_math)) > + return false; > > + /* Handle and verify a series of stmts feeding the reduction op. */ > if (single_use != next_def > - && !stmt_dominates_stmt_p (single_use, next_def)) > + && !check_reduction_path (UNKNOWN_LOCATION, m_loop, phi, next, > + gimple_assign_rhs_code (single_use))) > return false; > > /* Only support cases in which INIT is used in inner loop. */ > @@ -1964,7 +1971,7 @@ prepare_perfect_loop_nest (struct loop * > vec<data_reference_p> *datarefs, vec<ddr_p> *ddrs) > { > struct loop *start_loop = NULL, *innermost = loop; > - struct loop *outermost = superloop_at_depth (loop, 0); > + struct loop *outermost = loops_for_fn (cfun)->tree_root; > > /* Find loop nest from the innermost loop. The outermost is the innermost > outer*/ > @@ -1985,21 +1992,26 @@ prepare_perfect_loop_nest (struct loop * > if (!start_loop || !start_loop->inner) > return false; > > + /* Prepare the data reference vector for the loop nest, pruning outer > + loops we cannot handle. */ > datarefs->create (20); > - if ((start_loop = prepare_data_references (start_loop, datarefs)) == NULL > + start_loop = prepare_data_references (start_loop, datarefs); > + if (!start_loop > /* Check if there is no data reference. */ > || datarefs->is_empty () > /* Check if there are too many of data references. */ > - || (int) datarefs->length () > MAX_DATAREFS > - /* Compute access strides for all data references. */ > - || ((start_loop = compute_access_strides (start_loop, innermost, > - *datarefs)) == NULL) > - /* Check if loop nest should be interchanged. */ > - || !should_interchange_loop_nest (start_loop, innermost, *datarefs)) > - { > - free_data_refs_with_aux (*datarefs); > - return false; > - } > + || (int) datarefs->length () > MAX_DATAREFS) > + return false; > + > + /* Compute access strides for all data references, pruning outer > + loops we cannot analyze refs in. */ > + start_loop = compute_access_strides (start_loop, innermost, *datarefs); > + if (!start_loop) > + return false; > + > + /* Check if any interchange is profitable in the loop nest. */ > + if (!should_interchange_loop_nest (start_loop, innermost, *datarefs)) > + return false; > > /* Check if data dependences can be computed for loop nest starting from > start_loop. */ > @@ -2029,14 +2041,15 @@ prepare_perfect_loop_nest (struct loop * > return true; > } > > + /* ??? We should be able to adjust DDRs we already computed by > + truncating distance vectors. */ > free_dependence_relations (*ddrs); > + *ddrs = vNULL; > /* Try to compute data dependences with the outermost loop stripped. */ > loop = start_loop; > start_loop = start_loop->inner; > } while (start_loop && start_loop->inner); > > - loop_nest->release (); > - free_data_refs_with_aux (*datarefs); > return false; > } > > @@ -2050,19 +2063,20 @@ pass_linterchange::execute (function *fu > > bool changed_p = false; > struct loop *loop; > - vec<loop_p> loop_nest; > - vec<data_reference_p> datarefs; > - vec<ddr_p> ddrs; > - > FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST) > - if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs)) > - { > - tree_loop_interchange loop_interchange (loop_nest); > - changed_p |= loop_interchange.interchange (datarefs, ddrs); > - free_dependence_relations (ddrs); > - free_data_refs_with_aux (datarefs); > - loop_nest.release (); > - } > + { > + vec<loop_p> loop_nest = vNULL; > + vec<data_reference_p> datarefs = vNULL; > + vec<ddr_p> ddrs = vNULL; > + if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs)) > + { > + tree_loop_interchange loop_interchange (loop_nest); > + changed_p |= loop_interchange.interchange (datarefs, ddrs); > + } > + free_dependence_relations (ddrs); > + free_data_refs_with_aux (datarefs); > + loop_nest.release (); > + } > > if (changed_p) > scev_reset_htab (); > Index: gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1.c > =================================================================== > --- gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1.c (revision 255375) > +++ gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1.c (working copy) > @@ -1,5 +1,5 @@ > /* { dg-do run } */ > -/* { dg-options "-O2 -floop-interchange -fdump-tree-linterchange-details" } */ > +/* { dg-options "-O2 -floop-interchange -fassociative-math -fno-signed-zeros -fno-trapping-math -fdump-tree-linterchange-details" } */ > > /* Copied from graphite/interchange-4.c */ > > Index: gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1b.c > =================================================================== > --- gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1b.c (nonexistent) > +++ gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1b.c (working copy) > @@ -0,0 +1,52 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -floop-interchange -fdump-tree-linterchange-details" } */ > + > +/* Copied from graphite/interchange-4.c */ > + > +#define DEBUG 0 > +#if DEBUG > +#include <stdio.h> > +#endif > + > +double u[1782225]; > + > +static void __attribute__((noinline)) > +foo (int N, double *res) > +{ > + int i, j; > + double sum = 0; > + for (i = 0; i < N; i++) > + for (j = 0; j < N; j++) > + sum = sum + u[i + 1335 * j]; > + > + *res = sum; > +} > + > +extern void abort (); > + > +int > +main (void) > +{ > + int i, j; > + double res; > + > + for (i = 0; i < 1782225; i++) > + u[i] = 0; > + u[0] = __DBL_MAX__; > + u[1335] = -__DBL_MAX__; > + u[1] = __DBL_MAX__; > + u[1336] = -__DBL_MAX__; > + > + foo (1335, &res); > + > +#if DEBUG > + fprintf (stderr, "res = %d \n", res); > +#endif > + > + if (res != 0.0) > + abort (); > + > + return 0; > +} > + > +/* { dg-final { scan-tree-dump-not "is interchanged" "linterchange"} } */ > Index: gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-4.c > =================================================================== > --- gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-4.c (revision 255375) > +++ gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-4.c (working copy) > @@ -47,4 +47,4 @@ main (void) > return 0; > } > > -/* { dg-final { scan-tree-dump-times "Loop_pair<outer:., inner:.> is interchanged" 1 "linterchange"} } */ > +/* { dg-final { scan-tree-dump-times "Loop_pair<outer:., inner:.> is interchanged" 1 "linterchange" { xfail *-*-* } } } */ > Index: gcc/tree-vectorizer.h > =================================================================== > --- gcc/tree-vectorizer.h (revision 255375) > +++ gcc/tree-vectorizer.h (working copy) > @@ -1255,6 +1255,9 @@ extern tree vect_create_addr_base_for_ve > /* FORNOW: Used in tree-parloops.c. */ > extern gimple *vect_force_simple_reduction (loop_vec_info, gimple *, > bool *, bool); > +/* Used in gimple-loop-interchange.c. */ > +extern bool check_reduction_path (location_t, loop_p, gphi *, tree, > + enum tree_code); > /* Drive for loop analysis stage. */ > extern loop_vec_info vect_analyze_loop (struct loop *, loop_vec_info); > extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL); > Index: gcc/tree-vect-loop.c > =================================================================== > --- gcc/tree-vect-loop.c (revision 255375) > +++ gcc/tree-vect-loop.c (working copy) > @@ -2626,6 +2626,114 @@ vect_is_slp_reduction (loop_vec_info loo > } > > > +/* Return true if the reduction PHI in LOOP with latch arg LOOP_ARG and > + reduction operation CODE has a handled computation expression. */ > + > +bool > +check_reduction_path (location_t loc, loop_p loop, gphi *phi, tree loop_arg, > + enum tree_code code) > +{ > + auto_vec<std::pair<ssa_op_iter, use_operand_p> > path; > + auto_bitmap visited; > + tree lookfor = PHI_RESULT (phi); > + ssa_op_iter curri; > + use_operand_p curr = op_iter_init_phiuse (&curri, phi, SSA_OP_USE); > + while (USE_FROM_PTR (curr) != loop_arg) > + curr = op_iter_next_use (&curri); > + curri.i = curri.numops; > + do > + { > + path.safe_push (std::make_pair (curri, curr)); > + tree use = USE_FROM_PTR (curr); > + if (use == lookfor) > + break; > + gimple *def = SSA_NAME_DEF_STMT (use); > + if (gimple_nop_p (def) > + || ! flow_bb_inside_loop_p (loop, gimple_bb (def))) > + { > +pop: > + do > + { > + std::pair<ssa_op_iter, use_operand_p> x = path.pop (); > + curri = x.first; > + curr = x.second; > + do > + curr = op_iter_next_use (&curri); > + /* Skip already visited or non-SSA operands (from iterating > + over PHI args). */ > + while (curr != NULL_USE_OPERAND_P > + && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME > + || ! bitmap_set_bit (visited, > + SSA_NAME_VERSION > + (USE_FROM_PTR (curr))))); > + } > + while (curr == NULL_USE_OPERAND_P && ! path.is_empty ()); > + if (curr == NULL_USE_OPERAND_P) > + break; > + } > + else > + { > + if (gimple_code (def) == GIMPLE_PHI) > + curr = op_iter_init_phiuse (&curri, as_a <gphi *>(def), SSA_OP_USE); > + else > + curr = op_iter_init_use (&curri, def, SSA_OP_USE); > + while (curr != NULL_USE_OPERAND_P > + && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME > + || ! bitmap_set_bit (visited, > + SSA_NAME_VERSION > + (USE_FROM_PTR (curr))))) > + curr = op_iter_next_use (&curri); > + if (curr == NULL_USE_OPERAND_P) > + goto pop; > + } > + } > + while (1); > + if (dump_file && (dump_flags & TDF_DETAILS)) > + { > + dump_printf_loc (MSG_NOTE, loc, "reduction path: "); > + unsigned i; > + std::pair<ssa_op_iter, use_operand_p> *x; > + FOR_EACH_VEC_ELT (path, i, x) > + { > + dump_generic_expr (MSG_NOTE, TDF_SLIM, USE_FROM_PTR (x->second)); > + dump_printf (MSG_NOTE, " "); > + } > + dump_printf (MSG_NOTE, "\n"); > + } > + > + /* Check whether the reduction path detected is valid. */ > + bool fail = path.length () == 0; > + bool neg = false; > + for (unsigned i = 1; i < path.length (); ++i) > + { > + gimple *use_stmt = USE_STMT (path[i].second); > + tree op = USE_FROM_PTR (path[i].second); > + if (! has_single_use (op) > + || ! is_gimple_assign (use_stmt)) > + { > + fail = true; > + break; > + } > + if (gimple_assign_rhs_code (use_stmt) != code) > + { > + if (code == PLUS_EXPR > + && gimple_assign_rhs_code (use_stmt) == MINUS_EXPR) > + { > + /* Track whether we negate the reduction value each iteration. */ > + if (gimple_assign_rhs2 (use_stmt) == op) > + neg = ! neg; > + } > + else > + { > + fail = true; > + break; > + } > + } > + } > + return ! fail && ! neg; > +} > + > + > /* Function vect_is_simple_reduction > > (1) Detect a cross-iteration def-use cycle that represents a simple > @@ -3128,106 +3236,8 @@ vect_is_simple_reduction (loop_vec_info > } > > /* Look for the expression computing loop_arg from loop PHI result. */ > - auto_vec<std::pair<ssa_op_iter, use_operand_p> > path; > - auto_bitmap visited; > - tree lookfor = PHI_RESULT (phi); > - ssa_op_iter curri; > - use_operand_p curr = op_iter_init_phiuse (&curri, as_a <gphi *>(phi), > - SSA_OP_USE); > - while (USE_FROM_PTR (curr) != loop_arg) > - curr = op_iter_next_use (&curri); > - curri.i = curri.numops; > - do > - { > - path.safe_push (std::make_pair (curri, curr)); > - tree use = USE_FROM_PTR (curr); > - if (use == lookfor) > - break; > - gimple *def = SSA_NAME_DEF_STMT (use); > - if (gimple_nop_p (def) > - || ! flow_bb_inside_loop_p (loop, gimple_bb (def))) > - { > -pop: > - do > - { > - std::pair<ssa_op_iter, use_operand_p> x = path.pop (); > - curri = x.first; > - curr = x.second; > - do > - curr = op_iter_next_use (&curri); > - /* Skip already visited or non-SSA operands (from iterating > - over PHI args). */ > - while (curr != NULL_USE_OPERAND_P > - && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME > - || ! bitmap_set_bit (visited, > - SSA_NAME_VERSION > - (USE_FROM_PTR (curr))))); > - } > - while (curr == NULL_USE_OPERAND_P && ! path.is_empty ()); > - if (curr == NULL_USE_OPERAND_P) > - break; > - } > - else > - { > - if (gimple_code (def) == GIMPLE_PHI) > - curr = op_iter_init_phiuse (&curri, as_a <gphi *>(def), SSA_OP_USE); > - else > - curr = op_iter_init_use (&curri, def, SSA_OP_USE); > - while (curr != NULL_USE_OPERAND_P > - && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME > - || ! bitmap_set_bit (visited, > - SSA_NAME_VERSION > - (USE_FROM_PTR (curr))))) > - curr = op_iter_next_use (&curri); > - if (curr == NULL_USE_OPERAND_P) > - goto pop; > - } > - } > - while (1); > - if (dump_file && (dump_flags & TDF_DETAILS)) > - { > - dump_printf_loc (MSG_NOTE, vect_location, > - "reduction path: "); > - unsigned i; > - std::pair<ssa_op_iter, use_operand_p> *x; > - FOR_EACH_VEC_ELT (path, i, x) > - { > - dump_generic_expr (MSG_NOTE, TDF_SLIM, USE_FROM_PTR (x->second)); > - dump_printf (MSG_NOTE, " "); > - } > - dump_printf (MSG_NOTE, "\n"); > - } > - > - /* Check whether the reduction path detected is valid. */ > - bool fail = path.length () == 0; > - bool neg = false; > - for (unsigned i = 1; i < path.length (); ++i) > - { > - gimple *use_stmt = USE_STMT (path[i].second); > - tree op = USE_FROM_PTR (path[i].second); > - if (! has_single_use (op) > - || ! is_gimple_assign (use_stmt)) > - { > - fail = true; > - break; > - } > - if (gimple_assign_rhs_code (use_stmt) != code) > - { > - if (code == PLUS_EXPR > - && gimple_assign_rhs_code (use_stmt) == MINUS_EXPR) > - { > - /* Track whether we negate the reduction value each iteration. */ > - if (gimple_assign_rhs2 (use_stmt) == op) > - neg = ! neg; > - } > - else > - { > - fail = true; > - break; > - } > - } > - } > - if (! fail && ! neg) > + if (check_reduction_path (vect_location, loop, as_a <gphi *> (phi), loop_arg, > + code)) > return def_stmt; > > if (dump_enabled_p ())
On Mon, 4 Dec 2017, Bin.Cheng wrote: > On Mon, Dec 4, 2017 at 1:11 PM, Richard Biener <rguenther@suse.de> wrote: > > > > I've noticed we perform FP reduction association without the required > > checks for associative math. I've added > > gcc.dg/tree-ssa/loop-interchange-1b.c to cover this. > > > > I also noticed we happily interchange a loop with a reduction like > > > > sum = a[i] - sum; > > > > where a change in order of elements isn't ok. Unfortunately bwaves > > is exactly a case where single_use != next_def (tried to simply remove > > that case for now), because reassoc didn't have a chance to fix the > > operand order. Thus this patch exports the relevant handling from > > the vectorizer (for stage1 having a separate infrastructure gathering / > > analyzing of reduction/induction infrastructure would be nice...) > > and uses it from interchange. We then don't handle > > gcc.dg/tree-ssa/loop-interchange-4.c anymore (similar vectorizer > > missed-opt is PR65930). I didn't bother to split up the vectorizer > > code further to implement relaxed validity checking but simply XFAILed > > this testcase. > > > > Earlier I simplified allocation stuff in the main loop which is why > > this part is included in this patch. > > > > Bootstrap running on x86_64-unknown-linux-gnu. > > > > I'll see to craft a testcase with the sum = a[i] - sum; mis-handling. > > > > Ok? > Sure. > Just for the record. There is also similar associative check in > predcom. As you suggested, a path extraction/checking interface for > associative checking would be great, given we have multiple users now. Yeah. Note for interchange we don't need associativeness in the sense as implemented by associative_tree_code, we need left-associativeness which also MINUS_EXPR provides. So my check isn't perfect. I guess we instead need associative_tree_code () || (code == MINUS_EXPR && use_p->use == gimple_assign_rhs1_ptr (single_use)) where we could also allow RDIV_EXPR and other left-associative tree codes (but check_reduction_path would do the wrong thing with those at the moment but it has MINUS_EXPR handling that would support sum = x - (y - sum) which the above rejects). So I'm retesting with /* Check the reduction operation. We require a left-associative operation. For FP math we also need to be allowed to associate operations. */ enum tree_code code = gimple_assign_rhs_code (single_use); if (gassign *ass = dyn_cast <gassign *> (single_use)) { enum tree_code code = gimple_assign_rhs_code (ass); if (! (associative_tree_code (code) || (code == MINUS_EXPR && use_p->use == gimple_assign_rhs1_ptr (ass))) || (FLOAT_TYPE_P (TREE_TYPE (var)) && ! flag_associative_math)) return false; } else return false; which is more restrictive than before. Richard.
On Mon, Dec 4, 2017 at 1:11 PM, Richard Biener <rguenther@suse.de> wrote: > > I've noticed we perform FP reduction association without the required > checks for associative math. I've added > gcc.dg/tree-ssa/loop-interchange-1b.c to cover this. > > I also noticed we happily interchange a loop with a reduction like > > sum = a[i] - sum; > > where a change in order of elements isn't ok. Unfortunately bwaves > is exactly a case where single_use != next_def (tried to simply remove > that case for now), because reassoc didn't have a chance to fix the > operand order. Thus this patch exports the relevant handling from > the vectorizer (for stage1 having a separate infrastructure gathering / > analyzing of reduction/induction infrastructure would be nice...) > and uses it from interchange. We then don't handle > gcc.dg/tree-ssa/loop-interchange-4.c anymore (similar vectorizer > missed-opt is PR65930). I didn't bother to split up the vectorizer > code further to implement relaxed validity checking but simply XFAILed > this testcase. > > Earlier I simplified allocation stuff in the main loop which is why > this part is included in this patch. > > Bootstrap running on x86_64-unknown-linux-gnu. > > I'll see to craft a testcase with the sum = a[i] - sum; mis-handling. > > Ok? > > Thanks, > Richard. > > 2017-12-04 Richard Biener <rguenther@suse.de> > > * tree-vectorizer.h (check_reduction_path): Declare. > * tree-vect-loop.c (check_reduction_path): New function, split out > from ... > (vect_is_simple_reduction): ... here. > * gimple-loop-interchange.cc: Include tree-vectorizer.h. > (loop_cand::analyze_iloop_reduction_var): Use single_imm_use. > Properly check for a supported reduction operation and a > valid expression if the reduction covers multiple stmts. > (prepare_perfect_loop_nest): Simpify allocation. > (pass_linterchange::execute): Likewise. > > * gcc.dg/tree-ssa/loop-interchange-1.c: Add fast-math flags. > * gcc.dg/tree-ssa/loop-interchange-1b.c: New test variant. > * gcc.dg/tree-ssa/loop-interchange-4.c: XFAIL. > > > Index: gcc/gimple-loop-interchange.cc > =================================================================== > --- gcc/gimple-loop-interchange.cc (revision 255375) > +++ gcc/gimple-loop-interchange.cc (working copy) > @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3. > #include "tree-ssa-loop-ivopts.h" > #include "tree-ssa-dce.h" > #include "tree-data-ref.h" > +#include "tree-vectorizer.h" > > /* This pass performs loop interchange: for example, the loop nest > > @@ -551,23 +552,29 @@ loop_cand::analyze_iloop_reduction_var ( > in a way that reduction operation is seen as black box. In general, > we can ignore reassociation of reduction operator; we can handle fake > reductions in which VAR is not even used to compute NEXT. */ > - FOR_EACH_IMM_USE_FAST (use_p, iterator, var) > - { > - stmt = USE_STMT (use_p); > - if (is_gimple_debug (stmt)) > - continue; > - > - if (!flow_bb_inside_loop_p (m_loop, gimple_bb (stmt))) > - return false; > - > - if (single_use != NULL) > - return false; > + if (! single_imm_use (var, &use_p, &single_use) > + || ! flow_bb_inside_loop_p (m_loop, gimple_bb (single_use))) > + return false; > > - single_use = stmt; > - } > + /* Check the reduction operation. We require a commutative or > + left-associative operation. For FP math we also need to be allowed > + to associate operations. */ > + if (! is_gimple_assign (single_use) > + || ! (commutative_tree_code (gimple_assign_rhs_code (single_use)) > + || (commutative_ternary_tree_code > + (gimple_assign_rhs_code (single_use)) > + && (use_p->use == gimple_assign_rhs1_ptr (single_use) > + || use_p->use == gimple_assign_rhs2_ptr (single_use))) > + || (gimple_assign_rhs_code (single_use) == MINUS_EXPR > + && use_p->use == gimple_assign_rhs1_ptr (single_use))) > + || (FLOAT_TYPE_P (TREE_TYPE (var)) > + && ! flag_associative_math)) > + return false; > > + /* Handle and verify a series of stmts feeding the reduction op. */ > if (single_use != next_def > - && !stmt_dominates_stmt_p (single_use, next_def)) > + && !check_reduction_path (UNKNOWN_LOCATION, m_loop, phi, next, > + gimple_assign_rhs_code (single_use))) > return false; > > /* Only support cases in which INIT is used in inner loop. */ > @@ -1964,7 +1971,7 @@ prepare_perfect_loop_nest (struct loop * > vec<data_reference_p> *datarefs, vec<ddr_p> *ddrs) > { > struct loop *start_loop = NULL, *innermost = loop; > - struct loop *outermost = superloop_at_depth (loop, 0); > + struct loop *outermost = loops_for_fn (cfun)->tree_root; > > /* Find loop nest from the innermost loop. The outermost is the innermost > outer*/ > @@ -1985,21 +1992,26 @@ prepare_perfect_loop_nest (struct loop * > if (!start_loop || !start_loop->inner) > return false; > > + /* Prepare the data reference vector for the loop nest, pruning outer > + loops we cannot handle. */ > datarefs->create (20); > - if ((start_loop = prepare_data_references (start_loop, datarefs)) == NULL > + start_loop = prepare_data_references (start_loop, datarefs); > + if (!start_loop > /* Check if there is no data reference. */ > || datarefs->is_empty () > /* Check if there are too many of data references. */ > - || (int) datarefs->length () > MAX_DATAREFS > - /* Compute access strides for all data references. */ > - || ((start_loop = compute_access_strides (start_loop, innermost, > - *datarefs)) == NULL) > - /* Check if loop nest should be interchanged. */ > - || !should_interchange_loop_nest (start_loop, innermost, *datarefs)) > - { > - free_data_refs_with_aux (*datarefs); > - return false; > - } > + || (int) datarefs->length () > MAX_DATAREFS) > + return false; > + > + /* Compute access strides for all data references, pruning outer > + loops we cannot analyze refs in. */ > + start_loop = compute_access_strides (start_loop, innermost, *datarefs); > + if (!start_loop) > + return false; > + > + /* Check if any interchange is profitable in the loop nest. */ > + if (!should_interchange_loop_nest (start_loop, innermost, *datarefs)) > + return false; > > /* Check if data dependences can be computed for loop nest starting from > start_loop. */ > @@ -2029,14 +2041,15 @@ prepare_perfect_loop_nest (struct loop * > return true; > } > > + /* ??? We should be able to adjust DDRs we already computed by > + truncating distance vectors. */ Note it could be hard enough to adjust DDRs for stripped outer loop. Dependence can become non-dep, even worse, we may result in wrong "invalid dep" in case of: DDR<a,b>: (<, <, =) (<, >, =) because after simply stripping, it becomes: DDR<a,b>: (<, =) (>, =) While the correct result is "no-dep". Thanks, bin
On Mon, 4 Dec 2017, Richard Biener wrote: > On Mon, 4 Dec 2017, Bin.Cheng wrote: > > > On Mon, Dec 4, 2017 at 1:11 PM, Richard Biener <rguenther@suse.de> wrote: > > > > > > I've noticed we perform FP reduction association without the required > > > checks for associative math. I've added > > > gcc.dg/tree-ssa/loop-interchange-1b.c to cover this. > > > > > > I also noticed we happily interchange a loop with a reduction like > > > > > > sum = a[i] - sum; > > > > > > where a change in order of elements isn't ok. Unfortunately bwaves > > > is exactly a case where single_use != next_def (tried to simply remove > > > that case for now), because reassoc didn't have a chance to fix the > > > operand order. Thus this patch exports the relevant handling from > > > the vectorizer (for stage1 having a separate infrastructure gathering / > > > analyzing of reduction/induction infrastructure would be nice...) > > > and uses it from interchange. We then don't handle > > > gcc.dg/tree-ssa/loop-interchange-4.c anymore (similar vectorizer > > > missed-opt is PR65930). I didn't bother to split up the vectorizer > > > code further to implement relaxed validity checking but simply XFAILed > > > this testcase. > > > > > > Earlier I simplified allocation stuff in the main loop which is why > > > this part is included in this patch. > > > > > > Bootstrap running on x86_64-unknown-linux-gnu. > > > > > > I'll see to craft a testcase with the sum = a[i] - sum; mis-handling. > > > > > > Ok? > > Sure. > > Just for the record. There is also similar associative check in > > predcom. As you suggested, a path extraction/checking interface for > > associative checking would be great, given we have multiple users now. > > Yeah. Note for interchange we don't need associativeness in the sense > as implemented by associative_tree_code, we need left-associativeness > which also MINUS_EXPR provides. So my check isn't perfect. I guess > we instead need > > associative_tree_code () > || (code == MINUS_EXPR > && use_p->use == gimple_assign_rhs1_ptr (single_use)) > > where we could also allow RDIV_EXPR and other left-associative > tree codes (but check_reduction_path would do the wrong thing > with those at the moment but it has MINUS_EXPR handling that > would support sum = x - (y - sum) which the above rejects). > > So I'm retesting with > > /* Check the reduction operation. We require a left-associative > operation. > For FP math we also need to be allowed to associate operations. */ > enum tree_code code = gimple_assign_rhs_code (single_use); > if (gassign *ass = dyn_cast <gassign *> (single_use)) > { > enum tree_code code = gimple_assign_rhs_code (ass); > if (! (associative_tree_code (code) > || (code == MINUS_EXPR > && use_p->use == gimple_assign_rhs1_ptr (ass))) > || (FLOAT_TYPE_P (TREE_TYPE (var)) > && ! flag_associative_math)) > return false; > } > else > return false; > > which is more restrictive than before. And here's two testcases, one for sum = x - sum and one for division by sum which shows wrong-code without this patch. Richard. 2017-12-05 Richard Biener <rguenther@suse.de> * gcc.dg/tree-ssa/loop-interchange-12.c: New testcase. * gcc.dg/tree-ssa/loop-interchange-13.c: Likewise. Index: gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-12.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-12.c (nonexistent) +++ gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-12.c (working copy) @@ -0,0 +1,50 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -floop-interchange -fdump-tree-linterchange-details" } */ + +/* Copied from graphite/interchange-4.c */ + +#define DEBUG 0 +#if DEBUG +#include <stdio.h> +#endif + +unsigned u[1024]; + +static void __attribute__((noinline,noclone,noipa)) +foo (int N, unsigned *res) +{ + int i, j; + unsigned sum = 1; + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) + sum = u[i + 2 * j] / sum; + + *res = sum; +} + +extern void abort (); + +int +main (void) +{ + int i, j; + unsigned res; + + u[0] = 10; + u[1] = 200; + u[2] = 10; + u[3] = 10; + + foo (2, &res); + +#if DEBUG + fprintf (stderr, "res = %d \n", res); +#endif + + if (res != 0) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-not "is interchanged" "linterchange"} } */ Index: gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-13.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-13.c (nonexistent) +++ gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-13.c (working copy) @@ -0,0 +1,53 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -floop-interchange -fdump-tree-linterchange-details" } */ + +/* Copied from graphite/interchange-4.c */ + +#define DEBUG 0 +#if DEBUG +#include <stdio.h> +#endif + +unsigned u[1024]; + +static void __attribute__((noinline,noclone,noipa)) +foo (int N, int M, unsigned *res) +{ + int i, j; + unsigned sum = 0; + if (N > 0) + for (i = 0; i < M; i++) + for (j = 0; j < N; j++) + sum = u[i + 3 * j] - sum; + + *res = sum; +} + +extern void abort (); + +int +main (void) +{ + int i, j; + unsigned res; + + u[0] = 1; + u[1] = 2; + u[2] = 4; + u[3] = 5; + u[4] = 7; + u[5] = 8; + + foo (2, 3, &res); + +#if DEBUG + fprintf (stderr, "res = %d \n", res); +#endif + + if (res != 13) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-not "is interchanged" "linterchange"} } */
Index: gcc/gimple-loop-interchange.cc =================================================================== --- gcc/gimple-loop-interchange.cc (revision 255375) +++ gcc/gimple-loop-interchange.cc (working copy) @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3. #include "tree-ssa-loop-ivopts.h" #include "tree-ssa-dce.h" #include "tree-data-ref.h" +#include "tree-vectorizer.h" /* This pass performs loop interchange: for example, the loop nest @@ -551,23 +552,29 @@ loop_cand::analyze_iloop_reduction_var ( in a way that reduction operation is seen as black box. In general, we can ignore reassociation of reduction operator; we can handle fake reductions in which VAR is not even used to compute NEXT. */ - FOR_EACH_IMM_USE_FAST (use_p, iterator, var) - { - stmt = USE_STMT (use_p); - if (is_gimple_debug (stmt)) - continue; - - if (!flow_bb_inside_loop_p (m_loop, gimple_bb (stmt))) - return false; - - if (single_use != NULL) - return false; + if (! single_imm_use (var, &use_p, &single_use) + || ! flow_bb_inside_loop_p (m_loop, gimple_bb (single_use))) + return false; - single_use = stmt; - } + /* Check the reduction operation. We require a commutative or + left-associative operation. For FP math we also need to be allowed + to associate operations. */ + if (! is_gimple_assign (single_use) + || ! (commutative_tree_code (gimple_assign_rhs_code (single_use)) + || (commutative_ternary_tree_code + (gimple_assign_rhs_code (single_use)) + && (use_p->use == gimple_assign_rhs1_ptr (single_use) + || use_p->use == gimple_assign_rhs2_ptr (single_use))) + || (gimple_assign_rhs_code (single_use) == MINUS_EXPR + && use_p->use == gimple_assign_rhs1_ptr (single_use))) + || (FLOAT_TYPE_P (TREE_TYPE (var)) + && ! flag_associative_math)) + return false; + /* Handle and verify a series of stmts feeding the reduction op. */ if (single_use != next_def - && !stmt_dominates_stmt_p (single_use, next_def)) + && !check_reduction_path (UNKNOWN_LOCATION, m_loop, phi, next, + gimple_assign_rhs_code (single_use))) return false; /* Only support cases in which INIT is used in inner loop. */ @@ -1964,7 +1971,7 @@ prepare_perfect_loop_nest (struct loop * vec<data_reference_p> *datarefs, vec<ddr_p> *ddrs) { struct loop *start_loop = NULL, *innermost = loop; - struct loop *outermost = superloop_at_depth (loop, 0); + struct loop *outermost = loops_for_fn (cfun)->tree_root; /* Find loop nest from the innermost loop. The outermost is the innermost outer*/ @@ -1985,21 +1992,26 @@ prepare_perfect_loop_nest (struct loop * if (!start_loop || !start_loop->inner) return false; + /* Prepare the data reference vector for the loop nest, pruning outer + loops we cannot handle. */ datarefs->create (20); - if ((start_loop = prepare_data_references (start_loop, datarefs)) == NULL + start_loop = prepare_data_references (start_loop, datarefs); + if (!start_loop /* Check if there is no data reference. */ || datarefs->is_empty () /* Check if there are too many of data references. */ - || (int) datarefs->length () > MAX_DATAREFS - /* Compute access strides for all data references. */ - || ((start_loop = compute_access_strides (start_loop, innermost, - *datarefs)) == NULL) - /* Check if loop nest should be interchanged. */ - || !should_interchange_loop_nest (start_loop, innermost, *datarefs)) - { - free_data_refs_with_aux (*datarefs); - return false; - } + || (int) datarefs->length () > MAX_DATAREFS) + return false; + + /* Compute access strides for all data references, pruning outer + loops we cannot analyze refs in. */ + start_loop = compute_access_strides (start_loop, innermost, *datarefs); + if (!start_loop) + return false; + + /* Check if any interchange is profitable in the loop nest. */ + if (!should_interchange_loop_nest (start_loop, innermost, *datarefs)) + return false; /* Check if data dependences can be computed for loop nest starting from start_loop. */ @@ -2029,14 +2041,15 @@ prepare_perfect_loop_nest (struct loop * return true; } + /* ??? We should be able to adjust DDRs we already computed by + truncating distance vectors. */ free_dependence_relations (*ddrs); + *ddrs = vNULL; /* Try to compute data dependences with the outermost loop stripped. */ loop = start_loop; start_loop = start_loop->inner; } while (start_loop && start_loop->inner); - loop_nest->release (); - free_data_refs_with_aux (*datarefs); return false; } @@ -2050,19 +2063,20 @@ pass_linterchange::execute (function *fu bool changed_p = false; struct loop *loop; - vec<loop_p> loop_nest; - vec<data_reference_p> datarefs; - vec<ddr_p> ddrs; - FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST) - if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs)) - { - tree_loop_interchange loop_interchange (loop_nest); - changed_p |= loop_interchange.interchange (datarefs, ddrs); - free_dependence_relations (ddrs); - free_data_refs_with_aux (datarefs); - loop_nest.release (); - } + { + vec<loop_p> loop_nest = vNULL; + vec<data_reference_p> datarefs = vNULL; + vec<ddr_p> ddrs = vNULL; + if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs)) + { + tree_loop_interchange loop_interchange (loop_nest); + changed_p |= loop_interchange.interchange (datarefs, ddrs); + } + free_dependence_relations (ddrs); + free_data_refs_with_aux (datarefs); + loop_nest.release (); + } if (changed_p) scev_reset_htab (); Index: gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1.c (revision 255375) +++ gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1.c (working copy) @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -floop-interchange -fdump-tree-linterchange-details" } */ +/* { dg-options "-O2 -floop-interchange -fassociative-math -fno-signed-zeros -fno-trapping-math -fdump-tree-linterchange-details" } */ /* Copied from graphite/interchange-4.c */ Index: gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1b.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1b.c (nonexistent) +++ gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-1b.c (working copy) @@ -0,0 +1,52 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -floop-interchange -fdump-tree-linterchange-details" } */ + +/* Copied from graphite/interchange-4.c */ + +#define DEBUG 0 +#if DEBUG +#include <stdio.h> +#endif + +double u[1782225]; + +static void __attribute__((noinline)) +foo (int N, double *res) +{ + int i, j; + double sum = 0; + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) + sum = sum + u[i + 1335 * j]; + + *res = sum; +} + +extern void abort (); + +int +main (void) +{ + int i, j; + double res; + + for (i = 0; i < 1782225; i++) + u[i] = 0; + u[0] = __DBL_MAX__; + u[1335] = -__DBL_MAX__; + u[1] = __DBL_MAX__; + u[1336] = -__DBL_MAX__; + + foo (1335, &res); + +#if DEBUG + fprintf (stderr, "res = %d \n", res); +#endif + + if (res != 0.0) + abort (); + + return 0; +} + +/* { dg-final { scan-tree-dump-not "is interchanged" "linterchange"} } */ Index: gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-4.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-4.c (revision 255375) +++ gcc/testsuite/gcc.dg/tree-ssa/loop-interchange-4.c (working copy) @@ -47,4 +47,4 @@ main (void) return 0; } -/* { dg-final { scan-tree-dump-times "Loop_pair<outer:., inner:.> is interchanged" 1 "linterchange"} } */ +/* { dg-final { scan-tree-dump-times "Loop_pair<outer:., inner:.> is interchanged" 1 "linterchange" { xfail *-*-* } } } */ Index: gcc/tree-vectorizer.h =================================================================== --- gcc/tree-vectorizer.h (revision 255375) +++ gcc/tree-vectorizer.h (working copy) @@ -1255,6 +1255,9 @@ extern tree vect_create_addr_base_for_ve /* FORNOW: Used in tree-parloops.c. */ extern gimple *vect_force_simple_reduction (loop_vec_info, gimple *, bool *, bool); +/* Used in gimple-loop-interchange.c. */ +extern bool check_reduction_path (location_t, loop_p, gphi *, tree, + enum tree_code); /* Drive for loop analysis stage. */ extern loop_vec_info vect_analyze_loop (struct loop *, loop_vec_info); extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL); Index: gcc/tree-vect-loop.c =================================================================== --- gcc/tree-vect-loop.c (revision 255375) +++ gcc/tree-vect-loop.c (working copy) @@ -2626,6 +2626,114 @@ vect_is_slp_reduction (loop_vec_info loo } +/* Return true if the reduction PHI in LOOP with latch arg LOOP_ARG and + reduction operation CODE has a handled computation expression. */ + +bool +check_reduction_path (location_t loc, loop_p loop, gphi *phi, tree loop_arg, + enum tree_code code) +{ + auto_vec<std::pair<ssa_op_iter, use_operand_p> > path; + auto_bitmap visited; + tree lookfor = PHI_RESULT (phi); + ssa_op_iter curri; + use_operand_p curr = op_iter_init_phiuse (&curri, phi, SSA_OP_USE); + while (USE_FROM_PTR (curr) != loop_arg) + curr = op_iter_next_use (&curri); + curri.i = curri.numops; + do + { + path.safe_push (std::make_pair (curri, curr)); + tree use = USE_FROM_PTR (curr); + if (use == lookfor) + break; + gimple *def = SSA_NAME_DEF_STMT (use); + if (gimple_nop_p (def) + || ! flow_bb_inside_loop_p (loop, gimple_bb (def))) + { +pop: + do + { + std::pair<ssa_op_iter, use_operand_p> x = path.pop (); + curri = x.first; + curr = x.second; + do + curr = op_iter_next_use (&curri); + /* Skip already visited or non-SSA operands (from iterating + over PHI args). */ + while (curr != NULL_USE_OPERAND_P + && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME + || ! bitmap_set_bit (visited, + SSA_NAME_VERSION + (USE_FROM_PTR (curr))))); + } + while (curr == NULL_USE_OPERAND_P && ! path.is_empty ()); + if (curr == NULL_USE_OPERAND_P) + break; + } + else + { + if (gimple_code (def) == GIMPLE_PHI) + curr = op_iter_init_phiuse (&curri, as_a <gphi *>(def), SSA_OP_USE); + else + curr = op_iter_init_use (&curri, def, SSA_OP_USE); + while (curr != NULL_USE_OPERAND_P + && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME + || ! bitmap_set_bit (visited, + SSA_NAME_VERSION + (USE_FROM_PTR (curr))))) + curr = op_iter_next_use (&curri); + if (curr == NULL_USE_OPERAND_P) + goto pop; + } + } + while (1); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + dump_printf_loc (MSG_NOTE, loc, "reduction path: "); + unsigned i; + std::pair<ssa_op_iter, use_operand_p> *x; + FOR_EACH_VEC_ELT (path, i, x) + { + dump_generic_expr (MSG_NOTE, TDF_SLIM, USE_FROM_PTR (x->second)); + dump_printf (MSG_NOTE, " "); + } + dump_printf (MSG_NOTE, "\n"); + } + + /* Check whether the reduction path detected is valid. */ + bool fail = path.length () == 0; + bool neg = false; + for (unsigned i = 1; i < path.length (); ++i) + { + gimple *use_stmt = USE_STMT (path[i].second); + tree op = USE_FROM_PTR (path[i].second); + if (! has_single_use (op) + || ! is_gimple_assign (use_stmt)) + { + fail = true; + break; + } + if (gimple_assign_rhs_code (use_stmt) != code) + { + if (code == PLUS_EXPR + && gimple_assign_rhs_code (use_stmt) == MINUS_EXPR) + { + /* Track whether we negate the reduction value each iteration. */ + if (gimple_assign_rhs2 (use_stmt) == op) + neg = ! neg; + } + else + { + fail = true; + break; + } + } + } + return ! fail && ! neg; +} + + /* Function vect_is_simple_reduction (1) Detect a cross-iteration def-use cycle that represents a simple @@ -3128,106 +3236,8 @@ vect_is_simple_reduction (loop_vec_info } /* Look for the expression computing loop_arg from loop PHI result. */ - auto_vec<std::pair<ssa_op_iter, use_operand_p> > path; - auto_bitmap visited; - tree lookfor = PHI_RESULT (phi); - ssa_op_iter curri; - use_operand_p curr = op_iter_init_phiuse (&curri, as_a <gphi *>(phi), - SSA_OP_USE); - while (USE_FROM_PTR (curr) != loop_arg) - curr = op_iter_next_use (&curri); - curri.i = curri.numops; - do - { - path.safe_push (std::make_pair (curri, curr)); - tree use = USE_FROM_PTR (curr); - if (use == lookfor) - break; - gimple *def = SSA_NAME_DEF_STMT (use); - if (gimple_nop_p (def) - || ! flow_bb_inside_loop_p (loop, gimple_bb (def))) - { -pop: - do - { - std::pair<ssa_op_iter, use_operand_p> x = path.pop (); - curri = x.first; - curr = x.second; - do - curr = op_iter_next_use (&curri); - /* Skip already visited or non-SSA operands (from iterating - over PHI args). */ - while (curr != NULL_USE_OPERAND_P - && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME - || ! bitmap_set_bit (visited, - SSA_NAME_VERSION - (USE_FROM_PTR (curr))))); - } - while (curr == NULL_USE_OPERAND_P && ! path.is_empty ()); - if (curr == NULL_USE_OPERAND_P) - break; - } - else - { - if (gimple_code (def) == GIMPLE_PHI) - curr = op_iter_init_phiuse (&curri, as_a <gphi *>(def), SSA_OP_USE); - else - curr = op_iter_init_use (&curri, def, SSA_OP_USE); - while (curr != NULL_USE_OPERAND_P - && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME - || ! bitmap_set_bit (visited, - SSA_NAME_VERSION - (USE_FROM_PTR (curr))))) - curr = op_iter_next_use (&curri); - if (curr == NULL_USE_OPERAND_P) - goto pop; - } - } - while (1); - if (dump_file && (dump_flags & TDF_DETAILS)) - { - dump_printf_loc (MSG_NOTE, vect_location, - "reduction path: "); - unsigned i; - std::pair<ssa_op_iter, use_operand_p> *x; - FOR_EACH_VEC_ELT (path, i, x) - { - dump_generic_expr (MSG_NOTE, TDF_SLIM, USE_FROM_PTR (x->second)); - dump_printf (MSG_NOTE, " "); - } - dump_printf (MSG_NOTE, "\n"); - } - - /* Check whether the reduction path detected is valid. */ - bool fail = path.length () == 0; - bool neg = false; - for (unsigned i = 1; i < path.length (); ++i) - { - gimple *use_stmt = USE_STMT (path[i].second); - tree op = USE_FROM_PTR (path[i].second); - if (! has_single_use (op) - || ! is_gimple_assign (use_stmt)) - { - fail = true; - break; - } - if (gimple_assign_rhs_code (use_stmt) != code) - { - if (code == PLUS_EXPR - && gimple_assign_rhs_code (use_stmt) == MINUS_EXPR) - { - /* Track whether we negate the reduction value each iteration. */ - if (gimple_assign_rhs2 (use_stmt) == op) - neg = ! neg; - } - else - { - fail = true; - break; - } - } - } - if (! fail && ! neg) + if (check_reduction_path (vect_location, loop, as_a <gphi *> (phi), loop_arg, + code)) return def_stmt; if (dump_enabled_p ())