Message ID | nycvar.YFH.7.76.1909182010440.5566@zhemvz.fhfr.qr |
---|---|
State | New |
Headers | show |
Series | Remove vectorizer reduction operand swapping | expand |
Richard Biener <rguenther@suse.de> writes:
> It shouldn't be necessary.
SVE is the counter-example :-) But the fix is simpler than the code
you removed, so it's still a net win.
Fixes various vect.exp and aarch64-sve.exp ICEs. OK if it passes
proper testing?
Thanks,
Richard
2019-09-19 Richard Sandiford <richard.sandiford@arm.com>
gcc/
* tree-vectorizer.h (vectorizable_condition): Take an int
reduction index instead of a boolean flag.
* tree-vect-stmts.c (vectorizable_condition): Likewise.
Swap the "then" and "else" values for EXTRACT_LAST_REDUCTION
reductions if the reduction accumulator is the "then" rather
than the "else" value.
(vect_analyze_stmt): Update call accordingly.
(vect_transform_stmt): Likewise.
* tree-vect-loop.c (vectorizable_reduction): Likewise,
asserting that the index is > 0.
Index: gcc/tree-vectorizer.h
===================================================================
--- gcc/tree-vectorizer.h 2019-09-05 08:49:30.717740417 +0100
+++ gcc/tree-vectorizer.h 2019-09-19 08:43:53.965866883 +0100
@@ -1541,7 +1541,7 @@ extern void vect_remove_stores (stmt_vec
extern opt_result vect_analyze_stmt (stmt_vec_info, bool *, slp_tree,
slp_instance, stmt_vector_for_cost *);
extern bool vectorizable_condition (stmt_vec_info, gimple_stmt_iterator *,
- stmt_vec_info *, bool, slp_tree,
+ stmt_vec_info *, int, slp_tree,
stmt_vector_for_cost *);
extern bool vectorizable_shift (stmt_vec_info, gimple_stmt_iterator *,
stmt_vec_info *, slp_tree,
Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c 2019-09-17 15:27:13.090053952 +0100
+++ gcc/tree-vect-stmts.c 2019-09-19 08:43:53.965866883 +0100
@@ -9778,7 +9778,7 @@ vect_is_simple_cond (tree cond, vec_info
bool
vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- stmt_vec_info *vec_stmt, bool for_reduction,
+ stmt_vec_info *vec_stmt, int reduc_index,
slp_tree slp_node, stmt_vector_for_cost *cost_vec)
{
vec_info *vinfo = stmt_info->vinfo;
@@ -9807,6 +9807,7 @@ vectorizable_condition (stmt_vec_info st
vec<tree> vec_oprnds3 = vNULL;
tree vec_cmp_type;
bool masked = false;
+ bool for_reduction = (reduc_index > 0);
if (for_reduction && STMT_SLP_TYPE (stmt_info))
return false;
@@ -9888,6 +9889,29 @@ vectorizable_condition (stmt_vec_info st
cond_expr1 = TREE_OPERAND (cond_expr, 1);
}
+ /* For conditional reductions, the "then" value needs to be the candidate
+ value calculated by this iteration while the "else" value needs to be
+ the result carried over from previous iterations. If the COND_EXPR
+ is the other way around, we need to swap it. */
+ bool must_invert_cmp_result = false;
+ if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
+ {
+ if (masked)
+ must_invert_cmp_result = true;
+ else
+ {
+ bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
+ tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
+ if (new_code == ERROR_MARK)
+ must_invert_cmp_result = true;
+ else
+ cond_code = new_code;
+ }
+ /* Make sure we don't accidentally use the old condition. */
+ cond_expr = NULL_TREE;
+ std::swap (then_clause, else_clause);
+ }
+
if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
{
/* Boolean values may have another representation in vectors
@@ -10102,6 +10126,15 @@ vectorizable_condition (stmt_vec_info st
vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
vec_compare = vec_compare_name;
}
+ if (must_invert_cmp_result)
+ {
+ tree vec_compare_name = make_ssa_name (vec_cmp_type);
+ gassign *new_stmt = gimple_build_assign (vec_compare_name,
+ BIT_NOT_EXPR,
+ vec_compare);
+ vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+ vec_compare = vec_compare_name;
+ }
gcall *new_stmt = gimple_build_call_internal
(IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
vec_then_clause);
@@ -10635,7 +10668,7 @@ vect_analyze_stmt (stmt_vec_info stmt_in
node_instance, cost_vec)
|| vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
|| vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
- || vectorizable_condition (stmt_info, NULL, NULL, false, node,
+ || vectorizable_condition (stmt_info, NULL, NULL, 0, node,
cost_vec)
|| vectorizable_comparison (stmt_info, NULL, NULL, node,
cost_vec));
@@ -10654,7 +10687,7 @@ vect_analyze_stmt (stmt_vec_info stmt_in
|| vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
cost_vec)
|| vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
- || vectorizable_condition (stmt_info, NULL, NULL, false, node,
+ || vectorizable_condition (stmt_info, NULL, NULL, 0, node,
cost_vec)
|| vectorizable_comparison (stmt_info, NULL, NULL, node,
cost_vec));
@@ -10759,7 +10792,7 @@ vect_transform_stmt (stmt_vec_info stmt_
break;
case condition_vec_info_type:
- done = vectorizable_condition (stmt_info, gsi, &vec_stmt, false,
+ done = vectorizable_condition (stmt_info, gsi, &vec_stmt, 0,
slp_node, NULL);
gcc_assert (done);
break;
Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c 2019-09-17 15:27:13.078054042 +0100
+++ gcc/tree-vect-loop.c 2019-09-19 08:43:53.961866913 +0100
@@ -6774,8 +6774,9 @@ vectorizable_reduction (stmt_vec_info st
{
/* Only call during the analysis stage, otherwise we'll lose
STMT_VINFO_TYPE. */
+ gcc_assert (reduc_index > 0);
if (!vec_stmt && !vectorizable_condition (stmt_info, gsi, NULL,
- true, NULL, cost_vec))
+ reduc_index, NULL, cost_vec))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -7228,9 +7229,9 @@ vectorizable_reduction (stmt_vec_info st
if (reduction_type == EXTRACT_LAST_REDUCTION)
{
- gcc_assert (!slp_node);
+ gcc_assert (!slp_node && reduc_index > 0);
return vectorizable_condition (stmt_info, gsi, vec_stmt,
- true, NULL, NULL);
+ reduc_index, NULL, NULL);
}
/* Create the destination vector */
@@ -7260,9 +7261,9 @@ vectorizable_reduction (stmt_vec_info st
{
if (code == COND_EXPR)
{
- gcc_assert (!slp_node);
+ gcc_assert (!slp_node && reduc_index > 0);
vectorizable_condition (stmt_info, gsi, vec_stmt,
- true, NULL, NULL);
+ reduc_index, NULL, NULL);
break;
}
if (code == LSHIFT_EXPR
On Thu, 19 Sep 2019, Richard Sandiford wrote: > Richard Biener <rguenther@suse.de> writes: > > It shouldn't be neccessary. > > SVE is the counter-example :-) But the fix is simpler than the code > you removed, so it's still a net win. Yeah, I meant it shouldn't be necessary to swap operands of the original scalar stmts since we keep track of the "order" via reduc_index. > Fixes various vect.exp and aarch64-sve.exp ICEs. OK if it passes > proper testing? OK. Richard. > Thanks, > Richard > > > 2019-09-19 Richard Sandiford <richard.sandiford@arm.com> > > gcc/ > * tree-vectorizer.h (vectorizable_condition): Take an int > reduction index instead of a boolean flag. > * tree-vect-stmts.c (vectorizable_condition): Likewise. > Swap the "then" and "else" values for EXTRACT_LAST_REDUCTION > reductions if the reduction accumulator is the "then" rather > than the "else" value. > (vect_analyze_stmt): Update call accordingly. > (vect_transform_stmt): Likewise. > * tree-vect-loop.c (vectorizable_reduction): Likewise, > asserting that the index is > 0. 
> > Index: gcc/tree-vectorizer.h > =================================================================== > --- gcc/tree-vectorizer.h 2019-09-05 08:49:30.717740417 +0100 > +++ gcc/tree-vectorizer.h 2019-09-19 08:43:53.965866883 +0100 > @@ -1541,7 +1541,7 @@ extern void vect_remove_stores (stmt_vec > extern opt_result vect_analyze_stmt (stmt_vec_info, bool *, slp_tree, > slp_instance, stmt_vector_for_cost *); > extern bool vectorizable_condition (stmt_vec_info, gimple_stmt_iterator *, > - stmt_vec_info *, bool, slp_tree, > + stmt_vec_info *, int, slp_tree, > stmt_vector_for_cost *); > extern bool vectorizable_shift (stmt_vec_info, gimple_stmt_iterator *, > stmt_vec_info *, slp_tree, > Index: gcc/tree-vect-stmts.c > =================================================================== > --- gcc/tree-vect-stmts.c 2019-09-17 15:27:13.090053952 +0100 > +++ gcc/tree-vect-stmts.c 2019-09-19 08:43:53.965866883 +0100 > @@ -9778,7 +9778,7 @@ vect_is_simple_cond (tree cond, vec_info > > bool > vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, > - stmt_vec_info *vec_stmt, bool for_reduction, > + stmt_vec_info *vec_stmt, int reduc_index, > slp_tree slp_node, stmt_vector_for_cost *cost_vec) > { > vec_info *vinfo = stmt_info->vinfo; > @@ -9807,6 +9807,7 @@ vectorizable_condition (stmt_vec_info st > vec<tree> vec_oprnds3 = vNULL; > tree vec_cmp_type; > bool masked = false; > + bool for_reduction = (reduc_index > 0); > > if (for_reduction && STMT_SLP_TYPE (stmt_info)) > return false; > @@ -9888,6 +9889,29 @@ vectorizable_condition (stmt_vec_info st > cond_expr1 = TREE_OPERAND (cond_expr, 1); > } > > + /* For conditional reductions, the "then" value needs to be the candidate > + value calculated by this iteration while the "else" value needs to be > + the result carried over from previous iterations. If the COND_EXPR > + is the other way around, we need to swap it. 
*/ > + bool must_invert_cmp_result = false; > + if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1) > + { > + if (masked) > + must_invert_cmp_result = true; > + else > + { > + bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0)); > + tree_code new_code = invert_tree_comparison (cond_code, honor_nans); > + if (new_code == ERROR_MARK) > + must_invert_cmp_result = true; > + else > + cond_code = new_code; > + } > + /* Make sure we don't accidentally use the old condition. */ > + cond_expr = NULL_TREE; > + std::swap (then_clause, else_clause); > + } > + > if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype)) > { > /* Boolean values may have another representation in vectors > @@ -10102,6 +10126,15 @@ vectorizable_condition (stmt_vec_info st > vect_finish_stmt_generation (stmt_info, new_stmt, gsi); > vec_compare = vec_compare_name; > } > + if (must_invert_cmp_result) > + { > + tree vec_compare_name = make_ssa_name (vec_cmp_type); > + gassign *new_stmt = gimple_build_assign (vec_compare_name, > + BIT_NOT_EXPR, > + vec_compare); > + vect_finish_stmt_generation (stmt_info, new_stmt, gsi); > + vec_compare = vec_compare_name; > + } > gcall *new_stmt = gimple_build_call_internal > (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare, > vec_then_clause); > @@ -10635,7 +10668,7 @@ vect_analyze_stmt (stmt_vec_info stmt_in > node_instance, cost_vec) > || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec) > || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec) > - || vectorizable_condition (stmt_info, NULL, NULL, false, node, > + || vectorizable_condition (stmt_info, NULL, NULL, 0, node, > cost_vec) > || vectorizable_comparison (stmt_info, NULL, NULL, node, > cost_vec)); > @@ -10654,7 +10687,7 @@ vect_analyze_stmt (stmt_vec_info stmt_in > || vectorizable_load (stmt_info, NULL, NULL, node, node_instance, > cost_vec) > || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec) > - || vectorizable_condition (stmt_info, NULL, NULL, false, node, > + 
|| vectorizable_condition (stmt_info, NULL, NULL, 0, node, > cost_vec) > || vectorizable_comparison (stmt_info, NULL, NULL, node, > cost_vec)); > @@ -10759,7 +10792,7 @@ vect_transform_stmt (stmt_vec_info stmt_ > break; > > case condition_vec_info_type: > - done = vectorizable_condition (stmt_info, gsi, &vec_stmt, false, > + done = vectorizable_condition (stmt_info, gsi, &vec_stmt, 0, > slp_node, NULL); > gcc_assert (done); > break; > Index: gcc/tree-vect-loop.c > =================================================================== > --- gcc/tree-vect-loop.c 2019-09-17 15:27:13.078054042 +0100 > +++ gcc/tree-vect-loop.c 2019-09-19 08:43:53.961866913 +0100 > @@ -6774,8 +6774,9 @@ vectorizable_reduction (stmt_vec_info st > { > /* Only call during the analysis stage, otherwise we'll lose > STMT_VINFO_TYPE. */ > + gcc_assert (reduc_index > 0); > if (!vec_stmt && !vectorizable_condition (stmt_info, gsi, NULL, > - true, NULL, cost_vec)) > + reduc_index, NULL, cost_vec)) > { > if (dump_enabled_p ()) > dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > @@ -7228,9 +7229,9 @@ vectorizable_reduction (stmt_vec_info st > > if (reduction_type == EXTRACT_LAST_REDUCTION) > { > - gcc_assert (!slp_node); > + gcc_assert (!slp_node && reduc_index > 0); > return vectorizable_condition (stmt_info, gsi, vec_stmt, > - true, NULL, NULL); > + reduc_index, NULL, NULL); > } > > /* Create the destination vector */ > @@ -7260,9 +7261,9 @@ vectorizable_reduction (stmt_vec_info st > { > if (code == COND_EXPR) > { > - gcc_assert (!slp_node); > + gcc_assert (!slp_node && reduc_index > 0); > vectorizable_condition (stmt_info, gsi, vec_stmt, > - true, NULL, NULL); > + reduc_index, NULL, NULL); > break; > } > if (code == LSHIFT_EXPR >
Hi Richard, > On Thu, 19 Sep 2019, Richard Sandiford wrote: > >> Richard Biener <rguenther@suse.de> writes: >> > It shouldn't be neccessary. >> >> SVE is the counter-example :-) But the fix is simpler than the code >> you removed, so it's still a net win. > > Yeah, I meant it shouldn't be necessary to swap operands of the > original scalar stmts since we keep track of the "order" via > reduc_index. > >> Fixes various vect.exp and aarch64-sve.exp ICEs. OK if it passes >> proper testing? > > OK. I suspect this patch caused +FAIL: gcc.dg/pr88031.c (internal compiler error) +FAIL: gcc.dg/pr88031.c (test for excess errors) I'm seeing it on 32 and 64-bit Solaris/x86, but there are also several reports on aarch64, armv8l, i686, powerpc64*, s390x, x86_64. Excess errors: during GIMPLE pass: vect /vol/gcc/src/hg/trunk/local/gcc/testsuite/gcc.dg/pr88031.c:6:6: internal compiler error: in vectorizable_reduction, at tree-vect-loop.c:6662 0x938ecdf vectorizable_reduction(_stmt_vec_info*, gimple_stmt_iterator*, _stmt_vec_info**, _slp_tree*, _slp_instance*, vec<stmt_info_for_cost, va_heap, vl_ptr>*) /vol/gcc/src/hg/trunk/local/gcc/tree-vect-loop.c:6662 0x936ac2a vect_analyze_stmt(_stmt_vec_info*, bool*, _slp_tree*, _slp_instance*, vec<stmt_info_for_cost, va_heap, vl_ptr>*) /vol/gcc/src/hg/trunk/local/gcc/tree-vect-stmts.c:10667 0x938afce vect_analyze_loop_operations /vol/gcc/src/hg/trunk/local/gcc/tree-vect-loop.c:1580 0x938afce vect_analyze_loop_2 /vol/gcc/src/hg/trunk/local/gcc/tree-vect-loop.c:2026 0x938afce vect_analyze_loop(loop*, _loop_vec_info*, vec_info_shared*) /vol/gcc/src/hg/trunk/local/gcc/tree-vect-loop.c:2329 0x93ab60e try_vectorize_loop_1 /vol/gcc/src/hg/trunk/local/gcc/tree-vectorizer.c:884 0x93abd6c try_vectorize_loop /vol/gcc/src/hg/trunk/local/gcc/tree-vectorizer.c:1030 0x93ac2fb vectorize_loops() /vol/gcc/src/hg/trunk/local/gcc/tree-vectorizer.c:1104 Rainer
On Fri, 20 Sep 2019, Rainer Orth wrote: > Hi Richard, > > > On Thu, 19 Sep 2019, Richard Sandiford wrote: > > > >> Richard Biener <rguenther@suse.de> writes: > >> > It shouldn't be neccessary. > >> > >> SVE is the counter-example :-) But the fix is simpler than the code > >> you removed, so it's still a net win. > > > > Yeah, I meant it shouldn't be necessary to swap operands of the > > original scalar stmts since we keep track of the "order" via > > reduc_index. > > > >> Fixes various vect.exp and aarch64-sve.exp ICEs. OK if it passes > >> proper testing? > > > > OK. > > I suspect this patch caused > > +FAIL: gcc.dg/pr88031.c (internal compiler error) > +FAIL: gcc.dg/pr88031.c (test for excess errors) > > I'm seeing it on 32 and 64-bit Solaris/x86, but there are also several > reports on aarch64, armv8l, i686, powerpc64*, s390x, x86_64. PR91822, should be fixed now. Richard.
On Wed, 18 Sep 2019 at 20:11, Richard Biener <rguenther@suse.de> wrote: > > > It shouldn't be neccessary. > > Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk. > (SLP part testing separately) > > Richard. > > 2019-09-18 Richard Biener <rguenther@suse.de> > > * tree-vect-loop.c (vect_is_simple_reduction): Remove operand > swapping. > (vectorize_fold_left_reduction): Remove assert. > (vectorizable_reduction): Also expect COND_EXPR non-reduction > operand in position 2. Remove assert. > Hi, Since this was committed (r275898), I've noticed a regression on armeb: FAIL: gcc.dg/vect/vect-cond-4.c execution test I'm seeing this with qemu, but I do not have the execution traces yet. Christophe > Index: gcc/tree-vect-loop.c > =================================================================== > --- gcc/tree-vect-loop.c (revision 275872) > +++ gcc/tree-vect-loop.c (working copy) > @@ -3278,56 +3278,8 @@ vect_is_simple_reduction (loop_vec_info > || !flow_bb_inside_loop_p (loop, gimple_bb (def2_info->stmt)) > || vect_valid_reduction_input_p (def2_info))) > { > - if (! nested_in_vect_loop && orig_code != MINUS_EXPR) > - { > - /* Check if we can swap operands (just for simplicity - so that > - the rest of the code can assume that the reduction variable > - is always the last (second) argument). */ > - if (code == COND_EXPR) > - { > - /* Swap cond_expr by inverting the condition. 
*/ > - tree cond_expr = gimple_assign_rhs1 (def_stmt); > - enum tree_code invert_code = ERROR_MARK; > - enum tree_code cond_code = TREE_CODE (cond_expr); > - > - if (TREE_CODE_CLASS (cond_code) == tcc_comparison) > - { > - bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0)); > - invert_code = invert_tree_comparison (cond_code, honor_nans); > - } > - if (invert_code != ERROR_MARK) > - { > - TREE_SET_CODE (cond_expr, invert_code); > - swap_ssa_operands (def_stmt, > - gimple_assign_rhs2_ptr (def_stmt), > - gimple_assign_rhs3_ptr (def_stmt)); > - } > - else > - { > - if (dump_enabled_p ()) > - report_vect_op (MSG_NOTE, def_stmt, > - "detected reduction: cannot swap operands " > - "for cond_expr"); > - return NULL; > - } > - } > - else > - swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt), > - gimple_assign_rhs2_ptr (def_stmt)); > - > - if (dump_enabled_p ()) > - report_vect_op (MSG_NOTE, def_stmt, > - "detected reduction: need to swap operands: "); > - > - if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt))) > - LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true; > - } > - else > - { > - if (dump_enabled_p ()) > - report_vect_op (MSG_NOTE, def_stmt, "detected reduction: "); > - } > - > + if (dump_enabled_p ()) > + report_vect_op (MSG_NOTE, def_stmt, "detected reduction: "); > return def_stmt_info; > } > > @@ -5969,7 +5921,6 @@ vectorize_fold_left_reduction (stmt_vec_ > gcc_assert (!nested_in_vect_loop_p (loop, stmt_info)); > gcc_assert (ncopies == 1); > gcc_assert (TREE_CODE_LENGTH (code) == binary_op); > - gcc_assert (reduc_index == (code == MINUS_EXPR ? 0 : 1)); > gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) > == FOLD_LEFT_REDUCTION); > > @@ -6542,9 +6493,9 @@ vectorizable_reduction (stmt_vec_info st > reduc_index = i; > } > > - if (i == 1 && code == COND_EXPR) > + if (code == COND_EXPR) > { > - /* Record how value of COND_EXPR is defined. */ > + /* Record how the non-reduction-def value of COND_EXPR is defined. 
*/ > if (dt == vect_constant_def) > { > cond_reduc_dt = dt; > @@ -6622,10 +6573,6 @@ vectorizable_reduction (stmt_vec_info st > return false; > } > > - /* vect_is_simple_reduction ensured that operand 2 is the > - loop-carried operand. */ > - gcc_assert (reduc_index == 2); > - > /* Loop peeling modifies initial value of reduction PHI, which > makes the reduction stmt to be transformed different to the > original stmt analyzed. We need to record reduction code for
On Tue, 24 Sep 2019, Christophe Lyon wrote: > On Wed, 18 Sep 2019 at 20:11, Richard Biener <rguenther@suse.de> wrote: > > > > > > It shouldn't be neccessary. > > > > Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk. > > (SLP part testing separately) > > > > Richard. > > > > 2019-09-18 Richard Biener <rguenther@suse.de> > > > > * tree-vect-loop.c (vect_is_simple_reduction): Remove operand > > swapping. > > (vectorize_fold_left_reduction): Remove assert. > > (vectorizable_reduction): Also expect COND_EXPR non-reduction > > operand in position 2. Remove assert. > > > > Hi, > > Since this was committed (r275898), I've noticed a regression on armeb: > FAIL: gcc.dg/vect/vect-cond-4.c execution test > > I'm seeing this with qemu, but I do not have the execution traces yet. Can you open a bugreport please? Thanks, Richard. > Christophe > > > Index: gcc/tree-vect-loop.c > > =================================================================== > > --- gcc/tree-vect-loop.c (revision 275872) > > +++ gcc/tree-vect-loop.c (working copy) > > @@ -3278,56 +3278,8 @@ vect_is_simple_reduction (loop_vec_info > > || !flow_bb_inside_loop_p (loop, gimple_bb (def2_info->stmt)) > > || vect_valid_reduction_input_p (def2_info))) > > { > > - if (! nested_in_vect_loop && orig_code != MINUS_EXPR) > > - { > > - /* Check if we can swap operands (just for simplicity - so that > > - the rest of the code can assume that the reduction variable > > - is always the last (second) argument). */ > > - if (code == COND_EXPR) > > - { > > - /* Swap cond_expr by inverting the condition. 
*/ > > - tree cond_expr = gimple_assign_rhs1 (def_stmt); > > - enum tree_code invert_code = ERROR_MARK; > > - enum tree_code cond_code = TREE_CODE (cond_expr); > > - > > - if (TREE_CODE_CLASS (cond_code) == tcc_comparison) > > - { > > - bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0)); > > - invert_code = invert_tree_comparison (cond_code, honor_nans); > > - } > > - if (invert_code != ERROR_MARK) > > - { > > - TREE_SET_CODE (cond_expr, invert_code); > > - swap_ssa_operands (def_stmt, > > - gimple_assign_rhs2_ptr (def_stmt), > > - gimple_assign_rhs3_ptr (def_stmt)); > > - } > > - else > > - { > > - if (dump_enabled_p ()) > > - report_vect_op (MSG_NOTE, def_stmt, > > - "detected reduction: cannot swap operands " > > - "for cond_expr"); > > - return NULL; > > - } > > - } > > - else > > - swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt), > > - gimple_assign_rhs2_ptr (def_stmt)); > > - > > - if (dump_enabled_p ()) > > - report_vect_op (MSG_NOTE, def_stmt, > > - "detected reduction: need to swap operands: "); > > - > > - if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt))) > > - LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true; > > - } > > - else > > - { > > - if (dump_enabled_p ()) > > - report_vect_op (MSG_NOTE, def_stmt, "detected reduction: "); > > - } > > - > > + if (dump_enabled_p ()) > > + report_vect_op (MSG_NOTE, def_stmt, "detected reduction: "); > > return def_stmt_info; > > } > > > > @@ -5969,7 +5921,6 @@ vectorize_fold_left_reduction (stmt_vec_ > > gcc_assert (!nested_in_vect_loop_p (loop, stmt_info)); > > gcc_assert (ncopies == 1); > > gcc_assert (TREE_CODE_LENGTH (code) == binary_op); > > - gcc_assert (reduc_index == (code == MINUS_EXPR ? 
0 : 1)); > > gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) > > == FOLD_LEFT_REDUCTION); > > > > @@ -6542,9 +6493,9 @@ vectorizable_reduction (stmt_vec_info st > > reduc_index = i; > > } > > > > - if (i == 1 && code == COND_EXPR) > > + if (code == COND_EXPR) > > { > > - /* Record how value of COND_EXPR is defined. */ > > + /* Record how the non-reduction-def value of COND_EXPR is defined. */ > > if (dt == vect_constant_def) > > { > > cond_reduc_dt = dt; > > @@ -6622,10 +6573,6 @@ vectorizable_reduction (stmt_vec_info st > > return false; > > } > > > > - /* vect_is_simple_reduction ensured that operand 2 is the > > - loop-carried operand. */ > > - gcc_assert (reduc_index == 2); > > - > > /* Loop peeling modifies initial value of reduction PHI, which > > makes the reduction stmt to be transformed different to the > > original stmt analyzed. We need to record reduction code for >
On Wed, 25 Sep 2019 at 09:33, Richard Biener <rguenther@suse.de> wrote: > > On Tue, 24 Sep 2019, Christophe Lyon wrote: > > > On Wed, 18 Sep 2019 at 20:11, Richard Biener <rguenther@suse.de> wrote: > > > > > > > > > It shouldn't be neccessary. > > > > > > Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk. > > > (SLP part testing separately) > > > > > > Richard. > > > > > > 2019-09-18 Richard Biener <rguenther@suse.de> > > > > > > * tree-vect-loop.c (vect_is_simple_reduction): Remove operand > > > swapping. > > > (vectorize_fold_left_reduction): Remove assert. > > > (vectorizable_reduction): Also expect COND_EXPR non-reduction > > > operand in position 2. Remove assert. > > > > > > > Hi, > > > > Since this was committed (r275898), I've noticed a regression on armeb: > > FAIL: gcc.dg/vect/vect-cond-4.c execution test > > > > I'm seeing this with qemu, but I do not have the execution traces yet. > > Can you open a bugreport please? > Sure, I've created https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91909 > Thanks, > Richard. > > > Christophe > > > > > Index: gcc/tree-vect-loop.c > > > =================================================================== > > > --- gcc/tree-vect-loop.c (revision 275872) > > > +++ gcc/tree-vect-loop.c (working copy) > > > @@ -3278,56 +3278,8 @@ vect_is_simple_reduction (loop_vec_info > > > || !flow_bb_inside_loop_p (loop, gimple_bb (def2_info->stmt)) > > > || vect_valid_reduction_input_p (def2_info))) > > > { > > > - if (! nested_in_vect_loop && orig_code != MINUS_EXPR) > > > - { > > > - /* Check if we can swap operands (just for simplicity - so that > > > - the rest of the code can assume that the reduction variable > > > - is always the last (second) argument). */ > > > - if (code == COND_EXPR) > > > - { > > > - /* Swap cond_expr by inverting the condition. 
*/ > > > - tree cond_expr = gimple_assign_rhs1 (def_stmt); > > > - enum tree_code invert_code = ERROR_MARK; > > > - enum tree_code cond_code = TREE_CODE (cond_expr); > > > - > > > - if (TREE_CODE_CLASS (cond_code) == tcc_comparison) > > > - { > > > - bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0)); > > > - invert_code = invert_tree_comparison (cond_code, honor_nans); > > > - } > > > - if (invert_code != ERROR_MARK) > > > - { > > > - TREE_SET_CODE (cond_expr, invert_code); > > > - swap_ssa_operands (def_stmt, > > > - gimple_assign_rhs2_ptr (def_stmt), > > > - gimple_assign_rhs3_ptr (def_stmt)); > > > - } > > > - else > > > - { > > > - if (dump_enabled_p ()) > > > - report_vect_op (MSG_NOTE, def_stmt, > > > - "detected reduction: cannot swap operands " > > > - "for cond_expr"); > > > - return NULL; > > > - } > > > - } > > > - else > > > - swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt), > > > - gimple_assign_rhs2_ptr (def_stmt)); > > > - > > > - if (dump_enabled_p ()) > > > - report_vect_op (MSG_NOTE, def_stmt, > > > - "detected reduction: need to swap operands: "); > > > - > > > - if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt))) > > > - LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true; > > > - } > > > - else > > > - { > > > - if (dump_enabled_p ()) > > > - report_vect_op (MSG_NOTE, def_stmt, "detected reduction: "); > > > - } > > > - > > > + if (dump_enabled_p ()) > > > + report_vect_op (MSG_NOTE, def_stmt, "detected reduction: "); > > > return def_stmt_info; > > > } > > > > > > @@ -5969,7 +5921,6 @@ vectorize_fold_left_reduction (stmt_vec_ > > > gcc_assert (!nested_in_vect_loop_p (loop, stmt_info)); > > > gcc_assert (ncopies == 1); > > > gcc_assert (TREE_CODE_LENGTH (code) == binary_op); > > > - gcc_assert (reduc_index == (code == MINUS_EXPR ? 
0 : 1)); > > > gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) > > > == FOLD_LEFT_REDUCTION); > > > > > > @@ -6542,9 +6493,9 @@ vectorizable_reduction (stmt_vec_info st > > > reduc_index = i; > > > } > > > > > > - if (i == 1 && code == COND_EXPR) > > > + if (code == COND_EXPR) > > > { > > > - /* Record how value of COND_EXPR is defined. */ > > > + /* Record how the non-reduction-def value of COND_EXPR is defined. */ > > > if (dt == vect_constant_def) > > > { > > > cond_reduc_dt = dt; > > > @@ -6622,10 +6573,6 @@ vectorizable_reduction (stmt_vec_info st > > > return false; > > > } > > > > > > - /* vect_is_simple_reduction ensured that operand 2 is the > > > - loop-carried operand. */ > > > - gcc_assert (reduc_index == 2); > > > - > > > /* Loop peeling modifies initial value of reduction PHI, which > > > makes the reduction stmt to be transformed different to the > > > original stmt analyzed. We need to record reduction code for > > > > -- > Richard Biener <rguenther@suse.de> > SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg, > Germany; GF: Felix Imendörffer; HRB 247165 (AG München)
Index: gcc/tree-vect-loop.c =================================================================== --- gcc/tree-vect-loop.c (revision 275872) +++ gcc/tree-vect-loop.c (working copy) @@ -3278,56 +3278,8 @@ vect_is_simple_reduction (loop_vec_info || !flow_bb_inside_loop_p (loop, gimple_bb (def2_info->stmt)) || vect_valid_reduction_input_p (def2_info))) { - if (! nested_in_vect_loop && orig_code != MINUS_EXPR) - { - /* Check if we can swap operands (just for simplicity - so that - the rest of the code can assume that the reduction variable - is always the last (second) argument). */ - if (code == COND_EXPR) - { - /* Swap cond_expr by inverting the condition. */ - tree cond_expr = gimple_assign_rhs1 (def_stmt); - enum tree_code invert_code = ERROR_MARK; - enum tree_code cond_code = TREE_CODE (cond_expr); - - if (TREE_CODE_CLASS (cond_code) == tcc_comparison) - { - bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0)); - invert_code = invert_tree_comparison (cond_code, honor_nans); - } - if (invert_code != ERROR_MARK) - { - TREE_SET_CODE (cond_expr, invert_code); - swap_ssa_operands (def_stmt, - gimple_assign_rhs2_ptr (def_stmt), - gimple_assign_rhs3_ptr (def_stmt)); - } - else - { - if (dump_enabled_p ()) - report_vect_op (MSG_NOTE, def_stmt, - "detected reduction: cannot swap operands " - "for cond_expr"); - return NULL; - } - } - else - swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt), - gimple_assign_rhs2_ptr (def_stmt)); - - if (dump_enabled_p ()) - report_vect_op (MSG_NOTE, def_stmt, - "detected reduction: need to swap operands: "); - - if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt))) - LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true; - } - else - { - if (dump_enabled_p ()) - report_vect_op (MSG_NOTE, def_stmt, "detected reduction: "); - } - + if (dump_enabled_p ()) + report_vect_op (MSG_NOTE, def_stmt, "detected reduction: "); return def_stmt_info; } @@ -5969,7 +5921,6 @@ vectorize_fold_left_reduction (stmt_vec_ gcc_assert 
(!nested_in_vect_loop_p (loop, stmt_info)); gcc_assert (ncopies == 1); gcc_assert (TREE_CODE_LENGTH (code) == binary_op); - gcc_assert (reduc_index == (code == MINUS_EXPR ? 0 : 1)); gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == FOLD_LEFT_REDUCTION); @@ -6542,9 +6493,9 @@ vectorizable_reduction (stmt_vec_info st reduc_index = i; } - if (i == 1 && code == COND_EXPR) + if (code == COND_EXPR) { - /* Record how value of COND_EXPR is defined. */ + /* Record how the non-reduction-def value of COND_EXPR is defined. */ if (dt == vect_constant_def) { cond_reduc_dt = dt; @@ -6622,10 +6573,6 @@ vectorizable_reduction (stmt_vec_info st return false; } - /* vect_is_simple_reduction ensured that operand 2 is the - loop-carried operand. */ - gcc_assert (reduc_index == 2); - /* Loop peeling modifies initial value of reduction PHI, which makes the reduction stmt to be transformed different to the original stmt analyzed. We need to record reduction code for