Message ID | nycvar.YFH.7.76.1911080955100.5566@zhemvz.fhfr.qr |
---|---|
State | New |
Headers | show |
Series | Fix PR92324 | expand |
Richard Biener <rguenther@suse.de> writes: > I've been sitting on this for a few days since I'm not 100% happy > with how the code looks like. There's possibly still holes in it > (chains with mixed signed/unsigned adds for example might pick > up signed adds in the epilogue), but the wrong-code cases should > work fine now. I'm probably going to followup with some > mass renaming of variable/parameter names to make it more clear > which stmt / type we are actually looking at ... > > Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk. Does this look like the right way of updating neutral_op_for_slp_reduction? It now needs to know whether the caller is using STMT_VINFO_REDUC_VECTYPE (for an epilogue value) or STMT_VINFO_REDUC_VECTYPE_IN (for a PHI argument). Fixes various gcc.target/aarch64/sve/slp_* tests, will give it a full test on aarch64-linux-gnu. Thanks, Richard 2019-11-08 Richard Sandiford <richard.sandiford@arm.com> gcc/ * tree-vect-loop.c (neutral_op_for_slp_reduction): Take the vector type as an argument rather than reading it from the stmt_vec_info. (vect_create_epilog_for_reduction): Update accordingly, passing the STMT_VINFO_REDUC_VECTYPE. (vectorizable_reduction): Likewise. (vect_transform_cycle_phi): Likewise, but passing the STMT_VINFO_REDUC_VECTYPE_IN. Index: gcc/tree-vect-loop.c =================================================================== --- gcc/tree-vect-loop.c 2019-11-08 09:06:29.654896085 +0000 +++ gcc/tree-vect-loop.c 2019-11-08 10:41:54.498861004 +0000 @@ -2586,17 +2586,17 @@ reduction_fn_for_scalar_code (enum tree_ /* If there is a neutral value X such that SLP reduction NODE would not be affected by the introduction of additional X elements, return that X, - otherwise return null. CODE is the code of the reduction. REDUC_CHAIN - is true if the SLP statements perform a single reduction, false if each - statement performs an independent reduction. */ + otherwise return null. CODE is the code of the reduction and VECTOR_TYPE + is the vector type that would hold element X. REDUC_CHAIN is true if + the SLP statements perform a single reduction, false if each statement + performs an independent reduction. */ static tree -neutral_op_for_slp_reduction (slp_tree slp_node, tree_code code, - bool reduc_chain) +neutral_op_for_slp_reduction (slp_tree slp_node, tree vector_type, + tree_code code, bool reduc_chain) { vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node); stmt_vec_info stmt_vinfo = stmts[0]; - tree vector_type = STMT_VINFO_VECTYPE (stmt_vinfo); tree scalar_type = TREE_TYPE (vector_type); class loop *loop = gimple_bb (stmt_vinfo->stmt)->loop_father; gcc_assert (loop); @@ -4216,11 +4216,6 @@ vect_create_epilog_for_reduction (stmt_v = as_a <gphi *> (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))->stmt); enum tree_code code = STMT_VINFO_REDUC_CODE (reduc_info); internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info); - tree neutral_op = NULL_TREE; - if (slp_node) - neutral_op - = neutral_op_for_slp_reduction (slp_node_instance->reduc_phis, code, - REDUC_GROUP_FIRST_ELEMENT (stmt_info)); stmt_vec_info prev_phi_info; tree vectype; machine_mode mode; @@ -4267,11 +4262,15 @@ vect_create_epilog_for_reduction (stmt_v gcc_assert (vectype); mode = TYPE_MODE (vectype); + tree neutral_op = NULL_TREE; tree initial_def = NULL; tree induc_val = NULL_TREE; tree adjustment_def = NULL; if (slp_node) - ; + neutral_op + = neutral_op_for_slp_reduction (slp_node_instance->reduc_phis, + vectype, code, + REDUC_GROUP_FIRST_ELEMENT (stmt_info)); else { /* Get at the scalar def before the loop, that defines the initial value @@ -6210,7 +6209,7 @@ vectorizable_reduction (stmt_vec_info st tree neutral_op = NULL_TREE; if (slp_node) neutral_op = neutral_op_for_slp_reduction - (slp_node_instance->reduc_phis, orig_code, + (slp_node_instance->reduc_phis, vectype_out, orig_code, REDUC_GROUP_FIRST_ELEMENT (stmt_info) != NULL); if (double_reduc && reduction_type == FOLD_LEFT_REDUCTION) @@ -6793,7 +6792,7 @@ vect_transform_cycle_phi (stmt_vec_info gcc_assert (slp_node == slp_node_instance->reduc_phis); stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info); tree neutral_op - = neutral_op_for_slp_reduction (slp_node, + = neutral_op_for_slp_reduction (slp_node, vectype_in, STMT_VINFO_REDUC_CODE (reduc_info), first != NULL); get_initial_defs_for_reduction (slp_node_instance->reduc_phis,
On Fri, 8 Nov 2019, Richard Sandiford wrote: > Richard Biener <rguenther@suse.de> writes: > > I've been sitting on this for a few days since I'm not 100% happy > > with how the code looks like. There's possibly still holes in it > > (chains with mixed signed/unsigned adds for example might pick > > up signed adds in the epilogue), but the wrong-code cases should > > work fine now. I'm probably going to followup with some > > mass renaming of variable/parameter names to make it more clear > > which stmt / type we are actually looking at ... > > > > Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk. > > Does this look like the right way of updating neutral_op_for_slp_reduction? > It now needs to know whether the caller is using STMT_VINFO_REDUC_VECTYPE > (for an epilogue value) or STMT_VINFO_REDUC_VECTYPE_IN (for a PHI argument). > > Fixes various gcc.target/aarch64/sve/slp_* tests, will give it a > full test on aarch64-linux-gnu. Yeah, it looks sensible. In vect_create_epilog_for_reduction please move the call down to the only use in the else if (direct_slp_reduc) { block. Thanks, Richard. > Thanks, > Richard > > > 2019-11-08 Richard Sandiford <richard.sandiford@arm.com> > > gcc/ > * tree-vect-loop.c (neutral_op_for_slp_reduction): Take the > vector type as an argument rather than reading it from the > stmt_vec_info. > (vect_create_epilog_for_reduction): Update accordingly, > passing the STMT_VINFO_REDUC_VECTYPE. > (vectorizable_reduction): Likewise. > (vect_transform_cycle_phi): Likewise, but passing the > STMT_VINFO_REDUC_VECTYPE_IN. > > Index: gcc/tree-vect-loop.c > =================================================================== > --- gcc/tree-vect-loop.c 2019-11-08 09:06:29.654896085 +0000 > +++ gcc/tree-vect-loop.c 2019-11-08 10:41:54.498861004 +0000 > @@ -2586,17 +2586,17 @@ reduction_fn_for_scalar_code (enum tree_ > > /* If there is a neutral value X such that SLP reduction NODE would not > be affected by the introduction of additional X elements, return that X, > - otherwise return null. CODE is the code of the reduction. REDUC_CHAIN > - is true if the SLP statements perform a single reduction, false if each > - statement performs an independent reduction. */ > + otherwise return null. CODE is the code of the reduction and VECTOR_TYPE > + is the vector type that would hold element X. REDUC_CHAIN is true if > + the SLP statements perform a single reduction, false if each statement > + performs an independent reduction. */ > > static tree > -neutral_op_for_slp_reduction (slp_tree slp_node, tree_code code, > - bool reduc_chain) > +neutral_op_for_slp_reduction (slp_tree slp_node, tree vector_type, > + tree_code code, bool reduc_chain) > { > vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node); > stmt_vec_info stmt_vinfo = stmts[0]; > - tree vector_type = STMT_VINFO_VECTYPE (stmt_vinfo); > tree scalar_type = TREE_TYPE (vector_type); > class loop *loop = gimple_bb (stmt_vinfo->stmt)->loop_father; > gcc_assert (loop); > @@ -4216,11 +4216,6 @@ vect_create_epilog_for_reduction (stmt_v > = as_a <gphi *> (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))->stmt); > enum tree_code code = STMT_VINFO_REDUC_CODE (reduc_info); > internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info); > - tree neutral_op = NULL_TREE; > - if (slp_node) > - neutral_op > - = neutral_op_for_slp_reduction (slp_node_instance->reduc_phis, code, > - REDUC_GROUP_FIRST_ELEMENT (stmt_info)); > stmt_vec_info prev_phi_info; > tree vectype; > machine_mode mode; > @@ -4267,11 +4262,15 @@ vect_create_epilog_for_reduction (stmt_v > gcc_assert (vectype); > mode = TYPE_MODE (vectype); > > + tree neutral_op = NULL_TREE; > tree initial_def = NULL; > tree induc_val = NULL_TREE; > tree adjustment_def = NULL; > if (slp_node) > - ; > + neutral_op > + = neutral_op_for_slp_reduction (slp_node_instance->reduc_phis, > + vectype, code, > + REDUC_GROUP_FIRST_ELEMENT (stmt_info)); > else > { > /* Get at the scalar def before the loop, that defines the initial value > @@ -6210,7 +6209,7 @@ vectorizable_reduction (stmt_vec_info st > tree neutral_op = NULL_TREE; > if (slp_node) > neutral_op = neutral_op_for_slp_reduction > - (slp_node_instance->reduc_phis, orig_code, > + (slp_node_instance->reduc_phis, vectype_out, orig_code, > REDUC_GROUP_FIRST_ELEMENT (stmt_info) != NULL); > > if (double_reduc && reduction_type == FOLD_LEFT_REDUCTION) > @@ -6793,7 +6792,7 @@ vect_transform_cycle_phi (stmt_vec_info > gcc_assert (slp_node == slp_node_instance->reduc_phis); > stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info); > tree neutral_op > - = neutral_op_for_slp_reduction (slp_node, > + = neutral_op_for_slp_reduction (slp_node, vectype_in, > STMT_VINFO_REDUC_CODE (reduc_info), > first != NULL); > get_initial_defs_for_reduction (slp_node_instance->reduc_phis, >
Richard Biener <rguenther@suse.de> writes: > On Fri, 8 Nov 2019, Richard Sandiford wrote: > >> Richard Biener <rguenther@suse.de> writes: >> > I've been sitting on this for a few days since I'm not 100% happy >> > with how the code looks like. There's possibly still holes in it >> > (chains with mixed signed/unsigned adds for example might pick >> > up signed adds in the epilogue), but the wrong-code cases should >> > work fine now. I'm probably going to followup with some >> > mass renaming of variable/parameter names to make it more clear >> > which stmt / type we are actually looking at ... >> > >> > Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk. >> >> Does this look like the right way of updating neutral_op_for_slp_reduction? >> It now needs to know whether the caller is using STMT_VINFO_REDUC_VECTYPE >> (for an epilogue value) or STMT_VINFO_REDUC_VECTYPE_IN (for a PHI argument). >> >> Fixes various gcc.target/aarch64/sve/slp_* tests, will give it a >> full test on aarch64-linux-gnu. > > Yeah, it looks sensible. In vect_create_epilog_for_reduction > please move the call down to the only use in the > > else if (direct_slp_reduc) > { > > block. Thanks, here's what I installed after testing on aarch64-linux-gnu and x86_64-linux-gnu. Richard 2019-11-08 Richard Sandiford <richard.sandiford@arm.com> gcc/ * tree-vect-loop.c (neutral_op_for_slp_reduction): Take the vector type as an argument rather than reading it from the stmt_vec_info. (vect_create_epilog_for_reduction): Update accordingly. (vectorizable_reduction): Likewise. (vect_transform_cycle_phi): Likewise. Index: gcc/tree-vect-loop.c =================================================================== --- gcc/tree-vect-loop.c 2019-11-08 11:58:22.331095690 +0000 +++ gcc/tree-vect-loop.c 2019-11-08 16:06:11.389032120 +0000 @@ -2590,17 +2590,17 @@ reduction_fn_for_scalar_code (enum tree_ /* If there is a neutral value X such that SLP reduction NODE would not be affected by the introduction of additional X elements, return that X, - otherwise return null. CODE is the code of the reduction. REDUC_CHAIN - is true if the SLP statements perform a single reduction, false if each - statement performs an independent reduction. */ + otherwise return null. CODE is the code of the reduction and VECTOR_TYPE + is the vector type that would hold element X. REDUC_CHAIN is true if + the SLP statements perform a single reduction, false if each statement + performs an independent reduction. */ static tree -neutral_op_for_slp_reduction (slp_tree slp_node, tree_code code, - bool reduc_chain) +neutral_op_for_slp_reduction (slp_tree slp_node, tree vector_type, + tree_code code, bool reduc_chain) { vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node); stmt_vec_info stmt_vinfo = stmts[0]; - tree vector_type = STMT_VINFO_VECTYPE (stmt_vinfo); tree scalar_type = TREE_TYPE (vector_type); class loop *loop = gimple_bb (stmt_vinfo->stmt)->loop_father; gcc_assert (loop); @@ -4220,11 +4220,6 @@ vect_create_epilog_for_reduction (stmt_v = as_a <gphi *> (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))->stmt); enum tree_code code = STMT_VINFO_REDUC_CODE (reduc_info); internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info); - tree neutral_op = NULL_TREE; - if (slp_node) - neutral_op - = neutral_op_for_slp_reduction (slp_node_instance->reduc_phis, code, - REDUC_GROUP_FIRST_ELEMENT (stmt_info)); stmt_vec_info prev_phi_info; tree vectype; machine_mode mode; @@ -4822,6 +4817,14 @@ vect_create_epilog_for_reduction (stmt_v scalar value if we have one, otherwise the initial scalar value is itself a neutral value. */ tree vector_identity = NULL_TREE; + tree neutral_op = NULL_TREE; + if (slp_node) + { + stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (stmt_info); + neutral_op + = neutral_op_for_slp_reduction (slp_node_instance->reduc_phis, + vectype, code, first != NULL); + } if (neutral_op) vector_identity = gimple_build_vector_from_val (&seq, vectype, neutral_op); @@ -6214,7 +6217,7 @@ vectorizable_reduction (stmt_vec_info st tree neutral_op = NULL_TREE; if (slp_node) neutral_op = neutral_op_for_slp_reduction - (slp_node_instance->reduc_phis, orig_code, + (slp_node_instance->reduc_phis, vectype_out, orig_code, REDUC_GROUP_FIRST_ELEMENT (stmt_info) != NULL); if (double_reduc && reduction_type == FOLD_LEFT_REDUCTION) @@ -6797,7 +6800,7 @@ vect_transform_cycle_phi (stmt_vec_info gcc_assert (slp_node == slp_node_instance->reduc_phis); stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info); tree neutral_op - = neutral_op_for_slp_reduction (slp_node, + = neutral_op_for_slp_reduction (slp_node, vectype_out, STMT_VINFO_REDUC_CODE (reduc_info), first != NULL); get_initial_defs_for_reduction (slp_node_instance->reduc_phis,
On Fri, 8 Nov 2019 at 09:57, Richard Biener <rguenther@suse.de> wrote: > > > I've been sitting on this for a few days since I'm not 100% happy > with how the code looks like. There's possibly still holes in it > (chains with mixed signed/unsigned adds for example might pick > up signed adds in the epilogue), but the wrong-code cases should > work fine now. I'm probably going to followup with some > mass renaming of variable/parameter names to make it more clear > which stmt / type we are actually looking at ... > > Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk. > > Richard. > > 2019-11-08 Richard Biener <rguenther@suse.de> > > PR tree-optimization/ > * tree-vect-loop.c (vect_create_epilog_for_reduction): Use > STMT_VINFO_REDUC_VECTYPE for all computations, inserting > sign-conversions as necessary. > (vectorizable_reduction): Reject conversions in the chain > that are not sign-conversions, base analysis on a non-converting > stmt and its operation sign. Set STMT_VINFO_REDUC_VECTYPE. > * tree-vect-stmts.c (vect_stmt_relevant_p): Don't dump anything > for debug stmts. > * tree-vectorizer.h (_stmt_vec_info::reduc_vectype): New. > (STMT_VINFO_REDUC_VECTYPE): Likewise. > > * gcc.dg/vect/pr92205.c: XFAIL. > * gcc.dg/vect/pr92324-1.c: New testcase. > * gcc.dg/vect/pr92324-2.c: Likewise. > Hi Richard, This new testcase (pr92324-2.c) fails on arm/aarch64: FAIL: gcc.dg/vect/pr92324-2.c -flto -ffat-lto-objects execution test FAIL: gcc.dg/vect/pr92324-2.c execution test Christophe > Index: gcc/tree-vect-loop.c > =================================================================== > --- gcc/tree-vect-loop.c (revision 277922) > +++ gcc/tree-vect-loop.c (working copy) > @@ -4231,7 +4231,6 @@ vect_create_epilog_for_reduction (stmt_v > gimple *new_phi = NULL, *phi; > stmt_vec_info phi_info; > gimple_stmt_iterator exit_gsi; > - tree vec_dest; > tree new_temp = NULL_TREE, new_name, new_scalar_dest; > gimple *epilog_stmt = NULL; > gimple *exit_phi; > @@ -4264,7 +4263,7 @@ vect_create_epilog_for_reduction (stmt_v > } > gcc_assert (!nested_in_vect_loop || double_reduc); > > - vectype = STMT_VINFO_VECTYPE (stmt_info); > + vectype = STMT_VINFO_REDUC_VECTYPE (reduc_info); > gcc_assert (vectype); > mode = TYPE_MODE (vectype); > > @@ -4505,48 +4504,43 @@ vect_create_epilog_for_reduction (stmt_v > one vector. */ > if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) || direct_slp_reduc) > { > + gimple_seq stmts = NULL; > tree first_vect = PHI_RESULT (new_phis[0]); > - gassign *new_vec_stmt = NULL; > - vec_dest = vect_create_destination_var (scalar_dest, vectype); > + first_vect = gimple_convert (&stmts, vectype, first_vect); > for (k = 1; k < new_phis.length (); k++) > { > gimple *next_phi = new_phis[k]; > tree second_vect = PHI_RESULT (next_phi); > - tree tem = make_ssa_name (vec_dest, new_vec_stmt); > - new_vec_stmt = gimple_build_assign (tem, code, > - first_vect, second_vect); > - gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT); > - first_vect = tem; > + second_vect = gimple_convert (&stmts, vectype, second_vect); > + first_vect = gimple_build (&stmts, code, vectype, > + first_vect, second_vect); > } > + gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); > > new_phi_result = first_vect; > - if (new_vec_stmt) > - { > - new_phis.truncate (0); > - new_phis.safe_push (new_vec_stmt); > - } > + new_phis.truncate (0); > + new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect)); > } > /* Likewise if we couldn't use a single defuse cycle. */ > else if (ncopies > 1) > { > gcc_assert (new_phis.length () == 1); > + gimple_seq stmts = NULL; > tree first_vect = PHI_RESULT (new_phis[0]); > - gassign *new_vec_stmt = NULL; > - vec_dest = vect_create_destination_var (scalar_dest, vectype); > + first_vect = gimple_convert (&stmts, vectype, first_vect); > stmt_vec_info next_phi_info = loop_vinfo->lookup_stmt (new_phis[0]); > for (int k = 1; k < ncopies; ++k) > { > next_phi_info = STMT_VINFO_RELATED_STMT (next_phi_info); > tree second_vect = PHI_RESULT (next_phi_info->stmt); > - tree tem = make_ssa_name (vec_dest, new_vec_stmt); > - new_vec_stmt = gimple_build_assign (tem, code, > - first_vect, second_vect); > - gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT); > - first_vect = tem; > + second_vect = gimple_convert (&stmts, vectype, second_vect); > + first_vect = gimple_build (&stmts, code, vectype, > + first_vect, second_vect); > } > + gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); > new_phi_result = first_vect; > new_phis.truncate (0); > - new_phis.safe_push (new_vec_stmt); > + new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect)); > } > else > new_phi_result = PHI_RESULT (new_phis[0]); > @@ -4877,13 +4871,14 @@ vect_create_epilog_for_reduction (stmt_v > in a vector mode of smaller size and first reduce upper/lower > halves against each other. */ > enum machine_mode mode1 = mode; > + tree stype = TREE_TYPE (vectype); > unsigned sz = tree_to_uhwi (TYPE_SIZE_UNIT (vectype)); > unsigned sz1 = sz; > if (!slp_reduc > && (mode1 = targetm.vectorize.split_reduction (mode)) != mode) > sz1 = GET_MODE_SIZE (mode1).to_constant (); > > - tree vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz1); > + tree vectype1 = get_vectype_for_scalar_type_and_size (stype, sz1); > reduce_with_shift = have_whole_vector_shift (mode1); > if (!VECTOR_MODE_P (mode1)) > reduce_with_shift = false; > @@ -4901,7 +4896,7 @@ vect_create_epilog_for_reduction (stmt_v > { > gcc_assert (!slp_reduc); > sz /= 2; > - vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz); > + vectype1 = get_vectype_for_scalar_type_and_size (stype, sz); > > /* The target has to make sure we support lowpart/highpart > extraction, either via direct vector extract or through > @@ -5004,7 +4999,8 @@ vect_create_epilog_for_reduction (stmt_v > dump_printf_loc (MSG_NOTE, vect_location, > "Reduce using vector shifts\n"); > > - vec_dest = vect_create_destination_var (scalar_dest, vectype1); > + gimple_seq stmts = NULL; > + new_temp = gimple_convert (&stmts, vectype1, new_temp); > for (elt_offset = nelements / 2; > elt_offset >= 1; > elt_offset /= 2) > @@ -5012,18 +5008,12 @@ vect_create_epilog_for_reduction (stmt_v > calc_vec_perm_mask_for_shift (elt_offset, nelements, &sel); > indices.new_vector (sel, 2, nelements); > tree mask = vect_gen_perm_mask_any (vectype1, indices); > - epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR, > - new_temp, zero_vec, mask); > - new_name = make_ssa_name (vec_dest, epilog_stmt); > - gimple_assign_set_lhs (epilog_stmt, new_name); > - gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); > - > - epilog_stmt = gimple_build_assign (vec_dest, code, new_name, > - new_temp); > - new_temp = make_ssa_name (vec_dest, epilog_stmt); > - gimple_assign_set_lhs (epilog_stmt, new_temp); > - gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); > + new_name = gimple_build (&stmts, VEC_PERM_EXPR, vectype1, > + new_temp, zero_vec, mask); > + new_temp = gimple_build (&stmts, code, > + vectype1, new_name, new_temp); > } > + gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); > > /* 2.4 Extract the final scalar result. Create: > s_out3 = extract_field <v_out2, bitpos> */ > @@ -5696,7 +5686,6 @@ vectorizable_reduction (stmt_vec_info st > stmt_vector_for_cost *cost_vec) > { > tree scalar_dest; > - tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); > tree vectype_in = NULL_TREE; > loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); > class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); > @@ -5763,13 +5752,53 @@ vectorizable_reduction (stmt_vec_info st > phi_info = loop_vinfo->lookup_stmt (use_stmt); > stmt_info = vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (phi_info)); > } > - /* STMT_VINFO_REDUC_DEF doesn't point to the first but the last > - element. */ > - if (slp_node && REDUC_GROUP_FIRST_ELEMENT (stmt_info)) > + } > + > + /* PHIs should not participate in patterns. */ > + gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info)); > + gphi *reduc_def_phi = as_a <gphi *> (phi_info->stmt); > + > + /* Verify following REDUC_IDX from the latch def leads us back to the PHI > + and compute the reduction chain length. */ > + tree reduc_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi, > + loop_latch_edge (loop)); > + unsigned reduc_chain_length = 0; > + bool only_slp_reduc_chain = true; > + stmt_info = NULL; > + while (reduc_def != PHI_RESULT (reduc_def_phi)) > + { > + stmt_vec_info def = loop_vinfo->lookup_def (reduc_def); > + stmt_vec_info vdef = vect_stmt_to_vectorize (def); > + if (STMT_VINFO_REDUC_IDX (vdef) == -1) > { > - gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (stmt_info)); > - stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info); > + if (dump_enabled_p ()) > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > + "reduction chain broken by patterns.\n"); > + return false; > + } > + if (!REDUC_GROUP_FIRST_ELEMENT (vdef)) > + only_slp_reduc_chain = false; > + /* ??? For epilogue generation live members of the chain need > + to point back to the PHI via their original stmt for > + info_for_reduction to work. */ > + if (STMT_VINFO_LIVE_P (vdef)) > + STMT_VINFO_REDUC_DEF (def) = phi_info; > + if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (vdef->stmt))) > + { > + if (!tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (vdef->stmt)), > + TREE_TYPE (gimple_assign_rhs1 (vdef->stmt)))) > + { > + if (dump_enabled_p ()) > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > + "conversion in the reduction chain.\n"); > + return false; > + } > } > + else if (!stmt_info) > + /* First non-conversion stmt. */ > + stmt_info = vdef; > + reduc_def = gimple_op (vdef->stmt, 1 + STMT_VINFO_REDUC_IDX (vdef)); > + reduc_chain_length++; > } > /* PHIs should not participate in patterns. */ > gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info)); > @@ -5780,6 +5809,13 @@ vectorizable_reduction (stmt_vec_info st > nested_cycle = true; > } > > + /* STMT_VINFO_REDUC_DEF doesn't point to the first but the last > + element. */ > + if (slp_node && REDUC_GROUP_FIRST_ELEMENT (stmt_info)) > + { > + gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (stmt_info)); > + stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info); > + } > if (REDUC_GROUP_FIRST_ELEMENT (stmt_info)) > gcc_assert (slp_node > && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info); > @@ -5815,6 +5851,8 @@ vectorizable_reduction (stmt_vec_info st > inside the loop body. The last operand is the reduction variable, > which is defined by the loop-header-phi. */ > > + tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); > + STMT_VINFO_REDUC_VECTYPE (reduc_info) = vectype_out; > gassign *stmt = as_a <gassign *> (stmt_info->stmt); > enum tree_code code = gimple_assign_rhs_code (stmt); > bool lane_reduc_code_p > @@ -5831,40 +5869,6 @@ vectorizable_reduction (stmt_vec_info st > if (!type_has_mode_precision_p (scalar_type)) > return false; > > - /* All uses but the last are expected to be defined in the loop. > - The last use is the reduction variable. In case of nested cycle this > - assumption is not true: we use reduc_index to record the index of the > - reduction variable. */ > - gphi *reduc_def_phi = as_a <gphi *> (phi_info->stmt); > - > - /* Verify following REDUC_IDX from the latch def leads us back to the PHI > - and compute the reduction chain length. */ > - tree reduc_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi, > - loop_latch_edge (loop)); > - unsigned reduc_chain_length = 0; > - bool only_slp_reduc_chain = true; > - while (reduc_def != PHI_RESULT (reduc_def_phi)) > - { > - stmt_vec_info def = loop_vinfo->lookup_def (reduc_def); > - stmt_vec_info vdef = vect_stmt_to_vectorize (def); > - if (STMT_VINFO_REDUC_IDX (vdef) == -1) > - { > - if (dump_enabled_p ()) > - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > - "reduction chain broken by patterns.\n"); > - return false; > - } > - if (!REDUC_GROUP_FIRST_ELEMENT (vdef)) > - only_slp_reduc_chain = false; > - /* ??? For epilogue generation live members of the chain need > - to point back to the PHI via their original stmt for > - info_for_reduction to work. */ > - if (STMT_VINFO_LIVE_P (vdef)) > - STMT_VINFO_REDUC_DEF (def) = phi_info; > - reduc_def = gimple_op (vdef->stmt, 1 + STMT_VINFO_REDUC_IDX (vdef)); > - reduc_chain_length++; > - } > - > /* For lane-reducing ops we're reducing the number of reduction PHIs > which means the only use of that may be in the lane-reducing operation. */ > if (lane_reduc_code_p > @@ -5877,6 +5881,10 @@ vectorizable_reduction (stmt_vec_info st > return false; > } > > + /* All uses but the last are expected to be defined in the loop. > + The last use is the reduction variable. In case of nested cycle this > + assumption is not true: we use reduc_index to record the index of the > + reduction variable. */ > reduc_def = PHI_RESULT (reduc_def_phi); > for (i = 0; i < op_type; i++) > { > @@ -5931,7 +5939,7 @@ vectorizable_reduction (stmt_vec_info st > } > } > if (!vectype_in) > - vectype_in = vectype_out; > + vectype_in = STMT_VINFO_VECTYPE (phi_info); > STMT_VINFO_REDUC_VECTYPE_IN (reduc_info) = vectype_in; > > enum vect_reduction_type v_reduc_type = STMT_VINFO_REDUC_TYPE (phi_info); > @@ -6037,12 +6045,6 @@ vectorizable_reduction (stmt_vec_info st > } > } > > - if (REDUC_GROUP_FIRST_ELEMENT (stmt_info)) > - /* We changed STMT to be the first stmt in reduction chain, hence we > - check that in this case the first element in the chain is STMT. */ > - gcc_assert (REDUC_GROUP_FIRST_ELEMENT (STMT_VINFO_REDUC_DEF (phi_info)) > - == vect_orig_stmt (stmt_info)); > - > if (STMT_VINFO_LIVE_P (phi_info)) > return false; > > @@ -6447,8 +6449,15 @@ vectorizable_reduction (stmt_vec_info st > && code != SAD_EXPR > && reduction_type != FOLD_LEFT_REDUCTION) > { > - STMT_VINFO_DEF_TYPE (stmt_info) = vect_internal_def; > - STMT_VINFO_DEF_TYPE (vect_orig_stmt (stmt_info)) = vect_internal_def; > + stmt_vec_info tem > + = vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (phi_info)); > + if (slp_node && REDUC_GROUP_FIRST_ELEMENT (tem)) > + { > + gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (tem)); > + tem = REDUC_GROUP_FIRST_ELEMENT (tem); > + } > + STMT_VINFO_DEF_TYPE (vect_orig_stmt (tem)) = vect_internal_def; > + STMT_VINFO_DEF_TYPE (tem) = vect_internal_def; > } > else if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)) > { > Index: gcc/tree-vect-stmts.c > =================================================================== > --- gcc/tree-vect-stmts.c (revision 277922) > +++ gcc/tree-vect-stmts.c (working copy) > @@ -330,13 +330,13 @@ vect_stmt_relevant_p (stmt_vec_info stmt > basic_block bb = gimple_bb (USE_STMT (use_p)); > if (!flow_bb_inside_loop_p (loop, bb)) > { > + if (is_gimple_debug (USE_STMT (use_p))) > + continue; > + > if (dump_enabled_p ()) > dump_printf_loc (MSG_NOTE, vect_location, > "vec_stmt_relevant_p: used out of loop.\n"); > > - if (is_gimple_debug (USE_STMT (use_p))) > - continue; > - > /* We expect all such uses to be in the loop exit phis > (because of loop closed form) */ > gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI); > Index: gcc/tree-vectorizer.h > =================================================================== > --- gcc/tree-vectorizer.h (revision 277922) > +++ gcc/tree-vectorizer.h (working copy) > @@ -1050,6 +1050,9 @@ public: > /* The vector input type relevant for reduction vectorization. */ > tree reduc_vectype_in; > > + /* The vector type for performing the actual reduction. */ > + tree reduc_vectype; > + > /* Whether we force a single cycle PHI during reduction vectorization. */ > bool force_single_cycle; > > @@ -1175,6 +1178,7 @@ STMT_VINFO_BB_VINFO (stmt_vec_info stmt_ > #define STMT_VINFO_REDUC_CODE(S) (S)->reduc_code > #define STMT_VINFO_REDUC_FN(S) (S)->reduc_fn > #define STMT_VINFO_REDUC_DEF(S) (S)->reduc_def > +#define STMT_VINFO_REDUC_VECTYPE(S) (S)->reduc_vectype > #define STMT_VINFO_REDUC_VECTYPE_IN(S) (S)->reduc_vectype_in > #define STMT_VINFO_SLP_VECT_ONLY(S) (S)->slp_vect_only_p > > Index: gcc/testsuite/gcc.dg/vect/pr92205.c > =================================================================== > --- gcc/testsuite/gcc.dg/vect/pr92205.c (revision 277922) > +++ gcc/testsuite/gcc.dg/vect/pr92205.c (working copy) > @@ -10,4 +10,4 @@ int b(int n, unsigned char *a) > return d; > } > > -/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target { vect_unpack && { ! vect_no_bitwise } } } } } */ > +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail *-*-* } } } */ > Index: gcc/testsuite/gcc.dg/vect/pr92324-1.c > =================================================================== > --- gcc/testsuite/gcc.dg/vect/pr92324-1.c (nonexistent) > +++ gcc/testsuite/gcc.dg/vect/pr92324-1.c (working copy) > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > + > +unsigned a, b; > +int c, d; > +unsigned e(int f) { > + if (a > f) > + return a; > + return f; > +} > +void g() { > + for (; c; c++) > + d = e(d); > + b = d; > +} > Index: gcc/testsuite/gcc.dg/vect/pr92324-2.c > =================================================================== > --- gcc/testsuite/gcc.dg/vect/pr92324-2.c (nonexistent) > +++ gcc/testsuite/gcc.dg/vect/pr92324-2.c (working copy) > @@ -0,0 +1,21 @@ > +#include "tree-vect.h" > + > +unsigned b[1024]; > + > +int __attribute__((noipa)) > +foo (int n) > +{ > + int res = 0; > + for (int i = 0; i < n; ++i) > + res = res > b[i] ? res : b[i]; > + return res; > +} > + > +int main () > +{ > + check_vect (); > + b[15] = (unsigned)__INT_MAX__ + 1; > + if (foo (16) != -__INT_MAX__ - 1) > + __builtin_abort (); > + return 0; > +}
On Mon, 11 Nov 2019, Christophe Lyon wrote: > On Fri, 8 Nov 2019 at 09:57, Richard Biener <rguenther@suse.de> wrote: > > > > > > I've been sitting on this for a few days since I'm not 100% happy > > with how the code looks like. There's possibly still holes in it > > (chains with mixed signed/unsigned adds for example might pick > > up signed adds in the epilogue), but the wrong-code cases should > > work fine now. I'm probably going to followup with some > > mass renaming of variable/parameter names to make it more clear > > which stmt / type we are actually looking at ... > > > > Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk. > > > > Richard. > > > > 2019-11-08 Richard Biener <rguenther@suse.de> > > > > PR tree-optimization/ > > * tree-vect-loop.c (vect_create_epilog_for_reduction): Use > > STMT_VINFO_REDUC_VECTYPE for all computations, inserting > > sign-conversions as necessary. > > (vectorizable_reduction): Reject conversions in the chain > > that are not sign-conversions, base analysis on a non-converting > > stmt and its operation sign. Set STMT_VINFO_REDUC_VECTYPE. > > * tree-vect-stmts.c (vect_stmt_relevant_p): Don't dump anything > > for debug stmts. > > * tree-vectorizer.h (_stmt_vec_info::reduc_vectype): New. > > (STMT_VINFO_REDUC_VECTYPE): Likewise. > > > > * gcc.dg/vect/pr92205.c: XFAIL. > > * gcc.dg/vect/pr92324-1.c: New testcase. > > * gcc.dg/vect/pr92324-2.c: Likewise. > > > > Hi Richard, > > This new testcase (pr92324-2.c) fails on arm/aarch64: > FAIL: gcc.dg/vect/pr92324-2.c -flto -ffat-lto-objects execution test > FAIL: gcc.dg/vect/pr92324-2.c execution test Can you open a bugreport? I suspect the arm/aarch64 vector min/max patterns are bogus or we go a different code-path in the vectorizer here (IIRC x86 has reduc_[us]max_* patterns while aarch64 has none of that). Thanks, Richard. > Christophe > > > > Index: gcc/tree-vect-loop.c > > =================================================================== > > --- gcc/tree-vect-loop.c (revision 277922) > > +++ gcc/tree-vect-loop.c (working copy) > > @@ -4231,7 +4231,6 @@ vect_create_epilog_for_reduction (stmt_v > > gimple *new_phi = NULL, *phi; > > stmt_vec_info phi_info; > > gimple_stmt_iterator exit_gsi; > > - tree vec_dest; > > tree new_temp = NULL_TREE, new_name, new_scalar_dest; > > gimple *epilog_stmt = NULL; > > gimple *exit_phi; > > @@ -4264,7 +4263,7 @@ vect_create_epilog_for_reduction (stmt_v > > } > > gcc_assert (!nested_in_vect_loop || double_reduc); > > > > - vectype = STMT_VINFO_VECTYPE (stmt_info); > > + vectype = STMT_VINFO_REDUC_VECTYPE (reduc_info); > > gcc_assert (vectype); > > mode = TYPE_MODE (vectype); > > > > @@ -4505,48 +4504,43 @@ vect_create_epilog_for_reduction (stmt_v > > one vector. */ > > if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) || direct_slp_reduc) > > { > > + gimple_seq stmts = NULL; > > tree first_vect = PHI_RESULT (new_phis[0]); > > - gassign *new_vec_stmt = NULL; > > - vec_dest = vect_create_destination_var (scalar_dest, vectype); > > + first_vect = gimple_convert (&stmts, vectype, first_vect); > > for (k = 1; k < new_phis.length (); k++) > > { > > gimple *next_phi = new_phis[k]; > > tree second_vect = PHI_RESULT (next_phi); > > - tree tem = make_ssa_name (vec_dest, new_vec_stmt); > > - new_vec_stmt = gimple_build_assign (tem, code, > > - first_vect, second_vect); > > - gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT); > > - first_vect = tem; > > + second_vect = gimple_convert (&stmts, vectype, second_vect); > > + first_vect = gimple_build (&stmts, code, vectype, > > + first_vect, second_vect); > > } > > + gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); > > > > new_phi_result = first_vect; > > - if (new_vec_stmt) > > - { > > - new_phis.truncate (0); > > - new_phis.safe_push (new_vec_stmt); > > - } > > + new_phis.truncate (0); > > + new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect)); > > } > > /* Likewise if we couldn't use a single defuse cycle. */ > > else if (ncopies > 1) > > { > > gcc_assert (new_phis.length () == 1); > > + gimple_seq stmts = NULL; > > tree first_vect = PHI_RESULT (new_phis[0]); > > - gassign *new_vec_stmt = NULL; > > - vec_dest = vect_create_destination_var (scalar_dest, vectype); > > + first_vect = gimple_convert (&stmts, vectype, first_vect); > > stmt_vec_info next_phi_info = loop_vinfo->lookup_stmt (new_phis[0]); > > for (int k = 1; k < ncopies; ++k) > > { > > next_phi_info = STMT_VINFO_RELATED_STMT (next_phi_info); > > tree second_vect = PHI_RESULT (next_phi_info->stmt); > > - tree tem = make_ssa_name (vec_dest, new_vec_stmt); > > - new_vec_stmt = gimple_build_assign (tem, code, > > - first_vect, second_vect); > > - gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT); > > - first_vect = tem; > > + second_vect = gimple_convert (&stmts, vectype, second_vect); > > + first_vect = gimple_build (&stmts, code, vectype, > > + first_vect, second_vect); > > } > > + gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); > > new_phi_result = first_vect; > > new_phis.truncate (0); > > - new_phis.safe_push (new_vec_stmt); > > + new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect)); > > } > > else > > new_phi_result = PHI_RESULT (new_phis[0]); > > @@ -4877,13 +4871,14 @@ vect_create_epilog_for_reduction (stmt_v > > in a vector mode of smaller size and first reduce upper/lower > > halves against each other. */ > > enum machine_mode mode1 = mode; > > + tree stype = TREE_TYPE (vectype); > > unsigned sz = tree_to_uhwi (TYPE_SIZE_UNIT (vectype)); > > unsigned sz1 = sz; > > if (!slp_reduc > > && (mode1 = targetm.vectorize.split_reduction (mode)) != mode) > > sz1 = GET_MODE_SIZE (mode1).to_constant (); > > > > - tree vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz1); > > + tree vectype1 = get_vectype_for_scalar_type_and_size (stype, sz1); > > reduce_with_shift = have_whole_vector_shift (mode1); > > if (!VECTOR_MODE_P (mode1)) > > reduce_with_shift = false; > > @@ -4901,7 +4896,7 @@ vect_create_epilog_for_reduction (stmt_v > > { > > gcc_assert (!slp_reduc); > > sz /= 2; > > - vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz); > > + vectype1 = get_vectype_for_scalar_type_and_size (stype, sz); > > > > /* The target has to make sure we support lowpart/highpart > > extraction, either via direct vector extract or through > > @@ -5004,7 +4999,8 @@ vect_create_epilog_for_reduction (stmt_v > > dump_printf_loc (MSG_NOTE, vect_location, > > "Reduce using vector shifts\n"); > > > > - vec_dest = vect_create_destination_var (scalar_dest, vectype1); > > + gimple_seq stmts = NULL; > > + new_temp = gimple_convert (&stmts, vectype1, new_temp); > > for (elt_offset = nelements / 2; > > elt_offset >= 1; > > elt_offset /= 2) > > @@ -5012,18 +5008,12 @@ vect_create_epilog_for_reduction (stmt_v > > calc_vec_perm_mask_for_shift (elt_offset, nelements, &sel); > > indices.new_vector (sel, 2, nelements); > > tree mask = vect_gen_perm_mask_any (vectype1, indices); > > - epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR, > > - new_temp, zero_vec, mask); > > - new_name = make_ssa_name (vec_dest, epilog_stmt); > > - gimple_assign_set_lhs (epilog_stmt, new_name); > > - gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); > > - > > - epilog_stmt = gimple_build_assign (vec_dest, code, new_name, > > - new_temp); > > - new_temp = make_ssa_name (vec_dest, epilog_stmt); > > - gimple_assign_set_lhs (epilog_stmt, new_temp); > > - gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); > > + new_name = gimple_build (&stmts, VEC_PERM_EXPR, vectype1, > > + new_temp, zero_vec, mask); > > + new_temp = gimple_build (&stmts, code, > > + vectype1, new_name, new_temp); > > } > > + gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); > > > > /* 2.4 Extract the final scalar result. Create: > > s_out3 = extract_field <v_out2, bitpos> */ > > @@ -5696,7 +5686,6 @@ vectorizable_reduction (stmt_vec_info st > > stmt_vector_for_cost *cost_vec) > > { > > tree scalar_dest; > > - tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); > > tree vectype_in = NULL_TREE; > > loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); > > class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); > > @@ -5763,13 +5752,53 @@ vectorizable_reduction (stmt_vec_info st > > phi_info = loop_vinfo->lookup_stmt (use_stmt); > > stmt_info = vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (phi_info)); > > } > > - /* STMT_VINFO_REDUC_DEF doesn't point to the first but the last > > - element. */ > > - if (slp_node && REDUC_GROUP_FIRST_ELEMENT (stmt_info)) > > + } > > + > > + /* PHIs should not participate in patterns. */ > > + gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info)); > > + gphi *reduc_def_phi = as_a <gphi *> (phi_info->stmt); > > + > > + /* Verify following REDUC_IDX from the latch def leads us back to the PHI > > + and compute the reduction chain length. */ > > + tree reduc_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi, > > + loop_latch_edge (loop)); > > + unsigned reduc_chain_length = 0; > > + bool only_slp_reduc_chain = true; > > + stmt_info = NULL; > > + while (reduc_def != PHI_RESULT (reduc_def_phi)) > > + { > > + stmt_vec_info def = loop_vinfo->lookup_def (reduc_def); > > + stmt_vec_info vdef = vect_stmt_to_vectorize (def); > > + if (STMT_VINFO_REDUC_IDX (vdef) == -1) > > { > > - gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (stmt_info)); > > - stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info); > > + if (dump_enabled_p ()) > > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > > + "reduction chain broken by patterns.\n"); > > + return false; > > + } > > + if (!REDUC_GROUP_FIRST_ELEMENT (vdef)) > > + only_slp_reduc_chain = false; > > + /* ??? For epilogue generation live members of the chain need > > + to point back to the PHI via their original stmt for > > + info_for_reduction to work. */ > > + if (STMT_VINFO_LIVE_P (vdef)) > > + STMT_VINFO_REDUC_DEF (def) = phi_info; > > + if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (vdef->stmt))) > > + { > > + if (!tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (vdef->stmt)), > > + TREE_TYPE (gimple_assign_rhs1 (vdef->stmt)))) > > + { > > + if (dump_enabled_p ()) > > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > > + "conversion in the reduction chain.\n"); > > + return false; > > + } > > } > > + else if (!stmt_info) > > + /* First non-conversion stmt. */ > > + stmt_info = vdef; > > + reduc_def = gimple_op (vdef->stmt, 1 + STMT_VINFO_REDUC_IDX (vdef)); > > + reduc_chain_length++; > > } > > /* PHIs should not participate in patterns. */ > > gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info)); > > @@ -5780,6 +5809,13 @@ vectorizable_reduction (stmt_vec_info st > > nested_cycle = true; > > } > > > > + /* STMT_VINFO_REDUC_DEF doesn't point to the first but the last > > + element. */ > > + if (slp_node && REDUC_GROUP_FIRST_ELEMENT (stmt_info)) > > + { > > + gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (stmt_info)); > > + stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info); > > + } > > if (REDUC_GROUP_FIRST_ELEMENT (stmt_info)) > > gcc_assert (slp_node > > && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info); > > @@ -5815,6 +5851,8 @@ vectorizable_reduction (stmt_vec_info st > > inside the loop body. The last operand is the reduction variable, > > which is defined by the loop-header-phi. */ > > > > + tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); > > + STMT_VINFO_REDUC_VECTYPE (reduc_info) = vectype_out; > > gassign *stmt = as_a <gassign *> (stmt_info->stmt); > > enum tree_code code = gimple_assign_rhs_code (stmt); > > bool lane_reduc_code_p > > @@ -5831,40 +5869,6 @@ vectorizable_reduction (stmt_vec_info st > > if (!type_has_mode_precision_p (scalar_type)) > > return false; > > > > - /* All uses but the last are expected to be defined in the loop. > > - The last use is the reduction variable. In case of nested cycle this > > - assumption is not true: we use reduc_index to record the index of the > > - reduction variable. */ > > - gphi *reduc_def_phi = as_a <gphi *> (phi_info->stmt); > > - > > - /* Verify following REDUC_IDX from the latch def leads us back to the PHI > > - and compute the reduction chain length. */ > > - tree reduc_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi, > > - loop_latch_edge (loop)); > > - unsigned reduc_chain_length = 0; > > - bool only_slp_reduc_chain = true; > > - while (reduc_def != PHI_RESULT (reduc_def_phi)) > > - { > > - stmt_vec_info def = loop_vinfo->lookup_def (reduc_def); > > - stmt_vec_info vdef = vect_stmt_to_vectorize (def); > > - if (STMT_VINFO_REDUC_IDX (vdef) == -1) > > - { > > - if (dump_enabled_p ()) > > - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > > - "reduction chain broken by patterns.\n"); > > - return false; > > - } > > - if (!REDUC_GROUP_FIRST_ELEMENT (vdef)) > > - only_slp_reduc_chain = false; > > - /* ??? For epilogue generation live members of the chain need > > - to point back to the PHI via their original stmt for > > - info_for_reduction to work. */ > > - if (STMT_VINFO_LIVE_P (vdef)) > > - STMT_VINFO_REDUC_DEF (def) = phi_info; > > - reduc_def = gimple_op (vdef->stmt, 1 + STMT_VINFO_REDUC_IDX (vdef)); > > - reduc_chain_length++; > > - } > > - > > /* For lane-reducing ops we're reducing the number of reduction PHIs > > which means the only use of that may be in the lane-reducing operation. */ > > if (lane_reduc_code_p > > @@ -5877,6 +5881,10 @@ vectorizable_reduction (stmt_vec_info st > > return false; > > } > > > > + /* All uses but the last are expected to be defined in the loop. > > + The last use is the reduction variable. In case of nested cycle this > > + assumption is not true: we use reduc_index to record the index of the > > + reduction variable. */ > > reduc_def = PHI_RESULT (reduc_def_phi); > > for (i = 0; i < op_type; i++) > > { > > @@ -5931,7 +5939,7 @@ vectorizable_reduction (stmt_vec_info st > > } > > } > > if (!vectype_in) > > - vectype_in = vectype_out; > > + vectype_in = STMT_VINFO_VECTYPE (phi_info); > > STMT_VINFO_REDUC_VECTYPE_IN (reduc_info) = vectype_in; > > > > enum vect_reduction_type v_reduc_type = STMT_VINFO_REDUC_TYPE (phi_info); > > @@ -6037,12 +6045,6 @@ vectorizable_reduction (stmt_vec_info st > > } > > } > > > > - if (REDUC_GROUP_FIRST_ELEMENT (stmt_info)) > > - /* We changed STMT to be the first stmt in reduction chain, hence we > > - check that in this case the first element in the chain is STMT. */ > > - gcc_assert (REDUC_GROUP_FIRST_ELEMENT (STMT_VINFO_REDUC_DEF (phi_info)) > > - == vect_orig_stmt (stmt_info)); > > - > > if (STMT_VINFO_LIVE_P (phi_info)) > > return false; > > > > @@ -6447,8 +6449,15 @@ vectorizable_reduction (stmt_vec_info st > > && code != SAD_EXPR > > && reduction_type != FOLD_LEFT_REDUCTION) > > { > > - STMT_VINFO_DEF_TYPE (stmt_info) = vect_internal_def; > > - STMT_VINFO_DEF_TYPE (vect_orig_stmt (stmt_info)) = vect_internal_def; > > + stmt_vec_info tem > > + = vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (phi_info)); > > + if (slp_node && REDUC_GROUP_FIRST_ELEMENT (tem)) > > + { > > + gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (tem)); > > + tem = REDUC_GROUP_FIRST_ELEMENT (tem); > > + } > > + STMT_VINFO_DEF_TYPE (vect_orig_stmt (tem)) = vect_internal_def; > > + STMT_VINFO_DEF_TYPE (tem) = vect_internal_def; > > } > > else if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)) > > { > > Index: gcc/tree-vect-stmts.c > > =================================================================== > > --- gcc/tree-vect-stmts.c (revision 277922) > > +++ gcc/tree-vect-stmts.c (working copy) > > @@ -330,13 +330,13 @@ vect_stmt_relevant_p (stmt_vec_info stmt > > basic_block bb = gimple_bb (USE_STMT (use_p)); > > if (!flow_bb_inside_loop_p (loop, bb)) > > { > > + if (is_gimple_debug (USE_STMT (use_p))) > > + continue; > > + > > if (dump_enabled_p ()) > > dump_printf_loc (MSG_NOTE, vect_location, > > "vec_stmt_relevant_p: used out of loop.\n"); > > > > - if (is_gimple_debug (USE_STMT (use_p))) > > - continue; > > - > > /* We expect all such uses to be in the loop exit phis > > (because of loop closed form) */ > > gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI); > > Index: gcc/tree-vectorizer.h > > =================================================================== > > --- gcc/tree-vectorizer.h (revision 277922) > > +++ gcc/tree-vectorizer.h (working copy) > > @@ -1050,6 +1050,9 @@ public: > > /* The vector input type relevant for reduction vectorization. */ > > tree reduc_vectype_in; > > > > + /* The vector type for performing the actual reduction. */ > > + tree reduc_vectype; > > + > > /* Whether we force a single cycle PHI during reduction vectorization. */ > > bool force_single_cycle; > > > > @@ -1175,6 +1178,7 @@ STMT_VINFO_BB_VINFO (stmt_vec_info stmt_ > > #define STMT_VINFO_REDUC_CODE(S) (S)->reduc_code > > #define STMT_VINFO_REDUC_FN(S) (S)->reduc_fn > > #define STMT_VINFO_REDUC_DEF(S) (S)->reduc_def > > +#define STMT_VINFO_REDUC_VECTYPE(S) (S)->reduc_vectype > > #define STMT_VINFO_REDUC_VECTYPE_IN(S) (S)->reduc_vectype_in > > #define STMT_VINFO_SLP_VECT_ONLY(S) (S)->slp_vect_only_p > > > > Index: gcc/testsuite/gcc.dg/vect/pr92205.c > > =================================================================== > > --- gcc/testsuite/gcc.dg/vect/pr92205.c (revision 277922) > > +++ gcc/testsuite/gcc.dg/vect/pr92205.c (working copy) > > @@ -10,4 +10,4 @@ int b(int n, unsigned char *a) > > return d; > > } > > > > -/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target { vect_unpack && { ! vect_no_bitwise } } } } } */ > > +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail *-*-* } } } */ > > Index: gcc/testsuite/gcc.dg/vect/pr92324-1.c > > =================================================================== > > --- gcc/testsuite/gcc.dg/vect/pr92324-1.c (nonexistent) > > +++ gcc/testsuite/gcc.dg/vect/pr92324-1.c (working copy) > > @@ -0,0 +1,14 @@ > > +/* { dg-do compile } */ > > + > > +unsigned a, b; > > +int c, d; > > +unsigned e(int f) { > > + if (a > f) > > + return a; > > + return f; > > +} > > +void g() { > > + for (; c; c++) > > + d = e(d); > > + b = d; > > +} > > Index: gcc/testsuite/gcc.dg/vect/pr92324-2.c > > =================================================================== > > --- gcc/testsuite/gcc.dg/vect/pr92324-2.c (nonexistent) > > +++ gcc/testsuite/gcc.dg/vect/pr92324-2.c (working copy) > > @@ -0,0 +1,21 @@ > > +#include "tree-vect.h" > > + > > +unsigned b[1024]; > > + > > +int __attribute__((noipa)) > > +foo (int n) > > +{ > > + int res = 0; > > + for (int i = 0; i < n; ++i) > > + res = res > b[i] ? res : b[i]; > > + return res; > > +} > > + > > +int main () > > +{ > > + check_vect (); > > + b[15] = (unsigned)__INT_MAX__ + 1; > > + if (foo (16) != -__INT_MAX__ - 1) > > + __builtin_abort (); > > + return 0; > > +} >
On Mon, 11 Nov 2019 at 14:36, Richard Biener <rguenther@suse.de> wrote: > > On Mon, 11 Nov 2019, Christophe Lyon wrote: > > > On Fri, 8 Nov 2019 at 09:57, Richard Biener <rguenther@suse.de> wrote: > > > > > > > > > I've been sitting on this for a few days since I'm not 100% happy > > > with how the code looks like. There's possibly still holes in it > > > (chains with mixed signed/unsigned adds for example might pick > > > up signed adds in the epilogue), but the wrong-code cases should > > > work fine now. I'm probably going to followup with some > > > mass renaming of variable/parameter names to make it more clear > > > which stmt / type we are actually looking at ... > > > > > > Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk. > > > > > > Richard. > > > > > > 2019-11-08 Richard Biener <rguenther@suse.de> > > > > > > PR tree-optimization/ > > > * tree-vect-loop.c (vect_create_epilog_for_reduction): Use > > > STMT_VINFO_REDUC_VECTYPE for all computations, inserting > > > sign-conversions as necessary. > > > (vectorizable_reduction): Reject conversions in the chain > > > that are not sign-conversions, base analysis on a non-converting > > > stmt and its operation sign. Set STMT_VINFO_REDUC_VECTYPE. > > > * tree-vect-stmts.c (vect_stmt_relevant_p): Don't dump anything > > > for debug stmts. > > > * tree-vectorizer.h (_stmt_vec_info::reduc_vectype): New. > > > (STMT_VINFO_REDUC_VECTYPE): Likewise. > > > > > > * gcc.dg/vect/pr92205.c: XFAIL. > > > * gcc.dg/vect/pr92324-1.c: New testcase. > > > * gcc.dg/vect/pr92324-2.c: Likewise. > > > > > > > Hi Richard, > > > > This new testcase (pr92324-2.c) fails on arm/aarch64: > > FAIL: gcc.dg/vect/pr92324-2.c -flto -ffat-lto-objects execution test > > FAIL: gcc.dg/vect/pr92324-2.c execution test > > Can you open a bugreport? I suspect the arm/aarch64 vector > min/max patterns are bogus or we go a different code-path > in the vectorizer here (IIRC x86 has reduc_[us]max_* patterns > while aarch64 has none of that). > Sure, I've filed PR92473. > Thanks, > Richard. > > > Christophe > > > > > > > Index: gcc/tree-vect-loop.c > > > =================================================================== > > > --- gcc/tree-vect-loop.c (revision 277922) > > > +++ gcc/tree-vect-loop.c (working copy) > > > @@ -4231,7 +4231,6 @@ vect_create_epilog_for_reduction (stmt_v > > > gimple *new_phi = NULL, *phi; > > > stmt_vec_info phi_info; > > > gimple_stmt_iterator exit_gsi; > > > - tree vec_dest; > > > tree new_temp = NULL_TREE, new_name, new_scalar_dest; > > > gimple *epilog_stmt = NULL; > > > gimple *exit_phi; > > > @@ -4264,7 +4263,7 @@ vect_create_epilog_for_reduction (stmt_v > > > } > > > gcc_assert (!nested_in_vect_loop || double_reduc); > > > > > > - vectype = STMT_VINFO_VECTYPE (stmt_info); > > > + vectype = STMT_VINFO_REDUC_VECTYPE (reduc_info); > > > gcc_assert (vectype); > > > mode = TYPE_MODE (vectype); > > > > > > @@ -4505,48 +4504,43 @@ vect_create_epilog_for_reduction (stmt_v > > > one vector. */ > > > if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) || direct_slp_reduc) > > > { > > > + gimple_seq stmts = NULL; > > > tree first_vect = PHI_RESULT (new_phis[0]); > > > - gassign *new_vec_stmt = NULL; > > > - vec_dest = vect_create_destination_var (scalar_dest, vectype); > > > + first_vect = gimple_convert (&stmts, vectype, first_vect); > > > for (k = 1; k < new_phis.length (); k++) > > > { > > > gimple *next_phi = new_phis[k]; > > > tree second_vect = PHI_RESULT (next_phi); > > > - tree tem = make_ssa_name (vec_dest, new_vec_stmt); > > > - new_vec_stmt = gimple_build_assign (tem, code, > > > - first_vect, second_vect); > > > - gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT); > > > - first_vect = tem; > > > + second_vect = gimple_convert (&stmts, vectype, second_vect); > > > + first_vect = gimple_build (&stmts, code, vectype, > > > + first_vect, second_vect); > > > } > > > + gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); > > > > > > new_phi_result = first_vect; > > > - if (new_vec_stmt) > > > - { > > > - new_phis.truncate (0); > > > - new_phis.safe_push (new_vec_stmt); > > > - } > > > + new_phis.truncate (0); > > > + new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect)); > > > } > > > /* Likewise if we couldn't use a single defuse cycle. */ > > > else if (ncopies > 1) > > > { > > > gcc_assert (new_phis.length () == 1); > > > + gimple_seq stmts = NULL; > > > tree first_vect = PHI_RESULT (new_phis[0]); > > > - gassign *new_vec_stmt = NULL; > > > - vec_dest = vect_create_destination_var (scalar_dest, vectype); > > > + first_vect = gimple_convert (&stmts, vectype, first_vect); > > > stmt_vec_info next_phi_info = loop_vinfo->lookup_stmt (new_phis[0]); > > > for (int k = 1; k < ncopies; ++k) > > > { > > > next_phi_info = STMT_VINFO_RELATED_STMT (next_phi_info); > > > tree second_vect = PHI_RESULT (next_phi_info->stmt); > > > - tree tem = make_ssa_name (vec_dest, new_vec_stmt); > > > - new_vec_stmt = gimple_build_assign (tem, code, > > > - first_vect, second_vect); > > > - gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT); > > > - first_vect = tem; > > > + second_vect = gimple_convert (&stmts, vectype, second_vect); > > > + first_vect = gimple_build (&stmts, code, vectype, > > > + first_vect, second_vect); > > > } > > > + gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); > > > new_phi_result = first_vect; > > > new_phis.truncate (0); > > > - new_phis.safe_push (new_vec_stmt); > > > + new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect)); > > > } > > > else > > > new_phi_result = PHI_RESULT (new_phis[0]); > > > @@ -4877,13 +4871,14 @@ vect_create_epilog_for_reduction (stmt_v > > > in a vector mode of smaller size and first reduce upper/lower > > > halves against each other. */ > > > enum machine_mode mode1 = mode; > > > + tree stype = TREE_TYPE (vectype); > > > unsigned sz = tree_to_uhwi (TYPE_SIZE_UNIT (vectype)); > > > unsigned sz1 = sz; > > > if (!slp_reduc > > > && (mode1 = targetm.vectorize.split_reduction (mode)) != mode) > > > sz1 = GET_MODE_SIZE (mode1).to_constant (); > > > > > > - tree vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz1); > > > + tree vectype1 = get_vectype_for_scalar_type_and_size (stype, sz1); > > > reduce_with_shift = have_whole_vector_shift (mode1); > > > if (!VECTOR_MODE_P (mode1)) > > > reduce_with_shift = false; > > > @@ -4901,7 +4896,7 @@ vect_create_epilog_for_reduction (stmt_v > > > { > > > gcc_assert (!slp_reduc); > > > sz /= 2; > > > - vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz); > > > + vectype1 = get_vectype_for_scalar_type_and_size (stype, sz); > > > > > > /* The target has to make sure we support lowpart/highpart > > > extraction, either via direct vector extract or through > > > @@ -5004,7 +4999,8 @@ vect_create_epilog_for_reduction (stmt_v > > > dump_printf_loc (MSG_NOTE, vect_location, > > > "Reduce using vector shifts\n"); > > > > > > - vec_dest = vect_create_destination_var (scalar_dest, vectype1); > > > + gimple_seq stmts = NULL; > > > + new_temp = gimple_convert (&stmts, vectype1, new_temp); > > > for (elt_offset = nelements / 2; > > > elt_offset >= 1; > > > elt_offset /= 2) > > > @@ -5012,18 +5008,12 @@ vect_create_epilog_for_reduction (stmt_v > > > calc_vec_perm_mask_for_shift (elt_offset, nelements, &sel); > > > indices.new_vector (sel, 2, nelements); > > > tree mask = vect_gen_perm_mask_any (vectype1, indices); > > > - epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR, > > > - new_temp, zero_vec, mask); > > > - new_name = make_ssa_name (vec_dest, epilog_stmt); > > > - gimple_assign_set_lhs (epilog_stmt, new_name); > > > - gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); > > > - > > > - epilog_stmt = gimple_build_assign (vec_dest, code, new_name, > > > - new_temp); > > > - new_temp = make_ssa_name (vec_dest, epilog_stmt); > > > - gimple_assign_set_lhs (epilog_stmt, new_temp); > > > - gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); > > > + new_name = gimple_build (&stmts, VEC_PERM_EXPR, vectype1, > > > + new_temp, zero_vec, mask); > > > + new_temp = gimple_build (&stmts, code, > > > + vectype1, new_name, new_temp); > > > } > > > + gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); > > > > > > /* 2.4 Extract the final scalar result. Create: > > > s_out3 = extract_field <v_out2, bitpos> */ > > > @@ -5696,7 +5686,6 @@ vectorizable_reduction (stmt_vec_info st > > > stmt_vector_for_cost *cost_vec) > > > { > > > tree scalar_dest; > > > - tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); > > > tree vectype_in = NULL_TREE; > > > loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); > > > class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); > > > @@ -5763,13 +5752,53 @@ vectorizable_reduction (stmt_vec_info st > > > phi_info = loop_vinfo->lookup_stmt (use_stmt); > > > stmt_info = vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (phi_info)); > > > } > > > - /* STMT_VINFO_REDUC_DEF doesn't point to the first but the last > > > - element. */ > > > - if (slp_node && REDUC_GROUP_FIRST_ELEMENT (stmt_info)) > > > + } > > > + > > > + /* PHIs should not participate in patterns. */ > > > + gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info)); > > > + gphi *reduc_def_phi = as_a <gphi *> (phi_info->stmt); > > > + > > > + /* Verify following REDUC_IDX from the latch def leads us back to the PHI > > > + and compute the reduction chain length. */ > > > + tree reduc_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi, > > > + loop_latch_edge (loop)); > > > + unsigned reduc_chain_length = 0; > > > + bool only_slp_reduc_chain = true; > > > + stmt_info = NULL; > > > + while (reduc_def != PHI_RESULT (reduc_def_phi)) > > > + { > > > + stmt_vec_info def = loop_vinfo->lookup_def (reduc_def); > > > + stmt_vec_info vdef = vect_stmt_to_vectorize (def); > > > + if (STMT_VINFO_REDUC_IDX (vdef) == -1) > > > { > > > - gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (stmt_info)); > > > - stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info); > > > + if (dump_enabled_p ()) > > > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > > > + "reduction chain broken by patterns.\n"); > > > + return false; > > > + } > > > + if (!REDUC_GROUP_FIRST_ELEMENT (vdef)) > > > + only_slp_reduc_chain = false; > > > + /* ??? For epilogue generation live members of the chain need > > > + to point back to the PHI via their original stmt for > > > + info_for_reduction to work. */ > > > + if (STMT_VINFO_LIVE_P (vdef)) > > > + STMT_VINFO_REDUC_DEF (def) = phi_info; > > > + if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (vdef->stmt))) > > > + { > > > + if (!tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (vdef->stmt)), > > > + TREE_TYPE (gimple_assign_rhs1 (vdef->stmt)))) > > > + { > > > + if (dump_enabled_p ()) > > > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > > > + "conversion in the reduction chain.\n"); > > > + return false; > > > + } > > > } > > > + else if (!stmt_info) > > > + /* First non-conversion stmt. */ > > > + stmt_info = vdef; > > > + reduc_def = gimple_op (vdef->stmt, 1 + STMT_VINFO_REDUC_IDX (vdef)); > > > + reduc_chain_length++; > > > } > > > /* PHIs should not participate in patterns. */ > > > gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info)); > > > @@ -5780,6 +5809,13 @@ vectorizable_reduction (stmt_vec_info st > > > nested_cycle = true; > > > } > > > > > > + /* STMT_VINFO_REDUC_DEF doesn't point to the first but the last > > > + element. */ > > > + if (slp_node && REDUC_GROUP_FIRST_ELEMENT (stmt_info)) > > > + { > > > + gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (stmt_info)); > > > + stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info); > > > + } > > > if (REDUC_GROUP_FIRST_ELEMENT (stmt_info)) > > > gcc_assert (slp_node > > > && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info); > > > @@ -5815,6 +5851,8 @@ vectorizable_reduction (stmt_vec_info st > > > inside the loop body. The last operand is the reduction variable, > > > which is defined by the loop-header-phi. */ > > > > > > + tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); > > > + STMT_VINFO_REDUC_VECTYPE (reduc_info) = vectype_out; > > > gassign *stmt = as_a <gassign *> (stmt_info->stmt); > > > enum tree_code code = gimple_assign_rhs_code (stmt); > > > bool lane_reduc_code_p > > > @@ -5831,40 +5869,6 @@ vectorizable_reduction (stmt_vec_info st > > > if (!type_has_mode_precision_p (scalar_type)) > > > return false; > > > > > > - /* All uses but the last are expected to be defined in the loop. > > > - The last use is the reduction variable. In case of nested cycle this > > > - assumption is not true: we use reduc_index to record the index of the > > > - reduction variable. */ > > > - gphi *reduc_def_phi = as_a <gphi *> (phi_info->stmt); > > > - > > > - /* Verify following REDUC_IDX from the latch def leads us back to the PHI > > > - and compute the reduction chain length. */ > > > - tree reduc_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi, > > > - loop_latch_edge (loop)); > > > - unsigned reduc_chain_length = 0; > > > - bool only_slp_reduc_chain = true; > > > - while (reduc_def != PHI_RESULT (reduc_def_phi)) > > > - { > > > - stmt_vec_info def = loop_vinfo->lookup_def (reduc_def); > > > - stmt_vec_info vdef = vect_stmt_to_vectorize (def); > > > - if (STMT_VINFO_REDUC_IDX (vdef) == -1) > > > - { > > > - if (dump_enabled_p ()) > > > - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > > > - "reduction chain broken by patterns.\n"); > > > - return false; > > > - } > > > - if (!REDUC_GROUP_FIRST_ELEMENT (vdef)) > > > - only_slp_reduc_chain = false; > > > - /* ??? For epilogue generation live members of the chain need > > > - to point back to the PHI via their original stmt for > > > - info_for_reduction to work. */ > > > - if (STMT_VINFO_LIVE_P (vdef)) > > > - STMT_VINFO_REDUC_DEF (def) = phi_info; > > > - reduc_def = gimple_op (vdef->stmt, 1 + STMT_VINFO_REDUC_IDX (vdef)); > > > - reduc_chain_length++; > > > - } > > > - > > > /* For lane-reducing ops we're reducing the number of reduction PHIs > > > which means the only use of that may be in the lane-reducing operation. */ > > > if (lane_reduc_code_p > > > @@ -5877,6 +5881,10 @@ vectorizable_reduction (stmt_vec_info st > > > return false; > > > } > > > > > > + /* All uses but the last are expected to be defined in the loop. > > > + The last use is the reduction variable. In case of nested cycle this > > > + assumption is not true: we use reduc_index to record the index of the > > > + reduction variable. */ > > > reduc_def = PHI_RESULT (reduc_def_phi); > > > for (i = 0; i < op_type; i++) > > > { > > > @@ -5931,7 +5939,7 @@ vectorizable_reduction (stmt_vec_info st > > > } > > > } > > > if (!vectype_in) > > > - vectype_in = vectype_out; > > > + vectype_in = STMT_VINFO_VECTYPE (phi_info); > > > STMT_VINFO_REDUC_VECTYPE_IN (reduc_info) = vectype_in; > > > > > > enum vect_reduction_type v_reduc_type = STMT_VINFO_REDUC_TYPE (phi_info); > > > @@ -6037,12 +6045,6 @@ vectorizable_reduction (stmt_vec_info st > > > } > > > } > > > > > > - if (REDUC_GROUP_FIRST_ELEMENT (stmt_info)) > > > - /* We changed STMT to be the first stmt in reduction chain, hence we > > > - check that in this case the first element in the chain is STMT. */ > > > - gcc_assert (REDUC_GROUP_FIRST_ELEMENT (STMT_VINFO_REDUC_DEF (phi_info)) > > > - == vect_orig_stmt (stmt_info)); > > > - > > > if (STMT_VINFO_LIVE_P (phi_info)) > > > return false; > > > > > > @@ -6447,8 +6449,15 @@ vectorizable_reduction (stmt_vec_info st > > > && code != SAD_EXPR > > > && reduction_type != FOLD_LEFT_REDUCTION) > > > { > > > - STMT_VINFO_DEF_TYPE (stmt_info) = vect_internal_def; > > > - STMT_VINFO_DEF_TYPE (vect_orig_stmt (stmt_info)) = vect_internal_def; > > > + stmt_vec_info tem > > > + = vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (phi_info)); > > > + if (slp_node && REDUC_GROUP_FIRST_ELEMENT (tem)) > > > + { > > > + gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (tem)); > > > + tem = REDUC_GROUP_FIRST_ELEMENT (tem); > > > + } > > > + STMT_VINFO_DEF_TYPE (vect_orig_stmt (tem)) = vect_internal_def; > > > + STMT_VINFO_DEF_TYPE (tem) = vect_internal_def; > > > } > > > else if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)) > > > { > > > Index: gcc/tree-vect-stmts.c > > > =================================================================== > > > --- gcc/tree-vect-stmts.c (revision 277922) > > > +++ gcc/tree-vect-stmts.c (working copy) > > > @@ -330,13 +330,13 @@ vect_stmt_relevant_p (stmt_vec_info stmt > > > basic_block bb = gimple_bb (USE_STMT (use_p)); > > > if (!flow_bb_inside_loop_p (loop, bb)) > > > { > > > + if (is_gimple_debug (USE_STMT (use_p))) > > > + continue; > > > + > > > if (dump_enabled_p ()) > > > dump_printf_loc (MSG_NOTE, vect_location, > > > "vec_stmt_relevant_p: used out of loop.\n"); > > > > > > - if (is_gimple_debug (USE_STMT (use_p))) > > > - continue; > > > - > > > /* We expect all such uses to be in the loop exit phis > > > (because of loop closed form) */ > > > gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI); > > > Index: gcc/tree-vectorizer.h > > > =================================================================== > > > --- gcc/tree-vectorizer.h (revision 277922) > > > +++ gcc/tree-vectorizer.h (working copy) > > > @@ -1050,6 +1050,9 @@ public: > > > /* The vector input type relevant for reduction vectorization. */ > > > tree reduc_vectype_in; > > > > > > + /* The vector type for performing the actual reduction. */ > > > + tree reduc_vectype; > > > + > > > /* Whether we force a single cycle PHI during reduction vectorization. */ > > > bool force_single_cycle; > > > > > > @@ -1175,6 +1178,7 @@ STMT_VINFO_BB_VINFO (stmt_vec_info stmt_ > > > #define STMT_VINFO_REDUC_CODE(S) (S)->reduc_code > > > #define STMT_VINFO_REDUC_FN(S) (S)->reduc_fn > > > #define STMT_VINFO_REDUC_DEF(S) (S)->reduc_def > > > +#define STMT_VINFO_REDUC_VECTYPE(S) (S)->reduc_vectype > > > #define STMT_VINFO_REDUC_VECTYPE_IN(S) (S)->reduc_vectype_in > > > #define STMT_VINFO_SLP_VECT_ONLY(S) (S)->slp_vect_only_p > > > > > > Index: gcc/testsuite/gcc.dg/vect/pr92205.c > > > =================================================================== > > > --- gcc/testsuite/gcc.dg/vect/pr92205.c (revision 277922) > > > +++ gcc/testsuite/gcc.dg/vect/pr92205.c (working copy) > > > @@ -10,4 +10,4 @@ int b(int n, unsigned char *a) > > > return d; > > > } > > > > > > -/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target { vect_unpack && { ! vect_no_bitwise } } } } } */ > > > +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail *-*-* } } } */ > > > Index: gcc/testsuite/gcc.dg/vect/pr92324-1.c > > > =================================================================== > > > --- gcc/testsuite/gcc.dg/vect/pr92324-1.c (nonexistent) > > > +++ gcc/testsuite/gcc.dg/vect/pr92324-1.c (working copy) > > > @@ -0,0 +1,14 @@ > > > +/* { dg-do compile } */ > > > + > > > +unsigned a, b; > > > +int c, d; > > > +unsigned e(int f) { > > > + if (a > f) > > > + return a; > > > + return f; > > > +} > > > +void g() { > > > + for (; c; c++) > > > + d = e(d); > > > + b = d; > > > +} > > > Index: gcc/testsuite/gcc.dg/vect/pr92324-2.c > > > =================================================================== > > > --- gcc/testsuite/gcc.dg/vect/pr92324-2.c (nonexistent) > > > +++ gcc/testsuite/gcc.dg/vect/pr92324-2.c (working copy) > > > @@ -0,0 +1,21 @@ > > > +#include "tree-vect.h" > > > + > > > +unsigned b[1024]; > > > + > > > +int __attribute__((noipa)) > > > +foo (int n) > > > +{ > > > + int res = 0; > > > + for (int i = 0; i < n; ++i) > > > + res = res > b[i] ? res : b[i]; > > > + return res; > > > +} > > > + > > > +int main () > > > +{ > > > + check_vect (); > > > + b[15] = (unsigned)__INT_MAX__ + 1; > > > + if (foo (16) != -__INT_MAX__ - 1) > > > + __builtin_abort (); > > > + return 0; > > > +} > > > > -- > Richard Biener <rguenther@suse.de> > SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg, > Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)
Index: gcc/tree-vect-loop.c =================================================================== --- gcc/tree-vect-loop.c (revision 277922) +++ gcc/tree-vect-loop.c (working copy) @@ -4231,7 +4231,6 @@ vect_create_epilog_for_reduction (stmt_v gimple *new_phi = NULL, *phi; stmt_vec_info phi_info; gimple_stmt_iterator exit_gsi; - tree vec_dest; tree new_temp = NULL_TREE, new_name, new_scalar_dest; gimple *epilog_stmt = NULL; gimple *exit_phi; @@ -4264,7 +4263,7 @@ vect_create_epilog_for_reduction (stmt_v } gcc_assert (!nested_in_vect_loop || double_reduc); - vectype = STMT_VINFO_VECTYPE (stmt_info); + vectype = STMT_VINFO_REDUC_VECTYPE (reduc_info); gcc_assert (vectype); mode = TYPE_MODE (vectype); @@ -4505,48 +4504,43 @@ vect_create_epilog_for_reduction (stmt_v one vector. */ if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) || direct_slp_reduc) { + gimple_seq stmts = NULL; tree first_vect = PHI_RESULT (new_phis[0]); - gassign *new_vec_stmt = NULL; - vec_dest = vect_create_destination_var (scalar_dest, vectype); + first_vect = gimple_convert (&stmts, vectype, first_vect); for (k = 1; k < new_phis.length (); k++) { gimple *next_phi = new_phis[k]; tree second_vect = PHI_RESULT (next_phi); - tree tem = make_ssa_name (vec_dest, new_vec_stmt); - new_vec_stmt = gimple_build_assign (tem, code, - first_vect, second_vect); - gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT); - first_vect = tem; + second_vect = gimple_convert (&stmts, vectype, second_vect); + first_vect = gimple_build (&stmts, code, vectype, + first_vect, second_vect); } + gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); new_phi_result = first_vect; - if (new_vec_stmt) - { - new_phis.truncate (0); - new_phis.safe_push (new_vec_stmt); - } + new_phis.truncate (0); + new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect)); } /* Likewise if we couldn't use a single defuse cycle. */ else if (ncopies > 1) { gcc_assert (new_phis.length () == 1); + gimple_seq stmts = NULL; tree first_vect = PHI_RESULT (new_phis[0]); - gassign *new_vec_stmt = NULL; - vec_dest = vect_create_destination_var (scalar_dest, vectype); + first_vect = gimple_convert (&stmts, vectype, first_vect); stmt_vec_info next_phi_info = loop_vinfo->lookup_stmt (new_phis[0]); for (int k = 1; k < ncopies; ++k) { next_phi_info = STMT_VINFO_RELATED_STMT (next_phi_info); tree second_vect = PHI_RESULT (next_phi_info->stmt); - tree tem = make_ssa_name (vec_dest, new_vec_stmt); - new_vec_stmt = gimple_build_assign (tem, code, - first_vect, second_vect); - gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT); - first_vect = tem; + second_vect = gimple_convert (&stmts, vectype, second_vect); + first_vect = gimple_build (&stmts, code, vectype, + first_vect, second_vect); } + gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); new_phi_result = first_vect; new_phis.truncate (0); - new_phis.safe_push (new_vec_stmt); + new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect)); } else new_phi_result = PHI_RESULT (new_phis[0]); @@ -4877,13 +4871,14 @@ vect_create_epilog_for_reduction (stmt_v in a vector mode of smaller size and first reduce upper/lower halves against each other. */ enum machine_mode mode1 = mode; + tree stype = TREE_TYPE (vectype); unsigned sz = tree_to_uhwi (TYPE_SIZE_UNIT (vectype)); unsigned sz1 = sz; if (!slp_reduc && (mode1 = targetm.vectorize.split_reduction (mode)) != mode) sz1 = GET_MODE_SIZE (mode1).to_constant (); - tree vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz1); + tree vectype1 = get_vectype_for_scalar_type_and_size (stype, sz1); reduce_with_shift = have_whole_vector_shift (mode1); if (!VECTOR_MODE_P (mode1)) reduce_with_shift = false; @@ -4901,7 +4896,7 @@ vect_create_epilog_for_reduction (stmt_v { gcc_assert (!slp_reduc); sz /= 2; - vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz); + vectype1 = get_vectype_for_scalar_type_and_size (stype, sz); /* The target has to make sure we support lowpart/highpart extraction, either via direct vector extract or through @@ -5004,7 +4999,8 @@ vect_create_epilog_for_reduction (stmt_v dump_printf_loc (MSG_NOTE, vect_location, "Reduce using vector shifts\n"); - vec_dest = vect_create_destination_var (scalar_dest, vectype1); + gimple_seq stmts = NULL; + new_temp = gimple_convert (&stmts, vectype1, new_temp); for (elt_offset = nelements / 2; elt_offset >= 1; elt_offset /= 2) @@ -5012,18 +5008,12 @@ vect_create_epilog_for_reduction (stmt_v calc_vec_perm_mask_for_shift (elt_offset, nelements, &sel); indices.new_vector (sel, 2, nelements); tree mask = vect_gen_perm_mask_any (vectype1, indices); - epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR, - new_temp, zero_vec, mask); - new_name = make_ssa_name (vec_dest, epilog_stmt); - gimple_assign_set_lhs (epilog_stmt, new_name); - gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); - - epilog_stmt = gimple_build_assign (vec_dest, code, new_name, - new_temp); - new_temp = make_ssa_name (vec_dest, epilog_stmt); - gimple_assign_set_lhs (epilog_stmt, new_temp); - gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); + new_name = gimple_build (&stmts, VEC_PERM_EXPR, vectype1, + new_temp, zero_vec, mask); + new_temp = gimple_build (&stmts, code, + vectype1, new_name, new_temp); } + gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); /* 2.4 Extract the final scalar result. Create: s_out3 = extract_field <v_out2, bitpos> */ @@ -5696,7 +5686,6 @@ vectorizable_reduction (stmt_vec_info st stmt_vector_for_cost *cost_vec) { tree scalar_dest; - tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); tree vectype_in = NULL_TREE; loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); @@ -5763,13 +5752,53 @@ vectorizable_reduction (stmt_vec_info st phi_info = loop_vinfo->lookup_stmt (use_stmt); stmt_info = vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (phi_info)); } - /* STMT_VINFO_REDUC_DEF doesn't point to the first but the last - element. */ - if (slp_node && REDUC_GROUP_FIRST_ELEMENT (stmt_info)) + } + + /* PHIs should not participate in patterns. */ + gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info)); + gphi *reduc_def_phi = as_a <gphi *> (phi_info->stmt); + + /* Verify following REDUC_IDX from the latch def leads us back to the PHI + and compute the reduction chain length. */ + tree reduc_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi, + loop_latch_edge (loop)); + unsigned reduc_chain_length = 0; + bool only_slp_reduc_chain = true; + stmt_info = NULL; + while (reduc_def != PHI_RESULT (reduc_def_phi)) + { + stmt_vec_info def = loop_vinfo->lookup_def (reduc_def); + stmt_vec_info vdef = vect_stmt_to_vectorize (def); + if (STMT_VINFO_REDUC_IDX (vdef) == -1) { - gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (stmt_info)); - stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info); + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "reduction chain broken by patterns.\n"); + return false; + } + if (!REDUC_GROUP_FIRST_ELEMENT (vdef)) + only_slp_reduc_chain = false; + /* ??? For epilogue generation live members of the chain need + to point back to the PHI via their original stmt for + info_for_reduction to work. */ + if (STMT_VINFO_LIVE_P (vdef)) + STMT_VINFO_REDUC_DEF (def) = phi_info; + if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (vdef->stmt))) + { + if (!tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (vdef->stmt)), + TREE_TYPE (gimple_assign_rhs1 (vdef->stmt)))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "conversion in the reduction chain.\n"); + return false; + } } + else if (!stmt_info) + /* First non-conversion stmt. */ + stmt_info = vdef; + reduc_def = gimple_op (vdef->stmt, 1 + STMT_VINFO_REDUC_IDX (vdef)); + reduc_chain_length++; } /* PHIs should not participate in patterns. */ gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info)); @@ -5780,6 +5809,13 @@ vectorizable_reduction (stmt_vec_info st nested_cycle = true; } + /* STMT_VINFO_REDUC_DEF doesn't point to the first but the last + element. */ + if (slp_node && REDUC_GROUP_FIRST_ELEMENT (stmt_info)) + { + gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (stmt_info)); + stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info); + } if (REDUC_GROUP_FIRST_ELEMENT (stmt_info)) gcc_assert (slp_node && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info); @@ -5815,6 +5851,8 @@ vectorizable_reduction (stmt_vec_info st inside the loop body. The last operand is the reduction variable, which is defined by the loop-header-phi. */ + tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); + STMT_VINFO_REDUC_VECTYPE (reduc_info) = vectype_out; gassign *stmt = as_a <gassign *> (stmt_info->stmt); enum tree_code code = gimple_assign_rhs_code (stmt); bool lane_reduc_code_p @@ -5831,40 +5869,6 @@ vectorizable_reduction (stmt_vec_info st if (!type_has_mode_precision_p (scalar_type)) return false; - /* All uses but the last are expected to be defined in the loop. - The last use is the reduction variable. In case of nested cycle this - assumption is not true: we use reduc_index to record the index of the - reduction variable. */ - gphi *reduc_def_phi = as_a <gphi *> (phi_info->stmt); - - /* Verify following REDUC_IDX from the latch def leads us back to the PHI - and compute the reduction chain length. */ - tree reduc_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi, - loop_latch_edge (loop)); - unsigned reduc_chain_length = 0; - bool only_slp_reduc_chain = true; - while (reduc_def != PHI_RESULT (reduc_def_phi)) - { - stmt_vec_info def = loop_vinfo->lookup_def (reduc_def); - stmt_vec_info vdef = vect_stmt_to_vectorize (def); - if (STMT_VINFO_REDUC_IDX (vdef) == -1) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "reduction chain broken by patterns.\n"); - return false; - } - if (!REDUC_GROUP_FIRST_ELEMENT (vdef)) - only_slp_reduc_chain = false; - /* ??? For epilogue generation live members of the chain need - to point back to the PHI via their original stmt for - info_for_reduction to work. */ - if (STMT_VINFO_LIVE_P (vdef)) - STMT_VINFO_REDUC_DEF (def) = phi_info; - reduc_def = gimple_op (vdef->stmt, 1 + STMT_VINFO_REDUC_IDX (vdef)); - reduc_chain_length++; - } - /* For lane-reducing ops we're reducing the number of reduction PHIs which means the only use of that may be in the lane-reducing operation. */ if (lane_reduc_code_p @@ -5877,6 +5881,10 @@ vectorizable_reduction (stmt_vec_info st return false; } + /* All uses but the last are expected to be defined in the loop. + The last use is the reduction variable. In case of nested cycle this + assumption is not true: we use reduc_index to record the index of the + reduction variable. */ reduc_def = PHI_RESULT (reduc_def_phi); for (i = 0; i < op_type; i++) { @@ -5931,7 +5939,7 @@ vectorizable_reduction (stmt_vec_info st } } if (!vectype_in) - vectype_in = vectype_out; + vectype_in = STMT_VINFO_VECTYPE (phi_info); STMT_VINFO_REDUC_VECTYPE_IN (reduc_info) = vectype_in; enum vect_reduction_type v_reduc_type = STMT_VINFO_REDUC_TYPE (phi_info); @@ -6037,12 +6045,6 @@ vectorizable_reduction (stmt_vec_info st } } - if (REDUC_GROUP_FIRST_ELEMENT (stmt_info)) - /* We changed STMT to be the first stmt in reduction chain, hence we - check that in this case the first element in the chain is STMT. */ - gcc_assert (REDUC_GROUP_FIRST_ELEMENT (STMT_VINFO_REDUC_DEF (phi_info)) - == vect_orig_stmt (stmt_info)); - if (STMT_VINFO_LIVE_P (phi_info)) return false; @@ -6447,8 +6449,15 @@ vectorizable_reduction (stmt_vec_info st && code != SAD_EXPR && reduction_type != FOLD_LEFT_REDUCTION) { - STMT_VINFO_DEF_TYPE (stmt_info) = vect_internal_def; - STMT_VINFO_DEF_TYPE (vect_orig_stmt (stmt_info)) = vect_internal_def; + stmt_vec_info tem + = vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (phi_info)); + if (slp_node && REDUC_GROUP_FIRST_ELEMENT (tem)) + { + gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (tem)); + tem = REDUC_GROUP_FIRST_ELEMENT (tem); + } + STMT_VINFO_DEF_TYPE (vect_orig_stmt (tem)) = vect_internal_def; + STMT_VINFO_DEF_TYPE (tem) = vect_internal_def; } else if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)) { Index: gcc/tree-vect-stmts.c =================================================================== --- gcc/tree-vect-stmts.c (revision 277922) +++ gcc/tree-vect-stmts.c (working copy) @@ -330,13 +330,13 @@ vect_stmt_relevant_p (stmt_vec_info stmt basic_block bb = gimple_bb (USE_STMT (use_p)); if (!flow_bb_inside_loop_p (loop, bb)) { + if (is_gimple_debug (USE_STMT (use_p))) + continue; + if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "vec_stmt_relevant_p: used out of loop.\n"); - if (is_gimple_debug (USE_STMT (use_p))) - continue; - /* We expect all such uses to be in the loop exit phis (because of loop closed form) */ gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI); Index: gcc/tree-vectorizer.h =================================================================== --- gcc/tree-vectorizer.h (revision 277922) +++ gcc/tree-vectorizer.h (working copy) @@ -1050,6 +1050,9 @@ public: /* The vector input type relevant for reduction vectorization. */ tree reduc_vectype_in; + /* The vector type for performing the actual reduction. */ + tree reduc_vectype; + /* Whether we force a single cycle PHI during reduction vectorization. */ bool force_single_cycle; @@ -1175,6 +1178,7 @@ STMT_VINFO_BB_VINFO (stmt_vec_info stmt_ #define STMT_VINFO_REDUC_CODE(S) (S)->reduc_code #define STMT_VINFO_REDUC_FN(S) (S)->reduc_fn #define STMT_VINFO_REDUC_DEF(S) (S)->reduc_def +#define STMT_VINFO_REDUC_VECTYPE(S) (S)->reduc_vectype #define STMT_VINFO_REDUC_VECTYPE_IN(S) (S)->reduc_vectype_in #define STMT_VINFO_SLP_VECT_ONLY(S) (S)->slp_vect_only_p Index: gcc/testsuite/gcc.dg/vect/pr92205.c =================================================================== --- gcc/testsuite/gcc.dg/vect/pr92205.c (revision 277922) +++ gcc/testsuite/gcc.dg/vect/pr92205.c (working copy) @@ -10,4 +10,4 @@ int b(int n, unsigned char *a) return d; } -/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target { vect_unpack && { ! vect_no_bitwise } } } } } */ +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail *-*-* } } } */ Index: gcc/testsuite/gcc.dg/vect/pr92324-1.c =================================================================== --- gcc/testsuite/gcc.dg/vect/pr92324-1.c (nonexistent) +++ gcc/testsuite/gcc.dg/vect/pr92324-1.c (working copy) @@ -0,0 +1,14 @@ +/* { dg-do compile } */ + +unsigned a, b; +int c, d; +unsigned e(int f) { + if (a > f) + return a; + return f; +} +void g() { + for (; c; c++) + d = e(d); + b = d; +} Index: gcc/testsuite/gcc.dg/vect/pr92324-2.c =================================================================== --- gcc/testsuite/gcc.dg/vect/pr92324-2.c (nonexistent) +++ gcc/testsuite/gcc.dg/vect/pr92324-2.c (working copy) @@ -0,0 +1,21 @@ +#include "tree-vect.h" + +unsigned b[1024]; + +int __attribute__((noipa)) +foo (int n) +{ + int res = 0; + for (int i = 0; i < n; ++i) + res = res > b[i] ? res : b[i]; + return res; +} + +int main () +{ + check_vect (); + b[15] = (unsigned)__INT_MAX__ + 1; + if (foo (16) != -__INT_MAX__ - 1) + __builtin_abort (); + return 0; +}