diff mbox series

Fix PR92324

Message ID nycvar.YFH.7.76.1911080955100.5566@zhemvz.fhfr.qr
State New
Headers show
Series Fix PR92324 | expand

Commit Message

Richard Biener Nov. 8, 2019, 8:57 a.m. UTC
I've been sitting on this for a few days since I'm not 100% happy
with how the code looks like.  There's possibly still holes in it
(chains with mixed signed/unsigned adds for example might pick
up signed adds in the epilogue), but the wrong-code cases should
work fine now.  I'm probably going to followup with some
mass renaming of variable/parameter names to make it more clear
which stmt / type we are actually looking at ...

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2019-11-08  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/
	* tree-vect-loop.c (vect_create_epilog_for_reduction): Use
	STMT_VINFO_REDUC_VECTYPE for all computations, inserting
	sign-conversions as necessary.
	(vectorizable_reduction): Reject conversions in the chain
	that are not sign-conversions, base analysis on a non-converting
	stmt and its operation sign.  Set STMT_VINFO_REDUC_VECTYPE.
	* tree-vect-stmts.c (vect_stmt_relevant_p): Don't dump anything
	for debug stmts.
	* tree-vectorizer.h (_stmt_vec_info::reduc_vectype): New.
	(STMT_VINFO_REDUC_VECTYPE): Likewise.

	* gcc.dg/vect/pr92205.c: XFAIL.
	* gcc.dg/vect/pr92324-1.c: New testcase.
	* gcc.dg/vect/pr92324-2.c: Likewise.

Comments

Richard Sandiford Nov. 8, 2019, 10:46 a.m. UTC | #1
Richard Biener <rguenther@suse.de> writes:
> I've been sitting on this for a few days since I'm not 100% happy
> with how the code looks like.  There's possibly still holes in it
> (chains with mixed signed/unsigned adds for example might pick
> up signed adds in the epilogue), but the wrong-code cases should
> work fine now.  I'm probably going to followup with some
> mass renaming of variable/parameter names to make it more clear
> which stmt / type we are actually looking at ...
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Does this look like the right way of updating neutral_op_for_slp_reduction?
It now needs to know whether the caller is using STMT_VINFO_REDUC_VECTYPE
(for an epilogue value) or STMT_VINFO_REDUC_VECTYPE_IN (for a PHI argument).

Fixes various gcc.target/aarch64/sve/slp_* tests, will give it a
full test on aarch64-linux-gnu.

Thanks,
Richard


2019-11-08  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* tree-vect-loop.c (neutral_op_for_slp_reduction): Take the
	vector type as an argument rather than reading it from the
	stmt_vec_info.
	(vect_create_epilog_for_reduction): Update accordingly,
	passing the STMT_VINFO_REDUC_VECTYPE.
	(vectorizable_reduction): Likewise.
	(vect_transform_cycle_phi): Likewise, but passing the
	STMT_VINFO_REDUC_VECTYPE_IN.

Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c	2019-11-08 09:06:29.654896085 +0000
+++ gcc/tree-vect-loop.c	2019-11-08 10:41:54.498861004 +0000
@@ -2586,17 +2586,17 @@ reduction_fn_for_scalar_code (enum tree_
 
 /* If there is a neutral value X such that SLP reduction NODE would not
    be affected by the introduction of additional X elements, return that X,
-   otherwise return null.  CODE is the code of the reduction.  REDUC_CHAIN
-   is true if the SLP statements perform a single reduction, false if each
-   statement performs an independent reduction.  */
+   otherwise return null.  CODE is the code of the reduction and VECTOR_TYPE
+   is the vector type that would hold element X.  REDUC_CHAIN is true if
+   the SLP statements perform a single reduction, false if each statement
+   performs an independent reduction.  */
 
 static tree
-neutral_op_for_slp_reduction (slp_tree slp_node, tree_code code,
-			      bool reduc_chain)
+neutral_op_for_slp_reduction (slp_tree slp_node, tree vector_type,
+			      tree_code code, bool reduc_chain)
 {
   vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
   stmt_vec_info stmt_vinfo = stmts[0];
-  tree vector_type = STMT_VINFO_VECTYPE (stmt_vinfo);
   tree scalar_type = TREE_TYPE (vector_type);
   class loop *loop = gimple_bb (stmt_vinfo->stmt)->loop_father;
   gcc_assert (loop);
@@ -4216,11 +4216,6 @@ vect_create_epilog_for_reduction (stmt_v
     = as_a <gphi *> (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))->stmt);
   enum tree_code code = STMT_VINFO_REDUC_CODE (reduc_info);
   internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info);
-  tree neutral_op = NULL_TREE;
-  if (slp_node)
-    neutral_op
-      = neutral_op_for_slp_reduction (slp_node_instance->reduc_phis, code,
-				      REDUC_GROUP_FIRST_ELEMENT (stmt_info));
   stmt_vec_info prev_phi_info;
   tree vectype;
   machine_mode mode;
@@ -4267,11 +4262,15 @@ vect_create_epilog_for_reduction (stmt_v
   gcc_assert (vectype);
   mode = TYPE_MODE (vectype);
 
+  tree neutral_op = NULL_TREE;
   tree initial_def = NULL;
   tree induc_val = NULL_TREE;
   tree adjustment_def = NULL;
   if (slp_node)
-    ;
+    neutral_op
+      = neutral_op_for_slp_reduction (slp_node_instance->reduc_phis,
+				      vectype, code,
+				      REDUC_GROUP_FIRST_ELEMENT (stmt_info));
   else
     {
       /* Get at the scalar def before the loop, that defines the initial value
@@ -6210,7 +6209,7 @@ vectorizable_reduction (stmt_vec_info st
   tree neutral_op = NULL_TREE;
   if (slp_node)
     neutral_op = neutral_op_for_slp_reduction
-      (slp_node_instance->reduc_phis, orig_code,
+      (slp_node_instance->reduc_phis, vectype_out, orig_code,
        REDUC_GROUP_FIRST_ELEMENT (stmt_info) != NULL);
 
   if (double_reduc && reduction_type == FOLD_LEFT_REDUCTION)
@@ -6793,7 +6792,7 @@ vect_transform_cycle_phi (stmt_vec_info
       gcc_assert (slp_node == slp_node_instance->reduc_phis);
       stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info);
       tree neutral_op
-	= neutral_op_for_slp_reduction (slp_node,
+	= neutral_op_for_slp_reduction (slp_node, vectype_in,
 					STMT_VINFO_REDUC_CODE (reduc_info),
 					first != NULL);
       get_initial_defs_for_reduction (slp_node_instance->reduc_phis,
Richard Biener Nov. 8, 2019, 11:37 a.m. UTC | #2
On Fri, 8 Nov 2019, Richard Sandiford wrote:

> Richard Biener <rguenther@suse.de> writes:
> > I've been sitting on this for a few days since I'm not 100% happy
> > with how the code looks like.  There's possibly still holes in it
> > (chains with mixed signed/unsigned adds for example might pick
> > up signed adds in the epilogue), but the wrong-code cases should
> > work fine now.  I'm probably going to followup with some
> > mass renaming of variable/parameter names to make it more clear
> > which stmt / type we are actually looking at ...
> >
> > Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.
> 
> Does this look like the right way of updating neutral_op_for_slp_reduction?
> It now needs to know whether the caller is using STMT_VINFO_REDUC_VECTYPE
> (for an epilogue value) or STMT_VINFO_REDUC_VECTYPE_IN (for a PHI argument).
> 
> Fixes various gcc.target/aarch64/sve/slp_* tests, will give it a
> full test on aarch64-linux-gnu.

Yeah, it looks sensible.  In vect_create_epilog_for_reduction
please move the call down to the only use in the

  else if (direct_slp_reduc)
    {

block.

Thanks,
Richard.

> Thanks,
> Richard
> 
> 
> 2019-11-08  Richard Sandiford  <richard.sandiford@arm.com>
> 
> gcc/
> 	* tree-vect-loop.c (neutral_op_for_slp_reduction): Take the
> 	vector type as an argument rather than reading it from the
> 	stmt_vec_info.
> 	(vect_create_epilog_for_reduction): Update accordingly,
> 	passing the STMT_VINFO_REDUC_VECTYPE.
> 	(vectorizable_reduction): Likewise.
> 	(vect_transform_cycle_phi): Likewise, but passing the
> 	STMT_VINFO_REDUC_VECTYPE_IN.
> 
> Index: gcc/tree-vect-loop.c
> ===================================================================
> --- gcc/tree-vect-loop.c	2019-11-08 09:06:29.654896085 +0000
> +++ gcc/tree-vect-loop.c	2019-11-08 10:41:54.498861004 +0000
> @@ -2586,17 +2586,17 @@ reduction_fn_for_scalar_code (enum tree_
>  
>  /* If there is a neutral value X such that SLP reduction NODE would not
>     be affected by the introduction of additional X elements, return that X,
> -   otherwise return null.  CODE is the code of the reduction.  REDUC_CHAIN
> -   is true if the SLP statements perform a single reduction, false if each
> -   statement performs an independent reduction.  */
> +   otherwise return null.  CODE is the code of the reduction and VECTOR_TYPE
> +   is the vector type that would hold element X.  REDUC_CHAIN is true if
> +   the SLP statements perform a single reduction, false if each statement
> +   performs an independent reduction.  */
>  
>  static tree
> -neutral_op_for_slp_reduction (slp_tree slp_node, tree_code code,
> -			      bool reduc_chain)
> +neutral_op_for_slp_reduction (slp_tree slp_node, tree vector_type,
> +			      tree_code code, bool reduc_chain)
>  {
>    vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
>    stmt_vec_info stmt_vinfo = stmts[0];
> -  tree vector_type = STMT_VINFO_VECTYPE (stmt_vinfo);
>    tree scalar_type = TREE_TYPE (vector_type);
>    class loop *loop = gimple_bb (stmt_vinfo->stmt)->loop_father;
>    gcc_assert (loop);
> @@ -4216,11 +4216,6 @@ vect_create_epilog_for_reduction (stmt_v
>      = as_a <gphi *> (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))->stmt);
>    enum tree_code code = STMT_VINFO_REDUC_CODE (reduc_info);
>    internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info);
> -  tree neutral_op = NULL_TREE;
> -  if (slp_node)
> -    neutral_op
> -      = neutral_op_for_slp_reduction (slp_node_instance->reduc_phis, code,
> -				      REDUC_GROUP_FIRST_ELEMENT (stmt_info));
>    stmt_vec_info prev_phi_info;
>    tree vectype;
>    machine_mode mode;
> @@ -4267,11 +4262,15 @@ vect_create_epilog_for_reduction (stmt_v
>    gcc_assert (vectype);
>    mode = TYPE_MODE (vectype);
>  
> +  tree neutral_op = NULL_TREE;
>    tree initial_def = NULL;
>    tree induc_val = NULL_TREE;
>    tree adjustment_def = NULL;
>    if (slp_node)
> -    ;
> +    neutral_op
> +      = neutral_op_for_slp_reduction (slp_node_instance->reduc_phis,
> +				      vectype, code,
> +				      REDUC_GROUP_FIRST_ELEMENT (stmt_info));
>    else
>      {
>        /* Get at the scalar def before the loop, that defines the initial value
> @@ -6210,7 +6209,7 @@ vectorizable_reduction (stmt_vec_info st
>    tree neutral_op = NULL_TREE;
>    if (slp_node)
>      neutral_op = neutral_op_for_slp_reduction
> -      (slp_node_instance->reduc_phis, orig_code,
> +      (slp_node_instance->reduc_phis, vectype_out, orig_code,
>         REDUC_GROUP_FIRST_ELEMENT (stmt_info) != NULL);
>  
>    if (double_reduc && reduction_type == FOLD_LEFT_REDUCTION)
> @@ -6793,7 +6792,7 @@ vect_transform_cycle_phi (stmt_vec_info
>        gcc_assert (slp_node == slp_node_instance->reduc_phis);
>        stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info);
>        tree neutral_op
> -	= neutral_op_for_slp_reduction (slp_node,
> +	= neutral_op_for_slp_reduction (slp_node, vectype_in,
>  					STMT_VINFO_REDUC_CODE (reduc_info),
>  					first != NULL);
>        get_initial_defs_for_reduction (slp_node_instance->reduc_phis,
>
Richard Sandiford Nov. 8, 2019, 4:09 p.m. UTC | #3
Richard Biener <rguenther@suse.de> writes:
> On Fri, 8 Nov 2019, Richard Sandiford wrote:
>
>> Richard Biener <rguenther@suse.de> writes:
>> > I've been sitting on this for a few days since I'm not 100% happy
>> > with how the code looks like.  There's possibly still holes in it
>> > (chains with mixed signed/unsigned adds for example might pick
>> > up signed adds in the epilogue), but the wrong-code cases should
>> > work fine now.  I'm probably going to followup with some
>> > mass renaming of variable/parameter names to make it more clear
>> > which stmt / type we are actually looking at ...
>> >
>> > Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.
>> 
>> Does this look like the right way of updating neutral_op_for_slp_reduction?
>> It now needs to know whether the caller is using STMT_VINFO_REDUC_VECTYPE
>> (for an epilogue value) or STMT_VINFO_REDUC_VECTYPE_IN (for a PHI argument).
>> 
>> Fixes various gcc.target/aarch64/sve/slp_* tests, will give it a
>> full test on aarch64-linux-gnu.
>
> Yeah, it looks sensible.  In vect_create_epilog_for_reduction
> please move the call down to the only use in the
>
>   else if (direct_slp_reduc)
>     {
>
> block.

Thanks, here's what I installed after testing on aarch64-linux-gnu
and x86_64-linux-gnu.

Richard

2019-11-08  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* tree-vect-loop.c (neutral_op_for_slp_reduction): Take the
	vector type as an argument rather than reading it from the
	stmt_vec_info.
	(vect_create_epilog_for_reduction): Update accordingly.
	(vectorizable_reduction): Likewise.
	(vect_transform_cycle_phi): Likewise.

Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c	2019-11-08 11:58:22.331095690 +0000
+++ gcc/tree-vect-loop.c	2019-11-08 16:06:11.389032120 +0000
@@ -2590,17 +2590,17 @@ reduction_fn_for_scalar_code (enum tree_
 
 /* If there is a neutral value X such that SLP reduction NODE would not
    be affected by the introduction of additional X elements, return that X,
-   otherwise return null.  CODE is the code of the reduction.  REDUC_CHAIN
-   is true if the SLP statements perform a single reduction, false if each
-   statement performs an independent reduction.  */
+   otherwise return null.  CODE is the code of the reduction and VECTOR_TYPE
+   is the vector type that would hold element X.  REDUC_CHAIN is true if
+   the SLP statements perform a single reduction, false if each statement
+   performs an independent reduction.  */
 
 static tree
-neutral_op_for_slp_reduction (slp_tree slp_node, tree_code code,
-			      bool reduc_chain)
+neutral_op_for_slp_reduction (slp_tree slp_node, tree vector_type,
+			      tree_code code, bool reduc_chain)
 {
   vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
   stmt_vec_info stmt_vinfo = stmts[0];
-  tree vector_type = STMT_VINFO_VECTYPE (stmt_vinfo);
   tree scalar_type = TREE_TYPE (vector_type);
   class loop *loop = gimple_bb (stmt_vinfo->stmt)->loop_father;
   gcc_assert (loop);
@@ -4220,11 +4220,6 @@ vect_create_epilog_for_reduction (stmt_v
     = as_a <gphi *> (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))->stmt);
   enum tree_code code = STMT_VINFO_REDUC_CODE (reduc_info);
   internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info);
-  tree neutral_op = NULL_TREE;
-  if (slp_node)
-    neutral_op
-      = neutral_op_for_slp_reduction (slp_node_instance->reduc_phis, code,
-				      REDUC_GROUP_FIRST_ELEMENT (stmt_info));
   stmt_vec_info prev_phi_info;
   tree vectype;
   machine_mode mode;
@@ -4822,6 +4817,14 @@ vect_create_epilog_for_reduction (stmt_v
 	 scalar value if we have one, otherwise the initial scalar value
 	 is itself a neutral value.  */
       tree vector_identity = NULL_TREE;
+      tree neutral_op = NULL_TREE;
+      if (slp_node)
+	{
+	  stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (stmt_info);
+	  neutral_op
+	    = neutral_op_for_slp_reduction (slp_node_instance->reduc_phis,
+					    vectype, code, first != NULL);
+	}
       if (neutral_op)
 	vector_identity = gimple_build_vector_from_val (&seq, vectype,
 							neutral_op);
@@ -6214,7 +6217,7 @@ vectorizable_reduction (stmt_vec_info st
   tree neutral_op = NULL_TREE;
   if (slp_node)
     neutral_op = neutral_op_for_slp_reduction
-      (slp_node_instance->reduc_phis, orig_code,
+      (slp_node_instance->reduc_phis, vectype_out, orig_code,
        REDUC_GROUP_FIRST_ELEMENT (stmt_info) != NULL);
 
   if (double_reduc && reduction_type == FOLD_LEFT_REDUCTION)
@@ -6797,7 +6800,7 @@ vect_transform_cycle_phi (stmt_vec_info
       gcc_assert (slp_node == slp_node_instance->reduc_phis);
       stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info);
       tree neutral_op
-	= neutral_op_for_slp_reduction (slp_node,
+	= neutral_op_for_slp_reduction (slp_node, vectype_out,
 					STMT_VINFO_REDUC_CODE (reduc_info),
 					first != NULL);
       get_initial_defs_for_reduction (slp_node_instance->reduc_phis,
Christophe Lyon Nov. 11, 2019, 11:21 a.m. UTC | #4
On Fri, 8 Nov 2019 at 09:57, Richard Biener <rguenther@suse.de> wrote:
>
>
> I've been sitting on this for a few days since I'm not 100% happy
> with how the code looks like.  There's possibly still holes in it
> (chains with mixed signed/unsigned adds for example might pick
> up signed adds in the epilogue), but the wrong-code cases should
> work fine now.  I'm probably going to followup with some
> mass renaming of variable/parameter names to make it more clear
> which stmt / type we are actually looking at ...
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.
>
> Richard.
>
> 2019-11-08  Richard Biener  <rguenther@suse.de>
>
>         PR tree-optimization/
>         * tree-vect-loop.c (vect_create_epilog_for_reduction): Use
>         STMT_VINFO_REDUC_VECTYPE for all computations, inserting
>         sign-conversions as necessary.
>         (vectorizable_reduction): Reject conversions in the chain
>         that are not sign-conversions, base analysis on a non-converting
>         stmt and its operation sign.  Set STMT_VINFO_REDUC_VECTYPE.
>         * tree-vect-stmts.c (vect_stmt_relevant_p): Don't dump anything
>         for debug stmts.
>         * tree-vectorizer.h (_stmt_vec_info::reduc_vectype): New.
>         (STMT_VINFO_REDUC_VECTYPE): Likewise.
>
>         * gcc.dg/vect/pr92205.c: XFAIL.
>         * gcc.dg/vect/pr92324-1.c: New testcase.
>         * gcc.dg/vect/pr92324-2.c: Likewise.
>

Hi Richard,

This new testcase (pr92324-2.c) fails on arm/aarch64:
FAIL: gcc.dg/vect/pr92324-2.c -flto -ffat-lto-objects execution test
FAIL: gcc.dg/vect/pr92324-2.c execution test

Christophe


> Index: gcc/tree-vect-loop.c
> ===================================================================
> --- gcc/tree-vect-loop.c        (revision 277922)
> +++ gcc/tree-vect-loop.c        (working copy)
> @@ -4231,7 +4231,6 @@ vect_create_epilog_for_reduction (stmt_v
>    gimple *new_phi = NULL, *phi;
>    stmt_vec_info phi_info;
>    gimple_stmt_iterator exit_gsi;
> -  tree vec_dest;
>    tree new_temp = NULL_TREE, new_name, new_scalar_dest;
>    gimple *epilog_stmt = NULL;
>    gimple *exit_phi;
> @@ -4264,7 +4263,7 @@ vect_create_epilog_for_reduction (stmt_v
>      }
>    gcc_assert (!nested_in_vect_loop || double_reduc);
>
> -  vectype = STMT_VINFO_VECTYPE (stmt_info);
> +  vectype = STMT_VINFO_REDUC_VECTYPE (reduc_info);
>    gcc_assert (vectype);
>    mode = TYPE_MODE (vectype);
>
> @@ -4505,48 +4504,43 @@ vect_create_epilog_for_reduction (stmt_v
>       one vector.  */
>    if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) || direct_slp_reduc)
>      {
> +      gimple_seq stmts = NULL;
>        tree first_vect = PHI_RESULT (new_phis[0]);
> -      gassign *new_vec_stmt = NULL;
> -      vec_dest = vect_create_destination_var (scalar_dest, vectype);
> +      first_vect = gimple_convert (&stmts, vectype, first_vect);
>        for (k = 1; k < new_phis.length (); k++)
>          {
>           gimple *next_phi = new_phis[k];
>            tree second_vect = PHI_RESULT (next_phi);
> -          tree tem = make_ssa_name (vec_dest, new_vec_stmt);
> -          new_vec_stmt = gimple_build_assign (tem, code,
> -                                             first_vect, second_vect);
> -          gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
> -         first_vect = tem;
> +         second_vect = gimple_convert (&stmts, vectype, second_vect);
> +          first_vect = gimple_build (&stmts, code, vectype,
> +                                    first_vect, second_vect);
>          }
> +      gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
>
>        new_phi_result = first_vect;
> -      if (new_vec_stmt)
> -        {
> -          new_phis.truncate (0);
> -          new_phis.safe_push (new_vec_stmt);
> -        }
> +      new_phis.truncate (0);
> +      new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect));
>      }
>    /* Likewise if we couldn't use a single defuse cycle.  */
>    else if (ncopies > 1)
>      {
>        gcc_assert (new_phis.length () == 1);
> +      gimple_seq stmts = NULL;
>        tree first_vect = PHI_RESULT (new_phis[0]);
> -      gassign *new_vec_stmt = NULL;
> -      vec_dest = vect_create_destination_var (scalar_dest, vectype);
> +      first_vect = gimple_convert (&stmts, vectype, first_vect);
>        stmt_vec_info next_phi_info = loop_vinfo->lookup_stmt (new_phis[0]);
>        for (int k = 1; k < ncopies; ++k)
>         {
>           next_phi_info = STMT_VINFO_RELATED_STMT (next_phi_info);
>           tree second_vect = PHI_RESULT (next_phi_info->stmt);
> -          tree tem = make_ssa_name (vec_dest, new_vec_stmt);
> -          new_vec_stmt = gimple_build_assign (tem, code,
> -                                             first_vect, second_vect);
> -          gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
> -         first_vect = tem;
> +         second_vect = gimple_convert (&stmts, vectype, second_vect);
> +         first_vect = gimple_build (&stmts, code, vectype,
> +                                    first_vect, second_vect);
>         }
> +      gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
>        new_phi_result = first_vect;
>        new_phis.truncate (0);
> -      new_phis.safe_push (new_vec_stmt);
> +      new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect));
>      }
>    else
>      new_phi_result = PHI_RESULT (new_phis[0]);
> @@ -4877,13 +4871,14 @@ vect_create_epilog_for_reduction (stmt_v
>          in a vector mode of smaller size and first reduce upper/lower
>          halves against each other.  */
>        enum machine_mode mode1 = mode;
> +      tree stype = TREE_TYPE (vectype);
>        unsigned sz = tree_to_uhwi (TYPE_SIZE_UNIT (vectype));
>        unsigned sz1 = sz;
>        if (!slp_reduc
>           && (mode1 = targetm.vectorize.split_reduction (mode)) != mode)
>         sz1 = GET_MODE_SIZE (mode1).to_constant ();
>
> -      tree vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz1);
> +      tree vectype1 = get_vectype_for_scalar_type_and_size (stype, sz1);
>        reduce_with_shift = have_whole_vector_shift (mode1);
>        if (!VECTOR_MODE_P (mode1))
>         reduce_with_shift = false;
> @@ -4901,7 +4896,7 @@ vect_create_epilog_for_reduction (stmt_v
>         {
>           gcc_assert (!slp_reduc);
>           sz /= 2;
> -         vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz);
> +         vectype1 = get_vectype_for_scalar_type_and_size (stype, sz);
>
>           /* The target has to make sure we support lowpart/highpart
>              extraction, either via direct vector extract or through
> @@ -5004,7 +4999,8 @@ vect_create_epilog_for_reduction (stmt_v
>              dump_printf_loc (MSG_NOTE, vect_location,
>                              "Reduce using vector shifts\n");
>
> -          vec_dest = vect_create_destination_var (scalar_dest, vectype1);
> +         gimple_seq stmts = NULL;
> +         new_temp = gimple_convert (&stmts, vectype1, new_temp);
>            for (elt_offset = nelements / 2;
>                 elt_offset >= 1;
>                 elt_offset /= 2)
> @@ -5012,18 +5008,12 @@ vect_create_epilog_for_reduction (stmt_v
>               calc_vec_perm_mask_for_shift (elt_offset, nelements, &sel);
>               indices.new_vector (sel, 2, nelements);
>               tree mask = vect_gen_perm_mask_any (vectype1, indices);
> -             epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR,
> -                                                new_temp, zero_vec, mask);
> -              new_name = make_ssa_name (vec_dest, epilog_stmt);
> -              gimple_assign_set_lhs (epilog_stmt, new_name);
> -              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
> -
> -             epilog_stmt = gimple_build_assign (vec_dest, code, new_name,
> -                                                new_temp);
> -              new_temp = make_ssa_name (vec_dest, epilog_stmt);
> -              gimple_assign_set_lhs (epilog_stmt, new_temp);
> -              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
> +             new_name = gimple_build (&stmts, VEC_PERM_EXPR, vectype1,
> +                                      new_temp, zero_vec, mask);
> +             new_temp = gimple_build (&stmts, code,
> +                                      vectype1, new_name, new_temp);
>              }
> +         gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
>
>           /* 2.4  Extract the final scalar result.  Create:
>              s_out3 = extract_field <v_out2, bitpos>  */
> @@ -5696,7 +5686,6 @@ vectorizable_reduction (stmt_vec_info st
>                         stmt_vector_for_cost *cost_vec)
>  {
>    tree scalar_dest;
> -  tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
>    tree vectype_in = NULL_TREE;
>    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
>    class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
> @@ -5763,13 +5752,53 @@ vectorizable_reduction (stmt_vec_info st
>           phi_info = loop_vinfo->lookup_stmt (use_stmt);
>           stmt_info = vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (phi_info));
>         }
> -      /* STMT_VINFO_REDUC_DEF doesn't point to the first but the last
> -         element.  */
> -      if (slp_node && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
> +    }
> +
> +  /* PHIs should not participate in patterns.  */
> +  gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info));
> +  gphi *reduc_def_phi = as_a <gphi *> (phi_info->stmt);
> +
> +  /* Verify following REDUC_IDX from the latch def leads us back to the PHI
> +     and compute the reduction chain length.  */
> +  tree reduc_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi,
> +                                         loop_latch_edge (loop));
> +  unsigned reduc_chain_length = 0;
> +  bool only_slp_reduc_chain = true;
> +  stmt_info = NULL;
> +  while (reduc_def != PHI_RESULT (reduc_def_phi))
> +    {
> +      stmt_vec_info def = loop_vinfo->lookup_def (reduc_def);
> +      stmt_vec_info vdef = vect_stmt_to_vectorize (def);
> +      if (STMT_VINFO_REDUC_IDX (vdef) == -1)
>         {
> -         gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (stmt_info));
> -         stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info);
> +         if (dump_enabled_p ())
> +           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                            "reduction chain broken by patterns.\n");
> +         return false;
> +       }
> +      if (!REDUC_GROUP_FIRST_ELEMENT (vdef))
> +       only_slp_reduc_chain = false;
> +      /* ???  For epilogue generation live members of the chain need
> +         to point back to the PHI via their original stmt for
> +        info_for_reduction to work.  */
> +      if (STMT_VINFO_LIVE_P (vdef))
> +       STMT_VINFO_REDUC_DEF (def) = phi_info;
> +      if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (vdef->stmt)))
> +       {
> +         if (!tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (vdef->stmt)),
> +                                     TREE_TYPE (gimple_assign_rhs1 (vdef->stmt))))
> +           {
> +             if (dump_enabled_p ())
> +               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                                "conversion in the reduction chain.\n");
> +             return false;
> +           }
>         }
> +      else if (!stmt_info)
> +       /* First non-conversion stmt.  */
> +       stmt_info = vdef;
> +      reduc_def = gimple_op (vdef->stmt, 1 + STMT_VINFO_REDUC_IDX (vdef));
> +      reduc_chain_length++;
>      }
>    /* PHIs should not participate in patterns.  */
>    gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info));
> @@ -5780,6 +5809,13 @@ vectorizable_reduction (stmt_vec_info st
>        nested_cycle = true;
>      }
>
> +  /* STMT_VINFO_REDUC_DEF doesn't point to the first but the last
> +     element.  */
> +  if (slp_node && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
> +    {
> +      gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (stmt_info));
> +      stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info);
> +    }
>    if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
>      gcc_assert (slp_node
>                 && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info);
> @@ -5815,6 +5851,8 @@ vectorizable_reduction (stmt_vec_info st
>          inside the loop body. The last operand is the reduction variable,
>          which is defined by the loop-header-phi.  */
>
> +  tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
> +  STMT_VINFO_REDUC_VECTYPE (reduc_info) = vectype_out;
>    gassign *stmt = as_a <gassign *> (stmt_info->stmt);
>    enum tree_code code = gimple_assign_rhs_code (stmt);
>    bool lane_reduc_code_p
> @@ -5831,40 +5869,6 @@ vectorizable_reduction (stmt_vec_info st
>    if (!type_has_mode_precision_p (scalar_type))
>      return false;
>
> -  /* All uses but the last are expected to be defined in the loop.
> -     The last use is the reduction variable.  In case of nested cycle this
> -     assumption is not true: we use reduc_index to record the index of the
> -     reduction variable.  */
> -  gphi *reduc_def_phi = as_a <gphi *> (phi_info->stmt);
> -
> -  /* Verify following REDUC_IDX from the latch def leads us back to the PHI
> -     and compute the reduction chain length.  */
> -  tree reduc_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi,
> -                                         loop_latch_edge (loop));
> -  unsigned reduc_chain_length = 0;
> -  bool only_slp_reduc_chain = true;
> -  while (reduc_def != PHI_RESULT (reduc_def_phi))
> -    {
> -      stmt_vec_info def = loop_vinfo->lookup_def (reduc_def);
> -      stmt_vec_info vdef = vect_stmt_to_vectorize (def);
> -      if (STMT_VINFO_REDUC_IDX (vdef) == -1)
> -       {
> -         if (dump_enabled_p ())
> -           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -                            "reduction chain broken by patterns.\n");
> -         return false;
> -       }
> -      if (!REDUC_GROUP_FIRST_ELEMENT (vdef))
> -       only_slp_reduc_chain = false;
> -      /* ???  For epilogue generation live members of the chain need
> -         to point back to the PHI via their original stmt for
> -        info_for_reduction to work.  */
> -      if (STMT_VINFO_LIVE_P (vdef))
> -       STMT_VINFO_REDUC_DEF (def) = phi_info;
> -      reduc_def = gimple_op (vdef->stmt, 1 + STMT_VINFO_REDUC_IDX (vdef));
> -      reduc_chain_length++;
> -    }
> -
>    /* For lane-reducing ops we're reducing the number of reduction PHIs
>       which means the only use of that may be in the lane-reducing operation.  */
>    if (lane_reduc_code_p
> @@ -5877,6 +5881,10 @@ vectorizable_reduction (stmt_vec_info st
>        return false;
>      }
>
> +  /* All uses but the last are expected to be defined in the loop.
> +     The last use is the reduction variable.  In case of nested cycle this
> +     assumption is not true: we use reduc_index to record the index of the
> +     reduction variable.  */
>    reduc_def = PHI_RESULT (reduc_def_phi);
>    for (i = 0; i < op_type; i++)
>      {
> @@ -5931,7 +5939,7 @@ vectorizable_reduction (stmt_vec_info st
>         }
>      }
>    if (!vectype_in)
> -    vectype_in = vectype_out;
> +    vectype_in = STMT_VINFO_VECTYPE (phi_info);
>    STMT_VINFO_REDUC_VECTYPE_IN (reduc_info) = vectype_in;
>
>    enum vect_reduction_type v_reduc_type = STMT_VINFO_REDUC_TYPE (phi_info);
> @@ -6037,12 +6045,6 @@ vectorizable_reduction (stmt_vec_info st
>         }
>      }
>
> -  if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
> -    /* We changed STMT to be the first stmt in reduction chain, hence we
> -       check that in this case the first element in the chain is STMT.  */
> -    gcc_assert (REDUC_GROUP_FIRST_ELEMENT (STMT_VINFO_REDUC_DEF (phi_info))
> -               == vect_orig_stmt (stmt_info));
> -
>    if (STMT_VINFO_LIVE_P (phi_info))
>      return false;
>
> @@ -6447,8 +6449,15 @@ vectorizable_reduction (stmt_vec_info st
>        && code != SAD_EXPR
>        && reduction_type != FOLD_LEFT_REDUCTION)
>      {
> -      STMT_VINFO_DEF_TYPE (stmt_info) = vect_internal_def;
> -      STMT_VINFO_DEF_TYPE (vect_orig_stmt (stmt_info)) = vect_internal_def;
> +      stmt_vec_info tem
> +       = vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (phi_info));
> +      if (slp_node && REDUC_GROUP_FIRST_ELEMENT (tem))
> +       {
> +         gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (tem));
> +         tem = REDUC_GROUP_FIRST_ELEMENT (tem);
> +       }
> +      STMT_VINFO_DEF_TYPE (vect_orig_stmt (tem)) = vect_internal_def;
> +      STMT_VINFO_DEF_TYPE (tem) = vect_internal_def;
>      }
>    else if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
>      {
> Index: gcc/tree-vect-stmts.c
> ===================================================================
> --- gcc/tree-vect-stmts.c       (revision 277922)
> +++ gcc/tree-vect-stmts.c       (working copy)
> @@ -330,13 +330,13 @@ vect_stmt_relevant_p (stmt_vec_info stmt
>           basic_block bb = gimple_bb (USE_STMT (use_p));
>           if (!flow_bb_inside_loop_p (loop, bb))
>             {
> +             if (is_gimple_debug (USE_STMT (use_p)))
> +               continue;
> +
>               if (dump_enabled_p ())
>                 dump_printf_loc (MSG_NOTE, vect_location,
>                                   "vec_stmt_relevant_p: used out of loop.\n");
>
> -             if (is_gimple_debug (USE_STMT (use_p)))
> -               continue;
> -
>               /* We expect all such uses to be in the loop exit phis
>                  (because of loop closed form)   */
>               gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
> Index: gcc/tree-vectorizer.h
> ===================================================================
> --- gcc/tree-vectorizer.h       (revision 277922)
> +++ gcc/tree-vectorizer.h       (working copy)
> @@ -1050,6 +1050,9 @@ public:
>    /* The vector input type relevant for reduction vectorization.  */
>    tree reduc_vectype_in;
>
> +  /* The vector type for performing the actual reduction.  */
> +  tree reduc_vectype;
> +
>    /* Whether we force a single cycle PHI during reduction vectorization.  */
>    bool force_single_cycle;
>
> @@ -1175,6 +1178,7 @@ STMT_VINFO_BB_VINFO (stmt_vec_info stmt_
>  #define STMT_VINFO_REDUC_CODE(S)       (S)->reduc_code
>  #define STMT_VINFO_REDUC_FN(S)         (S)->reduc_fn
>  #define STMT_VINFO_REDUC_DEF(S)                (S)->reduc_def
> +#define STMT_VINFO_REDUC_VECTYPE(S)     (S)->reduc_vectype
>  #define STMT_VINFO_REDUC_VECTYPE_IN(S)  (S)->reduc_vectype_in
>  #define STMT_VINFO_SLP_VECT_ONLY(S)     (S)->slp_vect_only_p
>
> Index: gcc/testsuite/gcc.dg/vect/pr92205.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/vect/pr92205.c (revision 277922)
> +++ gcc/testsuite/gcc.dg/vect/pr92205.c (working copy)
> @@ -10,4 +10,4 @@ int b(int n, unsigned char *a)
>    return d;
>  }
>
> -/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target { vect_unpack && { ! vect_no_bitwise } } } } } */
> +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail *-*-* } } } */
> Index: gcc/testsuite/gcc.dg/vect/pr92324-1.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/vect/pr92324-1.c       (nonexistent)
> +++ gcc/testsuite/gcc.dg/vect/pr92324-1.c       (working copy)
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +
> +unsigned a, b;
> +int c, d;
> +unsigned e(int f) {
> +  if (a > f)
> +    return a;
> +  return f;
> +}
> +void g() {
> +  for (; c; c++)
> +    d = e(d);
> +  b = d;
> +}
> Index: gcc/testsuite/gcc.dg/vect/pr92324-2.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/vect/pr92324-2.c       (nonexistent)
> +++ gcc/testsuite/gcc.dg/vect/pr92324-2.c       (working copy)
> @@ -0,0 +1,21 @@
> +#include "tree-vect.h"
> +
> +unsigned b[1024];
> +
> +int __attribute__((noipa))
> +foo (int n)
> +{
> +  int res = 0;
> +  for (int i = 0; i < n; ++i)
> +    res = res > b[i] ? res : b[i];
> +  return res;
> +}
> +
> +int main ()
> +{
> +  check_vect ();
> +  b[15] = (unsigned)__INT_MAX__ + 1;
> +  if (foo (16) != -__INT_MAX__ - 1)
> +    __builtin_abort ();
> +  return 0;
> +}
Richard Biener Nov. 11, 2019, 1:36 p.m. UTC | #5
On Mon, 11 Nov 2019, Christophe Lyon wrote:

> On Fri, 8 Nov 2019 at 09:57, Richard Biener <rguenther@suse.de> wrote:
> >
> >
> > I've been sitting on this for a few days since I'm not 100% happy
> > with how the code looks like.  There's possibly still holes in it
> > (chains with mixed signed/unsigned adds for example might pick
> > up signed adds in the epilogue), but the wrong-code cases should
> > work fine now.  I'm probably going to followup with some
> > mass renaming of variable/parameter names to make it more clear
> > which stmt / type we are actually looking at ...
> >
> > Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.
> >
> > Richard.
> >
> > 2019-11-08  Richard Biener  <rguenther@suse.de>
> >
> >         PR tree-optimization/
> >         * tree-vect-loop.c (vect_create_epilog_for_reduction): Use
> >         STMT_VINFO_REDUC_VECTYPE for all computations, inserting
> >         sign-conversions as necessary.
> >         (vectorizable_reduction): Reject conversions in the chain
> >         that are not sign-conversions, base analysis on a non-converting
> >         stmt and its operation sign.  Set STMT_VINFO_REDUC_VECTYPE.
> >         * tree-vect-stmts.c (vect_stmt_relevant_p): Don't dump anything
> >         for debug stmts.
> >         * tree-vectorizer.h (_stmt_vec_info::reduc_vectype): New.
> >         (STMT_VINFO_REDUC_VECTYPE): Likewise.
> >
> >         * gcc.dg/vect/pr92205.c: XFAIL.
> >         * gcc.dg/vect/pr92324-1.c: New testcase.
> >         * gcc.dg/vect/pr92324-2.c: Likewise.
> >
> 
> Hi Richard,
> 
> This new testcase (pr92324-2.c) fails on arm/aarch64:
> FAIL: gcc.dg/vect/pr92324-2.c -flto -ffat-lto-objects execution test
> FAIL: gcc.dg/vect/pr92324-2.c execution test

Can you open a bugreport?  I suspect the arm/aarch64 vector
min/max patterns are bogus or we go a different code-path
in the vectorizer here (IIRC x86 has reduc_[us]max_* patterns
while aarch64 has none of that).

Thanks,
Richard.

> Christophe
> 
> 
> > Index: gcc/tree-vect-loop.c
> > ===================================================================
> > --- gcc/tree-vect-loop.c        (revision 277922)
> > +++ gcc/tree-vect-loop.c        (working copy)
> > @@ -4231,7 +4231,6 @@ vect_create_epilog_for_reduction (stmt_v
> >    gimple *new_phi = NULL, *phi;
> >    stmt_vec_info phi_info;
> >    gimple_stmt_iterator exit_gsi;
> > -  tree vec_dest;
> >    tree new_temp = NULL_TREE, new_name, new_scalar_dest;
> >    gimple *epilog_stmt = NULL;
> >    gimple *exit_phi;
> > @@ -4264,7 +4263,7 @@ vect_create_epilog_for_reduction (stmt_v
> >      }
> >    gcc_assert (!nested_in_vect_loop || double_reduc);
> >
> > -  vectype = STMT_VINFO_VECTYPE (stmt_info);
> > +  vectype = STMT_VINFO_REDUC_VECTYPE (reduc_info);
> >    gcc_assert (vectype);
> >    mode = TYPE_MODE (vectype);
> >
> > @@ -4505,48 +4504,43 @@ vect_create_epilog_for_reduction (stmt_v
> >       one vector.  */
> >    if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) || direct_slp_reduc)
> >      {
> > +      gimple_seq stmts = NULL;
> >        tree first_vect = PHI_RESULT (new_phis[0]);
> > -      gassign *new_vec_stmt = NULL;
> > -      vec_dest = vect_create_destination_var (scalar_dest, vectype);
> > +      first_vect = gimple_convert (&stmts, vectype, first_vect);
> >        for (k = 1; k < new_phis.length (); k++)
> >          {
> >           gimple *next_phi = new_phis[k];
> >            tree second_vect = PHI_RESULT (next_phi);
> > -          tree tem = make_ssa_name (vec_dest, new_vec_stmt);
> > -          new_vec_stmt = gimple_build_assign (tem, code,
> > -                                             first_vect, second_vect);
> > -          gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
> > -         first_vect = tem;
> > +         second_vect = gimple_convert (&stmts, vectype, second_vect);
> > +          first_vect = gimple_build (&stmts, code, vectype,
> > +                                    first_vect, second_vect);
> >          }
> > +      gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
> >
> >        new_phi_result = first_vect;
> > -      if (new_vec_stmt)
> > -        {
> > -          new_phis.truncate (0);
> > -          new_phis.safe_push (new_vec_stmt);
> > -        }
> > +      new_phis.truncate (0);
> > +      new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect));
> >      }
> >    /* Likewise if we couldn't use a single defuse cycle.  */
> >    else if (ncopies > 1)
> >      {
> >        gcc_assert (new_phis.length () == 1);
> > +      gimple_seq stmts = NULL;
> >        tree first_vect = PHI_RESULT (new_phis[0]);
> > -      gassign *new_vec_stmt = NULL;
> > -      vec_dest = vect_create_destination_var (scalar_dest, vectype);
> > +      first_vect = gimple_convert (&stmts, vectype, first_vect);
> >        stmt_vec_info next_phi_info = loop_vinfo->lookup_stmt (new_phis[0]);
> >        for (int k = 1; k < ncopies; ++k)
> >         {
> >           next_phi_info = STMT_VINFO_RELATED_STMT (next_phi_info);
> >           tree second_vect = PHI_RESULT (next_phi_info->stmt);
> > -          tree tem = make_ssa_name (vec_dest, new_vec_stmt);
> > -          new_vec_stmt = gimple_build_assign (tem, code,
> > -                                             first_vect, second_vect);
> > -          gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
> > -         first_vect = tem;
> > +         second_vect = gimple_convert (&stmts, vectype, second_vect);
> > +         first_vect = gimple_build (&stmts, code, vectype,
> > +                                    first_vect, second_vect);
> >         }
> > +      gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
> >        new_phi_result = first_vect;
> >        new_phis.truncate (0);
> > -      new_phis.safe_push (new_vec_stmt);
> > +      new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect));
> >      }
> >    else
> >      new_phi_result = PHI_RESULT (new_phis[0]);
> > @@ -4877,13 +4871,14 @@ vect_create_epilog_for_reduction (stmt_v
> >          in a vector mode of smaller size and first reduce upper/lower
> >          halves against each other.  */
> >        enum machine_mode mode1 = mode;
> > +      tree stype = TREE_TYPE (vectype);
> >        unsigned sz = tree_to_uhwi (TYPE_SIZE_UNIT (vectype));
> >        unsigned sz1 = sz;
> >        if (!slp_reduc
> >           && (mode1 = targetm.vectorize.split_reduction (mode)) != mode)
> >         sz1 = GET_MODE_SIZE (mode1).to_constant ();
> >
> > -      tree vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz1);
> > +      tree vectype1 = get_vectype_for_scalar_type_and_size (stype, sz1);
> >        reduce_with_shift = have_whole_vector_shift (mode1);
> >        if (!VECTOR_MODE_P (mode1))
> >         reduce_with_shift = false;
> > @@ -4901,7 +4896,7 @@ vect_create_epilog_for_reduction (stmt_v
> >         {
> >           gcc_assert (!slp_reduc);
> >           sz /= 2;
> > -         vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz);
> > +         vectype1 = get_vectype_for_scalar_type_and_size (stype, sz);
> >
> >           /* The target has to make sure we support lowpart/highpart
> >              extraction, either via direct vector extract or through
> > @@ -5004,7 +4999,8 @@ vect_create_epilog_for_reduction (stmt_v
> >              dump_printf_loc (MSG_NOTE, vect_location,
> >                              "Reduce using vector shifts\n");
> >
> > -          vec_dest = vect_create_destination_var (scalar_dest, vectype1);
> > +         gimple_seq stmts = NULL;
> > +         new_temp = gimple_convert (&stmts, vectype1, new_temp);
> >            for (elt_offset = nelements / 2;
> >                 elt_offset >= 1;
> >                 elt_offset /= 2)
> > @@ -5012,18 +5008,12 @@ vect_create_epilog_for_reduction (stmt_v
> >               calc_vec_perm_mask_for_shift (elt_offset, nelements, &sel);
> >               indices.new_vector (sel, 2, nelements);
> >               tree mask = vect_gen_perm_mask_any (vectype1, indices);
> > -             epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR,
> > -                                                new_temp, zero_vec, mask);
> > -              new_name = make_ssa_name (vec_dest, epilog_stmt);
> > -              gimple_assign_set_lhs (epilog_stmt, new_name);
> > -              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
> > -
> > -             epilog_stmt = gimple_build_assign (vec_dest, code, new_name,
> > -                                                new_temp);
> > -              new_temp = make_ssa_name (vec_dest, epilog_stmt);
> > -              gimple_assign_set_lhs (epilog_stmt, new_temp);
> > -              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
> > +             new_name = gimple_build (&stmts, VEC_PERM_EXPR, vectype1,
> > +                                      new_temp, zero_vec, mask);
> > +             new_temp = gimple_build (&stmts, code,
> > +                                      vectype1, new_name, new_temp);
> >              }
> > +         gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
> >
> >           /* 2.4  Extract the final scalar result.  Create:
> >              s_out3 = extract_field <v_out2, bitpos>  */
> > @@ -5696,7 +5686,6 @@ vectorizable_reduction (stmt_vec_info st
> >                         stmt_vector_for_cost *cost_vec)
> >  {
> >    tree scalar_dest;
> > -  tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
> >    tree vectype_in = NULL_TREE;
> >    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
> >    class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
> > @@ -5763,13 +5752,53 @@ vectorizable_reduction (stmt_vec_info st
> >           phi_info = loop_vinfo->lookup_stmt (use_stmt);
> >           stmt_info = vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (phi_info));
> >         }
> > -      /* STMT_VINFO_REDUC_DEF doesn't point to the first but the last
> > -         element.  */
> > -      if (slp_node && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
> > +    }
> > +
> > +  /* PHIs should not participate in patterns.  */
> > +  gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info));
> > +  gphi *reduc_def_phi = as_a <gphi *> (phi_info->stmt);
> > +
> > +  /* Verify following REDUC_IDX from the latch def leads us back to the PHI
> > +     and compute the reduction chain length.  */
> > +  tree reduc_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi,
> > +                                         loop_latch_edge (loop));
> > +  unsigned reduc_chain_length = 0;
> > +  bool only_slp_reduc_chain = true;
> > +  stmt_info = NULL;
> > +  while (reduc_def != PHI_RESULT (reduc_def_phi))
> > +    {
> > +      stmt_vec_info def = loop_vinfo->lookup_def (reduc_def);
> > +      stmt_vec_info vdef = vect_stmt_to_vectorize (def);
> > +      if (STMT_VINFO_REDUC_IDX (vdef) == -1)
> >         {
> > -         gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (stmt_info));
> > -         stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info);
> > +         if (dump_enabled_p ())
> > +           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > +                            "reduction chain broken by patterns.\n");
> > +         return false;
> > +       }
> > +      if (!REDUC_GROUP_FIRST_ELEMENT (vdef))
> > +       only_slp_reduc_chain = false;
> > +      /* ???  For epilogue generation live members of the chain need
> > +         to point back to the PHI via their original stmt for
> > +        info_for_reduction to work.  */
> > +      if (STMT_VINFO_LIVE_P (vdef))
> > +       STMT_VINFO_REDUC_DEF (def) = phi_info;
> > +      if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (vdef->stmt)))
> > +       {
> > +         if (!tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (vdef->stmt)),
> > +                                     TREE_TYPE (gimple_assign_rhs1 (vdef->stmt))))
> > +           {
> > +             if (dump_enabled_p ())
> > +               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > +                                "conversion in the reduction chain.\n");
> > +             return false;
> > +           }
> >         }
> > +      else if (!stmt_info)
> > +       /* First non-conversion stmt.  */
> > +       stmt_info = vdef;
> > +      reduc_def = gimple_op (vdef->stmt, 1 + STMT_VINFO_REDUC_IDX (vdef));
> > +      reduc_chain_length++;
> >      }
> >    /* PHIs should not participate in patterns.  */
> >    gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info));
> > @@ -5780,6 +5809,13 @@ vectorizable_reduction (stmt_vec_info st
> >        nested_cycle = true;
> >      }
> >
> > +  /* STMT_VINFO_REDUC_DEF doesn't point to the first but the last
> > +     element.  */
> > +  if (slp_node && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
> > +    {
> > +      gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (stmt_info));
> > +      stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info);
> > +    }
> >    if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
> >      gcc_assert (slp_node
> >                 && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info);
> > @@ -5815,6 +5851,8 @@ vectorizable_reduction (stmt_vec_info st
> >          inside the loop body. The last operand is the reduction variable,
> >          which is defined by the loop-header-phi.  */
> >
> > +  tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
> > +  STMT_VINFO_REDUC_VECTYPE (reduc_info) = vectype_out;
> >    gassign *stmt = as_a <gassign *> (stmt_info->stmt);
> >    enum tree_code code = gimple_assign_rhs_code (stmt);
> >    bool lane_reduc_code_p
> > @@ -5831,40 +5869,6 @@ vectorizable_reduction (stmt_vec_info st
> >    if (!type_has_mode_precision_p (scalar_type))
> >      return false;
> >
> > -  /* All uses but the last are expected to be defined in the loop.
> > -     The last use is the reduction variable.  In case of nested cycle this
> > -     assumption is not true: we use reduc_index to record the index of the
> > -     reduction variable.  */
> > -  gphi *reduc_def_phi = as_a <gphi *> (phi_info->stmt);
> > -
> > -  /* Verify following REDUC_IDX from the latch def leads us back to the PHI
> > -     and compute the reduction chain length.  */
> > -  tree reduc_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi,
> > -                                         loop_latch_edge (loop));
> > -  unsigned reduc_chain_length = 0;
> > -  bool only_slp_reduc_chain = true;
> > -  while (reduc_def != PHI_RESULT (reduc_def_phi))
> > -    {
> > -      stmt_vec_info def = loop_vinfo->lookup_def (reduc_def);
> > -      stmt_vec_info vdef = vect_stmt_to_vectorize (def);
> > -      if (STMT_VINFO_REDUC_IDX (vdef) == -1)
> > -       {
> > -         if (dump_enabled_p ())
> > -           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > -                            "reduction chain broken by patterns.\n");
> > -         return false;
> > -       }
> > -      if (!REDUC_GROUP_FIRST_ELEMENT (vdef))
> > -       only_slp_reduc_chain = false;
> > -      /* ???  For epilogue generation live members of the chain need
> > -         to point back to the PHI via their original stmt for
> > -        info_for_reduction to work.  */
> > -      if (STMT_VINFO_LIVE_P (vdef))
> > -       STMT_VINFO_REDUC_DEF (def) = phi_info;
> > -      reduc_def = gimple_op (vdef->stmt, 1 + STMT_VINFO_REDUC_IDX (vdef));
> > -      reduc_chain_length++;
> > -    }
> > -
> >    /* For lane-reducing ops we're reducing the number of reduction PHIs
> >       which means the only use of that may be in the lane-reducing operation.  */
> >    if (lane_reduc_code_p
> > @@ -5877,6 +5881,10 @@ vectorizable_reduction (stmt_vec_info st
> >        return false;
> >      }
> >
> > +  /* All uses but the last are expected to be defined in the loop.
> > +     The last use is the reduction variable.  In case of nested cycle this
> > +     assumption is not true: we use reduc_index to record the index of the
> > +     reduction variable.  */
> >    reduc_def = PHI_RESULT (reduc_def_phi);
> >    for (i = 0; i < op_type; i++)
> >      {
> > @@ -5931,7 +5939,7 @@ vectorizable_reduction (stmt_vec_info st
> >         }
> >      }
> >    if (!vectype_in)
> > -    vectype_in = vectype_out;
> > +    vectype_in = STMT_VINFO_VECTYPE (phi_info);
> >    STMT_VINFO_REDUC_VECTYPE_IN (reduc_info) = vectype_in;
> >
> >    enum vect_reduction_type v_reduc_type = STMT_VINFO_REDUC_TYPE (phi_info);
> > @@ -6037,12 +6045,6 @@ vectorizable_reduction (stmt_vec_info st
> >         }
> >      }
> >
> > -  if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
> > -    /* We changed STMT to be the first stmt in reduction chain, hence we
> > -       check that in this case the first element in the chain is STMT.  */
> > -    gcc_assert (REDUC_GROUP_FIRST_ELEMENT (STMT_VINFO_REDUC_DEF (phi_info))
> > -               == vect_orig_stmt (stmt_info));
> > -
> >    if (STMT_VINFO_LIVE_P (phi_info))
> >      return false;
> >
> > @@ -6447,8 +6449,15 @@ vectorizable_reduction (stmt_vec_info st
> >        && code != SAD_EXPR
> >        && reduction_type != FOLD_LEFT_REDUCTION)
> >      {
> > -      STMT_VINFO_DEF_TYPE (stmt_info) = vect_internal_def;
> > -      STMT_VINFO_DEF_TYPE (vect_orig_stmt (stmt_info)) = vect_internal_def;
> > +      stmt_vec_info tem
> > +       = vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (phi_info));
> > +      if (slp_node && REDUC_GROUP_FIRST_ELEMENT (tem))
> > +       {
> > +         gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (tem));
> > +         tem = REDUC_GROUP_FIRST_ELEMENT (tem);
> > +       }
> > +      STMT_VINFO_DEF_TYPE (vect_orig_stmt (tem)) = vect_internal_def;
> > +      STMT_VINFO_DEF_TYPE (tem) = vect_internal_def;
> >      }
> >    else if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
> >      {
> > Index: gcc/tree-vect-stmts.c
> > ===================================================================
> > --- gcc/tree-vect-stmts.c       (revision 277922)
> > +++ gcc/tree-vect-stmts.c       (working copy)
> > @@ -330,13 +330,13 @@ vect_stmt_relevant_p (stmt_vec_info stmt
> >           basic_block bb = gimple_bb (USE_STMT (use_p));
> >           if (!flow_bb_inside_loop_p (loop, bb))
> >             {
> > +             if (is_gimple_debug (USE_STMT (use_p)))
> > +               continue;
> > +
> >               if (dump_enabled_p ())
> >                 dump_printf_loc (MSG_NOTE, vect_location,
> >                                   "vec_stmt_relevant_p: used out of loop.\n");
> >
> > -             if (is_gimple_debug (USE_STMT (use_p)))
> > -               continue;
> > -
> >               /* We expect all such uses to be in the loop exit phis
> >                  (because of loop closed form)   */
> >               gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
> > Index: gcc/tree-vectorizer.h
> > ===================================================================
> > --- gcc/tree-vectorizer.h       (revision 277922)
> > +++ gcc/tree-vectorizer.h       (working copy)
> > @@ -1050,6 +1050,9 @@ public:
> >    /* The vector input type relevant for reduction vectorization.  */
> >    tree reduc_vectype_in;
> >
> > +  /* The vector type for performing the actual reduction.  */
> > +  tree reduc_vectype;
> > +
> >    /* Whether we force a single cycle PHI during reduction vectorization.  */
> >    bool force_single_cycle;
> >
> > @@ -1175,6 +1178,7 @@ STMT_VINFO_BB_VINFO (stmt_vec_info stmt_
> >  #define STMT_VINFO_REDUC_CODE(S)       (S)->reduc_code
> >  #define STMT_VINFO_REDUC_FN(S)         (S)->reduc_fn
> >  #define STMT_VINFO_REDUC_DEF(S)                (S)->reduc_def
> > +#define STMT_VINFO_REDUC_VECTYPE(S)     (S)->reduc_vectype
> >  #define STMT_VINFO_REDUC_VECTYPE_IN(S)  (S)->reduc_vectype_in
> >  #define STMT_VINFO_SLP_VECT_ONLY(S)     (S)->slp_vect_only_p
> >
> > Index: gcc/testsuite/gcc.dg/vect/pr92205.c
> > ===================================================================
> > --- gcc/testsuite/gcc.dg/vect/pr92205.c (revision 277922)
> > +++ gcc/testsuite/gcc.dg/vect/pr92205.c (working copy)
> > @@ -10,4 +10,4 @@ int b(int n, unsigned char *a)
> >    return d;
> >  }
> >
> > -/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target { vect_unpack && { ! vect_no_bitwise } } } } } */
> > +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail *-*-* } } } */
> > Index: gcc/testsuite/gcc.dg/vect/pr92324-1.c
> > ===================================================================
> > --- gcc/testsuite/gcc.dg/vect/pr92324-1.c       (nonexistent)
> > +++ gcc/testsuite/gcc.dg/vect/pr92324-1.c       (working copy)
> > @@ -0,0 +1,14 @@
> > +/* { dg-do compile } */
> > +
> > +unsigned a, b;
> > +int c, d;
> > +unsigned e(int f) {
> > +  if (a > f)
> > +    return a;
> > +  return f;
> > +}
> > +void g() {
> > +  for (; c; c++)
> > +    d = e(d);
> > +  b = d;
> > +}
> > Index: gcc/testsuite/gcc.dg/vect/pr92324-2.c
> > ===================================================================
> > --- gcc/testsuite/gcc.dg/vect/pr92324-2.c       (nonexistent)
> > +++ gcc/testsuite/gcc.dg/vect/pr92324-2.c       (working copy)
> > @@ -0,0 +1,21 @@
> > +#include "tree-vect.h"
> > +
> > +unsigned b[1024];
> > +
> > +int __attribute__((noipa))
> > +foo (int n)
> > +{
> > +  int res = 0;
> > +  for (int i = 0; i < n; ++i)
> > +    res = res > b[i] ? res : b[i];
> > +  return res;
> > +}
> > +
> > +int main ()
> > +{
> > +  check_vect ();
> > +  b[15] = (unsigned)__INT_MAX__ + 1;
> > +  if (foo (16) != -__INT_MAX__ - 1)
> > +    __builtin_abort ();
> > +  return 0;
> > +}
>
Christophe Lyon Nov. 12, 2019, 9:52 a.m. UTC | #6
On Mon, 11 Nov 2019 at 14:36, Richard Biener <rguenther@suse.de> wrote:
>
> On Mon, 11 Nov 2019, Christophe Lyon wrote:
>
> > On Fri, 8 Nov 2019 at 09:57, Richard Biener <rguenther@suse.de> wrote:
> > >
> > >
> > > I've been sitting on this for a few days since I'm not 100% happy
> > > with how the code looks like.  There's possibly still holes in it
> > > (chains with mixed signed/unsigned adds for example might pick
> > > up signed adds in the epilogue), but the wrong-code cases should
> > > work fine now.  I'm probably going to followup with some
> > > mass renaming of variable/parameter names to make it more clear
> > > which stmt / type we are actually looking at ...
> > >
> > > Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.
> > >
> > > Richard.
> > >
> > > 2019-11-08  Richard Biener  <rguenther@suse.de>
> > >
> > >         PR tree-optimization/
> > >         * tree-vect-loop.c (vect_create_epilog_for_reduction): Use
> > >         STMT_VINFO_REDUC_VECTYPE for all computations, inserting
> > >         sign-conversions as necessary.
> > >         (vectorizable_reduction): Reject conversions in the chain
> > >         that are not sign-conversions, base analysis on a non-converting
> > >         stmt and its operation sign.  Set STMT_VINFO_REDUC_VECTYPE.
> > >         * tree-vect-stmts.c (vect_stmt_relevant_p): Don't dump anything
> > >         for debug stmts.
> > >         * tree-vectorizer.h (_stmt_vec_info::reduc_vectype): New.
> > >         (STMT_VINFO_REDUC_VECTYPE): Likewise.
> > >
> > >         * gcc.dg/vect/pr92205.c: XFAIL.
> > >         * gcc.dg/vect/pr92324-1.c: New testcase.
> > >         * gcc.dg/vect/pr92324-2.c: Likewise.
> > >
> >
> > Hi Richard,
> >
> > This new testcase (pr92324-2.c) fails on arm/aarch64:
> > FAIL: gcc.dg/vect/pr92324-2.c -flto -ffat-lto-objects execution test
> > FAIL: gcc.dg/vect/pr92324-2.c execution test
>
> Can you open a bugreport?  I suspect the arm/aarch64 vector
> min/max patterns are bogus or we go a different code-path
> in the vectorizer here (IIRC x86 has reduc_[us]max_* patterns
> while aarch64 has none of that).
>

Sure, I've filed PR92473.


> Thanks,
> Richard.
>
> > Christophe
> >
> >
> > > Index: gcc/tree-vect-loop.c
> > > ===================================================================
> > > --- gcc/tree-vect-loop.c        (revision 277922)
> > > +++ gcc/tree-vect-loop.c        (working copy)
> > > @@ -4231,7 +4231,6 @@ vect_create_epilog_for_reduction (stmt_v
> > >    gimple *new_phi = NULL, *phi;
> > >    stmt_vec_info phi_info;
> > >    gimple_stmt_iterator exit_gsi;
> > > -  tree vec_dest;
> > >    tree new_temp = NULL_TREE, new_name, new_scalar_dest;
> > >    gimple *epilog_stmt = NULL;
> > >    gimple *exit_phi;
> > > @@ -4264,7 +4263,7 @@ vect_create_epilog_for_reduction (stmt_v
> > >      }
> > >    gcc_assert (!nested_in_vect_loop || double_reduc);
> > >
> > > -  vectype = STMT_VINFO_VECTYPE (stmt_info);
> > > +  vectype = STMT_VINFO_REDUC_VECTYPE (reduc_info);
> > >    gcc_assert (vectype);
> > >    mode = TYPE_MODE (vectype);
> > >
> > > @@ -4505,48 +4504,43 @@ vect_create_epilog_for_reduction (stmt_v
> > >       one vector.  */
> > >    if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) || direct_slp_reduc)
> > >      {
> > > +      gimple_seq stmts = NULL;
> > >        tree first_vect = PHI_RESULT (new_phis[0]);
> > > -      gassign *new_vec_stmt = NULL;
> > > -      vec_dest = vect_create_destination_var (scalar_dest, vectype);
> > > +      first_vect = gimple_convert (&stmts, vectype, first_vect);
> > >        for (k = 1; k < new_phis.length (); k++)
> > >          {
> > >           gimple *next_phi = new_phis[k];
> > >            tree second_vect = PHI_RESULT (next_phi);
> > > -          tree tem = make_ssa_name (vec_dest, new_vec_stmt);
> > > -          new_vec_stmt = gimple_build_assign (tem, code,
> > > -                                             first_vect, second_vect);
> > > -          gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
> > > -         first_vect = tem;
> > > +         second_vect = gimple_convert (&stmts, vectype, second_vect);
> > > +          first_vect = gimple_build (&stmts, code, vectype,
> > > +                                    first_vect, second_vect);
> > >          }
> > > +      gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
> > >
> > >        new_phi_result = first_vect;
> > > -      if (new_vec_stmt)
> > > -        {
> > > -          new_phis.truncate (0);
> > > -          new_phis.safe_push (new_vec_stmt);
> > > -        }
> > > +      new_phis.truncate (0);
> > > +      new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect));
> > >      }
> > >    /* Likewise if we couldn't use a single defuse cycle.  */
> > >    else if (ncopies > 1)
> > >      {
> > >        gcc_assert (new_phis.length () == 1);
> > > +      gimple_seq stmts = NULL;
> > >        tree first_vect = PHI_RESULT (new_phis[0]);
> > > -      gassign *new_vec_stmt = NULL;
> > > -      vec_dest = vect_create_destination_var (scalar_dest, vectype);
> > > +      first_vect = gimple_convert (&stmts, vectype, first_vect);
> > >        stmt_vec_info next_phi_info = loop_vinfo->lookup_stmt (new_phis[0]);
> > >        for (int k = 1; k < ncopies; ++k)
> > >         {
> > >           next_phi_info = STMT_VINFO_RELATED_STMT (next_phi_info);
> > >           tree second_vect = PHI_RESULT (next_phi_info->stmt);
> > > -          tree tem = make_ssa_name (vec_dest, new_vec_stmt);
> > > -          new_vec_stmt = gimple_build_assign (tem, code,
> > > -                                             first_vect, second_vect);
> > > -          gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
> > > -         first_vect = tem;
> > > +         second_vect = gimple_convert (&stmts, vectype, second_vect);
> > > +         first_vect = gimple_build (&stmts, code, vectype,
> > > +                                    first_vect, second_vect);
> > >         }
> > > +      gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
> > >        new_phi_result = first_vect;
> > >        new_phis.truncate (0);
> > > -      new_phis.safe_push (new_vec_stmt);
> > > +      new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect));
> > >      }
> > >    else
> > >      new_phi_result = PHI_RESULT (new_phis[0]);
> > > @@ -4877,13 +4871,14 @@ vect_create_epilog_for_reduction (stmt_v
> > >          in a vector mode of smaller size and first reduce upper/lower
> > >          halves against each other.  */
> > >        enum machine_mode mode1 = mode;
> > > +      tree stype = TREE_TYPE (vectype);
> > >        unsigned sz = tree_to_uhwi (TYPE_SIZE_UNIT (vectype));
> > >        unsigned sz1 = sz;
> > >        if (!slp_reduc
> > >           && (mode1 = targetm.vectorize.split_reduction (mode)) != mode)
> > >         sz1 = GET_MODE_SIZE (mode1).to_constant ();
> > >
> > > -      tree vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz1);
> > > +      tree vectype1 = get_vectype_for_scalar_type_and_size (stype, sz1);
> > >        reduce_with_shift = have_whole_vector_shift (mode1);
> > >        if (!VECTOR_MODE_P (mode1))
> > >         reduce_with_shift = false;
> > > @@ -4901,7 +4896,7 @@ vect_create_epilog_for_reduction (stmt_v
> > >         {
> > >           gcc_assert (!slp_reduc);
> > >           sz /= 2;
> > > -         vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz);
> > > +         vectype1 = get_vectype_for_scalar_type_and_size (stype, sz);
> > >
> > >           /* The target has to make sure we support lowpart/highpart
> > >              extraction, either via direct vector extract or through
> > > @@ -5004,7 +4999,8 @@ vect_create_epilog_for_reduction (stmt_v
> > >              dump_printf_loc (MSG_NOTE, vect_location,
> > >                              "Reduce using vector shifts\n");
> > >
> > > -          vec_dest = vect_create_destination_var (scalar_dest, vectype1);
> > > +         gimple_seq stmts = NULL;
> > > +         new_temp = gimple_convert (&stmts, vectype1, new_temp);
> > >            for (elt_offset = nelements / 2;
> > >                 elt_offset >= 1;
> > >                 elt_offset /= 2)
> > > @@ -5012,18 +5008,12 @@ vect_create_epilog_for_reduction (stmt_v
> > >               calc_vec_perm_mask_for_shift (elt_offset, nelements, &sel);
> > >               indices.new_vector (sel, 2, nelements);
> > >               tree mask = vect_gen_perm_mask_any (vectype1, indices);
> > > -             epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR,
> > > -                                                new_temp, zero_vec, mask);
> > > -              new_name = make_ssa_name (vec_dest, epilog_stmt);
> > > -              gimple_assign_set_lhs (epilog_stmt, new_name);
> > > -              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
> > > -
> > > -             epilog_stmt = gimple_build_assign (vec_dest, code, new_name,
> > > -                                                new_temp);
> > > -              new_temp = make_ssa_name (vec_dest, epilog_stmt);
> > > -              gimple_assign_set_lhs (epilog_stmt, new_temp);
> > > -              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
> > > +             new_name = gimple_build (&stmts, VEC_PERM_EXPR, vectype1,
> > > +                                      new_temp, zero_vec, mask);
> > > +             new_temp = gimple_build (&stmts, code,
> > > +                                      vectype1, new_name, new_temp);
> > >              }
> > > +         gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
> > >
> > >           /* 2.4  Extract the final scalar result.  Create:
> > >              s_out3 = extract_field <v_out2, bitpos>  */
> > > @@ -5696,7 +5686,6 @@ vectorizable_reduction (stmt_vec_info st
> > >                         stmt_vector_for_cost *cost_vec)
> > >  {
> > >    tree scalar_dest;
> > > -  tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
> > >    tree vectype_in = NULL_TREE;
> > >    loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
> > >    class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
> > > @@ -5763,13 +5752,53 @@ vectorizable_reduction (stmt_vec_info st
> > >           phi_info = loop_vinfo->lookup_stmt (use_stmt);
> > >           stmt_info = vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (phi_info));
> > >         }
> > > -      /* STMT_VINFO_REDUC_DEF doesn't point to the first but the last
> > > -         element.  */
> > > -      if (slp_node && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
> > > +    }
> > > +
> > > +  /* PHIs should not participate in patterns.  */
> > > +  gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info));
> > > +  gphi *reduc_def_phi = as_a <gphi *> (phi_info->stmt);
> > > +
> > > +  /* Verify following REDUC_IDX from the latch def leads us back to the PHI
> > > +     and compute the reduction chain length.  */
> > > +  tree reduc_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi,
> > > +                                         loop_latch_edge (loop));
> > > +  unsigned reduc_chain_length = 0;
> > > +  bool only_slp_reduc_chain = true;
> > > +  stmt_info = NULL;
> > > +  while (reduc_def != PHI_RESULT (reduc_def_phi))
> > > +    {
> > > +      stmt_vec_info def = loop_vinfo->lookup_def (reduc_def);
> > > +      stmt_vec_info vdef = vect_stmt_to_vectorize (def);
> > > +      if (STMT_VINFO_REDUC_IDX (vdef) == -1)
> > >         {
> > > -         gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (stmt_info));
> > > -         stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info);
> > > +         if (dump_enabled_p ())
> > > +           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > > +                            "reduction chain broken by patterns.\n");
> > > +         return false;
> > > +       }
> > > +      if (!REDUC_GROUP_FIRST_ELEMENT (vdef))
> > > +       only_slp_reduc_chain = false;
> > > +      /* ???  For epilogue generation live members of the chain need
> > > +         to point back to the PHI via their original stmt for
> > > +        info_for_reduction to work.  */
> > > +      if (STMT_VINFO_LIVE_P (vdef))
> > > +       STMT_VINFO_REDUC_DEF (def) = phi_info;
> > > +      if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (vdef->stmt)))
> > > +       {
> > > +         if (!tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (vdef->stmt)),
> > > +                                     TREE_TYPE (gimple_assign_rhs1 (vdef->stmt))))
> > > +           {
> > > +             if (dump_enabled_p ())
> > > +               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > > +                                "conversion in the reduction chain.\n");
> > > +             return false;
> > > +           }
> > >         }
> > > +      else if (!stmt_info)
> > > +       /* First non-conversion stmt.  */
> > > +       stmt_info = vdef;
> > > +      reduc_def = gimple_op (vdef->stmt, 1 + STMT_VINFO_REDUC_IDX (vdef));
> > > +      reduc_chain_length++;
> > >      }
> > >    /* PHIs should not participate in patterns.  */
> > >    gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info));
> > > @@ -5780,6 +5809,13 @@ vectorizable_reduction (stmt_vec_info st
> > >        nested_cycle = true;
> > >      }
> > >
> > > +  /* STMT_VINFO_REDUC_DEF doesn't point to the first but the last
> > > +     element.  */
> > > +  if (slp_node && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
> > > +    {
> > > +      gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (stmt_info));
> > > +      stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info);
> > > +    }
> > >    if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
> > >      gcc_assert (slp_node
> > >                 && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info);
> > > @@ -5815,6 +5851,8 @@ vectorizable_reduction (stmt_vec_info st
> > >          inside the loop body. The last operand is the reduction variable,
> > >          which is defined by the loop-header-phi.  */
> > >
> > > +  tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
> > > +  STMT_VINFO_REDUC_VECTYPE (reduc_info) = vectype_out;
> > >    gassign *stmt = as_a <gassign *> (stmt_info->stmt);
> > >    enum tree_code code = gimple_assign_rhs_code (stmt);
> > >    bool lane_reduc_code_p
> > > @@ -5831,40 +5869,6 @@ vectorizable_reduction (stmt_vec_info st
> > >    if (!type_has_mode_precision_p (scalar_type))
> > >      return false;
> > >
> > > -  /* All uses but the last are expected to be defined in the loop.
> > > -     The last use is the reduction variable.  In case of nested cycle this
> > > -     assumption is not true: we use reduc_index to record the index of the
> > > -     reduction variable.  */
> > > -  gphi *reduc_def_phi = as_a <gphi *> (phi_info->stmt);
> > > -
> > > -  /* Verify following REDUC_IDX from the latch def leads us back to the PHI
> > > -     and compute the reduction chain length.  */
> > > -  tree reduc_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi,
> > > -                                         loop_latch_edge (loop));
> > > -  unsigned reduc_chain_length = 0;
> > > -  bool only_slp_reduc_chain = true;
> > > -  while (reduc_def != PHI_RESULT (reduc_def_phi))
> > > -    {
> > > -      stmt_vec_info def = loop_vinfo->lookup_def (reduc_def);
> > > -      stmt_vec_info vdef = vect_stmt_to_vectorize (def);
> > > -      if (STMT_VINFO_REDUC_IDX (vdef) == -1)
> > > -       {
> > > -         if (dump_enabled_p ())
> > > -           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > > -                            "reduction chain broken by patterns.\n");
> > > -         return false;
> > > -       }
> > > -      if (!REDUC_GROUP_FIRST_ELEMENT (vdef))
> > > -       only_slp_reduc_chain = false;
> > > -      /* ???  For epilogue generation live members of the chain need
> > > -         to point back to the PHI via their original stmt for
> > > -        info_for_reduction to work.  */
> > > -      if (STMT_VINFO_LIVE_P (vdef))
> > > -       STMT_VINFO_REDUC_DEF (def) = phi_info;
> > > -      reduc_def = gimple_op (vdef->stmt, 1 + STMT_VINFO_REDUC_IDX (vdef));
> > > -      reduc_chain_length++;
> > > -    }
> > > -
> > >    /* For lane-reducing ops we're reducing the number of reduction PHIs
> > >       which means the only use of that may be in the lane-reducing operation.  */
> > >    if (lane_reduc_code_p
> > > @@ -5877,6 +5881,10 @@ vectorizable_reduction (stmt_vec_info st
> > >        return false;
> > >      }
> > >
> > > +  /* All uses but the last are expected to be defined in the loop.
> > > +     The last use is the reduction variable.  In case of nested cycle this
> > > +     assumption is not true: we use reduc_index to record the index of the
> > > +     reduction variable.  */
> > >    reduc_def = PHI_RESULT (reduc_def_phi);
> > >    for (i = 0; i < op_type; i++)
> > >      {
> > > @@ -5931,7 +5939,7 @@ vectorizable_reduction (stmt_vec_info st
> > >         }
> > >      }
> > >    if (!vectype_in)
> > > -    vectype_in = vectype_out;
> > > +    vectype_in = STMT_VINFO_VECTYPE (phi_info);
> > >    STMT_VINFO_REDUC_VECTYPE_IN (reduc_info) = vectype_in;
> > >
> > >    enum vect_reduction_type v_reduc_type = STMT_VINFO_REDUC_TYPE (phi_info);
> > > @@ -6037,12 +6045,6 @@ vectorizable_reduction (stmt_vec_info st
> > >         }
> > >      }
> > >
> > > -  if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
> > > -    /* We changed STMT to be the first stmt in reduction chain, hence we
> > > -       check that in this case the first element in the chain is STMT.  */
> > > -    gcc_assert (REDUC_GROUP_FIRST_ELEMENT (STMT_VINFO_REDUC_DEF (phi_info))
> > > -               == vect_orig_stmt (stmt_info));
> > > -
> > >    if (STMT_VINFO_LIVE_P (phi_info))
> > >      return false;
> > >
> > > @@ -6447,8 +6449,15 @@ vectorizable_reduction (stmt_vec_info st
> > >        && code != SAD_EXPR
> > >        && reduction_type != FOLD_LEFT_REDUCTION)
> > >      {
> > > -      STMT_VINFO_DEF_TYPE (stmt_info) = vect_internal_def;
> > > -      STMT_VINFO_DEF_TYPE (vect_orig_stmt (stmt_info)) = vect_internal_def;
> > > +      stmt_vec_info tem
> > > +       = vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (phi_info));
> > > +      if (slp_node && REDUC_GROUP_FIRST_ELEMENT (tem))
> > > +       {
> > > +         gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (tem));
> > > +         tem = REDUC_GROUP_FIRST_ELEMENT (tem);
> > > +       }
> > > +      STMT_VINFO_DEF_TYPE (vect_orig_stmt (tem)) = vect_internal_def;
> > > +      STMT_VINFO_DEF_TYPE (tem) = vect_internal_def;
> > >      }
> > >    else if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
> > >      {
> > > Index: gcc/tree-vect-stmts.c
> > > ===================================================================
> > > --- gcc/tree-vect-stmts.c       (revision 277922)
> > > +++ gcc/tree-vect-stmts.c       (working copy)
> > > @@ -330,13 +330,13 @@ vect_stmt_relevant_p (stmt_vec_info stmt
> > >           basic_block bb = gimple_bb (USE_STMT (use_p));
> > >           if (!flow_bb_inside_loop_p (loop, bb))
> > >             {
> > > +             if (is_gimple_debug (USE_STMT (use_p)))
> > > +               continue;
> > > +
> > >               if (dump_enabled_p ())
> > >                 dump_printf_loc (MSG_NOTE, vect_location,
> > >                                   "vec_stmt_relevant_p: used out of loop.\n");
> > >
> > > -             if (is_gimple_debug (USE_STMT (use_p)))
> > > -               continue;
> > > -
> > >               /* We expect all such uses to be in the loop exit phis
> > >                  (because of loop closed form)   */
> > >               gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
> > > Index: gcc/tree-vectorizer.h
> > > ===================================================================
> > > --- gcc/tree-vectorizer.h       (revision 277922)
> > > +++ gcc/tree-vectorizer.h       (working copy)
> > > @@ -1050,6 +1050,9 @@ public:
> > >    /* The vector input type relevant for reduction vectorization.  */
> > >    tree reduc_vectype_in;
> > >
> > > +  /* The vector type for performing the actual reduction.  */
> > > +  tree reduc_vectype;
> > > +
> > >    /* Whether we force a single cycle PHI during reduction vectorization.  */
> > >    bool force_single_cycle;
> > >
> > > @@ -1175,6 +1178,7 @@ STMT_VINFO_BB_VINFO (stmt_vec_info stmt_
> > >  #define STMT_VINFO_REDUC_CODE(S)       (S)->reduc_code
> > >  #define STMT_VINFO_REDUC_FN(S)         (S)->reduc_fn
> > >  #define STMT_VINFO_REDUC_DEF(S)                (S)->reduc_def
> > > +#define STMT_VINFO_REDUC_VECTYPE(S)     (S)->reduc_vectype
> > >  #define STMT_VINFO_REDUC_VECTYPE_IN(S)  (S)->reduc_vectype_in
> > >  #define STMT_VINFO_SLP_VECT_ONLY(S)     (S)->slp_vect_only_p
> > >
> > > Index: gcc/testsuite/gcc.dg/vect/pr92205.c
> > > ===================================================================
> > > --- gcc/testsuite/gcc.dg/vect/pr92205.c (revision 277922)
> > > +++ gcc/testsuite/gcc.dg/vect/pr92205.c (working copy)
> > > @@ -10,4 +10,4 @@ int b(int n, unsigned char *a)
> > >    return d;
> > >  }
> > >
> > > -/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target { vect_unpack && { ! vect_no_bitwise } } } } } */
> > > +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail *-*-* } } } */
> > > Index: gcc/testsuite/gcc.dg/vect/pr92324-1.c
> > > ===================================================================
> > > --- gcc/testsuite/gcc.dg/vect/pr92324-1.c       (nonexistent)
> > > +++ gcc/testsuite/gcc.dg/vect/pr92324-1.c       (working copy)
> > > @@ -0,0 +1,14 @@
> > > +/* { dg-do compile } */
> > > +
> > > +unsigned a, b;
> > > +int c, d;
> > > +unsigned e(int f) {
> > > +  if (a > f)
> > > +    return a;
> > > +  return f;
> > > +}
> > > +void g() {
> > > +  for (; c; c++)
> > > +    d = e(d);
> > > +  b = d;
> > > +}
> > > Index: gcc/testsuite/gcc.dg/vect/pr92324-2.c
> > > ===================================================================
> > > --- gcc/testsuite/gcc.dg/vect/pr92324-2.c       (nonexistent)
> > > +++ gcc/testsuite/gcc.dg/vect/pr92324-2.c       (working copy)
> > > @@ -0,0 +1,21 @@
> > > +#include "tree-vect.h"
> > > +
> > > +unsigned b[1024];
> > > +
> > > +int __attribute__((noipa))
> > > +foo (int n)
> > > +{
> > > +  int res = 0;
> > > +  for (int i = 0; i < n; ++i)
> > > +    res = res > b[i] ? res : b[i];
> > > +  return res;
> > > +}
> > > +
> > > +int main ()
> > > +{
> > > +  check_vect ();
> > > +  b[15] = (unsigned)__INT_MAX__ + 1;
> > > +  if (foo (16) != -__INT_MAX__ - 1)
> > > +    __builtin_abort ();
> > > +  return 0;
> > > +}
> >
>
> --
> Richard Biener <rguenther@suse.de>
> SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
> Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)
diff mbox series

Patch

Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c	(revision 277922)
+++ gcc/tree-vect-loop.c	(working copy)
@@ -4231,7 +4231,6 @@  vect_create_epilog_for_reduction (stmt_v
   gimple *new_phi = NULL, *phi;
   stmt_vec_info phi_info;
   gimple_stmt_iterator exit_gsi;
-  tree vec_dest;
   tree new_temp = NULL_TREE, new_name, new_scalar_dest;
   gimple *epilog_stmt = NULL;
   gimple *exit_phi;
@@ -4264,7 +4263,7 @@  vect_create_epilog_for_reduction (stmt_v
     }
   gcc_assert (!nested_in_vect_loop || double_reduc);
 
-  vectype = STMT_VINFO_VECTYPE (stmt_info);
+  vectype = STMT_VINFO_REDUC_VECTYPE (reduc_info);
   gcc_assert (vectype);
   mode = TYPE_MODE (vectype);
 
@@ -4505,48 +4504,43 @@  vect_create_epilog_for_reduction (stmt_v
      one vector.  */
   if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) || direct_slp_reduc)
     {
+      gimple_seq stmts = NULL;
       tree first_vect = PHI_RESULT (new_phis[0]);
-      gassign *new_vec_stmt = NULL;
-      vec_dest = vect_create_destination_var (scalar_dest, vectype);
+      first_vect = gimple_convert (&stmts, vectype, first_vect);
       for (k = 1; k < new_phis.length (); k++)
         {
 	  gimple *next_phi = new_phis[k];
           tree second_vect = PHI_RESULT (next_phi);
-          tree tem = make_ssa_name (vec_dest, new_vec_stmt);
-          new_vec_stmt = gimple_build_assign (tem, code,
-					      first_vect, second_vect);
-          gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
-	  first_vect = tem;
+	  second_vect = gimple_convert (&stmts, vectype, second_vect);
+          first_vect = gimple_build (&stmts, code, vectype,
+				     first_vect, second_vect);
         }
+      gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
 
       new_phi_result = first_vect;
-      if (new_vec_stmt)
-        {
-          new_phis.truncate (0);
-          new_phis.safe_push (new_vec_stmt);
-        }
+      new_phis.truncate (0);
+      new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect));
     }
   /* Likewise if we couldn't use a single defuse cycle.  */
   else if (ncopies > 1)
     {
       gcc_assert (new_phis.length () == 1);
+      gimple_seq stmts = NULL;
       tree first_vect = PHI_RESULT (new_phis[0]);
-      gassign *new_vec_stmt = NULL;
-      vec_dest = vect_create_destination_var (scalar_dest, vectype);
+      first_vect = gimple_convert (&stmts, vectype, first_vect);
       stmt_vec_info next_phi_info = loop_vinfo->lookup_stmt (new_phis[0]);
       for (int k = 1; k < ncopies; ++k)
 	{
 	  next_phi_info = STMT_VINFO_RELATED_STMT (next_phi_info);
 	  tree second_vect = PHI_RESULT (next_phi_info->stmt);
-          tree tem = make_ssa_name (vec_dest, new_vec_stmt);
-          new_vec_stmt = gimple_build_assign (tem, code,
-					      first_vect, second_vect);
-          gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
-	  first_vect = tem;
+	  second_vect = gimple_convert (&stmts, vectype, second_vect);
+	  first_vect = gimple_build (&stmts, code, vectype,
+				     first_vect, second_vect);
 	}
+      gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
       new_phi_result = first_vect;
       new_phis.truncate (0);
-      new_phis.safe_push (new_vec_stmt);
+      new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect));
     }
   else
     new_phi_result = PHI_RESULT (new_phis[0]);
@@ -4877,13 +4871,14 @@  vect_create_epilog_for_reduction (stmt_v
 	 in a vector mode of smaller size and first reduce upper/lower
 	 halves against each other.  */
       enum machine_mode mode1 = mode;
+      tree stype = TREE_TYPE (vectype);
       unsigned sz = tree_to_uhwi (TYPE_SIZE_UNIT (vectype));
       unsigned sz1 = sz;
       if (!slp_reduc
 	  && (mode1 = targetm.vectorize.split_reduction (mode)) != mode)
 	sz1 = GET_MODE_SIZE (mode1).to_constant ();
 
-      tree vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz1);
+      tree vectype1 = get_vectype_for_scalar_type_and_size (stype, sz1);
       reduce_with_shift = have_whole_vector_shift (mode1);
       if (!VECTOR_MODE_P (mode1))
 	reduce_with_shift = false;
@@ -4901,7 +4896,7 @@  vect_create_epilog_for_reduction (stmt_v
 	{
 	  gcc_assert (!slp_reduc);
 	  sz /= 2;
-	  vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz);
+	  vectype1 = get_vectype_for_scalar_type_and_size (stype, sz);
 
 	  /* The target has to make sure we support lowpart/highpart
 	     extraction, either via direct vector extract or through
@@ -5004,7 +4999,8 @@  vect_create_epilog_for_reduction (stmt_v
             dump_printf_loc (MSG_NOTE, vect_location,
 			     "Reduce using vector shifts\n");
 
-          vec_dest = vect_create_destination_var (scalar_dest, vectype1);
+	  gimple_seq stmts = NULL;
+	  new_temp = gimple_convert (&stmts, vectype1, new_temp);
           for (elt_offset = nelements / 2;
                elt_offset >= 1;
                elt_offset /= 2)
@@ -5012,18 +5008,12 @@  vect_create_epilog_for_reduction (stmt_v
 	      calc_vec_perm_mask_for_shift (elt_offset, nelements, &sel);
 	      indices.new_vector (sel, 2, nelements);
 	      tree mask = vect_gen_perm_mask_any (vectype1, indices);
-	      epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR,
-						 new_temp, zero_vec, mask);
-              new_name = make_ssa_name (vec_dest, epilog_stmt);
-              gimple_assign_set_lhs (epilog_stmt, new_name);
-              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
-
-	      epilog_stmt = gimple_build_assign (vec_dest, code, new_name,
-						 new_temp);
-              new_temp = make_ssa_name (vec_dest, epilog_stmt);
-              gimple_assign_set_lhs (epilog_stmt, new_temp);
-              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
+	      new_name = gimple_build (&stmts, VEC_PERM_EXPR, vectype1,
+				       new_temp, zero_vec, mask);
+	      new_temp = gimple_build (&stmts, code,
+				       vectype1, new_name, new_temp);
             }
+	  gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
 
 	  /* 2.4  Extract the final scalar result.  Create:
 	     s_out3 = extract_field <v_out2, bitpos>  */
@@ -5696,7 +5686,6 @@  vectorizable_reduction (stmt_vec_info st
 			stmt_vector_for_cost *cost_vec)
 {
   tree scalar_dest;
-  tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
   tree vectype_in = NULL_TREE;
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
@@ -5763,13 +5752,53 @@  vectorizable_reduction (stmt_vec_info st
 	  phi_info = loop_vinfo->lookup_stmt (use_stmt);
 	  stmt_info = vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (phi_info));
 	}
-      /* STMT_VINFO_REDUC_DEF doesn't point to the first but the last
-         element.  */
-      if (slp_node && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
+    }
+
+  /* PHIs should not participate in patterns.  */
+  gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info));
+  gphi *reduc_def_phi = as_a <gphi *> (phi_info->stmt);
+
+  /* Verify following REDUC_IDX from the latch def leads us back to the PHI
+     and compute the reduction chain length.  */
+  tree reduc_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi,
+					  loop_latch_edge (loop));
+  unsigned reduc_chain_length = 0;
+  bool only_slp_reduc_chain = true;
+  stmt_info = NULL;
+  while (reduc_def != PHI_RESULT (reduc_def_phi))
+    {
+      stmt_vec_info def = loop_vinfo->lookup_def (reduc_def);
+      stmt_vec_info vdef = vect_stmt_to_vectorize (def);
+      if (STMT_VINFO_REDUC_IDX (vdef) == -1)
 	{
-	  gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (stmt_info));
-	  stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info);
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			     "reduction chain broken by patterns.\n");
+	  return false;
+	}
+      if (!REDUC_GROUP_FIRST_ELEMENT (vdef))
+	only_slp_reduc_chain = false;
+      /* ???  For epilogue generation live members of the chain need
+         to point back to the PHI via their original stmt for
+	 info_for_reduction to work.  */
+      if (STMT_VINFO_LIVE_P (vdef))
+	STMT_VINFO_REDUC_DEF (def) = phi_info;
+      if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (vdef->stmt)))
+	{
+	  if (!tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (vdef->stmt)),
+				      TREE_TYPE (gimple_assign_rhs1 (vdef->stmt))))
+	    {
+	      if (dump_enabled_p ())
+		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				 "conversion in the reduction chain.\n");
+	      return false;
+	    }
 	}
+      else if (!stmt_info)
+	/* First non-conversion stmt.  */
+	stmt_info = vdef;
+      reduc_def = gimple_op (vdef->stmt, 1 + STMT_VINFO_REDUC_IDX (vdef));
+      reduc_chain_length++;
     }
   /* PHIs should not participate in patterns.  */
   gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info));
@@ -5780,6 +5809,13 @@  vectorizable_reduction (stmt_vec_info st
       nested_cycle = true;
     }
 
+  /* STMT_VINFO_REDUC_DEF doesn't point to the first but the last
+     element.  */
+  if (slp_node && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
+    {
+      gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (stmt_info));
+      stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info);
+    }
   if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
     gcc_assert (slp_node
 		&& REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info);
@@ -5815,6 +5851,8 @@  vectorizable_reduction (stmt_vec_info st
         inside the loop body. The last operand is the reduction variable,
         which is defined by the loop-header-phi.  */
 
+  tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
+  STMT_VINFO_REDUC_VECTYPE (reduc_info) = vectype_out;
   gassign *stmt = as_a <gassign *> (stmt_info->stmt);
   enum tree_code code = gimple_assign_rhs_code (stmt);
   bool lane_reduc_code_p
@@ -5831,40 +5869,6 @@  vectorizable_reduction (stmt_vec_info st
   if (!type_has_mode_precision_p (scalar_type))
     return false;
 
-  /* All uses but the last are expected to be defined in the loop.
-     The last use is the reduction variable.  In case of nested cycle this
-     assumption is not true: we use reduc_index to record the index of the
-     reduction variable.  */
-  gphi *reduc_def_phi = as_a <gphi *> (phi_info->stmt);
-
-  /* Verify following REDUC_IDX from the latch def leads us back to the PHI
-     and compute the reduction chain length.  */
-  tree reduc_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi,
-					  loop_latch_edge (loop));
-  unsigned reduc_chain_length = 0;
-  bool only_slp_reduc_chain = true;
-  while (reduc_def != PHI_RESULT (reduc_def_phi))
-    {
-      stmt_vec_info def = loop_vinfo->lookup_def (reduc_def);
-      stmt_vec_info vdef = vect_stmt_to_vectorize (def);
-      if (STMT_VINFO_REDUC_IDX (vdef) == -1)
-	{
-	  if (dump_enabled_p ())
-	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			     "reduction chain broken by patterns.\n");
-	  return false;
-	}
-      if (!REDUC_GROUP_FIRST_ELEMENT (vdef))
-	only_slp_reduc_chain = false;
-      /* ???  For epilogue generation live members of the chain need
-         to point back to the PHI via their original stmt for
-	 info_for_reduction to work.  */
-      if (STMT_VINFO_LIVE_P (vdef))
-	STMT_VINFO_REDUC_DEF (def) = phi_info;
-      reduc_def = gimple_op (vdef->stmt, 1 + STMT_VINFO_REDUC_IDX (vdef));
-      reduc_chain_length++;
-    }
-
   /* For lane-reducing ops we're reducing the number of reduction PHIs
      which means the only use of that may be in the lane-reducing operation.  */
   if (lane_reduc_code_p
@@ -5877,6 +5881,10 @@  vectorizable_reduction (stmt_vec_info st
       return false;
     }
 
+  /* All uses but the last are expected to be defined in the loop.
+     The last use is the reduction variable.  In case of nested cycle this
+     assumption is not true: we use reduc_index to record the index of the
+     reduction variable.  */
   reduc_def = PHI_RESULT (reduc_def_phi);
   for (i = 0; i < op_type; i++)
     {
@@ -5931,7 +5939,7 @@  vectorizable_reduction (stmt_vec_info st
 	}
     }
   if (!vectype_in)
-    vectype_in = vectype_out;
+    vectype_in = STMT_VINFO_VECTYPE (phi_info);
   STMT_VINFO_REDUC_VECTYPE_IN (reduc_info) = vectype_in;
 
   enum vect_reduction_type v_reduc_type = STMT_VINFO_REDUC_TYPE (phi_info);
@@ -6037,12 +6045,6 @@  vectorizable_reduction (stmt_vec_info st
 	}
     }
 
-  if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
-    /* We changed STMT to be the first stmt in reduction chain, hence we
-       check that in this case the first element in the chain is STMT.  */
-    gcc_assert (REDUC_GROUP_FIRST_ELEMENT (STMT_VINFO_REDUC_DEF (phi_info))
-		== vect_orig_stmt (stmt_info));
-
   if (STMT_VINFO_LIVE_P (phi_info))
     return false;
 
@@ -6447,8 +6449,15 @@  vectorizable_reduction (stmt_vec_info st
       && code != SAD_EXPR
       && reduction_type != FOLD_LEFT_REDUCTION)
     {
-      STMT_VINFO_DEF_TYPE (stmt_info) = vect_internal_def;
-      STMT_VINFO_DEF_TYPE (vect_orig_stmt (stmt_info)) = vect_internal_def;
+      stmt_vec_info tem
+	= vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (phi_info));
+      if (slp_node && REDUC_GROUP_FIRST_ELEMENT (tem))
+	{
+	  gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (tem));
+	  tem = REDUC_GROUP_FIRST_ELEMENT (tem);
+	}
+      STMT_VINFO_DEF_TYPE (vect_orig_stmt (tem)) = vect_internal_def;
+      STMT_VINFO_DEF_TYPE (tem) = vect_internal_def;
     }
   else if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
     {
Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c	(revision 277922)
+++ gcc/tree-vect-stmts.c	(working copy)
@@ -330,13 +330,13 @@  vect_stmt_relevant_p (stmt_vec_info stmt
 	  basic_block bb = gimple_bb (USE_STMT (use_p));
 	  if (!flow_bb_inside_loop_p (loop, bb))
 	    {
+	      if (is_gimple_debug (USE_STMT (use_p)))
+		continue;
+
 	      if (dump_enabled_p ())
 		dump_printf_loc (MSG_NOTE, vect_location,
                                  "vec_stmt_relevant_p: used out of loop.\n");
 
-	      if (is_gimple_debug (USE_STMT (use_p)))
-		continue;
-
 	      /* We expect all such uses to be in the loop exit phis
 		 (because of loop closed form)   */
 	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
Index: gcc/tree-vectorizer.h
===================================================================
--- gcc/tree-vectorizer.h	(revision 277922)
+++ gcc/tree-vectorizer.h	(working copy)
@@ -1050,6 +1050,9 @@  public:
   /* The vector input type relevant for reduction vectorization.  */
   tree reduc_vectype_in;
 
+  /* The vector type for performing the actual reduction.  */
+  tree reduc_vectype;
+
   /* Whether we force a single cycle PHI during reduction vectorization.  */
   bool force_single_cycle;
 
@@ -1175,6 +1178,7 @@  STMT_VINFO_BB_VINFO (stmt_vec_info stmt_
 #define STMT_VINFO_REDUC_CODE(S)	(S)->reduc_code
 #define STMT_VINFO_REDUC_FN(S)		(S)->reduc_fn
 #define STMT_VINFO_REDUC_DEF(S)		(S)->reduc_def
+#define STMT_VINFO_REDUC_VECTYPE(S)     (S)->reduc_vectype
 #define STMT_VINFO_REDUC_VECTYPE_IN(S)  (S)->reduc_vectype_in
 #define STMT_VINFO_SLP_VECT_ONLY(S)     (S)->slp_vect_only_p
 
Index: gcc/testsuite/gcc.dg/vect/pr92205.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/pr92205.c	(revision 277922)
+++ gcc/testsuite/gcc.dg/vect/pr92205.c	(working copy)
@@ -10,4 +10,4 @@  int b(int n, unsigned char *a)
   return d;
 }
 
-/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target { vect_unpack && { ! vect_no_bitwise } } } } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail *-*-* } } } */
Index: gcc/testsuite/gcc.dg/vect/pr92324-1.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/pr92324-1.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/vect/pr92324-1.c	(working copy)
@@ -0,0 +1,14 @@ 
+/* { dg-do compile } */
+
+unsigned a, b;
+int c, d;
+unsigned e(int f) {
+  if (a > f)
+    return a;
+  return f;
+}
+void g() {
+  for (; c; c++)
+    d = e(d);
+  b = d;
+}
Index: gcc/testsuite/gcc.dg/vect/pr92324-2.c
===================================================================
--- gcc/testsuite/gcc.dg/vect/pr92324-2.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/vect/pr92324-2.c	(working copy)
@@ -0,0 +1,21 @@ 
+#include "tree-vect.h"
+
+unsigned b[1024];
+
+int __attribute__((noipa))
+foo (int n)
+{
+  int res = 0;
+  for (int i = 0; i < n; ++i)
+    res = res > b[i] ? res : b[i];
+  return res;
+}
+
+int main ()
+{
+  check_vect ();
+  b[15] = (unsigned)__INT_MAX__ + 1;
+  if (foo (16) != -__INT_MAX__ - 1)
+    __builtin_abort ();
+  return 0;
+}