diff mbox series

Remove vectorizer reduction operand swapping

Message ID nycvar.YFH.7.76.1909182010440.5566@zhemvz.fhfr.qr
State New
Headers show
Series Remove vectorizer reduction operand swapping | expand

Commit Message

Richard Biener Sept. 18, 2019, 6:11 p.m. UTC
It shouldn't be necessary.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.
(SLP part testing separately)

Richard.

2019-09-18  Richard Biener  <rguenther@suse.de>

	* tree-vect-loop.c (vect_is_simple_reduction): Remove operand
	swapping.
	(vectorize_fold_left_reduction): Remove assert.
	(vectorizable_reduction): Also expect COND_EXPR non-reduction
	operand in position 2.  Remove assert.

Comments

Richard Sandiford Sept. 19, 2019, 7:46 a.m. UTC | #1
Richard Biener <rguenther@suse.de> writes:
> It shouldn't be necessary.

SVE is the counter-example :-)  But the fix is simpler than the code
you removed, so it's still a net win.

Fixes various vect.exp and aarch64-sve.exp ICEs.  OK if it passes
proper testing?

Thanks,
Richard


2019-09-19  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* tree-vectorizer.h (vectorizable_condition): Take an int
	reduction index instead of a boolean flag.
	* tree-vect-stmts.c (vectorizable_condition): Likewise.
	Swap the "then" and "else" values for EXTRACT_LAST_REDUCTION
	reductions if the reduction accumulator is the "then" rather
	than the "else" value.
	(vect_analyze_stmt): Update call accordingly.
	(vect_transform_stmt): Likewise.
	* tree-vect-loop.c (vectorizable_reduction): Likewise,
	asserting that the index is > 0.

Index: gcc/tree-vectorizer.h
===================================================================
--- gcc/tree-vectorizer.h	2019-09-05 08:49:30.717740417 +0100
+++ gcc/tree-vectorizer.h	2019-09-19 08:43:53.965866883 +0100
@@ -1541,7 +1541,7 @@ extern void vect_remove_stores (stmt_vec
 extern opt_result vect_analyze_stmt (stmt_vec_info, bool *, slp_tree,
 				     slp_instance, stmt_vector_for_cost *);
 extern bool vectorizable_condition (stmt_vec_info, gimple_stmt_iterator *,
-				    stmt_vec_info *, bool, slp_tree,
+				    stmt_vec_info *, int, slp_tree,
 				    stmt_vector_for_cost *);
 extern bool vectorizable_shift (stmt_vec_info, gimple_stmt_iterator *,
 				stmt_vec_info *, slp_tree,
Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c	2019-09-17 15:27:13.090053952 +0100
+++ gcc/tree-vect-stmts.c	2019-09-19 08:43:53.965866883 +0100
@@ -9778,7 +9778,7 @@ vect_is_simple_cond (tree cond, vec_info
 
 bool
 vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-			stmt_vec_info *vec_stmt, bool for_reduction,
+			stmt_vec_info *vec_stmt, int reduc_index,
 			slp_tree slp_node, stmt_vector_for_cost *cost_vec)
 {
   vec_info *vinfo = stmt_info->vinfo;
@@ -9807,6 +9807,7 @@ vectorizable_condition (stmt_vec_info st
   vec<tree> vec_oprnds3 = vNULL;
   tree vec_cmp_type;
   bool masked = false;
+  bool for_reduction = (reduc_index > 0);
 
   if (for_reduction && STMT_SLP_TYPE (stmt_info))
     return false;
@@ -9888,6 +9889,29 @@ vectorizable_condition (stmt_vec_info st
       cond_expr1 = TREE_OPERAND (cond_expr, 1);
     }
 
+  /* For conditional reductions, the "then" value needs to be the candidate
+     value calculated by this iteration while the "else" value needs to be
+     the result carried over from previous iterations.  If the COND_EXPR
+     is the other way around, we need to swap it.  */
+  bool must_invert_cmp_result = false;
+  if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
+    {
+      if (masked)
+	must_invert_cmp_result = true;
+      else
+	{
+	  bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
+	  tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
+	  if (new_code == ERROR_MARK)
+	    must_invert_cmp_result = true;
+	  else
+	    cond_code = new_code;
+	}
+      /* Make sure we don't accidentally use the old condition.  */
+      cond_expr = NULL_TREE;
+      std::swap (then_clause, else_clause);
+    }
+
   if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
     {
       /* Boolean values may have another representation in vectors
@@ -10102,6 +10126,15 @@ vectorizable_condition (stmt_vec_info st
 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
 		  vec_compare = vec_compare_name;
 		}
+	      if (must_invert_cmp_result)
+		{
+		  tree vec_compare_name = make_ssa_name (vec_cmp_type);
+		  gassign *new_stmt = gimple_build_assign (vec_compare_name,
+							   BIT_NOT_EXPR,
+							   vec_compare);
+		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
+		  vec_compare = vec_compare_name;
+		}
 	      gcall *new_stmt = gimple_build_call_internal
 		(IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
 		 vec_then_clause);
@@ -10635,7 +10668,7 @@ vect_analyze_stmt (stmt_vec_info stmt_in
 				     node_instance, cost_vec)
 	  || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
 	  || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
-	  || vectorizable_condition (stmt_info, NULL, NULL, false, node,
+	  || vectorizable_condition (stmt_info, NULL, NULL, 0, node,
 				     cost_vec)
 	  || vectorizable_comparison (stmt_info, NULL, NULL, node,
 				      cost_vec));
@@ -10654,7 +10687,7 @@ vect_analyze_stmt (stmt_vec_info stmt_in
 	      || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
 				    cost_vec)
 	      || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
-	      || vectorizable_condition (stmt_info, NULL, NULL, false, node,
+	      || vectorizable_condition (stmt_info, NULL, NULL, 0, node,
 					 cost_vec)
 	      || vectorizable_comparison (stmt_info, NULL, NULL, node,
 					  cost_vec));
@@ -10759,7 +10792,7 @@ vect_transform_stmt (stmt_vec_info stmt_
       break;
 
     case condition_vec_info_type:
-      done = vectorizable_condition (stmt_info, gsi, &vec_stmt, false,
+      done = vectorizable_condition (stmt_info, gsi, &vec_stmt, 0,
 				     slp_node, NULL);
       gcc_assert (done);
       break;
Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c	2019-09-17 15:27:13.078054042 +0100
+++ gcc/tree-vect-loop.c	2019-09-19 08:43:53.961866913 +0100
@@ -6774,8 +6774,9 @@ vectorizable_reduction (stmt_vec_info st
     {
       /* Only call during the analysis stage, otherwise we'll lose
 	 STMT_VINFO_TYPE.  */
+      gcc_assert (reduc_index > 0);
       if (!vec_stmt && !vectorizable_condition (stmt_info, gsi, NULL,
-						true, NULL, cost_vec))
+						reduc_index, NULL, cost_vec))
         {
           if (dump_enabled_p ())
 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -7228,9 +7229,9 @@ vectorizable_reduction (stmt_vec_info st
 
   if (reduction_type == EXTRACT_LAST_REDUCTION)
     {
-      gcc_assert (!slp_node);
+      gcc_assert (!slp_node && reduc_index > 0);
       return vectorizable_condition (stmt_info, gsi, vec_stmt,
-				     true, NULL, NULL);
+				     reduc_index, NULL, NULL);
     }
 
   /* Create the destination vector  */
@@ -7260,9 +7261,9 @@ vectorizable_reduction (stmt_vec_info st
     {
       if (code == COND_EXPR)
         {
-          gcc_assert (!slp_node);
+          gcc_assert (!slp_node && reduc_index > 0);
 	  vectorizable_condition (stmt_info, gsi, vec_stmt,
-				  true, NULL, NULL);
+				  reduc_index, NULL, NULL);
           break;
         }
       if (code == LSHIFT_EXPR
Richard Biener Sept. 19, 2019, 9:27 a.m. UTC | #2
On Thu, 19 Sep 2019, Richard Sandiford wrote:

> Richard Biener <rguenther@suse.de> writes:
> > It shouldn't be necessary.
> 
> SVE is the counter-example :-)  But the fix is simpler than the code
> you removed, so it's still a net win.

Yeah, I meant it shouldn't be necessary to swap operands of the
original scalar stmts since we keep track of the "order" via
reduc_index.

> Fixes various vect.exp and aarch64-sve.exp ICEs.  OK if it passes
> proper testing?

OK.

Richard.

> Thanks,
> Richard
> 
> 
> 2019-09-19  Richard Sandiford  <richard.sandiford@arm.com>
> 
> gcc/
> 	* tree-vectorizer.h (vectorizable_condition): Take an int
> 	reduction index instead of a boolean flag.
> 	* tree-vect-stmts.c (vectorizable_condition): Likewise.
> 	Swap the "then" and "else" values for EXTRACT_LAST_REDUCTION
> 	reductions if the reduction accumulator is the "then" rather
> 	than the "else" value.
> 	(vect_analyze_stmt): Update call accordingly.
> 	(vect_transform_stmt): Likewise.
> 	* tree-vect-loop.c (vectorizable_reduction): Likewise,
> 	asserting that the index is > 0.
> 
> Index: gcc/tree-vectorizer.h
> ===================================================================
> --- gcc/tree-vectorizer.h	2019-09-05 08:49:30.717740417 +0100
> +++ gcc/tree-vectorizer.h	2019-09-19 08:43:53.965866883 +0100
> @@ -1541,7 +1541,7 @@ extern void vect_remove_stores (stmt_vec
>  extern opt_result vect_analyze_stmt (stmt_vec_info, bool *, slp_tree,
>  				     slp_instance, stmt_vector_for_cost *);
>  extern bool vectorizable_condition (stmt_vec_info, gimple_stmt_iterator *,
> -				    stmt_vec_info *, bool, slp_tree,
> +				    stmt_vec_info *, int, slp_tree,
>  				    stmt_vector_for_cost *);
>  extern bool vectorizable_shift (stmt_vec_info, gimple_stmt_iterator *,
>  				stmt_vec_info *, slp_tree,
> Index: gcc/tree-vect-stmts.c
> ===================================================================
> --- gcc/tree-vect-stmts.c	2019-09-17 15:27:13.090053952 +0100
> +++ gcc/tree-vect-stmts.c	2019-09-19 08:43:53.965866883 +0100
> @@ -9778,7 +9778,7 @@ vect_is_simple_cond (tree cond, vec_info
>  
>  bool
>  vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
> -			stmt_vec_info *vec_stmt, bool for_reduction,
> +			stmt_vec_info *vec_stmt, int reduc_index,
>  			slp_tree slp_node, stmt_vector_for_cost *cost_vec)
>  {
>    vec_info *vinfo = stmt_info->vinfo;
> @@ -9807,6 +9807,7 @@ vectorizable_condition (stmt_vec_info st
>    vec<tree> vec_oprnds3 = vNULL;
>    tree vec_cmp_type;
>    bool masked = false;
> +  bool for_reduction = (reduc_index > 0);
>  
>    if (for_reduction && STMT_SLP_TYPE (stmt_info))
>      return false;
> @@ -9888,6 +9889,29 @@ vectorizable_condition (stmt_vec_info st
>        cond_expr1 = TREE_OPERAND (cond_expr, 1);
>      }
>  
> +  /* For conditional reductions, the "then" value needs to be the candidate
> +     value calculated by this iteration while the "else" value needs to be
> +     the result carried over from previous iterations.  If the COND_EXPR
> +     is the other way around, we need to swap it.  */
> +  bool must_invert_cmp_result = false;
> +  if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
> +    {
> +      if (masked)
> +	must_invert_cmp_result = true;
> +      else
> +	{
> +	  bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
> +	  tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
> +	  if (new_code == ERROR_MARK)
> +	    must_invert_cmp_result = true;
> +	  else
> +	    cond_code = new_code;
> +	}
> +      /* Make sure we don't accidentally use the old condition.  */
> +      cond_expr = NULL_TREE;
> +      std::swap (then_clause, else_clause);
> +    }
> +
>    if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
>      {
>        /* Boolean values may have another representation in vectors
> @@ -10102,6 +10126,15 @@ vectorizable_condition (stmt_vec_info st
>  		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
>  		  vec_compare = vec_compare_name;
>  		}
> +	      if (must_invert_cmp_result)
> +		{
> +		  tree vec_compare_name = make_ssa_name (vec_cmp_type);
> +		  gassign *new_stmt = gimple_build_assign (vec_compare_name,
> +							   BIT_NOT_EXPR,
> +							   vec_compare);
> +		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
> +		  vec_compare = vec_compare_name;
> +		}
>  	      gcall *new_stmt = gimple_build_call_internal
>  		(IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
>  		 vec_then_clause);
> @@ -10635,7 +10668,7 @@ vect_analyze_stmt (stmt_vec_info stmt_in
>  				     node_instance, cost_vec)
>  	  || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
>  	  || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
> -	  || vectorizable_condition (stmt_info, NULL, NULL, false, node,
> +	  || vectorizable_condition (stmt_info, NULL, NULL, 0, node,
>  				     cost_vec)
>  	  || vectorizable_comparison (stmt_info, NULL, NULL, node,
>  				      cost_vec));
> @@ -10654,7 +10687,7 @@ vect_analyze_stmt (stmt_vec_info stmt_in
>  	      || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
>  				    cost_vec)
>  	      || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
> -	      || vectorizable_condition (stmt_info, NULL, NULL, false, node,
> +	      || vectorizable_condition (stmt_info, NULL, NULL, 0, node,
>  					 cost_vec)
>  	      || vectorizable_comparison (stmt_info, NULL, NULL, node,
>  					  cost_vec));
> @@ -10759,7 +10792,7 @@ vect_transform_stmt (stmt_vec_info stmt_
>        break;
>  
>      case condition_vec_info_type:
> -      done = vectorizable_condition (stmt_info, gsi, &vec_stmt, false,
> +      done = vectorizable_condition (stmt_info, gsi, &vec_stmt, 0,
>  				     slp_node, NULL);
>        gcc_assert (done);
>        break;
> Index: gcc/tree-vect-loop.c
> ===================================================================
> --- gcc/tree-vect-loop.c	2019-09-17 15:27:13.078054042 +0100
> +++ gcc/tree-vect-loop.c	2019-09-19 08:43:53.961866913 +0100
> @@ -6774,8 +6774,9 @@ vectorizable_reduction (stmt_vec_info st
>      {
>        /* Only call during the analysis stage, otherwise we'll lose
>  	 STMT_VINFO_TYPE.  */
> +      gcc_assert (reduc_index > 0);
>        if (!vec_stmt && !vectorizable_condition (stmt_info, gsi, NULL,
> -						true, NULL, cost_vec))
> +						reduc_index, NULL, cost_vec))
>          {
>            if (dump_enabled_p ())
>  	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> @@ -7228,9 +7229,9 @@ vectorizable_reduction (stmt_vec_info st
>  
>    if (reduction_type == EXTRACT_LAST_REDUCTION)
>      {
> -      gcc_assert (!slp_node);
> +      gcc_assert (!slp_node && reduc_index > 0);
>        return vectorizable_condition (stmt_info, gsi, vec_stmt,
> -				     true, NULL, NULL);
> +				     reduc_index, NULL, NULL);
>      }
>  
>    /* Create the destination vector  */
> @@ -7260,9 +7261,9 @@ vectorizable_reduction (stmt_vec_info st
>      {
>        if (code == COND_EXPR)
>          {
> -          gcc_assert (!slp_node);
> +          gcc_assert (!slp_node && reduc_index > 0);
>  	  vectorizable_condition (stmt_info, gsi, vec_stmt,
> -				  true, NULL, NULL);
> +				  reduc_index, NULL, NULL);
>            break;
>          }
>        if (code == LSHIFT_EXPR
>
Rainer Orth Sept. 20, 2019, 10:33 a.m. UTC | #3
Hi Richard,

> On Thu, 19 Sep 2019, Richard Sandiford wrote:
>
>> Richard Biener <rguenther@suse.de> writes:
>> > It shouldn't be necessary.
>> 
>> SVE is the counter-example :-)  But the fix is simpler than the code
>> you removed, so it's still a net win.
>
> Yeah, I meant it shouldn't be necessary to swap operands of the
> original scalar stmts since we keep track of the "order" via
> reduc_index.
>
>> Fixes various vect.exp and aarch64-sve.exp ICEs.  OK if it passes
>> proper testing?
>
> OK.

I suspect this patch caused

+FAIL: gcc.dg/pr88031.c (internal compiler error)
+FAIL: gcc.dg/pr88031.c (test for excess errors)

I'm seeing it on 32 and 64-bit Solaris/x86, but there are also several
reports on aarch64, armv8l, i686, powerpc64*, s390x, x86_64.

Excess errors:
during GIMPLE pass: vect
/vol/gcc/src/hg/trunk/local/gcc/testsuite/gcc.dg/pr88031.c:6:6: internal compiler error: in vectorizable_reduction, at tree-vect-loop.c:6662
0x938ecdf vectorizable_reduction(_stmt_vec_info*, gimple_stmt_iterator*, _stmt_vec_info**, _slp_tree*, _slp_instance*, vec<stmt_info_for_cost, va_heap, vl_ptr>*)
        /vol/gcc/src/hg/trunk/local/gcc/tree-vect-loop.c:6662
0x936ac2a vect_analyze_stmt(_stmt_vec_info*, bool*, _slp_tree*, _slp_instance*, vec<stmt_info_for_cost, va_heap, vl_ptr>*)
        /vol/gcc/src/hg/trunk/local/gcc/tree-vect-stmts.c:10667
0x938afce vect_analyze_loop_operations
        /vol/gcc/src/hg/trunk/local/gcc/tree-vect-loop.c:1580
0x938afce vect_analyze_loop_2
        /vol/gcc/src/hg/trunk/local/gcc/tree-vect-loop.c:2026
0x938afce vect_analyze_loop(loop*, _loop_vec_info*, vec_info_shared*)
        /vol/gcc/src/hg/trunk/local/gcc/tree-vect-loop.c:2329
0x93ab60e try_vectorize_loop_1
        /vol/gcc/src/hg/trunk/local/gcc/tree-vectorizer.c:884
0x93abd6c try_vectorize_loop
        /vol/gcc/src/hg/trunk/local/gcc/tree-vectorizer.c:1030
0x93ac2fb vectorize_loops()
        /vol/gcc/src/hg/trunk/local/gcc/tree-vectorizer.c:1104

	Rainer
Richard Biener Sept. 20, 2019, 10:36 a.m. UTC | #4
On Fri, 20 Sep 2019, Rainer Orth wrote:

> Hi Richard,
> 
> > On Thu, 19 Sep 2019, Richard Sandiford wrote:
> >
> >> Richard Biener <rguenther@suse.de> writes:
> >> > It shouldn't be necessary.
> >> 
> >> SVE is the counter-example :-)  But the fix is simpler than the code
> >> you removed, so it's still a net win.
> >
> > Yeah, I meant it shouldn't be necessary to swap operands of the
> > original scalar stmts since we keep track of the "order" via
> > reduc_index.
> >
> >> Fixes various vect.exp and aarch64-sve.exp ICEs.  OK if it passes
> >> proper testing?
> >
> > OK.
> 
> I suspect this patch caused
> 
> +FAIL: gcc.dg/pr88031.c (internal compiler error)
> +FAIL: gcc.dg/pr88031.c (test for excess errors)
> 
> I'm seeing it on 32 and 64-bit Solaris/x86, but there are also several
> reports on aarch64, armv8l, i686, powerpc64*, s390x, x86_64.

PR91822, should be fixed now.

Richard.
Christophe Lyon Sept. 24, 2019, 8:50 p.m. UTC | #5
On Wed, 18 Sep 2019 at 20:11, Richard Biener <rguenther@suse.de> wrote:
>
>
> It shouldn't be necessary.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.
> (SLP part testing separately)
>
> Richard.
>
> 2019-09-18  Richard Biener  <rguenther@suse.de>
>
>         * tree-vect-loop.c (vect_is_simple_reduction): Remove operand
>         swapping.
>         (vectorize_fold_left_reduction): Remove assert.
>         (vectorizable_reduction): Also expect COND_EXPR non-reduction
>         operand in position 2.  Remove assert.
>

Hi,

Since this was committed (r275898), I've noticed a regression on armeb:
FAIL: gcc.dg/vect/vect-cond-4.c execution test

I'm seeing this with qemu, but I do not have the execution traces yet.

Christophe

> Index: gcc/tree-vect-loop.c
> ===================================================================
> --- gcc/tree-vect-loop.c        (revision 275872)
> +++ gcc/tree-vect-loop.c        (working copy)
> @@ -3278,56 +3278,8 @@ vect_is_simple_reduction (loop_vec_info
>           || !flow_bb_inside_loop_p (loop, gimple_bb (def2_info->stmt))
>           || vect_valid_reduction_input_p (def2_info)))
>      {
> -      if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
> -       {
> -         /* Check if we can swap operands (just for simplicity - so that
> -            the rest of the code can assume that the reduction variable
> -            is always the last (second) argument).  */
> -         if (code == COND_EXPR)
> -           {
> -             /* Swap cond_expr by inverting the condition.  */
> -             tree cond_expr = gimple_assign_rhs1 (def_stmt);
> -             enum tree_code invert_code = ERROR_MARK;
> -             enum tree_code cond_code = TREE_CODE (cond_expr);
> -
> -             if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
> -               {
> -                 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
> -                 invert_code = invert_tree_comparison (cond_code, honor_nans);
> -               }
> -             if (invert_code != ERROR_MARK)
> -               {
> -                 TREE_SET_CODE (cond_expr, invert_code);
> -                 swap_ssa_operands (def_stmt,
> -                                    gimple_assign_rhs2_ptr (def_stmt),
> -                                    gimple_assign_rhs3_ptr (def_stmt));
> -               }
> -             else
> -               {
> -                 if (dump_enabled_p ())
> -                   report_vect_op (MSG_NOTE, def_stmt,
> -                                   "detected reduction: cannot swap operands "
> -                                   "for cond_expr");
> -                 return NULL;
> -               }
> -           }
> -         else
> -           swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
> -                              gimple_assign_rhs2_ptr (def_stmt));
> -
> -         if (dump_enabled_p ())
> -           report_vect_op (MSG_NOTE, def_stmt,
> -                           "detected reduction: need to swap operands: ");
> -
> -         if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
> -           LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
> -        }
> -      else
> -        {
> -          if (dump_enabled_p ())
> -            report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
> -        }
> -
> +      if (dump_enabled_p ())
> +       report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
>        return def_stmt_info;
>      }
>
> @@ -5969,7 +5921,6 @@ vectorize_fold_left_reduction (stmt_vec_
>    gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
>    gcc_assert (ncopies == 1);
>    gcc_assert (TREE_CODE_LENGTH (code) == binary_op);
> -  gcc_assert (reduc_index == (code == MINUS_EXPR ? 0 : 1));
>    gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
>               == FOLD_LEFT_REDUCTION);
>
> @@ -6542,9 +6493,9 @@ vectorizable_reduction (stmt_vec_info st
>           reduc_index = i;
>         }
>
> -      if (i == 1 && code == COND_EXPR)
> +      if (code == COND_EXPR)
>         {
> -         /* Record how value of COND_EXPR is defined.  */
> +         /* Record how the non-reduction-def value of COND_EXPR is defined.  */
>           if (dt == vect_constant_def)
>             {
>               cond_reduc_dt = dt;
> @@ -6622,10 +6573,6 @@ vectorizable_reduction (stmt_vec_info st
>           return false;
>         }
>
> -      /* vect_is_simple_reduction ensured that operand 2 is the
> -        loop-carried operand.  */
> -      gcc_assert (reduc_index == 2);
> -
>        /* Loop peeling modifies initial value of reduction PHI, which
>          makes the reduction stmt to be transformed different to the
>          original stmt analyzed.  We need to record reduction code for
Richard Biener Sept. 25, 2019, 7:33 a.m. UTC | #6
On Tue, 24 Sep 2019, Christophe Lyon wrote:

> On Wed, 18 Sep 2019 at 20:11, Richard Biener <rguenther@suse.de> wrote:
> >
> >
> > It shouldn't be necessary.
> >
> > Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.
> > (SLP part testing separately)
> >
> > Richard.
> >
> > 2019-09-18  Richard Biener  <rguenther@suse.de>
> >
> >         * tree-vect-loop.c (vect_is_simple_reduction): Remove operand
> >         swapping.
> >         (vectorize_fold_left_reduction): Remove assert.
> >         (vectorizable_reduction): Also expect COND_EXPR non-reduction
> >         operand in position 2.  Remove assert.
> >
> 
> Hi,
> 
> Since this was committed (r275898), I've noticed a regression on armeb:
> FAIL: gcc.dg/vect/vect-cond-4.c execution test
> 
> I'm seeing this with qemu, but I do not have the execution traces yet.

Can you open a bugreport please?

Thanks,
Richard.

> Christophe
> 
> > Index: gcc/tree-vect-loop.c
> > ===================================================================
> > --- gcc/tree-vect-loop.c        (revision 275872)
> > +++ gcc/tree-vect-loop.c        (working copy)
> > @@ -3278,56 +3278,8 @@ vect_is_simple_reduction (loop_vec_info
> >           || !flow_bb_inside_loop_p (loop, gimple_bb (def2_info->stmt))
> >           || vect_valid_reduction_input_p (def2_info)))
> >      {
> > -      if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
> > -       {
> > -         /* Check if we can swap operands (just for simplicity - so that
> > -            the rest of the code can assume that the reduction variable
> > -            is always the last (second) argument).  */
> > -         if (code == COND_EXPR)
> > -           {
> > -             /* Swap cond_expr by inverting the condition.  */
> > -             tree cond_expr = gimple_assign_rhs1 (def_stmt);
> > -             enum tree_code invert_code = ERROR_MARK;
> > -             enum tree_code cond_code = TREE_CODE (cond_expr);
> > -
> > -             if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
> > -               {
> > -                 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
> > -                 invert_code = invert_tree_comparison (cond_code, honor_nans);
> > -               }
> > -             if (invert_code != ERROR_MARK)
> > -               {
> > -                 TREE_SET_CODE (cond_expr, invert_code);
> > -                 swap_ssa_operands (def_stmt,
> > -                                    gimple_assign_rhs2_ptr (def_stmt),
> > -                                    gimple_assign_rhs3_ptr (def_stmt));
> > -               }
> > -             else
> > -               {
> > -                 if (dump_enabled_p ())
> > -                   report_vect_op (MSG_NOTE, def_stmt,
> > -                                   "detected reduction: cannot swap operands "
> > -                                   "for cond_expr");
> > -                 return NULL;
> > -               }
> > -           }
> > -         else
> > -           swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
> > -                              gimple_assign_rhs2_ptr (def_stmt));
> > -
> > -         if (dump_enabled_p ())
> > -           report_vect_op (MSG_NOTE, def_stmt,
> > -                           "detected reduction: need to swap operands: ");
> > -
> > -         if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
> > -           LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
> > -        }
> > -      else
> > -        {
> > -          if (dump_enabled_p ())
> > -            report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
> > -        }
> > -
> > +      if (dump_enabled_p ())
> > +       report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
> >        return def_stmt_info;
> >      }
> >
> > @@ -5969,7 +5921,6 @@ vectorize_fold_left_reduction (stmt_vec_
> >    gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
> >    gcc_assert (ncopies == 1);
> >    gcc_assert (TREE_CODE_LENGTH (code) == binary_op);
> > -  gcc_assert (reduc_index == (code == MINUS_EXPR ? 0 : 1));
> >    gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
> >               == FOLD_LEFT_REDUCTION);
> >
> > @@ -6542,9 +6493,9 @@ vectorizable_reduction (stmt_vec_info st
> >           reduc_index = i;
> >         }
> >
> > -      if (i == 1 && code == COND_EXPR)
> > +      if (code == COND_EXPR)
> >         {
> > -         /* Record how value of COND_EXPR is defined.  */
> > +         /* Record how the non-reduction-def value of COND_EXPR is defined.  */
> >           if (dt == vect_constant_def)
> >             {
> >               cond_reduc_dt = dt;
> > @@ -6622,10 +6573,6 @@ vectorizable_reduction (stmt_vec_info st
> >           return false;
> >         }
> >
> > -      /* vect_is_simple_reduction ensured that operand 2 is the
> > -        loop-carried operand.  */
> > -      gcc_assert (reduc_index == 2);
> > -
> >        /* Loop peeling modifies initial value of reduction PHI, which
> >          makes the reduction stmt to be transformed different to the
> >          original stmt analyzed.  We need to record reduction code for
>
Christophe Lyon Sept. 26, 2019, 1:40 a.m. UTC | #7
On Wed, 25 Sep 2019 at 09:33, Richard Biener <rguenther@suse.de> wrote:
>
> On Tue, 24 Sep 2019, Christophe Lyon wrote:
>
> > On Wed, 18 Sep 2019 at 20:11, Richard Biener <rguenther@suse.de> wrote:
> > >
> > >
> > > It shouldn't be necessary.
> > >
> > > Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.
> > > (SLP part testing separately)
> > >
> > > Richard.
> > >
> > > 2019-09-18  Richard Biener  <rguenther@suse.de>
> > >
> > >         * tree-vect-loop.c (vect_is_simple_reduction): Remove operand
> > >         swapping.
> > >         (vectorize_fold_left_reduction): Remove assert.
> > >         (vectorizable_reduction): Also expect COND_EXPR non-reduction
> > >         operand in position 2.  Remove assert.
> > >
> >
> > Hi,
> >
> > Since this was committed (r275898), I've noticed a regression on armeb:
> > FAIL: gcc.dg/vect/vect-cond-4.c execution test
> >
> > I'm seeing this with qemu, but I do not have the execution traces yet.
>
> Can you open a bugreport please?
>
Sure, I've created https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91909


> Thanks,
> Richard.
>
> > Christophe
> >
> > > Index: gcc/tree-vect-loop.c
> > > ===================================================================
> > > --- gcc/tree-vect-loop.c        (revision 275872)
> > > +++ gcc/tree-vect-loop.c        (working copy)
> > > @@ -3278,56 +3278,8 @@ vect_is_simple_reduction (loop_vec_info
> > >           || !flow_bb_inside_loop_p (loop, gimple_bb (def2_info->stmt))
> > >           || vect_valid_reduction_input_p (def2_info)))
> > >      {
> > > -      if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
> > > -       {
> > > -         /* Check if we can swap operands (just for simplicity - so that
> > > -            the rest of the code can assume that the reduction variable
> > > -            is always the last (second) argument).  */
> > > -         if (code == COND_EXPR)
> > > -           {
> > > -             /* Swap cond_expr by inverting the condition.  */
> > > -             tree cond_expr = gimple_assign_rhs1 (def_stmt);
> > > -             enum tree_code invert_code = ERROR_MARK;
> > > -             enum tree_code cond_code = TREE_CODE (cond_expr);
> > > -
> > > -             if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
> > > -               {
> > > -                 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
> > > -                 invert_code = invert_tree_comparison (cond_code, honor_nans);
> > > -               }
> > > -             if (invert_code != ERROR_MARK)
> > > -               {
> > > -                 TREE_SET_CODE (cond_expr, invert_code);
> > > -                 swap_ssa_operands (def_stmt,
> > > -                                    gimple_assign_rhs2_ptr (def_stmt),
> > > -                                    gimple_assign_rhs3_ptr (def_stmt));
> > > -               }
> > > -             else
> > > -               {
> > > -                 if (dump_enabled_p ())
> > > -                   report_vect_op (MSG_NOTE, def_stmt,
> > > -                                   "detected reduction: cannot swap operands "
> > > -                                   "for cond_expr");
> > > -                 return NULL;
> > > -               }
> > > -           }
> > > -         else
> > > -           swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
> > > -                              gimple_assign_rhs2_ptr (def_stmt));
> > > -
> > > -         if (dump_enabled_p ())
> > > -           report_vect_op (MSG_NOTE, def_stmt,
> > > -                           "detected reduction: need to swap operands: ");
> > > -
> > > -         if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
> > > -           LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
> > > -        }
> > > -      else
> > > -        {
> > > -          if (dump_enabled_p ())
> > > -            report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
> > > -        }
> > > -
> > > +      if (dump_enabled_p ())
> > > +       report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
> > >        return def_stmt_info;
> > >      }
> > >
> > > @@ -5969,7 +5921,6 @@ vectorize_fold_left_reduction (stmt_vec_
> > >    gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
> > >    gcc_assert (ncopies == 1);
> > >    gcc_assert (TREE_CODE_LENGTH (code) == binary_op);
> > > -  gcc_assert (reduc_index == (code == MINUS_EXPR ? 0 : 1));
> > >    gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
> > >               == FOLD_LEFT_REDUCTION);
> > >
> > > @@ -6542,9 +6493,9 @@ vectorizable_reduction (stmt_vec_info st
> > >           reduc_index = i;
> > >         }
> > >
> > > -      if (i == 1 && code == COND_EXPR)
> > > +      if (code == COND_EXPR)
> > >         {
> > > -         /* Record how value of COND_EXPR is defined.  */
> > > +         /* Record how the non-reduction-def value of COND_EXPR is defined.  */
> > >           if (dt == vect_constant_def)
> > >             {
> > >               cond_reduc_dt = dt;
> > > @@ -6622,10 +6573,6 @@ vectorizable_reduction (stmt_vec_info st
> > >           return false;
> > >         }
> > >
> > > -      /* vect_is_simple_reduction ensured that operand 2 is the
> > > -        loop-carried operand.  */
> > > -      gcc_assert (reduc_index == 2);
> > > -
> > >        /* Loop peeling modifies initial value of reduction PHI, which
> > >          makes the reduction stmt to be transformed different to the
> > >          original stmt analyzed.  We need to record reduction code for
> >
>
> --
> Richard Biener <rguenther@suse.de>
> SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
> Germany; GF: Felix Imendörffer; HRB 247165 (AG München)
diff mbox series

Patch

Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c	(revision 275872)
+++ gcc/tree-vect-loop.c	(working copy)
@@ -3278,56 +3278,8 @@  vect_is_simple_reduction (loop_vec_info
 	  || !flow_bb_inside_loop_p (loop, gimple_bb (def2_info->stmt))
 	  || vect_valid_reduction_input_p (def2_info)))
     {
-      if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
-	{
-	  /* Check if we can swap operands (just for simplicity - so that
-	     the rest of the code can assume that the reduction variable
-	     is always the last (second) argument).  */
-	  if (code == COND_EXPR)
-	    {
-	      /* Swap cond_expr by inverting the condition.  */
-	      tree cond_expr = gimple_assign_rhs1 (def_stmt);
-	      enum tree_code invert_code = ERROR_MARK;
-	      enum tree_code cond_code = TREE_CODE (cond_expr);
-
-	      if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
-		{
-		  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
-		  invert_code = invert_tree_comparison (cond_code, honor_nans);
-		}
-	      if (invert_code != ERROR_MARK)
-		{
-		  TREE_SET_CODE (cond_expr, invert_code);
-		  swap_ssa_operands (def_stmt,
-				     gimple_assign_rhs2_ptr (def_stmt),
-				     gimple_assign_rhs3_ptr (def_stmt));
-		}
-	      else
-		{
-		  if (dump_enabled_p ())
-		    report_vect_op (MSG_NOTE, def_stmt,
-				    "detected reduction: cannot swap operands "
-				    "for cond_expr");
-		  return NULL;
-		}
-	    }
-	  else
-	    swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
-			       gimple_assign_rhs2_ptr (def_stmt));
-
-	  if (dump_enabled_p ())
-	    report_vect_op (MSG_NOTE, def_stmt,
-			    "detected reduction: need to swap operands: ");
-
-	  if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
-	    LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
-        }
-      else
-        {
-          if (dump_enabled_p ())
-            report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
-        }
-
+      if (dump_enabled_p ())
+	report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
       return def_stmt_info;
     }
 
@@ -5969,7 +5921,6 @@  vectorize_fold_left_reduction (stmt_vec_
   gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
   gcc_assert (ncopies == 1);
   gcc_assert (TREE_CODE_LENGTH (code) == binary_op);
-  gcc_assert (reduc_index == (code == MINUS_EXPR ? 0 : 1));
   gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
 	      == FOLD_LEFT_REDUCTION);
 
@@ -6542,9 +6493,9 @@  vectorizable_reduction (stmt_vec_info st
 	  reduc_index = i;
 	}
 
-      if (i == 1 && code == COND_EXPR)
+      if (code == COND_EXPR)
 	{
-	  /* Record how value of COND_EXPR is defined.  */
+	  /* Record how the non-reduction-def value of COND_EXPR is defined.  */
 	  if (dt == vect_constant_def)
 	    {
 	      cond_reduc_dt = dt;
@@ -6622,10 +6573,6 @@  vectorizable_reduction (stmt_vec_info st
 	  return false;
 	}
 
-      /* vect_is_simple_reduction ensured that operand 2 is the
-	 loop-carried operand.  */
-      gcc_assert (reduc_index == 2);
-
       /* Loop peeling modifies initial value of reduction PHI, which
 	 makes the reduction stmt to be transformed different to the
 	 original stmt analyzed.  We need to record reduction code for