
[VECT] Swap operands for cond_reduction when necessary

Message ID VI1PR0802MB217642316C51EB0B2358FC46E7AB0@VI1PR0802MB2176.eurprd08.prod.outlook.com
State: New

Commit Message

Bin Cheng Oct. 26, 2016, 4:42 p.m. UTC
Hi,
For a stmt defining a reduction, the GCC vectorizer assumes that the reduction variable is always the last (second) operand.  Another fact is that the vectorizer doesn't swap operands for a cond_reduction during the analysis stage.  The problem is that the GCC middle-end may canonicalize a cond_expr into a form in which the reduction variable is not the last operand.  At the moment, such a case cannot be vectorized.
This patch fixes the issue by swapping the operands of a cond_reduction when necessary, and swaps them back if vectorization fails.  It resolves failures introduced by previous match.pd patches.  In addition, a couple of cases now XPASS on AArch64, which means more loops are vectorized.  I will send a follow-up patch addressing those XPASS tests.
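For illustration, a minimal conditional reduction of the affected kind might look like this (a hypothetical testcase sketched for this explanation; the function name foo and the threshold 16 are made up, and whether canonicalization actually fires depends on the surrounding code):

  /* On GIMPLE the conditional assignment becomes a COND_EXPR,
       last_1 = a[i] < 16 ? a[i] : last_2;
     with the reduction variable last_2 as the last operand.  If the
     middle-end canonicalizes this into the inverted form
       last_1 = a[i] >= 16 ? last_2 : a[i];
     the reduction variable is no longer the last operand and, before
     this patch, the loop is rejected by the vectorizer.  */
  int
  foo (int *a, int n)
  {
    int last = 0;
    for (int i = 0; i < n; i++)
      if (a[i] < 16)
        last = a[i];
    return last;
  }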
Bootstrap and test are ongoing on x86_64 and AArch64.  Is this OK?

Thanks,
bin

2016-10-25  Bin Cheng  <bin.cheng@arm.com>

	* tree-vect-loop.c (destroy_loop_vec_info): Handle cond_expr.
	(vect_is_simple_reduction): Swap cond_reduction by inversion.

Comments

Richard Biener Oct. 28, 2016, 12:38 p.m. UTC | #1
On Wed, Oct 26, 2016 at 6:42 PM, Bin Cheng <Bin.Cheng@arm.com> wrote:
> Hi,
> For a stmt defining a reduction, the GCC vectorizer assumes that the reduction variable is always the last (second) operand.  Another fact is that the vectorizer doesn't swap operands for a cond_reduction during the analysis stage.  The problem is that the GCC middle-end may canonicalize a cond_expr into a form in which the reduction variable is not the last operand.  At the moment, such a case cannot be vectorized.
> This patch fixes the issue by swapping the operands of a cond_reduction when necessary, and swaps them back if vectorization fails.  It resolves failures introduced by previous match.pd patches.  In addition, a couple of cases now XPASS on AArch64, which means more loops are vectorized.  I will send a follow-up patch addressing those XPASS tests.
> Bootstrap and test are ongoing on x86_64 and AArch64.  Is this OK?

@@ -1225,6 +1225,20 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
                swap_ssa_operands (stmt,
                                   gimple_assign_rhs1_ptr (stmt),
                                   gimple_assign_rhs2_ptr (stmt));
+             else if (code == COND_EXPR
+                      && CONSTANT_CLASS_P (gimple_assign_rhs2 (stmt)))
+               {
+                 tree cond_expr = gimple_assign_rhs1 (stmt);
+                 enum tree_code cond_code = TREE_CODE (cond_expr);
+
+                 gcc_assert (TREE_CODE_CLASS (cond_code) == tcc_comparison);
+                 /* HONOR_NANS doesn't matter when inverting it back.  */

I think this doesn't hold true for COND_EXPRs that were originally in
this form, as canonicalization is also inhibited by this.  I suggest
simply not inverting back when cond_code == ERROR_MARK, as we can't
possibly have swapped it into the current non-canonical form.

Ok with that change.

Thanks,
Richard.

+                 cond_code = invert_tree_comparison (cond_code, false);
+                 gcc_assert (cond_code != ERROR_MARK);
+                 TREE_SET_CODE (cond_expr, cond_code);
+                 swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
+                                    gimple_assign_rhs3_ptr (stmt));
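For concreteness, a sketch of the destroy_loop_vec_info hunk with the suggested guard applied (this only illustrates the suggestion, not necessarily the committed form; mirroring the analysis-time HONOR_NANS setting here is an assumption of the sketch):

  else if (code == COND_EXPR
           && CONSTANT_CLASS_P (gimple_assign_rhs2 (stmt)))
    {
      tree cond_expr = gimple_assign_rhs1 (stmt);
      enum tree_code cond_code = TREE_CODE (cond_expr);

      gcc_assert (TREE_CODE_CLASS (cond_code) == tcc_comparison);
      /* Invert with the same HONOR_NANS setting the analysis-time swap
         used; if that yields ERROR_MARK, the operands cannot have been
         swapped in the first place, so leave the stmt alone instead of
         asserting.  */
      bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
      cond_code = invert_tree_comparison (cond_code, honor_nans);
      if (cond_code != ERROR_MARK)
        {
          TREE_SET_CODE (cond_expr, cond_code);
          swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
                             gimple_assign_rhs3_ptr (stmt));
        }
    }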


> Thanks,
> bin
>
> 2016-10-25  Bin Cheng  <bin.cheng@arm.com>
>
>         * tree-vect-loop.c (destroy_loop_vec_info): Handle cond_expr.
>         (vect_is_simple_reduction): Swap cond_reduction by inversion.

Patch

diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 9cca9b7..4a5946b 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -1225,6 +1225,20 @@  destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
 		swap_ssa_operands (stmt,
 				   gimple_assign_rhs1_ptr (stmt),
 				   gimple_assign_rhs2_ptr (stmt));
+	      else if (code == COND_EXPR
+		       && CONSTANT_CLASS_P (gimple_assign_rhs2 (stmt)))
+		{
+		  tree cond_expr = gimple_assign_rhs1 (stmt);
+		  enum tree_code cond_code = TREE_CODE (cond_expr);
+
+		  gcc_assert (TREE_CODE_CLASS (cond_code) == tcc_comparison);
+		  /* HONOR_NANS doesn't matter when inverting it back.  */
+		  cond_code = invert_tree_comparison (cond_code, false);
+		  gcc_assert (cond_code != ERROR_MARK);
+		  TREE_SET_CODE (cond_expr, cond_code);
+		  swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
+				     gimple_assign_rhs3_ptr (stmt));
+		}
 	    }
 
 	  /* Free stmt_vec_info.  */
@@ -3006,38 +3020,56 @@  vect_is_simple_reduction (loop_vec_info loop_info, gimple *phi,
       && (code == COND_EXPR
 	  || !def2 || gimple_nop_p (def2)
 	  || !flow_bb_inside_loop_p (loop, gimple_bb (def2))
-          || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
- 	      && (is_gimple_assign (def2)
+	  || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
+	      && (is_gimple_assign (def2)
 		  || is_gimple_call (def2)
-	          || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
-                      == vect_induction_def
- 	          || (gimple_code (def2) == GIMPLE_PHI
+		  || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
+		       == vect_induction_def
+		  || (gimple_code (def2) == GIMPLE_PHI
 		      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
-                          == vect_internal_def
+			   == vect_internal_def
 		      && !is_loop_header_bb_p (gimple_bb (def2)))))))
     {
-      if (check_reduction
-	  && orig_code != MINUS_EXPR)
-        {
+      if (check_reduction && orig_code != MINUS_EXPR)
+	{
+	  /* Check if we can swap operands (just for simplicity - so that
+	     the rest of the code can assume that the reduction variable
+	     is always the last (second) argument).  */
 	  if (code == COND_EXPR)
 	    {
-	      /* No current known use where this case would be useful.  */
-	      if (dump_enabled_p ())
-		report_vect_op (MSG_NOTE, def_stmt,
-				"detected reduction: cannot currently swap "
-				"operands for cond_expr");
-	      return NULL;
+	      /* Swap cond_expr by inverting the condition.  */
+	      tree cond_expr = gimple_assign_rhs1 (def_stmt);
+	      enum tree_code invert_code = ERROR_MARK;
+	      enum tree_code cond_code = TREE_CODE (cond_expr);
+
+	      if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
+		{
+		  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
+		  invert_code = invert_tree_comparison (cond_code, honor_nans);
+		}
+	      if (invert_code != ERROR_MARK)
+		{
+		  TREE_SET_CODE (cond_expr, invert_code);
+		  swap_ssa_operands (def_stmt,
+				     gimple_assign_rhs2_ptr (def_stmt),
+				     gimple_assign_rhs3_ptr (def_stmt));
+		}
+	      else
+		{
+		  if (dump_enabled_p ())
+		    report_vect_op (MSG_NOTE, def_stmt,
+				    "detected reduction: cannot swap operands "
+				    "for cond_expr");
+		  return NULL;
+		}
 	    }
+	  else
+	    swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
+			       gimple_assign_rhs2_ptr (def_stmt));
 
-          /* Swap operands (just for simplicity - so that the rest of the code
-	     can assume that the reduction variable is always the last (second)
-	     argument).  */
-          if (dump_enabled_p ())
+	  if (dump_enabled_p ())
 	    report_vect_op (MSG_NOTE, def_stmt,
-	  	            "detected reduction: need to swap operands: ");
-
-          swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
- 			     gimple_assign_rhs2_ptr (def_stmt));
+			    "detected reduction: need to swap operands: ");
 
 	  if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
 	    LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;