diff mbox

[PR69848] Introduce special conditional reduction CONST_COND_REDUCTION

Message ID VI1PR0802MB2176CE2C17F286F158A7BA77E71D0@VI1PR0802MB2176.eurprd08.prod.outlook.com
State New
Headers show

Commit Message

Bin Cheng Aug. 10, 2016, 4:01 p.m. UTC
Hi,
This patch fixes the inefficient code generated by the vectorizer, as reported in PR69848.
It introduces a new conditional reduction type, CONST_COND_REDUCTION.  As a result,
we don't need to compute the index vector in the loop; also, the epilog reduction code can be
simplified to a single reduc_max/reduc_min operation.  Together with the AArch64 vcond
patches, the number of insns in the loop body is reduced from 10 to 4 on AArch64.  Note that this
patch doesn't handle cases in which the reduction values are loop invariants, because that requires
code quite different from the current implementation, and I have not yet worked out a "clean"
patch for it.

Bootstrapped and tested on x86_64 and AArch64.  Is it OK?

Thanks,
bin

2016-08-08  Bin Cheng  <bin.cheng@arm.com>

	PR tree-optimization/69848
	* tree-vectorizer.h (enum vect_reduction_type): New condition reduction
	type CONST_COND_REDUCTION.
	* tree-vect-loop.c (vectorizable_reduction): Support new condition
	reduction type CONST_COND_REDUCTION.

gcc/testsuite/ChangeLog
2016-08-08  Bin Cheng  <bin.cheng@arm.com>

	PR tree-optimization/69848
	* gcc.dg/vect/vect-pr69848.c: New test.

Comments

Richard Biener Aug. 11, 2016, 9:52 a.m. UTC | #1
On Wed, Aug 10, 2016 at 6:01 PM, Bin Cheng <Bin.Cheng@arm.com> wrote:
> Hi,
> This patch fixes the inefficient code generated by vectorizer as reported by PR69848.
> It introduces new conditional reduction type CONST_COND_REDUCTION.  As a result,
> we don't need to compute index vector in loop; also the epilog reduction code can be
> simplified using single reduc_max/reduc_min operation.  Together with AArch64 vcond
> patches, the # of insns in loop body is reduced from 10 to 4 on AArch64.  Note, this one
> doesn't handle cases in which reduction values are loop invariants because it requires
> quite different code to current implementation, and I failed to work out a "clean" patch at
> the moment.
>
> Bootstrap and test on x86_64 and AArch64.  Is it OK?

Ok.

Thanks,
Richard.

> Thanks,
> bin
>
> 2016-08-08  Bin Cheng  <bin.cheng@arm.com>
>
>         PR tree-optimization/69848
>         * tree-vectorizer.h (enum vect_def_type): New condition reduction
>         type CONST_COND_REDUCTION.
>         * tree-vect-loop.c (vectorizable_reduction): Support new condition
>         reudction type CONST_COND_REDUCTION.
>
> gcc/testsuite/ChangeLog
> 2016-08-08  Bin Cheng  <bin.cheng@arm.com>
>
>         PR tree-optimization/69848
>         * gcc.dg/vect/vect-pr69848.c: New test.
diff mbox

Patch

diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 41c4c29..4957b66 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -5416,7 +5416,7 @@  vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
   optab optab, reduc_optab;
   tree new_temp = NULL_TREE;
   gimple *def_stmt;
-  enum vect_def_type dt;
+  enum vect_def_type dt, cond_reduc_dt = vect_unknown_def_type;
   gphi *new_phi = NULL;
   tree scalar_type;
   bool is_simple_use;
@@ -5447,7 +5447,7 @@  vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
   tree def0, def1, tem, op0, op1 = NULL_TREE;
   bool first_p = true;
   tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE;
-  gimple *cond_expr_induction_def_stmt = NULL;
+  tree cond_reduc_val = NULL_TREE, const_cond_cmp = NULL_TREE;
 
   /* In case of reduction chain we switch to the first stmt in the chain, but
      we don't update STMT_INFO, since only the last stmt is marked as reduction
@@ -5597,8 +5597,18 @@  vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
           reduc_index = i;
         }
 
-      if (i == 1 && code == COND_EXPR && dt == vect_induction_def)
-	cond_expr_induction_def_stmt = def_stmt;
+      if (i == 1 && code == COND_EXPR)
+	{
+	  /* Record how value of COND_EXPR is defined.  */
+	  if (dt == vect_constant_def)
+	    {
+	      cond_reduc_dt = dt;
+	      cond_reduc_val = ops[i];
+	    }
+	  if (dt == vect_induction_def && def_stmt != NULL
+	      && is_nonwrapping_integer_induction (def_stmt, loop))
+	    cond_reduc_dt = dt;
+	}
     }
 
   is_simple_use = vect_is_simple_use (ops[reduc_index], loop_vinfo,
@@ -5630,18 +5640,49 @@  vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
 					  !nested_cycle, &dummy, false,
 					  &v_reduc_type);
 
+  STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = v_reduc_type;
   /* If we have a condition reduction, see if we can simplify it further.  */
-  if (v_reduc_type == COND_REDUCTION
-      && cond_expr_induction_def_stmt != NULL
-      && is_nonwrapping_integer_induction (cond_expr_induction_def_stmt, loop))
+  if (v_reduc_type == COND_REDUCTION)
     {
-      if (dump_enabled_p ())
-	dump_printf_loc (MSG_NOTE, vect_location,
-			 "condition expression based on integer induction.\n");
-      STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = INTEGER_INDUC_COND_REDUCTION;
+      if (cond_reduc_dt == vect_induction_def)
+	{
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_NOTE, vect_location,
+			     "condition expression based on "
+			     "integer induction.\n");
+	  STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+	    = INTEGER_INDUC_COND_REDUCTION;
+	}
+
+      if (cond_reduc_dt == vect_constant_def)
+	{
+	  enum vect_def_type cond_initial_dt;
+	  gimple *def_stmt = SSA_NAME_DEF_STMT (ops[reduc_index]);
+	  tree cond_initial_val
+	    = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
+
+	  gcc_assert (cond_reduc_val != NULL_TREE);
+	  vect_is_simple_use (cond_initial_val, loop_vinfo,
+			      &def_stmt, &cond_initial_dt);
+	  if (cond_initial_dt == vect_constant_def
+	      && types_compatible_p (TREE_TYPE (cond_initial_val),
+				     TREE_TYPE (cond_reduc_val)))
+	    {
+	      tree e = fold_build2 (LE_EXPR, boolean_type_node,
+				    cond_initial_val, cond_reduc_val);
+	      if (e && (integer_onep (e) || integer_zerop (e)))
+		{
+		  if (dump_enabled_p ())
+		    dump_printf_loc (MSG_NOTE, vect_location,
+				     "condition expression based on "
+				     "compile time constant.\n");
+		  const_cond_cmp = e;
+		  STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+		    = CONST_COND_REDUCTION;
+		}
+	    }
+	}
     }
-  else
-   STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = v_reduc_type;
 
   if (orig_stmt)
     gcc_assert (tmp == orig_stmt
@@ -5787,8 +5828,15 @@  vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
 
       /* For simple condition reductions, replace with the actual expression
 	 we want to base our reduction around.  */
-      if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
-	  == INTEGER_INDUC_COND_REDUCTION)
+      if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == CONST_COND_REDUCTION)
+	{
+	  gcc_assert (const_cond_cmp != NULL_TREE);
+	  gcc_assert (integer_onep (const_cond_cmp)
+		      || integer_zerop (const_cond_cmp));
+	  orig_code = integer_onep (const_cond_cmp) ? MAX_EXPR : MIN_EXPR;
+	}
+      else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+		 == INTEGER_INDUC_COND_REDUCTION)
 	orig_code = MAX_EXPR;
     }
 
@@ -5810,9 +5858,7 @@  vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
 
   epilog_reduc_code = ERROR_MARK;
 
-  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION
-      || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
-		== INTEGER_INDUC_COND_REDUCTION)
+  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) != COND_REDUCTION)
     {
       if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
 	{
@@ -5839,8 +5885,10 @@  vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
 	     generated in the epilog using multiple expressions.  This does not
 	     work for condition reductions.  */
 	  if (epilog_reduc_code == ERROR_MARK
-	      && STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
-			== INTEGER_INDUC_COND_REDUCTION)
+	      && (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+			== INTEGER_INDUC_COND_REDUCTION
+		  || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+			== CONST_COND_REDUCTION))
 	    {
 	      if (dump_enabled_p ())
 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -5881,9 +5929,7 @@  vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
     }
 
   if ((double_reduc
-       || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION
-       || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
-		== INTEGER_INDUC_COND_REDUCTION)
+       || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) != TREE_CODE_REDUCTION)
       && ncopies > 1)
     {
       if (dump_enabled_p ())
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 31570d8..29ef676 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -65,7 +65,8 @@  enum vect_def_type {
 enum vect_reduction_type {
   TREE_CODE_REDUCTION,
   COND_REDUCTION,
-  INTEGER_INDUC_COND_REDUCTION
+  INTEGER_INDUC_COND_REDUCTION,
+  CONST_COND_REDUCTION
 };
 
 #define VECTORIZABLE_CYCLE_DEF(D) (((D) == vect_reduction_def)           \
diff --git a/gcc/testsuite/gcc.dg/vect/vect-pr69848.c b/gcc/testsuite/gcc.dg/vect/vect-pr69848.c
new file mode 100644
index 0000000..7c6e33b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-pr69848.c
@@ -0,0 +1,37 @@ 
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_condition } */
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 256
+int a[N] = {0};
+
+__attribute__ ((noinline))
+int foo ()
+{
+  int i, res = 0;
+  for (i = 0; i < N; i++)
+  {
+    if (a[i] != 0)
+      res = 1;
+  }
+  return res;
+}
+
+int main (void)
+{
+  int i, res;
+
+  check_vect ();
+
+  if ((res = foo ()) != 0)
+    abort ();
+
+  a[34] = 101;
+  a[85] = 9;
+  if ((res = foo ()) != 1)
+    abort ();
+
+  return 0;
+}
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail { ! vect_max_reduc } } } } */