diff mbox

[PR77503] Record reduction code for CONST_COND_REDUCTION at analysis stage and use it at transform

Message ID VI1PR0802MB2176341761A63463372AB825E7F00@VI1PR0802MB2176.eurprd08.prod.outlook.com
State New
Headers show

Commit Message

Bin Cheng Sept. 15, 2016, 10:07 a.m. UTC
Hi,
This patch fixes PR77503.  Root cause is loop peeling changes the initial value for reduction PHI node, resulting in different statement for vect_transform_stmt to vect_analyze_stmt.  Similar issue stands for loop control IV, we record the original information in LOOP_PHI_EVOLUTION_BASE_UNCHANGED for that.  This patch follows the same method by recording reduction code for CONST_COND_REDUCTION at analysis stage and use the information at transform stage.  The only difference is we need record it in stmt_vec_info because there might be multiple reductions.  Unfortunately this requires additional memory for each statement, I didn't see easy way out.  Maybe it's possible to improve vectorizer so it caches/reuses information from analysis stage to transform stage?
Bootstrap and test on x86_64 and AArch64 ongoing.  Is it OK if no regression?

Thanks,
bin

2016-09-07  Bin Cheng  <bin.cheng@arm.com>

	PR tree-optimization/77503
	* tree-vect-loop.c (vectorizable_reduction): Record reduction
	code for CONST_COND_REDUCTION at analysis stage and use it at
	transform stage.
	* tree-vectorizer.h (struct _stmt_vec_info): New field.
	(STMT_VINFO_VEC_CONST_COND_REDUC_CODE): New macro.
	* tree-vect-stmts.c (new_stmt_vec_info): Initialize above new
	field.

gcc/testsuite/ChangeLog
2016-09-07  Bin Cheng  <bin.cheng@arm.com>

	PR tree-optimization/77503
	* gcc.dg/vect/pr77503.c: New test.

Comments

Richard Biener Sept. 15, 2016, 10:38 a.m. UTC | #1
On Thu, Sep 15, 2016 at 12:07 PM, Bin Cheng <Bin.Cheng@arm.com> wrote:
> Hi,
> This patch fixes PR77503.  Root cause is loop peeling changes the initial value for reduction PHI node, resulting in different statement for vect_transform_stmt to vect_analyze_stmt.  Similar issue stands for loop control IV, we record the original information in LOOP_PHI_EVOLUTION_BASE_UNCHANGED for that.  This patch follows the same method by recording reduction code for CONST_COND_REDUCTION at analysis stage and use the information at transform stage.  The only difference is we need record it in stmt_vec_info because there might be multiple reductions.  Unfortunately this requires additional memory for each statement, I didn't see easy way out.  Maybe it's possible to improve vectorizer so it caches/reuses information from analysis stage to transform stage?
> Bootstrap and test on x86_64 and AArch64 ongoing.  Is it OK if no regression?

Ok.

As for improving stmt_info size and caching info from analysis phase
-- yes, ideally stmt_info would have
most of its contents discriminated on STMT_VINFO_TYPE using a union of
type specific fields.  Note that
this kind of refactoring would be way easier than trying to make it a
class using inheritance (you'd need to
defer vinfo creation until analysis figured out the info or add an
indirection to type specific data).

stmt_info isn't very well packed either so the general answer for now
is -- we don't care about its size.

As for re-using data from analysis phase -- yes!  That we share the
"head" of all vectorizable_* routines
for both analysis and transform phase was a bad design decision --
ideally we'd have vectorizable_*
routines w/o transform and then vectorize_* routines which only do the
transform based on data recorded
during analysis phase.

Both refactorings are very welcome (esp. the latter which eventually
means adding many more fields to
stmt_info).

Thanks,
Richard.

> Thanks,
> bin
>
> 2016-09-07  Bin Cheng  <bin.cheng@arm.com>
>
>         PR tree-optimization/77503
>         * tree-vect-loop.c (vectorizable_reduction): Record reduction
>         code for CONST_COND_REDUCTION at analysis stage and use it at
>         transform stage.
>         * tree-vectorizer.h (struct _stmt_vec_info): New field.
>         (STMT_VINFO_VEC_CONST_COND_REDUC_CODE): New macro.
>         * tree-vect-stmts.c (new_stmt_vec_info): Initialize above new
>         field.
>
> gcc/testsuite/ChangeLog
> 2016-09-07  Bin Cheng  <bin.cheng@arm.com>
>
>         PR tree-optimization/77503
>         * gcc.dg/vect/pr77503.c: New test.
diff mbox

Patch

diff --git a/gcc/testsuite/gcc.dg/vect/pr77503.c b/gcc/testsuite/gcc.dg/vect/pr77503.c
new file mode 100644
index 0000000..609e7fb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr77503.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-skip-if "need vect_max_reduc support" { ! vect_max_reduc } } */
+
+extern void d(void);
+void a() {
+  char *b;
+  char c = 0;
+  for (; b < (char *)a; b++) {
+    if (*b)
+      c = 1;
+    *b = 0;
+  }
+  if (c)
+    d();
+}
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index fa06505..58f3456 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -5438,7 +5438,7 @@  vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
   tree def0, def1, tem, op1 = NULL_TREE;
   bool first_p = true;
   tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE;
-  tree cond_reduc_val = NULL_TREE, const_cond_cmp = NULL_TREE;
+  tree cond_reduc_val = NULL_TREE;
 
   /* In case of reduction chain we switch to the first stmt in the chain, but
      we don't update STMT_INFO, since only the last stmt is marked as reduction
@@ -5645,7 +5645,19 @@  vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
 	    = INTEGER_INDUC_COND_REDUCTION;
 	}
 
-      if (cond_reduc_dt == vect_constant_def)
+      /* Loop peeling modifies initial value of reduction PHI, which
+	 makes the reduction stmt to be transformed different to the
+	 original stmt analyzed.  We need to record reduction code for
+	 CONST_COND_REDUCTION type reduction at analyzing stage, thus
+	 it can be used directly at transform stage.  */
+      if (STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info) == MAX_EXPR
+	  || STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info) == MIN_EXPR)
+	{
+	  /* Also set the reduction type to CONST_COND_REDUCTION.  */
+	  gcc_assert (cond_reduc_dt == vect_constant_def);
+	  STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = CONST_COND_REDUCTION;
+	}
+      else if (cond_reduc_dt == vect_constant_def)
 	{
 	  enum vect_def_type cond_initial_dt;
 	  gimple *def_stmt = SSA_NAME_DEF_STMT (ops[reduc_index]);
@@ -5667,7 +5679,9 @@  vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
 		    dump_printf_loc (MSG_NOTE, vect_location,
 				     "condition expression based on "
 				     "compile time constant.\n");
-		  const_cond_cmp = e;
+		  /* Record reduction code at analysis stage.  */
+		  STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info)
+		    = integer_onep (e) ? MAX_EXPR : MIN_EXPR;
 		  STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
 		    = CONST_COND_REDUCTION;
 		}
@@ -5821,10 +5835,8 @@  vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
 	 we want to base our reduction around.  */
       if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == CONST_COND_REDUCTION)
 	{
-	  gcc_assert (const_cond_cmp != NULL_TREE);
-	  gcc_assert (integer_onep (const_cond_cmp)
-		      || integer_zerop (const_cond_cmp));
-	  orig_code = integer_onep (const_cond_cmp) ? MAX_EXPR : MIN_EXPR;
+	  orig_code = STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info);
+	  gcc_assert (orig_code == MAX_EXPR || orig_code == MIN_EXPR);
 	}
       else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
 		 == INTEGER_INDUC_COND_REDUCTION)
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index ce5536c..6a6167b 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -8563,6 +8563,7 @@  new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
   STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
   STMT_VINFO_DATA_REF (res) = NULL;
   STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
+  STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
 
   STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
   STMT_VINFO_DR_OFFSET (res) = NULL;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 29ef676..240af06 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -657,6 +657,9 @@  typedef struct _stmt_vec_info {
   /* For reduction loops, this is the type of reduction.  */
   enum vect_reduction_type v_reduc_type;
 
+  /* For CONST_COND_REDUCTION, record the reduc code.  */
+  enum tree_code const_cond_reduc_code;
+
   /* The number of scalar stmt references from active SLP instances.  */
   unsigned int num_slp_uses;
 } *stmt_vec_info;
@@ -711,6 +714,7 @@  STMT_VINFO_BB_VINFO (stmt_vec_info stmt_vinfo)
 #define STMT_VINFO_MEMORY_ACCESS_TYPE(S)   (S)->memory_access_type
 #define STMT_VINFO_SIMD_LANE_ACCESS_P(S)   (S)->simd_lane_access_p
 #define STMT_VINFO_VEC_REDUCTION_TYPE(S)   (S)->v_reduc_type
+#define STMT_VINFO_VEC_CONST_COND_REDUC_CODE(S) (S)->const_cond_reduc_code
 
 #define STMT_VINFO_DR_BASE_ADDRESS(S)      (S)->dr_base_address
 #define STMT_VINFO_DR_INIT(S)              (S)->dr_init