diff mbox

[1/2] PR60510, reduction chain vectorization w/o SLP

Message ID alpine.LSU.2.20.1707030917170.23185@zhemvz.fhfr.qr
State New
Headers show

Commit Message

Richard Biener July 3, 2017, 7:26 a.m. UTC
This is the 1st patch in the series to support reduction chain 
vectorization without SLP.  In the PR there's a reduction chain detected
that doesn't qualify SLP but then this completely fails vectorization
because we cannot do regular reduction vectorization on this.

This patch series aims at fixing this (and allow further enhancements
for a few other PRs) by splitting reduction vectorization into two pieces,
vectorizing the reduction PHIs (creating vector defs) and vectorizing
the reduction operation.  This allows reduction operations covering
more than one stmt to be trivially(*) vectorized using the regular
vectorizable_* helpers as they have access to the reduction PHI vector
defs.  SLP reduction chains as detected just require adjustments to
the code generation phase to support non-SLP operation as the analysis
phase already figured out the reduction chain is a valid reduction.

This first patch simply splits the reduction vectorization into
two phases, reduction PHI def generation and the rest.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2016-07-03  Richard Biener  <rguenther@suse.de>

	* tree-vect-loop.c (vect_analyze_loop_operations): Also analyze
	reduction PHIs.
	(vect_force_simple_reduction): Record reduction def -> phi mapping.
	(vectorizable_reduction): Perform reduction PHI creation when
	visiting a reduction PHI and adjust and simplify code generation
	phase of the reduction op.  Cache dts, use fold_binary, not fold_build2.
	(vect_transform_loop): Visit reduction PHIs.
	* tree-vect-slp.c (vect_get_and_check_slp_defs): Record reduction
	defs into the SLP tree.
	(vect_build_slp_tree): Reduction defs terminate the recursion.
	* tree-vect-stmts.c (vect_get_vec_def_for_operand_1): Allow lookup
	of reduction defs.
	(vect_get_vec_defs_for_stmt_copy): Export.
	(vect_get_vec_defs): Likewise.
	* tree-vectorizer.h (struct _stmt_vec_info): Amend reduc_def
	purpose.
	(vect_get_vec_defs_for_stmt_copy): Declare.
	(vect_get_vec_defs): Likewise.
diff mbox

Patch

Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c	(revision 249892)
+++ gcc/tree-vect-loop.c	(working copy)
@@ -1778,6 +1778,10 @@  vect_analyze_loop_operations (loop_vec_i
               if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
 		  && ! PURE_SLP_STMT (stmt_info))
                 ok = vectorizable_induction (phi, NULL, NULL, NULL);
+	      else if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
+			|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
+		       && ! PURE_SLP_STMT (stmt_info))
+		ok = vectorizable_reduction (phi, NULL, NULL, NULL);
             }
 
 	  if (ok && STMT_VINFO_LIVE_P (stmt_info))
@@ -3185,6 +3189,8 @@  vect_force_simple_reduction (loop_vec_in
       stmt_vec_info reduc_def_info = vinfo_for_stmt (phi);
       STMT_VINFO_REDUC_TYPE (reduc_def_info) = v_reduc_type;
       STMT_VINFO_REDUC_DEF (reduc_def_info) = def;
+      reduc_def_info = vinfo_for_stmt (def);
+      STMT_VINFO_REDUC_DEF (reduc_def_info) = phi;
     }
   return def;
 }
@@ -5558,7 +5564,6 @@  vectorizable_reduction (gimple *stmt, gi
 {
   tree vec_dest;
   tree scalar_dest;
-  tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
   tree vectype_in = NULL_TREE;
@@ -5576,7 +5581,6 @@  vectorizable_reduction (gimple *stmt, gi
   bool is_simple_use;
   gimple *orig_stmt;
   stmt_vec_info orig_stmt_info;
-  tree expr = NULL_TREE;
   int i;
   int ncopies;
   int epilog_copies;
@@ -5586,6 +5590,7 @@  vectorizable_reduction (gimple *stmt, gi
   gimple *new_stmt = NULL;
   int j;
   tree ops[3];
+  enum vect_def_type dts[3];
   bool nested_cycle = false, found_nested_cycle_def = false;
   gimple *reduc_def_stmt = NULL;
   bool double_reduc = false;
@@ -5598,11 +5603,23 @@  vectorizable_reduction (gimple *stmt, gi
   auto_vec<tree> vect_defs;
   auto_vec<gimple *> phis;
   int vec_num;
-  tree def0, def1, tem, op1 = NULL_TREE;
+  tree def0, tem;
   bool first_p = true;
   tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE;
   tree cond_reduc_val = NULL_TREE;
 
+  /* Make sure it was already recognized as a reduction computation.  */
+  if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_reduction_def
+      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_nested_cycle)
+    return false;
+
+  if (nested_in_vect_loop_p (loop, stmt))
+    {
+      outer_loop = loop;
+      loop = loop->inner;
+      nested_cycle = true;
+    }
+
   /* In case of reduction chain we switch to the first stmt in the chain, but
      we don't update STMT_INFO, since only the last stmt is marked as reduction
      and has reduction properties.  */
@@ -5613,11 +5630,82 @@  vectorizable_reduction (gimple *stmt, gi
       first_p = false;
     }
 
-  if (nested_in_vect_loop_p (loop, stmt))
+  if (gimple_code (stmt) == GIMPLE_PHI)
     {
-      outer_loop = loop;
-      loop = loop->inner;
-      nested_cycle = true;
+      /* Analysis is fully done on the reduction stmt invocation.  */
+      if (! vec_stmt)
+	{
+	  STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
+	  return true;
+	}
+
+      gimple *reduc_stmt = STMT_VINFO_REDUC_DEF (stmt_info);
+      if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (reduc_stmt)))
+	reduc_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (reduc_stmt));
+      if (STMT_VINFO_RELEVANT (vinfo_for_stmt (reduc_stmt)) <= vect_used_only_live)
+	single_defuse_cycle = true;
+
+      gcc_assert (is_gimple_assign (reduc_stmt));
+      for (unsigned k = 1; k < gimple_num_ops (reduc_stmt); ++k)
+	{
+	  tree op = gimple_op (reduc_stmt, k);
+	  if (op == gimple_phi_result (stmt))
+	    continue;
+	  if (k == 1
+	      && gimple_assign_rhs_code (reduc_stmt) == COND_EXPR)
+	    continue;
+	  vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op));
+	  break;
+	}
+      gcc_assert (vectype_in);
+
+      if (slp_node)
+	ncopies = 1;
+      else
+	ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
+		   / TYPE_VECTOR_SUBPARTS (vectype_in));
+
+      /* Create the destination vector  */
+      scalar_dest = gimple_assign_lhs (reduc_stmt);
+      vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
+
+      if (slp_node)
+	/* The size vect_schedule_slp_instance computes is off for us.  */
+	vec_num = ((LOOP_VINFO_VECT_FACTOR (loop_vinfo)
+		    * SLP_TREE_SCALAR_STMTS (slp_node).length ())
+		   / TYPE_VECTOR_SUBPARTS (vectype_in));
+      else
+	vec_num = 1;
+
+      /* Generate the reduction PHIs upfront.  */
+      prev_phi_info = NULL;
+      for (j = 0; j < ncopies; j++)
+	{
+	  if (j == 0 || !single_defuse_cycle)
+	    {
+	      for (i = 0; i < vec_num; i++)
+		{
+		  /* Create the reduction-phi that defines the reduction
+		     operand.  */
+		  new_phi = create_phi_node (vec_dest, loop->header);
+		  set_vinfo_for_stmt (new_phi,
+				      new_stmt_vec_info (new_phi, loop_vinfo));
+
+		  if (slp_node)
+		    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_phi);
+		  else
+		    {
+		      if (j == 0)
+			STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_phi;
+		      else
+			STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
+		      prev_phi_info = vinfo_for_stmt (new_phi);
+		    }
+		}
+	    }
+	}
+
+      return true;
     }
 
   /* 1. Is vectorizable reduction?  */
@@ -5633,11 +5721,6 @@  vectorizable_reduction (gimple *stmt, gi
       && !STMT_VINFO_LIVE_P (stmt_info))
     return false;
 
-  /* Make sure it was already recognized as a reduction computation.  */
-  if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_reduction_def
-      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_nested_cycle)
-    return false;
-
   /* 2. Has this been recognized as a reduction pattern?
 
      Check if STMT represents a pattern that has been recognized
@@ -5718,11 +5801,12 @@  vectorizable_reduction (gimple *stmt, gi
         continue;
 
       is_simple_use = vect_is_simple_use (ops[i], loop_vinfo,
-					  &def_stmt, &dt, &tem);
+					  &def_stmt, &dts[i], &tem);
       if (!vectype_in)
 	vectype_in = tem;
       gcc_assert (is_simple_use);
 
+      dt = dts[i];
       if (dt != vect_internal_def
 	  && dt != vect_external_def
 	  && dt != vect_constant_def
@@ -5752,7 +5836,7 @@  vectorizable_reduction (gimple *stmt, gi
     }
 
   is_simple_use = vect_is_simple_use (ops[reduc_index], loop_vinfo,
-				      &def_stmt, &dt, &tem);
+				      &def_stmt, &dts[reduc_index], &tem);
   if (!vectype_in)
     vectype_in = tem;
   gcc_assert (is_simple_use);
@@ -5762,6 +5846,7 @@  vectorizable_reduction (gimple *stmt, gi
   if (reduc_def_stmt && gimple_code (reduc_def_stmt) != GIMPLE_PHI)
     return false;
 
+  dt = dts[reduc_index];
   if (!(dt == vect_reduction_def
 	|| dt == vect_nested_cycle
 	|| ((dt == vect_internal_def || dt == vect_external_def
@@ -5820,7 +5905,7 @@  vectorizable_reduction (gimple *stmt, gi
 	      && types_compatible_p (TREE_TYPE (cond_initial_val),
 				     TREE_TYPE (cond_reduc_val)))
 	    {
-	      tree e = fold_build2 (LE_EXPR, boolean_type_node,
+	      tree e = fold_binary (LE_EXPR, boolean_type_node,
 				    cond_initial_val, cond_reduc_val);
 	      if (e && (integer_onep (e) || integer_zerop (e)))
 		{
@@ -6190,19 +6275,28 @@  vectorizable_reduction (gimple *stmt, gi
   if (!slp_node)
     vect_defs.quick_push (NULL_TREE);
 
+  auto_vec<tree> vec_oprnds;
   for (j = 0; j < ncopies; j++)
     {
       if (j == 0 || !single_defuse_cycle)
 	{
           for (i = 0; i < vec_num; i++)
             {
-              /* Create the reduction-phi that defines the reduction
+	      /* Get the created reduction-phi that defines the reduction
                  operand.  */
-              new_phi = create_phi_node (vec_dest, loop->header);
-              set_vinfo_for_stmt (new_phi,
-                                  new_stmt_vec_info (new_phi, loop_vinfo));
-               if (j == 0 || slp_node)
-                 phis.quick_push (new_phi);
+	      tree reduc_def = gimple_phi_result (reduc_def_stmt);
+	      if (j == 0)
+		vect_get_vec_defs (reduc_def, NULL, stmt, &vec_oprnds, NULL,
+				   slp_node);
+	      else
+		{
+		  dt = vect_reduction_def;
+		  vect_get_vec_defs_for_stmt_copy (&dt,
+						   &vec_oprnds, NULL);
+		}
+	      new_phi = as_a <gphi *> (SSA_NAME_DEF_STMT (vec_oprnds[i]));
+	      if (j == 0 || slp_node)
+		phis.quick_push (new_phi);
             }
         }
 
@@ -6243,42 +6337,30 @@  vectorizable_reduction (gimple *stmt, gi
 	    }
           else
 	    {
-              loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
-                                                            stmt);
-              vec_oprnds0.quick_push (loop_vec_def0);
+              vec_oprnds0.quick_push
+		(vect_get_vec_def_for_operand (ops[!reduc_index], stmt));
               if (op_type == ternary_op)
-               {
-		 op1 = reduc_index == 0 ? ops[2] : ops[1];
-                 loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt);
-                 vec_oprnds1.quick_push (loop_vec_def1);
-               }
+		vec_oprnds1.quick_push 
+		  (vect_get_vec_def_for_operand (reduc_index == 0
+						 ? ops[2] : ops[1], stmt));
 	    }
         }
       else
         {
           if (!slp_node)
             {
-              enum vect_def_type dt;
-	      gimple *dummy_stmt;
-
-              vect_is_simple_use (ops[!reduc_index], loop_vinfo,
-                                  &dummy_stmt, &dt);
-              loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
-                                                              loop_vec_def0);
-              vec_oprnds0[0] = loop_vec_def0;
+              vec_oprnds0[0]
+		= vect_get_vec_def_for_stmt_copy (dts[!reduc_index],
+						  vec_oprnds0[0]);
               if (op_type == ternary_op)
-                {
-                  vect_is_simple_use (op1, loop_vinfo, &dummy_stmt, &dt);
-                  loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
-                                                                loop_vec_def1);
-                  vec_oprnds1[0] = loop_vec_def1;
-                }
+                vec_oprnds1[0] 
+		  = vect_get_vec_def_for_stmt_copy (dts[reduc_index == 0
+						        ? 2 : 1],
+						    vec_oprnds1[0]);
             }
 
           if (single_defuse_cycle)
             reduc_def = gimple_assign_lhs (new_stmt);
-
-          STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
         }
 
       FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
@@ -6291,31 +6373,16 @@  vectorizable_reduction (gimple *stmt, gi
                 reduc_def = PHI_RESULT (new_phi);
             }
 
-          def1 = ((op_type == ternary_op)
-                  ? vec_oprnds1[i] : NULL);
-          if (op_type == binary_op)
-            {
-              if (reduc_index == 0)
-                expr = build2 (code, vectype_out, reduc_def, def0);
-              else
-                expr = build2 (code, vectype_out, def0, reduc_def);
-            }
-          else
-            {
-              if (reduc_index == 0)
-                expr = build3 (code, vectype_out, reduc_def, def0, def1);
-              else
-                {
-                  if (reduc_index == 1)
-                    expr = build3 (code, vectype_out, def0, reduc_def, def1);
-                  else
-                    expr = build3 (code, vectype_out, def0, def1, reduc_def);
-                }
-            }
+	  tree vop[3] = { def0, NULL_TREE, NULL_TREE };
+	  if (op_type == ternary_op)
+	    vop[1] = vec_oprnds1[i];
+	  for (int k = 2; k > reduc_index; --k)
+	    vop[k] = vop[k - 1];
+	  vop[reduc_index] = reduc_def;
 
-          new_stmt = gimple_build_assign (vec_dest, expr);
           new_temp = make_ssa_name (vec_dest, new_stmt);
-          gimple_assign_set_lhs (new_stmt, new_temp);
+          new_stmt = gimple_build_assign (new_temp, code,
+					  vop[0], vop[1], vop[2]);
           vect_finish_stmt_generation (stmt, new_stmt, gsi);
 
           if (slp_node)
@@ -6336,7 +6403,6 @@  vectorizable_reduction (gimple *stmt, gi
 	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
 
       prev_stmt_info = vinfo_for_stmt (new_stmt);
-      prev_phi_info = vinfo_for_stmt (new_phi);
     }
 
   /* Finalize the reduction-phi (set its arguments) and create the
@@ -7285,7 +7351,9 @@  vect_transform_loop (loop_vec_info loop_
 	      && dump_enabled_p ())
 	    dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
 
-	  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
+	  if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
+	       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
+	       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
 	      && ! PURE_SLP_STMT (stmt_info))
 	    {
 	      if (dump_enabled_p ())
Index: gcc/tree-vect-slp.c
===================================================================
--- gcc/tree-vect-slp.c	(revision 249892)
+++ gcc/tree-vect-slp.c	(working copy)
@@ -403,9 +403,9 @@  again:
 	{
 	case vect_constant_def:
 	case vect_external_def:
-	case vect_reduction_def:
 	  break;
 
+	case vect_reduction_def:
 	case vect_induction_def:
 	case vect_internal_def:
 	  oprnd_info->def_stmts.quick_push (def_stmt);
@@ -943,13 +943,15 @@  vect_build_slp_tree (vec_info *vinfo,
   else
     return NULL;
 
-  /* If the SLP node is a PHI (induction), terminate the recursion.  */
+  /* If the SLP node is a PHI (induction or reduction), terminate
+     the recursion.  */
   if (gimple_code (stmt) == GIMPLE_PHI)
     {
-      FOR_EACH_VEC_ELT (stmts, i, stmt)
-	if (stmt != stmts[0])
-	  /* Induction from different IVs is not supported.  */
-	  return NULL;
+      /* Induction from different IVs is not supported.  */
+      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) == vect_induction_def)
+	FOR_EACH_VEC_ELT (stmts, i, stmt)
+	  if (stmt != stmts[0])
+	    return NULL;
       node = vect_create_new_slp_node (stmts);
       return node;
     }
@@ -1005,6 +1007,7 @@  vect_build_slp_tree (vec_info *vinfo,
       unsigned int j;
 
       if (oprnd_info->first_dt != vect_internal_def
+	  && oprnd_info->first_dt != vect_reduction_def
 	  && oprnd_info->first_dt != vect_induction_def)
         continue;
 
Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c	(revision 249892)
+++ gcc/tree-vect-stmts.c	(working copy)
@@ -1367,14 +1367,10 @@  vect_get_vec_def_for_operand_1 (gimple *
         return vec_oprnd;
       }
 
-    /* operand is defined by a loop header phi - reduction  */
+    /* operand is defined by a loop header phi.  */
     case vect_reduction_def:
     case vect_double_reduction_def:
     case vect_nested_cycle:
-      /* Code should use get_initial_def_for_reduction.  */
-      gcc_unreachable ();
-
-    /* operand is defined by loop-header phi - induction.  */
     case vect_induction_def:
       {
 	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
@@ -1535,7 +1531,7 @@  vect_get_vec_def_for_stmt_copy (enum vec
 /* Get vectorized definitions for the operands to create a copy of an original
    stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */
 
-static void
+void
 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
 				 vec<tree> *vec_oprnds0,
 				 vec<tree> *vec_oprnds1)
@@ -1554,11 +1550,9 @@  vect_get_vec_defs_for_stmt_copy (enum ve
 }
 
 
-/* Get vectorized definitions for OP0 and OP1.
-   REDUC_INDEX is the index of reduction operand in case of reduction,
-   and -1 otherwise.  */
+/* Get vectorized definitions for OP0 and OP1.  */
 
-static void
+void
 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
 		   vec<tree> *vec_oprnds0,
 		   vec<tree> *vec_oprnds1,
Index: gcc/tree-vectorizer.h
===================================================================
--- gcc/tree-vectorizer.h	(revision 249892)
+++ gcc/tree-vectorizer.h	(working copy)
@@ -647,7 +647,9 @@  typedef struct _stmt_vec_info {
      vect_force_simple_reduction.  */
   enum vect_reduction_type reduc_type;
 
-  /* On a reduction PHI the def returned by vect_force_simple_reduction.  */
+  /* On a reduction PHI the def returned by vect_force_simple_reduction.
+     On the def returned by vect_force_simple_reduction the
+     corresponding PHI.  */
   gimple *reduc_def;
 
   /* The number of scalar stmt references from active SLP instances.  */
@@ -1078,6 +1080,10 @@  extern void vect_finish_stmt_generation
 extern bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
 extern tree vect_get_vec_def_for_operand_1 (gimple *, enum vect_def_type);
 extern tree vect_get_vec_def_for_operand (tree, gimple *, tree = NULL);
+extern void vect_get_vec_defs (tree, tree, gimple *, vec<tree> *,
+			       vec<tree> *, slp_tree);
+extern void vect_get_vec_defs_for_stmt_copy (enum vect_def_type *,
+					     vec<tree> *, vec<tree> *);
 extern tree vect_init_vector (gimple *, tree, tree,
                               gimple_stmt_iterator *);
 extern tree vect_get_vec_def_for_stmt_copy (enum vect_def_type, tree);