diff mbox

[3/n] Reduction vectorization improvements

Message ID alpine.LSU.2.11.1505261052420.30088@zhemvz.fhfr.qr
State New
Headers show

Commit Message

Richard Biener May 26, 2015, 8:54 a.m. UTC
This refactors the code so that, for loop SLP, the SLP node and instance
are passed to the vectorizable_* functions during the analysis phase as
well (not only during the transform phase).

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2015-05-26  Richard Biener  <rguenther@suse.de>

	* tree-vect-loop.c (vect_update_vf_for_slp): Split out from ...
	(vect_analyze_loop_operations): ... here.  Remove slp parameter,
	detect whether we apply SLP.  Remove call to
	vect_update_slp_costs_according_to_vf.
	(vect_analyze_loop_2): Call vect_update_vf_for_slp and
	vect_update_slp_costs_according_to_vf from here.  Dispatch
	to vect_slp_analyze_operations to analyze SLP stmts.
	* tree-vect-slp.c (vect_slp_analyze_node_operations): Drop
	unused bb_vec_info parameter, adjust assert.
	(vect_slp_analyze_operations): Make non-static.  Pass in the vector
	of SLP instances instead of bb_vec_info.
	(vect_slp_analyze_bb_1): Adjust call to vect_slp_analyze_operations.
	* tree-vectorizer.h (vect_slp_analyze_operations): Declare.
diff mbox

Patch

Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c	(revision 223574)
+++ gcc/tree-vect-loop.c	(working copy)
@@ -1355,25 +1355,85 @@  vect_analyze_loop_form (struct loop *loo
   return loop_vinfo;
 }
 
+/* Scan the loop stmts and, depending on whether there are any (non-)SLP
+   statements, update the vectorization factor.  */
+
+static void
+vect_update_vf_for_slp (loop_vec_info loop_vinfo)
+{
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
+  int nbbs = loop->num_nodes;
+  unsigned int vectorization_factor;
+  int i;
+
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+		     "=== vect_update_vf_for_slp ===\n");
+
+  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  gcc_assert (vectorization_factor != 0);
+
+  /* If all the stmts in the loop can be SLPed, we perform only SLP, and
+     vectorization factor of the loop is the unrolling factor required by
+     the SLP instances.  If that unrolling factor is 1, we say, that we
+     perform pure SLP on loop - cross iteration parallelism is not
+     exploited.  */
+  bool only_slp_in_loop = true;
+  for (i = 0; i < nbbs; i++)
+    {
+      basic_block bb = bbs[i];
+      for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
+	   gsi_next (&si))
+	{
+	  gimple stmt = gsi_stmt (si);
+	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+	  if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+	      && STMT_VINFO_RELATED_STMT (stmt_info))
+	    {
+	      stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+	      stmt_info = vinfo_for_stmt (stmt);
+	    }
+	  if ((STMT_VINFO_RELEVANT_P (stmt_info)
+	       || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
+	      && !PURE_SLP_STMT (stmt_info))
+	    /* STMT needs both SLP and loop-based vectorization.  */
+	    only_slp_in_loop = false;
+	}
+    }
+
+  if (only_slp_in_loop)
+    vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
+  else
+    vectorization_factor
+      = least_common_multiple (vectorization_factor,
+			       LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
+
+  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+		     "Updating vectorization factor to %d\n",
+		     vectorization_factor);
+}
 
 /* Function vect_analyze_loop_operations.
 
    Scan the loop stmts and make sure they are all vectorizable.  */
 
 static bool
-vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
+vect_analyze_loop_operations (loop_vec_info loop_vinfo)
 {
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
   int nbbs = loop->num_nodes;
-  unsigned int vectorization_factor = 0;
+  unsigned int vectorization_factor;
   int i;
   stmt_vec_info stmt_info;
   bool need_to_vectorize = false;
   int min_profitable_iters;
   int min_scalar_loop_bound;
   unsigned int th;
-  bool only_slp_in_loop = true, ok;
+  bool ok;
   HOST_WIDE_INT max_niter;
   HOST_WIDE_INT estimated_niter;
   int min_profitable_estimate;
@@ -1382,50 +1442,6 @@  vect_analyze_loop_operations (loop_vec_i
     dump_printf_loc (MSG_NOTE, vect_location,
 		     "=== vect_analyze_loop_operations ===\n");
 
-  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
-  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
-  if (slp)
-    {
-      /* If all the stmts in the loop can be SLPed, we perform only SLP, and
-	 vectorization factor of the loop is the unrolling factor required by
-	 the SLP instances.  If that unrolling factor is 1, we say, that we
-	 perform pure SLP on loop - cross iteration parallelism is not
-	 exploited.  */
-      for (i = 0; i < nbbs; i++)
-	{
-	  basic_block bb = bbs[i];
-	  for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
-	       gsi_next (&si))
-	    {
-	      gimple stmt = gsi_stmt (si);
-	      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-	      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
-		  && STMT_VINFO_RELATED_STMT (stmt_info))
-		{
-		  stmt = STMT_VINFO_RELATED_STMT (stmt_info);
-		  stmt_info = vinfo_for_stmt (stmt);
-		}
-	      if ((STMT_VINFO_RELEVANT_P (stmt_info)
-		   || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
-		  && !PURE_SLP_STMT (stmt_info))
-		/* STMT needs both SLP and loop-based vectorization.  */
-		only_slp_in_loop = false;
-	    }
-	}
-
-      if (only_slp_in_loop)
-	vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
-      else
-	vectorization_factor = least_common_multiple (vectorization_factor,
-				LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
-
-      LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
-      if (dump_enabled_p ())
-	dump_printf_loc (MSG_NOTE, vect_location,
-			 "Updating vectorization factor to %d\n",
-			 vectorization_factor);
-    }
-
   for (i = 0; i < nbbs; i++)
     {
       basic_block bb = bbs[i];
@@ -1540,6 +1556,11 @@  vect_analyze_loop_operations (loop_vec_i
 	   gsi_next (&si))
         {
           gimple stmt = gsi_stmt (si);
+	  if (STMT_SLP_TYPE (vinfo_for_stmt (stmt)))
+	    {
+	      need_to_vectorize = true;
+	      continue;
+	    }
 	  if (!gimple_clobber_p (stmt)
 	      && !vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
 	    return false;
@@ -1563,6 +1584,9 @@  vect_analyze_loop_operations (loop_vec_i
       return false;
     }
 
+  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  gcc_assert (vectorization_factor != 0);
+
   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
 		     "vectorization_factor = %d, niters = "
@@ -1586,10 +1610,6 @@  vect_analyze_loop_operations (loop_vec_i
 
   /* Analyze cost.  Decide if worth while to vectorize.  */
 
-  /* Once VF is set, SLP costs should be updated since the number of created
-     vector stmts depends on VF.  */
-  vect_update_slp_costs_according_to_vf (loop_vinfo);
-
   vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
 				      &min_profitable_estimate);
   LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
@@ -1664,7 +1684,7 @@  vect_analyze_loop_operations (loop_vec_i
 static bool
 vect_analyze_loop_2 (loop_vec_info loop_vinfo)
 {
-  bool ok, slp = false;
+  bool ok;
   int max_vf = MAX_VECTORIZATION_FACTOR;
   int min_vf = 2;
   unsigned int th;
@@ -1790,19 +1810,34 @@  vect_analyze_loop_2 (loop_vec_info loop_
   ok = vect_analyze_slp (loop_vinfo, NULL, n_stmts);
   if (ok)
     {
-      /* Decide which possible SLP instances to SLP.  */
-      slp = vect_make_slp_decision (loop_vinfo);
+      /* If there are any SLP instances, mark them as pure_slp.  */
+      if (vect_make_slp_decision (loop_vinfo))
+	{
+	  /* Find stmts that need to be both vectorized and SLPed.  */
+	  vect_detect_hybrid_slp (loop_vinfo);
+
+	  /* Update the vectorization factor based on the SLP decision.  */
+	  vect_update_vf_for_slp (loop_vinfo);
 
-      /* Find stmts that need to be both vectorized and SLPed.  */
-      vect_detect_hybrid_slp (loop_vinfo);
+	  /* Once VF is set, SLP costs should be updated since the number of
+	     created vector stmts depends on VF.  */
+	  vect_update_slp_costs_according_to_vf (loop_vinfo);
+
+	  /* Analyze operations in the SLP instances.  Note this may
+	     remove unsupported SLP instances, which invalidates the SLP
+	     kind detection above, so give up if any instance was removed.  */
+	  unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length ();
+	  vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
+	  if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size)
+	    return false;
+	}
     }
   else
     return false;
 
-  /* Scan all the operations in the loop and make sure they are
-     vectorizable.  */
-
-  ok = vect_analyze_loop_operations (loop_vinfo, slp);
+  /* Scan all the remaining operations in the loop that are not subject
+     to SLP and make sure they are vectorizable.  */
+  ok = vect_analyze_loop_operations (loop_vinfo);
   if (!ok)
     {
       if (dump_enabled_p ())
Index: gcc/tree-vect-slp.c
===================================================================
--- gcc/tree-vect-slp.c	(revision 223574)
+++ gcc/tree-vect-slp.c	(working copy)
@@ -2191,7 +2191,7 @@  destroy_bb_vec_info (bb_vec_info bb_vinf
    the subtree. Return TRUE if the operations are supported.  */
 
 static bool
-vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
+vect_slp_analyze_node_operations (slp_tree node)
 {
   bool dummy;
   int i;
@@ -2202,17 +2202,17 @@  vect_slp_analyze_node_operations (bb_vec
     return true;
 
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
-    if (!vect_slp_analyze_node_operations (bb_vinfo, child))
+    if (!vect_slp_analyze_node_operations (child))
       return false;
 
   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
     {
       stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
       gcc_assert (stmt_info);
-      gcc_assert (PURE_SLP_STMT (stmt_info));
+      gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
 
       if (!vect_analyze_stmt (stmt, &dummy, node))
-        return false;
+	return false;
     }
 
   return true;
@@ -2222,19 +2222,26 @@  vect_slp_analyze_node_operations (bb_vec
 /* Analyze statements in SLP instances of the basic block.  Return TRUE if the
    operations are supported. */
 
-static bool
-vect_slp_analyze_operations (bb_vec_info bb_vinfo)
+bool
+vect_slp_analyze_operations (vec<slp_instance> slp_instances)
 {
-  vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
   slp_instance instance;
   int i;
 
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+		     "=== vect_slp_analyze_operations ===\n");
+
   for (i = 0; slp_instances.iterate (i, &instance); )
     {
-      if (!vect_slp_analyze_node_operations (bb_vinfo,
-                                             SLP_INSTANCE_TREE (instance)))
+      if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance)))
         {
- 	  vect_free_slp_instance (instance);
+	  dump_printf_loc (MSG_NOTE, vect_location,
+			   "removing SLP instance operations starting from: ");
+	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
+			    SLP_TREE_SCALAR_STMTS
+			      (SLP_INSTANCE_TREE (instance))[0], 0);
+	  vect_free_slp_instance (instance);
           slp_instances.ordered_remove (i);
 	}
       else
@@ -2498,7 +2505,7 @@  vect_slp_analyze_bb_1 (basic_block bb)
       return NULL;
     }
 
-  if (!vect_slp_analyze_operations (bb_vinfo))
+  if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo)))
     {
       if (dump_enabled_p ())
         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
Index: gcc/tree-vectorizer.h
===================================================================
--- gcc/tree-vectorizer.h	(revision 223574)
+++ gcc/tree-vectorizer.h	(working copy)
@@ -1114,6 +1114,7 @@  extern void vect_free_slp_instance (slp_
 extern bool vect_transform_slp_perm_load (slp_tree, vec<tree> ,
                                           gimple_stmt_iterator *, int,
                                           slp_instance, bool);
+extern bool vect_slp_analyze_operations (vec<slp_instance> slp_instances);
 extern bool vect_schedule_slp (loop_vec_info, bb_vec_info);
 extern void vect_update_slp_costs_according_to_vf (loop_vec_info);
 extern bool vect_analyze_slp (loop_vec_info, bb_vec_info, unsigned);