Patchwork Vectorizer TLC, split SLP discovery and cost calculation

login
register
mail settings
Submitter Richard Guenther
Date April 9, 2013, 2:10 p.m.
Message ID <alpine.LNX.2.00.1304091607360.21094@zhemvz.fhfr.qr>
Download mbox | patch
Permalink /patch/235092/
State New
Headers show

Comments

Richard Guenther - April 9, 2013, 2:10 p.m.
This splits discovering of a SLP opportunity (vect_build_slp_tree)
and calculating the cost of a SLP instance (now vect_analyze_slp_cost).
The immediate need for me is to support handling of mismatches
in the SLP tree due to commutative operand order mismatch, one more
cleanup patch for this reason will follow.  In the end cost calculation
should be delayed even more, but that's for later.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2013-04-09  Richard Biener  <rguenther@suse.de>

	* tree-vect-slp.c (vect_get_and_check_slp_defs): Remove code
	dealing with cost.
	(vect_build_slp_tree): Likewise.
	(vect_analyze_slp_cost_1, vect_analyze_slp_cost): New functions
	calculating the cost of a SLP instance.
	(vect_analyze_slp_instance): Use it from here, after building
	the SLP tree.

Patch

Index: gcc/tree-vect-slp.c
===================================================================
--- gcc/tree-vect-slp.c	(revision 197629)
+++ gcc/tree-vect-slp.c	(working copy)
@@ -199,11 +199,8 @@  vect_get_place_in_interleaving_chain (gi
 
 static bool
 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
-                             slp_tree slp_node, gimple stmt,
-			     int ncopies_for_cost, bool first,
-                             vec<slp_oprnd_info> *oprnds_info,
-			     stmt_vector_for_cost *prologue_cost_vec,
-			     stmt_vector_for_cost *body_cost_vec)
+                             gimple stmt, bool first,
+                             vec<slp_oprnd_info> *oprnds_info)
 {
   tree oprnd;
   unsigned int i, number_of_oprnds;
@@ -211,8 +208,6 @@  vect_get_and_check_slp_defs (loop_vec_in
   gimple def_stmt;
   enum vect_def_type dt = vect_uninitialized_def;
   enum vect_def_type dt_op0 = vect_uninitialized_def;
-  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-  tree lhs = gimple_get_lhs (stmt);
   struct loop *loop = NULL;
   enum tree_code rhs_code;
   bool different_types = false;
@@ -344,22 +339,6 @@  vect_get_and_check_slp_defs (loop_vec_in
 	    {
 	      def_op0 = def;
 	      dt_op0 = dt;
-	      /* Analyze costs (for the first stmt of the group only).  */
-	      if (REFERENCE_CLASS_P (lhs))
-		/* Store.  */
-                vect_model_store_cost (stmt_info, ncopies_for_cost, false,
-				       dt, slp_node, prologue_cost_vec,
-				       body_cost_vec);
-	      else
-		{
-		  enum vect_def_type dts[2];
-		  dts[0] = dt;
-		  dts[1] = vect_uninitialized_def;
-		  /* Not memory operation (we don't call this function for
-		     loads).  */
-		  vect_model_simple_cost (stmt_info, ncopies_for_cost, dts,
-					  prologue_cost_vec, body_cost_vec);
-		}
 	    }
 	}
       else
@@ -479,13 +458,11 @@  vect_get_and_check_slp_defs (loop_vec_in
 
 static bool
 vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
-                     slp_tree *node, unsigned int group_size, int *outside_cost,
-                     int ncopies_for_cost, unsigned int *max_nunits,
+                     slp_tree *node, unsigned int group_size,
+                     unsigned int *max_nunits,
                      vec<int> *load_permutation,
                      vec<slp_tree> *loads,
-                     unsigned int vectorization_factor, bool *loads_permuted,
-		     stmt_vector_for_cost *prologue_cost_vec,
-		     stmt_vector_for_cost *body_cost_vec)
+                     unsigned int vectorization_factor, bool *loads_permuted)
 {
   unsigned int i;
   vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (*node);
@@ -750,11 +727,8 @@  vect_build_slp_tree (loop_vec_info loop_
 	  if (REFERENCE_CLASS_P (lhs))
 	    {
 	      /* Store.  */
-	      if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
-						stmt, ncopies_for_cost,
-						(i == 0), &oprnds_info,
-						prologue_cost_vec,
-						body_cost_vec))
+	      if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
+						stmt, (i == 0), &oprnds_info))
 		{
 	  	  vect_free_oprnd_info (oprnds_info);
  		  return false;
@@ -863,11 +837,6 @@  vect_build_slp_tree (loop_vec_info loop_
 	  	      vect_free_oprnd_info (oprnds_info);
                       return false;
                     }
-
-                  /* Analyze costs (for the first stmt in the group).  */
-                  vect_model_load_cost (vinfo_for_stmt (stmt),
-                                        ncopies_for_cost, false, *node,
-					prologue_cost_vec, body_cost_vec);
                 }
 
               /* Store the place of this load in the interleaving chain.  In
@@ -943,10 +912,8 @@  vect_build_slp_tree (loop_vec_info loop_
             }
 
 	  /* Find the def-stmts.  */
-	  if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
-					    ncopies_for_cost, (i == 0),
-					    &oprnds_info, prologue_cost_vec,
-					    body_cost_vec))
+	  if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, stmt,
+					    (i == 0), &oprnds_info))
 	    {
 	      vect_free_oprnd_info (oprnds_info);
 	      return false;
@@ -959,12 +926,7 @@  vect_build_slp_tree (loop_vec_info loop_
     {
       loads->safe_push (*node);
       if (permutation)
-        {
-	  gimple first_stmt = stmts[0];
-          *loads_permuted = true;
-	  (void) record_stmt_cost (body_cost_vec, group_size, vec_perm, 
-				   vinfo_for_stmt (first_stmt), 0, vect_body);
-        }
+	*loads_permuted = true;
       else
         {
           /* We don't check here complex numbers chains, so we set
@@ -989,10 +951,8 @@  vect_build_slp_tree (loop_vec_info loop_
       child = vect_create_new_slp_node (oprnd_info->def_stmts);
       if (!child
           || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size,
-				   outside_cost, ncopies_for_cost,
 				   max_nunits, load_permutation, loads,
-				   vectorization_factor, loads_permuted,
-				   prologue_cost_vec, body_cost_vec))
+				   vectorization_factor, loads_permuted))
         {
 	  if (child)
 	    oprnd_info->def_stmts = vNULL;
@@ -1510,6 +1470,122 @@  vect_find_last_store_in_slp_instance (sl
   return last_store;
 }
 
+/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */
+
+static void
+vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
+			 slp_instance instance, slp_tree node,
+			 stmt_vector_for_cost *prologue_cost_vec,
+			 unsigned ncopies_for_cost)
+{
+  stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance);
+
+  unsigned i;
+  slp_tree child;
+  gimple stmt, s;
+  stmt_vec_info stmt_info;
+  tree lhs;
+  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
+
+  /* Recurse down the SLP tree.  */
+  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+    vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
+			     instance, child, prologue_cost_vec,
+			     ncopies_for_cost);
+
+  /* Look at the first scalar stmt to determine the cost.  */
+  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
+  stmt_info = vinfo_for_stmt (stmt);
+  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
+    {
+      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
+	vect_model_store_cost (stmt_info, ncopies_for_cost, false,
+			       vect_uninitialized_def,
+			       node, prologue_cost_vec, body_cost_vec);
+      else
+	{
+	  int i;
+	  gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
+	  vect_model_load_cost (stmt_info, ncopies_for_cost, false,
+				node, prologue_cost_vec, body_cost_vec);
+	  /* If the load is permuted record the cost for the permutation.
+	     ???  Loads from multiple chains are let through here only
+	     for a single special case involving complex numbers where
+	     in the end no permutation is necessary.  */
+	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
+	    if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
+		 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
+		&& vect_get_place_in_interleaving_chain
+		     (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
+	      {
+		record_stmt_cost (body_cost_vec, group_size, vec_perm,
+				  stmt_info, 0, vect_body);
+		break;
+	      }
+	}
+    }
+  else
+    record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
+		      stmt_info, 0, vect_body);
+
+  /* Scan operands and account for prologue cost of constants/externals.
+     ???  This over-estimates cost for multiple uses and should be
+     re-engineered.  */
+  lhs = gimple_get_lhs (stmt);
+  for (i = 0; i < gimple_num_ops (stmt); ++i)
+    {
+      tree def, op = gimple_op (stmt, i);
+      gimple def_stmt;
+      enum vect_def_type dt;
+      if (!op || op == lhs)
+	continue;
+      if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
+			      &def_stmt, &def, &dt)
+	  && (dt == vect_constant_def || dt == vect_external_def))
+	record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
+			  stmt_info, 0, vect_prologue);
+    }
+}
+
+/* Compute the cost for the SLP instance INSTANCE.  */
+
+static void
+vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
+		       slp_instance instance, unsigned nunits)
+{
+  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
+  unsigned ncopies_for_cost;
+  stmt_info_for_cost *si;
+  unsigned i;
+
+  /* Calculate the number of vector stmts to create based on the unrolling
+     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
+     GROUP_SIZE / NUNITS otherwise.  */
+  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
+  ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
+
+  prologue_cost_vec.create (10);
+  body_cost_vec.create (10);
+  SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec;
+  vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
+			   instance, SLP_INSTANCE_TREE (instance),
+			   &prologue_cost_vec, ncopies_for_cost);
+
+  /* Record the prologue costs, which were delayed until we were
+     sure that SLP was successful.  Unlike the body costs, we know
+     the final values now regardless of the loop vectorization factor.  */
+  void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
+		: BB_VINFO_TARGET_COST_DATA (bb_vinfo));
+  FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
+    {
+      struct _stmt_vec_info *stmt_info
+	= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
+      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
+			    si->misalign, vect_prologue);
+    }
+
+  prologue_cost_vec.release ();
+}
 
 /* Analyze an SLP instance starting from a group of grouped stores.  Call
    vect_build_slp_tree to build a tree of packed stmts if possible.
@@ -1526,15 +1602,13 @@  vect_analyze_slp_instance (loop_vec_info
   tree vectype, scalar_type = NULL_TREE;
   gimple next;
   unsigned int vectorization_factor = 0;
-  int outside_cost = 0, ncopies_for_cost, i;
+  int i;
   unsigned int max_nunits = 0;
   vec<int> load_permutation;
   vec<slp_tree> loads;
   struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
   bool loads_permuted = false;
   vec<gimple> scalar_stmts;
-  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
-  stmt_info_for_cost *si;
 
   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
     {
@@ -1615,26 +1689,14 @@  vect_analyze_slp_instance (loop_vec_info
 
   node = vect_create_new_slp_node (scalar_stmts);
 
-  /* Calculate the number of vector stmts to create based on the unrolling
-     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
-     GROUP_SIZE / NUNITS otherwise.  */
-  ncopies_for_cost = unrolling_factor * group_size / nunits;
-
   load_permutation.create (group_size * group_size);
   loads.create (group_size);
-  prologue_cost_vec.create (10);
-  body_cost_vec.create (10);
 
   /* Build the tree for the SLP instance.  */
   if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
-                           &outside_cost, ncopies_for_cost,
 			   &max_nunits, &load_permutation, &loads,
-			   vectorization_factor, &loads_permuted,
-			   &prologue_cost_vec, &body_cost_vec))
+			   vectorization_factor, &loads_permuted))
     {
-      void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
-		    : BB_VINFO_TARGET_COST_DATA (bb_vinfo));
-
       /* Calculate the unrolling factor based on the smallest type.  */
       if (max_nunits > nunits)
         unrolling_factor = least_common_multiple (max_nunits, group_size)
@@ -1647,8 +1709,6 @@  vect_analyze_slp_instance (loop_vec_info
 			     "Build SLP failed: unrolling required in basic"
 			     " block SLP");
 	  vect_free_slp_tree (node);
-	  body_cost_vec.release ();
-	  prologue_cost_vec.release ();
 	  load_permutation.release ();
 	  loads.release ();
           return false;
@@ -1659,7 +1719,7 @@  vect_analyze_slp_instance (loop_vec_info
       SLP_INSTANCE_TREE (new_instance) = node;
       SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
       SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
-      SLP_INSTANCE_BODY_COST_VEC (new_instance) = body_cost_vec;
+      SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
       SLP_INSTANCE_LOADS (new_instance) = loads;
       SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
       SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
@@ -1678,7 +1738,6 @@  vect_analyze_slp_instance (loop_vec_info
                 }
 
               vect_free_slp_instance (new_instance);
-	      prologue_cost_vec.release ();
               return false;
             }
 
@@ -1688,18 +1747,9 @@  vect_analyze_slp_instance (loop_vec_info
       else
         SLP_INSTANCE_LOAD_PERMUTATION (new_instance).release ();
 
-      /* Record the prologue costs, which were delayed until we were
-	 sure that SLP was successful.  Unlike the body costs, we know
-	 the final values now regardless of the loop vectorization factor.  */
-      FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
-	{
-	  struct _stmt_vec_info *stmt_info
-	    = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
-	  (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
-				si->misalign, vect_prologue);
-	}
-
-      prologue_cost_vec.release ();
+      /* Compute the costs of this SLP instance.  */
+      vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
+			     new_instance, TYPE_VECTOR_SUBPARTS (vectype));
 
       if (loop_vinfo)
         LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
@@ -1711,11 +1761,6 @@  vect_analyze_slp_instance (loop_vec_info
 
       return true;
     }
-  else
-    {
-      body_cost_vec.release ();
-      prologue_cost_vec.release ();
-    }
 
   /* Failed to SLP.  */
   /* Free the allocated memory.  */