diff mbox series

[065/nnn] poly_int: vect_nunits_for_cost

Message ID 873769kc63.fsf@linaro.org
State New
Headers show
Series [065/nnn] poly_int: vect_nunits_for_cost | expand

Commit Message

Richard Sandiford Oct. 23, 2017, 5:27 p.m. UTC
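This patch adds vect_nunits_for_cost, a function that returns the
number of elements in a vector for costing purposes.  The value it
returns is always a constant: a reasonable estimate is used if the
exact number isn't known at compile time.  This makes it possible
for a later patch to change GET_MODE_NUNITS and TYPE_VECTOR_SUBPARTS
to a poly_int.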


2017-10-23  Richard Sandiford  <richard.sandiford@linaro.org>
	    Alan Hayward  <alan.hayward@arm.com>
	    David Sherwood  <david.sherwood@arm.com>

gcc/
	* tree-vectorizer.h (vect_nunits_for_cost): New function.
	* tree-vect-loop.c (vect_model_reduction_cost): Use it.
	* tree-vect-slp.c (vect_analyze_slp_cost_1): Likewise.
	(vect_analyze_slp_cost): Likewise.
	* tree-vect-stmts.c (vect_model_store_cost): Likewise.
	(vect_model_load_cost): Likewise.

Comments

Jeff Law Dec. 5, 2017, 5:35 p.m. UTC | #1
On 10/23/2017 11:27 AM, Richard Sandiford wrote:
> This patch adds a function for getting the number of elements in
> a vector for cost purposes, which is always constant.  It makes
> it possible for a later patch to change GET_MODE_NUNITS and
> TYPE_VECTOR_SUBPARTS to a poly_int.
> 
> 
> 2017-10-23  Richard Sandiford  <richard.sandiford@linaro.org>
> 	    Alan Hayward  <alan.hayward@arm.com>
> 	    David Sherwood  <david.sherwood@arm.com>
> 
> gcc/
> 	* tree-vectorizer.h (vect_nunits_for_cost): New function.
> 	* tree-vect-loop.c (vect_model_reduction_cost): Use it.
> 	* tree-vect-slp.c (vect_analyze_slp_cost_1): Likewise.
> 	(vect_analyze_slp_cost): Likewise.
> 	* tree-vect-stmts.c (vect_model_store_cost): Likewise.
> 	(vect_model_load_cost): Likewise.
OK.
jeff
diff mbox series

Patch

Index: gcc/tree-vectorizer.h
===================================================================
--- gcc/tree-vectorizer.h	2017-10-23 17:22:26.575499779 +0100
+++ gcc/tree-vectorizer.h	2017-10-23 17:22:28.837953732 +0100
@@ -1154,6 +1154,16 @@  vect_vf_for_cost (loop_vec_info loop_vin
   return estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
 }
 
+/* Estimate the number of elements in VEC_TYPE for costing purposes.
+   Pick a reasonable estimate if the exact number isn't known at
+   compile time.  */
+
+static inline unsigned int
+vect_nunits_for_cost (tree vec_type)
+{
+  return estimated_poly_value (TYPE_VECTOR_SUBPARTS (vec_type));
+}
+
 /* Return the size of the value accessed by unvectorized data reference DR.
    This is only valid once STMT_VINFO_VECTYPE has been calculated for the
    associated gimple statement, since that guarantees that DR accesses
Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c	2017-10-23 17:22:26.573499378 +0100
+++ gcc/tree-vect-loop.c	2017-10-23 17:22:28.835953330 +0100
@@ -3844,13 +3844,15 @@  vect_model_reduction_cost (stmt_vec_info
 	}
       else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
 	{
-	  unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
+	  unsigned estimated_nunits = vect_nunits_for_cost (vectype);
 	  /* Extraction of scalar elements.  */
-	  epilogue_cost += add_stmt_cost (target_cost_data, 2 * nunits,
+	  epilogue_cost += add_stmt_cost (target_cost_data,
+					  2 * estimated_nunits,
 					  vec_to_scalar, stmt_info, 0,
 					  vect_epilogue);
 	  /* Scalar max reductions via COND_EXPR / MAX_EXPR.  */
-	  epilogue_cost += add_stmt_cost (target_cost_data, 2 * nunits - 3,
+	  epilogue_cost += add_stmt_cost (target_cost_data,
+					  2 * estimated_nunits - 3,
 					  scalar_stmt, stmt_info, 0,
 					  vect_epilogue);
 	}
Index: gcc/tree-vect-slp.c
===================================================================
--- gcc/tree-vect-slp.c	2017-10-23 17:22:27.793744215 +0100
+++ gcc/tree-vect-slp.c	2017-10-23 17:22:28.836953531 +0100
@@ -1718,8 +1718,8 @@  vect_analyze_slp_cost_1 (slp_instance in
 					    &n_perms);
 	      record_stmt_cost (body_cost_vec, n_perms, vec_perm,
 				stmt_info, 0, vect_body);
-	      unsigned nunits
-		= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
+	      unsigned assumed_nunits
+		= vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info));
 	      /* And adjust the number of loads performed.  This handles
 	         redundancies as well as loads that are later dead.  */
 	      auto_sbitmap perm (GROUP_SIZE (stmt_info));
@@ -1730,7 +1730,7 @@  vect_analyze_slp_cost_1 (slp_instance in
 	      bool load_seen = false;
 	      for (i = 0; i < GROUP_SIZE (stmt_info); ++i)
 		{
-		  if (i % nunits == 0)
+		  if (i % assumed_nunits == 0)
 		    {
 		      if (load_seen)
 			ncopies_for_cost++;
@@ -1743,7 +1743,7 @@  vect_analyze_slp_cost_1 (slp_instance in
 		ncopies_for_cost++;
 	      gcc_assert (ncopies_for_cost
 			  <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
-			      + nunits - 1) / nunits);
+			      + assumed_nunits - 1) / assumed_nunits);
 	      poly_uint64 uf = SLP_INSTANCE_UNROLLING_FACTOR (instance);
 	      ncopies_for_cost *= estimated_poly_value (uf);
 	    }
@@ -1856,9 +1856,9 @@  vect_analyze_slp_cost (slp_instance inst
     assumed_vf = vect_vf_for_cost (STMT_VINFO_LOOP_VINFO (stmt_info));
   else
     assumed_vf = 1;
-  unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
   /* For reductions look at a reduction operand in case the reduction
      operation is widening like DOT_PROD or SAD.  */
+  tree vectype_for_cost = STMT_VINFO_VECTYPE (stmt_info);
   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
     {
       gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
@@ -1866,14 +1866,16 @@  vect_analyze_slp_cost (slp_instance inst
 	{
 	case DOT_PROD_EXPR:
 	case SAD_EXPR:
-	  nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
-				(TREE_TYPE (gimple_assign_rhs1 (stmt))));
+	  vectype_for_cost = get_vectype_for_scalar_type
+	    (TREE_TYPE (gimple_assign_rhs1 (stmt)));
 	  break;
 	default:;
 	}
     }
-  ncopies_for_cost = least_common_multiple (nunits,
-					    group_size * assumed_vf) / nunits;
+  unsigned int assumed_nunits = vect_nunits_for_cost (vectype_for_cost);
+  ncopies_for_cost = (least_common_multiple (assumed_nunits,
+					     group_size * assumed_vf)
+		      / assumed_nunits);
 
   prologue_cost_vec.create (10);
   body_cost_vec.create (10);
Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c	2017-10-23 17:22:26.574499579 +0100
+++ gcc/tree-vect-stmts.c	2017-10-23 17:22:28.837953732 +0100
@@ -950,18 +950,25 @@  vect_model_store_cost (stmt_vec_info stm
   /* Costs of the stores.  */
   if (memory_access_type == VMAT_ELEMENTWISE
       || memory_access_type == VMAT_GATHER_SCATTER)
-    /* N scalar stores plus extracting the elements.  */
-    inside_cost += record_stmt_cost (body_cost_vec,
-				     ncopies * TYPE_VECTOR_SUBPARTS (vectype),
-				     scalar_store, stmt_info, 0, vect_body);
+    {
+      /* N scalar stores plus extracting the elements.  */
+      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
+      inside_cost += record_stmt_cost (body_cost_vec,
+				       ncopies * assumed_nunits,
+				       scalar_store, stmt_info, 0, vect_body);
+    }
   else
     vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
 
   if (memory_access_type == VMAT_ELEMENTWISE
       || memory_access_type == VMAT_STRIDED_SLP)
-    inside_cost += record_stmt_cost (body_cost_vec,
-				     ncopies * TYPE_VECTOR_SUBPARTS (vectype),
-				     vec_to_scalar, stmt_info, 0, vect_body);
+    {
+      /* Extracting the N elements of each vector.  */
+      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
+      inside_cost += record_stmt_cost (body_cost_vec,
+				       ncopies * assumed_nunits,
+				       vec_to_scalar, stmt_info, 0, vect_body);
+    }
 
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
@@ -1081,8 +1088,9 @@  vect_model_load_cost (stmt_vec_info stmt
     {
       /* N scalar loads plus gathering them into a vector.  */
       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
       inside_cost += record_stmt_cost (body_cost_vec,
-				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
+				       ncopies * assumed_nunits,
 				       scalar_load, stmt_info, 0, vect_body);
     }
   else