diff mbox series

[1/6] Split code out of vectorizable_slp_permutation

Message ID mptsflkiicy.fsf@arm.com
State New
Headers show
Series Optimise placement of SLP permutations | expand

Commit Message

Richard Sandiford Aug. 25, 2022, 1:05 p.m. UTC
A later patch needs to test whether the target supports a
lane_permutation_t without having to construct a full SLP
node for it.  This patch splits out most of the work
of vectorizable_slp_permutation into a subroutine, so that
properties of the permutation can be passed explicitly without
disturbing the main interface.

The new subroutine still uses an slp_tree argument to get things
like the number of lanes and the vector type.  That's a bit clunky,
but it seemed like the least bad option.

gcc/
	* tree-vect-slp.cc (vectorizable_slp_permutation_1): Split out from...
	(vectorizable_slp_permutation): ...here.
---
 gcc/tree-vect-slp.cc | 98 +++++++++++++++++++++++++++++---------------
 1 file changed, 66 insertions(+), 32 deletions(-)
diff mbox series

Patch

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index dab5daddcc5..13c242e5012 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -6976,20 +6976,22 @@  vect_add_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
   SLP_TREE_VEC_STMTS (node).quick_push (perm_stmt);
 }
 
-/* Vectorize the SLP permutations in NODE as specified
-   in SLP_TREE_LANE_PERMUTATION which is a vector of pairs of SLP
-   child number and lane number.
-   Interleaving of two two-lane two-child SLP subtrees (not supported):
-     [ { 0, 0 }, { 1, 0 }, { 0, 1 }, { 1, 1 } ]
-   A blend of two four-lane two-child SLP subtrees:
-     [ { 0, 0 }, { 1, 1 }, { 0, 2 }, { 1, 3 } ]
-   Highpart of a four-lane one-child SLP subtree (not supported):
-     [ { 0, 2 }, { 0, 3 } ]
-   Where currently only a subset is supported by code generating below.  */
+/* Subroutine of vectorizable_slp_permutation.  Check whether the target
+   can perform permutation PERM on the (1 or 2) input nodes in CHILDREN.
+   If GSI is nonnull, emit the permutation there.
 
-static bool
-vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
-			      slp_tree node, stmt_vector_for_cost *cost_vec)
+   When GSI is null, the only purpose of NODE is to give properties
+   of the result, such as the vector type and number of SLP lanes.
+   The node does not need to be a VEC_PERM_EXPR.
+
+   If the target supports the operation, return the number of individual
+   VEC_PERM_EXPRs needed, otherwise return -1.  Print information to the
+   dump file if DUMP_P is true.  */
+
+static int
+vectorizable_slp_permutation_1 (vec_info *vinfo, gimple_stmt_iterator *gsi,
+				slp_tree node, lane_permutation_t &perm,
+				vec<slp_tree> &children, bool dump_p)
 {
   tree vectype = SLP_TREE_VECTYPE (node);
 
@@ -7001,7 +7003,7 @@  vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
   bool repeating_p = multiple_p (nunits, SLP_TREE_LANES (node));
   tree op_vectype = NULL_TREE;
-  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+  FOR_EACH_VEC_ELT (children, i, child)
     if (SLP_TREE_VECTYPE (child))
       {
 	op_vectype = SLP_TREE_VECTYPE (child);
@@ -7009,25 +7011,24 @@  vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
       }
   if (!op_vectype)
     op_vectype = vectype;
-  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+  FOR_EACH_VEC_ELT (children, i, child)
     {
       if ((SLP_TREE_DEF_TYPE (child) != vect_internal_def
 	   && !vect_maybe_update_slp_op_vectype (child, op_vectype))
 	  || !types_compatible_p (SLP_TREE_VECTYPE (child), op_vectype)
 	  || !types_compatible_p (TREE_TYPE (vectype), TREE_TYPE (op_vectype)))
 	{
-	  if (dump_enabled_p ())
+	  if (dump_p)
 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 			     "Unsupported vector types in lane permutation\n");
-	  return false;
+	  return -1;
 	}
       if (SLP_TREE_LANES (child) != SLP_TREE_LANES (node))
 	repeating_p = false;
     }
 
-  vec<std::pair<unsigned, unsigned> > &perm = SLP_TREE_LANE_PERMUTATION (node);
   gcc_assert (perm.length () == SLP_TREE_LANES (node));
-  if (dump_enabled_p ())
+  if (dump_p)
     {
       dump_printf_loc (MSG_NOTE, vect_location,
 		       "vectorizing permutation");
@@ -7076,11 +7077,11 @@  vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
       /* Calculate every element of every permute mask vector explicitly,
 	 instead of relying on the pattern described above.  */
       if (!nunits.is_constant (&npatterns))
-	return false;
+	return -1;
       nelts_per_pattern = ncopies = 1;
       if (loop_vec_info linfo = dyn_cast <loop_vec_info> (vinfo))
 	if (!LOOP_VINFO_VECT_FACTOR (linfo).is_constant (&ncopies))
-	  return false;
+	  return -1;
       noutputs_per_mask = 1;
     }
   unsigned olanes = ncopies * SLP_TREE_LANES (node);
@@ -7093,13 +7094,13 @@  vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
   auto_vec<std::pair<std::pair<unsigned, unsigned>, unsigned> > vperm;
   auto_vec<unsigned> active_lane;
   vperm.create (olanes);
-  active_lane.safe_grow_cleared (SLP_TREE_CHILDREN (node).length (), true);
+  active_lane.safe_grow_cleared (children.length (), true);
   for (unsigned i = 0; i < ncopies; ++i)
     {
       for (unsigned pi = 0; pi < perm.length (); ++pi)
 	{
 	  std::pair<unsigned, unsigned> p = perm[pi];
-	  tree vtype = SLP_TREE_VECTYPE (SLP_TREE_CHILDREN (node)[p.first]);
+	  tree vtype = SLP_TREE_VECTYPE (children[p.first]);
 	  if (repeating_p)
 	    vperm.quick_push ({{p.first, 0}, p.second + active_lane[p.first]});
 	  else
@@ -7112,12 +7113,19 @@  vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
 	    }
 	}
       /* Advance to the next group.  */
-      for (unsigned j = 0; j < SLP_TREE_CHILDREN (node).length (); ++j)
-	active_lane[j] += SLP_TREE_LANES (SLP_TREE_CHILDREN (node)[j]);
+      for (unsigned j = 0; j < children.length (); ++j)
+	active_lane[j] += SLP_TREE_LANES (children[j]);
     }
 
-  if (dump_enabled_p ())
+  if (dump_p)
     {
+      dump_printf_loc (MSG_NOTE, vect_location,
+		       "vectorizing permutation");
+      for (unsigned i = 0; i < perm.length (); ++i)
+	dump_printf (MSG_NOTE, " op%u[%u]", perm[i].first, perm[i].second);
+      if (repeating_p)
+	dump_printf (MSG_NOTE, " (repeat %d)\n", SLP_TREE_LANES (node));
+      dump_printf (MSG_NOTE, "\n");
       dump_printf_loc (MSG_NOTE, vect_location, "as");
       for (unsigned i = 0; i < vperm.length (); ++i)
 	{
@@ -7163,12 +7171,12 @@  vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
 	}
       else
 	{
-	  if (dump_enabled_p ())
+	  if (dump_p)
 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 			     "permutation requires at "
 			     "least three vectors\n");
 	  gcc_assert (!gsi);
-	  return false;
+	  return -1;
 	}
 
       mask[index++] = mask_element;
@@ -7190,7 +7198,7 @@  vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
 					    TYPE_VECTOR_SUBPARTS (op_vectype),
 					    &c) || c != 2)))
 	    {
-	      if (dump_enabled_p ())
+	      if (dump_p)
 		{
 		  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
 				   vect_location,
@@ -7203,7 +7211,7 @@  vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
 		  dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
 		}
 	      gcc_assert (!gsi);
-	      return false;
+	      return -1;
 	    }
 
 	  if (!identity_p)
@@ -7214,8 +7222,8 @@  vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
 		second_vec = first_vec;
 
 	      slp_tree
-		first_node = SLP_TREE_CHILDREN (node)[first_vec.first],
-		second_node = SLP_TREE_CHILDREN (node)[second_vec.first];
+		first_node = children[first_vec.first],
+		second_node = children[second_vec.first];
 
 	      tree mask_vec = NULL_TREE;
 	      if (!identity_p)
@@ -7240,6 +7248,32 @@  vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
 	}
     }
 
+  return nperms;
+}
+
+/* Vectorize the SLP permutations in NODE as specified
+   in SLP_TREE_LANE_PERMUTATION which is a vector of pairs of SLP
+   child number and lane number.
+   Interleaving of two two-lane two-child SLP subtrees (not supported):
+     [ { 0, 0 }, { 1, 0 }, { 0, 1 }, { 1, 1 } ]
+   A blend of two four-lane two-child SLP subtrees:
+     [ { 0, 0 }, { 1, 1 }, { 0, 2 }, { 1, 3 } ]
+   Highpart of a four-lane one-child SLP subtree (not supported):
+     [ { 0, 2 }, { 0, 3 } ]
+   Where currently only a subset is supported by code generating below.  */
+
+static bool
+vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
+			      slp_tree node, stmt_vector_for_cost *cost_vec)
+{
+  tree vectype = SLP_TREE_VECTYPE (node);
+  lane_permutation_t &perm = SLP_TREE_LANE_PERMUTATION (node);
+  int nperms = vectorizable_slp_permutation_1 (vinfo, gsi, node, perm,
+					       SLP_TREE_CHILDREN (node),
+					       dump_enabled_p ());
+  if (nperms < 0)
+    return false;
+
   if (!gsi)
     record_stmt_cost (cost_vec, nperms, vec_perm, node, vectype, 0, vect_body);