diff mbox series

[6/6] Defer assigning vector types until after VF is determined

Message ID 20231213123324.41586385C019@sourceware.org
State New
Headers show
Series Relax single-vector-size restriction | expand

Commit Message

Richard Biener Dec. 13, 2023, 12:31 p.m. UTC
The following defers, for non-gather/scatter and non-pattern stmts,
setting of STMT_VINFO_VECTYPE until after we have computed the desired
vectorization factor.  This allows us to use larger vector types
when the vectorization factor and the preferred vector mode allow,
reducing the number of vector stmt copies and enabling vectorization
in the first place if ncopies restrictions require the use of
different size vector types like for PR65947.

vectorizable_operation handles some of the required vector type
inference.

	* tree-vect-data-refs.cc (vect_analyze_data_refs): Do not
	set STMT_VINFO_VECTYPE unless this is a gather/scatter.
	* tree-vect-loop.cc (vect_determine_vf_for_stmt_1): Do not
	set STMT_VINFO_VECTYPE, only determine the VF.
	(vect_determine_vectorization_factor): Likewise.
	(vect_analyze_loop_2): Set STMT_VINFO_VECTYPE where missing
	and non-mask.  Choose larger vectors to reduce the number of
	stmt copies.
	* tree-vect-stmts.cc (vect_analyze_stmt): Allow not
	specified vector type for mask producers.
	(vectorizable_operation): Refactor to handle
	STMT_VINFO_VECTYPE inference from operands.

	* gcc.dg/vect/pr65947-7.c: Adjust.
	* gcc.target/i386/vect-multi-size-1.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/pr65947-7.c         |   2 +-
 .../gcc.target/i386/vect-multi-size-1.c       |  17 ++
 gcc/tree-vect-data-refs.cc                    |  11 +-
 gcc/tree-vect-loop.cc                         | 148 +++++++++++++++---
 gcc/tree-vect-stmts.cc                        | 121 +++++++-------
 5 files changed, 202 insertions(+), 97 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-multi-size-1.c
diff mbox series

Patch

diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-7.c b/gcc/testsuite/gcc.dg/vect/pr65947-7.c
index 58c46df5c54..8f8adce3d91 100644
--- a/gcc/testsuite/gcc.dg/vect/pr65947-7.c
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-7.c
@@ -53,4 +53,4 @@  main (void)
 }
 
 /* { dg-final { scan-tree-dump "optimizing condition reduction with FOLD_EXTRACT_LAST" "vect" { target vect_fold_extract_last } } } */
-/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target aarch64*-*-* } } } */
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target { aarch64*-*-* } || { vect_multiple_sizes } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-multi-size-1.c b/gcc/testsuite/gcc.target/i386/vect-multi-size-1.c
new file mode 100644
index 00000000000..a0dd3cf9801
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-multi-size-1.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=znver4 -fdump-tree-vect" } */
+
+double x[1024];
+char y[1024];
+void foo  ()
+{
+  for (int i = 0 ; i < 16; ++i)
+    {
+      x[i] = i;
+      y[i] = i;
+    }
+}
+
+/* We expect to see AVX512 vectors for x[] and an SSE vector for y[].  */
+/* { dg-final { scan-tree-dump-times "MEM <vector\\\(8\\\) double>" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "MEM <vector\\\(16\\\) char>" 1 "vect" } } */
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index 59e296e7976..80057474af9 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -4716,18 +4716,19 @@  vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf, bool *fatal)
       vf = TYPE_VECTOR_SUBPARTS (vectype);
       *min_vf = upper_bound (*min_vf, vf);
 
-      /* Leave the BB vectorizer to pick the vector type later, based on
-	 the final dataref group size and SLP node size.  */
-      if (is_a <loop_vec_info> (vinfo))
-	STMT_VINFO_VECTYPE (stmt_info) = vectype;
-
       if (gatherscatter != SG_NONE)
 	{
+	  /* ???  We should perform a coarser check here, or none at all.
+	     We're checking this again later, in particular during
+	     relevancy analysis where we hook on the discovered offset
+	     operand.  */
+	  STMT_VINFO_VECTYPE (stmt_info) = vectype;
 	  gather_scatter_info gs_info;
 	  if (!vect_check_gather_scatter (stmt_info,
 					  as_a <loop_vec_info> (vinfo),
 					  &gs_info))
 	    {
+	      STMT_VINFO_VECTYPE (stmt_info) = NULL_TREE;
 	      if (fatal)
 		*fatal = false;
 	      return opt_result::failure_at
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 9e531921e29..f226135cb1d 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -189,22 +189,19 @@  vect_determine_vf_for_stmt_1 (vec_info *vinfo, stmt_vec_info stmt_info,
   if (!res)
     return res;
 
-  if (stmt_vectype)
+  if (nunits_vectype)
     {
-      if (STMT_VINFO_VECTYPE (stmt_info))
-	/* The only case when a vectype had been already set is for stmts
-	   that contain a data ref, or for "pattern-stmts" (stmts generated
-	   by the vectorizer to represent/replace a certain idiom).  */
-	gcc_assert ((STMT_VINFO_DATA_REF (stmt_info)
-		     || vectype_maybe_set_p)
-		    && STMT_VINFO_VECTYPE (stmt_info) == stmt_vectype);
-      else
-	STMT_VINFO_VECTYPE (stmt_info) = stmt_vectype;
+      poly_uint64 saved_vf = *vf;
+      vect_update_max_nunits (vf, nunits_vectype);
+      if (maybe_ne (*vf, saved_vf) && dump_enabled_p ())
+	{
+	  dump_printf_loc (MSG_NOTE, vect_location, "updated "
+			   "vectorization factor to ");
+	  dump_dec (MSG_NOTE, *vf);
+	  dump_printf (MSG_NOTE, "\n");
+	}
     }
 
-  if (nunits_vectype)
-    vect_update_max_nunits (vf, nunits_vectype);
-
   return opt_result::success ();
 }
 
@@ -330,20 +327,17 @@  vect_determine_vectorization_factor (loop_vec_info loop_vinfo,
 					       "not vectorized: unsupported "
 					       "data-type %T\n",
 					       scalar_type);
-	      STMT_VINFO_VECTYPE (stmt_info) = vectype;
-
-	      if (dump_enabled_p ())
-		dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
-				 vectype);
 
-	      if (dump_enabled_p ())
+	      poly_uint64 saved_vectorization_factor = vectorization_factor;
+	      vect_update_max_nunits (&vectorization_factor, vectype);
+	      if (maybe_ne (vectorization_factor, saved_vectorization_factor)
+		  && dump_enabled_p ())
 		{
-		  dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
-		  dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (vectype));
+		  dump_printf_loc (MSG_NOTE, vect_location, "updated "
+				   "vectorization factor to ");
+		  dump_dec (MSG_NOTE, vectorization_factor);
 		  dump_printf (MSG_NOTE, "\n");
 		}
-
-	      vect_update_max_nunits (&vectorization_factor, vectype);
 	    }
 	}
 
@@ -2864,6 +2858,114 @@  start_over:
   gcc_assert (known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 0U));
   loop_vinfo->vectorization_factor = vectorization_factor;
 
+  /* At this point we have the vectorization factor that should determine
+     the vector types to use decided.  The unrolling factor should not
+     influence that since otherwise we'd eventually use larger vectors
+     rather than doing actual effective unrolling.
+
+     Note that with re-starting without SLP we actually will have the
+     original loop VF so we're off here - but then non-SLP should go
+     away ... */
+  /* Check that nothing set STMT_VINFO_VECTYPE so nothing could have
+     relied on it.  ???  Same for SLP.  ???  That also catches pattern
+     stmts which might be more difficult to "fix".  */
+  for (stmt_vec_info stmt_info : loop_vinfo->stmt_vec_infos)
+    {
+      if (!stmt_info
+	  || gimple_clobber_p (stmt_info->stmt))
+	continue;
+
+      if (!STMT_VINFO_RELEVANT_P (stmt_info)
+	  && !STMT_VINFO_LIVE_P (stmt_info))
+	continue;
+
+      if (STMT_VINFO_VECTYPE (stmt_info))
+	{
+	  /* Pattern stmts and gather/scatter may have a precomputed
+	     vector type.  */
+	  gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info)
+		      || STMT_VINFO_GATHER_SCATTER_P (stmt_info));
+	  continue;
+	}
+
+      /* ???  This is still a coarse vector type decision.  Multiple
+	 up/down passes over use-def chains should be used to set
+	 vector types from within vectorizable_* itself, in a new
+	 special mode.  Possibly identifying the responsible worker early.
+	 Not worth spending much time on this in the non-SLP path.  */
+      tree stmt_vectype, nunits_vectype;
+      opt_result res
+	= vect_get_vector_types_for_stmt (loop_vinfo, stmt_info, &stmt_vectype,
+					  &nunits_vectype);
+      gcc_assert (res);
+      if (!stmt_vectype)
+	/* OMP SIMD calls without LHS.  */
+	continue;
+
+      tree scalar_type = NULL_TREE;
+      if (vect_use_mask_type_p (stmt_info))
+	{
+	  if (is_a <gphi *> (stmt_info->stmt))
+	    {
+	      /* Only with BB vectorization or as PHI in a nested cycle.  */
+	      gcc_assert (flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
+						 gimple_bb (stmt_info->stmt)));
+	      /* ???  vectorizable_* should set the vector type.  */
+	      continue;
+	    }
+	  else
+	    {
+	      tree_code code = gimple_assign_rhs_code (stmt_info->stmt);
+	      if (is_gimple_assign (stmt_info->stmt)
+		  && TREE_CODE_CLASS (code) == tcc_comparison)
+		scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt));
+	      else
+		/* ???  vectorizable_* should set the vector type.  */
+		continue;
+	    }
+	}
+      else
+	scalar_type = TREE_TYPE (stmt_vectype);
+
+      /* Try to use a larger vector type when the above one has fewer lanes
+	 than the chosen VF, up to the one recommended by the preferred vector
+	 mode hook.  This keeps ncopies down, generating more efficient code
+	 and in some cases enables vectorizing in the first place.  */
+      tree preferred_vectype = get_related_vectype_for_scalar_type (VOIDmode,
+								    scalar_type,
+								    0);
+      if (known_lt (TYPE_VECTOR_SUBPARTS (stmt_vectype),
+		    LOOP_VINFO_VECT_FACTOR (loop_vinfo))
+	  && known_lt (TYPE_VECTOR_SUBPARTS (stmt_vectype),
+		       TYPE_VECTOR_SUBPARTS (preferred_vectype))
+	  && ordered_p (TYPE_VECTOR_SUBPARTS (preferred_vectype),
+			LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
+	{
+	  /* ???  Could try all nunits between stmt_vectype and MIN.  */
+	  poly_uint64 nunits
+	    = ordered_min (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
+			   TYPE_VECTOR_SUBPARTS (preferred_vectype));
+	  tree cand = get_related_vectype_for_scalar_type
+			(TYPE_MODE (preferred_vectype), scalar_type, nunits);
+	  if (cand)
+	    {
+	      if (VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
+		cand = truth_type_for (cand);
+	      stmt_vectype = cand;
+	    }
+	}
+
+      if (dump_enabled_p ())
+	{
+	  dump_printf_loc (MSG_NOTE, vect_location,
+			   "==> examining statement: %G", stmt_info->stmt);
+	  dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
+			   stmt_vectype);
+	}
+
+      STMT_VINFO_VECTYPE (stmt_info) = stmt_vectype;
+    }
+
   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
     {
       dump_printf_loc (MSG_NOTE, vect_location,
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index a5e26b746fb..da27404aadb 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -6490,7 +6490,6 @@  vectorizable_operation (vec_info *vinfo,
   int ndts = 3;
   poly_uint64 nunits_in;
   poly_uint64 nunits_out;
-  tree vectype_out;
   int ncopies, vec_num;
   int i;
   vec<tree> vec_oprnds0 = vNULL;
@@ -6550,25 +6549,6 @@  vectorizable_operation (vec_info *vinfo,
       return false;
     }
 
-  scalar_dest = gimple_assign_lhs (stmt);
-  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
-
-  /* Most operations cannot handle bit-precision types without extra
-     truncations.  */
-  bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
-  if (!mask_op_p
-      && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
-      /* Exception are bitwise binary operations.  */
-      && code != BIT_IOR_EXPR
-      && code != BIT_XOR_EXPR
-      && code != BIT_AND_EXPR)
-    {
-      if (dump_enabled_p ())
-        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                         "bit-precision arithmetic not supported.\n");
-      return false;
-    }
-
   slp_tree slp_op0;
   if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
 			   0, &op0, &slp_op0, &dt[0], &vectype))
@@ -6580,47 +6560,6 @@  vectorizable_operation (vec_info *vinfo,
     }
   bool is_invariant = (dt[0] == vect_external_def
 		       || dt[0] == vect_constant_def);
-  /* If op0 is an external or constant def, infer the vector type
-     from the scalar type.  */
-  if (!vectype)
-    {
-      /* For boolean type we cannot determine vectype by
-	 invariant value (don't know whether it is a vector
-	 of booleans or vector of integers).  We use output
-	 vectype because operations on boolean don't change
-	 type.  */
-      if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
-	{
-	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
-	    {
-	      if (dump_enabled_p ())
-		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-				 "not supported operation on bool value.\n");
-	      return false;
-	    }
-	  vectype = vectype_out;
-	}
-      else
-	vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
-					       slp_node);
-    }
-  if (vec_stmt)
-    gcc_assert (vectype);
-  if (!vectype)
-    {
-      if (dump_enabled_p ())
-	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			 "no vectype for scalar type %T\n",
-			 TREE_TYPE (op0));
-
-      return false;
-    }
-
-  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
-  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
-  if (maybe_ne (nunits_out, nunits_in))
-    return false;
-
   tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
   slp_tree slp_op1 = NULL, slp_op2 = NULL;
   if (op_type == binary_op || op_type == ternary_op)
@@ -6635,9 +6574,8 @@  vectorizable_operation (vec_info *vinfo,
 	}
       is_invariant &= (dt[1] == vect_external_def
 		       || dt[1] == vect_constant_def);
-      if (vectype2
-	  && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
-	return false;
+      if (!vectype)
+	vectype = vectype2;
     }
   if (op_type == ternary_op)
     {
@@ -6651,9 +6589,52 @@  vectorizable_operation (vec_info *vinfo,
 	}
       is_invariant &= (dt[2] == vect_external_def
 		       || dt[2] == vect_constant_def);
-      if (vectype3
-	  && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
-	return false;
+      if (!vectype)
+	vectype = vectype3;
+    }
+
+  if (!vectype)
+    vectype = STMT_VINFO_VECTYPE (stmt_info);
+  if (!vectype)
+    {
+      /* We want to pre-assign sth here.  */
+      gcc_assert (!vec_stmt
+		  && is_invariant
+		  && !vect_use_mask_type_p (stmt_info));
+      vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
+    }
+
+  tree vectype_out = vectype;
+  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
+  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
+  if (maybe_ne (nunits_out, nunits_in))
+    return false;
+  /* ???  Isn't the constraint that the types are the same apart from
+     signedness (ABSU_EXPR for example)?  The rest suggests this as
+     we are using 'vectype' for constants/invariants.  */
+  if (vectype2
+      && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
+    return false;
+  if (vectype3
+      && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
+    return false;
+
+  scalar_dest = gimple_assign_lhs (stmt);
+
+  /* Most operations cannot handle bit-precision types without extra
+     truncations.  */
+  bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
+  if (!mask_op_p
+      && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
+      /* Exception are bitwise binary operations.  */
+      && code != BIT_IOR_EXPR
+      && code != BIT_XOR_EXPR
+      && code != BIT_AND_EXPR)
+    {
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			 "bit-precision arithmetic not supported.\n");
+      return false;
     }
 
   /* Multiple types in SLP are handled by creating the appropriate number of
@@ -6788,6 +6769,8 @@  vectorizable_operation (vec_info *vinfo,
 	  return false;
 	}
 
+      if (!STMT_VINFO_VECTYPE (stmt_info))
+	STMT_VINFO_VECTYPE (stmt_info) = vectype;
       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
       DUMP_VECT_SCOPE ("vectorizable_operation");
       vect_model_simple_cost (vinfo, stmt_info,
@@ -12890,7 +12873,9 @@  vect_analyze_stmt (vec_info *vinfo,
     {
       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
       gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
-		  || (call && gimple_call_lhs (call) == NULL_TREE));
+		  || (call && gimple_call_lhs (call) == NULL_TREE)
+		  /* ???  Inconsistently so.  */
+		  || vect_use_mask_type_p (stmt_info));
       *need_to_vectorize = true;
     }