[17/17] Extend can_duplicate_and_interleave_p to mixed-size vectors
diff mbox series

Message ID mpty2wutvy7.fsf@arm.com
State New
Headers show
Series
  • [6/n] Use build_vector_type_for_mode in get_vectype_for_scalar_type_and_size
Related show

Commit Message

Richard Sandiford Nov. 5, 2019, 11:16 a.m. UTC
This patch makes can_duplicate_and_interleave_p cope with mixtures of
vector sizes, by using queries based on get_vectype_for_scalar_type
instead of directly querying GET_MODE_SIZE (vinfo->vector_mode).

int_mode_for_size is now the first check we do for a candidate mode,
so it seemed better to restrict it to MAX_FIXED_MODE_SIZE.  This avoids
unnecessary work and avoids trying to create scalar types that the
target might not support.

This is the final patch in the series.  As before, each patch was tested
individually on aarch64-linux-gnu and the series as a whole on x86_64-linux-gnu.


2019-11-04  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* tree-vectorizer.h (can_duplicate_and_interleave_p): Take an
	element type rather than an element mode.
	* tree-vect-slp.c (can_duplicate_and_interleave_p): Likewise.
	Use get_vectype_for_scalar_type to query the natural types
	for a given element type rather than basing everything on
	GET_MODE_SIZE (vinfo->vector_mode).  Limit int_mode_for_size
	query to MAX_FIXED_MODE_SIZE.
	(duplicate_and_interleave): Update call accordingly.
	* tree-vect-loop.c (vectorizable_reduction): Likewise.

Comments

Richard Biener Nov. 14, 2019, 12:22 p.m. UTC | #1
On Tue, Nov 5, 2019 at 9:45 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> This patch makes can_duplicate_and_interleave_p cope with mixtures of
> vector sizes, by using queries based on get_vectype_for_scalar_type
> instead of directly querying GET_MODE_SIZE (vinfo->vector_mode).
>
> int_mode_for_size is now the first check we do for a candidate mode,
> so it seemed better to restrict it to MAX_FIXED_MODE_SIZE.  This avoids
> unnecessary work and avoids trying to create scalar types that the
> target might not support.
>
> This final patch in the series.  As before, each patch tested individually
> on aarch64-linux-gnu and the series as a whole on x86_64-linux-gnu.

OK.

Thanks,
Richard.

>
> 2019-11-04  Richard Sandiford  <richard.sandiford@arm.com>
>
> gcc/
>         * tree-vectorizer.h (can_duplicate_and_interleave_p): Take an
>         element type rather than an element mode.
>         * tree-vect-slp.c (can_duplicate_and_interleave_p): Likewise.
>         Use get_vectype_for_scalar_type to query the natural types
>         for a given element type rather than basing everything on
>         GET_MODE_SIZE (vinfo->vector_mode).  Limit int_mode_for_size
>         query to MAX_FIXED_MODE_SIZE.
>         (duplicate_and_interleave): Update call accordingly.
>         * tree-vect-loop.c (vectorizable_reduction): Likewise.
>
> Index: gcc/tree-vectorizer.h
> ===================================================================
> --- gcc/tree-vectorizer.h       2019-11-05 11:08:12.521631453 +0000
> +++ gcc/tree-vectorizer.h       2019-11-05 11:14:42.786884473 +0000
> @@ -1779,8 +1779,7 @@ extern void vect_get_slp_defs (slp_tree,
>  extern bool vect_slp_bb (basic_block);
>  extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree);
>  extern bool is_simple_and_all_uses_invariant (stmt_vec_info, loop_vec_info);
> -extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int,
> -                                           machine_mode,
> +extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int, tree,
>                                             unsigned int * = NULL,
>                                             tree * = NULL, tree * = NULL);
>  extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree,
> Index: gcc/tree-vect-slp.c
> ===================================================================
> --- gcc/tree-vect-slp.c 2019-11-05 11:08:12.517631481 +0000
> +++ gcc/tree-vect-slp.c 2019-11-05 11:14:42.786884473 +0000
> @@ -265,7 +265,7 @@ vect_get_place_in_interleaving_chain (st
>    return -1;
>  }
>
> -/* Check whether it is possible to load COUNT elements of type ELT_MODE
> +/* Check whether it is possible to load COUNT elements of type ELT_TYPE
>     using the method implemented by duplicate_and_interleave.  Return true
>     if so, returning the number of intermediate vectors in *NVECTORS_OUT
>     (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
> @@ -273,26 +273,37 @@ vect_get_place_in_interleaving_chain (st
>
>  bool
>  can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
> -                               machine_mode elt_mode,
> -                               unsigned int *nvectors_out,
> +                               tree elt_type, unsigned int *nvectors_out,
>                                 tree *vector_type_out,
>                                 tree *permutes)
>  {
> -  poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
> -  poly_int64 nelts;
> +  tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, count);
> +  if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type)))
> +    return false;
> +
> +  machine_mode base_vector_mode = TYPE_MODE (base_vector_type);
> +  poly_int64 elt_bytes = count * GET_MODE_UNIT_SIZE (base_vector_mode);
>    unsigned int nvectors = 1;
>    for (;;)
>      {
>        scalar_int_mode int_mode;
>        poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
> -      if (multiple_p (GET_MODE_SIZE (vinfo->vector_mode), elt_bytes, &nelts)
> -         && int_mode_for_size (elt_bits, 0).exists (&int_mode))
> +      if (int_mode_for_size (elt_bits, 1).exists (&int_mode))
>         {
> +         /* Get the natural vector type for this SLP group size.  */
>           tree int_type = build_nonstandard_integer_type
>             (GET_MODE_BITSIZE (int_mode), 1);
> -         tree vector_type = build_vector_type (int_type, nelts);
> -         if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
> -           {
> +         tree vector_type
> +           = get_vectype_for_scalar_type (vinfo, int_type, count);
> +         if (vector_type
> +             && VECTOR_MODE_P (TYPE_MODE (vector_type))
> +             && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)),
> +                          GET_MODE_SIZE (base_vector_mode)))
> +           {
> +             /* Try fusing consecutive sequences of COUNT / NVECTORS elements
> +                together into elements of type INT_TYPE and using the result
> +                to build NVECTORS vectors.  */
> +             poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type));
>               vec_perm_builder sel1 (nelts, 2, 3);
>               vec_perm_builder sel2 (nelts, 2, 3);
>               poly_int64 half_nelts = exact_div (nelts, 2);
> @@ -492,7 +503,7 @@ vect_get_and_check_slp_defs (vec_info *v
>               && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
>               && (TREE_CODE (type) == BOOLEAN_TYPE
>                   || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
> -                                                     TYPE_MODE (type))))
> +                                                     type)))
>             {
>               if (dump_enabled_p ())
>                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> @@ -3551,7 +3562,7 @@ duplicate_and_interleave (vec_info *vinf
>    unsigned int nvectors = 1;
>    tree new_vector_type;
>    tree permutes[2];
> -  if (!can_duplicate_and_interleave_p (vinfo, nelts, TYPE_MODE (element_type),
> +  if (!can_duplicate_and_interleave_p (vinfo, nelts, element_type,
>                                        &nvectors, &new_vector_type,
>                                        permutes))
>      gcc_unreachable ();
> Index: gcc/tree-vect-loop.c
> ===================================================================
> --- gcc/tree-vect-loop.c        2019-11-05 10:57:41.658071173 +0000
> +++ gcc/tree-vect-loop.c        2019-11-05 11:14:42.782884501 +0000
> @@ -6288,10 +6288,9 @@ vectorizable_reduction (stmt_vec_info st
>          that value needs to be repeated for every instance of the
>          statement within the initial vector.  */
>        unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
> -      scalar_mode elt_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype_out));
>        if (!neutral_op
>           && !can_duplicate_and_interleave_p (loop_vinfo, group_size,
> -                                             elt_mode))
> +                                             TREE_TYPE (vectype_out)))
>         {
>           if (dump_enabled_p ())
>             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,

Patch
diff mbox series

Index: gcc/tree-vectorizer.h
===================================================================
--- gcc/tree-vectorizer.h	2019-11-05 11:08:12.521631453 +0000
+++ gcc/tree-vectorizer.h	2019-11-05 11:14:42.786884473 +0000
@@ -1779,8 +1779,7 @@  extern void vect_get_slp_defs (slp_tree,
 extern bool vect_slp_bb (basic_block);
 extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree);
 extern bool is_simple_and_all_uses_invariant (stmt_vec_info, loop_vec_info);
-extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int,
-					    machine_mode,
+extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int, tree,
 					    unsigned int * = NULL,
 					    tree * = NULL, tree * = NULL);
 extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree,
Index: gcc/tree-vect-slp.c
===================================================================
--- gcc/tree-vect-slp.c	2019-11-05 11:08:12.517631481 +0000
+++ gcc/tree-vect-slp.c	2019-11-05 11:14:42.786884473 +0000
@@ -265,7 +265,7 @@  vect_get_place_in_interleaving_chain (st
   return -1;
 }
 
-/* Check whether it is possible to load COUNT elements of type ELT_MODE
+/* Check whether it is possible to load COUNT elements of type ELT_TYPE
    using the method implemented by duplicate_and_interleave.  Return true
    if so, returning the number of intermediate vectors in *NVECTORS_OUT
    (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
@@ -273,26 +273,37 @@  vect_get_place_in_interleaving_chain (st
 
 bool
 can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
-				machine_mode elt_mode,
-				unsigned int *nvectors_out,
+				tree elt_type, unsigned int *nvectors_out,
 				tree *vector_type_out,
 				tree *permutes)
 {
-  poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
-  poly_int64 nelts;
+  tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, count);
+  if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type)))
+    return false;
+
+  machine_mode base_vector_mode = TYPE_MODE (base_vector_type);
+  poly_int64 elt_bytes = count * GET_MODE_UNIT_SIZE (base_vector_mode);
   unsigned int nvectors = 1;
   for (;;)
     {
       scalar_int_mode int_mode;
       poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
-      if (multiple_p (GET_MODE_SIZE (vinfo->vector_mode), elt_bytes, &nelts)
-	  && int_mode_for_size (elt_bits, 0).exists (&int_mode))
+      if (int_mode_for_size (elt_bits, 1).exists (&int_mode))
 	{
+	  /* Get the natural vector type for this SLP group size.  */
 	  tree int_type = build_nonstandard_integer_type
 	    (GET_MODE_BITSIZE (int_mode), 1);
-	  tree vector_type = build_vector_type (int_type, nelts);
-	  if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
-	    {
+	  tree vector_type
+	    = get_vectype_for_scalar_type (vinfo, int_type, count);
+	  if (vector_type
+	      && VECTOR_MODE_P (TYPE_MODE (vector_type))
+	      && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)),
+			   GET_MODE_SIZE (base_vector_mode)))
+	    {
+	      /* Try fusing consecutive sequences of COUNT / NVECTORS elements
+		 together into elements of type INT_TYPE and using the result
+		 to build NVECTORS vectors.  */
+	      poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type));
 	      vec_perm_builder sel1 (nelts, 2, 3);
 	      vec_perm_builder sel2 (nelts, 2, 3);
 	      poly_int64 half_nelts = exact_div (nelts, 2);
@@ -492,7 +503,7 @@  vect_get_and_check_slp_defs (vec_info *v
 	      && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
 	      && (TREE_CODE (type) == BOOLEAN_TYPE
 		  || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
-						      TYPE_MODE (type))))
+						      type)))
 	    {
 	      if (dump_enabled_p ())
 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -3551,7 +3562,7 @@  duplicate_and_interleave (vec_info *vinf
   unsigned int nvectors = 1;
   tree new_vector_type;
   tree permutes[2];
-  if (!can_duplicate_and_interleave_p (vinfo, nelts, TYPE_MODE (element_type),
+  if (!can_duplicate_and_interleave_p (vinfo, nelts, element_type,
 				       &nvectors, &new_vector_type,
 				       permutes))
     gcc_unreachable ();
Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c	2019-11-05 10:57:41.658071173 +0000
+++ gcc/tree-vect-loop.c	2019-11-05 11:14:42.782884501 +0000
@@ -6288,10 +6288,9 @@  vectorizable_reduction (stmt_vec_info st
 	 that value needs to be repeated for every instance of the
 	 statement within the initial vector.  */
       unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
-      scalar_mode elt_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype_out));
       if (!neutral_op
 	  && !can_duplicate_and_interleave_p (loop_vinfo, group_size,
-					      elt_mode))
+					      TREE_TYPE (vectype_out)))
 	{
 	  if (dump_enabled_p ())
 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,