Message ID | mpty2wutvy7.fsf@arm.com |
---|---|
State | New |
Headers | show |
Series | [6/n] Use build_vector_type_for_mode in get_vectype_for_scalar_type_and_size | expand |
On Tue, Nov 5, 2019 at 9:45 PM Richard Sandiford <richard.sandiford@arm.com> wrote: > > This patch makes can_duplicate_and_interleave_p cope with mixtures of > vector sizes, by using queries based on get_vectype_for_scalar_type > instead of directly querying GET_MODE_SIZE (vinfo->vector_mode). > > int_mode_for_size is now the first check we do for a candidate mode, > so it seemed better to restrict it to MAX_FIXED_MODE_SIZE. This avoids > unnecessary work and avoids trying to create scalar types that the > target might not support. > > This is the final patch in the series. As before, each patch tested individually > on aarch64-linux-gnu and the series as a whole on x86_64-linux-gnu. OK. Thanks, Richard. > > 2019-11-04 Richard Sandiford <richard.sandiford@arm.com> > > gcc/ > * tree-vectorizer.h (can_duplicate_and_interleave_p): Take an > element type rather than an element mode. > * tree-vect-slp.c (can_duplicate_and_interleave_p): Likewise. > Use get_vectype_for_scalar_type to query the natural types > for a given element type rather than basing everything on > GET_MODE_SIZE (vinfo->vector_mode). Limit int_mode_for_size > query to MAX_FIXED_MODE_SIZE. > (duplicate_and_interleave): Update call accordingly. > * tree-vect-loop.c (vectorizable_reduction): Likewise. 
> > Index: gcc/tree-vectorizer.h > =================================================================== > --- gcc/tree-vectorizer.h 2019-11-05 11:08:12.521631453 +0000 > +++ gcc/tree-vectorizer.h 2019-11-05 11:14:42.786884473 +0000 > @@ -1779,8 +1779,7 @@ extern void vect_get_slp_defs (slp_tree, > extern bool vect_slp_bb (basic_block); > extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree); > extern bool is_simple_and_all_uses_invariant (stmt_vec_info, loop_vec_info); > -extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int, > - machine_mode, > +extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int, tree, > unsigned int * = NULL, > tree * = NULL, tree * = NULL); > extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree, > Index: gcc/tree-vect-slp.c > =================================================================== > --- gcc/tree-vect-slp.c 2019-11-05 11:08:12.517631481 +0000 > +++ gcc/tree-vect-slp.c 2019-11-05 11:14:42.786884473 +0000 > @@ -265,7 +265,7 @@ vect_get_place_in_interleaving_chain (st > return -1; > } > > -/* Check whether it is possible to load COUNT elements of type ELT_MODE > +/* Check whether it is possible to load COUNT elements of type ELT_TYPE > using the method implemented by duplicate_and_interleave. 
Return true > if so, returning the number of intermediate vectors in *NVECTORS_OUT > (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT > @@ -273,26 +273,37 @@ vect_get_place_in_interleaving_chain (st > > bool > can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count, > - machine_mode elt_mode, > - unsigned int *nvectors_out, > + tree elt_type, unsigned int *nvectors_out, > tree *vector_type_out, > tree *permutes) > { > - poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode); > - poly_int64 nelts; > + tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, count); > + if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type))) > + return false; > + > + machine_mode base_vector_mode = TYPE_MODE (base_vector_type); > + poly_int64 elt_bytes = count * GET_MODE_UNIT_SIZE (base_vector_mode); > unsigned int nvectors = 1; > for (;;) > { > scalar_int_mode int_mode; > poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT; > - if (multiple_p (GET_MODE_SIZE (vinfo->vector_mode), elt_bytes, &nelts) > - && int_mode_for_size (elt_bits, 0).exists (&int_mode)) > + if (int_mode_for_size (elt_bits, 1).exists (&int_mode)) > { > + /* Get the natural vector type for this SLP group size. */ > tree int_type = build_nonstandard_integer_type > (GET_MODE_BITSIZE (int_mode), 1); > - tree vector_type = build_vector_type (int_type, nelts); > - if (VECTOR_MODE_P (TYPE_MODE (vector_type))) > - { > + tree vector_type > + = get_vectype_for_scalar_type (vinfo, int_type, count); > + if (vector_type > + && VECTOR_MODE_P (TYPE_MODE (vector_type)) > + && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)), > + GET_MODE_SIZE (base_vector_mode))) > + { > + /* Try fusing consecutive sequences of COUNT / NVECTORS elements > + together into elements of type INT_TYPE and using the result > + to build NVECTORS vectors. 
*/ > + poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type)); > vec_perm_builder sel1 (nelts, 2, 3); > vec_perm_builder sel2 (nelts, 2, 3); > poly_int64 half_nelts = exact_div (nelts, 2); > @@ -492,7 +503,7 @@ vect_get_and_check_slp_defs (vec_info *v > && !GET_MODE_SIZE (vinfo->vector_mode).is_constant () > && (TREE_CODE (type) == BOOLEAN_TYPE > || !can_duplicate_and_interleave_p (vinfo, stmts.length (), > - TYPE_MODE (type)))) > + type))) > { > if (dump_enabled_p ()) > dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > @@ -3551,7 +3562,7 @@ duplicate_and_interleave (vec_info *vinf > unsigned int nvectors = 1; > tree new_vector_type; > tree permutes[2]; > - if (!can_duplicate_and_interleave_p (vinfo, nelts, TYPE_MODE (element_type), > + if (!can_duplicate_and_interleave_p (vinfo, nelts, element_type, > &nvectors, &new_vector_type, > permutes)) > gcc_unreachable (); > Index: gcc/tree-vect-loop.c > =================================================================== > --- gcc/tree-vect-loop.c 2019-11-05 10:57:41.658071173 +0000 > +++ gcc/tree-vect-loop.c 2019-11-05 11:14:42.782884501 +0000 > @@ -6288,10 +6288,9 @@ vectorizable_reduction (stmt_vec_info st > that value needs to be repeated for every instance of the > statement within the initial vector. */ > unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance); > - scalar_mode elt_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype_out)); > if (!neutral_op > && !can_duplicate_and_interleave_p (loop_vinfo, group_size, > - elt_mode)) > + TREE_TYPE (vectype_out))) > { > if (dump_enabled_p ()) > dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
Index: gcc/tree-vectorizer.h =================================================================== --- gcc/tree-vectorizer.h 2019-11-05 11:08:12.521631453 +0000 +++ gcc/tree-vectorizer.h 2019-11-05 11:14:42.786884473 +0000 @@ -1779,8 +1779,7 @@ extern void vect_get_slp_defs (slp_tree, extern bool vect_slp_bb (basic_block); extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree); extern bool is_simple_and_all_uses_invariant (stmt_vec_info, loop_vec_info); -extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int, - machine_mode, +extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int, tree, unsigned int * = NULL, tree * = NULL, tree * = NULL); extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree, Index: gcc/tree-vect-slp.c =================================================================== --- gcc/tree-vect-slp.c 2019-11-05 11:08:12.517631481 +0000 +++ gcc/tree-vect-slp.c 2019-11-05 11:14:42.786884473 +0000 @@ -265,7 +265,7 @@ vect_get_place_in_interleaving_chain (st return -1; } -/* Check whether it is possible to load COUNT elements of type ELT_MODE +/* Check whether it is possible to load COUNT elements of type ELT_TYPE using the method implemented by duplicate_and_interleave. 
Return true if so, returning the number of intermediate vectors in *NVECTORS_OUT (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT @@ -273,26 +273,37 @@ vect_get_place_in_interleaving_chain (st bool can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count, - machine_mode elt_mode, - unsigned int *nvectors_out, + tree elt_type, unsigned int *nvectors_out, tree *vector_type_out, tree *permutes) { - poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode); - poly_int64 nelts; + tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, count); + if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type))) + return false; + + machine_mode base_vector_mode = TYPE_MODE (base_vector_type); + poly_int64 elt_bytes = count * GET_MODE_UNIT_SIZE (base_vector_mode); unsigned int nvectors = 1; for (;;) { scalar_int_mode int_mode; poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT; - if (multiple_p (GET_MODE_SIZE (vinfo->vector_mode), elt_bytes, &nelts) - && int_mode_for_size (elt_bits, 0).exists (&int_mode)) + if (int_mode_for_size (elt_bits, 1).exists (&int_mode)) { + /* Get the natural vector type for this SLP group size. */ tree int_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (int_mode), 1); - tree vector_type = build_vector_type (int_type, nelts); - if (VECTOR_MODE_P (TYPE_MODE (vector_type))) - { + tree vector_type + = get_vectype_for_scalar_type (vinfo, int_type, count); + if (vector_type + && VECTOR_MODE_P (TYPE_MODE (vector_type)) + && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)), + GET_MODE_SIZE (base_vector_mode))) + { + /* Try fusing consecutive sequences of COUNT / NVECTORS elements + together into elements of type INT_TYPE and using the result + to build NVECTORS vectors. 
*/ + poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type)); vec_perm_builder sel1 (nelts, 2, 3); vec_perm_builder sel2 (nelts, 2, 3); poly_int64 half_nelts = exact_div (nelts, 2); @@ -492,7 +503,7 @@ vect_get_and_check_slp_defs (vec_info *v && !GET_MODE_SIZE (vinfo->vector_mode).is_constant () && (TREE_CODE (type) == BOOLEAN_TYPE || !can_duplicate_and_interleave_p (vinfo, stmts.length (), - TYPE_MODE (type)))) + type))) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -3551,7 +3562,7 @@ duplicate_and_interleave (vec_info *vinf unsigned int nvectors = 1; tree new_vector_type; tree permutes[2]; - if (!can_duplicate_and_interleave_p (vinfo, nelts, TYPE_MODE (element_type), + if (!can_duplicate_and_interleave_p (vinfo, nelts, element_type, &nvectors, &new_vector_type, permutes)) gcc_unreachable (); Index: gcc/tree-vect-loop.c =================================================================== --- gcc/tree-vect-loop.c 2019-11-05 10:57:41.658071173 +0000 +++ gcc/tree-vect-loop.c 2019-11-05 11:14:42.782884501 +0000 @@ -6288,10 +6288,9 @@ vectorizable_reduction (stmt_vec_info st that value needs to be repeated for every instance of the statement within the initial vector. */ unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance); - scalar_mode elt_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype_out)); if (!neutral_op && !can_duplicate_and_interleave_p (loop_vinfo, group_size, - elt_mode)) + TREE_TYPE (vectype_out))) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,