Message ID | b1a866a7-502e-49e9-e8f8-a0fe3d99feae@suse.cz |
---|---|
State | New |
Headers | show |
Series | SLP: support entire BB. | expand |
On Fri, Jul 31, 2020 at 12:30 PM Martin Liška <mliska@suse.cz> wrote: > > Hey. > > Motivation of the patch is to allow vectorization of an entire BB. > So far we bail out a sub-BB region when we reach a stmt for which > vect_find_stmt_data_reference returns false. That's replaced with > recoding of groups of the data references. > > We can newly vectorize code like: > > void foo(); > void bar (int i, double *a, double *b) > { > double tem1 = a[2*i]; > double tem2 = 2*a[2*i+1]; > foo (); > b[2*i] = 2*tem1; > b[2*i+1] = tem2; > } > > into: > <bb 2> [local count: 1073741824]: > _1 = i_12(D) * 2; > _2 = (long unsigned int) _1; > _3 = _2 * 8; > _4 = a_13(D) + _3; > vect_tem1_15.5_24 = MEM <vector(2) double> [(double *)_4]; > vect__10.6_25 = vect_tem1_15.5_24 * { 2.0e+0, 2.0e+0 }; > foo (); > _9 = b_18(D) + _3; > MEM <vector(2) double> [(double *)_9] = vect__10.6_25; > return; > > Patch can bootstrap on x86_64-linux-gnu and survives regression tests. OK with some minor changes, see below > Thoughs? > Martin > > gcc/ChangeLog: > > * tree-vect-data-refs.c (dr_group_sort_cmp): Work on > data_ref_pair. > (vect_analyze_data_ref_accesses): Work on groups. > (vect_find_stmt_data_reference): Add group_id argument and fill > up dataref_groups vector. > * tree-vect-loop.c (vect_get_datarefs_in_loop): Pass new > arguments. > (vect_analyze_loop_2): Likewise. > * tree-vect-slp.c (vect_slp_analyze_bb_1): Pass argument. > (vect_slp_bb_region): Likewise. > (vect_slp_region): Likewise. > (vect_slp_bb):Work on the entire BB. > * tree-vectorizer.h (vect_analyze_data_ref_accesses): Add new > argument. > (vect_find_stmt_data_reference): Likewise. > > gcc/testsuite/ChangeLog: > > * gcc.dg/vect/bb-slp-38.c: Adjust pattern as now we only process > a single vectorization and now 2 partial. > * gcc.dg/vect/bb-slp-45.c: New test. > --- > gcc/testsuite/gcc.dg/vect/bb-slp-38.c | 2 +- > gcc/testsuite/gcc.dg/vect/bb-slp-45.c | 36 ++++++++++++ > gcc/tree-vect-data-refs.c | 63 +++++++++++++------- > gcc/tree-vect-loop.c | 5 +- > gcc/tree-vect-slp.c | 82 ++++++++++----------------- > gcc/tree-vectorizer.h | 5 +- > 6 files changed, 118 insertions(+), 75 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-45.c > > diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-38.c b/gcc/testsuite/gcc.dg/vect/bb-slp-38.c > index 59aec54fffd..9c57ea3c2c1 100644 > --- a/gcc/testsuite/gcc.dg/vect/bb-slp-38.c > +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-38.c > @@ -41,4 +41,4 @@ int main() > } > > /* { dg-final { scan-tree-dump "basic block vectorized" "slp2" { target vect_perm } } } */ > -/* { dg-final { scan-tree-dump-times "basic block part vectorized" 2 "slp2" { target vect_perm } } } */ > +/* { dg-final { scan-tree-dump "basic block part vectorized" "slp2" { target vect_perm } } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-45.c b/gcc/testsuite/gcc.dg/vect/bb-slp-45.c > new file mode 100644 > index 00000000000..4107a34cb93 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-45.c > @@ -0,0 +1,36 @@ > +/* { dg-require-effective-target vect_double } */ > + > +#include "tree-vect.h" > + > +extern void abort (void); > + > +double a[8], b[8]; > +int x; > + > +void __attribute__((noinline,noclone)) > +bar (void) > +{ > + x = 1; > +} > + > +void __attribute__((noinline,noclone)) > +foo(int i) > +{ > + double tem1 = a[2*i]; > + double tem2 = 2*a[2*i+1]; > + bar (); > + b[2*i] = 2*tem1; > + b[2*i+1] = tem2; > +} > + > +int main() > +{ > + int i; > + check_vect (); > + for (i = 0; i < 8; ++i) > + b[i] = i; > + foo (2); > + return 0; > +} > + > +/* { dg-final { scan-tree-dump "basic block vectorized" "slp2" } } */ > diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c > index a78ae61d1b0..36e10ff3619 100644 > --- a/gcc/tree-vect-data-refs.c > +++ b/gcc/tree-vect-data-refs.c > @@ -2757,14 +2757,18 @@ vect_analyze_data_ref_access (vec_info *vinfo, dr_vec_info *dr_info) > return vect_analyze_group_access (vinfo, dr_info); > } > > +typedef std::pair<data_reference_p, int> data_ref_pair; > + > /* Compare two data-references DRA and DRB to group them into chunks > suitable for grouping. */ > > static int > dr_group_sort_cmp (const void *dra_, const void *drb_) > { > - data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_); > - data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_); > + data_ref_pair dra_pair = *(data_ref_pair *)const_cast<void *>(dra_); > + data_ref_pair drb_pair = *(data_ref_pair *)const_cast<void *>(drb_); > + data_reference_p dra = dra_pair.first; > + data_reference_p drb = drb_pair.first; > int cmp; > > /* Stabilize sort. */ > @@ -2772,10 +2776,13 @@ dr_group_sort_cmp (const void *dra_, const void *drb_) > return 0; > > /* DRs in different loops never belong to the same group. */ Comment needs to be adjusted to mention basic-block instead of loop > - loop_p loopa = gimple_bb (DR_STMT (dra))->loop_father; > - loop_p loopb = gimple_bb (DR_STMT (drb))->loop_father; > - if (loopa != loopb) > - return loopa->num < loopb->num ? -1 : 1; > + int bb_index1 = gimple_bb (DR_STMT (dra))->index; > + int bb_index2 = gimple_bb (DR_STMT (drb))->index; > + if (bb_index1 != bb_index2) > + return bb_index1 < bb_index2 ? -1 : 1; > + > + if (dra_pair.second != drb_pair.second) > + return dra_pair.second < drb_pair.second ? -1 : 1; Please add a comment that this compares the DR group (not obvious because it says .second) > > /* Ordering of DRs according to base. */ > cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra), > @@ -2881,11 +2888,11 @@ can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info, > FORNOW: handle only arrays and pointer accesses. */ > > opt_result > -vect_analyze_data_ref_accesses (vec_info *vinfo) > +vect_analyze_data_ref_accesses (vec_info *vinfo, > + vec<int> *dataref_groups) > { > unsigned int i; > vec<data_reference_p> datarefs = vinfo->shared->datarefs; > - struct data_reference *dr; > > DUMP_VECT_SCOPE ("vect_analyze_data_ref_accesses"); > > @@ -2895,14 +2902,21 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) > /* Sort the array of datarefs to make building the interleaving chains > linear. Don't modify the original vector's order, it is needed for > determining what dependencies are reversed. */ > - vec<data_reference_p> datarefs_copy = datarefs.copy (); > + vec<data_ref_pair> datarefs_copy; > + datarefs_copy.create (datarefs.length ()); > + for (unsigned i = 0; i < datarefs.length (); i++) > + { > + int group_id = dataref_groups ? (*dataref_groups)[i] : 0; > + datarefs_copy.safe_push (data_ref_pair (datarefs[i], group_id)); quick_push > + } > datarefs_copy.qsort (dr_group_sort_cmp); > hash_set<stmt_vec_info> to_fixup; > > /* Build the interleaving chains. */ > for (i = 0; i < datarefs_copy.length () - 1;) > { > - data_reference_p dra = datarefs_copy[i]; > + data_reference_p dra = datarefs_copy[i].first; > + int dra_group_id = datarefs_copy[i].second; > dr_vec_info *dr_info_a = vinfo->lookup_dr (dra); > stmt_vec_info stmtinfo_a = dr_info_a->stmt; > stmt_vec_info lastinfo = NULL; > @@ -2914,7 +2928,8 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) > } > for (i = i + 1; i < datarefs_copy.length (); ++i) > { > - data_reference_p drb = datarefs_copy[i]; > + data_reference_p drb = datarefs_copy[i].first; > + int drb_group_id = datarefs_copy[i].second; > dr_vec_info *dr_info_b = vinfo->lookup_dr (drb); > stmt_vec_info stmtinfo_b = dr_info_b->stmt; > if (!STMT_VINFO_VECTORIZABLE (stmtinfo_b) > @@ -2927,10 +2942,14 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) > matters we can push those to a worklist and re-iterate > over them. The we can just skip ahead to the next DR here. */ > > - /* DRs in a different loop should not be put into the same > + /* DRs in a different BBs should not be put into the same > interleaving group. */ > - if (gimple_bb (DR_STMT (dra))->loop_father > - != gimple_bb (DR_STMT (drb))->loop_father) > + int bb_index1 = gimple_bb (DR_STMT (dra))->index; > + int bb_index2 = gimple_bb (DR_STMT (drb))->index; > + if (bb_index1 != bb_index2) > + break; > + > + if (dra_group_id != drb_group_id) > break; > > /* Check that the data-refs have same first location (except init) > @@ -2977,7 +2996,7 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) > HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra)); > HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb)); > HOST_WIDE_INT init_prev > - = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1])); > + = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1].first)); > gcc_assert (init_a <= init_b > && init_a <= init_prev > && init_prev <= init_b); > @@ -2985,7 +3004,7 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) > /* Do not place the same access in the interleaving chain twice. */ > if (init_b == init_prev) > { > - gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1])) > + gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1].first)) > < gimple_uid (DR_STMT (drb))); > /* Simply link in duplicates and fix up the chain below. */ > } > @@ -3098,9 +3117,10 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) > to_fixup.add (newgroup); > } > > - FOR_EACH_VEC_ELT (datarefs_copy, i, dr) > + data_ref_pair *dr_pair; > + FOR_EACH_VEC_ELT (datarefs_copy, i, dr_pair) > { > - dr_vec_info *dr_info = vinfo->lookup_dr (dr); > + dr_vec_info *dr_info = vinfo->lookup_dr (dr_pair->first); > if (STMT_VINFO_VECTORIZABLE (dr_info->stmt) > && !vect_analyze_data_ref_access (vinfo, dr_info)) > { > @@ -3991,7 +4011,8 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, > > opt_result > vect_find_stmt_data_reference (loop_p loop, gimple *stmt, > - vec<data_reference_p> *datarefs) > + vec<data_reference_p> *datarefs, > + vec<int> *dataref_groups, int group_id) You are always passing NULL here so simply avoid this and the following changes. OK with those changes. Thanks, Richard. > { > /* We can ignore clobbers for dataref analysis - they are removed during > loop vectorization and BB vectorization checks dependences with a > @@ -4118,6 +4139,8 @@ vect_find_stmt_data_reference (loop_p loop, gimple *stmt, > newdr->aux = (void *) (-1 - tree_to_uhwi (arg2)); > free_data_ref (dr); > datarefs->safe_push (newdr); > + if (dataref_groups) > + dataref_groups->safe_push (group_id); > return opt_result::success (); > } > } > @@ -4127,6 +4150,8 @@ vect_find_stmt_data_reference (loop_p loop, gimple *stmt, > } > > datarefs->safe_push (dr); > + if (dataref_groups) > + dataref_groups->safe_push (group_id); > return opt_result::success (); > } > > diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c > index 185019c3305..2326d670bee 100644 > --- a/gcc/tree-vect-loop.c > +++ b/gcc/tree-vect-loop.c > @@ -1865,7 +1865,8 @@ vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs, > if (is_gimple_debug (stmt)) > continue; > ++(*n_stmts); > - opt_result res = vect_find_stmt_data_reference (loop, stmt, datarefs); > + opt_result res = vect_find_stmt_data_reference (loop, stmt, datarefs, > + NULL, 0); > if (!res) > { > if (is_gimple_call (stmt) && loop->safelen) > @@ -2087,7 +2088,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts) > /* Analyze the access patterns of the data-refs in the loop (consecutive, > complex, etc.). FORNOW: Only handle consecutive access pattern. */ > > - ok = vect_analyze_data_ref_accesses (loop_vinfo); > + ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL); > if (!ok) > { > if (dump_enabled_p ()) > diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c > index 72192b5a813..167db076454 100644 > --- a/gcc/tree-vect-slp.c > +++ b/gcc/tree-vect-slp.c > @@ -3217,7 +3217,8 @@ vect_slp_check_for_constructors (bb_vec_info bb_vinfo) > region. */ > > static bool > -vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal) > +vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal, > + vec<int> *dataref_groups) > { > DUMP_VECT_SCOPE ("vect_slp_analyze_bb"); > > @@ -3239,7 +3240,7 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal) > return false; > } > > - if (!vect_analyze_data_ref_accesses (bb_vinfo)) > + if (!vect_analyze_data_ref_accesses (bb_vinfo, dataref_groups)) > { > if (dump_enabled_p ()) > dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > @@ -3348,10 +3349,11 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal) > given by DATAREFS. */ > > static bool > -vect_slp_bb_region (gimple_stmt_iterator region_begin, > - gimple_stmt_iterator region_end, > - vec<data_reference_p> datarefs, > - unsigned int n_stmts) > +vect_slp_region (gimple_stmt_iterator region_begin, > + gimple_stmt_iterator region_end, > + vec<data_reference_p> datarefs, > + vec<int> *dataref_groups, > + unsigned int n_stmts) > { > bb_vec_info bb_vinfo; > auto_vector_modes vector_modes; > @@ -3378,7 +3380,7 @@ vect_slp_bb_region (gimple_stmt_iterator region_begin, > bb_vinfo->shared->check_datarefs (); > bb_vinfo->vector_mode = next_vector_mode; > > - if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal) > + if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal, dataref_groups) > && dbg_cnt (vect_slp)) > { > if (dump_enabled_p ()) > @@ -3473,45 +3475,30 @@ vect_slp_bb_region (gimple_stmt_iterator region_begin, > bool > vect_slp_bb (basic_block bb) > { > - gimple_stmt_iterator gsi; > - bool any_vectorized = false; > - > - gsi = gsi_after_labels (bb); > - while (!gsi_end_p (gsi)) > + vec<data_reference_p> datarefs = vNULL; > + vec<int> dataref_groups = vNULL; > + int insns = 0; > + int current_group = 0; > + gimple_stmt_iterator region_begin = gsi_start_nondebug_after_labels_bb (bb); > + gimple_stmt_iterator region_end = gsi_last_bb (bb); > + if (!gsi_end_p (region_end)) > + gsi_next (®ion_end); > + > + for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi); > + gsi_next (&gsi)) > { > - gimple_stmt_iterator region_begin = gsi; > - vec<data_reference_p> datarefs = vNULL; > - int insns = 0; > - > - for (; !gsi_end_p (gsi); gsi_next (&gsi)) > - { > - gimple *stmt = gsi_stmt (gsi); > - if (is_gimple_debug (stmt)) > - { > - /* Skip leading debug stmts. */ > - if (gsi_stmt (region_begin) == stmt) > - gsi_next (®ion_begin); > - continue; > - } > - insns++; > - > - if (gimple_location (stmt) != UNKNOWN_LOCATION) > - vect_location = stmt; > + gimple *stmt = gsi_stmt (gsi); > + if (is_gimple_debug (stmt)) > + continue; > > - if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs)) > - break; > - } > - if (gsi_end_p (region_begin)) > - break; > + insns++; > > - /* Skip leading unhandled stmts. */ > - if (gsi_stmt (region_begin) == gsi_stmt (gsi)) > - { > - gsi_next (&gsi); > - continue; > - } > + if (gimple_location (stmt) != UNKNOWN_LOCATION) > + vect_location = stmt; > > - gimple_stmt_iterator region_end = gsi; > + if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs, > + &dataref_groups, current_group)) > + ++current_group; > > if (insns > param_slp_max_insns_in_bb) > { > @@ -3520,17 +3507,10 @@ vect_slp_bb (basic_block bb) > "not vectorized: too many instructions in " > "basic block.\n"); > } > - else if (vect_slp_bb_region (region_begin, region_end, datarefs, insns)) > - any_vectorized = true; > - > - if (gsi_end_p (region_end)) > - break; > - > - /* Skip the unhandled stmt. */ > - gsi_next (&gsi); > } > > - return any_vectorized; > + return vect_slp_region (region_begin, region_end, datarefs, > + &dataref_groups, insns); > } > > > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h > index 5466c78c20b..21881a9390c 100644 > --- a/gcc/tree-vectorizer.h > +++ b/gcc/tree-vectorizer.h > @@ -1917,14 +1917,15 @@ extern bool vect_slp_analyze_instance_dependence (vec_info *, slp_instance); > extern opt_result vect_enhance_data_refs_alignment (loop_vec_info); > extern opt_result vect_analyze_data_refs_alignment (loop_vec_info); > extern bool vect_slp_analyze_instance_alignment (vec_info *, slp_instance); > -extern opt_result vect_analyze_data_ref_accesses (vec_info *); > +extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *); > extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info); > extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree, > tree, int, internal_fn *, tree *); > extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info, > gather_scatter_info *); > extern opt_result vect_find_stmt_data_reference (loop_p, gimple *, > - vec<data_reference_p> *); > + vec<data_reference_p> *, > + vec<int> *, int); > extern opt_result vect_analyze_data_refs (vec_info *, poly_uint64 *, bool *); > extern void vect_record_base_alignments (vec_info *); > extern tree vect_create_data_ref_ptr (vec_info *, > -- > 2.27.0 >
On 8/3/20 12:29 PM, Richard Biener wrote:
> You are always passing NULL here so simply avoid this and the following changes.
Are you sure about this?
Note that vect_slp_bb does:
+ if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs,
+ &dataref_groups, current_group))
+ ++current_group;
Martin
On Mon, Aug 10, 2020 at 12:29 PM Martin Liška <mliska@suse.cz> wrote: > > On 8/3/20 12:29 PM, Richard Biener wrote: > > You are always passing NULL here so simply avoid this and the following changes. > > Are you sure about this? > > Note that vect_slp_bb does: > > + if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs, > + &dataref_groups, current_group)) > + ++current_group; Oops, I stand corrected. Patch is OK without this particular requested change. Richard. > Martin
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-38.c b/gcc/testsuite/gcc.dg/vect/bb-slp-38.c index 59aec54fffd..9c57ea3c2c1 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-38.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-38.c @@ -41,4 +41,4 @@ int main() } /* { dg-final { scan-tree-dump "basic block vectorized" "slp2" { target vect_perm } } } */ -/* { dg-final { scan-tree-dump-times "basic block part vectorized" 2 "slp2" { target vect_perm } } } */ +/* { dg-final { scan-tree-dump "basic block part vectorized" "slp2" { target vect_perm } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-45.c b/gcc/testsuite/gcc.dg/vect/bb-slp-45.c new file mode 100644 index 00000000000..4107a34cb93 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-45.c @@ -0,0 +1,36 @@ +/* { dg-require-effective-target vect_double } */ + +#include "tree-vect.h" + +extern void abort (void); + +double a[8], b[8]; +int x; + +void __attribute__((noinline,noclone)) +bar (void) +{ + x = 1; +} + +void __attribute__((noinline,noclone)) +foo(int i) +{ + double tem1 = a[2*i]; + double tem2 = 2*a[2*i+1]; + bar (); + b[2*i] = 2*tem1; + b[2*i+1] = tem2; +} + +int main() +{ + int i; + check_vect (); + for (i = 0; i < 8; ++i) + b[i] = i; + foo (2); + return 0; +} + +/* { dg-final { scan-tree-dump "basic block vectorized" "slp2" } } */ diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index a78ae61d1b0..36e10ff3619 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -2757,14 +2757,18 @@ vect_analyze_data_ref_access (vec_info *vinfo, dr_vec_info *dr_info) return vect_analyze_group_access (vinfo, dr_info); } +typedef std::pair<data_reference_p, int> data_ref_pair; + /* Compare two data-references DRA and DRB to group them into chunks suitable for grouping. */ static int dr_group_sort_cmp (const void *dra_, const void *drb_) { - data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_); - data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_); + data_ref_pair dra_pair = *(data_ref_pair *)const_cast<void *>(dra_); + data_ref_pair drb_pair = *(data_ref_pair *)const_cast<void *>(drb_); + data_reference_p dra = dra_pair.first; + data_reference_p drb = drb_pair.first; int cmp; /* Stabilize sort. */ @@ -2772,10 +2776,13 @@ dr_group_sort_cmp (const void *dra_, const void *drb_) return 0; /* DRs in different loops never belong to the same group. */ - loop_p loopa = gimple_bb (DR_STMT (dra))->loop_father; - loop_p loopb = gimple_bb (DR_STMT (drb))->loop_father; - if (loopa != loopb) - return loopa->num < loopb->num ? -1 : 1; + int bb_index1 = gimple_bb (DR_STMT (dra))->index; + int bb_index2 = gimple_bb (DR_STMT (drb))->index; + if (bb_index1 != bb_index2) + return bb_index1 < bb_index2 ? -1 : 1; + + if (dra_pair.second != drb_pair.second) + return dra_pair.second < drb_pair.second ? -1 : 1; /* Ordering of DRs according to base. */ cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra), @@ -2881,11 +2888,11 @@ can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info, FORNOW: handle only arrays and pointer accesses. */ opt_result -vect_analyze_data_ref_accesses (vec_info *vinfo) +vect_analyze_data_ref_accesses (vec_info *vinfo, + vec<int> *dataref_groups) { unsigned int i; vec<data_reference_p> datarefs = vinfo->shared->datarefs; - struct data_reference *dr; DUMP_VECT_SCOPE ("vect_analyze_data_ref_accesses"); @@ -2895,14 +2902,21 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) /* Sort the array of datarefs to make building the interleaving chains linear. Don't modify the original vector's order, it is needed for determining what dependencies are reversed. */ - vec<data_reference_p> datarefs_copy = datarefs.copy (); + vec<data_ref_pair> datarefs_copy; + datarefs_copy.create (datarefs.length ()); + for (unsigned i = 0; i < datarefs.length (); i++) + { + int group_id = dataref_groups ? (*dataref_groups)[i] : 0; + datarefs_copy.safe_push (data_ref_pair (datarefs[i], group_id)); + } datarefs_copy.qsort (dr_group_sort_cmp); hash_set<stmt_vec_info> to_fixup; /* Build the interleaving chains. */ for (i = 0; i < datarefs_copy.length () - 1;) { - data_reference_p dra = datarefs_copy[i]; + data_reference_p dra = datarefs_copy[i].first; + int dra_group_id = datarefs_copy[i].second; dr_vec_info *dr_info_a = vinfo->lookup_dr (dra); stmt_vec_info stmtinfo_a = dr_info_a->stmt; stmt_vec_info lastinfo = NULL; @@ -2914,7 +2928,8 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) } for (i = i + 1; i < datarefs_copy.length (); ++i) { - data_reference_p drb = datarefs_copy[i]; + data_reference_p drb = datarefs_copy[i].first; + int drb_group_id = datarefs_copy[i].second; dr_vec_info *dr_info_b = vinfo->lookup_dr (drb); stmt_vec_info stmtinfo_b = dr_info_b->stmt; if (!STMT_VINFO_VECTORIZABLE (stmtinfo_b) @@ -2927,10 +2942,14 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) matters we can push those to a worklist and re-iterate over them. The we can just skip ahead to the next DR here. */ - /* DRs in a different loop should not be put into the same + /* DRs in a different BBs should not be put into the same interleaving group. */ - if (gimple_bb (DR_STMT (dra))->loop_father - != gimple_bb (DR_STMT (drb))->loop_father) + int bb_index1 = gimple_bb (DR_STMT (dra))->index; + int bb_index2 = gimple_bb (DR_STMT (drb))->index; + if (bb_index1 != bb_index2) + break; + + if (dra_group_id != drb_group_id) break; /* Check that the data-refs have same first location (except init) @@ -2977,7 +2996,7 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra)); HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb)); HOST_WIDE_INT init_prev - = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1])); + = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1].first)); gcc_assert (init_a <= init_b && init_a <= init_prev && init_prev <= init_b); @@ -2985,7 +3004,7 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) /* Do not place the same access in the interleaving chain twice. */ if (init_b == init_prev) { - gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1])) + gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1].first)) < gimple_uid (DR_STMT (drb))); /* Simply link in duplicates and fix up the chain below. */ } @@ -3098,9 +3117,10 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) to_fixup.add (newgroup); } - FOR_EACH_VEC_ELT (datarefs_copy, i, dr) + data_ref_pair *dr_pair; + FOR_EACH_VEC_ELT (datarefs_copy, i, dr_pair) { - dr_vec_info *dr_info = vinfo->lookup_dr (dr); + dr_vec_info *dr_info = vinfo->lookup_dr (dr_pair->first); if (STMT_VINFO_VECTORIZABLE (dr_info->stmt) && !vect_analyze_data_ref_access (vinfo, dr_info)) { @@ -3991,7 +4011,8 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, opt_result vect_find_stmt_data_reference (loop_p loop, gimple *stmt, - vec<data_reference_p> *datarefs) + vec<data_reference_p> *datarefs, + vec<int> *dataref_groups, int group_id) { /* We can ignore clobbers for dataref analysis - they are removed during loop vectorization and BB vectorization checks dependences with a @@ -4118,6 +4139,8 @@ vect_find_stmt_data_reference (loop_p loop, gimple *stmt, newdr->aux = (void *) (-1 - tree_to_uhwi (arg2)); free_data_ref (dr); datarefs->safe_push (newdr); + if (dataref_groups) + dataref_groups->safe_push (group_id); return opt_result::success (); } } @@ -4127,6 +4150,8 @@ vect_find_stmt_data_reference (loop_p loop, gimple *stmt, } datarefs->safe_push (dr); + if (dataref_groups) + dataref_groups->safe_push (group_id); return opt_result::success (); } diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 185019c3305..2326d670bee 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -1865,7 +1865,8 @@ vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs, if (is_gimple_debug (stmt)) continue; ++(*n_stmts); - opt_result res = vect_find_stmt_data_reference (loop, stmt, datarefs); + opt_result res = vect_find_stmt_data_reference (loop, stmt, datarefs, + NULL, 0); if (!res) { if (is_gimple_call (stmt) && loop->safelen) @@ -2087,7 +2088,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts) /* Analyze the access patterns of the data-refs in the loop (consecutive, complex, etc.). FORNOW: Only handle consecutive access pattern. */ - ok = vect_analyze_data_ref_accesses (loop_vinfo); + ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL); if (!ok) { if (dump_enabled_p ()) diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 72192b5a813..167db076454 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -3217,7 +3217,8 @@ vect_slp_check_for_constructors (bb_vec_info bb_vinfo) region. */ static bool -vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal) +vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal, + vec<int> *dataref_groups) { DUMP_VECT_SCOPE ("vect_slp_analyze_bb"); @@ -3239,7 +3240,7 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal) return false; } - if (!vect_analyze_data_ref_accesses (bb_vinfo)) + if (!vect_analyze_data_ref_accesses (bb_vinfo, dataref_groups)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -3348,10 +3349,11 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal) given by DATAREFS. */ static bool -vect_slp_bb_region (gimple_stmt_iterator region_begin, - gimple_stmt_iterator region_end, - vec<data_reference_p> datarefs, - unsigned int n_stmts) +vect_slp_region (gimple_stmt_iterator region_begin, + gimple_stmt_iterator region_end, + vec<data_reference_p> datarefs, + vec<int> *dataref_groups, + unsigned int n_stmts) { bb_vec_info bb_vinfo; auto_vector_modes vector_modes; @@ -3378,7 +3380,7 @@ vect_slp_bb_region (gimple_stmt_iterator region_begin, bb_vinfo->shared->check_datarefs (); bb_vinfo->vector_mode = next_vector_mode; - if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal) + if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal, dataref_groups) && dbg_cnt (vect_slp)) { if (dump_enabled_p ()) @@ -3473,45 +3475,30 @@ vect_slp_bb_region (gimple_stmt_iterator region_begin, bool vect_slp_bb (basic_block bb) { - gimple_stmt_iterator gsi; - bool any_vectorized = false; - - gsi = gsi_after_labels (bb); - while (!gsi_end_p (gsi)) + vec<data_reference_p> datarefs = vNULL; + vec<int> dataref_groups = vNULL; + int insns = 0; + int current_group = 0; + gimple_stmt_iterator region_begin = gsi_start_nondebug_after_labels_bb (bb); + gimple_stmt_iterator region_end = gsi_last_bb (bb); + if (!gsi_end_p (region_end)) + gsi_next (®ion_end); + + for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi); + gsi_next (&gsi)) { - gimple_stmt_iterator region_begin = gsi; - vec<data_reference_p> datarefs = vNULL; - int insns = 0; - - for (; !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple *stmt = gsi_stmt (gsi); - if (is_gimple_debug (stmt)) - { - /* Skip leading debug stmts. */ - if (gsi_stmt (region_begin) == stmt) - gsi_next (®ion_begin); - continue; - } - insns++; - - if (gimple_location (stmt) != UNKNOWN_LOCATION) - vect_location = stmt; + gimple *stmt = gsi_stmt (gsi); + if (is_gimple_debug (stmt)) + continue; - if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs)) - break; - } - if (gsi_end_p (region_begin)) - break; + insns++; - /* Skip leading unhandled stmts. */ - if (gsi_stmt (region_begin) == gsi_stmt (gsi)) - { - gsi_next (&gsi); - continue; - } + if (gimple_location (stmt) != UNKNOWN_LOCATION) + vect_location = stmt; - gimple_stmt_iterator region_end = gsi; + if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs, + &dataref_groups, current_group)) + ++current_group; if (insns > param_slp_max_insns_in_bb) { @@ -3520,17 +3507,10 @@ vect_slp_bb (basic_block bb) "not vectorized: too many instructions in " "basic block.\n"); } - else if (vect_slp_bb_region (region_begin, region_end, datarefs, insns)) - any_vectorized = true; - - if (gsi_end_p (region_end)) - break; - - /* Skip the unhandled stmt. */ - gsi_next (&gsi); } - return any_vectorized; + return vect_slp_region (region_begin, region_end, datarefs, + &dataref_groups, insns); } diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 5466c78c20b..21881a9390c 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1917,14 +1917,15 @@ extern bool vect_slp_analyze_instance_dependence (vec_info *, slp_instance); extern opt_result vect_enhance_data_refs_alignment (loop_vec_info); extern opt_result vect_analyze_data_refs_alignment (loop_vec_info); extern bool vect_slp_analyze_instance_alignment (vec_info *, slp_instance); -extern opt_result vect_analyze_data_ref_accesses (vec_info *); +extern opt_result vect_analyze_data_ref_accesses (vec_info *, vec<int> *); extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info); extern bool vect_gather_scatter_fn_p (vec_info *, bool, bool, tree, tree, tree, int, internal_fn *, tree *); extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info, gather_scatter_info *); extern opt_result vect_find_stmt_data_reference (loop_p, gimple *, - vec<data_reference_p> *); + vec<data_reference_p> *, + vec<int> *, int); extern opt_result vect_analyze_data_refs (vec_info *, poly_uint64 *, bool *); extern void vect_record_base_alignments (vec_info *); extern tree vect_create_data_ref_ptr (vec_info *,