Message ID | 1452252616-15125-1-git-send-email-alan.lawrence@arm.com |
---|---|
State | New |
Headers | show |
On Fri, 8 Jan 2016, Alan Lawrence wrote: > Here's an alternative patch, using the hunk from > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68707#c16, which 'improves' (heh, > we think) on the previous by allowing SLP to proceed where the loads are > strided, for example, slp-perm-11.c. Also I fix the testsuite failures (looking > for SLP where we've now decided to do load/store-lanes instead). I used simple > scan-tree-dump (rather than -times) because load_lanes typically appears many > times in the log for each instance output, and it felt fragile to look for a > specific number. > > Note, this doesn't fix PR67323 - that's an example where unpermuted SLP is > still (a bit) worse than load-lanes, as it needs unrolling * 2. The > previously-posted patch doesn't fix this either, however > (https://gcc.gnu.org/ml/gcc-patches/2015-12/msg01620.html). > > Bootstrapped + check-gcc + check-g++ on x86_64 (no changes), arm and aarch64 > (various new scan-tree-dump tests all passing, and PASS->FAIL for O3-pr36098.c). Looks good to me. Ok if arm maintainers agree. Thanks, Richard. > gcc/ChangeLog: > > 2016-01-XX Alan Lawrence <alan.lawrence@arm.com> > Richard Biener <rguenther@suse.de> > > * tree-vect-slp.c (vect_analyze_slp_instance): Cancel permuted SLP > instances that can be handled via vect_load_lanes. > > gcc/testsuite/ChangeLog: > > * lib/target-supports.exp (check_effective_target_vect_load_lanes): New. > * gcc.dg/vect/slp-perm-1.c: Look for vect_load_lanes instead of SLP > on platforms supporting it. > * gcc.dg/vect/slp-perm-2.c: Likewise. > * gcc.dg/vect/slp-perm-3.c: Likewise. > * gcc.dg/vect/slp-perm-5.c: Likewise. > * gcc.dg/vect/slp-perm-7.c: Likewise. > * gcc.dg/vect/slp-perm-8.c: Likewise. > * gcc.dg/vect/slp-perm-6.c: Look for vect_load_lanes in addition to SLP > on platforms supporting it. > --- > gcc/testsuite/gcc.dg/vect/slp-perm-1.c | 6 +++++- > gcc/testsuite/gcc.dg/vect/slp-perm-2.c | 7 +++++-- > gcc/testsuite/gcc.dg/vect/slp-perm-3.c | 7 +++++-- > gcc/testsuite/gcc.dg/vect/slp-perm-5.c | 7 +++++-- > gcc/testsuite/gcc.dg/vect/slp-perm-6.c | 7 +++++-- > gcc/testsuite/gcc.dg/vect/slp-perm-7.c | 8 +++++--- > gcc/testsuite/gcc.dg/vect/slp-perm-8.c | 7 +++++-- > gcc/testsuite/lib/target-supports.exp | 19 +++++++++++++++++++ > gcc/tree-vect-slp.c | 30 ++++++++++++++++++++++++++++++ > 9 files changed, 84 insertions(+), 14 deletions(-) > > diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-1.c b/gcc/testsuite/gcc.dg/vect/slp-perm-1.c > index 2b6c134..6d0d66f 100644 > --- a/gcc/testsuite/gcc.dg/vect/slp-perm-1.c > +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-1.c > @@ -58,5 +58,9 @@ int main (int argc, const char* argv[]) > } > > /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */ > -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */ > +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */ > +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */ > +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */ > +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ > +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */ > > diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-2.c b/gcc/testsuite/gcc.dg/vect/slp-perm-2.c > index da50e16..34b413d 100644 > --- a/gcc/testsuite/gcc.dg/vect/slp-perm-2.c > +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-2.c > @@ -54,5 +54,8 @@ int main (int argc, const char* argv[]) > } > > /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */ > -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */ > - > +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */ > +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */ > +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */ > +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ > +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-3.c b/gcc/testsuite/gcc.dg/vect/slp-perm-3.c > index 1d33f9b..ec13e0f 100644 > --- a/gcc/testsuite/gcc.dg/vect/slp-perm-3.c > +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-3.c > @@ -67,6 +67,9 @@ int main (int argc, const char* argv[]) > } > > /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */ > -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */ > - > +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */ > +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */ > +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */ > +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ > +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */ > > diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-5.c b/gcc/testsuite/gcc.dg/vect/slp-perm-5.c > index 9ed4d724..f44f44f 100644 > --- a/gcc/testsuite/gcc.dg/vect/slp-perm-5.c > +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-5.c > @@ -73,6 +73,9 @@ int main (int argc, const char* argv[]) > } > > /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */ > -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_perm } } } */ > - > +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */ > +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */ > +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */ > +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ > +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */ > > diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-6.c b/gcc/testsuite/gcc.dg/vect/slp-perm-6.c > index 392d36f..551734a 100644 > --- a/gcc/testsuite/gcc.dg/vect/slp-perm-6.c > +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-6.c > @@ -72,5 +72,8 @@ int main (int argc, const char* argv[]) > } > > /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */ > -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_perm } } } */ > - > +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */ > +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_load_lanes } } } */ > +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */ > +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ > +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-7.c b/gcc/testsuite/gcc.dg/vect/slp-perm-7.c > index 4352334..8b66142 100644 > --- a/gcc/testsuite/gcc.dg/vect/slp-perm-7.c > +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-7.c > @@ -71,6 +71,8 @@ int main (int argc, const char* argv[]) > } > > /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_perm } } } */ > -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */ > - > - > +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */ > +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */ > +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */ > +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ > +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-8.c b/gcc/testsuite/gcc.dg/vect/slp-perm-8.c > index 3bdd150..b1e9a10 100644 > --- a/gcc/testsuite/gcc.dg/vect/slp-perm-8.c > +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-8.c > @@ -54,5 +54,8 @@ int main (int argc, const char* argv[]) > > /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target { vect_perm_byte && vect_char_mult } } } } */ > /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_perm_byte && {! vect_char_mult } } } } } */ > -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm_byte } } } */ > - > +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm_byte && {! vect_load_lanes } } } } } */ > +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */ > +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm_byte && vect_load_lanes } } } } */ > +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ > +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */ > diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp > index 4e349e9..0e6d2b6 100644 > --- a/gcc/testsuite/lib/target-supports.exp > +++ b/gcc/testsuite/lib/target-supports.exp > @@ -4823,6 +4823,25 @@ proc check_effective_target_vect_element_align { } { > return $et_vect_element_align > } > > +# Return 1 if the target supports vector LOAD_LANES operations, 0 otherwise. > + > +proc check_effective_target_vect_load_lanes { } { > + global et_vect_load_lanes > + > + if [info exists et_vect_load_lanes] { > + verbose "check_effective_target_vect_load_lanes: using cached result" 2 > + } else { > + set et_vect_load_lanes 0 > + if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) > + || [istarget aarch64*-*-*] } { > + set et_vect_load_lanes 1 > + } > + } > + > + verbose "check_effective_target_vect_load_lanes: returning $et_vect_load_lanes" 2 > + return $et_vect_load_lanes > +} > + > # Return 1 if the target supports vector conditional operations, 0 otherwise. > > proc check_effective_target_vect_condition { } { > diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c > index b893682..b53767f 100644 > --- a/gcc/tree-vect-slp.c > +++ b/gcc/tree-vect-slp.c > @@ -1793,6 +1793,36 @@ vect_analyze_slp_instance (vec_info *vinfo, > } > } > > + /* If the loads and stores can be handled with load/store-lane > + instructions do not generate this SLP instance. */ > + if (is_a <loop_vec_info> (vinfo) > + && loads_permuted > + && dr && vect_store_lanes_supported (vectype, group_size)) > + { > + slp_tree load_node; > + FOR_EACH_VEC_ELT (loads, i, load_node) > + { > + gimple *first_stmt = GROUP_FIRST_ELEMENT > + (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0])); > + stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt); > + /* Use SLP for strided accesses (or if we can't load-lanes). */ > + if (STMT_VINFO_STRIDED_P (stmt_vinfo) > + || ! vect_load_lanes_supported > + (STMT_VINFO_VECTYPE (stmt_vinfo), > + GROUP_SIZE (stmt_vinfo))) > + break; > + } > + if (i == loads.length ()) > + { > + if (dump_enabled_p ()) > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > + "Built SLP cancelled: can use " > + "load/store-lanes\n"); > + vect_free_slp_instance (new_instance); > + return false; > + } > + } > + > vinfo->slp_instances.safe_push (new_instance); > > if (dump_enabled_p ()) >
On 08/01/16 11:45, Richard Biener wrote: > On Fri, 8 Jan 2016, Alan Lawrence wrote: > >> Here's an alternative patch, using the hunk from >> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68707#c16, which 'improves' (heh, >> we think) on the previous by allowing SLP to proceed where the loads are >> strided, for example, slp-perm-11.c. Also I fix the testsuite failures (looking >> for SLP where we've now decided to do load/store-lanes instead). I used simple >> scan-tree-dump (rather than -times) because load_lanes typically appears many >> times in the log for each instance output, and it felt fragile to look for a >> specific number. >> >> Note, this doesn't fix PR67323 - that's an example where unpermuted SLP is >> still (a bit) worse than load-lanes, as it needs unrolling * 2. The >> previously-posted patch doesn't fix this either, however >> (https://gcc.gnu.org/ml/gcc-patches/2015-12/msg01620.html). >> >> Bootstrapped + check-gcc + check-g++ on x86_64 (no changes), arm and aarch64 >> (various new scan-tree-dump tests all passing, and PASS->FAIL for O3-pr36098.c). > > Looks good to me. Ok if arm maintainers agree. > OK by me. R. > Thanks, > Richard. > >> gcc/ChangeLog: >> >> 2016-01-XX Alan Lawrence <alan.lawrence@arm.com> >> Richard Biener <rguenther@suse.de> >> >> * tree-vect-slp.c (vect_analyze_slp_instance): Cancel permuted SLP >> instances that can be handled via vect_load_lanes. >> >> gcc/testsuite/ChangeLog: >> >> * lib/target-supports.exp (check_effective_target_vect_load_lanes): New. >> * gcc.dg/vect/slp-perm-1.c: Look for vect_load_lanes instead of SLP >> on platforms supporting it. >> * gcc.dg/vect/slp-perm-2.c: Likewise. >> * gcc.dg/vect/slp-perm-3.c: Likewise. >> * gcc.dg/vect/slp-perm-5.c: Likewise. >> * gcc.dg/vect/slp-perm-7.c: Likewise. >> * gcc.dg/vect/slp-perm-8.c: Likewise. >> * gcc.dg/vect/slp-perm-6.c: Look for vect_load_lanes in addition to SLP >> on platforms supporting it. >> --- >> gcc/testsuite/gcc.dg/vect/slp-perm-1.c | 6 +++++- >> gcc/testsuite/gcc.dg/vect/slp-perm-2.c | 7 +++++-- >> gcc/testsuite/gcc.dg/vect/slp-perm-3.c | 7 +++++-- >> gcc/testsuite/gcc.dg/vect/slp-perm-5.c | 7 +++++-- >> gcc/testsuite/gcc.dg/vect/slp-perm-6.c | 7 +++++-- >> gcc/testsuite/gcc.dg/vect/slp-perm-7.c | 8 +++++--- >> gcc/testsuite/gcc.dg/vect/slp-perm-8.c | 7 +++++-- >> gcc/testsuite/lib/target-supports.exp | 19 +++++++++++++++++++ >> gcc/tree-vect-slp.c | 30 ++++++++++++++++++++++++++++++ >> 9 files changed, 84 insertions(+), 14 deletions(-) >> >> diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-1.c b/gcc/testsuite/gcc.dg/vect/slp-perm-1.c >> index 2b6c134..6d0d66f 100644 >> --- a/gcc/testsuite/gcc.dg/vect/slp-perm-1.c >> +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-1.c >> @@ -58,5 +58,9 @@ int main (int argc, const char* argv[]) >> } >> >> /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */ >> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */ >> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */ >> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */ >> +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */ >> +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ >> +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */ >> >> diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-2.c b/gcc/testsuite/gcc.dg/vect/slp-perm-2.c >> index da50e16..34b413d 100644 >> --- a/gcc/testsuite/gcc.dg/vect/slp-perm-2.c >> +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-2.c >> @@ -54,5 +54,8 @@ int main (int argc, const char* argv[]) >> } >> >> /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */ >> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */ >> - >> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */ >> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */ >> +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */ >> +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ >> +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */ >> diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-3.c b/gcc/testsuite/gcc.dg/vect/slp-perm-3.c >> index 1d33f9b..ec13e0f 100644 >> --- a/gcc/testsuite/gcc.dg/vect/slp-perm-3.c >> +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-3.c >> @@ -67,6 +67,9 @@ int main (int argc, const char* argv[]) >> } >> >> /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */ >> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */ >> - >> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */ >> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */ >> +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */ >> +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ >> +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */ >> >> diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-5.c b/gcc/testsuite/gcc.dg/vect/slp-perm-5.c >> index 9ed4d724..f44f44f 100644 >> --- a/gcc/testsuite/gcc.dg/vect/slp-perm-5.c >> +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-5.c >> @@ -73,6 +73,9 @@ int main (int argc, const char* argv[]) >> } >> >> /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */ >> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_perm } } } */ >> - >> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */ >> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */ >> +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */ >> +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ >> +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */ >> >> diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-6.c b/gcc/testsuite/gcc.dg/vect/slp-perm-6.c >> index 392d36f..551734a 100644 >> --- a/gcc/testsuite/gcc.dg/vect/slp-perm-6.c >> +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-6.c >> @@ -72,5 +72,8 @@ int main (int argc, const char* argv[]) >> } >> >> /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */ >> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_perm } } } */ >> - >> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */ >> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_load_lanes } } } */ >> +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */ >> +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ >> +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */ >> diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-7.c b/gcc/testsuite/gcc.dg/vect/slp-perm-7.c >> index 4352334..8b66142 100644 >> --- a/gcc/testsuite/gcc.dg/vect/slp-perm-7.c >> +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-7.c >> @@ -71,6 +71,8 @@ int main (int argc, const char* argv[]) >> } >> >> /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_perm } } } */ >> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */ >> - >> - >> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */ >> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */ >> +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */ >> +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ >> +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */ >> diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-8.c b/gcc/testsuite/gcc.dg/vect/slp-perm-8.c >> index 3bdd150..b1e9a10 100644 >> --- a/gcc/testsuite/gcc.dg/vect/slp-perm-8.c >> +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-8.c >> @@ -54,5 +54,8 @@ int main (int argc, const char* argv[]) >> >> /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target { vect_perm_byte && vect_char_mult } } } } */ >> /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_perm_byte && {! vect_char_mult } } } } } */ >> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm_byte } } } */ >> - >> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm_byte && {! vect_load_lanes } } } } } */ >> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */ >> +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm_byte && vect_load_lanes } } } } */ >> +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ >> +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */ >> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp >> index 4e349e9..0e6d2b6 100644 >> --- a/gcc/testsuite/lib/target-supports.exp >> +++ b/gcc/testsuite/lib/target-supports.exp >> @@ -4823,6 +4823,25 @@ proc check_effective_target_vect_element_align { } { >> return $et_vect_element_align >> } >> >> +# Return 1 if the target supports vector LOAD_LANES operations, 0 otherwise. >> + >> +proc check_effective_target_vect_load_lanes { } { >> + global et_vect_load_lanes >> + >> + if [info exists et_vect_load_lanes] { >> + verbose "check_effective_target_vect_load_lanes: using cached result" 2 >> + } else { >> + set et_vect_load_lanes 0 >> + if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) >> + || [istarget aarch64*-*-*] } { >> + set et_vect_load_lanes 1 >> + } >> + } >> + >> + verbose "check_effective_target_vect_load_lanes: returning $et_vect_load_lanes" 2 >> + return $et_vect_load_lanes >> +} >> + >> # Return 1 if the target supports vector conditional operations, 0 otherwise. >> >> proc check_effective_target_vect_condition { } { >> diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c >> index b893682..b53767f 100644 >> --- a/gcc/tree-vect-slp.c >> +++ b/gcc/tree-vect-slp.c >> @@ -1793,6 +1793,36 @@ vect_analyze_slp_instance (vec_info *vinfo, >> } >> } >> >> + /* If the loads and stores can be handled with load/store-lane >> + instructions do not generate this SLP instance. */ >> + if (is_a <loop_vec_info> (vinfo) >> + && loads_permuted >> + && dr && vect_store_lanes_supported (vectype, group_size)) >> + { >> + slp_tree load_node; >> + FOR_EACH_VEC_ELT (loads, i, load_node) >> + { >> + gimple *first_stmt = GROUP_FIRST_ELEMENT >> + (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0])); >> + stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt); >> + /* Use SLP for strided accesses (or if we can't load-lanes). */ >> + if (STMT_VINFO_STRIDED_P (stmt_vinfo) >> + || ! vect_load_lanes_supported >> + (STMT_VINFO_VECTYPE (stmt_vinfo), >> + GROUP_SIZE (stmt_vinfo))) >> + break; >> + } >> + if (i == loads.length ()) >> + { >> + if (dump_enabled_p ()) >> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, >> + "Built SLP cancelled: can use " >> + "load/store-lanes\n"); >> + vect_free_slp_instance (new_instance); >> + return false; >> + } >> + } >> + >> vinfo->slp_instances.safe_push (new_instance); >> >> if (dump_enabled_p ()) >> >
diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-1.c b/gcc/testsuite/gcc.dg/vect/slp-perm-1.c index 2b6c134..6d0d66f 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-perm-1.c +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-1.c @@ -58,5 +58,9 @@ int main (int argc, const char* argv[]) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */ +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-2.c b/gcc/testsuite/gcc.dg/vect/slp-perm-2.c index da50e16..34b413d 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-perm-2.c +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-2.c @@ -54,5 +54,8 @@ int main (int argc, const char* argv[]) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */ - +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */ +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-3.c b/gcc/testsuite/gcc.dg/vect/slp-perm-3.c index 1d33f9b..ec13e0f 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-perm-3.c +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-3.c @@ -67,6 +67,9 @@ int main (int argc, const char* argv[]) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */ - +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */ +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-5.c b/gcc/testsuite/gcc.dg/vect/slp-perm-5.c index 9ed4d724..f44f44f 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-perm-5.c +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-5.c @@ -73,6 +73,9 @@ int main (int argc, const char* argv[]) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_perm } } } */ - +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */ +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-6.c b/gcc/testsuite/gcc.dg/vect/slp-perm-6.c index 392d36f..551734a 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-perm-6.c +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-6.c @@ -72,5 +72,8 @@ int main (int argc, const char* argv[]) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_perm } } } */ - +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_load_lanes } } } */ +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-7.c b/gcc/testsuite/gcc.dg/vect/slp-perm-7.c index 4352334..8b66142 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-perm-7.c +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-7.c @@ -71,6 +71,8 @@ int main (int argc, const char* argv[]) } /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_perm } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */ - - +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */ +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-8.c b/gcc/testsuite/gcc.dg/vect/slp-perm-8.c index 3bdd150..b1e9a10 100644 --- a/gcc/testsuite/gcc.dg/vect/slp-perm-8.c +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-8.c @@ -54,5 +54,8 @@ int main (int argc, const char* argv[]) /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target { vect_perm_byte && vect_char_mult } } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_perm_byte && {! vect_char_mult } } } } } */ -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm_byte } } } */ - +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm_byte && {! vect_load_lanes } } } } } */ +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */ +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm_byte && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */ +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 4e349e9..0e6d2b6 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -4823,6 +4823,25 @@ proc check_effective_target_vect_element_align { } { return $et_vect_element_align } +# Return 1 if the target supports vector LOAD_LANES operations, 0 otherwise. + +proc check_effective_target_vect_load_lanes { } { + global et_vect_load_lanes + + if [info exists et_vect_load_lanes] { + verbose "check_effective_target_vect_load_lanes: using cached result" 2 + } else { + set et_vect_load_lanes 0 + if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok]) + || [istarget aarch64*-*-*] } { + set et_vect_load_lanes 1 + } + } + + verbose "check_effective_target_vect_load_lanes: returning $et_vect_load_lanes" 2 + return $et_vect_load_lanes +} + # Return 1 if the target supports vector conditional operations, 0 otherwise. proc check_effective_target_vect_condition { } { diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index b893682..b53767f 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -1793,6 +1793,36 @@ vect_analyze_slp_instance (vec_info *vinfo, } } + /* If the loads and stores can be handled with load/store-lane + instructions do not generate this SLP instance. */ + if (is_a <loop_vec_info> (vinfo) + && loads_permuted + && dr && vect_store_lanes_supported (vectype, group_size)) + { + slp_tree load_node; + FOR_EACH_VEC_ELT (loads, i, load_node) + { + gimple *first_stmt = GROUP_FIRST_ELEMENT + (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0])); + stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt); + /* Use SLP for strided accesses (or if we can't load-lanes). */ + if (STMT_VINFO_STRIDED_P (stmt_vinfo) + || ! vect_load_lanes_supported + (STMT_VINFO_VECTYPE (stmt_vinfo), + GROUP_SIZE (stmt_vinfo))) + break; + } + if (i == loads.length ()) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "Built SLP cancelled: can use " + "load/store-lanes\n"); + vect_free_slp_instance (new_instance); + return false; + } + } + vinfo->slp_instances.safe_push (new_instance); if (dump_enabled_p ())