diff mbox

Fix PR68707

Message ID 1452252616-15125-1-git-send-email-alan.lawrence@arm.com
State New
Headers show

Commit Message

Alan Lawrence Jan. 8, 2016, 11:30 a.m. UTC
Here's an alternative patch, using the hunk from
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68707#c16, which 'improves' (heh,
we think) on the previous by allowing SLP to proceed where the loads are
strided, for example, slp-perm-11.c. Also I fix the testsuite failures (looking
for SLP where we've now decided to do load/store-lanes instead). I used simple
scan-tree-dump (rather than -times) because load_lanes typically appears many
times in the log for each instance output, and it felt fragile to look for a
specific number.

Note, this doesn't fix PR67323 - that's an example where unpermuted SLP is
still (a bit) worse than load-lanes, as it needs unrolling * 2. The
previously-posted patch doesn't fix this either, however
(https://gcc.gnu.org/ml/gcc-patches/2015-12/msg01620.html).

Bootstrapped + check-gcc + check-g++ on x86_64 (no changes), arm and aarch64
(various new scan-tree-dump tests all passing, and PASS->FAIL for O3-pr36098.c).

gcc/ChangeLog:

2016-01-XX  Alan Lawrence  <alan.lawrence@arm.com>
	Richard Biener  <rguenther@suse.de>

	* tree-vect-slp.c (vect_analyze_slp_instance): Cancel permuted SLP
	instances that can be handled via vect_load_lanes.

gcc/testsuite/ChangeLog:

	* lib/target-supports.exp (check_effective_target_vect_load_lanes): New.
	* gcc.dg/vect/slp-perm-1.c: Look for vect_load_lanes instead of SLP
	on platforms supporting it.
	* gcc.dg/vect/slp-perm-2.c: Likewise.
	* gcc.dg/vect/slp-perm-3.c: Likewise.
	* gcc.dg/vect/slp-perm-5.c: Likewise.
	* gcc.dg/vect/slp-perm-7.c: Likewise.
	* gcc.dg/vect/slp-perm-8.c: Likewise.
	* gcc.dg/vect/slp-perm-6.c: Look for vect_load_lanes in addition to SLP
	on platforms supporting it.
---
 gcc/testsuite/gcc.dg/vect/slp-perm-1.c |  6 +++++-
 gcc/testsuite/gcc.dg/vect/slp-perm-2.c |  7 +++++--
 gcc/testsuite/gcc.dg/vect/slp-perm-3.c |  7 +++++--
 gcc/testsuite/gcc.dg/vect/slp-perm-5.c |  7 +++++--
 gcc/testsuite/gcc.dg/vect/slp-perm-6.c |  7 +++++--
 gcc/testsuite/gcc.dg/vect/slp-perm-7.c |  8 +++++---
 gcc/testsuite/gcc.dg/vect/slp-perm-8.c |  7 +++++--
 gcc/testsuite/lib/target-supports.exp  | 19 +++++++++++++++++++
 gcc/tree-vect-slp.c                    | 30 ++++++++++++++++++++++++++++++
 9 files changed, 84 insertions(+), 14 deletions(-)

Comments

Richard Biener Jan. 8, 2016, 11:45 a.m. UTC | #1
On Fri, 8 Jan 2016, Alan Lawrence wrote:

> Here's an alternative patch, using the hunk from
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68707#c16, which 'improves' (heh,
> we think) on the previous by allowing SLP to proceed where the loads are
> strided, for example, slp-perm-11.c. Also I fix the testsuite failures (looking
> for SLP where we've now decided to do load/store-lanes instead). I used simple
> scan-tree-dump (rather than -times) because load_lanes typically appears many
> times in the log for each instance output, and it felt fragile to look for a
> specific number.
> 
> Note, this doesn't fix PR67323 - that's an example where unpermuted SLP is
> still (a bit) worse than load-lanes, as it needs unrolling * 2. The
> previously-posted patch doesn't fix this either, however
> (https://gcc.gnu.org/ml/gcc-patches/2015-12/msg01620.html).
> 
> Bootstrapped + check-gcc + check-g++ on x86_64 (no changes), arm and aarch64
> (various new scan-tree-dump tests all passing, and PASS->FAIL for O3-pr36098.c).

Looks good to me.  Ok if arm maintainers agree.

Thanks,
Richard.

> gcc/ChangeLog:
> 
> 2016-01-XX  Alan Lawrence  <alan.lawrence@arm.com>
> 	Richard Biener  <rguenther@suse.de>
> 
> 	* tree-vect-slp.c (vect_analyze_slp_instance): Cancel permuted SLP
> 	instances that can be handled via vect_load_lanes.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* lib/target-supports.exp (check_effective_target_vect_load_lanes): New.
> 	* gcc.dg/vect/slp-perm-1.c: Look for vect_load_lanes instead of SLP
> 	on platforms supporting it.
> 	* gcc.dg/vect/slp-perm-2.c: Likewise.
> 	* gcc.dg/vect/slp-perm-3.c: Likewise.
> 	* gcc.dg/vect/slp-perm-5.c: Likewise.
> 	* gcc.dg/vect/slp-perm-7.c: Likewise.
> 	* gcc.dg/vect/slp-perm-8.c: Likewise.
> 	* gcc.dg/vect/slp-perm-6.c: Look for vect_load_lanes in addition to SLP
> 	on platforms supporting it.
> ---
>  gcc/testsuite/gcc.dg/vect/slp-perm-1.c |  6 +++++-
>  gcc/testsuite/gcc.dg/vect/slp-perm-2.c |  7 +++++--
>  gcc/testsuite/gcc.dg/vect/slp-perm-3.c |  7 +++++--
>  gcc/testsuite/gcc.dg/vect/slp-perm-5.c |  7 +++++--
>  gcc/testsuite/gcc.dg/vect/slp-perm-6.c |  7 +++++--
>  gcc/testsuite/gcc.dg/vect/slp-perm-7.c |  8 +++++---
>  gcc/testsuite/gcc.dg/vect/slp-perm-8.c |  7 +++++--
>  gcc/testsuite/lib/target-supports.exp  | 19 +++++++++++++++++++
>  gcc/tree-vect-slp.c                    | 30 ++++++++++++++++++++++++++++++
>  9 files changed, 84 insertions(+), 14 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-1.c b/gcc/testsuite/gcc.dg/vect/slp-perm-1.c
> index 2b6c134..6d0d66f 100644
> --- a/gcc/testsuite/gcc.dg/vect/slp-perm-1.c
> +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-1.c
> @@ -58,5 +58,9 @@ int main (int argc, const char* argv[])
>  }
>  
>  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_perm } } } */
> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */
> +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */
> +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
> +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
>  
> diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-2.c b/gcc/testsuite/gcc.dg/vect/slp-perm-2.c
> index da50e16..34b413d 100644
> --- a/gcc/testsuite/gcc.dg/vect/slp-perm-2.c
> +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-2.c
> @@ -54,5 +54,8 @@ int main (int argc, const char* argv[])
>  }
>  
>  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_perm } } } */
> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */
> -
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */
> +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */
> +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
> +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-3.c b/gcc/testsuite/gcc.dg/vect/slp-perm-3.c
> index 1d33f9b..ec13e0f 100644
> --- a/gcc/testsuite/gcc.dg/vect/slp-perm-3.c
> +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-3.c
> @@ -67,6 +67,9 @@ int main (int argc, const char* argv[])
>  }
>  
>  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_perm } } } */
> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */
> -
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */
> +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */
> +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
> +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
>  
> diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-5.c b/gcc/testsuite/gcc.dg/vect/slp-perm-5.c
> index 9ed4d724..f44f44f 100644
> --- a/gcc/testsuite/gcc.dg/vect/slp-perm-5.c
> +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-5.c
> @@ -73,6 +73,9 @@ int main (int argc, const char* argv[])
>  }
>  
>  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_perm } } } */
> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_perm } } } */
> -
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */
> +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */
> +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
> +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
>  
> diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-6.c b/gcc/testsuite/gcc.dg/vect/slp-perm-6.c
> index 392d36f..551734a 100644
> --- a/gcc/testsuite/gcc.dg/vect/slp-perm-6.c
> +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-6.c
> @@ -72,5 +72,8 @@ int main (int argc, const char* argv[])
>  }
>  
>  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_perm } } } */
> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_perm } } } */
> -
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_load_lanes } } } */
> +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */
> +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
> +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-7.c b/gcc/testsuite/gcc.dg/vect/slp-perm-7.c
> index 4352334..8b66142 100644
> --- a/gcc/testsuite/gcc.dg/vect/slp-perm-7.c
> +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-7.c
> @@ -71,6 +71,8 @@ int main (int argc, const char* argv[])
>  }
>  
>  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect"  { target vect_perm } } } */
> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */
> -
> -
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */
> +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */
> +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
> +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-8.c b/gcc/testsuite/gcc.dg/vect/slp-perm-8.c
> index 3bdd150..b1e9a10 100644
> --- a/gcc/testsuite/gcc.dg/vect/slp-perm-8.c
> +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-8.c
> @@ -54,5 +54,8 @@ int main (int argc, const char* argv[])
>  
>  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target { vect_perm_byte && vect_char_mult } } } } */
>  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_perm_byte && {! vect_char_mult } } } } } */
> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm_byte } } } */
> -
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm_byte && {! vect_load_lanes } } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */
> +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm_byte && vect_load_lanes } } } } */
> +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
> +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
> index 4e349e9..0e6d2b6 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -4823,6 +4823,25 @@ proc check_effective_target_vect_element_align { } {
>      return $et_vect_element_align
>  }
>  
> +# Return 1 if the target supports vector LOAD_LANES operations, 0 otherwise.
> +
> +proc check_effective_target_vect_load_lanes { } {
> +    global et_vect_load_lanes
> +
> +    if [info exists et_vect_load_lanes] {
> +	verbose "check_effective_target_vect_load_lanes: using cached result" 2
> +    } else {
> +	set et_vect_load_lanes 0
> +	if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok])
> +	     || [istarget aarch64*-*-*] } {
> +	    set et_vect_load_lanes 1
> +	}
> +    }
> +
> +    verbose "check_effective_target_vect_load_lanes: returning $et_vect_load_lanes" 2
> +    return $et_vect_load_lanes
> +}
> +
>  # Return 1 if the target supports vector conditional operations, 0 otherwise.
>  
>  proc check_effective_target_vect_condition { } {
> diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
> index b893682..b53767f 100644
> --- a/gcc/tree-vect-slp.c
> +++ b/gcc/tree-vect-slp.c
> @@ -1793,6 +1793,36 @@ vect_analyze_slp_instance (vec_info *vinfo,
>              }
>          }
>  
> +      /* If the loads and stores can be handled with load/store-lane
> +	 instructions do not generate this SLP instance.  */
> +      if (is_a <loop_vec_info> (vinfo)
> +	  && loads_permuted
> +	  && dr && vect_store_lanes_supported (vectype, group_size))
> +	{
> +	  slp_tree load_node;
> +	  FOR_EACH_VEC_ELT (loads, i, load_node)
> +	    {
> +	      gimple *first_stmt = GROUP_FIRST_ELEMENT
> +		  (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
> +	      stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
> +	      /* Use SLP for strided accesses (or if we can't load-lanes).  */
> +	      if (STMT_VINFO_STRIDED_P (stmt_vinfo)
> +		  || ! vect_load_lanes_supported
> +			(STMT_VINFO_VECTYPE (stmt_vinfo),
> +			 GROUP_SIZE (stmt_vinfo)))
> +		break;
> +	    }
> +	  if (i == loads.length ())
> +	    {
> +	      if (dump_enabled_p ())
> +		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +				 "Built SLP cancelled: can use "
> +				 "load/store-lanes\n");
> +	      vect_free_slp_instance (new_instance);
> +	      return false;
> +	    }
> +	}
> +
>        vinfo->slp_instances.safe_push (new_instance);
>  
>        if (dump_enabled_p ())
>
Richard Earnshaw (lists) Jan. 8, 2016, 5:15 p.m. UTC | #2
On 08/01/16 11:45, Richard Biener wrote:
> On Fri, 8 Jan 2016, Alan Lawrence wrote:
> 
>> Here's an alternative patch, using the hunk from
>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68707#c16, which 'improves' (heh,
>> we think) on the previous by allowing SLP to proceed where the loads are
>> strided, for example, slp-perm-11.c. Also I fix the testsuite failures (looking
>> for SLP where we've now decided to do load/store-lanes instead). I used simple
>> scan-tree-dump (rather than -times) because load_lanes typically appears many
>> times in the log for each instance output, and it felt fragile to look for a
>> specific number.
>>
>> Note, this doesn't fix PR67323 - that's an example where unpermuted SLP is
>> still (a bit) worse than load-lanes, as it needs unrolling * 2. The
>> previously-posted patch doesn't fix this either, however
>> (https://gcc.gnu.org/ml/gcc-patches/2015-12/msg01620.html).
>>
>> Bootstrapped + check-gcc + check-g++ on x86_64 (no changes), arm and aarch64
>> (various new scan-tree-dump tests all passing, and PASS->FAIL for O3-pr36098.c).
> 
> Looks good to me.  Ok if arm maintainers agree.
> 

OK by me.

R.

> Thanks,
> Richard.
> 
>> gcc/ChangeLog:
>>
>> 2016-01-XX  Alan Lawrence  <alan.lawrence@arm.com>
>> 	Richard Biener  <rguenther@suse.de>
>>
>> 	* tree-vect-slp.c (vect_analyze_slp_instance): Cancel permuted SLP
>> 	instances that can be handled via vect_load_lanes.
>>
>> gcc/testsuite/ChangeLog:
>>
>> 	* lib/target-supports.exp (check_effective_target_vect_load_lanes): New.
>> 	* gcc.dg/vect/slp-perm-1.c: Look for vect_load_lanes instead of SLP
>> 	on platforms supporting it.
>> 	* gcc.dg/vect/slp-perm-2.c: Likewise.
>> 	* gcc.dg/vect/slp-perm-3.c: Likewise.
>> 	* gcc.dg/vect/slp-perm-5.c: Likewise.
>> 	* gcc.dg/vect/slp-perm-7.c: Likewise.
>> 	* gcc.dg/vect/slp-perm-8.c: Likewise.
>> 	* gcc.dg/vect/slp-perm-6.c: Look for vect_load_lanes in addition to SLP
>> 	on platforms supporting it.
>> ---
>>  gcc/testsuite/gcc.dg/vect/slp-perm-1.c |  6 +++++-
>>  gcc/testsuite/gcc.dg/vect/slp-perm-2.c |  7 +++++--
>>  gcc/testsuite/gcc.dg/vect/slp-perm-3.c |  7 +++++--
>>  gcc/testsuite/gcc.dg/vect/slp-perm-5.c |  7 +++++--
>>  gcc/testsuite/gcc.dg/vect/slp-perm-6.c |  7 +++++--
>>  gcc/testsuite/gcc.dg/vect/slp-perm-7.c |  8 +++++---
>>  gcc/testsuite/gcc.dg/vect/slp-perm-8.c |  7 +++++--
>>  gcc/testsuite/lib/target-supports.exp  | 19 +++++++++++++++++++
>>  gcc/tree-vect-slp.c                    | 30 ++++++++++++++++++++++++++++++
>>  9 files changed, 84 insertions(+), 14 deletions(-)
>>
>> diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-1.c b/gcc/testsuite/gcc.dg/vect/slp-perm-1.c
>> index 2b6c134..6d0d66f 100644
>> --- a/gcc/testsuite/gcc.dg/vect/slp-perm-1.c
>> +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-1.c
>> @@ -58,5 +58,9 @@ int main (int argc, const char* argv[])
>>  }
>>  
>>  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_perm } } } */
>> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */
>> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */
>> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */
>> +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */
>> +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
>> +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
>>  
>> diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-2.c b/gcc/testsuite/gcc.dg/vect/slp-perm-2.c
>> index da50e16..34b413d 100644
>> --- a/gcc/testsuite/gcc.dg/vect/slp-perm-2.c
>> +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-2.c
>> @@ -54,5 +54,8 @@ int main (int argc, const char* argv[])
>>  }
>>  
>>  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_perm } } } */
>> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */
>> -
>> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */
>> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */
>> +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */
>> +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
>> +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
>> diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-3.c b/gcc/testsuite/gcc.dg/vect/slp-perm-3.c
>> index 1d33f9b..ec13e0f 100644
>> --- a/gcc/testsuite/gcc.dg/vect/slp-perm-3.c
>> +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-3.c
>> @@ -67,6 +67,9 @@ int main (int argc, const char* argv[])
>>  }
>>  
>>  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_perm } } } */
>> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */
>> -
>> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */
>> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */
>> +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */
>> +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
>> +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
>>  
>> diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-5.c b/gcc/testsuite/gcc.dg/vect/slp-perm-5.c
>> index 9ed4d724..f44f44f 100644
>> --- a/gcc/testsuite/gcc.dg/vect/slp-perm-5.c
>> +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-5.c
>> @@ -73,6 +73,9 @@ int main (int argc, const char* argv[])
>>  }
>>  
>>  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_perm } } } */
>> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_perm } } } */
>> -
>> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */
>> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */
>> +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */
>> +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
>> +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
>>  
>> diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-6.c b/gcc/testsuite/gcc.dg/vect/slp-perm-6.c
>> index 392d36f..551734a 100644
>> --- a/gcc/testsuite/gcc.dg/vect/slp-perm-6.c
>> +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-6.c
>> @@ -72,5 +72,8 @@ int main (int argc, const char* argv[])
>>  }
>>  
>>  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_perm } } } */
>> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_perm } } } */
>> -
>> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */
>> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_load_lanes } } } */
>> +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */
>> +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
>> +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
>> diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-7.c b/gcc/testsuite/gcc.dg/vect/slp-perm-7.c
>> index 4352334..8b66142 100644
>> --- a/gcc/testsuite/gcc.dg/vect/slp-perm-7.c
>> +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-7.c
>> @@ -71,6 +71,8 @@ int main (int argc, const char* argv[])
>>  }
>>  
>>  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect"  { target vect_perm } } } */
>> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */
>> -
>> -
>> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */
>> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */
>> +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */
>> +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
>> +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
>> diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-8.c b/gcc/testsuite/gcc.dg/vect/slp-perm-8.c
>> index 3bdd150..b1e9a10 100644
>> --- a/gcc/testsuite/gcc.dg/vect/slp-perm-8.c
>> +++ b/gcc/testsuite/gcc.dg/vect/slp-perm-8.c
>> @@ -54,5 +54,8 @@ int main (int argc, const char* argv[])
>>  
>>  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target { vect_perm_byte && vect_char_mult } } } } */
>>  /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_perm_byte && {! vect_char_mult } } } } } */
>> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm_byte } } } */
>> -
>> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm_byte && {! vect_load_lanes } } } } } */
>> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */
>> +/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm_byte && vect_load_lanes } } } } */
>> +/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
>> +/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
>> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
>> index 4e349e9..0e6d2b6 100644
>> --- a/gcc/testsuite/lib/target-supports.exp
>> +++ b/gcc/testsuite/lib/target-supports.exp
>> @@ -4823,6 +4823,25 @@ proc check_effective_target_vect_element_align { } {
>>      return $et_vect_element_align
>>  }
>>  
>> +# Return 1 if the target supports vector LOAD_LANES operations, 0 otherwise.
>> +
>> +proc check_effective_target_vect_load_lanes { } {
>> +    global et_vect_load_lanes
>> +
>> +    if [info exists et_vect_load_lanes] {
>> +	verbose "check_effective_target_vect_load_lanes: using cached result" 2
>> +    } else {
>> +	set et_vect_load_lanes 0
>> +	if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok])
>> +	     || [istarget aarch64*-*-*] } {
>> +	    set et_vect_load_lanes 1
>> +	}
>> +    }
>> +
>> +    verbose "check_effective_target_vect_load_lanes: returning $et_vect_load_lanes" 2
>> +    return $et_vect_load_lanes
>> +}
>> +
>>  # Return 1 if the target supports vector conditional operations, 0 otherwise.
>>  
>>  proc check_effective_target_vect_condition { } {
>> diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
>> index b893682..b53767f 100644
>> --- a/gcc/tree-vect-slp.c
>> +++ b/gcc/tree-vect-slp.c
>> @@ -1793,6 +1793,36 @@ vect_analyze_slp_instance (vec_info *vinfo,
>>              }
>>          }
>>  
>> +      /* If the loads and stores can be handled with load/store-lane
>> +	 instructions do not generate this SLP instance.  */
>> +      if (is_a <loop_vec_info> (vinfo)
>> +	  && loads_permuted
>> +	  && dr && vect_store_lanes_supported (vectype, group_size))
>> +	{
>> +	  slp_tree load_node;
>> +	  FOR_EACH_VEC_ELT (loads, i, load_node)
>> +	    {
>> +	      gimple *first_stmt = GROUP_FIRST_ELEMENT
>> +		  (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
>> +	      stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
>> +	      /* Use SLP for strided accesses (or if we can't load-lanes).  */
>> +	      if (STMT_VINFO_STRIDED_P (stmt_vinfo)
>> +		  || ! vect_load_lanes_supported
>> +			(STMT_VINFO_VECTYPE (stmt_vinfo),
>> +			 GROUP_SIZE (stmt_vinfo)))
>> +		break;
>> +	    }
>> +	  if (i == loads.length ())
>> +	    {
>> +	      if (dump_enabled_p ())
>> +		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>> +				 "Built SLP cancelled: can use "
>> +				 "load/store-lanes\n");
>> +	      vect_free_slp_instance (new_instance);
>> +	      return false;
>> +	    }
>> +	}
>> +
>>        vinfo->slp_instances.safe_push (new_instance);
>>  
>>        if (dump_enabled_p ())
>>
>
diff mbox

Patch

diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-1.c b/gcc/testsuite/gcc.dg/vect/slp-perm-1.c
index 2b6c134..6d0d66f 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-perm-1.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-perm-1.c
@@ -58,5 +58,9 @@  int main (int argc, const char* argv[])
 }
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_perm } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */
+/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
+/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
 
diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-2.c b/gcc/testsuite/gcc.dg/vect/slp-perm-2.c
index da50e16..34b413d 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-perm-2.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-perm-2.c
@@ -54,5 +54,8 @@  int main (int argc, const char* argv[])
 }
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_perm } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */
-
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */
+/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
+/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-3.c b/gcc/testsuite/gcc.dg/vect/slp-perm-3.c
index 1d33f9b..ec13e0f 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-perm-3.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-perm-3.c
@@ -67,6 +67,9 @@  int main (int argc, const char* argv[])
 }
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_perm } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */
-
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */
+/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
+/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
 
diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-5.c b/gcc/testsuite/gcc.dg/vect/slp-perm-5.c
index 9ed4d724..f44f44f 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-perm-5.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-perm-5.c
@@ -73,6 +73,9 @@  int main (int argc, const char* argv[])
 }
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_perm } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_perm } } } */
-
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */
+/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
+/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
 
diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-6.c b/gcc/testsuite/gcc.dg/vect/slp-perm-6.c
index 392d36f..551734a 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-perm-6.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-perm-6.c
@@ -72,5 +72,8 @@  int main (int argc, const char* argv[])
 }
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target vect_perm } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_perm } } } */
-
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_load_lanes } } } */
+/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
+/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-7.c b/gcc/testsuite/gcc.dg/vect/slp-perm-7.c
index 4352334..8b66142 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-perm-7.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-perm-7.c
@@ -71,6 +71,8 @@  int main (int argc, const char* argv[])
 }
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect"  { target vect_perm } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */
-
-
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */
+/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
+/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-8.c b/gcc/testsuite/gcc.dg/vect/slp-perm-8.c
index 3bdd150..b1e9a10 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-perm-8.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-perm-8.c
@@ -54,5 +54,8 @@  int main (int argc, const char* argv[])
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target { vect_perm_byte && vect_char_mult } } } } */
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_perm_byte && {! vect_char_mult } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm_byte } } } */
-
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm_byte && {! vect_load_lanes } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */
+/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" { target { vect_perm_byte && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
+/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 4e349e9..0e6d2b6 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -4823,6 +4823,25 @@  proc check_effective_target_vect_element_align { } {
     return $et_vect_element_align
 }
 
+# Return 1 if the target supports vector LOAD_LANES operations, 0 otherwise.
+
+proc check_effective_target_vect_load_lanes { } {
+    global et_vect_load_lanes
+
+    if [info exists et_vect_load_lanes] {
+	verbose "check_effective_target_vect_load_lanes: using cached result" 2
+    } else {
+	set et_vect_load_lanes 0
+	if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok])
+	     || [istarget aarch64*-*-*] } {
+	    set et_vect_load_lanes 1
+	}
+    }
+
+    verbose "check_effective_target_vect_load_lanes: returning $et_vect_load_lanes" 2
+    return $et_vect_load_lanes
+}
+
 # Return 1 if the target supports vector conditional operations, 0 otherwise.
 
 proc check_effective_target_vect_condition { } {
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index b893682..b53767f 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -1793,6 +1793,36 @@  vect_analyze_slp_instance (vec_info *vinfo,
             }
         }
 
+      /* If the loads and stores can be handled with load/store-lane
+	 instructions do not generate this SLP instance.  */
+      if (is_a <loop_vec_info> (vinfo)
+	  && loads_permuted
+	  && dr && vect_store_lanes_supported (vectype, group_size))
+	{
+	  slp_tree load_node;
+	  FOR_EACH_VEC_ELT (loads, i, load_node)
+	    {
+	      gimple *first_stmt = GROUP_FIRST_ELEMENT
+		  (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
+	      stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
+	      /* Use SLP for strided accesses (or if we can't load-lanes).  */
+	      if (STMT_VINFO_STRIDED_P (stmt_vinfo)
+		  || ! vect_load_lanes_supported
+			(STMT_VINFO_VECTYPE (stmt_vinfo),
+			 GROUP_SIZE (stmt_vinfo)))
+		break;
+	    }
+	  if (i == loads.length ())
+	    {
+	      if (dump_enabled_p ())
+		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				 "Built SLP cancelled: can use "
+				 "load/store-lanes\n");
+	      vect_free_slp_instance (new_instance);
+	      return false;
+	    }
+	}
+
       vinfo->slp_instances.safe_push (new_instance);
 
       if (dump_enabled_p ())