
[1/3,aarch64] Add aarch64 support for vec_widen_add, vec_widen_sub patterns

Message ID VI1PR0802MB2205379B83D99FB8BBD43DBFF5E70@VI1PR0802MB2205.eurprd08.prod.outlook.com
State New
Series [1/3,aarch64] Add aarch64 support for vec_widen_add, vec_widen_sub patterns

Commit Message

Joel Hutton Nov. 12, 2020, 7:34 p.m. UTC
Hi all,

This patch adds backend patterns for vec_widen_add, vec_widen_sub on aarch64.

All 3 patches together bootstrapped and regression tested on aarch64.

Ok for stage 1?

gcc/ChangeLog:

2020-11-12  Joel Hutton  <joel.hutton@arm.com>

        * config/aarch64/aarch64-simd.md (vec_widen_saddl_lo_<mode>,
        vec_widen_saddl_hi_<mode>, vec_widen_ssubl_lo_<mode>,
        vec_widen_ssubl_hi_<mode>, vec_widen_uaddl_lo_<mode>,
        vec_widen_uaddl_hi_<mode>, vec_widen_usubl_lo_<mode>,
        vec_widen_usubl_hi_<mode>): New patterns.
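
For context, once the rest of this series adds the matching middle-end
support, these optabs let the vectorizer use a single widening
instruction for loops of the following shape (an illustrative example,
not part of the patch; exact code generation depends on flags):

#include <stdint.h>

/* On aarch64 at -O3, the widened add below can map to saddl/saddl2
   rather than extending both inputs before a full-width add.  */
void
widen_add (int32_t *restrict out, const int16_t *restrict a,
           const int16_t *restrict b, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = (int32_t) a[i] + (int32_t) b[i];
}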

Comments

Richard Sandiford Nov. 13, 2020, 10:02 a.m. UTC | #1
Joel Hutton via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> Hi all,
>
> This patch adds backend patterns for vec_widen_add, vec_widen_sub on aarch64.
>
> All 3 patches together bootstrapped and regression tested on aarch64.
>
> Ok for stage 1?
>
> gcc/ChangeLog:
>
> 2020-11-12  Joel Hutton  <joel.hutton@arm.com>
>
>         * config/aarch64/aarch64-simd.md (vec_widen_saddl_lo_<mode>,
>         vec_widen_saddl_hi_<mode>, vec_widen_ssubl_lo_<mode>,
>         vec_widen_ssubl_hi_<mode>, vec_widen_uaddl_lo_<mode>,
>         vec_widen_uaddl_hi_<mode>, vec_widen_usubl_lo_<mode>,
>         vec_widen_usubl_hi_<mode>): New patterns.
>
> From 3e47bc562b83417a048e780bcde52fb2c9617df3 Mon Sep 17 00:00:00 2001
> From: Joel Hutton <joel.hutton@arm.com>
> Date: Mon, 9 Nov 2020 15:35:57 +0000
> Subject: [PATCH 1/3] [aarch64] Add vec_widen patterns to aarch64
>
> Add widening add and subtract patterns to the aarch64
> backend.
> ---
>  gcc/config/aarch64/aarch64-simd.md | 94 ++++++++++++++++++++++++++++++
>  1 file changed, 94 insertions(+)
>
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index 2cf6fe9154a2ee1b21ad9e8e2a6109805022be7f..b4f56a2295926f027bd53e7456eec729af0cd6df 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -3382,6 +3382,100 @@
>    [(set_attr "type" "neon_<ADDSUB:optab>_long")]
>  )
>  
> +(define_expand "vec_widen_saddl_lo_<mode>"
> +  [(match_operand:<VWIDE> 0 "register_operand")
> +   (match_operand:VQW 1 "register_operand")
> +   (match_operand:VQW 2 "register_operand")]
> +  "TARGET_SIMD"
> +{
> +  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
> +  emit_insn (gen_aarch64_saddl<mode>_lo_internal (operands[0], operands[1],
> +						  operands[2], p));
> +  DONE;
> +})
> +
> +(define_expand "vec_widen_ssubl_lo_<mode>"
> +  [(match_operand:<VWIDE> 0 "register_operand")
> +   (match_operand:VQW 1 "register_operand")
> +   (match_operand:VQW 2 "register_operand")]
> +  "TARGET_SIMD"
> +{
> +  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
> +  emit_insn (gen_aarch64_ssubl<mode>_lo_internal (operands[0], operands[1],
> +						  operands[2], p));
> +  DONE;
> +})
> +(define_expand "vec_widen_saddl_hi_<mode>"
> +  [(match_operand:<VWIDE> 0 "register_operand")
> +   (match_operand:VQW 1 "register_operand")
> +   (match_operand:VQW 2 "register_operand")]
> +  "TARGET_SIMD"
> +{
> +  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
> +  emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
> +						  operands[2], p));
> +  DONE;
> +})
> +
> +(define_expand "vec_widen_ssubl_hi_<mode>"
> +  [(match_operand:<VWIDE> 0 "register_operand")
> +   (match_operand:VQW 1 "register_operand")
> +   (match_operand:VQW 2 "register_operand")]
> +  "TARGET_SIMD"
> +{
> +  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
> +  emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
> +						  operands[2], p));
> +  DONE;
> +})
> +(define_expand "vec_widen_uaddl_lo_<mode>"
> +  [(match_operand:<VWIDE> 0 "register_operand")
> +   (match_operand:VQW 1 "register_operand")
> +   (match_operand:VQW 2 "register_operand")]
> +  "TARGET_SIMD"
> +{
> +  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
> +  emit_insn (gen_aarch64_uaddl<mode>_lo_internal (operands[0], operands[1],
> +						  operands[2], p));
> +  DONE;
> +})
> +
> +(define_expand "vec_widen_usubl_lo_<mode>"
> +  [(match_operand:<VWIDE> 0 "register_operand")
> +   (match_operand:VQW 1 "register_operand")
> +   (match_operand:VQW 2 "register_operand")]
> +  "TARGET_SIMD"
> +{
> +  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
> +  emit_insn (gen_aarch64_usubl<mode>_lo_internal (operands[0], operands[1],
> +						  operands[2], p));
> +  DONE;
> +})
> +
> +(define_expand "vec_widen_uaddl_hi_<mode>"
> +  [(match_operand:<VWIDE> 0 "register_operand")
> +   (match_operand:VQW 1 "register_operand")
> +   (match_operand:VQW 2 "register_operand")]
> +  "TARGET_SIMD"
> +{
> +  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
> +  emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
> +						  operands[2], p));
> +  DONE;
> +})
> +
> +(define_expand "vec_widen_usubl_hi_<mode>"
> +  [(match_operand:<VWIDE> 0 "register_operand")
> +   (match_operand:VQW 1 "register_operand")
> +   (match_operand:VQW 2 "register_operand")]
> +  "TARGET_SIMD"
> +{
> +  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
> +  emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
> +						  operands[2], p));
> +  DONE;
> +})

There are ways in which we could reduce the amount of cut-&-paste here,
but I guess everything is a trade-off between clarity and compactness.
One extreme is to write them all out explicitly, another extreme would
be to have one define_expand and various iterators and attributes.

I think the vec_widen_<su>mult_*_<mode> patterns strike a good balance:
they use ANY_EXTEND to hide the sign difference while still having
separate hi and lo patterns:

(define_expand "vec_widen_<su>mult_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
 "TARGET_SIMD"
 {
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
   emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
						       operands[1],
						       operands[2], p));
   DONE;
 }
)

This reduces the amount of cut-&-paste by half without losing much
in terms of clarity.
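
For reference, the iterator and code attribute involved come from
gcc/config/aarch64/iterators.md (abridged here to the two codes used):

(define_code_iterator ANY_EXTEND [sign_extend zero_extend])

;; <su> gives the sign letter of each extension code (abridged).
(define_code_attr su [(sign_extend "s") (zero_extend "u")])

A single expander written with these generates both the signed and
unsigned named patterns.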

For the record: I had to double-check which way round the hi/lo optabs
are for big-endian: whether “hi” is “high lane numbers” or “high part
of the vector register”.  But I see it's the latter, so I agree that
the aarch64_simd_vect_par_cnst_half calls are the right way around
for big-endian.
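
For anyone double-checking the same point: the helper builds the
lane-selection PARALLEL and inverts "high" on big-endian precisely so
that hi/lo name register halves rather than lane numbers.  A
paraphrased sketch of the aarch64.c implementation (not the verbatim
source):

rtx
aarch64_simd_vect_par_cnst_half (machine_mode mode, int nunits, bool high)
{
  /* On big-endian the high half of the register holds the *low* lane
     numbers, so invert HIGH to pick the right base lane.  */
  int base = (high != BYTES_BIG_ENDIAN) ? nunits / 2 : 0;
  rtvec v = rtvec_alloc (nunits / 2);

  for (int i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  return gen_rtx_PARALLEL (mode, v);
}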

Thanks,
Richard
Joel Hutton Nov. 13, 2020, 4:43 p.m. UTC | #2
Tests are still running, but I believe I've addressed the comment.

> There are ways in which we could reduce the amount of cut-&-paste here,
> but I guess everything is a trade-off between clarity and compactness.
> One extreme is to write them all out explicitly, another extreme would
> be to have one define_expand and various iterators and attributes.
>
> I think the vec_widen_<su>mult_*_<mode> patterns strike a good balance:
> they use ANY_EXTEND to hide the sign difference while still having
> separate hi and lo patterns:

Done

gcc/ChangeLog:

2020-11-13  Joel Hutton  <joel.hutton@arm.com>

        * config/aarch64/aarch64-simd.md (vec_widen_<su>addl_lo_<mode>,
        vec_widen_<su>addl_hi_<mode>, vec_widen_<su>subl_lo_<mode>,
        vec_widen_<su>subl_hi_<mode>): New patterns.
Richard Sandiford Nov. 17, 2020, 1:26 p.m. UTC | #3
Joel Hutton <Joel.Hutton@arm.com> writes:
> Tests are still running, but I believe I've addressed the comment.
>
>> There are ways in which we could reduce the amount of cut-&-paste here,
>> but I guess everything is a trade-off between clarity and compactness.
>> One extreme is to write them all out explicitly, another extreme would
>> be to have one define_expand and various iterators and attributes.
>>
>> I think the vec_widen_<su>mult_*_<mode> patterns strike a good balance:
>> they use ANY_EXTEND to hide the sign difference while still having
>> separate hi and lo patterns:
>
> Done
>
> gcc/ChangeLog:
>
> 2020-11-13  Joel Hutton  <joel.hutton@arm.com>
>
>         * config/aarch64/aarch64-simd.md (vec_widen_<su>addl_lo_<mode>,
>         vec_widen_<su>addl_hi_<mode>, vec_widen_<su>subl_lo_<mode>,
>         vec_widen_<su>subl_hi_<mode>): New patterns.
>
> From c52fd11f5d471200c1292fad3bc04056e7721f06 Mon Sep 17 00:00:00 2001
> From: Joel Hutton <joel.hutton@arm.com>
> Date: Mon, 9 Nov 2020 15:35:57 +0000
> Subject: [PATCH 1/3] [aarch64] Add vec_widen patterns to aarch64
>
> Add widening add and subtract patterns to the aarch64
> backend. These allow taking vectors of N elements of size S
> and performing an add/subtract on the high or low half,
> widening the resulting elements and storing N/2 elements of size 2*S.
> These correspond to the addl, addl2, subl and subl2 instructions.
> ---
>  gcc/config/aarch64/aarch64-simd.md | 47 ++++++++++++++++++++++++++++++
>  1 file changed, 47 insertions(+)
>
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index 2cf6fe9154a..30299610635 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -3382,6 +3382,53 @@
>    [(set_attr "type" "neon_<ADDSUB:optab>_long")]
>  )
>  
> +(define_expand "vec_widen_<su>addl_lo_<mode>"
> +  [(match_operand:<VWIDE> 0 "register_operand")
> +   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
> +   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
> +  "TARGET_SIMD"
> +{
> +  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
> +  emit_insn (gen_aarch64_<su>addl<mode>_lo_internal (operands[0], operands[1],
> +						  operands[2], p));

Nit: operands[2] should be indented three more columns now that “s” and
“u” have changed to “<su>”.

OK with that change, thanks.

Richard
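
The v2 commit message's description maps onto these element-wise
semantics (an illustrative C sketch with invented names; lane numbering
shown for little-endian):

#include <stdint.h>

/* vec_widen_saddl_lo on V8HI -> V4SI: widen and add the low half
   (the saddl instruction).  */
void
saddl_lo (int32_t dst[4], const int16_t a[8], const int16_t b[8])
{
  for (int i = 0; i < 4; i++)
    dst[i] = (int32_t) a[i] + (int32_t) b[i];
}

/* vec_widen_saddl_hi: the same on lanes 4..7 (the saddl2 instruction);
   the ssubl/usubl variants subtract instead.  */
void
saddl_hi (int32_t dst[4], const int16_t a[8], const int16_t b[8])
{
  for (int i = 0; i < 4; i++)
    dst[i] = (int32_t) a[4 + i] + (int32_t) b[4 + i];
}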

Patch

From 3e47bc562b83417a048e780bcde52fb2c9617df3 Mon Sep 17 00:00:00 2001
From: Joel Hutton <joel.hutton@arm.com>
Date: Mon, 9 Nov 2020 15:35:57 +0000
Subject: [PATCH 1/3] [aarch64] Add vec_widen patterns to aarch64

Add widening add and subtract patterns to the aarch64
backend.
---
 gcc/config/aarch64/aarch64-simd.md | 94 ++++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 2cf6fe9154a2ee1b21ad9e8e2a6109805022be7f..b4f56a2295926f027bd53e7456eec729af0cd6df 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3382,6 +3382,100 @@ 
   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
 )
 
+(define_expand "vec_widen_saddl_lo_<mode>"
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (match_operand:VQW 1 "register_operand")
+   (match_operand:VQW 2 "register_operand")]
+  "TARGET_SIMD"
+{
+  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
+  emit_insn (gen_aarch64_saddl<mode>_lo_internal (operands[0], operands[1],
+						  operands[2], p));
+  DONE;
+})
+
+(define_expand "vec_widen_ssubl_lo_<mode>"
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (match_operand:VQW 1 "register_operand")
+   (match_operand:VQW 2 "register_operand")]
+  "TARGET_SIMD"
+{
+  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
+  emit_insn (gen_aarch64_ssubl<mode>_lo_internal (operands[0], operands[1],
+						  operands[2], p));
+  DONE;
+})
+(define_expand "vec_widen_saddl_hi_<mode>"
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (match_operand:VQW 1 "register_operand")
+   (match_operand:VQW 2 "register_operand")]
+  "TARGET_SIMD"
+{
+  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
+  emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
+						  operands[2], p));
+  DONE;
+})
+
+(define_expand "vec_widen_ssubl_hi_<mode>"
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (match_operand:VQW 1 "register_operand")
+   (match_operand:VQW 2 "register_operand")]
+  "TARGET_SIMD"
+{
+  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
+  emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
+						  operands[2], p));
+  DONE;
+})
+(define_expand "vec_widen_uaddl_lo_<mode>"
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (match_operand:VQW 1 "register_operand")
+   (match_operand:VQW 2 "register_operand")]
+  "TARGET_SIMD"
+{
+  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
+  emit_insn (gen_aarch64_uaddl<mode>_lo_internal (operands[0], operands[1],
+						  operands[2], p));
+  DONE;
+})
+
+(define_expand "vec_widen_usubl_lo_<mode>"
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (match_operand:VQW 1 "register_operand")
+   (match_operand:VQW 2 "register_operand")]
+  "TARGET_SIMD"
+{
+  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
+  emit_insn (gen_aarch64_usubl<mode>_lo_internal (operands[0], operands[1],
+						  operands[2], p));
+  DONE;
+})
+
+(define_expand "vec_widen_uaddl_hi_<mode>"
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (match_operand:VQW 1 "register_operand")
+   (match_operand:VQW 2 "register_operand")]
+  "TARGET_SIMD"
+{
+  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
+  emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
+						  operands[2], p));
+  DONE;
+})
+
+(define_expand "vec_widen_usubl_hi_<mode>"
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (match_operand:VQW 1 "register_operand")
+   (match_operand:VQW 2 "register_operand")]
+  "TARGET_SIMD"
+{
+  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
+  emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
+						  operands[2], p));
+  DONE;
+})
+
 
 (define_expand "aarch64_saddl2<mode>"
   [(match_operand:<VWIDE> 0 "register_operand")
-- 
2.17.1