Message ID: VI1PR0802MB2205379B83D99FB8BBD43DBFF5E70@VI1PR0802MB2205.eurprd08.prod.outlook.com
State:      New
Series:     [1/3,aarch64] Add aarch64 support for vec_widen_add, vec_widen_sub patterns
Joel Hutton via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> Hi all,
>
> This patch adds backend patterns for vec_widen_add, vec_widen_sub on aarch64.
>
> All 3 patches together bootstrapped and regression tested on aarch64.
>
> Ok for stage 1?
>
> gcc/ChangeLog:
>
> 2020-11-12  Joel Hutton  <joel.hutton@arm.com>
>
>         * config/aarch64/aarch64-simd.md: New patterns vec_widen_saddl_lo/hi_<mode>.
>
> From 3e47bc562b83417a048e780bcde52fb2c9617df3 Mon Sep 17 00:00:00 2001
> From: Joel Hutton <joel.hutton@arm.com>
> Date: Mon, 9 Nov 2020 15:35:57 +0000
> Subject: [PATCH 1/3] [aarch64] Add vec_widen patterns to aarch64
>
> Add widening add and subtract patterns to the aarch64
> backend.
> ---
>  gcc/config/aarch64/aarch64-simd.md | 94 ++++++++++++++++++++++++++++++
>  1 file changed, 94 insertions(+)
>
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index 2cf6fe9154a2ee1b21ad9e8e2a6109805022be7f..b4f56a2295926f027bd53e7456eec729af0cd6df 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -3382,6 +3382,100 @@
>    [(set_attr "type" "neon_<ADDSUB:optab>_long")]
>  )
>
> +(define_expand "vec_widen_saddl_lo_<mode>"
> +  [(match_operand:<VWIDE> 0 "register_operand")
> +   (match_operand:VQW 1 "register_operand")
> +   (match_operand:VQW 2 "register_operand")]
> +  "TARGET_SIMD"
> +{
> +  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
> +  emit_insn (gen_aarch64_saddl<mode>_lo_internal (operands[0], operands[1],
> +                                                  operands[2], p));
> +  DONE;
> +})
> +
> +(define_expand "vec_widen_ssubl_lo_<mode>"
> +  [(match_operand:<VWIDE> 0 "register_operand")
> +   (match_operand:VQW 1 "register_operand")
> +   (match_operand:VQW 2 "register_operand")]
> +  "TARGET_SIMD"
> +{
> +  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
> +  emit_insn (gen_aarch64_ssubl<mode>_lo_internal (operands[0], operands[1],
> +                                                  operands[2], p));
> +  DONE;
> +})
> +(define_expand "vec_widen_saddl_hi_<mode>"
> +  [(match_operand:<VWIDE> 0 "register_operand")
> +   (match_operand:VQW 1 "register_operand")
> +   (match_operand:VQW 2 "register_operand")]
> +  "TARGET_SIMD"
> +{
> +  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
> +  emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
> +                                                  operands[2], p));
> +  DONE;
> +})
> +
> +(define_expand "vec_widen_ssubl_hi_<mode>"
> +  [(match_operand:<VWIDE> 0 "register_operand")
> +   (match_operand:VQW 1 "register_operand")
> +   (match_operand:VQW 2 "register_operand")]
> +  "TARGET_SIMD"
> +{
> +  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
> +  emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
> +                                                  operands[2], p));
> +  DONE;
> +})
> +(define_expand "vec_widen_uaddl_lo_<mode>"
> +  [(match_operand:<VWIDE> 0 "register_operand")
> +   (match_operand:VQW 1 "register_operand")
> +   (match_operand:VQW 2 "register_operand")]
> +  "TARGET_SIMD"
> +{
> +  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
> +  emit_insn (gen_aarch64_uaddl<mode>_lo_internal (operands[0], operands[1],
> +                                                  operands[2], p));
> +  DONE;
> +})
> +
> +(define_expand "vec_widen_usubl_lo_<mode>"
> +  [(match_operand:<VWIDE> 0 "register_operand")
> +   (match_operand:VQW 1 "register_operand")
> +   (match_operand:VQW 2 "register_operand")]
> +  "TARGET_SIMD"
> +{
> +  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
> +  emit_insn (gen_aarch64_usubl<mode>_lo_internal (operands[0], operands[1],
> +                                                  operands[2], p));
> +  DONE;
> +})
> +
> +(define_expand "vec_widen_uaddl_hi_<mode>"
> +  [(match_operand:<VWIDE> 0 "register_operand")
> +   (match_operand:VQW 1 "register_operand")
> +   (match_operand:VQW 2 "register_operand")]
> +  "TARGET_SIMD"
> +{
> +  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
> +  emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
> +                                                  operands[2], p));
> +  DONE;
> +})
> +
> +(define_expand "vec_widen_usubl_hi_<mode>"
> +  [(match_operand:<VWIDE> 0 "register_operand")
> +   (match_operand:VQW 1 "register_operand")
> +   (match_operand:VQW 2 "register_operand")]
> +  "TARGET_SIMD"
> +{
> +  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
> +  emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
> +                                                  operands[2], p));
> +  DONE;
> +})

There are ways in which we could reduce the amount of cut-&-paste here,
but I guess everything is a trade-off between clarity and compactness.
One extreme is to write them all out explicitly, another extreme would
be to have one define_expand and various iterators and attributes.

I think the vec_widen_<su>mult_*_<mode> patterns strike a good balance:
they use ANY_EXTEND to hide the sign difference while still having
separate hi and lo patterns:

(define_expand "vec_widen_<su>mult_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
 "TARGET_SIMD"
 {
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
   emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
                                                       operands[1],
                                                       operands[2], p));
   DONE;
 }
)

This reduces the amount of cut-&-paste by half without losing much
in terms of clarity.

For the record: I had to double-check which way round the hi/lo optabs
are for big-endian: whether “hi” is “high lane numbers” or “high part of
the vector register”.  But I see it's the latter, so I agree that the
aarch64_simd_vect_par_cnst_half calls are the right way around for
big-endian.

Thanks,
Richard
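(For context — an illustration, not part of the patch series: the kind of
source loop these new optab entry points let the vectorizer handle.  The
function name below is hypothetical, and the exact instruction selection
depends on flags and tuning, but a loop like

  #include <stdint.h>

  void
  uadd_widen (uint16_t *restrict out, const uint8_t *restrict a,
              const uint8_t *restrict b, int n)
  {
    for (int i = 0; i < n; i++)
      out[i] = (uint16_t) a[i] + (uint16_t) b[i];  /* widening add */
  }

can be vectorized by pairing vec_widen_uaddl_lo_<mode> for the low half of
each 128-bit input vector (UADDL) with vec_widen_uaddl_hi_<mode> for the
high half (UADDL2), instead of first unpacking both inputs to the wider
element size and adding full vectors.)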
Tests are still running, but I believe I've addressed the comment.

> There are ways in which we could reduce the amount of cut-&-paste here,
> but I guess everything is a trade-off between clarity and compactness.
> One extreme is to write them all out explicitly, another extreme would
> be to have one define_expand and various iterators and attributes.
>
> I think the vec_widen_<su>mult_*_<mode> patterns strike a good balance:
> they use ANY_EXTEND to hide the sign difference while still having
> separate hi and lo patterns:

Done

gcc/ChangeLog:

2020-11-13  Joel Hutton  <joel.hutton@arm.com>

        * config/aarch64/aarch64-simd.md: New patterns
        vec_widen_saddl_lo/hi_<mode>.
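(For reference: the iterator machinery referred to above lives in
gcc/config/aarch64/iterators.md.  Condensed to the two entries relevant
here — the actual <su> attribute covers more codes — it looks roughly
like:

  ;; Iterate a code pattern over both kinds of extension.
  (define_code_iterator ANY_EXTEND [sign_extend zero_extend])

  ;; Map the extension code to the matching mnemonic prefix, "s" or "u".
  (define_code_attr su [(sign_extend "s") (zero_extend "u")])

so one define_expand written with ANY_EXTEND and <su> generates both the
signed and the unsigned pattern, halving the cut-&-paste as suggested.)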
Joel Hutton <Joel.Hutton@arm.com> writes:
> Tests are still running, but I believe I've addressed the comment.
>
>> There are ways in which we could reduce the amount of cut-&-paste here,
>> but I guess everything is a trade-off between clarity and compactness.
>> One extreme is to write them all out explicitly, another extreme would
>> be to have one define_expand and various iterators and attributes.
>>
>> I think the vec_widen_<su>mult_*_<mode> patterns strike a good balance:
>> they use ANY_EXTEND to hide the sign difference while still having
>> separate hi and lo patterns:
>
> Done
>
> gcc/ChangeLog:
>
> 2020-11-13  Joel Hutton  <joel.hutton@arm.com>
>
>         * config/aarch64/aarch64-simd.md: New patterns
>         vec_widen_saddl_lo/hi_<mode>.
>
> From c52fd11f5d471200c1292fad3bc04056e7721f06 Mon Sep 17 00:00:00 2001
> From: Joel Hutton <joel.hutton@arm.com>
> Date: Mon, 9 Nov 2020 15:35:57 +0000
> Subject: [PATCH 1/3] [aarch64] Add vec_widen patterns to aarch64
>
> Add widening add and subtract patterns to the aarch64
> backend.  These allow taking vectors of N elements of size S
> and performing an add/subtract on the high or low half,
> widening the resulting elements and storing N/2 elements of size 2*S.
> These correspond to the addl, addl2, subl and subl2 instructions.
> ---
>  gcc/config/aarch64/aarch64-simd.md | 47 ++++++++++++++++++++++++++++++
>  1 file changed, 47 insertions(+)
>
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index 2cf6fe9154a..30299610635 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -3382,6 +3382,53 @@
>    [(set_attr "type" "neon_<ADDSUB:optab>_long")]
>  )
>
> +(define_expand "vec_widen_<su>addl_lo_<mode>"
> +  [(match_operand:<VWIDE> 0 "register_operand")
> +   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
> +   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
> +  "TARGET_SIMD"
> +{
> +  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
> +  emit_insn (gen_aarch64_<su>addl<mode>_lo_internal (operands[0], operands[1],
> +                                                  operands[2], p));

Nit: operands[2] should be indented three more columns now that “s” and
“u” have changed to “<su>”.

OK with that change, thanks.

Richard
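(For illustration: the addl/addl2 split the commit message describes is
also visible through the ACLE intrinsics.  A minimal sketch, assuming
arm_neon.h and a hypothetical function name — 16 unsigned bytes in each
input, two vectors of 8 unsigned halfwords out:

  #include <arm_neon.h>

  void
  widen_add_16 (uint16x8_t *lo, uint16x8_t *hi, uint8x16_t a, uint8x16_t b)
  {
    *lo = vaddl_u8 (vget_low_u8 (a), vget_low_u8 (b));  /* UADDL  */
    *hi = vaddl_high_u8 (a, b);                         /* UADDL2 */
  }

i.e. N = 16 elements of size S = 1 byte per input, and each half produces
N/2 = 8 elements of size 2*S = 2 bytes.)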