Message ID | VI1PR0802MB22051881F4761DB92B29BD7CF5E70@VI1PR0802MB2205.eurprd08.prod.outlook.com |
---|---|
State | New |
Headers | show |
Series | [1/3,aarch64] Add aarch64 support for vec_widen_add, vec_widen_sub patterns | expand |
On Thu, 12 Nov 2020, Joel Hutton wrote: > Hi all, > > This patch adds support in the aarch64 backend for the vec_widen_shift vect-pattern and makes a minor mid-end fix to support it. > > All 3 patches together bootstrapped and regression tested on aarch64. > > Ok for stage 1? diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index f12fd158b13656ee24022ec7e445c53444be6554..1f40b59c0560eec675af1d9a0e3e818d47 589de6 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -4934,8 +4934,13 @@ vectorizable_conversion (vec_info *vinfo, &vec_oprnds1); if (code == WIDEN_LSHIFT_EXPR) { - vec_oprnds1.create (ncopies * ninputs); - for (i = 0; i < ncopies * ninputs; ++i) + int oprnds_size = ncopies * ninputs; + /* In the case of SLP ncopies = 1, so the size of vec_oprnds1 here + * should be obtained by the the size of vec_oprnds0. */ You should be able to always use vec_oprnds0.length () This hunk is OK with that change. + if (slp_node) + oprnds_size = vec_oprnds0.length (); + vec_oprnds1.create (oprnds_size); + for (i = 0; i < oprnds_size; ++i) vec_oprnds1.quick_push (op1); } /* Arguments are ready. Create the new vector stmts. */ > > gcc/ChangeLog: > > 2020-11-12 ?Joel Hutton ?<joel.hutton@arm.com> > > ? ? ? ? * config/aarch64/aarch64-simd.md: vec_widen_lshift_hi/lo<mode> patterns > ? ? ? ? * tree-vect-stmts.c > ? ? ? ? (vectorizable_conversion): Fix for widen_lshift case > > gcc/testsuite/ChangeLog: > > 2020-11-12 ?Joel Hutton ?<joel.hutton@arm.com> > > ? ? ? ? * gcc.target/aarch64/vect-widen-lshift.c: New test. >
Joel Hutton via Gcc-patches <gcc-patches@gcc.gnu.org> writes: > Hi all, > > This patch adds support in the aarch64 backend for the vec_widen_shift vect-pattern and makes a minor mid-end fix to support it. > > All 3 patches together bootstrapped and regression tested on aarch64. > > Ok for stage 1? > > gcc/ChangeLog: > > 2020-11-12 Joel Hutton <joel.hutton@arm.com> > > * config/aarch64/aarch64-simd.md: vec_widen_lshift_hi/lo<mode> patterns > * tree-vect-stmts.c > (vectorizable_conversion): Fix for widen_lshift case > > gcc/testsuite/ChangeLog: > > 2020-11-12 Joel Hutton <joel.hutton@arm.com> > > * gcc.target/aarch64/vect-widen-lshift.c: New test. > > From 97af35b2d2a505dcefd8474cbd4bc3441b83ab02 Mon Sep 17 00:00:00 2001 > From: Joel Hutton <joel.hutton@arm.com> > Date: Thu, 12 Nov 2020 11:48:25 +0000 > Subject: [PATCH 3/3] [AArch64][vect] vec_widen_lshift pattern > > Add aarch64 vec_widen_lshift_lo/hi patterns and fix bug it triggers in > mid-end. > --- > gcc/config/aarch64/aarch64-simd.md | 66 +++++++++++++++++++ > .../gcc.target/aarch64/vect-widen-lshift.c | 60 +++++++++++++++++ > gcc/tree-vect-stmts.c | 9 ++- > 3 files changed, 133 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/aarch64/vect-widen-lshift.c > > diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md > index b4f56a2295926f027bd53e7456eec729af0cd6df..2bb39c530a1a861cb9bd3df0c2943f62bd6153d7 100644 > --- a/gcc/config/aarch64/aarch64-simd.md > +++ b/gcc/config/aarch64/aarch64-simd.md > @@ -4711,8 +4711,74 @@ > [(set_attr "type" "neon_sat_shift_reg<q>")] > ) > > +(define_expand "vec_widen_<sur>shiftl_lo_<mode>" > + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") > + (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w") > + (match_operand:SI 2 > + "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")] > + VSHLL))] > + "TARGET_SIMD" > + { > + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false); > + emit_insn (gen_aarch64_<sur>shll<mode>_internal (operands[0], operands[1], > + p, operands[2])); > + DONE; > + } > +) > + > +(define_expand "vec_widen_<sur>shiftl_hi_<mode>" > + [(set (match_operand:<VWIDE> 0 "register_operand") > + (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w") > + (match_operand:SI 2 > + "immediate_operand" "i")] > + VSHLL))] > + "TARGET_SIMD" > + { > + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); > + emit_insn (gen_aarch64_<sur>shll2<mode>_internal (operands[0], operands[1], > + p, operands[2])); > + DONE; > + } > +) > + > ;; vshll_n > > +(define_insn "aarch64_<sur>shll<mode>_internal" > + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") > + (unspec:<VWIDE> [(vec_select:<VHALF> > + (match_operand:VQW 1 "register_operand" "w") > + (match_operand:VQW 2 "vect_par_cnst_lo_half" "")) > + (match_operand:SI 3 > + "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")] > + VSHLL))] > + "TARGET_SIMD" > + { > + if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode)) > + return "shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3"; > + else > + return "<sur>shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3"; > + } > + [(set_attr "type" "neon_shift_imm_long")] > +) > + > +(define_insn "aarch64_<sur>shll2<mode>_internal" > + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") > + (unspec:<VWIDE> [(vec_select:<VHALF> > + (match_operand:VQW 1 "register_operand" "w") > + (match_operand:VQW 2 "vect_par_cnst_hi_half" "")) > + (match_operand:SI 3 > + "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")] > + VSHLL))] > + "TARGET_SIMD" > + { > + if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode)) > + return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %3"; > + else > + return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %3"; > + } > + [(set_attr "type" "neon_shift_imm_long")] > +) > + > (define_insn "aarch64_<sur>shll_n<mode>" > [(set (match_operand:<VWIDE> 0 "register_operand" "=w") > (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w") > diff --git a/gcc/testsuite/gcc.target/aarch64/vect-widen-lshift.c b/gcc/testsuite/gcc.target/aarch64/vect-widen-lshift.c > new file mode 100644 > index 0000000000000000000000000000000000000000..23ed93d1dcbc3ca559efa6708b4ed5855fb6a050 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/vect-widen-lshift.c > @@ -0,0 +1,60 @@ > +/* { dg-do run } */ > +/* { dg-options "-O3 -save-temps" } */ > +#include <stdint.h> > +#include <string.h> > + SVE targets will need a: #pragma GCC target "+nosve" here, since we'll generate different code for SVE. > +#define ARR_SIZE 1024 > + > +/* Should produce an shll,shll2 pair*/ > +void sshll_opt (int32_t *foo, int16_t *a, int16_t *b) > +{ > + for( int i = 0; i < ARR_SIZE - 3;i=i+4) > + { > + foo[i] = a[i] << 16; > + foo[i+1] = a[i+1] << 16; > + foo[i+2] = a[i+2] << 16; > + foo[i+3] = a[i+3] << 16; > + } > +} > + > +__attribute__((optimize (0))) > +void sshll_nonopt (int32_t *foo, int16_t *a, int16_t *b) > +{ > + for( int i = 0; i < ARR_SIZE - 3;i=i+4) > + { > + foo[i] = a[i] << 16; > + foo[i+1] = a[i+1] << 16; > + foo[i+2] = a[i+2] << 16; > + foo[i+3] = a[i+3] << 16; > + } > +} > + > + > +void __attribute__((optimize (0))) > +init(uint16_t *a, uint16_t *b) > +{ > + for( int i = 0; i < ARR_SIZE;i++) > + { > + a[i] = i; > + b[i] = 2*i; > + } > +} > + > +int __attribute__((optimize (0))) > +main() > +{ > + uint32_t foo_arr[ARR_SIZE]; > + uint32_t bar_arr[ARR_SIZE]; > + uint16_t a[ARR_SIZE]; > + uint16_t b[ARR_SIZE]; > + > + init(a, b); > + sshll_opt(foo_arr, a, b); > + sshll_nonopt(bar_arr, a, b); > + if (memcmp(foo_arr, bar_arr, ARR_SIZE) != 0) > + return 1; > + return 0; > +} > + > +/* { dg-final { scan-assembler-times "shll\t" 1} } */ > +/* { dg-final { scan-assembler-times "shll2\t" 1} } */ Very minor nit, sorry, but I think: /* { dg-final { scan-assembler-times {\tshll\t} 1 } } */ would be better. Using "…\t" works, but IIRC it shows up as a tab character in the testsuite result summary too. > diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c > index f12fd158b13656ee24022ec7e445c53444be6554..1f40b59c0560eec675af1d9a0e3e818d47589de6 100644 > --- a/gcc/tree-vect-stmts.c > +++ b/gcc/tree-vect-stmts.c > @@ -4934,8 +4934,13 @@ vectorizable_conversion (vec_info *vinfo, > &vec_oprnds1); > if (code == WIDEN_LSHIFT_EXPR) > { > - vec_oprnds1.create (ncopies * ninputs); > - for (i = 0; i < ncopies * ninputs; ++i) > + int oprnds_size = ncopies * ninputs; > + /* In the case of SLP ncopies = 1, so the size of vec_oprnds1 here > + * should be obtained by the the size of vec_oprnds0. */ This is redundant given Richard's comment, but FWIW, GCC style is to indent without “*”s, so: /* In the case of SLP ncopies = 1, so the size of vec_oprnds1 here should be obtained by the the size of vec_oprnds0. */ OK for the aarch64 bits with the testsuite changes above. Thanks, Richard > + if (slp_node) > + oprnds_size = vec_oprnds0.length (); > + vec_oprnds1.create (oprnds_size); > + for (i = 0; i < oprnds_size; ++i) > vec_oprnds1.quick_push (op1); > } > /* Arguments are ready. Create the new vector stmts. */
Tests are still running, but I believe I've addressed all the comments. > > +#include <string.h> > > + > > SVE targets will need a: > > #pragma GCC target "+nosve" > > here, since we'll generate different code for SVE. Fixed. > > +/* { dg-final { scan-assembler-times "shll\t" 1} } */ > > +/* { dg-final { scan-assembler-times "shll2\t" 1} } */ > > Very minor nit, sorry, but I think: > > /* { dg-final { scan-assembler-times {\tshll\t} 1 } } */ > > would be better. Using "…\t" works, but IIRC it shows up as a tab > character in the testsuite result summary too. Fixed. Minor nits welcome. :) > OK for the aarch64 bits with the testsuite changes above. ok? gcc/ChangeLog: 2020-11-13 Joel Hutton <joel.hutton@arm.com> * config/aarch64/aarch64-simd.md: Add vec_widen_lshift_hi/lo<mode> patterns. * tree-vect-stmts.c (vectorizable_conversion): Fix for widen_lshift case. gcc/testsuite/ChangeLog: 2020-11-13 Joel Hutton <joel.hutton@arm.com> * gcc.target/aarch64/vect-widen-lshift.c: New test.
On Fri, 13 Nov 2020, Joel Hutton wrote: > Tests are still running, but I believe I've addressed all the comments. > > > > +#include <string.h> > > > + > > > > SVE targets will need a: > > > > #pragma GCC target "+nosve" > > > > here, since we'll generate different code for SVE. > > Fixed. > > > > +/* { dg-final { scan-assembler-times "shll\t" 1} } */ > > > +/* { dg-final { scan-assembler-times "shll2\t" 1} } */ > > > > Very minor nit, sorry, but I think: > > > > /* { dg-final { scan-assembler-times {\tshll\t} 1 } } */ > > > > would be better. Using "?\t" works, but IIRC it shows up as a tab > > character in the testsuite result summary too. > > Fixed. Minor nits welcome. :) > > > > OK for the aarch64 bits with the testsuite changes above. > ok? The gcc/tree-vect-stmts.c parts are OK. Richard. > gcc/ChangeLog: > > 2020-11-13 Joel Hutton <joel.hutton@arm.com> > > * config/aarch64/aarch64-simd.md: Add vec_widen_lshift_hi/lo<mode> > patterns. > * tree-vect-stmts.c > (vectorizable_conversion): Fix for widen_lshift case. > > gcc/testsuite/ChangeLog: > > 2020-11-13 Joel Hutton <joel.hutton@arm.com> > > * gcc.target/aarch64/vect-widen-lshift.c: New test. >
Richard Biener <rguenther@suse.de> writes: > On Fri, 13 Nov 2020, Joel Hutton wrote: > >> Tests are still running, but I believe I've addressed all the comments. >> >> > > +#include <string.h> >> > > + >> > >> > SVE targets will need a: >> > >> > #pragma GCC target "+nosve" >> > >> > here, since we'll generate different code for SVE. >> >> Fixed. >> >> > > +/* { dg-final { scan-assembler-times "shll\t" 1} } */ >> > > +/* { dg-final { scan-assembler-times "shll2\t" 1} } */ >> > >> > Very minor nit, sorry, but I think: >> > >> > /* { dg-final { scan-assembler-times {\tshll\t} 1 } } */ >> > >> > would be better. Using "?\t" works, but IIRC it shows up as a tab >> > character in the testsuite result summary too. >> >> Fixed. Minor nits welcome. :) >> >> >> > OK for the aarch64 bits with the testsuite changes above. >> ok? > > The gcc/tree-vect-stmts.c parts are OK. Same for the AArch64 stuff. Thanks, Richard
From 97af35b2d2a505dcefd8474cbd4bc3441b83ab02 Mon Sep 17 00:00:00 2001 From: Joel Hutton <joel.hutton@arm.com> Date: Thu, 12 Nov 2020 11:48:25 +0000 Subject: [PATCH 3/3] [AArch64][vect] vec_widen_lshift pattern Add aarch64 vec_widen_lshift_lo/hi patterns and fix bug it triggers in mid-end. --- gcc/config/aarch64/aarch64-simd.md | 66 +++++++++++++++++++ .../gcc.target/aarch64/vect-widen-lshift.c | 60 +++++++++++++++++ gcc/tree-vect-stmts.c | 9 ++- 3 files changed, 133 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/vect-widen-lshift.c diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index b4f56a2295926f027bd53e7456eec729af0cd6df..2bb39c530a1a861cb9bd3df0c2943f62bd6153d7 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -4711,8 +4711,74 @@ [(set_attr "type" "neon_sat_shift_reg<q>")] ) +(define_expand "vec_widen_<sur>shiftl_lo_<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w") + (match_operand:SI 2 + "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")] + VSHLL))] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false); + emit_insn (gen_aarch64_<sur>shll<mode>_internal (operands[0], operands[1], + p, operands[2])); + DONE; + } +) + +(define_expand "vec_widen_<sur>shiftl_hi_<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand") + (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w") + (match_operand:SI 2 + "immediate_operand" "i")] + VSHLL))] + "TARGET_SIMD" + { + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); + emit_insn (gen_aarch64_<sur>shll2<mode>_internal (operands[0], operands[1], + p, operands[2])); + DONE; + } +) + ;; vshll_n +(define_insn "aarch64_<sur>shll<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (unspec:<VWIDE> [(vec_select:<VHALF> + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 2 "vect_par_cnst_lo_half" "")) + (match_operand:SI 3 + "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")] + VSHLL))] + "TARGET_SIMD" + { + if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode)) + return "shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3"; + else + return "<sur>shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3"; + } + [(set_attr "type" "neon_shift_imm_long")] +) + +(define_insn "aarch64_<sur>shll2<mode>_internal" + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") + (unspec:<VWIDE> [(vec_select:<VHALF> + (match_operand:VQW 1 "register_operand" "w") + (match_operand:VQW 2 "vect_par_cnst_hi_half" "")) + (match_operand:SI 3 + "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")] + VSHLL))] + "TARGET_SIMD" + { + if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode)) + return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %3"; + else + return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %3"; + } + [(set_attr "type" "neon_shift_imm_long")] +) + (define_insn "aarch64_<sur>shll_n<mode>" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w") diff --git a/gcc/testsuite/gcc.target/aarch64/vect-widen-lshift.c b/gcc/testsuite/gcc.target/aarch64/vect-widen-lshift.c new file mode 100644 index 0000000000000000000000000000000000000000..23ed93d1dcbc3ca559efa6708b4ed5855fb6a050 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vect-widen-lshift.c @@ -0,0 +1,60 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -save-temps" } */ +#include <stdint.h> +#include <string.h> + +#define ARR_SIZE 1024 + +/* Should produce an shll,shll2 pair*/ +void sshll_opt (int32_t *foo, int16_t *a, int16_t *b) +{ + for( int i = 0; i < ARR_SIZE - 3;i=i+4) + { + foo[i] = a[i] << 16; + foo[i+1] = a[i+1] << 16; + foo[i+2] = a[i+2] << 16; + foo[i+3] = a[i+3] << 16; + } +} + +__attribute__((optimize (0))) +void sshll_nonopt (int32_t *foo, int16_t *a, int16_t *b) +{ + for( int i = 0; i < ARR_SIZE - 3;i=i+4) + { + foo[i] = a[i] << 16; + foo[i+1] = a[i+1] << 16; + foo[i+2] = a[i+2] << 16; + foo[i+3] = a[i+3] << 16; + } +} + + +void __attribute__((optimize (0))) +init(uint16_t *a, uint16_t *b) +{ + for( int i = 0; i < ARR_SIZE;i++) + { + a[i] = i; + b[i] = 2*i; + } +} + +int __attribute__((optimize (0))) +main() +{ + uint32_t foo_arr[ARR_SIZE]; + uint32_t bar_arr[ARR_SIZE]; + uint16_t a[ARR_SIZE]; + uint16_t b[ARR_SIZE]; + + init(a, b); + sshll_opt(foo_arr, a, b); + sshll_nonopt(bar_arr, a, b); + if (memcmp(foo_arr, bar_arr, ARR_SIZE) != 0) + return 1; + return 0; +} + +/* { dg-final { scan-assembler-times "shll\t" 1} } */ +/* { dg-final { scan-assembler-times "shll2\t" 1} } */ diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c index f12fd158b13656ee24022ec7e445c53444be6554..1f40b59c0560eec675af1d9a0e3e818d47589de6 100644 --- a/gcc/tree-vect-stmts.c +++ b/gcc/tree-vect-stmts.c @@ -4934,8 +4934,13 @@ vectorizable_conversion (vec_info *vinfo, &vec_oprnds1); if (code == WIDEN_LSHIFT_EXPR) { - vec_oprnds1.create (ncopies * ninputs); - for (i = 0; i < ncopies * ninputs; ++i) + int oprnds_size = ncopies * ninputs; + /* In the case of SLP ncopies = 1, so the size of vec_oprnds1 here + * should be obtained by the the size of vec_oprnds0. */ + if (slp_node) + oprnds_size = vec_oprnds0.length (); + vec_oprnds1.create (oprnds_size); + for (i = 0; i < oprnds_size; ++i) vec_oprnds1.quick_push (op1); } /* Arguments are ready. Create the new vector stmts. */ -- 2.17.1