Message ID | 20210607112007.8659-1-christophe.lyon@linaro.org |
---|---|
State | New |
Headers | show |
Series | [1/2] arm: Auto-vectorization for MVE: vclz | expand |
Christophe Lyon <christophe.lyon@linaro.org> writes: > This patch adds support for auto-vectorization of clz for MVE. > > It does so by removing the unspec from mve_vclzq_<supf><mode> and uses > 'clz' instead. It moves to neon_vclz<mode> expander from neon.md to > vec-common.md and renames it into the standard name clz<mode>2. > > 2021-06-03 Christophe Lyon <christophe.lyon@linaro.org> > > gcc/ > * config/arm/iterators.md (<supf>): Remove VCLZQ_U, VCLZQ_S. > (VCLZQ): Remove. > * config/arm/mve.md (mve_vclzq_<supf><mode>): Add '@' prefix, > remove <supf> iterator. > (mve_vclzq_u<mode>): New. > * config/arm/neon.md (clz<mode>2): Rename to neon_vclz<mode>. > (neon_vclz<mode): Move to ... > * config/arm/unspecs.md (VCLZQ_U, VCLZQ_S): Remove. > * config/arm/vec-common.md ... here. Add support for MVE. > > gcc/testsuite/ > * gcc.target/arm/simd/mve-vclz.c: New test. > --- > gcc/config/arm/iterators.md | 3 +-- > gcc/config/arm/mve.md | 12 ++++++--- > gcc/config/arm/neon.md | 11 +------- > gcc/config/arm/unspecs.md | 2 -- > gcc/config/arm/vec-common.md | 13 +++++++++ > gcc/testsuite/gcc.target/arm/simd/mve-vclz.c | 28 ++++++++++++++++++++ > 6 files changed, 52 insertions(+), 17 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-vclz.c > > diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md > index 3042bafc6c6..5c4fe895268 100644 > --- a/gcc/config/arm/iterators.md > +++ b/gcc/config/arm/iterators.md > @@ -1288,7 +1288,7 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s") > (VMOVLBQ_U "u") (VCVTQ_FROM_F_S "s") (VCVTQ_FROM_F_U "u") > (VCVTPQ_S "s") (VCVTPQ_U "u") (VCVTNQ_S "s") > (VCVTNQ_U "u") (VCVTMQ_S "s") (VCVTMQ_U "u") > - (VCLZQ_U "u") (VCLZQ_S "s") (VREV32Q_U "u") > + (VREV32Q_U "u") > (VREV32Q_S "s") (VADDLVQ_U "u") (VADDLVQ_S "s") > (VCVTQ_N_TO_F_S "s") (VCVTQ_N_TO_F_U "u") > (VCREATEQ_U "u") (VCREATEQ_S "s") (VSHRQ_N_S "s") > @@ -1538,7 +1538,6 @@ (define_int_iterator VCVTQ_FROM_F [VCVTQ_FROM_F_S 
VCVTQ_FROM_F_U]) > (define_int_iterator VREV16Q [VREV16Q_U VREV16Q_S]) > (define_int_iterator VCVTAQ [VCVTAQ_U VCVTAQ_S]) > (define_int_iterator VDUPQ_N [VDUPQ_N_U VDUPQ_N_S]) > -(define_int_iterator VCLZQ [VCLZQ_U VCLZQ_S]) > (define_int_iterator VADDVQ [VADDVQ_U VADDVQ_S]) > (define_int_iterator VREV32Q [VREV32Q_U VREV32Q_S]) > (define_int_iterator VMOVLBQ [VMOVLBQ_S VMOVLBQ_U]) > diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md > index 04aa612331a..99e46d0bc69 100644 > --- a/gcc/config/arm/mve.md > +++ b/gcc/config/arm/mve.md > @@ -435,16 +435,22 @@ (define_insn "mve_vdupq_n_<supf><mode>" > ;; > ;; [vclzq_u, vclzq_s]) > ;; > -(define_insn "mve_vclzq_<supf><mode>" > +(define_insn "@mve_vclzq_s<mode>" > [ > (set (match_operand:MVE_2 0 "s_register_operand" "=w") > - (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")] > - VCLZQ)) > + (clz:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w"))) > ] > "TARGET_HAVE_MVE" > "vclz.i%#<V_sz_elem> %q0, %q1" > [(set_attr "type" "mve_move") > ]) > +(define_expand "mve_vclzq_u<mode>" > + [ > + (set (match_operand:MVE_2 0 "s_register_operand") > + (clz:MVE_2 (match_operand:MVE_2 1 "s_register_operand"))) > + ] > + "TARGET_HAVE_MVE" > +) > > ;; > ;; [vclsq_s]) > diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md > index 18571d819eb..0fdffaf4ec4 100644 > --- a/gcc/config/arm/neon.md > +++ b/gcc/config/arm/neon.md > @@ -3018,7 +3018,7 @@ (define_insn "neon_vcls<mode>" > [(set_attr "type" "neon_cls<q>")] > ) > > -(define_insn "clz<mode>2" > +(define_insn "neon_vclz<mode>" > [(set (match_operand:VDQIW 0 "s_register_operand" "=w") > (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))] > "TARGET_NEON" > @@ -3026,15 +3026,6 @@ (define_insn "clz<mode>2" > [(set_attr "type" "neon_cnt<q>")] > ) > > -(define_expand "neon_vclz<mode>" > - [(match_operand:VDQIW 0 "s_register_operand") > - (match_operand:VDQIW 1 "s_register_operand")] > - "TARGET_NEON" > -{ > - emit_insn (gen_clz<mode>2 (operands[0], 
operands[1])); > - DONE; > -}) > - > (define_insn "popcount<mode>2" > [(set (match_operand:VE 0 "s_register_operand" "=w") > (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))] > diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md > index ed1bc293b78..ad1c6edd005 100644 > --- a/gcc/config/arm/unspecs.md > +++ b/gcc/config/arm/unspecs.md > @@ -556,8 +556,6 @@ (define_c_enum "unspec" [ > VQABSQ_S > VDUPQ_N_U > VDUPQ_N_S > - VCLZQ_U > - VCLZQ_S > VCLSQ_S > VADDVQ_S > VADDVQ_U > diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md > index 2779c1a8aaa..1ba1e5eb008 100644 > --- a/gcc/config/arm/vec-common.md > +++ b/gcc/config/arm/vec-common.md > @@ -625,3 +625,16 @@ (define_expand "uavg<mode>3_ceil" > operands[0], operands[1], operands[2])); > DONE; > }) > + > +(define_expand "clz<mode>2" > + [(match_operand:VDQIW 0 "s_register_operand") > + (match_operand:VDQIW 1 "s_register_operand")] > + "ARM_HAVE_<MODE>_ARITH > + && !TARGET_REALLY_IWMMXT" > +{ > + if (TARGET_NEON) > + emit_insn (gen_neon_vclz<mode> (operands[0], operands[1])); > + else > + emit_insn (gen_mve_vclzq_s (<MODE>mode, operands[0], operands[1])); > + DONE; > +}) For cases like this where the patterns are the same, I think we should instead do: [(set (match_operand:MVE_2 0 "s_register_operand") (clz:MVE_2 (match_operand:MVE_2 1 "s_register_operand")))] "…" and drop the C code. OK with that change, thanks. 
Richard > diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vclz.c b/gcc/testsuite/gcc.target/arm/simd/mve-vclz.c > new file mode 100644 > index 00000000000..7068736bc28 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vclz.c > @@ -0,0 +1,28 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ > +/* { dg-add-options arm_v8_1m_mve } */ > +/* { dg-additional-options "-O3" } */ > + > +#include <stdint.h> > + > +#define FUNC(SIGN, TYPE, BITS, NAME) \ > + void test_ ## NAME ##_ ## SIGN ## BITS (TYPE##BITS##_t * __restrict__ dest, \ > + TYPE##BITS##_t *a) { \ > + int i; \ > + for (i=0; i < (128 / BITS); i++) { \ > + dest[i] = (TYPE##BITS##_t)__builtin_clz(a[i]); \ > + } \ > +} > + > +FUNC(s, int, 32, clz) > +FUNC(u, uint, 32, clz) > +FUNC(s, int, 16, clz) > +FUNC(u, uint, 16, clz) > +FUNC(s, int, 8, clz) > +FUNC(u, uint, 8, clz) > + > +/* 16 and 8-bit versions are not vectorized because they need pack/unpack > + patterns since __builtin_clz uses 32-bit parameter and return value. */ > +/* { dg-final { scan-assembler-times {vclz\.i32 q[0-9]+, q[0-9]+} 2 } } */ > +/* { dg-final { scan-assembler-times {vclz\.i16 q[0-9]+, q[0-9]+} 2 { xfail *-*-* } } } */ > +/* { dg-final { scan-assembler-times {vclz\.i8 q[0-9]+, q[0-9]+} 2 { xfail *-*-* } } } */
On Tue, 8 Jun 2021 at 13:58, Richard Sandiford <richard.sandiford@arm.com> wrote: > > Christophe Lyon <christophe.lyon@linaro.org> writes: > > This patch adds support for auto-vectorization of clz for MVE. > > > > It does so by removing the unspec from mve_vclzq_<supf><mode> and uses > > 'clz' instead. It moves to neon_vclz<mode> expander from neon.md to > > vec-common.md and renames it into the standard name clz<mode>2. > > > > 2021-06-03 Christophe Lyon <christophe.lyon@linaro.org> > > > > gcc/ > > * config/arm/iterators.md (<supf>): Remove VCLZQ_U, VCLZQ_S. > > (VCLZQ): Remove. > > * config/arm/mve.md (mve_vclzq_<supf><mode>): Add '@' prefix, > > remove <supf> iterator. > > (mve_vclzq_u<mode>): New. > > * config/arm/neon.md (clz<mode>2): Rename to neon_vclz<mode>. > > (neon_vclz<mode): Move to ... > > * config/arm/unspecs.md (VCLZQ_U, VCLZQ_S): Remove. > > * config/arm/vec-common.md ... here. Add support for MVE. > > > > gcc/testsuite/ > > * gcc.target/arm/simd/mve-vclz.c: New test. > > --- > > gcc/config/arm/iterators.md | 3 +-- > > gcc/config/arm/mve.md | 12 ++++++--- > > gcc/config/arm/neon.md | 11 +------- > > gcc/config/arm/unspecs.md | 2 -- > > gcc/config/arm/vec-common.md | 13 +++++++++ > > gcc/testsuite/gcc.target/arm/simd/mve-vclz.c | 28 ++++++++++++++++++++ > > 6 files changed, 52 insertions(+), 17 deletions(-) > > create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-vclz.c > > > > diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md > > index 3042bafc6c6..5c4fe895268 100644 > > --- a/gcc/config/arm/iterators.md > > +++ b/gcc/config/arm/iterators.md > > @@ -1288,7 +1288,7 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s") > > (VMOVLBQ_U "u") (VCVTQ_FROM_F_S "s") (VCVTQ_FROM_F_U "u") > > (VCVTPQ_S "s") (VCVTPQ_U "u") (VCVTNQ_S "s") > > (VCVTNQ_U "u") (VCVTMQ_S "s") (VCVTMQ_U "u") > > - (VCLZQ_U "u") (VCLZQ_S "s") (VREV32Q_U "u") > > + (VREV32Q_U "u") > > (VREV32Q_S "s") (VADDLVQ_U "u") (VADDLVQ_S "s") > 
> (VCVTQ_N_TO_F_S "s") (VCVTQ_N_TO_F_U "u") > > (VCREATEQ_U "u") (VCREATEQ_S "s") (VSHRQ_N_S "s") > > @@ -1538,7 +1538,6 @@ (define_int_iterator VCVTQ_FROM_F [VCVTQ_FROM_F_S VCVTQ_FROM_F_U]) > > (define_int_iterator VREV16Q [VREV16Q_U VREV16Q_S]) > > (define_int_iterator VCVTAQ [VCVTAQ_U VCVTAQ_S]) > > (define_int_iterator VDUPQ_N [VDUPQ_N_U VDUPQ_N_S]) > > -(define_int_iterator VCLZQ [VCLZQ_U VCLZQ_S]) > > (define_int_iterator VADDVQ [VADDVQ_U VADDVQ_S]) > > (define_int_iterator VREV32Q [VREV32Q_U VREV32Q_S]) > > (define_int_iterator VMOVLBQ [VMOVLBQ_S VMOVLBQ_U]) > > diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md > > index 04aa612331a..99e46d0bc69 100644 > > --- a/gcc/config/arm/mve.md > > +++ b/gcc/config/arm/mve.md > > @@ -435,16 +435,22 @@ (define_insn "mve_vdupq_n_<supf><mode>" > > ;; > > ;; [vclzq_u, vclzq_s]) > > ;; > > -(define_insn "mve_vclzq_<supf><mode>" > > +(define_insn "@mve_vclzq_s<mode>" > > [ > > (set (match_operand:MVE_2 0 "s_register_operand" "=w") > > - (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")] > > - VCLZQ)) > > + (clz:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w"))) > > ] > > "TARGET_HAVE_MVE" > > "vclz.i%#<V_sz_elem> %q0, %q1" > > [(set_attr "type" "mve_move") > > ]) > > +(define_expand "mve_vclzq_u<mode>" > > + [ > > + (set (match_operand:MVE_2 0 "s_register_operand") > > + (clz:MVE_2 (match_operand:MVE_2 1 "s_register_operand"))) > > + ] > > + "TARGET_HAVE_MVE" > > +) > > > > ;; > > ;; [vclsq_s]) > > diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md > > index 18571d819eb..0fdffaf4ec4 100644 > > --- a/gcc/config/arm/neon.md > > +++ b/gcc/config/arm/neon.md > > @@ -3018,7 +3018,7 @@ (define_insn "neon_vcls<mode>" > > [(set_attr "type" "neon_cls<q>")] > > ) > > > > -(define_insn "clz<mode>2" > > +(define_insn "neon_vclz<mode>" > > [(set (match_operand:VDQIW 0 "s_register_operand" "=w") > > (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))] > > "TARGET_NEON" > > @@ -3026,15 
+3026,6 @@ (define_insn "clz<mode>2" > > [(set_attr "type" "neon_cnt<q>")] > > ) > > > > -(define_expand "neon_vclz<mode>" > > - [(match_operand:VDQIW 0 "s_register_operand") > > - (match_operand:VDQIW 1 "s_register_operand")] > > - "TARGET_NEON" > > -{ > > - emit_insn (gen_clz<mode>2 (operands[0], operands[1])); > > - DONE; > > -}) > > - > > (define_insn "popcount<mode>2" > > [(set (match_operand:VE 0 "s_register_operand" "=w") > > (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))] > > diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md > > index ed1bc293b78..ad1c6edd005 100644 > > --- a/gcc/config/arm/unspecs.md > > +++ b/gcc/config/arm/unspecs.md > > @@ -556,8 +556,6 @@ (define_c_enum "unspec" [ > > VQABSQ_S > > VDUPQ_N_U > > VDUPQ_N_S > > - VCLZQ_U > > - VCLZQ_S > > VCLSQ_S > > VADDVQ_S > > VADDVQ_U > > diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md > > index 2779c1a8aaa..1ba1e5eb008 100644 > > --- a/gcc/config/arm/vec-common.md > > +++ b/gcc/config/arm/vec-common.md > > @@ -625,3 +625,16 @@ (define_expand "uavg<mode>3_ceil" > > operands[0], operands[1], operands[2])); > > DONE; > > }) > > + > > +(define_expand "clz<mode>2" > > + [(match_operand:VDQIW 0 "s_register_operand") > > + (match_operand:VDQIW 1 "s_register_operand")] > > + "ARM_HAVE_<MODE>_ARITH > > + && !TARGET_REALLY_IWMMXT" > > +{ > > + if (TARGET_NEON) > > + emit_insn (gen_neon_vclz<mode> (operands[0], operands[1])); > > + else > > + emit_insn (gen_mve_vclzq_s (<MODE>mode, operands[0], operands[1])); > > + DONE; > > +}) > > For cases like this where the patterns are the same, I think we should > instead do: > > [(set (match_operand:MVE_2 0 "s_register_operand") > (clz:MVE_2 (match_operand:MVE_2 1 "s_register_operand")))] > "…" > > and drop the C code. > > OK with that change, thanks. > I guess you mean VDQIW instead of MVE_2 ? Otherwise this will not cover all Neon modes? 
> Richard > > > diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vclz.c b/gcc/testsuite/gcc.target/arm/simd/mve-vclz.c > > new file mode 100644 > > index 00000000000..7068736bc28 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vclz.c > > @@ -0,0 +1,28 @@ > > +/* { dg-do compile } */ > > +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ > > +/* { dg-add-options arm_v8_1m_mve } */ > > +/* { dg-additional-options "-O3" } */ > > + > > +#include <stdint.h> > > + > > +#define FUNC(SIGN, TYPE, BITS, NAME) \ > > + void test_ ## NAME ##_ ## SIGN ## BITS (TYPE##BITS##_t * __restrict__ dest, \ > > + TYPE##BITS##_t *a) { \ > > + int i; \ > > + for (i=0; i < (128 / BITS); i++) { \ > > + dest[i] = (TYPE##BITS##_t)__builtin_clz(a[i]); \ > > + } \ > > +} > > + > > +FUNC(s, int, 32, clz) > > +FUNC(u, uint, 32, clz) > > +FUNC(s, int, 16, clz) > > +FUNC(u, uint, 16, clz) > > +FUNC(s, int, 8, clz) > > +FUNC(u, uint, 8, clz) > > + > > +/* 16 and 8-bit versions are not vectorized because they need pack/unpack > > + patterns since __builtin_clz uses 32-bit parameter and return value. */ > > +/* { dg-final { scan-assembler-times {vclz\.i32 q[0-9]+, q[0-9]+} 2 } } */ > > +/* { dg-final { scan-assembler-times {vclz\.i16 q[0-9]+, q[0-9]+} 2 { xfail *-*-* } } } */ > > +/* { dg-final { scan-assembler-times {vclz\.i8 q[0-9]+, q[0-9]+} 2 { xfail *-*-* } } } */
Christophe Lyon <christophe.lyon@linaro.org> writes: > On Tue, 8 Jun 2021 at 13:58, Richard Sandiford > <richard.sandiford@arm.com> wrote: >> >> Christophe Lyon <christophe.lyon@linaro.org> writes: >> > This patch adds support for auto-vectorization of clz for MVE. >> > >> > It does so by removing the unspec from mve_vclzq_<supf><mode> and using >> > 'clz' instead. It moves the neon_vclz<mode> expander from neon.md to >> > vec-common.md and renames it to the standard name clz<mode>2. >> > >> > 2021-06-03 Christophe Lyon <christophe.lyon@linaro.org> >> > >> > gcc/ >> > * config/arm/iterators.md (<supf>): Remove VCLZQ_U, VCLZQ_S. >> > (VCLZQ): Remove. >> > * config/arm/mve.md (mve_vclzq_<supf><mode>): Add '@' prefix, >> > remove <supf> iterator. >> > (mve_vclzq_u<mode>): New. >> > * config/arm/neon.md (clz<mode>2): Rename to neon_vclz<mode>. >> > (neon_vclz<mode>): Move to ... >> > * config/arm/unspecs.md (VCLZQ_U, VCLZQ_S): Remove. >> > * config/arm/vec-common.md ... here. Add support for MVE. >> > >> > gcc/testsuite/ >> > * gcc.target/arm/simd/mve-vclz.c: New test. 
>> > --- >> > gcc/config/arm/iterators.md | 3 +-- >> > gcc/config/arm/mve.md | 12 ++++++--- >> > gcc/config/arm/neon.md | 11 +------- >> > gcc/config/arm/unspecs.md | 2 -- >> > gcc/config/arm/vec-common.md | 13 +++++++++ >> > gcc/testsuite/gcc.target/arm/simd/mve-vclz.c | 28 ++++++++++++++++++++ >> > 6 files changed, 52 insertions(+), 17 deletions(-) >> > create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-vclz.c >> > >> > diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md >> > index 3042bafc6c6..5c4fe895268 100644 >> > --- a/gcc/config/arm/iterators.md >> > +++ b/gcc/config/arm/iterators.md >> > @@ -1288,7 +1288,7 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s") >> > (VMOVLBQ_U "u") (VCVTQ_FROM_F_S "s") (VCVTQ_FROM_F_U "u") >> > (VCVTPQ_S "s") (VCVTPQ_U "u") (VCVTNQ_S "s") >> > (VCVTNQ_U "u") (VCVTMQ_S "s") (VCVTMQ_U "u") >> > - (VCLZQ_U "u") (VCLZQ_S "s") (VREV32Q_U "u") >> > + (VREV32Q_U "u") >> > (VREV32Q_S "s") (VADDLVQ_U "u") (VADDLVQ_S "s") >> > (VCVTQ_N_TO_F_S "s") (VCVTQ_N_TO_F_U "u") >> > (VCREATEQ_U "u") (VCREATEQ_S "s") (VSHRQ_N_S "s") >> > @@ -1538,7 +1538,6 @@ (define_int_iterator VCVTQ_FROM_F [VCVTQ_FROM_F_S VCVTQ_FROM_F_U]) >> > (define_int_iterator VREV16Q [VREV16Q_U VREV16Q_S]) >> > (define_int_iterator VCVTAQ [VCVTAQ_U VCVTAQ_S]) >> > (define_int_iterator VDUPQ_N [VDUPQ_N_U VDUPQ_N_S]) >> > -(define_int_iterator VCLZQ [VCLZQ_U VCLZQ_S]) >> > (define_int_iterator VADDVQ [VADDVQ_U VADDVQ_S]) >> > (define_int_iterator VREV32Q [VREV32Q_U VREV32Q_S]) >> > (define_int_iterator VMOVLBQ [VMOVLBQ_S VMOVLBQ_U]) >> > diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md >> > index 04aa612331a..99e46d0bc69 100644 >> > --- a/gcc/config/arm/mve.md >> > +++ b/gcc/config/arm/mve.md >> > @@ -435,16 +435,22 @@ (define_insn "mve_vdupq_n_<supf><mode>" >> > ;; >> > ;; [vclzq_u, vclzq_s]) >> > ;; >> > -(define_insn "mve_vclzq_<supf><mode>" >> > +(define_insn "@mve_vclzq_s<mode>" >> > [ >> > (set 
(match_operand:MVE_2 0 "s_register_operand" "=w") >> > - (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")] >> > - VCLZQ)) >> > + (clz:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w"))) >> > ] >> > "TARGET_HAVE_MVE" >> > "vclz.i%#<V_sz_elem> %q0, %q1" >> > [(set_attr "type" "mve_move") >> > ]) >> > +(define_expand "mve_vclzq_u<mode>" >> > + [ >> > + (set (match_operand:MVE_2 0 "s_register_operand") >> > + (clz:MVE_2 (match_operand:MVE_2 1 "s_register_operand"))) >> > + ] >> > + "TARGET_HAVE_MVE" >> > +) >> > >> > ;; >> > ;; [vclsq_s]) >> > diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md >> > index 18571d819eb..0fdffaf4ec4 100644 >> > --- a/gcc/config/arm/neon.md >> > +++ b/gcc/config/arm/neon.md >> > @@ -3018,7 +3018,7 @@ (define_insn "neon_vcls<mode>" >> > [(set_attr "type" "neon_cls<q>")] >> > ) >> > >> > -(define_insn "clz<mode>2" >> > +(define_insn "neon_vclz<mode>" >> > [(set (match_operand:VDQIW 0 "s_register_operand" "=w") >> > (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))] >> > "TARGET_NEON" >> > @@ -3026,15 +3026,6 @@ (define_insn "clz<mode>2" >> > [(set_attr "type" "neon_cnt<q>")] >> > ) >> > >> > -(define_expand "neon_vclz<mode>" >> > - [(match_operand:VDQIW 0 "s_register_operand") >> > - (match_operand:VDQIW 1 "s_register_operand")] >> > - "TARGET_NEON" >> > -{ >> > - emit_insn (gen_clz<mode>2 (operands[0], operands[1])); >> > - DONE; >> > -}) >> > - >> > (define_insn "popcount<mode>2" >> > [(set (match_operand:VE 0 "s_register_operand" "=w") >> > (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))] >> > diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md >> > index ed1bc293b78..ad1c6edd005 100644 >> > --- a/gcc/config/arm/unspecs.md >> > +++ b/gcc/config/arm/unspecs.md >> > @@ -556,8 +556,6 @@ (define_c_enum "unspec" [ >> > VQABSQ_S >> > VDUPQ_N_U >> > VDUPQ_N_S >> > - VCLZQ_U >> > - VCLZQ_S >> > VCLSQ_S >> > VADDVQ_S >> > VADDVQ_U >> > diff --git a/gcc/config/arm/vec-common.md 
b/gcc/config/arm/vec-common.md >> > index 2779c1a8aaa..1ba1e5eb008 100644 >> > --- a/gcc/config/arm/vec-common.md >> > +++ b/gcc/config/arm/vec-common.md >> > @@ -625,3 +625,16 @@ (define_expand "uavg<mode>3_ceil" >> > operands[0], operands[1], operands[2])); >> > DONE; >> > }) >> > + >> > +(define_expand "clz<mode>2" >> > + [(match_operand:VDQIW 0 "s_register_operand") >> > + (match_operand:VDQIW 1 "s_register_operand")] >> > + "ARM_HAVE_<MODE>_ARITH >> > + && !TARGET_REALLY_IWMMXT" >> > +{ >> > + if (TARGET_NEON) >> > + emit_insn (gen_neon_vclz<mode> (operands[0], operands[1])); >> > + else >> > + emit_insn (gen_mve_vclzq_s (<MODE>mode, operands[0], operands[1])); >> > + DONE; >> > +}) >> >> For cases like this where the patterns are the same, I think we should >> instead do: >> >> [(set (match_operand:MVE_2 0 "s_register_operand") >> (clz:MVE_2 (match_operand:MVE_2 1 "s_register_operand")))] >> "…" >> >> and drop the C code. >> >> OK with that change, thanks. >> > > I guess you mean VDQIW instead of MVE_2 ? Otherwise this will not > cover all Neon modes? Oops, yes. I copied it from the MVE patterns and forgot to change the modes. Thanks, Richard
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 3042bafc6c6..5c4fe895268 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -1288,7 +1288,7 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s") (VMOVLBQ_U "u") (VCVTQ_FROM_F_S "s") (VCVTQ_FROM_F_U "u") (VCVTPQ_S "s") (VCVTPQ_U "u") (VCVTNQ_S "s") (VCVTNQ_U "u") (VCVTMQ_S "s") (VCVTMQ_U "u") - (VCLZQ_U "u") (VCLZQ_S "s") (VREV32Q_U "u") + (VREV32Q_U "u") (VREV32Q_S "s") (VADDLVQ_U "u") (VADDLVQ_S "s") (VCVTQ_N_TO_F_S "s") (VCVTQ_N_TO_F_U "u") (VCREATEQ_U "u") (VCREATEQ_S "s") (VSHRQ_N_S "s") @@ -1538,7 +1538,6 @@ (define_int_iterator VCVTQ_FROM_F [VCVTQ_FROM_F_S VCVTQ_FROM_F_U]) (define_int_iterator VREV16Q [VREV16Q_U VREV16Q_S]) (define_int_iterator VCVTAQ [VCVTAQ_U VCVTAQ_S]) (define_int_iterator VDUPQ_N [VDUPQ_N_U VDUPQ_N_S]) -(define_int_iterator VCLZQ [VCLZQ_U VCLZQ_S]) (define_int_iterator VADDVQ [VADDVQ_U VADDVQ_S]) (define_int_iterator VREV32Q [VREV32Q_U VREV32Q_S]) (define_int_iterator VMOVLBQ [VMOVLBQ_S VMOVLBQ_U]) diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index 04aa612331a..99e46d0bc69 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -435,16 +435,22 @@ (define_insn "mve_vdupq_n_<supf><mode>" ;; ;; [vclzq_u, vclzq_s]) ;; -(define_insn "mve_vclzq_<supf><mode>" +(define_insn "@mve_vclzq_s<mode>" [ (set (match_operand:MVE_2 0 "s_register_operand" "=w") - (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")] - VCLZQ)) + (clz:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w"))) ] "TARGET_HAVE_MVE" "vclz.i%#<V_sz_elem> %q0, %q1" [(set_attr "type" "mve_move") ]) +(define_expand "mve_vclzq_u<mode>" + [ + (set (match_operand:MVE_2 0 "s_register_operand") + (clz:MVE_2 (match_operand:MVE_2 1 "s_register_operand"))) + ] + "TARGET_HAVE_MVE" +) ;; ;; [vclsq_s]) diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 18571d819eb..0fdffaf4ec4 100644 --- a/gcc/config/arm/neon.md +++ 
b/gcc/config/arm/neon.md @@ -3018,7 +3018,7 @@ (define_insn "neon_vcls<mode>" [(set_attr "type" "neon_cls<q>")] ) -(define_insn "clz<mode>2" +(define_insn "neon_vclz<mode>" [(set (match_operand:VDQIW 0 "s_register_operand" "=w") (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))] "TARGET_NEON" @@ -3026,15 +3026,6 @@ (define_insn "clz<mode>2" [(set_attr "type" "neon_cnt<q>")] ) -(define_expand "neon_vclz<mode>" - [(match_operand:VDQIW 0 "s_register_operand") - (match_operand:VDQIW 1 "s_register_operand")] - "TARGET_NEON" -{ - emit_insn (gen_clz<mode>2 (operands[0], operands[1])); - DONE; -}) - (define_insn "popcount<mode>2" [(set (match_operand:VE 0 "s_register_operand" "=w") (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))] diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md index ed1bc293b78..ad1c6edd005 100644 --- a/gcc/config/arm/unspecs.md +++ b/gcc/config/arm/unspecs.md @@ -556,8 +556,6 @@ (define_c_enum "unspec" [ VQABSQ_S VDUPQ_N_U VDUPQ_N_S - VCLZQ_U - VCLZQ_S VCLSQ_S VADDVQ_S VADDVQ_U diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md index 2779c1a8aaa..1ba1e5eb008 100644 --- a/gcc/config/arm/vec-common.md +++ b/gcc/config/arm/vec-common.md @@ -625,3 +625,16 @@ (define_expand "uavg<mode>3_ceil" operands[0], operands[1], operands[2])); DONE; }) + +(define_expand "clz<mode>2" + [(match_operand:VDQIW 0 "s_register_operand") + (match_operand:VDQIW 1 "s_register_operand")] + "ARM_HAVE_<MODE>_ARITH + && !TARGET_REALLY_IWMMXT" +{ + if (TARGET_NEON) + emit_insn (gen_neon_vclz<mode> (operands[0], operands[1])); + else + emit_insn (gen_mve_vclzq_s (<MODE>mode, operands[0], operands[1])); + DONE; +}) diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vclz.c b/gcc/testsuite/gcc.target/arm/simd/mve-vclz.c new file mode 100644 index 00000000000..7068736bc28 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vclz.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target 
arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O3" } */ + +#include <stdint.h> + +#define FUNC(SIGN, TYPE, BITS, NAME) \ + void test_ ## NAME ##_ ## SIGN ## BITS (TYPE##BITS##_t * __restrict__ dest, \ + TYPE##BITS##_t *a) { \ + int i; \ + for (i=0; i < (128 / BITS); i++) { \ + dest[i] = (TYPE##BITS##_t)__builtin_clz(a[i]); \ + } \ +} + +FUNC(s, int, 32, clz) +FUNC(u, uint, 32, clz) +FUNC(s, int, 16, clz) +FUNC(u, uint, 16, clz) +FUNC(s, int, 8, clz) +FUNC(u, uint, 8, clz) + +/* 16 and 8-bit versions are not vectorized because they need pack/unpack + patterns since __builtin_clz uses 32-bit parameter and return value. */ +/* { dg-final { scan-assembler-times {vclz\.i32 q[0-9]+, q[0-9]+} 2 } } */ +/* { dg-final { scan-assembler-times {vclz\.i16 q[0-9]+, q[0-9]+} 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times {vclz\.i8 q[0-9]+, q[0-9]+} 2 { xfail *-*-* } } } */