diff mbox series

[1/2] arm: Auto-vectorization for MVE: vclz

Message ID 20210607112007.8659-1-christophe.lyon@linaro.org
State New
Headers show
Series [1/2] arm: Auto-vectorization for MVE: vclz | expand

Commit Message

Christophe Lyon June 7, 2021, 11:20 a.m. UTC
This patch adds support for auto-vectorization of clz for MVE.

It does so by removing the unspec from mve_vclzq_<supf><mode> and using
'clz' instead. It moves the neon_vclz<mode> expander from neon.md to
vec-common.md and renames it to the standard name clz<mode>2.

2021-06-03  Christophe Lyon  <christophe.lyon@linaro.org>

	gcc/
	* config/arm/iterators.md (<supf>): Remove VCLZQ_U, VCLZQ_S.
	(VCLZQ): Remove.
	* config/arm/mve.md (mve_vclzq_<supf><mode>): Add '@' prefix,
	remove <supf> iterator.
	(mve_vclzq_u<mode>): New.
	* config/arm/neon.md (clz<mode>2): Rename to neon_vclz<mode>.
	(neon_vclz<mode>): Move to ...
	* config/arm/unspecs.md (VCLZQ_U, VCLZQ_S): Remove.
	* config/arm/vec-common.md ... here. Add support for MVE.

	gcc/testsuite/
	* gcc.target/arm/simd/mve-vclz.c: New test.
---
 gcc/config/arm/iterators.md                  |  3 +--
 gcc/config/arm/mve.md                        | 12 ++++++---
 gcc/config/arm/neon.md                       | 11 +-------
 gcc/config/arm/unspecs.md                    |  2 --
 gcc/config/arm/vec-common.md                 | 13 +++++++++
 gcc/testsuite/gcc.target/arm/simd/mve-vclz.c | 28 ++++++++++++++++++++
 6 files changed, 52 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-vclz.c

Comments

Richard Sandiford June 8, 2021, 11:58 a.m. UTC | #1
Christophe Lyon <christophe.lyon@linaro.org> writes:
> This patch adds support for auto-vectorization of clz for MVE.
>
> It does so by removing the unspec from mve_vclzq_<supf><mode> and uses
> 'clz' instead. It moves to neon_vclz<mode> expander from neon.md to
> vec-common.md and renames it into the standard name clz<mode>2.
>
> 2021-06-03  Christophe Lyon  <christophe.lyon@linaro.org>
>
> 	gcc/
> 	* config/arm/iterators.md (<supf>): Remove VCLZQ_U, VCLZQ_S.
> 	(VCLZQ): Remove.
> 	* config/arm/mve.md (mve_vclzq_<supf><mode>): Add '@' prefix,
> 	remove <supf> iterator.
> 	(mve_vclzq_u<mode>): New.
> 	* config/arm/neon.md (clz<mode>2): Rename to neon_vclz<mode>.
> 	(neon_vclz<mode): Move to ...
> 	* config/arm/unspecs.md (VCLZQ_U, VCLZQ_S): Remove.
> 	* config/arm/vec-common.md ... here. Add support for MVE.
>
> 	gcc/testsuite/
> 	* gcc.target/arm/simd/mve-vclz.c: New test.
> ---
>  gcc/config/arm/iterators.md                  |  3 +--
>  gcc/config/arm/mve.md                        | 12 ++++++---
>  gcc/config/arm/neon.md                       | 11 +-------
>  gcc/config/arm/unspecs.md                    |  2 --
>  gcc/config/arm/vec-common.md                 | 13 +++++++++
>  gcc/testsuite/gcc.target/arm/simd/mve-vclz.c | 28 ++++++++++++++++++++
>  6 files changed, 52 insertions(+), 17 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-vclz.c
>
> diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
> index 3042bafc6c6..5c4fe895268 100644
> --- a/gcc/config/arm/iterators.md
> +++ b/gcc/config/arm/iterators.md
> @@ -1288,7 +1288,7 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
>  		       (VMOVLBQ_U "u") (VCVTQ_FROM_F_S "s") (VCVTQ_FROM_F_U "u")
>  		       (VCVTPQ_S "s") (VCVTPQ_U "u") (VCVTNQ_S "s")
>  		       (VCVTNQ_U "u") (VCVTMQ_S "s") (VCVTMQ_U "u")
> -		       (VCLZQ_U "u") (VCLZQ_S "s") (VREV32Q_U "u")
> +		       (VREV32Q_U "u")
>  		       (VREV32Q_S "s") (VADDLVQ_U "u") (VADDLVQ_S "s")
>  		       (VCVTQ_N_TO_F_S "s") (VCVTQ_N_TO_F_U "u")
>  		       (VCREATEQ_U "u") (VCREATEQ_S "s") (VSHRQ_N_S "s")
> @@ -1538,7 +1538,6 @@ (define_int_iterator VCVTQ_FROM_F [VCVTQ_FROM_F_S VCVTQ_FROM_F_U])
>  (define_int_iterator VREV16Q [VREV16Q_U VREV16Q_S])
>  (define_int_iterator VCVTAQ [VCVTAQ_U VCVTAQ_S])
>  (define_int_iterator VDUPQ_N [VDUPQ_N_U VDUPQ_N_S])
> -(define_int_iterator VCLZQ [VCLZQ_U VCLZQ_S])
>  (define_int_iterator VADDVQ [VADDVQ_U VADDVQ_S])
>  (define_int_iterator VREV32Q [VREV32Q_U VREV32Q_S])
>  (define_int_iterator VMOVLBQ [VMOVLBQ_S VMOVLBQ_U])
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index 04aa612331a..99e46d0bc69 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -435,16 +435,22 @@ (define_insn "mve_vdupq_n_<supf><mode>"
>  ;;
>  ;; [vclzq_u, vclzq_s])
>  ;;
> -(define_insn "mve_vclzq_<supf><mode>"
> +(define_insn "@mve_vclzq_s<mode>"
>    [
>     (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> -	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")]
> -	 VCLZQ))
> +	(clz:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")))
>    ]
>    "TARGET_HAVE_MVE"
>    "vclz.i%#<V_sz_elem>  %q0, %q1"
>    [(set_attr "type" "mve_move")
>  ])
> +(define_expand "mve_vclzq_u<mode>"
> +  [
> +   (set (match_operand:MVE_2 0 "s_register_operand")
> +	(clz:MVE_2 (match_operand:MVE_2 1 "s_register_operand")))
> +  ]
> +  "TARGET_HAVE_MVE"
> +)
>  
>  ;;
>  ;; [vclsq_s])
> diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> index 18571d819eb..0fdffaf4ec4 100644
> --- a/gcc/config/arm/neon.md
> +++ b/gcc/config/arm/neon.md
> @@ -3018,7 +3018,7 @@ (define_insn "neon_vcls<mode>"
>    [(set_attr "type" "neon_cls<q>")]
>  )
>  
> -(define_insn "clz<mode>2"
> +(define_insn "neon_vclz<mode>"
>    [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
>          (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
>    "TARGET_NEON"
> @@ -3026,15 +3026,6 @@ (define_insn "clz<mode>2"
>    [(set_attr "type" "neon_cnt<q>")]
>  )
>  
> -(define_expand "neon_vclz<mode>"
> -  [(match_operand:VDQIW 0 "s_register_operand")
> -   (match_operand:VDQIW 1 "s_register_operand")]
> -  "TARGET_NEON"
> -{
> -  emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
> -  DONE;
> -})
> -
>  (define_insn "popcount<mode>2"
>    [(set (match_operand:VE 0 "s_register_operand" "=w")
>          (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
> diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
> index ed1bc293b78..ad1c6edd005 100644
> --- a/gcc/config/arm/unspecs.md
> +++ b/gcc/config/arm/unspecs.md
> @@ -556,8 +556,6 @@ (define_c_enum "unspec" [
>    VQABSQ_S
>    VDUPQ_N_U
>    VDUPQ_N_S
> -  VCLZQ_U
> -  VCLZQ_S
>    VCLSQ_S
>    VADDVQ_S
>    VADDVQ_U
> diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
> index 2779c1a8aaa..1ba1e5eb008 100644
> --- a/gcc/config/arm/vec-common.md
> +++ b/gcc/config/arm/vec-common.md
> @@ -625,3 +625,16 @@ (define_expand "uavg<mode>3_ceil"
>  			       operands[0], operands[1], operands[2]));
>    DONE;
>  })
> +
> +(define_expand "clz<mode>2"
> +  [(match_operand:VDQIW 0 "s_register_operand")
> +   (match_operand:VDQIW 1 "s_register_operand")]
> +  "ARM_HAVE_<MODE>_ARITH
> +   && !TARGET_REALLY_IWMMXT"
> +{
> +  if (TARGET_NEON)
> +    emit_insn (gen_neon_vclz<mode> (operands[0], operands[1]));
> +  else
> +    emit_insn (gen_mve_vclzq_s (<MODE>mode, operands[0], operands[1]));
> +  DONE;
> +})

For cases like this where the patterns are the same, I think we should
instead do:

  [(set (match_operand:MVE_2 0 "s_register_operand")
        (clz:MVE_2 (match_operand:MVE_2 1 "s_register_operand")))]
  "…"

and drop the C code.

OK with that change, thanks.

Richard

> diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vclz.c b/gcc/testsuite/gcc.target/arm/simd/mve-vclz.c
> new file mode 100644
> index 00000000000..7068736bc28
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vclz.c
> @@ -0,0 +1,28 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> +/* { dg-add-options arm_v8_1m_mve } */
> +/* { dg-additional-options "-O3" } */
> +
> +#include <stdint.h>
> +
> +#define FUNC(SIGN, TYPE, BITS, NAME)					\
> +  void test_ ## NAME ##_ ## SIGN ## BITS (TYPE##BITS##_t * __restrict__ dest, \
> +					  TYPE##BITS##_t *a) {		\
> +    int i;								\
> +    for (i=0; i < (128 / BITS); i++) {					\
> +      dest[i] = (TYPE##BITS##_t)__builtin_clz(a[i]);			\
> +    }									\
> +}
> +
> +FUNC(s, int, 32, clz)
> +FUNC(u, uint, 32, clz)
> +FUNC(s, int, 16, clz)
> +FUNC(u, uint, 16, clz)
> +FUNC(s, int, 8, clz)
> +FUNC(u, uint, 8, clz)
> +
> +/* 16 and 8-bit versions are not vectorized because they need pack/unpack
> +   patterns since __builtin_clz uses 32-bit parameter and return value.  */
> +/* { dg-final { scan-assembler-times {vclz\.i32  q[0-9]+, q[0-9]+} 2 } } */
> +/* { dg-final { scan-assembler-times {vclz\.i16  q[0-9]+, q[0-9]+} 2 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times {vclz\.i8  q[0-9]+, q[0-9]+} 2 { xfail *-*-* } } } */
Christophe Lyon June 9, 2021, 3:41 p.m. UTC | #2
On Tue, 8 Jun 2021 at 13:58, Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Christophe Lyon <christophe.lyon@linaro.org> writes:
> > This patch adds support for auto-vectorization of clz for MVE.
> >
> > It does so by removing the unspec from mve_vclzq_<supf><mode> and uses
> > 'clz' instead. It moves to neon_vclz<mode> expander from neon.md to
> > vec-common.md and renames it into the standard name clz<mode>2.
> >
> > 2021-06-03  Christophe Lyon  <christophe.lyon@linaro.org>
> >
> >       gcc/
> >       * config/arm/iterators.md (<supf>): Remove VCLZQ_U, VCLZQ_S.
> >       (VCLZQ): Remove.
> >       * config/arm/mve.md (mve_vclzq_<supf><mode>): Add '@' prefix,
> >       remove <supf> iterator.
> >       (mve_vclzq_u<mode>): New.
> >       * config/arm/neon.md (clz<mode>2): Rename to neon_vclz<mode>.
> >       (neon_vclz<mode): Move to ...
> >       * config/arm/unspecs.md (VCLZQ_U, VCLZQ_S): Remove.
> >       * config/arm/vec-common.md ... here. Add support for MVE.
> >
> >       gcc/testsuite/
> >       * gcc.target/arm/simd/mve-vclz.c: New test.
> > ---
> >  gcc/config/arm/iterators.md                  |  3 +--
> >  gcc/config/arm/mve.md                        | 12 ++++++---
> >  gcc/config/arm/neon.md                       | 11 +-------
> >  gcc/config/arm/unspecs.md                    |  2 --
> >  gcc/config/arm/vec-common.md                 | 13 +++++++++
> >  gcc/testsuite/gcc.target/arm/simd/mve-vclz.c | 28 ++++++++++++++++++++
> >  6 files changed, 52 insertions(+), 17 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-vclz.c
> >
> > diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
> > index 3042bafc6c6..5c4fe895268 100644
> > --- a/gcc/config/arm/iterators.md
> > +++ b/gcc/config/arm/iterators.md
> > @@ -1288,7 +1288,7 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
> >                      (VMOVLBQ_U "u") (VCVTQ_FROM_F_S "s") (VCVTQ_FROM_F_U "u")
> >                      (VCVTPQ_S "s") (VCVTPQ_U "u") (VCVTNQ_S "s")
> >                      (VCVTNQ_U "u") (VCVTMQ_S "s") (VCVTMQ_U "u")
> > -                    (VCLZQ_U "u") (VCLZQ_S "s") (VREV32Q_U "u")
> > +                    (VREV32Q_U "u")
> >                      (VREV32Q_S "s") (VADDLVQ_U "u") (VADDLVQ_S "s")
> >                      (VCVTQ_N_TO_F_S "s") (VCVTQ_N_TO_F_U "u")
> >                      (VCREATEQ_U "u") (VCREATEQ_S "s") (VSHRQ_N_S "s")
> > @@ -1538,7 +1538,6 @@ (define_int_iterator VCVTQ_FROM_F [VCVTQ_FROM_F_S VCVTQ_FROM_F_U])
> >  (define_int_iterator VREV16Q [VREV16Q_U VREV16Q_S])
> >  (define_int_iterator VCVTAQ [VCVTAQ_U VCVTAQ_S])
> >  (define_int_iterator VDUPQ_N [VDUPQ_N_U VDUPQ_N_S])
> > -(define_int_iterator VCLZQ [VCLZQ_U VCLZQ_S])
> >  (define_int_iterator VADDVQ [VADDVQ_U VADDVQ_S])
> >  (define_int_iterator VREV32Q [VREV32Q_U VREV32Q_S])
> >  (define_int_iterator VMOVLBQ [VMOVLBQ_S VMOVLBQ_U])
> > diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> > index 04aa612331a..99e46d0bc69 100644
> > --- a/gcc/config/arm/mve.md
> > +++ b/gcc/config/arm/mve.md
> > @@ -435,16 +435,22 @@ (define_insn "mve_vdupq_n_<supf><mode>"
> >  ;;
> >  ;; [vclzq_u, vclzq_s])
> >  ;;
> > -(define_insn "mve_vclzq_<supf><mode>"
> > +(define_insn "@mve_vclzq_s<mode>"
> >    [
> >     (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> > -     (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")]
> > -      VCLZQ))
> > +     (clz:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")))
> >    ]
> >    "TARGET_HAVE_MVE"
> >    "vclz.i%#<V_sz_elem>  %q0, %q1"
> >    [(set_attr "type" "mve_move")
> >  ])
> > +(define_expand "mve_vclzq_u<mode>"
> > +  [
> > +   (set (match_operand:MVE_2 0 "s_register_operand")
> > +     (clz:MVE_2 (match_operand:MVE_2 1 "s_register_operand")))
> > +  ]
> > +  "TARGET_HAVE_MVE"
> > +)
> >
> >  ;;
> >  ;; [vclsq_s])
> > diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> > index 18571d819eb..0fdffaf4ec4 100644
> > --- a/gcc/config/arm/neon.md
> > +++ b/gcc/config/arm/neon.md
> > @@ -3018,7 +3018,7 @@ (define_insn "neon_vcls<mode>"
> >    [(set_attr "type" "neon_cls<q>")]
> >  )
> >
> > -(define_insn "clz<mode>2"
> > +(define_insn "neon_vclz<mode>"
> >    [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
> >          (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
> >    "TARGET_NEON"
> > @@ -3026,15 +3026,6 @@ (define_insn "clz<mode>2"
> >    [(set_attr "type" "neon_cnt<q>")]
> >  )
> >
> > -(define_expand "neon_vclz<mode>"
> > -  [(match_operand:VDQIW 0 "s_register_operand")
> > -   (match_operand:VDQIW 1 "s_register_operand")]
> > -  "TARGET_NEON"
> > -{
> > -  emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
> > -  DONE;
> > -})
> > -
> >  (define_insn "popcount<mode>2"
> >    [(set (match_operand:VE 0 "s_register_operand" "=w")
> >          (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
> > diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
> > index ed1bc293b78..ad1c6edd005 100644
> > --- a/gcc/config/arm/unspecs.md
> > +++ b/gcc/config/arm/unspecs.md
> > @@ -556,8 +556,6 @@ (define_c_enum "unspec" [
> >    VQABSQ_S
> >    VDUPQ_N_U
> >    VDUPQ_N_S
> > -  VCLZQ_U
> > -  VCLZQ_S
> >    VCLSQ_S
> >    VADDVQ_S
> >    VADDVQ_U
> > diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
> > index 2779c1a8aaa..1ba1e5eb008 100644
> > --- a/gcc/config/arm/vec-common.md
> > +++ b/gcc/config/arm/vec-common.md
> > @@ -625,3 +625,16 @@ (define_expand "uavg<mode>3_ceil"
> >                              operands[0], operands[1], operands[2]));
> >    DONE;
> >  })
> > +
> > +(define_expand "clz<mode>2"
> > +  [(match_operand:VDQIW 0 "s_register_operand")
> > +   (match_operand:VDQIW 1 "s_register_operand")]
> > +  "ARM_HAVE_<MODE>_ARITH
> > +   && !TARGET_REALLY_IWMMXT"
> > +{
> > +  if (TARGET_NEON)
> > +    emit_insn (gen_neon_vclz<mode> (operands[0], operands[1]));
> > +  else
> > +    emit_insn (gen_mve_vclzq_s (<MODE>mode, operands[0], operands[1]));
> > +  DONE;
> > +})
>
> For cases like this where the patterns are the same, I think we should
> instead do:
>
>   [(set (match_operand:MVE_2 0 "s_register_operand")
>         (clz:MVE_2 (match_operand:MVE_2 1 "s_register_operand")))]
>   "…"
>
> and drop the C code.
>
> OK with that change, thanks.
>

I guess you mean VDQIW instead of MVE_2 ? Otherwise this will not
cover all Neon modes?


> Richard
>
> > diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vclz.c b/gcc/testsuite/gcc.target/arm/simd/mve-vclz.c
> > new file mode 100644
> > index 00000000000..7068736bc28
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vclz.c
> > @@ -0,0 +1,28 @@
> > +/* { dg-do compile } */
> > +/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> > +/* { dg-add-options arm_v8_1m_mve } */
> > +/* { dg-additional-options "-O3" } */
> > +
> > +#include <stdint.h>
> > +
> > +#define FUNC(SIGN, TYPE, BITS, NAME)                                 \
> > +  void test_ ## NAME ##_ ## SIGN ## BITS (TYPE##BITS##_t * __restrict__ dest, \
> > +                                       TYPE##BITS##_t *a) {          \
> > +    int i;                                                           \
> > +    for (i=0; i < (128 / BITS); i++) {                                       \
> > +      dest[i] = (TYPE##BITS##_t)__builtin_clz(a[i]);                 \
> > +    }                                                                        \
> > +}
> > +
> > +FUNC(s, int, 32, clz)
> > +FUNC(u, uint, 32, clz)
> > +FUNC(s, int, 16, clz)
> > +FUNC(u, uint, 16, clz)
> > +FUNC(s, int, 8, clz)
> > +FUNC(u, uint, 8, clz)
> > +
> > +/* 16 and 8-bit versions are not vectorized because they need pack/unpack
> > +   patterns since __builtin_clz uses 32-bit parameter and return value.  */
> > +/* { dg-final { scan-assembler-times {vclz\.i32  q[0-9]+, q[0-9]+} 2 } } */
> > +/* { dg-final { scan-assembler-times {vclz\.i16  q[0-9]+, q[0-9]+} 2 { xfail *-*-* } } } */
> > +/* { dg-final { scan-assembler-times {vclz\.i8  q[0-9]+, q[0-9]+} 2 { xfail *-*-* } } } */
Richard Sandiford June 9, 2021, 3:47 p.m. UTC | #3
Christophe Lyon <christophe.lyon@linaro.org> writes:
> On Tue, 8 Jun 2021 at 13:58, Richard Sandiford
> <richard.sandiford@arm.com> wrote:
>>
>> Christophe Lyon <christophe.lyon@linaro.org> writes:
>> > This patch adds support for auto-vectorization of clz for MVE.
>> >
>> > It does so by removing the unspec from mve_vclzq_<supf><mode> and uses
>> > 'clz' instead. It moves to neon_vclz<mode> expander from neon.md to
>> > vec-common.md and renames it into the standard name clz<mode>2.
>> >
>> > 2021-06-03  Christophe Lyon  <christophe.lyon@linaro.org>
>> >
>> >       gcc/
>> >       * config/arm/iterators.md (<supf>): Remove VCLZQ_U, VCLZQ_S.
>> >       (VCLZQ): Remove.
>> >       * config/arm/mve.md (mve_vclzq_<supf><mode>): Add '@' prefix,
>> >       remove <supf> iterator.
>> >       (mve_vclzq_u<mode>): New.
>> >       * config/arm/neon.md (clz<mode>2): Rename to neon_vclz<mode>.
>> >       (neon_vclz<mode): Move to ...
>> >       * config/arm/unspecs.md (VCLZQ_U, VCLZQ_S): Remove.
>> >       * config/arm/vec-common.md ... here. Add support for MVE.
>> >
>> >       gcc/testsuite/
>> >       * gcc.target/arm/simd/mve-vclz.c: New test.
>> > ---
>> >  gcc/config/arm/iterators.md                  |  3 +--
>> >  gcc/config/arm/mve.md                        | 12 ++++++---
>> >  gcc/config/arm/neon.md                       | 11 +-------
>> >  gcc/config/arm/unspecs.md                    |  2 --
>> >  gcc/config/arm/vec-common.md                 | 13 +++++++++
>> >  gcc/testsuite/gcc.target/arm/simd/mve-vclz.c | 28 ++++++++++++++++++++
>> >  6 files changed, 52 insertions(+), 17 deletions(-)
>> >  create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-vclz.c
>> >
>> > diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
>> > index 3042bafc6c6..5c4fe895268 100644
>> > --- a/gcc/config/arm/iterators.md
>> > +++ b/gcc/config/arm/iterators.md
>> > @@ -1288,7 +1288,7 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
>> >                      (VMOVLBQ_U "u") (VCVTQ_FROM_F_S "s") (VCVTQ_FROM_F_U "u")
>> >                      (VCVTPQ_S "s") (VCVTPQ_U "u") (VCVTNQ_S "s")
>> >                      (VCVTNQ_U "u") (VCVTMQ_S "s") (VCVTMQ_U "u")
>> > -                    (VCLZQ_U "u") (VCLZQ_S "s") (VREV32Q_U "u")
>> > +                    (VREV32Q_U "u")
>> >                      (VREV32Q_S "s") (VADDLVQ_U "u") (VADDLVQ_S "s")
>> >                      (VCVTQ_N_TO_F_S "s") (VCVTQ_N_TO_F_U "u")
>> >                      (VCREATEQ_U "u") (VCREATEQ_S "s") (VSHRQ_N_S "s")
>> > @@ -1538,7 +1538,6 @@ (define_int_iterator VCVTQ_FROM_F [VCVTQ_FROM_F_S VCVTQ_FROM_F_U])
>> >  (define_int_iterator VREV16Q [VREV16Q_U VREV16Q_S])
>> >  (define_int_iterator VCVTAQ [VCVTAQ_U VCVTAQ_S])
>> >  (define_int_iterator VDUPQ_N [VDUPQ_N_U VDUPQ_N_S])
>> > -(define_int_iterator VCLZQ [VCLZQ_U VCLZQ_S])
>> >  (define_int_iterator VADDVQ [VADDVQ_U VADDVQ_S])
>> >  (define_int_iterator VREV32Q [VREV32Q_U VREV32Q_S])
>> >  (define_int_iterator VMOVLBQ [VMOVLBQ_S VMOVLBQ_U])
>> > diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
>> > index 04aa612331a..99e46d0bc69 100644
>> > --- a/gcc/config/arm/mve.md
>> > +++ b/gcc/config/arm/mve.md
>> > @@ -435,16 +435,22 @@ (define_insn "mve_vdupq_n_<supf><mode>"
>> >  ;;
>> >  ;; [vclzq_u, vclzq_s])
>> >  ;;
>> > -(define_insn "mve_vclzq_<supf><mode>"
>> > +(define_insn "@mve_vclzq_s<mode>"
>> >    [
>> >     (set (match_operand:MVE_2 0 "s_register_operand" "=w")
>> > -     (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")]
>> > -      VCLZQ))
>> > +     (clz:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")))
>> >    ]
>> >    "TARGET_HAVE_MVE"
>> >    "vclz.i%#<V_sz_elem>  %q0, %q1"
>> >    [(set_attr "type" "mve_move")
>> >  ])
>> > +(define_expand "mve_vclzq_u<mode>"
>> > +  [
>> > +   (set (match_operand:MVE_2 0 "s_register_operand")
>> > +     (clz:MVE_2 (match_operand:MVE_2 1 "s_register_operand")))
>> > +  ]
>> > +  "TARGET_HAVE_MVE"
>> > +)
>> >
>> >  ;;
>> >  ;; [vclsq_s])
>> > diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
>> > index 18571d819eb..0fdffaf4ec4 100644
>> > --- a/gcc/config/arm/neon.md
>> > +++ b/gcc/config/arm/neon.md
>> > @@ -3018,7 +3018,7 @@ (define_insn "neon_vcls<mode>"
>> >    [(set_attr "type" "neon_cls<q>")]
>> >  )
>> >
>> > -(define_insn "clz<mode>2"
>> > +(define_insn "neon_vclz<mode>"
>> >    [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
>> >          (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
>> >    "TARGET_NEON"
>> > @@ -3026,15 +3026,6 @@ (define_insn "clz<mode>2"
>> >    [(set_attr "type" "neon_cnt<q>")]
>> >  )
>> >
>> > -(define_expand "neon_vclz<mode>"
>> > -  [(match_operand:VDQIW 0 "s_register_operand")
>> > -   (match_operand:VDQIW 1 "s_register_operand")]
>> > -  "TARGET_NEON"
>> > -{
>> > -  emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
>> > -  DONE;
>> > -})
>> > -
>> >  (define_insn "popcount<mode>2"
>> >    [(set (match_operand:VE 0 "s_register_operand" "=w")
>> >          (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
>> > diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
>> > index ed1bc293b78..ad1c6edd005 100644
>> > --- a/gcc/config/arm/unspecs.md
>> > +++ b/gcc/config/arm/unspecs.md
>> > @@ -556,8 +556,6 @@ (define_c_enum "unspec" [
>> >    VQABSQ_S
>> >    VDUPQ_N_U
>> >    VDUPQ_N_S
>> > -  VCLZQ_U
>> > -  VCLZQ_S
>> >    VCLSQ_S
>> >    VADDVQ_S
>> >    VADDVQ_U
>> > diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
>> > index 2779c1a8aaa..1ba1e5eb008 100644
>> > --- a/gcc/config/arm/vec-common.md
>> > +++ b/gcc/config/arm/vec-common.md
>> > @@ -625,3 +625,16 @@ (define_expand "uavg<mode>3_ceil"
>> >                              operands[0], operands[1], operands[2]));
>> >    DONE;
>> >  })
>> > +
>> > +(define_expand "clz<mode>2"
>> > +  [(match_operand:VDQIW 0 "s_register_operand")
>> > +   (match_operand:VDQIW 1 "s_register_operand")]
>> > +  "ARM_HAVE_<MODE>_ARITH
>> > +   && !TARGET_REALLY_IWMMXT"
>> > +{
>> > +  if (TARGET_NEON)
>> > +    emit_insn (gen_neon_vclz<mode> (operands[0], operands[1]));
>> > +  else
>> > +    emit_insn (gen_mve_vclzq_s (<MODE>mode, operands[0], operands[1]));
>> > +  DONE;
>> > +})
>>
>> For cases like this where the patterns are the same, I think we should
>> instead do:
>>
>>   [(set (match_operand:MVE_2 0 "s_register_operand")
>>         (clz:MVE_2 (match_operand:MVE_2 1 "s_register_operand")))]
>>   "…"
>>
>> and drop the C code.
>>
>> OK with that change, thanks.
>>
>
> I guess you mean VDQIW instead of MVE_2 ? Otherwise this will not
> cover all Neon modes?

Oops, yes.  I copied it from the MVE patterns and forgot to change
the modes.

Thanks,
Richard
diff mbox series

Patch

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 3042bafc6c6..5c4fe895268 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -1288,7 +1288,7 @@  (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
 		       (VMOVLBQ_U "u") (VCVTQ_FROM_F_S "s") (VCVTQ_FROM_F_U "u")
 		       (VCVTPQ_S "s") (VCVTPQ_U "u") (VCVTNQ_S "s")
 		       (VCVTNQ_U "u") (VCVTMQ_S "s") (VCVTMQ_U "u")
-		       (VCLZQ_U "u") (VCLZQ_S "s") (VREV32Q_U "u")
+		       (VREV32Q_U "u")
 		       (VREV32Q_S "s") (VADDLVQ_U "u") (VADDLVQ_S "s")
 		       (VCVTQ_N_TO_F_S "s") (VCVTQ_N_TO_F_U "u")
 		       (VCREATEQ_U "u") (VCREATEQ_S "s") (VSHRQ_N_S "s")
@@ -1538,7 +1538,6 @@  (define_int_iterator VCVTQ_FROM_F [VCVTQ_FROM_F_S VCVTQ_FROM_F_U])
 (define_int_iterator VREV16Q [VREV16Q_U VREV16Q_S])
 (define_int_iterator VCVTAQ [VCVTAQ_U VCVTAQ_S])
 (define_int_iterator VDUPQ_N [VDUPQ_N_U VDUPQ_N_S])
-(define_int_iterator VCLZQ [VCLZQ_U VCLZQ_S])
 (define_int_iterator VADDVQ [VADDVQ_U VADDVQ_S])
 (define_int_iterator VREV32Q [VREV32Q_U VREV32Q_S])
 (define_int_iterator VMOVLBQ [VMOVLBQ_S VMOVLBQ_U])
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 04aa612331a..99e46d0bc69 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -435,16 +435,22 @@  (define_insn "mve_vdupq_n_<supf><mode>"
 ;;
 ;; [vclzq_u, vclzq_s])
 ;;
-(define_insn "mve_vclzq_<supf><mode>"
+(define_insn "@mve_vclzq_s<mode>"
   [
    (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")]
-	 VCLZQ))
+	(clz:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w")))
   ]
   "TARGET_HAVE_MVE"
   "vclz.i%#<V_sz_elem>  %q0, %q1"
   [(set_attr "type" "mve_move")
 ])
+(define_expand "mve_vclzq_u<mode>"
+  [
+   (set (match_operand:MVE_2 0 "s_register_operand")
+	(clz:MVE_2 (match_operand:MVE_2 1 "s_register_operand")))
+  ]
+  "TARGET_HAVE_MVE"
+)
 
 ;;
 ;; [vclsq_s])
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 18571d819eb..0fdffaf4ec4 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -3018,7 +3018,7 @@  (define_insn "neon_vcls<mode>"
   [(set_attr "type" "neon_cls<q>")]
 )
 
-(define_insn "clz<mode>2"
+(define_insn "neon_vclz<mode>"
   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
         (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
   "TARGET_NEON"
@@ -3026,15 +3026,6 @@  (define_insn "clz<mode>2"
   [(set_attr "type" "neon_cnt<q>")]
 )
 
-(define_expand "neon_vclz<mode>"
-  [(match_operand:VDQIW 0 "s_register_operand")
-   (match_operand:VDQIW 1 "s_register_operand")]
-  "TARGET_NEON"
-{
-  emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
-  DONE;
-})
-
 (define_insn "popcount<mode>2"
   [(set (match_operand:VE 0 "s_register_operand" "=w")
         (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index ed1bc293b78..ad1c6edd005 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -556,8 +556,6 @@  (define_c_enum "unspec" [
   VQABSQ_S
   VDUPQ_N_U
   VDUPQ_N_S
-  VCLZQ_U
-  VCLZQ_S
   VCLSQ_S
   VADDVQ_S
   VADDVQ_U
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index 2779c1a8aaa..1ba1e5eb008 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -625,3 +625,16 @@  (define_expand "uavg<mode>3_ceil"
 			       operands[0], operands[1], operands[2]));
   DONE;
 })
+
+(define_expand "clz<mode>2"
+  [(match_operand:VDQIW 0 "s_register_operand")
+   (match_operand:VDQIW 1 "s_register_operand")]
+  "ARM_HAVE_<MODE>_ARITH
+   && !TARGET_REALLY_IWMMXT"
+{
+  if (TARGET_NEON)
+    emit_insn (gen_neon_vclz<mode> (operands[0], operands[1]));
+  else
+    emit_insn (gen_mve_vclzq_s (<MODE>mode, operands[0], operands[1]));
+  DONE;
+})
diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vclz.c b/gcc/testsuite/gcc.target/arm/simd/mve-vclz.c
new file mode 100644
index 00000000000..7068736bc28
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/mve-vclz.c
@@ -0,0 +1,28 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O3" } */
+
+#include <stdint.h>
+
+#define FUNC(SIGN, TYPE, BITS, NAME)					\
+  void test_ ## NAME ##_ ## SIGN ## BITS (TYPE##BITS##_t * __restrict__ dest, \
+					  TYPE##BITS##_t *a) {		\
+    int i;								\
+    for (i=0; i < (128 / BITS); i++) {					\
+      dest[i] = (TYPE##BITS##_t)__builtin_clz(a[i]);			\
+    }									\
+}
+
+FUNC(s, int, 32, clz)
+FUNC(u, uint, 32, clz)
+FUNC(s, int, 16, clz)
+FUNC(u, uint, 16, clz)
+FUNC(s, int, 8, clz)
+FUNC(u, uint, 8, clz)
+
+/* 16 and 8-bit versions are not vectorized because they need pack/unpack
+   patterns since __builtin_clz uses 32-bit parameter and return value.  */
+/* { dg-final { scan-assembler-times {vclz\.i32  q[0-9]+, q[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-times {vclz\.i16  q[0-9]+, q[0-9]+} 2 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {vclz\.i8  q[0-9]+, q[0-9]+} 2 { xfail *-*-* } } } */