diff mbox series

[1/5] arm: Auto-vectorization for MVE: vand

Message ID 1607432763-11859-1-git-send-email-christophe.lyon@linaro.org
State New
Headers show
Series [1/5] arm: Auto-vectorization for MVE: vand | expand

Commit Message

Christophe Lyon Dec. 8, 2020, 1:05 p.m. UTC
This patch enables MVE vandq instructions for auto-vectorization.  MVE
vandq insns in mve.md are modified to use 'and' instead of unspec
expression to support and<mode>3.  The and<mode>3 expander is added to
vec-common.md

2020-12-03  Christophe Lyon  <christophe.lyon@linaro.org>

	gcc/
	* config/arm/iterators.md (supf): Remove VANDQ_S and VANDQ_U.
	(VANQ): Remove.
	(VDQ): Add TARGET_HAVE_MVE condition where relevant.
	* config/arm/mve.md (mve_vandq_u<mode>): New entry for vand
	instruction using expression 'and'.
	(mve_vandq_s<mode>): New expander.
	(mve_vaddq_n_f<mode>): Use 'and' code instead of unspec.
	* config/arm/neon.md (and<mode>3): Rename into and<mode>3_neon.
	* config/arm/predicates.md (imm_for_neon_inv_logic_operand):
	Enable for MVE.
	* config/arm/unspecs.md (VANDQ_S, VANDQ_U, VANDQ_F): Remove.
	* config/arm/vec-common.md (and<mode>3): New expander.

	gcc/testsuite/
	* gcc.target/arm/simd/mve-vand.c: New test.
---
 gcc/config/arm/iterators.md                  | 11 +++--
 gcc/config/arm/mve.md                        | 40 +++++++++++++-----
 gcc/config/arm/neon.md                       |  2 +-
 gcc/config/arm/predicates.md                 |  2 +-
 gcc/config/arm/unspecs.md                    |  3 --
 gcc/config/arm/vec-common.md                 |  8 ++++
 gcc/testsuite/gcc.target/arm/simd/mve-vand.c | 63 ++++++++++++++++++++++++++++
 7 files changed, 109 insertions(+), 20 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-vand.c

Comments

Kyrylo Tkachov Dec. 8, 2020, 1:19 p.m. UTC | #1
Hi Christophe

> -----Original Message-----
> From: Gcc-patches <gcc-patches-bounces@gcc.gnu.org> On Behalf Of
> Christophe Lyon via Gcc-patches
> Sent: 08 December 2020 13:06
> To: gcc-patches@gcc.gnu.org
> Subject: [PATCH 1/5] arm: Auto-vectorization for MVE: vand
> 
> This patch enables MVE vandq instructions for auto-vectorization.  MVE
> vandq insns in mve.md are modified to use 'and' instead of unspec
> expression to support and<mode>3.  The and<mode>3 expander is added to
> vec-common.md
> 
> 2020-12-03  Christophe Lyon  <christophe.lyon@linaro.org>
> 
> 	gcc/
> 	* config/arm/iterators.md (supf): Remove VANDQ_S and VANDQ_U.
> 	(VANQ): Remove.
> 	(VDQ): Add TARGET_HAVE_MVE condition where relevant.
> 	* config/arm/mve.md (mve_vandq_u<mode>): New entry for vand
> 	instruction using expression 'and'.
> 	(mve_vandq_s<mode>): New expander.
> 	(mve_vaddq_n_f<mode>): Use 'and' code instead of unspec.
> 	* config/arm/neon.md (and<mode>3): Rename into
> and<mode>3_neon.
> 	* config/arm/predicates.md (imm_for_neon_inv_logic_operand):
> 	Enable for MVE.
> 	* config/arm/unspecs.md (VANDQ_S, VANDQ_U, VANDQ_F):
> Remove.
> 	* config/arm/vec-common.md (and<mode>3): New expander.
> 
> 	gcc/testsuite/
> 	* gcc.target/arm/simd/mve-vand.c: New test.
> ---
>  gcc/config/arm/iterators.md                  | 11 +++--
>  gcc/config/arm/mve.md                        | 40 +++++++++++++-----
>  gcc/config/arm/neon.md                       |  2 +-
>  gcc/config/arm/predicates.md                 |  2 +-
>  gcc/config/arm/unspecs.md                    |  3 --
>  gcc/config/arm/vec-common.md                 |  8 ++++
>  gcc/testsuite/gcc.target/arm/simd/mve-vand.c | 63
> ++++++++++++++++++++++++++++
>  7 files changed, 109 insertions(+), 20 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-vand.c
> 
> diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
> index 592af35..badad2b 100644
> --- a/gcc/config/arm/iterators.md
> +++ b/gcc/config/arm/iterators.md
> @@ -147,7 +147,12 @@ (define_mode_iterator VW [V8QI V4HI V2SI])
>  (define_mode_iterator VN [V8HI V4SI V2DI])
> 
>  ;; All supported vector modes (except singleton DImode).
> -(define_mode_iterator VDQ [V8QI V16QI V4HI V8HI V2SI V4SI V4HF V8HF
> V2SF V4SF V2DI])
> +(define_mode_iterator VDQ [(V8QI "!TARGET_HAVE_MVE") V16QI
> +			   (V4HI "!TARGET_HAVE_MVE") V8HI
> +			   (V2SI "!TARGET_HAVE_MVE") V4SI
> +			   (V4HF "!TARGET_HAVE_MVE") V8HF
> +			   (V2SF "!TARGET_HAVE_MVE") V4SF
> +			   (V2DI "!TARGET_HAVE_MVE")])
> 
>  ;; All supported floating-point vector modes (except V2DF).
>  (define_mode_iterator VF [(V4HF "TARGET_NEON_FP16INST")
> @@ -1232,8 +1237,7 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s")
> (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
>  		       (VADDLVQ_P_U "u") (VCMPNEQ_U "u") (VCMPNEQ_S "s")
>  		       (VABDQ_M_S "s") (VABDQ_M_U "u") (VABDQ_S "s")
>  		       (VABDQ_U "u") (VADDQ_N_S "s") (VADDQ_N_U "u")
> -		       (VADDVQ_P_S "s")	(VADDVQ_P_U "u") (VANDQ_S "s")
> -		       (VANDQ_U "u") (VBICQ_S "s") (VBICQ_U "u")
> +		       (VADDVQ_P_S "s")	(VADDVQ_P_U "u") (VBICQ_S "s")
> (VBICQ_U "u")
>  		       (VBRSRQ_N_S "s") (VBRSRQ_N_U "u")
> (VCADDQ_ROT270_S "s")
>  		       (VCADDQ_ROT270_U "u") (VCADDQ_ROT90_S "s")
>  		       (VCMPEQQ_S "s") (VCMPEQQ_U "u")
> (VCADDQ_ROT90_U "u")
> @@ -1501,7 +1505,6 @@ (define_int_iterator VABDQ [VABDQ_S VABDQ_U])
>  (define_int_iterator VADDQ_N [VADDQ_N_S VADDQ_N_U])
>  (define_int_iterator VADDVAQ [VADDVAQ_S VADDVAQ_U])
>  (define_int_iterator VADDVQ_P [VADDVQ_P_U VADDVQ_P_S])
> -(define_int_iterator VANDQ [VANDQ_U VANDQ_S])
>  (define_int_iterator VBICQ [VBICQ_S VBICQ_U])
>  (define_int_iterator VBRSRQ_N [VBRSRQ_N_U VBRSRQ_N_S])
>  (define_int_iterator VCADDQ_ROT270 [VCADDQ_ROT270_S
> VCADDQ_ROT270_U])
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index ecbaaa9..238c828 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -894,17 +894,36 @@ (define_insn "mve_vaddvq_p_<supf><mode>"
>  ;;
>  ;; [vandq_u, vandq_s])
>  ;;
> -(define_insn "mve_vandq_<supf><mode>"
> +;; signed and unsigned versions are the same: define the unsigned
> +;; insn, and use an expander for the signed one as we still reference
> +;; both names from arm_mve.h.
> +;; We use the same code as in neon.md (TODO: avoid this duplication).
> +(define_insn "mve_vandq_u<mode>"
> +  [
> +   (set (match_operand:MVE_2 0 "s_register_operand" "=w,w")
> +	(and:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w,0")
> +		   (match_operand:MVE_2 2 "neon_inv_logic_op2" "w,DL")))
> +  ]
> +  "TARGET_HAVE_MVE"
> +  {
> +    switch (which_alternative)
> +      {
> +      case 0: return "vand\t%q0, %q1, %q2";
> +      case 1: return neon_output_logic_immediate ("vand", &operands[2],
> +			<MODE>mode, 1, VALID_NEON_QREG_MODE
> (<MODE>mode));
> +      default: gcc_unreachable ();
> +    }

We can avoid this switch statement by using the @ syntax for alternatives and using a '*' for the second alternative.
See the *aarch64_sve_mov<mode>_ldr_str pattern in config/aarch64/aarch64-sve.md, for example.

Ok with that change.
Thanks for doing this!
Kyrill

> +  }
> +  [(set_attr "type" "mve_move")
> +])
> +(define_expand "mve_vandq_s<mode>"
>    [
> -   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> -	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
> -		       (match_operand:MVE_2 2 "s_register_operand" "w")]
> -	 VANDQ))
> +   (set (match_operand:MVE_2 0 "s_register_operand")
> +	(and:MVE_2 (match_operand:MVE_2 1 "s_register_operand")
> +		   (match_operand:MVE_2 2 "neon_inv_logic_op2")))
>    ]
>    "TARGET_HAVE_MVE"
> -  "vand %q0, %q1, %q2"
> -  [(set_attr "type" "mve_move")
> -])
> +)
> 
>  ;;
>  ;; [vbicq_s, vbicq_u])
> @@ -2019,9 +2038,8 @@ (define_insn "mve_vaddq_n_f<mode>"
>  (define_insn "mve_vandq_f<mode>"
>    [
>     (set (match_operand:MVE_0 0 "s_register_operand" "=w")
> -	(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
> -		       (match_operand:MVE_0 2 "s_register_operand" "w")]
> -	 VANDQ_F))
> +	(and:MVE_0 (match_operand:MVE_0 1 "s_register_operand" "w")
> +		   (match_operand:MVE_0 2 "s_register_operand" "w")))
>    ]
>    "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
>    "vand %q0, %q1, %q2"
> diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> index 2d76769..dc4707d 100644
> --- a/gcc/config/arm/neon.md
> +++ b/gcc/config/arm/neon.md
> @@ -712,7 +712,7 @@ (define_insn "ior<mode>3"
>  ;; corresponds to the canonical form the middle-end expects to use for
>  ;; immediate bitwise-ANDs.
> 
> -(define_insn "and<mode>3"
> +(define_insn "and<mode>3_neon"
>    [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
>  	(and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
>  		 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
> diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
> index 2144520..5f58f7c 100644
> --- a/gcc/config/arm/predicates.md
> +++ b/gcc/config/arm/predicates.md
> @@ -107,7 +107,7 @@ (define_predicate "vpr_register_operand"
>  (define_predicate "imm_for_neon_inv_logic_operand"
>    (match_code "const_vector")
>  {
> -  return (TARGET_NEON
> +  return ((TARGET_NEON || TARGET_HAVE_MVE)
>            && neon_immediate_valid_for_logic (op, mode, 1, NULL, NULL));
>  })
> 
> diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
> index a3844e9..18b3048 100644
> --- a/gcc/config/arm/unspecs.md
> +++ b/gcc/config/arm/unspecs.md
> @@ -601,7 +601,6 @@ (define_c_enum "unspec" [
>    VADDQ_N_S
>    VADDVAQ_S
>    VADDVQ_P_S
> -  VANDQ_S
>    VBICQ_S
>    VBRSRQ_N_S
>    VCADDQ_ROT270_S
> @@ -648,7 +647,6 @@ (define_c_enum "unspec" [
>    VADDQ_N_U
>    VADDVAQ_U
>    VADDVQ_P_U
> -  VANDQ_U
>    VBICQ_U
>    VBRSRQ_N_U
>    VCADDQ_ROT270_U
> @@ -721,7 +719,6 @@ (define_c_enum "unspec" [
>    VABDQ_M_U
>    VABDQ_F
>    VADDQ_N_F
> -  VANDQ_F
>    VBICQ_F
>    VCADDQ_ROT270_F
>    VCADDQ_ROT90_F
> diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-
> common.md
> index 250e503..2117e5b 100644
> --- a/gcc/config/arm/vec-common.md
> +++ b/gcc/config/arm/vec-common.md
> @@ -172,3 +172,11 @@ (define_expand "vec_set<mode>"
>  					       GEN_INT (elem), operands[0]));
>    DONE;
>  })
> +
> +(define_expand "and<mode>3"
> +  [(set (match_operand:VDQ 0 "s_register_operand" "")
> +	(and:VDQ (match_operand:VDQ 1 "s_register_operand" "")
> +		 (match_operand:VDQ 2 "neon_inv_logic_op2" "")))]
> +  "TARGET_NEON
> +   || TARGET_HAVE_MVE"
> +)
> diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vand.c
> b/gcc/testsuite/gcc.target/arm/simd/mve-vand.c
> new file mode 100644
> index 0000000..26247dc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vand.c
> @@ -0,0 +1,63 @@
> +/* { dg-do assemble } */
> +/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> +/* { dg-add-options arm_v8_1m_mve } */
> +/* { dg-additional-options "-O3" } */
> +
> +#include <stdint.h>
> +
> +#define FUNC(SIGN, TYPE, BITS, NB, OP, NAME)
> 	\
> +  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t *
> __restrict__ dest, TYPE##BITS##_t *a, TYPE##BITS##_t *b) { \
> +    int i;								\
> +    for (i=0; i<NB; i++) {						\
> +      dest[i] = a[i] OP b[i];						\
> +    }									\
> +}
> +
> +#define FUNC_IMM(SIGN, TYPE, BITS, NB, OP, NAME)
> 	\
> +  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t *
> __restrict__ dest, TYPE##BITS##_t *a) { \
> +    int i;								\
> +    for (i=0; i<NB; i++) {						\
> +      dest[i] = a[i] OP 1;						\
> +    }									\
> +}
> +
> +/* 64-bit vectors.  */
> +FUNC(s, int, 32, 2, &, vand)
> +FUNC(u, uint, 32, 2, &, vand)
> +FUNC(s, int, 16, 4, &, vand)
> +FUNC(u, uint, 16, 4, &, vand)
> +FUNC(s, int, 8, 8, &, vand)
> +FUNC(u, uint, 8, 8, &, vand)
> +
> +/* 128-bit vectors.  */
> +FUNC(s, int, 32, 4, &, vand)
> +FUNC(u, uint, 32, 4, &, vand)
> +FUNC(s, int, 16, 8, &, vand)
> +FUNC(u, uint, 16, 8, &, vand)
> +FUNC(s, int, 8, 16, &, vand)
> +FUNC(u, uint, 8, 16, &, vand)
> +
> +/* 64-bit vectors.  */
> +FUNC_IMM(s, int, 32, 2, &, vandimm)
> +FUNC_IMM(u, uint, 32, 2, &, vandimm)
> +FUNC_IMM(s, int, 16, 4, &, vandimm)
> +FUNC_IMM(u, uint, 16, 4, &, vandimm)
> +FUNC_IMM(s, int, 8, 8, &, vandimm)
> +FUNC_IMM(u, uint, 8, 8, &, vandimm)
> +
> +/* 128-bit vectors.  */
> +FUNC_IMM(s, int, 32, 4, &, vandimm)
> +FUNC_IMM(u, uint, 32, 4, &, vandimm)
> +FUNC_IMM(s, int, 16, 8, &, vandimm)
> +FUNC_IMM(u, uint, 16, 8, &, vandimm)
> +FUNC_IMM(s, int, 8, 16, &, vandimm)
> +FUNC_IMM(u, uint, 8, 16, &, vandimm)
> +
> +/* MVE has only 128-bit vectors, so we can vectorize only half of the
> +   functions above.  */
> +/* Although float16 and float32 types are supported at assembly level,
> +   we cannot test them with the '&' operator, so we check only the
> +   integer variants.  */
> +/* For some reason, we do not generate the immediate version, we still
> +   use vldr to load the vector of immediates.  */
> +/* { dg-final { scan-assembler-times {vand\tq[0-9]+, q[0-9]+, q[0-9]+} 12 } }
> */
> --
> 2.7.4
Christophe Lyon Dec. 8, 2020, 1:58 p.m. UTC | #2
On Tue, 8 Dec 2020 at 14:19, Kyrylo Tkachov <Kyrylo.Tkachov@arm.com> wrote:
>
> Hi Christophe
>
> > -----Original Message-----
> > From: Gcc-patches <gcc-patches-bounces@gcc.gnu.org> On Behalf Of
> > Christophe Lyon via Gcc-patches
> > Sent: 08 December 2020 13:06
> > To: gcc-patches@gcc.gnu.org
> > Subject: [PATCH 1/5] arm: Auto-vectorization for MVE: vand
> >
> > This patch enables MVE vandq instructions for auto-vectorization.  MVE
> > vandq insns in mve.md are modified to use 'and' instead of unspec
> > expression to support and<mode>3.  The and<mode>3 expander is added to
> > vec-common.md
> >
> > 2020-12-03  Christophe Lyon  <christophe.lyon@linaro.org>
> >
> >       gcc/
> >       * config/arm/iterators.md (supf): Remove VANDQ_S and VANDQ_U.
> >       (VANQ): Remove.
> >       (VDQ): Add TARGET_HAVE_MVE condition where relevant.
> >       * config/arm/mve.md (mve_vandq_u<mode>): New entry for vand
> >       instruction using expression 'and'.
> >       (mve_vandq_s<mode>): New expander.
> >       (mve_vaddq_n_f<mode>): Use 'and' code instead of unspec.
> >       * config/arm/neon.md (and<mode>3): Rename into
> > and<mode>3_neon.
> >       * config/arm/predicates.md (imm_for_neon_inv_logic_operand):
> >       Enable for MVE.
> >       * config/arm/unspecs.md (VANDQ_S, VANDQ_U, VANDQ_F):
> > Remove.
> >       * config/arm/vec-common.md (and<mode>3): New expander.
> >
> >       gcc/testsuite/
> >       * gcc.target/arm/simd/mve-vand.c: New test.
> > ---
> >  gcc/config/arm/iterators.md                  | 11 +++--
> >  gcc/config/arm/mve.md                        | 40 +++++++++++++-----
> >  gcc/config/arm/neon.md                       |  2 +-
> >  gcc/config/arm/predicates.md                 |  2 +-
> >  gcc/config/arm/unspecs.md                    |  3 --
> >  gcc/config/arm/vec-common.md                 |  8 ++++
> >  gcc/testsuite/gcc.target/arm/simd/mve-vand.c | 63
> > ++++++++++++++++++++++++++++
> >  7 files changed, 109 insertions(+), 20 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-vand.c
> >
> > diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
> > index 592af35..badad2b 100644
> > --- a/gcc/config/arm/iterators.md
> > +++ b/gcc/config/arm/iterators.md
> > @@ -147,7 +147,12 @@ (define_mode_iterator VW [V8QI V4HI V2SI])
> >  (define_mode_iterator VN [V8HI V4SI V2DI])
> >
> >  ;; All supported vector modes (except singleton DImode).
> > -(define_mode_iterator VDQ [V8QI V16QI V4HI V8HI V2SI V4SI V4HF V8HF
> > V2SF V4SF V2DI])
> > +(define_mode_iterator VDQ [(V8QI "!TARGET_HAVE_MVE") V16QI
> > +                        (V4HI "!TARGET_HAVE_MVE") V8HI
> > +                        (V2SI "!TARGET_HAVE_MVE") V4SI
> > +                        (V4HF "!TARGET_HAVE_MVE") V8HF
> > +                        (V2SF "!TARGET_HAVE_MVE") V4SF
> > +                        (V2DI "!TARGET_HAVE_MVE")])
> >
> >  ;; All supported floating-point vector modes (except V2DF).
> >  (define_mode_iterator VF [(V4HF "TARGET_NEON_FP16INST")
> > @@ -1232,8 +1237,7 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s")
> > (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
> >                      (VADDLVQ_P_U "u") (VCMPNEQ_U "u") (VCMPNEQ_S "s")
> >                      (VABDQ_M_S "s") (VABDQ_M_U "u") (VABDQ_S "s")
> >                      (VABDQ_U "u") (VADDQ_N_S "s") (VADDQ_N_U "u")
> > -                    (VADDVQ_P_S "s") (VADDVQ_P_U "u") (VANDQ_S "s")
> > -                    (VANDQ_U "u") (VBICQ_S "s") (VBICQ_U "u")
> > +                    (VADDVQ_P_S "s") (VADDVQ_P_U "u") (VBICQ_S "s")
> > (VBICQ_U "u")
> >                      (VBRSRQ_N_S "s") (VBRSRQ_N_U "u")
> > (VCADDQ_ROT270_S "s")
> >                      (VCADDQ_ROT270_U "u") (VCADDQ_ROT90_S "s")
> >                      (VCMPEQQ_S "s") (VCMPEQQ_U "u")
> > (VCADDQ_ROT90_U "u")
> > @@ -1501,7 +1505,6 @@ (define_int_iterator VABDQ [VABDQ_S VABDQ_U])
> >  (define_int_iterator VADDQ_N [VADDQ_N_S VADDQ_N_U])
> >  (define_int_iterator VADDVAQ [VADDVAQ_S VADDVAQ_U])
> >  (define_int_iterator VADDVQ_P [VADDVQ_P_U VADDVQ_P_S])
> > -(define_int_iterator VANDQ [VANDQ_U VANDQ_S])
> >  (define_int_iterator VBICQ [VBICQ_S VBICQ_U])
> >  (define_int_iterator VBRSRQ_N [VBRSRQ_N_U VBRSRQ_N_S])
> >  (define_int_iterator VCADDQ_ROT270 [VCADDQ_ROT270_S
> > VCADDQ_ROT270_U])
> > diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> > index ecbaaa9..238c828 100644
> > --- a/gcc/config/arm/mve.md
> > +++ b/gcc/config/arm/mve.md
> > @@ -894,17 +894,36 @@ (define_insn "mve_vaddvq_p_<supf><mode>"
> >  ;;
> >  ;; [vandq_u, vandq_s])
> >  ;;
> > -(define_insn "mve_vandq_<supf><mode>"
> > +;; signed and unsigned versions are the same: define the unsigned
> > +;; insn, and use an expander for the signed one as we still reference
> > +;; both names from arm_mve.h.
> > +;; We use the same code as in neon.md (TODO: avoid this duplication).
> > +(define_insn "mve_vandq_u<mode>"
> > +  [
> > +   (set (match_operand:MVE_2 0 "s_register_operand" "=w,w")
> > +     (and:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w,0")
> > +                (match_operand:MVE_2 2 "neon_inv_logic_op2" "w,DL")))
> > +  ]
> > +  "TARGET_HAVE_MVE"
> > +  {
> > +    switch (which_alternative)
> > +      {
> > +      case 0: return "vand\t%q0, %q1, %q2";
> > +      case 1: return neon_output_logic_immediate ("vand", &operands[2],
> > +                     <MODE>mode, 1, VALID_NEON_QREG_MODE
> > (<MODE>mode));
> > +      default: gcc_unreachable ();
> > +    }
>
> We can avoid this switch statement by using the @ syntax for alternatives and using a '*' for the second alternative.
> See the *aarch64_sve_mov<mode>_ldr_str pattern in config/aarch64/aarch64-sve.md, for example.
>

Indeed, thanks. I just cut and pasted from and<mode>3_neon :-)

I guess a similar comment applies to the next patch (vorr) ?

> Ok with that change.
> Thanks for doing this!
> Kyrill
>
> > +  }
> > +  [(set_attr "type" "mve_move")
> > +])
> > +(define_expand "mve_vandq_s<mode>"
> >    [
> > -   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> > -     (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
> > -                    (match_operand:MVE_2 2 "s_register_operand" "w")]
> > -      VANDQ))
> > +   (set (match_operand:MVE_2 0 "s_register_operand")
> > +     (and:MVE_2 (match_operand:MVE_2 1 "s_register_operand")
> > +                (match_operand:MVE_2 2 "neon_inv_logic_op2")))
> >    ]
> >    "TARGET_HAVE_MVE"
> > -  "vand %q0, %q1, %q2"
> > -  [(set_attr "type" "mve_move")
> > -])
> > +)
> >
> >  ;;
> >  ;; [vbicq_s, vbicq_u])
> > @@ -2019,9 +2038,8 @@ (define_insn "mve_vaddq_n_f<mode>"
> >  (define_insn "mve_vandq_f<mode>"
> >    [
> >     (set (match_operand:MVE_0 0 "s_register_operand" "=w")
> > -     (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
> > -                    (match_operand:MVE_0 2 "s_register_operand" "w")]
> > -      VANDQ_F))
> > +     (and:MVE_0 (match_operand:MVE_0 1 "s_register_operand" "w")
> > +                (match_operand:MVE_0 2 "s_register_operand" "w")))
> >    ]
> >    "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
> >    "vand %q0, %q1, %q2"
> > diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> > index 2d76769..dc4707d 100644
> > --- a/gcc/config/arm/neon.md
> > +++ b/gcc/config/arm/neon.md
> > @@ -712,7 +712,7 @@ (define_insn "ior<mode>3"
> >  ;; corresponds to the canonical form the middle-end expects to use for
> >  ;; immediate bitwise-ANDs.
> >
> > -(define_insn "and<mode>3"
> > +(define_insn "and<mode>3_neon"
> >    [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
> >       (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
> >                (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
> > diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
> > index 2144520..5f58f7c 100644
> > --- a/gcc/config/arm/predicates.md
> > +++ b/gcc/config/arm/predicates.md
> > @@ -107,7 +107,7 @@ (define_predicate "vpr_register_operand"
> >  (define_predicate "imm_for_neon_inv_logic_operand"
> >    (match_code "const_vector")
> >  {
> > -  return (TARGET_NEON
> > +  return ((TARGET_NEON || TARGET_HAVE_MVE)
> >            && neon_immediate_valid_for_logic (op, mode, 1, NULL, NULL));
> >  })
> >
> > diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
> > index a3844e9..18b3048 100644
> > --- a/gcc/config/arm/unspecs.md
> > +++ b/gcc/config/arm/unspecs.md
> > @@ -601,7 +601,6 @@ (define_c_enum "unspec" [
> >    VADDQ_N_S
> >    VADDVAQ_S
> >    VADDVQ_P_S
> > -  VANDQ_S
> >    VBICQ_S
> >    VBRSRQ_N_S
> >    VCADDQ_ROT270_S
> > @@ -648,7 +647,6 @@ (define_c_enum "unspec" [
> >    VADDQ_N_U
> >    VADDVAQ_U
> >    VADDVQ_P_U
> > -  VANDQ_U
> >    VBICQ_U
> >    VBRSRQ_N_U
> >    VCADDQ_ROT270_U
> > @@ -721,7 +719,6 @@ (define_c_enum "unspec" [
> >    VABDQ_M_U
> >    VABDQ_F
> >    VADDQ_N_F
> > -  VANDQ_F
> >    VBICQ_F
> >    VCADDQ_ROT270_F
> >    VCADDQ_ROT90_F
> > diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-
> > common.md
> > index 250e503..2117e5b 100644
> > --- a/gcc/config/arm/vec-common.md
> > +++ b/gcc/config/arm/vec-common.md
> > @@ -172,3 +172,11 @@ (define_expand "vec_set<mode>"
> >                                              GEN_INT (elem), operands[0]));
> >    DONE;
> >  })
> > +
> > +(define_expand "and<mode>3"
> > +  [(set (match_operand:VDQ 0 "s_register_operand" "")
> > +     (and:VDQ (match_operand:VDQ 1 "s_register_operand" "")
> > +              (match_operand:VDQ 2 "neon_inv_logic_op2" "")))]
> > +  "TARGET_NEON
> > +   || TARGET_HAVE_MVE"
> > +)
> > diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vand.c
> > b/gcc/testsuite/gcc.target/arm/simd/mve-vand.c
> > new file mode 100644
> > index 0000000..26247dc
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vand.c
> > @@ -0,0 +1,63 @@
> > +/* { dg-do assemble } */
> > +/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> > +/* { dg-add-options arm_v8_1m_mve } */
> > +/* { dg-additional-options "-O3" } */
> > +
> > +#include <stdint.h>
> > +
> > +#define FUNC(SIGN, TYPE, BITS, NB, OP, NAME)
> >       \
> > +  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t *
> > __restrict__ dest, TYPE##BITS##_t *a, TYPE##BITS##_t *b) { \
> > +    int i;                                                           \
> > +    for (i=0; i<NB; i++) {                                           \
> > +      dest[i] = a[i] OP b[i];                                                \
> > +    }                                                                        \
> > +}
> > +
> > +#define FUNC_IMM(SIGN, TYPE, BITS, NB, OP, NAME)
> >       \
> > +  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t *
> > __restrict__ dest, TYPE##BITS##_t *a) { \
> > +    int i;                                                           \
> > +    for (i=0; i<NB; i++) {                                           \
> > +      dest[i] = a[i] OP 1;                                           \
> > +    }                                                                        \
> > +}
> > +
> > +/* 64-bit vectors.  */
> > +FUNC(s, int, 32, 2, &, vand)
> > +FUNC(u, uint, 32, 2, &, vand)
> > +FUNC(s, int, 16, 4, &, vand)
> > +FUNC(u, uint, 16, 4, &, vand)
> > +FUNC(s, int, 8, 8, &, vand)
> > +FUNC(u, uint, 8, 8, &, vand)
> > +
> > +/* 128-bit vectors.  */
> > +FUNC(s, int, 32, 4, &, vand)
> > +FUNC(u, uint, 32, 4, &, vand)
> > +FUNC(s, int, 16, 8, &, vand)
> > +FUNC(u, uint, 16, 8, &, vand)
> > +FUNC(s, int, 8, 16, &, vand)
> > +FUNC(u, uint, 8, 16, &, vand)
> > +
> > +/* 64-bit vectors.  */
> > +FUNC_IMM(s, int, 32, 2, &, vandimm)
> > +FUNC_IMM(u, uint, 32, 2, &, vandimm)
> > +FUNC_IMM(s, int, 16, 4, &, vandimm)
> > +FUNC_IMM(u, uint, 16, 4, &, vandimm)
> > +FUNC_IMM(s, int, 8, 8, &, vandimm)
> > +FUNC_IMM(u, uint, 8, 8, &, vandimm)
> > +
> > +/* 128-bit vectors.  */
> > +FUNC_IMM(s, int, 32, 4, &, vandimm)
> > +FUNC_IMM(u, uint, 32, 4, &, vandimm)
> > +FUNC_IMM(s, int, 16, 8, &, vandimm)
> > +FUNC_IMM(u, uint, 16, 8, &, vandimm)
> > +FUNC_IMM(s, int, 8, 16, &, vandimm)
> > +FUNC_IMM(u, uint, 8, 16, &, vandimm)
> > +
> > +/* MVE has only 128-bit vectors, so we can vectorize only half of the
> > +   functions above.  */
> > +/* Although float16 and float32 types are supported at assembly level,
> > +   we cannot test them with the '&' operator, so we check only the
> > +   integer variants.  */
> > +/* For some reason, we do not generate the immediate version, we still
> > +   use vldr to load the vector of immediates.  */
> > +/* { dg-final { scan-assembler-times {vand\tq[0-9]+, q[0-9]+, q[0-9]+} 12 } }
> > */
> > --
> > 2.7.4
>
Kyrylo Tkachov Dec. 8, 2020, 2 p.m. UTC | #3
> -----Original Message-----
> From: Christophe Lyon <christophe.lyon@linaro.org>
> Sent: 08 December 2020 13:59
> To: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>
> Cc: gcc-patches@gcc.gnu.org
> Subject: Re: [PATCH 1/5] arm: Auto-vectorization for MVE: vand
> 
> On Tue, 8 Dec 2020 at 14:19, Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>
> wrote:
> >
> > Hi Christophe
> >
> > > -----Original Message-----
> > > From: Gcc-patches <gcc-patches-bounces@gcc.gnu.org> On Behalf Of
> > > Christophe Lyon via Gcc-patches
> > > Sent: 08 December 2020 13:06
> > > To: gcc-patches@gcc.gnu.org
> > > Subject: [PATCH 1/5] arm: Auto-vectorization for MVE: vand
> > >
> > > This patch enables MVE vandq instructions for auto-vectorization.  MVE
> > > vandq insns in mve.md are modified to use 'and' instead of unspec
> > > expression to support and<mode>3.  The and<mode>3 expander is added
> to
> > > vec-common.md
> > >
> > > 2020-12-03  Christophe Lyon  <christophe.lyon@linaro.org>
> > >
> > >       gcc/
> > >       * config/arm/iterators.md (supf): Remove VANDQ_S and VANDQ_U.
> > >       (VANQ): Remove.
> > >       (VDQ): Add TARGET_HAVE_MVE condition where relevant.
> > >       * config/arm/mve.md (mve_vandq_u<mode>): New entry for vand
> > >       instruction using expression 'and'.
> > >       (mve_vandq_s<mode>): New expander.
> > >       (mve_vaddq_n_f<mode>): Use 'and' code instead of unspec.
> > >       * config/arm/neon.md (and<mode>3): Rename into
> > > and<mode>3_neon.
> > >       * config/arm/predicates.md (imm_for_neon_inv_logic_operand):
> > >       Enable for MVE.
> > >       * config/arm/unspecs.md (VANDQ_S, VANDQ_U, VANDQ_F):
> > > Remove.
> > >       * config/arm/vec-common.md (and<mode>3): New expander.
> > >
> > >       gcc/testsuite/
> > >       * gcc.target/arm/simd/mve-vand.c: New test.
> > > ---
> > >  gcc/config/arm/iterators.md                  | 11 +++--
> > >  gcc/config/arm/mve.md                        | 40 +++++++++++++-----
> > >  gcc/config/arm/neon.md                       |  2 +-
> > >  gcc/config/arm/predicates.md                 |  2 +-
> > >  gcc/config/arm/unspecs.md                    |  3 --
> > >  gcc/config/arm/vec-common.md                 |  8 ++++
> > >  gcc/testsuite/gcc.target/arm/simd/mve-vand.c | 63
> > > ++++++++++++++++++++++++++++
> > >  7 files changed, 109 insertions(+), 20 deletions(-)
> > >  create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-vand.c
> > >
> > > diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
> > > index 592af35..badad2b 100644
> > > --- a/gcc/config/arm/iterators.md
> > > +++ b/gcc/config/arm/iterators.md
> > > @@ -147,7 +147,12 @@ (define_mode_iterator VW [V8QI V4HI V2SI])
> > >  (define_mode_iterator VN [V8HI V4SI V2DI])
> > >
> > >  ;; All supported vector modes (except singleton DImode).
> > > -(define_mode_iterator VDQ [V8QI V16QI V4HI V8HI V2SI V4SI V4HF
> V8HF
> > > V2SF V4SF V2DI])
> > > +(define_mode_iterator VDQ [(V8QI "!TARGET_HAVE_MVE") V16QI
> > > +                        (V4HI "!TARGET_HAVE_MVE") V8HI
> > > +                        (V2SI "!TARGET_HAVE_MVE") V4SI
> > > +                        (V4HF "!TARGET_HAVE_MVE") V8HF
> > > +                        (V2SF "!TARGET_HAVE_MVE") V4SF
> > > +                        (V2DI "!TARGET_HAVE_MVE")])
> > >
> > >  ;; All supported floating-point vector modes (except V2DF).
> > >  (define_mode_iterator VF [(V4HF "TARGET_NEON_FP16INST")
> > > @@ -1232,8 +1237,7 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s")
> > > (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
> > >                      (VADDLVQ_P_U "u") (VCMPNEQ_U "u") (VCMPNEQ_S "s")
> > >                      (VABDQ_M_S "s") (VABDQ_M_U "u") (VABDQ_S "s")
> > >                      (VABDQ_U "u") (VADDQ_N_S "s") (VADDQ_N_U "u")
> > > -                    (VADDVQ_P_S "s") (VADDVQ_P_U "u") (VANDQ_S "s")
> > > -                    (VANDQ_U "u") (VBICQ_S "s") (VBICQ_U "u")
> > > +                    (VADDVQ_P_S "s") (VADDVQ_P_U "u") (VBICQ_S "s")
> > > (VBICQ_U "u")
> > >                      (VBRSRQ_N_S "s") (VBRSRQ_N_U "u")
> > > (VCADDQ_ROT270_S "s")
> > >                      (VCADDQ_ROT270_U "u") (VCADDQ_ROT90_S "s")
> > >                      (VCMPEQQ_S "s") (VCMPEQQ_U "u")
> > > (VCADDQ_ROT90_U "u")
> > > @@ -1501,7 +1505,6 @@ (define_int_iterator VABDQ [VABDQ_S
> VABDQ_U])
> > >  (define_int_iterator VADDQ_N [VADDQ_N_S VADDQ_N_U])
> > >  (define_int_iterator VADDVAQ [VADDVAQ_S VADDVAQ_U])
> > >  (define_int_iterator VADDVQ_P [VADDVQ_P_U VADDVQ_P_S])
> > > -(define_int_iterator VANDQ [VANDQ_U VANDQ_S])
> > >  (define_int_iterator VBICQ [VBICQ_S VBICQ_U])
> > >  (define_int_iterator VBRSRQ_N [VBRSRQ_N_U VBRSRQ_N_S])
> > >  (define_int_iterator VCADDQ_ROT270 [VCADDQ_ROT270_S
> > > VCADDQ_ROT270_U])
> > > diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> > > index ecbaaa9..238c828 100644
> > > --- a/gcc/config/arm/mve.md
> > > +++ b/gcc/config/arm/mve.md
> > > @@ -894,17 +894,36 @@ (define_insn "mve_vaddvq_p_<supf><mode>"
> > >  ;;
> > >  ;; [vandq_u, vandq_s])
> > >  ;;
> > > -(define_insn "mve_vandq_<supf><mode>"
> > > +;; signed and unsigned versions are the same: define the unsigned
> > > +;; insn, and use an expander for the signed one as we still reference
> > > +;; both names from arm_mve.h.
> > > +;; We use the same code as in neon.md (TODO: avoid this duplication).
> > > +(define_insn "mve_vandq_u<mode>"
> > > +  [
> > > +   (set (match_operand:MVE_2 0 "s_register_operand" "=w,w")
> > > +     (and:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w,0")
> > > +                (match_operand:MVE_2 2 "neon_inv_logic_op2" "w,DL")))
> > > +  ]
> > > +  "TARGET_HAVE_MVE"
> > > +  {
> > > +    switch (which_alternative)
> > > +      {
> > > +      case 0: return "vand\t%q0, %q1, %q2";
> > > +      case 1: return neon_output_logic_immediate ("vand", &operands[2],
> > > +                     <MODE>mode, 1, VALID_NEON_QREG_MODE
> > > (<MODE>mode));
> > > +      default: gcc_unreachable ();
> > > +    }
> >
> > We can avoid this switch statement by using the @ syntax for alternatives
> and using a '*' for the second alternative.
> > See the *aarch64_sve_mov<mode>_ldr_str pattern in
> config/aarch64/aarch64-sve.md, for example.
> >
> 
> Indeed, thanks. I just cut and pasted from and<mode>3_neon :-)

Indeed, patches to clean such occurrences up are pre-approved 😉

> 
> I guess a similar comment applies to the next patch (vorr) ?

Yes.
Thanks,
Kyrill

> 
> > Ok with that change.
> > Thanks for doing this!
> > Kyrill
> >
> > > +  }
> > > +  [(set_attr "type" "mve_move")
> > > +])
> > > +(define_expand "mve_vandq_s<mode>"
> > >    [
> > > -   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> > > -     (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
> > > -                    (match_operand:MVE_2 2 "s_register_operand" "w")]
> > > -      VANDQ))
> > > +   (set (match_operand:MVE_2 0 "s_register_operand")
> > > +     (and:MVE_2 (match_operand:MVE_2 1 "s_register_operand")
> > > +                (match_operand:MVE_2 2 "neon_inv_logic_op2")))
> > >    ]
> > >    "TARGET_HAVE_MVE"
> > > -  "vand %q0, %q1, %q2"
> > > -  [(set_attr "type" "mve_move")
> > > -])
> > > +)
> > >
> > >  ;;
> > >  ;; [vbicq_s, vbicq_u])
> > > @@ -2019,9 +2038,8 @@ (define_insn "mve_vaddq_n_f<mode>"
> > >  (define_insn "mve_vandq_f<mode>"
> > >    [
> > >     (set (match_operand:MVE_0 0 "s_register_operand" "=w")
> > > -     (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
> > > -                    (match_operand:MVE_0 2 "s_register_operand" "w")]
> > > -      VANDQ_F))
> > > +     (and:MVE_0 (match_operand:MVE_0 1 "s_register_operand" "w")
> > > +                (match_operand:MVE_0 2 "s_register_operand" "w")))
> > >    ]
> > >    "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
> > >    "vand %q0, %q1, %q2"
> > > diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> > > index 2d76769..dc4707d 100644
> > > --- a/gcc/config/arm/neon.md
> > > +++ b/gcc/config/arm/neon.md
> > > @@ -712,7 +712,7 @@ (define_insn "ior<mode>3"
> > >  ;; corresponds to the canonical form the middle-end expects to use for
> > >  ;; immediate bitwise-ANDs.
> > >
> > > -(define_insn "and<mode>3"
> > > +(define_insn "and<mode>3_neon"
> > >    [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
> > >       (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
> > >                (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
> > > diff --git a/gcc/config/arm/predicates.md
> b/gcc/config/arm/predicates.md
> > > index 2144520..5f58f7c 100644
> > > --- a/gcc/config/arm/predicates.md
> > > +++ b/gcc/config/arm/predicates.md
> > > @@ -107,7 +107,7 @@ (define_predicate "vpr_register_operand"
> > >  (define_predicate "imm_for_neon_inv_logic_operand"
> > >    (match_code "const_vector")
> > >  {
> > > -  return (TARGET_NEON
> > > +  return ((TARGET_NEON || TARGET_HAVE_MVE)
> > >            && neon_immediate_valid_for_logic (op, mode, 1, NULL, NULL));
> > >  })
> > >
> > > diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
> > > index a3844e9..18b3048 100644
> > > --- a/gcc/config/arm/unspecs.md
> > > +++ b/gcc/config/arm/unspecs.md
> > > @@ -601,7 +601,6 @@ (define_c_enum "unspec" [
> > >    VADDQ_N_S
> > >    VADDVAQ_S
> > >    VADDVQ_P_S
> > > -  VANDQ_S
> > >    VBICQ_S
> > >    VBRSRQ_N_S
> > >    VCADDQ_ROT270_S
> > > @@ -648,7 +647,6 @@ (define_c_enum "unspec" [
> > >    VADDQ_N_U
> > >    VADDVAQ_U
> > >    VADDVQ_P_U
> > > -  VANDQ_U
> > >    VBICQ_U
> > >    VBRSRQ_N_U
> > >    VCADDQ_ROT270_U
> > > @@ -721,7 +719,6 @@ (define_c_enum "unspec" [
> > >    VABDQ_M_U
> > >    VABDQ_F
> > >    VADDQ_N_F
> > > -  VANDQ_F
> > >    VBICQ_F
> > >    VCADDQ_ROT270_F
> > >    VCADDQ_ROT90_F
> > > diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-
> > > common.md
> > > index 250e503..2117e5b 100644
> > > --- a/gcc/config/arm/vec-common.md
> > > +++ b/gcc/config/arm/vec-common.md
> > > @@ -172,3 +172,11 @@ (define_expand "vec_set<mode>"
> > >                                              GEN_INT (elem), operands[0]));
> > >    DONE;
> > >  })
> > > +
> > > +(define_expand "and<mode>3"
> > > +  [(set (match_operand:VDQ 0 "s_register_operand" "")
> > > +     (and:VDQ (match_operand:VDQ 1 "s_register_operand" "")
> > > +              (match_operand:VDQ 2 "neon_inv_logic_op2" "")))]
> > > +  "TARGET_NEON
> > > +   || TARGET_HAVE_MVE"
> > > +)
> > > diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vand.c
> > > b/gcc/testsuite/gcc.target/arm/simd/mve-vand.c
> > > new file mode 100644
> > > index 0000000..26247dc
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vand.c
> > > @@ -0,0 +1,63 @@
> > > +/* { dg-do assemble } */
> > > +/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> > > +/* { dg-add-options arm_v8_1m_mve } */
> > > +/* { dg-additional-options "-O3" } */
> > > +
> > > +#include <stdint.h>
> > > +
> > > +#define FUNC(SIGN, TYPE, BITS, NB, OP, NAME)
> > >       \
> > > +  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t *
> > > __restrict__ dest, TYPE##BITS##_t *a, TYPE##BITS##_t *b) { \
> > > +    int i;                                                           \
> > > +    for (i=0; i<NB; i++) {                                           \
> > > +      dest[i] = a[i] OP b[i];                                                \
> > > +    }                                                                        \
> > > +}
> > > +
> > > +#define FUNC_IMM(SIGN, TYPE, BITS, NB, OP, NAME)
> > >       \
> > > +  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t *
> > > __restrict__ dest, TYPE##BITS##_t *a) { \
> > > +    int i;                                                           \
> > > +    for (i=0; i<NB; i++) {                                           \
> > > +      dest[i] = a[i] OP 1;                                           \
> > > +    }                                                                        \
> > > +}
> > > +
> > > +/* 64-bit vectors.  */
> > > +FUNC(s, int, 32, 2, &, vand)
> > > +FUNC(u, uint, 32, 2, &, vand)
> > > +FUNC(s, int, 16, 4, &, vand)
> > > +FUNC(u, uint, 16, 4, &, vand)
> > > +FUNC(s, int, 8, 8, &, vand)
> > > +FUNC(u, uint, 8, 8, &, vand)
> > > +
> > > +/* 128-bit vectors.  */
> > > +FUNC(s, int, 32, 4, &, vand)
> > > +FUNC(u, uint, 32, 4, &, vand)
> > > +FUNC(s, int, 16, 8, &, vand)
> > > +FUNC(u, uint, 16, 8, &, vand)
> > > +FUNC(s, int, 8, 16, &, vand)
> > > +FUNC(u, uint, 8, 16, &, vand)
> > > +
> > > +/* 64-bit vectors.  */
> > > +FUNC_IMM(s, int, 32, 2, &, vandimm)
> > > +FUNC_IMM(u, uint, 32, 2, &, vandimm)
> > > +FUNC_IMM(s, int, 16, 4, &, vandimm)
> > > +FUNC_IMM(u, uint, 16, 4, &, vandimm)
> > > +FUNC_IMM(s, int, 8, 8, &, vandimm)
> > > +FUNC_IMM(u, uint, 8, 8, &, vandimm)
> > > +
> > > +/* 128-bit vectors.  */
> > > +FUNC_IMM(s, int, 32, 4, &, vandimm)
> > > +FUNC_IMM(u, uint, 32, 4, &, vandimm)
> > > +FUNC_IMM(s, int, 16, 8, &, vandimm)
> > > +FUNC_IMM(u, uint, 16, 8, &, vandimm)
> > > +FUNC_IMM(s, int, 8, 16, &, vandimm)
> > > +FUNC_IMM(u, uint, 8, 16, &, vandimm)
> > > +
> > > +/* MVE has only 128-bit vectors, so we can vectorize only half of the
> > > +   functions above.  */
> > > +/* Although float16 and float32 types are supported at assembly level,
> > > +   we cannot test them with the '&' operator, so we check only the
> > > +   integer variants.  */
> > > +/* For some reason, we do not generate the immediate version, we still
> > > +   use vldr to load the vector of immediates.  */
> > > +/* { dg-final { scan-assembler-times {vand\tq[0-9]+, q[0-9]+, q[0-9]+}
> 12 } }
> > > */
> > > --
> > > 2.7.4
> >
Christophe Lyon Dec. 10, 2020, 12:16 p.m. UTC | #4
On Tue, 8 Dec 2020 at 15:00, Kyrylo Tkachov <Kyrylo.Tkachov@arm.com> wrote:

>
>
> > -----Original Message-----
> > From: Christophe Lyon <christophe.lyon@linaro.org>
> > Sent: 08 December 2020 13:59
> > To: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>
> > Cc: gcc-patches@gcc.gnu.org
> > Subject: Re: [PATCH 1/5] arm: Auto-vectorization for MVE: vand
> >
> > On Tue, 8 Dec 2020 at 14:19, Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>
> > wrote:
> > >
> > > Hi Christophe
> > >
> > > > -----Original Message-----
> > > > From: Gcc-patches <gcc-patches-bounces@gcc.gnu.org> On Behalf Of
> > > > Christophe Lyon via Gcc-patches
> > > > Sent: 08 December 2020 13:06
> > > > To: gcc-patches@gcc.gnu.org
> > > > Subject: [PATCH 1/5] arm: Auto-vectorization for MVE: vand
> > > >
> > > > This patch enables MVE vandq instructions for auto-vectorization.
> MVE
> > > > vandq insns in mve.md are modified to use 'and' instead of unspec
> > > > expression to support and<mode>3.  The and<mode>3 expander is added
> > to
> > > > vec-common.md
> > > >
> > > > 2020-12-03  Christophe Lyon  <christophe.lyon@linaro.org>
> > > >
> > > >       gcc/
> > > >       * config/arm/iterators.md (supf): Remove VANDQ_S and VANDQ_U.
> > > >       (VANQ): Remove.
> > > >       (VDQ): Add TARGET_HAVE_MVE condition where relevant.
> > > >       * config/arm/mve.md (mve_vandq_u<mode>): New entry for vand
> > > >       instruction using expression 'and'.
> > > >       (mve_vandq_s<mode>): New expander.
> > > >       (mve_vaddq_n_f<mode>): Use 'and' code instead of unspec.
> > > >       * config/arm/neon.md (and<mode>3): Rename into
> > > > and<mode>3_neon.
> > > >       * config/arm/predicates.md (imm_for_neon_inv_logic_operand):
> > > >       Enable for MVE.
> > > >       * config/arm/unspecs.md (VANDQ_S, VANDQ_U, VANDQ_F):
> > > > Remove.
> > > >       * config/arm/vec-common.md (and<mode>3): New expander.
> > > >
> > > >       gcc/testsuite/
> > > >       * gcc.target/arm/simd/mve-vand.c: New test.
> > > > ---
> > > >  gcc/config/arm/iterators.md                  | 11 +++--
> > > >  gcc/config/arm/mve.md                        | 40 +++++++++++++-----
> > > >  gcc/config/arm/neon.md                       |  2 +-
> > > >  gcc/config/arm/predicates.md                 |  2 +-
> > > >  gcc/config/arm/unspecs.md                    |  3 --
> > > >  gcc/config/arm/vec-common.md                 |  8 ++++
> > > >  gcc/testsuite/gcc.target/arm/simd/mve-vand.c | 63
> > > > ++++++++++++++++++++++++++++
> > > >  7 files changed, 109 insertions(+), 20 deletions(-)
> > > >  create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-vand.c
> > > >
> > > > diff --git a/gcc/config/arm/iterators.md
> b/gcc/config/arm/iterators.md
> > > > index 592af35..badad2b 100644
> > > > --- a/gcc/config/arm/iterators.md
> > > > +++ b/gcc/config/arm/iterators.md
> > > > @@ -147,7 +147,12 @@ (define_mode_iterator VW [V8QI V4HI V2SI])
> > > >  (define_mode_iterator VN [V8HI V4SI V2DI])
> > > >
> > > >  ;; All supported vector modes (except singleton DImode).
> > > > -(define_mode_iterator VDQ [V8QI V16QI V4HI V8HI V2SI V4SI V4HF
> > V8HF
> > > > V2SF V4SF V2DI])
> > > > +(define_mode_iterator VDQ [(V8QI "!TARGET_HAVE_MVE") V16QI
> > > > +                        (V4HI "!TARGET_HAVE_MVE") V8HI
> > > > +                        (V2SI "!TARGET_HAVE_MVE") V4SI
> > > > +                        (V4HF "!TARGET_HAVE_MVE") V8HF
> > > > +                        (V2SF "!TARGET_HAVE_MVE") V4SF
> > > > +                        (V2DI "!TARGET_HAVE_MVE")])
> > > >
> > > >  ;; All supported floating-point vector modes (except V2DF).
> > > >  (define_mode_iterator VF [(V4HF "TARGET_NEON_FP16INST")
> > > > @@ -1232,8 +1237,7 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s")
> > > > (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
> > > >                      (VADDLVQ_P_U "u") (VCMPNEQ_U "u") (VCMPNEQ_S
> "s")
> > > >                      (VABDQ_M_S "s") (VABDQ_M_U "u") (VABDQ_S "s")
> > > >                      (VABDQ_U "u") (VADDQ_N_S "s") (VADDQ_N_U "u")
> > > > -                    (VADDVQ_P_S "s") (VADDVQ_P_U "u") (VANDQ_S "s")
> > > > -                    (VANDQ_U "u") (VBICQ_S "s") (VBICQ_U "u")
> > > > +                    (VADDVQ_P_S "s") (VADDVQ_P_U "u") (VBICQ_S "s")
> > > > (VBICQ_U "u")
> > > >                      (VBRSRQ_N_S "s") (VBRSRQ_N_U "u")
> > > > (VCADDQ_ROT270_S "s")
> > > >                      (VCADDQ_ROT270_U "u") (VCADDQ_ROT90_S "s")
> > > >                      (VCMPEQQ_S "s") (VCMPEQQ_U "u")
> > > > (VCADDQ_ROT90_U "u")
> > > > @@ -1501,7 +1505,6 @@ (define_int_iterator VABDQ [VABDQ_S
> > VABDQ_U])
> > > >  (define_int_iterator VADDQ_N [VADDQ_N_S VADDQ_N_U])
> > > >  (define_int_iterator VADDVAQ [VADDVAQ_S VADDVAQ_U])
> > > >  (define_int_iterator VADDVQ_P [VADDVQ_P_U VADDVQ_P_S])
> > > > -(define_int_iterator VANDQ [VANDQ_U VANDQ_S])
> > > >  (define_int_iterator VBICQ [VBICQ_S VBICQ_U])
> > > >  (define_int_iterator VBRSRQ_N [VBRSRQ_N_U VBRSRQ_N_S])
> > > >  (define_int_iterator VCADDQ_ROT270 [VCADDQ_ROT270_S
> > > > VCADDQ_ROT270_U])
> > > > diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> > > > index ecbaaa9..238c828 100644
> > > > --- a/gcc/config/arm/mve.md
> > > > +++ b/gcc/config/arm/mve.md
> > > > @@ -894,17 +894,36 @@ (define_insn "mve_vaddvq_p_<supf><mode>"
> > > >  ;;
> > > >  ;; [vandq_u, vandq_s])
> > > >  ;;
> > > > -(define_insn "mve_vandq_<supf><mode>"
> > > > +;; signed and unsigned versions are the same: define the unsigned
> > > > +;; insn, and use an expander for the signed one as we still
> reference
> > > > +;; both names from arm_mve.h.
> > > > +;; We use the same code as in neon.md (TODO: avoid this
> duplication).
> > > > +(define_insn "mve_vandq_u<mode>"
> > > > +  [
> > > > +   (set (match_operand:MVE_2 0 "s_register_operand" "=w,w")
> > > > +     (and:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w,0")
> > > > +                (match_operand:MVE_2 2 "neon_inv_logic_op2"
> "w,DL")))
> > > > +  ]
> > > > +  "TARGET_HAVE_MVE"
> > > > +  {
> > > > +    switch (which_alternative)
> > > > +      {
> > > > +      case 0: return "vand\t%q0, %q1, %q2";
> > > > +      case 1: return neon_output_logic_immediate ("vand",
> &operands[2],
> > > > +                     <MODE>mode, 1, VALID_NEON_QREG_MODE
> > > > (<MODE>mode));
> > > > +      default: gcc_unreachable ();
> > > > +    }
> > >
> > > We can avoid this switch statement by using the @ syntax for
> alternatives
> > and using a '*' for the second alternative.
> > > See the *aarch64_sve_mov<mode>_ldr_str pattern in
> > config/aarch64/aarch64-sve.md, for example.
> > >
> >
> > Indeed, thanks. I just cut and pasted from and<mode>3_neon :-)
>
> Indeed, patches to clean such occurrences up are pre-approved 😉
>
> >
> > I guess a similar comment applies to the next patch (vorr) ?
>
> Yes.
>


I've just pushed the vand patch. Does your "yes" means
approval for the vorr patch or only about the syntax change?

Thanks,

Christophe

Thanks,
> Kyrill
>
> >
> > > Ok with that change.
> > > Thanks for doing this!
> > > Kyrill
> > >
> > > > +  }
> > > > +  [(set_attr "type" "mve_move")
> > > > +])
> > > > +(define_expand "mve_vandq_s<mode>"
> > > >    [
> > > > -   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> > > > -     (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
> > > > -                    (match_operand:MVE_2 2 "s_register_operand"
> "w")]
> > > > -      VANDQ))
> > > > +   (set (match_operand:MVE_2 0 "s_register_operand")
> > > > +     (and:MVE_2 (match_operand:MVE_2 1 "s_register_operand")
> > > > +                (match_operand:MVE_2 2 "neon_inv_logic_op2")))
> > > >    ]
> > > >    "TARGET_HAVE_MVE"
> > > > -  "vand %q0, %q1, %q2"
> > > > -  [(set_attr "type" "mve_move")
> > > > -])
> > > > +)
> > > >
> > > >  ;;
> > > >  ;; [vbicq_s, vbicq_u])
> > > > @@ -2019,9 +2038,8 @@ (define_insn "mve_vaddq_n_f<mode>"
> > > >  (define_insn "mve_vandq_f<mode>"
> > > >    [
> > > >     (set (match_operand:MVE_0 0 "s_register_operand" "=w")
> > > > -     (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
> > > > -                    (match_operand:MVE_0 2 "s_register_operand"
> "w")]
> > > > -      VANDQ_F))
> > > > +     (and:MVE_0 (match_operand:MVE_0 1 "s_register_operand" "w")
> > > > +                (match_operand:MVE_0 2 "s_register_operand" "w")))
> > > >    ]
> > > >    "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
> > > >    "vand %q0, %q1, %q2"
> > > > diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> > > > index 2d76769..dc4707d 100644
> > > > --- a/gcc/config/arm/neon.md
> > > > +++ b/gcc/config/arm/neon.md
> > > > @@ -712,7 +712,7 @@ (define_insn "ior<mode>3"
> > > >  ;; corresponds to the canonical form the middle-end expects to use
> for
> > > >  ;; immediate bitwise-ANDs.
> > > >
> > > > -(define_insn "and<mode>3"
> > > > +(define_insn "and<mode>3_neon"
> > > >    [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
> > > >       (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
> > > >                (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
> > > > diff --git a/gcc/config/arm/predicates.md
> > b/gcc/config/arm/predicates.md
> > > > index 2144520..5f58f7c 100644
> > > > --- a/gcc/config/arm/predicates.md
> > > > +++ b/gcc/config/arm/predicates.md
> > > > @@ -107,7 +107,7 @@ (define_predicate "vpr_register_operand"
> > > >  (define_predicate "imm_for_neon_inv_logic_operand"
> > > >    (match_code "const_vector")
> > > >  {
> > > > -  return (TARGET_NEON
> > > > +  return ((TARGET_NEON || TARGET_HAVE_MVE)
> > > >            && neon_immediate_valid_for_logic (op, mode, 1, NULL,
> NULL));
> > > >  })
> > > >
> > > > diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
> > > > index a3844e9..18b3048 100644
> > > > --- a/gcc/config/arm/unspecs.md
> > > > +++ b/gcc/config/arm/unspecs.md
> > > > @@ -601,7 +601,6 @@ (define_c_enum "unspec" [
> > > >    VADDQ_N_S
> > > >    VADDVAQ_S
> > > >    VADDVQ_P_S
> > > > -  VANDQ_S
> > > >    VBICQ_S
> > > >    VBRSRQ_N_S
> > > >    VCADDQ_ROT270_S
> > > > @@ -648,7 +647,6 @@ (define_c_enum "unspec" [
> > > >    VADDQ_N_U
> > > >    VADDVAQ_U
> > > >    VADDVQ_P_U
> > > > -  VANDQ_U
> > > >    VBICQ_U
> > > >    VBRSRQ_N_U
> > > >    VCADDQ_ROT270_U
> > > > @@ -721,7 +719,6 @@ (define_c_enum "unspec" [
> > > >    VABDQ_M_U
> > > >    VABDQ_F
> > > >    VADDQ_N_F
> > > > -  VANDQ_F
> > > >    VBICQ_F
> > > >    VCADDQ_ROT270_F
> > > >    VCADDQ_ROT90_F
> > > > diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-
> > > > common.md
> > > > index 250e503..2117e5b 100644
> > > > --- a/gcc/config/arm/vec-common.md
> > > > +++ b/gcc/config/arm/vec-common.md
> > > > @@ -172,3 +172,11 @@ (define_expand "vec_set<mode>"
> > > >                                              GEN_INT (elem),
> operands[0]));
> > > >    DONE;
> > > >  })
> > > > +
> > > > +(define_expand "and<mode>3"
> > > > +  [(set (match_operand:VDQ 0 "s_register_operand" "")
> > > > +     (and:VDQ (match_operand:VDQ 1 "s_register_operand" "")
> > > > +              (match_operand:VDQ 2 "neon_inv_logic_op2" "")))]
> > > > +  "TARGET_NEON
> > > > +   || TARGET_HAVE_MVE"
> > > > +)
> > > > diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vand.c
> > > > b/gcc/testsuite/gcc.target/arm/simd/mve-vand.c
> > > > new file mode 100644
> > > > index 0000000..26247dc
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vand.c
> > > > @@ -0,0 +1,63 @@
> > > > +/* { dg-do assemble } */
> > > > +/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> > > > +/* { dg-add-options arm_v8_1m_mve } */
> > > > +/* { dg-additional-options "-O3" } */
> > > > +
> > > > +#include <stdint.h>
> > > > +
> > > > +#define FUNC(SIGN, TYPE, BITS, NB, OP, NAME)
> > > >       \
> > > > +  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t
> *
> > > > __restrict__ dest, TYPE##BITS##_t *a, TYPE##BITS##_t *b) { \
> > > > +    int i;
>  \
> > > > +    for (i=0; i<NB; i++) {
>  \
> > > > +      dest[i] = a[i] OP b[i];
>           \
> > > > +    }
>           \
> > > > +}
> > > > +
> > > > +#define FUNC_IMM(SIGN, TYPE, BITS, NB, OP, NAME)
> > > >       \
> > > > +  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t
> *
> > > > __restrict__ dest, TYPE##BITS##_t *a) { \
> > > > +    int i;
>  \
> > > > +    for (i=0; i<NB; i++) {
>  \
> > > > +      dest[i] = a[i] OP 1;
>  \
> > > > +    }
>           \
> > > > +}
> > > > +
> > > > +/* 64-bit vectors.  */
> > > > +FUNC(s, int, 32, 2, &, vand)
> > > > +FUNC(u, uint, 32, 2, &, vand)
> > > > +FUNC(s, int, 16, 4, &, vand)
> > > > +FUNC(u, uint, 16, 4, &, vand)
> > > > +FUNC(s, int, 8, 8, &, vand)
> > > > +FUNC(u, uint, 8, 8, &, vand)
> > > > +
> > > > +/* 128-bit vectors.  */
> > > > +FUNC(s, int, 32, 4, &, vand)
> > > > +FUNC(u, uint, 32, 4, &, vand)
> > > > +FUNC(s, int, 16, 8, &, vand)
> > > > +FUNC(u, uint, 16, 8, &, vand)
> > > > +FUNC(s, int, 8, 16, &, vand)
> > > > +FUNC(u, uint, 8, 16, &, vand)
> > > > +
> > > > +/* 64-bit vectors.  */
> > > > +FUNC_IMM(s, int, 32, 2, &, vandimm)
> > > > +FUNC_IMM(u, uint, 32, 2, &, vandimm)
> > > > +FUNC_IMM(s, int, 16, 4, &, vandimm)
> > > > +FUNC_IMM(u, uint, 16, 4, &, vandimm)
> > > > +FUNC_IMM(s, int, 8, 8, &, vandimm)
> > > > +FUNC_IMM(u, uint, 8, 8, &, vandimm)
> > > > +
> > > > +/* 128-bit vectors.  */
> > > > +FUNC_IMM(s, int, 32, 4, &, vandimm)
> > > > +FUNC_IMM(u, uint, 32, 4, &, vandimm)
> > > > +FUNC_IMM(s, int, 16, 8, &, vandimm)
> > > > +FUNC_IMM(u, uint, 16, 8, &, vandimm)
> > > > +FUNC_IMM(s, int, 8, 16, &, vandimm)
> > > > +FUNC_IMM(u, uint, 8, 16, &, vandimm)
> > > > +
> > > > +/* MVE has only 128-bit vectors, so we can vectorize only half of
> the
> > > > +   functions above.  */
> > > > +/* Although float16 and float32 types are supported at assembly
> level,
> > > > +   we cannot test them with the '&' operator, so we check only the
> > > > +   integer variants.  */
> > > > +/* For some reason, we do not generate the immediate version, we
> still
> > > > +   use vldr to load the vector of immediates.  */
> > > > +/* { dg-final { scan-assembler-times {vand\tq[0-9]+, q[0-9]+,
> q[0-9]+}
> > 12 } }
> > > > */
> > > > --
> > > > 2.7.4
> > >
>
Kyrylo Tkachov Dec. 11, 2020, 9:54 a.m. UTC | #5
Hi Christophe,

From: Christophe Lyon <christophe.lyon@linaro.org> 
Sent: 10 December 2020 12:16
To: Kyrylo Tkachov <Kyrylo.Tkachov@arm.com>
Cc: gcc-patches@gcc.gnu.org
Subject: Re: [PATCH 1/5] arm: Auto-vectorization for MVE: vand



On Tue, 8 Dec 2020 at 15:00, Kyrylo Tkachov <mailto:Kyrylo.Tkachov@arm.com> wrote:


> -----Original Message-----
> From: Christophe Lyon <mailto:christophe.lyon@linaro.org>
> Sent: 08 December 2020 13:59
> To: Kyrylo Tkachov <mailto:Kyrylo.Tkachov@arm.com>
> Cc: mailto:gcc-patches@gcc.gnu.org
> Subject: Re: [PATCH 1/5] arm: Auto-vectorization for MVE: vand
> 
> On Tue, 8 Dec 2020 at 14:19, Kyrylo Tkachov <mailto:Kyrylo.Tkachov@arm.com>
> wrote:
> >
> > Hi Christophe
> >
> > > -----Original Message-----
> > > From: Gcc-patches <mailto:gcc-patches-bounces@gcc.gnu.org> On Behalf Of
> > > Christophe Lyon via Gcc-patches
> > > Sent: 08 December 2020 13:06
> > > To: mailto:gcc-patches@gcc.gnu.org
> > > Subject: [PATCH 1/5] arm: Auto-vectorization for MVE: vand
> > >
> > > This patch enables MVE vandq instructions for auto-vectorization.  MVE
> > > vandq insns in mve.md are modified to use 'and' instead of unspec
> > > expression to support and<mode>3.  The and<mode>3 expander is added
> to
> > > vec-common.md
> > >
> > > 2020-12-03  Christophe Lyon  <mailto:christophe.lyon@linaro.org>
> > >
> > >       gcc/
> > >       * config/arm/iterators.md (supf): Remove VANDQ_S and VANDQ_U.
> > >       (VANQ): Remove.
> > >       (VDQ): Add TARGET_HAVE_MVE condition where relevant.
> > >       * config/arm/mve.md (mve_vandq_u<mode>): New entry for vand
> > >       instruction using expression 'and'.
> > >       (mve_vandq_s<mode>): New expander.
> > >       (mve_vaddq_n_f<mode>): Use 'and' code instead of unspec.
> > >       * config/arm/neon.md (and<mode>3): Rename into
> > > and<mode>3_neon.
> > >       * config/arm/predicates.md (imm_for_neon_inv_logic_operand):
> > >       Enable for MVE.
> > >       * config/arm/unspecs.md (VANDQ_S, VANDQ_U, VANDQ_F):
> > > Remove.
> > >       * config/arm/vec-common.md (and<mode>3): New expander.
> > >
> > >       gcc/testsuite/
> > >       * gcc.target/arm/simd/mve-vand.c: New test.
> > > ---
> > >  gcc/config/arm/iterators.md                  | 11 +++--
> > >  gcc/config/arm/mve.md                        | 40 +++++++++++++-----
> > >  gcc/config/arm/neon.md                       |  2 +-
> > >  gcc/config/arm/predicates.md                 |  2 +-
> > >  gcc/config/arm/unspecs.md                    |  3 --
> > >  gcc/config/arm/vec-common.md                 |  8 ++++
> > >  gcc/testsuite/gcc.target/arm/simd/mve-vand.c | 63
> > > ++++++++++++++++++++++++++++
> > >  7 files changed, 109 insertions(+), 20 deletions(-)
> > >  create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-vand.c
> > >
> > > diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
> > > index 592af35..badad2b 100644
> > > --- a/gcc/config/arm/iterators.md
> > > +++ b/gcc/config/arm/iterators.md
> > > @@ -147,7 +147,12 @@ (define_mode_iterator VW [V8QI V4HI V2SI])
> > >  (define_mode_iterator VN [V8HI V4SI V2DI])
> > >
> > >  ;; All supported vector modes (except singleton DImode).
> > > -(define_mode_iterator VDQ [V8QI V16QI V4HI V8HI V2SI V4SI V4HF
> V8HF
> > > V2SF V4SF V2DI])
> > > +(define_mode_iterator VDQ [(V8QI "!TARGET_HAVE_MVE") V16QI
> > > +                        (V4HI "!TARGET_HAVE_MVE") V8HI
> > > +                        (V2SI "!TARGET_HAVE_MVE") V4SI
> > > +                        (V4HF "!TARGET_HAVE_MVE") V8HF
> > > +                        (V2SF "!TARGET_HAVE_MVE") V4SF
> > > +                        (V2DI "!TARGET_HAVE_MVE")])
> > >
> > >  ;; All supported floating-point vector modes (except V2DF).
> > >  (define_mode_iterator VF [(V4HF "TARGET_NEON_FP16INST")
> > > @@ -1232,8 +1237,7 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s")
> > > (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
> > >                      (VADDLVQ_P_U "u") (VCMPNEQ_U "u") (VCMPNEQ_S "s")
> > >                      (VABDQ_M_S "s") (VABDQ_M_U "u") (VABDQ_S "s")
> > >                      (VABDQ_U "u") (VADDQ_N_S "s") (VADDQ_N_U "u")
> > > -                    (VADDVQ_P_S "s") (VADDVQ_P_U "u") (VANDQ_S "s")
> > > -                    (VANDQ_U "u") (VBICQ_S "s") (VBICQ_U "u")
> > > +                    (VADDVQ_P_S "s") (VADDVQ_P_U "u") (VBICQ_S "s")
> > > (VBICQ_U "u")
> > >                      (VBRSRQ_N_S "s") (VBRSRQ_N_U "u")
> > > (VCADDQ_ROT270_S "s")
> > >                      (VCADDQ_ROT270_U "u") (VCADDQ_ROT90_S "s")
> > >                      (VCMPEQQ_S "s") (VCMPEQQ_U "u")
> > > (VCADDQ_ROT90_U "u")
> > > @@ -1501,7 +1505,6 @@ (define_int_iterator VABDQ [VABDQ_S
> VABDQ_U])
> > >  (define_int_iterator VADDQ_N [VADDQ_N_S VADDQ_N_U])
> > >  (define_int_iterator VADDVAQ [VADDVAQ_S VADDVAQ_U])
> > >  (define_int_iterator VADDVQ_P [VADDVQ_P_U VADDVQ_P_S])
> > > -(define_int_iterator VANDQ [VANDQ_U VANDQ_S])
> > >  (define_int_iterator VBICQ [VBICQ_S VBICQ_U])
> > >  (define_int_iterator VBRSRQ_N [VBRSRQ_N_U VBRSRQ_N_S])
> > >  (define_int_iterator VCADDQ_ROT270 [VCADDQ_ROT270_S
> > > VCADDQ_ROT270_U])
> > > diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> > > index ecbaaa9..238c828 100644
> > > --- a/gcc/config/arm/mve.md
> > > +++ b/gcc/config/arm/mve.md
> > > @@ -894,17 +894,36 @@ (define_insn "mve_vaddvq_p_<supf><mode>"
> > >  ;;
> > >  ;; [vandq_u, vandq_s])
> > >  ;;
> > > -(define_insn "mve_vandq_<supf><mode>"
> > > +;; signed and unsigned versions are the same: define the unsigned
> > > +;; insn, and use an expander for the signed one as we still reference
> > > +;; both names from arm_mve.h.
> > > +;; We use the same code as in neon.md (TODO: avoid this duplication).
> > > +(define_insn "mve_vandq_u<mode>"
> > > +  [
> > > +   (set (match_operand:MVE_2 0 "s_register_operand" "=w,w")
> > > +     (and:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w,0")
> > > +                (match_operand:MVE_2 2 "neon_inv_logic_op2" "w,DL")))
> > > +  ]
> > > +  "TARGET_HAVE_MVE"
> > > +  {
> > > +    switch (which_alternative)
> > > +      {
> > > +      case 0: return "vand\t%q0, %q1, %q2";
> > > +      case 1: return neon_output_logic_immediate ("vand", &operands[2],
> > > +                     <MODE>mode, 1, VALID_NEON_QREG_MODE
> > > (<MODE>mode));
> > > +      default: gcc_unreachable ();
> > > +    }
> >
> > We can avoid this switch statement by using the @ syntax for alternatives
> and using a '*' for the second alternative.
> > See the *aarch64_sve_mov<mode>_ldr_str pattern in
> config/aarch64/aarch64-sve.md, for example.
> >
> 
> Indeed, thanks. I just cut and pasted from and<mode>3_neon :-)

Indeed, patches to clean such occurrences up are pre-approved 😉

> 
> I guess a similar comment applies to the next patch (vorr) ?

Yes.


I've just pushed the vand patch. Does your "yes" means
approval for the vorr patch or only about the syntax change?


The vorr patch is okay as well with the syntax change.
Thanks,
Kyrill



Thanks,

Christophe

Thanks,
Kyrill

> 
> > Ok with that change.
> > Thanks for doing this!
> > Kyrill
> >
> > > +  }
> > > +  [(set_attr "type" "mve_move")
> > > +])
> > > +(define_expand "mve_vandq_s<mode>"
> > >    [
> > > -   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> > > -     (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
> > > -                    (match_operand:MVE_2 2 "s_register_operand" "w")]
> > > -      VANDQ))
> > > +   (set (match_operand:MVE_2 0 "s_register_operand")
> > > +     (and:MVE_2 (match_operand:MVE_2 1 "s_register_operand")
> > > +                (match_operand:MVE_2 2 "neon_inv_logic_op2")))
> > >    ]
> > >    "TARGET_HAVE_MVE"
> > > -  "vand %q0, %q1, %q2"
> > > -  [(set_attr "type" "mve_move")
> > > -])
> > > +)
> > >
> > >  ;;
> > >  ;; [vbicq_s, vbicq_u])
> > > @@ -2019,9 +2038,8 @@ (define_insn "mve_vaddq_n_f<mode>"
> > >  (define_insn "mve_vandq_f<mode>"
> > >    [
> > >     (set (match_operand:MVE_0 0 "s_register_operand" "=w")
> > > -     (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
> > > -                    (match_operand:MVE_0 2 "s_register_operand" "w")]
> > > -      VANDQ_F))
> > > +     (and:MVE_0 (match_operand:MVE_0 1 "s_register_operand" "w")
> > > +                (match_operand:MVE_0 2 "s_register_operand" "w")))
> > >    ]
> > >    "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
> > >    "vand %q0, %q1, %q2"
> > > diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> > > index 2d76769..dc4707d 100644
> > > --- a/gcc/config/arm/neon.md
> > > +++ b/gcc/config/arm/neon.md
> > > @@ -712,7 +712,7 @@ (define_insn "ior<mode>3"
> > >  ;; corresponds to the canonical form the middle-end expects to use for
> > >  ;; immediate bitwise-ANDs.
> > >
> > > -(define_insn "and<mode>3"
> > > +(define_insn "and<mode>3_neon"
> > >    [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
> > >       (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
> > >                (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
> > > diff --git a/gcc/config/arm/predicates.md
> b/gcc/config/arm/predicates.md
> > > index 2144520..5f58f7c 100644
> > > --- a/gcc/config/arm/predicates.md
> > > +++ b/gcc/config/arm/predicates.md
> > > @@ -107,7 +107,7 @@ (define_predicate "vpr_register_operand"
> > >  (define_predicate "imm_for_neon_inv_logic_operand"
> > >    (match_code "const_vector")
> > >  {
> > > -  return (TARGET_NEON
> > > +  return ((TARGET_NEON || TARGET_HAVE_MVE)
> > >            && neon_immediate_valid_for_logic (op, mode, 1, NULL, NULL));
> > >  })
> > >
> > > diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
> > > index a3844e9..18b3048 100644
> > > --- a/gcc/config/arm/unspecs.md
> > > +++ b/gcc/config/arm/unspecs.md
> > > @@ -601,7 +601,6 @@ (define_c_enum "unspec" [
> > >    VADDQ_N_S
> > >    VADDVAQ_S
> > >    VADDVQ_P_S
> > > -  VANDQ_S
> > >    VBICQ_S
> > >    VBRSRQ_N_S
> > >    VCADDQ_ROT270_S
> > > @@ -648,7 +647,6 @@ (define_c_enum "unspec" [
> > >    VADDQ_N_U
> > >    VADDVAQ_U
> > >    VADDVQ_P_U
> > > -  VANDQ_U
> > >    VBICQ_U
> > >    VBRSRQ_N_U
> > >    VCADDQ_ROT270_U
> > > @@ -721,7 +719,6 @@ (define_c_enum "unspec" [
> > >    VABDQ_M_U
> > >    VABDQ_F
> > >    VADDQ_N_F
> > > -  VANDQ_F
> > >    VBICQ_F
> > >    VCADDQ_ROT270_F
> > >    VCADDQ_ROT90_F
> > > diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-
> > > common.md
> > > index 250e503..2117e5b 100644
> > > --- a/gcc/config/arm/vec-common.md
> > > +++ b/gcc/config/arm/vec-common.md
> > > @@ -172,3 +172,11 @@ (define_expand "vec_set<mode>"
> > >                                              GEN_INT (elem), operands[0]));
> > >    DONE;
> > >  })
> > > +
> > > +(define_expand "and<mode>3"
> > > +  [(set (match_operand:VDQ 0 "s_register_operand" "")
> > > +     (and:VDQ (match_operand:VDQ 1 "s_register_operand" "")
> > > +              (match_operand:VDQ 2 "neon_inv_logic_op2" "")))]
> > > +  "TARGET_NEON
> > > +   || TARGET_HAVE_MVE"
> > > +)
> > > diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vand.c
> > > b/gcc/testsuite/gcc.target/arm/simd/mve-vand.c
> > > new file mode 100644
> > > index 0000000..26247dc
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vand.c
> > > @@ -0,0 +1,63 @@
> > > +/* { dg-do assemble } */
> > > +/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> > > +/* { dg-add-options arm_v8_1m_mve } */
> > > +/* { dg-additional-options "-O3" } */
> > > +
> > > +#include <stdint.h>
> > > +
> > > +#define FUNC(SIGN, TYPE, BITS, NB, OP, NAME)
> > >       \
> > > +  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t *
> > > __restrict__ dest, TYPE##BITS##_t *a, TYPE##BITS##_t *b) { \
> > > +    int i;                                                           \
> > > +    for (i=0; i<NB; i++) {                                           \
> > > +      dest[i] = a[i] OP b[i];                                                \
> > > +    }                                                                        \
> > > +}
> > > +
> > > +#define FUNC_IMM(SIGN, TYPE, BITS, NB, OP, NAME)
> > >       \
> > > +  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t *
> > > __restrict__ dest, TYPE##BITS##_t *a) { \
> > > +    int i;                                                           \
> > > +    for (i=0; i<NB; i++) {                                           \
> > > +      dest[i] = a[i] OP 1;                                           \
> > > +    }                                                                        \
> > > +}
> > > +
> > > +/* 64-bit vectors.  */
> > > +FUNC(s, int, 32, 2, &, vand)
> > > +FUNC(u, uint, 32, 2, &, vand)
> > > +FUNC(s, int, 16, 4, &, vand)
> > > +FUNC(u, uint, 16, 4, &, vand)
> > > +FUNC(s, int, 8, 8, &, vand)
> > > +FUNC(u, uint, 8, 8, &, vand)
> > > +
> > > +/* 128-bit vectors.  */
> > > +FUNC(s, int, 32, 4, &, vand)
> > > +FUNC(u, uint, 32, 4, &, vand)
> > > +FUNC(s, int, 16, 8, &, vand)
> > > +FUNC(u, uint, 16, 8, &, vand)
> > > +FUNC(s, int, 8, 16, &, vand)
> > > +FUNC(u, uint, 8, 16, &, vand)
> > > +
> > > +/* 64-bit vectors.  */
> > > +FUNC_IMM(s, int, 32, 2, &, vandimm)
> > > +FUNC_IMM(u, uint, 32, 2, &, vandimm)
> > > +FUNC_IMM(s, int, 16, 4, &, vandimm)
> > > +FUNC_IMM(u, uint, 16, 4, &, vandimm)
> > > +FUNC_IMM(s, int, 8, 8, &, vandimm)
> > > +FUNC_IMM(u, uint, 8, 8, &, vandimm)
> > > +
> > > +/* 128-bit vectors.  */
> > > +FUNC_IMM(s, int, 32, 4, &, vandimm)
> > > +FUNC_IMM(u, uint, 32, 4, &, vandimm)
> > > +FUNC_IMM(s, int, 16, 8, &, vandimm)
> > > +FUNC_IMM(u, uint, 16, 8, &, vandimm)
> > > +FUNC_IMM(s, int, 8, 16, &, vandimm)
> > > +FUNC_IMM(u, uint, 8, 16, &, vandimm)
> > > +
> > > +/* MVE has only 128-bit vectors, so we can vectorize only half of the
> > > +   functions above.  */
> > > +/* Although float16 and float32 types are supported at assembly level,
> > > +   we cannot test them with the '&' operator, so we check only the
> > > +   integer variants.  */
> > > +/* For some reason, we do not generate the immediate version, we still
> > > +   use vldr to load the vector of immediates.  */
> > > +/* { dg-final { scan-assembler-times {vand\tq[0-9]+, q[0-9]+, q[0-9]+}
> 12 } }
> > > */
> > > --
> > > 2.7.4
> >
diff mbox series

Patch

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 592af35..badad2b 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -147,7 +147,12 @@  (define_mode_iterator VW [V8QI V4HI V2SI])
 (define_mode_iterator VN [V8HI V4SI V2DI])
 
 ;; All supported vector modes (except singleton DImode).
-(define_mode_iterator VDQ [V8QI V16QI V4HI V8HI V2SI V4SI V4HF V8HF V2SF V4SF V2DI])
+(define_mode_iterator VDQ [(V8QI "!TARGET_HAVE_MVE") V16QI
+			   (V4HI "!TARGET_HAVE_MVE") V8HI
+			   (V2SI "!TARGET_HAVE_MVE") V4SI
+			   (V4HF "!TARGET_HAVE_MVE") V8HF
+			   (V2SF "!TARGET_HAVE_MVE") V4SF
+			   (V2DI "!TARGET_HAVE_MVE")])
 
 ;; All supported floating-point vector modes (except V2DF).
 (define_mode_iterator VF [(V4HF "TARGET_NEON_FP16INST")
@@ -1232,8 +1237,7 @@  (define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
 		       (VADDLVQ_P_U "u") (VCMPNEQ_U "u") (VCMPNEQ_S "s")
 		       (VABDQ_M_S "s") (VABDQ_M_U "u") (VABDQ_S "s")
 		       (VABDQ_U "u") (VADDQ_N_S "s") (VADDQ_N_U "u")
-		       (VADDVQ_P_S "s")	(VADDVQ_P_U "u") (VANDQ_S "s")
-		       (VANDQ_U "u") (VBICQ_S "s") (VBICQ_U "u")
+		       (VADDVQ_P_S "s")	(VADDVQ_P_U "u") (VBICQ_S "s") (VBICQ_U "u")
 		       (VBRSRQ_N_S "s") (VBRSRQ_N_U "u") (VCADDQ_ROT270_S "s")
 		       (VCADDQ_ROT270_U "u") (VCADDQ_ROT90_S "s")
 		       (VCMPEQQ_S "s") (VCMPEQQ_U "u") (VCADDQ_ROT90_U "u")
@@ -1501,7 +1505,6 @@  (define_int_iterator VABDQ [VABDQ_S VABDQ_U])
 (define_int_iterator VADDQ_N [VADDQ_N_S VADDQ_N_U])
 (define_int_iterator VADDVAQ [VADDVAQ_S VADDVAQ_U])
 (define_int_iterator VADDVQ_P [VADDVQ_P_U VADDVQ_P_S])
-(define_int_iterator VANDQ [VANDQ_U VANDQ_S])
 (define_int_iterator VBICQ [VBICQ_S VBICQ_U])
 (define_int_iterator VBRSRQ_N [VBRSRQ_N_U VBRSRQ_N_S])
 (define_int_iterator VCADDQ_ROT270 [VCADDQ_ROT270_S VCADDQ_ROT270_U])
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index ecbaaa9..238c828 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -894,17 +894,36 @@  (define_insn "mve_vaddvq_p_<supf><mode>"
 ;;
 ;; [vandq_u, vandq_s])
 ;;
-(define_insn "mve_vandq_<supf><mode>"
+;; signed and unsigned versions are the same: define the unsigned
+;; insn, and use an expander for the signed one as we still reference
+;; both names from arm_mve.h.
+;; We use the same code as in neon.md (TODO: avoid this duplication).
+(define_insn "mve_vandq_u<mode>"
+  [
+   (set (match_operand:MVE_2 0 "s_register_operand" "=w,w")
+	(and:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w,0")
+		   (match_operand:MVE_2 2 "neon_inv_logic_op2" "w,DL")))
+  ]
+  "TARGET_HAVE_MVE"
+  {
+    switch (which_alternative)
+      {
+      case 0: return "vand\t%q0, %q1, %q2";
+      case 1: return neon_output_logic_immediate ("vand", &operands[2],
+			<MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
+      default: gcc_unreachable ();
+    }
+  }
+  [(set_attr "type" "mve_move")
+])
+(define_expand "mve_vandq_s<mode>"
   [
-   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
-	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")
-		       (match_operand:MVE_2 2 "s_register_operand" "w")]
-	 VANDQ))
+   (set (match_operand:MVE_2 0 "s_register_operand")
+	(and:MVE_2 (match_operand:MVE_2 1 "s_register_operand")
+		   (match_operand:MVE_2 2 "neon_inv_logic_op2")))
   ]
   "TARGET_HAVE_MVE"
-  "vand %q0, %q1, %q2"
-  [(set_attr "type" "mve_move")
-])
+)
 
 ;;
 ;; [vbicq_s, vbicq_u])
@@ -2019,9 +2038,8 @@  (define_insn "mve_vaddq_n_f<mode>"
 (define_insn "mve_vandq_f<mode>"
   [
    (set (match_operand:MVE_0 0 "s_register_operand" "=w")
-	(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
-		       (match_operand:MVE_0 2 "s_register_operand" "w")]
-	 VANDQ_F))
+	(and:MVE_0 (match_operand:MVE_0 1 "s_register_operand" "w")
+		   (match_operand:MVE_0 2 "s_register_operand" "w")))
   ]
   "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
   "vand %q0, %q1, %q2"
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 2d76769..dc4707d 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -712,7 +712,7 @@  (define_insn "ior<mode>3"
 ;; corresponds to the canonical form the middle-end expects to use for
 ;; immediate bitwise-ANDs.
 
-(define_insn "and<mode>3"
+(define_insn "and<mode>3_neon"
   [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
 	(and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
 		 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index 2144520..5f58f7c 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -107,7 +107,7 @@  (define_predicate "vpr_register_operand"
 (define_predicate "imm_for_neon_inv_logic_operand"
   (match_code "const_vector")
 {
-  return (TARGET_NEON
+  return ((TARGET_NEON || TARGET_HAVE_MVE)
           && neon_immediate_valid_for_logic (op, mode, 1, NULL, NULL));
 })
 
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index a3844e9..18b3048 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -601,7 +601,6 @@  (define_c_enum "unspec" [
   VADDQ_N_S
   VADDVAQ_S
   VADDVQ_P_S
-  VANDQ_S
   VBICQ_S
   VBRSRQ_N_S
   VCADDQ_ROT270_S
@@ -648,7 +647,6 @@  (define_c_enum "unspec" [
   VADDQ_N_U
   VADDVAQ_U
   VADDVQ_P_U
-  VANDQ_U
   VBICQ_U
   VBRSRQ_N_U
   VCADDQ_ROT270_U
@@ -721,7 +719,6 @@  (define_c_enum "unspec" [
   VABDQ_M_U
   VABDQ_F
   VADDQ_N_F
-  VANDQ_F
   VBICQ_F
   VCADDQ_ROT270_F
   VCADDQ_ROT90_F
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index 250e503..2117e5b 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -172,3 +172,11 @@  (define_expand "vec_set<mode>"
 					       GEN_INT (elem), operands[0]));
   DONE;
 })
+
+(define_expand "and<mode>3"
+  [(set (match_operand:VDQ 0 "s_register_operand" "")
+	(and:VDQ (match_operand:VDQ 1 "s_register_operand" "")
+		 (match_operand:VDQ 2 "neon_inv_logic_op2" "")))]
+  "TARGET_NEON
+   || TARGET_HAVE_MVE"
+)
diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vand.c b/gcc/testsuite/gcc.target/arm/simd/mve-vand.c
new file mode 100644
index 0000000..26247dc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/mve-vand.c
@@ -0,0 +1,63 @@ 
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O3" } */
+
+#include <stdint.h>
+
+#define FUNC(SIGN, TYPE, BITS, NB, OP, NAME)				\
+  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t * __restrict__ dest, TYPE##BITS##_t *a, TYPE##BITS##_t *b) { \
+    int i;								\
+    for (i=0; i<NB; i++) {						\
+      dest[i] = a[i] OP b[i];						\
+    }									\
+}
+
+#define FUNC_IMM(SIGN, TYPE, BITS, NB, OP, NAME)				\
+  void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t * __restrict__ dest, TYPE##BITS##_t *a) { \
+    int i;								\
+    for (i=0; i<NB; i++) {						\
+      dest[i] = a[i] OP 1;						\
+    }									\
+}
+
+/* 64-bit vectors.  */
+FUNC(s, int, 32, 2, &, vand)
+FUNC(u, uint, 32, 2, &, vand)
+FUNC(s, int, 16, 4, &, vand)
+FUNC(u, uint, 16, 4, &, vand)
+FUNC(s, int, 8, 8, &, vand)
+FUNC(u, uint, 8, 8, &, vand)
+
+/* 128-bit vectors.  */
+FUNC(s, int, 32, 4, &, vand)
+FUNC(u, uint, 32, 4, &, vand)
+FUNC(s, int, 16, 8, &, vand)
+FUNC(u, uint, 16, 8, &, vand)
+FUNC(s, int, 8, 16, &, vand)
+FUNC(u, uint, 8, 16, &, vand)
+
+/* 64-bit vectors.  */
+FUNC_IMM(s, int, 32, 2, &, vandimm)
+FUNC_IMM(u, uint, 32, 2, &, vandimm)
+FUNC_IMM(s, int, 16, 4, &, vandimm)
+FUNC_IMM(u, uint, 16, 4, &, vandimm)
+FUNC_IMM(s, int, 8, 8, &, vandimm)
+FUNC_IMM(u, uint, 8, 8, &, vandimm)
+
+/* 128-bit vectors.  */
+FUNC_IMM(s, int, 32, 4, &, vandimm)
+FUNC_IMM(u, uint, 32, 4, &, vandimm)
+FUNC_IMM(s, int, 16, 8, &, vandimm)
+FUNC_IMM(u, uint, 16, 8, &, vandimm)
+FUNC_IMM(s, int, 8, 16, &, vandimm)
+FUNC_IMM(u, uint, 8, 16, &, vandimm)
+
+/* MVE has only 128-bit vectors, so we can vectorize only half of the
+   functions above.  */
+/* Although float16 and float32 types are supported at assembly level,
+   we cannot test them with the '&' operator, so we check only the
+   integer variants.  */
+/* For some reason, we do not generate the immediate version, we still
+   use vldr to load the vector of immediates.  */
+/* { dg-final { scan-assembler-times {vand\tq[0-9]+, q[0-9]+, q[0-9]+} 12 } } */