diff mbox series

[ARM,2/1x] : MVE intrinsics with unary operand.

Message ID DBBPR08MB47756EC49F28E4A8B29EBA789B710@DBBPR08MB4775.eurprd08.prod.outlook.com
State New
Headers show
Series [ARM,2/1x] : MVE intrinsics with unary operand. | expand

Commit Message

Srinath Parvathaneni Nov. 14, 2019, 7:13 p.m. UTC
Hello,

This patch adds support for the following MVE ACLE intrinsics with a unary operand.

vmvnq_n_s16, vmvnq_n_s32, vrev64q_s8, vrev64q_s16, vrev64q_s32, vcvtq_s16_f16, vcvtq_s32_f32,
vrev64q_u8, vrev64q_u16, vrev64q_u32, vmvnq_n_u16, vmvnq_n_u32, vcvtq_u16_f16, vcvtq_u32_f32,
vrev64q.

Please refer to the M-profile Vector Extension (MVE) intrinsics [1] for more details.
[1] https://developer.arm.com/architectures/instruction-sets/simd-isas/helium/mve-intrinsics

Regression tested on arm-none-eabi and found no regressions.

Ok for trunk?

Thanks,
Srinath.

gcc/ChangeLog:

2019-10-21  Andre Vieira  <andre.simoesdiasvieira@arm.com>
	    Mihail Ionescu  <mihail.ionescu@arm.com>
	    Srinath Parvathaneni  <srinath.parvathaneni@arm.com>

	* config/arm/arm-builtins.c (UNOP_SNONE_SNONE_QUALIFIERS): Define.
	(UNOP_SNONE_NONE_QUALIFIERS): Likewise.
	(UNOP_SNONE_IMM_QUALIFIERS): Likewise.
	(UNOP_UNONE_NONE_QUALIFIERS): Likewise.
	(UNOP_UNONE_UNONE_QUALIFIERS): Likewise.
	(UNOP_UNONE_IMM_QUALIFIERS): Likewise.
	* config/arm/arm_mve.h (vmvnq_n_s16): Define macro.
	(vmvnq_n_s32): Likewise.
	(vrev64q_s8): Likewise.
	(vrev64q_s16): Likewise.
	(vrev64q_s32): Likewise.
	(vcvtq_s16_f16): Likewise.
	(vcvtq_s32_f32): Likewise.
	(vrev64q_u8): Likewise.
	(vrev64q_u16): Likewise.
	(vrev64q_u32): Likewise.
	(vmvnq_n_u16): Likewise.
	(vmvnq_n_u32): Likewise.
	(vcvtq_u16_f16): Likewise.
	(vcvtq_u32_f32): Likewise.
	(__arm_vmvnq_n_s16): Define intrinsic.
	(__arm_vmvnq_n_s32): Likewise.
	(__arm_vrev64q_s8): Likewise.
	(__arm_vrev64q_s16): Likewise.
	(__arm_vrev64q_s32): Likewise.
	(__arm_vrev64q_u8): Likewise.
	(__arm_vrev64q_u16): Likewise.
	(__arm_vrev64q_u32): Likewise.
	(__arm_vmvnq_n_u16): Likewise.
	(__arm_vmvnq_n_u32): Likewise.
	(__arm_vcvtq_s16_f16): Likewise.
	(__arm_vcvtq_s32_f32): Likewise.
	(__arm_vcvtq_u16_f16): Likewise.
	(__arm_vcvtq_u32_f32): Likewise.
	(vrev64q): Define polymorphic variant.
	* config/arm/arm_mve_builtins.def (UNOP_SNONE_SNONE): Use it.
	(UNOP_SNONE_NONE): Likewise.
	(UNOP_SNONE_IMM): Likewise.
	(UNOP_UNONE_UNONE): Likewise.
	(UNOP_UNONE_NONE): Likewise.
	(UNOP_UNONE_IMM): Likewise.
	* config/arm/mve.md (mve_vrev64q_<supf><mode>): Define RTL pattern.
	(mve_vcvtq_from_f_<supf><mode>): Likewise.
	(mve_vmvnq_n_<supf><mode>): Likewise.

gcc/testsuite/ChangeLog:

2019-10-21  Andre Vieira  <andre.simoesdiasvieira@arm.com>
	    Mihail Ionescu  <mihail.ionescu@arm.com>
	    Srinath Parvathaneni  <srinath.parvathaneni@arm.com>

	* gcc.target/arm/mve/intrinsics/vcvtq_s16_f16.c: New test.
	* gcc.target/arm/mve/intrinsics/vcvtq_s32_f32.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vcvtq_u16_f16.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vcvtq_u32_f32.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vmvnq_n_s16.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vmvnq_n_s32.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vmvnq_n_u16.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vmvnq_n_u32.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vrev64q_s16.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vrev64q_s32.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vrev64q_s8.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vrev64q_u16.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vrev64q_u32.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vrev64q_u8.c: Likewise.


###############     Attachment also inlined for ease of reply    ###############
diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 2fee417fe6585f457edd4cf96655366b1d6bd1a0..21b213d8e1bc99a3946f15e97161e01d73832799 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -313,6 +313,42 @@ arm_unop_none_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
 #define UNOP_NONE_UNONE_QUALIFIERS \
   (arm_unop_none_unone_qualifiers)
 
+static enum arm_type_qualifiers
+arm_unop_snone_snone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_none };
+#define UNOP_SNONE_SNONE_QUALIFIERS \
+  (arm_unop_snone_snone_qualifiers)
+
+static enum arm_type_qualifiers
+arm_unop_snone_none_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_none };
+#define UNOP_SNONE_NONE_QUALIFIERS \
+  (arm_unop_snone_none_qualifiers)
+
+static enum arm_type_qualifiers
+arm_unop_snone_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_immediate };
+#define UNOP_SNONE_IMM_QUALIFIERS \
+  (arm_unop_snone_imm_qualifiers)
+
+static enum arm_type_qualifiers
+arm_unop_unone_none_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_unsigned, qualifier_none };
+#define UNOP_UNONE_NONE_QUALIFIERS \
+  (arm_unop_unone_none_qualifiers)
+
+static enum arm_type_qualifiers
+arm_unop_unone_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_unsigned, qualifier_unsigned };
+#define UNOP_UNONE_UNONE_QUALIFIERS \
+  (arm_unop_unone_unone_qualifiers)
+
+static enum arm_type_qualifiers
+arm_unop_unone_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_unsigned, qualifier_immediate };
+#define UNOP_UNONE_IMM_QUALIFIERS \
+  (arm_unop_unone_imm_qualifiers)
+
 /* End of Qualifier for MVE builtins.  */
 
    /* void ([T element type] *, T, immediate).  */
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 9bcb04fb99a54b47057bb33cc807d6a5ad16401f..bd5162122b8c8e61ba25ba6ea89c56005f5a79dc 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -108,6 +108,20 @@ typedef struct { uint8x16_t val[4]; } uint8x16x4_t;
 #define vcvtq_f32_s32(__a) __arm_vcvtq_f32_s32(__a)
 #define vcvtq_f16_u16(__a) __arm_vcvtq_f16_u16(__a)
 #define vcvtq_f32_u32(__a) __arm_vcvtq_f32_u32(__a)
+#define vmvnq_n_s16( __imm) __arm_vmvnq_n_s16( __imm)
+#define vmvnq_n_s32( __imm) __arm_vmvnq_n_s32( __imm)
+#define vrev64q_s8(__a) __arm_vrev64q_s8(__a)
+#define vrev64q_s16(__a) __arm_vrev64q_s16(__a)
+#define vrev64q_s32(__a) __arm_vrev64q_s32(__a)
+#define vcvtq_s16_f16(__a) __arm_vcvtq_s16_f16(__a)
+#define vcvtq_s32_f32(__a) __arm_vcvtq_s32_f32(__a)
+#define vrev64q_u8(__a) __arm_vrev64q_u8(__a)
+#define vrev64q_u16(__a) __arm_vrev64q_u16(__a)
+#define vrev64q_u32(__a) __arm_vrev64q_u32(__a)
+#define vmvnq_n_u16( __imm) __arm_vmvnq_n_u16( __imm)
+#define vmvnq_n_u32( __imm) __arm_vmvnq_n_u32( __imm)
+#define vcvtq_u16_f16(__a) __arm_vcvtq_u16_f16(__a)
+#define vcvtq_u32_f32(__a) __arm_vcvtq_u32_f32(__a)
 #endif
 
 __extension__ extern __inline void
@@ -164,6 +178,76 @@ __arm_vst4q_u32 (uint32_t * __addr, uint32x4x4_t __value)
   __builtin_mve_vst4qv4si ((__builtin_neon_si *) __addr, __rv.__o);
 }
 
+__extension__ extern __inline int16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vmvnq_n_s16 (const int16_t __imm) /* int16_t per the ACLE.  */
+{
+  return __builtin_mve_vmvnq_n_sv8hi (__imm);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vmvnq_n_s32 (const int32_t __imm) /* int32_t per the ACLE.  */
+{
+  return __builtin_mve_vmvnq_n_sv4si (__imm);
+}
+
+__extension__ extern __inline int8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vrev64q_s8 (int8x16_t __a)
+{
+  return __builtin_mve_vrev64q_sv16qi (__a);
+}
+
+__extension__ extern __inline int16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vrev64q_s16 (int16x8_t __a)
+{
+  return __builtin_mve_vrev64q_sv8hi (__a);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vrev64q_s32 (int32x4_t __a)
+{
+  return __builtin_mve_vrev64q_sv4si (__a);
+}
+
+__extension__ extern __inline uint8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vrev64q_u8 (uint8x16_t __a)
+{
+  return __builtin_mve_vrev64q_uv16qi (__a);
+}
+
+__extension__ extern __inline uint16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vrev64q_u16 (uint16x8_t __a)
+{
+  return __builtin_mve_vrev64q_uv8hi (__a);
+}
+
+__extension__ extern __inline uint32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vrev64q_u32 (uint32x4_t __a)
+{
+  return __builtin_mve_vrev64q_uv4si (__a);
+}
+
+__extension__ extern __inline uint16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vmvnq_n_u16 (const uint16_t __imm) /* uint16_t per the ACLE.  */
+{
+  return __builtin_mve_vmvnq_n_uv8hi (__imm);
+}
+
+__extension__ extern __inline uint32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vmvnq_n_u32 (const uint32_t __imm) /* uint32_t per the ACLE.  */
+{
+  return __builtin_mve_vmvnq_n_uv4si (__imm);
+}
+
 #if (__ARM_FEATURE_MVE & 2) /* MVE Floating point.  */
 
 __extension__ extern __inline void
@@ -373,6 +457,34 @@ __arm_vcvtq_f32_u32 (uint32x4_t __a)
   return __builtin_mve_vcvtq_to_f_uv4sf (__a);
 }
 
+__extension__ extern __inline int16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcvtq_s16_f16 (float16x8_t __a)
+{
+  return __builtin_mve_vcvtq_from_f_sv8hi (__a);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcvtq_s32_f32 (float32x4_t __a)
+{
+  return __builtin_mve_vcvtq_from_f_sv4si (__a);
+}
+
+__extension__ extern __inline uint16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcvtq_u16_f16 (float16x8_t __a)
+{
+  return __builtin_mve_vcvtq_from_f_uv8hi (__a);
+}
+
+__extension__ extern __inline uint32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcvtq_u32_f32 (float32x4_t __a)
+{
+  return __builtin_mve_vcvtq_from_f_uv4si (__a);
+}
+
 #endif
 
 enum {
@@ -674,6 +786,16 @@ extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8x4_t]: __arm_vst4q_u16 (__ARM_mve_coerce(__p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8x4_t)), \
   int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4x4_t]: __arm_vst4q_u32 (__ARM_mve_coerce(__p0, uint32_t *), __ARM_mve_coerce(__p1, uint32x4x4_t)));})
 
+#define vrev64q(p0) __arm_vrev64q(p0)
+#define __arm_vrev64q(p0) ({ __typeof(p0) __p0 = (p0); \
+  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
+  int (*)[__ARM_mve_type_int8x16_t]: __arm_vrev64q_s8 (__ARM_mve_coerce(__p0, int8x16_t)), \
+  int (*)[__ARM_mve_type_int16x8_t]: __arm_vrev64q_s16 (__ARM_mve_coerce(__p0, int16x8_t)), \
+  int (*)[__ARM_mve_type_int32x4_t]: __arm_vrev64q_s32 (__ARM_mve_coerce(__p0, int32x4_t)), \
+  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrev64q_u8 (__ARM_mve_coerce(__p0, uint8x16_t)), \
+  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrev64q_u16 (__ARM_mve_coerce(__p0, uint16x8_t)), \
+  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vrev64q_u32 (__ARM_mve_coerce(__p0, uint32x4_t)));})
+
 #endif /* MVE Floating point.  */
 
 #ifdef __cplusplus
diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def
index 65dc58c9328525891a0aa0bb97a412ebc8257c18..d205aca28909a224bd4bad103b8a280631661538 100644
--- a/gcc/config/arm/arm_mve_builtins.def
+++ b/gcc/config/arm/arm_mve_builtins.def
@@ -34,3 +34,9 @@ VAR1 (UNOP_NONE_NONE, vcvttq_f32_f16, v4sf)
 VAR1 (UNOP_NONE_NONE, vcvtbq_f32_f16, v4sf)
 VAR2 (UNOP_NONE_SNONE, vcvtq_to_f_s, v8hf, v4sf)
 VAR2 (UNOP_NONE_UNONE, vcvtq_to_f_u, v8hf, v4sf)
+VAR3 (UNOP_SNONE_SNONE, vrev64q_s, v16qi, v8hi, v4si)
+VAR2 (UNOP_SNONE_NONE, vcvtq_from_f_s, v8hi, v4si)
+VAR2 (UNOP_SNONE_IMM, vmvnq_n_s, v8hi, v4si)
+VAR3 (UNOP_UNONE_UNONE, vrev64q_u, v16qi, v8hi, v4si)
+VAR2 (UNOP_UNONE_NONE, vcvtq_from_f_u, v8hi, v4si)
+VAR2 (UNOP_UNONE_IMM, vmvnq_n_u, v8hi, v4si)
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 7a31d0abdfff9a93d79faa1de44d1b224470e2eb..a1dd709a9ffe479cf16a88a5923975f1941531ef 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -22,17 +22,26 @@
 (define_mode_iterator MVE_types [V16QI V8HI V4SI V2DI TI V8HF V4SF V2DF])
 (define_mode_iterator MVE_VLD_ST [V16QI V8HI V4SI V8HF V4SF])
 (define_mode_iterator MVE_0 [V8HF V4SF])
+(define_mode_iterator MVE_2 [V16QI V8HI V4SI])
+(define_mode_iterator MVE_5 [V8HI V4SI])
 
 (define_c_enum "unspec" [VST4Q VRNDXQ_F VRNDQ_F VRNDPQ_F VRNDNQ_F VRNDMQ_F
 			 VRNDAQ_F VREV64Q_F VNEGQ_F VDUPQ_N_F VABSQ_F VREV32Q_F
 			 VCVTTQ_F32_F16 VCVTBQ_F32_F16 VCVTQ_TO_F_S
-			 VCVTQ_TO_F_U])
+			 VCVTQ_TO_F_U VMVNQ_N_S VMVNQ_N_U VREV64Q_S VREV64Q_U
+			 VCVTQ_FROM_F_S VCVTQ_FROM_F_U])
 
 (define_mode_attr MVE_CNVT [(V8HI "V8HF") (V4SI "V4SF")
 			    (V8HF "V8HI") (V4SF "V4SI")])
 
-(define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u")])
+(define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VMVNQ_N_S "s")
+		       (VMVNQ_N_U "u") (VREV64Q_U "u") (VREV64Q_S "s")
+		       (VCVTQ_FROM_F_S "s") (VCVTQ_FROM_F_U "u")])
+
 (define_int_iterator VCVTQ_TO_F [VCVTQ_TO_F_S VCVTQ_TO_F_U])
+(define_int_iterator VMVNQ_N [VMVNQ_N_U VMVNQ_N_S])
+(define_int_iterator VREV64Q [VREV64Q_S VREV64Q_U])
+(define_int_iterator VCVTQ_FROM_F [VCVTQ_FROM_F_S VCVTQ_FROM_F_U])
 
 (define_insn "*mve_mov<mode>"
   [(set (match_operand:MVE_types 0 "s_register_operand" "=w,w,r,w,w,r,w")
@@ -318,3 +327,45 @@
   "vcvt.f%#<V_sz_elem>.<supf>%#<V_sz_elem>       %q0, %q1"
   [(set_attr "type" "mve_move")
 ])
+
+;;
+;; [vrev64q_u, vrev64q_s])
+;;
+(define_insn "mve_vrev64q_<supf><mode>"
+  [
+   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
+	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")]
+	 VREV64Q))
+  ]
+  "TARGET_HAVE_MVE"
+  "vrev64.%#<V_sz_elem> %q0, %q1"
+  [(set_attr "type" "mve_move")
+])
+
+;;
+;; [vcvtq_from_f_s, vcvtq_from_f_u])
+;;
+(define_insn "mve_vcvtq_from_f_<supf><mode>"
+  [
+   (set (match_operand:MVE_5 0 "s_register_operand" "=w")
+	(unspec:MVE_5 [(match_operand:<MVE_CNVT> 1 "s_register_operand" "w")]
+	 VCVTQ_FROM_F))
+  ]
+  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+  "vcvt.<supf>%#<V_sz_elem>.f%#<V_sz_elem>       %q0, %q1"
+  [(set_attr "type" "mve_move")
+])
+
+;;
+;; [vmvnq_n_u, vmvnq_n_s])
+;;
+(define_insn "mve_vmvnq_n_<supf><mode>"
+  [
+   (set (match_operand:MVE_5 0 "s_register_operand" "=w")
+	(unspec:MVE_5 [(match_operand:SI 1 "immediate_operand" "i")]
+	 VMVNQ_N))
+  ]
+  "TARGET_HAVE_MVE"
+  "vmvn.i%#<V_sz_elem>  %q0, %1"
+  [(set_attr "type" "mve_move")
+])
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_s16_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_s16_f16.c
new file mode 100644
index 0000000000000000000000000000000000000000..aa69b11b79a15dace81bb5d8112cc5053a6f8dc2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_s16_f16.c
@@ -0,0 +1,13 @@
+/* { dg-do compile  } */
+/* { dg-additional-options "-march=armv8.1-m.main+mve.fp -mfloat-abi=hard -O2"  }  */
+/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */
+
+#include "arm_mve.h"
+
+int16x8_t
+foo (float16x8_t a)
+{
+  return vcvtq_s16_f16 (a);
+}
+
+/* { dg-final { scan-assembler "vcvt.s16.f16"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_s32_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_s32_f32.c
new file mode 100644
index 0000000000000000000000000000000000000000..0bfcba6dcf4e240a1de0cba5d98d85c2a529c09e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_s32_f32.c
@@ -0,0 +1,13 @@
+/* { dg-do compile  } */
+/* { dg-additional-options "-march=armv8.1-m.main+mve.fp -mfloat-abi=hard -O2"  }  */
+/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */
+
+#include "arm_mve.h"
+
+int32x4_t
+foo (float32x4_t a)
+{
+  return vcvtq_s32_f32 (a);
+}
+
+/* { dg-final { scan-assembler "vcvt.s32.f32"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_u16_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_u16_f16.c
new file mode 100644
index 0000000000000000000000000000000000000000..ed36c8082ee464e8878ae7453e04d26e09a87752
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_u16_f16.c
@@ -0,0 +1,13 @@
+/* { dg-do compile  } */
+/* { dg-additional-options "-march=armv8.1-m.main+mve.fp -mfloat-abi=hard -O2"  }  */
+/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */
+
+#include "arm_mve.h"
+
+uint16x8_t
+foo (float16x8_t a)
+{
+    return vcvtq_u16_f16 (a);
+}
+
+/* { dg-final { scan-assembler "vcvt.u16.f16"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_u32_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_u32_f32.c
new file mode 100644
index 0000000000000000000000000000000000000000..fbd3989e19c8d832561d6a4265b68d0a87a678b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_u32_f32.c
@@ -0,0 +1,13 @@
+/* { dg-do compile  } */
+/* { dg-additional-options "-march=armv8.1-m.main+mve.fp -mfloat-abi=hard -O2"  }  */
+/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */
+
+#include "arm_mve.h"
+
+uint32x4_t
+foo (float32x4_t a)
+{
+    return vcvtq_u32_f32 (a);
+}
+
+/* { dg-final { scan-assembler "vcvt.u32.f32"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_s16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_s16.c
new file mode 100644
index 0000000000000000000000000000000000000000..39c31b4bbe743ed765c9a106778d6c4ba31d14eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_s16.c
@@ -0,0 +1,13 @@
+/* { dg-do compile  } */
+/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2"  }  */
+/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */
+
+#include "arm_mve.h"
+
+int16x8_t
+foo ()
+{
+  return vmvnq_n_s16 (1);
+}
+
+/* { dg-final { scan-assembler "vmvn.i16"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_s32.c
new file mode 100644
index 0000000000000000000000000000000000000000..6754cbf8baf11a702543c86d4c048df6bd9699a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_s32.c
@@ -0,0 +1,13 @@
+/* { dg-do compile  } */
+/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2"  }  */
+/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */
+
+#include "arm_mve.h"
+
+int32x4_t
+foo ()
+{
+  return vmvnq_n_s32 (2);
+}
+
+/* { dg-final { scan-assembler "vmvn.i32"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_u16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_u16.c
new file mode 100644
index 0000000000000000000000000000000000000000..b7b12e7476917631927017d9413ef7226fedbe23
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_u16.c
@@ -0,0 +1,13 @@
+/* { dg-do compile  } */
+/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2"  }  */
+/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */
+
+#include "arm_mve.h"
+
+uint16x8_t
+foo ()
+{
+    return vmvnq_n_u16 (1);
+}
+
+/* { dg-final { scan-assembler "vmvn.i16"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_u32.c
new file mode 100644
index 0000000000000000000000000000000000000000..d5fb831b41ca68d6c4d812051878835b074c47e5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_u32.c
@@ -0,0 +1,13 @@
+/* { dg-do compile  } */
+/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2"  }  */
+/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */
+
+#include "arm_mve.h"
+
+uint32x4_t
+foo ()
+{
+    return vmvnq_n_u32 (2);
+}
+
+/* { dg-final { scan-assembler "vmvn.i32"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_s16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_s16.c
new file mode 100644
index 0000000000000000000000000000000000000000..4eda96fefd369781a7639d7c5a9515d02b4b439e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_s16.c
@@ -0,0 +1,21 @@
+/* { dg-do compile  } */
+/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2"  }  */
+/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */
+
+#include "arm_mve.h"
+
+int16x8_t
+foo (int16x8_t a)
+{
+  return vrev64q_s16 (a);
+}
+
+/* { dg-final { scan-assembler "vrev64.16"  }  } */
+
+int16x8_t
+foo1 (int16x8_t a)
+{
+  return vrev64q (a); /* Exercise the polymorphic variant.  */
+}
+
+/* { dg-final { scan-assembler "vrev64.16"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_s32.c
new file mode 100644
index 0000000000000000000000000000000000000000..356f162c477e8159da73c9242caff6a545235cc1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_s32.c
@@ -0,0 +1,21 @@
+/* { dg-do compile  } */
+/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2"  }  */
+/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */
+
+#include "arm_mve.h"
+
+int32x4_t
+foo (int32x4_t a)
+{
+  return vrev64q_s32 (a);
+}
+
+/* { dg-final { scan-assembler "vrev64.32"  }  } */
+
+int32x4_t
+foo1 (int32x4_t a)
+{
+  return vrev64q (a); /* Exercise the polymorphic variant.  */
+}
+
+/* { dg-final { scan-assembler "vrev64.32"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_s8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_s8.c
new file mode 100644
index 0000000000000000000000000000000000000000..5cc4d0750f4d8de85e997247c77d7c076dfb624e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_s8.c
@@ -0,0 +1,21 @@
+/* { dg-do compile  } */
+/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2"  }  */
+/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */
+
+#include "arm_mve.h"
+
+int8x16_t
+foo (int8x16_t a)
+{
+  return vrev64q_s8 (a);
+}
+
+/* { dg-final { scan-assembler "vrev64.8"  }  } */
+
+int8x16_t
+foo1 (int8x16_t a)
+{
+  return vrev64q (a);
+}
+
+/* { dg-final { scan-assembler "vrev64.8"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_u16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_u16.c
new file mode 100644
index 0000000000000000000000000000000000000000..ae7e3665c54b11e2eee8209ade6030875a201b6b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_u16.c
@@ -0,0 +1,21 @@
+/* { dg-do compile  } */
+/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2"  }  */
+/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */
+
+#include "arm_mve.h"
+
+uint16x8_t
+foo (uint16x8_t a)
+{
+    return vrev64q_u16 (a);
+}
+
+/* { dg-final { scan-assembler "vrev64.16"  }  } */
+
+uint16x8_t
+foo1 (uint16x8_t a)
+{
+    return vrev64q (a);
+}
+
+/* { dg-final { scan-assembler "vrev64.16"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_u32.c
new file mode 100644
index 0000000000000000000000000000000000000000..8c87cab925766ea981ce90cc47b3194bbf0913ff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_u32.c
@@ -0,0 +1,21 @@
+/* { dg-do compile  } */
+/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2"  }  */
+/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */
+
+#include "arm_mve.h"
+
+uint32x4_t
+foo (uint32x4_t a)
+{
+    return vrev64q_u32 (a);
+}
+
+/* { dg-final { scan-assembler "vrev64.32"  }  } */
+
+uint32x4_t
+foo1 (uint32x4_t a)
+{
+    return vrev64q (a);
+}
+
+/* { dg-final { scan-assembler "vrev64.32"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_u8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_u8.c
new file mode 100644
index 0000000000000000000000000000000000000000..c4abd160e61517a4fcc2312c8fcdff1119686da6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_u8.c
@@ -0,0 +1,21 @@
+/* { dg-do compile  } */
+/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2"  }  */
+/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */
+
+#include "arm_mve.h"
+
+uint8x16_t
+foo (uint8x16_t a)
+{
+    return vrev64q_u8 (a);
+}
+
+/* { dg-final { scan-assembler "vrev64.8"  }  } */
+
+uint8x16_t
+foo1 (uint8x16_t a)
+{
+    return vrev64q (a);
+}
+
+/* { dg-final { scan-assembler "vrev64.8"  }  } */

Comments

Kyrill Tkachov Dec. 19, 2019, 5:57 p.m. UTC | #1
Hi Srinath,

On 11/14/19 7:13 PM, Srinath Parvathaneni wrote:
> Hello,
>
> This patch supports following MVE ACLE intrinsics with unary operand.
>
> vmvnq_n_s16, vmvnq_n_s32, vrev64q_s8, vrev64q_s16, vrev64q_s32, 
> vcvtq_s16_f16, vcvtq_s32_f32,
> vrev64q_u8, vrev64q_u16, vrev64q_u32, vmvnq_n_u16, vmvnq_n_u32, 
> vcvtq_u16_f16, vcvtq_u32_f32,
> vrev64q.
>
> Please refer to M-profile Vector Extension (MVE) intrinsics [1]  for 
> more details.
> [1] 
> https://developer.arm.com/architectures/instruction-sets/simd-isas/helium/mve-intrinsics
>
> Regression tested on arm-none-eabi and found no regressions.
>
> Ok for trunk?
>
> Thanks,
> Srinath.
>
> gcc/ChangeLog:
>
> 2019-10-21  Andre Vieira <andre.simoesdiasvieira@arm.com>
>             Mihail Ionescu  <mihail.ionescu@arm.com>
>             Srinath Parvathaneni <srinath.parvathaneni@arm.com>
>
>         * config/arm/arm-builtins.c (UNOP_SNONE_SNONE_QUALIFIERS): Define.
>         (UNOP_SNONE_NONE_QUALIFIERS): Likewise.
>         (UNOP_SNONE_IMM_QUALIFIERS): Likewise.
>         (UNOP_UNONE_NONE_QUALIFIERS): Likewise.
>         (UNOP_UNONE_UNONE_QUALIFIERS): Likewise.
>         (UNOP_UNONE_IMM_QUALIFIERS): Likewise.
>         * config/arm/arm_mve.h (vmvnq_n_s16): Define macro.
>         (vmvnq_n_s32): Likewise.
>         (vrev64q_s8): Likewise.
>         (vrev64q_s16): Likewise.
>         (vrev64q_s32): Likewise.
>         (vcvtq_s16_f16): Likewise.
>         (vcvtq_s32_f32): Likewise.
>         (vrev64q_u8): Likewise.
>         (vrev64q_u16): Likewise.
>         (vrev64q_u32): Likewise.
>         (vmvnq_n_u16): Likewise.
>         (vmvnq_n_u32): Likewise.
>         (vcvtq_u16_f16): Likewise.
>         (vcvtq_u32_f32): Likewise.
>         (__arm_vmvnq_n_s16): Define intrinsic.
>         (__arm_vmvnq_n_s32): Likewise.
>         (__arm_vrev64q_s8): Likewise.
>         (__arm_vrev64q_s16): Likewise.
>         (__arm_vrev64q_s32): Likewise.
>         (__arm_vrev64q_u8): Likewise.
>         (__arm_vrev64q_u16): Likewise.
>         (__arm_vrev64q_u32): Likewise.
>         (__arm_vmvnq_n_u16): Likewise.
>         (__arm_vmvnq_n_u32): Likewise.
>         (__arm_vcvtq_s16_f16): Likewise.
>         (__arm_vcvtq_s32_f32): Likewise.
>         (__arm_vcvtq_u16_f16): Likewise.
>         (__arm_vcvtq_u32_f32): Likewise.
>         (vrev64q): Define polymorphic variant.
>         * config/arm/arm_mve_builtins.def (UNOP_SNONE_SNONE): Use it.
>         (UNOP_SNONE_NONE): Likewise.
>         (UNOP_SNONE_IMM): Likewise.
>         (UNOP_UNONE_UNONE): Likewise.
>         (UNOP_UNONE_NONE): Likewise.
>         (UNOP_UNONE_IMM): Likewise.
>         * config/arm/mve.md (mve_vrev64q_<supf><mode>): Define RTL 
> pattern.
>         (mve_vcvtq_from_f_<supf><mode>): Likewise.
>         (mve_vmvnq_n_<supf><mode>): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> 2019-10-21  Andre Vieira <andre.simoesdiasvieira@arm.com>
>             Mihail Ionescu  <mihail.ionescu@arm.com>
>             Srinath Parvathaneni <srinath.parvathaneni@arm.com>
>
>         * gcc.target/arm/mve/intrinsics/vcvtq_s16_f16.c: New test.
>         * gcc.target/arm/mve/intrinsics/vcvtq_s32_f32.c: Likewise.
>         * gcc.target/arm/mve/intrinsics/vcvtq_u16_f16.c: Likewise.
>         * gcc.target/arm/mve/intrinsics/vcvtq_u32_f32.c: Likewise.
>         * gcc.target/arm/mve/intrinsics/vmvnq_n_s16.c: Likewise.
>         * gcc.target/arm/mve/intrinsics/vmvnq_n_s32.c: Likewise.
>         * gcc.target/arm/mve/intrinsics/vmvnq_n_u16.c: Likewise.
>         * gcc.target/arm/mve/intrinsics/vmvnq_n_u32.c: Likewise.
>         * gcc.target/arm/mve/intrinsics/vrev64q_s16.c: Likewise.
>         * gcc.target/arm/mve/intrinsics/vrev64q_s32.c: Likewise.
>         * gcc.target/arm/mve/intrinsics/vrev64q_s8.c: Likewise.
>         * gcc.target/arm/mve/intrinsics/vrev64q_u16.c: Likewise.
>         * gcc.target/arm/mve/intrinsics/vrev64q_u32.c: Likewise.
>         * gcc.target/arm/mve/intrinsics/vrev64q_u8.c: Likewise.
>
>
> ###############     Attachment also inlined for ease of reply    
> ###############
>
>
> diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
> index 
> 2fee417fe6585f457edd4cf96655366b1d6bd1a0..21b213d8e1bc99a3946f15e97161e01d73832799 
> 100644
> --- a/gcc/config/arm/arm-builtins.c
> +++ b/gcc/config/arm/arm-builtins.c
> @@ -313,6 +313,42 @@ arm_unop_none_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>  #define UNOP_NONE_UNONE_QUALIFIERS \
>    (arm_unop_none_unone_qualifiers)
>
> +static enum arm_type_qualifiers
> +arm_unop_snone_snone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> +  = { qualifier_none, qualifier_none };
> +#define UNOP_SNONE_SNONE_QUALIFIERS \
> +  (arm_unop_snone_snone_qualifiers)
> +
> +static enum arm_type_qualifiers
> +arm_unop_snone_none_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> +  = { qualifier_none, qualifier_none };
> +#define UNOP_SNONE_NONE_QUALIFIERS \
> +  (arm_unop_snone_none_qualifiers)
> +
> +static enum arm_type_qualifiers
> +arm_unop_snone_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> +  = { qualifier_none, qualifier_immediate };
> +#define UNOP_SNONE_IMM_QUALIFIERS \
> +  (arm_unop_snone_imm_qualifiers)
> +
> +static enum arm_type_qualifiers
> +arm_unop_unone_none_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> +  = { qualifier_unsigned, qualifier_none };
> +#define UNOP_UNONE_NONE_QUALIFIERS \
> +  (arm_unop_unone_none_qualifiers)
> +
> +static enum arm_type_qualifiers
> +arm_unop_unone_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> +  = { qualifier_unsigned, qualifier_unsigned };
> +#define UNOP_UNONE_UNONE_QUALIFIERS \
> +  (arm_unop_unone_unone_qualifiers)
> +
> +static enum arm_type_qualifiers
> +arm_unop_unone_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> +  = { qualifier_unsigned, qualifier_immediate };
> +#define UNOP_UNONE_IMM_QUALIFIERS \
> +  (arm_unop_unone_imm_qualifiers)
> +
>  /* End of Qualifier for MVE builtins.  */
>
>     /* void ([T element type] *, T, immediate).  */
> diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
> index 
> 9bcb04fb99a54b47057bb33cc807d6a5ad16401f..bd5162122b8c8e61ba25ba6ea89c56005f5a79dc 
> 100644
> --- a/gcc/config/arm/arm_mve.h
> +++ b/gcc/config/arm/arm_mve.h
> @@ -108,6 +108,20 @@ typedef struct { uint8x16_t val[4]; } uint8x16x4_t;
>  #define vcvtq_f32_s32(__a) __arm_vcvtq_f32_s32(__a)
>  #define vcvtq_f16_u16(__a) __arm_vcvtq_f16_u16(__a)
>  #define vcvtq_f32_u32(__a) __arm_vcvtq_f32_u32(__a)
> +#define vmvnq_n_s16( __imm) __arm_vmvnq_n_s16( __imm)
> +#define vmvnq_n_s32( __imm) __arm_vmvnq_n_s32( __imm)
> +#define vrev64q_s8(__a) __arm_vrev64q_s8(__a)
> +#define vrev64q_s16(__a) __arm_vrev64q_s16(__a)
> +#define vrev64q_s32(__a) __arm_vrev64q_s32(__a)
> +#define vcvtq_s16_f16(__a) __arm_vcvtq_s16_f16(__a)
> +#define vcvtq_s32_f32(__a) __arm_vcvtq_s32_f32(__a)
> +#define vrev64q_u8(__a) __arm_vrev64q_u8(__a)
> +#define vrev64q_u16(__a) __arm_vrev64q_u16(__a)
> +#define vrev64q_u32(__a) __arm_vrev64q_u32(__a)
> +#define vmvnq_n_u16( __imm) __arm_vmvnq_n_u16( __imm)
> +#define vmvnq_n_u32( __imm) __arm_vmvnq_n_u32( __imm)
> +#define vcvtq_u16_f16(__a) __arm_vcvtq_u16_f16(__a)
> +#define vcvtq_u32_f32(__a) __arm_vcvtq_u32_f32(__a)
>  #endif
>
>  __extension__ extern __inline void
> @@ -164,6 +178,76 @@ __arm_vst4q_u32 (uint32_t * __addr, uint32x4x4_t 
> __value)
>    __builtin_mve_vst4qv4si ((__builtin_neon_si *) __addr, __rv.__o);
>  }
>
> +__extension__ extern __inline int16x8_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__arm_vmvnq_n_s16 (const int __imm)
> +{
> +  return __builtin_mve_vmvnq_n_sv8hi (__imm);
> +}
> +
> +__extension__ extern __inline int32x4_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__arm_vmvnq_n_s32 (const int __imm)
> +{
> +  return __builtin_mve_vmvnq_n_sv4si (__imm);
> +}


The spec says that the immediates should be int16_t and int32_t rather 
than int. The same applies to the unsigned variants in the patch.


> +
> +__extension__ extern __inline int8x16_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__arm_vrev64q_s8 (int8x16_t __a)
> +{
> +  return __builtin_mve_vrev64q_sv16qi (__a);
> +}
> +
> +__extension__ extern __inline int16x8_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__arm_vrev64q_s16 (int16x8_t __a)
> +{
> +  return __builtin_mve_vrev64q_sv8hi (__a);
> +}
> +
> +__extension__ extern __inline int32x4_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__arm_vrev64q_s32 (int32x4_t __a)
> +{
> +  return __builtin_mve_vrev64q_sv4si (__a);
> +}
> +
> +__extension__ extern __inline uint8x16_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__arm_vrev64q_u8 (uint8x16_t __a)
> +{
> +  return __builtin_mve_vrev64q_uv16qi (__a);
> +}
> +
> +__extension__ extern __inline uint16x8_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__arm_vrev64q_u16 (uint16x8_t __a)
> +{
> +  return __builtin_mve_vrev64q_uv8hi (__a);
> +}
> +
> +__extension__ extern __inline uint32x4_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__arm_vrev64q_u32 (uint32x4_t __a)
> +{
> +  return __builtin_mve_vrev64q_uv4si (__a);
> +}
> +
> +__extension__ extern __inline uint16x8_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__arm_vmvnq_n_u16 (const int __imm)
> +{
> +  return __builtin_mve_vmvnq_n_uv8hi (__imm);
> +}
> +
> +__extension__ extern __inline uint32x4_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__arm_vmvnq_n_u32 (const int __imm)
> +{
> +  return __builtin_mve_vmvnq_n_uv4si (__imm);
> +}
> +
>  #if (__ARM_FEATURE_MVE & 2) /* MVE Floating point.  */
>
>  __extension__ extern __inline void
> @@ -373,6 +457,34 @@ __arm_vcvtq_f32_u32 (uint32x4_t __a)
>    return __builtin_mve_vcvtq_to_f_uv4sf (__a);
>  }
>
> +__extension__ extern __inline int16x8_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__arm_vcvtq_s16_f16 (float16x8_t __a)
> +{
> +  return __builtin_mve_vcvtq_from_f_sv8hi (__a);
> +}
> +
> +__extension__ extern __inline int32x4_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__arm_vcvtq_s32_f32 (float32x4_t __a)
> +{
> +  return __builtin_mve_vcvtq_from_f_sv4si (__a);
> +}
> +
> +__extension__ extern __inline uint16x8_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__arm_vcvtq_u16_f16 (float16x8_t __a)
> +{
> +  return __builtin_mve_vcvtq_from_f_uv8hi (__a);
> +}
> +
> +__extension__ extern __inline uint32x4_t
> +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
> +__arm_vcvtq_u32_f32 (float32x4_t __a)
> +{
> +  return __builtin_mve_vcvtq_from_f_uv4si (__a);
> +}
> +
>  #endif
>
>  enum {
> @@ -674,6 +786,16 @@ extern void *__ARM_undef;
>    int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8x4_t]: 
> __arm_vst4q_u16 (__ARM_mve_coerce(__p0, uint16_t *), 
> __ARM_mve_coerce(__p1, uint16x8x4_t)), \
>    int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4x4_t]: 
> __arm_vst4q_u32 (__ARM_mve_coerce(__p0, uint32_t *), 
> __ARM_mve_coerce(__p1, uint32x4x4_t)));})
>
> +#define vrev64q(p0) __arm_vrev64q(p0)
> +#define __arm_vrev64q(p0) ({ __typeof(p0) __p0 = (p0); \
> +  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
> +  int (*)[__ARM_mve_type_int8x16_t]: __arm_vrev64q_s8 
> (__ARM_mve_coerce(__p0, int8x16_t)), \
> +  int (*)[__ARM_mve_type_int16x8_t]: __arm_vrev64q_s16 
> (__ARM_mve_coerce(__p0, int16x8_t)), \
> +  int (*)[__ARM_mve_type_int32x4_t]: __arm_vrev64q_s32 
> (__ARM_mve_coerce(__p0, int32x4_t)), \
> +  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrev64q_u8 
> (__ARM_mve_coerce(__p0, uint8x16_t)), \
> +  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrev64q_u16 
> (__ARM_mve_coerce(__p0, uint16x8_t)), \
> +  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vrev64q_u32 
> (__ARM_mve_coerce(__p0, uint32x4_t)));})
> +
>  #endif /* MVE Floating point.  */
>
>  #ifdef __cplusplus
> diff --git a/gcc/config/arm/arm_mve_builtins.def 
> b/gcc/config/arm/arm_mve_builtins.def
> index 
> 65dc58c9328525891a0aa0bb97a412ebc8257c18..d205aca28909a224bd4bad103b8a280631661538 
> 100644
> --- a/gcc/config/arm/arm_mve_builtins.def
> +++ b/gcc/config/arm/arm_mve_builtins.def
> @@ -34,3 +34,9 @@ VAR1 (UNOP_NONE_NONE, vcvttq_f32_f16, v4sf)
>  VAR1 (UNOP_NONE_NONE, vcvtbq_f32_f16, v4sf)
>  VAR2 (UNOP_NONE_SNONE, vcvtq_to_f_s, v8hf, v4sf)
>  VAR2 (UNOP_NONE_UNONE, vcvtq_to_f_u, v8hf, v4sf)
> +VAR3 (UNOP_SNONE_SNONE, vrev64q_s, v16qi, v8hi, v4si)
> +VAR2 (UNOP_SNONE_NONE, vcvtq_from_f_s, v8hi, v4si)
> +VAR2 (UNOP_SNONE_IMM, vmvnq_n_s, v8hi, v4si)
> +VAR3 (UNOP_UNONE_UNONE, vrev64q_u, v16qi, v8hi, v4si)
> +VAR2 (UNOP_UNONE_NONE, vcvtq_from_f_u, v8hi, v4si)
> +VAR2 (UNOP_UNONE_IMM, vmvnq_n_u, v8hi, v4si)
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index 
> 7a31d0abdfff9a93d79faa1de44d1b224470e2eb..a1dd709a9ffe479cf16a88a5923975f1941531ef 
> 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -22,17 +22,26 @@
>  (define_mode_iterator MVE_types [V16QI V8HI V4SI V2DI TI V8HF V4SF V2DF])
>  (define_mode_iterator MVE_VLD_ST [V16QI V8HI V4SI V8HF V4SF])
>  (define_mode_iterator MVE_0 [V8HF V4SF])
> +(define_mode_iterator MVE_2 [V16QI V8HI V4SI])
> +(define_mode_iterator MVE_5 [V8HI V4SI])
>
>  (define_c_enum "unspec" [VST4Q VRNDXQ_F VRNDQ_F VRNDPQ_F VRNDNQ_F 
> VRNDMQ_F
>                           VRNDAQ_F VREV64Q_F VNEGQ_F VDUPQ_N_F VABSQ_F 
> VREV32Q_F
>                           VCVTTQ_F32_F16 VCVTBQ_F32_F16 VCVTQ_TO_F_S
> -                        VCVTQ_TO_F_U])
> +                        VCVTQ_TO_F_U VMVNQ_N_S VMVNQ_N_U VREV64Q_S 
> VREV64Q_U
> +                        VCVTQ_FROM_F_S VCVTQ_FROM_F_U])
>
>  (define_mode_attr MVE_CNVT [(V8HI "V8HF") (V4SI "V4SF")
>                              (V8HF "V8HI") (V4SF "V4SI")])
>
> -(define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u")])
> +(define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") 
> (VMVNQ_N_S "s")
> +                      (VMVNQ_N_U "u") (VREV64Q_U "u") (VREV64Q_S "s")
> +                      (VCVTQ_FROM_F_S "s") (VCVTQ_FROM_F_U "u")])
> +
>  (define_int_iterator VCVTQ_TO_F [VCVTQ_TO_F_S VCVTQ_TO_F_U])
> +(define_int_iterator VMVNQ_N [VMVNQ_N_U VMVNQ_N_S])
> +(define_int_iterator VREV64Q [VREV64Q_S VREV64Q_U])
> +(define_int_iterator VCVTQ_FROM_F [VCVTQ_FROM_F_S VCVTQ_FROM_F_U])
>
>  (define_insn "*mve_mov<mode>"
>    [(set (match_operand:MVE_types 0 "s_register_operand" "=w,w,r,w,w,r,w")
> @@ -318,3 +327,45 @@
> "vcvt.f%#<V_sz_elem>.<supf>%#<V_sz_elem> %q0, %q1"
>    [(set_attr "type" "mve_move")
>  ])
> +
> +;;
> +;; [vrev64q_u, vrev64q_s])
> +;;
> +(define_insn "mve_vrev64q_<supf><mode>"
> +  [
> +   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
> +       (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")]
> +        VREV64Q))
> +  ]
> +  "TARGET_HAVE_MVE"
> +  "vrev64.%#<V_sz_elem> %q0, %q1"
> +  [(set_attr "type" "mve_move")
> +])
> +
> +;;
> +;; [vcvtq_from_f_s, vcvtq_from_f_u])
> +;;
> +(define_insn "mve_vcvtq_from_f_<supf><mode>"
> +  [
> +   (set (match_operand:MVE_5 0 "s_register_operand" "=w")
> +       (unspec:MVE_5 [(match_operand:<MVE_CNVT> 1 
> "s_register_operand" "w")]
> +        VCVTQ_FROM_F))
> +  ]
> +  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
> + "vcvt.<supf>%#<V_sz_elem>.f%#<V_sz_elem> %q0, %q1"
> +  [(set_attr "type" "mve_move")
> +])
> +
> +;;
> +;; [vmvnq_n_u, vmvnq_n_s])
> +;;
> +(define_insn "mve_vmvnq_n_<supf><mode>"
> +  [
> +   (set (match_operand:MVE_5 0 "s_register_operand" "=w")
> +       (unspec:MVE_5 [(match_operand:SI 1 "immediate_operand" "i")]
> +        VMVNQ_N))
> +  ]


Following on from my previous comment, in the s16/u16 cases the immediate 
should be HImode.

Thanks,

Kyrill


> +  "TARGET_HAVE_MVE"
> +  "vmvn.i%#<V_sz_elem>  %q0, %1"
> +  [(set_attr "type" "mve_move")
> +])
> diff --git 
> a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_s16_f16.c 
> b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_s16_f16.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..aa69b11b79a15dace81bb5d8112cc5053a6f8dc2
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_s16_f16.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile  } */
> +/* { dg-additional-options "-march=armv8.1-m.main+mve.fp 
> -mfloat-abi=hard -O2"  }  */
> +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} 
> } */
> +
> +#include "arm_mve.h"
> +
> +int16x8_t
> +foo (float16x8_t a)
> +{
> +  return vcvtq_s16_f16 (a);
> +}
> +
> +/* { dg-final { scan-assembler "vcvt.s16.f16"  }  } */
> diff --git 
> a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_s32_f32.c 
> b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_s32_f32.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..0bfcba6dcf4e240a1de0cba5d98d85c2a529c09e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_s32_f32.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile  } */
> +/* { dg-additional-options "-march=armv8.1-m.main+mve.fp 
> -mfloat-abi=hard -O2"  }  */
> +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} 
> } */
> +
> +#include "arm_mve.h"
> +
> +int32x4_t
> +foo (float32x4_t a)
> +{
> +  return vcvtq_s32_f32 (a);
> +}
> +
> +/* { dg-final { scan-assembler "vcvt.s32.f32"  }  } */
> diff --git 
> a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_u16_f16.c 
> b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_u16_f16.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..ed36c8082ee464e8878ae7453e04d26e09a87752
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_u16_f16.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile  } */
> +/* { dg-additional-options "-march=armv8.1-m.main+mve.fp 
> -mfloat-abi=hard -O2"  }  */
> +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} 
> } */
> +
> +#include "arm_mve.h"
> +
> +uint16x8_t
> +foo (float16x8_t a)
> +{
> +    return vcvtq_u16_f16 (a);
> +}
> +
> +/* { dg-final { scan-assembler "vcvt.u16.f16"  }  } */
> diff --git 
> a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_u32_f32.c 
> b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_u32_f32.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..fbd3989e19c8d832561d6a4265b68d0a87a678b7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_u32_f32.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile  } */
> +/* { dg-additional-options "-march=armv8.1-m.main+mve.fp 
> -mfloat-abi=hard -O2"  }  */
> +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} 
> } */
> +
> +#include "arm_mve.h"
> +
> +uint32x4_t
> +foo (float32x4_t a)
> +{
> +    return vcvtq_u32_f32 (a);
> +}
> +
> +/* { dg-final { scan-assembler "vcvt.u32.f32"  }  } */
> diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_s16.c 
> b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_s16.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..39c31b4bbe743ed765c9a106778d6c4ba31d14eb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_s16.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile  } */
> +/* { dg-additional-options "-march=armv8.1-m.main+mve 
> -mfloat-abi=hard -O2"  }  */
> +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} 
> } */
> +
> +#include "arm_mve.h"
> +
> +int16x8_t
> +foo ()
> +{
> +  return vmvnq_n_s16 (1);
> +}
> +
> +/* { dg-final { scan-assembler "vmvn.i16"  }  } */
> diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_s32.c 
> b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_s32.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..6754cbf8baf11a702543c86d4c048df6bd9699a8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_s32.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile  } */
> +/* { dg-additional-options "-march=armv8.1-m.main+mve 
> -mfloat-abi=hard -O2"  }  */
> +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} 
> } */
> +
> +#include "arm_mve.h"
> +
> +int32x4_t
> +foo ()
> +{
> +  return vmvnq_n_s32 (2);
> +}
> +
> +/* { dg-final { scan-assembler "vmvn.i32"  }  } */
> diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_u16.c 
> b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_u16.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..b7b12e7476917631927017d9413ef7226fedbe23
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_u16.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile  } */
> +/* { dg-additional-options "-march=armv8.1-m.main+mve 
> -mfloat-abi=hard -O2"  }  */
> +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} 
> } */
> +
> +#include "arm_mve.h"
> +
> +uint16x8_t
> +foo ()
> +{
> +    return vmvnq_n_u16 (1);
> +}
> +
> +/* { dg-final { scan-assembler "vmvn.i16"  }  } */
> diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_u32.c 
> b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_u32.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..d5fb831b41ca68d6c4d812051878835b074c47e5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_u32.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile  } */
> +/* { dg-additional-options "-march=armv8.1-m.main+mve 
> -mfloat-abi=hard -O2"  }  */
> +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} 
> } */
> +
> +#include "arm_mve.h"
> +
> +uint32x4_t
> +foo ()
> +{
> +    return vmvnq_n_u32 (2);
> +}
> +
> +/* { dg-final { scan-assembler "vmvn.i32"  }  } */
> diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_s16.c 
> b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_s16.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..4eda96fefd369781a7639d7c5a9515d02b4b439e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_s16.c
> @@ -0,0 +1,21 @@
> +/* { dg-do compile  } */
> +/* { dg-additional-options "-march=armv8.1-m.main+mve 
> -mfloat-abi=hard -O2"  }  */
> +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} 
> } */
> +
> +#include "arm_mve.h"
> +
> +int16x8_t
> +foo (int16x8_t a)
> +{
> +  return vrev64q_s16 (a);
> +}
> +
> +/* { dg-final { scan-assembler "vrev64.16"  }  } */
> +
> +int16x8_t
> +foo1 (int16x8_t a)
> +{
> +  return vrev64q_s16 (a);
> +}
> +
> +/* { dg-final { scan-assembler "vrev64.16"  }  } */
> diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_s32.c 
> b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_s32.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..356f162c477e8159da73c9242caff6a545235cc1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_s32.c
> @@ -0,0 +1,21 @@
> +/* { dg-do compile  } */
> +/* { dg-additional-options "-march=armv8.1-m.main+mve 
> -mfloat-abi=hard -O2"  }  */
> +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} 
> } */
> +
> +#include "arm_mve.h"
> +
> +int32x4_t
> +foo (int32x4_t a)
> +{
> +  return vrev64q_s32 (a);
> +}
> +
> +/* { dg-final { scan-assembler "vrev64.32"  }  } */
> +
> +int32x4_t
> +foo1 (int32x4_t a)
> +{
> +  return vrev64q_s32 (a);
> +}
> +
> +/* { dg-final { scan-assembler "vrev64.32"  }  } */
> diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_s8.c 
> b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_s8.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..5cc4d0750f4d8de85e997247c77d7c076dfb624e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_s8.c
> @@ -0,0 +1,21 @@
> +/* { dg-do compile  } */
> +/* { dg-additional-options "-march=armv8.1-m.main+mve 
> -mfloat-abi=hard -O2"  }  */
> +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} 
> } */
> +
> +#include "arm_mve.h"
> +
> +int8x16_t
> +foo (int8x16_t a)
> +{
> +  return vrev64q_s8 (a);
> +}
> +
> +/* { dg-final { scan-assembler "vrev64.8"  }  } */
> +
> +int8x16_t
> +foo1 (int8x16_t a)
> +{
> +  return vrev64q (a);
> +}
> +
> +/* { dg-final { scan-assembler "vrev64.8"  }  } */
> diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_u16.c 
> b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_u16.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..ae7e3665c54b11e2eee8209ade6030875a201b6b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_u16.c
> @@ -0,0 +1,21 @@
> +/* { dg-do compile  } */
> +/* { dg-additional-options "-march=armv8.1-m.main+mve 
> -mfloat-abi=hard -O2"  }  */
> +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} 
> } */
> +
> +#include "arm_mve.h"
> +
> +uint16x8_t
> +foo (uint16x8_t a)
> +{
> +    return vrev64q_u16 (a);
> +}
> +
> +/* { dg-final { scan-assembler "vrev64.16"  }  } */
> +
> +uint16x8_t
> +foo1 (uint16x8_t a)
> +{
> +    return vrev64q (a);
> +}
> +
> +/* { dg-final { scan-assembler "vrev64.16"  }  } */
> diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_u32.c 
> b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_u32.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..8c87cab925766ea981ce90cc47b3194bbf0913ff
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_u32.c
> @@ -0,0 +1,21 @@
> +/* { dg-do compile  } */
> +/* { dg-additional-options "-march=armv8.1-m.main+mve 
> -mfloat-abi=hard -O2"  }  */
> +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} 
> } */
> +
> +#include "arm_mve.h"
> +
> +uint32x4_t
> +foo (uint32x4_t a)
> +{
> +    return vrev64q_u32 (a);
> +}
> +
> +/* { dg-final { scan-assembler "vrev64.32"  }  } */
> +
> +uint32x4_t
> +foo1 (uint32x4_t a)
> +{
> +    return vrev64q (a);
> +}
> +
> +/* { dg-final { scan-assembler "vrev64.32"  }  } */
> diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_u8.c 
> b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_u8.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..c4abd160e61517a4fcc2312c8fcdff1119686da6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_u8.c
> @@ -0,0 +1,21 @@
> +/* { dg-do compile  } */
> +/* { dg-additional-options "-march=armv8.1-m.main+mve 
> -mfloat-abi=hard -O2"  }  */
> +/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} 
> } */
> +
> +#include "arm_mve.h"
> +
> +uint8x16_t
> +foo (uint8x16_t a)
> +{
> +    return vrev64q_u8 (a);
> +}
> +
> +/* { dg-final { scan-assembler "vrev64.8"  }  } */
> +
> +uint8x16_t
> +foo1 (uint8x16_t a)
> +{
> +    return vrev64q (a);
> +}
> +
> +/* { dg-final { scan-assembler "vrev64.8"  }  } */
>
diff mbox series

Patch

diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 2fee417fe6585f457edd4cf96655366b1d6bd1a0..21b213d8e1bc99a3946f15e97161e01d73832799 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -313,6 +313,42 @@  arm_unop_none_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
 #define UNOP_NONE_UNONE_QUALIFIERS \
   (arm_unop_none_unone_qualifiers)
 
+static enum arm_type_qualifiers
+arm_unop_snone_snone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_none };
+#define UNOP_SNONE_SNONE_QUALIFIERS \
+  (arm_unop_snone_snone_qualifiers)
+
+static enum arm_type_qualifiers
+arm_unop_snone_none_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_none };
+#define UNOP_SNONE_NONE_QUALIFIERS \
+  (arm_unop_snone_none_qualifiers)
+
+static enum arm_type_qualifiers
+arm_unop_snone_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_immediate };
+#define UNOP_SNONE_IMM_QUALIFIERS \
+  (arm_unop_snone_imm_qualifiers)
+
+static enum arm_type_qualifiers
+arm_unop_unone_none_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_unsigned, qualifier_none };
+#define UNOP_UNONE_NONE_QUALIFIERS \
+  (arm_unop_unone_none_qualifiers)
+
+static enum arm_type_qualifiers
+arm_unop_unone_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_unsigned, qualifier_unsigned };
+#define UNOP_UNONE_UNONE_QUALIFIERS \
+  (arm_unop_unone_unone_qualifiers)
+
+static enum arm_type_qualifiers
+arm_unop_unone_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_unsigned, qualifier_immediate };
+#define UNOP_UNONE_IMM_QUALIFIERS \
+  (arm_unop_unone_imm_qualifiers)
+
 /* End of Qualifier for MVE builtins.  */
 
    /* void ([T element type] *, T, immediate).  */
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 9bcb04fb99a54b47057bb33cc807d6a5ad16401f..bd5162122b8c8e61ba25ba6ea89c56005f5a79dc 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -108,6 +108,20 @@  typedef struct { uint8x16_t val[4]; } uint8x16x4_t;
 #define vcvtq_f32_s32(__a) __arm_vcvtq_f32_s32(__a)
 #define vcvtq_f16_u16(__a) __arm_vcvtq_f16_u16(__a)
 #define vcvtq_f32_u32(__a) __arm_vcvtq_f32_u32(__a)
+#define vmvnq_n_s16( __imm) __arm_vmvnq_n_s16( __imm)
+#define vmvnq_n_s32( __imm) __arm_vmvnq_n_s32( __imm)
+#define vrev64q_s8(__a) __arm_vrev64q_s8(__a)
+#define vrev64q_s16(__a) __arm_vrev64q_s16(__a)
+#define vrev64q_s32(__a) __arm_vrev64q_s32(__a)
+#define vcvtq_s16_f16(__a) __arm_vcvtq_s16_f16(__a)
+#define vcvtq_s32_f32(__a) __arm_vcvtq_s32_f32(__a)
+#define vrev64q_u8(__a) __arm_vrev64q_u8(__a)
+#define vrev64q_u16(__a) __arm_vrev64q_u16(__a)
+#define vrev64q_u32(__a) __arm_vrev64q_u32(__a)
+#define vmvnq_n_u16( __imm) __arm_vmvnq_n_u16( __imm)
+#define vmvnq_n_u32( __imm) __arm_vmvnq_n_u32( __imm)
+#define vcvtq_u16_f16(__a) __arm_vcvtq_u16_f16(__a)
+#define vcvtq_u32_f32(__a) __arm_vcvtq_u32_f32(__a)
 #endif
 
 __extension__ extern __inline void
@@ -164,6 +178,76 @@  __arm_vst4q_u32 (uint32_t * __addr, uint32x4x4_t __value)
   __builtin_mve_vst4qv4si ((__builtin_neon_si *) __addr, __rv.__o);
 }
 
+__extension__ extern __inline int16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vmvnq_n_s16 (const int __imm)
+{
+  return __builtin_mve_vmvnq_n_sv8hi (__imm);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vmvnq_n_s32 (const int __imm)
+{
+  return __builtin_mve_vmvnq_n_sv4si (__imm);
+}
+
+__extension__ extern __inline int8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vrev64q_s8 (int8x16_t __a)
+{
+  return __builtin_mve_vrev64q_sv16qi (__a);
+}
+
+__extension__ extern __inline int16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vrev64q_s16 (int16x8_t __a)
+{
+  return __builtin_mve_vrev64q_sv8hi (__a);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vrev64q_s32 (int32x4_t __a)
+{
+  return __builtin_mve_vrev64q_sv4si (__a);
+}
+
+__extension__ extern __inline uint8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vrev64q_u8 (uint8x16_t __a)
+{
+  return __builtin_mve_vrev64q_uv16qi (__a);
+}
+
+__extension__ extern __inline uint16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vrev64q_u16 (uint16x8_t __a)
+{
+  return __builtin_mve_vrev64q_uv8hi (__a);
+}
+
+__extension__ extern __inline uint32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vrev64q_u32 (uint32x4_t __a)
+{
+  return __builtin_mve_vrev64q_uv4si (__a);
+}
+
+__extension__ extern __inline uint16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vmvnq_n_u16 (const int __imm)
+{
+  return __builtin_mve_vmvnq_n_uv8hi (__imm);
+}
+
+__extension__ extern __inline uint32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vmvnq_n_u32 (const int __imm)
+{
+  return __builtin_mve_vmvnq_n_uv4si (__imm);
+}
+
 #if (__ARM_FEATURE_MVE & 2) /* MVE Floating point.  */
 
 __extension__ extern __inline void
@@ -373,6 +457,34 @@  __arm_vcvtq_f32_u32 (uint32x4_t __a)
   return __builtin_mve_vcvtq_to_f_uv4sf (__a);
 }
 
+__extension__ extern __inline int16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcvtq_s16_f16 (float16x8_t __a)
+{
+  return __builtin_mve_vcvtq_from_f_sv8hi (__a);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcvtq_s32_f32 (float32x4_t __a)
+{
+  return __builtin_mve_vcvtq_from_f_sv4si (__a);
+}
+
+__extension__ extern __inline uint16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcvtq_u16_f16 (float16x8_t __a)
+{
+  return __builtin_mve_vcvtq_from_f_uv8hi (__a);
+}
+
+__extension__ extern __inline uint32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__arm_vcvtq_u32_f32 (float32x4_t __a)
+{
+  return __builtin_mve_vcvtq_from_f_uv4si (__a);
+}
+
 #endif
 
 enum {
@@ -674,6 +786,16 @@  extern void *__ARM_undef;
   int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8x4_t]: __arm_vst4q_u16 (__ARM_mve_coerce(__p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8x4_t)), \
   int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4x4_t]: __arm_vst4q_u32 (__ARM_mve_coerce(__p0, uint32_t *), __ARM_mve_coerce(__p1, uint32x4x4_t)));})
 
+#define vrev64q(p0) __arm_vrev64q(p0)
+#define __arm_vrev64q(p0) ({ __typeof(p0) __p0 = (p0); \
+  _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
+  int (*)[__ARM_mve_type_int8x16_t]: __arm_vrev64q_s8 (__ARM_mve_coerce(__p0, int8x16_t)), \
+  int (*)[__ARM_mve_type_int16x8_t]: __arm_vrev64q_s16 (__ARM_mve_coerce(__p0, int16x8_t)), \
+  int (*)[__ARM_mve_type_int32x4_t]: __arm_vrev64q_s32 (__ARM_mve_coerce(__p0, int32x4_t)), \
+  int (*)[__ARM_mve_type_uint8x16_t]: __arm_vrev64q_u8 (__ARM_mve_coerce(__p0, uint8x16_t)), \
+  int (*)[__ARM_mve_type_uint16x8_t]: __arm_vrev64q_u16 (__ARM_mve_coerce(__p0, uint16x8_t)), \
+  int (*)[__ARM_mve_type_uint32x4_t]: __arm_vrev64q_u32 (__ARM_mve_coerce(__p0, uint32x4_t)));})
+
 #endif /* MVE Floating point.  */
 
 #ifdef __cplusplus
diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def
index 65dc58c9328525891a0aa0bb97a412ebc8257c18..d205aca28909a224bd4bad103b8a280631661538 100644
--- a/gcc/config/arm/arm_mve_builtins.def
+++ b/gcc/config/arm/arm_mve_builtins.def
@@ -34,3 +34,9 @@  VAR1 (UNOP_NONE_NONE, vcvttq_f32_f16, v4sf)
 VAR1 (UNOP_NONE_NONE, vcvtbq_f32_f16, v4sf)
 VAR2 (UNOP_NONE_SNONE, vcvtq_to_f_s, v8hf, v4sf)
 VAR2 (UNOP_NONE_UNONE, vcvtq_to_f_u, v8hf, v4sf)
+VAR3 (UNOP_SNONE_SNONE, vrev64q_s, v16qi, v8hi, v4si)
+VAR2 (UNOP_SNONE_NONE, vcvtq_from_f_s, v8hi, v4si)
+VAR2 (UNOP_SNONE_IMM, vmvnq_n_s, v8hi, v4si)
+VAR3 (UNOP_UNONE_UNONE, vrev64q_u, v16qi, v8hi, v4si)
+VAR2 (UNOP_UNONE_NONE, vcvtq_from_f_u, v8hi, v4si)
+VAR2 (UNOP_UNONE_IMM, vmvnq_n_u, v8hi, v4si)
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 7a31d0abdfff9a93d79faa1de44d1b224470e2eb..a1dd709a9ffe479cf16a88a5923975f1941531ef 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -22,17 +22,26 @@ 
 (define_mode_iterator MVE_types [V16QI V8HI V4SI V2DI TI V8HF V4SF V2DF])
 (define_mode_iterator MVE_VLD_ST [V16QI V8HI V4SI V8HF V4SF])
 (define_mode_iterator MVE_0 [V8HF V4SF])
+(define_mode_iterator MVE_2 [V16QI V8HI V4SI])
+(define_mode_iterator MVE_5 [V8HI V4SI])
 
 (define_c_enum "unspec" [VST4Q VRNDXQ_F VRNDQ_F VRNDPQ_F VRNDNQ_F VRNDMQ_F
 			 VRNDAQ_F VREV64Q_F VNEGQ_F VDUPQ_N_F VABSQ_F VREV32Q_F
 			 VCVTTQ_F32_F16 VCVTBQ_F32_F16 VCVTQ_TO_F_S
-			 VCVTQ_TO_F_U])
+			 VCVTQ_TO_F_U VMVNQ_N_S VMVNQ_N_U VREV64Q_S VREV64Q_U
+			 VCVTQ_FROM_F_S VCVTQ_FROM_F_U])
 
 (define_mode_attr MVE_CNVT [(V8HI "V8HF") (V4SI "V4SF")
 			    (V8HF "V8HI") (V4SF "V4SI")])
 
-(define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u")])
+(define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VMVNQ_N_S "s")
+		       (VMVNQ_N_U "u") (VREV64Q_U "u") (VREV64Q_S "s")
+		       (VCVTQ_FROM_F_S "s") (VCVTQ_FROM_F_U "u")])
+
 (define_int_iterator VCVTQ_TO_F [VCVTQ_TO_F_S VCVTQ_TO_F_U])
+(define_int_iterator VMVNQ_N [VMVNQ_N_U VMVNQ_N_S])
+(define_int_iterator VREV64Q [VREV64Q_S VREV64Q_U])
+(define_int_iterator VCVTQ_FROM_F [VCVTQ_FROM_F_S VCVTQ_FROM_F_U])
 
 (define_insn "*mve_mov<mode>"
   [(set (match_operand:MVE_types 0 "s_register_operand" "=w,w,r,w,w,r,w")
@@ -318,3 +327,45 @@ 
   "vcvt.f%#<V_sz_elem>.<supf>%#<V_sz_elem>       %q0, %q1"
   [(set_attr "type" "mve_move")
 ])
+
+;;
+;; [vrev64q_u, vrev64q_s])
+;;
+(define_insn "mve_vrev64q_<supf><mode>"
+  [
+   (set (match_operand:MVE_2 0 "s_register_operand" "=w")
+	(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")]
+	 VREV64Q))
+  ]
+  "TARGET_HAVE_MVE"
+  "vrev64.%#<V_sz_elem> %q0, %q1"
+  [(set_attr "type" "mve_move")
+])
+
+;;
+;; [vcvtq_from_f_s, vcvtq_from_f_u])
+;;
+(define_insn "mve_vcvtq_from_f_<supf><mode>"
+  [
+   (set (match_operand:MVE_5 0 "s_register_operand" "=w")
+	(unspec:MVE_5 [(match_operand:<MVE_CNVT> 1 "s_register_operand" "w")]
+	 VCVTQ_FROM_F))
+  ]
+  "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
+  "vcvt.<supf>%#<V_sz_elem>.f%#<V_sz_elem>       %q0, %q1"
+  [(set_attr "type" "mve_move")
+])
+
+;;
+;; [vmvnq_n_u, vmvnq_n_s])
+;;
+(define_insn "mve_vmvnq_n_<supf><mode>"
+  [
+   (set (match_operand:MVE_5 0 "s_register_operand" "=w")
+	(unspec:MVE_5 [(match_operand:SI 1 "immediate_operand" "i")]
+	 VMVNQ_N))
+  ]
+  "TARGET_HAVE_MVE"
+  "vmvn.i%#<V_sz_elem>  %q0, %1"
+  [(set_attr "type" "mve_move")
+])
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_s16_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_s16_f16.c
new file mode 100644
index 0000000000000000000000000000000000000000..aa69b11b79a15dace81bb5d8112cc5053a6f8dc2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_s16_f16.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile  } */
+/* { dg-additional-options "-march=armv8.1-m.main+mve.fp -mfloat-abi=hard -O2"  }  */
+/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */
+
+#include "arm_mve.h"
+
+int16x8_t
+foo (float16x8_t a)
+{
+  return vcvtq_s16_f16 (a);
+}
+
+/* { dg-final { scan-assembler "vcvt.s16.f16"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_s32_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_s32_f32.c
new file mode 100644
index 0000000000000000000000000000000000000000..0bfcba6dcf4e240a1de0cba5d98d85c2a529c09e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_s32_f32.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile  } */
+/* { dg-additional-options "-march=armv8.1-m.main+mve.fp -mfloat-abi=hard -O2"  }  */
+/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */
+
+#include "arm_mve.h"
+
+int32x4_t
+foo (float32x4_t a)
+{
+  return vcvtq_s32_f32 (a);
+}
+
+/* { dg-final { scan-assembler "vcvt.s32.f32"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_u16_f16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_u16_f16.c
new file mode 100644
index 0000000000000000000000000000000000000000..ed36c8082ee464e8878ae7453e04d26e09a87752
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_u16_f16.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile  } */
+/* { dg-additional-options "-march=armv8.1-m.main+mve.fp -mfloat-abi=hard -O2"  }  */
+/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */
+
+#include "arm_mve.h"
+
+uint16x8_t
+foo (float16x8_t a)
+{
+  return vcvtq_u16_f16 (a); /* float16x8_t in, uint16x8_t out.  */
+}
+
+/* { dg-final { scan-assembler "vcvt.u16.f16"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_u32_f32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_u32_f32.c
new file mode 100644
index 0000000000000000000000000000000000000000..fbd3989e19c8d832561d6a4265b68d0a87a678b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vcvtq_u32_f32.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile  } */
+/* { dg-additional-options "-march=armv8.1-m.main+mve.fp -mfloat-abi=hard -O2"  }  */
+/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */
+
+#include "arm_mve.h"
+
+uint32x4_t
+foo (float32x4_t a)
+{
+  return vcvtq_u32_f32 (a); /* float32x4_t in, uint32x4_t out.  */
+}
+
+/* { dg-final { scan-assembler "vcvt.u32.f32"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_s16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_s16.c
new file mode 100644
index 0000000000000000000000000000000000000000..39c31b4bbe743ed765c9a106778d6c4ba31d14eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_s16.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile  } */
+/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2"  }  */
+/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */
+
+#include "arm_mve.h"
+
+int16x8_t
+foo (void)
+{
+  return vmvnq_n_s16 (1); /* Constant argument; the test scans for vmvn.i16.  */
+}
+
+/* { dg-final { scan-assembler "vmvn.i16"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_s32.c
new file mode 100644
index 0000000000000000000000000000000000000000..6754cbf8baf11a702543c86d4c048df6bd9699a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_s32.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile  } */
+/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2"  }  */
+/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */
+
+#include "arm_mve.h"
+
+int32x4_t
+foo (void)
+{
+  return vmvnq_n_s32 (2); /* Constant argument; the test scans for vmvn.i32.  */
+}
+
+/* { dg-final { scan-assembler "vmvn.i32"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_u16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_u16.c
new file mode 100644
index 0000000000000000000000000000000000000000..b7b12e7476917631927017d9413ef7226fedbe23
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_u16.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile  } */
+/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2"  }  */
+/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */
+
+#include "arm_mve.h"
+
+uint16x8_t
+foo (void)
+{
+  return vmvnq_n_u16 (1); /* Constant argument; the test scans for vmvn.i16.  */
+}
+
+/* { dg-final { scan-assembler "vmvn.i16"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_u32.c
new file mode 100644
index 0000000000000000000000000000000000000000..d5fb831b41ca68d6c4d812051878835b074c47e5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vmvnq_n_u32.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile  } */
+/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2"  }  */
+/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */
+
+#include "arm_mve.h"
+
+uint32x4_t
+foo (void)
+{
+  return vmvnq_n_u32 (2); /* Constant argument; the test scans for vmvn.i32.  */
+}
+
+/* { dg-final { scan-assembler "vmvn.i32"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_s16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_s16.c
new file mode 100644
index 0000000000000000000000000000000000000000..4eda96fefd369781a7639d7c5a9515d02b4b439e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_s16.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile  } */
+/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2"  }  */
+/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */
+
+#include "arm_mve.h"
+
+int16x8_t
+foo (int16x8_t a)
+{
+  return vrev64q_s16 (a); /* Explicitly typed intrinsic name.  */
+}
+
+/* { dg-final { scan-assembler "vrev64.16"  }  } */
+
+int16x8_t
+foo1 (int16x8_t a)
+{
+  return vrev64q (a); /* Type-generic name; foo covers vrev64q_s16.  */
+}
+
+/* { dg-final { scan-assembler "vrev64.16"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_s32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_s32.c
new file mode 100644
index 0000000000000000000000000000000000000000..356f162c477e8159da73c9242caff6a545235cc1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_s32.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile  } */
+/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2"  }  */
+/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */
+
+#include "arm_mve.h"
+
+int32x4_t
+foo (int32x4_t a)
+{
+  return vrev64q_s32 (a); /* Explicitly typed intrinsic name.  */
+}
+
+/* { dg-final { scan-assembler "vrev64.32"  }  } */
+
+int32x4_t
+foo1 (int32x4_t a)
+{
+  return vrev64q (a); /* Type-generic name; foo covers vrev64q_s32.  */
+}
+
+/* { dg-final { scan-assembler "vrev64.32"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_s8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_s8.c
new file mode 100644
index 0000000000000000000000000000000000000000..5cc4d0750f4d8de85e997247c77d7c076dfb624e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_s8.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile  } */
+/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2"  }  */
+/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */
+
+#include "arm_mve.h"
+
+int8x16_t
+foo (int8x16_t a)
+{
+  return vrev64q_s8 (a); /* Explicitly typed intrinsic name.  */
+}
+
+/* { dg-final { scan-assembler "vrev64.8"  }  } */
+
+int8x16_t
+foo1 (int8x16_t a)
+{
+  return vrev64q (a); /* Type-generic name, called with an int8x16_t.  */
+}
+
+/* { dg-final { scan-assembler "vrev64.8"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_u16.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_u16.c
new file mode 100644
index 0000000000000000000000000000000000000000..ae7e3665c54b11e2eee8209ade6030875a201b6b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_u16.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile  } */
+/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2"  }  */
+/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */
+
+#include "arm_mve.h"
+
+uint16x8_t
+foo (uint16x8_t a)
+{
+  return vrev64q_u16 (a); /* Explicitly typed intrinsic name.  */
+}
+
+/* { dg-final { scan-assembler "vrev64.16"  }  } */
+
+uint16x8_t
+foo1 (uint16x8_t a)
+{
+  return vrev64q (a); /* Type-generic name, called with a uint16x8_t.  */
+}
+
+/* { dg-final { scan-assembler "vrev64.16"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_u32.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_u32.c
new file mode 100644
index 0000000000000000000000000000000000000000..8c87cab925766ea981ce90cc47b3194bbf0913ff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_u32.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile  } */
+/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2"  }  */
+/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */
+
+#include "arm_mve.h"
+
+uint32x4_t
+foo (uint32x4_t a)
+{
+  return vrev64q_u32 (a); /* Explicitly typed intrinsic name.  */
+}
+
+/* { dg-final { scan-assembler "vrev64.32"  }  } */
+
+uint32x4_t
+foo1 (uint32x4_t a)
+{
+  return vrev64q (a); /* Type-generic name, called with a uint32x4_t.  */
+}
+
+/* { dg-final { scan-assembler "vrev64.32"  }  } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_u8.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_u8.c
new file mode 100644
index 0000000000000000000000000000000000000000..c4abd160e61517a4fcc2312c8fcdff1119686da6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vrev64q_u8.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile  } */
+/* { dg-additional-options "-march=armv8.1-m.main+mve -mfloat-abi=hard -O2"  }  */
+/* { dg-skip-if "Skip if not auto" {*-*-*} {"-mfpu=*"} {"-mfpu=auto"} } */
+
+#include "arm_mve.h"
+
+uint8x16_t
+foo (uint8x16_t a)
+{
+  return vrev64q_u8 (a); /* Explicitly typed intrinsic name.  */
+}
+
+/* { dg-final { scan-assembler "vrev64.8"  }  } */
+
+uint8x16_t
+foo1 (uint8x16_t a)
+{
+  return vrev64q (a); /* Type-generic name, called with a uint8x16_t.  */
+}
+
+/* { dg-final { scan-assembler "vrev64.8"  }  } */