
[RFA/ARM] target/51534 Fix unsigned vector comparisons

Message ID 20120221161710.GL32296@e103070-lin.arm.com
State New

Commit Message

Matthew Gretton-Dann Feb. 21, 2012, 4:17 p.m. UTC
The attached patch fixes instruction generation for unsigned vector
comparisons against a known-zero vector.

For unsigned vectors, ARM's Neon extensions only allow equality
comparisons against an immediate zero; unsigned greater-than and
less-than comparisons against zero are not available.
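
For reference, the kind of code that hits this looks roughly like the
sketch below; the function name is made up for illustration and is not
taken from the patch or the new test:

#include <arm_neon.h>

/* Unsigned >= comparison against a zero vector.  The intrinsic used to
   expand via the signed __builtin_neon_vcgev8qi, so the backend could
   emit "vcge.u8 ..., #0", a form Neon does not provide; only the
   equality comparison (vceq.i8 ..., #0) takes an immediate zero for
   unsigned data.  */
uint8x8_t
cmp_ge_zero_u8 (uint8x8_t a)
{
  return vcge_u8 (a, vdup_n_u8 (0));
}

With this patch the unsigned variants go through the new vcgeu/vcgtu
builtins and insns, whose operands are register-only, so the invalid
immediate-zero form can no longer be chosen.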

This patch fixes the code generation; further changes that would
improve the generated code are possible, and will come in a separate
patch.

This issue does not affect the auto-vectorizer.

OK?

Matt

gcc/ChangeLog:

2012-02-21  Matthew Gretton-Dann  <matthew.gretton-dann@arm.com>

	PR target/51534
	* config/arm/arm.c (neon_builtin_data): Add entries for vcgeu
	and vcgtu.
	* config/arm/arm_neon.h: Regenerate.
	* config/arm/neon.md (unspec): Add UNSPEC_VCGEU, and UNSPEC_VCGTU.
	(neon_vcgeu): New insn.
	(neon_vcgtu): Likewise.
	* config/arm/neon.ml (s_8_32, u_8_32): New lists.
	(ops): Unsigned comparison intrinsics call a different
	builtin.

gcc/testsuite/ChangeLog:

2012-02-21  Matthew Gretton-Dann  <matthew.gretton-dann@arm.com>

	PR target/51534
	* gcc.target/arm/neon/pr51534.c: New testcase.

Comments

Richard Earnshaw Feb. 28, 2012, 3:58 p.m. UTC | #1
On 21/02/12 16:17, Matthew Gretton-Dann wrote:
> The attached patch fixes instruction generation for unsigned vector
> comparisons against a known-zero vector.
> 
> For unsigned vectors, ARM's Neon extensions only allow equality
> comparisons against an immediate zero; unsigned greater-than and
> less-than comparisons against zero are not available.
> 
> This patch fixes the code generation; further changes that would
> improve the generated code are possible, and will come in a separate
> patch.
> 
> This issue does not affect the auto-vectorizer.
> 
> OK?
> 
> Matt
> 
> gcc/ChangeLog:
> 
> 2012-02-21  Matthew Gretton-Dann  <matthew.gretton-dann@arm.com>
> 
> 	PR target/51534
> 	* config/arm/arm.c (neon_builtin_data): Add entries for vcgeu
> 	and vcgtu.
> 	* config/arm/arm_neon.h: Regenerate.
> 	* config/arm/neon.md (unspec): Add UNSPEC_VCGEU, and UNSPEC_VCGTU.
> 	(neon_vcgeu): New insn.
> 	(neon_vcgtu): Likewise.
> 	* config/arm/neon.ml (s_8_32, u_8_32): New lists.
> 	(ops): Unsigned comparison intrinsics call a different
> 	builtin.
> 
> gcc/testsuite/ChangeLog:
> 
> 2012-02-21  Matthew Gretton-Dann  <matthew.gretton-dann@arm.com>
> 
> 	PR target/51534
> 	* gcc.target/arm/neon/pr51534.c: New testcase.
> 
> 

OK.

R.

Patch

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 7f0dc6b..4a173ac 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -19108,7 +19108,9 @@  static neon_builtin_datum neon_builtin_data[] =
   VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
   VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
   VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
   VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+  VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
   VAR2 (BINOP, vcage, v2sf, v4sf),
   VAR2 (BINOP, vcagt, v2sf, v4sf),
   VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index 9cba0a9..0567895 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -1893,19 +1893,19 @@  vcge_f32 (float32x2_t __a, float32x2_t __b)
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vcge_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t)__builtin_neon_vcgev8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+  return (uint8x8_t)__builtin_neon_vcgeuv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vcge_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t)__builtin_neon_vcgev4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+  return (uint16x4_t)__builtin_neon_vcgeuv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vcge_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t)__builtin_neon_vcgev2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+  return (uint32x2_t)__builtin_neon_vcgeuv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
@@ -1935,19 +1935,19 @@  vcgeq_f32 (float32x4_t __a, float32x4_t __b)
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t)__builtin_neon_vcgev16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+  return (uint8x16_t)__builtin_neon_vcgeuv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t)__builtin_neon_vcgev8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+  return (uint16x8_t)__builtin_neon_vcgeuv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t)__builtin_neon_vcgev4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+  return (uint32x4_t)__builtin_neon_vcgeuv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -1977,19 +1977,19 @@  vcle_f32 (float32x2_t __a, float32x2_t __b)
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vcle_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t)__builtin_neon_vcgev8qi ((int8x8_t) __b, (int8x8_t) __a, 0);
+  return (uint8x8_t)__builtin_neon_vcgeuv8qi ((int8x8_t) __b, (int8x8_t) __a, 0);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vcle_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t)__builtin_neon_vcgev4hi ((int16x4_t) __b, (int16x4_t) __a, 0);
+  return (uint16x4_t)__builtin_neon_vcgeuv4hi ((int16x4_t) __b, (int16x4_t) __a, 0);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vcle_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t)__builtin_neon_vcgev2si ((int32x2_t) __b, (int32x2_t) __a, 0);
+  return (uint32x2_t)__builtin_neon_vcgeuv2si ((int32x2_t) __b, (int32x2_t) __a, 0);
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
@@ -2019,19 +2019,19 @@  vcleq_f32 (float32x4_t __a, float32x4_t __b)
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t)__builtin_neon_vcgev16qi ((int8x16_t) __b, (int8x16_t) __a, 0);
+  return (uint8x16_t)__builtin_neon_vcgeuv16qi ((int8x16_t) __b, (int8x16_t) __a, 0);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t)__builtin_neon_vcgev8hi ((int16x8_t) __b, (int16x8_t) __a, 0);
+  return (uint16x8_t)__builtin_neon_vcgeuv8hi ((int16x8_t) __b, (int16x8_t) __a, 0);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t)__builtin_neon_vcgev4si ((int32x4_t) __b, (int32x4_t) __a, 0);
+  return (uint32x4_t)__builtin_neon_vcgeuv4si ((int32x4_t) __b, (int32x4_t) __a, 0);
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -2061,19 +2061,19 @@  vcgt_f32 (float32x2_t __a, float32x2_t __b)
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t)__builtin_neon_vcgtv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+  return (uint8x8_t)__builtin_neon_vcgtuv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t)__builtin_neon_vcgtv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+  return (uint16x4_t)__builtin_neon_vcgtuv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t)__builtin_neon_vcgtv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+  return (uint32x2_t)__builtin_neon_vcgtuv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
@@ -2103,19 +2103,19 @@  vcgtq_f32 (float32x4_t __a, float32x4_t __b)
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t)__builtin_neon_vcgtv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+  return (uint8x16_t)__builtin_neon_vcgtuv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t)__builtin_neon_vcgtv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+  return (uint16x8_t)__builtin_neon_vcgtuv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t)__builtin_neon_vcgtv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+  return (uint32x4_t)__builtin_neon_vcgtuv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
 }
 
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -2145,19 +2145,19 @@  vclt_f32 (float32x2_t __a, float32x2_t __b)
 __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
 vclt_u8 (uint8x8_t __a, uint8x8_t __b)
 {
-  return (uint8x8_t)__builtin_neon_vcgtv8qi ((int8x8_t) __b, (int8x8_t) __a, 0);
+  return (uint8x8_t)__builtin_neon_vcgtuv8qi ((int8x8_t) __b, (int8x8_t) __a, 0);
 }
 
 __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
 vclt_u16 (uint16x4_t __a, uint16x4_t __b)
 {
-  return (uint16x4_t)__builtin_neon_vcgtv4hi ((int16x4_t) __b, (int16x4_t) __a, 0);
+  return (uint16x4_t)__builtin_neon_vcgtuv4hi ((int16x4_t) __b, (int16x4_t) __a, 0);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
 vclt_u32 (uint32x2_t __a, uint32x2_t __b)
 {
-  return (uint32x2_t)__builtin_neon_vcgtv2si ((int32x2_t) __b, (int32x2_t) __a, 0);
+  return (uint32x2_t)__builtin_neon_vcgtuv2si ((int32x2_t) __b, (int32x2_t) __a, 0);
 }
 
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
@@ -2187,19 +2187,19 @@  vcltq_f32 (float32x4_t __a, float32x4_t __b)
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return (uint8x16_t)__builtin_neon_vcgtv16qi ((int8x16_t) __b, (int8x16_t) __a, 0);
+  return (uint8x16_t)__builtin_neon_vcgtuv16qi ((int8x16_t) __b, (int8x16_t) __a, 0);
 }
 
 __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
 vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
 {
-  return (uint16x8_t)__builtin_neon_vcgtv8hi ((int16x8_t) __b, (int16x8_t) __a, 0);
+  return (uint16x8_t)__builtin_neon_vcgtuv8hi ((int16x8_t) __b, (int16x8_t) __a, 0);
 }
 
 __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
 vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
-  return (uint32x4_t)__builtin_neon_vcgtv4si ((int32x4_t) __b, (int32x4_t) __a, 0);
+  return (uint32x4_t)__builtin_neon_vcgtuv4si ((int32x4_t) __b, (int32x4_t) __a, 0);
 }
 
 __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index a6c891d..7f15e87 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -34,7 +34,9 @@ 
   UNSPEC_VCAGT
   UNSPEC_VCEQ
   UNSPEC_VCGE
+  UNSPEC_VCGEU
   UNSPEC_VCGT
+  UNSPEC_VCGTU
   UNSPEC_VCLS
   UNSPEC_VCONCAT
   UNSPEC_VCVT
@@ -2146,6 +2148,18 @@ 
                    (const_string "neon_int_5")))]
 )
 
+(define_insn "neon_vcgeu<mode>"
+  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
+        (unspec:<V_cmp_result>
+	  [(match_operand:VDQIW 1 "s_register_operand" "w")
+	   (match_operand:VDQIW 2 "s_register_operand" "w")
+           (match_operand:SI 3 "immediate_operand" "i")]
+          UNSPEC_VCGEU))]
+  "TARGET_NEON"
+  "vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set_attr "neon_type" "neon_int_5")]
+)
+
 (define_insn "neon_vcgt<mode>"
   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
         (unspec:<V_cmp_result>
@@ -2165,6 +2179,18 @@ 
                    (const_string "neon_int_5")))]
 )
 
+(define_insn "neon_vcgtu<mode>"
+  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
+        (unspec:<V_cmp_result>
+	  [(match_operand:VDQIW 1 "s_register_operand" "w")
+	   (match_operand:VDQIW 2 "s_register_operand" "w")
+           (match_operand:SI 3 "immediate_operand" "i")]
+          UNSPEC_VCGTU))]
+  "TARGET_NEON"
+  "vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set_attr "neon_type" "neon_int_5")]
+)
+
 ;; VCLE and VCLT only support comparisons with immediate zero (register
 ;; variants are VCGE and VCGT with operands reversed).
 
diff --git a/gcc/config/arm/neon.ml b/gcc/config/arm/neon.ml
index b5b9cab..363e55c 100644
--- a/gcc/config/arm/neon.ml
+++ b/gcc/config/arm/neon.ml
@@ -700,6 +700,8 @@  let bit_select shape elt =
 
 (* Common lists of supported element types.  *)
 
+let s_8_32 = [S8; S16; S32]
+let u_8_32 = [U8; U16; U32]
 let su_8_32 = [S8; S16; S32; U8; U16; U32]
 let su_8_64 = S64 :: U64 :: su_8_32
 let su_16_64 = [S16; S32; S64; U16; U32; U64]
@@ -777,26 +779,40 @@  let ops =
     Vceq, [], All (3, Qreg), "vceqQ", cmp_sign_invar, P8 :: F32 :: su_8_32;
 
     (* Comparison, greater-than or equal.  *)
-    Vcge, [], All (3, Dreg), "vcge", cmp_sign_matters, F32 :: su_8_32;
-    Vcge, [], All (3, Qreg), "vcgeQ", cmp_sign_matters, F32 :: su_8_32;
+    Vcge, [], All (3, Dreg), "vcge", cmp_sign_matters, F32 :: s_8_32;
+    Vcge, [Builtin_name "vcgeu"], All (3, Dreg), "vcge", cmp_sign_matters, u_8_32;
+    Vcge, [], All (3, Qreg), "vcgeQ", cmp_sign_matters, F32 :: s_8_32;
+    Vcge, [Builtin_name "vcgeu"], All (3, Qreg), "vcgeQ", cmp_sign_matters, u_8_32;
 
     (* Comparison, less-than or equal.  *)
     Vcle, [Flipped "vcge"], All (3, Dreg), "vcle", cmp_sign_matters,
-      F32 :: su_8_32;
+      F32 :: s_8_32;
+    Vcle, [Flipped "vcgeu"], All (3, Dreg), "vcle", cmp_sign_matters,
+      u_8_32;
     Vcle, [Instruction_name ["vcge"]; Flipped "vcgeQ"],
       All (3, Qreg), "vcleQ", cmp_sign_matters,
-      F32 :: su_8_32;
+      F32 :: s_8_32;
+    Vcle, [Instruction_name ["vcge"]; Flipped "vcgeuQ"],
+      All (3, Qreg), "vcleQ", cmp_sign_matters,
+      u_8_32;
 
     (* Comparison, greater-than.  *)
-    Vcgt, [], All (3, Dreg), "vcgt", cmp_sign_matters, F32 :: su_8_32;
-    Vcgt, [], All (3, Qreg), "vcgtQ", cmp_sign_matters, F32 :: su_8_32;
+    Vcgt, [], All (3, Dreg), "vcgt", cmp_sign_matters, F32 :: s_8_32;
+    Vcgt, [Builtin_name "vcgtu"], All (3, Dreg), "vcgt", cmp_sign_matters, u_8_32;
+    Vcgt, [], All (3, Qreg), "vcgtQ", cmp_sign_matters, F32 :: s_8_32;
+    Vcgt, [Builtin_name "vcgtu"], All (3, Qreg), "vcgtQ", cmp_sign_matters, u_8_32;
 
     (* Comparison, less-than.  *)
     Vclt, [Flipped "vcgt"], All (3, Dreg), "vclt", cmp_sign_matters,
-      F32 :: su_8_32;
+      F32 :: s_8_32;
+    Vclt, [Flipped "vcgtu"], All (3, Dreg), "vclt", cmp_sign_matters,
+      u_8_32;
     Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtQ"],
       All (3, Qreg), "vcltQ", cmp_sign_matters,
-      F32 :: su_8_32;
+      F32 :: s_8_32;
+    Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtuQ"],
+      All (3, Qreg), "vcltQ", cmp_sign_matters,
+      u_8_32;
 
     (* Compare absolute greater-than or equal.  *)
     Vcage, [Instruction_name ["vacge"]],
diff --git a/gcc/testsuite/gcc.target/arm/neon/pr51534.c b/gcc/testsuite/gcc.target/arm/neon/pr51534.c
new file mode 100644
index 0000000..71cbb05
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/pr51534.c
@@ -0,0 +1,84 @@ 
+/* Test the vector comparison intrinsics when comparing to immediate zero.
+   */
+
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-save-temps -mfloat-abi=hard -O3" } */
+/* { dg-add-options arm_neon } */
+
+#include <arm_neon.h>
+
+#define GEN_TEST(T, D, C, R) \
+  R test_##C##_##T (T a) { return C (a, D (0)); }
+
+#define GEN_DOUBLE_TESTS(S, T, C) \
+  GEN_TEST (T, vdup_n_s##S, C##_s##S, u##T) \
+  GEN_TEST (u##T, vdup_n_u##S, C##_u##S, u##T) 
+
+#define GEN_QUAD_TESTS(S, T, C) \
+  GEN_TEST (T, vdupq_n_s##S, C##q_s##S, u##T) \
+  GEN_TEST (u##T, vdupq_n_u##S, C##q_u##S, u##T) 
+
+#define GEN_COND_TESTS(C) \
+  GEN_DOUBLE_TESTS (8, int8x8_t, C) \
+  GEN_DOUBLE_TESTS (16, int16x4_t, C) \
+  GEN_DOUBLE_TESTS (32, int32x2_t, C) \
+  GEN_QUAD_TESTS (8, int8x16_t, C) \
+  GEN_QUAD_TESTS (16, int16x8_t, C) \
+  GEN_QUAD_TESTS (32, int32x4_t, C)
+
+GEN_COND_TESTS(vcgt)
+GEN_COND_TESTS(vcge)
+GEN_COND_TESTS(vclt)
+GEN_COND_TESTS(vcle)
+GEN_COND_TESTS(vceq)
+
+/* Scan for expected outputs.  */
+/* { dg-final { scan-assembler "vcgt\.s8\[ 	\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler-times "vcgt\.u8\[ 	\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler "vcgt\.s16\[ 	\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler-times "vcgt\.u16\[ 	\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler "vcgt\.s32\[ 	\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler-times "vcgt\.u32\[ 	\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler "vcgt\.s8\[ 	\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler-times "vcgt\.u8\[ 	\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler "vcgt\.s16\[ 	\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler-times "vcgt\.u16\[ 	\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler "vcgt\.s32\[ 	\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler-times "vcgt\.u32\[ 	\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler "vcge\.s8\[ 	\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler-times "vcge\.u8\[ 	\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler "vcge\.s16\[ 	\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler-times "vcge\.u16\[ 	\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler "vcge\.s32\[ 	\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler-times "vcge\.u32\[ 	\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler "vcge\.s8\[ 	\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler-times "vcge\.u8\[ 	\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler "vcge\.s16\[ 	\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler-times "vcge\.u16\[ 	\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler "vcge\.s32\[ 	\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */
+/* { dg-final { scan-assembler-times "vcge\.u32\[ 	\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" 2 } } */
+/* { dg-final { scan-assembler "vcgt\.s8\[ 	\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" } } */
+/* { dg-final { scan-assembler "vcgt\.s16\[ 	\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" } } */
+/* { dg-final { scan-assembler "vcgt\.s32\[ 	\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" } } */
+/* { dg-final { scan-assembler "vcgt\.s8\[ 	\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" } } */
+/* { dg-final { scan-assembler "vcgt\.s16\[ 	\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" } } */
+/* { dg-final { scan-assembler "vcgt\.s32\[ 	\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" } } */
+/* { dg-final { scan-assembler "vcge\.s8\[ 	\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" } } */
+/* { dg-final { scan-assembler "vcge\.s16\[ 	\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" } } */
+/* { dg-final { scan-assembler "vcge\.s32\[ 	\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" } } */
+/* { dg-final { scan-assembler "vcge\.s8\[ 	\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" } } */
+/* { dg-final { scan-assembler "vcge\.s16\[ 	\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" } } */
+/* { dg-final { scan-assembler "vcge\.s32\[ 	\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" } } */
+/* { dg-final { scan-assembler-times "vceq\.i8\[ 	\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" 2 } } */
+/* { dg-final { scan-assembler-times "vceq\.i16\[ 	\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" 2 } } */
+/* { dg-final { scan-assembler-times "vceq\.i32\[ 	\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" 2 } } */
+/* { dg-final { scan-assembler-times "vceq\.i8\[ 	\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" 2 } } */
+/* { dg-final { scan-assembler-times "vceq\.i16\[ 	\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" 2 } } */
+/* { dg-final { scan-assembler-times "vceq\.i32\[ 	\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" 2 } } */
+
+/* And ensure we don't have unexpected output too.  */
+/* { dg-final { scan-assembler-not "vc\[gl\]\[te\]\.u\[0-9\]+\[ 	\]+\[qQdD\]\[0-9\]+, \[qQdD\]\[0-9\]+, #0" } } */
+
+/* Tidy up.  */
+/* { dg-final { cleanup-saved-temps } } */