diff mbox

[AArch32,NEON] Implementing vmaxnmQ_ST and vminnmQ_ST intrinsincs.

Message ID 5677E964.7000408@foss.arm.com
State New
Headers show

Commit Message

Bilyan Borisov Dec. 21, 2015, 11:58 a.m. UTC
This patch implements the vmaxnmQ_ST and vminnmQ_ST intrinsincs. It also
implements the __ARM_FEATURE_NUMERIC_MAXMIN macro, which is defined when
__ARM_ARCH >= 8, and which enables the intrinsincs.

Tested on arm-none-eabi, armeb-none-eabi, arm-none-linux-gnueabihf.

---

gcc/

2015-XX-XX  Bilyan Borisov  <bilyan.borisov@arm.com>

	* config/arm/arm-c.c (arm_cpu_builtins): New macro definition.
	* config/arm/arm_neon.h (vmaxnm_f32): New intrinsinc.
	(vmaxnmq_f32): Likewise.
	(vminnm_f32): Likewise.
	(vminnmq_f32): Likewise.
	* config/arm/arm_neon_builtins.def (vmaxnm): New builtin.
	(vminnm): Likewise.
	* config/arm/iterators.md (VMAXMINNM): New iterator.
	(maxmin): Updated iterator.
	* config/arm/neon.md (neon_v<maxmin><mode>, VCVTF): New pattern.
	* config/arm/unspecs.md (UNSPEC_VMAXNM): New unspec.
	(UNSPEC_VMINNM): Likewise.

gcc/testsuite/

2015-XX-XX  Bilyan Borisov  <bilyan.borisov@arm.com>

	* gcc.target/arm/simd/vmaxnm_f32_1.c: New.
	* gcc.target/arm/simd/vmaxnmq_f32_1.c: Likewise.
	* gcc.target/arm/simd/vminnm_f32_1.c: Likewise.
	* gcc.target/arm/simd/vminnmq_f32_1.c: Likewise.

Comments

Alan Lawrence Dec. 22, 2015, 3:16 p.m. UTC | #1
On 21/12/15 11:58, Bilyan Borisov wrote:
> This patch implements the vmaxnmQ_ST and vminnmQ_ST intrinsincs. It also
> implements the __ARM_FEATURE_NUMERIC_MAXMIN macro, which is defined when
> __ARM_ARCH >= 8, and which enables the intrinsincs.
>
> Tested on arm-none-eabi, armeb-none-eabi, arm-none-linux-gnueabihf.
>
> ---
>
> gcc/
>
> 2015-XX-XX  Bilyan Borisov  <bilyan.borisov@arm.com>
>
>      * config/arm/arm-c.c (arm_cpu_builtins): New macro definition.
>      * config/arm/arm_neon.h (vmaxnm_f32): New intrinsinc.
>      (vmaxnmq_f32): Likewise.
>      (vminnm_f32): Likewise.
>      (vminnmq_f32): Likewise.
>      * config/arm/arm_neon_builtins.def (vmaxnm): New builtin.
>      (vminnm): Likewise.
>      * config/arm/iterators.md (VMAXMINNM): New iterator.
>      (maxmin): Updated iterator.
>      * config/arm/neon.md (neon_v<maxmin><mode>, VCVTF): New pattern.
>      * config/arm/unspecs.md (UNSPEC_VMAXNM): New unspec.
>      (UNSPEC_VMINNM): Likewise.
>
> gcc/testsuite/
>
> 2015-XX-XX  Bilyan Borisov  <bilyan.borisov@arm.com>
>
>      * gcc.target/arm/simd/vmaxnm_f32_1.c: New.
>      * gcc.target/arm/simd/vmaxnmq_f32_1.c: Likewise.
>      * gcc.target/arm/simd/vminnm_f32_1.c: Likewise.
>      * gcc.target/arm/simd/vminnmq_f32_1.c: Likewise.
>

I note strong similarities between this patch and David Sherwood's 
https://gcc.gnu.org/ml/gcc-patches/2015-12/msg01560.html

Both add the same UNSPEC_s, and equivalent VMAXMIN(F?)NM. David's adds <fmaxmin> 
and <fmaxmin_op> attributes, whereas Bilyan reuses some elements of the existing 
<maxmin>. AFAICT, the patterns they add are in other ways equivalent (same type, 
condition, modes, alternatives), albeit in different files and constructed using 
those different iterators, and David's has the standard names (which IIUC we 
want, so the autovectorizer finds them) whereas Bilyan adds the intrinsics 
(which we also want)...

--Alan
diff mbox

Patch

diff --git a/gcc/config/arm/arm-c.c b/gcc/config/arm/arm-c.c
index 7dee28ec52df68f8c7a60fe66e1b049fed39c1c0..7b63bdcf86c079288611f79ed89d6540b348fe82 100644
--- a/gcc/config/arm/arm-c.c
+++ b/gcc/config/arm/arm-c.c
@@ -83,6 +83,9 @@  arm_cpu_builtins (struct cpp_reader* pfile)
 		      ((TARGET_ARM_ARCH >= 5 && !TARGET_THUMB)
 		       || TARGET_ARM_ARCH_ISA_THUMB >=2));
 
+  def_or_undef_macro (pfile, "__ARM_FEATURE_NUMERIC_MAXMIN",
+		      TARGET_ARM_ARCH >= 8);
+
   def_or_undef_macro (pfile, "__ARM_FEATURE_SIMD32", TARGET_INT_SIMD);
 
   builtin_define_with_int_value ("__ARM_SIZEOF_MINIMAL_ENUM",
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index 0a33d21f2fcf8a1074fb62e89f4418295d446db5..0c8c08cc404cbc446db648d41f0773d0b4798a3a 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -2907,6 +2907,33 @@  vmaxq_u32 (uint32x4_t __a, uint32x4_t __b)
   return (uint32x4_t)__builtin_neon_vmaxuv4si ((int32x4_t) __a, (int32x4_t) __b);
 }
 
+#pragma GCC push_options
+#pragma GCC target ("fpu=neon-fp-armv8")
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vmaxnm_f32 (float32x2_t a, float32x2_t b)
+{
+  return (float32x2_t)__builtin_neon_vmaxnmv2sf (a, b);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vmaxnmq_f32 (float32x4_t a, float32x4_t b)
+{
+  return (float32x4_t)__builtin_neon_vmaxnmv4sf (a, b);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vminnm_f32 (float32x2_t a, float32x2_t b)
+{
+  return (float32x2_t)__builtin_neon_vminnmv2sf (a, b);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vminnmq_f32 (float32x4_t a, float32x4_t b)
+{
+  return (float32x4_t)__builtin_neon_vminnmv4sf (a, b);
+}
+#pragma GCC pop_options
+
 __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
 vmin_s8 (int8x8_t __a, int8x8_t __b)
 {
diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def
index 0b719df760747af7642bd14ab14a9b2144d43359..1d3b6e9b6a08a3cf3b0d6f76bf340208919c9b13 100644
--- a/gcc/config/arm/arm_neon_builtins.def
+++ b/gcc/config/arm/arm_neon_builtins.def
@@ -126,6 +126,9 @@  VAR6 (BINOP, vmins, v8qi, v4hi, v2si, v16qi, v8hi, v4si)
 VAR6 (BINOP, vminu, v8qi, v4hi, v2si, v16qi, v8hi, v4si)
 VAR2 (BINOP, vminf, v2sf, v4sf)
 
+VAR2 (BINOP, vmaxnm, v2sf, v4sf)
+VAR2 (BINOP, vminnm, v2sf, v4sf)
+
 VAR3 (BINOP, vpmaxs, v8qi, v4hi, v2si)
 VAR3 (BINOP, vpmaxu, v8qi, v4hi, v2si)
 VAR1 (BINOP, vpmaxf, v2sf)
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 6a541251ed1e5d7c766aca04f0da97ba6d470541..e2f7cea89688c67d841dfef4c5a4e6e003660c63 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -308,6 +308,8 @@ 
 
 (define_int_iterator VMAXMINF [UNSPEC_VMAX UNSPEC_VMIN])
 
+(define_int_iterator VMAXMINNM [UNSPEC_VMAXNM UNSPEC_VMINNM])
+
 (define_int_iterator VPADDL [UNSPEC_VPADDL_S UNSPEC_VPADDL_U])
 
 (define_int_iterator VPADAL [UNSPEC_VPADAL_S UNSPEC_VPADAL_U])
@@ -741,6 +743,7 @@ 
   (UNSPEC_VMIN "min") (UNSPEC_VMIN_U "min")
   (UNSPEC_VPMAX "max") (UNSPEC_VPMAX_U "max")
   (UNSPEC_VPMIN "min") (UNSPEC_VPMIN_U "min")
+  (UNSPEC_VMAXNM "maxnm") (UNSPEC_VMINNM "minnm")
 ])
 
 (define_int_attr shift_op [
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 62fb6daae9983470faf2c9cc686f5181b8bd7cb6..1b48451b5ee559c332573860d8a3aea0bb3a58ad 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -2354,6 +2354,16 @@ 
   [(set_attr "type" "neon_fp_minmax_s<q>")]
 )
 
+(define_insn "neon_v<maxmin><mode>"
+  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
+	(unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
+		       (match_operand:VCVTF 2 "s_register_operand" "w")]
+		      VMAXMINNM))]
+  "TARGET_NEON && TARGET_FPU_ARMV8"
+  "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set_attr "type" "neon_fp_minmax_s<q>")]
+)
+
 (define_expand "neon_vpadd<mode>"
   [(match_operand:VD 0 "s_register_operand" "=w")
    (match_operand:VD 1 "s_register_operand" "w")
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index 67acafd075fb515a848fbe968a0183e4673ab0cd..b8bdca8115290adcda50bdb89bdd99feec79968b 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -225,8 +225,10 @@ 
   UNSPEC_VLD4_DUP
   UNSPEC_VLD4_LANE
   UNSPEC_VMAX
+  UNSPEC_VMAXNM
   UNSPEC_VMAX_U
   UNSPEC_VMIN
+  UNSPEC_VMINNM
   UNSPEC_VMIN_U
   UNSPEC_VMLA
   UNSPEC_VMLA_LANE
diff --git a/gcc/testsuite/gcc.target/arm/simd/vmaxnm_f32_1.c b/gcc/testsuite/gcc.target/arm/simd/vmaxnm_f32_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..c58764fed378f64fbc3234feea6f66e1e6d7645a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/vmaxnm_f32_1.c
@@ -0,0 +1,166 @@ 
+/* Test the `vmaxnmf32' ARM Neon intrinsic.  */
+
+/* { dg-options "-save-temps -O3" } */
+
+#include "arm_neon.h"
+
+extern void abort ();
+
+void __attribute__ ((noinline))
+test_vmaxnm_f32__regular_input1 ()
+{
+  float32_t a1[] = {1,2};
+  float32_t b1[] = {3,4};
+  float32x2_t a = vld1_f32 (a1);
+  float32x2_t b = vld1_f32 (b1);
+  float32x2_t c = vmaxnm_f32 (a, b);
+  float32_t actual[2];
+  vst1_f32 (actual, c);
+
+  for (int i = 0; i < 2; ++i)
+    if (actual[i] != b1[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vmaxnm_f32__regular_input2 ()
+{
+  float32_t a1[] = {3,2};
+  float32_t b1[] = {1,4};
+  float32_t e[] = {3,4};
+  float32x2_t a = vld1_f32 (a1);
+  float32x2_t b = vld1_f32 (b1);
+  float32x2_t c = vmaxnm_f32 (a, b);
+  float32_t actual[2];
+  vst1_f32 (actual, c);
+
+  for (int i = 0; i < 2; ++i)
+    if (actual[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vmaxnm_f32__edge_case1 ()
+{
+  /* When given a quiet NaN, vmaxnm returns the other operand.
+     In this test case we have NaNs in only one operand.  */
+  float32_t n = __builtin_nanf ("");
+  float32_t a1[] = {1,2};
+  float32_t b1[] = {n,n};
+  float32_t e[] = {1,2};
+  float32x2_t a = vld1_f32 (a1);
+  float32x2_t b = vld1_f32 (b1);
+  float32x2_t c = vmaxnm_f32 (a, b);
+  float32_t actual[2];
+  vst1_f32 (actual, c);
+
+  for (int i = 0; i < 2; ++i)
+    if (actual[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vmaxnm_f32__edge_case2 ()
+{
+  /* When given a quiet NaN, vmaxnm returns the other operand.
+     In this test case we have NaNs in both operands.  */
+  float32_t n = __builtin_nanf ("");
+  float32_t a1[] = {n,2};
+  float32_t b1[] = {1,n};
+  float32_t e[] = {1,2};
+  float32x2_t a = vld1_f32 (a1);
+  float32x2_t b = vld1_f32 (b1);
+  float32x2_t c = vmaxnm_f32 (a, b);
+  float32_t actual[2];
+  vst1_f32 (actual, c);
+
+  for (int i = 0; i < 2; ++i)
+    if (actual[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vmaxnm_f32__edge_case3 ()
+{
+  /* For 0 and -0, vmaxnm behaves like vmax i.e. returns -0.  */
+  float32_t n = __builtin_nanf ("");
+  float32_t a1[] = {0.0,0.0};
+  float32_t b1[] = {-0.0, -0.0};
+  float32_t e[] = {0.0, 0.0};
+
+  float32x2_t a = vld1_f32 (a1);
+  float32x2_t b = vld1_f32 (b1);
+  float32x2_t c = vmaxnm_f32 (a, b);
+  float32x2_t d = vmax_f32 (a,b);
+
+  float32_t actual1[2];
+  vst1_f32 (actual1, c);
+
+  float32_t actual2[2];
+  vst1_f32 (actual2, d);
+
+  for (int i = 0; i < 2; ++i)
+    if (actual1[i] != actual2[i] && actual1[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vmaxnm_f32__edge_case4 ()
+{
+  /* For inf/inf and -inf/-inf, vmaxnm behaves like vmax
+     i.e. returns inf/-inf.  */
+  float32_t inf = __builtin_huge_valf ();
+  float32_t a1[] = {inf, -inf};
+  float32_t b1[] = {inf, -inf};
+  float32_t e[] = {inf, -inf};
+
+  float32x2_t a = vld1_f32 (a1);
+  float32x2_t b = vld1_f32 (b1);
+  float32x2_t c = vmaxnm_f32 (a, b);
+  float32x2_t d = vmax_f32 (a,b);
+
+  float32_t actual1[2];
+  vst1_f32 (actual1, c);
+
+  float32_t actual2[2];
+  vst1_f32 (actual2, d);
+
+  for (int i = 0; i < 2; ++i)
+    if (actual1[i] == actual2[i] && actual1[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vmaxnm_f32__edge_case5 ()
+{
+  /* When given 2 NaNs, return a NaN.  Since a NaN is not equal to anything,
+     not even another NaN, use __builtin_isnan () to check.  */
+  float32_t n = __builtin_nanf ("");
+  float32_t a1[] = {n,n};
+  float32_t b1[] = {n,n};
+  float32_t e[] = {n,n};
+  float32x2_t a = vld1_f32 (a1);
+  float32x2_t b = vld1_f32 (b1);
+  float32x2_t c = vmaxnm_f32 (a, b);
+  float32_t actual[2];
+  vst1_f32 (actual, c);
+
+  for (int i = 0; i < 2; ++i)
+    if (!__builtin_isnan (actual[i]))
+      abort ();
+}
+
+int
+main ()
+{
+  test_vmaxnm_f32__regular_input1 ();
+  test_vmaxnm_f32__regular_input2 ();
+  test_vmaxnm_f32__edge_case1 ();
+  test_vmaxnm_f32__edge_case2 ();
+  test_vmaxnm_f32__edge_case3 ();
+  test_vmaxnm_f32__edge_case4 ();
+  test_vmaxnm_f32__edge_case5 ();
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "vmaxnm\.f32\t\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+\n" 7 } } */
diff --git a/gcc/testsuite/gcc.target/arm/simd/vmaxnmq_f32_1.c b/gcc/testsuite/gcc.target/arm/simd/vmaxnmq_f32_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..509b7a65bb330a74e48184f7686f701aff22f91f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/vmaxnmq_f32_1.c
@@ -0,0 +1,167 @@ 
+/* Test the `vmaxnmqf32' ARM Neon intrinsic.  */
+
+/* { dg-options "-save-temps -O3" } */
+
+#include "arm_neon.h"
+
+extern void abort ();
+
+void __attribute__ ((noinline))
+test_vmaxnmq_f32__regular_input1 ()
+{
+  float32_t a1[] = {1,2,5,6};
+  float32_t b1[] = {3,4,7,8};
+  float32x4_t a = vld1q_f32 (a1);
+  float32x4_t b = vld1q_f32 (b1);
+  float32x4_t c = vmaxnmq_f32 (a, b);
+  float32_t actual[4];
+  vst1q_f32 (actual, c);
+
+  for (int i = 0; i < 4; ++i)
+    if (actual[i] != b1[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vmaxnmq_f32__regular_input2 ()
+{
+  float32_t a1[] = {3,2,7,6};
+  float32_t b1[] = {1,4,5,8};
+  float32_t e[] = {3,4,7,8};
+  float32x4_t a = vld1q_f32 (a1);
+  float32x4_t b = vld1q_f32 (b1);
+  float32x4_t c = vmaxnmq_f32 (a, b);
+  float32_t actual[4];
+  vst1q_f32 (actual, c);
+
+  for (int i = 0; i < 4; ++i)
+    if (actual[i] != e[i])
+      abort ();
+}
+
+
+void __attribute__ ((noinline))
+test_vmaxnmq_f32__edge_case1 ()
+{
+  /* When given a quiet NaN, vmaxnmq returns the other operand.
+     In this test case we have NaNs in only one operand.  */
+  float32_t n = __builtin_nanf ("");
+  float32_t a1[] = {1,2,3,4};
+  float32_t b1[] = {n,n,n,n};
+  float32_t e[] = {1,2,3,4};
+  float32x4_t a = vld1q_f32 (a1);
+  float32x4_t b = vld1q_f32 (b1);
+  float32x4_t c = vmaxnmq_f32 (a, b);
+  float32_t actual[4];
+  vst1q_f32 (actual, c);
+
+  for (int i = 0; i < 4; ++i)
+    if (actual[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vmaxnmq_f32__edge_case2 ()
+{
+  /* When given a quiet NaN, vmaxnmq returns the other operand.
+     In this test case we have NaNs in both operands.  */
+  float32_t n = __builtin_nanf ("");
+  float32_t a1[] = {n,2,n,4};
+  float32_t b1[] = {1,n,3,n};
+  float32_t e[] = {1,2,3,4};
+  float32x4_t a = vld1q_f32 (a1);
+  float32x4_t b = vld1q_f32 (b1);
+  float32x4_t c = vmaxnmq_f32 (a, b);
+  float32_t actual[4];
+  vst1q_f32 (actual, c);
+
+  for (int i = 0; i < 4; ++i)
+    if (actual[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vmaxnmq_f32__edge_case3 ()
+{
+  /* For 0 and -0, vmaxnmq behaves like vmaxq i.e. returns -0.  */
+  float32_t n = __builtin_nanf ("");
+  float32_t a1[] = {0.0, 0.0, -0.0, -0.0};
+  float32_t b1[] = {-0.0, -0.0, 0.0, 0.0};
+  float32_t e[] = {0.0, 0.0, 0.0, 0.0};
+
+  float32x4_t a = vld1q_f32 (a1);
+  float32x4_t b = vld1q_f32 (b1);
+  float32x4_t c = vmaxnmq_f32 (a, b);
+  float32x4_t d = vmaxq_f32 (a,b);
+
+  float32_t actual1[4];
+  vst1q_f32 (actual1, c);
+
+  float32_t actual2[4];
+  vst1q_f32 (actual2, d);
+
+  for (int i = 0; i < 4; ++i)
+    if (actual1[i] != actual2[i] && actual1[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vmaxnmq_f32__edge_case4 ()
+{
+  /* For inf/inf and -inf/-inf, vmaxnmq behaves like vmaxq
+     i.e. returns inf/-inf.  */
+  float32_t inf = __builtin_huge_valf ();
+  float32_t a1[] = {inf, -inf, inf, inf};
+  float32_t b1[] = {inf, -inf, -inf, -inf};
+  float32_t e[] = {inf, -inf, inf, inf};
+
+  float32x4_t a = vld1q_f32 (a1);
+  float32x4_t b = vld1q_f32 (b1);
+  float32x4_t c = vmaxnmq_f32 (a, b);
+  float32x4_t d = vmaxq_f32 (a,b);
+
+  float32_t actual1[4];
+  vst1q_f32 (actual1, c);
+
+  float32_t actual2[4];
+  vst1q_f32 (actual2, d);
+
+  for (int i = 0; i < 4; ++i)
+    if (actual1[i] == actual2[i] && actual1[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vmaxnmq_f32__edge_case5 ()
+{
+  /* When given 2 NaNs, return a NaN.  Since a NaN is not equal to anything,
+     not even another NaN, use __builtin_isnan () to check.  */
+  float32_t n = __builtin_nanf ("");
+  float32_t a1[] = {n,n,n,n};
+  float32_t b1[] = {n,n,n,n};
+  float32_t e[] = {n,n};
+  float32x4_t a = vld1q_f32 (a1);
+  float32x4_t b = vld1q_f32 (b1);
+  float32x4_t c = vmaxnmq_f32 (a, b);
+  float32_t actual[4];
+  vst1q_f32 (actual, c);
+
+  for (int i = 0; i < 4; ++i)
+    if (!__builtin_isnan (actual[i]))
+      abort ();
+}
+
+int
+main ()
+{
+  test_vmaxnmq_f32__regular_input1 ();
+  test_vmaxnmq_f32__regular_input2 ();
+  test_vmaxnmq_f32__edge_case1 ();
+  test_vmaxnmq_f32__edge_case2 ();
+  test_vmaxnmq_f32__edge_case3 ();
+  test_vmaxnmq_f32__edge_case4 ();
+  test_vmaxnmq_f32__edge_case5 ();
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "vmaxnm\.f32\t\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+\n" 7 } } */
diff --git a/gcc/testsuite/gcc.target/arm/simd/vminnm_f32_1.c b/gcc/testsuite/gcc.target/arm/simd/vminnm_f32_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..e50372ca5edef4326bc8096c306071c1c1e70fca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/vminnm_f32_1.c
@@ -0,0 +1,166 @@ 
+/* Test the `vminnmf32' ARM Neon intrinsic.  */
+
+/* { dg-options "-save-temps -O3" } */
+
+#include "arm_neon.h"
+
+extern void abort ();
+
+void __attribute__ ((noinline))
+test_vminnm_f32__regular_input1 ()
+{
+  float32_t a1[] = {1,2};
+  float32_t b1[] = {3,4};
+  float32x2_t a = vld1_f32 (a1);
+  float32x2_t b = vld1_f32 (b1);
+  float32x2_t c = vminnm_f32 (a, b);
+  float32_t actual[2];
+  vst1_f32 (actual, c);
+
+  for (int i = 0; i < 2; ++i)
+    if (actual[i] != a1[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vminnm_f32__regular_input2 ()
+{
+  float32_t a1[] = {3,2};
+  float32_t b1[] = {1,4};
+  float32_t e[] = {1,2};
+  float32x2_t a = vld1_f32 (a1);
+  float32x2_t b = vld1_f32 (b1);
+  float32x2_t c = vminnm_f32 (a, b);
+  float32_t actual[2];
+  vst1_f32 (actual, c);
+
+  for (int i = 0; i < 2; ++i)
+    if (actual[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vminnm_f32__edge_case1 ()
+{
+  /* When given a quiet NaN, vminnm returns the other operand.
+     In this test case we have NaNs in only one operand.  */
+  float32_t n = __builtin_nanf ("");
+  float32_t a1[] = {1,2};
+  float32_t b1[] = {n,n};
+  float32_t e[] = {1,2};
+  float32x2_t a = vld1_f32 (a1);
+  float32x2_t b = vld1_f32 (b1);
+  float32x2_t c = vminnm_f32 (a, b);
+  float32_t actual[2];
+  vst1_f32 (actual, c);
+
+  for (int i = 0; i < 2; ++i)
+    if (actual[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vminnm_f32__edge_case2 ()
+{
+  /* When given a quiet NaN, vminnm returns the other operand.
+     In this test case we have NaNs in both operands.  */
+  float32_t n = __builtin_nanf ("");
+  float32_t a1[] = {n,2};
+  float32_t b1[] = {1,n};
+  float32_t e[] = {1,2};
+  float32x2_t a = vld1_f32 (a1);
+  float32x2_t b = vld1_f32 (b1);
+  float32x2_t c = vminnm_f32 (a, b);
+  float32_t actual[2];
+  vst1_f32 (actual, c);
+
+  for (int i = 0; i < 2; ++i)
+    if (actual[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vminnm_f32__edge_case3 ()
+{
+  /* For 0 and -0, vminnm behaves like vmin i.e. returns -0.  */
+  float32_t n = __builtin_nanf ("");
+  float32_t a1[] = {0.0,0.0};
+  float32_t b1[] = {-0.0, -0.0};
+  float32_t e[] = {-0.0, -0.0};
+
+  float32x2_t a = vld1_f32 (a1);
+  float32x2_t b = vld1_f32 (b1);
+  float32x2_t c = vminnm_f32 (a, b);
+  float32x2_t d = vmin_f32 (a,b);
+
+  float32_t actual1[2];
+  vst1_f32 (actual1, c);
+
+  float32_t actual2[2];
+  vst1_f32 (actual2, d);
+
+  for (int i = 0; i < 2; ++i)
+    if (actual1[i] != actual2[i] && actual1[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vminnm_f32__edge_case4 ()
+{
+  /* For inf/inf and -inf/-inf, vminnm behaves like vmin
+     i.e. returns inf/-inf.  */
+  float32_t inf = __builtin_huge_valf ();
+  float32_t a1[] = {inf, -inf};
+  float32_t b1[] = {inf, -inf};
+  float32_t e[] = {inf, -inf};
+
+  float32x2_t a = vld1_f32 (a1);
+  float32x2_t b = vld1_f32 (b1);
+  float32x2_t c = vminnm_f32 (a, b);
+  float32x2_t d = vmin_f32 (a,b);
+
+  float32_t actual1[2];
+  vst1_f32 (actual1, c);
+
+  float32_t actual2[2];
+  vst1_f32 (actual2, d);
+
+  for (int i = 0; i < 2; ++i)
+    if (actual1[i] == actual2[i] && actual1[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vminnm_f32__edge_case5 ()
+{
+  /* When given 2 NaNs, return a NaN.  Since a NaN is not equal to anything,
+     not even another NaN, use __builtin_isnan () to check.  */
+  float32_t n = __builtin_nanf ("");
+  float32_t a1[] = {n,n};
+  float32_t b1[] = {n,n};
+  float32_t e[] = {n,n};
+  float32x2_t a = vld1_f32 (a1);
+  float32x2_t b = vld1_f32 (b1);
+  float32x2_t c = vminnm_f32 (a, b);
+  float32_t actual[2];
+  vst1_f32 (actual, c);
+
+  for (int i = 0; i < 2; ++i)
+    if (!__builtin_isnan (actual[i]))
+      abort ();
+}
+
+int
+main ()
+{
+  test_vminnm_f32__regular_input1 ();
+  test_vminnm_f32__regular_input2 ();
+  test_vminnm_f32__edge_case1 ();
+  test_vminnm_f32__edge_case2 ();
+  test_vminnm_f32__edge_case3 ();
+  test_vminnm_f32__edge_case4 ();
+  test_vminnm_f32__edge_case5 ();
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "vminnm\.f32\t\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+\n" 7 } } */
diff --git a/gcc/testsuite/gcc.target/arm/simd/vminnmq_f32_1.c b/gcc/testsuite/gcc.target/arm/simd/vminnmq_f32_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..bdc1f1e7ccbc6687f385692c74b1f5db8e924dd9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/vminnmq_f32_1.c
@@ -0,0 +1,166 @@ 
+/* Test the `vminnmqf32' ARM Neon intrinsic.  */
+
+/* { dg-options "-save-temps -O3" } */
+
+#include "arm_neon.h"
+
+extern void abort ();
+
+void __attribute__ ((noinline))
+test_vminnmq_f32__regular_input1 ()
+{
+  float32_t a1[] = {1,2,5,6};
+  float32_t b1[] = {3,4,7,8};
+  float32x4_t a = vld1q_f32 (a1);
+  float32x4_t b = vld1q_f32 (b1);
+  float32x4_t c = vminnmq_f32 (a, b);
+  float32_t actual[4];
+  vst1q_f32 (actual, c);
+
+  for (int i = 0; i < 4; ++i)
+    if (actual[i] != a1[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vminnmq_f32__regular_input2 ()
+{
+  float32_t a1[] = {3,2,7,6};
+  float32_t b1[] = {1,4,5,8};
+  float32_t e[] = {1,2,5,6};
+  float32x4_t a = vld1q_f32 (a1);
+  float32x4_t b = vld1q_f32 (b1);
+  float32x4_t c = vminnmq_f32 (a, b);
+  float32_t actual[4];
+  vst1q_f32 (actual, c);
+
+  for (int i = 0; i < 4; ++i)
+    if (actual[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vminnmq_f32__edge_case1 ()
+{
+  /* When given a quiet NaN, vminnmq returns the other operand.
+     In this test case we have NaNs in only one operand.  */
+  float32_t n = __builtin_nanf ("");
+  float32_t a1[] = {1,2,3,4};
+  float32_t b1[] = {n,n,n,n};
+  float32_t e[] = {1,2,3,4};
+  float32x4_t a = vld1q_f32 (a1);
+  float32x4_t b = vld1q_f32 (b1);
+  float32x4_t c = vminnmq_f32 (a, b);
+  float32_t actual[4];
+  vst1q_f32 (actual, c);
+
+  for (int i = 0; i < 4; ++i)
+    if (actual[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vminnmq_f32__edge_case2 ()
+{
+  /* When given a quiet NaN, vminnmq returns the other operand.
+     In this test case we have NaNs in both operands.  */
+  float32_t n = __builtin_nanf ("");
+  float32_t a1[] = {n,2,n,4};
+  float32_t b1[] = {1,n,3,n};
+  float32_t e[] = {1,2,3,4};
+  float32x4_t a = vld1q_f32 (a1);
+  float32x4_t b = vld1q_f32 (b1);
+  float32x4_t c = vminnmq_f32 (a, b);
+  float32_t actual[4];
+  vst1q_f32 (actual, c);
+
+  for (int i = 0; i < 4; ++i)
+    if (actual[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vminnmq_f32__edge_case3 ()
+{
+  /* For 0 and -0, vminnmq behaves like vminq i.e. returns -0.  */
+  float32_t n = __builtin_nanf ("");
+  float32_t a1[] = {0.0, 0.0, -0.0, -0.0};
+  float32_t b1[] = {-0.0, -0.0, 0.0, 0.0};
+  float32_t e[] = {-0.0, -0.0, -0.0, -0.0};
+
+  float32x4_t a = vld1q_f32 (a1);
+  float32x4_t b = vld1q_f32 (b1);
+  float32x4_t c = vminnmq_f32 (a, b);
+  float32x4_t d = vminq_f32 (a,b);
+
+  float32_t actual1[4];
+  vst1q_f32 (actual1, c);
+
+  float32_t actual2[4];
+  vst1q_f32 (actual2, d);
+
+  for (int i = 0; i < 4; ++i)
+    if (actual1[i] != actual2[i] && actual1[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vminnmq_f32__edge_case4 ()
+{
+  /* For inf/inf and -inf/-inf, vminnmq behaves like vminq
+     i.e. returns inf/-inf.  */
+  float32_t inf = __builtin_huge_valf ();
+  float32_t a1[] = {inf, -inf, inf, inf};
+  float32_t b1[] = {inf, -inf, -inf, -inf};
+  float32_t e[] = {inf, -inf, -inf, -inf};
+
+  float32x4_t a = vld1q_f32 (a1);
+  float32x4_t b = vld1q_f32 (b1);
+  float32x4_t c = vminnmq_f32 (a, b);
+  float32x4_t d = vminq_f32 (a,b);
+
+  float32_t actual1[4];
+  vst1q_f32 (actual1, c);
+
+  float32_t actual2[4];
+  vst1q_f32 (actual2, d);
+
+  for (int i = 0; i < 4; ++i)
+    if (actual1[i] == actual2[i] && actual1[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vminnmq_f32__edge_case5 ()
+{
+  /* When given 2 NaNs, return a NaN.  Since a NaN is not equal to anything,
+     not even another NaN, use __builtin_isnan () to check.  */
+  float32_t n = __builtin_nanf ("");
+  float32_t a1[] = {n,n,n,n};
+  float32_t b1[] = {n,n,n,n};
+  float32_t e[] = {n,n};
+  float32x4_t a = vld1q_f32 (a1);
+  float32x4_t b = vld1q_f32 (b1);
+  float32x4_t c = vminnmq_f32 (a, b);
+  float32_t actual[4];
+  vst1q_f32 (actual, c);
+
+  for (int i = 0; i < 4; ++i)
+    if (!__builtin_isnan (actual[i]))
+      abort ();
+}
+
+int
+main ()
+{
+  test_vminnmq_f32__regular_input1 ();
+  test_vminnmq_f32__regular_input2 ();
+  test_vminnmq_f32__edge_case1 ();
+  test_vminnmq_f32__edge_case2 ();
+  test_vminnmq_f32__edge_case3 ();
+  test_vminnmq_f32__edge_case4 ();
+  test_vminnmq_f32__edge_case5 ();
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "vminnm\.f32\t\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+\n" 7 } } */