[AArch32,NEON] Implementing vmaxnmQ_ST and vminnmQ_ST intrinsincs.

Message ID	5677E964.7000408@foss.arm.com
State	New
Headers	show Return-Path: <gcc-patches-return-417795-incoming=patchwork.ozlabs.org@gcc.gnu.org> DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:to :from:subject:message-id:date:mime-version:content-type; q=dns; s=default; b=VIj5svFK6N3QKtiZ+JRGKJdF2fslQSDbRlLBb4FqKbiZ+s931P W/yVQLe5RX+08l8WNg0LPYYhrV/4oU2c7RveSnFjZKMhcBN2sJCUY9trlGCjLNG1 G+v0AGLZvKVYHlXbITF/br72RRGTq5DVJ16zTQJtIo9/tB/TLwZuuvCcM= Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk Sender: gcc-patches-owner@gcc.gnu.org To: gcc-patches@gcc.gnu.org From: Bilyan Borisov <bilyan.borisov@foss.arm.com> Subject: [AArch32][NEON] Implementing vmaxnmQ_ST and vminnmQ_ST intrinsincs. Message-ID: <5677E964.7000408@foss.arm.com> Date: Mon, 21 Dec 2015 11:58:28 +0000 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.4.0 MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="------------040407020306040605040200"

diff --git a/gcc/config/arm/arm-c.c b/gcc/config/arm/arm-c.c index 7dee28ec52df68f8c7a60fe66e1b049fed39c1c0..7b63bdcf86c079288611f79ed89d6540b348fe82 100644 --- a/gcc/config/arm/arm-c.c +++ b/gcc/config/arm/arm-c.c @@ -83,6 +83,9 @@ arm_cpu_builtins (struct cpp_reader* pfile) ((TARGET_ARM_ARCH >= 5 && !TARGET_THUMB) || TARGET_ARM_ARCH_ISA_THUMB >=2)); + def_or_undef_macro (pfile, "__ARM_FEATURE_NUMERIC_MAXMIN", + TARGET_ARM_ARCH >= 8); + def_or_undef_macro (pfile, "__ARM_FEATURE_SIMD32", TARGET_INT_SIMD); builtin_define_with_int_value ("__ARM_SIZEOF_MINIMAL_ENUM", diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 0a33d21f2fcf8a1074fb62e89f4418295d446db5..0c8c08cc404cbc446db648d41f0773d0b4798a3a 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -2907,6 +2907,33 @@ vmaxq_u32 (uint32x4_t __a, uint32x4_t __b) return (uint32x4_t)__builtin_neon_vmaxuv4si ((int32x4_t) __a, (int32x4_t) __b); } +#pragma GCC push_options +#pragma GCC target ("fpu=neon-fp-armv8") +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vmaxnm_f32 (float32x2_t a, float32x2_t b) +{ + return (float32x2_t)__builtin_neon_vmaxnmv2sf (a, b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vmaxnmq_f32 (float32x4_t a, float32x4_t b) +{ + return (float32x4_t)__builtin_neon_vmaxnmv4sf (a, b); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vminnm_f32 (float32x2_t a, float32x2_t b) +{ + return (float32x2_t)__builtin_neon_vminnmv2sf (a, b); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vminnmq_f32 (float32x4_t a, float32x4_t b) +{ + return (float32x4_t)__builtin_neon_vminnmv4sf (a, b); +} +#pragma GCC pop_options + __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vmin_s8 (int8x8_t __a, int8x8_t __b) { diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index 0b719df760747af7642bd14ab14a9b2144d43359..1d3b6e9b6a08a3cf3b0d6f76bf340208919c9b13 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -126,6 +126,9 @@ VAR6 (BINOP, vmins, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR6 (BINOP, vminu, v8qi, v4hi, v2si, v16qi, v8hi, v4si) VAR2 (BINOP, vminf, v2sf, v4sf) +VAR2 (BINOP, vmaxnm, v2sf, v4sf) +VAR2 (BINOP, vminnm, v2sf, v4sf) + VAR3 (BINOP, vpmaxs, v8qi, v4hi, v2si) VAR3 (BINOP, vpmaxu, v8qi, v4hi, v2si) VAR1 (BINOP, vpmaxf, v2sf) diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 6a541251ed1e5d7c766aca04f0da97ba6d470541..e2f7cea89688c67d841dfef4c5a4e6e003660c63 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -308,6 +308,8 @@ (define_int_iterator VMAXMINF [UNSPEC_VMAX UNSPEC_VMIN]) +(define_int_iterator VMAXMINNM [UNSPEC_VMAXNM UNSPEC_VMINNM]) + (define_int_iterator VPADDL [UNSPEC_VPADDL_S UNSPEC_VPADDL_U]) (define_int_iterator VPADAL [UNSPEC_VPADAL_S UNSPEC_VPADAL_U]) @@ -741,6 +743,7 @@ (UNSPEC_VMIN "min") (UNSPEC_VMIN_U "min") (UNSPEC_VPMAX "max") (UNSPEC_VPMAX_U "max") (UNSPEC_VPMIN "min") (UNSPEC_VPMIN_U "min") + (UNSPEC_VMAXNM "maxnm") (UNSPEC_VMINNM "minnm") ]) (define_int_attr shift_op [ diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 62fb6daae9983470faf2c9cc686f5181b8bd7cb6..1b48451b5ee559c332573860d8a3aea0bb3a58ad 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -2354,6 +2354,16 @@ [(set_attr "type" "neon_fp_minmax_s<q>")] ) +(define_insn "neon_v<maxmin><mode>" + [(set (match_operand:VCVTF 0 "s_register_operand" "=w") + (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") + (match_operand:VCVTF 2 "s_register_operand" "w")] + VMAXMINNM))] + "TARGET_NEON && TARGET_FPU_ARMV8" + "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" + [(set_attr "type" "neon_fp_minmax_s<q>")] +) + (define_expand "neon_vpadd<mode>" [(match_operand:VD 0 "s_register_operand" "=w") (match_operand:VD 1 "s_register_operand" "w") diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md index 67acafd075fb515a848fbe968a0183e4673ab0cd..b8bdca8115290adcda50bdb89bdd99feec79968b 100644 --- a/gcc/config/arm/unspecs.md +++ b/gcc/config/arm/unspecs.md @@ -225,8 +225,10 @@ UNSPEC_VLD4_DUP UNSPEC_VLD4_LANE UNSPEC_VMAX + UNSPEC_VMAXNM UNSPEC_VMAX_U UNSPEC_VMIN + UNSPEC_VMINNM UNSPEC_VMIN_U UNSPEC_VMLA UNSPEC_VMLA_LANE diff --git a/gcc/testsuite/gcc.target/arm/simd/vmaxnm_f32_1.c b/gcc/testsuite/gcc.target/arm/simd/vmaxnm_f32_1.c new file mode 100644 index 0000000000000000000000000000000000000000..c58764fed378f64fbc3234feea6f66e1e6d7645a --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vmaxnm_f32_1.c @@ -0,0 +1,166 @@ +/* Test the `vmaxnmf32' ARM Neon intrinsic. */ + +/* { dg-options "-save-temps -O3" } */ + +#include "arm_neon.h" + +extern void abort (); + +void __attribute__ ((noinline)) +test_vmaxnm_f32__regular_input1 () +{ + float32_t a1[] = {1,2}; + float32_t b1[] = {3,4}; + float32x2_t a = vld1_f32 (a1); + float32x2_t b = vld1_f32 (b1); + float32x2_t c = vmaxnm_f32 (a, b); + float32_t actual[2]; + vst1_f32 (actual, c); + + for (int i = 0; i < 2; ++i) + if (actual[i] != b1[i]) + abort (); +} + +void __attribute__ ((noinline)) +test_vmaxnm_f32__regular_input2 () +{ + float32_t a1[] = {3,2}; + float32_t b1[] = {1,4}; + float32_t e[] = {3,4}; + float32x2_t a = vld1_f32 (a1); + float32x2_t b = vld1_f32 (b1); + float32x2_t c = vmaxnm_f32 (a, b); + float32_t actual[2]; + vst1_f32 (actual, c); + + for (int i = 0; i < 2; ++i) + if (actual[i] != e[i]) + abort (); +} + +void __attribute__ ((noinline)) +test_vmaxnm_f32__edge_case1 () +{ + /* When given a quiet NaN, vmaxnm returns the other operand. + In this test case we have NaNs in only one operand. */ + float32_t n = __builtin_nanf (""); + float32_t a1[] = {1,2}; + float32_t b1[] = {n,n}; + float32_t e[] = {1,2}; + float32x2_t a = vld1_f32 (a1); + float32x2_t b = vld1_f32 (b1); + float32x2_t c = vmaxnm_f32 (a, b); + float32_t actual[2]; + vst1_f32 (actual, c); + + for (int i = 0; i < 2; ++i) + if (actual[i] != e[i]) + abort (); +} + +void __attribute__ ((noinline)) +test_vmaxnm_f32__edge_case2 () +{ + /* When given a quiet NaN, vmaxnm returns the other operand. + In this test case we have NaNs in both operands. */ + float32_t n = __builtin_nanf (""); + float32_t a1[] = {n,2}; + float32_t b1[] = {1,n}; + float32_t e[] = {1,2}; + float32x2_t a = vld1_f32 (a1); + float32x2_t b = vld1_f32 (b1); + float32x2_t c = vmaxnm_f32 (a, b); + float32_t actual[2]; + vst1_f32 (actual, c); + + for (int i = 0; i < 2; ++i) + if (actual[i] != e[i]) + abort (); +} + +void __attribute__ ((noinline)) +test_vmaxnm_f32__edge_case3 () +{ + /* For 0 and -0, vmaxnm behaves like vmax i.e. returns -0. */ + float32_t n = __builtin_nanf (""); + float32_t a1[] = {0.0,0.0}; + float32_t b1[] = {-0.0, -0.0}; + float32_t e[] = {0.0, 0.0}; + + float32x2_t a = vld1_f32 (a1); + float32x2_t b = vld1_f32 (b1); + float32x2_t c = vmaxnm_f32 (a, b); + float32x2_t d = vmax_f32 (a,b); + + float32_t actual1[2]; + vst1_f32 (actual1, c); + + float32_t actual2[2]; + vst1_f32 (actual2, d); + + for (int i = 0; i < 2; ++i) + if (actual1[i] != actual2[i] && actual1[i] != e[i]) + abort (); +} + +void __attribute__ ((noinline)) +test_vmaxnm_f32__edge_case4 () +{ + /* For inf/inf and -inf/-inf, vmaxnm behaves like vmax + i.e. returns inf/-inf. */ + float32_t inf = __builtin_huge_valf (); + float32_t a1[] = {inf, -inf}; + float32_t b1[] = {inf, -inf}; + float32_t e[] = {inf, -inf}; + + float32x2_t a = vld1_f32 (a1); + float32x2_t b = vld1_f32 (b1); + float32x2_t c = vmaxnm_f32 (a, b); + float32x2_t d = vmax_f32 (a,b); + + float32_t actual1[2]; + vst1_f32 (actual1, c); + + float32_t actual2[2]; + vst1_f32 (actual2, d); + + for (int i = 0; i < 2; ++i) + if (actual1[i] == actual2[i] && actual1[i] != e[i]) + abort (); +} + +void __attribute__ ((noinline)) +test_vmaxnm_f32__edge_case5 () +{ + /* When given 2 NaNs, return a NaN. Since a NaN is not equal to anything, + not even another NaN, use __builtin_isnan () to check. */ + float32_t n = __builtin_nanf (""); + float32_t a1[] = {n,n}; + float32_t b1[] = {n,n}; + float32_t e[] = {n,n}; + float32x2_t a = vld1_f32 (a1); + float32x2_t b = vld1_f32 (b1); + float32x2_t c = vmaxnm_f32 (a, b); + float32_t actual[2]; + vst1_f32 (actual, c); + + for (int i = 0; i < 2; ++i) + if (!__builtin_isnan (actual[i])) + abort (); +} + +int +main () +{ + test_vmaxnm_f32__regular_input1 (); + test_vmaxnm_f32__regular_input2 (); + test_vmaxnm_f32__edge_case1 (); + test_vmaxnm_f32__edge_case2 (); + test_vmaxnm_f32__edge_case3 (); + test_vmaxnm_f32__edge_case4 (); + test_vmaxnm_f32__edge_case5 (); + return 0; +} + +/* { dg-final { scan-assembler-times "vmaxnm\.f32\t\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+\n" 7 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vmaxnmq_f32_1.c b/gcc/testsuite/gcc.target/arm/simd/vmaxnmq_f32_1.c new file mode 100644 index 0000000000000000000000000000000000000000..509b7a65bb330a74e48184f7686f701aff22f91f --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vmaxnmq_f32_1.c @@ -0,0 +1,167 @@ +/* Test the `vmaxnmqf32' ARM Neon intrinsic. */ + +/* { dg-options "-save-temps -O3" } */ + +#include "arm_neon.h" + +extern void abort (); + +void __attribute__ ((noinline)) +test_vmaxnmq_f32__regular_input1 () +{ + float32_t a1[] = {1,2,5,6}; + float32_t b1[] = {3,4,7,8}; + float32x4_t a = vld1q_f32 (a1); + float32x4_t b = vld1q_f32 (b1); + float32x4_t c = vmaxnmq_f32 (a, b); + float32_t actual[4]; + vst1q_f32 (actual, c); + + for (int i = 0; i < 4; ++i) + if (actual[i] != b1[i]) + abort (); +} + +void __attribute__ ((noinline)) +test_vmaxnmq_f32__regular_input2 () +{ + float32_t a1[] = {3,2,7,6}; + float32_t b1[] = {1,4,5,8}; + float32_t e[] = {3,4,7,8}; + float32x4_t a = vld1q_f32 (a1); + float32x4_t b = vld1q_f32 (b1); + float32x4_t c = vmaxnmq_f32 (a, b); + float32_t actual[4]; + vst1q_f32 (actual, c); + + for (int i = 0; i < 4; ++i) + if (actual[i] != e[i]) + abort (); +} + + +void __attribute__ ((noinline)) +test_vmaxnmq_f32__edge_case1 () +{ + /* When given a quiet NaN, vmaxnmq returns the other operand. + In this test case we have NaNs in only one operand. */ + float32_t n = __builtin_nanf (""); + float32_t a1[] = {1,2,3,4}; + float32_t b1[] = {n,n,n,n}; + float32_t e[] = {1,2,3,4}; + float32x4_t a = vld1q_f32 (a1); + float32x4_t b = vld1q_f32 (b1); + float32x4_t c = vmaxnmq_f32 (a, b); + float32_t actual[4]; + vst1q_f32 (actual, c); + + for (int i = 0; i < 4; ++i) + if (actual[i] != e[i]) + abort (); +} + +void __attribute__ ((noinline)) +test_vmaxnmq_f32__edge_case2 () +{ + /* When given a quiet NaN, vmaxnmq returns the other operand. + In this test case we have NaNs in both operands. */ + float32_t n = __builtin_nanf (""); + float32_t a1[] = {n,2,n,4}; + float32_t b1[] = {1,n,3,n}; + float32_t e[] = {1,2,3,4}; + float32x4_t a = vld1q_f32 (a1); + float32x4_t b = vld1q_f32 (b1); + float32x4_t c = vmaxnmq_f32 (a, b); + float32_t actual[4]; + vst1q_f32 (actual, c); + + for (int i = 0; i < 4; ++i) + if (actual[i] != e[i]) + abort (); +} + +void __attribute__ ((noinline)) +test_vmaxnmq_f32__edge_case3 () +{ + /* For 0 and -0, vmaxnmq behaves like vmaxq i.e. returns -0. */ + float32_t n = __builtin_nanf (""); + float32_t a1[] = {0.0, 0.0, -0.0, -0.0}; + float32_t b1[] = {-0.0, -0.0, 0.0, 0.0}; + float32_t e[] = {0.0, 0.0, 0.0, 0.0}; + + float32x4_t a = vld1q_f32 (a1); + float32x4_t b = vld1q_f32 (b1); + float32x4_t c = vmaxnmq_f32 (a, b); + float32x4_t d = vmaxq_f32 (a,b); + + float32_t actual1[4]; + vst1q_f32 (actual1, c); + + float32_t actual2[4]; + vst1q_f32 (actual2, d); + + for (int i = 0; i < 4; ++i) + if (actual1[i] != actual2[i] && actual1[i] != e[i]) + abort (); +} + +void __attribute__ ((noinline)) +test_vmaxnmq_f32__edge_case4 () +{ + /* For inf/inf and -inf/-inf, vmaxnmq behaves like vmaxq + i.e. returns inf/-inf. */ + float32_t inf = __builtin_huge_valf (); + float32_t a1[] = {inf, -inf, inf, inf}; + float32_t b1[] = {inf, -inf, -inf, -inf}; + float32_t e[] = {inf, -inf, inf, inf}; + + float32x4_t a = vld1q_f32 (a1); + float32x4_t b = vld1q_f32 (b1); + float32x4_t c = vmaxnmq_f32 (a, b); + float32x4_t d = vmaxq_f32 (a,b); + + float32_t actual1[4]; + vst1q_f32 (actual1, c); + + float32_t actual2[4]; + vst1q_f32 (actual2, d); + + for (int i = 0; i < 4; ++i) + if (actual1[i] == actual2[i] && actual1[i] != e[i]) + abort (); +} + +void __attribute__ ((noinline)) +test_vmaxnmq_f32__edge_case5 () +{ + /* When given 2 NaNs, return a NaN. Since a NaN is not equal to anything, + not even another NaN, use __builtin_isnan () to check. */ + float32_t n = __builtin_nanf (""); + float32_t a1[] = {n,n,n,n}; + float32_t b1[] = {n,n,n,n}; + float32_t e[] = {n,n}; + float32x4_t a = vld1q_f32 (a1); + float32x4_t b = vld1q_f32 (b1); + float32x4_t c = vmaxnmq_f32 (a, b); + float32_t actual[4]; + vst1q_f32 (actual, c); + + for (int i = 0; i < 4; ++i) + if (!__builtin_isnan (actual[i])) + abort (); +} + +int +main () +{ + test_vmaxnmq_f32__regular_input1 (); + test_vmaxnmq_f32__regular_input2 (); + test_vmaxnmq_f32__edge_case1 (); + test_vmaxnmq_f32__edge_case2 (); + test_vmaxnmq_f32__edge_case3 (); + test_vmaxnmq_f32__edge_case4 (); + test_vmaxnmq_f32__edge_case5 (); + return 0; +} + +/* { dg-final { scan-assembler-times "vmaxnm\.f32\t\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+\n" 7 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vminnm_f32_1.c b/gcc/testsuite/gcc.target/arm/simd/vminnm_f32_1.c new file mode 100644 index 0000000000000000000000000000000000000000..e50372ca5edef4326bc8096c306071c1c1e70fca --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vminnm_f32_1.c @@ -0,0 +1,166 @@ +/* Test the `vminnmf32' ARM Neon intrinsic. */ + +/* { dg-options "-save-temps -O3" } */ + +#include "arm_neon.h" + +extern void abort (); + +void __attribute__ ((noinline)) +test_vminnm_f32__regular_input1 () +{ + float32_t a1[] = {1,2}; + float32_t b1[] = {3,4}; + float32x2_t a = vld1_f32 (a1); + float32x2_t b = vld1_f32 (b1); + float32x2_t c = vminnm_f32 (a, b); + float32_t actual[2]; + vst1_f32 (actual, c); + + for (int i = 0; i < 2; ++i) + if (actual[i] != a1[i]) + abort (); +} + +void __attribute__ ((noinline)) +test_vminnm_f32__regular_input2 () +{ + float32_t a1[] = {3,2}; + float32_t b1[] = {1,4}; + float32_t e[] = {1,2}; + float32x2_t a = vld1_f32 (a1); + float32x2_t b = vld1_f32 (b1); + float32x2_t c = vminnm_f32 (a, b); + float32_t actual[2]; + vst1_f32 (actual, c); + + for (int i = 0; i < 2; ++i) + if (actual[i] != e[i]) + abort (); +} + +void __attribute__ ((noinline)) +test_vminnm_f32__edge_case1 () +{ + /* When given a quiet NaN, vminnm returns the other operand. + In this test case we have NaNs in only one operand. */ + float32_t n = __builtin_nanf (""); + float32_t a1[] = {1,2}; + float32_t b1[] = {n,n}; + float32_t e[] = {1,2}; + float32x2_t a = vld1_f32 (a1); + float32x2_t b = vld1_f32 (b1); + float32x2_t c = vminnm_f32 (a, b); + float32_t actual[2]; + vst1_f32 (actual, c); + + for (int i = 0; i < 2; ++i) + if (actual[i] != e[i]) + abort (); +} + +void __attribute__ ((noinline)) +test_vminnm_f32__edge_case2 () +{ + /* When given a quiet NaN, vminnm returns the other operand. + In this test case we have NaNs in both operands. */ + float32_t n = __builtin_nanf (""); + float32_t a1[] = {n,2}; + float32_t b1[] = {1,n}; + float32_t e[] = {1,2}; + float32x2_t a = vld1_f32 (a1); + float32x2_t b = vld1_f32 (b1); + float32x2_t c = vminnm_f32 (a, b); + float32_t actual[2]; + vst1_f32 (actual, c); + + for (int i = 0; i < 2; ++i) + if (actual[i] != e[i]) + abort (); +} + +void __attribute__ ((noinline)) +test_vminnm_f32__edge_case3 () +{ + /* For 0 and -0, vminnm behaves like vmin i.e. returns -0. */ + float32_t n = __builtin_nanf (""); + float32_t a1[] = {0.0,0.0}; + float32_t b1[] = {-0.0, -0.0}; + float32_t e[] = {-0.0, -0.0}; + + float32x2_t a = vld1_f32 (a1); + float32x2_t b = vld1_f32 (b1); + float32x2_t c = vminnm_f32 (a, b); + float32x2_t d = vmin_f32 (a,b); + + float32_t actual1[2]; + vst1_f32 (actual1, c); + + float32_t actual2[2]; + vst1_f32 (actual2, d); + + for (int i = 0; i < 2; ++i) + if (actual1[i] != actual2[i] && actual1[i] != e[i]) + abort (); +} + +void __attribute__ ((noinline)) +test_vminnm_f32__edge_case4 () +{ + /* For inf/inf and -inf/-inf, vminnm behaves like vmin + i.e. returns inf/-inf. */ + float32_t inf = __builtin_huge_valf (); + float32_t a1[] = {inf, -inf}; + float32_t b1[] = {inf, -inf}; + float32_t e[] = {inf, -inf}; + + float32x2_t a = vld1_f32 (a1); + float32x2_t b = vld1_f32 (b1); + float32x2_t c = vminnm_f32 (a, b); + float32x2_t d = vmin_f32 (a,b); + + float32_t actual1[2]; + vst1_f32 (actual1, c); + + float32_t actual2[2]; + vst1_f32 (actual2, d); + + for (int i = 0; i < 2; ++i) + if (actual1[i] == actual2[i] && actual1[i] != e[i]) + abort (); +} + +void __attribute__ ((noinline)) +test_vminnm_f32__edge_case5 () +{ + /* When given 2 NaNs, return a NaN. Since a NaN is not equal to anything, + not even another NaN, use __builtin_isnan () to check. */ + float32_t n = __builtin_nanf (""); + float32_t a1[] = {n,n}; + float32_t b1[] = {n,n}; + float32_t e[] = {n,n}; + float32x2_t a = vld1_f32 (a1); + float32x2_t b = vld1_f32 (b1); + float32x2_t c = vminnm_f32 (a, b); + float32_t actual[2]; + vst1_f32 (actual, c); + + for (int i = 0; i < 2; ++i) + if (!__builtin_isnan (actual[i])) + abort (); +} + +int +main () +{ + test_vminnm_f32__regular_input1 (); + test_vminnm_f32__regular_input2 (); + test_vminnm_f32__edge_case1 (); + test_vminnm_f32__edge_case2 (); + test_vminnm_f32__edge_case3 (); + test_vminnm_f32__edge_case4 (); + test_vminnm_f32__edge_case5 (); + return 0; +} + +/* { dg-final { scan-assembler-times "vminnm\.f32\t\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+\n" 7 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/vminnmq_f32_1.c b/gcc/testsuite/gcc.target/arm/simd/vminnmq_f32_1.c new file mode 100644 index 0000000000000000000000000000000000000000..bdc1f1e7ccbc6687f385692c74b1f5db8e924dd9 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/vminnmq_f32_1.c @@ -0,0 +1,166 @@ +/* Test the `vminnmqf32' ARM Neon intrinsic. */ + +/* { dg-options "-save-temps -O3" } */ + +#include "arm_neon.h" + +extern void abort (); + +void __attribute__ ((noinline)) +test_vminnmq_f32__regular_input1 () +{ + float32_t a1[] = {1,2,5,6}; + float32_t b1[] = {3,4,7,8}; + float32x4_t a = vld1q_f32 (a1); + float32x4_t b = vld1q_f32 (b1); + float32x4_t c = vminnmq_f32 (a, b); + float32_t actual[4]; + vst1q_f32 (actual, c); + + for (int i = 0; i < 4; ++i) + if (actual[i] != a1[i]) + abort (); +} + +void __attribute__ ((noinline)) +test_vminnmq_f32__regular_input2 () +{ + float32_t a1[] = {3,2,7,6}; + float32_t b1[] = {1,4,5,8}; + float32_t e[] = {1,2,5,6}; + float32x4_t a = vld1q_f32 (a1); + float32x4_t b = vld1q_f32 (b1); + float32x4_t c = vminnmq_f32 (a, b); + float32_t actual[4]; + vst1q_f32 (actual, c); + + for (int i = 0; i < 4; ++i) + if (actual[i] != e[i]) + abort (); +} + +void __attribute__ ((noinline)) +test_vminnmq_f32__edge_case1 () +{ + /* When given a quiet NaN, vminnmq returns the other operand. + In this test case we have NaNs in only one operand. */ + float32_t n = __builtin_nanf (""); + float32_t a1[] = {1,2,3,4}; + float32_t b1[] = {n,n,n,n}; + float32_t e[] = {1,2,3,4}; + float32x4_t a = vld1q_f32 (a1); + float32x4_t b = vld1q_f32 (b1); + float32x4_t c = vminnmq_f32 (a, b); + float32_t actual[4]; + vst1q_f32 (actual, c); + + for (int i = 0; i < 4; ++i) + if (actual[i] != e[i]) + abort (); +} + +void __attribute__ ((noinline)) +test_vminnmq_f32__edge_case2 () +{ + /* When given a quiet NaN, vminnmq returns the other operand. + In this test case we have NaNs in both operands. */ + float32_t n = __builtin_nanf (""); + float32_t a1[] = {n,2,n,4}; + float32_t b1[] = {1,n,3,n}; + float32_t e[] = {1,2,3,4}; + float32x4_t a = vld1q_f32 (a1); + float32x4_t b = vld1q_f32 (b1); + float32x4_t c = vminnmq_f32 (a, b); + float32_t actual[4]; + vst1q_f32 (actual, c); + + for (int i = 0; i < 4; ++i) + if (actual[i] != e[i]) + abort (); +} + +void __attribute__ ((noinline)) +test_vminnmq_f32__edge_case3 () +{ + /* For 0 and -0, vminnmq behaves like vminq i.e. returns -0. */ + float32_t n = __builtin_nanf (""); + float32_t a1[] = {0.0, 0.0, -0.0, -0.0}; + float32_t b1[] = {-0.0, -0.0, 0.0, 0.0}; + float32_t e[] = {-0.0, -0.0, -0.0, -0.0}; + + float32x4_t a = vld1q_f32 (a1); + float32x4_t b = vld1q_f32 (b1); + float32x4_t c = vminnmq_f32 (a, b); + float32x4_t d = vminq_f32 (a,b); + + float32_t actual1[4]; + vst1q_f32 (actual1, c); + + float32_t actual2[4]; + vst1q_f32 (actual2, d); + + for (int i = 0; i < 4; ++i) + if (actual1[i] != actual2[i] && actual1[i] != e[i]) + abort (); +} + +void __attribute__ ((noinline)) +test_vminnmq_f32__edge_case4 () +{ + /* For inf/inf and -inf/-inf, vminnmq behaves like vminq + i.e. returns inf/-inf. */ + float32_t inf = __builtin_huge_valf (); + float32_t a1[] = {inf, -inf, inf, inf}; + float32_t b1[] = {inf, -inf, -inf, -inf}; + float32_t e[] = {inf, -inf, -inf, -inf}; + + float32x4_t a = vld1q_f32 (a1); + float32x4_t b = vld1q_f32 (b1); + float32x4_t c = vminnmq_f32 (a, b); + float32x4_t d = vminq_f32 (a,b); + + float32_t actual1[4]; + vst1q_f32 (actual1, c); + + float32_t actual2[4]; + vst1q_f32 (actual2, d); + + for (int i = 0; i < 4; ++i) + if (actual1[i] == actual2[i] && actual1[i] != e[i]) + abort (); +} + +void __attribute__ ((noinline)) +test_vminnmq_f32__edge_case5 () +{ + /* When given 2 NaNs, return a NaN. Since a NaN is not equal to anything, + not even another NaN, use __builtin_isnan () to check. */ + float32_t n = __builtin_nanf (""); + float32_t a1[] = {n,n,n,n}; + float32_t b1[] = {n,n,n,n}; + float32_t e[] = {n,n}; + float32x4_t a = vld1q_f32 (a1); + float32x4_t b = vld1q_f32 (b1); + float32x4_t c = vminnmq_f32 (a, b); + float32_t actual[4]; + vst1q_f32 (actual, c); + + for (int i = 0; i < 4; ++i) + if (!__builtin_isnan (actual[i])) + abort (); +} + +int +main () +{ + test_vminnmq_f32__regular_input1 (); + test_vminnmq_f32__regular_input2 (); + test_vminnmq_f32__edge_case1 (); + test_vminnmq_f32__edge_case2 (); + test_vminnmq_f32__edge_case3 (); + test_vminnmq_f32__edge_case4 (); + test_vminnmq_f32__edge_case5 (); + return 0; +} + +/* { dg-final { scan-assembler-times "vminnm\.f32\t\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+\n" 7 } } */

[AArch32,NEON] Implementing vmaxnmQ_ST and vminnmQ_ST intrinsincs.

Commit Message

Comments

Patch