diff mbox series

rs6000: Fix _mm_min_ps and _mm_max_ps (PR83315)

Message ID 1522465039-39208-1-git-send-email-segher@kernel.crashing.org
State New
Headers show
Series rs6000: Fix _mm_min_ps and _mm_max_ps (PR83315) | expand

Commit Message

Segher Boessenkool March 31, 2018, 2:57 a.m. UTC
This makes _mm_{min,max}_ps work correctly for QNaNs.

Tested on powerpc64le-linux; committing.


Segher


2018-03-31  Segher Boessenkool  <segher@kernel.crashing.org>

	PR target/83315
	* config/rs6000/xmmintrin.h (_mm_min_ps, _mm_max_ps): Handle (quiet)
	NaN inputs correctly.

gcc/testsuite/
	PR target/83315
	* gcc.target/powerpc/sse-maxps-2.c: New test.
	* gcc.target/powerpc/sse-minps-2.c: New test.

---
 gcc/config/rs6000/xmmintrin.h                  |  6 ++--
 gcc/testsuite/gcc.target/powerpc/sse-maxps-2.c | 43 ++++++++++++++++++++++++++
 gcc/testsuite/gcc.target/powerpc/sse-minps-2.c | 43 ++++++++++++++++++++++++++
 3 files changed, 90 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse-maxps-2.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/sse-minps-2.c
diff mbox series

Patch

diff --git a/gcc/config/rs6000/xmmintrin.h b/gcc/config/rs6000/xmmintrin.h
index 2cf2bf2..aa2823f 100644
--- a/gcc/config/rs6000/xmmintrin.h
+++ b/gcc/config/rs6000/xmmintrin.h
@@ -438,13 +438,15 @@  _mm_max_ss (__m128 __A, __m128 __B)
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_min_ps (__m128 __A, __m128 __B)
 {
-  return ((__m128)vec_min ((__v4sf)__A,(__v4sf) __B));
+  __vector __bool int __m = vec_cmpgt ((__v4sf) __B, (__v4sf) __A);
+  return vec_sel (__B, __A, __m);  /* Equal/NaN lanes yield __B.  */
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_max_ps (__m128 __A, __m128 __B)
 {
-  return ((__m128)vec_max ((__v4sf)__A, (__v4sf)__B));
+  __vector __bool int __m = vec_cmpgt ((__v4sf) __A, (__v4sf) __B);
+  return vec_sel (__B, __A, __m);  /* Equal/NaN lanes yield __B.  */
 }
 
 /* Perform logical bit-wise operations on 128-bit values.  */
diff --git a/gcc/testsuite/gcc.target/powerpc/sse-maxps-2.c b/gcc/testsuite/gcc.target/powerpc/sse-maxps-2.c
new file mode 100644
index 0000000..5cf9c3f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse-maxps-2.c
@@ -0,0 +1,43 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#define NO_WARN_X86_INTRINSICS 1
+
+#ifndef CHECK_H
+#define CHECK_H "sse-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse_test_maxps_2
+#endif
+
+#include <xmmintrin.h>
+
+static __m128
+__attribute__((noinline, unused))
+test (__m128 s1, __m128 s2)
+{
+  return _mm_max_ps (s1, s2);  /* Intrinsic under test.  */
+}
+
+static void
+TEST (void)
+{
+  union128 u, s1, s2;
+  float e[4];
+  int i;
+  /* Lanes cover: NaN only in s1, only in s2, in both, and in neither.  */
+  s1.x = _mm_set_ps (24.43, __builtin_nanf("1"), __builtin_nanf("2"), 546.46);
+  s2.x = _mm_set_ps (__builtin_nanf("3"), __builtin_nanf("4"), 3.15, 4.14);
+  u.x = test (s1.x, s2.x);
+  /* Expected lanes: '>' is false when either side is NaN, giving s2.  */
+  for (i = 0; i < 4; i++)
+    e[i] = s1.a[i] > s2.a[i] ? s1.a[i] : s2.a[i];
+  /* Byte-wise compare of all 16 bytes, so NaN bit patterns must match.  */
+  if (__builtin_memcmp (&u, e, 16))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/sse-minps-2.c b/gcc/testsuite/gcc.target/powerpc/sse-minps-2.c
new file mode 100644
index 0000000..4cb4b73
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/sse-minps-2.c
@@ -0,0 +1,43 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#define NO_WARN_X86_INTRINSICS 1
+
+#ifndef CHECK_H
+#define CHECK_H "sse-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse_test_minps_2
+#endif
+
+#include <xmmintrin.h>
+
+static __m128
+__attribute__((noinline, unused))
+test (__m128 s1, __m128 s2)
+{
+  return _mm_min_ps (s1, s2);  /* Intrinsic under test.  */
+}
+
+static void
+TEST (void)
+{
+  union128 u, s1, s2;
+  float e[4];
+  int i;
+  /* Lanes cover: NaN only in s1, only in s2, in both, and in neither.  */
+  s1.x = _mm_set_ps (24.43, __builtin_nanf("1"), __builtin_nanf("2"), 546.46);
+  s2.x = _mm_set_ps (__builtin_nanf("3"), __builtin_nanf("4"), 3.15, 4.14);
+  u.x = test (s1.x, s2.x);
+  /* Expected lanes: '<' is false when either side is NaN, giving s2.  */
+  for (i = 0; i < 4; i++)
+    e[i] = s1.a[i] < s2.a[i] ? s1.a[i] : s2.a[i];
+  /* Byte-wise compare of all 16 bytes, so NaN bit patterns must match.  */
+  if (__builtin_memcmp (&u, e, 16))
+    abort ();
+}