[rs6000] Add missing logical-op interfaces to emmintrin.h

Message ID a57f8d4c-bb20-5c1b-72ac-694b34f7b750@linux.ibm.com
State New
Headers show
Series
  • [rs6000] Add missing logical-op interfaces to emmintrin.h
Related show

Commit Message

Bill Schmidt July 11, 2018, 5:26 p.m.
Hi,

It was recently brought to our attention that the existing emmintrin.h
header, which was believed to be feature-complete for SSE2 support, is
actually missing four logical-op interfaces:

 _mm_and_si128
 _mm_andnot_si128
 _mm_or_si128
 _mm_xor_si128

This patch provides those with the obvious implementations, along with
test cases.  I've bootstrapped it on powerpc64le-linux-gnu (P8, P9)
and powerpc64-linux-gnu (P7, P8) and tested it with no regressions.
Is this okay for trunk?

Although this isn't a regression, it is an oversight that leaves the
SSE2 support incomplete.  Thus I'd like to ask permission to also
backport this to gcc-8-branch after a short waiting period.  It's
passed regstrap on P8 and P9 LE, and P7/P8 BE testing is underway.
Is that backport okay if testing succeeds?

[BTW, I'm shepherding this patch on behalf of Steve Munroe.]

Thanks!
Bill


[gcc]

2018-07-10  Bill Schmidt  <wschmidt@linux.ibm.com>
	    Steve Munroe  <munroesj52@gmail.com>

	* config/rs6000/emmintrin.h (_mm_and_si128): New function.
	(_mm_andnot_si128): Likewise.
	(_mm_or_si128): Likewise.
	(_mm_xor_si128): Likewise.

[gcc/testsuite]

2018-07-10  Bill Schmidt  <wschmidt@linux.ibm.com>
	    Steve Munroe  <munroesj52@gmail.com>

	* gcc.target/powerpc/sse2-pand-1.c: New file.
	* gcc.target/powerpc/sse2-pandn-1.c: Likewise.
	* gcc.target/powerpc/sse2-por-1.c: Likewise.
	* gcc.target/powerpc/sse2-pxor-1.c: Likewise.

Comments

Segher Boessenkool July 13, 2018, 5:21 p.m. | #1
Hi!

On Wed, Jul 11, 2018 at 12:26:24PM -0500, Bill Schmidt wrote:
> It was recently brought to our attention that the existing emmintrin.h
> header, which was believed to be feature-complete for SSE2 support, is
> actually missing four logical-op interfaces:
> 
>  _mm_and_si128
>  _mm_andnot_si128
>  _mm_or_si128
>  _mm_xor_si128
> 
> This patch provides those with the obvious implementations, along with
> test cases.  I've bootstrapped it on powerpc64le-linux-gnu (P8, P9)
> and powerpc64-linux-gnu (P7, P8) and tested it with no regressions.
> Is this okay for trunk?
> 
> Although this isn't a regression, it is an oversight that leaves the
> SSE2 support incomplete.  Thus I'd like to ask permission to also
> backport this to gcc-8-branch after a short waiting period.  It's
> passed regstrap on P8 and P9 LE, and P7/P8 BE testing is underway.
> Is that backport okay if testing succeeds?
> 
> [BTW, I'm shepherding this patch on behalf of Steve Munroe.]

This looks fine.  Okay for trunk.  Also okay for 8 (as we discussed, you
probably should check if 8 hasn't diverged from trunk here; it shouldn't
have).

Thanks to both of you,


Segher

Patch

Index: gcc/config/rs6000/emmintrin.h
===================================================================
--- gcc/config/rs6000/emmintrin.h	(revision 262235)
+++ gcc/config/rs6000/emmintrin.h	(working copy)
@@ -1884,6 +1884,30 @@ 
 }
 
 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_and_si128 (__m128i __A, __m128i __B)  /* Bitwise AND of __A and __B.  */
+{
+  return (__m128i)vec_and ((__v2di) __A, (__v2di) __B);  /* Cast to __v2di for vec_and, then back to __m128i.  */
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_andnot_si128 (__m128i __A, __m128i __B)  /* Computes (~__A) & __B.  */
+{
+  return (__m128i)vec_andc ((__v2di) __B, (__v2di) __A);  /* Operands are swapped on purpose: vec_andc complements its second argument.  */
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_or_si128 (__m128i __A, __m128i __B)  /* Bitwise OR of __A and __B.  */
+{
+  return (__m128i)vec_or ((__v2di) __A, (__v2di) __B);  /* Cast to __v2di for vec_or, then back to __m128i.  */
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_xor_si128 (__m128i __A, __m128i __B)  /* Bitwise XOR of __A and __B.  */
+{
+  return (__m128i)vec_xor ((__v2di) __A, (__v2di) __B);  /* Cast to __v2di for vec_xor, then back to __m128i.  */
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_cmpeq_epi8 (__m128i __A, __m128i __B)
 {
   return (__m128i) vec_cmpeq ((__v16qi) __A, (__v16qi)__B);
@@ -2333,3 +2357,4 @@ 
 }
 
 #endif /* EMMINTRIN_H_ */
+
Index: gcc/testsuite/gcc.target/powerpc/sse2-pand-1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/sse2-pand-1.c	(nonexistent)
+++ gcc/testsuite/gcc.target/powerpc/sse2-pand-1.c	(working copy)
@@ -0,0 +1,41 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pand_1
+#endif
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))  /* noinline: keep this wrapper as a real call from TEST.  */
+test (__m128i s1, __m128i s2)
+{
+  return _mm_and_si128 (s1, s2);  /* Intrinsic under test.  */
+}
+
+static void
+TEST (void)  /* Compare _mm_and_si128 with a byte-by-byte scalar AND.  */
+{
+  union128i_b u, s1, s2;
+  char e[16];  /* Expected bytes, computed with scalar &.  */
+  int i;
+   
+  s1.x = _mm_set_epi8 (1,2,3,4,10,20,30,90,-80,-40,-100,-15,98, 25, 98,7);
+  s2.x = _mm_set_epi8 (88, 44, 33, 22, 11, 98, 76, -100, -34, -78, -39, 6, 3, 4, 5, 119);
+  u.x = test (s1.x, s2.x); 
+   
+  for (i = 0; i < 16; i++)
+     e[i] = s1.a[i] & s2.a[i];
+
+  if (check_union128i_b (u, e))  /* A nonzero result is treated as a mismatch.  */
+    abort ();
+}
Index: gcc/testsuite/gcc.target/powerpc/sse2-pandn-1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/sse2-pandn-1.c	(nonexistent)
+++ gcc/testsuite/gcc.target/powerpc/sse2-pandn-1.c	(working copy)
@@ -0,0 +1,41 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pandn_1
+#endif
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))  /* noinline: keep this wrapper as a real call from TEST.  */
+test (__m128i s1, __m128i s2)
+{
+  return _mm_andnot_si128 (s1, s2);  /* Intrinsic under test.  */
+}
+
+static void
+TEST (void)  /* Compare _mm_andnot_si128 with a byte-by-byte scalar (~a) & b.  */
+{
+  union128i_b u, s1, s2;
+  char e[16];  /* Expected bytes, computed with scalar ~ and &.  */
+  int i;
+   
+  s1.x = _mm_set_epi8 (1,2,3,4,10,20,30,90,-80,-40,-100,-15,98, 25, 98,7);
+  s2.x = _mm_set_epi8 (88, 44, 33, 22, 11, 98, 76, -100, -34, -78, -39, 6, 3, 4, 5, 119);
+  u.x = test (s1.x, s2.x); 
+   
+  for (i = 0; i < 16; i++)
+     e[i] = (~s1.a[i]) & s2.a[i];  /* The first operand is the one that is complemented.  */
+
+  if (check_union128i_b (u, e))  /* A nonzero result is treated as a mismatch.  */
+    abort ();
+}
Index: gcc/testsuite/gcc.target/powerpc/sse2-por-1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/sse2-por-1.c	(nonexistent)
+++ gcc/testsuite/gcc.target/powerpc/sse2-por-1.c	(working copy)
@@ -0,0 +1,43 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_por_1
+#endif
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))  /* noinline: keep this wrapper as a real call from TEST.  */
+test (__m128i s1, __m128i s2)
+{
+  return _mm_or_si128 (s1, s2);  /* Intrinsic under test.  */
+}
+
+static void
+TEST (void)  /* Compare _mm_or_si128 with a scalar OR over eight 16-bit elements.  */
+{
+  union128i_w u, s1, s2;
+  short e[8];  /* Expected halfwords, computed with scalar |.  */
+  int i;
+   
+  s1.x = _mm_set_epi16 (10,20,30,90,-80,-40,-100,-15);
+  s2.x = _mm_set_epi16 (11, 98, 76, -100, -34, -78, -39, 14);
+  u.x = test (s1.x, s2.x); 
+   
+  for (i = 0; i < 8; i++)
+    {
+      e[i] = s1.a[i] | s2.a[i];
+    }
+
+  if (check_union128i_w (u, e))  /* A nonzero result is treated as a mismatch.  */
+    abort ();
+}
Index: gcc/testsuite/gcc.target/powerpc/sse2-pxor-1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/sse2-pxor-1.c	(nonexistent)
+++ gcc/testsuite/gcc.target/powerpc/sse2-pxor-1.c	(working copy)
@@ -0,0 +1,41 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target p8vector_hw } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#include CHECK_H
+
+#ifndef TEST
+#define TEST sse2_test_pxor_1
+#endif
+
+#include <emmintrin.h>
+
+static __m128i
+__attribute__((noinline, unused))  /* noinline: keep this wrapper as a real call from TEST.  */
+test (__m128i s1, __m128i s2)
+{
+  return _mm_xor_si128 (s1, s2);  /* Intrinsic under test.  */
+}
+
+static void
+TEST (void)  /* Compare _mm_xor_si128 with a byte-by-byte scalar XOR.  */
+{
+  union128i_ub u, s1, s2;
+  unsigned char e[16] = {0};  /* Expected bytes, computed with scalar ^.  */
+  int i; 
+   
+  s1.x = _mm_set_epi8 (1,2,3,4,10,20,30,90,80,40,100,15,98, 25, 98,7);
+  s2.x = _mm_set_epi8 (88, 44, 33, 22, 11, 98, 76, 100, 34, 78, 39, 6, 3, 4, 5, 119);
+  u.x = test (s1.x, s2.x); 
+ 
+  for (i = 0; i < 16; i++)
+    e[i] = s1.a[i] ^ s2.a[i];
+
+  if (check_union128i_ub (u, e))  /* A nonzero result is treated as a mismatch.  */
+    abort ();
+}