diff mbox series

[v2,rs6000] (PR84302) Fix _mm_slli_epi{32,64} for shift values 16 through 31 and negative

Message ID 7583e423-6494-5288-0985-01fddc6f59f6@us.ibm.com
State New
Headers show
Series [v2,rs6000] (PR84302) Fix _mm_slli_epi{32,64} for shift values 16 through 31 and negative | expand

Commit Message

Paul A. Clarke April 13, 2018, 8:21 p.m. UTC
The powerpc versions of _mm_slli_epi32 and __mm_slli_epi64 in emmintrin.h
do not properly handle shift values between 16 and 31, inclusive.
These are setting up the shift with vec_splat_s32, which only accepts
*5 bit signed* shift values, or a range of -16 to 15.  Values above 15
produce an error:

  error: argument 1 must be a 5-bit signed literal

Fix is to effectively reduce the range for which vec_splat_s32 is used
to < 32 and use vec_splats otherwise.

Also, __mm_slli_epi{16,32,64}, when given a negative shift value,
should always return a vector of {0}.

2018-04-13  Paul A. Clarke  <pc@us.ibm.com>

Changes in v2:
- fixed the "shift by 0" cases, which were being treated as negative
  shifts and returning {0} instead of a no-op. (Segher)
- fixed the test cases to correctly test the shift-by zero cases.

gcc/

	PR target/83402
	* config/rs6000/emmintrin.h (_mm_slli_epi{16,32,64}):
	Ensure that vec_splat_s32 is only called with 0 <= shift < 16.
	Ensure negative shifts result in {0}.

gcc/testsuite/

	PR target/83402
	* gcc.target/powerpc/sse2-psllw-1.c: Refactor and add tests for
	several values:  positive, negative, and zero.
	* gcc.target/powerpc/sse2-pslld-1.c: Same.
	* gcc.target/powerpc/sse2-psllq-1.c: Same.

--
PC

Comments

Segher Boessenkool April 13, 2018, 10:40 p.m. UTC | #1
Hi!

On Fri, Apr 13, 2018 at 03:21:14PM -0500, Paul Clarke wrote:
> -      if (__builtin_constant_p(__B))
> -	lshift = (__v4su) vec_splat_s32(__B);
> +      if (__builtin_constant_p(__B) && __B < 16)
> +        lshift = (__v4su) vec_splat_s32(__B);
>        else
> -	lshift = vec_splats ((unsigned int) __B);
> +        lshift = vec_splats ((unsigned int) __B);

You changed the tab chars to spaces here.  Please don't.  I'll fix it.

Rest looks fine...  Let's see if I manage to commit it :-)

Thanks,


Segher
Paul A. Clarke April 23, 2018, 7:23 p.m. UTC | #2
On 04/13/2018 05:40 PM, Segher Boessenkool wrote:
> Rest looks fine...  Let's see if I manage to commit it :-)

Thanks, Segher!

Can I push this to ibm/gcc-7-branch?

PC
Segher Boessenkool April 23, 2018, 7:47 p.m. UTC | #3
On Mon, Apr 23, 2018 at 02:23:42PM -0500, Paul Clarke wrote:
> On 04/13/2018 05:40 PM, Segher Boessenkool wrote:
> > Rest looks fine...  Let's see if I manage to commit it :-)
> 
> Thanks, Segher!
> 
> Can I push this to ibm/gcc-7-branch?

Don't ask me, I'm not maintainer of that branch.  I would point you
to the wiki page explaining who owns it, but i cannot find that page.
Ugh.

You would normally get stuff onto the ibm 7 branch by just merging
the fsf 7 branch; is there any reason it cannot go there?


Segher
Paul A. Clarke April 23, 2018, 8:47 p.m. UTC | #4
On 04/23/2018 02:47 PM, Segher Boessenkool wrote:
> On Mon, Apr 23, 2018 at 02:23:42PM -0500, Paul Clarke wrote:
>> Can I push this to ibm/gcc-7-branch?

> Don't ask me, I'm not maintainer of that branch.  I would point you
> to the wiki page explaining who owns it, but i cannot find that page.
> Ugh.

> You would normally get stuff onto the ibm 7 branch by just merging
> the fsf 7 branch; is there any reason it cannot go there?

Yes, the patch applies to code which was itself backported from trunk (GCC 8) and doesn't exist in GCC 7.

PC
Gerald Pfeifer May 22, 2018, 1:42 a.m. UTC | #5
On Mon, 23 Apr 2018, Segher Boessenkool wrote:
>> Can I push this to ibm/gcc-7-branch?
> Don't ask me, I'm not maintainer of that branch.  I would point you
> to the wiki page explaining who owns it, but i cannot find that page.

https://gcc.gnu.org/svn.html

Except that the ibm/gcc-7-branch is not listed there, just a couple
of other ibm/ branches.

Perhaps you guys can address that?

Gerald
diff mbox series

Patch

Index: gcc/config/rs6000/emmintrin.h
===================================================================
--- gcc/config/rs6000/emmintrin.h	(revision 259375)
+++ gcc/config/rs6000/emmintrin.h	(working copy)
@@ -1488,7 +1488,7 @@  _mm_slli_epi16 (__m128i __A, int __B)
   __v8hu lshift;
   __v8hi result = { 0, 0, 0, 0, 0, 0, 0, 0 };
 
-  if (__B < 16)
+  if (__B >= 0 && __B < 16)
     {
       if (__builtin_constant_p(__B))
 	  lshift = (__v8hu) vec_splat_s16(__B);
@@ -1507,12 +1507,12 @@  _mm_slli_epi32 (__m128i __A, int __B)
   __v4su lshift;
   __v4si result = { 0, 0, 0, 0 };
 
-  if (__B < 32)
+  if (__B >= 0 && __B < 32)
     {
-      if (__builtin_constant_p(__B))
-	lshift = (__v4su) vec_splat_s32(__B);
+      if (__builtin_constant_p(__B) && __B < 16)
+        lshift = (__v4su) vec_splat_s32(__B);
       else
-	lshift = vec_splats ((unsigned int) __B);
+        lshift = vec_splats ((unsigned int) __B);
 
       result = vec_vslw ((__v4si) __A, lshift);
     }
@@ -1527,17 +1527,12 @@  _mm_slli_epi64 (__m128i __A, int __B)
   __v2du lshift;
   __v2di result = { 0, 0 };
 
-  if (__B < 64)
+  if (__B >= 0 && __B < 64)
     {
-      if (__builtin_constant_p(__B))
-	{
-	  if (__B < 32)
-	      lshift = (__v2du) vec_splat_s32(__B);
-	    else
-	      lshift = (__v2du) vec_splats((unsigned long long)__B);
-	}
+      if (__builtin_constant_p(__B) && __B < 16)
+	lshift = (__v2du) vec_splat_s32(__B);
       else
-	  lshift = (__v2du) vec_splats ((unsigned int) __B);
+	lshift = (__v2du) vec_splats ((unsigned int) __B);
 
       result = vec_vsld ((__v2di) __A, lshift);
     }
Index: gcc/testsuite/gcc.target/powerpc/sse2-pslld-1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/sse2-pslld-1.c	(revision 259375)
+++ gcc/testsuite/gcc.target/powerpc/sse2-pslld-1.c	(working copy)
@@ -13,32 +13,50 @@ 
 #define TEST sse2_test_pslld_1
 #endif
 
-#define N 0xf
-
 #include <emmintrin.h>
 
-static __m128i
-__attribute__((noinline, unused))
-test (__m128i s1)
-{
-  return _mm_slli_epi32 (s1, N); 
-}
+#define TEST_FUNC(id, N) \
+  static __m128i \
+  __attribute__((noinline, unused)) \
+  test##id (__m128i s1) \
+  { \
+    return _mm_slli_epi32 (s1, N);  \
+  }
 
+TEST_FUNC(0, 0)
+TEST_FUNC(15, 15)
+TEST_FUNC(16, 16)
+TEST_FUNC(31, 31)
+TEST_FUNC(neg1, -1)
+TEST_FUNC(neg16, -16)
+TEST_FUNC(neg32, -32)
+TEST_FUNC(neg64, -64)
+TEST_FUNC(neg128, -128)
+
+#define TEST_CODE(id, N) \
+  { \
+    int e[4] = {0}; \
+    union128i_d u, s; \
+    int i; \
+    s.x = _mm_set_epi32 (1, -2, 3, 4); \
+    u.x = test##id (s.x); \
+    if (N >= 0 && N < 32) \
+      for (i = 0; i < 4; i++) \
+        e[i] = s.a[i] << (N * (N >= 0)); \
+    if (check_union128i_d (u, e)) \
+      abort (); \
+  }
+
 static void
 TEST (void)
 {
-  union128i_d u, s;
-  int e[4] = {0};
-  int i;
- 
-  s.x = _mm_set_epi32 (1, -2, 3, 4);
-
-  u.x = test (s.x);
-
-  if (N < 32)
-    for (i = 0; i < 4; i++)
-      e[i] = s.a[i] << N; 
-
-  if (check_union128i_d (u, e))
-    abort (); 
+  TEST_CODE(0, 0);
+  TEST_CODE(15, 15);
+  TEST_CODE(16, 16);
+  TEST_CODE(31, 31);
+  TEST_CODE(neg1, -1);
+  TEST_CODE(neg16, -16);
+  TEST_CODE(neg32, -32);
+  TEST_CODE(neg64, -64);
+  TEST_CODE(neg128, -128);
 }
Index: gcc/testsuite/gcc.target/powerpc/sse2-psllq-1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/sse2-psllq-1.c	(revision 259375)
+++ gcc/testsuite/gcc.target/powerpc/sse2-psllq-1.c	(working copy)
@@ -13,36 +13,56 @@ 
 #define TEST sse2_test_psllq_1
 #endif
 
-#define N 60
-
 #include <emmintrin.h>
 
 #ifdef _ARCH_PWR8
-static __m128i
-__attribute__((noinline, unused))
-test (__m128i s1)
-{
-  return _mm_slli_epi64 (s1, N); 
-}
+#define TEST_FUNC(id, N) \
+  static __m128i \
+  __attribute__((noinline, unused)) \
+  test##id (__m128i s1) \
+  { \
+    return _mm_slli_epi64 (s1, N);  \
+  }
+
+TEST_FUNC(0, 0)
+TEST_FUNC(15, 15)
+TEST_FUNC(16, 16)
+TEST_FUNC(31, 31)
+TEST_FUNC(63, 63)
+TEST_FUNC(neg1, -1)
+TEST_FUNC(neg16, -16)
+TEST_FUNC(neg32, -32)
+TEST_FUNC(neg64, -64)
+TEST_FUNC(neg128, -128)
 #endif
 
+#define TEST_CODE(id, N) \
+  { \
+    union128i_q u, s; \
+    long long e[2] = {0}; \
+    int i; \
+    s.x = _mm_set_epi64x (-1, 0xf); \
+    u.x = test##id (s.x); \
+    if (N >= 0 && N < 64) \
+      for (i = 0; i < 2; i++) \
+        e[i] = s.a[i] << (N * (N >= 0)); \
+    if (check_union128i_q (u, e)) \
+      abort (); \
+  }
+
 static void
 TEST (void)
 {
 #ifdef _ARCH_PWR8
-  union128i_q u, s;
-  long long e[2] = {0};
-  int i;
- 
-  s.x = _mm_set_epi64x (-1, 0xf);
-
-  u.x = test (s.x);
-
-  if (N < 64)
-    for (i = 0; i < 2; i++)
-      e[i] = s.a[i] << N; 
-
-  if (check_union128i_q (u, e))
-    abort (); 
+  TEST_CODE(0, 0);
+  TEST_CODE(15, 15);
+  TEST_CODE(16, 16);
+  TEST_CODE(31, 31);
+  TEST_CODE(63, 63);
+  TEST_CODE(neg1, -1);
+  TEST_CODE(neg16, -16);
+  TEST_CODE(neg32, -32);
+  TEST_CODE(neg64, -64);
+  TEST_CODE(neg128, -128);
 #endif
 }
Index: gcc/testsuite/gcc.target/powerpc/sse2-psllw-1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/sse2-psllw-1.c	(revision 259375)
+++ gcc/testsuite/gcc.target/powerpc/sse2-psllw-1.c	(working copy)
@@ -13,32 +13,48 @@ 
 #define TEST sse2_test_psllw_1
 #endif
 
-#define N 0xb
-
 #include <emmintrin.h>
 
-static __m128i
-__attribute__((noinline, unused))
-test (__m128i s1)
-{
-  return _mm_slli_epi16 (s1, N); 
-}
+#define TEST_FUNC(id, N) \
+  static __m128i \
+  __attribute__((noinline, unused)) \
+  test##id (__m128i s1) \
+  { \
+    return _mm_slli_epi16 (s1, N);  \
+  }
 
+TEST_FUNC(0, 0)
+TEST_FUNC(15, 15)
+TEST_FUNC(16, 16)
+TEST_FUNC(neg1, -1)
+TEST_FUNC(neg16, -16)
+TEST_FUNC(neg32, -32)
+TEST_FUNC(neg64, -64)
+TEST_FUNC(neg128, -128)
+
+#define TEST_CODE(id, N) \
+  { \
+    short e[8] = {0}; \
+    union128i_w u, s; \
+    int i; \
+    s.x = _mm_set_epi16 (1, 2, 3, 4, 5, 6, 0x7000, 0x9000); \
+    u.x = test##id (s.x); \
+    if (N >= 0 && N < 16) \
+      for (i = 0; i < 8; i++) \
+        e[i] = s.a[i] << (N * (N >= 0)); \
+    if (check_union128i_w (u, e)) \
+      abort (); \
+  }
+
 static void
 TEST (void)
 {
-  union128i_w u, s;
-  short e[8] = {0};
-  int i;
- 
-  s.x = _mm_set_epi16 (1, 2, 3, 4, 5, 6, 0x7000, 0x9000);
-
-  u.x = test (s.x);
-
-  if (N < 16)
-    for (i = 0; i < 8; i++)
-      e[i] = s.a[i] << N; 
-
-  if (check_union128i_w (u, e))
-    abort (); 
+  TEST_CODE(0, 0);
+  TEST_CODE(15, 15);
+  TEST_CODE(16, 16);
+  TEST_CODE(neg1, -1);
+  TEST_CODE(neg16, -16);
+  TEST_CODE(neg32, -32);
+  TEST_CODE(neg64, -64);
+  TEST_CODE(neg128, -128);
 }