diff mbox

[i386,13/8,AVX-512] Fix argument order for perm and recp intrinsics.

Message ID 20140213104430.GA42503@msticlxl57.ims.intel.com
State New
Headers show

Commit Message

Kirill Yukhin Feb. 13, 2014, 10:44 a.m. UTC
Hello,
I’ve noticed that _mm512_permutexvar_epi[64|32] intrinsics
have wrong arguments order. As per [1] first argument is index.
For vmpermps/vpermpd intrinsics are fine, but I’ve changed tests
to call CALC with same arg order as intrinsic. here is the same 
problem (wrong argument order) with vrcp14s[d|s].
Also avx512er-vrcp28ss-2.c test called wrong intrinsic.

[1]  http://software.intel.com/sites/landingpage/IntrinsicsGuide/

gcc/
	* config/i386/avx512fintrin.h (_mm512_maskz_permutexvar_epi64): Swap
	arguments order in builtin.
	(_mm512_permutexvar_epi64): Ditto.
	(_mm512_mask_permutexvar_epi64): Ditto
	(_mm512_maskz_permutexvar_epi32): Ditto
	(_mm512_permutexvar_epi32): Ditto
	(_mm512_mask_permutexvar_epi32): Ditto
	* config/i386/sse.md (srcp14<mode>): Swap operands.

gcc/testsuite/
	* gcc.target/i386/avx512er-vrcp28ss-2.c: Call rigth intrinsic.
	* gcc.target/i386/avx512f-vpermd-2.c: Fix reference calculations.
	* gcc.target/i386/avx512f-vpermpd-2.c: Ditto.
	* gcc.target/i386/avx512f-vpermps-2.c: Ditto.
	* gcc.target/i386/avx512f-vpermq-var-2.c: Ditto.
	* gcc.target/i386/avx512f-vrcp14sd-2.c: Ditto.
	* gcc.target/i386/avx512f-vrcp14ss-2.c: Ditto.

Is it ok for trunk? Or we should wait until 4.9 fork?

--
Thanks, K

---
 gcc/config/i386/avx512fintrin.h                    | 24 +++++++++++-----------
 gcc/config/i386/sse.md                             |  6 +++---
 .../gcc.target/i386/avx512er-vrcp28ss-2.c          |  2 +-
 gcc/testsuite/gcc.target/i386/avx512f-vpermd-2.c   |  2 +-
 gcc/testsuite/gcc.target/i386/avx512f-vpermpd-2.c  |  4 ++--
 gcc/testsuite/gcc.target/i386/avx512f-vpermps-2.c  |  4 ++--
 .../gcc.target/i386/avx512f-vpermq-var-2.c         |  2 +-
 gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c |  4 ++--
 gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c |  8 ++++----
 9 files changed, 28 insertions(+), 28 deletions(-)

Comments

Uros Bizjak Feb. 13, 2014, 12:37 p.m. UTC | #1
On Thu, Feb 13, 2014 at 11:44 AM, Kirill Yukhin <kirill.yukhin@gmail.com> wrote:

> I've noticed that _mm512_permutexvar_epi[64|32] intrinsics
> have wrong arguments order. As per [1] first argument is index.
> For vmpermps/vpermpd intrinsics are fine, but I've changed tests
> to call CALC with same arg order as intrinsic. here is the same
> problem (wrong argument order) with vrcp14s[d|s].
> Also avx512er-vrcp28ss-2.c test called wrong intrinsic.
>
> [1]  http://software.intel.com/sites/landingpage/IntrinsicsGuide/
>
> gcc/
>         * config/i386/avx512fintrin.h (_mm512_maskz_permutexvar_epi64): Swap
>         arguments order in builtin.
>         (_mm512_permutexvar_epi64): Ditto.
>         (_mm512_mask_permutexvar_epi64): Ditto
>         (_mm512_maskz_permutexvar_epi32): Ditto
>         (_mm512_permutexvar_epi32): Ditto
>         (_mm512_mask_permutexvar_epi32): Ditto
>         * config/i386/sse.md (srcp14<mode>): Swap operands.
>
> gcc/testsuite/
>         * gcc.target/i386/avx512er-vrcp28ss-2.c: Call rigth intrinsic.
>         * gcc.target/i386/avx512f-vpermd-2.c: Fix reference calculations.
>         * gcc.target/i386/avx512f-vpermpd-2.c: Ditto.
>         * gcc.target/i386/avx512f-vpermps-2.c: Ditto.
>         * gcc.target/i386/avx512f-vpermq-var-2.c: Ditto.
>         * gcc.target/i386/avx512f-vrcp14sd-2.c: Ditto.
>         * gcc.target/i386/avx512f-vrcp14ss-2.c: Ditto.
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index a04b289..d3b2dc5 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -1456,12 +1456,12 @@
>    [(set (match_operand:VF_128 0 "register_operand" "=v")
>         (vec_merge:VF_128
>           (unspec:VF_128
> -           [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
> +           [(match_operand:VF_128 2 "nonimmediate_operand" "vm")]
>             UNSPEC_RCP14)
> -         (match_operand:VF_128 2 "register_operand" "v")
> +         (match_operand:VF_128 1 "register_operand" "v")
>           (const_int 1)))]
>    "TARGET_AVX512F"
> -  "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
> +  "vrcp14<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"

Please don't change srcp pattern, it should be defined similar to
vrcpss (aka sse_vmrcpv4sf). You need to switch operand order
elsewhere.

Other than that, the patch is OK.

Uros.
Uros Bizjak Feb. 13, 2014, 12:55 p.m. UTC | #2
On Thu, Feb 13, 2014 at 1:37 PM, Uros Bizjak <ubizjak@gmail.com> wrote:

>> I've noticed that _mm512_permutexvar_epi[64|32] intrinsics
>> have wrong arguments order. As per [1] first argument is index.
>> For vmpermps/vpermpd intrinsics are fine, but I've changed tests
>> to call CALC with same arg order as intrinsic. here is the same
>> problem (wrong argument order) with vrcp14s[d|s].
>> Also avx512er-vrcp28ss-2.c test called wrong intrinsic.
>>
>> [1]  http://software.intel.com/sites/landingpage/IntrinsicsGuide/
>>
>> gcc/
>>         * config/i386/avx512fintrin.h (_mm512_maskz_permutexvar_epi64): Swap
>>         arguments order in builtin.
>>         (_mm512_permutexvar_epi64): Ditto.
>>         (_mm512_mask_permutexvar_epi64): Ditto
>>         (_mm512_maskz_permutexvar_epi32): Ditto
>>         (_mm512_permutexvar_epi32): Ditto
>>         (_mm512_mask_permutexvar_epi32): Ditto
>>         * config/i386/sse.md (srcp14<mode>): Swap operands.
>>
>> gcc/testsuite/
>>         * gcc.target/i386/avx512er-vrcp28ss-2.c: Call rigth intrinsic.
>>         * gcc.target/i386/avx512f-vpermd-2.c: Fix reference calculations.
>>         * gcc.target/i386/avx512f-vpermpd-2.c: Ditto.
>>         * gcc.target/i386/avx512f-vpermps-2.c: Ditto.
>>         * gcc.target/i386/avx512f-vpermq-var-2.c: Ditto.
>>         * gcc.target/i386/avx512f-vrcp14sd-2.c: Ditto.
>>         * gcc.target/i386/avx512f-vrcp14ss-2.c: Ditto.
>>
>> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
>> index a04b289..d3b2dc5 100644
>> --- a/gcc/config/i386/sse.md
>> +++ b/gcc/config/i386/sse.md
>> @@ -1456,12 +1456,12 @@
>>    [(set (match_operand:VF_128 0 "register_operand" "=v")
>>         (vec_merge:VF_128
>>           (unspec:VF_128
>> -           [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
>> +           [(match_operand:VF_128 2 "nonimmediate_operand" "vm")]
>>             UNSPEC_RCP14)
>> -         (match_operand:VF_128 2 "register_operand" "v")
>> +         (match_operand:VF_128 1 "register_operand" "v")
>>           (const_int 1)))]
>>    "TARGET_AVX512F"
>> -  "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
>> +  "vrcp14<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
>
> Please don't change srcp pattern, it should be defined similar to
> vrcpss (aka sse_vmrcpv4sf). You need to switch operand order
> elsewhere.

No, you are correct. Operands should be swapped as in your patch.

The patch is OK for mainline.

Thanks,
Uros.
diff mbox

Patch

diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index d53a40d..b3a4f3a 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -6148,8 +6148,8 @@  extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
 {
-  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __X,
-						     (__v8di) __Y,
+  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
+						     (__v8di) __X,
 						     (__v8di)
 						     _mm512_setzero_si512 (),
 						     __M);
@@ -6159,8 +6159,8 @@  extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
 {
-  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __X,
-						     (__v8di) __Y,
+  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
+						     (__v8di) __X,
 						     (__v8di)
 						     _mm512_setzero_si512 (),
 						     (__mmask8) -1);
@@ -6171,8 +6171,8 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
 			       __m512i __Y)
 {
-  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __X,
-						     (__v8di) __Y,
+  return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
+						     (__v8di) __X,
 						     (__v8di) __W,
 						     __M);
 }
@@ -6181,8 +6181,8 @@  extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
 {
-  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __X,
-						     (__v16si) __Y,
+  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
+						     (__v16si) __X,
 						     (__v16si)
 						     _mm512_setzero_si512 (),
 						     __M);
@@ -6192,8 +6192,8 @@  extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
 {
-  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __X,
-						     (__v16si) __Y,
+  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
+						     (__v16si) __X,
 						     (__v16si)
 						     _mm512_setzero_si512 (),
 						     (__mmask16) -1);
@@ -6204,8 +6204,8 @@  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
 			       __m512i __Y)
 {
-  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __X,
-						     (__v16si) __Y,
+  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
+						     (__v16si) __X,
 						     (__v16si) __W,
 						     __M);
 }
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a04b289..d3b2dc5 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1456,12 +1456,12 @@ 
   [(set (match_operand:VF_128 0 "register_operand" "=v")
 	(vec_merge:VF_128
 	  (unspec:VF_128
-	    [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
+	    [(match_operand:VF_128 2 "nonimmediate_operand" "vm")]
 	    UNSPEC_RCP14)
-	  (match_operand:VF_128 2 "register_operand" "v")
+	  (match_operand:VF_128 1 "register_operand" "v")
 	  (const_int 1)))]
   "TARGET_AVX512F"
-  "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
+  "vrcp14<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sse")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<MODE>")])
diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c
index 499a977..a7be27c 100644
--- a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c
@@ -22,7 +22,7 @@  avx512er_test (void)
 
   res_ref[0] = 1.0 / src.a[0];
 
-  res.x = _mm_rsqrt28_round_ss (src.x, src.x, _MM_FROUND_NO_EXC);
+  res.x = _mm_rcp28_round_ss (src.x, src.x, _MM_FROUND_NO_EXC);
 
   if (checkVf (res.a, res_ref, 4))
     abort ();
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermd-2.c
index db5fd09..1c494e3 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpermd-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermd-2.c
@@ -11,7 +11,7 @@ 
 #include "avx512f-mask-type.h"
 
 static void
-CALC (int *src1, int *mask, int *dst)
+CALC (int *mask, int *src1, int *dst)
 {
   int i;
 
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermpd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermpd-2.c
index 3d168be..00d171b 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpermpd-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermpd-2.c
@@ -10,7 +10,7 @@ 
 #include "avx512f-mask-type.h"
 
 static void
-CALC (double *s1, long long *mask, double *r)
+CALC (long long *mask, double *s1, double *r)
 {
   int i;
 
@@ -41,7 +41,7 @@  TEST (void)
   res2.x = INTRINSIC (_mask_permutexvar_pd) (res2.x, mask, src2.x, src1.x);
   res3.x = INTRINSIC (_maskz_permutexvar_pd) (mask, src2.x, src1.x);
 
-  CALC (src1.a, src2.a, res_ref);
+  CALC (src2.a, src1.a, res_ref);
 
   if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref))
     abort ();
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermps-2.c
index 6182948..53081c4 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpermps-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermps-2.c
@@ -10,7 +10,7 @@ 
 #include "avx512f-mask-type.h"
 
 static void
-CALC (float *s1, int *mask, float *r)
+CALC (int *mask, float *s1, float *r)
 {
   int i;
 
@@ -41,7 +41,7 @@  TEST (void)
   res2.x = INTRINSIC (_mask_permutexvar_ps) (res2.x, mask, src2.x, src1.x);
   res3.x = INTRINSIC (_maskz_permutexvar_ps) (mask, src2.x, src1.x);
 
-  CALC (src1.a, src2.a, res_ref);
+  CALC (src2.a, src1.a, res_ref);
 
   if (UNION_CHECK (AVX512F_LEN, ) (res1, res_ref))
     abort ();
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermq-var-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermq-var-2.c
index 2733e17..ff330a5 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vpermq-var-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermq-var-2.c
@@ -11,7 +11,7 @@ 
 #include "avx512f-mask-type.h"
 
 static void
-CALC (long long *src1, long long *mask, long long *dst)
+CALC (long long *mask, long long *src1, long long *dst)
 {
   int i;
 
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c
index 0c9211a..f944600 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c
@@ -8,8 +8,8 @@ 
 static void
 compute_vrcp14sd (double *s1, double *s2, double *r)
 {
-  r[0] = 1.0 / s1[0];
-  r[1] = s2[1];
+  r[0] = 1.0 / s2[0];
+  r[1] = s1[1];
 }
 
 static void
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c
index 3344dad..7aca591 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c
@@ -8,10 +8,10 @@ 
 static void
 compute_vrcp14ss (float *s1, float *s2, float *r)
 {
-  r[0] = 1.0 / s1[0];
-  r[1] = s2[1];
-  r[2] = s2[2];
-  r[3] = s2[3];
+  r[0] = 1.0 / s2[0];
+  r[1] = s1[1];
+  r[2] = s1[2];
+  r[3] = s1[3];
 }
 
 static void