Message ID | 20140213104430.GA42503@msticlxl57.ims.intel.com |
---|---|
State | New |
Headers | show |
On Thu, Feb 13, 2014 at 11:44 AM, Kirill Yukhin <kirill.yukhin@gmail.com> wrote: > I've noticed that _mm512_permutexvar_epi[64|32] intrinsics > have wrong arguments order. As per [1] first argument is index. > For vmpermps/vpermpd intrinsics are fine, but I've changed tests > to call CALC with same arg order as intrinsic. here is the same > problem (wrong argument order) with vrcp14s[d|s]. > Also avx512er-vrcp28ss-2.c test called wrong intrinsic. > > [1] http://software.intel.com/sites/landingpage/IntrinsicsGuide/ > > gcc/ > * config/i386/avx512fintrin.h (_mm512_maskz_permutexvar_epi64): Swap > arguments order in builtin. > (_mm512_permutexvar_epi64): Ditto. > (_mm512_mask_permutexvar_epi64): Ditto > (_mm512_maskz_permutexvar_epi32): Ditto > (_mm512_permutexvar_epi32): Ditto > (_mm512_mask_permutexvar_epi32): Ditto > * config/i386/sse.md (srcp14<mode>): Swap operands. > > gcc/testsuite/ > * gcc.target/i386/avx512er-vrcp28ss-2.c: Call rigth intrinsic. > * gcc.target/i386/avx512f-vpermd-2.c: Fix reference calculations. > * gcc.target/i386/avx512f-vpermpd-2.c: Ditto. > * gcc.target/i386/avx512f-vpermps-2.c: Ditto. > * gcc.target/i386/avx512f-vpermq-var-2.c: Ditto. > * gcc.target/i386/avx512f-vrcp14sd-2.c: Ditto. > * gcc.target/i386/avx512f-vrcp14ss-2.c: Ditto. > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > index a04b289..d3b2dc5 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -1456,12 +1456,12 @@ > [(set (match_operand:VF_128 0 "register_operand" "=v") > (vec_merge:VF_128 > (unspec:VF_128 > - [(match_operand:VF_128 1 "nonimmediate_operand" "vm")] > + [(match_operand:VF_128 2 "nonimmediate_operand" "vm")] > UNSPEC_RCP14) > - (match_operand:VF_128 2 "register_operand" "v") > + (match_operand:VF_128 1 "register_operand" "v") > (const_int 1)))] > "TARGET_AVX512F" > - "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}" > + "vrcp14<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}" Please don't change srcp pattern, it should be defined similar to vrcpss (aka sse_vmrcpv4sf). You need to switch operand order elsewhere. Other than that, the patch is OK. Uros.
On Thu, Feb 13, 2014 at 1:37 PM, Uros Bizjak <ubizjak@gmail.com> wrote: >> I've noticed that _mm512_permutexvar_epi[64|32] intrinsics >> have wrong arguments order. As per [1] first argument is index. >> For vmpermps/vpermpd intrinsics are fine, but I've changed tests >> to call CALC with same arg order as intrinsic. here is the same >> problem (wrong argument order) with vrcp14s[d|s]. >> Also avx512er-vrcp28ss-2.c test called wrong intrinsic. >> >> [1] http://software.intel.com/sites/landingpage/IntrinsicsGuide/ >> >> gcc/ >> * config/i386/avx512fintrin.h (_mm512_maskz_permutexvar_epi64): Swap >> arguments order in builtin. >> (_mm512_permutexvar_epi64): Ditto. >> (_mm512_mask_permutexvar_epi64): Ditto >> (_mm512_maskz_permutexvar_epi32): Ditto >> (_mm512_permutexvar_epi32): Ditto >> (_mm512_mask_permutexvar_epi32): Ditto >> * config/i386/sse.md (srcp14<mode>): Swap operands. >> >> gcc/testsuite/ >> * gcc.target/i386/avx512er-vrcp28ss-2.c: Call rigth intrinsic. >> * gcc.target/i386/avx512f-vpermd-2.c: Fix reference calculations. >> * gcc.target/i386/avx512f-vpermpd-2.c: Ditto. >> * gcc.target/i386/avx512f-vpermps-2.c: Ditto. >> * gcc.target/i386/avx512f-vpermq-var-2.c: Ditto. >> * gcc.target/i386/avx512f-vrcp14sd-2.c: Ditto. >> * gcc.target/i386/avx512f-vrcp14ss-2.c: Ditto. >> >> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md >> index a04b289..d3b2dc5 100644 >> --- a/gcc/config/i386/sse.md >> +++ b/gcc/config/i386/sse.md >> @@ -1456,12 +1456,12 @@ >> [(set (match_operand:VF_128 0 "register_operand" "=v") >> (vec_merge:VF_128 >> (unspec:VF_128 >> - [(match_operand:VF_128 1 "nonimmediate_operand" "vm")] >> + [(match_operand:VF_128 2 "nonimmediate_operand" "vm")] >> UNSPEC_RCP14) >> - (match_operand:VF_128 2 "register_operand" "v") >> + (match_operand:VF_128 1 "register_operand" "v") >> (const_int 1)))] >> "TARGET_AVX512F" >> - "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}" >> + "vrcp14<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}" > > Please don't change srcp pattern, it should be defined similar to > vrcpss (aka sse_vmrcpv4sf). You need to switch operand order > elsewhere. No, you are correct. Operands should be swapped as in your patch. The patch is OK for mainline. Thanks, Uros.
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index d53a40d..b3a4f3a 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -6148,8 +6148,8 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __X, - (__v8di) __Y, + return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y, + (__v8di) __X, (__v8di) _mm512_setzero_si512 (), __M); @@ -6159,8 +6159,8 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __X, - (__v8di) __Y, + return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y, + (__v8di) __X, (__v8di) _mm512_setzero_si512 (), (__mmask8) -1); @@ -6171,8 +6171,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __X, - (__v8di) __Y, + return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y, + (__v8di) __X, (__v8di) __W, __M); } @@ -6181,8 +6181,8 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __X, - (__v16si) __Y, + return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y, + (__v16si) __X, (__v16si) _mm512_setzero_si512 (), __M); @@ -6192,8 +6192,8 @@ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __X, - (__v16si) __Y, + return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y, + (__v16si) __X, (__v16si) _mm512_setzero_si512 (), (__mmask16) -1); @@ -6204,8 +6204,8 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X, __m512i __Y) { - return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __X, - (__v16si) __Y, + return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y, + (__v16si) __X, (__v16si) __W, __M); } diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index a04b289..d3b2dc5 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1456,12 +1456,12 @@ [(set (match_operand:VF_128 0 "register_operand" "=v") (vec_merge:VF_128 (unspec:VF_128 - [(match_operand:VF_128 1 "nonimmediate_operand" "vm")] + [(match_operand:VF_128 2 "nonimmediate_operand" "vm")] UNSPEC_RCP14) - (match_operand:VF_128 2 "register_operand" "v") + (match_operand:VF_128 1 "register_operand" "v") (const_int 1)))] "TARGET_AVX512F" - "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}" + "vrcp14<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sse") (set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) diff --git a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c index 499a977..a7be27c 100644 --- a/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c @@ -22,7 +22,7 @@ avx512er_test (void) res_ref[0] = 1.0 / src.a[0]; - res.x = _mm_rsqrt28_round_ss (src.x, src.x, _MM_FROUND_NO_EXC); + res.x = _mm_rcp28_round_ss (src.x, src.x, _MM_FROUND_NO_EXC); if (checkVf (res.a, res_ref, 4)) abort (); diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermd-2.c index db5fd09..1c494e3 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpermd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermd-2.c @@ -11,7 +11,7 @@ #include "avx512f-mask-type.h" static void -CALC (int *src1, int *mask, int *dst) +CALC (int *mask, int *src1, int *dst) { int i; diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermpd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermpd-2.c index 3d168be..00d171b 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpermpd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermpd-2.c @@ -10,7 +10,7 @@ #include "avx512f-mask-type.h" static void -CALC (double *s1, long long *mask, double *r) +CALC (long long *mask, double *s1, double *r) { int i; @@ -41,7 +41,7 @@ TEST (void) res2.x = INTRINSIC (_mask_permutexvar_pd) (res2.x, mask, src2.x, src1.x); res3.x = INTRINSIC (_maskz_permutexvar_pd) (mask, src2.x, src1.x); - CALC (src1.a, src2.a, res_ref); + CALC (src2.a, src1.a, res_ref); if (UNION_CHECK (AVX512F_LEN, d) (res1, res_ref)) abort (); diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermps-2.c index 6182948..53081c4 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpermps-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermps-2.c @@ -10,7 +10,7 @@ #include "avx512f-mask-type.h" static void -CALC (float *s1, int *mask, float *r) +CALC (int *mask, float *s1, float *r) { int i; @@ -41,7 +41,7 @@ TEST (void) res2.x = INTRINSIC (_mask_permutexvar_ps) (res2.x, mask, src2.x, src1.x); res3.x = INTRINSIC (_maskz_permutexvar_ps) (mask, src2.x, src1.x); - CALC (src1.a, src2.a, res_ref); + CALC (src2.a, src1.a, res_ref); if (UNION_CHECK (AVX512F_LEN, ) (res1, res_ref)) abort (); diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpermq-var-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpermq-var-2.c index 2733e17..ff330a5 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpermq-var-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpermq-var-2.c @@ -11,7 +11,7 @@ #include "avx512f-mask-type.h" static void -CALC (long long *src1, long long *mask, long long *dst) +CALC (long long *mask, long long *src1, long long *dst) { int i; diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c index 0c9211a..f944600 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c @@ -8,8 +8,8 @@ static void compute_vrcp14sd (double *s1, double *s2, double *r) { - r[0] = 1.0 / s1[0]; - r[1] = s2[1]; + r[0] = 1.0 / s2[0]; + r[1] = s1[1]; } static void diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c index 3344dad..7aca591 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c @@ -8,10 +8,10 @@ static void compute_vrcp14ss (float *s1, float *s2, float *r) { - r[0] = 1.0 / s1[0]; - r[1] = s2[1]; - r[2] = s2[2]; - r[3] = s2[3]; + r[0] = 1.0 / s2[0]; + r[1] = s1[1]; + r[2] = s1[2]; + r[3] = s1[3]; } static void