diff mbox

[i386] Fix operands order in kunpck* insns and corresponding expands

Message ID 20160413132956.GA13305@msticlxl57.ims.intel.com
State New
Headers show

Commit Message

Ilya Enkovich April 13, 2016, 1:29 p.m. UTC
Hi,

Current kunpck[hi|si|di] patterns emit operands in a wrong order. This
is compensated by a wrong operands order in vec_pack_trunc_[qi|hi|si]
expands and therefore we get correct code for vectorized loops.  Code
using kunpck* intrinsics would be wrong though.  This patch fixes
operands order and adds runtime tests for _mm512_kunpack* intrinsics.

Bootstrapped and regtested on x86_64-pc-linux-gnu.  OK for trunk?

Thanks,
Ilya
--
gcc/

2016-04-13  Ilya Enkovich  <ilya.enkovich@intel.com>

	* config/i386/i386.md (kunpckhi): Swap operands.
	(kunpcksi): Likewise.
	(kunpckdi): Likewise.
	* config/i386/sse.md (vec_pack_trunc_qi): Likewise.
	(vec_pack_trunc_<mode>): Likewise.

gcc/testsuite/

2016-04-13  Ilya Enkovich  <ilya.enkovich@intel.com>

	* gcc.target/i386/avx512bw-kunpckdq-2.c: New test.
	* gcc.target/i386/avx512bw-kunpckwd-2.c: New test.
	* gcc.target/i386/avx512f-kunpckbw-2.c: New test.

Comments

Kirill Yukhin April 13, 2016, 1:44 p.m. UTC | #1
Hi Ilya,
On 13 Apr 16:29, Ilya Enkovich wrote:
> Hi,
> 
> Current kunpck[hi|si|di] patterns emit operands in a wrong order. This
> is compensated by a wrong operands order in vec_pack_trunc_[qi|hi|si]
> expands and therefore we get correct code for vectorized loops.  Code
> using kunpck* intrinsics would be wrong though.  This patch fixes
> operands order and adds runtime tests for _mm512_kunpack* intrinsics.
> 
> Bootstrapped and regtested on x86_64-pc-linux-gnu.  OK for trunk?
Patch is OK.
> 
> Thanks,
> Ilya

--
Thanks, K
diff mbox

Patch

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 09da69e..56a3050 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -8907,7 +8907,7 @@ 
 	    (const_int 8))
 	  (zero_extend:HI (match_operand:QI 2 "register_operand" "k"))))]
   "TARGET_AVX512F"
-  "kunpckbw\t{%1, %2, %0|%0, %2, %1}"
+  "kunpckbw\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "mode" "HI")
    (set_attr "type" "msklog")
    (set_attr "prefix" "vex")])
@@ -8920,7 +8920,7 @@ 
 	    (const_int 16))
 	  (zero_extend:SI (match_operand:HI 2 "register_operand" "k"))))]
   "TARGET_AVX512BW"
-  "kunpckwd\t{%1, %2, %0|%0, %2, %1}"
+  "kunpckwd\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "mode" "SI")])
 
 (define_insn "kunpckdi"
@@ -8931,7 +8931,7 @@ 
 	    (const_int 32))
 	  (zero_extend:DI (match_operand:SI 2 "register_operand" "k"))))]
   "TARGET_AVX512BW"
-  "kunpckdq\t{%1, %2, %0|%0, %2, %1}"
+  "kunpckdq\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "mode" "DI")])
 
 ;; See comment for addsi_1_zext why we do use nonimmediate_operand
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 5132955..b64457e 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -11747,16 +11747,16 @@ 
 
 (define_expand "vec_pack_trunc_qi"
   [(set (match_operand:HI 0 ("register_operand"))
-        (ior:HI (ashift:HI (zero_extend:HI (match_operand:QI 1 ("register_operand")))
+        (ior:HI (ashift:HI (zero_extend:HI (match_operand:QI 2 ("register_operand")))
                            (const_int 8))
-                (zero_extend:HI (match_operand:QI 2 ("register_operand")))))]
+                (zero_extend:HI (match_operand:QI 1 ("register_operand")))))]
   "TARGET_AVX512F")
 
 (define_expand "vec_pack_trunc_<mode>"
   [(set (match_operand:<DOUBLEMASKMODE> 0 ("register_operand"))
-        (ior:<DOUBLEMASKMODE> (ashift:<DOUBLEMASKMODE> (zero_extend:<DOUBLEMASKMODE> (match_operand:SWI24 1 ("register_operand")))
+        (ior:<DOUBLEMASKMODE> (ashift:<DOUBLEMASKMODE> (zero_extend:<DOUBLEMASKMODE> (match_operand:SWI24 2 ("register_operand")))
                            (match_dup 3))
-                (zero_extend:<DOUBLEMASKMODE> (match_operand:SWI24 2 ("register_operand")))))]
+                (zero_extend:<DOUBLEMASKMODE> (match_operand:SWI24 1 ("register_operand")))))]
   "TARGET_AVX512BW"
 {
   operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kunpckdq-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-kunpckdq-2.c
new file mode 100644
index 0000000..4fe503e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kunpckdq-2.c
@@ -0,0 +1,24 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw" } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512BW
+
+#include "avx512f-helper.h"
+
+static __mmask64 __attribute__((noinline,noclone))
+unpack (__mmask64 arg1, __mmask64 arg2)
+{
+  __mmask64 res;
+
+  res = _mm512_kunpackd (arg1, arg2);
+
+  return res;
+}
+
+void
+TEST (void)
+{
+  if (unpack (0x07UL, 0x70UL) != 0x0700000070UL)
+    __builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-2.c
new file mode 100644
index 0000000..5d7f895
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-2.c
@@ -0,0 +1,24 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw" } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512BW
+
+#include "avx512f-helper.h"
+
+static __mmask32 __attribute__((noinline,noclone))
+unpack (__mmask32 arg1, __mmask32 arg2)
+{
+  __mmask32 res;
+
+  res = _mm512_kunpackw (arg1, arg2);
+
+  return res;
+}
+
+void
+TEST (void)
+{
+  if (unpack (0x07, 0x70) != 0x070070)
+    __builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-kunpckbw-2.c b/gcc/testsuite/gcc.target/i386/avx512f-kunpckbw-2.c
new file mode 100644
index 0000000..86580f2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-kunpckbw-2.c
@@ -0,0 +1,24 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#define AVX512F
+
+#include "avx512f-helper.h"
+
+static __mmask16 __attribute__((noinline,noclone))
+unpack (__mmask16 arg1, __mmask16 arg2)
+{
+  __mmask16 res;
+
+  res = _mm512_kunpackb (arg1, arg2);
+
+  return res;
+}
+
+void
+TEST (void)
+{
+  if (unpack (0x07, 0x70) != 0x0770)
+    __builtin_abort ();
+}