diff mbox series

[committed] i386: Implement V2SI and V4HI shuffles

Message ID CAFULd4a4JCKgFXz2GjUoqW9fbu1R+hbpFj3n-p8Lih1=CwOjCg@mail.gmail.com
State New
Headers show
Series [committed] i386: Implement V2SI and V4HI shuffles | expand

Commit Message

Uros Bizjak May 26, 2020, 1:21 p.m. UTC
2020-05-26  Uroš Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog:
    * config/i386/mmx.md (*mmx_pshufd_1): New insn pattern.
    * config/i386/i386-expand.c (ix86_vectorize_vec_perm_const):
    Handle E_V2SImode and E_V4HImode.
    (expand_vec_perm_even_odd_1): Handle E_V4HImode.
    Assert that E_V2SImode is already handled.
    (expand_vec_perm_broadcast_1): Assert that E_V2SImode
    is already handled by standard shuffle patterns.

gcc/testsuite/ChangeLog:
    * gcc.target/i386/vperm-v2si.c: New test.
    * gcc.target/i386/vperm-v4hi.c: Ditto.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Uros.
diff mbox series

Patch

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 79f827fd653..338b4f7cf4f 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -18634,10 +18634,26 @@  expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
     case E_V2DFmode:
     case E_V4SFmode:
     case E_V2DImode:
+    case E_V2SImode:
     case E_V4SImode:
       /* These are always directly implementable by expand_vec_perm_1.  */
       gcc_unreachable ();
 
+    case E_V4HImode:
+      if (d->testing_p)
+	break;
+      /* We need 2*log2(N)-1 operations to achieve odd/even
+	 with interleave.  */
+      t1 = gen_reg_rtx (V4HImode);
+      emit_insn (gen_mmx_punpckhwd (t1, d->op0, d->op1));
+      emit_insn (gen_mmx_punpcklwd (d->target, d->op0, d->op1));
+      if (odd)
+	t2 = gen_mmx_punpckhwd (d->target, d->target, t1);
+      else
+	t2 = gen_mmx_punpcklwd (d->target, d->target, t1);
+      emit_insn (t2);
+      break;
+
     case E_V8HImode:
       if (TARGET_SSE4_1)
 	return expand_vec_perm_even_odd_pack (d);
@@ -18820,6 +18836,7 @@  expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
     case E_V2DFmode:
     case E_V2DImode:
     case E_V4SFmode:
+    case E_V2SImode:
     case E_V4SImode:
       /* These are always implementable using standard shuffle patterns.  */
       gcc_unreachable ();
@@ -19312,6 +19329,11 @@  ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
       if (d.testing_p && TARGET_SSSE3)
 	return true;
       break;
+    case E_V2SImode:
+    case E_V4HImode:
+      if (!TARGET_MMX_WITH_SSE)
+	return false;
+      break;
     case E_V2DImode:
     case E_V2DFmode:
       if (!TARGET_SSE)
@@ -19344,7 +19366,9 @@  ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
       d.one_operand_p = (which != 3);
 
       /* Implementable with shufps or pshufd.  */
-      if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
+      if (d.one_operand_p
+	  && (d.vmode == V4SFmode
+	      || d.vmode == V4SImode || d.vmode == V2SImode))
 	return true;
 
       /* Otherwise we have to go through the motions and see if we can
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index b5564711aa4..c31b4f81079 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1988,6 +1988,28 @@ 
    (set_attr "length_immediate" "1")
    (set_attr "mode" "DI,TI")])
 
+(define_insn "*mmx_pshufd_1"
+  [(set (match_operand:V2SI 0 "register_operand" "=Yv")
+        (vec_select:V2SI
+          (match_operand:V2SI 1 "register_operand" "Yv")
+          (parallel [(match_operand 2 "const_0_to_1_operand")
+                     (match_operand 3 "const_0_to_1_operand")])))]
+  "TARGET_MMX_WITH_SSE"
+{
+  int mask = 0;  /* Immediate for pshufd, built from the two element selectors.  */
+  mask |= INTVAL (operands[2]) << 0;  /* Bits 1:0: source index for result element 0.  */
+  mask |= INTVAL (operands[3]) << 2;  /* Bits 3:2: source index for result element 1.  */
+  mask |= 2 << 4;  /* Bits 5:4 fixed to 2; NOTE(review): presumably an identity selector for the unused upper half - confirm.  */
+  mask |= 3 << 6;  /* Bits 7:6 fixed to 3; same assumption as above.  */
+  operands[2] = GEN_INT (mask);  /* Operand 2 is overwritten so that %2 prints the combined immediate.  */
+
+  return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
+}
+  [(set_attr "type" "sselog1")
+   (set_attr "prefix_data16" "1")
+   (set_attr "length_immediate" "1")
+   (set_attr "mode" "TI")])
+
 (define_insn "mmx_pswapdv2si2"
   [(set (match_operand:V2SI 0 "register_operand" "=y")
 	(vec_select:V2SI
diff --git a/gcc/testsuite/gcc.target/i386/vperm-v2si.c b/gcc/testsuite/gcc.target/i386/vperm-v2si.c
new file mode 100644
index 00000000000..5b38b316e3b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vperm-v2si.c
@@ -0,0 +1,41 @@ 
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O -msse2" } */
+/* { dg-require-effective-target sse2 } */
+
+#include "isa-check.h"
+#include "sse-os-support.h"
+
+typedef int S;
+typedef int V __attribute__((vector_size(8)));
+typedef int IV __attribute__((vector_size(8)));
+typedef union { S s[2]; V v; } U;
+
+static U i[2], b, c;
+
+extern int memcmp (const void *, const void *, __SIZE_TYPE__);
+#define assert(T) ((T) || (__builtin_trap (), 0))
+
+#define TEST(E0, E1) /* Check one two-element shuffle selector against a scalar reference.  */ \
+  b.v = __builtin_shuffle (i[0].v, i[1].v, (IV){E0, E1}); /* Result under test.  */ \
+  c.s[0] = i[0].s[E0]; /* Expected result, one element at a time; NOTE(review): E0/E1 above 1 index past i[0].s.  */ \
+  c.s[1] = i[0].s[E1]; \
+  __asm__("" : : : "memory"); /* Empty asm with a memory clobber placed before the comparison.  */ \
+  assert (memcmp (&b, &c, sizeof(c)) == 0);
+
+#include "vperm-2-2.inc"
+
+int main()  /* Test driver: fill the input vectors, then run check ().  */
+{
+  check_isa ();
+
+  if (!sse_os_support ())
+    exit (0);  /* Exit with status 0 when sse_os_support () reports false.  */
+
+  i[0].s[0] = 0;  /* Each element holds its own index.  */
+  i[0].s[1] = 1;
+  i[0].s[2] = 2;  /* NOTE(review): U.s has only 2 elements; indices 2-3 run past i[0].s, presumably landing in i[1] - confirm.  */
+  i[0].s[3] = 3;
+
+  check();  /* Presumably defined by the included vperm-2-2.inc - verify.  */
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/vperm-v4hi.c b/gcc/testsuite/gcc.target/i386/vperm-v4hi.c
new file mode 100644
index 00000000000..bff6512672d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vperm-v4hi.c
@@ -0,0 +1,47 @@ 
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O -msse2" } */
+/* { dg-require-effective-target sse2 } */
+
+#include "isa-check.h"
+#include "sse-os-support.h"
+
+typedef short S;
+typedef short V __attribute__((vector_size(8)));
+typedef short IV __attribute__((vector_size(8)));
+typedef union { S s[4]; V v; } U;
+
+static U i[2], b, c;
+
+extern int memcmp (const void *, const void *, __SIZE_TYPE__);
+#define assert(T) ((T) || (__builtin_trap (), 0))
+
+#define TEST(E0, E1, E2, E3) /* Check one four-element shuffle selector against a scalar reference.  */ \
+  b.v = __builtin_shuffle (i[0].v, i[1].v, (IV){E0, E1, E2, E3}); /* Result under test.  */ \
+  c.s[0] = i[0].s[E0]; /* Expected result, one element at a time; NOTE(review): selectors above 3 index past i[0].s.  */ \
+  c.s[1] = i[0].s[E1]; \
+  c.s[2] = i[0].s[E2]; \
+  c.s[3] = i[0].s[E3]; \
+  __asm__("" : : : "memory"); /* Empty asm with a memory clobber placed before the comparison.  */ \
+  assert (memcmp (&b, &c, sizeof(c)) == 0);
+
+#include "vperm-4-2.inc"
+
+int main()  /* Test driver: fill the input vectors, then run check ().  */
+{
+  check_isa ();
+
+  if (!sse_os_support ())
+    exit (0);  /* Exit with status 0 when sse_os_support () reports false.  */
+
+  i[0].s[0] = 0;  /* Each element holds its own index.  */
+  i[0].s[1] = 1;
+  i[0].s[2] = 2;
+  i[0].s[3] = 3;
+  i[0].s[4] = 4;  /* NOTE(review): U.s has only 4 elements; indices 4-7 run past i[0].s, presumably landing in i[1] - confirm.  */
+  i[0].s[5] = 5;
+  i[0].s[6] = 6;
+  i[0].s[7] = 7;
+
+  check();  /* Presumably defined by the included vperm-4-2.inc - verify.  */
+  return 0;
+}