diff mbox series

i386: Introduce mulv2si3 instruction

Message ID CAFULd4aXWfcmDeDtSG59wiXbkGqz+=-XZhdqBkPYegKue3tYSg@mail.gmail.com
State New
Headers show
Series i386: Introduce mulv2si3 instruction | expand

Commit Message

Uros Bizjak May 5, 2023, 12:13 p.m. UTC
For SSE2 targets, the expander unpacks the input elements into the correct
positions in a V4SI vector and emits a PMULUDQ instruction.  The output
elements are then shuffled back to their positions in the V2SI vector.

For SSE4.1 targets, the PMULLD instruction is emitted directly.

gcc/ChangeLog:

    * config/i386/mmx.md (mulv2si3): New expander.
    (*mulv2si3): New insn pattern.

gcc/testsuite/ChangeLog:

    * gcc.target/i386/sse2-mmx-mult-vec.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Pushed to master.

Uros.
diff mbox series

Patch

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 872ddbc55f2..6dd203f4fa8 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -2092,6 +2092,55 @@  (define_insn "*<insn><mode>3"
    (set_attr "type" "sseadd")
    (set_attr "mode" "TI")])
 
+(define_expand "mulv2si3"
+  [(set (match_operand:V2SI 0 "register_operand")
+	(mult:V2SI
+	  (match_operand:V2SI 1 "register_operand")
+	  (match_operand:V2SI 2 "register_operand")))]
+  "TARGET_MMX_WITH_SSE"
+{
+  if (!TARGET_SSE4_1)
+    {
+      rtx op1 = lowpart_subreg (V4SImode, force_reg (V2SImode, operands[1]),
+				V2SImode);
+      rtx op2 = lowpart_subreg (V4SImode, force_reg (V2SImode, operands[2]),
+				V2SImode);
+
+      rtx tmp1 = gen_reg_rtx (V4SImode);
+      emit_insn (gen_vec_interleave_lowv4si (tmp1, op1, op1));
+      rtx tmp2 = gen_reg_rtx (V4SImode);
+      emit_insn (gen_vec_interleave_lowv4si (tmp2, op2, op2));
+
+      rtx res = gen_reg_rtx (V2DImode);
+      emit_insn (gen_vec_widen_umult_even_v4si (res, tmp1, tmp2));
+
+      rtx op0 = gen_reg_rtx (V4SImode);
+      emit_insn (gen_sse2_pshufd_1 (op0, gen_lowpart (V4SImode, res),
+				    const0_rtx, const2_rtx,
+				    const0_rtx, const2_rtx));
+
+      emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
+      DONE;
+    }
+})
+
+(define_insn "*mulv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,v")
+	(mult:V2SI
+	  (match_operand:V2SI 1 "register_operand" "%0,0,v")
+	  (match_operand:V2SI 2 "register_operand" "Yr,*x,v")))]
+  "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
+  "@
+   pmulld\t{%2, %0|%0, %2}
+   pmulld\t{%2, %0|%0, %2}
+   vpmulld\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,noavx,avx")
+   (set_attr "type" "sseimul")
+   (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "orig,orig,vex")
+   (set_attr "btver2_decode" "vector")
+   (set_attr "mode" "TI")])
+
 (define_expand "mmx_mulv4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
         (mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand")
diff --git a/gcc/testsuite/gcc.target/i386/sse2-mmx-mult-vec.c b/gcc/testsuite/gcc.target/i386/sse2-mmx-mult-vec.c
new file mode 100644
index 00000000000..cdc9a7bb8bf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse2-mmx-mult-vec.c
@@ -0,0 +1,27 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+/* { dg-require-effective-target sse2 } */
+
+#include "sse2-check.h"
+
+#define N 2
+
+int a[N] = {-287807, 604344};
+int b[N] = {474362, 874120};
+int r[N];
+
+int rc[N] = {914249338, -11800128};
+
+static void
+sse2_test (void)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    r[i] = a[i] * b[i];
+
+  /* check results:  */
+  for (i = 0; i < N; i++)
+    if (r[i] != rc[i])
+      abort ();
+}