diff mbox series

i386: Add variable vec_set for 64bit vectors [PR97194]

Message ID CAFULd4a_1s6=Sf6Mj4mStr9hm6XUUrgFL5xvMkPZAYNkdegWHA@mail.gmail.com
State New
Headers show
Series i386: Add variable vec_set for 64bit vectors [PR97194] | expand

Commit Message

Uros Bizjak June 17, 2021, 1:22 p.m. UTC
To generate sane code, an SSE4.1 variable PBLENDV instruction is needed.

2021-06-17  Uroš Bizjak  <ubizjak@gmail.com>

gcc/
    PR target/97194
    * config/i386/i386-expand.c (ix86_expand_vector_set_var):
    Handle V2SF mode remapping.  Pass TARGET_MMX_WITH_SSE to
    ix86_expand_vector_init_duplicate.
    (ix86_expand_vector_init_duplicate): Emit insv_1 for
    QImode for !TARGET_PARTIAL_REG_STALL.
    * config/i386/predicates.md (vec_setm_mmx_operand): New predicate.
    * config/i386/mmx.md (vec_setv2sf): Use vec_setm_mmx_operand
    as operand 2 predicate.  Call ix86_expand_vector_set_var
    for non-constant index operand.
    (vec_setv2si): Ditto.
    (vec_setv4hi): Ditto.
    (vec_setv8qi): Ditto.

gcc/testsuite/

    PR target/97194
    * gcc.target/i386/sse4_1-vec-set-1.c: New test.
    * gcc.target/i386/sse4_1-vec-set-2.c: Ditto.

Comments

Uros Bizjak June 17, 2021, 1:23 p.m. UTC | #1
On Thu, Jun 17, 2021 at 3:22 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> To generate sane code a SSE4.1 variable PBLENDV instruction is needed.
>
> 2021-06-17  Uroš Bizjak  <ubizjak@gmail.com>
>
> gcc/
>     PR target/97194
>     * config/i386/i386-expand.c (ix86_expand_vector_set_var):
>     Handle V2SF mode remapping.  Pass TARGET_MMX_WITH_SSE to
>     ix86_expand_vector_init_duplicate.
>     (ix86_expand_vector_init_duplicate): Emit insv_1 for
>     QImode for !TARGET_PARTIAL_REG_STALL.
>     * config/i386/predicates.md (vec_setm_mmx_operand): New predicate.
>     * config/i386/mmx.md (vec_setv2sf): Use vec_setm_mmx_operand
>     as operand 2 predicate.  Call ix86_expand_vector_set_var
>     for non-constant index operand.
>     (vec_setv2si): Ditto.
>     (vec_setv4hi): Ditto.
>     (vec_setv8qi): ditto.
>
> gcc/testsuite/
>
>     PR target/97194
>     * gcc.target/i386/sse4_1-vec-set-1.c: New test.
>     * gcc.target/i386/sse4_1-vec-set-2.c: ditto.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Pushed to master.

Uros.
diff mbox series

Patch

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index eb6f9b0684e..8f4e4e4d884 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -13811,10 +13811,17 @@  ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
 	wsmode = GET_MODE_INNER (wvmode);
 
 	val = convert_modes (wsmode, smode, val, true);
-	x = expand_simple_binop (wsmode, ASHIFT, val,
-				 GEN_INT (GET_MODE_BITSIZE (smode)),
-				 NULL_RTX, 1, OPTAB_LIB_WIDEN);
-	val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
+
+	if (smode == QImode && !TARGET_PARTIAL_REG_STALL)
+	  emit_insn (gen_insv_1 (wsmode, val, val));
+	else
+	  {
+	    x = expand_simple_binop (wsmode, ASHIFT, val,
+				     GEN_INT (GET_MODE_BITSIZE (smode)),
+				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
+	    val = expand_simple_binop (wsmode, IOR, val, x, x, 1,
+				       OPTAB_LIB_WIDEN);
+	  }
 
 	x = gen_reg_rtx (wvmode);
 	ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
@@ -14788,6 +14795,9 @@  ix86_expand_vector_set_var (rtx target, rtx val, rtx idx)
 	case E_V8DFmode:
 	  cmp_mode = V8DImode;
 	  break;
+	case E_V2SFmode:
+	  cmp_mode = V2SImode;
+	  break;
 	case E_V4SFmode:
 	  cmp_mode = V4SImode;
 	  break;
@@ -14809,9 +14819,11 @@  ix86_expand_vector_set_var (rtx target, rtx val, rtx idx)
   idxv = gen_reg_rtx (cmp_mode);
   idx_tmp = convert_to_mode (GET_MODE_INNER (cmp_mode), idx, 1);
 
-  ok = ix86_expand_vector_init_duplicate (false, mode, valv, val);
+  ok = ix86_expand_vector_init_duplicate (TARGET_MMX_WITH_SSE,
+					  mode, valv, val);
   gcc_assert (ok);
-  ok = ix86_expand_vector_init_duplicate (false, cmp_mode, idxv, idx_tmp);
+  ok = ix86_expand_vector_init_duplicate (TARGET_MMX_WITH_SSE,
+					  cmp_mode, idxv, idx_tmp);
   gcc_assert (ok);
   vec[0] = target;
   vec[1] = valv;
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 59a16f4cd50..a107ac5ccb4 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1279,11 +1279,14 @@  (define_insn "*mmx_concatv2sf"
 (define_expand "vec_setv2sf"
   [(match_operand:V2SF 0 "register_operand")
    (match_operand:SF 1 "register_operand")
-   (match_operand 2 "const_int_operand")]
+   (match_operand 2 "vec_setm_mmx_operand")]
   "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
-  ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
-			  INTVAL (operands[2]));
+  if (CONST_INT_P (operands[2]))
+    ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
+			    INTVAL (operands[2]));
+  else
+    ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
   DONE;
 })
 
@@ -2989,11 +2992,14 @@  (define_insn "*mmx_concatv2si"
 (define_expand "vec_setv2si"
   [(match_operand:V2SI 0 "register_operand")
    (match_operand:SI 1 "register_operand")
-   (match_operand 2 "const_int_operand")]
+   (match_operand 2 "vec_setm_mmx_operand")]
   "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
-  ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
-			  INTVAL (operands[2]));
+  if (CONST_INT_P (operands[2]))
+    ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
+			    INTVAL (operands[2]));
+  else
+    ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
   DONE;
 })
 
@@ -3145,11 +3151,14 @@  (define_expand "vec_initv2sisi"
 (define_expand "vec_setv4hi"
   [(match_operand:V4HI 0 "register_operand")
    (match_operand:HI 1 "register_operand")
-   (match_operand 2 "const_int_operand")]
+   (match_operand 2 "vec_setm_mmx_operand")]
   "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
-  ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
-			  INTVAL (operands[2]));
+  if (CONST_INT_P (operands[2]))
+    ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
+			    INTVAL (operands[2]));
+  else
+    ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
   DONE;
 })
 
@@ -3177,11 +3186,14 @@  (define_expand "vec_initv4hihi"
 (define_expand "vec_setv8qi"
   [(match_operand:V8QI 0 "register_operand")
    (match_operand:QI 1 "register_operand")
-   (match_operand 2 "const_int_operand")]
+   (match_operand 2 "vec_setm_mmx_operand")]
   "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
 {
-  ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
-			  INTVAL (operands[2]));
+  if (CONST_INT_P (operands[2]))
+    ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
+			    INTVAL (operands[2]));
+  else
+    ix86_expand_vector_set_var (operands[0], operands[1], operands[2]);
   DONE;
 })
 
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 3dd134e7f22..e7a896874d6 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1026,6 +1026,12 @@  (define_predicate "vec_setm_operand"
 	    (match_test "TARGET_AVX2"))
        (match_code "const_int")))
 
+(define_predicate "vec_setm_mmx_operand"
+  (ior (and (match_operand 0 "register_operand")
+	    (match_test "TARGET_SSE4_1")
+	    (match_test "TARGET_MMX_WITH_SSE"))
+       (match_code "const_int")))
+
 ;; True for registers, or 1 or -1.  Used to optimize double-word shifts.
 (define_predicate "reg_or_pm1_operand"
   (ior (match_operand 0 "register_operand")
diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-1.c b/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-1.c
new file mode 100644
index 00000000000..7c7fd34bbc1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-1.c
@@ -0,0 +1,26 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-msse4.1 -O2" } */
+/* { dg-final { scan-assembler-times {(?n)v?pcmpeq[bwd]} 4 } } */
+/* { dg-final { scan-assembler-times {(?n)v?p?blendv} 4 } } */
+
+typedef char v8qi __attribute__ ((vector_size (8)));
+typedef short v4hi __attribute__ ((vector_size (8)));
+typedef int v2si __attribute__ ((vector_size (8)));
+typedef float v2sf __attribute__ ((vector_size (8)));
+
+#define FOO(VTYPE, TYPE)			\
+  VTYPE						\
+  __attribute__ ((noipa))			\
+  foo_##VTYPE (VTYPE a, TYPE b, unsigned int c)	\
+  {						\
+    a[c] = b;					\
+    return a;					\
+  }						\
+
+FOO (v8qi, char);
+
+FOO (v4hi, short);
+
+FOO (v2si, int);
+
+FOO (v2sf, float);
diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-2.c b/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-2.c
new file mode 100644
index 00000000000..24f80414761
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4_1-vec-set-2.c
@@ -0,0 +1,45 @@ 
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O2 -msse4.1" } */
+
+
+#ifndef CHECK
+#define CHECK "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK
+
+#include "sse4_1-vec-set-1.c"
+
+#define CALC_TEST(vtype, type, N, idx)				\
+do								\
+  {								\
+    int i,val = idx * idx - idx * 3 + 16;			\
+    type res[N],exp[N];						\
+    vtype resv;							\
+    for (i = 0; i < N; i++)					\
+      {								\
+	res[i] = i * i - i * 3 + 15;				\
+	exp[i] = res[i];					\
+      }								\
+    exp[idx] = val;						\
+    resv = foo_##vtype (*(vtype *)&res[0], val, idx);		\
+    for (i = 0; i < N; i++)					\
+      {								\
+	if (resv[i] != exp[i])					\
+	  abort ();						\
+      }								\
+  }								\
+while (0)
+
+static void
+TEST (void)
+{
+  CALC_TEST (v8qi, char, 8, 5);
+  CALC_TEST (v4hi, short, 4, 2);
+  CALC_TEST (v2si, int, 2, 1);
+}