diff mbox series

i386: Introduce V2QImode vector cmove for -msse4.1 [PR103861]

Message ID CAFULd4bviu8ojcEB7Mb552-AuddJ4bgFtQc2mRHdPpwhwHnxsw@mail.gmail.com
State New
Headers show
Series i386: Introduce V2QImode vector cmove for -msse4.1 [PR103861] | expand

Commit Message

Uros Bizjak Jan. 11, 2022, 6:25 p.m. UTC
This patch also moves V2HI and V4QImode vector conditional moves
to SSE4.1 targets.  Vector cmoves are implemented with SSE logic functions
without -msse4.1, and they are hardly worthwhile for narrow vector modes.
More important, we would like to keep vector logic functions for GPR
registers, and the current RTX description of 32-bit vector modes logic
insns does not include the necessary CC reg clobber.  Solve these issues by
restricting vector cmove insns for these modes to -msse4.1, where logic
instructions are avoided, and pblend insn is used instead.

A follow-up patch will add clobbers and necessary splits to 32-bit
vector mode logic insns, and in a future patch, ix86_sse_movcc will be
improved to use expand_simple_{unop,binop} to emit logic insns, allowing
us to re-enable 16-bit and 32-bit narrow vector cmoves for -msse2.

2022-01-11  Uroš Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog:

    PR target/103861
    * config/i386/mmx.md (vcond<mode><mode>):
    Use VI_16_32 mode iterator.  Enable for TARGET_SSE4_1.
    (vcondu<mode><mode>): Ditto.
    (vcond_mask_<mode><mode>): Ditto.
    (mmx_pblendvb_v8qi): Rename from mmx_pblendvb64.
    (mmx_pblendvb_<mode>): Rename from mmx_pblendvb32.
    Use VI_16_32 mode iterator.
    * config/i386/i386-expand.c (ix86_expand_sse_movcc):
    Update for rename.  Handle V2QImode.
    (expand_vec_perm_blend): Update for rename.

gcc/testsuite/ChangeLog:

    PR target/103861
    * g++.target/i386/pr100637-1b.C (dg-options):
    Use -msse4 instead of -msse2.
    * g++.target/i386/pr100637-1w.C (dg-options): Ditto.
    * g++.target/i386/pr103861-1.C: New test.
    * gcc.target/i386/pr100637-4b.c (dg-options):
    Use -msse4 instead of -msse2.
    * gcc.target/i386/pr103861-4.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Pushed to master.

Uros.
diff mbox series

Patch

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index add748bcf40..8b1266fb9f1 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -3899,7 +3899,7 @@  ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
 	{
 	  op_true = force_reg (mode, op_true);
 
-	  gen = gen_mmx_pblendvb64;
+	  gen = gen_mmx_pblendvb_v8qi;
 	  if (mode != V8QImode)
 	    d = gen_reg_rtx (V8QImode);
 	  op_false = gen_lowpart (V8QImode, op_false);
@@ -3913,7 +3913,7 @@  ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
 	{
 	  op_true = force_reg (mode, op_true);
 
-	  gen = gen_mmx_pblendvb32;
+	  gen = gen_mmx_pblendvb_v4qi;
 	  if (mode != V4QImode)
 	    d = gen_reg_rtx (V4QImode);
 	  op_false = gen_lowpart (V4QImode, op_false);
@@ -3921,6 +3921,14 @@  ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
 	  cmp = gen_lowpart (V4QImode, cmp);
 	}
       break;
+    case E_V2QImode:
+      if (TARGET_SSE4_1)
+	{
+	  op_true = force_reg (mode, op_true);
+
+	  gen = gen_mmx_pblendvb_v2qi;
+	}
+      break;
     case E_V16QImode:
     case E_V8HImode:
     case E_V8HFmode:
@@ -18462,9 +18470,9 @@  expand_vec_perm_blend (struct expand_vec_perm_d *d)
 	    vperm = force_reg (vmode, vperm);
 
 	    if (GET_MODE_SIZE (vmode) == 4)
-	      emit_insn (gen_mmx_pblendvb32 (target, op0, op1, vperm));
+	      emit_insn (gen_mmx_pblendvb_v4qi (target, op0, op1, vperm));
 	    else if (GET_MODE_SIZE (vmode) == 8)
-	      emit_insn (gen_mmx_pblendvb64 (target, op0, op1, vperm));
+	      emit_insn (gen_mmx_pblendvb_v8qi (target, op0, op1, vperm));
 	    else if (GET_MODE_SIZE (vmode) == 16)
 	      emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
 	    else
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 91d642187be..fa67278e003 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -2580,14 +2580,14 @@ 
 })
 
 (define_expand "vcond<mode><mode>"
-  [(set (match_operand:VI_32 0 "register_operand")
-	(if_then_else:VI_32
+  [(set (match_operand:VI_16_32 0 "register_operand")
+	(if_then_else:VI_16_32
 	  (match_operator 3 ""
-	    [(match_operand:VI_32 4 "register_operand")
-	     (match_operand:VI_32 5 "register_operand")])
-	  (match_operand:VI_32 1)
-	  (match_operand:VI_32 2)))]
-  "TARGET_SSE2"
+	    [(match_operand:VI_16_32 4 "register_operand")
+	     (match_operand:VI_16_32 5 "register_operand")])
+	  (match_operand:VI_16_32 1)
+	  (match_operand:VI_16_32 2)))]
+  "TARGET_SSE4_1"
 {
   bool ok = ix86_expand_int_vcond (operands);
   gcc_assert (ok);
@@ -2612,14 +2612,14 @@ 
 })
 
 (define_expand "vcondu<mode><mode>"
-  [(set (match_operand:VI_32 0 "register_operand")
-	(if_then_else:VI_32
+  [(set (match_operand:VI_16_32 0 "register_operand")
+	(if_then_else:VI_16_32
 	  (match_operator 3 ""
-	    [(match_operand:VI_32 4 "register_operand")
-	     (match_operand:VI_32 5 "register_operand")])
-	  (match_operand:VI_32 1)
-	  (match_operand:VI_32 2)))]
-  "TARGET_SSE2"
+	    [(match_operand:VI_16_32 4 "register_operand")
+	     (match_operand:VI_16_32 5 "register_operand")])
+	  (match_operand:VI_16_32 1)
+	  (match_operand:VI_16_32 2)))]
+  "TARGET_SSE4_1"
 {
   bool ok = ix86_expand_int_vcond (operands);
   gcc_assert (ok);
@@ -2640,19 +2640,19 @@ 
 })
 
 (define_expand "vcond_mask_<mode><mode>"
-  [(set (match_operand:VI_32 0 "register_operand")
-	(vec_merge:VI_32
-	  (match_operand:VI_32 1 "register_operand")
-	  (match_operand:VI_32 2 "register_operand")
-	  (match_operand:VI_32 3 "register_operand")))]
-  "TARGET_SSE2"
+  [(set (match_operand:VI_16_32 0 "register_operand")
+	(vec_merge:VI_16_32
+	  (match_operand:VI_16_32 1 "register_operand")
+	  (match_operand:VI_16_32 2 "register_operand")
+	  (match_operand:VI_16_32 3 "register_operand")))]
+  "TARGET_SSE4_1"
 {
   ix86_expand_sse_movcc (operands[0], operands[3],
 			 operands[1], operands[2]);
   DONE;
 })
 
-(define_insn "mmx_pblendvb64"
+(define_insn "mmx_pblendvb_v8qi"
   [(set (match_operand:V8QI 0 "register_operand" "=Yr,*x,x")
 	(unspec:V8QI
 	  [(match_operand:V8QI 1 "register_operand" "0,0,x")
@@ -2672,12 +2672,12 @@ 
    (set_attr "btver2_decode" "vector")
    (set_attr "mode" "TI")])
 
-(define_insn "mmx_pblendvb32"
-  [(set (match_operand:V4QI 0 "register_operand" "=Yr,*x,x")
-	(unspec:V4QI
-	  [(match_operand:V4QI 1 "register_operand" "0,0,x")
-	   (match_operand:V4QI 2 "register_operand" "Yr,*x,x")
-	   (match_operand:V4QI 3 "register_operand" "Yz,Yz,x")]
+(define_insn "mmx_pblendvb_<mode>"
+  [(set (match_operand:VI_16_32 0 "register_operand" "=Yr,*x,x")
+	(unspec:VI_16_32
+	  [(match_operand:VI_16_32 1 "register_operand" "0,0,x")
+	   (match_operand:VI_16_32 2 "register_operand" "Yr,*x,x")
+	   (match_operand:VI_16_32 3 "register_operand" "Yz,Yz,x")]
 	  UNSPEC_BLENDV))]
   "TARGET_SSE4_1"
   "@
diff --git a/gcc/testsuite/g++.target/i386/pr100637-1b.C b/gcc/testsuite/g++.target/i386/pr100637-1b.C
index 35b5df7c9dd..d602ac08b4d 100644
--- a/gcc/testsuite/g++.target/i386/pr100637-1b.C
+++ b/gcc/testsuite/g++.target/i386/pr100637-1b.C
@@ -1,6 +1,6 @@ 
 /* PR target/100637 */
 /* { dg-do compile } */
-/* { dg-options "-O2 -msse2" } */
+/* { dg-options "-O2 -msse4" } */
 
 typedef unsigned char __attribute__((__vector_size__ (4))) __v4qu;
 typedef char __attribute__((__vector_size__ (4))) __v4qi;
diff --git a/gcc/testsuite/g++.target/i386/pr100637-1w.C b/gcc/testsuite/g++.target/i386/pr100637-1w.C
index a3ed06fddee..c6056454897 100644
--- a/gcc/testsuite/g++.target/i386/pr100637-1w.C
+++ b/gcc/testsuite/g++.target/i386/pr100637-1w.C
@@ -1,6 +1,6 @@ 
 /* PR target/100637 */
 /* { dg-do compile } */
-/* { dg-options "-O2 -msse2" } */
+/* { dg-options "-O2 -msse4" } */
 
 typedef unsigned short __attribute__((__vector_size__ (4))) __v2hu;
 typedef short __attribute__((__vector_size__ (4))) __v2hi;
diff --git a/gcc/testsuite/g++.target/i386/pr103861-1.C b/gcc/testsuite/g++.target/i386/pr103861-1.C
new file mode 100644
index 00000000000..940c939e04f
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr103861-1.C
@@ -0,0 +1,17 @@ 
+/* PR target/103861 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4" } */
+
+typedef unsigned char __attribute__((__vector_size__ (2))) __v2qu;
+typedef char __attribute__((__vector_size__ (2))) __v2qi;
+
+__v2qu au, bu;
+__v2qi as, bs;
+
+__v2qu uu (__v2qu a, __v2qu b) { return (a > b) ? au : bu; }
+__v2qu us (__v2qi a, __v2qi b) { return (a > b) ? au : bu; }
+__v2qi su (__v2qu a, __v2qu b) { return (a > b) ? as : bs; }
+__v2qi ss (__v2qi a, __v2qi b) { return (a > b) ? as : bs; }
+
+/* { dg-final { scan-assembler-times "pcmpeqb" 2 } } */
+/* { dg-final { scan-assembler-times "pcmpgtb" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100637-4b.c b/gcc/testsuite/gcc.target/i386/pr100637-4b.c
index 198e3dd3352..add4506e4c1 100644
--- a/gcc/testsuite/gcc.target/i386/pr100637-4b.c
+++ b/gcc/testsuite/gcc.target/i386/pr100637-4b.c
@@ -1,6 +1,6 @@ 
 /* PR target/100637 */
 /* { dg-do compile } */
-/* { dg-options "-O2 -ftree-vectorize -msse2" } */
+/* { dg-options "-O2 -ftree-vectorize -msse4" } */
 
 typedef char T;
 
diff --git a/gcc/testsuite/gcc.target/i386/pr103861-4.c b/gcc/testsuite/gcc.target/i386/pr103861-4.c
new file mode 100644
index 00000000000..54c1859b027
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr103861-4.c
@@ -0,0 +1,19 @@ 
+/* PR target/103861 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msse4" } */
+
+typedef char T;
+
+#define M 2
+
+extern T a[M], b[M], s1[M], s2[M], r[M];
+
+void foo (void)
+{
+  int j;
+
+  for (j = 0; j < M; j++)
+    r[j] = (a[j] < b[j]) ? s1[j] : s2[j];
+}
+
+/* { dg-final { scan-assembler "pcmpgtb" { xfail *-*-* } } } */