diff mbox series

i386: Add comparisons for 4-byte vectors [PR100637]

Message ID CAFULd4YaFLQ_kLOZz=_tUu0zMWF7daWNkS8CJR7sYWAt88UBvw@mail.gmail.com
State New
Headers show
Series i386: Add comparisons for 4-byte vectors [PR100637] | expand

Commit Message

Uros Bizjak May 21, 2021, 11:17 a.m. UTC
2021-05-21  Uroš Bizjak  <ubizjak@gmail.com>

gcc/
    PR target/100637
    * config/i386/i386-expand.c (ix86_expand_sse_movcc):
    Handle V4QI and V2HI modes.
    (ix86_expand_int_sse_cmp): Ditto.
    * config/i386/mmx.md (*<sat_plusminus:insn><VI_32:mode>3):
    New instruction pattern.
    (*eq<VI_32:mode>3): Ditto.
    (*gt<VI_32:mode>3): Ditto.
    (*xop_pcmov_<VI_32:mode>): Ditto.
    (mmx_pblendvb32): Ditto.
    (mmx_pblendvb64): Rename from mmx_pblendvb.
    (vec_cmp<VI_32:mode><VI_32:mode>): New expander.
    (vec_cmpu<VI_32:mode><VI_32:mode>): Ditto.
    (vcond<VI_32:mode><VI_32:mode>): Ditto.
    (vcondu<VI_32:mode><VI_32:mode>): Ditto.
    (vcond_mask_<VI_32:mode><VI_32:mode>): Ditto.

gcc/testsuite/

    PR target/100637
    * g++.target/i386/pr100637-1b.C: New test.
    * g++.target/i386/pr100637-1w.C: Ditto.
    * gcc.target/i386/pr100637-2b.c: Ditto.
    * gcc.target/i386/pr100637-2w.c: Ditto.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Pushed to master.

Uros.
diff mbox series

Patch

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 9f3d41955a2..931b3362144 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -3721,7 +3721,7 @@  ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
 	{
 	  op_true = force_reg (mode, op_true);
 
-	  gen = gen_mmx_pblendvb;
+	  gen = gen_mmx_pblendvb64;
 	  if (mode != V8QImode)
 	    d = gen_reg_rtx (V8QImode);
 	  op_false = gen_lowpart (V8QImode, op_false);
@@ -3729,6 +3729,20 @@  ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
 	  cmp = gen_lowpart (V8QImode, cmp);
 	}
       break;
+    case E_V4QImode:
+    case E_V2HImode:
+      if (TARGET_SSE4_1)
+	{
+	  op_true = force_reg (mode, op_true);
+
+	  gen = gen_mmx_pblendvb32;
+	  if (mode != V4QImode)
+	    d = gen_reg_rtx (V4QImode);
+	  op_false = gen_lowpart (V4QImode, op_false);
+	  op_true = gen_lowpart (V4QImode, op_true);
+	  cmp = gen_lowpart (V4QImode, cmp);
+	}
+      break;
     case E_V16QImode:
     case E_V8HImode:
     case E_V4SImode:
@@ -4241,6 +4255,12 @@  ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
 	      else if (code == GT && TARGET_SSE4_1)
 		gen = gen_sminv8qi3;
 	      break;
+	    case E_V4QImode:
+	      if (code == GTU && TARGET_SSE2)
+		gen = gen_uminv4qi3;
+	      else if (code == GT && TARGET_SSE4_1)
+		gen = gen_sminv4qi3;
+	      break;
 	    case E_V8HImode:
 	      if (code == GTU && TARGET_SSE4_1)
 		gen = gen_uminv8hi3;
@@ -4253,6 +4273,12 @@  ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
 	      else if (code == GT && TARGET_SSE2)
 		gen = gen_sminv4hi3;
 	      break;
+	    case E_V2HImode:
+	      if (code == GTU && TARGET_SSE4_1)
+		gen = gen_uminv2hi3;
+	      else if (code == GT && TARGET_SSE2)
+		gen = gen_sminv2hi3;
+	      break;
 	    case E_V4SImode:
 	      if (TARGET_SSE4_1)
 		gen = (code == GTU) ? gen_uminv4si3 : gen_sminv4si3;
@@ -4327,8 +4353,10 @@  ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
 	    case E_V16HImode:
 	    case E_V16QImode:
 	    case E_V8QImode:
+	    case E_V4QImode:
 	    case E_V8HImode:
 	    case E_V4HImode:
+	    case E_V2HImode:
 	      /* Perform a parallel unsigned saturating subtraction.  */
 	      x = gen_reg_rtx (mode);
 	      emit_insn (gen_rtx_SET
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 5e92be34545..4c42e6d93dc 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1403,6 +1403,20 @@  (define_insn "*mmx_<insn><mode>3"
    (set_attr "type" "mmxadd,sseadd,sseadd")
    (set_attr "mode" "DI,TI,TI")])
 
+(define_insn "*<insn><mode>3"
+  [(set (match_operand:VI_32 0 "register_operand" "=x,Yw")
+        (sat_plusminus:VI_32
+	  (match_operand:VI_32 1 "register_operand" "<comm>0,Yw")
+	  (match_operand:VI_32 2 "register_operand" "x,Yw")))]
+  "TARGET_SSE2
+   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+  "@
+   p<plusminus_mnemonic><mmxvecsize>\t{%2, %0|%0, %2}
+   vp<plusminus_mnemonic><mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "sseadd")
+   (set_attr "mode" "TI")])
+
 (define_expand "mmx_mulv4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
         (mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand")
@@ -2032,6 +2046,20 @@  (define_insn "*mmx_eq<mode>3"
    (set_attr "type" "mmxcmp,ssecmp,ssecmp")
    (set_attr "mode" "DI,TI,TI")])
 
+(define_insn "*eq<mode>3"
+  [(set (match_operand:VI_32 0 "register_operand" "=x,x")
+        (eq:VI_32
+	  (match_operand:VI_32 1 "register_operand" "%0,x")
+	  (match_operand:VI_32 2 "register_operand" "x,x")))]
+  "TARGET_SSE2
+   && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
+  "@
+   pcmpeq<mmxvecsize>\t{%2, %0|%0, %2}
+   vpcmpeq<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "ssecmp")
+   (set_attr "mode" "TI")])
+
 (define_insn "mmx_gt<mode>3"
   [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x")
         (gt:MMXMODEI
@@ -2047,6 +2075,19 @@  (define_insn "mmx_gt<mode>3"
    (set_attr "type" "mmxcmp,ssecmp,ssecmp")
    (set_attr "mode" "DI,TI,TI")])
 
+(define_insn "*gt<mode>3"
+  [(set (match_operand:VI_32 0 "register_operand" "=x,x")
+        (gt:VI_32
+	  (match_operand:VI_32 1 "register_operand" "0,x")
+	  (match_operand:VI_32 2 "register_operand" "x,x")))]
+  "TARGET_SSE2"
+  "@
+   pcmpgt<mmxvecsize>\t{%2, %0|%0, %2}
+   vpcmpgt<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "ssecmp")
+   (set_attr "mode" "TI")])
+
 (define_expand "vec_cmp<mode><mode>"
   [(set (match_operand:MMXMODEI 0 "register_operand")
 	(match_operator:MMXMODEI 1 ""
@@ -2059,6 +2100,18 @@  (define_expand "vec_cmp<mode><mode>"
   DONE;
 })
 
+(define_expand "vec_cmp<mode><mode>"
+  [(set (match_operand:VI_32 0 "register_operand")
+	(match_operator:VI_32 1 ""
+	  [(match_operand:VI_32 2 "register_operand")
+	   (match_operand:VI_32 3 "register_operand")]))]
+  "TARGET_SSE2"
+{
+  bool ok = ix86_expand_int_vec_cmp (operands);
+  gcc_assert (ok);
+  DONE;
+})
+
 (define_expand "vec_cmpu<mode><mode>"
   [(set (match_operand:MMXMODEI 0 "register_operand")
 	(match_operator:MMXMODEI 1 ""
@@ -2071,6 +2124,18 @@  (define_expand "vec_cmpu<mode><mode>"
   DONE;
 })
 
+(define_expand "vec_cmpu<mode><mode>"
+  [(set (match_operand:VI_32 0 "register_operand")
+	(match_operator:VI_32 1 ""
+	  [(match_operand:VI_32 2 "register_operand")
+	   (match_operand:VI_32 3 "register_operand")]))]
+  "TARGET_SSE2"
+{
+  bool ok = ix86_expand_int_vec_cmp (operands);
+  gcc_assert (ok);
+  DONE;
+})
+
 (define_expand "vcond<MMXMODE124:mode><MMXMODEI:mode>"
   [(set (match_operand:MMXMODE124 0 "register_operand")
 	(if_then_else:MMXMODE124
@@ -2088,6 +2153,21 @@  (define_expand "vcond<MMXMODE124:mode><MMXMODEI:mode>"
   DONE;
 })
 
+(define_expand "vcond<mode><mode>"
+  [(set (match_operand:VI_32 0 "register_operand")
+	(if_then_else:VI_32
+	  (match_operator 3 ""
+	    [(match_operand:VI_32 4 "register_operand")
+	     (match_operand:VI_32 5 "register_operand")])
+	  (match_operand:VI_32 1)
+	  (match_operand:VI_32 2)))]
+  "TARGET_SSE2"
+{
+  bool ok = ix86_expand_int_vcond (operands);
+  gcc_assert (ok);
+  DONE;
+})
+
 (define_expand "vcondu<MMXMODE124:mode><MMXMODEI:mode>"
   [(set (match_operand:MMXMODE124 0 "register_operand")
 	(if_then_else:MMXMODE124
@@ -2105,6 +2185,21 @@  (define_expand "vcondu<MMXMODE124:mode><MMXMODEI:mode>"
   DONE;
 })
 
+(define_expand "vcondu<mode><mode>"
+  [(set (match_operand:VI_32 0 "register_operand")
+	(if_then_else:VI_32
+	  (match_operator 3 ""
+	    [(match_operand:VI_32 4 "register_operand")
+	     (match_operand:VI_32 5 "register_operand")])
+	  (match_operand:VI_32 1)
+	  (match_operand:VI_32 2)))]
+  "TARGET_SSE2"
+{
+  bool ok = ix86_expand_int_vcond (operands);
+  gcc_assert (ok);
+  DONE;
+})
+
 (define_expand "vcond_mask_<mode><mmxintvecmodelower>"
   [(set (match_operand:MMXMODE124 0 "register_operand")
 	(vec_merge:MMXMODE124
@@ -2118,7 +2213,20 @@  (define_expand "vcond_mask_<mode><mmxintvecmodelower>"
   DONE;
 })
 
-(define_insn "mmx_pblendvb"
+(define_expand "vcond_mask_<mode><mode>"
+  [(set (match_operand:VI_32 0 "register_operand")
+	(vec_merge:VI_32
+	  (match_operand:VI_32 1 "register_operand")
+	  (match_operand:VI_32 2 "register_operand")
+	  (match_operand:VI_32 3 "register_operand")))]
+  "TARGET_SSE2"
+{
+  ix86_expand_sse_movcc (operands[0], operands[3],
+			 operands[1], operands[2]);
+  DONE;
+})
+
+(define_insn "mmx_pblendvb64"
   [(set (match_operand:V8QI 0 "register_operand" "=Yr,*x,x")
 	(unspec:V8QI
 	  [(match_operand:V8QI 1 "register_operand" "0,0,x")
@@ -2138,6 +2246,26 @@  (define_insn "mmx_pblendvb"
    (set_attr "btver2_decode" "vector")
    (set_attr "mode" "TI")])
 
+(define_insn "mmx_pblendvb32"
+  [(set (match_operand:V4QI 0 "register_operand" "=Yr,*x,x")
+	(unspec:V4QI
+	  [(match_operand:V4QI 1 "register_operand" "0,0,x")
+	   (match_operand:V4QI 2 "register_operand" "Yr,*x,x")
+	   (match_operand:V4QI 3 "register_operand" "Yz,Yz,x")]
+	  UNSPEC_BLENDV))]
+  "TARGET_SSE4_1"
+  "@
+   pblendvb\t{%3, %2, %0|%0, %2, %3}
+   pblendvb\t{%3, %2, %0|%0, %2, %3}
+   vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "isa" "noavx,noavx,avx")
+   (set_attr "type" "ssemov")
+   (set_attr "prefix_extra" "1")
+   (set_attr "length_immediate" "*,*,1")
+   (set_attr "prefix" "orig,orig,vex")
+   (set_attr "btver2_decode" "vector")
+   (set_attr "mode" "TI")])
+
 ;; XOP parallel XMM conditional moves
 (define_insn "*xop_pcmov_<mode>"
   [(set (match_operand:MMXMODE124 0 "register_operand" "=x")
@@ -2149,6 +2277,16 @@  (define_insn "*xop_pcmov_<mode>"
   "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
   [(set_attr "type" "sse4arg")])
 
+(define_insn "*xop_pcmov_<mode>"
+  [(set (match_operand:VI_32 0 "register_operand" "=x")
+        (if_then_else:VI_32
+          (match_operand:VI_32 3 "register_operand" "x")
+          (match_operand:VI_32 1 "register_operand" "x")
+          (match_operand:VI_32 2 "register_operand" "x")))]
+  "TARGET_XOP"
+  "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "sse4arg")])
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Parallel integral logical operations
diff --git a/gcc/testsuite/g++.target/i386/pr100637-1b.C b/gcc/testsuite/g++.target/i386/pr100637-1b.C
new file mode 100644
index 00000000000..35b5df7c9dd
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr100637-1b.C
@@ -0,0 +1,17 @@ 
+/* PR target/100637 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+typedef unsigned char __attribute__((__vector_size__ (4))) __v4qu;
+typedef char __attribute__((__vector_size__ (4))) __v4qi;
+
+__v4qu au, bu;
+__v4qi as, bs;
+
+__v4qu uu (__v4qu a, __v4qu b) { return (a > b) ? au : bu; }
+__v4qu us (__v4qi a, __v4qi b) { return (a > b) ? au : bu; }
+__v4qi su (__v4qu a, __v4qu b) { return (a > b) ? as : bs; }
+__v4qi ss (__v4qi a, __v4qi b) { return (a > b) ? as : bs; }
+
+/* { dg-final { scan-assembler-times "pcmpeqb" 2 } } */
+/* { dg-final { scan-assembler-times "pcmpgtb" 2 } } */
diff --git a/gcc/testsuite/g++.target/i386/pr100637-1w.C b/gcc/testsuite/g++.target/i386/pr100637-1w.C
new file mode 100644
index 00000000000..a3ed06fddee
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr100637-1w.C
@@ -0,0 +1,17 @@ 
+/* PR target/100637 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+typedef unsigned short __attribute__((__vector_size__ (4))) __v2hu;
+typedef short __attribute__((__vector_size__ (4))) __v2hi;
+
+__v2hu au, bu;
+__v2hi as, bs;
+
+__v2hu uu (__v2hu a, __v2hu b) { return (a > b) ? au : bu; }
+__v2hu us (__v2hi a, __v2hi b) { return (a > b) ? au : bu; }
+__v2hi su (__v2hu a, __v2hu b) { return (a > b) ? as : bs; }
+__v2hi ss (__v2hi a, __v2hi b) { return (a > b) ? as : bs; }
+
+/* { dg-final { scan-assembler-times "pcmpeqw" 2 } } */
+/* { dg-final { scan-assembler-times "pcmpgtw" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100637-2b.c b/gcc/testsuite/gcc.target/i386/pr100637-2b.c
new file mode 100644
index 00000000000..04480bb5a9b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr100637-2b.c
@@ -0,0 +1,21 @@ 
+/* PR target/100637 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+typedef char vec __attribute__((vector_size(4)));
+
+vec lt (vec a, vec b) { return a < b; }
+vec le (vec a, vec b) { return a <= b; }
+vec eq (vec a, vec b) { return a == b; }
+vec ne (vec a, vec b) { return a != b; }
+vec ge (vec a, vec b) { return a >= b; }
+vec gt (vec a, vec b) { return a > b; }
+
+typedef unsigned char uvec __attribute__((vector_size(4)));
+
+vec ltu (uvec a, uvec b) { return a < b; }
+vec leu (uvec a, uvec b) { return a <= b; }
+vec geu (uvec a, uvec b) { return a >= b; }
+vec gtu (uvec a, uvec b) { return a > b; }
+
+/* { dg-final { scan-assembler-not "cmpb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100637-2w.c b/gcc/testsuite/gcc.target/i386/pr100637-2w.c
new file mode 100644
index 00000000000..fbbaac983f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr100637-2w.c
@@ -0,0 +1,21 @@ 
+/* PR target/100637 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+typedef short vec __attribute__((vector_size(4)));
+
+vec lt (vec a, vec b) { return a < b; }
+vec le (vec a, vec b) { return a <= b; }
+vec eq (vec a, vec b) { return a == b; }
+vec ne (vec a, vec b) { return a != b; }
+vec ge (vec a, vec b) { return a >= b; }
+vec gt (vec a, vec b) { return a > b; }
+
+typedef unsigned short uvec __attribute__((vector_size(4)));
+
+vec ltu (uvec a, uvec b) { return a < b; }
+vec leu (uvec a, uvec b) { return a <= b; }
+vec geu (uvec a, uvec b) { return a >= b; }
+vec gtu (uvec a, uvec b) { return a > b; }
+
+/* { dg-final { scan-assembler-not "cmpw" } } */