diff mbox series

i386: Implement FP vector compares for V2SFmode [PR98218]

Message ID CAFULd4ZGT=Qm5dnGUuuoQqbWBM_FX8DvzpgHa=1B86u6x=Herg@mail.gmail.com
State New
Headers show
Series i386: Implement FP vector compares for V2SFmode [PR98218] | expand

Commit Message

Uros Bizjak May 12, 2021, 6:18 a.m. UTC
Implement FP vector compares for V2SFmode for TARGET_MMX_WITH_SSE.

2021-05-12  Uroš Bizjak  <ubizjak@gmail.com>

gcc/
    PR target/98218
    * config/i386/i386-expand.c (ix86_expand_sse_movcc): Handle V2SF mode.
    * config/i386/mmx.md (MMXMODE124): New mode iterator.
    (V2FI): Ditto.
    (mmxintvecmode): New mode attribute.
    (mmxintvecmodelower): Ditto.
    (*mmx_maskcmpv2sf3_comm): New insn pattern.
    (*mmx_maskcmpv2sf3): Ditto.
    (vec_cmpv2sfv2si): New expander.
    (vcond<V2FI:mode>v2sf): Ditto.
    (mmx_blendvps): New insn pattern.
    (vcond<MMXMODE124:mode><MMXMODEI:mode>): Also handle V2SFmode.
    (vcondu<MMXMODE124:mode><MMXMODEI:mode>): Ditto.
    (vcond_mask_<mode><mmxintvecmodelower>): Ditto.

gcc/testsuite/

    PR target/98218
    * g++.target/i386/pr98218-1.C: New test.
    * gcc.target/i386/pr98218-4.c: New test.

    * gcc.target/i386/pr98218-1.c: Correct PR number.
    * gcc.target/i386/pr98218-1a.c: Ditto.
    * gcc.target/i386/pr98218-2.c: Ditto.
    * gcc.target/i386/pr98218-2a.c: Ditto.
    * gcc.target/i386/pr98218-3.c: Ditto.
    * gcc.target/i386/pr98218-3a.c: Ditto.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Pushed to master.

Uros.
diff mbox series

Patch

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 5cfde5b3d30..dd230081b16 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -3680,6 +3680,13 @@  ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
 
   switch (mode)
     {
+    case E_V2SFmode:
+      if (TARGET_SSE4_1)
+	{
+	  gen = gen_mmx_blendvps;
+	  op_true = force_reg (mode, op_true);
+	}
+      break;
     case E_V4SFmode:
       if (TARGET_SSE4_1)
 	gen = gen_sse4_1_blendvps;
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index f08570856f9..d433c524652 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -49,6 +49,7 @@  (define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI (V1DI "TARGET_SSE2")])
 
 ;; All 8-byte vector modes handled by MMX
 (define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF])
+(define_mode_iterator MMXMODE124 [V8QI V4HI V2SI V2SF])
 
 ;; Mix-n-match
 (define_mode_iterator MMXMODE12 [V8QI V4HI])
@@ -56,12 +57,22 @@  (define_mode_iterator MMXMODE14 [V8QI V2SI])
 (define_mode_iterator MMXMODE24 [V4HI V2SI])
 (define_mode_iterator MMXMODE248 [V4HI V2SI V1DI])
 
+;; All V2S* modes
+(define_mode_iterator V2FI [V2SF V2SI])
+
 ;; Mapping from integer vector mode to mnemonic suffix
 (define_mode_attr mmxvecsize [(V8QI "b") (V4HI "w") (V2SI "d") (V1DI "q")])
 
 (define_mode_attr mmxdoublemode
   [(V8QI "V8HI") (V4HI "V4SI")])
 
+;; Mapping of vector float modes to an integer mode of the same size
+(define_mode_attr mmxintvecmode
+  [(V2SF "V2SI") (V2SI "V2SI") (V4HI "V4HI") (V8QI "V8QI")])
+
+(define_mode_attr mmxintvecmodelower
+  [(V2SF "v2si") (V2SI "v2si") (V4HI "v4hi") (V8QI "v8qi")])
+
 (define_mode_attr Yv_Yw
   [(V8QI "Yw") (V4HI "Yw") (V2SI "Yv") (V1DI "Yv") (V2SF "Yv")])
 
@@ -714,6 +725,85 @@  (define_insn "mmx_gev2sf3"
    (set_attr "prefix_extra" "1")
    (set_attr "mode" "V2SF")])
 
+(define_insn "*mmx_maskcmpv2sf3_comm"
+  [(set (match_operand:V2SF 0 "register_operand" "=x,x")
+	(match_operator:V2SF 3 "sse_comparison_operator"
+	  [(match_operand:V2SF 1 "register_operand" "%0,x")
+	   (match_operand:V2SF 2 "register_operand" "x,x")]))]
+  "TARGET_MMX_WITH_SSE
+   && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
+  "@
+   cmp%D3ps\t{%2, %0|%0, %2}
+   vcmp%D3ps\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "orig,vex")
+   (set_attr "mode" "V4SF")])
+
+(define_insn "*mmx_maskcmpv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=x,x")
+	(match_operator:V2SF 3 "sse_comparison_operator"
+	  [(match_operand:V2SF 1 "register_operand" "0,x")
+	   (match_operand:V2SF 2 "register_operand" "x,x")]))]
+  "TARGET_MMX_WITH_SSE"
+  "@
+   cmp%D3ps\t{%2, %0|%0, %2}
+   vcmp%D3ps\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "orig,vex")
+   (set_attr "mode" "V4SF")])
+
+(define_expand "vec_cmpv2sfv2si"
+  [(set (match_operand:V2SI 0 "register_operand")
+	(match_operator:V2SI 1 ""
+	  [(match_operand:V2SF 2 "register_operand")
+	   (match_operand:V2SF 3 "register_operand")]))]
+  "TARGET_MMX_WITH_SSE"
+{
+  bool ok = ix86_expand_fp_vec_cmp (operands);
+  gcc_assert (ok);
+  DONE;
+})
+
+(define_expand "vcond<mode>v2sf"
+  [(set (match_operand:V2FI 0 "register_operand")
+	(if_then_else:V2FI
+	  (match_operator 3 ""
+	    [(match_operand:V2SF 4 "register_operand")
+	     (match_operand:V2SF 5 "register_operand")])
+	  (match_operand:V2FI 1)
+	  (match_operand:V2FI 2)))]
+  "TARGET_MMX_WITH_SSE"
+{
+  bool ok = ix86_expand_fp_vcond (operands);
+  gcc_assert (ok);
+  DONE;
+})
+
+(define_insn "mmx_blendvps"
+  [(set (match_operand:V2SF 0 "register_operand" "=Yr,*x,x")
+	(unspec:V2SF
+	  [(match_operand:V2SF 1 "register_operand" "0,0,x")
+	   (match_operand:V2SF 2 "register_operand" "Yr,*x,x")
+	   (match_operand:V2SF 3 "register_operand" "Yz,Yz,x")]
+	  UNSPEC_BLENDV))]
+  "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
+  "@
+   blendvps\t{%3, %2, %0|%0, %2, %3}
+   blendvps\t{%3, %2, %0|%0, %2, %3}
+   vblendvps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "isa" "noavx,noavx,avx")
+   (set_attr "type" "ssemov")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix_data16" "1,1,*")
+   (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "orig,orig,vex")
+   (set_attr "btver2_decode" "vector")
+   (set_attr "mode" "V4SF")])
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Parallel single-precision floating point logical operations
@@ -1657,42 +1747,46 @@  (define_expand "vec_cmpu<mode><mode>"
   DONE;
 })
 
-(define_expand "vcond<mode><mode>"
-  [(set (match_operand:MMXMODEI 0 "register_operand")
-	(if_then_else:MMXMODEI
+(define_expand "vcond<MMXMODE124:mode><MMXMODEI:mode>"
+  [(set (match_operand:MMXMODE124 0 "register_operand")
+	(if_then_else:MMXMODE124
 	  (match_operator 3 ""
 	    [(match_operand:MMXMODEI 4 "register_operand")
 	     (match_operand:MMXMODEI 5 "register_operand")])
-	  (match_operand:MMXMODEI 1)
-	  (match_operand:MMXMODEI 2)))]
-  "TARGET_MMX_WITH_SSE"
+	  (match_operand:MMXMODE124 1)
+	  (match_operand:MMXMODE124 2)))]
+  "TARGET_MMX_WITH_SSE
+   && (GET_MODE_NUNITS (<MMXMODE124:MODE>mode)
+       == GET_MODE_NUNITS (<MMXMODEI:MODE>mode))"
 {
   bool ok = ix86_expand_int_vcond (operands);
   gcc_assert (ok);
   DONE;
 })
 
-(define_expand "vcondu<mode><mode>"
-  [(set (match_operand:MMXMODEI 0 "register_operand")
-	(if_then_else:MMXMODEI
+(define_expand "vcondu<MMXMODE124:mode><MMXMODEI:mode>"
+  [(set (match_operand:MMXMODE124 0 "register_operand")
+	(if_then_else:MMXMODE124
 	  (match_operator 3 ""
 	    [(match_operand:MMXMODEI 4 "register_operand")
 	     (match_operand:MMXMODEI 5 "register_operand")])
-	  (match_operand:MMXMODEI 1)
-	  (match_operand:MMXMODEI 2)))]
-  "TARGET_MMX_WITH_SSE"
+	  (match_operand:MMXMODE124 1)
+	  (match_operand:MMXMODE124 2)))]
+  "TARGET_MMX_WITH_SSE
+   && (GET_MODE_NUNITS (<MMXMODE124:MODE>mode)
+       == GET_MODE_NUNITS (<MMXMODEI:MODE>mode))"
 {
   bool ok = ix86_expand_int_vcond (operands);
   gcc_assert (ok);
   DONE;
 })
 
-(define_expand "vcond_mask_<mode><mode>"
-  [(set (match_operand:MMXMODEI 0 "register_operand")
-	(vec_merge:MMXMODEI
-	  (match_operand:MMXMODEI 1 "register_operand")
-	  (match_operand:MMXMODEI 2 "register_operand")
-	  (match_operand:MMXMODEI 3 "register_operand")))]
+(define_expand "vcond_mask_<mode><mmxintvecmodelower>"
+  [(set (match_operand:MMXMODE124 0 "register_operand")
+	(vec_merge:MMXMODE124
+	  (match_operand:MMXMODE124 1 "register_operand")
+	  (match_operand:MMXMODE124 2 "register_operand")
+	  (match_operand:<mmxintvecmode> 3 "register_operand")))]
   "TARGET_MMX_WITH_SSE"
 {
   ix86_expand_sse_movcc (operands[0], operands[3],
diff --git a/gcc/testsuite/g++.target/i386/pr98218-1.C b/gcc/testsuite/g++.target/i386/pr98218-1.C
new file mode 100644
index 00000000000..61ea4bf9008
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr98218-1.C
@@ -0,0 +1,20 @@ 
+/* PR target/98218 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2" } */
+
+typedef unsigned int __attribute__((__vector_size__ (8))) v64u32;
+typedef int __attribute__((__vector_size__ (8))) v64s32;
+typedef float __attribute__((__vector_size__ (8))) v64f32;
+
+v64u32 au, bu;
+v64s32 as, bs;
+v64f32 af, bf;
+
+v64u32 tu (v64f32 a, v64f32 b) { return (a > b) ? au : bu; }
+v64s32 ts (v64f32 a, v64f32 b) { return (a > b) ? as : bs; }
+v64f32 fu (v64u32 a, v64u32 b) { return (a > b) ? af : bf; }
+v64f32 fs (v64s32 a, v64s32 b) { return (a > b) ? af : bf; }
+v64f32 ff (v64f32 a, v64f32 b) { return (a > b) ? af : bf; }
+
+/* { dg-final { scan-assembler-times "cmpltps" 3 } } */
+/* { dg-final { scan-assembler-times "pcmpgtd" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr98218-1.c b/gcc/testsuite/gcc.target/i386/pr98218-1.c
index 48407dabc2a..9d6602c08a2 100644
--- a/gcc/testsuite/gcc.target/i386/pr98218-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr98218-1.c
@@ -1,4 +1,4 @@ 
-/* PR target/98522 */
+/* PR target/98218 */
 /* { dg-do compile { target { ! ia32 } } } */
 /* { dg-options "-O2 -msse2" } */
 
diff --git a/gcc/testsuite/gcc.target/i386/pr98218-1a.c b/gcc/testsuite/gcc.target/i386/pr98218-1a.c
index 3470c87cdc3..2610438b24a 100644
--- a/gcc/testsuite/gcc.target/i386/pr98218-1a.c
+++ b/gcc/testsuite/gcc.target/i386/pr98218-1a.c
@@ -1,4 +1,4 @@ 
-/* PR target/98522 */
+/* PR target/98218 */
 /* { dg-do compile { target { ! ia32 } } } */
 /* { dg-options "-O2 -ftree-vectorize -msse2" } */
 
diff --git a/gcc/testsuite/gcc.target/i386/pr98218-2.c b/gcc/testsuite/gcc.target/i386/pr98218-2.c
index 0b716126413..948bf4f5978 100644
--- a/gcc/testsuite/gcc.target/i386/pr98218-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr98218-2.c
@@ -1,4 +1,4 @@ 
-/* PR target/98522 */
+/* PR target/98218 */
 /* { dg-do compile { target { ! ia32 } } } */
 /* { dg-options "-O2 -msse2" } */
 
diff --git a/gcc/testsuite/gcc.target/i386/pr98218-2a.c b/gcc/testsuite/gcc.target/i386/pr98218-2a.c
index 6afd0a412d7..73c7226044f 100644
--- a/gcc/testsuite/gcc.target/i386/pr98218-2a.c
+++ b/gcc/testsuite/gcc.target/i386/pr98218-2a.c
@@ -1,4 +1,4 @@ 
-/* PR target/98522 */
+/* PR target/98218 */
 /* { dg-do compile { target { ! ia32 } } } */
 /* { dg-options "-O2 -ftree-vectorize -msse2" } */
 
diff --git a/gcc/testsuite/gcc.target/i386/pr98218-3.c b/gcc/testsuite/gcc.target/i386/pr98218-3.c
index 83a8c298640..1b40d0cee36 100644
--- a/gcc/testsuite/gcc.target/i386/pr98218-3.c
+++ b/gcc/testsuite/gcc.target/i386/pr98218-3.c
@@ -1,4 +1,4 @@ 
-/* PR target/98522 */
+/* PR target/98218 */
 /* { dg-do compile { target { ! ia32 } } } */
 /* { dg-options "-O2 -msse2" } */
 
diff --git a/gcc/testsuite/gcc.target/i386/pr98218-3a.c b/gcc/testsuite/gcc.target/i386/pr98218-3a.c
index 272d54e5b34..cf1d4972807 100644
--- a/gcc/testsuite/gcc.target/i386/pr98218-3a.c
+++ b/gcc/testsuite/gcc.target/i386/pr98218-3a.c
@@ -1,4 +1,4 @@ 
-/* PR target/98522 */
+/* PR target/98218 */
 /* { dg-do compile { target { ! ia32 } } } */
 /* { dg-options "-O2 -ftree-vectorize -msse2" } */
 
diff --git a/gcc/testsuite/gcc.target/i386/pr98218-4.c b/gcc/testsuite/gcc.target/i386/pr98218-4.c
new file mode 100644
index 00000000000..647bdb1171b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr98218-4.c
@@ -0,0 +1,16 @@ 
+/* PR target/98218 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2" } */
+
+typedef unsigned int __attribute__((__vector_size__ (8))) v64u32;
+typedef int __attribute__((__vector_size__ (8))) v64s32;
+typedef float __attribute__((__vector_size__ (8))) v64f32;
+
+v64u32 tu (v64f32 a, v64f32 b) { return a > b; }
+v64s32 ts (v64f32 a, v64f32 b) { return a > b; }
+v64f32 fu (v64u32 a, v64u32 b) { return a > b; }
+v64f32 fs (v64s32 a, v64s32 b) { return a > b; }
+v64f32 ff (v64f32 a, v64f32 b) { return a > b; }
+
+/* { dg-final { scan-assembler-times "cmpltps" 3 } } */
+/* { dg-final { scan-assembler-times "pcmpgtd" 2 } } */