diff mbox series

Fix wrong code due to incorrect define_split

Message ID 20231030103730.168701-1-hongtao.liu@intel.com
State New
Headers show
Series Fix wrong code due to incorrect define_split | expand

Commit Message

Liu, Hongtao Oct. 30, 2023, 10:37 a.m. UTC
-(define_split
-  [(set (match_operand:V2HI 0 "register_operand")
-        (eq:V2HI
-          (eq:V2HI
-            (us_minus:V2HI
-              (match_operand:V2HI 1 "register_operand")
-              (match_operand:V2HI 2 "register_operand"))
-            (match_operand:V2HI 3 "const0_operand"))
-          (match_operand:V2HI 4 "const0_operand")))]
-  "TARGET_SSE4_1"
-  [(set (match_dup 0)
-        (umin:V2HI (match_dup 1) (match_dup 2)))
-   (set (match_dup 0)
-        (eq:V2HI (match_dup 0) (match_dup 2)))])

The splitter is wrong when op1 == op2: the original pattern returns 0, but
after the split it returns 1.  So remove the splitter.

Also convert the pblendvb define_splits to define_insn_and_split to handle
the pattern below:

494(set (reg:V4QI 112)
495    (unspec:V4QI [
496            (subreg:V4QI (reg:V2HF 111 [ bf ]) 0)
497            (subreg:V4QI (reg:V2HF 110 [ af ]) 0)
498            (subreg:V4QI (eq:V2HI (eq:V2HI (reg:V2HI 105)
499                        (const_vector:V2HI [
500                                (const_int 0 [0]) repeated x2
501                            ]))
502                    (const_vector:V2HI [
503                            (const_int 0 [0]) repeated x2
504                        ])) 0)
505        ] UNSPEC_BLENDV))

A plain define_split doesn't work here since pass_combine assumes a split
produces at most 2 insns, but here it produces 3 since we need to move
const0_rtx (V2HImode) into a register. The move insn can be eliminated later.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ready to push to trunk.

gcc/ChangeLog:

	PR target/112276
	* config/i386/mmx.md (*mmx_pblendvb_v8qi_1): Change
	define_split to define_insn_and_split to handle
	immediate_operand for comparison.
	(*mmx_pblendvb_v8qi_2): Ditto.
	(*mmx_pblendvb_<mode>_1): Ditto.
	(*mmx_pblendvb_v4qi_2): Ditto.
	(<code><mode>3): Remove define_split after it.
	(<code>v8qi3): Ditto.
	(<code><mode>3): Ditto.
	(<code>v2hi3): Ditto.

gcc/testsuite/ChangeLog:

	* g++.target/i386/part-vect-vcondhf.C: Adjust testcase.
	* gcc.target/i386/pr112276.c: New test.
---
 gcc/config/i386/mmx.md                        | 112 ++++++------------
 .../g++.target/i386/part-vect-vcondhf.C       |   1 -
 gcc/testsuite/gcc.target/i386/pr112276.c      |  36 ++++++
 3 files changed, 70 insertions(+), 79 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr112276.c
diff mbox series

Patch

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index e3d0fb5b107..2b97bb8fa98 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -3360,21 +3360,6 @@  (define_insn "<code><mode>3"
    (set_attr "prefix" "orig,orig,vex")
    (set_attr "mode" "TI")])
 
-(define_split
-  [(set (match_operand:V4HI 0 "register_operand")
-	(eq:V4HI
-	  (eq:V4HI
-	    (us_minus:V4HI
-	      (match_operand:V4HI 1 "register_operand")
-	      (match_operand:V4HI 2 "register_operand"))
-	    (match_operand:V4HI 3 "const0_operand"))
-	  (match_operand:V4HI 4 "const0_operand")))]
-  "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
-  [(set (match_dup 0)
-	(umin:V4HI (match_dup 1) (match_dup 2)))
-   (set (match_dup 0)
-	(eq:V4HI (match_dup 0) (match_dup 2)))])
-
 (define_expand "mmx_<code>v8qi3"
   [(set (match_operand:V8QI 0 "register_operand")
         (umaxmin:V8QI
@@ -3408,21 +3393,6 @@  (define_expand "<code>v8qi3"
 	  (match_operand:V8QI 2 "register_operand")))]
   "TARGET_MMX_WITH_SSE")
 
-(define_split
-  [(set (match_operand:V8QI 0 "register_operand")
-	(eq:V8QI
-	  (eq:V8QI
-	    (us_minus:V8QI
-	      (match_operand:V8QI 1 "register_operand")
-	      (match_operand:V8QI 2 "register_operand"))
-	    (match_operand:V8QI 3 "const0_operand"))
-	  (match_operand:V8QI 4 "const0_operand")))]
-  "TARGET_MMX_WITH_SSE"
-  [(set (match_dup 0)
-	(umin:V8QI (match_dup 1) (match_dup 2)))
-   (set (match_dup 0)
-	(eq:V8QI (match_dup 0) (match_dup 2)))])
-
 (define_insn "<code><mode>3"
   [(set (match_operand:VI1_16_32 0 "register_operand" "=x,Yw")
         (umaxmin:VI1_16_32
@@ -3436,21 +3406,6 @@  (define_insn "<code><mode>3"
    (set_attr "type" "sseiadd")
    (set_attr "mode" "TI")])
 
-(define_split
-  [(set (match_operand:V4QI 0 "register_operand")
-	(eq:V4QI
-	  (eq:V4QI
-	    (us_minus:V4QI
-	      (match_operand:V4QI 1 "register_operand")
-	      (match_operand:V4QI 2 "register_operand"))
-	    (match_operand:V4QI 3 "const0_operand"))
-	  (match_operand:V4QI 4 "const0_operand")))]
-  "TARGET_SSE2"
-  [(set (match_dup 0)
-	(umin:V4QI (match_dup 1) (match_dup 2)))
-   (set (match_dup 0)
-	(eq:V4QI (match_dup 0) (match_dup 2)))])
-
 (define_insn "<code>v2hi3"
   [(set (match_operand:V2HI 0 "register_operand" "=Yr,*x,Yv")
 	(umaxmin:V2HI
@@ -3467,21 +3422,6 @@  (define_insn "<code>v2hi3"
    (set_attr "prefix" "orig,orig,vex")
    (set_attr "mode" "TI")])
 
-(define_split
-  [(set (match_operand:V2HI 0 "register_operand")
-	(eq:V2HI
-	  (eq:V2HI
-	    (us_minus:V2HI
-	      (match_operand:V2HI 1 "register_operand")
-	      (match_operand:V2HI 2 "register_operand"))
-	    (match_operand:V2HI 3 "const0_operand"))
-	  (match_operand:V2HI 4 "const0_operand")))]
-  "TARGET_SSE4_1"
-  [(set (match_dup 0)
-	(umin:V2HI (match_dup 1) (match_dup 2)))
-   (set (match_dup 0)
-	(eq:V2HI (match_dup 0) (match_dup 2)))])
-
 (define_insn "ssse3_abs<mode>2"
   [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yv")
 	(abs:MMXMODEI
@@ -3954,7 +3894,7 @@  (define_insn "mmx_pblendvb_v8qi"
    (set_attr "btver2_decode" "vector")
    (set_attr "mode" "TI")])
 
-(define_split
+(define_insn_and_split "*mmx_pblendvb_v8qi_1"
   [(set (match_operand:V8QI 0 "register_operand")
        (unspec:V8QI
 	  [(match_operand:V8QI 1 "register_operand")
@@ -3962,21 +3902,26 @@  (define_split
 	   (eq:V8QI
 	     (eq:V8QI
 		(match_operand:V8QI 3 "register_operand")
-		(match_operand:V8QI 4 "register_operand"))
+		(match_operand:V8QI 4 "nonmemory_operand"))
 	     (match_operand:V8QI 5 "const0_operand"))]
 	   UNSPEC_BLENDV))]
-  "TARGET_MMX_WITH_SSE"
+  "TARGET_MMX_WITH_SSE && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
   [(set (match_dup 6)
-	(eq:V8QI (match_dup 3) (match_dup 4)))
+	(eq:V8QI (match_dup 3) (match_dup 7)))
    (set (match_dup 0)
 	(unspec:V8QI
 	  [(match_dup 2)
 	   (match_dup 1)
 	   (match_dup 6)]
 	  UNSPEC_BLENDV))]
-  "operands[6] = gen_reg_rtx (V8QImode);")
+{
+  operands[6] = gen_reg_rtx (V8QImode);
+  operands[7] = force_reg (V8QImode, operands[4]);
+})
 
-(define_split
+(define_insn_and_split "*mmx_pblendvb_v8qi_2"
   [(set (match_operand:V8QI 0 "register_operand")
        (unspec:V8QI
 	  [(match_operand:V8QI 1 "register_operand")
@@ -3985,12 +3930,14 @@  (define_split
 	     (eq:MMXMODE24
 	       (eq:MMXMODE24
 		 (match_operand:MMXMODE24 3 "register_operand")
-		 (match_operand:MMXMODE24 4 "register_operand"))
+		 (match_operand:MMXMODE24 4 "nonmemory_operand"))
 	     (match_operand:MMXMODE24 5 "const0_operand")) 0)]
 	   UNSPEC_BLENDV))]
-  "TARGET_MMX_WITH_SSE"
+  "TARGET_MMX_WITH_SSE && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
   [(set (match_dup 6)
-	(eq:MMXMODE24 (match_dup 3) (match_dup 4)))
+	(eq:MMXMODE24 (match_dup 3) (match_dup 8)))
    (set (match_dup 0)
 	(unspec:V8QI
 	  [(match_dup 2)
@@ -4000,6 +3947,7 @@  (define_split
 {
   operands[6] = gen_reg_rtx (<MODE>mode);
   operands[7] = lowpart_subreg (V8QImode, operands[6], <MODE>mode);
+  operands[8] = force_reg (<MODE>mode, operands[4]);
 })
 
 (define_insn "mmx_pblendvb_<mode>"
@@ -4022,7 +3970,7 @@  (define_insn "mmx_pblendvb_<mode>"
    (set_attr "btver2_decode" "vector")
    (set_attr "mode" "TI")])
 
-(define_split
+(define_insn_and_split "*mmx_pblendvb_<mode>_1"
   [(set (match_operand:VI_16_32 0 "register_operand")
 	(unspec:VI_16_32
 	  [(match_operand:VI_16_32 1 "register_operand")
@@ -4030,21 +3978,26 @@  (define_split
 	   (eq:VI_16_32
 	     (eq:VI_16_32
 		(match_operand:VI_16_32 3 "register_operand")
-		(match_operand:VI_16_32 4 "register_operand"))
+		(match_operand:VI_16_32 4 "nonmemory_operand"))
 	     (match_operand:VI_16_32 5 "const0_operand"))]
 	   UNSPEC_BLENDV))]
-  "TARGET_SSE2"
+  "TARGET_SSE2 && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
   [(set (match_dup 6)
-	(eq:VI_16_32 (match_dup 3) (match_dup 4)))
+	(eq:VI_16_32 (match_dup 3) (match_dup 7)))
    (set (match_dup 0)
 	(unspec:VI_16_32
 	  [(match_dup 2)
 	   (match_dup 1)
 	   (match_dup 6)]
 	  UNSPEC_BLENDV))]
-  "operands[6] = gen_reg_rtx (<MODE>mode);")
+{
+  operands[6] = gen_reg_rtx (<MODE>mode);
+  operands[7] = force_reg (<MODE>mode, operands[4]);
+})
 
-(define_split
+(define_insn_and_split "*mmx_pblendvb_v4qi_2"
   [(set (match_operand:V4QI 0 "register_operand")
        (unspec:V4QI
 	  [(match_operand:V4QI 1 "register_operand")
@@ -4053,12 +4006,14 @@  (define_split
 	     (eq:V2HI
 	       (eq:V2HI
 		 (match_operand:V2HI 3 "register_operand")
-		 (match_operand:V2HI 4 "register_operand"))
+		 (match_operand:V2HI 4 "nonmemory_operand"))
 	     (match_operand:V2HI 5 "const0_operand")) 0)]
 	   UNSPEC_BLENDV))]
-  "TARGET_SSE2"
+  "TARGET_SSE2 && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
   [(set (match_dup 6)
-	(eq:V2HI (match_dup 3) (match_dup 4)))
+	(eq:V2HI (match_dup 3) (match_dup 8)))
    (set (match_dup 0)
 	(unspec:V4QI
 	  [(match_dup 2)
@@ -4068,6 +4023,7 @@  (define_split
 {
   operands[6] = gen_reg_rtx (V2HImode);
   operands[7] = lowpart_subreg (V4QImode, operands[6], V2HImode);
+  operands[8] = force_reg (V2HImode, operands[4]);
 })
 
 ;; XOP parallel XMM conditional moves
diff --git a/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C b/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C
index f19727816cf..e623e6cde79 100644
--- a/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C
+++ b/gcc/testsuite/g++.target/i386/part-vect-vcondhf.C
@@ -3,7 +3,6 @@ 
 /* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */
 /* { dg-final { scan-assembler-times "vpcmpeqw" 6 } } */
 /* { dg-final { scan-assembler-times "vpcmpgtw" 2 } } */
-/* { dg-final { scan-assembler-times "vpminuw" 2 } } */
 /* { dg-final { scan-assembler-times "vcmpph" 8 } } */
 /* { dg-final { scan-assembler-times "vpblendvb" 8 } } */
 typedef unsigned short  __attribute__((__vector_size__ (4))) __v2hu;
diff --git a/gcc/testsuite/gcc.target/i386/pr112276.c b/gcc/testsuite/gcc.target/i386/pr112276.c
new file mode 100644
index 00000000000..5365313f4c2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr112276.c
@@ -0,0 +1,36 @@ 
+/* { dg-do run  { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse4.1" } */
+/* { dg-require-effective-target sse4 } */
+
+#include "sse4_1-check.h"
+
+typedef unsigned short __attribute__((__vector_size__ (8))) U4;
+typedef unsigned short __attribute__((__vector_size__ (4))) U2;
+
+U4
+__attribute__((noipa))
+foo4 (U4 a, U4 b)
+{
+  return a > b;
+}
+
+U2
+__attribute__((noipa))
+foo2 (U2 a, U2 b)
+{
+  return a > b;
+}
+
+static void
+sse4_1_test ()
+{
+  U4 a = __extension__(U4) {1, 1, 1, 1};
+  U4 b = foo4 (a, a);
+  if (b[0] || b[1] || b[2] || b[3]) __builtin_abort();
+
+  U2 c = __extension__(U2) {1, 1};
+  U2 d = foo2 (c, c);
+  if (d[0] || d[1]) __builtin_abort();
+
+  return;
+}