diff mbox series

i386: Fix <sse2p4_1>_pinsr<ssemodesuffix> and its splitters [PR103772]

Message ID CAFULd4bzYjXV9474OZdinqUH0Go6HT-y40u4WRjcX14qtdVPWg@mail.gmail.com
State New
Headers show
Series i386: Fix <sse2p4_1>_pinsr<ssemodesuffix> and its splitters [PR103772] | expand

Commit Message

Uros Bizjak Dec. 20, 2021, 8:19 p.m. UTC
The clever trick to duplicate the value of the input operand into itself
proved not so clever after all.  The splitter should not clobber the input
operand in any case, since the register can hold the value outside the HImode
lowpart when accessed as subreg.  Use the standard earlyclobber approach
instead.

The testcase fails with avx2 ISA, but I was not able to create the testcase
that wouldn't require -mavx512fp16 compile flag.

2021-12-20  Uroš Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog:

    PR target/103772
    * config/i386/sse.md (<sse2p4_1>_pinsr<ssemodesuffix>): Add
    earlyclobber to (x,x,x,i) alternative.
    (<sse2p4_1>_pinsr<ssemodesuffix> peephole2): Remove.
    (<sse2p4_1>_pinsr<ssemodesuffix> splitter): Use output
    operand as a temporary register.  Split after reload_completed.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Pushed to master.

Uros.
diff mbox series

Patch

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 5196149ee32..cb1c0b1edec 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -17430,7 +17430,7 @@ 
 
 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
 (define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
-  [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x,v,v,x")
+  [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x,v,v,&x")
 	(vec_merge:PINSR_MODE
 	  (vec_duplicate:PINSR_MODE
 	    (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m,r,m,x"))
@@ -17499,25 +17499,6 @@ 
 	   (const_string "*")))])
 
 ;; For TARGET_AVX2, implement insert from XMM reg with PBROADCASTW + PBLENDW.
-;; First try to get a scratch register and go through it.  In case this fails,
-;; overwrite source reg with broadcasted value and blend from there.
-(define_peephole2
-  [(match_scratch:V8_128 4 "x")
-   (set (match_operand:V8_128 0 "sse_reg_operand")
-	(vec_merge:V8_128
-	  (vec_duplicate:V8_128
-	    (match_operand:<ssescalarmode> 2 "sse_reg_operand"))
-	  (match_operand:V8_128 1 "sse_reg_operand")
-	  (match_operand:SI 3 "const_int_operand")))]
-  "TARGET_AVX2
-   && INTVAL (operands[3]) > 1
-   && ((unsigned) exact_log2 (INTVAL (operands[3]))
-       < GET_MODE_NUNITS (<MODE>mode))"
-  [(set (match_dup 4)
-	(vec_duplicate:V8_128 (match_dup 2)))
-   (set (match_dup 0)
-	(vec_merge:V8_128 (match_dup 4) (match_dup 1) (match_dup 3)))])
-
 (define_split
   [(set (match_operand:V8_128 0 "sse_reg_operand")
 	(vec_merge:V8_128
@@ -17525,18 +17506,14 @@ 
 	    (match_operand:<ssescalarmode> 2 "sse_reg_operand"))
 	  (match_operand:V8_128 1 "sse_reg_operand")
 	  (match_operand:SI 3 "const_int_operand")))]
-  "TARGET_AVX2 && epilogue_completed
+  "TARGET_AVX2 && reload_completed
    && INTVAL (operands[3]) > 1
    && ((unsigned) exact_log2 (INTVAL (operands[3]))
        < GET_MODE_NUNITS (<MODE>mode))"
-  [(set (match_dup 4)
+  [(set (match_dup 0)
 	(vec_duplicate:V8_128 (match_dup 2)))
    (set (match_dup 0)
-	(vec_merge:V8_128 (match_dup 4) (match_dup 1) (match_dup 3)))]
-{
-  operands[4] = lowpart_subreg (<MODE>mode, operands[2],
-				<ssescalarmode>mode);
-})
+	(vec_merge:V8_128 (match_dup 0) (match_dup 1) (match_dup 3)))])
 
 (define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
   [(match_operand:AVX512_VEC 0 "register_operand")