diff mbox

[i386]: Fix PR 70998, ICE in pre_and_rev_post_order_compute, at cfganal.c

Message ID CAFULd4bJ0KBBkkDtkPu-mWOXnicQdZ2Bh+MczTAQyX1YZyrEAQ@mail.gmail.com
State New
Headers show

Commit Message

Uros Bizjak May 8, 2016, 6:32 p.m. UTC
Hello!

As exposed by r235906 [1], we should not widen a DFmode memory access to
V2DFmode in the splitter.

The attached patch introduces two new patterns that use the correct mode
for the memory operand. These two patterns are appropriate for the
TARGET_SSE_PARTIAL_REG_DEPENDENCY splitters, as they don't need to
widen the memory access.

2016-05-08  Uros Bizjak  <ubizjak@gmail.com>

    PR target/70998
    * config/i386/sse.md (*sse2_vd_cvtsd2ss): New insn pattern.
    (*sse2_vd_cvtss2sd): Ditto.
    * config/i386/i386.md
    (TARGET_SSE_PARTIAL_REG_DEPENDENCY float_truncate df->sf splitter):
    Generate *sse2_vd_cvtsd2ss pattern.
    (TARGET_SSE_PARTIAL_REG_DEPENDENCY float_extend sf->df splitter):
    Generate *sse2_vd_cvtss2sd pattern.

Patch was bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline SVN.

[1] https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=235906

Uros.
diff mbox

Patch

Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 236007)
+++ config/i386/i386.md	(working copy)
@@ -5192,13 +5192,12 @@ 
   [(set (match_dup 0)
 	(vec_merge:V4SF
 	  (vec_duplicate:V4SF
-	    (float_truncate:V2SF
+	    (float_truncate:SF
 	      (match_dup 1)))
 	  (match_dup 0)
 	  (const_int 1)))]
 {
   operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode);
-  operands[1] = lowpart_subreg (V2DFmode, operands[1], DFmode);
   emit_move_insn (operands[0], CONST0_RTX (V4SFmode));
 })
 
@@ -5219,15 +5218,13 @@ 
        || TARGET_AVX512VL)"
   [(set (match_dup 0)
         (vec_merge:V2DF
-          (float_extend:V2DF
-            (vec_select:V2SF
-              (match_dup 1)
-              (parallel [(const_int 0) (const_int 1)])))
-          (match_dup 0)
+	  (vec_duplicate:V2DF
+	    (float_extend:DF
+	      (match_dup 1)))
+	  (match_dup 0)
           (const_int 1)))]
 {
   operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode);
-  operands[1] = lowpart_subreg (V4SFmode, operands[1], SFmode);
   emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
 })
 
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md	(revision 236007)
+++ config/i386/sse.md	(working copy)
@@ -4949,6 +4949,27 @@ 
    (set_attr "prefix" "orig,orig,<round_prefix>")
    (set_attr "mode" "SF")])
 
+(define_insn "*sse2_vd_cvtsd2ss"
+  [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
+	(vec_merge:V4SF
+	  (vec_duplicate:V4SF
+	    (float_truncate:SF (match_operand:DF 2 "nonimmediate_operand" "x,m,vm")))
+	  (match_operand:V4SF 1 "register_operand" "0,0,v")
+	  (const_int 1)))]
+  "TARGET_SSE2"
+  "@
+   cvtsd2ss\t{%2, %0|%0, %2}
+   cvtsd2ss\t{%2, %0|%0, %2}
+   vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,noavx,avx")
+   (set_attr "type" "ssecvt")
+   (set_attr "athlon_decode" "vector,double,*")
+   (set_attr "amdfam10_decode" "vector,double,*")
+   (set_attr "bdver1_decode" "direct,direct,*")
+   (set_attr "btver2_decode" "double,double,double")
+   (set_attr "prefix" "orig,orig,vex")
+   (set_attr "mode" "SF")])
+
 (define_insn "sse2_cvtss2sd<round_saeonly_name>"
   [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
 	(vec_merge:V2DF
@@ -4972,6 +4993,27 @@ 
    (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
    (set_attr "mode" "DF")])
 
+(define_insn "*sse2_vd_cvtss2sd"
+  [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
+	(vec_merge:V2DF
+	  (vec_duplicate:V2DF
+	    (float_extend:DF (match_operand:SF 2 "nonimmediate_operand" "x,m,vm")))
+	  (match_operand:V2DF 1 "register_operand" "0,0,v")
+	  (const_int 1)))]
+  "TARGET_SSE2"
+  "@
+   cvtss2sd\t{%2, %0|%0, %2}
+   cvtss2sd\t{%2, %0|%0, %2}
+   vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,noavx,avx")
+   (set_attr "type" "ssecvt")
+   (set_attr "amdfam10_decode" "vector,double,*")
+   (set_attr "athlon_decode" "direct,direct,*")
+   (set_attr "bdver1_decode" "direct,direct,*")
+   (set_attr "btver2_decode" "double,double,double")
+   (set_attr "prefix" "orig,orig,vex")
+   (set_attr "mode" "DF")])
+
 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
   [(set (match_operand:V8SF 0 "register_operand" "=v")
 	(float_truncate:V8SF