Patchwork [i386] : Slightly penalize non-native FP moves.

login
register
mail settings
Submitter Uros Bizjak
Date May 31, 2011, 3:23 p.m.
Message ID <BANLkTinP_HKCzAQC+xekACzJnxZOBvD-eQ@mail.gmail.com>
Download mbox | patch
Permalink /patch/98047/
State New
Headers show

Comments

Uros Bizjak - May 31, 2011, 3:23 p.m.
Hello!

The attached patch slightly penalizes FP moves that use non-native
registers.  Additionally, the patch merges the FP push splitters.

2011-05-31  Uros Bizjak  <ubizjak@gmail.com>

	* config/i386/i386.md (*pushxf_nointeger): Merge alternatives 1 and 2.
	(FP push_operand splitters): Merge {TF,XF,DF} mode splitters.

2011-05-31  Uros Bizjak  <ubizjak@gmail.com>

	* config/i386/i386.md (*movtf_internal): Avoid allocating general
	registers.  Penalize F*r->o alternative to prevent partial memory
	stalls.  Slightly penalize *roF->*r alternative.  Generate SSE
	CONST_DOUBLE immediates when optimizing function for size.  Move
	CONST_DOUBLEs directly to memory only for !TARGET_MEMORY_MISMATCH_STALL.
	(*movxf_internal): Slightly penalize Yx*roF->Yx*r alternative.
	(*movdf_internal): Slightly penalize Yd*roF->Yd*r alternative.
	(*movdf_internal_rex64): Slightly penalize rm->r, r->m and F->r
	alternatives.
	(*movsf_internal): Slightly penalize rmF->r and Fr->m alternatives.

	(fp_register_operand splitters): Use fp_register_operand
	predicate.  Do not use FP_REG_P in insn condition.
	(any_fp_register_operand splitters): Use any_fp_register_operand
	predicate.  Do not use ANY_FP_REG_P in insn condition.

The patch was tested on x86_64-pc-linux-gnu {,-m32} and committed to mainline SVN.

Uros.

Patch

Index: i386.md
===================================================================
--- i386.md	(revision 174465)
+++ i386.md	(working copy)
@@ -2678,6 +2678,7 @@ 
    (set_attr "unit" "sse,*,*")
    (set_attr "mode" "TF,SI,SI")])
 
+;; %%% Kill this when call knows how to work this out.
 (define_split
   [(set (match_operand:TF 0 "push_operand" "")
 	(match_operand:TF 1 "sse_reg_operand" ""))]
@@ -2685,14 +2686,6 @@ 
   [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -16)))
    (set (mem:TF (reg:P SP_REG)) (match_dup 1))])
 
-(define_split
-  [(set (match_operand:TF 0 "push_operand" "")
-	(match_operand:TF 1 "general_operand" ""))]
-  "TARGET_SSE2 && reload_completed
-   && !SSE_REG_P (operands[1])"
-  [(const_int 0)]
-  "ix86_split_long_move (operands); DONE;")
-
 (define_insn "*pushxf"
   [(set (match_operand:XF 0 "push_operand" "=<,<")
 	(match_operand:XF 1 "general_no_elim_operand" "f,ro"))]
@@ -2712,17 +2705,18 @@ 
 ;; only once, but this ought to be handled elsewhere).
 
 (define_insn "*pushxf_nointeger"
-  [(set (match_operand:XF 0 "push_operand" "=X,X,X")
-	(match_operand:XF 1 "general_no_elim_operand" "f,Fo,*r"))]
+  [(set (match_operand:XF 0 "push_operand" "=X,X")
+	(match_operand:XF 1 "general_no_elim_operand" "f,*rFo"))]
   "optimize_function_for_size_p (cfun)"
 {
   /* This insn should be already split before reg-stack.  */
   gcc_unreachable ();
 }
   [(set_attr "type" "multi")
-   (set_attr "unit" "i387,*,*")
-   (set_attr "mode" "XF,SI,SI")])
+   (set_attr "unit" "i387,*")
+   (set_attr "mode" "XF,SI")])
 
+;; %%% Kill this when call knows how to work this out.
 (define_split
   [(set (match_operand:XF 0 "push_operand" "")
 	(match_operand:XF 1 "fp_register_operand" ""))]
@@ -2731,14 +2725,6 @@ 
    (set (mem:XF (reg:P SP_REG)) (match_dup 1))]
   "operands[2] = GEN_INT (-GET_MODE_SIZE (XFmode));")
 
-(define_split
-  [(set (match_operand:XF 0 "push_operand" "")
-	(match_operand:XF 1 "general_operand" ""))]
-  "reload_completed
-   && !FP_REG_P (operands[1])"
-  [(const_int 0)]
-  "ix86_split_long_move (operands); DONE;")
-
 ;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size.
 ;; Size of pushdf using integer instructions is 2+2*memory operand size
 ;; On the average, pushdf using integers can be still shorter.
@@ -2763,14 +2749,6 @@ 
   [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
    (set (mem:DF (reg:P SP_REG)) (match_dup 1))])
 
-(define_split
-  [(set (match_operand:DF 0 "push_operand" "")
-	(match_operand:DF 1 "general_operand" ""))]
-  "reload_completed
-   && !ANY_FP_REG_P (operands[1])"
-  [(const_int 0)]
-  "ix86_split_long_move (operands); DONE;")
-
 (define_insn "*pushsf_rex64"
   [(set (match_operand:SF 0 "push_operand" "=X,X,X")
 	(match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,x"))]
@@ -2797,15 +2775,6 @@ 
    (set_attr "unit" "i387,*,*")
    (set_attr "mode" "SF,SI,SF")])
 
-(define_split
-  [(set (match_operand:SF 0 "push_operand" "")
-	(match_operand:SF 1 "memory_operand" ""))]
-  "reload_completed
-   && MEM_P (operands[1])
-   && (operands[2] = find_constant_src (insn))"
-  [(set (match_dup 0)
-	(match_dup 2))])
-
 ;; %%% Kill this when call knows how to work this out.
 (define_split
   [(set (match_operand:SF 0 "push_operand" "")
@@ -2813,7 +2782,25 @@ 
   "reload_completed"
   [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
    (set (mem:SF (reg:P SP_REG)) (match_dup 1))]
-  "operands[2] = GEN_INT (-GET_MODE_SIZE (<MODE>mode));")
+  "operands[2] = GEN_INT (-GET_MODE_SIZE (<P:MODE>mode));")
+
+(define_split
+  [(set (match_operand:SF 0 "push_operand" "")
+	(match_operand:SF 1 "memory_operand" ""))]
+  "reload_completed
+   && (operands[2] = find_constant_src (insn))"
+  [(set (match_dup 0) (match_dup 2))])
+
+(define_split
+  [(set (match_operand 0 "push_operand" "")
+	(match_operand 1 "general_operand" ""))]
+  "reload_completed
+   && (GET_MODE (operands[0]) == TFmode
+       || GET_MODE (operands[0]) == XFmode
+       || GET_MODE (operands[0]) == DFmode)
+   && !ANY_FP_REG_P (operands[1])"
+  [(const_int 0)]
+  "ix86_split_long_move (operands); DONE;")
 
 ;; Floating point move instructions.
 
@@ -2833,17 +2820,25 @@ 
   "ix86_expand_move (<MODE>mode, operands); DONE;")
 
 (define_insn "*movtf_internal"
-  [(set (match_operand:TF 0 "nonimmediate_operand" "=x,m,x,?r ,?o")
-	(match_operand:TF 1 "general_operand"	   "xm,x,C,roF,Fr"))]
+  [(set (match_operand:TF 0 "nonimmediate_operand" "=x,m,x,?*r ,!o")
+	(match_operand:TF 1 "general_operand"	   "xm,x,C,*roF,F*r"))]
   "TARGET_SSE2
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+   && (!can_create_pseudo_p ()
+       || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
+       || GET_CODE (operands[1]) != CONST_DOUBLE
+       || (optimize_function_for_size_p (cfun)
+	   && standard_sse_constant_p (operands[1])
+	   && !memory_operand (operands[0], TFmode))
+       || (!TARGET_MEMORY_MISMATCH_STALL
+	   && memory_operand (operands[0], TFmode)))"
 {
   switch (which_alternative)
     {
     case 0:
     case 1:
-      /* Handle misaligned load/store since we don't have movmisaligntf
-	 pattern. */
+      /* Handle misaligned load/store since we
+         don't have movmisaligntf pattern. */
       if (misaligned_operand (operands[0], TFmode)
 	  || misaligned_operand (operands[1], TFmode))
 	{
@@ -2892,7 +2887,7 @@ 
 
 ;; Possible store forwarding (partial memory) stall in alternative 4.
 (define_insn "*movxf_internal"
-  [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,Yx*r  ,!o")
+  [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,?Yx*r ,!o")
 	(match_operand:XF 1 "general_operand"	   "fm,f,G,Yx*roF,FYx*r"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (!can_create_pseudo_p ()
@@ -2926,9 +2921,9 @@ 
 
 (define_insn "*movdf_internal_rex64"
   [(set (match_operand:DF 0 "nonimmediate_operand"
-		"=f,m,f,r ,m,r,!m,Y2*x,Y2*x,Y2*x,m   ,Yi,r ")
+		"=f,m,f,?r,?m,?r,!m,Y2*x,Y2*x,Y2*x,m   ,Yi,r ")
 	(match_operand:DF 1 "general_operand"
-		"fm,f,G,rm,r,F,F ,C   ,Y2*x,m   ,Y2*x,r ,Yi"))]
+		"fm,f,G,rm,r ,F ,F ,C   ,Y2*x,m   ,Y2*x,r ,Yi"))]
   "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (!can_create_pseudo_p ()
        || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
@@ -3076,7 +3071,7 @@ 
 ;; Possible store forwarding (partial memory) stall in alternative 4.
 (define_insn "*movdf_internal"
   [(set (match_operand:DF 0 "nonimmediate_operand"
-		"=f,m,f,Yd*r  ,!o   ,Y2*x,Y2*x,Y2*x,m  ")
+		"=f,m,f,?Yd*r ,!o   ,Y2*x,Y2*x,Y2*x,m  ")
 	(match_operand:DF 1 "general_operand"
 		"fm,f,G,Yd*roF,FYd*r,C   ,Y2*x,m   ,Y2*x"))]
   "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
@@ -3212,7 +3207,7 @@ 
 
 (define_insn "*movsf_internal"
   [(set (match_operand:SF 0 "nonimmediate_operand"
-	  "=f,m,f,r  ,m ,x,x,x ,m,!*y,!m,!*y,?Yi,?r,!*Ym,!r")
+	  "=f,m,f,?r ,?m,x,x,x ,m,!*y,!m,!*y,?Yi,?r,!*Ym,!r")
 	(match_operand:SF 1 "general_operand"
 	  "fm,f,G,rmF,Fr,C,x,xm,x,m  ,*y,*y ,r  ,Yi,r   ,*Ym"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
@@ -3309,14 +3304,13 @@ 
 	       (const_string "SF")))])
 
 (define_split
-  [(set (match_operand 0 "register_operand" "")
+  [(set (match_operand 0 "any_fp_register_operand" "")
 	(match_operand 1 "memory_operand" ""))]
   "reload_completed
    && (GET_MODE (operands[0]) == TFmode
        || GET_MODE (operands[0]) == XFmode
        || GET_MODE (operands[0]) == DFmode
        || GET_MODE (operands[0]) == SFmode)
-   && ANY_FP_REGNO_P (REGNO (operands[0]))
    && (operands[2] = find_constant_src (insn))"
   [(set (match_dup 0) (match_dup 2))]
 {
@@ -3329,13 +3323,12 @@ 
 })
 
 (define_split
-  [(set (match_operand 0 "register_operand" "")
+  [(set (match_operand 0 "any_fp_register_operand" "")
 	(float_extend (match_operand 1 "memory_operand" "")))]
   "reload_completed
    && (GET_MODE (operands[0]) == TFmode
        || GET_MODE (operands[0]) == XFmode
        || GET_MODE (operands[0]) == DFmode)
-   && ANY_FP_REGNO_P (REGNO (operands[0]))
    && (operands[2] = find_constant_src (insn))"
   [(set (match_dup 0) (match_dup 2))]
 {
@@ -3349,9 +3342,9 @@ 
 
 ;; Split the load of -0.0 or -1.0 into fldz;fchs or fld1;fchs sequence
 (define_split
-  [(set (match_operand:X87MODEF 0 "register_operand" "")
+  [(set (match_operand:X87MODEF 0 "fp_register_operand" "")
 	(match_operand:X87MODEF 1 "immediate_operand" ""))]
-  "reload_completed && FP_REGNO_P (REGNO (operands[0]))
+  "reload_completed
    && (standard_80387_constant_p (operands[1]) == 8
        || standard_80387_constant_p (operands[1]) == 9)"
   [(set (match_dup 0)(match_dup 1))
@@ -5225,24 +5218,22 @@ 
    (set_attr "fp_int_src" "true")])
 
 (define_split
-  [(set (match_operand:X87MODEF 0 "register_operand" "")
+  [(set (match_operand:X87MODEF 0 "fp_register_operand" "")
 	(float:X87MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))
    (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
   "TARGET_80387
    && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, <SSEMODEI24:MODE>mode)
-   && reload_completed
-   && FP_REG_P (operands[0])"
+   && reload_completed"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (float:X87MODEF (match_dup 2)))])
 
 (define_split
-  [(set (match_operand:X87MODEF 0 "register_operand" "")
+  [(set (match_operand:X87MODEF 0 "fp_register_operand" "")
 	(float:X87MODEF (match_operand:SSEMODEI24 1 "memory_operand" "")))
    (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
   "TARGET_80387
    && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, <SSEMODEI24:MODE>mode)
-   && reload_completed
-   && FP_REG_P (operands[0])"
+   && reload_completed"
   [(set (match_dup 0) (float:X87MODEF (match_dup 1)))])
 
 ;; Avoid store forwarding (partial memory) stall penalty
@@ -5265,7 +5256,7 @@ 
    (set_attr "fp_int_src" "true")])
 
 (define_split
-  [(set (match_operand:X87MODEF 0 "register_operand" "")
+  [(set (match_operand:X87MODEF 0 "fp_register_operand" "")
 	(float:X87MODEF (match_operand:DI 1 "register_operand" "")))
    (clobber (match_scratch:V4SI 3 ""))
    (clobber (match_scratch:V4SI 4 ""))
@@ -5273,8 +5264,7 @@ 
   "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
    && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
    && !TARGET_64BIT && optimize_function_for_speed_p (cfun)
-   && reload_completed
-   && FP_REG_P (operands[0])"
+   && reload_completed"
   [(set (match_dup 2) (match_dup 3))
    (set (match_dup 0) (float:X87MODEF (match_dup 2)))]
 {
@@ -5291,7 +5281,7 @@ 
 })
 
 (define_split
-  [(set (match_operand:X87MODEF 0 "register_operand" "")
+  [(set (match_operand:X87MODEF 0 "fp_register_operand" "")
 	(float:X87MODEF (match_operand:DI 1 "memory_operand" "")))
    (clobber (match_scratch:V4SI 3 ""))
    (clobber (match_scratch:V4SI 4 ""))
@@ -5299,8 +5289,7 @@ 
   "TARGET_80387 && X87_ENABLE_FLOAT (<X87MODEF:MODE>mode, DImode)
    && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
    && !TARGET_64BIT && optimize_function_for_speed_p (cfun)
-   && reload_completed
-   && FP_REG_P (operands[0])"
+   && reload_completed"
   [(set (match_dup 0) (float:X87MODEF (match_dup 1)))])
 
 ;; Avoid store forwarding (partial memory) stall penalty by extending