[i386] : Avoid partial memory stalls for FP moves and related FP immediate cleanups

Submitted by Uros Bizjak on May 30, 2011, 8:55 p.m.

Details

Message ID BANLkTimPPyZQ2fYHwKkRtLBm_ZMNczsUJw@mail.gmail.com
State New
Headers show

Commit Message

Uros Bizjak May 30, 2011, 8:55 p.m.
Hello!

Attached patch prevents partial memory stalls for XFmode and DFmode
(32bit) immediate->memory moves by penalizing memory target in case of
immediate move. The patch also cleans move splitters that handle
immediate operands (most notably, there are no FP subregs after the
reload pass; ix86_split_long_move splitters can be simplified and
merged into one pattern).

2011-05-30  Uros Bizjak  <ubizjak@gmail.com>

	* config/i386/i386.md (*movxf_internal): Penalize FYx*r->o alternative
	to prevent partial memory stalls.  Do not move CONST_DOUBLEs directly
	to memory for !TARGET_MEMORY_MISMATCH_STALL.
	(*movdf_internal_rex64): Do not penalize F->r alternative.
	(*movdf_internal):  Penalize FYd*r->o alternative to prevent partial
	memory stalls.  Generate SSE and x87 CONST_DOUBLE immediates only
	when optimizing function for size.  Do not move CONST_DOUBLEs
	directly to memory for !TARGET_MEMORY_MISMATCH_STALL.
	(FP move splitters): Merge {TF,XF,DF}mode move splitters.  Do not
	handle SUBREGs.  Do not check for MEM_P operands in the insn condition,
	check for ANY_FP_REGNO_P instead.
	* config/i386/constraints.md (Yd): Enable GENERAL_REGS for
	TARGET_64BIT and for TARGET_INTEGER_DFMODE_MOVES when optimizing
	function for speed.
	* config/i386/i386.c (ix86_option_override_internal): Do not
	set TARGET_INTEGER_DFMODE_MOVES here.

Patch was bootstrapped and regression tested on x86_64-pc-linux-gnu
{,-m32}, committed to mainline SVN.

Uros.

Patch hide | download patch | download mbox

Index: i386.md
===================================================================
--- i386.md	(revision 174435)
+++ i386.md	(working copy)
@@ -2833,8 +2833,8 @@ 
   "ix86_expand_move (<MODE>mode, operands); DONE;")
 
 (define_insn "*movtf_internal"
-  [(set (match_operand:TF 0 "nonimmediate_operand" "=x,m,x,?r,?o")
-	(match_operand:TF 1 "general_operand" "xm,x,C,roF,Fr"))]
+  [(set (match_operand:TF 0 "nonimmediate_operand" "=x,m,x,?r ,?o")
+	(match_operand:TF 1 "general_operand"	   "xm,x,C,roF,Fr"))]
   "TARGET_SSE2
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
@@ -2877,24 +2877,19 @@ 
 		   (const_string "TI"))]
 	       (const_string "DI")))])
 
-(define_split
-  [(set (match_operand:TF 0 "nonimmediate_operand" "")
-        (match_operand:TF 1 "general_operand" ""))]
-  "reload_completed
-   && !(SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]))"
-  [(const_int 0)]
-  "ix86_split_long_move (operands); DONE;")
-
+;; Possible store forwarding (partial memory) stall in alternative 4.
 (define_insn "*movxf_internal"
-  [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,Yx*r  ,o")
+  [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,Yx*r  ,!o")
 	(match_operand:XF 1 "general_operand"	   "fm,f,G,Yx*roF,FYx*r"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (!can_create_pseudo_p ()
        || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
        || GET_CODE (operands[1]) != CONST_DOUBLE
        || (optimize_function_for_size_p (cfun)
-	   && standard_80387_constant_p (operands[1]) > 0)
-       || memory_operand (operands[0], XFmode))"
+	   && standard_80387_constant_p (operands[1]) > 0
+	   && !memory_operand (operands[0], XFmode))
+       || (!TARGET_MEMORY_MISMATCH_STALL
+	   && memory_operand (operands[0], XFmode)))"
 {
   switch (which_alternative)
     {
@@ -2905,8 +2900,10 @@ 
     case 2:
       return standard_80387_constant_opcode (operands[1]);
 
-    case 3: case 4:
+    case 3:
+    case 4:
       return "#";
+
     default:
       gcc_unreachable ();
     }
@@ -2914,25 +2911,11 @@ 
   [(set_attr "type" "fmov,fmov,fmov,multi,multi")
    (set_attr "mode" "XF,XF,XF,SI,SI")])
 
-(define_split
-  [(set (match_operand:XF 0 "nonimmediate_operand" "")
-	(match_operand:XF 1 "general_operand" ""))]
-  "reload_completed
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))
-   && ! (FP_REG_P (operands[0]) ||
-	 (GET_CODE (operands[0]) == SUBREG
-	  && FP_REG_P (SUBREG_REG (operands[0]))))
-   && ! (FP_REG_P (operands[1]) ||
-	 (GET_CODE (operands[1]) == SUBREG
-	  && FP_REG_P (SUBREG_REG (operands[1]))))"
-  [(const_int 0)]
-  "ix86_split_long_move (operands); DONE;")
-
 (define_insn "*movdf_internal_rex64"
   [(set (match_operand:DF 0 "nonimmediate_operand"
-		"=f,m,f,r ,m,!r,!m,Y2*x,Y2*x,Y2*x,m   ,Yi,r ")
+		"=f,m,f,r ,m,r,!m,Y2*x,Y2*x,Y2*x,m   ,Yi,r ")
 	(match_operand:DF 1 "general_operand"
-		"fm,f,G,rm,r,F ,F ,C   ,Y2*x,m   ,Y2*x,r ,Yi"))]
+		"fm,f,G,rm,r,F,F ,C   ,Y2*x,m   ,Y2*x,r ,Yi"))]
   "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (!can_create_pseudo_p ()
        || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
@@ -3080,21 +3063,20 @@ 
 ;; Possible store forwarding (partial memory) stall in alternative 4.
 (define_insn "*movdf_internal"
   [(set (match_operand:DF 0 "nonimmediate_operand"
-		"=f,m,f,Yd*r  ,o    ,Y2*x,Y2*x,Y2*x,m  ")
+		"=f,m,f,Yd*r  ,!o   ,Y2*x,Y2*x,Y2*x,m  ")
 	(match_operand:DF 1 "general_operand"
 		"fm,f,G,Yd*roF,FYd*r,C   ,Y2*x,m   ,Y2*x"))]
   "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (!can_create_pseudo_p ()
        || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
        || GET_CODE (operands[1]) != CONST_DOUBLE
-       || (!TARGET_INTEGER_DFMODE_MOVES
+       || (optimize_function_for_size_p (cfun)
 	   && ((!(TARGET_SSE2 && TARGET_SSE_MATH)
 		&& standard_80387_constant_p (operands[1]) > 0)
 	       || (TARGET_SSE2 && TARGET_SSE_MATH
 		   && standard_sse_constant_p (operands[1])))
 	   && !memory_operand (operands[0], DFmode))
-       || ((TARGET_INTEGER_DFMODE_MOVES
-	    || !TARGET_MEMORY_MISMATCH_STALL)
+       || (!TARGET_MEMORY_MISMATCH_STALL
 	   && memory_operand (operands[0], DFmode)))"
 {
   switch (which_alternative)
@@ -3215,20 +3197,6 @@ 
 	      ]
 	      (const_string "DF")))])
 
-(define_split
-  [(set (match_operand:DF 0 "nonimmediate_operand" "")
-	(match_operand:DF 1 "general_operand" ""))]
-  "reload_completed
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))
-   && ! (ANY_FP_REG_P (operands[0]) ||
-	 (GET_CODE (operands[0]) == SUBREG
-	  && ANY_FP_REG_P (SUBREG_REG (operands[0]))))
-   && ! (ANY_FP_REG_P (operands[1]) ||
-	 (GET_CODE (operands[1]) == SUBREG
-	  && ANY_FP_REG_P (SUBREG_REG (operands[1]))))"
-  [(const_int 0)]
-  "ix86_split_long_move (operands); DONE;")
-
 (define_insn "*movsf_internal"
   [(set (match_operand:SF 0 "nonimmediate_operand"
 	  "=f,m,f,r  ,m ,x,x,x ,m,!*y,!m,!*y,?Yi,?r,!*Ym,!r")
@@ -3331,31 +3299,19 @@ 
   [(set (match_operand 0 "register_operand" "")
 	(match_operand 1 "memory_operand" ""))]
   "reload_completed
-   && MEM_P (operands[1])
    && (GET_MODE (operands[0]) == TFmode
        || GET_MODE (operands[0]) == XFmode
        || GET_MODE (operands[0]) == DFmode
        || GET_MODE (operands[0]) == SFmode)
+   && ANY_FP_REGNO_P (REGNO (operands[0]))
    && (operands[2] = find_constant_src (insn))"
   [(set (match_dup 0) (match_dup 2))]
 {
   rtx c = operands[2];
-  rtx r = operands[0];
-
-  if (GET_CODE (r) == SUBREG)
-    r = SUBREG_REG (r);
+  int r = REGNO (operands[0]);
 
-  if (SSE_REG_P (r))
-    {
-      if (!standard_sse_constant_p (c))
-	FAIL;
-    }
-  else if (FP_REG_P (r))
-    {
-      if (standard_80387_constant_p (c) < 1)
-	FAIL;
-    }
-  else if (MMX_REG_P (r))
+  if ((SSE_REGNO_P (r) && !standard_sse_constant_p (c))
+      || (FP_REGNO_P (r) && standard_80387_constant_p (c) < 1))
     FAIL;
 })
 
@@ -3363,31 +3319,18 @@ 
   [(set (match_operand 0 "register_operand" "")
 	(float_extend (match_operand 1 "memory_operand" "")))]
   "reload_completed
-   && MEM_P (operands[1])
    && (GET_MODE (operands[0]) == TFmode
        || GET_MODE (operands[0]) == XFmode
-       || GET_MODE (operands[0]) == DFmode
-       || GET_MODE (operands[0]) == SFmode)
+       || GET_MODE (operands[0]) == DFmode)
+   && ANY_FP_REGNO_P (REGNO (operands[0]))
    && (operands[2] = find_constant_src (insn))"
   [(set (match_dup 0) (match_dup 2))]
 {
   rtx c = operands[2];
-  rtx r = operands[0];
-
-  if (GET_CODE (r) == SUBREG)
-    r = SUBREG_REG (r);
+  int r = REGNO (operands[0]);
 
-  if (SSE_REG_P (r))
-    {
-      if (!standard_sse_constant_p (c))
-	FAIL;
-    }
-  else if (FP_REG_P (r))
-    {
-      if (standard_80387_constant_p (c) < 1)
-	FAIL;
-    }
-  else if (MMX_REG_P (r))
+  if ((SSE_REGNO_P (r) && !standard_sse_constant_p (c))
+      || (FP_REGNO_P (r) && standard_80387_constant_p (c) < 1))
     FAIL;
 })
 
@@ -3411,6 +3354,17 @@ 
     operands[1] = CONST1_RTX (<MODE>mode);
 })
 
+(define_split
+  [(set (match_operand 0 "nonimmediate_operand" "")
+        (match_operand 1 "general_operand" ""))]
+  "reload_completed
+   && (GET_MODE (operands[0]) == TFmode
+       || GET_MODE (operands[0]) == XFmode
+       || GET_MODE (operands[0]) == DFmode)
+   && !(ANY_FP_REG_P (operands[0]) || ANY_FP_REG_P (operands[1]))"
+  [(const_int 0)]
+  "ix86_split_long_move (operands); DONE;")
+
 (define_insn "swapxf"
   [(set (match_operand:XF 0 "register_operand" "+f")
 	(match_operand:XF 1 "register_operand" "+f"))
@@ -16650,7 +16604,7 @@ 
   [(set (match_operand:SWI 0 "push_operand" "")
 	(match_operand:SWI 1 "memory_operand" ""))
    (match_scratch:SWI 2 "<r>")]
-  "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
+  "!(TARGET_PUSH_MEMORY || optimize_insn_for_size_p ())
    && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))])
@@ -16661,7 +16615,7 @@ 
   [(set (match_operand:SF 0 "push_operand" "")
 	(match_operand:SF 1 "memory_operand" ""))
    (match_scratch:SF 2 "r")]
-  "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
+  "!(TARGET_PUSH_MEMORY || optimize_insn_for_size_p ())
    && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))])
@@ -16813,7 +16767,7 @@ 
                      [(match_dup 0)
                       (match_operand:SI 1 "memory_operand" "")]))
               (clobber (reg:CC FLAGS_REG))])]
-  "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY"
+  "!(TARGET_READ_MODIFY || optimize_insn_for_size_p ())"
   [(set (match_dup 2) (match_dup 1))
    (parallel [(set (match_dup 0)
                    (match_op_dup 3 [(match_dup 0) (match_dup 2)]))
@@ -16826,7 +16780,7 @@ 
                      [(match_operand:SI 1 "memory_operand" "")
                       (match_dup 0)]))
               (clobber (reg:CC FLAGS_REG))])]
-  "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY"
+  "!(TARGET_READ_MODIFY || optimize_insn_for_size_p ())"
   [(set (match_dup 2) (match_dup 1))
    (parallel [(set (match_dup 0)
                    (match_op_dup 3 [(match_dup 2) (match_dup 0)]))
@@ -16879,7 +16833,7 @@ 
                      [(match_dup 0)
                       (match_operand:SI 1 "nonmemory_operand" "")]))
               (clobber (reg:CC FLAGS_REG))])]
-  "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE
+  "!(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
    /* Do not split stack checking probes.  */
    && GET_CODE (operands[3]) != IOR && operands[1] != const0_rtx"
   [(set (match_dup 2) (match_dup 0))
@@ -16895,7 +16849,7 @@ 
                      [(match_operand:SI 1 "nonmemory_operand" "")
                       (match_dup 0)]))
               (clobber (reg:CC FLAGS_REG))])]
-  "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE
+  "!(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
    /* Do not split stack checking probes.  */
    && GET_CODE (operands[3]) != IOR && operands[1] != const0_rtx"
   [(set (match_dup 2) (match_dup 0))
Index: constraints.md
===================================================================
--- constraints.md	(revision 174435)
+++ constraints.md	(working copy)
@@ -108,7 +108,9 @@ 
  "@internal Any MMX register, when inter-unit moves are enabled.")
 
 (define_register_constraint "Yd"
- "TARGET_INTEGER_DFMODE_MOVES ? GENERAL_REGS : NO_REGS"
+ "(TARGET_64BIT
+   || (TARGET_INTEGER_DFMODE_MOVES && optimize_function_for_speed_p (cfun)))
+  ? GENERAL_REGS : NO_REGS"
  "@internal Any integer register when integer DFmode moves are enabled.")
 
 (define_register_constraint "Yx"
Index: i386.c
===================================================================
--- i386.c	(revision 174435)
+++ i386.c	(working copy)
@@ -3947,13 +3947,6 @@  ix86_option_override_internal (bool main
   if (!TARGET_80387)
     target_flags |= MASK_NO_FANCY_MATH_387;
 
-  /* On 32bit targets, avoid moving DFmode values in
-     integer registers when optimizing for size.  */
-  if (TARGET_64BIT)
-    target_flags |= TARGET_INTEGER_DFMODE_MOVES;
-  else if (optimize_size)
-    target_flags &= ~TARGET_INTEGER_DFMODE_MOVES;
-
   /* Turn on MMX builtins for -msse.  */
   if (TARGET_SSE)
     {