Patchwork [i386] : Avoid partial memory stalls for FP moves and related FP immediate cleanups

login
register
mail settings
Submitter Uros Bizjak
Date May 30, 2011, 8:55 p.m.
Message ID <BANLkTimPPyZQ2fYHwKkRtLBm_ZMNczsUJw@mail.gmail.com>
Download mbox | patch
Permalink /patch/97955/
State New
Headers show

Comments

Uros Bizjak - May 30, 2011, 8:55 p.m.
Hello!

The attached patch prevents partial memory stalls for XFmode and DFmode
(32-bit) immediate->memory moves by penalizing the memory target in the
case of an immediate move. The patch also cleans up the move splitters
that handle immediate operands (most notably, there are no FP subregs
after the reload pass, so the ix86_split_long_move splitters can be
simplified and merged into one pattern).

2011-05-30  Uros Bizjak  <ubizjak@gmail.com>

	* config/i386/i386.md (*movxf_internal): Penalize FYx*r->o alternative
	to prevent partial memory stalls.  Do not move CONST_DOUBLEs directly
	to memory for !TARGET_MEMORY_MISMATCH_STALL.
	(*movdf_internal_rex64): Do not penalize F->r alternative.
	(*movdf_internal): Penalize FYd*r->o alternative to prevent partial
	memory stalls.  Generate SSE and x87 CONST_DOUBLE immediates only
	when optimizing function for size.  Do not move CONST_DOUBLEs
	directly to memory for !TARGET_MEMORY_MISMATCH_STALL.
	(FP move splitters): Merge {TF,XF,DF}mode move splitters.  Do not
	handle SUBREGs.  Do not check for MEM_P operands in the insn condition,
	check for ANY_FP_REGNO_P instead.
	* config/i386/constraints.md (Yd): Enable GENERAL_REGS for
	TARGET_64BIT and for TARGET_INTEGER_DFMODE_MOVES when optimizing
	function for speed.
	* config/i386/i386.c (ix86_option_override_internal): Do not
	set TARGET_INTEGER_DFMODE_MOVES here.

The patch was bootstrapped and regression tested on x86_64-pc-linux-gnu
{,-m32} and committed to mainline SVN.

Uros.

Patch

Index: i386.md
===================================================================
--- i386.md	(revision 174435)
+++ i386.md	(working copy)
@@ -2833,8 +2833,8 @@ 
   "ix86_expand_move (<MODE>mode, operands); DONE;")
 
 (define_insn "*movtf_internal"
-  [(set (match_operand:TF 0 "nonimmediate_operand" "=x,m,x,?r,?o")
-	(match_operand:TF 1 "general_operand" "xm,x,C,roF,Fr"))]
+  [(set (match_operand:TF 0 "nonimmediate_operand" "=x,m,x,?r ,?o")
+	(match_operand:TF 1 "general_operand"	   "xm,x,C,roF,Fr"))]
   "TARGET_SSE2
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
@@ -2877,24 +2877,19 @@ 
 		   (const_string "TI"))]
 	       (const_string "DI")))])
 
-(define_split
-  [(set (match_operand:TF 0 "nonimmediate_operand" "")
-        (match_operand:TF 1 "general_operand" ""))]
-  "reload_completed
-   && !(SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]))"
-  [(const_int 0)]
-  "ix86_split_long_move (operands); DONE;")
-
+;; Possible store forwarding (partial memory) stall in alternative 4.
 (define_insn "*movxf_internal"
-  [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,Yx*r  ,o")
+  [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,Yx*r  ,!o")
 	(match_operand:XF 1 "general_operand"	   "fm,f,G,Yx*roF,FYx*r"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (!can_create_pseudo_p ()
        || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
        || GET_CODE (operands[1]) != CONST_DOUBLE
        || (optimize_function_for_size_p (cfun)
-	   && standard_80387_constant_p (operands[1]) > 0)
-       || memory_operand (operands[0], XFmode))"
+	   && standard_80387_constant_p (operands[1]) > 0
+	   && !memory_operand (operands[0], XFmode))
+       || (!TARGET_MEMORY_MISMATCH_STALL
+	   && memory_operand (operands[0], XFmode)))"
 {
   switch (which_alternative)
     {
@@ -2905,8 +2900,10 @@ 
     case 2:
       return standard_80387_constant_opcode (operands[1]);
 
-    case 3: case 4:
+    case 3:
+    case 4:
       return "#";
+
     default:
       gcc_unreachable ();
     }
@@ -2914,25 +2911,11 @@ 
   [(set_attr "type" "fmov,fmov,fmov,multi,multi")
    (set_attr "mode" "XF,XF,XF,SI,SI")])
 
-(define_split
-  [(set (match_operand:XF 0 "nonimmediate_operand" "")
-	(match_operand:XF 1 "general_operand" ""))]
-  "reload_completed
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))
-   && ! (FP_REG_P (operands[0]) ||
-	 (GET_CODE (operands[0]) == SUBREG
-	  && FP_REG_P (SUBREG_REG (operands[0]))))
-   && ! (FP_REG_P (operands[1]) ||
-	 (GET_CODE (operands[1]) == SUBREG
-	  && FP_REG_P (SUBREG_REG (operands[1]))))"
-  [(const_int 0)]
-  "ix86_split_long_move (operands); DONE;")
-
 (define_insn "*movdf_internal_rex64"
   [(set (match_operand:DF 0 "nonimmediate_operand"
-		"=f,m,f,r ,m,!r,!m,Y2*x,Y2*x,Y2*x,m   ,Yi,r ")
+		"=f,m,f,r ,m,r,!m,Y2*x,Y2*x,Y2*x,m   ,Yi,r ")
 	(match_operand:DF 1 "general_operand"
-		"fm,f,G,rm,r,F ,F ,C   ,Y2*x,m   ,Y2*x,r ,Yi"))]
+		"fm,f,G,rm,r,F,F ,C   ,Y2*x,m   ,Y2*x,r ,Yi"))]
   "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (!can_create_pseudo_p ()
        || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
@@ -3080,21 +3063,20 @@ 
 ;; Possible store forwarding (partial memory) stall in alternative 4.
 (define_insn "*movdf_internal"
   [(set (match_operand:DF 0 "nonimmediate_operand"
-		"=f,m,f,Yd*r  ,o    ,Y2*x,Y2*x,Y2*x,m  ")
+		"=f,m,f,Yd*r  ,!o   ,Y2*x,Y2*x,Y2*x,m  ")
 	(match_operand:DF 1 "general_operand"
 		"fm,f,G,Yd*roF,FYd*r,C   ,Y2*x,m   ,Y2*x"))]
   "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (!can_create_pseudo_p ()
        || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
        || GET_CODE (operands[1]) != CONST_DOUBLE
-       || (!TARGET_INTEGER_DFMODE_MOVES
+       || (optimize_function_for_size_p (cfun)
 	   && ((!(TARGET_SSE2 && TARGET_SSE_MATH)
 		&& standard_80387_constant_p (operands[1]) > 0)
 	       || (TARGET_SSE2 && TARGET_SSE_MATH
 		   && standard_sse_constant_p (operands[1])))
 	   && !memory_operand (operands[0], DFmode))
-       || ((TARGET_INTEGER_DFMODE_MOVES
-	    || !TARGET_MEMORY_MISMATCH_STALL)
+       || (!TARGET_MEMORY_MISMATCH_STALL
 	   && memory_operand (operands[0], DFmode)))"
 {
   switch (which_alternative)
@@ -3215,20 +3197,6 @@ 
 	      ]
 	      (const_string "DF")))])
 
-(define_split
-  [(set (match_operand:DF 0 "nonimmediate_operand" "")
-	(match_operand:DF 1 "general_operand" ""))]
-  "reload_completed
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))
-   && ! (ANY_FP_REG_P (operands[0]) ||
-	 (GET_CODE (operands[0]) == SUBREG
-	  && ANY_FP_REG_P (SUBREG_REG (operands[0]))))
-   && ! (ANY_FP_REG_P (operands[1]) ||
-	 (GET_CODE (operands[1]) == SUBREG
-	  && ANY_FP_REG_P (SUBREG_REG (operands[1]))))"
-  [(const_int 0)]
-  "ix86_split_long_move (operands); DONE;")
-
 (define_insn "*movsf_internal"
   [(set (match_operand:SF 0 "nonimmediate_operand"
 	  "=f,m,f,r  ,m ,x,x,x ,m,!*y,!m,!*y,?Yi,?r,!*Ym,!r")
@@ -3331,31 +3299,19 @@ 
   [(set (match_operand 0 "register_operand" "")
 	(match_operand 1 "memory_operand" ""))]
   "reload_completed
-   && MEM_P (operands[1])
    && (GET_MODE (operands[0]) == TFmode
        || GET_MODE (operands[0]) == XFmode
        || GET_MODE (operands[0]) == DFmode
        || GET_MODE (operands[0]) == SFmode)
+   && ANY_FP_REGNO_P (REGNO (operands[0]))
    && (operands[2] = find_constant_src (insn))"
   [(set (match_dup 0) (match_dup 2))]
 {
   rtx c = operands[2];
-  rtx r = operands[0];
-
-  if (GET_CODE (r) == SUBREG)
-    r = SUBREG_REG (r);
+  int r = REGNO (operands[0]);
 
-  if (SSE_REG_P (r))
-    {
-      if (!standard_sse_constant_p (c))
-	FAIL;
-    }
-  else if (FP_REG_P (r))
-    {
-      if (standard_80387_constant_p (c) < 1)
-	FAIL;
-    }
-  else if (MMX_REG_P (r))
+  if ((SSE_REGNO_P (r) && !standard_sse_constant_p (c))
+      || (FP_REGNO_P (r) && standard_80387_constant_p (c) < 1))
     FAIL;
 })
 
@@ -3363,31 +3319,18 @@ 
   [(set (match_operand 0 "register_operand" "")
 	(float_extend (match_operand 1 "memory_operand" "")))]
   "reload_completed
-   && MEM_P (operands[1])
    && (GET_MODE (operands[0]) == TFmode
        || GET_MODE (operands[0]) == XFmode
-       || GET_MODE (operands[0]) == DFmode
-       || GET_MODE (operands[0]) == SFmode)
+       || GET_MODE (operands[0]) == DFmode)
+   && ANY_FP_REGNO_P (REGNO (operands[0]))
    && (operands[2] = find_constant_src (insn))"
   [(set (match_dup 0) (match_dup 2))]
 {
   rtx c = operands[2];
-  rtx r = operands[0];
-
-  if (GET_CODE (r) == SUBREG)
-    r = SUBREG_REG (r);
+  int r = REGNO (operands[0]);
 
-  if (SSE_REG_P (r))
-    {
-      if (!standard_sse_constant_p (c))
-	FAIL;
-    }
-  else if (FP_REG_P (r))
-    {
-      if (standard_80387_constant_p (c) < 1)
-	FAIL;
-    }
-  else if (MMX_REG_P (r))
+  if ((SSE_REGNO_P (r) && !standard_sse_constant_p (c))
+      || (FP_REGNO_P (r) && standard_80387_constant_p (c) < 1))
     FAIL;
 })
 
@@ -3411,6 +3354,17 @@ 
     operands[1] = CONST1_RTX (<MODE>mode);
 })
 
+(define_split
+  [(set (match_operand 0 "nonimmediate_operand" "")
+        (match_operand 1 "general_operand" ""))]
+  "reload_completed
+   && (GET_MODE (operands[0]) == TFmode
+       || GET_MODE (operands[0]) == XFmode
+       || GET_MODE (operands[0]) == DFmode)
+   && !(ANY_FP_REG_P (operands[0]) || ANY_FP_REG_P (operands[1]))"
+  [(const_int 0)]
+  "ix86_split_long_move (operands); DONE;")
+
 (define_insn "swapxf"
   [(set (match_operand:XF 0 "register_operand" "+f")
 	(match_operand:XF 1 "register_operand" "+f"))
@@ -16650,7 +16604,7 @@ 
   [(set (match_operand:SWI 0 "push_operand" "")
 	(match_operand:SWI 1 "memory_operand" ""))
    (match_scratch:SWI 2 "<r>")]
-  "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
+  "!(TARGET_PUSH_MEMORY || optimize_insn_for_size_p ())
    && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))])
@@ -16661,7 +16615,7 @@ 
   [(set (match_operand:SF 0 "push_operand" "")
 	(match_operand:SF 1 "memory_operand" ""))
    (match_scratch:SF 2 "r")]
-  "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
+  "!(TARGET_PUSH_MEMORY || optimize_insn_for_size_p ())
    && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))])
@@ -16813,7 +16767,7 @@ 
                      [(match_dup 0)
                       (match_operand:SI 1 "memory_operand" "")]))
               (clobber (reg:CC FLAGS_REG))])]
-  "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY"
+  "!(TARGET_READ_MODIFY || optimize_insn_for_size_p ())"
   [(set (match_dup 2) (match_dup 1))
    (parallel [(set (match_dup 0)
                    (match_op_dup 3 [(match_dup 0) (match_dup 2)]))
@@ -16826,7 +16780,7 @@ 
                      [(match_operand:SI 1 "memory_operand" "")
                       (match_dup 0)]))
               (clobber (reg:CC FLAGS_REG))])]
-  "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY"
+  "!(TARGET_READ_MODIFY || optimize_insn_for_size_p ())"
   [(set (match_dup 2) (match_dup 1))
    (parallel [(set (match_dup 0)
                    (match_op_dup 3 [(match_dup 2) (match_dup 0)]))
@@ -16879,7 +16833,7 @@ 
                      [(match_dup 0)
                       (match_operand:SI 1 "nonmemory_operand" "")]))
               (clobber (reg:CC FLAGS_REG))])]
-  "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE
+  "!(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
    /* Do not split stack checking probes.  */
    && GET_CODE (operands[3]) != IOR && operands[1] != const0_rtx"
   [(set (match_dup 2) (match_dup 0))
@@ -16895,7 +16849,7 @@ 
                      [(match_operand:SI 1 "nonmemory_operand" "")
                       (match_dup 0)]))
               (clobber (reg:CC FLAGS_REG))])]
-  "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE
+  "!(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
    /* Do not split stack checking probes.  */
    && GET_CODE (operands[3]) != IOR && operands[1] != const0_rtx"
   [(set (match_dup 2) (match_dup 0))
Index: constraints.md
===================================================================
--- constraints.md	(revision 174435)
+++ constraints.md	(working copy)
@@ -108,7 +108,9 @@ 
  "@internal Any MMX register, when inter-unit moves are enabled.")
 
 (define_register_constraint "Yd"
- "TARGET_INTEGER_DFMODE_MOVES ? GENERAL_REGS : NO_REGS"
+ "(TARGET_64BIT
+   || (TARGET_INTEGER_DFMODE_MOVES && optimize_function_for_speed_p (cfun)))
+  ? GENERAL_REGS : NO_REGS"
  "@internal Any integer register when integer DFmode moves are enabled.")
 
 (define_register_constraint "Yx"
Index: i386.c
===================================================================
--- i386.c	(revision 174435)
+++ i386.c	(working copy)
@@ -3947,13 +3947,6 @@  ix86_option_override_internal (bool main
   if (!TARGET_80387)
     target_flags |= MASK_NO_FANCY_MATH_387;
 
-  /* On 32bit targets, avoid moving DFmode values in
-     integer registers when optimizing for size.  */
-  if (TARGET_64BIT)
-    target_flags |= TARGET_INTEGER_DFMODE_MOVES;
-  else if (optimize_size)
-    target_flags &= ~TARGET_INTEGER_DFMODE_MOVES;
-
   /* Turn on MMX builtins for -msse.  */
   if (TARGET_SSE)
     {