Patchwork [i386] : Merge push{xf,df}_integer, movdf_integer with corresponding base patterns

login
register
mail settings
Submitter Uros Bizjak
Date May 14, 2011, 3:25 p.m.
Message ID <BANLkTi=qanK6ACRG2o-3YeGBGYOw9ipFJQ@mail.gmail.com>
Download mbox | patch
Permalink /patch/95560/
State New
Headers show

Comments

Uros Bizjak - May 14, 2011, 3:25 p.m.
Hello!

The attached patch introduces the Yd and Yx conditional register
constraints in order to merge push{xf,df}_integer and movdf_integer with
their corresponding base patterns.  Additionally, the patch uses
standard_sse_constant_p to check for valid SSE constants in the relevant
patterns and standard_sse_constant_opcode to output the SSE insn.

2011-05-14  Uros Bizjak  <ubizjak@gmail.com>

	* config/i386/constraints.md (Yd, Yx): New register constraints.
	* config/i386/i386.md (*pushdf): Merge with *pushdf_nointeger.  Use
	Yd conditional register constraint.
	(*movtf_internal): Use standard_sse_constant_opcode.
	(*movxf_internal): Merge with *movxf_internal_nointeger.  Use
	Yx conditional register constraint.
	(*movdf_internal): Merge with *movdf_internal_nointeger.  Use
	Yd conditional register constraint.  Use standard_sse_constant_p to
	check for valid SSE constants and call standard_sse_constant_opcode to
	output SSE insn.
	(*movsf_internal): Use standard_sse_constant_p to check for valid SSE
	constants and call standard_sse_constant_opcode to output SSE insn.
	* config/i386/i386.c (ix86_option_override_internal): Set
	TARGET_INTEGER_DFMODE_MOVES for 64bit targets.  Clear it when
	optimize_size is set.
	(standard_sse_constant_opcode): Output conditional AVX templates.

Patch was bootstrapped and regression tested on x86_64-pc-linux-gnu
{,-m32}. Patch was committed to SVN mainline.

Uros.

Patch

Index: i386.md
===================================================================
--- i386.md	(revision 173748)
+++ i386.md	(working copy)
@@ -2702,10 +2702,14 @@ 
   [(const_int 0)]
   "ix86_split_long_move (operands); DONE;")
 
+;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size.
+;; Size of pushdf using integer instructions is 2+2*memory operand size
+;; On the average, pushdf using integers can be still shorter.
+
 (define_insn "*pushdf"
   [(set (match_operand:DF 0 "push_operand" "=<,<,<")
-	(match_operand:DF 1 "general_no_elim_operand" "f,rFo,Y2"))]
-  "TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES"
+	(match_operand:DF 1 "general_no_elim_operand" "f,Yd*rFo,Y2"))]
+  ""
 {
   /* This insn should be already split before reg-stack.  */
   gcc_unreachable ();
@@ -2714,23 +2718,6 @@ 
    (set_attr "unit" "i387,*,*")
    (set_attr "mode" "DF,SI,DF")])
 
-;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size.
-;; Size of pushdf using integer instructions is 2+2*memory operand size
-;; On the average, pushdf using integers can be still shorter.  Allow this
-;; pattern for optimize_size too.
-
-(define_insn "*pushdf_nointeger"
-  [(set (match_operand:DF 0 "push_operand" "=<,<,<,<")
-	(match_operand:DF 1 "general_no_elim_operand" "f,Fo,*r,Y2"))]
-  "!(TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES)"
-{
-  /* This insn should be already split before reg-stack.  */
-  gcc_unreachable ();
-}
-  [(set_attr "type" "multi")
-   (set_attr "unit" "i387,*,*,*")
-   (set_attr "mode" "DF,SI,SI,DF")])
-
 ;; %%% Kill this when call knows how to work this out.
 (define_split
   [(set (match_operand:DF 0 "push_operand" "")
@@ -2822,14 +2809,14 @@ 
 	return "%vmovaps\t{%1, %0|%0, %1}";
       else
 	return "%vmovdqa\t{%1, %0|%0, %1}";
+
     case 2:
-      if (get_attr_mode (insn) == MODE_V4SF)
-	return "%vxorps\t%0, %d0";
-      else
-	return "%vpxor\t%0, %d0";
+      return standard_sse_constant_opcode (insn, operands[1]);
+
     case 3:
     case 4:
 	return "#";
+
     default:
       gcc_unreachable ();
     }
@@ -2862,42 +2849,14 @@ 
   "ix86_split_long_move (operands); DONE;")
 
 (define_insn "*movxf_internal"
-  [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,r,o")
-	(match_operand:XF 1 "general_operand" "fm,f,G,roF,Fr"))]
-  "optimize_function_for_speed_p (cfun)
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))
-   && (!can_create_pseudo_p ()
-       || GET_CODE (operands[1]) != CONST_DOUBLE
-       || memory_operand (operands[0], XFmode))"
-{
-  switch (which_alternative)
-    {
-    case 0:
-    case 1:
-      return output_387_reg_move (insn, operands);
-
-    case 2:
-      return standard_80387_constant_opcode (operands[1]);
-
-    case 3: case 4:
-      return "#";
-
-    default:
-      gcc_unreachable ();
-    }
-}
-  [(set_attr "type" "fmov,fmov,fmov,multi,multi")
-   (set_attr "mode" "XF,XF,XF,SI,SI")])
-
-;; Do not use integer registers when optimizing for size
-(define_insn "*movxf_internal_nointeger"
-  [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,*r,o")
-	(match_operand:XF 1 "general_operand" "fm,f,G,*roF,F*r"))]
-  "optimize_function_for_size_p (cfun)
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+  [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,Yx*r  ,o")
+	(match_operand:XF 1 "general_operand"	   "fm,f,G,Yx*roF,FYx*r"))]
+  "!(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (!can_create_pseudo_p ()
-       || standard_80387_constant_p (operands[1])
+       || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
        || GET_CODE (operands[1]) != CONST_DOUBLE
+       || (optimize_function_for_size_p (cfun)
+	   && standard_80387_constant_p (operands[1]) > 0)
        || memory_operand (operands[0], XFmode))"
 {
   switch (which_alternative)
@@ -2940,10 +2899,12 @@ 
   "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (!can_create_pseudo_p ()
        || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
-       || (!(TARGET_SSE2 && TARGET_SSE_MATH)
-           && optimize_function_for_size_p (cfun)
-	   && standard_80387_constant_p (operands[1]))
        || GET_CODE (operands[1]) != CONST_DOUBLE
+       || (optimize_function_for_size_p (cfun)
+	   && ((!(TARGET_SSE2 && TARGET_SSE_MATH)
+		&& standard_80387_constant_p (operands[1]) > 0)
+	       || (TARGET_SSE2 && TARGET_SSE_MATH
+		   && standard_sse_constant_p (operands[1]))))
        || memory_operand (operands[0], DFmode))"
 {
   switch (which_alternative)
@@ -2966,23 +2927,8 @@ 
       return "#";
 
     case 7:
-      switch (get_attr_mode (insn))
-	{
-	case MODE_V4SF:
-	  return "%vxorps\t%0, %d0";
-	case MODE_V2DF:
-	  if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-	    return "%vxorps\t%0, %d0";
-	  else
-	    return "%vxorpd\t%0, %d0";
-	case MODE_TI:
-	  if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-	    return "%vxorps\t%0, %d0";
-	  else
-	    return "%vpxor\t%0, %d0";
-	default:
-	  gcc_unreachable ();
-	}
+      return standard_sse_constant_opcode (insn, operands[1]);
+
     case 8:
     case 9:
     case 10:
@@ -3094,21 +3040,26 @@ 
 	      ]
 	      (const_string "DF")))])
 
+;; Possible store forwarding (partial memory) stall in alternative 4.
 (define_insn "*movdf_internal"
   [(set (match_operand:DF 0 "nonimmediate_operand"
-		"=f,m,f,r  ,o ,Y2*x,Y2*x,Y2*x,m   ")
+		"=f,m,f,Yd*r  ,o    ,Y2*x,Y2*x,Y2*x,m  ")
 	(match_operand:DF 1 "general_operand"
-		"fm,f,G,roF,Fr,C   ,Y2*x,m   ,Y2*x"))]
+		"fm,f,G,Yd*roF,FYd*r,C   ,Y2*x,m   ,Y2*x"))]
   "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
-   && optimize_function_for_speed_p (cfun)
-   && TARGET_INTEGER_DFMODE_MOVES
    && (!can_create_pseudo_p ()
        || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
-       || (!(TARGET_SSE2 && TARGET_SSE_MATH)
-           && optimize_function_for_size_p (cfun)
-	   && standard_80387_constant_p (operands[1]))
        || GET_CODE (operands[1]) != CONST_DOUBLE
-       || memory_operand (operands[0], DFmode))"
+       || (optimize_function_for_size_p (cfun)
+	   && ((!(TARGET_SSE2 && TARGET_SSE_MATH)
+		&& standard_80387_constant_p (operands[1]) > 0)
+	       || (TARGET_SSE2 && TARGET_SSE_MATH
+		   && standard_sse_constant_p (operands[1])))
+	   && !memory_operand (operands[0], DFmode))
+       || ((TARGET_INTEGER_DFMODE_MOVES
+	    || (optimize_function_for_size_p (cfun)
+	        && !TARGET_MEMORY_MISMATCH_STALL))
+	   && memory_operand (operands[0], DFmode)))"
 {
   switch (which_alternative)
     {
@@ -3124,179 +3075,8 @@ 
       return "#";
 
     case 5:
-      switch (get_attr_mode (insn))
-	{
-	case MODE_V4SF:
-	  return "%vxorps\t%0, %d0";
-	case MODE_V2DF:
-	  if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-	    return "%vxorps\t%0, %d0";
-	  else
-	    return "%vxorpd\t%0, %d0";
-	case MODE_TI:
-	  if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-	    return "%vxorps\t%0, %d0";
-	  else
-	    return "%vpxor\t%0, %d0";
-	default:
-	  gcc_unreachable ();
-	}
-    case 6:
-    case 7:
-    case 8:
-      switch (get_attr_mode (insn))
-	{
-	case MODE_V4SF:
-	  return "%vmovaps\t{%1, %0|%0, %1}";
-	case MODE_V2DF:
-	  if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-	    return "%vmovaps\t{%1, %0|%0, %1}";
-	  else
-	    return "%vmovapd\t{%1, %0|%0, %1}";
-	case MODE_TI:
-	  if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-	    return "%vmovaps\t{%1, %0|%0, %1}";
-	  else
-	    return "%vmovdqa\t{%1, %0|%0, %1}";
-	case MODE_DI:
-	  return "%vmovq\t{%1, %0|%0, %1}";
-	case MODE_DF:
-	  if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
-	    return "vmovsd\t{%1, %0, %0|%0, %0, %1}";
-	  else
-	    return "%vmovsd\t{%1, %0|%0, %1}";
-	case MODE_V1DF:
-	  if (TARGET_AVX && REG_P (operands[0]))
-	    return "vmovlpd\t{%1, %0, %0|%0, %0, %1}";
-	  else
-	    return "%vmovlpd\t{%1, %0|%0, %1}";
-	case MODE_V2SF:
-	  if (TARGET_AVX && REG_P (operands[0]))
-	    return "vmovlps\t{%1, %0, %0|%0, %0, %1}";
-	  else
-	    return "%vmovlps\t{%1, %0|%0, %1}";
-	default:
-	  gcc_unreachable ();
-	}
+      return standard_sse_constant_opcode (insn, operands[1]);
 
-    default:
-      gcc_unreachable ();
-    }
-}
-  [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov")
-   (set (attr "prefix")
-     (if_then_else (eq_attr "alternative" "0,1,2,3,4")
-       (const_string "orig")
-       (const_string "maybe_vex")))
-   (set (attr "prefix_data16")
-     (if_then_else (eq_attr "mode" "V1DF")
-       (const_string "1")
-       (const_string "*")))
-   (set (attr "mode")
-        (cond [(eq_attr "alternative" "0,1,2")
-		 (const_string "DF")
-	       (eq_attr "alternative" "3,4")
-		 (const_string "SI")
-
-	       /* For SSE1, we have many fewer alternatives.  */
-	       (eq (symbol_ref "TARGET_SSE2") (const_int 0))
-		 (cond [(eq_attr "alternative" "5,6")
-			  (const_string "V4SF")
-		       ]
-		   (const_string "V2SF"))
-
-	       /* xorps is one byte shorter.  */
-	       (eq_attr "alternative" "5")
-		 (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
-			    (const_int 0))
-			  (const_string "V4SF")
-			(ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
-			    (const_int 0))
-			  (const_string "TI")
-		       ]
-		       (const_string "V2DF"))
-
-	       /* For architectures resolving dependencies on
-		  whole SSE registers use APD move to break dependency
-		  chains, otherwise use short move to avoid extra work.
-
-		  movaps encodes one byte shorter.  */
-	       (eq_attr "alternative" "6")
-		 (cond
-		   [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
-		        (const_int 0))
-		      (const_string "V4SF")
-		    (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
-		        (const_int 0))
-		      (const_string "V2DF")
-		   ]
-		   (const_string "DF"))
-	       /* For architectures resolving dependencies on register
-		  parts we may avoid extra work to zero out upper part
-		  of register.  */
-	       (eq_attr "alternative" "7")
-		 (if_then_else
-		   (ne (symbol_ref "TARGET_SSE_SPLIT_REGS")
-		       (const_int 0))
-		   (const_string "V1DF")
-		   (const_string "DF"))
-	      ]
-	      (const_string "DF")))])
-
-;; Moving is usually shorter when only FP registers are used. This separate
-;; movdf pattern avoids the use of integer registers for FP operations
-;; when optimizing for size.
-
-(define_insn "*movdf_internal_nointeger"
-  [(set (match_operand:DF 0 "nonimmediate_operand"
-			"=f,m,f,*r  ,o  ,Y2*x,Y2*x,Y2*x ,m  ")
-	(match_operand:DF 1 "general_operand"
-			"fm,f,G,*roF,F*r,C   ,Y2*x,mY2*x,Y2*x"))]
-  "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
-   && (optimize_function_for_size_p (cfun)
-       || !TARGET_INTEGER_DFMODE_MOVES)
-   && (!can_create_pseudo_p ()
-       || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
-       || (!(TARGET_SSE2 && TARGET_SSE_MATH)
-           && optimize_function_for_size_p (cfun)
-           && !memory_operand (operands[0], DFmode)
-	   && standard_80387_constant_p (operands[1]))
-       || GET_CODE (operands[1]) != CONST_DOUBLE
-       || ((optimize_function_for_size_p (cfun)
-            || !TARGET_MEMORY_MISMATCH_STALL)
- 	   && memory_operand (operands[0], DFmode)))"
-{
-  switch (which_alternative)
-    {
-    case 0:
-    case 1:
-      return output_387_reg_move (insn, operands);
-
-    case 2:
-      return standard_80387_constant_opcode (operands[1]);
-
-    case 3:
-    case 4:
-      return "#";
-
-    case 5:
-      switch (get_attr_mode (insn))
-	{
-	case MODE_V4SF:
-	  return "%vxorps\t%0, %d0";
-	case MODE_V2DF:
-	  if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-	    return "%vxorps\t%0, %d0";
-	  else
-	    return "%vxorpd\t%0, %d0";
-	case MODE_TI:
-	  if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-	    return "%vxorps\t%0, %d0";
-	  else
-	    return "%vpxor\t%0, %d0";
-	default:
-	  gcc_unreachable ();
-	}
     case 6:
     case 7:
     case 8:
@@ -3421,9 +3201,12 @@ 
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (!can_create_pseudo_p ()
        || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
-       || (!TARGET_SSE_MATH && optimize_function_for_size_p (cfun)
-	   && standard_80387_constant_p (operands[1]))
        || GET_CODE (operands[1]) != CONST_DOUBLE
+       || (optimize_function_for_size_p (cfun)
+	   && ((!TARGET_SSE_MATH
+		&& standard_80387_constant_p (operands[1]) > 0)
+	       || (TARGET_SSE_MATH
+		   && standard_sse_constant_p (operands[1]))))
        || memory_operand (operands[0], SFmode))"
 {
   switch (which_alternative)
@@ -3438,11 +3221,10 @@ 
     case 3:
     case 4:
       return "mov{l}\t{%1, %0|%0, %1}";
+
     case 5:
-      if (get_attr_mode (insn) == MODE_TI)
-	return "%vpxor\t%0, %d0";
-      else
-	return "%vxorps\t%0, %d0";
+      return standard_sse_constant_opcode (insn, operands[1]);
+
     case 6:
       if (get_attr_mode (insn) == MODE_V4SF)
 	return "%vmovaps\t{%1, %0|%0, %1}";
Index: constraints.md
===================================================================
--- constraints.md	(revision 173748)
+++ constraints.md	(working copy)
@@ -90,6 +90,8 @@ 
 ;;  2	SSE2 enabled
 ;;  i	SSE2 inter-unit moves enabled
 ;;  m	MMX inter-unit moves enabled
+;;  d	Integer register when integer DFmode moves are enabled
+;;  x	Integer register when integer XFmode moves are enabled
 
 (define_register_constraint "Yz" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS"
  "First SSE register (@code{%xmm0}).")
@@ -105,6 +107,14 @@ 
  "TARGET_MMX && TARGET_INTER_UNIT_MOVES ? MMX_REGS : NO_REGS"
  "@internal Any MMX register, when inter-unit moves are enabled.")
 
+(define_register_constraint "Yd"
+ "TARGET_INTEGER_DFMODE_MOVES ? GENERAL_REGS : NO_REGS"
+ "@internal Any integer register when integer DFmode moves are enabled.")
+
+(define_register_constraint "Yx"
+ "optimize_function_for_speed_p (cfun) ? GENERAL_REGS : NO_REGS"
+ "@internal Any integer register when integer XFmode moves are enabled.")
+
 ;; Integer constant constraints.
 (define_constraint "I"
   "Integer constant in the range 0 @dots{} 31, for 32-bit shifts."
@@ -149,7 +159,7 @@ 
 (define_constraint "G"
   "Standard 80387 floating point constant."
   (and (match_code "const_double")
-       (match_test "standard_80387_constant_p (op)")))
+       (match_test "standard_80387_constant_p (op) > 0")))
 
 ;; This can theoretically be any mode's CONST0_RTX.
 (define_constraint "C"
Index: i386.c
===================================================================
--- i386.c	(revision 173748)
+++ i386.c	(working copy)
@@ -3933,6 +3933,13 @@  ix86_option_override_internal (bool main
   if (!TARGET_80387)
     target_flags |= MASK_NO_FANCY_MATH_387;
 
+  /* On 32bit targets, avoid moving DFmode values in
+     integer registers when optimizing for size.  */
+  if (TARGET_64BIT)
+    target_flags |= TARGET_INTEGER_DFMODE_MOVES;
+  else if (optimize_size)
+    target_flags &= ~TARGET_INTEGER_DFMODE_MOVES;
+
   /* Turn on MMX builtins for -msse.  */
   if (TARGET_SSE)
     {
@@ -8580,17 +8587,17 @@  standard_sse_constant_opcode (rtx insn, 
       switch (get_attr_mode (insn))
 	{
 	case MODE_V4SF:
-	  return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
+	  return "%vxorps\t%0, %d0";
 	case MODE_V2DF:
 	  if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-	    return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
+	    return "%vxorps\t%0, %d0";
 	  else
-	    return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
+	    return "%vxorpd\t%0, %d0";
 	case MODE_TI:
 	  if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-	    return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
+	    return "%vxorps\t%0, %d0";
 	  else
-	    return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
+	    return "%vpxor\t%0, %d0";
 	case MODE_V8SF:
 	  return "vxorps\t%x0, %x0, %x0";
 	case MODE_V4DF:
@@ -8607,7 +8614,7 @@  standard_sse_constant_opcode (rtx insn, 
 	  break;
 	}
     case 2:
-      return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
+      return "%vpcmpeqd\t%0, %d0";
     default:
       break;
     }