===================================================================
@@ -2702,10 +2702,14 @@
[(const_int 0)]
"ix86_split_long_move (operands); DONE;")
+;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size.
+;; Size of pushdf using integer instructions is 2+2*memory operand size
+;; On the average, pushdf using integers can be still shorter.
+
(define_insn "*pushdf"
[(set (match_operand:DF 0 "push_operand" "=<,<,<")
- (match_operand:DF 1 "general_no_elim_operand" "f,rFo,Y2"))]
- "TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES"
+ (match_operand:DF 1 "general_no_elim_operand" "f,Yd*rFo,Y2"))]
+ ""
{
/* This insn should be already split before reg-stack. */
gcc_unreachable ();
@@ -2714,23 +2718,6 @@
(set_attr "unit" "i387,*,*")
(set_attr "mode" "DF,SI,DF")])
-;; Size of pushdf is 3 (for sub) + 2 (for fstp) + memory operand size.
-;; Size of pushdf using integer instructions is 2+2*memory operand size
-;; On the average, pushdf using integers can be still shorter. Allow this
-;; pattern for optimize_size too.
-
-(define_insn "*pushdf_nointeger"
- [(set (match_operand:DF 0 "push_operand" "=<,<,<,<")
- (match_operand:DF 1 "general_no_elim_operand" "f,Fo,*r,Y2"))]
- "!(TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES)"
-{
- /* This insn should be already split before reg-stack. */
- gcc_unreachable ();
-}
- [(set_attr "type" "multi")
- (set_attr "unit" "i387,*,*,*")
- (set_attr "mode" "DF,SI,SI,DF")])
-
;; %%% Kill this when call knows how to work this out.
(define_split
[(set (match_operand:DF 0 "push_operand" "")
@@ -2822,14 +2809,14 @@
return "%vmovaps\t{%1, %0|%0, %1}";
else
return "%vmovdqa\t{%1, %0|%0, %1}";
+
case 2:
- if (get_attr_mode (insn) == MODE_V4SF)
- return "%vxorps\t%0, %d0";
- else
- return "%vpxor\t%0, %d0";
+ return standard_sse_constant_opcode (insn, operands[1]);
+
case 3:
case 4:
return "#";
+
default:
gcc_unreachable ();
}
@@ -2862,42 +2849,14 @@
"ix86_split_long_move (operands); DONE;")
(define_insn "*movxf_internal"
- [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,r,o")
- (match_operand:XF 1 "general_operand" "fm,f,G,roF,Fr"))]
- "optimize_function_for_speed_p (cfun)
- && !(MEM_P (operands[0]) && MEM_P (operands[1]))
- && (!can_create_pseudo_p ()
- || GET_CODE (operands[1]) != CONST_DOUBLE
- || memory_operand (operands[0], XFmode))"
-{
- switch (which_alternative)
- {
- case 0:
- case 1:
- return output_387_reg_move (insn, operands);
-
- case 2:
- return standard_80387_constant_opcode (operands[1]);
-
- case 3: case 4:
- return "#";
-
- default:
- gcc_unreachable ();
- }
-}
- [(set_attr "type" "fmov,fmov,fmov,multi,multi")
- (set_attr "mode" "XF,XF,XF,SI,SI")])
-
-;; Do not use integer registers when optimizing for size
-(define_insn "*movxf_internal_nointeger"
- [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,*r,o")
- (match_operand:XF 1 "general_operand" "fm,f,G,*roF,F*r"))]
- "optimize_function_for_size_p (cfun)
- && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+ [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,Yx*r ,o")
+ (match_operand:XF 1 "general_operand" "fm,f,G,Yx*roF,FYx*r"))]
+ "!(MEM_P (operands[0]) && MEM_P (operands[1]))
&& (!can_create_pseudo_p ()
- || standard_80387_constant_p (operands[1])
+ || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
|| GET_CODE (operands[1]) != CONST_DOUBLE
+ || (optimize_function_for_size_p (cfun)
+ && standard_80387_constant_p (operands[1]) > 0)
|| memory_operand (operands[0], XFmode))"
{
switch (which_alternative)
@@ -2940,10 +2899,12 @@
"TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
&& (!can_create_pseudo_p ()
|| (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
- || (!(TARGET_SSE2 && TARGET_SSE_MATH)
- && optimize_function_for_size_p (cfun)
- && standard_80387_constant_p (operands[1]))
|| GET_CODE (operands[1]) != CONST_DOUBLE
+ || (optimize_function_for_size_p (cfun)
+ && ((!(TARGET_SSE2 && TARGET_SSE_MATH)
+ && standard_80387_constant_p (operands[1]) > 0)
+ || (TARGET_SSE2 && TARGET_SSE_MATH
+ && standard_sse_constant_p (operands[1]))))
|| memory_operand (operands[0], DFmode))"
{
switch (which_alternative)
@@ -2966,23 +2927,8 @@
return "#";
case 7:
- switch (get_attr_mode (insn))
- {
- case MODE_V4SF:
- return "%vxorps\t%0, %d0";
- case MODE_V2DF:
- if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- return "%vxorps\t%0, %d0";
- else
- return "%vxorpd\t%0, %d0";
- case MODE_TI:
- if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- return "%vxorps\t%0, %d0";
- else
- return "%vpxor\t%0, %d0";
- default:
- gcc_unreachable ();
- }
+ return standard_sse_constant_opcode (insn, operands[1]);
+
case 8:
case 9:
case 10:
@@ -3094,21 +3040,26 @@
]
(const_string "DF")))])
+;; Possible store forwarding (partial memory) stall in alternative 4.
(define_insn "*movdf_internal"
[(set (match_operand:DF 0 "nonimmediate_operand"
- "=f,m,f,r ,o ,Y2*x,Y2*x,Y2*x,m ")
+ "=f,m,f,Yd*r ,o ,Y2*x,Y2*x,Y2*x,m ")
(match_operand:DF 1 "general_operand"
- "fm,f,G,roF,Fr,C ,Y2*x,m ,Y2*x"))]
+ "fm,f,G,Yd*roF,FYd*r,C ,Y2*x,m ,Y2*x"))]
"!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
- && optimize_function_for_speed_p (cfun)
- && TARGET_INTEGER_DFMODE_MOVES
&& (!can_create_pseudo_p ()
|| (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
- || (!(TARGET_SSE2 && TARGET_SSE_MATH)
- && optimize_function_for_size_p (cfun)
- && standard_80387_constant_p (operands[1]))
|| GET_CODE (operands[1]) != CONST_DOUBLE
- || memory_operand (operands[0], DFmode))"
+ || (optimize_function_for_size_p (cfun)
+ && ((!(TARGET_SSE2 && TARGET_SSE_MATH)
+ && standard_80387_constant_p (operands[1]) > 0)
+ || (TARGET_SSE2 && TARGET_SSE_MATH
+ && standard_sse_constant_p (operands[1])))
+ && !memory_operand (operands[0], DFmode))
+ || ((TARGET_INTEGER_DFMODE_MOVES
+ || (optimize_function_for_size_p (cfun)
+ && !TARGET_MEMORY_MISMATCH_STALL))
+ && memory_operand (operands[0], DFmode)))"
{
switch (which_alternative)
{
@@ -3124,179 +3075,8 @@
return "#";
case 5:
- switch (get_attr_mode (insn))
- {
- case MODE_V4SF:
- return "%vxorps\t%0, %d0";
- case MODE_V2DF:
- if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- return "%vxorps\t%0, %d0";
- else
- return "%vxorpd\t%0, %d0";
- case MODE_TI:
- if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- return "%vxorps\t%0, %d0";
- else
- return "%vpxor\t%0, %d0";
- default:
- gcc_unreachable ();
- }
- case 6:
- case 7:
- case 8:
- switch (get_attr_mode (insn))
- {
- case MODE_V4SF:
- return "%vmovaps\t{%1, %0|%0, %1}";
- case MODE_V2DF:
- if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- return "%vmovaps\t{%1, %0|%0, %1}";
- else
- return "%vmovapd\t{%1, %0|%0, %1}";
- case MODE_TI:
- if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- return "%vmovaps\t{%1, %0|%0, %1}";
- else
- return "%vmovdqa\t{%1, %0|%0, %1}";
- case MODE_DI:
- return "%vmovq\t{%1, %0|%0, %1}";
- case MODE_DF:
- if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
- return "vmovsd\t{%1, %0, %0|%0, %0, %1}";
- else
- return "%vmovsd\t{%1, %0|%0, %1}";
- case MODE_V1DF:
- if (TARGET_AVX && REG_P (operands[0]))
- return "vmovlpd\t{%1, %0, %0|%0, %0, %1}";
- else
- return "%vmovlpd\t{%1, %0|%0, %1}";
- case MODE_V2SF:
- if (TARGET_AVX && REG_P (operands[0]))
- return "vmovlps\t{%1, %0, %0|%0, %0, %1}";
- else
- return "%vmovlps\t{%1, %0|%0, %1}";
- default:
- gcc_unreachable ();
- }
+ return standard_sse_constant_opcode (insn, operands[1]);
- default:
- gcc_unreachable ();
- }
-}
- [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov")
- (set (attr "prefix")
- (if_then_else (eq_attr "alternative" "0,1,2,3,4")
- (const_string "orig")
- (const_string "maybe_vex")))
- (set (attr "prefix_data16")
- (if_then_else (eq_attr "mode" "V1DF")
- (const_string "1")
- (const_string "*")))
- (set (attr "mode")
- (cond [(eq_attr "alternative" "0,1,2")
- (const_string "DF")
- (eq_attr "alternative" "3,4")
- (const_string "SI")
-
- /* For SSE1, we have many fewer alternatives. */
- (eq (symbol_ref "TARGET_SSE2") (const_int 0))
- (cond [(eq_attr "alternative" "5,6")
- (const_string "V4SF")
- ]
- (const_string "V2SF"))
-
- /* xorps is one byte shorter. */
- (eq_attr "alternative" "5")
- (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
- (const_int 0))
- (const_string "V4SF")
- (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
- (const_int 0))
- (const_string "TI")
- ]
- (const_string "V2DF"))
-
- /* For architectures resolving dependencies on
- whole SSE registers use APD move to break dependency
- chains, otherwise use short move to avoid extra work.
-
- movaps encodes one byte shorter. */
- (eq_attr "alternative" "6")
- (cond
- [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
- (const_int 0))
- (const_string "V4SF")
- (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
- (const_int 0))
- (const_string "V2DF")
- ]
- (const_string "DF"))
- /* For architectures resolving dependencies on register
- parts we may avoid extra work to zero out upper part
- of register. */
- (eq_attr "alternative" "7")
- (if_then_else
- (ne (symbol_ref "TARGET_SSE_SPLIT_REGS")
- (const_int 0))
- (const_string "V1DF")
- (const_string "DF"))
- ]
- (const_string "DF")))])
-
-;; Moving is usually shorter when only FP registers are used. This separate
-;; movdf pattern avoids the use of integer registers for FP operations
-;; when optimizing for size.
-
-(define_insn "*movdf_internal_nointeger"
- [(set (match_operand:DF 0 "nonimmediate_operand"
- "=f,m,f,*r ,o ,Y2*x,Y2*x,Y2*x ,m ")
- (match_operand:DF 1 "general_operand"
- "fm,f,G,*roF,F*r,C ,Y2*x,mY2*x,Y2*x"))]
- "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
- && (optimize_function_for_size_p (cfun)
- || !TARGET_INTEGER_DFMODE_MOVES)
- && (!can_create_pseudo_p ()
- || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
- || (!(TARGET_SSE2 && TARGET_SSE_MATH)
- && optimize_function_for_size_p (cfun)
- && !memory_operand (operands[0], DFmode)
- && standard_80387_constant_p (operands[1]))
- || GET_CODE (operands[1]) != CONST_DOUBLE
- || ((optimize_function_for_size_p (cfun)
- || !TARGET_MEMORY_MISMATCH_STALL)
- && memory_operand (operands[0], DFmode)))"
-{
- switch (which_alternative)
- {
- case 0:
- case 1:
- return output_387_reg_move (insn, operands);
-
- case 2:
- return standard_80387_constant_opcode (operands[1]);
-
- case 3:
- case 4:
- return "#";
-
- case 5:
- switch (get_attr_mode (insn))
- {
- case MODE_V4SF:
- return "%vxorps\t%0, %d0";
- case MODE_V2DF:
- if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- return "%vxorps\t%0, %d0";
- else
- return "%vxorpd\t%0, %d0";
- case MODE_TI:
- if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- return "%vxorps\t%0, %d0";
- else
- return "%vpxor\t%0, %d0";
- default:
- gcc_unreachable ();
- }
case 6:
case 7:
case 8:
@@ -3421,9 +3201,12 @@
"!(MEM_P (operands[0]) && MEM_P (operands[1]))
&& (!can_create_pseudo_p ()
|| (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
- || (!TARGET_SSE_MATH && optimize_function_for_size_p (cfun)
- && standard_80387_constant_p (operands[1]))
|| GET_CODE (operands[1]) != CONST_DOUBLE
+ || (optimize_function_for_size_p (cfun)
+ && ((!TARGET_SSE_MATH
+ && standard_80387_constant_p (operands[1]) > 0)
+ || (TARGET_SSE_MATH
+ && standard_sse_constant_p (operands[1]))))
|| memory_operand (operands[0], SFmode))"
{
switch (which_alternative)
@@ -3438,11 +3221,10 @@
case 3:
case 4:
return "mov{l}\t{%1, %0|%0, %1}";
+
case 5:
- if (get_attr_mode (insn) == MODE_TI)
- return "%vpxor\t%0, %d0";
- else
- return "%vxorps\t%0, %d0";
+ return standard_sse_constant_opcode (insn, operands[1]);
+
case 6:
if (get_attr_mode (insn) == MODE_V4SF)
return "%vmovaps\t{%1, %0|%0, %1}";
===================================================================
@@ -90,6 +90,8 @@
;; 2 SSE2 enabled
;; i SSE2 inter-unit moves enabled
;; m MMX inter-unit moves enabled
+;; d Integer register when integer DFmode moves are enabled
+;; x Integer register when integer XFmode moves are enabled
(define_register_constraint "Yz" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS"
"First SSE register (@code{%xmm0}).")
@@ -105,6 +107,14 @@
"TARGET_MMX && TARGET_INTER_UNIT_MOVES ? MMX_REGS : NO_REGS"
"@internal Any MMX register, when inter-unit moves are enabled.")
+(define_register_constraint "Yd"
+ "TARGET_INTEGER_DFMODE_MOVES ? GENERAL_REGS : NO_REGS"
+ "@internal Any integer register when integer DFmode moves are enabled.")
+
+(define_register_constraint "Yx"
+ "optimize_function_for_speed_p (cfun) ? GENERAL_REGS : NO_REGS"
+ "@internal Any integer register when integer XFmode moves are enabled.")
+
;; Integer constant constraints.
(define_constraint "I"
"Integer constant in the range 0 @dots{} 31, for 32-bit shifts."
@@ -149,7 +159,7 @@
(define_constraint "G"
"Standard 80387 floating point constant."
(and (match_code "const_double")
- (match_test "standard_80387_constant_p (op)")))
+ (match_test "standard_80387_constant_p (op) > 0")))
;; This can theoretically be any mode's CONST0_RTX.
(define_constraint "C"
===================================================================
@@ -3933,6 +3933,13 @@ ix86_option_override_internal (bool main
if (!TARGET_80387)
target_flags |= MASK_NO_FANCY_MATH_387;
+ /* On 32bit targets, avoid moving DFmode values in
+ integer registers when optimizing for size. */
+ if (TARGET_64BIT)
+ target_flags |= TARGET_INTEGER_DFMODE_MOVES;
+ else if (optimize_size)
+ target_flags &= ~TARGET_INTEGER_DFMODE_MOVES;
+
/* Turn on MMX builtins for -msse. */
if (TARGET_SSE)
{
@@ -8580,17 +8587,17 @@ standard_sse_constant_opcode (rtx insn,
switch (get_attr_mode (insn))
{
case MODE_V4SF:
- return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
+ return "%vxorps\t%0, %d0";
case MODE_V2DF:
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
+ return "%vxorps\t%0, %d0";
else
- return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
+ return "%vxorpd\t%0, %d0";
case MODE_TI:
if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
- return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
+ return "%vxorps\t%0, %d0";
else
- return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
+ return "%vpxor\t%0, %d0";
case MODE_V8SF:
return "vxorps\t%x0, %x0, %x0";
case MODE_V4DF:
@@ -8607,7 +8614,7 @@ standard_sse_constant_opcode (rtx insn,
break;
}
case 2:
- return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
+ return "%vpcmpeqd\t%0, %d0";
default:
break;
}