===================================================================
@@ -16655,7 +16655,7 @@
(define_insn "sse4_1_round<mode>2"
[(set (match_operand:MODEF 0 "register_operand" "=x,v")
- (unspec:MODEF [(match_operand:MODEF 1 "register_operand" "x,v")
+ (unspec:MODEF [(match_operand:MODEF 1 "nonimmediate_operand" "xm,vm")
(match_operand:SI 2 "const_0_to_15_operand" "n,n")]
UNSPEC_ROUND))]
"TARGET_SSE4_1"
@@ -17251,12 +17251,19 @@
FIST_ROUNDING))
(clobber (reg:CC FLAGS_REG))])]
"SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
- && !flag_trapping_math"
+ && (TARGET_SSE4_1 || !flag_trapping_math)"
{
- if (TARGET_64BIT && optimize_insn_for_size_p ())
- FAIL;
+ if (TARGET_SSE4_1)
+ {
+ rtx tmp = gen_reg_rtx (<MODEF:MODE>mode);
- if (ROUND_<ROUNDING> == ROUND_FLOOR)
+ emit_insn (gen_sse4_1_round<mode>2
+ (tmp, operands[1], GEN_INT (ROUND_<ROUNDING>
+ | ROUND_NO_EXC)));
+ emit_insn (gen_fix_trunc<MODEF:mode><SWI48:mode>2
+ (operands[0], tmp));
+ }
+ else if (ROUND_<ROUNDING> == ROUND_FLOOR)
ix86_expand_lfloorceil (operands[0], operands[1], true);
else if (ROUND_<ROUNDING> == ROUND_CEIL)
ix86_expand_lfloorceil (operands[0], operands[1], false);
===================================================================
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4.1 -mfpmath=sse" } */
+/* Check that floor/ceil converted to an integer type use roundsd.  */
+double floor (double);
+double ceil (double);
+
+int ifloor (double x) { return floor (x); }
+int iceil (double x) { return ceil (x); }
+/* The long long variants are compiled only when __x86_64__ is defined.  */
+#ifdef __x86_64__
+long long llfloor (double x) { return floor (x); }
+long long llceil (double x) { return ceil (x); }
+#endif
+/* Presumably one roundsd per function compiled: 2 on ia32, 4 otherwise.  */
+/* { dg-final { scan-assembler-times "roundsd" 2 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "roundsd" 4 { target { ! ia32 } } } } */