diff mbox series

[middle-end/i386] : Fix PR88556, Inline built-in sinh, cosh, tanh for -ffast-math

Message ID CAFULd4YPT0cTRt-7w-0JeJht_a1_3gTOCiFhkr4ccF5wVSxO0g@mail.gmail.com
State New
Headers show
Series [middle-end/i386] : Fix PR88556, Inline built-in sinh, cosh, tanh for -ffast-math | expand

Commit Message

Uros Bizjak Dec. 20, 2018, 10:08 a.m. UTC
Attached patch inlines calls to sinh{,f,l}, cosh{,f,l} and
tanh{,f,l} using x87 XFmode arithmetic. The expanders are modelled
after the removed inlines in glibc [1].

2018-12-20  Uros Bizjak  <ubizjak@gmail.com>

    PR target/88556
    * internal-fn.def (COSH): New.
    (SINH): Ditto.
    (TANH): Ditto.
    * optabs.def (cosh_optab): New.
    (sinh_optab): Ditto.
    (tanh_optab): Ditto.
    * config/i386/i386-protos.h (ix86_emit_i387_sinh): New prototype.
    (ix86_emit_i387_cosh): Ditto.
    (ix86_emit_i387_tanh): Ditto.
    * config/i386/i386.c (ix86_emit_i387_sinh): New function.
    (ix86_emit_i387_cosh): Ditto.
    (ix86_emit_i387_tanh): Ditto.
    * config/i386/i386.md (sinhxf2): New expander.
    (sinh<mode>2):    Ditto.
    (coshxf2): Ditto.
    (cosh<mode>2): Ditto.
    (tanhxf2): Ditto.
    (tanh<mode>2): Ditto.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

The patch also needs approval for its straightforward middle-end parts.

[1] https://sourceware.org/ml/libc-alpha/2018-12/msg00772.html

Uros.

Comments

Richard Biener Dec. 20, 2018, 10:30 a.m. UTC | #1
On Thu, Dec 20, 2018 at 11:08 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> Attached patch inlines calls to sinh{,f,l}, cosh{,f,l} and
> tanh{,f,l} using x87 XFmode arithmetic. The expanders are modelled
> after the removed inlines in glibc [1].
>
> 2018-12-20  Uros Bizjak  <ubizjak@gmail.com>
>
>     PR target/88556
>     * internal-fn.def (COSH): New.
>     (SINH): Ditto.
>     (TANH): Ditto.
>     * optabs.def (cosh_optab): New.
>     (sinh_optab): Ditto.
>     (tanh_optab): Ditto.
>     * config/i386/i386-protos.h (ix86_emit_i387_sinh): New prototype.
>     (ix86_emit_i387_cosh): Ditto.
>     (ix86_emit_i387_tanh): Ditto.
>     * config/i386/i386.c (ix86_emit_i387_sinh): New function.
>     (ix86_emit_i387_cosh): Ditto.
>     (ix86_emit_i387_tanh): Ditto.
>     * config/i386/i386.md (sinhxf2): New expander.
>     (sinh<mode>2):    Ditto.
>     (coshxf2): Ditto.
>     (cosh<mode>2): Ditto.
>     (tanhxf2): Ditto.
>     (tanh<mode>2): Ditto.
>
> Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.
>
> The patch also needs approval for its straightforward middle-end parts.

Consider those "obvious".

Richard.

>
> [1] https://sourceware.org/ml/libc-alpha/2018-12/msg00772.html
>
> Uros.
diff mbox series

Patch

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index ae118079b34..1e802bac1ea 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -170,6 +170,9 @@  extern void x86_function_profiler (FILE *, int);
 extern void x86_emit_floatuns (rtx [2]);
 extern void ix86_emit_fp_unordered_jump (rtx);
 
+extern void ix86_emit_i387_sinh (rtx, rtx);
+extern void ix86_emit_i387_cosh (rtx, rtx);
+extern void ix86_emit_i387_tanh (rtx, rtx);
 extern void ix86_emit_i387_asinh (rtx, rtx);
 extern void ix86_emit_i387_acosh (rtx, rtx);
 extern void ix86_emit_i387_atanh (rtx, rtx);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index b3c86761e25..958980319ef 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -44199,6 +44199,122 @@  ix86_emit_fp_unordered_jump (rtx label)
   JUMP_LABEL (insn) = label;
 }
 
+/* Output code to perform a sinh XFmode calculation: op0 = sinh (op1).  */
+
+void ix86_emit_i387_sinh (rtx op0, rtx op1)
+{
+  rtx e1 = gen_reg_rtx (XFmode);
+  rtx e2 = gen_reg_rtx (XFmode);
+  rtx scratch = gen_reg_rtx (HImode);
+  rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
+  rtx half = const_double_from_real_value (dconsthalf, XFmode);
+  rtx cst1, tmp;
+  rtx_code_label *jump_label = gen_label_rtx ();
+  rtx_insn *insn;
+
+  /* scratch = fxam (op1); captured before op1 is reduced to |op1| */
+  emit_insn (gen_fxamxf2_i387 (scratch, op1));
+
+  /* e1 = expm1 (|op1|); e2 only serves as a temporary for |op1| here */
+  emit_insn (gen_absxf2 (e2, op1));
+  emit_insn (gen_expm1xf2 (e1, e2));
+
+  /* e2 = e1 / (e1 + 1.0) + e1, i.e. exp (|op1|) - exp (-|op1|) */
+  cst1 = force_reg (XFmode, CONST1_RTX (XFmode));
+  emit_insn (gen_addxf3 (e2, e1, cst1));
+  emit_insn (gen_divxf3 (e2, e1, e2));
+  emit_insn (gen_addxf3 (e2, e2, e1));
+
+  /* flags = signbit (op1), tested on the fxam result held in scratch */
+  emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02)));
+
+  /* if (flags) then e2 = -e2; the jump skips the negation when the bit is clear */
+  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
+			      gen_rtx_EQ (VOIDmode, flags, const0_rtx),
+			      gen_rtx_LABEL_REF (VOIDmode, jump_label),
+			      pc_rtx);
+  insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
+  predict_jump (REG_BR_PROB_BASE * 50 / 100);
+  JUMP_LABEL (insn) = jump_label;
+
+  emit_insn (gen_negxf2 (e2, e2));
+
+  emit_label (jump_label);
+  LABEL_NUSES (jump_label) = 1;
+
+  /* op0 = 0.5 * e2 */
+  half = force_reg (XFmode, half);
+  emit_insn (gen_mulxf3 (op0, e2, half));
+}
+
+/* Output code to perform a cosh XFmode calculation: op0 = cosh (op1).  */
+
+void ix86_emit_i387_cosh (rtx op0, rtx op1)
+{
+  rtx e1 = gen_reg_rtx (XFmode);
+  rtx e2 = gen_reg_rtx (XFmode);
+  rtx half = const_double_from_real_value (dconsthalf, XFmode);
+  rtx cst1;
+
+  /* e1 = exp (op1) */
+  emit_insn (gen_expxf2 (e1, op1));
+
+  /* e2 = e1 + 1.0 / e1, i.e. exp (op1) + exp (-op1) */
+  cst1 = force_reg (XFmode, CONST1_RTX (XFmode));
+  emit_insn (gen_divxf3 (e2, cst1, e1));
+  emit_insn (gen_addxf3 (e2, e1, e2));
+
+  /* op0 = 0.5 * e2 */
+  half = force_reg (XFmode, half);
+  emit_insn (gen_mulxf3 (op0, e2, half));
+}
+
+/* Output code to perform a tanh XFmode calculation: op0 = tanh (op1).  */
+
+void ix86_emit_i387_tanh (rtx op0, rtx op1)
+{
+  rtx e1 = gen_reg_rtx (XFmode);
+  rtx e2 = gen_reg_rtx (XFmode);
+  rtx scratch = gen_reg_rtx (HImode);
+  rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
+  rtx cst2, tmp;
+  rtx_code_label *jump_label = gen_label_rtx ();
+  rtx_insn *insn;
+
+  /* scratch = fxam (op1); captured before the sign of op1 is discarded */
+  emit_insn (gen_fxamxf2_i387 (scratch, op1));
+
+  /* e1 = expm1 (-|2 * op1|); e2 only serves as a temporary here */
+  emit_insn (gen_addxf3 (e2, op1, op1));
+  emit_insn (gen_absxf2 (e2, e2));
+  emit_insn (gen_negxf2 (e2, e2));
+  emit_insn (gen_expm1xf2 (e1, e2));
+
+  /* e2 = e1 / (e1 + 2.0), i.e. -tanh (|op1|) */
+  cst2 = force_reg (XFmode, CONST2_RTX (XFmode));
+  emit_insn (gen_addxf3 (e2, e1, cst2));
+  emit_insn (gen_divxf3 (e2, e1, e2));
+
+  /* flags = signbit (op1), tested on the fxam result held in scratch */
+  emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02)));
+
+  /* if (!flags) then e2 = -e2; the jump skips the negation when the bit is set */
+  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
+			      gen_rtx_NE (VOIDmode, flags, const0_rtx),
+			      gen_rtx_LABEL_REF (VOIDmode, jump_label),
+			      pc_rtx);
+  insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
+  predict_jump (REG_BR_PROB_BASE * 50 / 100);
+  JUMP_LABEL (insn) = jump_label;
+
+  emit_insn (gen_negxf2 (e2, e2));
+
+  emit_label (jump_label);
+  LABEL_NUSES (jump_label) = 1;
+
+  emit_move_insn (op0, e2);
+}
+
 /* Output code to perform an asinh XFmode calculation.  */
 
 void ix86_emit_i387_asinh (rtx op0, rtx op1)
@@ -44323,7 +44439,7 @@  void ix86_emit_i387_atanh (rtx op0, rtx op1)
   emit_label (jump_label);
   LABEL_NUSES (jump_label) = 1;
 
-  /* op0 = 0.5 * e2) */
+  /* op0 = 0.5 * e2 */
   half = force_reg (XFmode, half);
   emit_insn (gen_mulxf3 (op0, e2, half));
 }
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 6e29427e30c..372dc6d8c2d 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -15483,6 +15483,89 @@ 
   DONE;
 })
 
+(define_expand "sinhxf2"
+  [(use (match_operand:XF 0 "register_operand"))
+   (use (match_operand:XF 1 "register_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_finite_math_only
+   && flag_unsafe_math_optimizations"
+{
+  ix86_emit_i387_sinh (operands[0], operands[1]);
+  DONE;
+})
+
+(define_expand "sinh<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "general_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_finite_math_only
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_sinhxf2 (op0, op1));
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
+  DONE;
+})
+
+(define_expand "coshxf2"
+  [(use (match_operand:XF 0 "register_operand"))
+   (use (match_operand:XF 1 "register_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  ix86_emit_i387_cosh (operands[0], operands[1]);
+  DONE;
+})
+
+(define_expand "cosh<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "general_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_coshxf2 (op0, op1));
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
+  DONE;
+})
+
+(define_expand "tanhxf2"
+  [(use (match_operand:XF 0 "register_operand"))
+   (use (match_operand:XF 1 "register_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  ix86_emit_i387_tanh (operands[0], operands[1]);
+  DONE;
+})
+
+(define_expand "tanh<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand"))
+   (use (match_operand:MODEF 1 "general_operand"))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_tanhxf2 (op0, op1));
+  emit_insn (gen_truncxf<mode>2 (operands[0], op0));
+  DONE;
+})
+
 (define_expand "asinhxf2"
   [(use (match_operand:XF 0 "register_operand"))
    (use (match_operand:XF 1 "register_operand"))]
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 8c97625ef3f..a7e7db68d9b 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -207,6 +207,7 @@  DEF_INTERNAL_FLT_FN (ASINH, ECF_CONST, asinh, unary)
 DEF_INTERNAL_FLT_FN (ATAN, ECF_CONST, atan, unary)
 DEF_INTERNAL_FLT_FN (ATANH, ECF_CONST, atanh, unary)
 DEF_INTERNAL_FLT_FN (COS, ECF_CONST, cos, unary)
+DEF_INTERNAL_FLT_FN (COSH, ECF_CONST, cosh, unary)
 DEF_INTERNAL_FLT_FN (EXP, ECF_CONST, exp, unary)
 DEF_INTERNAL_FLT_FN (EXP10, ECF_CONST, exp10, unary)
 DEF_INTERNAL_FLT_FN (EXP2, ECF_CONST, exp2, unary)
@@ -218,8 +219,10 @@  DEF_INTERNAL_FLT_FN (LOG2, ECF_CONST, log2, unary)
 DEF_INTERNAL_FLT_FN (LOGB, ECF_CONST, logb, unary)
 DEF_INTERNAL_FLT_FN (SIGNIFICAND, ECF_CONST, significand, unary)
 DEF_INTERNAL_FLT_FN (SIN, ECF_CONST, sin, unary)
+DEF_INTERNAL_FLT_FN (SINH, ECF_CONST, sinh, unary)
 DEF_INTERNAL_FLT_FLOATN_FN (SQRT, ECF_CONST, sqrt, unary)
 DEF_INTERNAL_FLT_FN (TAN, ECF_CONST, tan, unary)
+DEF_INTERNAL_FLT_FN (TANH, ECF_CONST, tanh, unary)
 
 /* FP rounding.  */
 DEF_INTERNAL_FLT_FLOATN_FN (CEIL, ECF_CONST, ceil, unary)
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 3ede65bdaf4..7d65ab89074 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -282,6 +282,7 @@  OPTAB_D (atanh_optab, "atanh$a2")
 OPTAB_D (copysign_optab, "copysign$F$a3")
 OPTAB_D (xorsign_optab, "xorsign$F$a3")
 OPTAB_D (cos_optab, "cos$a2")
+OPTAB_D (cosh_optab, "cosh$a2")
 OPTAB_D (exp10_optab, "exp10$a2")
 OPTAB_D (exp2_optab, "exp2$a2")
 OPTAB_D (exp_optab, "exp$a2")
@@ -304,7 +305,9 @@  OPTAB_D (signbit_optab, "signbit$F$a2")
 OPTAB_D (significand_optab, "significand$a2")
 OPTAB_D (sin_optab, "sin$a2")
 OPTAB_D (sincos_optab, "sincos$a3")
+OPTAB_D (sinh_optab, "sinh$a2")
 OPTAB_D (tan_optab, "tan$a2")
+OPTAB_D (tanh_optab, "tanh$a2")
 
 /* C99 implementations of fmax/fmin.  */
 OPTAB_D (fmax_optab, "fmax$a3")