i386: Expand roundeven for SSE4.1+
diff mbox series

Message ID CACMrGjAVzbBa8_vZ-34iosquwXt+scMDAv3quTA+AZxRoGA51g@mail.gmail.com
State New
Headers show
Series
  • i386: Expand roundeven for SSE4.1+
Related show

Commit Message

Tejas Joshi July 13, 2019, 2:05 p.m. UTC
Hi.
This patch expands roundeven inline for SSE4.1 and later.
Note that it must be applied on top of
<https://gcc.gnu.org/ml/gcc-patches/2019-06/msg01828.html>. The patch
has been bootstrapped and regression-tested on x86_64-linux-gnu.

Thanks,
Tejas

gcc/ChangeLog:

2019-07-13  Tejas Joshi  <tejasjoshi9673@gmail.com>

    * builtins.c (mathfn_built_in_2): Change CASE_MATHFN to
    CASE_MATHFN_FLOATN for ROUNDEVEN.
    * config/i386/i386.md: Define UNSPEC_ROUNDEVEN.
    (define_constants): Define ROUND_ROUNDEVEN rounding mode.
    (roundeven<mode>2): New define_expand.
    * internal-fn.def (ROUNDEVEN): New internal function.
    * optabs.def (roundeven_optab): New optab.

gcc/testsuite/ChangeLog:

2019-07-13  Tejas Joshi  <tejasjoshi9673@gmail.com>

    * gcc.target/i386/avx-vround-roundeven-1.c: New test.
    * gcc.target/i386/avx-vround-roundeven-2.c: New test.

Patch
diff mbox series

diff --git a/gcc/builtins.c b/gcc/builtins.c
index 8ceb077b0bf..f61f10422fd 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -2056,7 +2056,7 @@  mathfn_built_in_2 (tree type, combined_fn fn)
     CASE_MATHFN (REMQUO)
     CASE_MATHFN_FLOATN (RINT)
     CASE_MATHFN_FLOATN (ROUND)
-    CASE_MATHFN (ROUNDEVEN)
+    CASE_MATHFN_FLOATN (ROUNDEVEN)
     CASE_MATHFN (SCALB)
     CASE_MATHFN (SCALBLN)
     CASE_MATHFN (SCALBN)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index db5fa9ae3ca..bd5d6335f2b 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -169,6 +169,9 @@ 
   ;; For ROUND support
   UNSPEC_ROUND
 
+  ;; For ROUNDEVEN support
+  UNSPEC_ROUNDEVEN
+
   ;; For CRC32 support
   UNSPEC_CRC32
 
@@ -303,7 +306,8 @@ 
 
 ;; Constants to represent rounding modes in the ROUND instruction
 (define_constants
-  [(ROUND_FLOOR			0x1)
+  [(ROUND_ROUNDEVEN		0x0)
+   (ROUND_FLOOR			0x1)
    (ROUND_CEIL			0x2)
    (ROUND_TRUNC			0x3)
    (ROUND_MXCSR			0x4)
@@ -16328,6 +16332,20 @@ 
   "TARGET_USE_FANCY_MATH_387
    && (flag_fp_int_builtin_inexact || !flag_trapping_math)")
 
+(define_expand "roundeven<mode>2"
+  [(parallel [(set (match_operand:MODEF 0 "register_operand")
+		   (unspec:MODEF [(match_operand:MODEF 1 "register_operand")]
+				 UNSPEC_ROUNDEVEN))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && TARGET_SSE4_1)"
+{
+  gcc_assert (TARGET_SSE4_1);
+  emit_insn (gen_sse4_1_round<mode>2
+	     (operands[0], operands[1], GEN_INT (ROUND_ROUNDEVEN
+						| ROUND_NO_EXC)));
+  DONE;
+})
+
 (define_expand "<rounding_insn><mode>2"
   [(parallel [(set (match_operand:MODEF 0 "register_operand")
 		   (unspec:MODEF [(match_operand:MODEF 1 "register_operand")]
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 906d74b1d08..15f019b9b49 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -234,6 +234,7 @@  DEF_INTERNAL_FLT_FLOATN_FN (FLOOR, ECF_CONST, floor, unary)
 DEF_INTERNAL_FLT_FLOATN_FN (NEARBYINT, ECF_CONST, nearbyint, unary)
 DEF_INTERNAL_FLT_FLOATN_FN (RINT, ECF_CONST, rint, unary)
 DEF_INTERNAL_FLT_FLOATN_FN (ROUND, ECF_CONST, round, unary)
+DEF_INTERNAL_FLT_FLOATN_FN (ROUNDEVEN, ECF_CONST, roundeven, unary)
 DEF_INTERNAL_FLT_FLOATN_FN (TRUNC, ECF_CONST, btrunc, unary)
 
 /* Binary math functions.  */
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 4ffd0f35a40..065e3f64dda 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -268,6 +268,7 @@  OPTAB_D (fnms_optab, "fnms$a4")
 
 OPTAB_D (rint_optab, "rint$a2")
 OPTAB_D (round_optab, "round$a2")
+OPTAB_D (roundeven_optab, "roundeven$a2")
 OPTAB_D (floor_optab, "floor$a2")
 OPTAB_D (ceil_optab, "ceil$a2")
 OPTAB_D (btrunc_optab, "btrunc$a2")
diff --git a/gcc/testsuite/gcc.target/i386/avx-vround-roundeven-1.c b/gcc/testsuite/gcc.target/i386/avx-vround-roundeven-1.c
new file mode 100644
index 00000000000..072d0f0e73a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-vround-roundeven-1.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx" } */
+
+__attribute__((noinline, noclone)) double
+f1 (double x)
+{
+  return __builtin_roundeven (x);
+}
+
+__attribute__((noinline, noclone)) float
+f2 (float x)
+{
+  return __builtin_roundevenf (x);
+}
+
+/* { dg-final { scan-assembler-times "vroundsd\[^\n\r\]*xmm" 1 } } */
+/* { dg-final { scan-assembler-times "vroundss\[^\n\r\]*xmm" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx-vround-roundeven-2.c b/gcc/testsuite/gcc.target/i386/avx-vround-roundeven-2.c
new file mode 100644
index 00000000000..211758d026a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx-vround-roundeven-2.c
@@ -0,0 +1,21 @@ 
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-mavx" } */
+
+#ifndef CHECK_H
+#define CHECK_H "avx-check.h"
+#define TEST avx_test
+#define SRC "avx-vround-roundeven-1.c"
+#endif
+
+#include CHECK_H
+#include SRC
+
+static void
+TEST (void)
+{
+  if (f1 (0.5) != 0.0 || f1 (1.5) != 2.0 || f1 (-0.5) != 0.0 || f1 (-1.5) != -2.0)
+    abort ();
+  if (f2 (0.5f) != 0.0f || f2 (1.5f) != 2.0f || f2 (-0.5f) != 0.0f || f2 (-1.5f) != -2.0f)
+    abort ();
+}