===================================================================
@@ -9646,6 +9646,40 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "<MODE>")])
+(define_expand "round<mode>2"
+ [(set (match_dup 4)
+ (plus:VF
+ (match_operand:VF 1 "nonimmediate_operand" "")
+ (match_dup 3)))
+ (set (match_operand:VF 0 "register_operand" "")
+ (unspec:VF
+ [(match_dup 4) (match_dup 5)]
+ UNSPEC_ROUND))]
+ "TARGET_ROUND && !flag_trapping_math"
+{
+ enum machine_mode scalar_mode;
+ const struct real_format *fmt;
+ REAL_VALUE_TYPE pred_half, half_minus_pred_half;
+ rtx half, vec_half;
+
+ scalar_mode = GET_MODE_INNER (<MODE>mode);
+
+ /* load nextafter (0.5, 0.0) */
+ fmt = REAL_MODE_FORMAT (scalar_mode);
+ real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
+ REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
+ half = const_double_from_real_value (pred_half, scalar_mode);
+
+ vec_half = ix86_build_const_vector (<MODE>mode, true, half);
+ vec_half = force_reg (<MODE>mode, vec_half);
+
+ operands[3] = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
+
+ operands[4] = gen_reg_rtx (<MODE>mode);
+ operands[5] = GEN_INT (ROUND_TRUNC);
+})
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Intel SSE4.2 string/text processing instructions
===================================================================
@@ -23661,10 +23661,12 @@ enum ix86_builtins
IX86_BUILTIN_CEILPD,
IX86_BUILTIN_TRUNCPD,
IX86_BUILTIN_RINTPD,
+ IX86_BUILTIN_ROUNDPD_AZ,
IX86_BUILTIN_FLOORPS,
IX86_BUILTIN_CEILPS,
IX86_BUILTIN_TRUNCPS,
IX86_BUILTIN_RINTPS,
+ IX86_BUILTIN_ROUNDPS_AZ,
IX86_BUILTIN_PTESTZ,
IX86_BUILTIN_PTESTC,
@@ -23837,10 +23839,12 @@ enum ix86_builtins
IX86_BUILTIN_CEILPD256,
IX86_BUILTIN_TRUNCPD256,
IX86_BUILTIN_RINTPD256,
+ IX86_BUILTIN_ROUNDPD_AZ256,
IX86_BUILTIN_FLOORPS256,
IX86_BUILTIN_CEILPS256,
IX86_BUILTIN_TRUNCPS256,
IX86_BUILTIN_RINTPS256,
+ IX86_BUILTIN_ROUNDPS_AZ256,
IX86_BUILTIN_UNPCKHPD256,
IX86_BUILTIN_UNPCKLPD256,
@@ -25063,11 +25067,15 @@ static const struct builtin_description bdesc_args
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
+
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
+
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
@@ -25185,11 +25193,15 @@ static const struct builtin_description bdesc_args
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
+
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
+
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
@@ -28146,6 +28158,34 @@ ix86_builtin_vectorized_function (tree fndecl, tre
}
break;
+ case BUILT_IN_ROUND:
+ /* The round insn does not trap on denormals. */
+ if (flag_trapping_math || !TARGET_ROUND)
+ break;
+
+ if (out_mode == DFmode && in_mode == DFmode)
+ {
+ if (out_n == 2 && in_n == 2)
+ return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
+ else if (out_n == 4 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
+ }
+ break;
+
+ case BUILT_IN_ROUNDF:
+ /* The round insn does not trap on denormals. */
+ if (flag_trapping_math || !TARGET_ROUND)
+ break;
+
+ if (out_mode == SFmode && in_mode == SFmode)
+ {
+ if (out_n == 4 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
+ else if (out_n == 8 && in_n == 8)
+ return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
+ }
+ break;
+
case BUILT_IN_FMA:
if (out_mode == DFmode && in_mode == DFmode)
{
===================================================================
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */
+/* { dg-require-effective-target avx } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#include "avx-check.h"
+
+#include <math.h>
+
+extern double floor (double);
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (double *src)
+{
+ int i, sign = 1;
+ double f = rand ();
+
+ for (i = 0; i < NUM; i++)
+ {
+ src[i] = (i + 1) * f * M_PI * sign;
+ if (i < (NUM / 2))
+ {
+ if ((i % 6) == 0)
+ f = f * src[i];
+ }
+ else if (i == (NUM / 2))
+ f = rand ();
+ else if ((i % 6) == 0)
+ f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+ sign = -sign;
+ }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+avx_test (void)
+{
+ double a[NUM];
+ double r[NUM];
+ int i;
+
+ init_src (a);
+
+ for (i = 0; i < NUM; i++)
+ r[i] = round (a[i]);
+
+ /* check results: */
+ for (i = 0; i < NUM; i++)
+ if (r[i] != round (a[i]))
+ abort();
+}
===================================================================
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse4.1" } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#include "sse4_1-check.h"
+
+#include <math.h>
+
+extern double round (double);
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (double *src)
+{
+ int i, sign = 1;
+ double f = rand ();
+
+ for (i = 0; i < NUM; i++)
+ {
+ src[i] = (i + 1) * f * M_PI * sign;
+ if (i < (NUM / 2))
+ {
+ if ((i % 6) == 0)
+ f = f * src[i];
+ }
+ else if (i == (NUM / 2))
+ f = rand ();
+ else if ((i % 6) == 0)
+ f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+ sign = -sign;
+ }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+sse4_1_test (void)
+{
+ double a[NUM];
+ double r[NUM];
+ int i;
+
+ init_src (a);
+
+ for (i = 0; i < NUM; i++)
+ r[i] = round (a[i]);
+
+ /* check results: */
+ for (i = 0; i < NUM; i++)
+ if (r[i] != round (a[i]))
+ abort();
+}
===================================================================
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse4.1" } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#include "sse4_1-check.h"
+
+#include <math.h>
+
+extern float roundf (float);
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (float *src)
+{
+ int i, sign = 1;
+ float f = rand ();
+
+ for (i = 0; i < NUM; i++)
+ {
+ src[i] = (i + 1) * f * M_PI * sign;
+ if (i < (NUM / 2))
+ {
+ if ((i % 6) == 0)
+ f = f * src[i];
+ }
+ else if (i == (NUM / 2))
+ f = rand ();
+ else if ((i % 6) == 0)
+ f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+ sign = -sign;
+ }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+sse4_1_test (void)
+{
+ float a[NUM];
+ float r[NUM];
+ int i;
+
+ init_src (a);
+
+ for (i = 0; i < NUM; i++)
+ r[i] = roundf (a[i]);
+
+ /* check results: */
+ for (i = 0; i < NUM; i++)
+ if (r[i] != roundf (a[i]))
+ abort();
+}
===================================================================
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */
+/* { dg-require-effective-target avx } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#include "avx-check.h"
+
+#include <math.h>
+
+extern float roundf (float);
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (float *src)
+{
+ int i, sign = 1;
+ float f = rand ();
+
+ for (i = 0; i < NUM; i++)
+ {
+ src[i] = (i + 1) * f * M_PI * sign;
+ if (i < (NUM / 2))
+ {
+ if ((i % 6) == 0)
+ f = f * src[i];
+ }
+ else if (i == (NUM / 2))
+ f = rand ();
+ else if ((i % 6) == 0)
+ f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+ sign = -sign;
+ }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+avx_test (void)
+{
+ float a[NUM];
+ float r[NUM];
+ int i;
+
+ init_src (a);
+
+ for (i = 0; i < NUM; i++)
+ r[i] = roundf (a[i]);
+
+ /* check results: */
+ for (i = 0; i < NUM; i++)
+ if (r[i] != roundf (a[i]))
+ abort();
+}