Message ID | 20211019011512.100358-3-pc@us.ibm.com |
---|---|
State | New |
Headers | show |
Series | rs6000: Support more SSE4 intrinsics | expand |
Patches 1/3 and 3/3 have been committed. This is only a ping for 2/3. On Mon, Oct 18, 2021 at 08:15:11PM -0500, Paul A. Clarke via Gcc-patches wrote: > Suppress exceptions (when specified), by saving, manipulating, and > restoring the FPSCR. Similarly, save, set, and restore the floating-point > rounding mode when required. > > No attempt is made to optimize writing the FPSCR (by checking if the new > value would be the same), other than using lighter weight instructions > when possible. Note that explicit instruction scheduling "barriers" are > added to prevent floating-point computations from being moved before or > after the explicit FPSCR manipulations. (That these are required has > been reported as an issue in GCC: PR102783.) > > The scalar versions naively use the parallel versions to compute the > single scalar result and then construct the remainder of the result. > > Of minor note, the values of _MM_FROUND_TO_NEG_INF and _MM_FROUND_TO_ZERO > are swapped from the corresponding values on x86 so as to match the > corresponding rounding mode values in the Power ISA. > > Move implementations of _mm_ceil* and _mm_floor* into _mm_round*, and > convert _mm_ceil* and _mm_floor* into macros. This matches the current > analogous implementations in config/i386/smmintrin.h. > > Function signatures match the analogous functions in config/i386/smmintrin.h. > > Add tests for _mm_round_pd, _mm_round_ps, _mm_round_sd, _mm_round_ss, > modeled after the very similar "floor" and "ceil" tests. > > Include basic tests, plus tests at the boundaries for floating-point > representation, positive and negative, test all of the parameterized > rounding modes as well as the C99 rounding modes and interactions > between the two. > > Exceptions are not explicitly tested. > > 2021-10-18 Paul A. 
Clarke <pc@us.ibm.com> > > gcc > * config/rs6000/smmintrin.h (_mm_round_pd, _mm_round_ps, > _mm_round_sd, _mm_round_ss, _MM_FROUND_TO_NEAREST_INT, > _MM_FROUND_TO_ZERO, _MM_FROUND_TO_POS_INF, _MM_FROUND_TO_NEG_INF, > _MM_FROUND_CUR_DIRECTION, _MM_FROUND_RAISE_EXC, _MM_FROUND_NO_EXC, > _MM_FROUND_NINT, _MM_FROUND_FLOOR, _MM_FROUND_CEIL, _MM_FROUND_TRUNC, > _MM_FROUND_RINT, _MM_FROUND_NEARBYINT): New. > * config/rs6000/smmintrin.h (_mm_ceil_pd, _mm_ceil_ps, _mm_ceil_sd, > _mm_ceil_ss, _mm_floor_pd, _mm_floor_ps, _mm_floor_sd, _mm_floor_ss): > Convert from function to macro. > > gcc/testsuite > * gcc.target/powerpc/sse4_1-round3.h: New. > * gcc.target/powerpc/sse4_1-roundpd.c: New. > * gcc.target/powerpc/sse4_1-roundps.c: New. > * gcc.target/powerpc/sse4_1-roundsd.c: New. > * gcc.target/powerpc/sse4_1-roundss.c: New. > --- > gcc/config/rs6000/smmintrin.h | 292 ++++++++++++++---- > .../gcc.target/powerpc/sse4_1-round3.h | 81 +++++ > .../gcc.target/powerpc/sse4_1-roundpd.c | 143 +++++++++ > .../gcc.target/powerpc/sse4_1-roundps.c | 98 ++++++ > .../gcc.target/powerpc/sse4_1-roundsd.c | 256 +++++++++++++++ > .../gcc.target/powerpc/sse4_1-roundss.c | 208 +++++++++++++ > 6 files changed, 1014 insertions(+), 64 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-round3.h > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-roundpd.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-roundps.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-roundsd.c > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-roundss.c > > diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h > index 90ce03d22709..6bb03e6e20ac 100644 > --- a/gcc/config/rs6000/smmintrin.h > +++ b/gcc/config/rs6000/smmintrin.h > @@ -42,6 +42,234 @@ > #include <altivec.h> > #include <tmmintrin.h> > > +/* Rounding mode macros. 
*/ > +#define _MM_FROUND_TO_NEAREST_INT 0x00 > +#define _MM_FROUND_TO_ZERO 0x01 > +#define _MM_FROUND_TO_POS_INF 0x02 > +#define _MM_FROUND_TO_NEG_INF 0x03 > +#define _MM_FROUND_CUR_DIRECTION 0x04 > + > +#define _MM_FROUND_NINT \ > + (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC) > +#define _MM_FROUND_FLOOR \ > + (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC) > +#define _MM_FROUND_CEIL \ > + (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC) > +#define _MM_FROUND_TRUNC \ > + (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC) > +#define _MM_FROUND_RINT \ > + (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC) > +#define _MM_FROUND_NEARBYINT \ > + (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC) > + > +#define _MM_FROUND_RAISE_EXC 0x00 > +#define _MM_FROUND_NO_EXC 0x08 > + > +extern __inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_round_pd (__m128d __A, int __rounding) > +{ > + __v2df __r; > + union { > + double __fr; > + long long __fpscr; > + } __enables_save, __fpscr_save; > + > + if (__rounding & _MM_FROUND_NO_EXC) > + { > + /* Save enabled exceptions, disable all exceptions, > + and preserve the rounding mode. */ > +#ifdef _ARCH_PWR9 > + __asm__ ("mffsce %0" : "=f" (__fpscr_save.__fr)); > + __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; > +#else > + __fpscr_save.__fr = __builtin_mffs (); > + __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; > + __fpscr_save.__fpscr &= ~0xf8; > + __builtin_mtfsf (0b00000011, __fpscr_save.__fr); > +#endif > + /* Insert an artificial "read/write" reference to the variable > + read below, to ensure the compiler does not schedule > + a read/use of the variable before the FPSCR is modified, above. > + This can be removed if and when GCC PR102783 is fixed. 
> + */ > + __asm__ ("" : "+wa" (__A)); > + } > + > + switch (__rounding) > + { > + case _MM_FROUND_TO_NEAREST_INT: > + __fpscr_save.__fr = __builtin_mffsl (); > + __attribute__ ((fallthrough)); > + case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC: > + __builtin_set_fpscr_rn (0b00); > + /* Insert an artificial "read/write" reference to the variable > + read below, to ensure the compiler does not schedule > + a read/use of the variable before the FPSCR is modified, above. > + This can be removed if and when GCC PR102783 is fixed. > + */ > + __asm__ ("" : "+wa" (__A)); > + > + __r = vec_rint ((__v2df) __A); > + > + /* Insert an artificial "read" reference to the variable written > + above, to ensure the compiler does not schedule the computation > + of the value after the manipulation of the FPSCR, below. > + This can be removed if and when GCC PR102783 is fixed. > + */ > + __asm__ ("" : : "wa" (__r)); > + __builtin_set_fpscr_rn (__fpscr_save.__fpscr); > + break; > + case _MM_FROUND_TO_NEG_INF: > + case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC: > + __r = vec_floor ((__v2df) __A); > + break; > + case _MM_FROUND_TO_POS_INF: > + case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC: > + __r = vec_ceil ((__v2df) __A); > + break; > + case _MM_FROUND_TO_ZERO: > + case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC: > + __r = vec_trunc ((__v2df) __A); > + break; > + case _MM_FROUND_CUR_DIRECTION: > + __r = vec_rint ((__v2df) __A); > + break; > + } > + if (__rounding & _MM_FROUND_NO_EXC) > + { > + /* Insert an artificial "read" reference to the variable written > + above, to ensure the compiler does not schedule the computation > + of the value after the manipulation of the FPSCR, below. > + This can be removed if and when GCC PR102783 is fixed. > + */ > + __asm__ ("" : : "wa" (__r)); > + /* Restore enabled exceptions. 
*/ > + __fpscr_save.__fr = __builtin_mffsl (); > + __fpscr_save.__fpscr |= __enables_save.__fpscr; > + __builtin_mtfsf (0b00000011, __fpscr_save.__fr); > + } > + return (__m128d) __r; > +} > + > +extern __inline __m128d > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_round_sd (__m128d __A, __m128d __B, int __rounding) > +{ > + __B = _mm_round_pd (__B, __rounding); > + __v2df __r = { ((__v2df) __B)[0], ((__v2df) __A)[1] }; > + return (__m128d) __r; > +} > + > +extern __inline __m128 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_round_ps (__m128 __A, int __rounding) > +{ > + __v4sf __r; > + union { > + double __fr; > + long long __fpscr; > + } __enables_save, __fpscr_save; > + > + if (__rounding & _MM_FROUND_NO_EXC) > + { > + /* Save enabled exceptions, disable all exceptions, > + and preserve the rounding mode. */ > +#ifdef _ARCH_PWR9 > + __asm__ ("mffsce %0" : "=f" (__fpscr_save.__fr)); > + __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; > +#else > + __fpscr_save.__fr = __builtin_mffs (); > + __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; > + __fpscr_save.__fpscr &= ~0xf8; > + __builtin_mtfsf (0b00000011, __fpscr_save.__fr); > +#endif > + /* Insert an artificial "read/write" reference to the variable > + read below, to ensure the compiler does not schedule > + a read/use of the variable before the FPSCR is modified, above. > + This can be removed if and when GCC PR102783 is fixed. > + */ > + __asm__ ("" : "+wa" (__A)); > + } > + > + switch (__rounding) > + { > + case _MM_FROUND_TO_NEAREST_INT: > + __fpscr_save.__fr = __builtin_mffsl (); > + __attribute__ ((fallthrough)); > + case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC: > + __builtin_set_fpscr_rn (0b00); > + /* Insert an artificial "read/write" reference to the variable > + read below, to ensure the compiler does not schedule > + a read/use of the variable before the FPSCR is modified, above. 
> + This can be removed if and when GCC PR102783 is fixed. > + */ > + __asm__ ("" : "+wa" (__A)); > + > + __r = vec_rint ((__v4sf) __A); > + > + /* Insert an artificial "read" reference to the variable written > + above, to ensure the compiler does not schedule the computation > + of the value after the manipulation of the FPSCR, below. > + This can be removed if and when GCC PR102783 is fixed. > + */ > + __asm__ ("" : : "wa" (__r)); > + __builtin_set_fpscr_rn (__fpscr_save.__fpscr); > + break; > + case _MM_FROUND_TO_NEG_INF: > + case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC: > + __r = vec_floor ((__v4sf) __A); > + break; > + case _MM_FROUND_TO_POS_INF: > + case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC: > + __r = vec_ceil ((__v4sf) __A); > + break; > + case _MM_FROUND_TO_ZERO: > + case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC: > + __r = vec_trunc ((__v4sf) __A); > + break; > + case _MM_FROUND_CUR_DIRECTION: > + __r = vec_rint ((__v4sf) __A); > + break; > + } > + if (__rounding & _MM_FROUND_NO_EXC) > + { > + /* Insert an artificial "read" reference to the variable written > + above, to ensure the compiler does not schedule the computation > + of the value after the manipulation of the FPSCR, below. > + This can be removed if and when GCC PR102783 is fixed. > + */ > + __asm__ ("" : : "wa" (__r)); > + /* Restore enabled exceptions. 
*/ > + __fpscr_save.__fr = __builtin_mffsl (); > + __fpscr_save.__fpscr |= __enables_save.__fpscr; > + __builtin_mtfsf (0b00000011, __fpscr_save.__fr); > + } > + return (__m128) __r; > +} > + > +extern __inline __m128 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_round_ss (__m128 __A, __m128 __B, int __rounding) > +{ > + __B = _mm_round_ps (__B, __rounding); > + __v4sf __r = (__v4sf) __A; > + __r[0] = ((__v4sf) __B)[0]; > + return (__m128) __r; > +} > + > +#define _mm_ceil_pd(V) _mm_round_pd ((V), _MM_FROUND_CEIL) > +#define _mm_ceil_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_CEIL) > + > +#define _mm_floor_pd(V) _mm_round_pd((V), _MM_FROUND_FLOOR) > +#define _mm_floor_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_FLOOR) > + > +#define _mm_ceil_ps(V) _mm_round_ps ((V), _MM_FROUND_CEIL) > +#define _mm_ceil_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_CEIL) > + > +#define _mm_floor_ps(V) _mm_round_ps ((V), _MM_FROUND_FLOOR) > +#define _mm_floor_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_FLOOR) > + > extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) > _mm_insert_epi8 (__m128i const __A, int const __D, int const __N) > { > @@ -210,70 +438,6 @@ _mm_testnzc_si128 (__m128i __A, __m128i __B) > > #define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128 ((M), (V)) > > -__inline __m128d > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > -_mm_ceil_pd (__m128d __A) > -{ > - return (__m128d) vec_ceil ((__v2df) __A); > -} > - > -__inline __m128d > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > -_mm_ceil_sd (__m128d __A, __m128d __B) > -{ > - __v2df __r = vec_ceil ((__v2df) __B); > - __r[1] = ((__v2df) __A)[1]; > - return (__m128d) __r; > -} > - > -__inline __m128d > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > -_mm_floor_pd (__m128d __A) > -{ > - return (__m128d) vec_floor ((__v2df) __A); > -} > - > -__inline __m128d > -__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) > -_mm_floor_sd (__m128d __A, __m128d __B) > -{ > - __v2df __r = vec_floor ((__v2df) __B); > - __r[1] = ((__v2df) __A)[1]; > - return (__m128d) __r; > -} > - > -__inline __m128 > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > -_mm_ceil_ps (__m128 __A) > -{ > - return (__m128) vec_ceil ((__v4sf) __A); > -} > - > -__inline __m128 > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > -_mm_ceil_ss (__m128 __A, __m128 __B) > -{ > - __v4sf __r = (__v4sf) __A; > - __r[0] = __builtin_ceil (((__v4sf) __B)[0]); > - return __r; > -} > - > -__inline __m128 > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > -_mm_floor_ps (__m128 __A) > -{ > - return (__m128) vec_floor ((__v4sf) __A); > -} > - > -__inline __m128 > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > -_mm_floor_ss (__m128 __A, __m128 __B) > -{ > - __v4sf __r = (__v4sf) __A; > - __r[0] = __builtin_floor (((__v4sf) __B)[0]); > - return __r; > -} > - > #ifdef _ARCH_PWR8 > extern __inline __m128i > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-round3.h b/gcc/testsuite/gcc.target/powerpc/sse4_1-round3.h > new file mode 100644 > index 000000000000..de6cbf7be438 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-round3.h > @@ -0,0 +1,81 @@ > +#include <smmintrin.h> > +#include <fenv.h> > +#include "sse4_1-check.h" > + > +#define DIM(a) (sizeof (a) / sizeof (a)[0]) > + > +static int roundings[] = > + { > + _MM_FROUND_TO_NEAREST_INT, > + _MM_FROUND_TO_NEG_INF, > + _MM_FROUND_TO_POS_INF, > + _MM_FROUND_TO_ZERO, > + _MM_FROUND_CUR_DIRECTION > + }; > + > +static int modes[] = > + { > + FE_TONEAREST, > + FE_UPWARD, > + FE_DOWNWARD, > + FE_TOWARDZERO > + }; > + > +static void > +TEST (void) > +{ > + int i, j, ri, mi, round_save; > + > + round_save = fegetround (); > + for (mi = 0; mi < DIM (modes); mi++) 
{ > + fesetround (modes[mi]); > + for (i = 0; i < DIM (data); i++) { > + for (ri = 0; ri < DIM (roundings); ri++) { > + union value guess; > + union value *current_answers = answers[ri]; > + switch ( roundings[ri] ) { > + case _MM_FROUND_TO_NEAREST_INT: > + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, > + _MM_FROUND_TO_NEAREST_INT); > + break; > + case _MM_FROUND_TO_NEG_INF: > + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, > + _MM_FROUND_TO_NEG_INF); > + break; > + case _MM_FROUND_TO_POS_INF: > + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, > + _MM_FROUND_TO_POS_INF); > + break; > + case _MM_FROUND_TO_ZERO: > + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, > + _MM_FROUND_TO_ZERO); > + break; > + case _MM_FROUND_CUR_DIRECTION: > + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, > + _MM_FROUND_CUR_DIRECTION); > + switch ( modes[mi] ) { > + case FE_TONEAREST: > + current_answers = answers_NEAREST_INT; > + break; > + case FE_UPWARD: > + current_answers = answers_POS_INF; > + break; > + case FE_DOWNWARD: > + current_answers = answers_NEG_INF; > + break; > + case FE_TOWARDZERO: > + current_answers = answers_ZERO; > + break; > + } > + break; > + default: > + abort (); > + } > + for (j = 0; j < DIM (guess.f); j++) > + if (guess.f[j] != current_answers[i].f[j]) > + abort (); > + } > + } > + } > + fesetround (round_save); > +} > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-roundpd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundpd.c > new file mode 100644 > index 000000000000..58d9cc524167 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundpd.c > @@ -0,0 +1,143 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target vsx_hw } */ > +/* { dg-options "-O2 -mvsx" } */ > + > +#define NO_WARN_X86_INTRINSICS 1 > +#include <smmintrin.h> > + > +#define VEC_T __m128d > +#define FP_T double > + > +#define ROUND_INTRIN(x, ignored, mode) _mm_round_pd (x, mode) > + > +#include 
"sse4_1-round-data.h" > + > +struct data2 data[] = { > + { .value1 = { .f = { 0.00, 0.25 } } }, > + { .value1 = { .f = { 0.50, 0.75 } } }, > + > + { .value1 = { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffdp+50 } } }, > + { .value1 = { .f = { 0x1.ffffffffffffep+50, 0x1.fffffffffffffp+50 } } }, > + { .value1 = { .f = { 0x1.0000000000000p+51, 0x1.0000000000001p+51 } } }, > + { .value1 = { .f = { 0x1.0000000000002p+51, 0x1.0000000000003p+51 } } }, > + > + { .value1 = { .f = { 0x1.ffffffffffffep+51, 0x1.fffffffffffffp+51 } } }, > + { .value1 = { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } } }, > + > + { .value1 = { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } } }, > + { .value1 = { .f = { -0x1.fffffffffffffp+51, -0x1.ffffffffffffep+51 } } }, > + > + { .value1 = { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } } }, > + { .value1 = { .f = { -0x1.0000000000001p+51, -0x1.0000000000000p+51 } } }, > + { .value1 = { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffep+50 } } }, > + { .value1 = { .f = { -0x1.ffffffffffffdp+50, -0x1.ffffffffffffcp+50 } } }, > + > + { .value1 = { .f = { -1.00, -0.75 } } }, > + { .value1 = { .f = { -0.50, -0.25 } } } > +}; > + > +union value answers_NEAREST_INT[] = { > + { .f = { 0.00, 0.00 } }, > + { .f = { 0.00, 1.00 } }, > + > + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, > + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, > + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, > + { .f = { 0x1.0000000000002p+51, 0x1.0000000000004p+51 } }, > + > + { .f = { 0x1.ffffffffffffep+51, 0x1.0000000000000p+52 } }, > + { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } }, > + > + { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } }, > + { .f = { -0x1.0000000000000p+52, -0x1.ffffffffffffep+51 } }, > + > + { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } }, > + { .f = { -0x1.0000000000000p+51, -0x1.0000000000000p+51 } }, > + { .f = { -0x1.ffffffffffffcp+50, 
-0x1.0000000000000p+51 } }, > + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, > + > + { .f = { -1.00, -1.00 } }, > + { .f = { 0.00, 0.00 } } > +}; > + > +union value answers_NEG_INF[] = { > + { .f = { 0.00, 0.00 } }, > + { .f = { 0.00, 0.00 } }, > + > + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, > + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, > + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, > + { .f = { 0x1.0000000000002p+51, 0x1.0000000000002p+51 } }, > + > + { .f = { 0x1.ffffffffffffep+51, 0x1.ffffffffffffep+51 } }, > + { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } }, > + > + { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } }, > + { .f = { -0x1.0000000000000p+52, -0x1.ffffffffffffep+51 } }, > + > + { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } }, > + { .f = { -0x1.0000000000002p+51, -0x1.0000000000000p+51 } }, > + { .f = { -0x1.ffffffffffffcp+50, -0x1.0000000000000p+51 } }, > + { .f = { -0x1.0000000000000p+51, -0x1.ffffffffffffcp+50 } }, > + > + { .f = { -1.00, -1.00 } }, > + { .f = { -1.00, -1.00 } } > +}; > + > +union value answers_POS_INF[] = { > + { .f = { 0.00, 1.00 } }, > + { .f = { 1.00, 1.00 } }, > + > + { .f = { 0x1.ffffffffffffcp+50, 0x1.0000000000000p+51 } }, > + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, > + { .f = { 0x1.0000000000000p+51, 0x1.0000000000002p+51 } }, > + { .f = { 0x1.0000000000002p+51, 0x1.0000000000004p+51 } }, > + > + { .f = { 0x1.ffffffffffffep+51, 0x1.0000000000000p+52 } }, > + { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } }, > + > + { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } }, > + { .f = { -0x1.ffffffffffffep+51, -0x1.ffffffffffffep+51 } }, > + > + { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } }, > + { .f = { -0x1.0000000000000p+51, -0x1.0000000000000p+51 } }, > + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, > + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, 
> + > + { .f = { -1.00, 0.00 } }, > + { .f = { 0.00, 0.00 } } > +}; > + > +union value answers_ZERO[] = { > + { .f = { 0.00, 0.00 } }, > + { .f = { 0.00, 0.00 } }, > + > + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, > + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, > + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, > + { .f = { 0x1.0000000000002p+51, 0x1.0000000000002p+51 } }, > + > + { .f = { 0x1.ffffffffffffep+51, 0x1.ffffffffffffep+51 } }, > + { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } }, > + > + { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } }, > + { .f = { -0x1.ffffffffffffep+51, -0x1.ffffffffffffep+51 } }, > + > + { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } }, > + { .f = { -0x1.0000000000000p+51, -0x1.0000000000000p+51 } }, > + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, > + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, > + > + { .f = { -1.00, 0.00 } }, > + { .f = { 0.00, 0.00 } } > +}; > + > +union value *answers[] = { > + answers_NEAREST_INT, > + answers_NEG_INF, > + answers_POS_INF, > + answers_ZERO, > + 0 /* CUR_DIRECTION answers depend on current rounding mode. 
*/ > +}; > + > +#include "sse4_1-round3.h" > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-roundps.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundps.c > new file mode 100644 > index 000000000000..4b0366dfddf3 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundps.c > @@ -0,0 +1,98 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target vsx_hw } */ > +/* { dg-options "-O2 -mvsx" } */ > + > +#define NO_WARN_X86_INTRINSICS 1 > +#include <smmintrin.h> > + > +#define VEC_T __m128 > +#define FP_T float > + > +#define ROUND_INTRIN(x, ignored, mode) _mm_round_ps (x, mode) > + > +#include "sse4_1-round-data.h" > + > +struct data2 data[] = { > + { .value1 = { .f = { 0.00, 0.25, 0.50, 0.75 } } }, > + > + { .value1 = { .f = { 0x1.fffff8p+21, 0x1.fffffap+21, > + 0x1.fffffcp+21, 0x1.fffffep+21 } } }, > + { .value1 = { .f = { 0x1.fffffap+22, 0x1.fffffcp+22, > + 0x1.fffffep+22, 0x1.fffffep+23 } } }, > + { .value1 = { .f = { -0x1.fffffep+23, -0x1.fffffep+22, > + -0x1.fffffcp+22, -0x1.fffffap+22 } } }, > + { .value1 = { .f = { -0x1.fffffep+21, -0x1.fffffcp+21, > + -0x1.fffffap+21, -0x1.fffff8p+21 } } }, > + > + { .value1 = { .f = { -1.00, -0.75, -0.50, -0.25 } } } > +}; > + > +union value answers_NEAREST_INT[] = { > + { .f = { 0.00, 0.00, 0.00, 1.00 } }, > + > + { .f = { 0x1.fffff8p+21, 0x1.fffff8p+21, > + 0x1.000000p+22, 0x1.000000p+22 } }, > + { .f = { 0x1.fffff8p+22, 0x1.fffffcp+22, > + 0x1.000000p+23, 0x1.fffffep+23 } }, > + { .f = { -0x1.fffffep+23, -0x1.000000p+23, > + -0x1.fffffcp+22, -0x1.fffff8p+22 } }, > + { .f = { -0x1.000000p+22, -0x1.000000p+22, > + -0x1.fffff8p+21, -0x1.fffff8p+21 } }, > + > + { .f = { -1.00, -1.00, 0.00, 0.00 } } > +}; > + > +union value answers_NEG_INF[] = { > + { .f = { 0.00, 0.00, 0.00, 0.00 } }, > + > + { .f = { 0x1.fffff8p+21, 0x1.fffff8p+21, > + 0x1.fffff8p+21, 0x1.fffff8p+21 } }, > + { .f = { 0x1.fffff8p+22, 0x1.fffffcp+22, > + 0x1.fffffcp+22, 0x1.fffffep+23 } }, > + { .f = { -0x1.fffffep+23, -0x1.000000p+23, > + 
-0x1.fffffcp+22, -0x1.fffffcp+22 } }, > + { .f = { -0x1.000000p+22, -0x1.000000p+22, > + -0x1.000000p+22, -0x1.fffff8p+21 } }, > + > + { .f = { -1.00, -1.00, -1.00, -1.00 } } > +}; > + > +union value answers_POS_INF[] = { > + { .f = { 0.00, 1.00, 1.00, 1.00 } }, > + > + { .f = { 0x1.fffff8p+21, 0x1.000000p+22, > + 0x1.000000p+22, 0x1.000000p+22 } }, > + { .f = { 0x1.fffffcp+22, 0x1.fffffcp+22, > + 0x1.000000p+23, 0x1.fffffep+23 } }, > + { .f = { -0x1.fffffep+23, -0x1.fffffcp+22, > + -0x1.fffffcp+22, -0x1.fffff8p+22 } }, > + { .f = { -0x1.fffff8p+21, -0x1.fffff8p+21, > + -0x1.fffff8p+21, -0x1.fffff8p+21 } }, > + > + { .f = { -1.00, 0.00, 0.00, 0.00 } } > +}; > + > +union value answers_ZERO[] = { > + { .f = { 0.00, 0.00, 0.00, 0.00 } }, > + > + { .f = { 0x1.fffff8p+21, 0x1.fffff8p+21, > + 0x1.fffff8p+21, 0x1.fffff8p+21 } }, > + { .f = { 0x1.fffff8p+22, 0x1.fffffcp+22, > + 0x1.fffffcp+22, 0x1.fffffep+23 } }, > + { .f = { -0x1.fffffep+23, -0x1.fffffcp+22, > + -0x1.fffffcp+22, -0x1.fffff8p+22 } }, > + { .f = { -0x1.fffff8p+21, -0x1.fffff8p+21, > + -0x1.fffff8p+21, -0x1.fffff8p+21 } }, > + > + { .f = { -1.00, 0.00, 0.00, 0.00 } } > +}; > + > +union value *answers[] = { > + answers_NEAREST_INT, > + answers_NEG_INF, > + answers_POS_INF, > + answers_ZERO, > + 0 /* CUR_DIRECTION answers depend on current rounding mode. 
*/ > +}; > + > +#include "sse4_1-round3.h" > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-roundsd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundsd.c > new file mode 100644 > index 000000000000..4f8d9e08c93d > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundsd.c > @@ -0,0 +1,256 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target vsx_hw } */ > +/* { dg-options "-O2 -mvsx" } */ > + > +#include <stdio.h> > +#define NO_WARN_X86_INTRINSICS 1 > +#include <smmintrin.h> > + > +#define VEC_T __m128d > +#define FP_T double > + > +#define ROUND_INTRIN(x, y, mode) _mm_round_sd (x, y, mode) > + > +#include "sse4_1-round-data.h" > + > +static struct data2 data[] = { > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0.00, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0.25, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0.50, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0.75, IGNORED } } }, > + > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.ffffffffffffcp+50, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.ffffffffffffdp+50, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.ffffffffffffep+50, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.fffffffffffffp+50, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.0000000000000p+51, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.0000000000001p+51, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.0000000000002p+51, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.0000000000003p+51, IGNORED } } }, > + > + { .value1 
= { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.ffffffffffffep+51, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.fffffffffffffp+51, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.0000000000000p+52, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.0000000000001p+52, IGNORED } } }, > + > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.0000000000001p+52, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.0000000000000p+52, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.fffffffffffffp+51, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.ffffffffffffep+51, IGNORED } } }, > + > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.0000000000004p+51, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.0000000000002p+51, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.0000000000001p+51, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.0000000000000p+51, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.ffffffffffffcp+50, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.ffffffffffffep+50, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.ffffffffffffdp+50, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.ffffffffffffcp+50, IGNORED } } }, > + > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -1.00, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -0.75, IGNORED } } }, > + { 
.value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -0.50, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > + .value2 = { .f = { -0.25, IGNORED } } } > +}; > + > +static union value answers_NEAREST_INT[] = { > + { .f = { 0.00, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH } }, > + { .f = { 1.00, PASSTHROUGH } }, > + > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000004p+51, PASSTHROUGH } }, > + > + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, > + { .f = { 0x1.0000000000001p+52, PASSTHROUGH } }, > + > + { .f = { -0x1.0000000000001p+52, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, > + > + { .f = { -0x1.0000000000004p+51, PASSTHROUGH } }, > + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + > + { .f = { -1.00, PASSTHROUGH } }, > + { .f = { -1.00, PASSTHROUGH } }, > + { .f = { -0.00, PASSTHROUGH } }, > + { .f = { -0.00, PASSTHROUGH } } > +}; > + > +static union value answers_NEG_INF[] = { > + { .f = { 0.00, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH } }, 
> + { .f = { 0.00, PASSTHROUGH } }, > + > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > + > + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, > + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, > + { .f = { 0x1.0000000000001p+52, PASSTHROUGH } }, > + > + { .f = { -0x1.0000000000001p+52, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, > + > + { .f = { -0x1.0000000000004p+51, PASSTHROUGH } }, > + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, > + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + > + { .f = { -1.00, PASSTHROUGH } }, > + { .f = { -1.00, PASSTHROUGH } }, > + { .f = { -1.00, PASSTHROUGH } }, > + { .f = { -1.00, PASSTHROUGH } } > +}; > + > +static union value answers_POS_INF[] = { > + { .f = { 0.00, PASSTHROUGH } }, > + { .f = { 1.00, PASSTHROUGH } }, > + { .f = { 1.00, PASSTHROUGH } }, > + { .f = { 1.00, PASSTHROUGH } }, > + > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000002p+51, 
PASSTHROUGH } }, > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000004p+51, PASSTHROUGH } }, > + > + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, > + { .f = { 0x1.0000000000001p+52, PASSTHROUGH } }, > + > + { .f = { -0x1.0000000000001p+52, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, > + > + { .f = { -0x1.0000000000004p+51, PASSTHROUGH } }, > + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + > + { .f = { -1.00, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH } } > +}; > + > +static union value answers_ZERO[] = { > + { .f = { 0.00, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH } }, > + > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > + > + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, > + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, > + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, > + { .f = { 0x1.0000000000001p+52, PASSTHROUGH } }, > + > + { .f = { 
-0x1.0000000000001p+52, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, > + > + { .f = { -0x1.0000000000004p+51, PASSTHROUGH } }, > + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > + > + { .f = { -1.00, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH } } > +}; > + > +union value *answers[] = { > + answers_NEAREST_INT, > + answers_NEG_INF, > + answers_POS_INF, > + answers_ZERO, > + 0 /* CUR_DIRECTION answers depend on current rounding mode. */ > +}; > + > +#include "sse4_1-round3.h" > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-roundss.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundss.c > new file mode 100644 > index 000000000000..d788ebda64dd > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundss.c > @@ -0,0 +1,208 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target vsx_hw } */ > +/* { dg-options "-O2 -mvsx" } */ > + > +#include <stdio.h> > +#define NO_WARN_X86_INTRINSICS 1 > +#include <smmintrin.h> > + > +#define VEC_T __m128 > +#define FP_T float > + > +#define ROUND_INTRIN(x, y, mode) _mm_round_ss (x, y, mode) > + > +#include "sse4_1-round-data.h" > + > +static struct data2 data[] = { > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { 0.00, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { 0.25, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, 
PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { 0.50, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { 0.75, IGNORED, IGNORED, IGNORED } } }, > + > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.fffff8p+21, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.fffffap+21, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.fffffcp+21, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.fffffep+21, IGNORED, IGNORED, IGNORED } } }, > + > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.fffffap+22, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.fffffcp+22, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.fffffep+22, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { 0x1.fffffep+23, IGNORED, IGNORED, IGNORED } } }, > + > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.fffffep+23, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.fffffep+22, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.fffffcp+22, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { 
-0x1.fffffap+22, IGNORED, IGNORED, IGNORED } } }, > + > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.fffffep+21, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.fffffcp+21, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.fffffap+21, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { -0x1.fffff8p+21, IGNORED, IGNORED, IGNORED } } }, > + > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { -1.00, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { -0.75, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { -0.50, IGNORED, IGNORED, IGNORED } } }, > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + .value2 = { .f = { -0.25, IGNORED, IGNORED, IGNORED } } } > +}; > + > +static union value answers_NEAREST_INT[] = { > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { 0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.000000p+23, 
PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { -0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.000000p+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } } > +}; > + > +static union value answers_NEG_INF[] = { > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { 0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { -0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.000000p+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > 
+ { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } } > +}; > + > +static union value answers_POS_INF[] = { > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.000000p+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { -0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, 
PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } } > +}; > + > +static union value answers_ZERO[] = { > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { 0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { -0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0.00, 
PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } } > +}; > + > +union value *answers[] = { > + answers_NEAREST_INT, > + answers_NEG_INF, > + answers_POS_INF, > + answers_ZERO, > + 0 /* CUR_DIRECTION answers depend on current rounding mode. */ > +}; > + > +#include "sse4_1-round3.h" > -- > 2.27.0 >
On Tue, Oct 26, 2021 at 03:00:11PM -0500, Paul A. Clarke via Gcc-patches wrote: > Patches 1/3 and 3/3 have been committed. > This is only a ping for 2/3. Gentle re-ping. > On Mon, Oct 18, 2021 at 08:15:11PM -0500, Paul A. Clarke via Gcc-patches wrote: > > Suppress exceptions (when specified), by saving, manipulating, and > > restoring the FPSCR. Similarly, save, set, and restore the floating-point > > rounding mode when required. > > > > No attempt is made to optimize writing the FPSCR (by checking if the new > > value would be the same), other than using lighter weight instructions > > when possible. Note that explicit instruction scheduling "barriers" are > > added to prevent floating-point computations from being moved before or > > after the explicit FPSCR manipulations. (That these are required has > > been reported as an issue in GCC: PR102783.) > > > > The scalar versions naively use the parallel versions to compute the > > single scalar result and then construct the remainder of the result. > > > > Of minor note, the values of _MM_FROUND_TO_NEG_INF and _MM_FROUND_TO_ZERO > > are swapped from the corresponding values on x86 so as to match the > > corresponding rounding mode values in the Power ISA. > > > > Move implementations of _mm_ceil* and _mm_floor* into _mm_round*, and > > convert _mm_ceil* and _mm_floor* into macros. This matches the current > > analogous implementations in config/i386/smmintrin.h. > > > > Function signatures match the analogous functions in config/i386/smmintrin.h. > > > > Add tests for _mm_round_pd, _mm_round_ps, _mm_round_sd, _mm_round_ss, > > modeled after the very similar "floor" and "ceil" tests. > > > > Include basic tests, plus tests at the boundaries for floating-point > > representation, positive and negative, test all of the parameterized > > rounding modes as well as the C99 rounding modes and interactions > > between the two. > > > > Exceptions are not explicitly tested. > > > > 2021-10-18 Paul A. 
Clarke <pc@us.ibm.com> > > > > gcc > > * config/rs6000/smmintrin.h (_mm_round_pd, _mm_round_ps, > > _mm_round_sd, _mm_round_ss, _MM_FROUND_TO_NEAREST_INT, > > _MM_FROUND_TO_ZERO, _MM_FROUND_TO_POS_INF, _MM_FROUND_TO_NEG_INF, > > _MM_FROUND_CUR_DIRECTION, _MM_FROUND_RAISE_EXC, _MM_FROUND_NO_EXC, > > _MM_FROUND_NINT, _MM_FROUND_FLOOR, _MM_FROUND_CEIL, _MM_FROUND_TRUNC, > > _MM_FROUND_RINT, _MM_FROUND_NEARBYINT): New. > > * config/rs6000/smmintrin.h (_mm_ceil_pd, _mm_ceil_ps, _mm_ceil_sd, > > _mm_ceil_ss, _mm_floor_pd, _mm_floor_ps, _mm_floor_sd, _mm_floor_ss): > > Convert from function to macro. > > > > gcc/testsuite > > * gcc.target/powerpc/sse4_1-round3.h: New. > > * gcc.target/powerpc/sse4_1-roundpd.c: New. > > * gcc.target/powerpc/sse4_1-roundps.c: New. > > * gcc.target/powerpc/sse4_1-roundsd.c: New. > > * gcc.target/powerpc/sse4_1-roundss.c: New. > > --- > > gcc/config/rs6000/smmintrin.h | 292 ++++++++++++++---- > > .../gcc.target/powerpc/sse4_1-round3.h | 81 +++++ > > .../gcc.target/powerpc/sse4_1-roundpd.c | 143 +++++++++ > > .../gcc.target/powerpc/sse4_1-roundps.c | 98 ++++++ > > .../gcc.target/powerpc/sse4_1-roundsd.c | 256 +++++++++++++++ > > .../gcc.target/powerpc/sse4_1-roundss.c | 208 +++++++++++++ > > 6 files changed, 1014 insertions(+), 64 deletions(-) > > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-round3.h > > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-roundpd.c > > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-roundps.c > > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-roundsd.c > > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-roundss.c > > > > diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h > > index 90ce03d22709..6bb03e6e20ac 100644 > > --- a/gcc/config/rs6000/smmintrin.h > > +++ b/gcc/config/rs6000/smmintrin.h > > @@ -42,6 +42,234 @@ > > #include <altivec.h> > > #include <tmmintrin.h> > > > > +/* Rounding mode macros. 
*/ > > +#define _MM_FROUND_TO_NEAREST_INT 0x00 > > +#define _MM_FROUND_TO_ZERO 0x01 > > +#define _MM_FROUND_TO_POS_INF 0x02 > > +#define _MM_FROUND_TO_NEG_INF 0x03 > > +#define _MM_FROUND_CUR_DIRECTION 0x04 > > + > > +#define _MM_FROUND_NINT \ > > + (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC) > > +#define _MM_FROUND_FLOOR \ > > + (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC) > > +#define _MM_FROUND_CEIL \ > > + (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC) > > +#define _MM_FROUND_TRUNC \ > > + (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC) > > +#define _MM_FROUND_RINT \ > > + (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC) > > +#define _MM_FROUND_NEARBYINT \ > > + (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC) > > + > > +#define _MM_FROUND_RAISE_EXC 0x00 > > +#define _MM_FROUND_NO_EXC 0x08 > > + > > +extern __inline __m128d > > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > +_mm_round_pd (__m128d __A, int __rounding) > > +{ > > + __v2df __r; > > + union { > > + double __fr; > > + long long __fpscr; > > + } __enables_save, __fpscr_save; > > + > > + if (__rounding & _MM_FROUND_NO_EXC) > > + { > > + /* Save enabled exceptions, disable all exceptions, > > + and preserve the rounding mode. */ > > +#ifdef _ARCH_PWR9 > > + __asm__ ("mffsce %0" : "=f" (__fpscr_save.__fr)); > > + __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; > > +#else > > + __fpscr_save.__fr = __builtin_mffs (); > > + __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; > > + __fpscr_save.__fpscr &= ~0xf8; > > + __builtin_mtfsf (0b00000011, __fpscr_save.__fr); > > +#endif > > + /* Insert an artificial "read/write" reference to the variable > > + read below, to ensure the compiler does not schedule > > + a read/use of the variable before the FPSCR is modified, above. > > + This can be removed if and when GCC PR102783 is fixed. 
> > + */ > > + __asm__ ("" : "+wa" (__A)); > > + } > > + > > + switch (__rounding) > > + { > > + case _MM_FROUND_TO_NEAREST_INT: > > + __fpscr_save.__fr = __builtin_mffsl (); > > + __attribute__ ((fallthrough)); > > + case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC: > > + __builtin_set_fpscr_rn (0b00); > > + /* Insert an artificial "read/write" reference to the variable > > + read below, to ensure the compiler does not schedule > > + a read/use of the variable before the FPSCR is modified, above. > > + This can be removed if and when GCC PR102783 is fixed. > > + */ > > + __asm__ ("" : "+wa" (__A)); > > + > > + __r = vec_rint ((__v2df) __A); > > + > > + /* Insert an artificial "read" reference to the variable written > > + above, to ensure the compiler does not schedule the computation > > + of the value after the manipulation of the FPSCR, below. > > + This can be removed if and when GCC PR102783 is fixed. > > + */ > > + __asm__ ("" : : "wa" (__r)); > > + __builtin_set_fpscr_rn (__fpscr_save.__fpscr); > > + break; > > + case _MM_FROUND_TO_NEG_INF: > > + case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC: > > + __r = vec_floor ((__v2df) __A); > > + break; > > + case _MM_FROUND_TO_POS_INF: > > + case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC: > > + __r = vec_ceil ((__v2df) __A); > > + break; > > + case _MM_FROUND_TO_ZERO: > > + case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC: > > + __r = vec_trunc ((__v2df) __A); > > + break; > > + case _MM_FROUND_CUR_DIRECTION: > > + __r = vec_rint ((__v2df) __A); > > + break; > > + } > > + if (__rounding & _MM_FROUND_NO_EXC) > > + { > > + /* Insert an artificial "read" reference to the variable written > > + above, to ensure the compiler does not schedule the computation > > + of the value after the manipulation of the FPSCR, below. > > + This can be removed if and when GCC PR102783 is fixed. > > + */ > > + __asm__ ("" : : "wa" (__r)); > > + /* Restore enabled exceptions. 
*/ > > + __fpscr_save.__fr = __builtin_mffsl (); > > + __fpscr_save.__fpscr |= __enables_save.__fpscr; > > + __builtin_mtfsf (0b00000011, __fpscr_save.__fr); > > + } > > + return (__m128d) __r; > > +} > > + > > +extern __inline __m128d > > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > +_mm_round_sd (__m128d __A, __m128d __B, int __rounding) > > +{ > > + __B = _mm_round_pd (__B, __rounding); > > + __v2df __r = { ((__v2df) __B)[0], ((__v2df) __A)[1] }; > > + return (__m128d) __r; > > +} > > + > > +extern __inline __m128 > > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > +_mm_round_ps (__m128 __A, int __rounding) > > +{ > > + __v4sf __r; > > + union { > > + double __fr; > > + long long __fpscr; > > + } __enables_save, __fpscr_save; > > + > > + if (__rounding & _MM_FROUND_NO_EXC) > > + { > > + /* Save enabled exceptions, disable all exceptions, > > + and preserve the rounding mode. */ > > +#ifdef _ARCH_PWR9 > > + __asm__ ("mffsce %0" : "=f" (__fpscr_save.__fr)); > > + __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; > > +#else > > + __fpscr_save.__fr = __builtin_mffs (); > > + __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; > > + __fpscr_save.__fpscr &= ~0xf8; > > + __builtin_mtfsf (0b00000011, __fpscr_save.__fr); > > +#endif > > + /* Insert an artificial "read/write" reference to the variable > > + read below, to ensure the compiler does not schedule > > + a read/use of the variable before the FPSCR is modified, above. > > + This can be removed if and when GCC PR102783 is fixed. 
> > + */ > > + __asm__ ("" : "+wa" (__A)); > > + } > > + > > + switch (__rounding) > > + { > > + case _MM_FROUND_TO_NEAREST_INT: > > + __fpscr_save.__fr = __builtin_mffsl (); > > + __attribute__ ((fallthrough)); > > + case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC: > > + __builtin_set_fpscr_rn (0b00); > > + /* Insert an artificial "read/write" reference to the variable > > + read below, to ensure the compiler does not schedule > > + a read/use of the variable before the FPSCR is modified, above. > > + This can be removed if and when GCC PR102783 is fixed. > > + */ > > + __asm__ ("" : "+wa" (__A)); > > + > > + __r = vec_rint ((__v4sf) __A); > > + > > + /* Insert an artificial "read" reference to the variable written > > + above, to ensure the compiler does not schedule the computation > > + of the value after the manipulation of the FPSCR, below. > > + This can be removed if and when GCC PR102783 is fixed. > > + */ > > + __asm__ ("" : : "wa" (__r)); > > + __builtin_set_fpscr_rn (__fpscr_save.__fpscr); > > + break; > > + case _MM_FROUND_TO_NEG_INF: > > + case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC: > > + __r = vec_floor ((__v4sf) __A); > > + break; > > + case _MM_FROUND_TO_POS_INF: > > + case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC: > > + __r = vec_ceil ((__v4sf) __A); > > + break; > > + case _MM_FROUND_TO_ZERO: > > + case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC: > > + __r = vec_trunc ((__v4sf) __A); > > + break; > > + case _MM_FROUND_CUR_DIRECTION: > > + __r = vec_rint ((__v4sf) __A); > > + break; > > + } > > + if (__rounding & _MM_FROUND_NO_EXC) > > + { > > + /* Insert an artificial "read" reference to the variable written > > + above, to ensure the compiler does not schedule the computation > > + of the value after the manipulation of the FPSCR, below. > > + This can be removed if and when GCC PR102783 is fixed. > > + */ > > + __asm__ ("" : : "wa" (__r)); > > + /* Restore enabled exceptions. 
*/ > > + __fpscr_save.__fr = __builtin_mffsl (); > > + __fpscr_save.__fpscr |= __enables_save.__fpscr; > > + __builtin_mtfsf (0b00000011, __fpscr_save.__fr); > > + } > > + return (__m128) __r; > > +} > > + > > +extern __inline __m128 > > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > +_mm_round_ss (__m128 __A, __m128 __B, int __rounding) > > +{ > > + __B = _mm_round_ps (__B, __rounding); > > + __v4sf __r = (__v4sf) __A; > > + __r[0] = ((__v4sf) __B)[0]; > > + return (__m128) __r; > > +} > > + > > +#define _mm_ceil_pd(V) _mm_round_pd ((V), _MM_FROUND_CEIL) > > +#define _mm_ceil_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_CEIL) > > + > > +#define _mm_floor_pd(V) _mm_round_pd((V), _MM_FROUND_FLOOR) > > +#define _mm_floor_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_FLOOR) > > + > > +#define _mm_ceil_ps(V) _mm_round_ps ((V), _MM_FROUND_CEIL) > > +#define _mm_ceil_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_CEIL) > > + > > +#define _mm_floor_ps(V) _mm_round_ps ((V), _MM_FROUND_FLOOR) > > +#define _mm_floor_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_FLOOR) > > + > > extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) > > _mm_insert_epi8 (__m128i const __A, int const __D, int const __N) > > { > > @@ -210,70 +438,6 @@ _mm_testnzc_si128 (__m128i __A, __m128i __B) > > > > #define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128 ((M), (V)) > > > > -__inline __m128d > > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > -_mm_ceil_pd (__m128d __A) > > -{ > > - return (__m128d) vec_ceil ((__v2df) __A); > > -} > > - > > -__inline __m128d > > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > -_mm_ceil_sd (__m128d __A, __m128d __B) > > -{ > > - __v2df __r = vec_ceil ((__v2df) __B); > > - __r[1] = ((__v2df) __A)[1]; > > - return (__m128d) __r; > > -} > > - > > -__inline __m128d > > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > -_mm_floor_pd (__m128d 
__A) > > -{ > > - return (__m128d) vec_floor ((__v2df) __A); > > -} > > - > > -__inline __m128d > > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > -_mm_floor_sd (__m128d __A, __m128d __B) > > -{ > > - __v2df __r = vec_floor ((__v2df) __B); > > - __r[1] = ((__v2df) __A)[1]; > > - return (__m128d) __r; > > -} > > - > > -__inline __m128 > > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > -_mm_ceil_ps (__m128 __A) > > -{ > > - return (__m128) vec_ceil ((__v4sf) __A); > > -} > > - > > -__inline __m128 > > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > -_mm_ceil_ss (__m128 __A, __m128 __B) > > -{ > > - __v4sf __r = (__v4sf) __A; > > - __r[0] = __builtin_ceil (((__v4sf) __B)[0]); > > - return __r; > > -} > > - > > -__inline __m128 > > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > -_mm_floor_ps (__m128 __A) > > -{ > > - return (__m128) vec_floor ((__v4sf) __A); > > -} > > - > > -__inline __m128 > > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > -_mm_floor_ss (__m128 __A, __m128 __B) > > -{ > > - __v4sf __r = (__v4sf) __A; > > - __r[0] = __builtin_floor (((__v4sf) __B)[0]); > > - return __r; > > -} > > - > > #ifdef _ARCH_PWR8 > > extern __inline __m128i > > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-round3.h b/gcc/testsuite/gcc.target/powerpc/sse4_1-round3.h > > new file mode 100644 > > index 000000000000..de6cbf7be438 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-round3.h > > @@ -0,0 +1,81 @@ > > +#include <smmintrin.h> > > +#include <fenv.h> > > +#include "sse4_1-check.h" > > + > > +#define DIM(a) (sizeof (a) / sizeof (a)[0]) > > + > > +static int roundings[] = > > + { > > + _MM_FROUND_TO_NEAREST_INT, > > + _MM_FROUND_TO_NEG_INF, > > + _MM_FROUND_TO_POS_INF, > > + _MM_FROUND_TO_ZERO, > > + _MM_FROUND_CUR_DIRECTION > > + }; > > + > > +static 
int modes[] = > > + { > > + FE_TONEAREST, > > + FE_UPWARD, > > + FE_DOWNWARD, > > + FE_TOWARDZERO > > + }; > > + > > +static void > > +TEST (void) > > +{ > > + int i, j, ri, mi, round_save; > > + > > + round_save = fegetround (); > > + for (mi = 0; mi < DIM (modes); mi++) { > > + fesetround (modes[mi]); > > + for (i = 0; i < DIM (data); i++) { > > + for (ri = 0; ri < DIM (roundings); ri++) { > > + union value guess; > > + union value *current_answers = answers[ri]; > > + switch ( roundings[ri] ) { > > + case _MM_FROUND_TO_NEAREST_INT: > > + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, > > + _MM_FROUND_TO_NEAREST_INT); > > + break; > > + case _MM_FROUND_TO_NEG_INF: > > + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, > > + _MM_FROUND_TO_NEG_INF); > > + break; > > + case _MM_FROUND_TO_POS_INF: > > + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, > > + _MM_FROUND_TO_POS_INF); > > + break; > > + case _MM_FROUND_TO_ZERO: > > + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, > > + _MM_FROUND_TO_ZERO); > > + break; > > + case _MM_FROUND_CUR_DIRECTION: > > + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, > > + _MM_FROUND_CUR_DIRECTION); > > + switch ( modes[mi] ) { > > + case FE_TONEAREST: > > + current_answers = answers_NEAREST_INT; > > + break; > > + case FE_UPWARD: > > + current_answers = answers_POS_INF; > > + break; > > + case FE_DOWNWARD: > > + current_answers = answers_NEG_INF; > > + break; > > + case FE_TOWARDZERO: > > + current_answers = answers_ZERO; > > + break; > > + } > > + break; > > + default: > > + abort (); > > + } > > + for (j = 0; j < DIM (guess.f); j++) > > + if (guess.f[j] != current_answers[i].f[j]) > > + abort (); > > + } > > + } > > + } > > + fesetround (round_save); > > +} > > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-roundpd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundpd.c > > new file mode 100644 > > index 000000000000..58d9cc524167 > > --- /dev/null > > +++ 
b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundpd.c > > @@ -0,0 +1,143 @@ > > +/* { dg-do run } */ > > +/* { dg-require-effective-target vsx_hw } */ > > +/* { dg-options "-O2 -mvsx" } */ > > + > > +#define NO_WARN_X86_INTRINSICS 1 > > +#include <smmintrin.h> > > + > > +#define VEC_T __m128d > > +#define FP_T double > > + > > +#define ROUND_INTRIN(x, ignored, mode) _mm_round_pd (x, mode) > > + > > +#include "sse4_1-round-data.h" > > + > > +struct data2 data[] = { > > + { .value1 = { .f = { 0.00, 0.25 } } }, > > + { .value1 = { .f = { 0.50, 0.75 } } }, > > + > > + { .value1 = { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffdp+50 } } }, > > + { .value1 = { .f = { 0x1.ffffffffffffep+50, 0x1.fffffffffffffp+50 } } }, > > + { .value1 = { .f = { 0x1.0000000000000p+51, 0x1.0000000000001p+51 } } }, > > + { .value1 = { .f = { 0x1.0000000000002p+51, 0x1.0000000000003p+51 } } }, > > + > > + { .value1 = { .f = { 0x1.ffffffffffffep+51, 0x1.fffffffffffffp+51 } } }, > > + { .value1 = { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } } }, > > + > > + { .value1 = { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } } }, > > + { .value1 = { .f = { -0x1.fffffffffffffp+51, -0x1.ffffffffffffep+51 } } }, > > + > > + { .value1 = { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } } }, > > + { .value1 = { .f = { -0x1.0000000000001p+51, -0x1.0000000000000p+51 } } }, > > + { .value1 = { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffep+50 } } }, > > + { .value1 = { .f = { -0x1.ffffffffffffdp+50, -0x1.ffffffffffffcp+50 } } }, > > + > > + { .value1 = { .f = { -1.00, -0.75 } } }, > > + { .value1 = { .f = { -0.50, -0.25 } } } > > +}; > > + > > +union value answers_NEAREST_INT[] = { > > + { .f = { 0.00, 0.00 } }, > > + { .f = { 0.00, 1.00 } }, > > + > > + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, > > + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, > > + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, > > + { .f = { 0x1.0000000000002p+51, 
0x1.0000000000004p+51 } }, > > + > > + { .f = { 0x1.ffffffffffffep+51, 0x1.0000000000000p+52 } }, > > + { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } }, > > + > > + { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } }, > > + { .f = { -0x1.0000000000000p+52, -0x1.ffffffffffffep+51 } }, > > + > > + { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } }, > > + { .f = { -0x1.0000000000000p+51, -0x1.0000000000000p+51 } }, > > + { .f = { -0x1.ffffffffffffcp+50, -0x1.0000000000000p+51 } }, > > + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, > > + > > + { .f = { -1.00, -1.00 } }, > > + { .f = { 0.00, 0.00 } } > > +}; > > + > > +union value answers_NEG_INF[] = { > > + { .f = { 0.00, 0.00 } }, > > + { .f = { 0.00, 0.00 } }, > > + > > + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, > > + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, > > + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, > > + { .f = { 0x1.0000000000002p+51, 0x1.0000000000002p+51 } }, > > + > > + { .f = { 0x1.ffffffffffffep+51, 0x1.ffffffffffffep+51 } }, > > + { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } }, > > + > > + { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } }, > > + { .f = { -0x1.0000000000000p+52, -0x1.ffffffffffffep+51 } }, > > + > > + { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } }, > > + { .f = { -0x1.0000000000002p+51, -0x1.0000000000000p+51 } }, > > + { .f = { -0x1.ffffffffffffcp+50, -0x1.0000000000000p+51 } }, > > + { .f = { -0x1.0000000000000p+51, -0x1.ffffffffffffcp+50 } }, > > + > > + { .f = { -1.00, -1.00 } }, > > + { .f = { -1.00, -1.00 } } > > +}; > > + > > +union value answers_POS_INF[] = { > > + { .f = { 0.00, 1.00 } }, > > + { .f = { 1.00, 1.00 } }, > > + > > + { .f = { 0x1.ffffffffffffcp+50, 0x1.0000000000000p+51 } }, > > + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, > > + { .f = { 0x1.0000000000000p+51, 0x1.0000000000002p+51 } }, > > + { .f = { 
0x1.0000000000002p+51, 0x1.0000000000004p+51 } }, > > + > > + { .f = { 0x1.ffffffffffffep+51, 0x1.0000000000000p+52 } }, > > + { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } }, > > + > > + { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } }, > > + { .f = { -0x1.ffffffffffffep+51, -0x1.ffffffffffffep+51 } }, > > + > > + { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } }, > > + { .f = { -0x1.0000000000000p+51, -0x1.0000000000000p+51 } }, > > + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, > > + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, > > + > > + { .f = { -1.00, 0.00 } }, > > + { .f = { 0.00, 0.00 } } > > +}; > > + > > +union value answers_ZERO[] = { > > + { .f = { 0.00, 0.00 } }, > > + { .f = { 0.00, 0.00 } }, > > + > > + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, > > + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, > > + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, > > + { .f = { 0x1.0000000000002p+51, 0x1.0000000000002p+51 } }, > > + > > + { .f = { 0x1.ffffffffffffep+51, 0x1.ffffffffffffep+51 } }, > > + { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } }, > > + > > + { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } }, > > + { .f = { -0x1.ffffffffffffep+51, -0x1.ffffffffffffep+51 } }, > > + > > + { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } }, > > + { .f = { -0x1.0000000000000p+51, -0x1.0000000000000p+51 } }, > > + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, > > + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, > > + > > + { .f = { -1.00, 0.00 } }, > > + { .f = { 0.00, 0.00 } } > > +}; > > + > > +union value *answers[] = { > > + answers_NEAREST_INT, > > + answers_NEG_INF, > > + answers_POS_INF, > > + answers_ZERO, > > + 0 /* CUR_DIRECTION answers depend on current rounding mode. 
*/ > > +}; > > + > > +#include "sse4_1-round3.h" > > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-roundps.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundps.c > > new file mode 100644 > > index 000000000000..4b0366dfddf3 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundps.c > > @@ -0,0 +1,98 @@ > > +/* { dg-do run } */ > > +/* { dg-require-effective-target vsx_hw } */ > > +/* { dg-options "-O2 -mvsx" } */ > > + > > +#define NO_WARN_X86_INTRINSICS 1 > > +#include <smmintrin.h> > > + > > +#define VEC_T __m128 > > +#define FP_T float > > + > > +#define ROUND_INTRIN(x, ignored, mode) _mm_round_ps (x, mode) > > + > > +#include "sse4_1-round-data.h" > > + > > +struct data2 data[] = { > > + { .value1 = { .f = { 0.00, 0.25, 0.50, 0.75 } } }, > > + > > + { .value1 = { .f = { 0x1.fffff8p+21, 0x1.fffffap+21, > > + 0x1.fffffcp+21, 0x1.fffffep+21 } } }, > > + { .value1 = { .f = { 0x1.fffffap+22, 0x1.fffffcp+22, > > + 0x1.fffffep+22, 0x1.fffffep+23 } } }, > > + { .value1 = { .f = { -0x1.fffffep+23, -0x1.fffffep+22, > > + -0x1.fffffcp+22, -0x1.fffffap+22 } } }, > > + { .value1 = { .f = { -0x1.fffffep+21, -0x1.fffffcp+21, > > + -0x1.fffffap+21, -0x1.fffff8p+21 } } }, > > + > > + { .value1 = { .f = { -1.00, -0.75, -0.50, -0.25 } } } > > +}; > > + > > +union value answers_NEAREST_INT[] = { > > + { .f = { 0.00, 0.00, 0.00, 1.00 } }, > > + > > + { .f = { 0x1.fffff8p+21, 0x1.fffff8p+21, > > + 0x1.000000p+22, 0x1.000000p+22 } }, > > + { .f = { 0x1.fffff8p+22, 0x1.fffffcp+22, > > + 0x1.000000p+23, 0x1.fffffep+23 } }, > > + { .f = { -0x1.fffffep+23, -0x1.000000p+23, > > + -0x1.fffffcp+22, -0x1.fffff8p+22 } }, > > + { .f = { -0x1.000000p+22, -0x1.000000p+22, > > + -0x1.fffff8p+21, -0x1.fffff8p+21 } }, > > + > > + { .f = { -1.00, -1.00, 0.00, 0.00 } } > > +}; > > + > > +union value answers_NEG_INF[] = { > > + { .f = { 0.00, 0.00, 0.00, 0.00 } }, > > + > > + { .f = { 0x1.fffff8p+21, 0x1.fffff8p+21, > > + 0x1.fffff8p+21, 0x1.fffff8p+21 } }, > > + { .f = { 
0x1.fffff8p+22, 0x1.fffffcp+22, > > + 0x1.fffffcp+22, 0x1.fffffep+23 } }, > > + { .f = { -0x1.fffffep+23, -0x1.000000p+23, > > + -0x1.fffffcp+22, -0x1.fffffcp+22 } }, > > + { .f = { -0x1.000000p+22, -0x1.000000p+22, > > + -0x1.000000p+22, -0x1.fffff8p+21 } }, > > + > > + { .f = { -1.00, -1.00, -1.00, -1.00 } } > > +}; > > + > > +union value answers_POS_INF[] = { > > + { .f = { 0.00, 1.00, 1.00, 1.00 } }, > > + > > + { .f = { 0x1.fffff8p+21, 0x1.000000p+22, > > + 0x1.000000p+22, 0x1.000000p+22 } }, > > + { .f = { 0x1.fffffcp+22, 0x1.fffffcp+22, > > + 0x1.000000p+23, 0x1.fffffep+23 } }, > > + { .f = { -0x1.fffffep+23, -0x1.fffffcp+22, > > + -0x1.fffffcp+22, -0x1.fffff8p+22 } }, > > + { .f = { -0x1.fffff8p+21, -0x1.fffff8p+21, > > + -0x1.fffff8p+21, -0x1.fffff8p+21 } }, > > + > > + { .f = { -1.00, 0.00, 0.00, 0.00 } } > > +}; > > + > > +union value answers_ZERO[] = { > > + { .f = { 0.00, 0.00, 0.00, 0.00 } }, > > + > > + { .f = { 0x1.fffff8p+21, 0x1.fffff8p+21, > > + 0x1.fffff8p+21, 0x1.fffff8p+21 } }, > > + { .f = { 0x1.fffff8p+22, 0x1.fffffcp+22, > > + 0x1.fffffcp+22, 0x1.fffffep+23 } }, > > + { .f = { -0x1.fffffep+23, -0x1.fffffcp+22, > > + -0x1.fffffcp+22, -0x1.fffff8p+22 } }, > > + { .f = { -0x1.fffff8p+21, -0x1.fffff8p+21, > > + -0x1.fffff8p+21, -0x1.fffff8p+21 } }, > > + > > + { .f = { -1.00, 0.00, 0.00, 0.00 } } > > +}; > > + > > +union value *answers[] = { > > + answers_NEAREST_INT, > > + answers_NEG_INF, > > + answers_POS_INF, > > + answers_ZERO, > > + 0 /* CUR_DIRECTION answers depend on current rounding mode. 
*/ > > +}; > > + > > +#include "sse4_1-round3.h" > > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-roundsd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundsd.c > > new file mode 100644 > > index 000000000000..4f8d9e08c93d > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundsd.c > > @@ -0,0 +1,256 @@ > > +/* { dg-do run } */ > > +/* { dg-require-effective-target vsx_hw } */ > > +/* { dg-options "-O2 -mvsx" } */ > > + > > +#include <stdio.h> > > +#define NO_WARN_X86_INTRINSICS 1 > > +#include <smmintrin.h> > > + > > +#define VEC_T __m128d > > +#define FP_T double > > + > > +#define ROUND_INTRIN(x, y, mode) _mm_round_sd (x, y, mode) > > + > > +#include "sse4_1-round-data.h" > > + > > +static struct data2 data[] = { > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { 0.00, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { 0.25, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { 0.50, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { 0.75, IGNORED } } }, > > + > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { 0x1.ffffffffffffcp+50, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { 0x1.ffffffffffffdp+50, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { 0x1.ffffffffffffep+50, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { 0x1.fffffffffffffp+50, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { 0x1.0000000000000p+51, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { 0x1.0000000000001p+51, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { 0x1.0000000000002p+51, IGNORED } } }, > > + { .value1 = { .f = { 
IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { 0x1.0000000000003p+51, IGNORED } } }, > > + > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { 0x1.ffffffffffffep+51, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { 0x1.fffffffffffffp+51, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { 0x1.0000000000000p+52, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { 0x1.0000000000001p+52, IGNORED } } }, > > + > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { -0x1.0000000000001p+52, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { -0x1.0000000000000p+52, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { -0x1.fffffffffffffp+51, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { -0x1.ffffffffffffep+51, IGNORED } } }, > > + > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { -0x1.0000000000004p+51, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { -0x1.0000000000002p+51, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { -0x1.0000000000001p+51, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { -0x1.0000000000000p+51, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { -0x1.ffffffffffffcp+50, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { -0x1.ffffffffffffep+50, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { -0x1.ffffffffffffdp+50, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { -0x1.ffffffffffffcp+50, IGNORED } } }, > > + > > + { .value1 = 
{ .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { -1.00, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { -0.75, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { -0.50, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > + .value2 = { .f = { -0.25, IGNORED } } } > > +}; > > + > > +static union value answers_NEAREST_INT[] = { > > + { .f = { 0.00, PASSTHROUGH } }, > > + { .f = { 0.00, PASSTHROUGH } }, > > + { .f = { 0.00, PASSTHROUGH } }, > > + { .f = { 1.00, PASSTHROUGH } }, > > + > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > > + { .f = { 0x1.0000000000004p+51, PASSTHROUGH } }, > > + > > + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, > > + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, > > + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, > > + { .f = { 0x1.0000000000001p+52, PASSTHROUGH } }, > > + > > + { .f = { -0x1.0000000000001p+52, PASSTHROUGH } }, > > + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, > > + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, > > + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, > > + > > + { .f = { -0x1.0000000000004p+51, PASSTHROUGH } }, > > + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, > > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > + > > + { .f = { -1.00, PASSTHROUGH } }, > 
> + { .f = { -1.00, PASSTHROUGH } }, > > + { .f = { -0.00, PASSTHROUGH } }, > > + { .f = { -0.00, PASSTHROUGH } } > > +}; > > + > > +static union value answers_NEG_INF[] = { > > + { .f = { 0.00, PASSTHROUGH } }, > > + { .f = { 0.00, PASSTHROUGH } }, > > + { .f = { 0.00, PASSTHROUGH } }, > > + { .f = { 0.00, PASSTHROUGH } }, > > + > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > > + > > + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, > > + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, > > + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, > > + { .f = { 0x1.0000000000001p+52, PASSTHROUGH } }, > > + > > + { .f = { -0x1.0000000000001p+52, PASSTHROUGH } }, > > + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, > > + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, > > + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, > > + > > + { .f = { -0x1.0000000000004p+51, PASSTHROUGH } }, > > + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, > > + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, > > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > + > > + { .f = { -1.00, PASSTHROUGH } }, > > + { .f = { -1.00, PASSTHROUGH } }, > > + { .f = { -1.00, PASSTHROUGH } }, > > + { .f = { -1.00, PASSTHROUGH } } > > +}; > > + > > +static union value answers_POS_INF[] = { > > + { .f = { 0.00, PASSTHROUGH } }, > > + { .f = { 1.00, PASSTHROUGH } }, > > + { .f = { 1.00, 
PASSTHROUGH } }, > > + { .f = { 1.00, PASSTHROUGH } }, > > + > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > > + { .f = { 0x1.0000000000004p+51, PASSTHROUGH } }, > > + > > + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, > > + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, > > + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, > > + { .f = { 0x1.0000000000001p+52, PASSTHROUGH } }, > > + > > + { .f = { -0x1.0000000000001p+52, PASSTHROUGH } }, > > + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, > > + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, > > + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, > > + > > + { .f = { -0x1.0000000000004p+51, PASSTHROUGH } }, > > + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, > > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > + > > + { .f = { -1.00, PASSTHROUGH } }, > > + { .f = { 0.00, PASSTHROUGH } }, > > + { .f = { 0.00, PASSTHROUGH } }, > > + { .f = { 0.00, PASSTHROUGH } } > > +}; > > + > > +static union value answers_ZERO[] = { > > + { .f = { 0.00, PASSTHROUGH } }, > > + { .f = { 0.00, PASSTHROUGH } }, > > + { .f = { 0.00, PASSTHROUGH } }, > > + { .f = { 0.00, PASSTHROUGH } }, > > + > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH 
} }, > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > > + > > + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, > > + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, > > + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, > > + { .f = { 0x1.0000000000001p+52, PASSTHROUGH } }, > > + > > + { .f = { -0x1.0000000000001p+52, PASSTHROUGH } }, > > + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, > > + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, > > + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, > > + > > + { .f = { -0x1.0000000000004p+51, PASSTHROUGH } }, > > + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, > > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > + > > + { .f = { -1.00, PASSTHROUGH } }, > > + { .f = { 0.00, PASSTHROUGH } }, > > + { .f = { 0.00, PASSTHROUGH } }, > > + { .f = { 0.00, PASSTHROUGH } } > > +}; > > + > > +union value *answers[] = { > > + answers_NEAREST_INT, > > + answers_NEG_INF, > > + answers_POS_INF, > > + answers_ZERO, > > + 0 /* CUR_DIRECTION answers depend on current rounding mode. 
*/ > > +}; > > + > > +#include "sse4_1-round3.h" > > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-roundss.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundss.c > > new file mode 100644 > > index 000000000000..d788ebda64dd > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundss.c > > @@ -0,0 +1,208 @@ > > +/* { dg-do run } */ > > +/* { dg-require-effective-target vsx_hw } */ > > +/* { dg-options "-O2 -mvsx" } */ > > + > > +#include <stdio.h> > > +#define NO_WARN_X86_INTRINSICS 1 > > +#include <smmintrin.h> > > + > > +#define VEC_T __m128 > > +#define FP_T float > > + > > +#define ROUND_INTRIN(x, y, mode) _mm_round_ss (x, y, mode) > > + > > +#include "sse4_1-round-data.h" > > + > > +static struct data2 data[] = { > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + .value2 = { .f = { 0.00, IGNORED, IGNORED, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + .value2 = { .f = { 0.25, IGNORED, IGNORED, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + .value2 = { .f = { 0.50, IGNORED, IGNORED, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + .value2 = { .f = { 0.75, IGNORED, IGNORED, IGNORED } } }, > > + > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + .value2 = { .f = { 0x1.fffff8p+21, IGNORED, IGNORED, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + .value2 = { .f = { 0x1.fffffap+21, IGNORED, IGNORED, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + .value2 = { .f = { 0x1.fffffcp+21, IGNORED, IGNORED, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + .value2 = { .f = { 0x1.fffffep+21, IGNORED, IGNORED, IGNORED } } }, > > + > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, 
PASSTHROUGH, PASSTHROUGH } }, > > + .value2 = { .f = { 0x1.fffffap+22, IGNORED, IGNORED, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + .value2 = { .f = { 0x1.fffffcp+22, IGNORED, IGNORED, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + .value2 = { .f = { 0x1.fffffep+22, IGNORED, IGNORED, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + .value2 = { .f = { 0x1.fffffep+23, IGNORED, IGNORED, IGNORED } } }, > > + > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + .value2 = { .f = { -0x1.fffffep+23, IGNORED, IGNORED, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + .value2 = { .f = { -0x1.fffffep+22, IGNORED, IGNORED, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + .value2 = { .f = { -0x1.fffffcp+22, IGNORED, IGNORED, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + .value2 = { .f = { -0x1.fffffap+22, IGNORED, IGNORED, IGNORED } } }, > > + > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + .value2 = { .f = { -0x1.fffffep+21, IGNORED, IGNORED, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + .value2 = { .f = { -0x1.fffffcp+21, IGNORED, IGNORED, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + .value2 = { .f = { -0x1.fffffap+21, IGNORED, IGNORED, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + .value2 = { .f = { -0x1.fffff8p+21, IGNORED, IGNORED, IGNORED } } }, > > + > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + .value2 = { .f = { -1.00, IGNORED, IGNORED, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, 
PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + .value2 = { .f = { -0.75, IGNORED, IGNORED, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + .value2 = { .f = { -0.50, IGNORED, IGNORED, IGNORED } } }, > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + .value2 = { .f = { -0.25, IGNORED, IGNORED, IGNORED } } } > > +}; > > + > > +static union value answers_NEAREST_INT[] = { > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + > > + { .f = { 0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0x1.000000p+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + > > + { .f = { -0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { -0x1.000000p+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { -0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + > > + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + > > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } 
}, > > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { -0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { -0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } } > > +}; > > + > > +static union value answers_NEG_INF[] = { > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + > > + { .f = { 0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + > > + { .f = { -0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { -0x1.000000p+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + > > + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + > > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } } > > 
+}; > > + > > +static union value answers_POS_INF[] = { > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + > > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0x1.000000p+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + > > + { .f = { -0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { -0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + > > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } } > > +}; > > + > > +static union value answers_ZERO[] = { > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0.00, 
PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + > > + { .f = { 0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + > > + { .f = { -0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { -0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + > > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } } > > +}; > > + > > +union value *answers[] = { > > + answers_NEAREST_INT, > > + answers_NEG_INF, > > + answers_POS_INF, > > + answers_ZERO, > > + 0 /* CUR_DIRECTION answers depend on current rounding mode. */ > > +}; > > + > > +#include "sse4_1-round3.h" > > -- > > 2.27.0 > >
On Mon, Nov 08, 2021 at 11:40:42AM -0600, Paul A. Clarke via Gcc-patches wrote: > On Tue, Oct 26, 2021 at 03:00:11PM -0500, Paul A. Clarke via Gcc-patches wrote: > > Patches 1/3 and 3/3 have been committed. > > This is only a ping for 2/3. > > Gentle re-ping. Gentle re-re-ping. > > On Mon, Oct 18, 2021 at 08:15:11PM -0500, Paul A. Clarke via Gcc-patches wrote: > > > Suppress exceptions (when specified), by saving, manipulating, and > > > restoring the FPSCR. Similarly, save, set, and restore the floating-point > > > rounding mode when required. > > > > > > No attempt is made to optimize writing the FPSCR (by checking if the new > > > value would be the same), other than using lighter weight instructions > > > when possible. Note that explicit instruction scheduling "barriers" are > > > added to prevent floating-point computations from being moved before or > > > after the explicit FPSCR manipulations. (That these are required has > > > been reported as an issue in GCC: PR102783.) > > > > > > The scalar versions naively use the parallel versions to compute the > > > single scalar result and then construct the remainder of the result. > > > > > > Of minor note, the values of _MM_FROUND_TO_NEG_INF and _MM_FROUND_TO_ZERO > > > are swapped from the corresponding values on x86 so as to match the > > > corresponding rounding mode values in the Power ISA. > > > > > > Move implementations of _mm_ceil* and _mm_floor* into _mm_round*, and > > > convert _mm_ceil* and _mm_floor* into macros. This matches the current > > > analogous implementations in config/i386/smmintrin.h. > > > > > > Function signatures match the analogous functions in config/i386/smmintrin.h. > > > > > > Add tests for _mm_round_pd, _mm_round_ps, _mm_round_sd, _mm_round_ss, > > > modeled after the very similar "floor" and "ceil" tests. 
> > > > > > Include basic tests, plus tests at the boundaries for floating-point > > > representation, positive and negative, test all of the parameterized > > > rounding modes as well as the C99 rounding modes and interactions > > > between the two. > > > > > > Exceptions are not explicitly tested. > > > > > > 2021-10-18 Paul A. Clarke <pc@us.ibm.com> > > > > > > gcc > > > * config/rs6000/smmintrin.h (_mm_round_pd, _mm_round_ps, > > > _mm_round_sd, _mm_round_ss, _MM_FROUND_TO_NEAREST_INT, > > > _MM_FROUND_TO_ZERO, _MM_FROUND_TO_POS_INF, _MM_FROUND_TO_NEG_INF, > > > _MM_FROUND_CUR_DIRECTION, _MM_FROUND_RAISE_EXC, _MM_FROUND_NO_EXC, > > > _MM_FROUND_NINT, _MM_FROUND_FLOOR, _MM_FROUND_CEIL, _MM_FROUND_TRUNC, > > > _MM_FROUND_RINT, _MM_FROUND_NEARBYINT): New. > > > * config/rs6000/smmintrin.h (_mm_ceil_pd, _mm_ceil_ps, _mm_ceil_sd, > > > _mm_ceil_ss, _mm_floor_pd, _mm_floor_ps, _mm_floor_sd, _mm_floor_ss): > > > Convert from function to macro. > > > > > > gcc/testsuite > > > * gcc.target/powerpc/sse4_1-round3.h: New. > > > * gcc.target/powerpc/sse4_1-roundpd.c: New. > > > * gcc.target/powerpc/sse4_1-roundps.c: New. > > > * gcc.target/powerpc/sse4_1-roundsd.c: New. > > > * gcc.target/powerpc/sse4_1-roundss.c: New. 
> > > --- > > > gcc/config/rs6000/smmintrin.h | 292 ++++++++++++++---- > > > .../gcc.target/powerpc/sse4_1-round3.h | 81 +++++ > > > .../gcc.target/powerpc/sse4_1-roundpd.c | 143 +++++++++ > > > .../gcc.target/powerpc/sse4_1-roundps.c | 98 ++++++ > > > .../gcc.target/powerpc/sse4_1-roundsd.c | 256 +++++++++++++++ > > > .../gcc.target/powerpc/sse4_1-roundss.c | 208 +++++++++++++ > > > 6 files changed, 1014 insertions(+), 64 deletions(-) > > > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-round3.h > > > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-roundpd.c > > > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-roundps.c > > > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-roundsd.c > > > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-roundss.c > > > > > > diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h > > > index 90ce03d22709..6bb03e6e20ac 100644 > > > --- a/gcc/config/rs6000/smmintrin.h > > > +++ b/gcc/config/rs6000/smmintrin.h > > > @@ -42,6 +42,234 @@ > > > #include <altivec.h> > > > #include <tmmintrin.h> > > > > > > +/* Rounding mode macros. 
*/ > > > +#define _MM_FROUND_TO_NEAREST_INT 0x00 > > > +#define _MM_FROUND_TO_ZERO 0x01 > > > +#define _MM_FROUND_TO_POS_INF 0x02 > > > +#define _MM_FROUND_TO_NEG_INF 0x03 > > > +#define _MM_FROUND_CUR_DIRECTION 0x04 > > > + > > > +#define _MM_FROUND_NINT \ > > > + (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC) > > > +#define _MM_FROUND_FLOOR \ > > > + (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC) > > > +#define _MM_FROUND_CEIL \ > > > + (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC) > > > +#define _MM_FROUND_TRUNC \ > > > + (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC) > > > +#define _MM_FROUND_RINT \ > > > + (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC) > > > +#define _MM_FROUND_NEARBYINT \ > > > + (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC) > > > + > > > +#define _MM_FROUND_RAISE_EXC 0x00 > > > +#define _MM_FROUND_NO_EXC 0x08 > > > + > > > +extern __inline __m128d > > > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > > +_mm_round_pd (__m128d __A, int __rounding) > > > +{ > > > + __v2df __r; > > > + union { > > > + double __fr; > > > + long long __fpscr; > > > + } __enables_save, __fpscr_save; > > > + > > > + if (__rounding & _MM_FROUND_NO_EXC) > > > + { > > > + /* Save enabled exceptions, disable all exceptions, > > > + and preserve the rounding mode. */ > > > +#ifdef _ARCH_PWR9 > > > + __asm__ ("mffsce %0" : "=f" (__fpscr_save.__fr)); > > > + __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; > > > +#else > > > + __fpscr_save.__fr = __builtin_mffs (); > > > + __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; > > > + __fpscr_save.__fpscr &= ~0xf8; > > > + __builtin_mtfsf (0b00000011, __fpscr_save.__fr); > > > +#endif > > > + /* Insert an artificial "read/write" reference to the variable > > > + read below, to ensure the compiler does not schedule > > > + a read/use of the variable before the FPSCR is modified, above. > > > + This can be removed if and when GCC PR102783 is fixed. 
> > > + */ > > > + __asm__ ("" : "+wa" (__A)); > > > + } > > > + > > > + switch (__rounding) > > > + { > > > + case _MM_FROUND_TO_NEAREST_INT: > > > + __fpscr_save.__fr = __builtin_mffsl (); > > > + __attribute__ ((fallthrough)); > > > + case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC: > > > + __builtin_set_fpscr_rn (0b00); > > > + /* Insert an artificial "read/write" reference to the variable > > > + read below, to ensure the compiler does not schedule > > > + a read/use of the variable before the FPSCR is modified, above. > > > + This can be removed if and when GCC PR102783 is fixed. > > > + */ > > > + __asm__ ("" : "+wa" (__A)); > > > + > > > + __r = vec_rint ((__v2df) __A); > > > + > > > + /* Insert an artificial "read" reference to the variable written > > > + above, to ensure the compiler does not schedule the computation > > > + of the value after the manipulation of the FPSCR, below. > > > + This can be removed if and when GCC PR102783 is fixed. > > > + */ > > > + __asm__ ("" : : "wa" (__r)); > > > + __builtin_set_fpscr_rn (__fpscr_save.__fpscr); > > > + break; > > > + case _MM_FROUND_TO_NEG_INF: > > > + case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC: > > > + __r = vec_floor ((__v2df) __A); > > > + break; > > > + case _MM_FROUND_TO_POS_INF: > > > + case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC: > > > + __r = vec_ceil ((__v2df) __A); > > > + break; > > > + case _MM_FROUND_TO_ZERO: > > > + case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC: > > > + __r = vec_trunc ((__v2df) __A); > > > + break; > > > + case _MM_FROUND_CUR_DIRECTION: > > > + __r = vec_rint ((__v2df) __A); > > > + break; > > > + } > > > + if (__rounding & _MM_FROUND_NO_EXC) > > > + { > > > + /* Insert an artificial "read" reference to the variable written > > > + above, to ensure the compiler does not schedule the computation > > > + of the value after the manipulation of the FPSCR, below. > > > + This can be removed if and when GCC PR102783 is fixed. 
> > > + */ > > > + __asm__ ("" : : "wa" (__r)); > > > + /* Restore enabled exceptions. */ > > > + __fpscr_save.__fr = __builtin_mffsl (); > > > + __fpscr_save.__fpscr |= __enables_save.__fpscr; > > > + __builtin_mtfsf (0b00000011, __fpscr_save.__fr); > > > + } > > > + return (__m128d) __r; > > > +} > > > + > > > +extern __inline __m128d > > > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > > +_mm_round_sd (__m128d __A, __m128d __B, int __rounding) > > > +{ > > > + __B = _mm_round_pd (__B, __rounding); > > > + __v2df __r = { ((__v2df) __B)[0], ((__v2df) __A)[1] }; > > > + return (__m128d) __r; > > > +} > > > + > > > +extern __inline __m128 > > > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > > +_mm_round_ps (__m128 __A, int __rounding) > > > +{ > > > + __v4sf __r; > > > + union { > > > + double __fr; > > > + long long __fpscr; > > > + } __enables_save, __fpscr_save; > > > + > > > + if (__rounding & _MM_FROUND_NO_EXC) > > > + { > > > + /* Save enabled exceptions, disable all exceptions, > > > + and preserve the rounding mode. */ > > > +#ifdef _ARCH_PWR9 > > > + __asm__ ("mffsce %0" : "=f" (__fpscr_save.__fr)); > > > + __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; > > > +#else > > > + __fpscr_save.__fr = __builtin_mffs (); > > > + __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; > > > + __fpscr_save.__fpscr &= ~0xf8; > > > + __builtin_mtfsf (0b00000011, __fpscr_save.__fr); > > > +#endif > > > + /* Insert an artificial "read/write" reference to the variable > > > + read below, to ensure the compiler does not schedule > > > + a read/use of the variable before the FPSCR is modified, above. > > > + This can be removed if and when GCC PR102783 is fixed. 
> > > + */ > > > + __asm__ ("" : "+wa" (__A)); > > > + } > > > + > > > + switch (__rounding) > > > + { > > > + case _MM_FROUND_TO_NEAREST_INT: > > > + __fpscr_save.__fr = __builtin_mffsl (); > > > + __attribute__ ((fallthrough)); > > > + case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC: > > > + __builtin_set_fpscr_rn (0b00); > > > + /* Insert an artificial "read/write" reference to the variable > > > + read below, to ensure the compiler does not schedule > > > + a read/use of the variable before the FPSCR is modified, above. > > > + This can be removed if and when GCC PR102783 is fixed. > > > + */ > > > + __asm__ ("" : "+wa" (__A)); > > > + > > > + __r = vec_rint ((__v4sf) __A); > > > + > > > + /* Insert an artificial "read" reference to the variable written > > > + above, to ensure the compiler does not schedule the computation > > > + of the value after the manipulation of the FPSCR, below. > > > + This can be removed if and when GCC PR102783 is fixed. > > > + */ > > > + __asm__ ("" : : "wa" (__r)); > > > + __builtin_set_fpscr_rn (__fpscr_save.__fpscr); > > > + break; > > > + case _MM_FROUND_TO_NEG_INF: > > > + case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC: > > > + __r = vec_floor ((__v4sf) __A); > > > + break; > > > + case _MM_FROUND_TO_POS_INF: > > > + case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC: > > > + __r = vec_ceil ((__v4sf) __A); > > > + break; > > > + case _MM_FROUND_TO_ZERO: > > > + case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC: > > > + __r = vec_trunc ((__v4sf) __A); > > > + break; > > > + case _MM_FROUND_CUR_DIRECTION: > > > + __r = vec_rint ((__v4sf) __A); > > > + break; > > > + } > > > + if (__rounding & _MM_FROUND_NO_EXC) > > > + { > > > + /* Insert an artificial "read" reference to the variable written > > > + above, to ensure the compiler does not schedule the computation > > > + of the value after the manipulation of the FPSCR, below. > > > + This can be removed if and when GCC PR102783 is fixed. 
> > > + */ > > > + __asm__ ("" : : "wa" (__r)); > > > + /* Restore enabled exceptions. */ > > > + __fpscr_save.__fr = __builtin_mffsl (); > > > + __fpscr_save.__fpscr |= __enables_save.__fpscr; > > > + __builtin_mtfsf (0b00000011, __fpscr_save.__fr); > > > + } > > > + return (__m128) __r; > > > +} > > > + > > > +extern __inline __m128 > > > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > > +_mm_round_ss (__m128 __A, __m128 __B, int __rounding) > > > +{ > > > + __B = _mm_round_ps (__B, __rounding); > > > + __v4sf __r = (__v4sf) __A; > > > + __r[0] = ((__v4sf) __B)[0]; > > > + return (__m128) __r; > > > +} > > > + > > > +#define _mm_ceil_pd(V) _mm_round_pd ((V), _MM_FROUND_CEIL) > > > +#define _mm_ceil_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_CEIL) > > > + > > > +#define _mm_floor_pd(V) _mm_round_pd((V), _MM_FROUND_FLOOR) > > > +#define _mm_floor_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_FLOOR) > > > + > > > +#define _mm_ceil_ps(V) _mm_round_ps ((V), _MM_FROUND_CEIL) > > > +#define _mm_ceil_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_CEIL) > > > + > > > +#define _mm_floor_ps(V) _mm_round_ps ((V), _MM_FROUND_FLOOR) > > > +#define _mm_floor_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_FLOOR) > > > + > > > extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) > > > _mm_insert_epi8 (__m128i const __A, int const __D, int const __N) > > > { > > > @@ -210,70 +438,6 @@ _mm_testnzc_si128 (__m128i __A, __m128i __B) > > > > > > #define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128 ((M), (V)) > > > > > > -__inline __m128d > > > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > > -_mm_ceil_pd (__m128d __A) > > > -{ > > > - return (__m128d) vec_ceil ((__v2df) __A); > > > -} > > > - > > > -__inline __m128d > > > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > > -_mm_ceil_sd (__m128d __A, __m128d __B) > > > -{ > > > - __v2df __r = vec_ceil ((__v2df) __B); > > > - 
__r[1] = ((__v2df) __A)[1]; > > > - return (__m128d) __r; > > > -} > > > - > > > -__inline __m128d > > > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > > -_mm_floor_pd (__m128d __A) > > > -{ > > > - return (__m128d) vec_floor ((__v2df) __A); > > > -} > > > - > > > -__inline __m128d > > > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > > -_mm_floor_sd (__m128d __A, __m128d __B) > > > -{ > > > - __v2df __r = vec_floor ((__v2df) __B); > > > - __r[1] = ((__v2df) __A)[1]; > > > - return (__m128d) __r; > > > -} > > > - > > > -__inline __m128 > > > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > > -_mm_ceil_ps (__m128 __A) > > > -{ > > > - return (__m128) vec_ceil ((__v4sf) __A); > > > -} > > > - > > > -__inline __m128 > > > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > > -_mm_ceil_ss (__m128 __A, __m128 __B) > > > -{ > > > - __v4sf __r = (__v4sf) __A; > > > - __r[0] = __builtin_ceil (((__v4sf) __B)[0]); > > > - return __r; > > > -} > > > - > > > -__inline __m128 > > > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > > -_mm_floor_ps (__m128 __A) > > > -{ > > > - return (__m128) vec_floor ((__v4sf) __A); > > > -} > > > - > > > -__inline __m128 > > > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > > -_mm_floor_ss (__m128 __A, __m128 __B) > > > -{ > > > - __v4sf __r = (__v4sf) __A; > > > - __r[0] = __builtin_floor (((__v4sf) __B)[0]); > > > - return __r; > > > -} > > > - > > > #ifdef _ARCH_PWR8 > > > extern __inline __m128i > > > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-round3.h b/gcc/testsuite/gcc.target/powerpc/sse4_1-round3.h > > > new file mode 100644 > > > index 000000000000..de6cbf7be438 > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-round3.h > > > @@ -0,0 +1,81 @@ > > > +#include <smmintrin.h> > > > +#include 
<fenv.h> > > > +#include "sse4_1-check.h" > > > + > > > +#define DIM(a) (sizeof (a) / sizeof (a)[0]) > > > + > > > +static int roundings[] = > > > + { > > > + _MM_FROUND_TO_NEAREST_INT, > > > + _MM_FROUND_TO_NEG_INF, > > > + _MM_FROUND_TO_POS_INF, > > > + _MM_FROUND_TO_ZERO, > > > + _MM_FROUND_CUR_DIRECTION > > > + }; > > > + > > > +static int modes[] = > > > + { > > > + FE_TONEAREST, > > > + FE_UPWARD, > > > + FE_DOWNWARD, > > > + FE_TOWARDZERO > > > + }; > > > + > > > +static void > > > +TEST (void) > > > +{ > > > + int i, j, ri, mi, round_save; > > > + > > > + round_save = fegetround (); > > > + for (mi = 0; mi < DIM (modes); mi++) { > > > + fesetround (modes[mi]); > > > + for (i = 0; i < DIM (data); i++) { > > > + for (ri = 0; ri < DIM (roundings); ri++) { > > > + union value guess; > > > + union value *current_answers = answers[ri]; > > > + switch ( roundings[ri] ) { > > > + case _MM_FROUND_TO_NEAREST_INT: > > > + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, > > > + _MM_FROUND_TO_NEAREST_INT); > > > + break; > > > + case _MM_FROUND_TO_NEG_INF: > > > + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, > > > + _MM_FROUND_TO_NEG_INF); > > > + break; > > > + case _MM_FROUND_TO_POS_INF: > > > + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, > > > + _MM_FROUND_TO_POS_INF); > > > + break; > > > + case _MM_FROUND_TO_ZERO: > > > + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, > > > + _MM_FROUND_TO_ZERO); > > > + break; > > > + case _MM_FROUND_CUR_DIRECTION: > > > + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, > > > + _MM_FROUND_CUR_DIRECTION); > > > + switch ( modes[mi] ) { > > > + case FE_TONEAREST: > > > + current_answers = answers_NEAREST_INT; > > > + break; > > > + case FE_UPWARD: > > > + current_answers = answers_POS_INF; > > > + break; > > > + case FE_DOWNWARD: > > > + current_answers = answers_NEG_INF; > > > + break; > > > + case FE_TOWARDZERO: > > > + current_answers = answers_ZERO; > > > + 
break; > > > + } > > > + break; > > > + default: > > > + abort (); > > > + } > > > + for (j = 0; j < DIM (guess.f); j++) > > > + if (guess.f[j] != current_answers[i].f[j]) > > > + abort (); > > > + } > > > + } > > > + } > > > + fesetround (round_save); > > > +} > > > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-roundpd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundpd.c > > > new file mode 100644 > > > index 000000000000..58d9cc524167 > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundpd.c > > > @@ -0,0 +1,143 @@ > > > +/* { dg-do run } */ > > > +/* { dg-require-effective-target vsx_hw } */ > > > +/* { dg-options "-O2 -mvsx" } */ > > > + > > > +#define NO_WARN_X86_INTRINSICS 1 > > > +#include <smmintrin.h> > > > + > > > +#define VEC_T __m128d > > > +#define FP_T double > > > + > > > +#define ROUND_INTRIN(x, ignored, mode) _mm_round_pd (x, mode) > > > + > > > +#include "sse4_1-round-data.h" > > > + > > > +struct data2 data[] = { > > > + { .value1 = { .f = { 0.00, 0.25 } } }, > > > + { .value1 = { .f = { 0.50, 0.75 } } }, > > > + > > > + { .value1 = { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffdp+50 } } }, > > > + { .value1 = { .f = { 0x1.ffffffffffffep+50, 0x1.fffffffffffffp+50 } } }, > > > + { .value1 = { .f = { 0x1.0000000000000p+51, 0x1.0000000000001p+51 } } }, > > > + { .value1 = { .f = { 0x1.0000000000002p+51, 0x1.0000000000003p+51 } } }, > > > + > > > + { .value1 = { .f = { 0x1.ffffffffffffep+51, 0x1.fffffffffffffp+51 } } }, > > > + { .value1 = { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } } }, > > > + > > > + { .value1 = { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } } }, > > > + { .value1 = { .f = { -0x1.fffffffffffffp+51, -0x1.ffffffffffffep+51 } } }, > > > + > > > + { .value1 = { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } } }, > > > + { .value1 = { .f = { -0x1.0000000000001p+51, -0x1.0000000000000p+51 } } }, > > > + { .value1 = { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffep+50 } 
} }, > > > + { .value1 = { .f = { -0x1.ffffffffffffdp+50, -0x1.ffffffffffffcp+50 } } }, > > > + > > > + { .value1 = { .f = { -1.00, -0.75 } } }, > > > + { .value1 = { .f = { -0.50, -0.25 } } } > > > +}; > > > + > > > +union value answers_NEAREST_INT[] = { > > > + { .f = { 0.00, 0.00 } }, > > > + { .f = { 0.00, 1.00 } }, > > > + > > > + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, > > > + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, > > > + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, > > > + { .f = { 0x1.0000000000002p+51, 0x1.0000000000004p+51 } }, > > > + > > > + { .f = { 0x1.ffffffffffffep+51, 0x1.0000000000000p+52 } }, > > > + { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } }, > > > + > > > + { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } }, > > > + { .f = { -0x1.0000000000000p+52, -0x1.ffffffffffffep+51 } }, > > > + > > > + { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } }, > > > + { .f = { -0x1.0000000000000p+51, -0x1.0000000000000p+51 } }, > > > + { .f = { -0x1.ffffffffffffcp+50, -0x1.0000000000000p+51 } }, > > > + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, > > > + > > > + { .f = { -1.00, -1.00 } }, > > > + { .f = { 0.00, 0.00 } } > > > +}; > > > + > > > +union value answers_NEG_INF[] = { > > > + { .f = { 0.00, 0.00 } }, > > > + { .f = { 0.00, 0.00 } }, > > > + > > > + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, > > > + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, > > > + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, > > > + { .f = { 0x1.0000000000002p+51, 0x1.0000000000002p+51 } }, > > > + > > > + { .f = { 0x1.ffffffffffffep+51, 0x1.ffffffffffffep+51 } }, > > > + { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } }, > > > + > > > + { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } }, > > > + { .f = { -0x1.0000000000000p+52, -0x1.ffffffffffffep+51 } }, > > > + > > > + { .f = { -0x1.0000000000004p+51, 
-0x1.0000000000002p+51 } }, > > > + { .f = { -0x1.0000000000002p+51, -0x1.0000000000000p+51 } }, > > > + { .f = { -0x1.ffffffffffffcp+50, -0x1.0000000000000p+51 } }, > > > + { .f = { -0x1.0000000000000p+51, -0x1.ffffffffffffcp+50 } }, > > > + > > > + { .f = { -1.00, -1.00 } }, > > > + { .f = { -1.00, -1.00 } } > > > +}; > > > + > > > +union value answers_POS_INF[] = { > > > + { .f = { 0.00, 1.00 } }, > > > + { .f = { 1.00, 1.00 } }, > > > + > > > + { .f = { 0x1.ffffffffffffcp+50, 0x1.0000000000000p+51 } }, > > > + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, > > > + { .f = { 0x1.0000000000000p+51, 0x1.0000000000002p+51 } }, > > > + { .f = { 0x1.0000000000002p+51, 0x1.0000000000004p+51 } }, > > > + > > > + { .f = { 0x1.ffffffffffffep+51, 0x1.0000000000000p+52 } }, > > > + { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } }, > > > + > > > + { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } }, > > > + { .f = { -0x1.ffffffffffffep+51, -0x1.ffffffffffffep+51 } }, > > > + > > > + { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } }, > > > + { .f = { -0x1.0000000000000p+51, -0x1.0000000000000p+51 } }, > > > + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, > > > + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, > > > + > > > + { .f = { -1.00, 0.00 } }, > > > + { .f = { 0.00, 0.00 } } > > > +}; > > > + > > > +union value answers_ZERO[] = { > > > + { .f = { 0.00, 0.00 } }, > > > + { .f = { 0.00, 0.00 } }, > > > + > > > + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, > > > + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, > > > + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, > > > + { .f = { 0x1.0000000000002p+51, 0x1.0000000000002p+51 } }, > > > + > > > + { .f = { 0x1.ffffffffffffep+51, 0x1.ffffffffffffep+51 } }, > > > + { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } }, > > > + > > > + { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } }, > > > + { .f = { 
-0x1.ffffffffffffep+51, -0x1.ffffffffffffep+51 } }, > > > + > > > + { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } }, > > > + { .f = { -0x1.0000000000000p+51, -0x1.0000000000000p+51 } }, > > > + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, > > > + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, > > > + > > > + { .f = { -1.00, 0.00 } }, > > > + { .f = { 0.00, 0.00 } } > > > +}; > > > + > > > +union value *answers[] = { > > > + answers_NEAREST_INT, > > > + answers_NEG_INF, > > > + answers_POS_INF, > > > + answers_ZERO, > > > + 0 /* CUR_DIRECTION answers depend on current rounding mode. */ > > > +}; > > > + > > > +#include "sse4_1-round3.h" > > > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-roundps.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundps.c > > > new file mode 100644 > > > index 000000000000..4b0366dfddf3 > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundps.c > > > @@ -0,0 +1,98 @@ > > > +/* { dg-do run } */ > > > +/* { dg-require-effective-target vsx_hw } */ > > > +/* { dg-options "-O2 -mvsx" } */ > > > + > > > +#define NO_WARN_X86_INTRINSICS 1 > > > +#include <smmintrin.h> > > > + > > > +#define VEC_T __m128 > > > +#define FP_T float > > > + > > > +#define ROUND_INTRIN(x, ignored, mode) _mm_round_ps (x, mode) > > > + > > > +#include "sse4_1-round-data.h" > > > + > > > +struct data2 data[] = { > > > + { .value1 = { .f = { 0.00, 0.25, 0.50, 0.75 } } }, > > > + > > > + { .value1 = { .f = { 0x1.fffff8p+21, 0x1.fffffap+21, > > > + 0x1.fffffcp+21, 0x1.fffffep+21 } } }, > > > + { .value1 = { .f = { 0x1.fffffap+22, 0x1.fffffcp+22, > > > + 0x1.fffffep+22, 0x1.fffffep+23 } } }, > > > + { .value1 = { .f = { -0x1.fffffep+23, -0x1.fffffep+22, > > > + -0x1.fffffcp+22, -0x1.fffffap+22 } } }, > > > + { .value1 = { .f = { -0x1.fffffep+21, -0x1.fffffcp+21, > > > + -0x1.fffffap+21, -0x1.fffff8p+21 } } }, > > > + > > > + { .value1 = { .f = { -1.00, -0.75, -0.50, -0.25 } } } > > > +}; > > > + > > > 
+union value answers_NEAREST_INT[] = { > > > + { .f = { 0.00, 0.00, 0.00, 1.00 } }, > > > + > > > + { .f = { 0x1.fffff8p+21, 0x1.fffff8p+21, > > > + 0x1.000000p+22, 0x1.000000p+22 } }, > > > + { .f = { 0x1.fffff8p+22, 0x1.fffffcp+22, > > > + 0x1.000000p+23, 0x1.fffffep+23 } }, > > > + { .f = { -0x1.fffffep+23, -0x1.000000p+23, > > > + -0x1.fffffcp+22, -0x1.fffff8p+22 } }, > > > + { .f = { -0x1.000000p+22, -0x1.000000p+22, > > > + -0x1.fffff8p+21, -0x1.fffff8p+21 } }, > > > + > > > + { .f = { -1.00, -1.00, 0.00, 0.00 } } > > > +}; > > > + > > > +union value answers_NEG_INF[] = { > > > + { .f = { 0.00, 0.00, 0.00, 0.00 } }, > > > + > > > + { .f = { 0x1.fffff8p+21, 0x1.fffff8p+21, > > > + 0x1.fffff8p+21, 0x1.fffff8p+21 } }, > > > + { .f = { 0x1.fffff8p+22, 0x1.fffffcp+22, > > > + 0x1.fffffcp+22, 0x1.fffffep+23 } }, > > > + { .f = { -0x1.fffffep+23, -0x1.000000p+23, > > > + -0x1.fffffcp+22, -0x1.fffffcp+22 } }, > > > + { .f = { -0x1.000000p+22, -0x1.000000p+22, > > > + -0x1.000000p+22, -0x1.fffff8p+21 } }, > > > + > > > + { .f = { -1.00, -1.00, -1.00, -1.00 } } > > > +}; > > > + > > > +union value answers_POS_INF[] = { > > > + { .f = { 0.00, 1.00, 1.00, 1.00 } }, > > > + > > > + { .f = { 0x1.fffff8p+21, 0x1.000000p+22, > > > + 0x1.000000p+22, 0x1.000000p+22 } }, > > > + { .f = { 0x1.fffffcp+22, 0x1.fffffcp+22, > > > + 0x1.000000p+23, 0x1.fffffep+23 } }, > > > + { .f = { -0x1.fffffep+23, -0x1.fffffcp+22, > > > + -0x1.fffffcp+22, -0x1.fffff8p+22 } }, > > > + { .f = { -0x1.fffff8p+21, -0x1.fffff8p+21, > > > + -0x1.fffff8p+21, -0x1.fffff8p+21 } }, > > > + > > > + { .f = { -1.00, 0.00, 0.00, 0.00 } } > > > +}; > > > + > > > +union value answers_ZERO[] = { > > > + { .f = { 0.00, 0.00, 0.00, 0.00 } }, > > > + > > > + { .f = { 0x1.fffff8p+21, 0x1.fffff8p+21, > > > + 0x1.fffff8p+21, 0x1.fffff8p+21 } }, > > > + { .f = { 0x1.fffff8p+22, 0x1.fffffcp+22, > > > + 0x1.fffffcp+22, 0x1.fffffep+23 } }, > > > + { .f = { -0x1.fffffep+23, -0x1.fffffcp+22, > > > + -0x1.fffffcp+22, 
-0x1.fffff8p+22 } }, > > > + { .f = { -0x1.fffff8p+21, -0x1.fffff8p+21, > > > + -0x1.fffff8p+21, -0x1.fffff8p+21 } }, > > > + > > > + { .f = { -1.00, 0.00, 0.00, 0.00 } } > > > +}; > > > + > > > +union value *answers[] = { > > > + answers_NEAREST_INT, > > > + answers_NEG_INF, > > > + answers_POS_INF, > > > + answers_ZERO, > > > + 0 /* CUR_DIRECTION answers depend on current rounding mode. */ > > > +}; > > > + > > > +#include "sse4_1-round3.h" > > > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-roundsd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundsd.c > > > new file mode 100644 > > > index 000000000000..4f8d9e08c93d > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundsd.c > > > @@ -0,0 +1,256 @@ > > > +/* { dg-do run } */ > > > +/* { dg-require-effective-target vsx_hw } */ > > > +/* { dg-options "-O2 -mvsx" } */ > > > + > > > +#include <stdio.h> > > > +#define NO_WARN_X86_INTRINSICS 1 > > > +#include <smmintrin.h> > > > + > > > +#define VEC_T __m128d > > > +#define FP_T double > > > + > > > +#define ROUND_INTRIN(x, y, mode) _mm_round_sd (x, y, mode) > > > + > > > +#include "sse4_1-round-data.h" > > > + > > > +static struct data2 data[] = { > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { 0.00, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { 0.25, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { 0.50, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { 0.75, IGNORED } } }, > > > + > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { 0x1.ffffffffffffcp+50, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { 0x1.ffffffffffffdp+50, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { 0x1.ffffffffffffep+50, IGNORED } } }, > > > + { .value1 = { .f = 
{ IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { 0x1.fffffffffffffp+50, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { 0x1.0000000000000p+51, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { 0x1.0000000000001p+51, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { 0x1.0000000000002p+51, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { 0x1.0000000000003p+51, IGNORED } } }, > > > + > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { 0x1.ffffffffffffep+51, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { 0x1.fffffffffffffp+51, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { 0x1.0000000000000p+52, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { 0x1.0000000000001p+52, IGNORED } } }, > > > + > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { -0x1.0000000000001p+52, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { -0x1.0000000000000p+52, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { -0x1.fffffffffffffp+51, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { -0x1.ffffffffffffep+51, IGNORED } } }, > > > + > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { -0x1.0000000000004p+51, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { -0x1.0000000000002p+51, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { -0x1.0000000000001p+51, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { 
.f = { -0x1.0000000000000p+51, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { -0x1.ffffffffffffcp+50, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { -0x1.ffffffffffffep+50, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { -0x1.ffffffffffffdp+50, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { -0x1.ffffffffffffcp+50, IGNORED } } }, > > > + > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { -1.00, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { -0.75, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { -0.50, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > + .value2 = { .f = { -0.25, IGNORED } } } > > > +}; > > > + > > > +static union value answers_NEAREST_INT[] = { > > > + { .f = { 0.00, PASSTHROUGH } }, > > > + { .f = { 0.00, PASSTHROUGH } }, > > > + { .f = { 0.00, PASSTHROUGH } }, > > > + { .f = { 1.00, PASSTHROUGH } }, > > > + > > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000004p+51, PASSTHROUGH } }, > > > + > > > + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000001p+52, PASSTHROUGH } }, > > > + > > > + { .f = { -0x1.0000000000001p+52, PASSTHROUGH } }, > > > + { .f = { -0x1.0000000000000p+52, 
PASSTHROUGH } }, > > > + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, > > > + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, > > > + > > > + { .f = { -0x1.0000000000004p+51, PASSTHROUGH } }, > > > + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, > > > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > > > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > + > > > + { .f = { -1.00, PASSTHROUGH } }, > > > + { .f = { -1.00, PASSTHROUGH } }, > > > + { .f = { -0.00, PASSTHROUGH } }, > > > + { .f = { -0.00, PASSTHROUGH } } > > > +}; > > > + > > > +static union value answers_NEG_INF[] = { > > > + { .f = { 0.00, PASSTHROUGH } }, > > > + { .f = { 0.00, PASSTHROUGH } }, > > > + { .f = { 0.00, PASSTHROUGH } }, > > > + { .f = { 0.00, PASSTHROUGH } }, > > > + > > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > > > + > > > + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, > > > + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000001p+52, PASSTHROUGH } }, > > > + > > > + { .f = { -0x1.0000000000001p+52, PASSTHROUGH } }, > > > + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, > > > + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, > > > + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, > > > + > > > + { .f = { -0x1.0000000000004p+51, 
PASSTHROUGH } }, > > > + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, > > > + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, > > > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > > > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > + > > > + { .f = { -1.00, PASSTHROUGH } }, > > > + { .f = { -1.00, PASSTHROUGH } }, > > > + { .f = { -1.00, PASSTHROUGH } }, > > > + { .f = { -1.00, PASSTHROUGH } } > > > +}; > > > + > > > +static union value answers_POS_INF[] = { > > > + { .f = { 0.00, PASSTHROUGH } }, > > > + { .f = { 1.00, PASSTHROUGH } }, > > > + { .f = { 1.00, PASSTHROUGH } }, > > > + { .f = { 1.00, PASSTHROUGH } }, > > > + > > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000004p+51, PASSTHROUGH } }, > > > + > > > + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000001p+52, PASSTHROUGH } }, > > > + > > > + { .f = { -0x1.0000000000001p+52, PASSTHROUGH } }, > > > + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, > > > + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, > > > + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, > > > + > > > + { .f = { -0x1.0000000000004p+51, PASSTHROUGH } }, > > > + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, > > > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > > > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, 
> > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > + > > > + { .f = { -1.00, PASSTHROUGH } }, > > > + { .f = { 0.00, PASSTHROUGH } }, > > > + { .f = { 0.00, PASSTHROUGH } }, > > > + { .f = { 0.00, PASSTHROUGH } } > > > +}; > > > + > > > +static union value answers_ZERO[] = { > > > + { .f = { 0.00, PASSTHROUGH } }, > > > + { .f = { 0.00, PASSTHROUGH } }, > > > + { .f = { 0.00, PASSTHROUGH } }, > > > + { .f = { 0.00, PASSTHROUGH } }, > > > + > > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > > > + > > > + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, > > > + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, > > > + { .f = { 0x1.0000000000001p+52, PASSTHROUGH } }, > > > + > > > + { .f = { -0x1.0000000000001p+52, PASSTHROUGH } }, > > > + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, > > > + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, > > > + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, > > > + > > > + { .f = { -0x1.0000000000004p+51, PASSTHROUGH } }, > > > + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, > > > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > > > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > + { .f = { 
-0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > + > > > + { .f = { -1.00, PASSTHROUGH } }, > > > + { .f = { 0.00, PASSTHROUGH } }, > > > + { .f = { 0.00, PASSTHROUGH } }, > > > + { .f = { 0.00, PASSTHROUGH } } > > > +}; > > > + > > > +union value *answers[] = { > > > + answers_NEAREST_INT, > > > + answers_NEG_INF, > > > + answers_POS_INF, > > > + answers_ZERO, > > > + 0 /* CUR_DIRECTION answers depend on current rounding mode. */ > > > +}; > > > + > > > +#include "sse4_1-round3.h" > > > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-roundss.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundss.c > > > new file mode 100644 > > > index 000000000000..d788ebda64dd > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundss.c > > > @@ -0,0 +1,208 @@ > > > +/* { dg-do run } */ > > > +/* { dg-require-effective-target vsx_hw } */ > > > +/* { dg-options "-O2 -mvsx" } */ > > > + > > > +#include <stdio.h> > > > +#define NO_WARN_X86_INTRINSICS 1 > > > +#include <smmintrin.h> > > > + > > > +#define VEC_T __m128 > > > +#define FP_T float > > > + > > > +#define ROUND_INTRIN(x, y, mode) _mm_round_ss (x, y, mode) > > > + > > > +#include "sse4_1-round-data.h" > > > + > > > +static struct data2 data[] = { > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + .value2 = { .f = { 0.00, IGNORED, IGNORED, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + .value2 = { .f = { 0.25, IGNORED, IGNORED, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + .value2 = { .f = { 0.50, IGNORED, IGNORED, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + .value2 = { .f = { 0.75, IGNORED, IGNORED, IGNORED } } }, > > > + > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + .value2 = { .f = { 0x1.fffff8p+21, IGNORED, IGNORED, IGNORED } } }, > > > + { .value1 
= { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + .value2 = { .f = { 0x1.fffffap+21, IGNORED, IGNORED, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + .value2 = { .f = { 0x1.fffffcp+21, IGNORED, IGNORED, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + .value2 = { .f = { 0x1.fffffep+21, IGNORED, IGNORED, IGNORED } } }, > > > + > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + .value2 = { .f = { 0x1.fffffap+22, IGNORED, IGNORED, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + .value2 = { .f = { 0x1.fffffcp+22, IGNORED, IGNORED, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + .value2 = { .f = { 0x1.fffffep+22, IGNORED, IGNORED, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + .value2 = { .f = { 0x1.fffffep+23, IGNORED, IGNORED, IGNORED } } }, > > > + > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + .value2 = { .f = { -0x1.fffffep+23, IGNORED, IGNORED, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + .value2 = { .f = { -0x1.fffffep+22, IGNORED, IGNORED, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + .value2 = { .f = { -0x1.fffffcp+22, IGNORED, IGNORED, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + .value2 = { .f = { -0x1.fffffap+22, IGNORED, IGNORED, IGNORED } } }, > > > + > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + .value2 = { .f = { -0x1.fffffep+21, IGNORED, IGNORED, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + .value2 = { .f 
= { -0x1.fffffcp+21, IGNORED, IGNORED, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + .value2 = { .f = { -0x1.fffffap+21, IGNORED, IGNORED, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + .value2 = { .f = { -0x1.fffff8p+21, IGNORED, IGNORED, IGNORED } } }, > > > + > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + .value2 = { .f = { -1.00, IGNORED, IGNORED, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + .value2 = { .f = { -0.75, IGNORED, IGNORED, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + .value2 = { .f = { -0.50, IGNORED, IGNORED, IGNORED } } }, > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + .value2 = { .f = { -0.25, IGNORED, IGNORED, IGNORED } } } > > > +}; > > > + > > > +static union value answers_NEAREST_INT[] = { > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + > > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + > > > + { .f = { 0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0x1.000000p+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + > > > + { .f = { -0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, 
PASSTHROUGH } }, > > > + { .f = { -0x1.000000p+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { -0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + > > > + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + > > > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { -0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { -0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } } > > > +}; > > > + > > > +static union value answers_NEG_INF[] = { > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + > > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + > > > + { .f = { 0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + > > > + { .f = { -0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { -0x1.000000p+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { -0x1.fffffcp+22, 
PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + > > > + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + > > > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } } > > > +}; > > > + > > > +static union value answers_POS_INF[] = { > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + > > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + > > > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0x1.000000p+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + > > > + { .f = { -0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { -0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + > > > + { 
.f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + > > > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } } > > > +}; > > > + > > > +static union value answers_ZERO[] = { > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + > > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + > > > + { .f = { 0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + > > > + { .f = { -0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { -0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + > > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, 
> > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + > > > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } } > > > +}; > > > + > > > +union value *answers[] = { > > > + answers_NEAREST_INT, > > > + answers_NEG_INF, > > > + answers_POS_INF, > > > + answers_ZERO, > > > + 0 /* CUR_DIRECTION answers depend on current rounding mode. */ > > > +}; > > > + > > > +#include "sse4_1-round3.h" > > > -- > > > 2.27.0 > > >
On Thu, Nov 18, 2021 at 08:24:52PM -0600, Paul A. Clarke via Gcc-patches wrote: > On Mon, Nov 08, 2021 at 11:40:42AM -0600, Paul A. Clarke via Gcc-patches wrote: > > On Tue, Oct 26, 2021 at 03:00:11PM -0500, Paul A. Clarke via Gcc-patches wrote: > > > Patches 1/3 and 3/3 have been committed. > > > This is only a ping for 2/3. > > > > Gentle re-ping. > > Gentle re-re-ping. and once more. :-) > > > On Mon, Oct 18, 2021 at 08:15:11PM -0500, Paul A. Clarke via Gcc-patches wrote: > > > > Suppress exceptions (when specified), by saving, manipulating, and > > > > restoring the FPSCR. Similarly, save, set, and restore the floating-point > > > > rounding mode when required. > > > > > > > > No attempt is made to optimize writing the FPSCR (by checking if the new > > > > value would be the same), other than using lighter weight instructions > > > > when possible. Note that explicit instruction scheduling "barriers" are > > > > added to prevent floating-point computations from being moved before or > > > > after the explicit FPSCR manipulations. (That these are required has > > > > been reported as an issue in GCC: PR102783.) > > > > > > > > The scalar versions naively use the parallel versions to compute the > > > > single scalar result and then construct the remainder of the result. > > > > > > > > Of minor note, the values of _MM_FROUND_TO_NEG_INF and _MM_FROUND_TO_ZERO > > > > are swapped from the corresponding values on x86 so as to match the > > > > corresponding rounding mode values in the Power ISA. > > > > > > > > Move implementations of _mm_ceil* and _mm_floor* into _mm_round*, and > > > > convert _mm_ceil* and _mm_floor* into macros. This matches the current > > > > analogous implementations in config/i386/smmintrin.h. > > > > > > > > Function signatures match the analogous functions in config/i386/smmintrin.h. > > > > > > > > Add tests for _mm_round_pd, _mm_round_ps, _mm_round_sd, _mm_round_ss, > > > > modeled after the very similar "floor" and "ceil" tests. 
> > > > > > > > Include basic tests, plus tests at the boundaries of the floating-point > > > > representation, both positive and negative. Test all of the parameterized > > > > rounding modes as well as the C99 rounding modes and the interactions > > > > between the two. > > > > > > > > Exceptions are not explicitly tested. > > > > > > > > 2021-10-18 Paul A. Clarke <pc@us.ibm.com> > > > > > > > > gcc > > > > * config/rs6000/smmintrin.h (_mm_round_pd, _mm_round_ps, > > > > _mm_round_sd, _mm_round_ss, _MM_FROUND_TO_NEAREST_INT, > > > > _MM_FROUND_TO_ZERO, _MM_FROUND_TO_POS_INF, _MM_FROUND_TO_NEG_INF, > > > > _MM_FROUND_CUR_DIRECTION, _MM_FROUND_RAISE_EXC, _MM_FROUND_NO_EXC, > > > > _MM_FROUND_NINT, _MM_FROUND_FLOOR, _MM_FROUND_CEIL, _MM_FROUND_TRUNC, > > > > _MM_FROUND_RINT, _MM_FROUND_NEARBYINT): New. > > > > * config/rs6000/smmintrin.h (_mm_ceil_pd, _mm_ceil_ps, _mm_ceil_sd, > > > > _mm_ceil_ss, _mm_floor_pd, _mm_floor_ps, _mm_floor_sd, _mm_floor_ss): > > > > Convert from function to macro. > > > > > > > > gcc/testsuite > > > > * gcc.target/powerpc/sse4_1-round3.h: New. > > > > * gcc.target/powerpc/sse4_1-roundpd.c: New. > > > > * gcc.target/powerpc/sse4_1-roundps.c: New. > > > > * gcc.target/powerpc/sse4_1-roundsd.c: New. > > > > * gcc.target/powerpc/sse4_1-roundss.c: New. 
> > > > --- > > > > gcc/config/rs6000/smmintrin.h | 292 ++++++++++++++---- > > > > .../gcc.target/powerpc/sse4_1-round3.h | 81 +++++ > > > > .../gcc.target/powerpc/sse4_1-roundpd.c | 143 +++++++++ > > > > .../gcc.target/powerpc/sse4_1-roundps.c | 98 ++++++ > > > > .../gcc.target/powerpc/sse4_1-roundsd.c | 256 +++++++++++++++ > > > > .../gcc.target/powerpc/sse4_1-roundss.c | 208 +++++++++++++ > > > > 6 files changed, 1014 insertions(+), 64 deletions(-) > > > > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-round3.h > > > > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-roundpd.c > > > > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-roundps.c > > > > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-roundsd.c > > > > create mode 100644 gcc/testsuite/gcc.target/powerpc/sse4_1-roundss.c > > > > > > > > diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h > > > > index 90ce03d22709..6bb03e6e20ac 100644 > > > > --- a/gcc/config/rs6000/smmintrin.h > > > > +++ b/gcc/config/rs6000/smmintrin.h > > > > @@ -42,6 +42,234 @@ > > > > #include <altivec.h> > > > > #include <tmmintrin.h> > > > > > > > > +/* Rounding mode macros. 
*/ > > > > +#define _MM_FROUND_TO_NEAREST_INT 0x00 > > > > +#define _MM_FROUND_TO_ZERO 0x01 > > > > +#define _MM_FROUND_TO_POS_INF 0x02 > > > > +#define _MM_FROUND_TO_NEG_INF 0x03 > > > > +#define _MM_FROUND_CUR_DIRECTION 0x04 > > > > + > > > > +#define _MM_FROUND_NINT \ > > > > + (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC) > > > > +#define _MM_FROUND_FLOOR \ > > > > + (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC) > > > > +#define _MM_FROUND_CEIL \ > > > > + (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC) > > > > +#define _MM_FROUND_TRUNC \ > > > > + (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC) > > > > +#define _MM_FROUND_RINT \ > > > > + (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC) > > > > +#define _MM_FROUND_NEARBYINT \ > > > > + (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC) > > > > + > > > > +#define _MM_FROUND_RAISE_EXC 0x00 > > > > +#define _MM_FROUND_NO_EXC 0x08 > > > > + > > > > +extern __inline __m128d > > > > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > > > +_mm_round_pd (__m128d __A, int __rounding) > > > > +{ > > > > + __v2df __r; > > > > + union { > > > > + double __fr; > > > > + long long __fpscr; > > > > + } __enables_save, __fpscr_save; > > > > + > > > > + if (__rounding & _MM_FROUND_NO_EXC) > > > > + { > > > > + /* Save enabled exceptions, disable all exceptions, > > > > + and preserve the rounding mode. */ > > > > +#ifdef _ARCH_PWR9 > > > > + __asm__ ("mffsce %0" : "=f" (__fpscr_save.__fr)); > > > > + __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; > > > > +#else > > > > + __fpscr_save.__fr = __builtin_mffs (); > > > > + __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; > > > > + __fpscr_save.__fpscr &= ~0xf8; > > > > + __builtin_mtfsf (0b00000011, __fpscr_save.__fr); > > > > +#endif > > > > + /* Insert an artificial "read/write" reference to the variable > > > > + read below, to ensure the compiler does not schedule > > > > + a read/use of the variable before the FPSCR is modified, above. 
> > > > + This can be removed if and when GCC PR102783 is fixed. > > > > + */ > > > > + __asm__ ("" : "+wa" (__A)); > > > > + } > > > > + > > > > + switch (__rounding) > > > > + { > > > > + case _MM_FROUND_TO_NEAREST_INT: > > > > + __fpscr_save.__fr = __builtin_mffsl (); > > > > + __attribute__ ((fallthrough)); > > > > + case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC: > > > > + __builtin_set_fpscr_rn (0b00); > > > > + /* Insert an artificial "read/write" reference to the variable > > > > + read below, to ensure the compiler does not schedule > > > > + a read/use of the variable before the FPSCR is modified, above. > > > > + This can be removed if and when GCC PR102783 is fixed. > > > > + */ > > > > + __asm__ ("" : "+wa" (__A)); > > > > + > > > > + __r = vec_rint ((__v2df) __A); > > > > + > > > > + /* Insert an artificial "read" reference to the variable written > > > > + above, to ensure the compiler does not schedule the computation > > > > + of the value after the manipulation of the FPSCR, below. > > > > + This can be removed if and when GCC PR102783 is fixed. 
> > > > + */ > > > > + __asm__ ("" : : "wa" (__r)); > > > > + __builtin_set_fpscr_rn (__fpscr_save.__fpscr); > > > > + break; > > > > + case _MM_FROUND_TO_NEG_INF: > > > > + case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC: > > > > + __r = vec_floor ((__v2df) __A); > > > > + break; > > > > + case _MM_FROUND_TO_POS_INF: > > > > + case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC: > > > > + __r = vec_ceil ((__v2df) __A); > > > > + break; > > > > + case _MM_FROUND_TO_ZERO: > > > > + case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC: > > > > + __r = vec_trunc ((__v2df) __A); > > > > + break; > > > > + case _MM_FROUND_CUR_DIRECTION: > > > > + __r = vec_rint ((__v2df) __A); > > > > + break; > > > > + } > > > > + if (__rounding & _MM_FROUND_NO_EXC) > > > > + { > > > > + /* Insert an artificial "read" reference to the variable written > > > > + above, to ensure the compiler does not schedule the computation > > > > + of the value after the manipulation of the FPSCR, below. > > > > + This can be removed if and when GCC PR102783 is fixed. > > > > + */ > > > > + __asm__ ("" : : "wa" (__r)); > > > > + /* Restore enabled exceptions. 
*/ > > > > + __fpscr_save.__fr = __builtin_mffsl (); > > > > + __fpscr_save.__fpscr |= __enables_save.__fpscr; > > > > + __builtin_mtfsf (0b00000011, __fpscr_save.__fr); > > > > + } > > > > + return (__m128d) __r; > > > > +} > > > > + > > > > +extern __inline __m128d > > > > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > > > +_mm_round_sd (__m128d __A, __m128d __B, int __rounding) > > > > +{ > > > > + __B = _mm_round_pd (__B, __rounding); > > > > + __v2df __r = { ((__v2df) __B)[0], ((__v2df) __A)[1] }; > > > > + return (__m128d) __r; > > > > +} > > > > + > > > > +extern __inline __m128 > > > > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > > > +_mm_round_ps (__m128 __A, int __rounding) > > > > +{ > > > > + __v4sf __r; > > > > + union { > > > > + double __fr; > > > > + long long __fpscr; > > > > + } __enables_save, __fpscr_save; > > > > + > > > > + if (__rounding & _MM_FROUND_NO_EXC) > > > > + { > > > > + /* Save enabled exceptions, disable all exceptions, > > > > + and preserve the rounding mode. */ > > > > +#ifdef _ARCH_PWR9 > > > > + __asm__ ("mffsce %0" : "=f" (__fpscr_save.__fr)); > > > > + __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; > > > > +#else > > > > + __fpscr_save.__fr = __builtin_mffs (); > > > > + __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; > > > > + __fpscr_save.__fpscr &= ~0xf8; > > > > + __builtin_mtfsf (0b00000011, __fpscr_save.__fr); > > > > +#endif > > > > + /* Insert an artificial "read/write" reference to the variable > > > > + read below, to ensure the compiler does not schedule > > > > + a read/use of the variable before the FPSCR is modified, above. > > > > + This can be removed if and when GCC PR102783 is fixed. 
> > > > + */ > > > > + __asm__ ("" : "+wa" (__A)); > > > > + } > > > > + > > > > + switch (__rounding) > > > > + { > > > > + case _MM_FROUND_TO_NEAREST_INT: > > > > + __fpscr_save.__fr = __builtin_mffsl (); > > > > + __attribute__ ((fallthrough)); > > > > + case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC: > > > > + __builtin_set_fpscr_rn (0b00); > > > > + /* Insert an artificial "read/write" reference to the variable > > > > + read below, to ensure the compiler does not schedule > > > > + a read/use of the variable before the FPSCR is modified, above. > > > > + This can be removed if and when GCC PR102783 is fixed. > > > > + */ > > > > + __asm__ ("" : "+wa" (__A)); > > > > + > > > > + __r = vec_rint ((__v4sf) __A); > > > > + > > > > + /* Insert an artificial "read" reference to the variable written > > > > + above, to ensure the compiler does not schedule the computation > > > > + of the value after the manipulation of the FPSCR, below. > > > > + This can be removed if and when GCC PR102783 is fixed. > > > > + */ > > > > + __asm__ ("" : : "wa" (__r)); > > > > + __builtin_set_fpscr_rn (__fpscr_save.__fpscr); > > > > + break; > > > > + case _MM_FROUND_TO_NEG_INF: > > > > + case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC: > > > > + __r = vec_floor ((__v4sf) __A); > > > > + break; > > > > + case _MM_FROUND_TO_POS_INF: > > > > + case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC: > > > > + __r = vec_ceil ((__v4sf) __A); > > > > + break; > > > > + case _MM_FROUND_TO_ZERO: > > > > + case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC: > > > > + __r = vec_trunc ((__v4sf) __A); > > > > + break; > > > > + case _MM_FROUND_CUR_DIRECTION: > > > > + __r = vec_rint ((__v4sf) __A); > > > > + break; > > > > + } > > > > + if (__rounding & _MM_FROUND_NO_EXC) > > > > + { > > > > + /* Insert an artificial "read" reference to the variable written > > > > + above, to ensure the compiler does not schedule the computation > > > > + of the value after the manipulation of the FPSCR, below. 
> > > > + This can be removed if and when GCC PR102783 is fixed. > > > > + */ > > > > + __asm__ ("" : : "wa" (__r)); > > > > + /* Restore enabled exceptions. */ > > > > + __fpscr_save.__fr = __builtin_mffsl (); > > > > + __fpscr_save.__fpscr |= __enables_save.__fpscr; > > > > + __builtin_mtfsf (0b00000011, __fpscr_save.__fr); > > > > + } > > > > + return (__m128) __r; > > > > +} > > > > + > > > > +extern __inline __m128 > > > > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > > > +_mm_round_ss (__m128 __A, __m128 __B, int __rounding) > > > > +{ > > > > + __B = _mm_round_ps (__B, __rounding); > > > > + __v4sf __r = (__v4sf) __A; > > > > + __r[0] = ((__v4sf) __B)[0]; > > > > + return (__m128) __r; > > > > +} > > > > + > > > > +#define _mm_ceil_pd(V) _mm_round_pd ((V), _MM_FROUND_CEIL) > > > > +#define _mm_ceil_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_CEIL) > > > > + > > > > +#define _mm_floor_pd(V) _mm_round_pd((V), _MM_FROUND_FLOOR) > > > > +#define _mm_floor_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_FLOOR) > > > > + > > > > +#define _mm_ceil_ps(V) _mm_round_ps ((V), _MM_FROUND_CEIL) > > > > +#define _mm_ceil_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_CEIL) > > > > + > > > > +#define _mm_floor_ps(V) _mm_round_ps ((V), _MM_FROUND_FLOOR) > > > > +#define _mm_floor_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_FLOOR) > > > > + > > > > extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) > > > > _mm_insert_epi8 (__m128i const __A, int const __D, int const __N) > > > > { > > > > @@ -210,70 +438,6 @@ _mm_testnzc_si128 (__m128i __A, __m128i __B) > > > > > > > > #define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128 ((M), (V)) > > > > > > > > -__inline __m128d > > > > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > > > -_mm_ceil_pd (__m128d __A) > > > > -{ > > > > - return (__m128d) vec_ceil ((__v2df) __A); > > > > -} > > > > - > > > > -__inline __m128d > > > > -__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) > > > > -_mm_ceil_sd (__m128d __A, __m128d __B) > > > > -{ > > > > - __v2df __r = vec_ceil ((__v2df) __B); > > > > - __r[1] = ((__v2df) __A)[1]; > > > > - return (__m128d) __r; > > > > -} > > > > - > > > > -__inline __m128d > > > > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > > > -_mm_floor_pd (__m128d __A) > > > > -{ > > > > - return (__m128d) vec_floor ((__v2df) __A); > > > > -} > > > > - > > > > -__inline __m128d > > > > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > > > -_mm_floor_sd (__m128d __A, __m128d __B) > > > > -{ > > > > - __v2df __r = vec_floor ((__v2df) __B); > > > > - __r[1] = ((__v2df) __A)[1]; > > > > - return (__m128d) __r; > > > > -} > > > > - > > > > -__inline __m128 > > > > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > > > -_mm_ceil_ps (__m128 __A) > > > > -{ > > > > - return (__m128) vec_ceil ((__v4sf) __A); > > > > -} > > > > - > > > > -__inline __m128 > > > > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > > > -_mm_ceil_ss (__m128 __A, __m128 __B) > > > > -{ > > > > - __v4sf __r = (__v4sf) __A; > > > > - __r[0] = __builtin_ceil (((__v4sf) __B)[0]); > > > > - return __r; > > > > -} > > > > - > > > > -__inline __m128 > > > > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > > > -_mm_floor_ps (__m128 __A) > > > > -{ > > > > - return (__m128) vec_floor ((__v4sf) __A); > > > > -} > > > > - > > > > -__inline __m128 > > > > -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > > > -_mm_floor_ss (__m128 __A, __m128 __B) > > > > -{ > > > > - __v4sf __r = (__v4sf) __A; > > > > - __r[0] = __builtin_floor (((__v4sf) __B)[0]); > > > > - return __r; > > > > -} > > > > - > > > > #ifdef _ARCH_PWR8 > > > > extern __inline __m128i > > > > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > > > diff --git 
a/gcc/testsuite/gcc.target/powerpc/sse4_1-round3.h b/gcc/testsuite/gcc.target/powerpc/sse4_1-round3.h > > > > new file mode 100644 > > > > index 000000000000..de6cbf7be438 > > > > --- /dev/null > > > > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-round3.h > > > > @@ -0,0 +1,81 @@ > > > > +#include <smmintrin.h> > > > > +#include <fenv.h> > > > > +#include "sse4_1-check.h" > > > > + > > > > +#define DIM(a) (sizeof (a) / sizeof (a)[0]) > > > > + > > > > +static int roundings[] = > > > > + { > > > > + _MM_FROUND_TO_NEAREST_INT, > > > > + _MM_FROUND_TO_NEG_INF, > > > > + _MM_FROUND_TO_POS_INF, > > > > + _MM_FROUND_TO_ZERO, > > > > + _MM_FROUND_CUR_DIRECTION > > > > + }; > > > > + > > > > +static int modes[] = > > > > + { > > > > + FE_TONEAREST, > > > > + FE_UPWARD, > > > > + FE_DOWNWARD, > > > > + FE_TOWARDZERO > > > > + }; > > > > + > > > > +static void > > > > +TEST (void) > > > > +{ > > > > + int i, j, ri, mi, round_save; > > > > + > > > > + round_save = fegetround (); > > > > + for (mi = 0; mi < DIM (modes); mi++) { > > > > + fesetround (modes[mi]); > > > > + for (i = 0; i < DIM (data); i++) { > > > > + for (ri = 0; ri < DIM (roundings); ri++) { > > > > + union value guess; > > > > + union value *current_answers = answers[ri]; > > > > + switch ( roundings[ri] ) { > > > > + case _MM_FROUND_TO_NEAREST_INT: > > > > + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, > > > > + _MM_FROUND_TO_NEAREST_INT); > > > > + break; > > > > + case _MM_FROUND_TO_NEG_INF: > > > > + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, > > > > + _MM_FROUND_TO_NEG_INF); > > > > + break; > > > > + case _MM_FROUND_TO_POS_INF: > > > > + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, > > > > + _MM_FROUND_TO_POS_INF); > > > > + break; > > > > + case _MM_FROUND_TO_ZERO: > > > > + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, > > > > + _MM_FROUND_TO_ZERO); > > > > + break; > > > > + case _MM_FROUND_CUR_DIRECTION: > > > > + guess.x = ROUND_INTRIN 
(data[i].value1.x, data[i].value2.x, > > > > + _MM_FROUND_CUR_DIRECTION); > > > > + switch ( modes[mi] ) { > > > > + case FE_TONEAREST: > > > > + current_answers = answers_NEAREST_INT; > > > > + break; > > > > + case FE_UPWARD: > > > > + current_answers = answers_POS_INF; > > > > + break; > > > > + case FE_DOWNWARD: > > > > + current_answers = answers_NEG_INF; > > > > + break; > > > > + case FE_TOWARDZERO: > > > > + current_answers = answers_ZERO; > > > > + break; > > > > + } > > > > + break; > > > > + default: > > > > + abort (); > > > > + } > > > > + for (j = 0; j < DIM (guess.f); j++) > > > > + if (guess.f[j] != current_answers[i].f[j]) > > > > + abort (); > > > > + } > > > > + } > > > > + } > > > > + fesetround (round_save); > > > > +} > > > > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-roundpd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundpd.c > > > > new file mode 100644 > > > > index 000000000000..58d9cc524167 > > > > --- /dev/null > > > > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundpd.c > > > > @@ -0,0 +1,143 @@ > > > > +/* { dg-do run } */ > > > > +/* { dg-require-effective-target vsx_hw } */ > > > > +/* { dg-options "-O2 -mvsx" } */ > > > > + > > > > +#define NO_WARN_X86_INTRINSICS 1 > > > > +#include <smmintrin.h> > > > > + > > > > +#define VEC_T __m128d > > > > +#define FP_T double > > > > + > > > > +#define ROUND_INTRIN(x, ignored, mode) _mm_round_pd (x, mode) > > > > + > > > > +#include "sse4_1-round-data.h" > > > > + > > > > +struct data2 data[] = { > > > > + { .value1 = { .f = { 0.00, 0.25 } } }, > > > > + { .value1 = { .f = { 0.50, 0.75 } } }, > > > > + > > > > + { .value1 = { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffdp+50 } } }, > > > > + { .value1 = { .f = { 0x1.ffffffffffffep+50, 0x1.fffffffffffffp+50 } } }, > > > > + { .value1 = { .f = { 0x1.0000000000000p+51, 0x1.0000000000001p+51 } } }, > > > > + { .value1 = { .f = { 0x1.0000000000002p+51, 0x1.0000000000003p+51 } } }, > > > > + > > > > + { .value1 = { .f = { 
0x1.ffffffffffffep+51, 0x1.fffffffffffffp+51 } } }, > > > > + { .value1 = { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } } }, > > > > + > > > > + { .value1 = { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } } }, > > > > + { .value1 = { .f = { -0x1.fffffffffffffp+51, -0x1.ffffffffffffep+51 } } }, > > > > + > > > > + { .value1 = { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } } }, > > > > + { .value1 = { .f = { -0x1.0000000000001p+51, -0x1.0000000000000p+51 } } }, > > > > + { .value1 = { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffep+50 } } }, > > > > + { .value1 = { .f = { -0x1.ffffffffffffdp+50, -0x1.ffffffffffffcp+50 } } }, > > > > + > > > > + { .value1 = { .f = { -1.00, -0.75 } } }, > > > > + { .value1 = { .f = { -0.50, -0.25 } } } > > > > +}; > > > > + > > > > +union value answers_NEAREST_INT[] = { > > > > + { .f = { 0.00, 0.00 } }, > > > > + { .f = { 0.00, 1.00 } }, > > > > + > > > > + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, > > > > + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, > > > > + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, > > > > + { .f = { 0x1.0000000000002p+51, 0x1.0000000000004p+51 } }, > > > > + > > > > + { .f = { 0x1.ffffffffffffep+51, 0x1.0000000000000p+52 } }, > > > > + { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } }, > > > > + > > > > + { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } }, > > > > + { .f = { -0x1.0000000000000p+52, -0x1.ffffffffffffep+51 } }, > > > > + > > > > + { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } }, > > > > + { .f = { -0x1.0000000000000p+51, -0x1.0000000000000p+51 } }, > > > > + { .f = { -0x1.ffffffffffffcp+50, -0x1.0000000000000p+51 } }, > > > > + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, > > > > + > > > > + { .f = { -1.00, -1.00 } }, > > > > + { .f = { 0.00, 0.00 } } > > > > +}; > > > > + > > > > +union value answers_NEG_INF[] = { > > > > + { .f = { 0.00, 0.00 } }, > > > > + { .f = { 
0.00, 0.00 } }, > > > > + > > > > + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, > > > > + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, > > > > + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, > > > > + { .f = { 0x1.0000000000002p+51, 0x1.0000000000002p+51 } }, > > > > + > > > > + { .f = { 0x1.ffffffffffffep+51, 0x1.ffffffffffffep+51 } }, > > > > + { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } }, > > > > + > > > > + { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } }, > > > > + { .f = { -0x1.0000000000000p+52, -0x1.ffffffffffffep+51 } }, > > > > + > > > > + { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } }, > > > > + { .f = { -0x1.0000000000002p+51, -0x1.0000000000000p+51 } }, > > > > + { .f = { -0x1.ffffffffffffcp+50, -0x1.0000000000000p+51 } }, > > > > + { .f = { -0x1.0000000000000p+51, -0x1.ffffffffffffcp+50 } }, > > > > + > > > > + { .f = { -1.00, -1.00 } }, > > > > + { .f = { -1.00, -1.00 } } > > > > +}; > > > > + > > > > +union value answers_POS_INF[] = { > > > > + { .f = { 0.00, 1.00 } }, > > > > + { .f = { 1.00, 1.00 } }, > > > > + > > > > + { .f = { 0x1.ffffffffffffcp+50, 0x1.0000000000000p+51 } }, > > > > + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, > > > > + { .f = { 0x1.0000000000000p+51, 0x1.0000000000002p+51 } }, > > > > + { .f = { 0x1.0000000000002p+51, 0x1.0000000000004p+51 } }, > > > > + > > > > + { .f = { 0x1.ffffffffffffep+51, 0x1.0000000000000p+52 } }, > > > > + { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } }, > > > > + > > > > + { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } }, > > > > + { .f = { -0x1.ffffffffffffep+51, -0x1.ffffffffffffep+51 } }, > > > > + > > > > + { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } }, > > > > + { .f = { -0x1.0000000000000p+51, -0x1.0000000000000p+51 } }, > > > > + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, > > > > + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, > > > 
> + > > > > + { .f = { -1.00, 0.00 } }, > > > > + { .f = { 0.00, 0.00 } } > > > > +}; > > > > + > > > > +union value answers_ZERO[] = { > > > > + { .f = { 0.00, 0.00 } }, > > > > + { .f = { 0.00, 0.00 } }, > > > > + > > > > + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, > > > > + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, > > > > + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, > > > > + { .f = { 0x1.0000000000002p+51, 0x1.0000000000002p+51 } }, > > > > + > > > > + { .f = { 0x1.ffffffffffffep+51, 0x1.ffffffffffffep+51 } }, > > > > + { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } }, > > > > + > > > > + { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } }, > > > > + { .f = { -0x1.ffffffffffffep+51, -0x1.ffffffffffffep+51 } }, > > > > + > > > > + { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } }, > > > > + { .f = { -0x1.0000000000000p+51, -0x1.0000000000000p+51 } }, > > > > + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, > > > > + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, > > > > + > > > > + { .f = { -1.00, 0.00 } }, > > > > + { .f = { 0.00, 0.00 } } > > > > +}; > > > > + > > > > +union value *answers[] = { > > > > + answers_NEAREST_INT, > > > > + answers_NEG_INF, > > > > + answers_POS_INF, > > > > + answers_ZERO, > > > > + 0 /* CUR_DIRECTION answers depend on current rounding mode. 
*/ > > > > +}; > > > > + > > > > +#include "sse4_1-round3.h" > > > > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-roundps.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundps.c > > > > new file mode 100644 > > > > index 000000000000..4b0366dfddf3 > > > > --- /dev/null > > > > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundps.c > > > > @@ -0,0 +1,98 @@ > > > > +/* { dg-do run } */ > > > > +/* { dg-require-effective-target vsx_hw } */ > > > > +/* { dg-options "-O2 -mvsx" } */ > > > > + > > > > +#define NO_WARN_X86_INTRINSICS 1 > > > > +#include <smmintrin.h> > > > > + > > > > +#define VEC_T __m128 > > > > +#define FP_T float > > > > + > > > > +#define ROUND_INTRIN(x, ignored, mode) _mm_round_ps (x, mode) > > > > + > > > > +#include "sse4_1-round-data.h" > > > > + > > > > +struct data2 data[] = { > > > > + { .value1 = { .f = { 0.00, 0.25, 0.50, 0.75 } } }, > > > > + > > > > + { .value1 = { .f = { 0x1.fffff8p+21, 0x1.fffffap+21, > > > > + 0x1.fffffcp+21, 0x1.fffffep+21 } } }, > > > > + { .value1 = { .f = { 0x1.fffffap+22, 0x1.fffffcp+22, > > > > + 0x1.fffffep+22, 0x1.fffffep+23 } } }, > > > > + { .value1 = { .f = { -0x1.fffffep+23, -0x1.fffffep+22, > > > > + -0x1.fffffcp+22, -0x1.fffffap+22 } } }, > > > > + { .value1 = { .f = { -0x1.fffffep+21, -0x1.fffffcp+21, > > > > + -0x1.fffffap+21, -0x1.fffff8p+21 } } }, > > > > + > > > > + { .value1 = { .f = { -1.00, -0.75, -0.50, -0.25 } } } > > > > +}; > > > > + > > > > +union value answers_NEAREST_INT[] = { > > > > + { .f = { 0.00, 0.00, 0.00, 1.00 } }, > > > > + > > > > + { .f = { 0x1.fffff8p+21, 0x1.fffff8p+21, > > > > + 0x1.000000p+22, 0x1.000000p+22 } }, > > > > + { .f = { 0x1.fffff8p+22, 0x1.fffffcp+22, > > > > + 0x1.000000p+23, 0x1.fffffep+23 } }, > > > > + { .f = { -0x1.fffffep+23, -0x1.000000p+23, > > > > + -0x1.fffffcp+22, -0x1.fffff8p+22 } }, > > > > + { .f = { -0x1.000000p+22, -0x1.000000p+22, > > > > + -0x1.fffff8p+21, -0x1.fffff8p+21 } }, > > > > + > > > > + { .f = { -1.00, -1.00, 0.00, 0.00 } } > > > 
> +}; > > > > + > > > > +union value answers_NEG_INF[] = { > > > > + { .f = { 0.00, 0.00, 0.00, 0.00 } }, > > > > + > > > > + { .f = { 0x1.fffff8p+21, 0x1.fffff8p+21, > > > > + 0x1.fffff8p+21, 0x1.fffff8p+21 } }, > > > > + { .f = { 0x1.fffff8p+22, 0x1.fffffcp+22, > > > > + 0x1.fffffcp+22, 0x1.fffffep+23 } }, > > > > + { .f = { -0x1.fffffep+23, -0x1.000000p+23, > > > > + -0x1.fffffcp+22, -0x1.fffffcp+22 } }, > > > > + { .f = { -0x1.000000p+22, -0x1.000000p+22, > > > > + -0x1.000000p+22, -0x1.fffff8p+21 } }, > > > > + > > > > + { .f = { -1.00, -1.00, -1.00, -1.00 } } > > > > +}; > > > > + > > > > +union value answers_POS_INF[] = { > > > > + { .f = { 0.00, 1.00, 1.00, 1.00 } }, > > > > + > > > > + { .f = { 0x1.fffff8p+21, 0x1.000000p+22, > > > > + 0x1.000000p+22, 0x1.000000p+22 } }, > > > > + { .f = { 0x1.fffffcp+22, 0x1.fffffcp+22, > > > > + 0x1.000000p+23, 0x1.fffffep+23 } }, > > > > + { .f = { -0x1.fffffep+23, -0x1.fffffcp+22, > > > > + -0x1.fffffcp+22, -0x1.fffff8p+22 } }, > > > > + { .f = { -0x1.fffff8p+21, -0x1.fffff8p+21, > > > > + -0x1.fffff8p+21, -0x1.fffff8p+21 } }, > > > > + > > > > + { .f = { -1.00, 0.00, 0.00, 0.00 } } > > > > +}; > > > > + > > > > +union value answers_ZERO[] = { > > > > + { .f = { 0.00, 0.00, 0.00, 0.00 } }, > > > > + > > > > + { .f = { 0x1.fffff8p+21, 0x1.fffff8p+21, > > > > + 0x1.fffff8p+21, 0x1.fffff8p+21 } }, > > > > + { .f = { 0x1.fffff8p+22, 0x1.fffffcp+22, > > > > + 0x1.fffffcp+22, 0x1.fffffep+23 } }, > > > > + { .f = { -0x1.fffffep+23, -0x1.fffffcp+22, > > > > + -0x1.fffffcp+22, -0x1.fffff8p+22 } }, > > > > + { .f = { -0x1.fffff8p+21, -0x1.fffff8p+21, > > > > + -0x1.fffff8p+21, -0x1.fffff8p+21 } }, > > > > + > > > > + { .f = { -1.00, 0.00, 0.00, 0.00 } } > > > > +}; > > > > + > > > > +union value *answers[] = { > > > > + answers_NEAREST_INT, > > > > + answers_NEG_INF, > > > > + answers_POS_INF, > > > > + answers_ZERO, > > > > + 0 /* CUR_DIRECTION answers depend on current rounding mode. 
*/ > > > > +}; > > > > + > > > > +#include "sse4_1-round3.h" > > > > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-roundsd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundsd.c > > > > new file mode 100644 > > > > index 000000000000..4f8d9e08c93d > > > > --- /dev/null > > > > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundsd.c > > > > @@ -0,0 +1,256 @@ > > > > +/* { dg-do run } */ > > > > +/* { dg-require-effective-target vsx_hw } */ > > > > +/* { dg-options "-O2 -mvsx" } */ > > > > + > > > > +#include <stdio.h> > > > > +#define NO_WARN_X86_INTRINSICS 1 > > > > +#include <smmintrin.h> > > > > + > > > > +#define VEC_T __m128d > > > > +#define FP_T double > > > > + > > > > +#define ROUND_INTRIN(x, y, mode) _mm_round_sd (x, y, mode) > > > > + > > > > +#include "sse4_1-round-data.h" > > > > + > > > > +static struct data2 data[] = { > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { 0.00, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { 0.25, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { 0.50, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { 0.75, IGNORED } } }, > > > > + > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { 0x1.ffffffffffffcp+50, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { 0x1.ffffffffffffdp+50, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { 0x1.ffffffffffffep+50, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { 0x1.fffffffffffffp+50, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { 0x1.0000000000000p+51, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { 
0x1.0000000000001p+51, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { 0x1.0000000000002p+51, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { 0x1.0000000000003p+51, IGNORED } } }, > > > > + > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { 0x1.ffffffffffffep+51, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { 0x1.fffffffffffffp+51, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { 0x1.0000000000000p+52, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { 0x1.0000000000001p+52, IGNORED } } }, > > > > + > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { -0x1.0000000000001p+52, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { -0x1.0000000000000p+52, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { -0x1.fffffffffffffp+51, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { -0x1.ffffffffffffep+51, IGNORED } } }, > > > > + > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { -0x1.0000000000004p+51, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { -0x1.0000000000002p+51, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { -0x1.0000000000001p+51, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { -0x1.0000000000000p+51, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { -0x1.ffffffffffffcp+50, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > 
> + .value2 = { .f = { -0x1.ffffffffffffep+50, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { -0x1.ffffffffffffdp+50, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { -0x1.ffffffffffffcp+50, IGNORED } } }, > > > > + > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { -1.00, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { -0.75, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { -0.50, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, > > > > + .value2 = { .f = { -0.25, IGNORED } } } > > > > +}; > > > > + > > > > +static union value answers_NEAREST_INT[] = { > > > > + { .f = { 0.00, PASSTHROUGH } }, > > > > + { .f = { 0.00, PASSTHROUGH } }, > > > > + { .f = { 0.00, PASSTHROUGH } }, > > > > + { .f = { 1.00, PASSTHROUGH } }, > > > > + > > > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000004p+51, PASSTHROUGH } }, > > > > + > > > > + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000001p+52, PASSTHROUGH } }, > > > > + > > > > + { .f = { -0x1.0000000000001p+52, PASSTHROUGH } }, > > > > + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, > > > > + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, > > > > + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, > > > > + > > > > + { .f 
= { -0x1.0000000000004p+51, PASSTHROUGH } }, > > > > + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, > > > > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > > > > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > > > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > > > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > > + > > > > + { .f = { -1.00, PASSTHROUGH } }, > > > > + { .f = { -1.00, PASSTHROUGH } }, > > > > + { .f = { -0.00, PASSTHROUGH } }, > > > > + { .f = { -0.00, PASSTHROUGH } } > > > > +}; > > > > + > > > > +static union value answers_NEG_INF[] = { > > > > + { .f = { 0.00, PASSTHROUGH } }, > > > > + { .f = { 0.00, PASSTHROUGH } }, > > > > + { .f = { 0.00, PASSTHROUGH } }, > > > > + { .f = { 0.00, PASSTHROUGH } }, > > > > + > > > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > > > > + > > > > + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, > > > > + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000001p+52, PASSTHROUGH } }, > > > > + > > > > + { .f = { -0x1.0000000000001p+52, PASSTHROUGH } }, > > > > + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, > > > > + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, > > > > + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, > > > > + > > > > + { .f = { -0x1.0000000000004p+51, PASSTHROUGH } }, > > > > + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, > > > 
> + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, > > > > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > > > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > > > > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > > > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > > + > > > > + { .f = { -1.00, PASSTHROUGH } }, > > > > + { .f = { -1.00, PASSTHROUGH } }, > > > > + { .f = { -1.00, PASSTHROUGH } }, > > > > + { .f = { -1.00, PASSTHROUGH } } > > > > +}; > > > > + > > > > +static union value answers_POS_INF[] = { > > > > + { .f = { 0.00, PASSTHROUGH } }, > > > > + { .f = { 1.00, PASSTHROUGH } }, > > > > + { .f = { 1.00, PASSTHROUGH } }, > > > > + { .f = { 1.00, PASSTHROUGH } }, > > > > + > > > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000004p+51, PASSTHROUGH } }, > > > > + > > > > + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000001p+52, PASSTHROUGH } }, > > > > + > > > > + { .f = { -0x1.0000000000001p+52, PASSTHROUGH } }, > > > > + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, > > > > + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, > > > > + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, > > > > + > > > > + { .f = { -0x1.0000000000004p+51, PASSTHROUGH } }, > > > > + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, > > > > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > > > > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } 
}, > > > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > > + > > > > + { .f = { -1.00, PASSTHROUGH } }, > > > > + { .f = { 0.00, PASSTHROUGH } }, > > > > + { .f = { 0.00, PASSTHROUGH } }, > > > > + { .f = { 0.00, PASSTHROUGH } } > > > > +}; > > > > + > > > > +static union value answers_ZERO[] = { > > > > + { .f = { 0.00, PASSTHROUGH } }, > > > > + { .f = { 0.00, PASSTHROUGH } }, > > > > + { .f = { 0.00, PASSTHROUGH } }, > > > > + { .f = { 0.00, PASSTHROUGH } }, > > > > + > > > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > > + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, > > > > + > > > > + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, > > > > + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, > > > > + { .f = { 0x1.0000000000001p+52, PASSTHROUGH } }, > > > > + > > > > + { .f = { -0x1.0000000000001p+52, PASSTHROUGH } }, > > > > + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, > > > > + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, > > > > + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, > > > > + > > > > + { .f = { -0x1.0000000000004p+51, PASSTHROUGH } }, > > > > + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, > > > > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > > > > + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, > > > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > > + { .f = { -0x1.ffffffffffffcp+50, 
PASSTHROUGH } }, > > > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > > + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, > > > > + > > > > + { .f = { -1.00, PASSTHROUGH } }, > > > > + { .f = { 0.00, PASSTHROUGH } }, > > > > + { .f = { 0.00, PASSTHROUGH } }, > > > > + { .f = { 0.00, PASSTHROUGH } } > > > > +}; > > > > + > > > > +union value *answers[] = { > > > > + answers_NEAREST_INT, > > > > + answers_NEG_INF, > > > > + answers_POS_INF, > > > > + answers_ZERO, > > > > + 0 /* CUR_DIRECTION answers depend on current rounding mode. */ > > > > +}; > > > > + > > > > +#include "sse4_1-round3.h" > > > > diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-roundss.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundss.c > > > > new file mode 100644 > > > > index 000000000000..d788ebda64dd > > > > --- /dev/null > > > > +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundss.c > > > > @@ -0,0 +1,208 @@ > > > > +/* { dg-do run } */ > > > > +/* { dg-require-effective-target vsx_hw } */ > > > > +/* { dg-options "-O2 -mvsx" } */ > > > > + > > > > +#include <stdio.h> > > > > +#define NO_WARN_X86_INTRINSICS 1 > > > > +#include <smmintrin.h> > > > > + > > > > +#define VEC_T __m128 > > > > +#define FP_T float > > > > + > > > > +#define ROUND_INTRIN(x, y, mode) _mm_round_ss (x, y, mode) > > > > + > > > > +#include "sse4_1-round-data.h" > > > > + > > > > +static struct data2 data[] = { > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + .value2 = { .f = { 0.00, IGNORED, IGNORED, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + .value2 = { .f = { 0.25, IGNORED, IGNORED, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + .value2 = { .f = { 0.50, IGNORED, IGNORED, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + .value2 = { .f = { 0.75, IGNORED, IGNORED, IGNORED } 
} }, > > > > + > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + .value2 = { .f = { 0x1.fffff8p+21, IGNORED, IGNORED, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + .value2 = { .f = { 0x1.fffffap+21, IGNORED, IGNORED, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + .value2 = { .f = { 0x1.fffffcp+21, IGNORED, IGNORED, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + .value2 = { .f = { 0x1.fffffep+21, IGNORED, IGNORED, IGNORED } } }, > > > > + > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + .value2 = { .f = { 0x1.fffffap+22, IGNORED, IGNORED, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + .value2 = { .f = { 0x1.fffffcp+22, IGNORED, IGNORED, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + .value2 = { .f = { 0x1.fffffep+22, IGNORED, IGNORED, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + .value2 = { .f = { 0x1.fffffep+23, IGNORED, IGNORED, IGNORED } } }, > > > > + > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + .value2 = { .f = { -0x1.fffffep+23, IGNORED, IGNORED, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + .value2 = { .f = { -0x1.fffffep+22, IGNORED, IGNORED, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + .value2 = { .f = { -0x1.fffffcp+22, IGNORED, IGNORED, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + .value2 = { .f = { -0x1.fffffap+22, IGNORED, IGNORED, IGNORED } } }, > > > > + > > > > + { 
.value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + .value2 = { .f = { -0x1.fffffep+21, IGNORED, IGNORED, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + .value2 = { .f = { -0x1.fffffcp+21, IGNORED, IGNORED, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + .value2 = { .f = { -0x1.fffffap+21, IGNORED, IGNORED, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + .value2 = { .f = { -0x1.fffff8p+21, IGNORED, IGNORED, IGNORED } } }, > > > > + > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + .value2 = { .f = { -1.00, IGNORED, IGNORED, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + .value2 = { .f = { -0.75, IGNORED, IGNORED, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + .value2 = { .f = { -0.50, IGNORED, IGNORED, IGNORED } } }, > > > > + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + .value2 = { .f = { -0.25, IGNORED, IGNORED, IGNORED } } } > > > > +}; > > > > + > > > > +static union value answers_NEAREST_INT[] = { > > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + > > > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + > > > > + { .f = { 0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, 
PASSTHROUGH } }, > > > > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0x1.000000p+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + > > > > + { .f = { -0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { -0x1.000000p+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { -0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + > > > > + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + > > > > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { -0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { -0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } } > > > > +}; > > > > + > > > > +static union value answers_NEG_INF[] = { > > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + > > > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + > > > > + { .f = { 0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, 
PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + > > > > + { .f = { -0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { -0x1.000000p+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + > > > > + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + > > > > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } } > > > > +}; > > > > + > > > > +static union value answers_POS_INF[] = { > > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + > > > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + > > > > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0x1.000000p+23, 
PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + > > > > + { .f = { -0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { -0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + > > > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + > > > > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } } > > > > +}; > > > > + > > > > +static union value answers_ZERO[] = { > > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + > > > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + > > > > + { .f = { 0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0x1.fffffep+23, 
PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + > > > > + { .f = { -0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { -0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + > > > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + > > > > + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, > > > > + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } } > > > > +}; > > > > + > > > > +union value *answers[] = { > > > > + answers_NEAREST_INT, > > > > + answers_NEG_INF, > > > > + answers_POS_INF, > > > > + answers_ZERO, > > > > + 0 /* CUR_DIRECTION answers depend on current rounding mode. */ > > > > +}; > > > > + > > > > +#include "sse4_1-round3.h" > > > > -- > > > > 2.27.0 > > > >
diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h index 90ce03d22709..6bb03e6e20ac 100644 --- a/gcc/config/rs6000/smmintrin.h +++ b/gcc/config/rs6000/smmintrin.h @@ -42,6 +42,234 @@ #include <altivec.h> #include <tmmintrin.h> +/* Rounding mode macros. */ +#define _MM_FROUND_TO_NEAREST_INT 0x00 +#define _MM_FROUND_TO_ZERO 0x01 +#define _MM_FROUND_TO_POS_INF 0x02 +#define _MM_FROUND_TO_NEG_INF 0x03 +#define _MM_FROUND_CUR_DIRECTION 0x04 + +#define _MM_FROUND_NINT \ + (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC) +#define _MM_FROUND_FLOOR \ + (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC) +#define _MM_FROUND_CEIL \ + (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC) +#define _MM_FROUND_TRUNC \ + (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC) +#define _MM_FROUND_RINT \ + (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC) +#define _MM_FROUND_NEARBYINT \ + (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC) + +#define _MM_FROUND_RAISE_EXC 0x00 +#define _MM_FROUND_NO_EXC 0x08 + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_round_pd (__m128d __A, int __rounding) +{ + __v2df __r; + union { + double __fr; + long long __fpscr; + } __enables_save, __fpscr_save; + + if (__rounding & _MM_FROUND_NO_EXC) + { + /* Save enabled exceptions, disable all exceptions, + and preserve the rounding mode. */ +#ifdef _ARCH_PWR9 + __asm__ ("mffsce %0" : "=f" (__fpscr_save.__fr)); + __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; +#else + __fpscr_save.__fr = __builtin_mffs (); + __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; + __fpscr_save.__fpscr &= ~0xf8; + __builtin_mtfsf (0b00000011, __fpscr_save.__fr); +#endif + /* Insert an artificial "read/write" reference to the variable + read below, to ensure the compiler does not schedule + a read/use of the variable before the FPSCR is modified, above. + This can be removed if and when GCC PR102783 is fixed. 
+ */ + __asm__ ("" : "+wa" (__A)); + } + + switch (__rounding) + { + case _MM_FROUND_TO_NEAREST_INT: + __fpscr_save.__fr = __builtin_mffsl (); + __attribute__ ((fallthrough)); + case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC: + __builtin_set_fpscr_rn (0b00); + /* Insert an artificial "read/write" reference to the variable + read below, to ensure the compiler does not schedule + a read/use of the variable before the FPSCR is modified, above. + This can be removed if and when GCC PR102783 is fixed. + */ + __asm__ ("" : "+wa" (__A)); + + __r = vec_rint ((__v2df) __A); + + /* Insert an artificial "read" reference to the variable written + above, to ensure the compiler does not schedule the computation + of the value after the manipulation of the FPSCR, below. + This can be removed if and when GCC PR102783 is fixed. + */ + __asm__ ("" : : "wa" (__r)); + __builtin_set_fpscr_rn (__fpscr_save.__fpscr); + break; + case _MM_FROUND_TO_NEG_INF: + case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC: + __r = vec_floor ((__v2df) __A); + break; + case _MM_FROUND_TO_POS_INF: + case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC: + __r = vec_ceil ((__v2df) __A); + break; + case _MM_FROUND_TO_ZERO: + case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC: + __r = vec_trunc ((__v2df) __A); + break; + default: /* _MM_FROUND_CUR_DIRECTION, with or without _MM_FROUND_NO_EXC (_MM_FROUND_RINT and _MM_FROUND_NEARBYINT); any other value also rounds in the current mode so that __r is never returned unset. */ + __r = vec_rint ((__v2df) __A); + break; + } + if (__rounding & _MM_FROUND_NO_EXC) + { + /* Insert an artificial "read" reference to the variable written + above, to ensure the compiler does not schedule the computation + of the value after the manipulation of the FPSCR, below. + This can be removed if and when GCC PR102783 is fixed. + */ + __asm__ ("" : : "wa" (__r)); + /* Restore enabled exceptions.
*/ + __fpscr_save.__fr = __builtin_mffsl (); + __fpscr_save.__fpscr |= __enables_save.__fpscr; + __builtin_mtfsf (0b00000011, __fpscr_save.__fr); + } + return (__m128d) __r; +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_round_sd (__m128d __A, __m128d __B, int __rounding) +{ + __B = _mm_round_pd (__B, __rounding); + __v2df __r = { ((__v2df) __B)[0], ((__v2df) __A)[1] }; + return (__m128d) __r; +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_round_ps (__m128 __A, int __rounding) +{ + __v4sf __r; + union { + double __fr; + long long __fpscr; + } __enables_save, __fpscr_save; + + if (__rounding & _MM_FROUND_NO_EXC) + { + /* Save enabled exceptions, disable all exceptions, + and preserve the rounding mode. */ +#ifdef _ARCH_PWR9 + __asm__ ("mffsce %0" : "=f" (__fpscr_save.__fr)); + __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; +#else + __fpscr_save.__fr = __builtin_mffs (); + __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8; + __fpscr_save.__fpscr &= ~0xf8; + __builtin_mtfsf (0b00000011, __fpscr_save.__fr); +#endif + /* Insert an artificial "read/write" reference to the variable + read below, to ensure the compiler does not schedule + a read/use of the variable before the FPSCR is modified, above. + This can be removed if and when GCC PR102783 is fixed. + */ + __asm__ ("" : "+wa" (__A)); + } + + switch (__rounding) + { + case _MM_FROUND_TO_NEAREST_INT: + __fpscr_save.__fr = __builtin_mffsl (); + __attribute__ ((fallthrough)); + case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC: + __builtin_set_fpscr_rn (0b00); + /* Insert an artificial "read/write" reference to the variable + read below, to ensure the compiler does not schedule + a read/use of the variable before the FPSCR is modified, above. + This can be removed if and when GCC PR102783 is fixed. 
+ */ + __asm__ ("" : "+wa" (__A)); + + __r = vec_rint ((__v4sf) __A); + + /* Insert an artificial "read" reference to the variable written + above, to ensure the compiler does not schedule the computation + of the value after the manipulation of the FPSCR, below. + This can be removed if and when GCC PR102783 is fixed. + */ + __asm__ ("" : : "wa" (__r)); + __builtin_set_fpscr_rn (__fpscr_save.__fpscr); + break; + case _MM_FROUND_TO_NEG_INF: + case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC: + __r = vec_floor ((__v4sf) __A); + break; + case _MM_FROUND_TO_POS_INF: + case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC: + __r = vec_ceil ((__v4sf) __A); + break; + case _MM_FROUND_TO_ZERO: + case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC: + __r = vec_trunc ((__v4sf) __A); + break; + default: /* _MM_FROUND_CUR_DIRECTION, with or without _MM_FROUND_NO_EXC (_MM_FROUND_RINT and _MM_FROUND_NEARBYINT); any other value also rounds in the current mode so that __r is never returned unset. */ + __r = vec_rint ((__v4sf) __A); + break; + } + if (__rounding & _MM_FROUND_NO_EXC) + { + /* Insert an artificial "read" reference to the variable written + above, to ensure the compiler does not schedule the computation + of the value after the manipulation of the FPSCR, below. + This can be removed if and when GCC PR102783 is fixed. + */ + __asm__ ("" : : "wa" (__r)); + /* Restore enabled exceptions.
*/ + __fpscr_save.__fr = __builtin_mffsl (); + __fpscr_save.__fpscr |= __enables_save.__fpscr; + __builtin_mtfsf (0b00000011, __fpscr_save.__fr); + } + return (__m128) __r; +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_round_ss (__m128 __A, __m128 __B, int __rounding) +{ + __B = _mm_round_ps (__B, __rounding); + __v4sf __r = (__v4sf) __A; + __r[0] = ((__v4sf) __B)[0]; + return (__m128) __r; +} + +#define _mm_ceil_pd(V) _mm_round_pd ((V), _MM_FROUND_CEIL) +#define _mm_ceil_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_CEIL) + +#define _mm_floor_pd(V) _mm_round_pd((V), _MM_FROUND_FLOOR) +#define _mm_floor_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_FLOOR) + +#define _mm_ceil_ps(V) _mm_round_ps ((V), _MM_FROUND_CEIL) +#define _mm_ceil_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_CEIL) + +#define _mm_floor_ps(V) _mm_round_ps ((V), _MM_FROUND_FLOOR) +#define _mm_floor_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_FLOOR) + extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_insert_epi8 (__m128i const __A, int const __D, int const __N) { @@ -210,70 +438,6 @@ _mm_testnzc_si128 (__m128i __A, __m128i __B) #define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128 ((M), (V)) -__inline __m128d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_ceil_pd (__m128d __A) -{ - return (__m128d) vec_ceil ((__v2df) __A); -} - -__inline __m128d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_ceil_sd (__m128d __A, __m128d __B) -{ - __v2df __r = vec_ceil ((__v2df) __B); - __r[1] = ((__v2df) __A)[1]; - return (__m128d) __r; -} - -__inline __m128d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_floor_pd (__m128d __A) -{ - return (__m128d) vec_floor ((__v2df) __A); -} - -__inline __m128d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_floor_sd (__m128d __A, __m128d __B) -{ - __v2df __r = vec_floor 
((__v2df) __B); - __r[1] = ((__v2df) __A)[1]; - return (__m128d) __r; -} - -__inline __m128 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_ceil_ps (__m128 __A) -{ - return (__m128) vec_ceil ((__v4sf) __A); -} - -__inline __m128 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_ceil_ss (__m128 __A, __m128 __B) -{ - __v4sf __r = (__v4sf) __A; - __r[0] = __builtin_ceil (((__v4sf) __B)[0]); - return __r; -} - -__inline __m128 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_floor_ps (__m128 __A) -{ - return (__m128) vec_floor ((__v4sf) __A); -} - -__inline __m128 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_floor_ss (__m128 __A, __m128 __B) -{ - __v4sf __r = (__v4sf) __A; - __r[0] = __builtin_floor (((__v4sf) __B)[0]); - return __r; -} - #ifdef _ARCH_PWR8 extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-round3.h b/gcc/testsuite/gcc.target/powerpc/sse4_1-round3.h new file mode 100644 index 000000000000..de6cbf7be438 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-round3.h @@ -0,0 +1,81 @@ +#include <smmintrin.h> +#include <fenv.h> +#include "sse4_1-check.h" + +#define DIM(a) (sizeof (a) / sizeof (a)[0]) + +static int roundings[] = + { + _MM_FROUND_TO_NEAREST_INT, + _MM_FROUND_TO_NEG_INF, + _MM_FROUND_TO_POS_INF, + _MM_FROUND_TO_ZERO, + _MM_FROUND_CUR_DIRECTION + }; + +static int modes[] = + { + FE_TONEAREST, + FE_UPWARD, + FE_DOWNWARD, + FE_TOWARDZERO + }; + +static void +TEST (void) +{ + int i, j, ri, mi, round_save; + + round_save = fegetround (); + for (mi = 0; mi < DIM (modes); mi++) { + fesetround (modes[mi]); + for (i = 0; i < DIM (data); i++) { + for (ri = 0; ri < DIM (roundings); ri++) { + union value guess; + union value *current_answers = answers[ri]; + switch ( roundings[ri] ) { + case _MM_FROUND_TO_NEAREST_INT: + guess.x = ROUND_INTRIN 
(data[i].value1.x, data[i].value2.x, + _MM_FROUND_TO_NEAREST_INT); + break; + case _MM_FROUND_TO_NEG_INF: + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, + _MM_FROUND_TO_NEG_INF); + break; + case _MM_FROUND_TO_POS_INF: + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, + _MM_FROUND_TO_POS_INF); + break; + case _MM_FROUND_TO_ZERO: + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, + _MM_FROUND_TO_ZERO); + break; + case _MM_FROUND_CUR_DIRECTION: + guess.x = ROUND_INTRIN (data[i].value1.x, data[i].value2.x, + _MM_FROUND_CUR_DIRECTION); + switch ( modes[mi] ) { + case FE_TONEAREST: + current_answers = answers_NEAREST_INT; + break; + case FE_UPWARD: + current_answers = answers_POS_INF; + break; + case FE_DOWNWARD: + current_answers = answers_NEG_INF; + break; + case FE_TOWARDZERO: + current_answers = answers_ZERO; + break; + } + break; + default: + abort (); + } + for (j = 0; j < DIM (guess.f); j++) + if (guess.f[j] != current_answers[i].f[j]) + abort (); + } + } + } + fesetround (round_save); +} diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-roundpd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundpd.c new file mode 100644 index 000000000000..58d9cc524167 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundpd.c @@ -0,0 +1,143 @@ +/* { dg-do run } */ +/* { dg-require-effective-target vsx_hw } */ +/* { dg-options "-O2 -mvsx" } */ + +#define NO_WARN_X86_INTRINSICS 1 +#include <smmintrin.h> + +#define VEC_T __m128d +#define FP_T double + +#define ROUND_INTRIN(x, ignored, mode) _mm_round_pd (x, mode) + +#include "sse4_1-round-data.h" + +struct data2 data[] = { + { .value1 = { .f = { 0.00, 0.25 } } }, + { .value1 = { .f = { 0.50, 0.75 } } }, + + { .value1 = { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffdp+50 } } }, + { .value1 = { .f = { 0x1.ffffffffffffep+50, 0x1.fffffffffffffp+50 } } }, + { .value1 = { .f = { 0x1.0000000000000p+51, 0x1.0000000000001p+51 } } }, + { .value1 = { .f = { 0x1.0000000000002p+51, 
0x1.0000000000003p+51 } } }, + + { .value1 = { .f = { 0x1.ffffffffffffep+51, 0x1.fffffffffffffp+51 } } }, + { .value1 = { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } } }, + + { .value1 = { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } } }, + { .value1 = { .f = { -0x1.fffffffffffffp+51, -0x1.ffffffffffffep+51 } } }, + + { .value1 = { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } } }, + { .value1 = { .f = { -0x1.0000000000001p+51, -0x1.0000000000000p+51 } } }, + { .value1 = { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffep+50 } } }, + { .value1 = { .f = { -0x1.ffffffffffffdp+50, -0x1.ffffffffffffcp+50 } } }, + + { .value1 = { .f = { -1.00, -0.75 } } }, + { .value1 = { .f = { -0.50, -0.25 } } } +}; + +union value answers_NEAREST_INT[] = { + { .f = { 0.00, 0.00 } }, + { .f = { 0.00, 1.00 } }, + + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, + { .f = { 0x1.0000000000002p+51, 0x1.0000000000004p+51 } }, + + { .f = { 0x1.ffffffffffffep+51, 0x1.0000000000000p+52 } }, + { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } }, + + { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } }, + { .f = { -0x1.0000000000000p+52, -0x1.ffffffffffffep+51 } }, + + { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } }, + { .f = { -0x1.0000000000000p+51, -0x1.0000000000000p+51 } }, + { .f = { -0x1.ffffffffffffcp+50, -0x1.0000000000000p+51 } }, + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, + + { .f = { -1.00, -1.00 } }, + { .f = { 0.00, 0.00 } } +}; + +union value answers_NEG_INF[] = { + { .f = { 0.00, 0.00 } }, + { .f = { 0.00, 0.00 } }, + + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, + { .f = { 0x1.0000000000002p+51, 0x1.0000000000002p+51 } }, + + { .f = { 
0x1.ffffffffffffep+51, 0x1.ffffffffffffep+51 } }, + { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } }, + + { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } }, + { .f = { -0x1.0000000000000p+52, -0x1.ffffffffffffep+51 } }, + + { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } }, + { .f = { -0x1.0000000000002p+51, -0x1.0000000000000p+51 } }, + { .f = { -0x1.ffffffffffffcp+50, -0x1.0000000000000p+51 } }, + { .f = { -0x1.0000000000000p+51, -0x1.ffffffffffffcp+50 } }, + + { .f = { -1.00, -1.00 } }, + { .f = { -1.00, -1.00 } } +}; + +union value answers_POS_INF[] = { + { .f = { 0.00, 1.00 } }, + { .f = { 1.00, 1.00 } }, + + { .f = { 0x1.ffffffffffffcp+50, 0x1.0000000000000p+51 } }, + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, + { .f = { 0x1.0000000000000p+51, 0x1.0000000000002p+51 } }, + { .f = { 0x1.0000000000002p+51, 0x1.0000000000004p+51 } }, + + { .f = { 0x1.ffffffffffffep+51, 0x1.0000000000000p+52 } }, + { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } }, + + { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } }, + { .f = { -0x1.ffffffffffffep+51, -0x1.ffffffffffffep+51 } }, + + { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } }, + { .f = { -0x1.0000000000000p+51, -0x1.0000000000000p+51 } }, + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, + + { .f = { -1.00, 0.00 } }, + { .f = { 0.00, 0.00 } } +}; + +union value answers_ZERO[] = { + { .f = { 0.00, 0.00 } }, + { .f = { 0.00, 0.00 } }, + + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, + { .f = { 0x1.ffffffffffffcp+50, 0x1.ffffffffffffcp+50 } }, + { .f = { 0x1.0000000000000p+51, 0x1.0000000000000p+51 } }, + { .f = { 0x1.0000000000002p+51, 0x1.0000000000002p+51 } }, + + { .f = { 0x1.ffffffffffffep+51, 0x1.ffffffffffffep+51 } }, + { .f = { 0x1.0000000000000p+52, 0x1.0000000000001p+52 } }, + + { .f = { -0x1.0000000000001p+52, -0x1.0000000000000p+52 } }, + { .f = { 
-0x1.ffffffffffffep+51, -0x1.ffffffffffffep+51 } }, + + { .f = { -0x1.0000000000004p+51, -0x1.0000000000002p+51 } }, + { .f = { -0x1.0000000000000p+51, -0x1.0000000000000p+51 } }, + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, + { .f = { -0x1.ffffffffffffcp+50, -0x1.ffffffffffffcp+50 } }, + + { .f = { -1.00, 0.00 } }, + { .f = { 0.00, 0.00 } } +}; + +union value *answers[] = { + answers_NEAREST_INT, + answers_NEG_INF, + answers_POS_INF, + answers_ZERO, + 0 /* CUR_DIRECTION answers depend on current rounding mode. */ +}; + +#include "sse4_1-round3.h" diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-roundps.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundps.c new file mode 100644 index 000000000000..4b0366dfddf3 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundps.c @@ -0,0 +1,98 @@ +/* { dg-do run } */ +/* { dg-require-effective-target vsx_hw } */ +/* { dg-options "-O2 -mvsx" } */ + +#define NO_WARN_X86_INTRINSICS 1 +#include <smmintrin.h> + +#define VEC_T __m128 +#define FP_T float + +#define ROUND_INTRIN(x, ignored, mode) _mm_round_ps (x, mode) + +#include "sse4_1-round-data.h" + +struct data2 data[] = { + { .value1 = { .f = { 0.00, 0.25, 0.50, 0.75 } } }, + + { .value1 = { .f = { 0x1.fffff8p+21, 0x1.fffffap+21, + 0x1.fffffcp+21, 0x1.fffffep+21 } } }, + { .value1 = { .f = { 0x1.fffffap+22, 0x1.fffffcp+22, + 0x1.fffffep+22, 0x1.fffffep+23 } } }, + { .value1 = { .f = { -0x1.fffffep+23, -0x1.fffffep+22, + -0x1.fffffcp+22, -0x1.fffffap+22 } } }, + { .value1 = { .f = { -0x1.fffffep+21, -0x1.fffffcp+21, + -0x1.fffffap+21, -0x1.fffff8p+21 } } }, + + { .value1 = { .f = { -1.00, -0.75, -0.50, -0.25 } } } +}; + +union value answers_NEAREST_INT[] = { + { .f = { 0.00, 0.00, 0.00, 1.00 } }, + + { .f = { 0x1.fffff8p+21, 0x1.fffff8p+21, + 0x1.000000p+22, 0x1.000000p+22 } }, + { .f = { 0x1.fffff8p+22, 0x1.fffffcp+22, + 0x1.000000p+23, 0x1.fffffep+23 } }, + { .f = { -0x1.fffffep+23, -0x1.000000p+23, + -0x1.fffffcp+22, -0x1.fffff8p+22 } }, + { .f 
= { -0x1.000000p+22, -0x1.000000p+22, + -0x1.fffff8p+21, -0x1.fffff8p+21 } }, + + { .f = { -1.00, -1.00, 0.00, 0.00 } } +}; + +union value answers_NEG_INF[] = { + { .f = { 0.00, 0.00, 0.00, 0.00 } }, + + { .f = { 0x1.fffff8p+21, 0x1.fffff8p+21, + 0x1.fffff8p+21, 0x1.fffff8p+21 } }, + { .f = { 0x1.fffff8p+22, 0x1.fffffcp+22, + 0x1.fffffcp+22, 0x1.fffffep+23 } }, + { .f = { -0x1.fffffep+23, -0x1.000000p+23, + -0x1.fffffcp+22, -0x1.fffffcp+22 } }, + { .f = { -0x1.000000p+22, -0x1.000000p+22, + -0x1.000000p+22, -0x1.fffff8p+21 } }, + + { .f = { -1.00, -1.00, -1.00, -1.00 } } +}; + +union value answers_POS_INF[] = { + { .f = { 0.00, 1.00, 1.00, 1.00 } }, + + { .f = { 0x1.fffff8p+21, 0x1.000000p+22, + 0x1.000000p+22, 0x1.000000p+22 } }, + { .f = { 0x1.fffffcp+22, 0x1.fffffcp+22, + 0x1.000000p+23, 0x1.fffffep+23 } }, + { .f = { -0x1.fffffep+23, -0x1.fffffcp+22, + -0x1.fffffcp+22, -0x1.fffff8p+22 } }, + { .f = { -0x1.fffff8p+21, -0x1.fffff8p+21, + -0x1.fffff8p+21, -0x1.fffff8p+21 } }, + + { .f = { -1.00, 0.00, 0.00, 0.00 } } +}; + +union value answers_ZERO[] = { + { .f = { 0.00, 0.00, 0.00, 0.00 } }, + + { .f = { 0x1.fffff8p+21, 0x1.fffff8p+21, + 0x1.fffff8p+21, 0x1.fffff8p+21 } }, + { .f = { 0x1.fffff8p+22, 0x1.fffffcp+22, + 0x1.fffffcp+22, 0x1.fffffep+23 } }, + { .f = { -0x1.fffffep+23, -0x1.fffffcp+22, + -0x1.fffffcp+22, -0x1.fffff8p+22 } }, + { .f = { -0x1.fffff8p+21, -0x1.fffff8p+21, + -0x1.fffff8p+21, -0x1.fffff8p+21 } }, + + { .f = { -1.00, 0.00, 0.00, 0.00 } } +}; + +union value *answers[] = { + answers_NEAREST_INT, + answers_NEG_INF, + answers_POS_INF, + answers_ZERO, + 0 /* CUR_DIRECTION answers depend on current rounding mode. 
*/ +}; + +#include "sse4_1-round3.h" diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-roundsd.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundsd.c new file mode 100644 index 000000000000..4f8d9e08c93d --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundsd.c @@ -0,0 +1,256 @@ +/* { dg-do run } */ +/* { dg-require-effective-target vsx_hw } */ +/* { dg-options "-O2 -mvsx" } */ + +#include <stdio.h> +#define NO_WARN_X86_INTRINSICS 1 +#include <smmintrin.h> + +#define VEC_T __m128d +#define FP_T double + +#define ROUND_INTRIN(x, y, mode) _mm_round_sd (x, y, mode) + +#include "sse4_1-round-data.h" + +static struct data2 data[] = { + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0.00, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0.25, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0.50, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0.75, IGNORED } } }, + + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0x1.ffffffffffffcp+50, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0x1.ffffffffffffdp+50, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0x1.ffffffffffffep+50, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0x1.fffffffffffffp+50, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0x1.0000000000000p+51, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0x1.0000000000001p+51, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0x1.0000000000002p+51, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0x1.0000000000003p+51, IGNORED } } }, + + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0x1.ffffffffffffep+51, IGNORED } } }, + { 
.value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0x1.fffffffffffffp+51, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0x1.0000000000000p+52, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { 0x1.0000000000001p+52, IGNORED } } }, + + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -0x1.0000000000001p+52, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -0x1.0000000000000p+52, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -0x1.fffffffffffffp+51, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -0x1.ffffffffffffep+51, IGNORED } } }, + + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -0x1.0000000000004p+51, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -0x1.0000000000002p+51, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -0x1.0000000000001p+51, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -0x1.0000000000000p+51, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -0x1.ffffffffffffcp+50, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -0x1.ffffffffffffep+50, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -0x1.ffffffffffffdp+50, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -0x1.ffffffffffffcp+50, IGNORED } } }, + + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -1.00, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -0.75, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -0.50, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH } }, + .value2 = { .f = { -0.25, IGNORED } } 
} +}; + +static union value answers_NEAREST_INT[] = { + { .f = { 0.00, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH } }, + { .f = { 1.00, PASSTHROUGH } }, + + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000004p+51, PASSTHROUGH } }, + + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, + { .f = { 0x1.0000000000001p+52, PASSTHROUGH } }, + + { .f = { -0x1.0000000000001p+52, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, + + { .f = { -0x1.0000000000004p+51, PASSTHROUGH } }, + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, + + { .f = { -1.00, PASSTHROUGH } }, + { .f = { -1.00, PASSTHROUGH } }, + { .f = { -0.00, PASSTHROUGH } }, + { .f = { -0.00, PASSTHROUGH } } +}; + +static union value answers_NEG_INF[] = { + { .f = { 0.00, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH } }, + + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+51, 
PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, + + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, + { .f = { 0x1.0000000000001p+52, PASSTHROUGH } }, + + { .f = { -0x1.0000000000001p+52, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, + + { .f = { -0x1.0000000000004p+51, PASSTHROUGH } }, + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, + + { .f = { -1.00, PASSTHROUGH } }, + { .f = { -1.00, PASSTHROUGH } }, + { .f = { -1.00, PASSTHROUGH } }, + { .f = { -1.00, PASSTHROUGH } } +}; + +static union value answers_POS_INF[] = { + { .f = { 0.00, PASSTHROUGH } }, + { .f = { 1.00, PASSTHROUGH } }, + { .f = { 1.00, PASSTHROUGH } }, + { .f = { 1.00, PASSTHROUGH } }, + + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000004p+51, PASSTHROUGH } }, + + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, + { .f = { 0x1.0000000000001p+52, PASSTHROUGH } }, + + { .f = { -0x1.0000000000001p+52, PASSTHROUGH } }, 
+ { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, + + { .f = { -0x1.0000000000004p+51, PASSTHROUGH } }, + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, + + { .f = { -1.00, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH } } +}; + +static union value answers_ZERO[] = { + { .f = { 0.00, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH } }, + + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { 0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000002p+51, PASSTHROUGH } }, + + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, + { .f = { 0x1.ffffffffffffep+51, PASSTHROUGH } }, + { .f = { 0x1.0000000000000p+52, PASSTHROUGH } }, + { .f = { 0x1.0000000000001p+52, PASSTHROUGH } }, + + { .f = { -0x1.0000000000001p+52, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+52, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffep+51, PASSTHROUGH } }, + + { .f = { -0x1.0000000000004p+51, PASSTHROUGH } }, + { .f = { -0x1.0000000000002p+51, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { -0x1.0000000000000p+51, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { 
-0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, + { .f = { -0x1.ffffffffffffcp+50, PASSTHROUGH } }, + + { .f = { -1.00, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH } } +}; + +union value *answers[] = { + answers_NEAREST_INT, + answers_NEG_INF, + answers_POS_INF, + answers_ZERO, + 0 /* CUR_DIRECTION answers depend on current rounding mode. */ +}; + +#include "sse4_1-round3.h" diff --git a/gcc/testsuite/gcc.target/powerpc/sse4_1-roundss.c b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundss.c new file mode 100644 index 000000000000..d788ebda64dd --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/sse4_1-roundss.c @@ -0,0 +1,208 @@ +/* { dg-do run } */ +/* { dg-require-effective-target vsx_hw } */ +/* { dg-options "-O2 -mvsx" } */ + +#include <stdio.h> +#define NO_WARN_X86_INTRINSICS 1 +#include <smmintrin.h> + +#define VEC_T __m128 +#define FP_T float + +#define ROUND_INTRIN(x, y, mode) _mm_round_ss (x, y, mode) + +#include "sse4_1-round-data.h" + +static struct data2 data[] = { + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { 0.00, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { 0.25, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { 0.50, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { 0.75, IGNORED, IGNORED, IGNORED } } }, + + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { 0x1.fffff8p+21, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { 0x1.fffffap+21, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, 
PASSTHROUGH } }, + .value2 = { .f = { 0x1.fffffcp+21, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { 0x1.fffffep+21, IGNORED, IGNORED, IGNORED } } }, + + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { 0x1.fffffap+22, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { 0x1.fffffcp+22, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { 0x1.fffffep+22, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { 0x1.fffffep+23, IGNORED, IGNORED, IGNORED } } }, + + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { -0x1.fffffep+23, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { -0x1.fffffep+22, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { -0x1.fffffcp+22, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { -0x1.fffffap+22, IGNORED, IGNORED, IGNORED } } }, + + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { -0x1.fffffep+21, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { -0x1.fffffcp+21, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { -0x1.fffffap+21, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { -0x1.fffff8p+21, IGNORED, IGNORED, IGNORED } } }, + + { .value1 
= { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { -1.00, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { -0.75, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { -0.50, IGNORED, IGNORED, IGNORED } } }, + { .value1 = { .f = { IGNORED, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + .value2 = { .f = { -0.25, IGNORED, IGNORED, IGNORED } } } +}; + +static union value answers_NEAREST_INT[] = { + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { 0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.000000p+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { -0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.000000p+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { -1.00, PASSTHROUGH, 
PASSTHROUGH, PASSTHROUGH } }, + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } } +}; + +static union value answers_NEG_INF[] = { + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { 0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { -0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.000000p+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } } +}; + +static union value answers_POS_INF[] = { + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 
1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.000000p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.000000p+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { -0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } } +}; + +static union value answers_ZERO[] = { + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 
0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { 0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { -0x1.fffffep+23, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffffcp+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffff8p+22, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { -0x1.fffff8p+21, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + + { .f = { -1.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } }, + { .f = { 0.00, PASSTHROUGH, PASSTHROUGH, PASSTHROUGH } } +}; + +union value *answers[] = { + answers_NEAREST_INT, + answers_NEG_INF, + answers_POS_INF, + answers_ZERO, + 0 /* CUR_DIRECTION answers depend on current rounding mode. */ +}; + +#include "sse4_1-round3.h"