Message ID | CAMZc-bxFVR4+WVJrAcvT_5HHJqPOasbBgftqA3EMkRqOsVG31g@mail.gmail.com |
---|---|
State | New |
Headers | show |
Series | Add missing avx512dqintrin.h _mm_mask_fpclass_s[sd]_mask (PR target/897803) | expand |
Hi Uros: would you help to review this patch? Regards, Hongtao. On Sun, Mar 24, 2019 at 8:13 PM Hongtao Liu <crazylht@gmail.com> wrote: > > Hi: > The following patch adds forgotten avx512f fpclass instrinsics for > masked scalar operations. > > Bootstrapped/regtested on x86_64-linux and i686-linux (on skylake-avx512), > ok for trunk? > > Index: ChangeLog > =================================================================== > --- ChangeLog (revision 269894) > +++ ChangeLog (working copy) > @@ -1,3 +1,16 @@ > +2019-03-24 Hongtao Liu <hongtao.liu@intel.com> > + > + PR target/89803 > + * config/i386/avx512dqintrin.h > + (_mm_mask_fpclass_ss_mask,_mm_mask_fpclass_sd_mask): > + New intrinsics. > + * config/i386/i386-builtin.def > + (__builtin_ia32_fpclassss_mask, _builtin_ia32_fpclasssd_mask): > + New builtins. > + * config/i386/sse.md > + (define_insn "avx512dq_vmfpclass<mode><mask_scalar_merge_name>): > + Modified with mask. > + > 2019-03-23 Segher Boessenkool <segher@kernel.crashing.org> > > * config/rs6000/xmmintrin.h (_mm_movemask_pi8): Implement for 32-bit > Index: config/i386/avx512dqintrin.h > =================================================================== > --- config/i386/avx512dqintrin.h (revision 269894) > +++ config/i386/avx512dqintrin.h (working copy) > @@ -1372,6 +1372,20 @@ > return (__mmask8) __builtin_ia32_fpclasssd ((__v2df) __A, __imm); > } > > +extern __inline __mmask8 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask_fpclass_ss_mask (__mmask8 __U, __m128 __A, const int __imm) > +{ > + return (__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) __A, __imm, __U); > +} > + > +extern __inline __mmask8 > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm) > +{ > + return (__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) __A, __imm, __U); > +} > + > extern __inline __m512i > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > _mm512_cvtt_roundpd_epi64 (__m512d __A, const int __R) > @@ -2623,6 +2637,12 @@ > #define _mm_fpclass_sd_mask(X, C) \ > ((__mmask8) __builtin_ia32_fpclasssd ((__v2df) (__m128d) (X), (int) (C))) \ > > +#define _mm_mask_fpclass_ss_mask(X, C, U) \ > + ((__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) (__m128) (X), > (int) (C)), (__mmask8) (U)) > + > +#define _mm_mask_fpclass_sd_mask(X, C, U) \ > + ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X), > (int) (C)), (__mmask8) (U)) > + > #define _mm512_mask_fpclass_pd_mask(u, X, C) \ > ((__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) (__m512d) (X), \ > (int) (C), (__mmask8)(u))) > Index: config/i386/i386-builtin.def > =================================================================== > --- config/i386/i386-builtin.def (revision 269894) > +++ config/i386/i386-builtin.def (working copy) > @@ -2082,9 +2082,11 @@ > BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, > CODE_FOR_avx512dq_fpclassv4df_mask, > "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, > UNKNOWN, (int) QI_FTYPE_V4DF_INT_UQI) > BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, > CODE_FOR_avx512dq_fpclassv2df_mask, > "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, > UNKNOWN, (int) QI_FTYPE_V2DF_INT_UQI) > BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vmfpclassv2df, > "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) > QI_FTYPE_V2DF_INT) > +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, > CODE_FOR_avx512dq_vmfpclassv2df_mask, "__builtin_ia32_fpclasssd_mask", > IX86_BUILTIN_FPCLASSSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_INT_UQI) > BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, > CODE_FOR_avx512dq_fpclassv8sf_mask, > "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, > UNKNOWN, (int) QI_FTYPE_V8SF_INT_UQI) > BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, > CODE_FOR_avx512dq_fpclassv4sf_mask, > "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, > UNKNOWN, (int) QI_FTYPE_V4SF_INT_UQI) > BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vmfpclassv4sf, > "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) > QI_FTYPE_V4SF_INT) > +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, > CODE_FOR_avx512dq_vmfpclassv4sf_mask, "__builtin_ia32_fpclassss_mask", > IX86_BUILTIN_FPCLASSSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_INT_UQI) > BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, > CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", > IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) UHI_FTYPE_V16QI) > BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, > CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", > IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) USI_FTYPE_V32QI) > BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, > CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", > IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) UQI_FTYPE_V8HI) > Index: config/i386/sse.md > =================================================================== > --- config/i386/sse.md (revision 269894) > +++ config/i386/sse.md (working copy) > @@ -21111,7 +21111,7 @@ > (set_attr "prefix" "evex") > (set_attr "mode" "<MODE>")]) > > -(define_insn "avx512dq_vmfpclass<mode>" > +(define_insn "avx512dq_vmfpclass<mode><mask_scalar_merge_name>" > [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k") > (and:<avx512fmaskmode> > (unspec:<avx512fmaskmode> > @@ -21120,7 +21120,7 @@ > UNSPEC_FPCLASS) > (const_int 1)))] > "TARGET_AVX512DQ" > - "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"; > + "vfpclass<ssescalarmodesuffix>\t{%2, %1, > %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, > %2}"; > [(set_attr "type" "sse") > (set_attr "length_immediate" "1") > (set_attr "prefix" "evex") > Index: testsuite/ChangeLog > =================================================================== > --- testsuite/ChangeLog (revision 269894) > +++ testsuite/ChangeLog (working copy) > @@ -1,3 +1,19 @@ > +2019-03-24 Hongtao Liu <hongtao.liu@intel.com> > + > + PR target/89803 > + * gcc.target/i386/avx-1.c (__builtin_ia32_fpclassss_mask, > + __builtin_ia32_fpclasssd_mask): Define. > + * gcc.target/i386/sse-13.c (__builtin_ia32_fpclassss_mask, > + __builtin_ia32_fpclasssd_mask): Likewise. > + * gcc.target/i386/sse-23.c (__builtin_ia32_fpclassss_mask) > + (__builtin_ia32_fpclasssd_mask): Likewise. > + * gcc.target/i386/avx512dq-vfpclassss-2.c: New. > + * gcc.target/i386/avx512dq-vfpclasssd-2.c: Likewise. > + * gcc.target/i386/avx512dq-vfpclassss-1.c (avx512f_test): > + Add test for _mm_mask_fpclass_ss_mask. > + * gcc.target/i386/avx512dq-vfpclasssd-1.c (avx512f_test): > + Add test for _mm_mask_fpclass_sd_mask. > + > 2019-03-22 Vladimir Makarov <vmakarov@redhat.com> > > PR rtl-optimization/89676 > Index: testsuite/gcc.target/i386/avx-1.c > =================================================================== > --- testsuite/gcc.target/i386/avx-1.c (revision 269894) > +++ testsuite/gcc.target/i386/avx-1.c (working copy) > @@ -446,6 +446,8 @@ > #define __builtin_ia32_insertf32x8_mask(A, B, F, D, E) > __builtin_ia32_insertf32x8_mask(A, B, 1, D, E) > #define __builtin_ia32_fpclassss(A, D) __builtin_ia32_fpclassss(A, 1) > #define __builtin_ia32_fpclasssd(A, D) __builtin_ia32_fpclasssd(A, 1) > +#define __builtin_ia32_fpclassss_mask(A, D, U) > __builtin_ia32_fpclassss_mask(A, 1, U) > +#define __builtin_ia32_fpclasssd_mask(A, D, U) > __builtin_ia32_fpclasssd_mask(A, 1, U) > #define __builtin_ia32_fpclassps512_mask(A, D, C) > __builtin_ia32_fpclassps512_mask(A, 1, C) > #define __builtin_ia32_fpclasspd512_mask(A, D, C) > __builtin_ia32_fpclasspd512_mask(A, 1, C) > #define __builtin_ia32_extracti64x2_512_mask(A, E, C, D) > __builtin_ia32_extracti64x2_512_mask(A, 1, C, D) > Index: testsuite/gcc.target/i386/avx512dq-vfpclasssd-1.c > =================================================================== > --- testsuite/gcc.target/i386/avx512dq-vfpclasssd-1.c (revision 269894) > +++ testsuite/gcc.target/i386/avx512dq-vfpclasssd-1.c (working copy) > @@ -1,6 +1,7 @@ > /* { dg-do compile } */ > /* { dg-options "-mavx512dq -O2" } */ > /* { dg-final { scan-assembler-times "vfpclasssd\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } > } */ > +/* { dg-final { scan-assembler-times "vfpclasssd\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[0-7\]\}(?:\n|\[ > \\t\]+#)" 1 } } */ > > #include <immintrin.h> > > @@ -11,4 +12,5 @@ > avx512dq_test (void) > { > m8 = _mm_fpclass_sd_mask (x128, 13); > + m8 = _mm_mask_fpclass_sd_mask (m8, x128, 13); > } > Index: testsuite/gcc.target/i386/avx512dq-vfpclasssd-2.c > =================================================================== > --- testsuite/gcc.target/i386/avx512dq-vfpclasssd-2.c (nonexistent) > +++ testsuite/gcc.target/i386/avx512dq-vfpclasssd-2.c (working copy) > @@ -0,0 +1,75 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -mavx512dq" } */ > +/* { dg-require-effective-target avx512dq } */ > + > +#define AVX512DQ > +#include "avx512f-helper.h" > + > +#include <math.h> > +#include <limits.h> > +#include <float.h> > +#define SIZE (128 / 64) > +#include "avx512f-mask-type.h" > + > +#ifndef __FPCLASSSD__ > +#define __FPCLASSSD__ > +int check_fp_class_dp (double src, int imm) > +{ > + int qNaN_res = isnan (src); > + int sNaN_res = isnan (src); > + int Pzero_res = (src == 0.0); > + int Nzero_res = (src == -0.0); > + int PInf_res = (isinf (src) == 1); > + int NInf_res = (isinf (src) == -1); > + int Denorm_res = (fpclassify (src) == FP_SUBNORMAL); > + int FinNeg_res = __builtin_finite (src) && (src < 0); > + > + int result = (((imm & 1) && qNaN_res) > + || (((imm >> 1) & 1) && Pzero_res) > + || (((imm >> 2) & 1) && Nzero_res) > + || (((imm >> 3) & 1) && PInf_res) > + || (((imm >> 4) & 1) && NInf_res) > + || (((imm >> 5) & 1) && Denorm_res) > + || (((imm >> 6) & 1) && FinNeg_res) > + || (((imm >> 7) & 1) && sNaN_res)); > + return result; > +} > +#endif > + > +__mmask8 > +CALC (double *s1, int imm) > +{ > + int i; > + __mmask8 res = 0; > + > + if (check_fp_class_dp(s1[0], imm)) > + res = res | 1; > + > + return res; > +} > + > +void > +TEST (void) > +{ > + int i; > + union128d src; > + __mmask8 res1, res2, res_ref = 0; > + __mmask8 mask = MASK_VALUE; > + > + src.a[0] = 1.0 / 0.0; > + for (i = 1; i < SIZE; i++) > + { > + src.a[i] = -24.43 + 0.6 * i; > + } > + > + res1 = _mm_fpclass_sd_mask (src.x, 0xFF); > + res2 = _mm_mask_fpclass_sd_mask (mask, src.x, 0xFF); > + > + res_ref = CALC (src.a, 0xFF); > + > + if (res_ref != res1) > + abort (); > + > + if ((res_ref & mask) != res2) > + abort (); > +} > Index: testsuite/gcc.target/i386/avx512dq-vfpclassss-1.c > =================================================================== > --- testsuite/gcc.target/i386/avx512dq-vfpclassss-1.c (revision 269894) > +++ testsuite/gcc.target/i386/avx512dq-vfpclassss-1.c (working copy) > @@ -1,6 +1,7 @@ > /* { dg-do compile } */ > /* { dg-options "-mavx512dq -O2" } */ > /* { dg-final { scan-assembler-times "vfpclassss\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } > } */ > +/* { dg-final { scan-assembler-times "vfpclassss\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[0-7\]\}(?:\n|\[ > \\t\]+#)" 1 } } */ > > #include <immintrin.h> > > @@ -11,4 +12,5 @@ > avx512dq_test (void) > { > m8 = _mm_fpclass_ss_mask (x128, 13); > + m8 = _mm_mask_fpclass_ss_mask (m8, x128, 13); > } > Index: testsuite/gcc.target/i386/avx512dq-vfpclassss-2.c > =================================================================== > --- testsuite/gcc.target/i386/avx512dq-vfpclassss-2.c (nonexistent) > +++ testsuite/gcc.target/i386/avx512dq-vfpclassss-2.c (working copy) > @@ -0,0 +1,76 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -mavx512dq" } */ > +/* { dg-require-effective-target avx512dq } */ > + > +#define AVX512DQ > +#include "avx512f-helper.h" > + > +#include <math.h> > +#include <limits.h> > +#include <float.h> > +#include "avx512f-mask-type.h" > +#define SIZE (128 / 32) > + > +#ifndef __FPCLASSSS__ > +#define __FPCLASSSS__ > +int check_fp_class_sp (float src, int imm) > +{ > + int qNaN_res = isnan (src); > + int sNaN_res = isnan (src); > + int Pzero_res = (src == 0.0); > + int Nzero_res = (src == -0.0); > + int PInf_res = (isinf (src) == 1); > + int NInf_res = (isinf (src) == -1); > + int Denorm_res = (fpclassify (src) == FP_SUBNORMAL); > + int FinNeg_res = __builtin_finite (src) && (src < 0); > + > + int result = (((imm & 1) && qNaN_res) > + || (((imm >> 1) & 1) && Pzero_res) > + || (((imm >> 2) & 1) && Nzero_res) > + || (((imm >> 3) & 1) && PInf_res) > + || (((imm >> 4) & 1) && NInf_res) > + || (((imm >> 5) & 1) && Denorm_res) > + || (((imm >> 6) & 1) && FinNeg_res) > + || (((imm >> 7) & 1) && sNaN_res)); > + return result; > +} > +#endif > + > +__mmask8 > +CALC (float *s1, int imm) > +{ > + int i; > + __mmask8 res = 0; > + > + if (check_fp_class_sp(s1[0], imm)) > + res = res | 1; > + > + return res; > +} > + > +void > +TEST (void) > +{ > + int i; > + union128 src; > + __mmask8 res1, res2, res_ref = 0; > + __mmask8 mask = MASK_VALUE; > + > + src.a[0] = 1.0 / 0.0; > + for (i = 1; i < SIZE; i++) > + { > + src.a[i] = -24.43 + 0.6 * i; > + } > + > + res1 = _mm_fpclass_ss_mask (src.x, 0xFF); > + res2 = _mm_mask_fpclass_ss_mask (mask, src.x, 0xFF); > + > + > + res_ref = CALC (src.a, 0xFF); > + > + if (res_ref != res1) > + abort (); > + > + if ((mask & res_ref) != res2) > + abort (); > +} > Index: testsuite/gcc.target/i386/sse-13.c > =================================================================== > --- testsuite/gcc.target/i386/sse-13.c (revision 269894) > +++ testsuite/gcc.target/i386/sse-13.c (working copy) > @@ -463,6 +463,8 @@ > #define __builtin_ia32_insertf32x8_mask(A, B, F, D, E) > __builtin_ia32_insertf32x8_mask(A, B, 1, D, E) > #define __builtin_ia32_fpclassss(A, D) __builtin_ia32_fpclassss(A, 1) > #define __builtin_ia32_fpclasssd(A, D) __builtin_ia32_fpclasssd(A, 1) > +#define __builtin_ia32_fpclassss_mask(A, D, U) > __builtin_ia32_fpclassss_mask(A, 1, U) > +#define __builtin_ia32_fpclasssd_mask(A, D, U) > __builtin_ia32_fpclasssd_mask(A, 1, U) > #define __builtin_ia32_fpclassps512_mask(A, D, C) > __builtin_ia32_fpclassps512_mask(A, 1, C) > #define __builtin_ia32_fpclasspd512_mask(A, D, C) > __builtin_ia32_fpclasspd512_mask(A, 1, C) > #define __builtin_ia32_extracti64x2_512_mask(A, E, C, D) > __builtin_ia32_extracti64x2_512_mask(A, 1, C, D) > Index: testsuite/gcc.target/i386/sse-23.c > =================================================================== > --- testsuite/gcc.target/i386/sse-23.c (revision 269894) > +++ testsuite/gcc.target/i386/sse-23.c (working copy) > @@ -462,6 +462,8 @@ > #define __builtin_ia32_insertf32x8_mask(A, B, F, D, E) > __builtin_ia32_insertf32x8_mask(A, B, 1, D, E) > #define __builtin_ia32_fpclassss(A, D) __builtin_ia32_fpclassss(A, 1) > #define __builtin_ia32_fpclasssd(A, D) __builtin_ia32_fpclasssd(A, 1) > +#define __builtin_ia32_fpclassss_mask(A, D, U) > __builtin_ia32_fpclassss_mask(A, 1, U) > +#define __builtin_ia32_fpclasssd_mask(A, D, U) > __builtin_ia32_fpclasssd_mask(A, 1, U) > #define __builtin_ia32_fpclassps512_mask(A, D, C) > __builtin_ia32_fpclassps512_mask(A, 1, C) > #define __builtin_ia32_fpclasspd512_mask(A, D, C) > __builtin_ia32_fpclasspd512_mask(A, 1, C) > #define __builtin_ia32_extracti64x2_512_mask(A, E, C, D) > __builtin_ia32_extracti64x2_512_mask(A, 1, C, D) > > -- > BR, > Hongtao
On Thu, Mar 28, 2019 at 7:47 AM Hongtao Liu <crazylht@gmail.com> wrote: > > Hi Uros: > would you help to review this patch? This is AVX512F patch, you will need the approval from the maintainer first. I have no plans to maintain AVX512 beyond rubber-stamping OK dead obvious regression from a reputable contributors. It is simply too much involvment for me. If the appointed maintainer doesn't respond anymore, then I suggest you raise the issue with GCC steering committe. Uros. > Regards, > Hongtao. > > On Sun, Mar 24, 2019 at 8:13 PM Hongtao Liu <crazylht@gmail.com> wrote: > > > > Hi: > > The following patch adds forgotten avx512f fpclass instrinsics for > > masked scalar operations. > > > > Bootstrapped/regtested on x86_64-linux and i686-linux (on skylake-avx512), > > ok for trunk? > > > > Index: ChangeLog > > =================================================================== > > --- ChangeLog (revision 269894) > > +++ ChangeLog (working copy) > > @@ -1,3 +1,16 @@ > > +2019-03-24 Hongtao Liu <hongtao.liu@intel.com> > > + > > + PR target/89803 > > + * config/i386/avx512dqintrin.h > > + (_mm_mask_fpclass_ss_mask,_mm_mask_fpclass_sd_mask): > > + New intrinsics. > > + * config/i386/i386-builtin.def > > + (__builtin_ia32_fpclassss_mask, _builtin_ia32_fpclasssd_mask): > > + New builtins. > > + * config/i386/sse.md > > + (define_insn "avx512dq_vmfpclass<mode><mask_scalar_merge_name>): > > + Modified with mask. > > + > > 2019-03-23 Segher Boessenkool <segher@kernel.crashing.org> > > > > * config/rs6000/xmmintrin.h (_mm_movemask_pi8): Implement for 32-bit > > Index: config/i386/avx512dqintrin.h > > =================================================================== > > --- config/i386/avx512dqintrin.h (revision 269894) > > +++ config/i386/avx512dqintrin.h (working copy) > > @@ -1372,6 +1372,20 @@ > > return (__mmask8) __builtin_ia32_fpclasssd ((__v2df) __A, __imm); > > } > > > > +extern __inline __mmask8 > > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > +_mm_mask_fpclass_ss_mask (__mmask8 __U, __m128 __A, const int __imm) > > +{ > > + return (__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) __A, __imm, __U); > > +} > > + > > +extern __inline __mmask8 > > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > +_mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm) > > +{ > > + return (__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) __A, __imm, __U); > > +} > > + > > extern __inline __m512i > > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > > _mm512_cvtt_roundpd_epi64 (__m512d __A, const int __R) > > @@ -2623,6 +2637,12 @@ > > #define _mm_fpclass_sd_mask(X, C) \ > > ((__mmask8) __builtin_ia32_fpclasssd ((__v2df) (__m128d) (X), (int) (C))) \ > > > > +#define _mm_mask_fpclass_ss_mask(X, C, U) \ > > + ((__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) (__m128) (X), > > (int) (C)), (__mmask8) (U)) > > + > > +#define _mm_mask_fpclass_sd_mask(X, C, U) \ > > + ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X), > > (int) (C)), (__mmask8) (U)) > > + > > #define _mm512_mask_fpclass_pd_mask(u, X, C) \ > > ((__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) (__m512d) (X), \ > > (int) (C), (__mmask8)(u))) > > Index: config/i386/i386-builtin.def > > =================================================================== > > --- config/i386/i386-builtin.def (revision 269894) > > +++ config/i386/i386-builtin.def (working copy) > > @@ -2082,9 +2082,11 @@ > > BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, > > CODE_FOR_avx512dq_fpclassv4df_mask, > > "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, > > UNKNOWN, (int) QI_FTYPE_V4DF_INT_UQI) > > BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, > > CODE_FOR_avx512dq_fpclassv2df_mask, > > "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, > > UNKNOWN, (int) QI_FTYPE_V2DF_INT_UQI) > > BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vmfpclassv2df, > > "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) > > QI_FTYPE_V2DF_INT) > > +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, > > CODE_FOR_avx512dq_vmfpclassv2df_mask, "__builtin_ia32_fpclasssd_mask", > > IX86_BUILTIN_FPCLASSSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_INT_UQI) > > BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, > > CODE_FOR_avx512dq_fpclassv8sf_mask, > > "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, > > UNKNOWN, (int) QI_FTYPE_V8SF_INT_UQI) > > BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, > > CODE_FOR_avx512dq_fpclassv4sf_mask, > > "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, > > UNKNOWN, (int) QI_FTYPE_V4SF_INT_UQI) > > BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vmfpclassv4sf, > > "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) > > QI_FTYPE_V4SF_INT) > > +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, > > CODE_FOR_avx512dq_vmfpclassv4sf_mask, "__builtin_ia32_fpclassss_mask", > > IX86_BUILTIN_FPCLASSSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_INT_UQI) > > BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, > > CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", > > IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) UHI_FTYPE_V16QI) > > BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, > > CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", > > IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) USI_FTYPE_V32QI) > > BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, > > CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", > > IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) UQI_FTYPE_V8HI) > > Index: config/i386/sse.md > > =================================================================== > > --- config/i386/sse.md (revision 269894) > > +++ config/i386/sse.md (working copy) > > @@ -21111,7 +21111,7 @@ > > (set_attr "prefix" "evex") > > (set_attr "mode" "<MODE>")]) > > > > -(define_insn "avx512dq_vmfpclass<mode>" > > +(define_insn "avx512dq_vmfpclass<mode><mask_scalar_merge_name>" > > [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k") > > (and:<avx512fmaskmode> > > (unspec:<avx512fmaskmode> > > @@ -21120,7 +21120,7 @@ > > UNSPEC_FPCLASS) > > (const_int 1)))] > > "TARGET_AVX512DQ" > > - "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"; > > + "vfpclass<ssescalarmodesuffix>\t{%2, %1, > > %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, > > %2}"; > > [(set_attr "type" "sse") > > (set_attr "length_immediate" "1") > > (set_attr "prefix" "evex") > > Index: testsuite/ChangeLog > > =================================================================== > > --- testsuite/ChangeLog (revision 269894) > > +++ testsuite/ChangeLog (working copy) > > @@ -1,3 +1,19 @@ > > +2019-03-24 Hongtao Liu <hongtao.liu@intel.com> > > + > > + PR target/89803 > > + * gcc.target/i386/avx-1.c (__builtin_ia32_fpclassss_mask, > > + __builtin_ia32_fpclasssd_mask): Define. > > + * gcc.target/i386/sse-13.c (__builtin_ia32_fpclassss_mask, > > + __builtin_ia32_fpclasssd_mask): Likewise. > > + * gcc.target/i386/sse-23.c (__builtin_ia32_fpclassss_mask) > > + (__builtin_ia32_fpclasssd_mask): Likewise. > > + * gcc.target/i386/avx512dq-vfpclassss-2.c: New. > > + * gcc.target/i386/avx512dq-vfpclasssd-2.c: Likewise. > > + * gcc.target/i386/avx512dq-vfpclassss-1.c (avx512f_test): > > + Add test for _mm_mask_fpclass_ss_mask. > > + * gcc.target/i386/avx512dq-vfpclasssd-1.c (avx512f_test): > > + Add test for _mm_mask_fpclass_sd_mask. > > + > > 2019-03-22 Vladimir Makarov <vmakarov@redhat.com> > > > > PR rtl-optimization/89676 > > Index: testsuite/gcc.target/i386/avx-1.c > > =================================================================== > > --- testsuite/gcc.target/i386/avx-1.c (revision 269894) > > +++ testsuite/gcc.target/i386/avx-1.c (working copy) > > @@ -446,6 +446,8 @@ > > #define __builtin_ia32_insertf32x8_mask(A, B, F, D, E) > > __builtin_ia32_insertf32x8_mask(A, B, 1, D, E) > > #define __builtin_ia32_fpclassss(A, D) __builtin_ia32_fpclassss(A, 1) > > #define __builtin_ia32_fpclasssd(A, D) __builtin_ia32_fpclasssd(A, 1) > > +#define __builtin_ia32_fpclassss_mask(A, D, U) > > __builtin_ia32_fpclassss_mask(A, 1, U) > > +#define __builtin_ia32_fpclasssd_mask(A, D, U) > > __builtin_ia32_fpclasssd_mask(A, 1, U) > > #define __builtin_ia32_fpclassps512_mask(A, D, C) > > __builtin_ia32_fpclassps512_mask(A, 1, C) > > #define __builtin_ia32_fpclasspd512_mask(A, D, C) > > __builtin_ia32_fpclasspd512_mask(A, 1, C) > > #define __builtin_ia32_extracti64x2_512_mask(A, E, C, D) > > __builtin_ia32_extracti64x2_512_mask(A, 1, C, D) > > Index: testsuite/gcc.target/i386/avx512dq-vfpclasssd-1.c > > =================================================================== > > --- testsuite/gcc.target/i386/avx512dq-vfpclasssd-1.c (revision 269894) > > +++ testsuite/gcc.target/i386/avx512dq-vfpclasssd-1.c (working copy) > > @@ -1,6 +1,7 @@ > > /* { dg-do compile } */ > > /* { dg-options "-mavx512dq -O2" } */ > > /* { dg-final { scan-assembler-times "vfpclasssd\[ > > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } > > } */ > > +/* { dg-final { scan-assembler-times "vfpclasssd\[ > > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[0-7\]\}(?:\n|\[ > > \\t\]+#)" 1 } } */ > > > > #include <immintrin.h> > > > > @@ -11,4 +12,5 @@ > > avx512dq_test (void) > > { > > m8 = _mm_fpclass_sd_mask (x128, 13); > > + m8 = _mm_mask_fpclass_sd_mask (m8, x128, 13); > > } > > Index: testsuite/gcc.target/i386/avx512dq-vfpclasssd-2.c > > =================================================================== > > --- testsuite/gcc.target/i386/avx512dq-vfpclasssd-2.c (nonexistent) > > +++ testsuite/gcc.target/i386/avx512dq-vfpclasssd-2.c (working copy) > > @@ -0,0 +1,75 @@ > > +/* { dg-do run } */ > > +/* { dg-options "-O2 -mavx512dq" } */ > > +/* { dg-require-effective-target avx512dq } */ > > + > > +#define AVX512DQ > > +#include "avx512f-helper.h" > > + > > +#include <math.h> > > +#include <limits.h> > > +#include <float.h> > > +#define SIZE (128 / 64) > > +#include "avx512f-mask-type.h" > > + > > +#ifndef __FPCLASSSD__ > > +#define __FPCLASSSD__ > > +int check_fp_class_dp (double src, int imm) > > +{ > > + int qNaN_res = isnan (src); > > + int sNaN_res = isnan (src); > > + int Pzero_res = (src == 0.0); > > + int Nzero_res = (src == -0.0); > > + int PInf_res = (isinf (src) == 1); > > + int NInf_res = (isinf (src) == -1); > > + int Denorm_res = (fpclassify (src) == FP_SUBNORMAL); > > + int FinNeg_res = __builtin_finite (src) && (src < 0); > > + > > + int result = (((imm & 1) && qNaN_res) > > + || (((imm >> 1) & 1) && Pzero_res) > > + || (((imm >> 2) & 1) && Nzero_res) > > + || (((imm >> 3) & 1) && PInf_res) > > + || (((imm >> 4) & 1) && NInf_res) > > + || (((imm >> 5) & 1) && Denorm_res) > > + || (((imm >> 6) & 1) && FinNeg_res) > > + || (((imm >> 7) & 1) && sNaN_res)); > > + return result; > > +} > > +#endif > > + > > +__mmask8 > > +CALC (double *s1, int imm) > > +{ > > + int i; > > + __mmask8 res = 0; > > + > > + if (check_fp_class_dp(s1[0], imm)) > > + res = res | 1; > > + > > + return res; > > +} > > + > > +void > > +TEST (void) > > +{ > > + int i; > > + union128d src; > > + __mmask8 res1, res2, res_ref = 0; > > + __mmask8 mask = MASK_VALUE; > > + > > + src.a[0] = 1.0 / 0.0; > > + for (i = 1; i < SIZE; i++) > > + { > > + src.a[i] = -24.43 + 0.6 * i; > > + } > > + > > + res1 = _mm_fpclass_sd_mask (src.x, 0xFF); > > + res2 = _mm_mask_fpclass_sd_mask (mask, src.x, 0xFF); > > + > > + res_ref = CALC (src.a, 0xFF); > > + > > + if (res_ref != res1) > > + abort (); > > + > > + if ((res_ref & mask) != res2) > > + abort (); > > +} > > Index: testsuite/gcc.target/i386/avx512dq-vfpclassss-1.c > > =================================================================== > > --- testsuite/gcc.target/i386/avx512dq-vfpclassss-1.c (revision 269894) > > +++ testsuite/gcc.target/i386/avx512dq-vfpclassss-1.c (working copy) > > @@ -1,6 +1,7 @@ > > /* { dg-do compile } */ > > /* { dg-options "-mavx512dq -O2" } */ > > /* { dg-final { scan-assembler-times "vfpclassss\[ > > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } > > } */ > > +/* { dg-final { scan-assembler-times "vfpclassss\[ > > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[0-7\]\}(?:\n|\[ > > \\t\]+#)" 1 } } */ > > > > #include <immintrin.h> > > > > @@ -11,4 +12,5 @@ > > avx512dq_test (void) > > { > > m8 = _mm_fpclass_ss_mask (x128, 13); > > + m8 = _mm_mask_fpclass_ss_mask (m8, x128, 13); > > } > > Index: testsuite/gcc.target/i386/avx512dq-vfpclassss-2.c > > =================================================================== > > --- testsuite/gcc.target/i386/avx512dq-vfpclassss-2.c (nonexistent) > > +++ testsuite/gcc.target/i386/avx512dq-vfpclassss-2.c (working copy) > > @@ -0,0 +1,76 @@ > > +/* { dg-do run } */ > > +/* { dg-options "-O2 -mavx512dq" } */ > > +/* { dg-require-effective-target avx512dq } */ > > + > > +#define AVX512DQ > > +#include "avx512f-helper.h" > > + > > +#include <math.h> > > +#include <limits.h> > > +#include <float.h> > > +#include "avx512f-mask-type.h" > > +#define SIZE (128 / 32) > > + > > +#ifndef __FPCLASSSS__ > > +#define __FPCLASSSS__ > > +int check_fp_class_sp (float src, int imm) > > +{ > > + int qNaN_res = isnan (src); > > + int sNaN_res = isnan (src); > > + int Pzero_res = (src == 0.0); > > + int Nzero_res = (src == -0.0); > > + int PInf_res = (isinf (src) == 1); > > + int NInf_res = (isinf (src) == -1); > > + int Denorm_res = (fpclassify (src) == FP_SUBNORMAL); > > + int FinNeg_res = __builtin_finite (src) && (src < 0); > > + > > + int result = (((imm & 1) && qNaN_res) > > + || (((imm >> 1) & 1) && Pzero_res) > > + || (((imm >> 2) & 1) && Nzero_res) > > + || (((imm >> 3) & 1) && PInf_res) > > + || (((imm >> 4) & 1) && NInf_res) > > + || (((imm >> 5) & 1) && Denorm_res) > > + || (((imm >> 6) & 1) && FinNeg_res) > > + || (((imm >> 7) & 1) && sNaN_res)); > > + return result; > > +} > > +#endif > > + > > +__mmask8 > > +CALC (float *s1, int imm) > > +{ > > + int i; > > + __mmask8 res = 0; > > + > > + if (check_fp_class_sp(s1[0], imm)) > > + res = res | 1; > > + > > + return res; > > +} > > + > > +void > > +TEST (void) > > +{ > > + int i; > > + union128 src; > > + __mmask8 res1, res2, res_ref = 0; > > + __mmask8 mask = MASK_VALUE; > > + > > + src.a[0] = 1.0 / 0.0; > > + for (i = 1; i < SIZE; i++) > > + { > > + src.a[i] = -24.43 + 0.6 * i; > > + } > > + > > + res1 = _mm_fpclass_ss_mask (src.x, 0xFF); > > + res2 = _mm_mask_fpclass_ss_mask (mask, src.x, 0xFF); > > + > > + > > + res_ref = CALC (src.a, 0xFF); > > + > > + if (res_ref != res1) > > + abort (); > > + > > + if ((mask & res_ref) != res2) > > + abort (); > > +} > > Index: testsuite/gcc.target/i386/sse-13.c > > =================================================================== > > --- testsuite/gcc.target/i386/sse-13.c (revision 269894) > > +++ testsuite/gcc.target/i386/sse-13.c (working copy) > > @@ -463,6 +463,8 @@ > > #define __builtin_ia32_insertf32x8_mask(A, B, F, D, E) > > __builtin_ia32_insertf32x8_mask(A, B, 1, D, E) > > #define __builtin_ia32_fpclassss(A, D) __builtin_ia32_fpclassss(A, 1) > > #define __builtin_ia32_fpclasssd(A, D) __builtin_ia32_fpclasssd(A, 1) > > +#define __builtin_ia32_fpclassss_mask(A, D, U) > > __builtin_ia32_fpclassss_mask(A, 1, U) > > +#define __builtin_ia32_fpclasssd_mask(A, D, U) > > __builtin_ia32_fpclasssd_mask(A, 1, U) > > #define __builtin_ia32_fpclassps512_mask(A, D, C) > > __builtin_ia32_fpclassps512_mask(A, 1, C) > > #define __builtin_ia32_fpclasspd512_mask(A, D, C) > > __builtin_ia32_fpclasspd512_mask(A, 1, C) > > #define __builtin_ia32_extracti64x2_512_mask(A, E, C, D) > > __builtin_ia32_extracti64x2_512_mask(A, 1, C, D) > > Index: testsuite/gcc.target/i386/sse-23.c > > =================================================================== > > --- testsuite/gcc.target/i386/sse-23.c (revision 269894) > > +++ testsuite/gcc.target/i386/sse-23.c (working copy) > > @@ -462,6 +462,8 @@ > > #define __builtin_ia32_insertf32x8_mask(A, B, F, D, E) > > __builtin_ia32_insertf32x8_mask(A, B, 1, D, E) > > #define __builtin_ia32_fpclassss(A, D) __builtin_ia32_fpclassss(A, 1) > > #define __builtin_ia32_fpclasssd(A, D) __builtin_ia32_fpclasssd(A, 1) > > +#define __builtin_ia32_fpclassss_mask(A, D, U) > > __builtin_ia32_fpclassss_mask(A, 1, U) > > +#define __builtin_ia32_fpclasssd_mask(A, D, U) > > __builtin_ia32_fpclasssd_mask(A, 1, U) > > #define __builtin_ia32_fpclassps512_mask(A, D, C) > > __builtin_ia32_fpclassps512_mask(A, 1, C) > > #define __builtin_ia32_fpclasspd512_mask(A, D, C) > > __builtin_ia32_fpclasspd512_mask(A, 1, C) > > #define __builtin_ia32_extracti64x2_512_mask(A, E, C, D) > > __builtin_ia32_extracti64x2_512_mask(A, 1, C, D) > > > > -- > > BR, > > Hongtao > > > > -- > BR, > Hongtao
On 3/28/19 1:38 AM, Uros Bizjak wrote: > On Thu, Mar 28, 2019 at 7:47 AM Hongtao Liu <crazylht@gmail.com> wrote: >> >> Hi Uros: >> would you help to review this patch? > > This is AVX512F patch, you will need the approval from the maintainer > first. I have no plans to maintain AVX512 beyond rubber-stamping OK > dead obvious regression from a reputable contributors. It is simply > too much involvment for me. If the appointed maintainer doesn't > respond anymore, then I suggest you raise the issue with GCC steering > committe. Also note, this is not fixing a regression relative to a prior release. I'd prefer to see this moved to gcc-10 unless there is a strong justification for pushing it into gcc-9. The subject like should also be changed to reference the right bz. I think the right one is 89803. jeff
On Sat, Mar 30, 2019 at 5:34 AM Jeff Law <law@redhat.com> wrote: > > On 3/28/19 1:38 AM, Uros Bizjak wrote: > > On Thu, Mar 28, 2019 at 7:47 AM Hongtao Liu <crazylht@gmail.com> wrote: > >> > >> Hi Uros: > >> would you help to review this patch? > > > > This is AVX512F patch, you will need the approval from the maintainer > > first. I have no plans to maintain AVX512 beyond rubber-stamping OK > > dead obvious regression from a reputable contributors. It is simply > > too much involvment for me. If the appointed maintainer doesn't > > respond anymore, then I suggest you raise the issue with GCC steering > > committe. > Also note, this is not fixing a regression relative to a prior release. > I'd prefer to see this moved to gcc-10 unless there is a strong > justification for pushing it into gcc-9. > > The subject like should also be changed to reference the right bz. I > think the right one is 89803. > > jeff Yes, it's PR 89803, sorry for typo. And thank you for you explanation.
Index: ChangeLog =================================================================== --- ChangeLog (revision 269894) +++ ChangeLog (working copy) @@ -1,3 +1,16 @@ +2019-03-24 Hongtao Liu <hongtao.liu@intel.com> + + PR target/89803 + * config/i386/avx512dqintrin.h + (_mm_mask_fpclass_ss_mask,_mm_mask_fpclass_sd_mask): + New intrinsics. + * config/i386/i386-builtin.def + (__builtin_ia32_fpclassss_mask, _builtin_ia32_fpclasssd_mask): + New builtins. + * config/i386/sse.md + (define_insn "avx512dq_vmfpclass<mode><mask_scalar_merge_name>): + Modified with mask. + 2019-03-23 Segher Boessenkool <segher@kernel.crashing.org> * config/rs6000/xmmintrin.h (_mm_movemask_pi8): Implement for 32-bit Index: config/i386/avx512dqintrin.h =================================================================== --- config/i386/avx512dqintrin.h (revision 269894) +++ config/i386/avx512dqintrin.h (working copy) @@ -1372,6 +1372,20 @@ return (__mmask8) __builtin_ia32_fpclasssd ((__v2df) __A, __imm); } +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fpclass_ss_mask (__mmask8 __U, __m128 __A, const int __imm) +{ + return (__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) __A, __imm, __U); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm) +{ + return (__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) __A, __imm, __U); +} + extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_cvtt_roundpd_epi64 (__m512d __A, const int __R) @@ -2623,6 +2637,12 @@ #define _mm_fpclass_sd_mask(X, C) \ ((__mmask8) __builtin_ia32_fpclasssd ((__v2df) (__m128d) (X), (int) (C))) \ +#define _mm_mask_fpclass_ss_mask(X, C, U) \ + ((__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) (__m128) (X), (int) (C)), (__mmask8) (U)) + +#define _mm_mask_fpclass_sd_mask(X, C, U) \ + ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X), (int) (C)), (__mmask8) (U)) + #define _mm512_mask_fpclass_pd_mask(u, X, C) \ ((__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) (__m512d) (X), \ (int) (C), (__mmask8)(u))) Index: config/i386/i386-builtin.def =================================================================== --- config/i386/i386-builtin.def (revision 269894) +++ config/i386/i386-builtin.def (working copy) @@ -2082,9 +2082,11 @@ BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_UQI) BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_UQI) BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vmfpclassv2df_mask, "__builtin_ia32_fpclasssd_mask", IX86_BUILTIN_FPCLASSSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_INT_UQI) BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_UQI) BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_UQI) BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vmfpclassv4sf_mask, "__builtin_ia32_fpclassss_mask", IX86_BUILTIN_FPCLASSSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_INT_UQI) BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) UHI_FTYPE_V16QI) BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) USI_FTYPE_V32QI) BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) UQI_FTYPE_V8HI) Index: config/i386/sse.md =================================================================== --- config/i386/sse.md (revision 269894) +++ config/i386/sse.md (working copy) @@ -21111,7 +21111,7 @@ (set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) -(define_insn "avx512dq_vmfpclass<mode>" +(define_insn "avx512dq_vmfpclass<mode><mask_scalar_merge_name>" [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k") (and:<avx512fmaskmode> (unspec:<avx512fmaskmode> @@ -21120,7 +21120,7 @@ UNSPEC_FPCLASS) (const_int 1)))] "TARGET_AVX512DQ" - "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"; + "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"; [(set_attr "type" "sse") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") Index: testsuite/ChangeLog =================================================================== --- testsuite/ChangeLog (revision 269894) +++ testsuite/ChangeLog (working copy) @@ -1,3 +1,19 @@ +2019-03-24 Hongtao Liu <hongtao.liu@intel.com> + + PR target/89803 + * gcc.target/i386/avx-1.c (__builtin_ia32_fpclassss_mask, + __builtin_ia32_fpclasssd_mask): Define. + * gcc.target/i386/sse-13.c (__builtin_ia32_fpclassss_mask, + __builtin_ia32_fpclasssd_mask): Likewise. + * gcc.target/i386/sse-23.c (__builtin_ia32_fpclassss_mask) + (__builtin_ia32_fpclasssd_mask): Likewise. + * gcc.target/i386/avx512dq-vfpclassss-2.c: New. + * gcc.target/i386/avx512dq-vfpclasssd-2.c: Likewise. + * gcc.target/i386/avx512dq-vfpclassss-1.c (avx512f_test): + Add test for _mm_mask_fpclass_ss_mask. + * gcc.target/i386/avx512dq-vfpclasssd-1.c (avx512f_test): + Add test for _mm_mask_fpclass_sd_mask. + 2019-03-22 Vladimir Makarov <vmakarov@redhat.com> PR rtl-optimization/89676 Index: testsuite/gcc.target/i386/avx-1.c =================================================================== --- testsuite/gcc.target/i386/avx-1.c (revision 269894) +++ testsuite/gcc.target/i386/avx-1.c (working copy) @@ -446,6 +446,8 @@ #define __builtin_ia32_insertf32x8_mask(A, B, F, D, E) __builtin_ia32_insertf32x8_mask(A, B, 1, D, E) #define __builtin_ia32_fpclassss(A, D) __builtin_ia32_fpclassss(A, 1) #define __builtin_ia32_fpclasssd(A, D) __builtin_ia32_fpclasssd(A, 1) +#define __builtin_ia32_fpclassss_mask(A, D, U) __builtin_ia32_fpclassss_mask(A, 1, U) +#define __builtin_ia32_fpclasssd_mask(A, D, U) __builtin_ia32_fpclasssd_mask(A, 1, U) #define __builtin_ia32_fpclassps512_mask(A, D, C) __builtin_ia32_fpclassps512_mask(A, 1, C) #define __builtin_ia32_fpclasspd512_mask(A, D, C) __builtin_ia32_fpclasspd512_mask(A, 1, C) #define __builtin_ia32_extracti64x2_512_mask(A, E, C, D) __builtin_ia32_extracti64x2_512_mask(A, 1, C, D) Index: testsuite/gcc.target/i386/avx512dq-vfpclasssd-1.c =================================================================== --- testsuite/gcc.target/i386/avx512dq-vfpclasssd-1.c (revision 269894) +++ testsuite/gcc.target/i386/avx512dq-vfpclasssd-1.c (working copy) @@ -1,6 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-mavx512dq -O2" } */ /* { dg-final { scan-assembler-times "vfpclasssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfpclasssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ #include <immintrin.h> @@ -11,4 +12,5 @@ avx512dq_test (void) { m8 = _mm_fpclass_sd_mask (x128, 13); + m8 = _mm_mask_fpclass_sd_mask (m8, x128, 13); } Index: testsuite/gcc.target/i386/avx512dq-vfpclasssd-2.c =================================================================== --- testsuite/gcc.target/i386/avx512dq-vfpclasssd-2.c (nonexistent) +++ testsuite/gcc.target/i386/avx512dq-vfpclasssd-2.c (working copy) @@ -0,0 +1,75 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512dq" } */ +/* { dg-require-effective-target avx512dq } */ + +#define AVX512DQ +#include "avx512f-helper.h" + +#include <math.h> +#include <limits.h> +#include <float.h> +#define SIZE (128 / 64) +#include "avx512f-mask-type.h" + +#ifndef __FPCLASSSD__ +#define __FPCLASSSD__ +int check_fp_class_dp (double src, int imm) +{ + int qNaN_res = isnan (src); + int sNaN_res = isnan (src); + int Pzero_res = (src == 0.0); + int Nzero_res = (src == -0.0); + int PInf_res = (isinf (src) == 1); + int NInf_res = (isinf (src) == -1); + int Denorm_res = (fpclassify (src) == FP_SUBNORMAL); + int FinNeg_res = __builtin_finite (src) && (src < 0); + + int result = (((imm & 1) && qNaN_res) + || (((imm >> 1) & 1) && Pzero_res) + || (((imm >> 2) & 1) && Nzero_res) + || (((imm >> 3) & 1) && PInf_res) + || (((imm >> 4) & 1) && NInf_res) + || (((imm >> 5) & 1) && Denorm_res) + || (((imm >> 6) & 1) && FinNeg_res) + || (((imm >> 7) & 1) && sNaN_res)); + return result; +} +#endif + +__mmask8 +CALC (double *s1, int imm) +{ + int i; + __mmask8 res = 0; + + if (check_fp_class_dp(s1[0], imm)) + res = res | 1; + + return res; +} + +void +TEST (void) +{ + int i; + union128d src; + __mmask8 res1, res2, res_ref = 0; + __mmask8 mask = MASK_VALUE; + + src.a[0] = 1.0 / 0.0; + for (i = 1; i < SIZE; i++) + { + src.a[i] = -24.43 + 0.6 * i; + } + + res1 = _mm_fpclass_sd_mask (src.x, 0xFF); + res2 = _mm_mask_fpclass_sd_mask (mask, src.x, 0xFF); + + res_ref = CALC (src.a, 0xFF); + + if (res_ref != res1) + abort (); + + if ((res_ref & mask) != res2) + abort (); +} Index: testsuite/gcc.target/i386/avx512dq-vfpclassss-1.c =================================================================== --- testsuite/gcc.target/i386/avx512dq-vfpclassss-1.c (revision 269894) +++ testsuite/gcc.target/i386/avx512dq-vfpclassss-1.c (working copy) @@ -1,6 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-mavx512dq -O2" } */ /* { dg-final { scan-assembler-times "vfpclassss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vfpclassss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[0-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ #include <immintrin.h> @@ -11,4 +12,5 @@ avx512dq_test (void) { m8 = _mm_fpclass_ss_mask (x128, 13); + m8 = _mm_mask_fpclass_ss_mask (m8, x128, 13); } Index: testsuite/gcc.target/i386/avx512dq-vfpclassss-2.c =================================================================== --- testsuite/gcc.target/i386/avx512dq-vfpclassss-2.c (nonexistent) +++ testsuite/gcc.target/i386/avx512dq-vfpclassss-2.c (working copy) @@ -0,0 +1,76 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512dq" } */ +/* { dg-require-effective-target avx512dq } */ + +#define AVX512DQ +#include "avx512f-helper.h" + +#include <math.h> +#include <limits.h> +#include <float.h> +#include "avx512f-mask-type.h" +#define SIZE (128 / 32) + +#ifndef __FPCLASSSS__ +#define __FPCLASSSS__ +int check_fp_class_sp (float src, int imm) +{ + int qNaN_res = isnan (src); + int sNaN_res = isnan (src); + int Pzero_res = (src == 0.0); + int Nzero_res = (src == -0.0); + int PInf_res = (isinf (src) == 1); + int NInf_res = (isinf (src) == -1); + int Denorm_res = (fpclassify (src) == FP_SUBNORMAL); + int FinNeg_res = __builtin_finite (src) && (src < 0); + + int result = (((imm & 1) && qNaN_res) + || (((imm >> 1) & 1) && Pzero_res) + || (((imm >> 2) & 1) && Nzero_res) + || (((imm >> 3) & 1) && PInf_res) + || (((imm >> 4) & 1) && NInf_res) + || (((imm >> 5) & 1) && Denorm_res) + || (((imm >> 6) & 1) && FinNeg_res) + || (((imm >> 7) & 1) && sNaN_res)); + return result; +} +#endif + +__mmask8 +CALC (float *s1, int imm) +{ + int i; + __mmask8 res = 0; + + if (check_fp_class_sp(s1[0], imm)) + res = res | 1; + + return res; +} + +void +TEST (void) +{ + int i; + union128 src; + __mmask8 res1, res2, res_ref = 0; + __mmask8 mask = MASK_VALUE; + + src.a[0] = 1.0 / 0.0; + for (i = 1; i < SIZE; i++) + { + src.a[i] = -24.43 + 0.6 * i; + } + + res1 = _mm_fpclass_ss_mask (src.x, 0xFF); + res2 = _mm_mask_fpclass_ss_mask (mask, src.x, 0xFF); + + + res_ref = CALC (src.a, 0xFF); + + if (res_ref != res1) + abort (); + + if ((mask & res_ref) != res2) + abort (); +} Index: testsuite/gcc.target/i386/sse-13.c =================================================================== --- testsuite/gcc.target/i386/sse-13.c (revision 269894) +++ testsuite/gcc.target/i386/sse-13.c (working copy) @@ -463,6 +463,8 @@ #define __builtin_ia32_insertf32x8_mask(A, B, F, D, E) __builtin_ia32_insertf32x8_mask(A, B, 1, D, E) #define __builtin_ia32_fpclassss(A, D) __builtin_ia32_fpclassss(A, 1) #define __builtin_ia32_fpclasssd(A, D) __builtin_ia32_fpclasssd(A, 1) +#define __builtin_ia32_fpclassss_mask(A, D, U) __builtin_ia32_fpclassss_mask(A, 1, U) +#define __builtin_ia32_fpclasssd_mask(A, D, U) __builtin_ia32_fpclasssd_mask(A, 1, U) #define __builtin_ia32_fpclassps512_mask(A, D, C) __builtin_ia32_fpclassps512_mask(A, 1, C) #define __builtin_ia32_fpclasspd512_mask(A, D, C) __builtin_ia32_fpclasspd512_mask(A, 1, C) #define __builtin_ia32_extracti64x2_512_mask(A, E, C, D) __builtin_ia32_extracti64x2_512_mask(A, 1, C, D) Index: testsuite/gcc.target/i386/sse-23.c =================================================================== --- testsuite/gcc.target/i386/sse-23.c (revision 269894) +++ testsuite/gcc.target/i386/sse-23.c (working copy) @@ -462,6 +462,8 @@ #define __builtin_ia32_insertf32x8_mask(A, B, F, D, E) __builtin_ia32_insertf32x8_mask(A, B, 1, D, E) #define __builtin_ia32_fpclassss(A, D) __builtin_ia32_fpclassss(A, 1) #define __builtin_ia32_fpclasssd(A, D) __builtin_ia32_fpclasssd(A, 1) +#define __builtin_ia32_fpclassss_mask(A, D, U) __builtin_ia32_fpclassss_mask(A, 1, U) +#define __builtin_ia32_fpclasssd_mask(A, D, U) __builtin_ia32_fpclasssd_mask(A, 1, U)