diff mbox series

Add missing avx512dqintrin.h _mm_mask_fpclass_s[sd]_mask (PR target/897803)

Message ID CAMZc-bxFVR4+WVJrAcvT_5HHJqPOasbBgftqA3EMkRqOsVG31g@mail.gmail.com
State New
Headers show
Series Add missing avx512dqintrin.h _mm_mask_fpclass_s[sd]_mask (PR target/897803) | expand

Commit Message

Hongtao Liu March 24, 2019, 12:13 p.m. UTC
Hi:
  The following patch adds forgotten avx512f fpclass instrinsics for
masked scalar operations.

Bootstrapped/regtested on x86_64-linux and i686-linux (on skylake-avx512),
ok for trunk?

 #define __builtin_ia32_fpclassps512_mask(A, D, C)
__builtin_ia32_fpclassps512_mask(A, 1, C)
 #define __builtin_ia32_fpclasspd512_mask(A, D, C)
__builtin_ia32_fpclasspd512_mask(A, 1, C)
 #define __builtin_ia32_extracti64x2_512_mask(A, E, C, D)
__builtin_ia32_extracti64x2_512_mask(A, 1, C, D)

Comments

Hongtao Liu March 28, 2019, 6:47 a.m. UTC | #1
Hi Uros:
  would you help to review this patch?

Regards,
Hongtao.

On Sun, Mar 24, 2019 at 8:13 PM Hongtao Liu <crazylht@gmail.com> wrote:
>
> Hi:
>   The following patch adds forgotten avx512f fpclass instrinsics for
> masked scalar operations.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux (on skylake-avx512),
> ok for trunk?
>
> Index: ChangeLog
> ===================================================================
> --- ChangeLog (revision 269894)
> +++ ChangeLog (working copy)
> @@ -1,3 +1,16 @@
> +2019-03-24 Hongtao Liu <hongtao.liu@intel.com>
> +
> + PR target/89803
> + * config/i386/avx512dqintrin.h
> + (_mm_mask_fpclass_ss_mask,_mm_mask_fpclass_sd_mask):
> + New intrinsics.
> + * config/i386/i386-builtin.def
> + (__builtin_ia32_fpclassss_mask, _builtin_ia32_fpclasssd_mask):
> + New builtins.
> + * config/i386/sse.md
> + (define_insn "avx512dq_vmfpclass<mode><mask_scalar_merge_name>):
> + Modified with mask.
> +
>  2019-03-23  Segher Boessenkool  <segher@kernel.crashing.org>
>
>   * config/rs6000/xmmintrin.h (_mm_movemask_pi8): Implement for 32-bit
> Index: config/i386/avx512dqintrin.h
> ===================================================================
> --- config/i386/avx512dqintrin.h (revision 269894)
> +++ config/i386/avx512dqintrin.h (working copy)
> @@ -1372,6 +1372,20 @@
>    return (__mmask8) __builtin_ia32_fpclasssd ((__v2df) __A, __imm);
>  }
>
> +extern __inline __mmask8
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_mask_fpclass_ss_mask (__mmask8 __U, __m128 __A, const int __imm)
> +{
> +  return (__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) __A, __imm, __U);
> +}
> +
> +extern __inline __mmask8
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
> +{
> +  return (__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) __A, __imm, __U);
> +}
> +
>  extern __inline __m512i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_cvtt_roundpd_epi64 (__m512d __A, const int __R)
> @@ -2623,6 +2637,12 @@
>  #define _mm_fpclass_sd_mask(X, C) \
>    ((__mmask8) __builtin_ia32_fpclasssd ((__v2df) (__m128d) (X), (int) (C))) \
>
> +#define _mm_mask_fpclass_ss_mask(X, C, U) \
> +  ((__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) (__m128) (X),
> (int) (C)), (__mmask8) (U))
> +
> +#define _mm_mask_fpclass_sd_mask(X, C, U) \
> +  ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X),
> (int) (C)), (__mmask8) (U))
> +
>  #define _mm512_mask_fpclass_pd_mask(u, X, C)                            \
>    ((__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) (__m512d) (X), \
>   (int) (C), (__mmask8)(u)))
> Index: config/i386/i386-builtin.def
> ===================================================================
> --- config/i386/i386-builtin.def (revision 269894)
> +++ config/i386/i386-builtin.def (working copy)
> @@ -2082,9 +2082,11 @@
>  BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0,
> CODE_FOR_avx512dq_fpclassv4df_mask,
> "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256,
> UNKNOWN, (int) QI_FTYPE_V4DF_INT_UQI)
>  BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0,
> CODE_FOR_avx512dq_fpclassv2df_mask,
> "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128,
> UNKNOWN, (int) QI_FTYPE_V2DF_INT_UQI)
>  BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vmfpclassv2df,
> "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int)
> QI_FTYPE_V2DF_INT)
> +BDESC (OPTION_MASK_ISA_AVX512DQ, 0,
> CODE_FOR_avx512dq_vmfpclassv2df_mask, "__builtin_ia32_fpclasssd_mask",
> IX86_BUILTIN_FPCLASSSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_INT_UQI)
>  BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0,
> CODE_FOR_avx512dq_fpclassv8sf_mask,
> "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256,
> UNKNOWN, (int) QI_FTYPE_V8SF_INT_UQI)
>  BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0,
> CODE_FOR_avx512dq_fpclassv4sf_mask,
> "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128,
> UNKNOWN, (int) QI_FTYPE_V4SF_INT_UQI)
>  BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vmfpclassv4sf,
> "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int)
> QI_FTYPE_V4SF_INT)
> +BDESC (OPTION_MASK_ISA_AVX512DQ, 0,
> CODE_FOR_avx512dq_vmfpclassv4sf_mask, "__builtin_ia32_fpclassss_mask",
> IX86_BUILTIN_FPCLASSSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_INT_UQI)
>  BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0,
> CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128",
> IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) UHI_FTYPE_V16QI)
>  BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0,
> CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256",
> IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) USI_FTYPE_V32QI)
>  BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0,
> CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128",
> IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) UQI_FTYPE_V8HI)
> Index: config/i386/sse.md
> ===================================================================
> --- config/i386/sse.md (revision 269894)
> +++ config/i386/sse.md (working copy)
> @@ -21111,7 +21111,7 @@
>     (set_attr "prefix" "evex")
>     (set_attr "mode" "<MODE>")])
>
> -(define_insn "avx512dq_vmfpclass<mode>"
> +(define_insn "avx512dq_vmfpclass<mode><mask_scalar_merge_name>"
>    [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
>   (and:<avx512fmaskmode>
>     (unspec:<avx512fmaskmode>
> @@ -21120,7 +21120,7 @@
>       UNSPEC_FPCLASS)
>     (const_int 1)))]
>     "TARGET_AVX512DQ"
> -   "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
> +   "vfpclass<ssescalarmodesuffix>\t{%2, %1,
> %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1,
> %2}";
>    [(set_attr "type" "sse")
>     (set_attr "length_immediate" "1")
>     (set_attr "prefix" "evex")
> Index: testsuite/ChangeLog
> ===================================================================
> --- testsuite/ChangeLog (revision 269894)
> +++ testsuite/ChangeLog (working copy)
> @@ -1,3 +1,19 @@
> +2019-03-24 Hongtao Liu <hongtao.liu@intel.com>
> +
> + PR target/89803
> + * gcc.target/i386/avx-1.c (__builtin_ia32_fpclassss_mask,
> + __builtin_ia32_fpclasssd_mask): Define.
> + * gcc.target/i386/sse-13.c (__builtin_ia32_fpclassss_mask,
> + __builtin_ia32_fpclasssd_mask): Likewise.
> + * gcc.target/i386/sse-23.c (__builtin_ia32_fpclassss_mask)
> + (__builtin_ia32_fpclasssd_mask): Likewise.
> + * gcc.target/i386/avx512dq-vfpclassss-2.c: New.
> + * gcc.target/i386/avx512dq-vfpclasssd-2.c: Likewise.
> + * gcc.target/i386/avx512dq-vfpclassss-1.c (avx512f_test):
> + Add test for _mm_mask_fpclass_ss_mask.
> + * gcc.target/i386/avx512dq-vfpclasssd-1.c (avx512f_test):
> + Add test for _mm_mask_fpclass_sd_mask.
> +
>  2019-03-22  Vladimir Makarov  <vmakarov@redhat.com>
>
>   PR rtl-optimization/89676
> Index: testsuite/gcc.target/i386/avx-1.c
> ===================================================================
> --- testsuite/gcc.target/i386/avx-1.c (revision 269894)
> +++ testsuite/gcc.target/i386/avx-1.c (working copy)
> @@ -446,6 +446,8 @@
>  #define __builtin_ia32_insertf32x8_mask(A, B, F, D, E)
> __builtin_ia32_insertf32x8_mask(A, B, 1, D, E)
>  #define __builtin_ia32_fpclassss(A, D) __builtin_ia32_fpclassss(A, 1)
>  #define __builtin_ia32_fpclasssd(A, D) __builtin_ia32_fpclasssd(A, 1)
> +#define __builtin_ia32_fpclassss_mask(A, D, U)
> __builtin_ia32_fpclassss_mask(A, 1, U)
> +#define __builtin_ia32_fpclasssd_mask(A, D, U)
> __builtin_ia32_fpclasssd_mask(A, 1, U)
>  #define __builtin_ia32_fpclassps512_mask(A, D, C)
> __builtin_ia32_fpclassps512_mask(A, 1, C)
>  #define __builtin_ia32_fpclasspd512_mask(A, D, C)
> __builtin_ia32_fpclasspd512_mask(A, 1, C)
>  #define __builtin_ia32_extracti64x2_512_mask(A, E, C, D)
> __builtin_ia32_extracti64x2_512_mask(A, 1, C, D)
> Index: testsuite/gcc.target/i386/avx512dq-vfpclasssd-1.c
> ===================================================================
> --- testsuite/gcc.target/i386/avx512dq-vfpclasssd-1.c (revision 269894)
> +++ testsuite/gcc.target/i386/avx512dq-vfpclasssd-1.c (working copy)
> @@ -1,6 +1,7 @@
>  /* { dg-do compile } */
>  /* { dg-options "-mavx512dq -O2" } */
>  /* { dg-final { scan-assembler-times "vfpclasssd\[
> \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 }
> } */
> +/* { dg-final { scan-assembler-times "vfpclasssd\[
> \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[0-7\]\}(?:\n|\[
> \\t\]+#)" 1 } } */
>
>  #include <immintrin.h>
>
> @@ -11,4 +12,5 @@
>  avx512dq_test (void)
>  {
>    m8 = _mm_fpclass_sd_mask (x128, 13);
> +  m8 = _mm_mask_fpclass_sd_mask (m8, x128, 13);
>  }
> Index: testsuite/gcc.target/i386/avx512dq-vfpclasssd-2.c
> ===================================================================
> --- testsuite/gcc.target/i386/avx512dq-vfpclasssd-2.c (nonexistent)
> +++ testsuite/gcc.target/i386/avx512dq-vfpclasssd-2.c (working copy)
> @@ -0,0 +1,75 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -mavx512dq" } */
> +/* { dg-require-effective-target avx512dq } */
> +
> +#define AVX512DQ
> +#include "avx512f-helper.h"
> +
> +#include <math.h>
> +#include <limits.h>
> +#include <float.h>
> +#define SIZE (128 / 64)
> +#include "avx512f-mask-type.h"
> +
> +#ifndef __FPCLASSSD__
> +#define __FPCLASSSD__
> +int check_fp_class_dp (double src, int imm)
> +{
> +  int qNaN_res = isnan (src);
> +  int sNaN_res = isnan (src);
> +  int Pzero_res = (src == 0.0);
> +  int Nzero_res = (src == -0.0);
> +  int PInf_res = (isinf (src) == 1);
> +  int NInf_res = (isinf (src) == -1);
> +  int Denorm_res = (fpclassify (src) == FP_SUBNORMAL);
> +  int FinNeg_res = __builtin_finite (src) && (src < 0);
> +
> +  int result = (((imm & 1) && qNaN_res)
> + || (((imm >> 1) & 1) && Pzero_res)
> + || (((imm >> 2) & 1) && Nzero_res)
> + || (((imm >> 3) & 1) && PInf_res)
> + || (((imm >> 4) & 1) && NInf_res)
> + || (((imm >> 5) & 1) && Denorm_res)
> + || (((imm >> 6) & 1) && FinNeg_res)
> + || (((imm >> 7) & 1) && sNaN_res));
> +  return result;
> +}
> +#endif
> +
> +__mmask8
> +CALC (double *s1, int imm)
> +{
> +  int i;
> +  __mmask8 res = 0;
> +
> +  if (check_fp_class_dp(s1[0], imm))
> +    res = res | 1;
> +
> +  return res;
> +}
> +
> +void
> +TEST (void)
> +{
> +  int i;
> +  union128d src;
> +  __mmask8 res1, res2, res_ref = 0;
> +  __mmask8 mask = MASK_VALUE;
> +
> +  src.a[0] = 1.0 / 0.0;
> +  for (i = 1; i < SIZE; i++)
> +    {
> +      src.a[i] = -24.43 + 0.6 * i;
> +    }
> +
> +  res1 = _mm_fpclass_sd_mask (src.x, 0xFF);
> +  res2 = _mm_mask_fpclass_sd_mask (mask, src.x, 0xFF);
> +
> +  res_ref = CALC (src.a, 0xFF);
> +
> +  if (res_ref != res1)
> +    abort ();
> +
> +  if ((res_ref & mask) != res2)
> +    abort ();
> +}
> Index: testsuite/gcc.target/i386/avx512dq-vfpclassss-1.c
> ===================================================================
> --- testsuite/gcc.target/i386/avx512dq-vfpclassss-1.c (revision 269894)
> +++ testsuite/gcc.target/i386/avx512dq-vfpclassss-1.c (working copy)
> @@ -1,6 +1,7 @@
>  /* { dg-do compile } */
>  /* { dg-options "-mavx512dq -O2" } */
>  /* { dg-final { scan-assembler-times "vfpclassss\[
> \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 }
> } */
> +/* { dg-final { scan-assembler-times "vfpclassss\[
> \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[0-7\]\}(?:\n|\[
> \\t\]+#)" 1 } } */
>
>  #include <immintrin.h>
>
> @@ -11,4 +12,5 @@
>  avx512dq_test (void)
>  {
>    m8 = _mm_fpclass_ss_mask (x128, 13);
> +  m8 = _mm_mask_fpclass_ss_mask (m8, x128, 13);
>  }
> Index: testsuite/gcc.target/i386/avx512dq-vfpclassss-2.c
> ===================================================================
> --- testsuite/gcc.target/i386/avx512dq-vfpclassss-2.c (nonexistent)
> +++ testsuite/gcc.target/i386/avx512dq-vfpclassss-2.c (working copy)
> @@ -0,0 +1,76 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2 -mavx512dq" } */
> +/* { dg-require-effective-target avx512dq } */
> +
> +#define AVX512DQ
> +#include "avx512f-helper.h"
> +
> +#include <math.h>
> +#include <limits.h>
> +#include <float.h>
> +#include "avx512f-mask-type.h"
> +#define SIZE (128 / 32)
> +
> +#ifndef __FPCLASSSS__
> +#define __FPCLASSSS__
> +int check_fp_class_sp (float src, int imm)
> +{
> +  int qNaN_res = isnan (src);
> +  int sNaN_res = isnan (src);
> +  int Pzero_res = (src == 0.0);
> +  int Nzero_res = (src == -0.0);
> +  int PInf_res = (isinf (src) == 1);
> +  int NInf_res = (isinf (src) == -1);
> +  int Denorm_res = (fpclassify (src) == FP_SUBNORMAL);
> +  int FinNeg_res = __builtin_finite (src) && (src < 0);
> +
> +  int result = (((imm & 1) && qNaN_res)
> + || (((imm >> 1) & 1) && Pzero_res)
> + || (((imm >> 2) & 1) && Nzero_res)
> + || (((imm >> 3) & 1) && PInf_res)
> + || (((imm >> 4) & 1) && NInf_res)
> + || (((imm >> 5) & 1) && Denorm_res)
> + || (((imm >> 6) & 1) && FinNeg_res)
> + || (((imm >> 7) & 1) && sNaN_res));
> +  return result;
> +}
> +#endif
> +
> +__mmask8
> +CALC (float *s1, int imm)
> +{
> +  int i;
> +  __mmask8 res = 0;
> +
> +  if (check_fp_class_sp(s1[0], imm))
> +    res = res | 1;
> +
> +  return res;
> +}
> +
> +void
> +TEST (void)
> +{
> +  int i;
> +  union128 src;
> +  __mmask8 res1, res2, res_ref = 0;
> +  __mmask8 mask = MASK_VALUE;
> +
> +  src.a[0] = 1.0 / 0.0;
> +  for (i = 1; i < SIZE; i++)
> +    {
> +      src.a[i] = -24.43 + 0.6 * i;
> +    }
> +
> +  res1 = _mm_fpclass_ss_mask (src.x, 0xFF);
> +  res2 = _mm_mask_fpclass_ss_mask (mask, src.x, 0xFF);
> +
> +
> +  res_ref = CALC (src.a, 0xFF);
> +
> +  if (res_ref != res1)
> +    abort ();
> +
> +  if ((mask & res_ref) != res2)
> +    abort ();
> +}
> Index: testsuite/gcc.target/i386/sse-13.c
> ===================================================================
> --- testsuite/gcc.target/i386/sse-13.c (revision 269894)
> +++ testsuite/gcc.target/i386/sse-13.c (working copy)
> @@ -463,6 +463,8 @@
>  #define __builtin_ia32_insertf32x8_mask(A, B, F, D, E)
> __builtin_ia32_insertf32x8_mask(A, B, 1, D, E)
>  #define __builtin_ia32_fpclassss(A, D) __builtin_ia32_fpclassss(A, 1)
>  #define __builtin_ia32_fpclasssd(A, D) __builtin_ia32_fpclasssd(A, 1)
> +#define __builtin_ia32_fpclassss_mask(A, D, U)
> __builtin_ia32_fpclassss_mask(A, 1, U)
> +#define __builtin_ia32_fpclasssd_mask(A, D, U)
> __builtin_ia32_fpclasssd_mask(A, 1, U)
>  #define __builtin_ia32_fpclassps512_mask(A, D, C)
> __builtin_ia32_fpclassps512_mask(A, 1, C)
>  #define __builtin_ia32_fpclasspd512_mask(A, D, C)
> __builtin_ia32_fpclasspd512_mask(A, 1, C)
>  #define __builtin_ia32_extracti64x2_512_mask(A, E, C, D)
> __builtin_ia32_extracti64x2_512_mask(A, 1, C, D)
> Index: testsuite/gcc.target/i386/sse-23.c
> ===================================================================
> --- testsuite/gcc.target/i386/sse-23.c (revision 269894)
> +++ testsuite/gcc.target/i386/sse-23.c (working copy)
> @@ -462,6 +462,8 @@
>  #define __builtin_ia32_insertf32x8_mask(A, B, F, D, E)
> __builtin_ia32_insertf32x8_mask(A, B, 1, D, E)
>  #define __builtin_ia32_fpclassss(A, D) __builtin_ia32_fpclassss(A, 1)
>  #define __builtin_ia32_fpclasssd(A, D) __builtin_ia32_fpclasssd(A, 1)
> +#define __builtin_ia32_fpclassss_mask(A, D, U)
> __builtin_ia32_fpclassss_mask(A, 1, U)
> +#define __builtin_ia32_fpclasssd_mask(A, D, U)
> __builtin_ia32_fpclasssd_mask(A, 1, U)
>  #define __builtin_ia32_fpclassps512_mask(A, D, C)
> __builtin_ia32_fpclassps512_mask(A, 1, C)
>  #define __builtin_ia32_fpclasspd512_mask(A, D, C)
> __builtin_ia32_fpclasspd512_mask(A, 1, C)
>  #define __builtin_ia32_extracti64x2_512_mask(A, E, C, D)
> __builtin_ia32_extracti64x2_512_mask(A, 1, C, D)
>
> --
> BR,
> Hongtao
Uros Bizjak March 28, 2019, 7:38 a.m. UTC | #2
On Thu, Mar 28, 2019 at 7:47 AM Hongtao Liu <crazylht@gmail.com> wrote:
>
> Hi Uros:
>   would you help to review this patch?

This is AVX512F patch, you will need the approval from the maintainer
first. I have no plans to maintain AVX512 beyond rubber-stamping OK
dead obvious regression from a reputable contributors. It is simply
too much involvment for me. If the appointed maintainer doesn't
respond anymore, then I suggest you raise the issue with GCC steering
committe.

Uros.

> Regards,
> Hongtao.
>
> On Sun, Mar 24, 2019 at 8:13 PM Hongtao Liu <crazylht@gmail.com> wrote:
> >
> > Hi:
> >   The following patch adds forgotten avx512f fpclass instrinsics for
> > masked scalar operations.
> >
> > Bootstrapped/regtested on x86_64-linux and i686-linux (on skylake-avx512),
> > ok for trunk?
> >
> > Index: ChangeLog
> > ===================================================================
> > --- ChangeLog (revision 269894)
> > +++ ChangeLog (working copy)
> > @@ -1,3 +1,16 @@
> > +2019-03-24 Hongtao Liu <hongtao.liu@intel.com>
> > +
> > + PR target/89803
> > + * config/i386/avx512dqintrin.h
> > + (_mm_mask_fpclass_ss_mask,_mm_mask_fpclass_sd_mask):
> > + New intrinsics.
> > + * config/i386/i386-builtin.def
> > + (__builtin_ia32_fpclassss_mask, _builtin_ia32_fpclasssd_mask):
> > + New builtins.
> > + * config/i386/sse.md
> > + (define_insn "avx512dq_vmfpclass<mode><mask_scalar_merge_name>):
> > + Modified with mask.
> > +
> >  2019-03-23  Segher Boessenkool  <segher@kernel.crashing.org>
> >
> >   * config/rs6000/xmmintrin.h (_mm_movemask_pi8): Implement for 32-bit
> > Index: config/i386/avx512dqintrin.h
> > ===================================================================
> > --- config/i386/avx512dqintrin.h (revision 269894)
> > +++ config/i386/avx512dqintrin.h (working copy)
> > @@ -1372,6 +1372,20 @@
> >    return (__mmask8) __builtin_ia32_fpclasssd ((__v2df) __A, __imm);
> >  }
> >
> > +extern __inline __mmask8
> > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > +_mm_mask_fpclass_ss_mask (__mmask8 __U, __m128 __A, const int __imm)
> > +{
> > +  return (__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) __A, __imm, __U);
> > +}
> > +
> > +extern __inline __mmask8
> > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> > +_mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
> > +{
> > +  return (__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) __A, __imm, __U);
> > +}
> > +
> >  extern __inline __m512i
> >  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> >  _mm512_cvtt_roundpd_epi64 (__m512d __A, const int __R)
> > @@ -2623,6 +2637,12 @@
> >  #define _mm_fpclass_sd_mask(X, C) \
> >    ((__mmask8) __builtin_ia32_fpclasssd ((__v2df) (__m128d) (X), (int) (C))) \
> >
> > +#define _mm_mask_fpclass_ss_mask(X, C, U) \
> > +  ((__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) (__m128) (X),
> > (int) (C)), (__mmask8) (U))
> > +
> > +#define _mm_mask_fpclass_sd_mask(X, C, U) \
> > +  ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X),
> > (int) (C)), (__mmask8) (U))
> > +
> >  #define _mm512_mask_fpclass_pd_mask(u, X, C)                            \
> >    ((__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) (__m512d) (X), \
> >   (int) (C), (__mmask8)(u)))
> > Index: config/i386/i386-builtin.def
> > ===================================================================
> > --- config/i386/i386-builtin.def (revision 269894)
> > +++ config/i386/i386-builtin.def (working copy)
> > @@ -2082,9 +2082,11 @@
> >  BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0,
> > CODE_FOR_avx512dq_fpclassv4df_mask,
> > "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256,
> > UNKNOWN, (int) QI_FTYPE_V4DF_INT_UQI)
> >  BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0,
> > CODE_FOR_avx512dq_fpclassv2df_mask,
> > "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128,
> > UNKNOWN, (int) QI_FTYPE_V2DF_INT_UQI)
> >  BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vmfpclassv2df,
> > "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int)
> > QI_FTYPE_V2DF_INT)
> > +BDESC (OPTION_MASK_ISA_AVX512DQ, 0,
> > CODE_FOR_avx512dq_vmfpclassv2df_mask, "__builtin_ia32_fpclasssd_mask",
> > IX86_BUILTIN_FPCLASSSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_INT_UQI)
> >  BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0,
> > CODE_FOR_avx512dq_fpclassv8sf_mask,
> > "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256,
> > UNKNOWN, (int) QI_FTYPE_V8SF_INT_UQI)
> >  BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0,
> > CODE_FOR_avx512dq_fpclassv4sf_mask,
> > "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128,
> > UNKNOWN, (int) QI_FTYPE_V4SF_INT_UQI)
> >  BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vmfpclassv4sf,
> > "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int)
> > QI_FTYPE_V4SF_INT)
> > +BDESC (OPTION_MASK_ISA_AVX512DQ, 0,
> > CODE_FOR_avx512dq_vmfpclassv4sf_mask, "__builtin_ia32_fpclassss_mask",
> > IX86_BUILTIN_FPCLASSSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_INT_UQI)
> >  BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0,
> > CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128",
> > IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) UHI_FTYPE_V16QI)
> >  BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0,
> > CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256",
> > IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) USI_FTYPE_V32QI)
> >  BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0,
> > CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128",
> > IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) UQI_FTYPE_V8HI)
> > Index: config/i386/sse.md
> > ===================================================================
> > --- config/i386/sse.md (revision 269894)
> > +++ config/i386/sse.md (working copy)
> > @@ -21111,7 +21111,7 @@
> >     (set_attr "prefix" "evex")
> >     (set_attr "mode" "<MODE>")])
> >
> > -(define_insn "avx512dq_vmfpclass<mode>"
> > +(define_insn "avx512dq_vmfpclass<mode><mask_scalar_merge_name>"
> >    [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
> >   (and:<avx512fmaskmode>
> >     (unspec:<avx512fmaskmode>
> > @@ -21120,7 +21120,7 @@
> >       UNSPEC_FPCLASS)
> >     (const_int 1)))]
> >     "TARGET_AVX512DQ"
> > -   "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
> > +   "vfpclass<ssescalarmodesuffix>\t{%2, %1,
> > %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1,
> > %2}";
> >    [(set_attr "type" "sse")
> >     (set_attr "length_immediate" "1")
> >     (set_attr "prefix" "evex")
> > Index: testsuite/ChangeLog
> > ===================================================================
> > --- testsuite/ChangeLog (revision 269894)
> > +++ testsuite/ChangeLog (working copy)
> > @@ -1,3 +1,19 @@
> > +2019-03-24 Hongtao Liu <hongtao.liu@intel.com>
> > +
> > + PR target/89803
> > + * gcc.target/i386/avx-1.c (__builtin_ia32_fpclassss_mask,
> > + __builtin_ia32_fpclasssd_mask): Define.
> > + * gcc.target/i386/sse-13.c (__builtin_ia32_fpclassss_mask,
> > + __builtin_ia32_fpclasssd_mask): Likewise.
> > + * gcc.target/i386/sse-23.c (__builtin_ia32_fpclassss_mask)
> > + (__builtin_ia32_fpclasssd_mask): Likewise.
> > + * gcc.target/i386/avx512dq-vfpclassss-2.c: New.
> > + * gcc.target/i386/avx512dq-vfpclasssd-2.c: Likewise.
> > + * gcc.target/i386/avx512dq-vfpclassss-1.c (avx512f_test):
> > + Add test for _mm_mask_fpclass_ss_mask.
> > + * gcc.target/i386/avx512dq-vfpclasssd-1.c (avx512f_test):
> > + Add test for _mm_mask_fpclass_sd_mask.
> > +
> >  2019-03-22  Vladimir Makarov  <vmakarov@redhat.com>
> >
> >   PR rtl-optimization/89676
> > Index: testsuite/gcc.target/i386/avx-1.c
> > ===================================================================
> > --- testsuite/gcc.target/i386/avx-1.c (revision 269894)
> > +++ testsuite/gcc.target/i386/avx-1.c (working copy)
> > @@ -446,6 +446,8 @@
> >  #define __builtin_ia32_insertf32x8_mask(A, B, F, D, E)
> > __builtin_ia32_insertf32x8_mask(A, B, 1, D, E)
> >  #define __builtin_ia32_fpclassss(A, D) __builtin_ia32_fpclassss(A, 1)
> >  #define __builtin_ia32_fpclasssd(A, D) __builtin_ia32_fpclasssd(A, 1)
> > +#define __builtin_ia32_fpclassss_mask(A, D, U)
> > __builtin_ia32_fpclassss_mask(A, 1, U)
> > +#define __builtin_ia32_fpclasssd_mask(A, D, U)
> > __builtin_ia32_fpclasssd_mask(A, 1, U)
> >  #define __builtin_ia32_fpclassps512_mask(A, D, C)
> > __builtin_ia32_fpclassps512_mask(A, 1, C)
> >  #define __builtin_ia32_fpclasspd512_mask(A, D, C)
> > __builtin_ia32_fpclasspd512_mask(A, 1, C)
> >  #define __builtin_ia32_extracti64x2_512_mask(A, E, C, D)
> > __builtin_ia32_extracti64x2_512_mask(A, 1, C, D)
> > Index: testsuite/gcc.target/i386/avx512dq-vfpclasssd-1.c
> > ===================================================================
> > --- testsuite/gcc.target/i386/avx512dq-vfpclasssd-1.c (revision 269894)
> > +++ testsuite/gcc.target/i386/avx512dq-vfpclasssd-1.c (working copy)
> > @@ -1,6 +1,7 @@
> >  /* { dg-do compile } */
> >  /* { dg-options "-mavx512dq -O2" } */
> >  /* { dg-final { scan-assembler-times "vfpclasssd\[
> > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 }
> > } */
> > +/* { dg-final { scan-assembler-times "vfpclasssd\[
> > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[0-7\]\}(?:\n|\[
> > \\t\]+#)" 1 } } */
> >
> >  #include <immintrin.h>
> >
> > @@ -11,4 +12,5 @@
> >  avx512dq_test (void)
> >  {
> >    m8 = _mm_fpclass_sd_mask (x128, 13);
> > +  m8 = _mm_mask_fpclass_sd_mask (m8, x128, 13);
> >  }
> > Index: testsuite/gcc.target/i386/avx512dq-vfpclasssd-2.c
> > ===================================================================
> > --- testsuite/gcc.target/i386/avx512dq-vfpclasssd-2.c (nonexistent)
> > +++ testsuite/gcc.target/i386/avx512dq-vfpclasssd-2.c (working copy)
> > @@ -0,0 +1,75 @@
> > +/* { dg-do run } */
> > +/* { dg-options "-O2 -mavx512dq" } */
> > +/* { dg-require-effective-target avx512dq } */
> > +
> > +#define AVX512DQ
> > +#include "avx512f-helper.h"
> > +
> > +#include <math.h>
> > +#include <limits.h>
> > +#include <float.h>
> > +#define SIZE (128 / 64)
> > +#include "avx512f-mask-type.h"
> > +
> > +#ifndef __FPCLASSSD__
> > +#define __FPCLASSSD__
> > +int check_fp_class_dp (double src, int imm)
> > +{
> > +  int qNaN_res = isnan (src);
> > +  int sNaN_res = isnan (src);
> > +  int Pzero_res = (src == 0.0);
> > +  int Nzero_res = (src == -0.0);
> > +  int PInf_res = (isinf (src) == 1);
> > +  int NInf_res = (isinf (src) == -1);
> > +  int Denorm_res = (fpclassify (src) == FP_SUBNORMAL);
> > +  int FinNeg_res = __builtin_finite (src) && (src < 0);
> > +
> > +  int result = (((imm & 1) && qNaN_res)
> > + || (((imm >> 1) & 1) && Pzero_res)
> > + || (((imm >> 2) & 1) && Nzero_res)
> > + || (((imm >> 3) & 1) && PInf_res)
> > + || (((imm >> 4) & 1) && NInf_res)
> > + || (((imm >> 5) & 1) && Denorm_res)
> > + || (((imm >> 6) & 1) && FinNeg_res)
> > + || (((imm >> 7) & 1) && sNaN_res));
> > +  return result;
> > +}
> > +#endif
> > +
> > +__mmask8
> > +CALC (double *s1, int imm)
> > +{
> > +  int i;
> > +  __mmask8 res = 0;
> > +
> > +  if (check_fp_class_dp(s1[0], imm))
> > +    res = res | 1;
> > +
> > +  return res;
> > +}
> > +
> > +void
> > +TEST (void)
> > +{
> > +  int i;
> > +  union128d src;
> > +  __mmask8 res1, res2, res_ref = 0;
> > +  __mmask8 mask = MASK_VALUE;
> > +
> > +  src.a[0] = 1.0 / 0.0;
> > +  for (i = 1; i < SIZE; i++)
> > +    {
> > +      src.a[i] = -24.43 + 0.6 * i;
> > +    }
> > +
> > +  res1 = _mm_fpclass_sd_mask (src.x, 0xFF);
> > +  res2 = _mm_mask_fpclass_sd_mask (mask, src.x, 0xFF);
> > +
> > +  res_ref = CALC (src.a, 0xFF);
> > +
> > +  if (res_ref != res1)
> > +    abort ();
> > +
> > +  if ((res_ref & mask) != res2)
> > +    abort ();
> > +}
> > Index: testsuite/gcc.target/i386/avx512dq-vfpclassss-1.c
> > ===================================================================
> > --- testsuite/gcc.target/i386/avx512dq-vfpclassss-1.c (revision 269894)
> > +++ testsuite/gcc.target/i386/avx512dq-vfpclassss-1.c (working copy)
> > @@ -1,6 +1,7 @@
> >  /* { dg-do compile } */
> >  /* { dg-options "-mavx512dq -O2" } */
> >  /* { dg-final { scan-assembler-times "vfpclassss\[
> > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 }
> > } */
> > +/* { dg-final { scan-assembler-times "vfpclassss\[
> > \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[0-7\]\}(?:\n|\[
> > \\t\]+#)" 1 } } */
> >
> >  #include <immintrin.h>
> >
> > @@ -11,4 +12,5 @@
> >  avx512dq_test (void)
> >  {
> >    m8 = _mm_fpclass_ss_mask (x128, 13);
> > +  m8 = _mm_mask_fpclass_ss_mask (m8, x128, 13);
> >  }
> > Index: testsuite/gcc.target/i386/avx512dq-vfpclassss-2.c
> > ===================================================================
> > --- testsuite/gcc.target/i386/avx512dq-vfpclassss-2.c (nonexistent)
> > +++ testsuite/gcc.target/i386/avx512dq-vfpclassss-2.c (working copy)
> > @@ -0,0 +1,76 @@
> > +/* { dg-do run } */
> > +/* { dg-options "-O2 -mavx512dq" } */
> > +/* { dg-require-effective-target avx512dq } */
> > +
> > +#define AVX512DQ
> > +#include "avx512f-helper.h"
> > +
> > +#include <math.h>
> > +#include <limits.h>
> > +#include <float.h>
> > +#include "avx512f-mask-type.h"
> > +#define SIZE (128 / 32)
> > +
> > +#ifndef __FPCLASSSS__
> > +#define __FPCLASSSS__
> > +int check_fp_class_sp (float src, int imm)
> > +{
> > +  int qNaN_res = isnan (src);
> > +  int sNaN_res = isnan (src);
> > +  int Pzero_res = (src == 0.0);
> > +  int Nzero_res = (src == -0.0);
> > +  int PInf_res = (isinf (src) == 1);
> > +  int NInf_res = (isinf (src) == -1);
> > +  int Denorm_res = (fpclassify (src) == FP_SUBNORMAL);
> > +  int FinNeg_res = __builtin_finite (src) && (src < 0);
> > +
> > +  int result = (((imm & 1) && qNaN_res)
> > + || (((imm >> 1) & 1) && Pzero_res)
> > + || (((imm >> 2) & 1) && Nzero_res)
> > + || (((imm >> 3) & 1) && PInf_res)
> > + || (((imm >> 4) & 1) && NInf_res)
> > + || (((imm >> 5) & 1) && Denorm_res)
> > + || (((imm >> 6) & 1) && FinNeg_res)
> > + || (((imm >> 7) & 1) && sNaN_res));
> > +  return result;
> > +}
> > +#endif
> > +
> > +__mmask8
> > +CALC (float *s1, int imm)
> > +{
> > +  int i;
> > +  __mmask8 res = 0;
> > +
> > +  if (check_fp_class_sp(s1[0], imm))
> > +    res = res | 1;
> > +
> > +  return res;
> > +}
> > +
> > +void
> > +TEST (void)
> > +{
> > +  int i;
> > +  union128 src;
> > +  __mmask8 res1, res2, res_ref = 0;
> > +  __mmask8 mask = MASK_VALUE;
> > +
> > +  src.a[0] = 1.0 / 0.0;
> > +  for (i = 1; i < SIZE; i++)
> > +    {
> > +      src.a[i] = -24.43 + 0.6 * i;
> > +    }
> > +
> > +  res1 = _mm_fpclass_ss_mask (src.x, 0xFF);
> > +  res2 = _mm_mask_fpclass_ss_mask (mask, src.x, 0xFF);
> > +
> > +
> > +  res_ref = CALC (src.a, 0xFF);
> > +
> > +  if (res_ref != res1)
> > +    abort ();
> > +
> > +  if ((mask & res_ref) != res2)
> > +    abort ();
> > +}
> > Index: testsuite/gcc.target/i386/sse-13.c
> > ===================================================================
> > --- testsuite/gcc.target/i386/sse-13.c (revision 269894)
> > +++ testsuite/gcc.target/i386/sse-13.c (working copy)
> > @@ -463,6 +463,8 @@
> >  #define __builtin_ia32_insertf32x8_mask(A, B, F, D, E)
> > __builtin_ia32_insertf32x8_mask(A, B, 1, D, E)
> >  #define __builtin_ia32_fpclassss(A, D) __builtin_ia32_fpclassss(A, 1)
> >  #define __builtin_ia32_fpclasssd(A, D) __builtin_ia32_fpclasssd(A, 1)
> > +#define __builtin_ia32_fpclassss_mask(A, D, U)
> > __builtin_ia32_fpclassss_mask(A, 1, U)
> > +#define __builtin_ia32_fpclasssd_mask(A, D, U)
> > __builtin_ia32_fpclasssd_mask(A, 1, U)
> >  #define __builtin_ia32_fpclassps512_mask(A, D, C)
> > __builtin_ia32_fpclassps512_mask(A, 1, C)
> >  #define __builtin_ia32_fpclasspd512_mask(A, D, C)
> > __builtin_ia32_fpclasspd512_mask(A, 1, C)
> >  #define __builtin_ia32_extracti64x2_512_mask(A, E, C, D)
> > __builtin_ia32_extracti64x2_512_mask(A, 1, C, D)
> > Index: testsuite/gcc.target/i386/sse-23.c
> > ===================================================================
> > --- testsuite/gcc.target/i386/sse-23.c (revision 269894)
> > +++ testsuite/gcc.target/i386/sse-23.c (working copy)
> > @@ -462,6 +462,8 @@
> >  #define __builtin_ia32_insertf32x8_mask(A, B, F, D, E)
> > __builtin_ia32_insertf32x8_mask(A, B, 1, D, E)
> >  #define __builtin_ia32_fpclassss(A, D) __builtin_ia32_fpclassss(A, 1)
> >  #define __builtin_ia32_fpclasssd(A, D) __builtin_ia32_fpclasssd(A, 1)
> > +#define __builtin_ia32_fpclassss_mask(A, D, U)
> > __builtin_ia32_fpclassss_mask(A, 1, U)
> > +#define __builtin_ia32_fpclasssd_mask(A, D, U)
> > __builtin_ia32_fpclasssd_mask(A, 1, U)
> >  #define __builtin_ia32_fpclassps512_mask(A, D, C)
> > __builtin_ia32_fpclassps512_mask(A, 1, C)
> >  #define __builtin_ia32_fpclasspd512_mask(A, D, C)
> > __builtin_ia32_fpclasspd512_mask(A, 1, C)
> >  #define __builtin_ia32_extracti64x2_512_mask(A, E, C, D)
> > __builtin_ia32_extracti64x2_512_mask(A, 1, C, D)
> >
> > --
> > BR,
> > Hongtao
>
>
>
> --
> BR,
> Hongtao
Jeff Law March 29, 2019, 9:33 p.m. UTC | #3
On 3/28/19 1:38 AM, Uros Bizjak wrote:
> On Thu, Mar 28, 2019 at 7:47 AM Hongtao Liu <crazylht@gmail.com> wrote:
>>
>> Hi Uros:
>>   would you help to review this patch?
> 
> This is AVX512F patch, you will need the approval from the maintainer
> first. I have no plans to maintain AVX512 beyond rubber-stamping OK
> dead obvious regression from a reputable contributors. It is simply
> too much involvment for me. If the appointed maintainer doesn't
> respond anymore, then I suggest you raise the issue with GCC steering
> committe.
Also note, this is not fixing a regression relative to a prior release.
  I'd prefer to see this moved to gcc-10 unless there is a strong
justification for pushing it into gcc-9.

The subject like should also be changed to reference the right bz.  I
think the right one is 89803.

jeff
Hongtao Liu March 30, 2019, 3:30 a.m. UTC | #4
On Sat, Mar 30, 2019 at 5:34 AM Jeff Law <law@redhat.com> wrote:
>
> On 3/28/19 1:38 AM, Uros Bizjak wrote:
> > On Thu, Mar 28, 2019 at 7:47 AM Hongtao Liu <crazylht@gmail.com> wrote:
> >>
> >> Hi Uros:
> >>   would you help to review this patch?
> >
> > This is AVX512F patch, you will need the approval from the maintainer
> > first. I have no plans to maintain AVX512 beyond rubber-stamping OK
> > dead obvious regression from a reputable contributors. It is simply
> > too much involvment for me. If the appointed maintainer doesn't
> > respond anymore, then I suggest you raise the issue with GCC steering
> > committe.
> Also note, this is not fixing a regression relative to a prior release.
>   I'd prefer to see this moved to gcc-10 unless there is a strong
> justification for pushing it into gcc-9.
>
> The subject like should also be changed to reference the right bz.  I
> think the right one is 89803.
>
> jeff

Yes, it's PR 89803, sorry for typo.

And thank you for you explanation.
diff mbox series

Patch

Index: ChangeLog
===================================================================
--- ChangeLog (revision 269894)
+++ ChangeLog (working copy)
@@ -1,3 +1,16 @@ 
+2019-03-24 Hongtao Liu <hongtao.liu@intel.com>
+
+ PR target/89803
+ * config/i386/avx512dqintrin.h
+ (_mm_mask_fpclass_ss_mask,_mm_mask_fpclass_sd_mask):
+ New intrinsics.
+ * config/i386/i386-builtin.def
+ (__builtin_ia32_fpclassss_mask, _builtin_ia32_fpclasssd_mask):
+ New builtins.
+ * config/i386/sse.md
+ (define_insn "avx512dq_vmfpclass<mode><mask_scalar_merge_name>):
+ Modified with mask.
+
 2019-03-23  Segher Boessenkool  <segher@kernel.crashing.org>

  * config/rs6000/xmmintrin.h (_mm_movemask_pi8): Implement for 32-bit
Index: config/i386/avx512dqintrin.h
===================================================================
--- config/i386/avx512dqintrin.h (revision 269894)
+++ config/i386/avx512dqintrin.h (working copy)
@@ -1372,6 +1372,20 @@ 
   return (__mmask8) __builtin_ia32_fpclasssd ((__v2df) __A, __imm);
 }

+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fpclass_ss_mask (__mmask8 __U, __m128 __A, const int __imm)
+{
+  return (__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) __A, __imm, __U);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
+{
+  return (__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) __A, __imm, __U);
+}
+
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_cvtt_roundpd_epi64 (__m512d __A, const int __R)
@@ -2623,6 +2637,12 @@ 
 #define _mm_fpclass_sd_mask(X, C) \
   ((__mmask8) __builtin_ia32_fpclasssd ((__v2df) (__m128d) (X), (int) (C))) \

+#define _mm_mask_fpclass_ss_mask(X, C, U) \
+  ((__mmask8) __builtin_ia32_fpclassss_mask ((__v4sf) (__m128) (X),
(int) (C)), (__mmask8) (U))
+
+#define _mm_mask_fpclass_sd_mask(X, C, U) \
+  ((__mmask8) __builtin_ia32_fpclasssd_mask ((__v2df) (__m128d) (X),
(int) (C)), (__mmask8) (U))
+
 #define _mm512_mask_fpclass_pd_mask(u, X, C)                            \
   ((__mmask8) __builtin_ia32_fpclasspd512_mask ((__v8df) (__m512d) (X), \
  (int) (C), (__mmask8)(u)))
Index: config/i386/i386-builtin.def
===================================================================
--- config/i386/i386-builtin.def (revision 269894)
+++ config/i386/i386-builtin.def (working copy)
@@ -2082,9 +2082,11 @@ 
 BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0,
CODE_FOR_avx512dq_fpclassv4df_mask,
"__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256,
UNKNOWN, (int) QI_FTYPE_V4DF_INT_UQI)
 BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0,
CODE_FOR_avx512dq_fpclassv2df_mask,
"__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128,
UNKNOWN, (int) QI_FTYPE_V2DF_INT_UQI)
 BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vmfpclassv2df,
"__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int)
QI_FTYPE_V2DF_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0,
CODE_FOR_avx512dq_vmfpclassv2df_mask, "__builtin_ia32_fpclasssd_mask",
IX86_BUILTIN_FPCLASSSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_INT_UQI)
 BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0,
CODE_FOR_avx512dq_fpclassv8sf_mask,
"__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256,
UNKNOWN, (int) QI_FTYPE_V8SF_INT_UQI)
 BDESC (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, 0,
CODE_FOR_avx512dq_fpclassv4sf_mask,
"__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128,
UNKNOWN, (int) QI_FTYPE_V4SF_INT_UQI)
 BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vmfpclassv4sf,
"__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int)
QI_FTYPE_V4SF_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0,
CODE_FOR_avx512dq_vmfpclassv4sf_mask, "__builtin_ia32_fpclassss_mask",
IX86_BUILTIN_FPCLASSSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_INT_UQI)
 BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0,
CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128",
IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) UHI_FTYPE_V16QI)
 BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0,
CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256",
IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) USI_FTYPE_V32QI)
 BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0,
CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128",
IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) UQI_FTYPE_V8HI)
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md (revision 269894)
+++ config/i386/sse.md (working copy)
@@ -21111,7 +21111,7 @@ 
    (set_attr "prefix" "evex")
    (set_attr "mode" "<MODE>")])

-(define_insn "avx512dq_vmfpclass<mode>"
+(define_insn "avx512dq_vmfpclass<mode><mask_scalar_merge_name>"
   [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
  (and:<avx512fmaskmode>
    (unspec:<avx512fmaskmode>
@@ -21120,7 +21120,7 @@ 
      UNSPEC_FPCLASS)
    (const_int 1)))]
    "TARGET_AVX512DQ"
-   "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
+   "vfpclass<ssescalarmodesuffix>\t{%2, %1,
%0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1,
%2}";
   [(set_attr "type" "sse")
    (set_attr "length_immediate" "1")
    (set_attr "prefix" "evex")
Index: testsuite/ChangeLog
===================================================================
--- testsuite/ChangeLog (revision 269894)
+++ testsuite/ChangeLog (working copy)
@@ -1,3 +1,19 @@ 
+2019-03-24 Hongtao Liu <hongtao.liu@intel.com>
+
+ PR target/89803
+ * gcc.target/i386/avx-1.c (__builtin_ia32_fpclassss_mask,
+ __builtin_ia32_fpclasssd_mask): Define.
+ * gcc.target/i386/sse-13.c (__builtin_ia32_fpclassss_mask,
+ __builtin_ia32_fpclasssd_mask): Likewise.
+ * gcc.target/i386/sse-23.c (__builtin_ia32_fpclassss_mask)
+ (__builtin_ia32_fpclasssd_mask): Likewise.
+ * gcc.target/i386/avx512dq-vfpclassss-2.c: New.
+ * gcc.target/i386/avx512dq-vfpclasssd-2.c: Likewise.
+ * gcc.target/i386/avx512dq-vfpclassss-1.c (avx512f_test):
+ Add test for _mm_mask_fpclass_ss_mask.
+ * gcc.target/i386/avx512dq-vfpclasssd-1.c (avx512f_test):
+ Add test for _mm_mask_fpclass_sd_mask.
+
 2019-03-22  Vladimir Makarov  <vmakarov@redhat.com>

  PR rtl-optimization/89676
Index: testsuite/gcc.target/i386/avx-1.c
===================================================================
--- testsuite/gcc.target/i386/avx-1.c (revision 269894)
+++ testsuite/gcc.target/i386/avx-1.c (working copy)
@@ -446,6 +446,8 @@ 
 #define __builtin_ia32_insertf32x8_mask(A, B, F, D, E)
__builtin_ia32_insertf32x8_mask(A, B, 1, D, E)
 #define __builtin_ia32_fpclassss(A, D) __builtin_ia32_fpclassss(A, 1)
 #define __builtin_ia32_fpclasssd(A, D) __builtin_ia32_fpclasssd(A, 1)
+#define __builtin_ia32_fpclassss_mask(A, D, U)
__builtin_ia32_fpclassss_mask(A, 1, U)
+#define __builtin_ia32_fpclasssd_mask(A, D, U)
__builtin_ia32_fpclasssd_mask(A, 1, U)
 #define __builtin_ia32_fpclassps512_mask(A, D, C)
__builtin_ia32_fpclassps512_mask(A, 1, C)
 #define __builtin_ia32_fpclasspd512_mask(A, D, C)
__builtin_ia32_fpclasspd512_mask(A, 1, C)
 #define __builtin_ia32_extracti64x2_512_mask(A, E, C, D)
__builtin_ia32_extracti64x2_512_mask(A, 1, C, D)
Index: testsuite/gcc.target/i386/avx512dq-vfpclasssd-1.c
===================================================================
--- testsuite/gcc.target/i386/avx512dq-vfpclasssd-1.c (revision 269894)
+++ testsuite/gcc.target/i386/avx512dq-vfpclasssd-1.c (working copy)
@@ -1,6 +1,7 @@ 
 /* { dg-do compile } */
 /* { dg-options "-mavx512dq -O2" } */
 /* { dg-final { scan-assembler-times "vfpclasssd\[
\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 }
} */
+/* { dg-final { scan-assembler-times "vfpclasssd\[
\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[0-7\]\}(?:\n|\[
\\t\]+#)" 1 } } */

 #include <immintrin.h>

@@ -11,4 +12,5 @@ 
 avx512dq_test (void)
 {
   m8 = _mm_fpclass_sd_mask (x128, 13);
+  m8 = _mm_mask_fpclass_sd_mask (m8, x128, 13);
 }
Index: testsuite/gcc.target/i386/avx512dq-vfpclasssd-2.c
===================================================================
--- testsuite/gcc.target/i386/avx512dq-vfpclasssd-2.c (nonexistent)
+++ testsuite/gcc.target/i386/avx512dq-vfpclasssd-2.c (working copy)
@@ -0,0 +1,75 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq" } */
+/* { dg-require-effective-target avx512dq } */
+
+#define AVX512DQ
+#include "avx512f-helper.h"
+
+#include <math.h>
+#include <limits.h>
+#include <float.h>
+#define SIZE (128 / 64)
+#include "avx512f-mask-type.h"
+
+#ifndef __FPCLASSSD__
+#define __FPCLASSSD__
+int check_fp_class_dp (double src, int imm)
+{
+  int qNaN_res = isnan (src);
+  int sNaN_res = isnan (src);
+  int Pzero_res = (src == 0.0);
+  int Nzero_res = (src == -0.0);
+  int PInf_res = (isinf (src) == 1);
+  int NInf_res = (isinf (src) == -1);
+  int Denorm_res = (fpclassify (src) == FP_SUBNORMAL);
+  int FinNeg_res = __builtin_finite (src) && (src < 0);
+
+  int result = (((imm & 1) && qNaN_res)
+ || (((imm >> 1) & 1) && Pzero_res)
+ || (((imm >> 2) & 1) && Nzero_res)
+ || (((imm >> 3) & 1) && PInf_res)
+ || (((imm >> 4) & 1) && NInf_res)
+ || (((imm >> 5) & 1) && Denorm_res)
+ || (((imm >> 6) & 1) && FinNeg_res)
+ || (((imm >> 7) & 1) && sNaN_res));
+  return result;
+}
+#endif
+
+__mmask8
+CALC (double *s1, int imm)
+{
+  int i;
+  __mmask8 res = 0;
+
+  if (check_fp_class_dp(s1[0], imm))
+    res = res | 1;
+
+  return res;
+}
+
+void
+TEST (void)
+{
+  int i;
+  union128d src;
+  __mmask8 res1, res2, res_ref = 0;
+  __mmask8 mask = MASK_VALUE;
+
+  src.a[0] = 1.0 / 0.0;
+  for (i = 1; i < SIZE; i++)
+    {
+      src.a[i] = -24.43 + 0.6 * i;
+    }
+
+  res1 = _mm_fpclass_sd_mask (src.x, 0xFF);
+  res2 = _mm_mask_fpclass_sd_mask (mask, src.x, 0xFF);
+
+  res_ref = CALC (src.a, 0xFF);
+
+  if (res_ref != res1)
+    abort ();
+
+  if ((res_ref & mask) != res2)
+    abort ();
+}
Index: testsuite/gcc.target/i386/avx512dq-vfpclassss-1.c
===================================================================
--- testsuite/gcc.target/i386/avx512dq-vfpclassss-1.c (revision 269894)
+++ testsuite/gcc.target/i386/avx512dq-vfpclassss-1.c (working copy)
@@ -1,6 +1,7 @@ 
 /* { dg-do compile } */
 /* { dg-options "-mavx512dq -O2" } */
 /* { dg-final { scan-assembler-times "vfpclassss\[
\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 }
} */
+/* { dg-final { scan-assembler-times "vfpclassss\[
\\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n^k\]*%k\[0-7\]\{%k\[0-7\]\}(?:\n|\[
\\t\]+#)" 1 } } */

 #include <immintrin.h>

@@ -11,4 +12,5 @@ 
 avx512dq_test (void)
 {
   m8 = _mm_fpclass_ss_mask (x128, 13);
+  m8 = _mm_mask_fpclass_ss_mask (m8, x128, 13);
 }
Index: testsuite/gcc.target/i386/avx512dq-vfpclassss-2.c
===================================================================
--- testsuite/gcc.target/i386/avx512dq-vfpclassss-2.c (nonexistent)
+++ testsuite/gcc.target/i386/avx512dq-vfpclassss-2.c (working copy)
@@ -0,0 +1,76 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq" } */
+/* { dg-require-effective-target avx512dq } */
+
+#define AVX512DQ
+#include "avx512f-helper.h"
+
+#include <math.h>
+#include <limits.h>
+#include <float.h>
+#include "avx512f-mask-type.h"
+#define SIZE (128 / 32)
+
+#ifndef __FPCLASSSS__
+#define __FPCLASSSS__
+int check_fp_class_sp (float src, int imm)
+{
+  int qNaN_res = isnan (src);
+  int sNaN_res = isnan (src);
+  int Pzero_res = (src == 0.0);
+  int Nzero_res = (src == -0.0);
+  int PInf_res = (isinf (src) == 1);
+  int NInf_res = (isinf (src) == -1);
+  int Denorm_res = (fpclassify (src) == FP_SUBNORMAL);
+  int FinNeg_res = __builtin_finite (src) && (src < 0);
+
+  int result = (((imm & 1) && qNaN_res)
+ || (((imm >> 1) & 1) && Pzero_res)
+ || (((imm >> 2) & 1) && Nzero_res)
+ || (((imm >> 3) & 1) && PInf_res)
+ || (((imm >> 4) & 1) && NInf_res)
+ || (((imm >> 5) & 1) && Denorm_res)
+ || (((imm >> 6) & 1) && FinNeg_res)
+ || (((imm >> 7) & 1) && sNaN_res));
+  return result;
+}
+#endif
+
+__mmask8
+CALC (float *s1, int imm)
+{
+  int i;
+  __mmask8 res = 0;
+
+  if (check_fp_class_sp(s1[0], imm))
+    res = res | 1;
+
+  return res;
+}
+
+void
+TEST (void)
+{
+  int i;
+  union128 src;
+  __mmask8 res1, res2, res_ref = 0;
+  __mmask8 mask = MASK_VALUE;
+
+  src.a[0] = 1.0 / 0.0;
+  for (i = 1; i < SIZE; i++)
+    {
+      src.a[i] = -24.43 + 0.6 * i;
+    }
+
+  res1 = _mm_fpclass_ss_mask (src.x, 0xFF);
+  res2 = _mm_mask_fpclass_ss_mask (mask, src.x, 0xFF);
+
+
+  res_ref = CALC (src.a, 0xFF);
+
+  if (res_ref != res1)
+    abort ();
+
+  if ((mask & res_ref) != res2)
+    abort ();
+}
Index: testsuite/gcc.target/i386/sse-13.c
===================================================================
--- testsuite/gcc.target/i386/sse-13.c (revision 269894)
+++ testsuite/gcc.target/i386/sse-13.c (working copy)
@@ -463,6 +463,8 @@ 
 #define __builtin_ia32_insertf32x8_mask(A, B, F, D, E)
__builtin_ia32_insertf32x8_mask(A, B, 1, D, E)
 #define __builtin_ia32_fpclassss(A, D) __builtin_ia32_fpclassss(A, 1)
 #define __builtin_ia32_fpclasssd(A, D) __builtin_ia32_fpclasssd(A, 1)
+#define __builtin_ia32_fpclassss_mask(A, D, U)
__builtin_ia32_fpclassss_mask(A, 1, U)
+#define __builtin_ia32_fpclasssd_mask(A, D, U)
__builtin_ia32_fpclasssd_mask(A, 1, U)
 #define __builtin_ia32_fpclassps512_mask(A, D, C)
__builtin_ia32_fpclassps512_mask(A, 1, C)
 #define __builtin_ia32_fpclasspd512_mask(A, D, C)
__builtin_ia32_fpclasspd512_mask(A, 1, C)
 #define __builtin_ia32_extracti64x2_512_mask(A, E, C, D)
__builtin_ia32_extracti64x2_512_mask(A, 1, C, D)
Index: testsuite/gcc.target/i386/sse-23.c
===================================================================
--- testsuite/gcc.target/i386/sse-23.c (revision 269894)
+++ testsuite/gcc.target/i386/sse-23.c (working copy)
@@ -462,6 +462,8 @@ 
 #define __builtin_ia32_insertf32x8_mask(A, B, F, D, E)
__builtin_ia32_insertf32x8_mask(A, B, 1, D, E)
 #define __builtin_ia32_fpclassss(A, D) __builtin_ia32_fpclassss(A, 1)
 #define __builtin_ia32_fpclasssd(A, D) __builtin_ia32_fpclasssd(A, 1)
+#define __builtin_ia32_fpclassss_mask(A, D, U)
__builtin_ia32_fpclassss_mask(A, 1, U)
+#define __builtin_ia32_fpclasssd_mask(A, D, U)
__builtin_ia32_fpclasssd_mask(A, 1, U)