Message ID | 20210701061648.9447-31-hongtao.liu@intel.com |
---|---|
State | New |
Headers | show |
Series | Support all AVX512FP16 intrinsics. | expand |
I'm going to check in 10 patches. [PATCH 30/62] AVX512FP16: Add vcvtsh2si/vcvtsh2usi/vcvtsi2sh/vcvtusi2sh. [PATCH 31/62] AVX512FP16: Add testcase for vcvtsh2si/vcvtsh2usi/vcvtsi2sh/vcvtusi2sh. [PATCH 32/62] AVX512FP16: Add vcvttph2w/vcvttph2uw/vcvttph2dq/vcvttph2qq/vcvttph2udq/vcvttph2uqq [PATCH 33/62] AVX512FP16: Add testcase for vcvttph2w/vcvttph2uw/vcvttph2dq/vcvttph2udq/vcvttph2qq/vcvttph2uqq [PATCH 34/62] AVX512FP16: Add vcvttsh2si/vcvttsh2usi [PATCH 35/62] AVX512FP16: Add vcvtph2pd/vcvtph2psx/vcvtpd2ph/vcvtps2phx [PATCH 36/62] AVX512FP16: Add testcase for vcvtph2pd/vcvtph2psx/vcvtpd2ph/vcvtps2phx [PATCH 37/62] AVX512FP16: Add vcvtsh2ss/vcvtsh2sd/vcvtss2sh/vcvtsd2sh. [PATCH 38/62] AVX512FP16: Add testcase for vcvtsh2sd/vcvtsh2ss/vcvtsd2sh/vcvtss2sh [PATCH 39/62] AVX512FP16: Add intrinsics for casting between vector float16 and vector float32/float64/integer. Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. Newly added runtime testcase passed on sde{-m32,}. On Thu, Jul 1, 2021 at 2:17 PM liuhongt <hongtao.liu@intel.com> wrote: > > gcc/ChangeLog: > > * config/i386/avx512fp16intrin.h (_mm_cvtsh_i32): New intrinsic. > (_mm_cvtsh_u32): Likewise. > (_mm_cvt_roundsh_i32): Likewise. > (_mm_cvt_roundsh_u32): Likewise. > (_mm_cvtsh_i64): Likewise. > (_mm_cvtsh_u64): Likewise. > (_mm_cvt_roundsh_i64): Likewise. > (_mm_cvt_roundsh_u64): Likewise. > (_mm_cvti32_sh): Likewise. > (_mm_cvtu32_sh): Likewise. > (_mm_cvt_roundi32_sh): Likewise. > (_mm_cvt_roundu32_sh): Likewise. > (_mm_cvti64_sh): Likewise. > (_mm_cvtu64_sh): Likewise. > (_mm_cvt_roundi64_sh): Likewise. > (_mm_cvt_roundu64_sh): Likewise. > * config/i386/i386-builtin-types.def: Add corresponding builtin types. > * config/i386/i386-builtin.def: Add corresponding new builtins. > * config/i386/i386-expand.c (ix86_expand_round_builtin): > Handle new builtin types. > * config/i386/sse.md > (avx512fp16_vcvtsh2<sseintconvertsignprefix>si<rex64namesuffix><round_name>): > New define_insn. > (avx512fp16_vcvtsh2<sseintconvertsignprefix>si<rex64namesuffix>_2): Likewise. > (avx512fp16_vcvt<floatsuffix>si2sh<rex64namesuffix><round_name>): Likewise. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/avx-1.c: Add test for new builtins. > * gcc.target/i386/sse-13.c: Ditto. > * gcc.target/i386/sse-23.c: Ditto. > * gcc.target/i386/sse-14.c: Add test for new intrinsics. > * gcc.target/i386/sse-22.c: Ditto. > --- > gcc/config/i386/avx512fp16intrin.h | 158 +++++++++++++++++++++++++ > gcc/config/i386/i386-builtin-types.def | 8 ++ > gcc/config/i386/i386-builtin.def | 8 ++ > gcc/config/i386/i386-expand.c | 8 ++ > gcc/config/i386/sse.md | 46 +++++++ > gcc/testsuite/gcc.target/i386/avx-1.c | 8 ++ > gcc/testsuite/gcc.target/i386/sse-13.c | 8 ++ > gcc/testsuite/gcc.target/i386/sse-14.c | 10 ++ > gcc/testsuite/gcc.target/i386/sse-22.c | 10 ++ > gcc/testsuite/gcc.target/i386/sse-23.c | 8 ++ > 10 files changed, 272 insertions(+) > > diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h > index bd801942365..7524a8d6a5b 100644 > --- a/gcc/config/i386/avx512fp16intrin.h > +++ b/gcc/config/i386/avx512fp16intrin.h > @@ -3529,6 +3529,164 @@ _mm512_maskz_cvt_roundepu16_ph (__mmask32 __A, __m512i __B, int __C) > > #endif /* __OPTIMIZE__ */ > > +/* Intrinsics vcvtsh2si, vcvtsh2us. */ > +extern __inline int > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_cvtsh_i32 (__m128h __A) > +{ > + return (int) __builtin_ia32_vcvtsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline unsigned > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_cvtsh_u32 (__m128h __A) > +{ > + return (int) __builtin_ia32_vcvtsh2usi32_round (__A, > + _MM_FROUND_CUR_DIRECTION); > +} > + > +#ifdef __OPTIMIZE__ > +extern __inline int > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_cvt_roundsh_i32 (__m128h __A, const int __R) > +{ > + return (int) __builtin_ia32_vcvtsh2si32_round (__A, __R); > +} > + > +extern __inline unsigned > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_cvt_roundsh_u32 (__m128h __A, const int __R) > +{ > + return (int) __builtin_ia32_vcvtsh2usi32_round (__A, __R); > +} > + > +#else > +#define _mm_cvt_roundsh_i32(A, B) \ > + ((int)__builtin_ia32_vcvtsh2si32_round ((A), (B))) > +#define _mm_cvt_roundsh_u32(A, B) \ > + ((int)__builtin_ia32_vcvtsh2usi32_round ((A), (B))) > + > +#endif /* __OPTIMIZE__ */ > + > +#ifdef __x86_64__ > +extern __inline long long > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_cvtsh_i64 (__m128h __A) > +{ > + return (long long) > + __builtin_ia32_vcvtsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline unsigned long long > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_cvtsh_u64 (__m128h __A) > +{ > + return (long long) > + __builtin_ia32_vcvtsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION); > +} > + > +#ifdef __OPTIMIZE__ > +extern __inline long long > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_cvt_roundsh_i64 (__m128h __A, const int __R) > +{ > + return (long long) __builtin_ia32_vcvtsh2si64_round (__A, __R); > +} > + > +extern __inline unsigned long long > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_cvt_roundsh_u64 (__m128h __A, const int __R) > +{ > + return (long long) __builtin_ia32_vcvtsh2usi64_round (__A, __R); > +} > + > +#else > +#define _mm_cvt_roundsh_i64(A, B) \ > + ((long long)__builtin_ia32_vcvtsh2si64_round ((A), (B))) > +#define _mm_cvt_roundsh_u64(A, B) \ > + ((long long)__builtin_ia32_vcvtsh2usi64_round ((A), (B))) > + > +#endif /* __OPTIMIZE__ */ > +#endif /* __x86_64__ */ > + > +/* Intrinsics vcvtsi2sh, vcvtusi2sh. */ > +extern __inline __m128h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_cvti32_sh (__m128h __A, int __B) > +{ > + return __builtin_ia32_vcvtsi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m128h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_cvtu32_sh (__m128h __A, unsigned int __B) > +{ > + return __builtin_ia32_vcvtusi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION); > +} > + > +#ifdef __OPTIMIZE__ > +extern __inline __m128h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_cvt_roundi32_sh (__m128h __A, int __B, const int __R) > +{ > + return __builtin_ia32_vcvtsi2sh32_round (__A, __B, __R); > +} > + > +extern __inline __m128h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_cvt_roundu32_sh (__m128h __A, unsigned int __B, const int __R) > +{ > + return __builtin_ia32_vcvtusi2sh32_round (__A, __B, __R); > +} > + > +#else > +#define _mm_cvt_roundi32_sh(A, B, C) \ > + (__builtin_ia32_vcvtsi2sh32_round ((A), (B), (C))) > +#define _mm_cvt_roundu32_sh(A, B, C) \ > + (__builtin_ia32_vcvtusi2sh32_round ((A), (B), (C))) > + > +#endif /* __OPTIMIZE__ */ > + > +#ifdef __x86_64__ > +extern __inline __m128h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_cvti64_sh (__m128h __A, long long __B) > +{ > + return __builtin_ia32_vcvtsi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION); > +} > + > +extern __inline __m128h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_cvtu64_sh (__m128h __A, unsigned long long __B) > +{ > + return __builtin_ia32_vcvtusi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION); > +} > + > +#ifdef __OPTIMIZE__ > +extern __inline __m128h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_cvt_roundi64_sh (__m128h __A, long long __B, const int __R) > +{ > + return __builtin_ia32_vcvtsi2sh64_round (__A, __B, __R); > +} > + > +extern __inline __m128h > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm_cvt_roundu64_sh (__m128h __A, unsigned long long __B, const int __R) > +{ > + return __builtin_ia32_vcvtusi2sh64_round (__A, __B, __R); > +} > + > +#else > +#define _mm_cvt_roundi64_sh(A, B, C) \ > + (__builtin_ia32_vcvtsi2sh64_round ((A), (B), (C))) > +#define _mm_cvt_roundu64_sh(A, B, C) \ > + (__builtin_ia32_vcvtusi2sh64_round ((A), (B), (C))) > + > +#endif /* __OPTIMIZE__ */ > +#endif /* __x86_64__ */ > + > + > #ifdef __DISABLE_AVX512FP16__ > #undef __DISABLE_AVX512FP16__ > #pragma GCC pop_options > diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def > index 57b9ea786e1..74bda59a65e 100644 > --- a/gcc/config/i386/i386-builtin-types.def > +++ b/gcc/config/i386/i386-builtin-types.def > @@ -1308,9 +1308,17 @@ DEF_FUNCTION_TYPE (V8HF, V8HI) > DEF_FUNCTION_TYPE (QI, V8HF, INT, UQI) > DEF_FUNCTION_TYPE (HI, V16HF, INT, UHI) > DEF_FUNCTION_TYPE (SI, V32HF, INT, USI) > +DEF_FUNCTION_TYPE (INT, V8HF, INT) > +DEF_FUNCTION_TYPE (INT64, V8HF, INT) > +DEF_FUNCTION_TYPE (UINT, V8HF, INT) > +DEF_FUNCTION_TYPE (UINT64, V8HF, INT) > DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF) > DEF_FUNCTION_TYPE (VOID, PCFLOAT16, V8HF, UQI) > DEF_FUNCTION_TYPE (V8HF, PCFLOAT16, V8HF, UQI) > +DEF_FUNCTION_TYPE (V8HF, V8HF, INT, INT) > +DEF_FUNCTION_TYPE (V8HF, V8HF, INT64, INT) > +DEF_FUNCTION_TYPE (V8HF, V8HF, UINT, INT) > +DEF_FUNCTION_TYPE (V8HF, V8HF, UINT64, INT) > DEF_FUNCTION_TYPE (V2DI, V8HF, V2DI, UQI) > DEF_FUNCTION_TYPE (V4DI, V8HF, V4DI, UQI) > DEF_FUNCTION_TYPE (V4SI, V8HF, V4SI, UQI) > diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def > index 44c55876e48..3602b40d6d5 100644 > --- a/gcc/config/i386/i386-builtin.def > +++ b/gcc/config/i386/i386-builtin.def > @@ -3094,6 +3094,14 @@ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtdq2ph_v16si_mask_ > BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtudq2ph_v16si_mask_round, "__builtin_ia32_vcvtudq2ph_v16si_mask_round", IX86_BUILTIN_VCVTUDQ2PH_V16SI_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SI_V16HF_UHI_INT) > BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtqq2ph_v8di_mask_round, "__builtin_ia32_vcvtqq2ph_v8di_mask_round", IX86_BUILTIN_VCVTQQ2PH_V8DI_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT) > BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuqq2ph_v8di_mask_round, "__builtin_ia32_vcvtuqq2ph_v8di_mask_round", IX86_BUILTIN_VCVTUQQ2PH_V8DI_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT) > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2si_round, "__builtin_ia32_vcvtsh2si32_round", IX86_BUILTIN_VCVTSH2SI32_ROUND, UNKNOWN, (int) INT_FTYPE_V8HF_INT) > +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2siq_round, "__builtin_ia32_vcvtsh2si64_round", IX86_BUILTIN_VCVTSH2SI64_ROUND, UNKNOWN, (int) INT64_FTYPE_V8HF_INT) > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2usi_round, "__builtin_ia32_vcvtsh2usi32_round", IX86_BUILTIN_VCVTSH2USI32_ROUND, UNKNOWN, (int) UINT_FTYPE_V8HF_INT) > +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2usiq_round, "__builtin_ia32_vcvtsh2usi64_round", IX86_BUILTIN_VCVTSH2USI64_ROUND, UNKNOWN, (int) UINT64_FTYPE_V8HF_INT) > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsi2sh_round, "__builtin_ia32_vcvtsi2sh32_round", IX86_BUILTIN_VCVTSI2SH32_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_INT) > +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsi2shq_round, "__builtin_ia32_vcvtsi2sh64_round", IX86_BUILTIN_VCVTSI2SH64_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT64_INT) > +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtusi2sh_round, "__builtin_ia32_vcvtusi2sh32_round", IX86_BUILTIN_VCVTUSI2SH32_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_UINT_INT) > +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtusi2shq_round, "__builtin_ia32_vcvtusi2sh64_round", IX86_BUILTIN_VCVTUSI2SH64_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_UINT64_INT) > > BDESC_END (ROUND_ARGS, MULTI_ARG) > > diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c > index 7d9e1bd6a2d..b83c6d9a92b 100644 > --- a/gcc/config/i386/i386-expand.c > +++ b/gcc/config/i386/i386-expand.c > @@ -10489,16 +10489,24 @@ ix86_expand_round_builtin (const struct builtin_description *d, > { > case UINT64_FTYPE_V2DF_INT: > case UINT64_FTYPE_V4SF_INT: > + case UINT64_FTYPE_V8HF_INT: > case UINT_FTYPE_V2DF_INT: > case UINT_FTYPE_V4SF_INT: > + case UINT_FTYPE_V8HF_INT: > case INT64_FTYPE_V2DF_INT: > case INT64_FTYPE_V4SF_INT: > + case INT64_FTYPE_V8HF_INT: > case INT_FTYPE_V2DF_INT: > case INT_FTYPE_V4SF_INT: > + case INT_FTYPE_V8HF_INT: > nargs = 2; > break; > case V32HF_FTYPE_V32HF_V32HF_INT: > case V8HF_FTYPE_V8HF_V8HF_INT: > + case V8HF_FTYPE_V8HF_INT_INT: > + case V8HF_FTYPE_V8HF_UINT_INT: > + case V8HF_FTYPE_V8HF_INT64_INT: > + case V8HF_FTYPE_V8HF_UINT64_INT: > case V4SF_FTYPE_V4SF_UINT_INT: > case V4SF_FTYPE_V4SF_UINT64_INT: > case V2DF_FTYPE_V2DF_UINT64_INT: > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > index 8b23048a232..b312d26b806 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -5589,6 +5589,52 @@ (define_insn "*avx512fp16_vcvt<floatsuffix>qq2ph_v2di_mask_1" > (set_attr "prefix" "evex") > (set_attr "mode" "TI")]) > > +(define_insn "avx512fp16_vcvtsh2<sseintconvertsignprefix>si<rex64namesuffix><round_name>" > + [(set (match_operand:SWI48 0 "register_operand" "=r,r") > + (unspec:SWI48 > + [(vec_select:HF > + (match_operand:V8HF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>") > + (parallel [(const_int 0)]))] > + UNSPEC_US_FIX_NOTRUNC))] > + "TARGET_AVX512FP16" > + "%vcvtsh2<sseintconvertsignprefix>si\t{<round_op2>%1, %0|%0, %k1<round_op2>}" > + [(set_attr "type" "sseicvt") > + (set_attr "athlon_decode" "double,vector") > + (set_attr "bdver1_decode" "double,double") > + (set_attr "prefix_rep" "1") > + (set_attr "prefix" "evex") > + (set_attr "mode" "<MODE>")]) > + > +(define_insn "avx512fp16_vcvtsh2<sseintconvertsignprefix>si<rex64namesuffix>_2" > + [(set (match_operand:SWI48 0 "register_operand" "=r,r") > + (unspec:SWI48 [(match_operand:HF 1 "nonimmediate_operand" "v,m")] > + UNSPEC_US_FIX_NOTRUNC))] > + "TARGET_AVX512FP16" > + "%vcvtsh2<sseintconvertsignprefix>si\t{%1, %0|%0, %k1}" > + [(set_attr "type" "sseicvt") > + (set_attr "athlon_decode" "double,vector") > + (set_attr "bdver1_decode" "double,double") > + (set_attr "prefix_rep" "1") > + (set_attr "prefix" "evex") > + (set_attr "mode" "<MODE>")]) > + > +(define_insn "avx512fp16_vcvt<floatsuffix>si2sh<rex64namesuffix><round_name>" > + [(set (match_operand:V8HF 0 "register_operand" "=v") > + (vec_merge:V8HF > + (vec_duplicate:V8HF > + (any_float:HF (match_operand:SWI48 2 "<round_nimm_scalar_predicate>" "<round_constraint3>"))) > + (match_operand:V8HF 1 "register_operand" "v") > + (const_int 1)))] > + "TARGET_AVX512FP16" > + "vcvt<floatsuffix>si2sh\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}" > + [(set_attr "type" "sseicvt") > + (set_attr "athlon_decode" "*") > + (set_attr "amdfam10_decode" "*") > + (set_attr "bdver1_decode" "*") > + (set_attr "btver2_decode" "double") > + (set_attr "znver1_decode" "double") > + (set_attr "prefix" "evex") > + (set_attr "mode" "HF")]) > > ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; > ;; > diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c > index b569cc0bdd9..0aae949097a 100644 > --- a/gcc/testsuite/gcc.target/i386/avx-1.c > +++ b/gcc/testsuite/gcc.target/i386/avx-1.c > @@ -731,6 +731,14 @@ > #define __builtin_ia32_vcvtudq2ph_v16si_mask_round(A, B, C, D) __builtin_ia32_vcvtudq2ph_v16si_mask_round(A, B, C, 8) > #define __builtin_ia32_vcvtqq2ph_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtqq2ph_v8di_mask_round(A, B, C, 8) > #define __builtin_ia32_vcvtuqq2ph_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtuqq2ph_v8di_mask_round(A, B, C, 8) > +#define __builtin_ia32_vcvtsh2si32_round(A, B) __builtin_ia32_vcvtsh2si32_round(A, 8) > +#define __builtin_ia32_vcvtsh2si64_round(A, B) __builtin_ia32_vcvtsh2si64_round(A, 8) > +#define __builtin_ia32_vcvtsh2usi32_round(A, B) __builtin_ia32_vcvtsh2usi32_round(A, 8) > +#define __builtin_ia32_vcvtsh2usi64_round(A, B) __builtin_ia32_vcvtsh2usi64_round(A, 8) > +#define __builtin_ia32_vcvtsi2sh32_round(A, B, C) __builtin_ia32_vcvtsi2sh32_round(A, B, 8) > +#define __builtin_ia32_vcvtsi2sh64_round(A, B, C) __builtin_ia32_vcvtsi2sh64_round(A, B, 8) > +#define __builtin_ia32_vcvtusi2sh32_round(A, B, C) __builtin_ia32_vcvtusi2sh32_round(A, B, 8) > +#define __builtin_ia32_vcvtusi2sh64_round(A, B, C) __builtin_ia32_vcvtusi2sh64_round(A, B, 8) > > /* avx512fp16vlintrin.h */ > #define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D) > diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c > index 07e59118438..997fb733132 100644 > --- a/gcc/testsuite/gcc.target/i386/sse-13.c > +++ b/gcc/testsuite/gcc.target/i386/sse-13.c > @@ -748,6 +748,14 @@ > #define __builtin_ia32_vcvtudq2ph_v16si_mask_round(A, B, C, D) __builtin_ia32_vcvtudq2ph_v16si_mask_round(A, B, C, 8) > #define __builtin_ia32_vcvtqq2ph_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtqq2ph_v8di_mask_round(A, B, C, 8) > #define __builtin_ia32_vcvtuqq2ph_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtuqq2ph_v8di_mask_round(A, B, C, 8) > +#define __builtin_ia32_vcvtsh2si32_round(A, B) __builtin_ia32_vcvtsh2si32_round(A, 8) > +#define __builtin_ia32_vcvtsh2si64_round(A, B) __builtin_ia32_vcvtsh2si64_round(A, 8) > +#define __builtin_ia32_vcvtsh2usi32_round(A, B) __builtin_ia32_vcvtsh2usi32_round(A, 8) > +#define __builtin_ia32_vcvtsh2usi64_round(A, B) __builtin_ia32_vcvtsh2usi64_round(A, 8) > +#define __builtin_ia32_vcvtsi2sh32_round(A, B, C) __builtin_ia32_vcvtsi2sh32_round(A, B, 8) > +#define __builtin_ia32_vcvtsi2sh64_round(A, B, C) __builtin_ia32_vcvtsi2sh64_round(A, B, 8) > +#define __builtin_ia32_vcvtusi2sh32_round(A, B, C) __builtin_ia32_vcvtusi2sh32_round(A, B, 8) > +#define __builtin_ia32_vcvtusi2sh64_round(A, B, C) __builtin_ia32_vcvtusi2sh64_round(A, B, 8) > > /* avx512fp16vlintrin.h */ > #define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D) > diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c > index 0530192d97e..89a589e0d80 100644 > --- a/gcc/testsuite/gcc.target/i386/sse-14.c > +++ b/gcc/testsuite/gcc.target/i386/sse-14.c > @@ -690,6 +690,14 @@ test_1 (_mm512_cvt_roundepi32_ph, __m256h, __m512i, 8) > test_1 (_mm512_cvt_roundepu32_ph, __m256h, __m512i, 8) > test_1 (_mm512_cvt_roundepi64_ph, __m128h, __m512i, 8) > test_1 (_mm512_cvt_roundepu64_ph, __m128h, __m512i, 8) > +test_1 (_mm_cvt_roundsh_i32, int, __m128h, 8) > +test_1 (_mm_cvt_roundsh_u32, unsigned, __m128h, 8) > +#ifdef __x86_64__ > +test_1 (_mm_cvt_roundsh_i64, long long, __m128h, 8) > +test_1 (_mm_cvt_roundsh_u64, unsigned long long, __m128h, 8) > +test_2 (_mm_cvt_roundi64_sh, __m128h, __m128h, long long, 8) > +test_2 (_mm_cvt_roundu64_sh, __m128h, __m128h, unsigned long long, 8) > +#endif > test_1x (_mm512_reduce_round_ph, __m512h, __m512h, 123, 8) > test_1x (_mm512_roundscale_round_ph, __m512h, __m512h, 123, 8) > test_1x (_mm512_getmant_ph, __m512h, __m512h, 1, 1) > @@ -734,6 +742,8 @@ test_2 (_mm512_maskz_cvt_roundepi32_ph, __m256h, __mmask16, __m512i, 8) > test_2 (_mm512_maskz_cvt_roundepu32_ph, __m256h, __mmask16, __m512i, 8) > test_2 (_mm512_maskz_cvt_roundepi64_ph, __m128h, __mmask8, __m512i, 8) > test_2 (_mm512_maskz_cvt_roundepu64_ph, __m128h, __mmask8, __m512i, 8) > +test_2 (_mm_cvt_roundi32_sh, __m128h, __m128h, int, 8) > +test_2 (_mm_cvt_roundu32_sh, __m128h, __m128h, unsigned, 8) > test_2x (_mm512_cmp_round_ph_mask, __mmask32, __m512h, __m512h, 1, 8) > test_2x (_mm_cmp_round_sh_mask, __mmask8, __m128h, __m128h, 1, 8) > test_2x (_mm_comi_round_sh, int, __m128h, __m128h, 1, 8) > diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c > index 04e6340516b..fed12744c6c 100644 > --- a/gcc/testsuite/gcc.target/i386/sse-22.c > +++ b/gcc/testsuite/gcc.target/i386/sse-22.c > @@ -795,6 +795,14 @@ test_1 (_mm512_cvt_roundepi32_ph, __m256h, __m512i, 8) > test_1 (_mm512_cvt_roundepu32_ph, __m256h, __m512i, 8) > test_1 (_mm512_cvt_roundepi64_ph, __m128h, __m512i, 8) > test_1 (_mm512_cvt_roundepu64_ph, __m128h, __m512i, 8) > +test_1 (_mm_cvt_roundsh_i32, int, __m128h, 8) > +test_1 (_mm_cvt_roundsh_u32, unsigned, __m128h, 8) > +#ifdef __x86_64__ > +test_1 (_mm_cvt_roundsh_i64, long long, __m128h, 8) > +test_1 (_mm_cvt_roundsh_u64, unsigned long long, __m128h, 8) > +test_2 (_mm_cvt_roundi64_sh, __m128h, __m128h, long long, 8) > +test_2 (_mm_cvt_roundu64_sh, __m128h, __m128h, unsigned long long, 8) > +#endif > test_1x (_mm512_reduce_round_ph, __m512h, __m512h, 123, 8) > test_1x (_mm512_roundscale_round_ph, __m512h, __m512h, 123, 8) > test_1x (_mm512_getmant_ph, __m512h, __m512h, 1, 1) > @@ -838,6 +846,8 @@ test_2 (_mm512_maskz_cvt_roundepi32_ph, __m256h, __mmask16, __m512i, 8) > test_2 (_mm512_maskz_cvt_roundepu32_ph, __m256h, __mmask16, __m512i, 8) > test_2 (_mm512_maskz_cvt_roundepi64_ph, __m128h, __mmask8, __m512i, 8) > test_2 (_mm512_maskz_cvt_roundepu64_ph, __m128h, __mmask8, __m512i, 8) > +test_2 (_mm_cvt_roundi32_sh, __m128h, __m128h, int, 8) > +test_2 (_mm_cvt_roundu32_sh, __m128h, __m128h, unsigned, 8) > test_2x (_mm512_cmp_round_ph_mask, __mmask32, __m512h, __m512h, 1, 8) > test_2x (_mm_cmp_round_sh_mask, __mmask8, __m128h, __m128h, 1, 8) > test_2x (_mm_comi_round_sh, int, __m128h, __m128h, 1, 8) > diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c > index 684891cc98b..6e8d8a1833c 100644 > --- a/gcc/testsuite/gcc.target/i386/sse-23.c > +++ b/gcc/testsuite/gcc.target/i386/sse-23.c > @@ -749,6 +749,14 @@ > #define __builtin_ia32_vcvtudq2ph_v16si_mask_round(A, B, C, D) __builtin_ia32_vcvtudq2ph_v16si_mask_round(A, B, C, 8) > #define __builtin_ia32_vcvtqq2ph_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtqq2ph_v8di_mask_round(A, B, C, 8) > #define __builtin_ia32_vcvtuqq2ph_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtuqq2ph_v8di_mask_round(A, B, C, 8) > +#define __builtin_ia32_vcvtsh2si32_round(A, B) __builtin_ia32_vcvtsh2si32_round(A, 8) > +#define __builtin_ia32_vcvtsh2si64_round(A, B) __builtin_ia32_vcvtsh2si64_round(A, 8) > +#define __builtin_ia32_vcvtsh2usi32_round(A, B) __builtin_ia32_vcvtsh2usi32_round(A, 8) > +#define __builtin_ia32_vcvtsh2usi64_round(A, B) __builtin_ia32_vcvtsh2usi64_round(A, 8) > +#define __builtin_ia32_vcvtsi2sh32_round(A, B, C) __builtin_ia32_vcvtsi2sh32_round(A, B, 8) > +#define __builtin_ia32_vcvtsi2sh64_round(A, B, C) __builtin_ia32_vcvtsi2sh64_round(A, B, 8) > +#define __builtin_ia32_vcvtusi2sh32_round(A, B, C) __builtin_ia32_vcvtusi2sh32_round(A, B, 8) > +#define __builtin_ia32_vcvtusi2sh64_round(A, B, C) __builtin_ia32_vcvtusi2sh64_round(A, B, 8) > > /* avx512fp16vlintrin.h */ > #define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D) > -- > 2.18.1 > -- BR, Hongtao
diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h index bd801942365..7524a8d6a5b 100644 --- a/gcc/config/i386/avx512fp16intrin.h +++ b/gcc/config/i386/avx512fp16intrin.h @@ -3529,6 +3529,164 @@ _mm512_maskz_cvt_roundepu16_ph (__mmask32 __A, __m512i __B, int __C) #endif /* __OPTIMIZE__ */ +/* Intrinsics vcvtsh2si, vcvtsh2us. */ +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsh_i32 (__m128h __A) +{ + return (int) __builtin_ia32_vcvtsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION); +} + +extern __inline unsigned +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsh_u32 (__m128h __A) +{ + return (int) __builtin_ia32_vcvtsh2usi32_round (__A, + _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundsh_i32 (__m128h __A, const int __R) +{ + return (int) __builtin_ia32_vcvtsh2si32_round (__A, __R); +} + +extern __inline unsigned +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundsh_u32 (__m128h __A, const int __R) +{ + return (int) __builtin_ia32_vcvtsh2usi32_round (__A, __R); +} + +#else +#define _mm_cvt_roundsh_i32(A, B) \ + ((int)__builtin_ia32_vcvtsh2si32_round ((A), (B))) +#define _mm_cvt_roundsh_u32(A, B) \ + ((int)__builtin_ia32_vcvtsh2usi32_round ((A), (B))) + +#endif /* __OPTIMIZE__ */ + +#ifdef __x86_64__ +extern __inline long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsh_i64 (__m128h __A) +{ + return (long long) + __builtin_ia32_vcvtsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION); +} + +extern __inline unsigned long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsh_u64 (__m128h __A) +{ + return (long long) + __builtin_ia32_vcvtsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundsh_i64 (__m128h __A, const int __R) +{ + return (long long) __builtin_ia32_vcvtsh2si64_round (__A, __R); +} + +extern __inline unsigned long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundsh_u64 (__m128h __A, const int __R) +{ + return (long long) __builtin_ia32_vcvtsh2usi64_round (__A, __R); +} + +#else +#define _mm_cvt_roundsh_i64(A, B) \ + ((long long)__builtin_ia32_vcvtsh2si64_round ((A), (B))) +#define _mm_cvt_roundsh_u64(A, B) \ + ((long long)__builtin_ia32_vcvtsh2usi64_round ((A), (B))) + +#endif /* __OPTIMIZE__ */ +#endif /* __x86_64__ */ + +/* Intrinsics vcvtsi2sh, vcvtusi2sh. */ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvti32_sh (__m128h __A, int __B) +{ + return __builtin_ia32_vcvtsi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtu32_sh (__m128h __A, unsigned int __B) +{ + return __builtin_ia32_vcvtusi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundi32_sh (__m128h __A, int __B, const int __R) +{ + return __builtin_ia32_vcvtsi2sh32_round (__A, __B, __R); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundu32_sh (__m128h __A, unsigned int __B, const int __R) +{ + return __builtin_ia32_vcvtusi2sh32_round (__A, __B, __R); +} + +#else +#define _mm_cvt_roundi32_sh(A, B, C) \ + (__builtin_ia32_vcvtsi2sh32_round ((A), (B), (C))) +#define _mm_cvt_roundu32_sh(A, B, C) \ + (__builtin_ia32_vcvtusi2sh32_round ((A), (B), (C))) + +#endif /* __OPTIMIZE__ */ + +#ifdef __x86_64__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvti64_sh (__m128h __A, long long __B) +{ + return __builtin_ia32_vcvtsi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtu64_sh (__m128h __A, unsigned long long __B) +{ + return __builtin_ia32_vcvtusi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION); +} + +#ifdef __OPTIMIZE__ +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundi64_sh (__m128h __A, long long __B, const int __R) +{ + return __builtin_ia32_vcvtsi2sh64_round (__A, __B, __R); +} + +extern __inline __m128h +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_roundu64_sh (__m128h __A, unsigned long long __B, const int __R) +{ + return __builtin_ia32_vcvtusi2sh64_round (__A, __B, __R); +} + +#else +#define _mm_cvt_roundi64_sh(A, B, C) \ + (__builtin_ia32_vcvtsi2sh64_round ((A), (B), (C))) +#define _mm_cvt_roundu64_sh(A, B, C) \ + (__builtin_ia32_vcvtusi2sh64_round ((A), (B), (C))) + +#endif /* __OPTIMIZE__ */ +#endif /* __x86_64__ */ + + #ifdef __DISABLE_AVX512FP16__ #undef __DISABLE_AVX512FP16__ #pragma GCC pop_options diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def index 57b9ea786e1..74bda59a65e 100644 --- a/gcc/config/i386/i386-builtin-types.def +++ b/gcc/config/i386/i386-builtin-types.def @@ -1308,9 +1308,17 @@ DEF_FUNCTION_TYPE (V8HF, V8HI) DEF_FUNCTION_TYPE (QI, V8HF, INT, UQI) DEF_FUNCTION_TYPE (HI, V16HF, INT, UHI) DEF_FUNCTION_TYPE (SI, V32HF, INT, USI) +DEF_FUNCTION_TYPE (INT, V8HF, INT) +DEF_FUNCTION_TYPE (INT64, V8HF, INT) +DEF_FUNCTION_TYPE (UINT, V8HF, INT) +DEF_FUNCTION_TYPE (UINT64, V8HF, INT) DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF) DEF_FUNCTION_TYPE (VOID, PCFLOAT16, V8HF, UQI) DEF_FUNCTION_TYPE (V8HF, PCFLOAT16, V8HF, UQI) +DEF_FUNCTION_TYPE (V8HF, V8HF, INT, INT) +DEF_FUNCTION_TYPE (V8HF, V8HF, INT64, INT) +DEF_FUNCTION_TYPE (V8HF, V8HF, UINT, INT) +DEF_FUNCTION_TYPE (V8HF, V8HF, UINT64, INT) DEF_FUNCTION_TYPE (V2DI, V8HF, V2DI, UQI) DEF_FUNCTION_TYPE (V4DI, V8HF, V4DI, UQI) DEF_FUNCTION_TYPE (V4SI, V8HF, V4SI, UQI) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 44c55876e48..3602b40d6d5 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -3094,6 +3094,14 @@ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtdq2ph_v16si_mask_ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtudq2ph_v16si_mask_round, "__builtin_ia32_vcvtudq2ph_v16si_mask_round", IX86_BUILTIN_VCVTUDQ2PH_V16SI_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SI_V16HF_UHI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtqq2ph_v8di_mask_round, "__builtin_ia32_vcvtqq2ph_v8di_mask_round", IX86_BUILTIN_VCVTQQ2PH_V8DI_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT) BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuqq2ph_v8di_mask_round, "__builtin_ia32_vcvtuqq2ph_v8di_mask_round", IX86_BUILTIN_VCVTUQQ2PH_V8DI_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2si_round, "__builtin_ia32_vcvtsh2si32_round", IX86_BUILTIN_VCVTSH2SI32_ROUND, UNKNOWN, (int) INT_FTYPE_V8HF_INT) +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2siq_round, "__builtin_ia32_vcvtsh2si64_round", IX86_BUILTIN_VCVTSH2SI64_ROUND, UNKNOWN, (int) INT64_FTYPE_V8HF_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2usi_round, "__builtin_ia32_vcvtsh2usi32_round", IX86_BUILTIN_VCVTSH2USI32_ROUND, UNKNOWN, (int) UINT_FTYPE_V8HF_INT) +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2usiq_round, "__builtin_ia32_vcvtsh2usi64_round", IX86_BUILTIN_VCVTSH2USI64_ROUND, UNKNOWN, (int) UINT64_FTYPE_V8HF_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsi2sh_round, "__builtin_ia32_vcvtsi2sh32_round", IX86_BUILTIN_VCVTSI2SH32_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_INT) +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsi2shq_round, "__builtin_ia32_vcvtsi2sh64_round", IX86_BUILTIN_VCVTSI2SH64_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT64_INT) +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtusi2sh_round, "__builtin_ia32_vcvtusi2sh32_round", IX86_BUILTIN_VCVTUSI2SH32_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_UINT_INT) +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtusi2shq_round, "__builtin_ia32_vcvtusi2sh64_round", IX86_BUILTIN_VCVTUSI2SH64_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_UINT64_INT) BDESC_END (ROUND_ARGS, MULTI_ARG) diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 7d9e1bd6a2d..b83c6d9a92b 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -10489,16 +10489,24 @@ ix86_expand_round_builtin (const struct builtin_description *d, { case UINT64_FTYPE_V2DF_INT: case UINT64_FTYPE_V4SF_INT: + case UINT64_FTYPE_V8HF_INT: case UINT_FTYPE_V2DF_INT: case UINT_FTYPE_V4SF_INT: + case UINT_FTYPE_V8HF_INT: case INT64_FTYPE_V2DF_INT: case INT64_FTYPE_V4SF_INT: + case INT64_FTYPE_V8HF_INT: case INT_FTYPE_V2DF_INT: case INT_FTYPE_V4SF_INT: + case INT_FTYPE_V8HF_INT: nargs = 2; break; case V32HF_FTYPE_V32HF_V32HF_INT: case V8HF_FTYPE_V8HF_V8HF_INT: + case V8HF_FTYPE_V8HF_INT_INT: + case V8HF_FTYPE_V8HF_UINT_INT: + case V8HF_FTYPE_V8HF_INT64_INT: + case V8HF_FTYPE_V8HF_UINT64_INT: case V4SF_FTYPE_V4SF_UINT_INT: case V4SF_FTYPE_V4SF_UINT64_INT: case V2DF_FTYPE_V2DF_UINT64_INT: diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 8b23048a232..b312d26b806 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -5589,6 +5589,52 @@ (define_insn "*avx512fp16_vcvt<floatsuffix>qq2ph_v2di_mask_1" (set_attr "prefix" "evex") (set_attr "mode" "TI")]) +(define_insn "avx512fp16_vcvtsh2<sseintconvertsignprefix>si<rex64namesuffix><round_name>" + [(set (match_operand:SWI48 0 "register_operand" "=r,r") + (unspec:SWI48 + [(vec_select:HF + (match_operand:V8HF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>") + (parallel [(const_int 0)]))] + UNSPEC_US_FIX_NOTRUNC))] + "TARGET_AVX512FP16" + "%vcvtsh2<sseintconvertsignprefix>si\t{<round_op2>%1, %0|%0, %k1<round_op2>}" + [(set_attr "type" "sseicvt") + (set_attr "athlon_decode" "double,vector") + (set_attr "bdver1_decode" "double,double") + (set_attr "prefix_rep" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "<MODE>")]) + +(define_insn "avx512fp16_vcvtsh2<sseintconvertsignprefix>si<rex64namesuffix>_2" + [(set (match_operand:SWI48 0 "register_operand" "=r,r") + (unspec:SWI48 [(match_operand:HF 1 "nonimmediate_operand" "v,m")] + UNSPEC_US_FIX_NOTRUNC))] + "TARGET_AVX512FP16" + "%vcvtsh2<sseintconvertsignprefix>si\t{%1, %0|%0, %k1}" + [(set_attr "type" "sseicvt") + (set_attr "athlon_decode" "double,vector") + (set_attr "bdver1_decode" "double,double") + (set_attr "prefix_rep" "1") + (set_attr "prefix" "evex") + (set_attr "mode" "<MODE>")]) + +(define_insn "avx512fp16_vcvt<floatsuffix>si2sh<rex64namesuffix><round_name>" + [(set (match_operand:V8HF 0 "register_operand" "=v") + (vec_merge:V8HF + (vec_duplicate:V8HF + (any_float:HF (match_operand:SWI48 2 "<round_nimm_scalar_predicate>" "<round_constraint3>"))) + (match_operand:V8HF 1 "register_operand" "v") + (const_int 1)))] + "TARGET_AVX512FP16" + "vcvt<floatsuffix>si2sh\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}" + [(set_attr "type" "sseicvt") + (set_attr "athlon_decode" "*") + (set_attr "amdfam10_decode" "*") + (set_attr "bdver1_decode" "*") + (set_attr "btver2_decode" "double") + (set_attr "znver1_decode" "double") + (set_attr "prefix" "evex") + (set_attr "mode" "HF")]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c index b569cc0bdd9..0aae949097a 100644 --- a/gcc/testsuite/gcc.target/i386/avx-1.c +++ b/gcc/testsuite/gcc.target/i386/avx-1.c @@ -731,6 +731,14 @@ #define __builtin_ia32_vcvtudq2ph_v16si_mask_round(A, B, C, D) __builtin_ia32_vcvtudq2ph_v16si_mask_round(A, B, C, 8) #define __builtin_ia32_vcvtqq2ph_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtqq2ph_v8di_mask_round(A, B, C, 8) #define __builtin_ia32_vcvtuqq2ph_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtuqq2ph_v8di_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtsh2si32_round(A, B) __builtin_ia32_vcvtsh2si32_round(A, 8) +#define __builtin_ia32_vcvtsh2si64_round(A, B) __builtin_ia32_vcvtsh2si64_round(A, 8) +#define __builtin_ia32_vcvtsh2usi32_round(A, B) __builtin_ia32_vcvtsh2usi32_round(A, 8) +#define __builtin_ia32_vcvtsh2usi64_round(A, B) __builtin_ia32_vcvtsh2usi64_round(A, 8) +#define __builtin_ia32_vcvtsi2sh32_round(A, B, C) __builtin_ia32_vcvtsi2sh32_round(A, B, 8) +#define __builtin_ia32_vcvtsi2sh64_round(A, B, C) __builtin_ia32_vcvtsi2sh64_round(A, B, 8) +#define __builtin_ia32_vcvtusi2sh32_round(A, B, C) __builtin_ia32_vcvtusi2sh32_round(A, B, 8) +#define __builtin_ia32_vcvtusi2sh64_round(A, B, C) __builtin_ia32_vcvtusi2sh64_round(A, B, 8) /* avx512fp16vlintrin.h */ #define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D) diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index 07e59118438..997fb733132 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -748,6 +748,14 @@ #define __builtin_ia32_vcvtudq2ph_v16si_mask_round(A, B, C, D) __builtin_ia32_vcvtudq2ph_v16si_mask_round(A, B, C, 8) #define __builtin_ia32_vcvtqq2ph_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtqq2ph_v8di_mask_round(A, B, C, 8) #define __builtin_ia32_vcvtuqq2ph_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtuqq2ph_v8di_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtsh2si32_round(A, B) __builtin_ia32_vcvtsh2si32_round(A, 8) +#define __builtin_ia32_vcvtsh2si64_round(A, B) __builtin_ia32_vcvtsh2si64_round(A, 8) +#define __builtin_ia32_vcvtsh2usi32_round(A, B) __builtin_ia32_vcvtsh2usi32_round(A, 8) +#define __builtin_ia32_vcvtsh2usi64_round(A, B) __builtin_ia32_vcvtsh2usi64_round(A, 8) +#define __builtin_ia32_vcvtsi2sh32_round(A, B, C) __builtin_ia32_vcvtsi2sh32_round(A, B, 8) +#define __builtin_ia32_vcvtsi2sh64_round(A, B, C) __builtin_ia32_vcvtsi2sh64_round(A, B, 8) +#define __builtin_ia32_vcvtusi2sh32_round(A, B, C) __builtin_ia32_vcvtusi2sh32_round(A, B, 8) +#define __builtin_ia32_vcvtusi2sh64_round(A, B, C) __builtin_ia32_vcvtusi2sh64_round(A, B, 8) /* avx512fp16vlintrin.h */ #define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D) diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index 0530192d97e..89a589e0d80 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -690,6 +690,14 @@ test_1 (_mm512_cvt_roundepi32_ph, __m256h, __m512i, 8) test_1 (_mm512_cvt_roundepu32_ph, __m256h, __m512i, 8) test_1 (_mm512_cvt_roundepi64_ph, __m128h, __m512i, 8) test_1 (_mm512_cvt_roundepu64_ph, __m128h, __m512i, 8) +test_1 (_mm_cvt_roundsh_i32, int, __m128h, 8) +test_1 (_mm_cvt_roundsh_u32, unsigned, __m128h, 8) +#ifdef __x86_64__ +test_1 (_mm_cvt_roundsh_i64, long long, __m128h, 8) +test_1 (_mm_cvt_roundsh_u64, unsigned long long, __m128h, 8) +test_2 (_mm_cvt_roundi64_sh, __m128h, __m128h, long long, 8) +test_2 (_mm_cvt_roundu64_sh, __m128h, __m128h, unsigned long long, 8) +#endif test_1x (_mm512_reduce_round_ph, __m512h, __m512h, 123, 8) test_1x (_mm512_roundscale_round_ph, __m512h, __m512h, 123, 8) test_1x (_mm512_getmant_ph, __m512h, __m512h, 1, 1) @@ -734,6 +742,8 @@ test_2 (_mm512_maskz_cvt_roundepi32_ph, __m256h, __mmask16, __m512i, 8) test_2 (_mm512_maskz_cvt_roundepu32_ph, __m256h, __mmask16, __m512i, 8) test_2 (_mm512_maskz_cvt_roundepi64_ph, __m128h, __mmask8, __m512i, 8) test_2 (_mm512_maskz_cvt_roundepu64_ph, __m128h, __mmask8, __m512i, 8) +test_2 (_mm_cvt_roundi32_sh, __m128h, __m128h, int, 8) +test_2 (_mm_cvt_roundu32_sh, __m128h, __m128h, unsigned, 8) test_2x (_mm512_cmp_round_ph_mask, __mmask32, __m512h, __m512h, 1, 8) test_2x (_mm_cmp_round_sh_mask, __mmask8, __m128h, __m128h, 1, 8) test_2x (_mm_comi_round_sh, int, __m128h, __m128h, 1, 8) diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c index 04e6340516b..fed12744c6c 100644 --- a/gcc/testsuite/gcc.target/i386/sse-22.c +++ b/gcc/testsuite/gcc.target/i386/sse-22.c @@ -795,6 +795,14 @@ test_1 (_mm512_cvt_roundepi32_ph, __m256h, __m512i, 8) test_1 (_mm512_cvt_roundepu32_ph, __m256h, __m512i, 8) test_1 (_mm512_cvt_roundepi64_ph, __m128h, __m512i, 8) test_1 (_mm512_cvt_roundepu64_ph, __m128h, __m512i, 8) +test_1 (_mm_cvt_roundsh_i32, int, __m128h, 8) +test_1 (_mm_cvt_roundsh_u32, unsigned, __m128h, 8) +#ifdef __x86_64__ +test_1 (_mm_cvt_roundsh_i64, long long, __m128h, 8) +test_1 (_mm_cvt_roundsh_u64, unsigned long long, __m128h, 8) +test_2 (_mm_cvt_roundi64_sh, __m128h, __m128h, long long, 8) +test_2 (_mm_cvt_roundu64_sh, __m128h, __m128h, unsigned long long, 8) +#endif test_1x (_mm512_reduce_round_ph, __m512h, __m512h, 123, 8) test_1x (_mm512_roundscale_round_ph, __m512h, __m512h, 123, 8) test_1x (_mm512_getmant_ph, __m512h, __m512h, 1, 1) @@ -838,6 +846,8 @@ test_2 (_mm512_maskz_cvt_roundepi32_ph, __m256h, __mmask16, __m512i, 8) test_2 (_mm512_maskz_cvt_roundepu32_ph, __m256h, __mmask16, __m512i, 8) test_2 (_mm512_maskz_cvt_roundepi64_ph, __m128h, __mmask8, __m512i, 8) test_2 (_mm512_maskz_cvt_roundepu64_ph, __m128h, __mmask8, __m512i, 8) +test_2 (_mm_cvt_roundi32_sh, __m128h, __m128h, int, 8) +test_2 (_mm_cvt_roundu32_sh, __m128h, __m128h, unsigned, 8) test_2x (_mm512_cmp_round_ph_mask, __mmask32, __m512h, __m512h, 1, 8) test_2x (_mm_cmp_round_sh_mask, __mmask8, __m128h, __m128h, 1, 8) test_2x (_mm_comi_round_sh, int, __m128h, __m128h, 1, 8) diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index 684891cc98b..6e8d8a1833c 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -749,6 +749,14 @@ #define __builtin_ia32_vcvtudq2ph_v16si_mask_round(A, B, C, D) __builtin_ia32_vcvtudq2ph_v16si_mask_round(A, B, C, 8) #define __builtin_ia32_vcvtqq2ph_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtqq2ph_v8di_mask_round(A, B, C, 8) #define __builtin_ia32_vcvtuqq2ph_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtuqq2ph_v8di_mask_round(A, B, C, 8) +#define __builtin_ia32_vcvtsh2si32_round(A, B) __builtin_ia32_vcvtsh2si32_round(A, 8) +#define __builtin_ia32_vcvtsh2si64_round(A, B) __builtin_ia32_vcvtsh2si64_round(A, 8) +#define __builtin_ia32_vcvtsh2usi32_round(A, B) __builtin_ia32_vcvtsh2usi32_round(A, 8) +#define __builtin_ia32_vcvtsh2usi64_round(A, B) __builtin_ia32_vcvtsh2usi64_round(A, 8) +#define __builtin_ia32_vcvtsi2sh32_round(A, B, C) __builtin_ia32_vcvtsi2sh32_round(A, B, 8) +#define __builtin_ia32_vcvtsi2sh64_round(A, B, C) __builtin_ia32_vcvtsi2sh64_round(A, B, 8) +#define __builtin_ia32_vcvtusi2sh32_round(A, B, C) __builtin_ia32_vcvtusi2sh32_round(A, B, 8) +#define __builtin_ia32_vcvtusi2sh64_round(A, B, C) __builtin_ia32_vcvtusi2sh64_round(A, B, 8) /* avx512fp16vlintrin.h */ #define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)