diff mbox series

[30/62] AVX512FP16: Add vcvtsh2si/vcvtsh2usi/vcvtsi2sh/vcvtusi2sh.

Message ID 20210701061648.9447-31-hongtao.liu@intel.com
State New
Headers show
Series Support all AVX512FP16 intrinsics. | expand

Commit Message

Liu, Hongtao July 1, 2021, 6:16 a.m. UTC
gcc/ChangeLog:

	* config/i386/avx512fp16intrin.h (_mm_cvtsh_i32): New intrinsic.
	(_mm_cvtsh_u32): Likewise.
	(_mm_cvt_roundsh_i32): Likewise.
	(_mm_cvt_roundsh_u32): Likewise.
	(_mm_cvtsh_i64): Likewise.
	(_mm_cvtsh_u64): Likewise.
	(_mm_cvt_roundsh_i64): Likewise.
	(_mm_cvt_roundsh_u64): Likewise.
	(_mm_cvti32_sh): Likewise.
	(_mm_cvtu32_sh): Likewise.
	(_mm_cvt_roundi32_sh): Likewise.
	(_mm_cvt_roundu32_sh): Likewise.
	(_mm_cvti64_sh): Likewise.
	(_mm_cvtu64_sh): Likewise.
	(_mm_cvt_roundi64_sh): Likewise.
	(_mm_cvt_roundu64_sh): Likewise.
	* config/i386/i386-builtin-types.def: Add corresponding builtin types.
	* config/i386/i386-builtin.def: Add corresponding new builtins.
	* config/i386/i386-expand.c (ix86_expand_round_builtin):
	Handle new builtin types.
	* config/i386/sse.md
	(avx512fp16_vcvtsh2<sseintconvertsignprefix>si<rex64namesuffix><round_name>):
	New define_insn.
	(avx512fp16_vcvtsh2<sseintconvertsignprefix>si<rex64namesuffix>_2): Likewise.
	(avx512fp16_vcvt<floatsuffix>si2sh<rex64namesuffix><round_name>): Likewise.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx-1.c: Add test for new builtins.
	* gcc.target/i386/sse-13.c: Ditto.
	* gcc.target/i386/sse-23.c: Ditto.
	* gcc.target/i386/sse-14.c: Add test for new intrinsics.
	* gcc.target/i386/sse-22.c: Ditto.
---
 gcc/config/i386/avx512fp16intrin.h     | 158 +++++++++++++++++++++++++
 gcc/config/i386/i386-builtin-types.def |   8 ++
 gcc/config/i386/i386-builtin.def       |   8 ++
 gcc/config/i386/i386-expand.c          |   8 ++
 gcc/config/i386/sse.md                 |  46 +++++++
 gcc/testsuite/gcc.target/i386/avx-1.c  |   8 ++
 gcc/testsuite/gcc.target/i386/sse-13.c |   8 ++
 gcc/testsuite/gcc.target/i386/sse-14.c |  10 ++
 gcc/testsuite/gcc.target/i386/sse-22.c |  10 ++
 gcc/testsuite/gcc.target/i386/sse-23.c |   8 ++
 10 files changed, 272 insertions(+)

Comments

Hongtao Liu Sept. 17, 2021, 8:07 a.m. UTC | #1
I'm going to check in 10 patches.

[PATCH 30/62] AVX512FP16: Add vcvtsh2si/vcvtsh2usi/vcvtsi2sh/vcvtusi2sh.
[PATCH 31/62] AVX512FP16: Add testcase for
vcvtsh2si/vcvtsh2usi/vcvtsi2sh/vcvtusi2sh.
[PATCH 32/62] AVX512FP16: Add
vcvttph2w/vcvttph2uw/vcvttph2dq/vcvttph2qq/vcvttph2udq/vcvttph2uqq
[PATCH 33/62] AVX512FP16: Add testcase for
vcvttph2w/vcvttph2uw/vcvttph2dq/vcvttph2udq/vcvttph2qq/vcvttph2uqq
[PATCH 34/62] AVX512FP16: Add vcvttsh2si/vcvttsh2usi
[PATCH 35/62] AVX512FP16: Add vcvtph2pd/vcvtph2psx/vcvtpd2ph/vcvtps2phx
[PATCH 36/62] AVX512FP16: Add testcase for
vcvtph2pd/vcvtph2psx/vcvtpd2ph/vcvtps2phx
[PATCH 37/62] AVX512FP16: Add vcvtsh2ss/vcvtsh2sd/vcvtss2sh/vcvtsd2sh.
[PATCH 38/62] AVX512FP16: Add testcase for
vcvtsh2sd/vcvtsh2ss/vcvtsd2sh/vcvtss2sh
[PATCH 39/62] AVX512FP16: Add intrinsics for casting between vector
float16 and vector float32/float64/integer.

  Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
  Newly added runtime testcase passed on sde{-m32,}.


On Thu, Jul 1, 2021 at 2:17 PM liuhongt <hongtao.liu@intel.com> wrote:
>
> gcc/ChangeLog:
>
>         * config/i386/avx512fp16intrin.h (_mm_cvtsh_i32): New intrinsic.
>         (_mm_cvtsh_u32): Likewise.
>         (_mm_cvt_roundsh_i32): Likewise.
>         (_mm_cvt_roundsh_u32): Likewise.
>         (_mm_cvtsh_i64): Likewise.
>         (_mm_cvtsh_u64): Likewise.
>         (_mm_cvt_roundsh_i64): Likewise.
>         (_mm_cvt_roundsh_u64): Likewise.
>         (_mm_cvti32_sh): Likewise.
>         (_mm_cvtu32_sh): Likewise.
>         (_mm_cvt_roundi32_sh): Likewise.
>         (_mm_cvt_roundu32_sh): Likewise.
>         (_mm_cvti64_sh): Likewise.
>         (_mm_cvtu64_sh): Likewise.
>         (_mm_cvt_roundi64_sh): Likewise.
>         (_mm_cvt_roundu64_sh): Likewise.
>         * config/i386/i386-builtin-types.def: Add corresponding builtin types.
>         * config/i386/i386-builtin.def: Add corresponding new builtins.
>         * config/i386/i386-expand.c (ix86_expand_round_builtin):
>         Handle new builtin types.
>         * config/i386/sse.md
>         (avx512fp16_vcvtsh2<sseintconvertsignprefix>si<rex64namesuffix><round_name>):
>         New define_insn.
>         (avx512fp16_vcvtsh2<sseintconvertsignprefix>si<rex64namesuffix>_2): Likewise.
>         (avx512fp16_vcvt<floatsuffix>si2sh<rex64namesuffix><round_name>): Likewise.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/avx-1.c: Add test for new builtins.
>         * gcc.target/i386/sse-13.c: Ditto.
>         * gcc.target/i386/sse-23.c: Ditto.
>         * gcc.target/i386/sse-14.c: Add test for new intrinsics.
>         * gcc.target/i386/sse-22.c: Ditto.
> ---
>  gcc/config/i386/avx512fp16intrin.h     | 158 +++++++++++++++++++++++++
>  gcc/config/i386/i386-builtin-types.def |   8 ++
>  gcc/config/i386/i386-builtin.def       |   8 ++
>  gcc/config/i386/i386-expand.c          |   8 ++
>  gcc/config/i386/sse.md                 |  46 +++++++
>  gcc/testsuite/gcc.target/i386/avx-1.c  |   8 ++
>  gcc/testsuite/gcc.target/i386/sse-13.c |   8 ++
>  gcc/testsuite/gcc.target/i386/sse-14.c |  10 ++
>  gcc/testsuite/gcc.target/i386/sse-22.c |  10 ++
>  gcc/testsuite/gcc.target/i386/sse-23.c |   8 ++
>  10 files changed, 272 insertions(+)
>
> diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h
> index bd801942365..7524a8d6a5b 100644
> --- a/gcc/config/i386/avx512fp16intrin.h
> +++ b/gcc/config/i386/avx512fp16intrin.h
> @@ -3529,6 +3529,164 @@ _mm512_maskz_cvt_roundepu16_ph (__mmask32 __A, __m512i __B, int __C)
>
>  #endif /* __OPTIMIZE__ */
>
> +/* Intrinsics vcvtsh2si, vcvtsh2usi.  */
> +extern __inline int
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvtsh_i32 (__m128h __A)
> +{
> +  return (int) __builtin_ia32_vcvtsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION);
> +}
> +
> +extern __inline unsigned
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvtsh_u32 (__m128h __A)
> +{
> +  return (int) __builtin_ia32_vcvtsh2usi32_round (__A,
> +                                                 _MM_FROUND_CUR_DIRECTION);
> +}
> +
> +#ifdef __OPTIMIZE__
> +extern __inline int
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvt_roundsh_i32 (__m128h __A, const int __R)
> +{
> +  return (int) __builtin_ia32_vcvtsh2si32_round (__A, __R);
> +}
> +
> +extern __inline unsigned
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvt_roundsh_u32 (__m128h __A, const int __R)
> +{
> +  return (int) __builtin_ia32_vcvtsh2usi32_round (__A, __R);
> +}
> +
> +#else
> +#define _mm_cvt_roundsh_i32(A, B)              \
> +  ((int)__builtin_ia32_vcvtsh2si32_round ((A), (B)))
> +#define _mm_cvt_roundsh_u32(A, B)              \
> +  ((int)__builtin_ia32_vcvtsh2usi32_round ((A), (B)))
> +
> +#endif /* __OPTIMIZE__ */
> +
> +#ifdef __x86_64__
> +extern __inline long long
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvtsh_i64 (__m128h __A)
> +{
> +  return (long long)
> +    __builtin_ia32_vcvtsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION);
> +}
> +
> +extern __inline unsigned long long
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvtsh_u64 (__m128h __A)
> +{
> +  return (long long)
> +    __builtin_ia32_vcvtsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION);
> +}
> +
> +#ifdef __OPTIMIZE__
> +extern __inline long long
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvt_roundsh_i64 (__m128h __A, const int __R)
> +{
> +  return (long long) __builtin_ia32_vcvtsh2si64_round (__A, __R);
> +}
> +
> +extern __inline unsigned long long
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvt_roundsh_u64 (__m128h __A, const int __R)
> +{
> +  return (long long) __builtin_ia32_vcvtsh2usi64_round (__A, __R);
> +}
> +
> +#else
> +#define _mm_cvt_roundsh_i64(A, B)                      \
> +  ((long long)__builtin_ia32_vcvtsh2si64_round ((A), (B)))
> +#define _mm_cvt_roundsh_u64(A, B)                      \
> +  ((long long)__builtin_ia32_vcvtsh2usi64_round ((A), (B)))
> +
> +#endif /* __OPTIMIZE__ */
> +#endif /* __x86_64__ */
> +
> +/* Intrinsics vcvtsi2sh, vcvtusi2sh.  */
> +extern __inline __m128h
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvti32_sh (__m128h __A, int __B)
> +{
> +  return __builtin_ia32_vcvtsi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
> +}
> +
> +extern __inline __m128h
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvtu32_sh (__m128h __A, unsigned int __B)
> +{
> +  return __builtin_ia32_vcvtusi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
> +}
> +
> +#ifdef __OPTIMIZE__
> +extern __inline __m128h
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvt_roundi32_sh (__m128h __A, int __B, const int __R)
> +{
> +  return __builtin_ia32_vcvtsi2sh32_round (__A, __B, __R);
> +}
> +
> +extern __inline __m128h
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvt_roundu32_sh (__m128h __A, unsigned int __B, const int __R)
> +{
> +  return __builtin_ia32_vcvtusi2sh32_round (__A, __B, __R);
> +}
> +
> +#else
> +#define _mm_cvt_roundi32_sh(A, B, C)           \
> +  (__builtin_ia32_vcvtsi2sh32_round ((A), (B), (C)))
> +#define _mm_cvt_roundu32_sh(A, B, C)           \
> +  (__builtin_ia32_vcvtusi2sh32_round ((A), (B), (C)))
> +
> +#endif /* __OPTIMIZE__ */
> +
> +#ifdef __x86_64__
> +extern __inline __m128h
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvti64_sh (__m128h __A, long long __B)
> +{
> +  return __builtin_ia32_vcvtsi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
> +}
> +
> +extern __inline __m128h
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvtu64_sh (__m128h __A, unsigned long long __B)
> +{
> +  return __builtin_ia32_vcvtusi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
> +}
> +
> +#ifdef __OPTIMIZE__
> +extern __inline __m128h
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvt_roundi64_sh (__m128h __A, long long __B, const int __R)
> +{
> +  return __builtin_ia32_vcvtsi2sh64_round (__A, __B, __R);
> +}
> +
> +extern __inline __m128h
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm_cvt_roundu64_sh (__m128h __A, unsigned long long __B, const int __R)
> +{
> +  return __builtin_ia32_vcvtusi2sh64_round (__A, __B, __R);
> +}
> +
> +#else
> +#define _mm_cvt_roundi64_sh(A, B, C)           \
> +  (__builtin_ia32_vcvtsi2sh64_round ((A), (B), (C)))
> +#define _mm_cvt_roundu64_sh(A, B, C)           \
> +  (__builtin_ia32_vcvtusi2sh64_round ((A), (B), (C)))
> +
> +#endif /* __OPTIMIZE__ */
> +#endif /* __x86_64__ */
> +
> +
>  #ifdef __DISABLE_AVX512FP16__
>  #undef __DISABLE_AVX512FP16__
>  #pragma GCC pop_options
> diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
> index 57b9ea786e1..74bda59a65e 100644
> --- a/gcc/config/i386/i386-builtin-types.def
> +++ b/gcc/config/i386/i386-builtin-types.def
> @@ -1308,9 +1308,17 @@ DEF_FUNCTION_TYPE (V8HF, V8HI)
>  DEF_FUNCTION_TYPE (QI, V8HF, INT, UQI)
>  DEF_FUNCTION_TYPE (HI, V16HF, INT, UHI)
>  DEF_FUNCTION_TYPE (SI, V32HF, INT, USI)
> +DEF_FUNCTION_TYPE (INT, V8HF, INT)
> +DEF_FUNCTION_TYPE (INT64, V8HF, INT)
> +DEF_FUNCTION_TYPE (UINT, V8HF, INT)
> +DEF_FUNCTION_TYPE (UINT64, V8HF, INT)
>  DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF)
>  DEF_FUNCTION_TYPE (VOID, PCFLOAT16, V8HF, UQI)
>  DEF_FUNCTION_TYPE (V8HF, PCFLOAT16, V8HF, UQI)
> +DEF_FUNCTION_TYPE (V8HF, V8HF, INT, INT)
> +DEF_FUNCTION_TYPE (V8HF, V8HF, INT64, INT)
> +DEF_FUNCTION_TYPE (V8HF, V8HF, UINT, INT)
> +DEF_FUNCTION_TYPE (V8HF, V8HF, UINT64, INT)
>  DEF_FUNCTION_TYPE (V2DI, V8HF, V2DI, UQI)
>  DEF_FUNCTION_TYPE (V4DI, V8HF, V4DI, UQI)
>  DEF_FUNCTION_TYPE (V4SI, V8HF, V4SI, UQI)
> diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
> index 44c55876e48..3602b40d6d5 100644
> --- a/gcc/config/i386/i386-builtin.def
> +++ b/gcc/config/i386/i386-builtin.def
> @@ -3094,6 +3094,14 @@ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtdq2ph_v16si_mask_
>  BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtudq2ph_v16si_mask_round, "__builtin_ia32_vcvtudq2ph_v16si_mask_round", IX86_BUILTIN_VCVTUDQ2PH_V16SI_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SI_V16HF_UHI_INT)
>  BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtqq2ph_v8di_mask_round, "__builtin_ia32_vcvtqq2ph_v8di_mask_round", IX86_BUILTIN_VCVTQQ2PH_V8DI_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT)
>  BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuqq2ph_v8di_mask_round, "__builtin_ia32_vcvtuqq2ph_v8di_mask_round", IX86_BUILTIN_VCVTUQQ2PH_V8DI_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2si_round, "__builtin_ia32_vcvtsh2si32_round", IX86_BUILTIN_VCVTSH2SI32_ROUND, UNKNOWN, (int) INT_FTYPE_V8HF_INT)
> +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2siq_round, "__builtin_ia32_vcvtsh2si64_round", IX86_BUILTIN_VCVTSH2SI64_ROUND, UNKNOWN, (int) INT64_FTYPE_V8HF_INT)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2usi_round, "__builtin_ia32_vcvtsh2usi32_round", IX86_BUILTIN_VCVTSH2USI32_ROUND, UNKNOWN, (int) UINT_FTYPE_V8HF_INT)
> +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2usiq_round, "__builtin_ia32_vcvtsh2usi64_round", IX86_BUILTIN_VCVTSH2USI64_ROUND, UNKNOWN, (int) UINT64_FTYPE_V8HF_INT)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsi2sh_round, "__builtin_ia32_vcvtsi2sh32_round", IX86_BUILTIN_VCVTSI2SH32_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_INT)
> +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsi2shq_round, "__builtin_ia32_vcvtsi2sh64_round", IX86_BUILTIN_VCVTSI2SH64_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT64_INT)
> +BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtusi2sh_round, "__builtin_ia32_vcvtusi2sh32_round", IX86_BUILTIN_VCVTUSI2SH32_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_UINT_INT)
> +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtusi2shq_round, "__builtin_ia32_vcvtusi2sh64_round", IX86_BUILTIN_VCVTUSI2SH64_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_UINT64_INT)
>
>  BDESC_END (ROUND_ARGS, MULTI_ARG)
>
> diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
> index 7d9e1bd6a2d..b83c6d9a92b 100644
> --- a/gcc/config/i386/i386-expand.c
> +++ b/gcc/config/i386/i386-expand.c
> @@ -10489,16 +10489,24 @@ ix86_expand_round_builtin (const struct builtin_description *d,
>      {
>      case UINT64_FTYPE_V2DF_INT:
>      case UINT64_FTYPE_V4SF_INT:
> +    case UINT64_FTYPE_V8HF_INT:
>      case UINT_FTYPE_V2DF_INT:
>      case UINT_FTYPE_V4SF_INT:
> +    case UINT_FTYPE_V8HF_INT:
>      case INT64_FTYPE_V2DF_INT:
>      case INT64_FTYPE_V4SF_INT:
> +    case INT64_FTYPE_V8HF_INT:
>      case INT_FTYPE_V2DF_INT:
>      case INT_FTYPE_V4SF_INT:
> +    case INT_FTYPE_V8HF_INT:
>        nargs = 2;
>        break;
>      case V32HF_FTYPE_V32HF_V32HF_INT:
>      case V8HF_FTYPE_V8HF_V8HF_INT:
> +    case V8HF_FTYPE_V8HF_INT_INT:
> +    case V8HF_FTYPE_V8HF_UINT_INT:
> +    case V8HF_FTYPE_V8HF_INT64_INT:
> +    case V8HF_FTYPE_V8HF_UINT64_INT:
>      case V4SF_FTYPE_V4SF_UINT_INT:
>      case V4SF_FTYPE_V4SF_UINT64_INT:
>      case V2DF_FTYPE_V2DF_UINT64_INT:
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 8b23048a232..b312d26b806 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -5589,6 +5589,52 @@ (define_insn "*avx512fp16_vcvt<floatsuffix>qq2ph_v2di_mask_1"
>     (set_attr "prefix" "evex")
>     (set_attr "mode" "TI")])
>
> +(define_insn "avx512fp16_vcvtsh2<sseintconvertsignprefix>si<rex64namesuffix><round_name>"
> +  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
> +       (unspec:SWI48
> +         [(vec_select:HF
> +            (match_operand:V8HF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
> +            (parallel [(const_int 0)]))]
> +         UNSPEC_US_FIX_NOTRUNC))]
> +  "TARGET_AVX512FP16"
> +  "%vcvtsh2<sseintconvertsignprefix>si\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
> +  [(set_attr "type" "sseicvt")
> +   (set_attr "athlon_decode" "double,vector")
> +   (set_attr "bdver1_decode" "double,double")
> +   (set_attr "prefix_rep" "1")
> +   (set_attr "prefix" "evex")
> +   (set_attr "mode" "<MODE>")])
> +
> +(define_insn "avx512fp16_vcvtsh2<sseintconvertsignprefix>si<rex64namesuffix>_2"
> +  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
> +       (unspec:SWI48 [(match_operand:HF 1 "nonimmediate_operand" "v,m")]
> +                     UNSPEC_US_FIX_NOTRUNC))]
> +  "TARGET_AVX512FP16"
> +  "%vcvtsh2<sseintconvertsignprefix>si\t{%1, %0|%0, %k1}"
> +  [(set_attr "type" "sseicvt")
> +   (set_attr "athlon_decode" "double,vector")
> +   (set_attr "bdver1_decode" "double,double")
> +   (set_attr "prefix_rep" "1")
> +   (set_attr "prefix" "evex")
> +   (set_attr "mode" "<MODE>")])
> +
> +(define_insn "avx512fp16_vcvt<floatsuffix>si2sh<rex64namesuffix><round_name>"
> +  [(set (match_operand:V8HF 0 "register_operand" "=v")
> +       (vec_merge:V8HF
> +         (vec_duplicate:V8HF
> +           (any_float:HF (match_operand:SWI48 2 "<round_nimm_scalar_predicate>" "<round_constraint3>")))
> +         (match_operand:V8HF 1 "register_operand" "v")
> +         (const_int 1)))]
> +  "TARGET_AVX512FP16"
> +  "vcvt<floatsuffix>si2sh\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
> +  [(set_attr "type" "sseicvt")
> +   (set_attr "athlon_decode" "*")
> +   (set_attr "amdfam10_decode" "*")
> +   (set_attr "bdver1_decode" "*")
> +   (set_attr "btver2_decode" "double")
> +   (set_attr "znver1_decode" "double")
> +   (set_attr "prefix" "evex")
> +   (set_attr "mode" "HF")])
>
>  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
>  ;;
> diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c
> index b569cc0bdd9..0aae949097a 100644
> --- a/gcc/testsuite/gcc.target/i386/avx-1.c
> +++ b/gcc/testsuite/gcc.target/i386/avx-1.c
> @@ -731,6 +731,14 @@
>  #define __builtin_ia32_vcvtudq2ph_v16si_mask_round(A, B, C, D) __builtin_ia32_vcvtudq2ph_v16si_mask_round(A, B, C, 8)
>  #define __builtin_ia32_vcvtqq2ph_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtqq2ph_v8di_mask_round(A, B, C, 8)
>  #define __builtin_ia32_vcvtuqq2ph_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtuqq2ph_v8di_mask_round(A, B, C, 8)
> +#define __builtin_ia32_vcvtsh2si32_round(A, B) __builtin_ia32_vcvtsh2si32_round(A, 8)
> +#define __builtin_ia32_vcvtsh2si64_round(A, B) __builtin_ia32_vcvtsh2si64_round(A, 8)
> +#define __builtin_ia32_vcvtsh2usi32_round(A, B) __builtin_ia32_vcvtsh2usi32_round(A, 8)
> +#define __builtin_ia32_vcvtsh2usi64_round(A, B) __builtin_ia32_vcvtsh2usi64_round(A, 8)
> +#define __builtin_ia32_vcvtsi2sh32_round(A, B, C) __builtin_ia32_vcvtsi2sh32_round(A, B, 8)
> +#define __builtin_ia32_vcvtsi2sh64_round(A, B, C) __builtin_ia32_vcvtsi2sh64_round(A, B, 8)
> +#define __builtin_ia32_vcvtusi2sh32_round(A, B, C) __builtin_ia32_vcvtusi2sh32_round(A, B, 8)
> +#define __builtin_ia32_vcvtusi2sh64_round(A, B, C) __builtin_ia32_vcvtusi2sh64_round(A, B, 8)
>
>  /* avx512fp16vlintrin.h */
>  #define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)
> diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
> index 07e59118438..997fb733132 100644
> --- a/gcc/testsuite/gcc.target/i386/sse-13.c
> +++ b/gcc/testsuite/gcc.target/i386/sse-13.c
> @@ -748,6 +748,14 @@
>  #define __builtin_ia32_vcvtudq2ph_v16si_mask_round(A, B, C, D) __builtin_ia32_vcvtudq2ph_v16si_mask_round(A, B, C, 8)
>  #define __builtin_ia32_vcvtqq2ph_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtqq2ph_v8di_mask_round(A, B, C, 8)
>  #define __builtin_ia32_vcvtuqq2ph_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtuqq2ph_v8di_mask_round(A, B, C, 8)
> +#define __builtin_ia32_vcvtsh2si32_round(A, B) __builtin_ia32_vcvtsh2si32_round(A, 8)
> +#define __builtin_ia32_vcvtsh2si64_round(A, B) __builtin_ia32_vcvtsh2si64_round(A, 8)
> +#define __builtin_ia32_vcvtsh2usi32_round(A, B) __builtin_ia32_vcvtsh2usi32_round(A, 8)
> +#define __builtin_ia32_vcvtsh2usi64_round(A, B) __builtin_ia32_vcvtsh2usi64_round(A, 8)
> +#define __builtin_ia32_vcvtsi2sh32_round(A, B, C) __builtin_ia32_vcvtsi2sh32_round(A, B, 8)
> +#define __builtin_ia32_vcvtsi2sh64_round(A, B, C) __builtin_ia32_vcvtsi2sh64_round(A, B, 8)
> +#define __builtin_ia32_vcvtusi2sh32_round(A, B, C) __builtin_ia32_vcvtusi2sh32_round(A, B, 8)
> +#define __builtin_ia32_vcvtusi2sh64_round(A, B, C) __builtin_ia32_vcvtusi2sh64_round(A, B, 8)
>
>  /* avx512fp16vlintrin.h */
>  #define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)
> diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
> index 0530192d97e..89a589e0d80 100644
> --- a/gcc/testsuite/gcc.target/i386/sse-14.c
> +++ b/gcc/testsuite/gcc.target/i386/sse-14.c
> @@ -690,6 +690,14 @@ test_1 (_mm512_cvt_roundepi32_ph, __m256h, __m512i, 8)
>  test_1 (_mm512_cvt_roundepu32_ph, __m256h, __m512i, 8)
>  test_1 (_mm512_cvt_roundepi64_ph, __m128h, __m512i, 8)
>  test_1 (_mm512_cvt_roundepu64_ph, __m128h, __m512i, 8)
> +test_1 (_mm_cvt_roundsh_i32, int, __m128h, 8)
> +test_1 (_mm_cvt_roundsh_u32, unsigned, __m128h, 8)
> +#ifdef __x86_64__
> +test_1 (_mm_cvt_roundsh_i64, long long, __m128h, 8)
> +test_1 (_mm_cvt_roundsh_u64, unsigned long long, __m128h, 8)
> +test_2 (_mm_cvt_roundi64_sh, __m128h, __m128h, long long, 8)
> +test_2 (_mm_cvt_roundu64_sh, __m128h, __m128h, unsigned long long, 8)
> +#endif
>  test_1x (_mm512_reduce_round_ph, __m512h, __m512h, 123, 8)
>  test_1x (_mm512_roundscale_round_ph, __m512h, __m512h, 123, 8)
>  test_1x (_mm512_getmant_ph, __m512h, __m512h, 1, 1)
> @@ -734,6 +742,8 @@ test_2 (_mm512_maskz_cvt_roundepi32_ph, __m256h, __mmask16, __m512i, 8)
>  test_2 (_mm512_maskz_cvt_roundepu32_ph, __m256h, __mmask16, __m512i, 8)
>  test_2 (_mm512_maskz_cvt_roundepi64_ph, __m128h, __mmask8, __m512i, 8)
>  test_2 (_mm512_maskz_cvt_roundepu64_ph, __m128h, __mmask8, __m512i, 8)
> +test_2 (_mm_cvt_roundi32_sh, __m128h, __m128h, int, 8)
> +test_2 (_mm_cvt_roundu32_sh, __m128h, __m128h, unsigned, 8)
>  test_2x (_mm512_cmp_round_ph_mask, __mmask32, __m512h, __m512h, 1, 8)
>  test_2x (_mm_cmp_round_sh_mask, __mmask8, __m128h, __m128h, 1, 8)
>  test_2x (_mm_comi_round_sh, int, __m128h, __m128h, 1, 8)
> diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
> index 04e6340516b..fed12744c6c 100644
> --- a/gcc/testsuite/gcc.target/i386/sse-22.c
> +++ b/gcc/testsuite/gcc.target/i386/sse-22.c
> @@ -795,6 +795,14 @@ test_1 (_mm512_cvt_roundepi32_ph, __m256h, __m512i, 8)
>  test_1 (_mm512_cvt_roundepu32_ph, __m256h, __m512i, 8)
>  test_1 (_mm512_cvt_roundepi64_ph, __m128h, __m512i, 8)
>  test_1 (_mm512_cvt_roundepu64_ph, __m128h, __m512i, 8)
> +test_1 (_mm_cvt_roundsh_i32, int, __m128h, 8)
> +test_1 (_mm_cvt_roundsh_u32, unsigned, __m128h, 8)
> +#ifdef __x86_64__
> +test_1 (_mm_cvt_roundsh_i64, long long, __m128h, 8)
> +test_1 (_mm_cvt_roundsh_u64, unsigned long long, __m128h, 8)
> +test_2 (_mm_cvt_roundi64_sh, __m128h, __m128h, long long, 8)
> +test_2 (_mm_cvt_roundu64_sh, __m128h, __m128h, unsigned long long, 8)
> +#endif
>  test_1x (_mm512_reduce_round_ph, __m512h, __m512h, 123, 8)
>  test_1x (_mm512_roundscale_round_ph, __m512h, __m512h, 123, 8)
>  test_1x (_mm512_getmant_ph, __m512h, __m512h, 1, 1)
> @@ -838,6 +846,8 @@ test_2 (_mm512_maskz_cvt_roundepi32_ph, __m256h, __mmask16, __m512i, 8)
>  test_2 (_mm512_maskz_cvt_roundepu32_ph, __m256h, __mmask16, __m512i, 8)
>  test_2 (_mm512_maskz_cvt_roundepi64_ph, __m128h, __mmask8, __m512i, 8)
>  test_2 (_mm512_maskz_cvt_roundepu64_ph, __m128h, __mmask8, __m512i, 8)
> +test_2 (_mm_cvt_roundi32_sh, __m128h, __m128h, int, 8)
> +test_2 (_mm_cvt_roundu32_sh, __m128h, __m128h, unsigned, 8)
>  test_2x (_mm512_cmp_round_ph_mask, __mmask32, __m512h, __m512h, 1, 8)
>  test_2x (_mm_cmp_round_sh_mask, __mmask8, __m128h, __m128h, 1, 8)
>  test_2x (_mm_comi_round_sh, int, __m128h, __m128h, 1, 8)
> diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
> index 684891cc98b..6e8d8a1833c 100644
> --- a/gcc/testsuite/gcc.target/i386/sse-23.c
> +++ b/gcc/testsuite/gcc.target/i386/sse-23.c
> @@ -749,6 +749,14 @@
>  #define __builtin_ia32_vcvtudq2ph_v16si_mask_round(A, B, C, D) __builtin_ia32_vcvtudq2ph_v16si_mask_round(A, B, C, 8)
>  #define __builtin_ia32_vcvtqq2ph_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtqq2ph_v8di_mask_round(A, B, C, 8)
>  #define __builtin_ia32_vcvtuqq2ph_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtuqq2ph_v8di_mask_round(A, B, C, 8)
> +#define __builtin_ia32_vcvtsh2si32_round(A, B) __builtin_ia32_vcvtsh2si32_round(A, 8)
> +#define __builtin_ia32_vcvtsh2si64_round(A, B) __builtin_ia32_vcvtsh2si64_round(A, 8)
> +#define __builtin_ia32_vcvtsh2usi32_round(A, B) __builtin_ia32_vcvtsh2usi32_round(A, 8)
> +#define __builtin_ia32_vcvtsh2usi64_round(A, B) __builtin_ia32_vcvtsh2usi64_round(A, 8)
> +#define __builtin_ia32_vcvtsi2sh32_round(A, B, C) __builtin_ia32_vcvtsi2sh32_round(A, B, 8)
> +#define __builtin_ia32_vcvtsi2sh64_round(A, B, C) __builtin_ia32_vcvtsi2sh64_round(A, B, 8)
> +#define __builtin_ia32_vcvtusi2sh32_round(A, B, C) __builtin_ia32_vcvtusi2sh32_round(A, B, 8)
> +#define __builtin_ia32_vcvtusi2sh64_round(A, B, C) __builtin_ia32_vcvtusi2sh64_round(A, B, 8)
>
>  /* avx512fp16vlintrin.h */
>  #define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)
> --
> 2.18.1
>


--
BR,
Hongtao
diff mbox series

Patch

diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h
index bd801942365..7524a8d6a5b 100644
--- a/gcc/config/i386/avx512fp16intrin.h
+++ b/gcc/config/i386/avx512fp16intrin.h
@@ -3529,6 +3529,164 @@  _mm512_maskz_cvt_roundepu16_ph (__mmask32 __A, __m512i __B, int __C)
 
 #endif /* __OPTIMIZE__ */
 
+/* Intrinsics vcvtsh2si, vcvtsh2usi.  */
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsh_i32 (__m128h __A)
+{
+  return (int) __builtin_ia32_vcvtsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsh_u32 (__m128h __A)
+{
+  return (int) __builtin_ia32_vcvtsh2usi32_round (__A,
+						  _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsh_i32 (__m128h __A, const int __R)
+{
+  return (int) __builtin_ia32_vcvtsh2si32_round (__A, __R);
+}
+
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsh_u32 (__m128h __A, const int __R)
+{
+  return (int) __builtin_ia32_vcvtsh2usi32_round (__A, __R);
+}
+
+#else
+#define _mm_cvt_roundsh_i32(A, B)		\
+  ((int)__builtin_ia32_vcvtsh2si32_round ((A), (B)))
+#define _mm_cvt_roundsh_u32(A, B)		\
+  ((int)__builtin_ia32_vcvtsh2usi32_round ((A), (B)))
+
+#endif /* __OPTIMIZE__ */
+
+#ifdef __x86_64__
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsh_i64 (__m128h __A)
+{
+  return (long long)
+    __builtin_ia32_vcvtsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtsh_u64 (__m128h __A)
+{
+  return (long long)
+    __builtin_ia32_vcvtsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsh_i64 (__m128h __A, const int __R)
+{
+  return (long long) __builtin_ia32_vcvtsh2si64_round (__A, __R);
+}
+
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundsh_u64 (__m128h __A, const int __R)
+{
+  return (long long) __builtin_ia32_vcvtsh2usi64_round (__A, __R);
+}
+
+#else
+#define _mm_cvt_roundsh_i64(A, B)			\
+  ((long long)__builtin_ia32_vcvtsh2si64_round ((A), (B)))
+#define _mm_cvt_roundsh_u64(A, B)			\
+  ((long long)__builtin_ia32_vcvtsh2usi64_round ((A), (B)))
+
+#endif /* __OPTIMIZE__ */
+#endif /* __x86_64__ */
+
+/* Intrinsics vcvtsi2sh, vcvtusi2sh.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvti32_sh (__m128h __A, int __B)
+{
+  return __builtin_ia32_vcvtsi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtu32_sh (__m128h __A, unsigned int __B)
+{
+  return __builtin_ia32_vcvtusi2sh32_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundi32_sh (__m128h __A, int __B, const int __R)
+{
+  return __builtin_ia32_vcvtsi2sh32_round (__A, __B, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundu32_sh (__m128h __A, unsigned int __B, const int __R)
+{
+  return __builtin_ia32_vcvtusi2sh32_round (__A, __B, __R);
+}
+
+#else
+#define _mm_cvt_roundi32_sh(A, B, C)		\
+  (__builtin_ia32_vcvtsi2sh32_round ((A), (B), (C)))
+#define _mm_cvt_roundu32_sh(A, B, C)		\
+  (__builtin_ia32_vcvtusi2sh32_round ((A), (B), (C)))
+
+#endif /* __OPTIMIZE__ */
+
+#ifdef __x86_64__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvti64_sh (__m128h __A, long long __B)
+{
+  return __builtin_ia32_vcvtsi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtu64_sh (__m128h __A, unsigned long long __B)
+{
+  return __builtin_ia32_vcvtusi2sh64_round (__A, __B, _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundi64_sh (__m128h __A, long long __B, const int __R)
+{
+  return __builtin_ia32_vcvtsi2sh64_round (__A, __B, __R);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvt_roundu64_sh (__m128h __A, unsigned long long __B, const int __R)
+{
+  return __builtin_ia32_vcvtusi2sh64_round (__A, __B, __R);
+}
+
+#else
+#define _mm_cvt_roundi64_sh(A, B, C)		\
+  (__builtin_ia32_vcvtsi2sh64_round ((A), (B), (C)))
+#define _mm_cvt_roundu64_sh(A, B, C)		\
+  (__builtin_ia32_vcvtusi2sh64_round ((A), (B), (C)))
+
+#endif /* __OPTIMIZE__ */
+#endif /* __x86_64__ */
+
+
 #ifdef __DISABLE_AVX512FP16__
 #undef __DISABLE_AVX512FP16__
 #pragma GCC pop_options
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 57b9ea786e1..74bda59a65e 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -1308,9 +1308,17 @@  DEF_FUNCTION_TYPE (V8HF, V8HI)
 DEF_FUNCTION_TYPE (QI, V8HF, INT, UQI)
 DEF_FUNCTION_TYPE (HI, V16HF, INT, UHI)
 DEF_FUNCTION_TYPE (SI, V32HF, INT, USI)
+DEF_FUNCTION_TYPE (INT, V8HF, INT)
+DEF_FUNCTION_TYPE (INT64, V8HF, INT)
+DEF_FUNCTION_TYPE (UINT, V8HF, INT)
+DEF_FUNCTION_TYPE (UINT64, V8HF, INT)
 DEF_FUNCTION_TYPE (V8HF, V8HF, V8HF)
 DEF_FUNCTION_TYPE (VOID, PCFLOAT16, V8HF, UQI)
 DEF_FUNCTION_TYPE (V8HF, PCFLOAT16, V8HF, UQI)
+DEF_FUNCTION_TYPE (V8HF, V8HF, INT, INT)
+DEF_FUNCTION_TYPE (V8HF, V8HF, INT64, INT)
+DEF_FUNCTION_TYPE (V8HF, V8HF, UINT, INT)
+DEF_FUNCTION_TYPE (V8HF, V8HF, UINT64, INT)
 DEF_FUNCTION_TYPE (V2DI, V8HF, V2DI, UQI)
 DEF_FUNCTION_TYPE (V4DI, V8HF, V4DI, UQI)
 DEF_FUNCTION_TYPE (V4SI, V8HF, V4SI, UQI)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 44c55876e48..3602b40d6d5 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -3094,6 +3094,14 @@  BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtdq2ph_v16si_mask_
 BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtudq2ph_v16si_mask_round, "__builtin_ia32_vcvtudq2ph_v16si_mask_round", IX86_BUILTIN_VCVTUDQ2PH_V16SI_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SI_V16HF_UHI_INT)
 BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtqq2ph_v8di_mask_round, "__builtin_ia32_vcvtqq2ph_v8di_mask_round", IX86_BUILTIN_VCVTQQ2PH_V8DI_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT)
 BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuqq2ph_v8di_mask_round, "__builtin_ia32_vcvtuqq2ph_v8di_mask_round", IX86_BUILTIN_VCVTUQQ2PH_V8DI_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2si_round, "__builtin_ia32_vcvtsh2si32_round", IX86_BUILTIN_VCVTSH2SI32_ROUND, UNKNOWN, (int) INT_FTYPE_V8HF_INT)  /* Scalar FP16 <-> integer builtins: this entry and the seven after it.  */
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2siq_round, "__builtin_ia32_vcvtsh2si64_round", IX86_BUILTIN_VCVTSH2SI64_ROUND, UNKNOWN, (int) INT64_FTYPE_V8HF_INT)  /* The *64 builtins additionally require OPTION_MASK_ISA_64BIT.  */
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2usi_round, "__builtin_ia32_vcvtsh2usi32_round", IX86_BUILTIN_VCVTSH2USI32_ROUND, UNKNOWN, (int) UINT_FTYPE_V8HF_INT)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2usiq_round, "__builtin_ia32_vcvtsh2usi64_round", IX86_BUILTIN_VCVTSH2USI64_ROUND, UNKNOWN, (int) UINT64_FTYPE_V8HF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsi2sh_round, "__builtin_ia32_vcvtsi2sh32_round", IX86_BUILTIN_VCVTSI2SH32_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_INT)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsi2shq_round, "__builtin_ia32_vcvtsi2sh64_round", IX86_BUILTIN_VCVTSI2SH64_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT64_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtusi2sh_round, "__builtin_ia32_vcvtusi2sh32_round", IX86_BUILTIN_VCVTUSI2SH32_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_UINT_INT)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtusi2shq_round, "__builtin_ia32_vcvtusi2sh64_round", IX86_BUILTIN_VCVTUSI2SH64_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_UINT64_INT)
 
 BDESC_END (ROUND_ARGS, MULTI_ARG)
 
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 7d9e1bd6a2d..b83c6d9a92b 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -10489,16 +10489,24 @@  ix86_expand_round_builtin (const struct builtin_description *d,
     {
     case UINT64_FTYPE_V2DF_INT:
     case UINT64_FTYPE_V4SF_INT:
+    case UINT64_FTYPE_V8HF_INT:
     case UINT_FTYPE_V2DF_INT:
     case UINT_FTYPE_V4SF_INT:
+    case UINT_FTYPE_V8HF_INT:
     case INT64_FTYPE_V2DF_INT:
     case INT64_FTYPE_V4SF_INT:
+    case INT64_FTYPE_V8HF_INT:
     case INT_FTYPE_V2DF_INT:
     case INT_FTYPE_V4SF_INT:
+    case INT_FTYPE_V8HF_INT:
       nargs = 2;
       break;
     case V32HF_FTYPE_V32HF_V32HF_INT:
     case V8HF_FTYPE_V8HF_V8HF_INT:
+    case V8HF_FTYPE_V8HF_INT_INT:
+    case V8HF_FTYPE_V8HF_UINT_INT:
+    case V8HF_FTYPE_V8HF_INT64_INT:
+    case V8HF_FTYPE_V8HF_UINT64_INT:
     case V4SF_FTYPE_V4SF_UINT_INT:
     case V4SF_FTYPE_V4SF_UINT64_INT:
     case V2DF_FTYPE_V2DF_UINT64_INT:
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 8b23048a232..b312d26b806 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -5589,6 +5589,52 @@  (define_insn "*avx512fp16_vcvt<floatsuffix>qq2ph_v2di_mask_1"
    (set_attr "prefix" "evex")
    (set_attr "mode" "TI")])
 
+(define_insn "avx512fp16_vcvtsh2<sseintconvertsignprefix>si<rex64namesuffix><round_name>"  ;; Convert element 0 of a V8HF vector to an SWI48 integer register.
+  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
+	(unspec:SWI48
+	  [(vec_select:HF
+	     (match_operand:V8HF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
+	     (parallel [(const_int 0)]))]  ;; Only element 0 of operand 1 is selected as the source.
+	  UNSPEC_US_FIX_NOTRUNC))]
+  "TARGET_AVX512FP16"
+  "%vcvtsh2<sseintconvertsignprefix>si\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "bdver1_decode" "double,double")
+   (set_attr "prefix_rep" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512fp16_vcvtsh2<sseintconvertsignprefix>si<rex64namesuffix>_2"  ;; Same unspec, but the source is a scalar HF operand; no <round_name> variant.
+  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
+	(unspec:SWI48 [(match_operand:HF 1 "nonimmediate_operand" "v,m")]  ;; Source may be a vector register or memory.
+		      UNSPEC_US_FIX_NOTRUNC))]
+  "TARGET_AVX512FP16"
+  "%vcvtsh2<sseintconvertsignprefix>si\t{%1, %0|%0, %k1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "bdver1_decode" "double,double")
+   (set_attr "prefix_rep" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512fp16_vcvt<floatsuffix>si2sh<rex64namesuffix><round_name>"  ;; Convert SWI48 integer operand 2 to HF and merge it into V8HF operand 1.
+  [(set (match_operand:V8HF 0 "register_operand" "=v")
+	(vec_merge:V8HF
+	  (vec_duplicate:V8HF
+	    (any_float:HF (match_operand:SWI48 2 "<round_nimm_scalar_predicate>" "<round_constraint3>")))
+	  (match_operand:V8HF 1 "register_operand" "v")
+	  (const_int 1)))]  ;; Merge mask 1: element 0 takes the converted value, the rest come from operand 1.
+  "TARGET_AVX512FP16"
+  "vcvt<floatsuffix>si2sh\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "*")
+   (set_attr "amdfam10_decode" "*")
+   (set_attr "bdver1_decode" "*")
+   (set_attr "btver2_decode" "double")
+   (set_attr "znver1_decode" "double")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "HF")])
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c
index b569cc0bdd9..0aae949097a 100644
--- a/gcc/testsuite/gcc.target/i386/avx-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx-1.c
@@ -731,6 +731,14 @@ 
 #define __builtin_ia32_vcvtudq2ph_v16si_mask_round(A, B, C, D) __builtin_ia32_vcvtudq2ph_v16si_mask_round(A, B, C, 8)
 #define __builtin_ia32_vcvtqq2ph_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtqq2ph_v8di_mask_round(A, B, C, 8)
 #define __builtin_ia32_vcvtuqq2ph_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtuqq2ph_v8di_mask_round(A, B, C, 8)
+#define __builtin_ia32_vcvtsh2si32_round(A, B) __builtin_ia32_vcvtsh2si32_round(A, 8) /* Last (rounding) argument is replaced by the constant 8; same for the seven overrides below.  */
+#define __builtin_ia32_vcvtsh2si64_round(A, B) __builtin_ia32_vcvtsh2si64_round(A, 8)
+#define __builtin_ia32_vcvtsh2usi32_round(A, B) __builtin_ia32_vcvtsh2usi32_round(A, 8)
+#define __builtin_ia32_vcvtsh2usi64_round(A, B) __builtin_ia32_vcvtsh2usi64_round(A, 8)
+#define __builtin_ia32_vcvtsi2sh32_round(A, B, C) __builtin_ia32_vcvtsi2sh32_round(A, B, 8)
+#define __builtin_ia32_vcvtsi2sh64_round(A, B, C) __builtin_ia32_vcvtsi2sh64_round(A, B, 8)
+#define __builtin_ia32_vcvtusi2sh32_round(A, B, C) __builtin_ia32_vcvtusi2sh32_round(A, B, 8)
+#define __builtin_ia32_vcvtusi2sh64_round(A, B, C) __builtin_ia32_vcvtusi2sh64_round(A, B, 8)
 
 /* avx512fp16vlintrin.h */
 #define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index 07e59118438..997fb733132 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -748,6 +748,14 @@ 
 #define __builtin_ia32_vcvtudq2ph_v16si_mask_round(A, B, C, D) __builtin_ia32_vcvtudq2ph_v16si_mask_round(A, B, C, 8)
 #define __builtin_ia32_vcvtqq2ph_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtqq2ph_v8di_mask_round(A, B, C, 8)
 #define __builtin_ia32_vcvtuqq2ph_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtuqq2ph_v8di_mask_round(A, B, C, 8)
+#define __builtin_ia32_vcvtsh2si32_round(A, B) __builtin_ia32_vcvtsh2si32_round(A, 8) /* Last (rounding) argument is replaced by the constant 8; same for the seven overrides below.  */
+#define __builtin_ia32_vcvtsh2si64_round(A, B) __builtin_ia32_vcvtsh2si64_round(A, 8)
+#define __builtin_ia32_vcvtsh2usi32_round(A, B) __builtin_ia32_vcvtsh2usi32_round(A, 8)
+#define __builtin_ia32_vcvtsh2usi64_round(A, B) __builtin_ia32_vcvtsh2usi64_round(A, 8)
+#define __builtin_ia32_vcvtsi2sh32_round(A, B, C) __builtin_ia32_vcvtsi2sh32_round(A, B, 8)
+#define __builtin_ia32_vcvtsi2sh64_round(A, B, C) __builtin_ia32_vcvtsi2sh64_round(A, B, 8)
+#define __builtin_ia32_vcvtusi2sh32_round(A, B, C) __builtin_ia32_vcvtusi2sh32_round(A, B, 8)
+#define __builtin_ia32_vcvtusi2sh64_round(A, B, C) __builtin_ia32_vcvtusi2sh64_round(A, B, 8)
 
 /* avx512fp16vlintrin.h */
 #define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
index 0530192d97e..89a589e0d80 100644
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
@@ -690,6 +690,14 @@  test_1 (_mm512_cvt_roundepi32_ph, __m256h, __m512i, 8)
 test_1 (_mm512_cvt_roundepu32_ph, __m256h, __m512i, 8)
 test_1 (_mm512_cvt_roundepi64_ph, __m128h, __m512i, 8)
 test_1 (_mm512_cvt_roundepu64_ph, __m128h, __m512i, 8)
+test_1 (_mm_cvt_roundsh_i32, int, __m128h, 8)
+test_1 (_mm_cvt_roundsh_u32, unsigned, __m128h, 8)
+#ifdef __x86_64__ /* The 64-bit integer conversions are exercised only on x86-64.  */
+test_1 (_mm_cvt_roundsh_i64, long long, __m128h, 8)
+test_1 (_mm_cvt_roundsh_u64, unsigned long long, __m128h, 8)
+test_2 (_mm_cvt_roundi64_sh, __m128h, __m128h, long long, 8)
+test_2 (_mm_cvt_roundu64_sh, __m128h, __m128h, unsigned long long, 8)
+#endif
 test_1x (_mm512_reduce_round_ph, __m512h, __m512h, 123, 8)
 test_1x (_mm512_roundscale_round_ph, __m512h, __m512h, 123, 8)
 test_1x (_mm512_getmant_ph, __m512h, __m512h, 1, 1)
@@ -734,6 +742,8 @@  test_2 (_mm512_maskz_cvt_roundepi32_ph, __m256h, __mmask16, __m512i, 8)
 test_2 (_mm512_maskz_cvt_roundepu32_ph, __m256h, __mmask16, __m512i, 8)
 test_2 (_mm512_maskz_cvt_roundepi64_ph, __m128h, __mmask8, __m512i, 8)
 test_2 (_mm512_maskz_cvt_roundepu64_ph, __m128h, __mmask8, __m512i, 8)
+test_2 (_mm_cvt_roundi32_sh, __m128h, __m128h, int, 8) /* 32-bit source: not guarded by __x86_64__.  */
+test_2 (_mm_cvt_roundu32_sh, __m128h, __m128h, unsigned, 8)
 test_2x (_mm512_cmp_round_ph_mask, __mmask32, __m512h, __m512h, 1, 8)
 test_2x (_mm_cmp_round_sh_mask, __mmask8, __m128h, __m128h, 1, 8)
 test_2x (_mm_comi_round_sh, int, __m128h, __m128h, 1, 8)
diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
index 04e6340516b..fed12744c6c 100644
--- a/gcc/testsuite/gcc.target/i386/sse-22.c
+++ b/gcc/testsuite/gcc.target/i386/sse-22.c
@@ -795,6 +795,14 @@  test_1 (_mm512_cvt_roundepi32_ph, __m256h, __m512i, 8)
 test_1 (_mm512_cvt_roundepu32_ph, __m256h, __m512i, 8)
 test_1 (_mm512_cvt_roundepi64_ph, __m128h, __m512i, 8)
 test_1 (_mm512_cvt_roundepu64_ph, __m128h, __m512i, 8)
+test_1 (_mm_cvt_roundsh_i32, int, __m128h, 8)
+test_1 (_mm_cvt_roundsh_u32, unsigned, __m128h, 8)
+#ifdef __x86_64__ /* The 64-bit integer conversions are exercised only on x86-64.  */
+test_1 (_mm_cvt_roundsh_i64, long long, __m128h, 8)
+test_1 (_mm_cvt_roundsh_u64, unsigned long long, __m128h, 8)
+test_2 (_mm_cvt_roundi64_sh, __m128h, __m128h, long long, 8)
+test_2 (_mm_cvt_roundu64_sh, __m128h, __m128h, unsigned long long, 8)
+#endif
 test_1x (_mm512_reduce_round_ph, __m512h, __m512h, 123, 8)
 test_1x (_mm512_roundscale_round_ph, __m512h, __m512h, 123, 8)
 test_1x (_mm512_getmant_ph, __m512h, __m512h, 1, 1)
@@ -838,6 +846,8 @@  test_2 (_mm512_maskz_cvt_roundepi32_ph, __m256h, __mmask16, __m512i, 8)
 test_2 (_mm512_maskz_cvt_roundepu32_ph, __m256h, __mmask16, __m512i, 8)
 test_2 (_mm512_maskz_cvt_roundepi64_ph, __m128h, __mmask8, __m512i, 8)
 test_2 (_mm512_maskz_cvt_roundepu64_ph, __m128h, __mmask8, __m512i, 8)
+test_2 (_mm_cvt_roundi32_sh, __m128h, __m128h, int, 8) /* 32-bit source: not guarded by __x86_64__.  */
+test_2 (_mm_cvt_roundu32_sh, __m128h, __m128h, unsigned, 8)
 test_2x (_mm512_cmp_round_ph_mask, __mmask32, __m512h, __m512h, 1, 8)
 test_2x (_mm_cmp_round_sh_mask, __mmask8, __m128h, __m128h, 1, 8)
 test_2x (_mm_comi_round_sh, int, __m128h, __m128h, 1, 8)
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index 684891cc98b..6e8d8a1833c 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -749,6 +749,14 @@ 
 #define __builtin_ia32_vcvtudq2ph_v16si_mask_round(A, B, C, D) __builtin_ia32_vcvtudq2ph_v16si_mask_round(A, B, C, 8)
 #define __builtin_ia32_vcvtqq2ph_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtqq2ph_v8di_mask_round(A, B, C, 8)
 #define __builtin_ia32_vcvtuqq2ph_v8di_mask_round(A, B, C, D) __builtin_ia32_vcvtuqq2ph_v8di_mask_round(A, B, C, 8)
+#define __builtin_ia32_vcvtsh2si32_round(A, B) __builtin_ia32_vcvtsh2si32_round(A, 8) /* Last (rounding) argument is replaced by the constant 8; same for the seven overrides below.  */
+#define __builtin_ia32_vcvtsh2si64_round(A, B) __builtin_ia32_vcvtsh2si64_round(A, 8)
+#define __builtin_ia32_vcvtsh2usi32_round(A, B) __builtin_ia32_vcvtsh2usi32_round(A, 8)
+#define __builtin_ia32_vcvtsh2usi64_round(A, B) __builtin_ia32_vcvtsh2usi64_round(A, 8)
+#define __builtin_ia32_vcvtsi2sh32_round(A, B, C) __builtin_ia32_vcvtsi2sh32_round(A, B, 8)
+#define __builtin_ia32_vcvtsi2sh64_round(A, B, C) __builtin_ia32_vcvtsi2sh64_round(A, B, 8)
+#define __builtin_ia32_vcvtusi2sh32_round(A, B, C) __builtin_ia32_vcvtusi2sh32_round(A, B, 8)
+#define __builtin_ia32_vcvtusi2sh64_round(A, B, C) __builtin_ia32_vcvtusi2sh64_round(A, B, 8)
 
 /* avx512fp16vlintrin.h */
 #define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)