diff mbox series

[34/62] AVX512FP16: Add vcvttsh2si/vcvttsh2usi.

Message ID 20210701061648.9447-35-hongtao.liu@intel.com
State New
Headers show
Series Support all AVX512FP16 intrinsics. | expand

Commit Message

liuhongt July 1, 2021, 6:16 a.m. UTC
gcc/ChangeLog:

	* config/i386/avx512fp16intrin.h (_mm_cvttsh_i32):
	New intrinsic.
	(_mm_cvttsh_u32): Likewise.
	(_mm_cvtt_roundsh_i32): Likewise.
	(_mm_cvtt_roundsh_u32): Likewise.
	(_mm_cvttsh_i64): Likewise.
	(_mm_cvttsh_u64): Likewise.
	(_mm_cvtt_roundsh_i64): Likewise.
	(_mm_cvtt_roundsh_u64): Likewise.
	* config/i386/i386-builtin.def: Add corresponding new builtins.
	* config/i386/sse.md
	(avx512fp16_fix<fixunssuffix>_trunc<mode>2<round_saeonly_name>):
	New.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx512fp16-vcvttsh2si-1a.c: New test.
	* gcc.target/i386/avx512fp16-vcvttsh2si-1b.c: Ditto.
	* gcc.target/i386/avx512fp16-vcvttsh2si64-1a.c: Ditto.
	* gcc.target/i386/avx512fp16-vcvttsh2si64-1b.c: Ditto.
	* gcc.target/i386/avx512fp16-vcvttsh2usi-1a.c: Ditto.
	* gcc.target/i386/avx512fp16-vcvttsh2usi-1b.c: Ditto.
	* gcc.target/i386/avx512fp16-vcvttsh2usi64-1a.c: Ditto.
	* gcc.target/i386/avx512fp16-vcvttsh2usi64-1b.c: Ditto.
	* gcc.target/i386/avx-1.c: Add test for new builtins.
	* gcc.target/i386/sse-13.c: Ditto.
	* gcc.target/i386/sse-23.c: Ditto.
	* gcc.target/i386/sse-14.c: Add test for new intrinsics.
	* gcc.target/i386/sse-22.c: Ditto.
---
 gcc/config/i386/avx512fp16intrin.h            | 81 +++++++++++++++++++
 gcc/config/i386/i386-builtin.def              |  4 +
 gcc/config/i386/sse.md                        | 16 ++++
 gcc/testsuite/gcc.target/i386/avx-1.c         |  4 +
 .../i386/avx512fp16-vcvttsh2si-1a.c           | 16 ++++
 .../i386/avx512fp16-vcvttsh2si-1b.c           | 54 +++++++++++++
 .../i386/avx512fp16-vcvttsh2si64-1a.c         | 16 ++++
 .../i386/avx512fp16-vcvttsh2si64-1b.c         | 52 ++++++++++++
 .../i386/avx512fp16-vcvttsh2usi-1a.c          | 16 ++++
 .../i386/avx512fp16-vcvttsh2usi-1b.c          | 54 +++++++++++++
 .../i386/avx512fp16-vcvttsh2usi64-1a.c        | 16 ++++
 .../i386/avx512fp16-vcvttsh2usi64-1b.c        | 53 ++++++++++++
 gcc/testsuite/gcc.target/i386/sse-13.c        |  4 +
 gcc/testsuite/gcc.target/i386/sse-14.c        |  4 +
 gcc/testsuite/gcc.target/i386/sse-22.c        |  4 +
 gcc/testsuite/gcc.target/i386/sse-23.c        |  4 +
 16 files changed, 398 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si-1a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si-1b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si64-1a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si64-1b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi-1a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi-1b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi64-1a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi64-1b.c
diff mbox series

Patch

diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h
index 66de5b88927..bcd04f14769 100644
--- a/gcc/config/i386/avx512fp16intrin.h
+++ b/gcc/config/i386/avx512fp16intrin.h
@@ -4148,6 +4148,87 @@  _mm_cvt_roundsh_u64 (__m128h __A, const int __R)
 #endif /* __OPTIMIZE__ */
 #endif /* __x86_64__ */
 
+/* Intrinsics vcvttsh2si, vcvttsh2us.  */
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsh_i32 (__m128h __A)
+{
+  return (int)
+    __builtin_ia32_vcvttsh2si32_round (__A, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsh_u32 (__m128h __A)
+{
+  return (int)
+    __builtin_ia32_vcvttsh2usi32_round (__A, _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsh_i32 (__m128h __A, const int __R)
+{
+  return (int) __builtin_ia32_vcvttsh2si32_round (__A, __R);
+}
+
+extern __inline unsigned
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsh_u32 (__m128h __A, const int __R)
+{
+  return (int) __builtin_ia32_vcvttsh2usi32_round (__A, __R);
+}
+
+#else
+#define _mm_cvtt_roundsh_i32(A, B)		\
+  ((int)__builtin_ia32_vcvttsh2si32_round ((A), (B)))
+#define _mm_cvtt_roundsh_u32(A, B)		\
+  ((int)__builtin_ia32_vcvttsh2usi32_round ((A), (B)))
+
+#endif /* __OPTIMIZE__ */
+
+#ifdef __x86_64__
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsh_i64 (__m128h __A)
+{
+  return (long long)
+    __builtin_ia32_vcvttsh2si64_round (__A, _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvttsh_u64 (__m128h __A)
+{
+  return (long long)
+    __builtin_ia32_vcvttsh2usi64_round (__A, _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsh_i64 (__m128h __A, const int __R)
+{
+  return (long long) __builtin_ia32_vcvttsh2si64_round (__A, __R);
+}
+
+extern __inline unsigned long long
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtt_roundsh_u64 (__m128h __A, const int __R)
+{
+  return (long long) __builtin_ia32_vcvttsh2usi64_round (__A, __R);
+}
+
+#else
+#define _mm_cvtt_roundsh_i64(A, B)			\
+  ((long long)__builtin_ia32_vcvttsh2si64_round ((A), (B)))
+#define _mm_cvtt_roundsh_u64(A, B)			\
+  ((long long)__builtin_ia32_vcvttsh2usi64_round ((A), (B)))
+
+#endif /* __OPTIMIZE__ */
+#endif /* __x86_64__ */
+
 /* Intrinsics vcvtsi2sh, vcvtusi2sh.  */
 extern __inline __m128h
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 17571e3b4c3..4e6d08c2d3f 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -3116,6 +3116,10 @@  BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2si_round, "__b
 BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2siq_round, "__builtin_ia32_vcvtsh2si64_round", IX86_BUILTIN_VCVTSH2SI64_ROUND, UNKNOWN, (int) INT64_FTYPE_V8HF_INT)
 BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2usi_round, "__builtin_ia32_vcvtsh2usi32_round", IX86_BUILTIN_VCVTSH2USI32_ROUND, UNKNOWN, (int) UINT_FTYPE_V8HF_INT)
 BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2usiq_round, "__builtin_ia32_vcvtsh2usi64_round", IX86_BUILTIN_VCVTSH2USI64_ROUND, UNKNOWN, (int) UINT64_FTYPE_V8HF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fix_truncsi2_round, "__builtin_ia32_vcvttsh2si32_round", IX86_BUILTIN_VCVTTSH2SI32_ROUND, UNKNOWN, (int) INT_FTYPE_V8HF_INT)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fix_truncdi2_round, "__builtin_ia32_vcvttsh2si64_round", IX86_BUILTIN_VCVTTSH2SI64_ROUND, UNKNOWN, (int) INT64_FTYPE_V8HF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fixuns_truncsi2_round, "__builtin_ia32_vcvttsh2usi32_round", IX86_BUILTIN_VCVTTSH2USI32_ROUND, UNKNOWN, (int) UINT_FTYPE_V8HF_INT)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fixuns_truncdi2_round, "__builtin_ia32_vcvttsh2usi64_round", IX86_BUILTIN_VCVTTSH2USI64_ROUND, UNKNOWN, (int) UINT64_FTYPE_V8HF_INT)
 BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsi2sh_round, "__builtin_ia32_vcvtsi2sh32_round", IX86_BUILTIN_VCVTSI2SH32_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT_INT)
 BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsi2shq_round, "__builtin_ia32_vcvtsi2sh64_round", IX86_BUILTIN_VCVTSI2SH64_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT64_INT)
 BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtusi2sh_round, "__builtin_ia32_vcvtusi2sh32_round", IX86_BUILTIN_VCVTUSI2SH32_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_UINT_INT)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 66b4fa61eb5..c16e0dc46a7 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -5670,6 +5670,22 @@  (define_insn "avx512fp16_fix<fixunssuffix>_truncv2di2<mask_name>"
    (set_attr "prefix" "evex")
    (set_attr "mode" "TI")])
 
+(define_insn "avx512fp16_fix<fixunssuffix>_trunc<mode>2<round_saeonly_name>"
+  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
+	(any_fix:SWI48
+	  (vec_select:HF
+	    (match_operand:V8HF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint>")
+	    (parallel [(const_int 0)]))))]
+  "TARGET_AVX512FP16"
+  "%vcvttsh2<fixsuffix>si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "amdfam10_decode" "double,double")
+   (set_attr "bdver1_decode" "double,double")
+   (set_attr "prefix_rep" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Parallel single-precision floating point conversion operations
diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c
index 4b6cf7e1ed6..595a6ac007a 100644
--- a/gcc/testsuite/gcc.target/i386/avx-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx-1.c
@@ -741,6 +741,10 @@ 
 #define __builtin_ia32_vcvtsh2si64_round(A, B) __builtin_ia32_vcvtsh2si64_round(A, 8)
 #define __builtin_ia32_vcvtsh2usi32_round(A, B) __builtin_ia32_vcvtsh2usi32_round(A, 8)
 #define __builtin_ia32_vcvtsh2usi64_round(A, B) __builtin_ia32_vcvtsh2usi64_round(A, 8)
+#define __builtin_ia32_vcvttsh2si32_round(A, B) __builtin_ia32_vcvttsh2si32_round(A, 8)
+#define __builtin_ia32_vcvttsh2si64_round(A, B) __builtin_ia32_vcvttsh2si64_round(A, 8)
+#define __builtin_ia32_vcvttsh2usi32_round(A, B) __builtin_ia32_vcvttsh2usi32_round(A, 8)
+#define __builtin_ia32_vcvttsh2usi64_round(A, B) __builtin_ia32_vcvttsh2usi64_round(A, 8)
 #define __builtin_ia32_vcvtsi2sh32_round(A, B, C) __builtin_ia32_vcvtsi2sh32_round(A, B, 8)
 #define __builtin_ia32_vcvtsi2sh64_round(A, B, C) __builtin_ia32_vcvtsi2sh64_round(A, B, 8)
 #define __builtin_ia32_vcvtusi2sh32_round(A, B, C) __builtin_ia32_vcvtusi2sh32_round(A, B, 8)
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si-1a.c
new file mode 100644
index 00000000000..80d84fce153
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si-1a.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx512fp16 -O2" } */
+/* { dg-final { scan-assembler-times "vcvttsh2si\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%eax" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttsh2si\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%eax" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128h x1;
+volatile int res1;
+
+void extern
+avx512f_test (void)
+{
+  res1 = _mm_cvttsh_i32 (x1);
+  res1 = _mm_cvtt_roundsh_i32 (x1, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si-1b.c
new file mode 100644
index 00000000000..c5b0a64d5f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si-1b.c
@@ -0,0 +1,54 @@ 
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */
+
+
+#define AVX512FP16
+#include "avx512fp16-helper.h"
+
+#define N_ELEMS 2
+
+void NOINLINE
+emulate_cvtph2_d(V512 * dest, V512 op1,
+		 __mmask32 k, int zero_mask)
+{
+  V512 v1, v2, v3, v4, v5, v6, v7, v8;
+  int i;
+  __mmask16 m1, m2;
+
+  m1 = k & 0xffff;
+
+  unpack_ph_2twops(op1, &v1, &v2);
+
+  for (i = 0; i < 16; i++) {
+    if (((1 << i) & m1) == 0) {
+      if (zero_mask) {
+	v5.u32[i] = 0;
+      }
+      else {
+	v5.u32[i] = dest->u32[i];
+      }
+    }
+    else {
+      v5.u32[i] = v1.f32[i];
+
+    }
+  }
+  *dest = v5;
+}
+
+void
+test_512 (void)
+{
+  V512 res;
+  V512 exp;
+  
+  init_src();
+  emulate_cvtph2_d(&exp, src1,  NET_MASK, 0);
+  res.i32[0] = _mm_cvtt_roundsh_i32(src1.xmmh[0], _ROUND_NINT);
+  check_results(&res, &exp, N_ELEMS, "_mm_cvtt_roundsh_i32");
+
+  if (n_errs != 0) {
+      abort ();
+  }
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si64-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si64-1a.c
new file mode 100644
index 00000000000..76a9053ef89
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si64-1a.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mavx512fp16 -O2" } */
+/* { dg-final { scan-assembler-times "vcvttsh2si\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%rax" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttsh2si\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%rax" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128h x1;
+volatile long long res2;
+
+void extern
+avx512f_test (void)
+{
+  res2 = _mm_cvttsh_i64 (x1);
+  res2 = _mm_cvtt_roundsh_i64 (x1, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si64-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si64-1b.c
new file mode 100644
index 00000000000..4e0fe5bb6bf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2si64-1b.c
@@ -0,0 +1,52 @@ 
+/* { dg-do run { target { { ! ia32 } && avx512fp16 } } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */
+
+#define AVX512FP16
+#include "avx512fp16-helper.h"
+
+#define N_ELEMS 4
+
+void NOINLINE
+emulate_cvtph2_q(V512 * dest, V512 op1,
+		 __mmask32 k, int zero_mask)
+{
+  V512 v1, v2, v3, v4, v5, v6, v7, v8;
+  int i;
+  __mmask16 m1, m2;
+
+  m1 = k & 0xffff;
+
+  unpack_ph_2twops(op1, &v1, &v2);
+
+  for (i = 0; i < 8; i++) {
+    if (((1 << i) & m1) == 0) {
+      if (zero_mask) {
+	v5.u64[i] = 0;
+      }
+      else {
+	v5.u64[i] = dest->u64[i];
+      }
+    }
+    else {
+      v5.u64[i] = v1.f32[i];
+    }
+  }
+  *dest = v5;
+}
+
+void
+test_512 (void)
+{
+  V512 res;
+  V512 exp;
+  
+  init_src();
+  emulate_cvtph2_q(&exp, src1,  NET_MASK, 0);
+  res.s64[0] = _mm_cvtt_roundsh_i64(src1.xmmh[0], _ROUND_NINT);
+  check_results(&res, &exp, N_ELEMS, "_mm_cvtt_roundsh_i64");
+
+  if (n_errs != 0) {
+      abort ();
+  }
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi-1a.c
new file mode 100644
index 00000000000..59564578a4d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi-1a.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx512fp16 -O2" } */
+/* { dg-final { scan-assembler-times "vcvttsh2usi\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%eax" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttsh2usi\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%eax" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128h x1;
+volatile unsigned int res1;
+
+void extern
+avx512f_test (void)
+{
+  res1 = _mm_cvttsh_u32 (x1);
+  res1 = _mm_cvtt_roundsh_u32 (x1, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi-1b.c
new file mode 100644
index 00000000000..214e3e13db7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi-1b.c
@@ -0,0 +1,54 @@ 
+/* { dg-do run { target avx512fp16 } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */
+
+
+#define AVX512FP16
+#include "avx512fp16-helper.h"
+
+#define N_ELEMS 2
+
+void NOINLINE
+emulate_cvtph2_d(V512 * dest, V512 op1,
+		 __mmask32 k, int zero_mask)
+{
+  V512 v1, v2, v3, v4, v5, v6, v7, v8;
+  int i;
+  __mmask16 m1, m2;
+
+  m1 = k & 0xffff;
+
+  unpack_ph_2twops(op1, &v1, &v2);
+
+  for (i = 0; i < 16; i++) {
+    if (((1 << i) & m1) == 0) {
+      if (zero_mask) {
+	v5.u32[i] = 0;
+      }
+      else {
+	v5.u32[i] = dest->u32[i];
+      }
+    }
+    else {
+      v5.u32[i] = v1.f32[i];
+
+    }
+  }
+  *dest = v5;
+}
+
+void
+test_512 (void)
+{
+  V512 res;
+  V512 exp;
+  
+  init_src();
+  emulate_cvtph2_d(&exp, src1,  NET_MASK, 0);
+  res.u32[0] = _mm_cvtt_roundsh_i32(src1.xmmh[0], _ROUND_NINT);
+  check_results(&res, &exp, N_ELEMS, "_mm_cvtt_roundsh_u32");
+
+  if (n_errs != 0) {
+      abort ();
+  }
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi64-1a.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi64-1a.c
new file mode 100644
index 00000000000..23e8e70a901
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi64-1a.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile  { target { ! ia32 } } } */
+/* { dg-options "-mavx512fp16 -O2 " } */
+/* { dg-final { scan-assembler-times "vcvttsh2usi\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%rax" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttsh2usi\[ \\t\]+\{sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%rax" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128h x1;
+volatile unsigned long long res2;
+
+void extern
+avx512f_test (void)
+{
+  res2 = _mm_cvttsh_u64 (x1);
+  res2 = _mm_cvtt_roundsh_u64 (x1, 8);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi64-1b.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi64-1b.c
new file mode 100644
index 00000000000..863fb6e167d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vcvttsh2usi64-1b.c
@@ -0,0 +1,53 @@ 
+/* { dg-do run  { target { { ! ia32 } && avx512fp16 } } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512dq" } */
+
+
+#define AVX512FP16
+#include "avx512fp16-helper.h"
+
+#define N_ELEMS 4
+
+void NOINLINE
+emulate_cvtph2_q(V512 * dest, V512 op1,
+		 __mmask32 k, int zero_mask)
+{
+  V512 v1, v2, v3, v4, v5, v6, v7, v8;
+  int i;
+  __mmask16 m1, m2;
+
+  m1 = k & 0xffff;
+
+  unpack_ph_2twops(op1, &v1, &v2);
+
+  for (i = 0; i < 8; i++) {
+    if (((1 << i) & m1) == 0) {
+      if (zero_mask) {
+	v5.u64[i] = 0;
+      }
+      else {
+	v5.u64[i] = dest->u64[i];
+      }
+    }
+    else {
+      v5.u64[i] = v1.f32[i];
+    }
+  }
+  *dest = v5;
+}
+
+void
+test_512 (void)
+{
+  V512 res;
+  V512 exp;
+  
+  init_src();
+  emulate_cvtph2_q(&exp, src1,  NET_MASK, 0);
+  res.u64[0] = _mm_cvtt_roundsh_i64(src1.xmmh[0], _ROUND_NINT);
+  check_results(&res, &exp, 4, "_mm_cvtt_roundsh_u64");
+
+  if (n_errs != 0) {
+      abort ();
+  }
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index 2e730d554dd..0d976fb0de4 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -758,6 +758,10 @@ 
 #define __builtin_ia32_vcvtsh2si64_round(A, B) __builtin_ia32_vcvtsh2si64_round(A, 8)
 #define __builtin_ia32_vcvtsh2usi32_round(A, B) __builtin_ia32_vcvtsh2usi32_round(A, 8)
 #define __builtin_ia32_vcvtsh2usi64_round(A, B) __builtin_ia32_vcvtsh2usi64_round(A, 8)
+#define __builtin_ia32_vcvttsh2si32_round(A, B) __builtin_ia32_vcvttsh2si32_round(A, 8)
+#define __builtin_ia32_vcvttsh2si64_round(A, B) __builtin_ia32_vcvttsh2si64_round(A, 8)
+#define __builtin_ia32_vcvttsh2usi32_round(A, B) __builtin_ia32_vcvttsh2usi32_round(A, 8)
+#define __builtin_ia32_vcvttsh2usi64_round(A, B) __builtin_ia32_vcvttsh2usi64_round(A, 8)
 #define __builtin_ia32_vcvtsi2sh32_round(A, B, C) __builtin_ia32_vcvtsi2sh32_round(A, B, 8)
 #define __builtin_ia32_vcvtsi2sh64_round(A, B, C) __builtin_ia32_vcvtsi2sh64_round(A, B, 8)
 #define __builtin_ia32_vcvtusi2sh32_round(A, B, C) __builtin_ia32_vcvtusi2sh32_round(A, B, 8)
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
index 98e38fb025a..403f3af6067 100644
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
@@ -698,9 +698,13 @@  test_1 (_mm512_cvt_roundepi64_ph, __m128h, __m512i, 8)
 test_1 (_mm512_cvt_roundepu64_ph, __m128h, __m512i, 8)
 test_1 (_mm_cvt_roundsh_i32, int, __m128h, 8)
 test_1 (_mm_cvt_roundsh_u32, unsigned, __m128h, 8)
+test_1 (_mm_cvtt_roundsh_i32, int, __m128h, 8)
+test_1 (_mm_cvtt_roundsh_u32, unsigned, __m128h, 8)
 #ifdef __x86_64__
 test_1 (_mm_cvt_roundsh_i64, long long, __m128h, 8)
 test_1 (_mm_cvt_roundsh_u64, unsigned long long, __m128h, 8)
+test_1 (_mm_cvtt_roundsh_i64, long long, __m128h, 8)
+test_1 (_mm_cvtt_roundsh_u64, unsigned long long, __m128h, 8)
 test_2 (_mm_cvt_roundi64_sh, __m128h, __m128h, long long, 8)
 test_2 (_mm_cvt_roundu64_sh, __m128h, __m128h, unsigned long long, 8)
 #endif
diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
index 3ad10908d49..b980ac3cddd 100644
--- a/gcc/testsuite/gcc.target/i386/sse-22.c
+++ b/gcc/testsuite/gcc.target/i386/sse-22.c
@@ -803,9 +803,13 @@  test_1 (_mm512_cvt_roundepi64_ph, __m128h, __m512i, 8)
 test_1 (_mm512_cvt_roundepu64_ph, __m128h, __m512i, 8)
 test_1 (_mm_cvt_roundsh_i32, int, __m128h, 8)
 test_1 (_mm_cvt_roundsh_u32, unsigned, __m128h, 8)
+test_1 (_mm_cvtt_roundsh_i32, int, __m128h, 8)
+test_1 (_mm_cvtt_roundsh_u32, unsigned, __m128h, 8)
 #ifdef __x86_64__
 test_1 (_mm_cvt_roundsh_i64, long long, __m128h, 8)
 test_1 (_mm_cvt_roundsh_u64, unsigned long long, __m128h, 8)
+test_1 (_mm_cvtt_roundsh_i64, long long, __m128h, 8)
+test_1 (_mm_cvtt_roundsh_u64, unsigned long long, __m128h, 8)
 test_2 (_mm_cvt_roundi64_sh, __m128h, __m128h, long long, 8)
 test_2 (_mm_cvt_roundu64_sh, __m128h, __m128h, unsigned long long, 8)
 #endif
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index 6990f93bfce..1bd734a9352 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -759,6 +759,10 @@ 
 #define __builtin_ia32_vcvtsh2si64_round(A, B) __builtin_ia32_vcvtsh2si64_round(A, 8)
 #define __builtin_ia32_vcvtsh2usi32_round(A, B) __builtin_ia32_vcvtsh2usi32_round(A, 8)
 #define __builtin_ia32_vcvtsh2usi64_round(A, B) __builtin_ia32_vcvtsh2usi64_round(A, 8)
+#define __builtin_ia32_vcvttsh2si32_round(A, B) __builtin_ia32_vcvttsh2si32_round(A, 8)
+#define __builtin_ia32_vcvttsh2si64_round(A, B) __builtin_ia32_vcvttsh2si64_round(A, 8)
+#define __builtin_ia32_vcvttsh2usi32_round(A, B) __builtin_ia32_vcvttsh2usi32_round(A, 8)
+#define __builtin_ia32_vcvttsh2usi64_round(A, B) __builtin_ia32_vcvttsh2usi64_round(A, 8)
 #define __builtin_ia32_vcvtsi2sh32_round(A, B, C) __builtin_ia32_vcvtsi2sh32_round(A, B, 8)
 #define __builtin_ia32_vcvtsi2sh64_round(A, B, C) __builtin_ia32_vcvtsi2sh64_round(A, B, 8)
 #define __builtin_ia32_vcvtusi2sh32_round(A, B, C) __builtin_ia32_vcvtusi2sh32_round(A, B, 8)