@@ -1428,6 +1428,201 @@ _mm_maskz_sqrt_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
#endif /* __OPTIMIZE__ */
+/* Intrinsics vrcpph.  */
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rcp_ph (__m512h __A)
+{
+ return __builtin_ia32_vrcpph_v32hf_mask (__A, _mm512_setzero_ph (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rcp_ph (__m512h __A, __mmask32 __B, __m512h __C)
+{
+ return __builtin_ia32_vrcpph_v32hf_mask (__C, __A, __B);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rcp_ph (__mmask32 __A, __m512h __B)
+{
+ return __builtin_ia32_vrcpph_v32hf_mask (__B, _mm512_setzero_ph (),
+ __A);
+}
+
+/* Intrinsics vrcpsh.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp_sh (__m128h __A, __m128h __B)
+{
+ return __builtin_ia32_vrcpsh_v8hf_mask (__B, __A, _mm_setzero_ph (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rcp_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+ return __builtin_ia32_vrcpsh_v8hf_mask (__D, __C, __A, __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rcp_sh (__mmask8 __A, __m128h __B, __m128h __C)
+{
+ return __builtin_ia32_vrcpsh_v8hf_mask (__C, __B, _mm_setzero_ph (),
+ __A);
+}
+
+/* Intrinsics vscalefph.  */
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_scalef_ph (__m512h __A, __m512h __B)
+{
+ return __builtin_ia32_vscalefph_v32hf_mask_round (__A, __B,
+ _mm512_setzero_ph (),
+ (__mmask32) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_scalef_ph (__m512h __A, __mmask32 __B, __m512h __C, __m512h __D)
+{
+ return __builtin_ia32_vscalefph_v32hf_mask_round (__C, __D, __A, __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_scalef_ph (__mmask32 __A, __m512h __B, __m512h __C)
+{
+ return __builtin_ia32_vscalefph_v32hf_mask_round (__B, __C,
+ _mm512_setzero_ph (),
+ __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_scalef_round_ph (__m512h __A, __m512h __B, const int __C)
+{
+ return __builtin_ia32_vscalefph_v32hf_mask_round (__A, __B,
+ _mm512_setzero_ph (),
+ (__mmask32) -1, __C);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_scalef_round_ph (__m512h __A, __mmask32 __B, __m512h __C,
+ __m512h __D, const int __E)
+{
+ return __builtin_ia32_vscalefph_v32hf_mask_round (__C, __D, __A, __B,
+ __E);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_scalef_round_ph (__mmask32 __A, __m512h __B, __m512h __C,
+ const int __D)
+{
+ return __builtin_ia32_vscalefph_v32hf_mask_round (__B, __C,
+ _mm512_setzero_ph (),
+ __A, __D);
+}
+
+#else
+#define _mm512_scalef_round_ph(A, B, C) \
+ (__builtin_ia32_vscalefph_v32hf_mask_round ((A), (B), \
+ _mm512_setzero_ph (), \
+ (__mmask32)-1, (C)))
+
+#define _mm512_mask_scalef_round_ph(A, B, C, D, E) \
+ (__builtin_ia32_vscalefph_v32hf_mask_round ((C), (D), (A), (B), (E)))
+
+#define _mm512_maskz_scalef_round_ph(A, B, C, D) \
+ (__builtin_ia32_vscalefph_v32hf_mask_round ((B), (C), \
+ _mm512_setzero_ph (), \
+ (A), (D)))
+
+#endif /* __OPTIMIZE__ */
+
+/* Intrinsics vscalefsh.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_scalef_sh (__m128h __A, __m128h __B)
+{
+ return __builtin_ia32_vscalefsh_v8hf_mask_round (__A, __B,
+ _mm_setzero_ph (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_scalef_sh (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+ return __builtin_ia32_vscalefsh_v8hf_mask_round (__C, __D, __A, __B,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_scalef_sh (__mmask8 __A, __m128h __B, __m128h __C)
+{
+ return __builtin_ia32_vscalefsh_v8hf_mask_round (__B, __C,
+ _mm_setzero_ph (),
+ __A,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_scalef_round_sh (__m128h __A, __m128h __B, const int __C)
+{
+ return __builtin_ia32_vscalefsh_v8hf_mask_round (__A, __B,
+ _mm_setzero_ph (),
+ (__mmask8) -1, __C);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_scalef_round_sh (__m128h __A, __mmask8 __B, __m128h __C,
+ __m128h __D, const int __E)
+{
+ return __builtin_ia32_vscalefsh_v8hf_mask_round (__C, __D, __A, __B,
+ __E);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_scalef_round_sh (__mmask8 __A, __m128h __B, __m128h __C,
+ const int __D)
+{
+ return __builtin_ia32_vscalefsh_v8hf_mask_round (__B, __C,
+ _mm_setzero_ph (),
+ __A, __D);
+}
+
+#else
+#define _mm_scalef_round_sh(A, B, C) \
+ (__builtin_ia32_vscalefsh_v8hf_mask_round ((A), (B), \
+ _mm_setzero_ph (), \
+ (__mmask8)-1, (C)))
+
+#define _mm_mask_scalef_round_sh(A, B, C, D, E) \
+ (__builtin_ia32_vscalefsh_v8hf_mask_round ((C), (D), (A), (B), (E)))
+
+#define _mm_maskz_scalef_round_sh(A, B, C, D) \
+ (__builtin_ia32_vscalefsh_v8hf_mask_round ((B), (C), _mm_setzero_ph (), \
+ (A), (D)))
+
+#endif /* __OPTIMIZE__ */
+
#ifdef __DISABLE_AVX512FP16__
#undef __DISABLE_AVX512FP16__
#pragma GCC pop_options
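Not part of the patch -- a minimal usage sketch of the new 512-bit
intrinsics, assuming GCC with -mavx512fp16 and the _mm512_set1_ph /
_mm512_storeu_ph helpers from the same header series.  vscalefph computes
a[i] * 2^floor(b[i]) per lane; vrcpph returns an approximate reciprocal
(see the Intel SDM for the exact error bound).

#include <immintrin.h>
#include <stdio.h>

int
main (void)
{
  __m512h a = _mm512_set1_ph ((_Float16) 3.0f);
  __m512h e = _mm512_set1_ph ((_Float16) 4.0f);

  __m512h scaled = _mm512_scalef_ph (a, e); /* each lane: 3.0 * 2^4 = 48.0  */
  __m512h recip  = _mm512_rcp_ph (a);       /* each lane: approximately 1/3 */

  _Float16 s[32], r[32];
  _mm512_storeu_ph (s, scaled);
  _mm512_storeu_ph (r, recip);
  printf ("scalef: %f  rcp: %f\n", (double) s[0], (double) r[0]);
  return 0;
}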
@@ -451,6 +451,103 @@ _mm256_maskz_rsqrt_ph (__mmask16 __A, __m256h __B)
__A);
}
+/* Intrinsics vrcpph.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_rcp_ph (__m128h __A)
+{
+ return __builtin_ia32_vrcpph_v8hf_mask (__A, _mm_setzero_ph (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_rcp_ph (__m256h __A)
+{
+ return __builtin_ia32_vrcpph_v16hf_mask (__A, _mm256_setzero_ph (),
+ (__mmask16) -1);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rcp_ph (__m128h __A, __mmask8 __B, __m128h __C)
+{
+ return __builtin_ia32_vrcpph_v8hf_mask (__C, __A, __B);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_rcp_ph (__m256h __A, __mmask16 __B, __m256h __C)
+{
+ return __builtin_ia32_vrcpph_v16hf_mask (__C, __A, __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rcp_ph (__mmask8 __A, __m128h __B)
+{
+ return __builtin_ia32_vrcpph_v8hf_mask (__B, _mm_setzero_ph (), __A);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_rcp_ph (__mmask16 __A, __m256h __B)
+{
+ return __builtin_ia32_vrcpph_v16hf_mask (__B, _mm256_setzero_ph (),
+ __A);
+}
+
+/* Intrinsics vscalefph.  */
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_scalef_ph (__m128h __A, __m128h __B)
+{
+ return __builtin_ia32_vscalefph_v8hf_mask (__A, __B,
+ _mm_setzero_ph (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_scalef_ph (__m256h __A, __m256h __B)
+{
+ return __builtin_ia32_vscalefph_v16hf_mask (__A, __B,
+ _mm256_setzero_ph (),
+ (__mmask16) -1);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_scalef_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
+{
+ return __builtin_ia32_vscalefph_v8hf_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_scalef_ph (__m256h __A, __mmask16 __B, __m256h __C,
+ __m256h __D)
+{
+ return __builtin_ia32_vscalefph_v16hf_mask (__C, __D, __A, __B);
+}
+
+extern __inline __m128h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_scalef_ph (__mmask8 __A, __m128h __B, __m128h __C)
+{
+ return __builtin_ia32_vscalefph_v8hf_mask (__B, __C,
+ _mm_setzero_ph (), __A);
+}
+
+extern __inline __m256h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_scalef_ph (__mmask16 __A, __m256h __B, __m256h __C)
+{
+ return __builtin_ia32_vscalefph_v16hf_mask (__B, __C,
+ _mm256_setzero_ph (),
+ __A);
+}
+
#ifdef __DISABLE_AVX512FP16VL__
#undef __DISABLE_AVX512FP16VL__
#pragma GCC pop_options
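Not part of the patch -- a sketch of the two masking conventions used by
the VL variants above (assumes -mavx512fp16 -mavx512vl).  The mask_ form
merges: lanes whose mask bit is clear keep the corresponding element of
the first (source) argument; the maskz_ form zeroes them instead.

#include <immintrin.h>

__m256h
rcp_even_lanes (__m256h src, __m256h x)
{
  /* Mask 0x5555 selects the even lanes: those get ~1/x, odd lanes
     keep the corresponding element of SRC.  */
  return _mm256_mask_rcp_ph (src, (__mmask16) 0x5555, x);
}

__m256h
rcp_even_lanes_z (__m256h x)
{
  /* Same computation, but deselected lanes become 0.0.  */
  return _mm256_maskz_rcp_ph ((__mmask16) 0x5555, x);
}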
@@ -2808,6 +2808,12 @@ BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp1
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv16hf2_mask, "__builtin_ia32_vrsqrtph_v16hf_mask", IX86_BUILTIN_VRSQRTPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv32hf2_mask, "__builtin_ia32_vrsqrtph_v32hf_mask", IX86_BUILTIN_VRSQRTPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrsqrtv8hf2_mask, "__builtin_ia32_vrsqrtsh_v8hf_mask", IX86_BUILTIN_VRSQRTSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv8hf2_mask, "__builtin_ia32_vrcpph_v8hf_mask", IX86_BUILTIN_VRCPPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv16hf2_mask, "__builtin_ia32_vrcpph_v16hf_mask", IX86_BUILTIN_VRCPPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv32hf2_mask, "__builtin_ia32_vrcpph_v32hf_mask", IX86_BUILTIN_VRCPPH_V32HF_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrcpv8hf2_mask, "__builtin_ia32_vrcpsh_v8hf_mask", IX86_BUILTIN_VRCPSH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_scalefv8hf_mask, "__builtin_ia32_vscalefph_v8hf_mask", IX86_BUILTIN_VSCALEFPH_V8HF_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_scalefv16hf_mask, "__builtin_ia32_vscalefph_v16hf_mask", IX86_BUILTIN_VSCALEFPH_V16HF_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
/* Builtins with rounding support. */
BDESC_END (ARGS, ROUND_ARGS)
@@ -3025,6 +3031,8 @@ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_cmpv32hf3_mask_round, "
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmcmpv8hf3_mask_round, "__builtin_ia32_vcmpsh_v8hf_mask_round", IX86_BUILTIN_VCMPSH_V8HF_MASK_ROUND, UNKNOWN, (int) UQI_FTYPE_V8HF_V8HF_INT_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv32hf2_mask_round, "__builtin_ia32_vsqrtph_v32hf_mask_round", IX86_BUILTIN_VSQRTPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsqrtv8hf2_mask_round, "__builtin_ia32_vsqrtsh_v8hf_mask_round", IX86_BUILTIN_VSQRTSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_scalefv32hf_mask_round, "__builtin_ia32_vscalefph_v32hf_mask_round", IX86_BUILTIN_VSCALEFPH_V32HF_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmscalefv8hf_mask_round, "__builtin_ia32_vscalefsh_v8hf_mask_round", IX86_BUILTIN_VSCALEFSH_V8HF_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC_END (ROUND_ARGS, MULTI_ARG)
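Not part of the patch -- an illustration of how a BDESC row's FTYPE maps
to the call site.  Direct __builtin_ia32_* calls are GCC-internal
interface; the intrinsics above are the supported entry points.  Assumes
-mavx512fp16.

#include <immintrin.h>

__m512h
scalef_merge_low16 (__m512h a, __m512h b, __m512h fallback)
{
  /* V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT: the two vector operands,
     then the merge source, the 32-bit lane mask, and the rounding
     immediate that ROUND_ARGS entries require.  */
  return __builtin_ia32_vscalefph_v32hf_mask_round
    (a, b, fallback, (__mmask32) 0xffff, _MM_FROUND_CUR_DIRECTION);
}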
@@ -386,6 +386,13 @@ (define_mode_iterator VF_AVX512VL
(define_mode_iterator VF1_AVX512ER_128_256
[(V16SF "TARGET_AVX512ER") (V8SF "TARGET_AVX") V4SF])
+(define_mode_iterator VFH_AVX512VL
+ [(V32HF "TARGET_AVX512FP16")
+ (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
+ (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
+ V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
+ V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+
(define_mode_iterator VF2_AVX512VL
[V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
@@ -2198,6 +2205,30 @@ (define_insn "*sse_vmrcpv4sf2"
(set_attr "prefix" "orig,vex")
(set_attr "mode" "SF")])
+(define_insn "avx512fp16_rcp<mode>2<mask_name>"
+ [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=v")
+ (unspec:VF_AVX512FP16VL
+ [(match_operand:VF_AVX512FP16VL 1 "nonimmediate_operand" "vm")]
+ UNSPEC_RCP))]
+ "TARGET_AVX512FP16"
+ "vrcpph\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512fp16_vmrcpv8hf2<mask_scalar_name>"
+ [(set (match_operand:V8HF 0 "register_operand" "=v")
+ (vec_merge:V8HF
+ (unspec:V8HF [(match_operand:V8HF 1 "nonimmediate_operand" "vm")]
+ UNSPEC_RCP)
+ (match_operand:V8HF 2 "register_operand" "v")
+ (const_int 1)))]
+ "TARGET_AVX512FP16"
+ "vrcpsh\t{%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %w1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "HF")])
+
(define_insn "<mask_codefor>rcp14<mode><mask_name>"
[(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
(unspec:VF_AVX512VL
@@ -9948,11 +9979,11 @@ (define_split
})
(define_insn "avx512f_vmscalef<mode><mask_scalar_name><round_scalar_name>"
- [(set (match_operand:VF_128 0 "register_operand" "=v")
- (vec_merge:VF_128
- (unspec:VF_128
- [(match_operand:VF_128 1 "register_operand" "v")
- (match_operand:VF_128 2 "<round_scalar_nimm_predicate>" "<round_scalar_constraint>")]
+ [(set (match_operand:VFH_128 0 "register_operand" "=v")
+ (vec_merge:VFH_128
+ (unspec:VFH_128
+ [(match_operand:VFH_128 1 "register_operand" "v")
+ (match_operand:VFH_128 2 "<round_scalar_nimm_predicate>" "<round_scalar_constraint>")]
UNSPEC_SCALEF)
(match_dup 1)
(const_int 1)))]
@@ -9962,10 +9993,10 @@ (define_insn "avx512f_vmscalef<mode><mask_scalar_name><round_scalar_name>"
(set_attr "mode" "<ssescalarmode>")])
(define_insn "<avx512>_scalef<mode><mask_name><round_name>"
- [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
- (unspec:VF_AVX512VL
- [(match_operand:VF_AVX512VL 1 "register_operand" "v")
- (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
+ [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v")
+ (unspec:VFH_AVX512VL
+ [(match_operand:VFH_AVX512VL 1 "register_operand" "v")
+ (match_operand:VFH_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
UNSPEC_SCALEF))]
"TARGET_AVX512F"
"vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
@@ -703,6 +703,8 @@
#define __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, 1, D, 8)
#define __builtin_ia32_vsqrtph_v32hf_mask_round(C, A, B, D) __builtin_ia32_vsqrtph_v32hf_mask_round(C, A, B, 8)
#define __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, E) __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, 8)
+#define __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, 8)
/* avx512fp16vlintrin.h */
#define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)
@@ -720,6 +720,8 @@
#define __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, 1, D, 8)
#define __builtin_ia32_vsqrtph_v32hf_mask_round(C, A, B, D) __builtin_ia32_vsqrtph_v32hf_mask_round(C, A, B, 8)
#define __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, E) __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, 8)
+#define __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, 8)
/* avx512fp16vlintrin.h */
#define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)
@@ -687,6 +687,8 @@ test_2 (_mm512_cmp_ph_mask, __mmask32, __m512h, __m512h, 1)
test_2 (_mm_comi_sh, int, __m128h, __m128h, 1)
test_2 (_mm512_maskz_sqrt_round_ph, __m512h, __mmask32, __m512h, 8)
test_2 (_mm_sqrt_round_sh, __m128h, __m128h, __m128h, 8)
+test_2 (_mm512_scalef_round_ph, __m512h, __m512h, __m512h, 8)
+test_2 (_mm_scalef_round_sh, __m128h, __m128h, __m128h, 8)
test_2x (_mm512_cmp_round_ph_mask, __mmask32, __m512h, __m512h, 1, 8)
test_2x (_mm_cmp_round_sh_mask, __mmask8, __m128h, __m128h, 1, 8)
test_2x (_mm_comi_round_sh, int, __m128h, __m128h, 1, 8)
@@ -705,6 +707,8 @@ test_3 (_mm_maskz_min_round_sh, __m128h, __mmask8, __m128h, __m128h, 8)
test_3 (_mm512_mask_cmp_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1)
test_3 (_mm512_mask_sqrt_round_ph, __m512h, __m512h, __mmask32, __m512h, 8)
test_3 (_mm_maskz_sqrt_round_sh, __m128h, __mmask8, __m128h, __m128h, 8)
+test_3 (_mm512_maskz_scalef_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
+test_3 (_mm_maskz_scalef_round_sh, __m128h, __mmask8, __m128h, __m128h, 8)
test_3x (_mm512_mask_cmp_round_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1, 8)
test_3x (_mm_mask_cmp_round_sh_mask, __mmask8, __mmask8, __m128h, __m128h, 1, 8)
test_4 (_mm512_mask_add_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
@@ -720,6 +724,8 @@ test_4 (_mm512_mask_min_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h,
test_4 (_mm_mask_max_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8)
test_4 (_mm_mask_min_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8)
test_4 (_mm_mask_sqrt_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8)
+test_4 (_mm512_mask_scalef_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
+test_4 (_mm_mask_scalef_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8)
/* avx512fp16vlintrin.h */
test_2 (_mm_cmp_ph_mask, __mmask8, __m128h, __m128h, 1)
@@ -792,6 +792,7 @@ test_2 (_mm512_cmp_ph_mask, __mmask32, __m512h, __m512h, 1)
test_2 (_mm_comi_sh, int, __m128h, __m128h, 1)
test_2 (_mm512_maskz_sqrt_round_ph, __m512h, __mmask32, __m512h, 8)
test_2 (_mm_sqrt_round_sh, __m128h, __m128h, __m128h, 8)
+test_2 (_mm512_scalef_round_ph, __m512h, __m512h, __m512h, 8)
test_2x (_mm512_cmp_round_ph_mask, __mmask32, __m512h, __m512h, 1, 8)
test_2x (_mm_cmp_round_sh_mask, __mmask8, __m128h, __m128h, 1, 8)
test_2x (_mm_comi_round_sh, int, __m128h, __m128h, 1, 8)
@@ -810,6 +811,7 @@ test_3 (_mm_maskz_min_round_sh, __m128h, __mmask8, __m128h, __m128h, 8)
test_3 (_mm512_mask_cmp_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1)
test_3 (_mm512_mask_sqrt_round_ph, __m512h, __m512h, __mmask32, __m512h, 8)
test_3 (_mm_maskz_sqrt_round_sh, __m128h, __mmask8, __m128h, __m128h, 8)
+test_3 (_mm512_maskz_scalef_round_ph, __m512h, __mmask32, __m512h, __m512h, 8)
test_3x (_mm512_mask_cmp_round_ph_mask, __mmask32, __mmask32, __m512h, __m512h, 1, 8)
test_3x (_mm_mask_cmp_round_sh_mask, __mmask8, __mmask8, __m128h, __m128h, 1, 8)
test_4 (_mm512_mask_add_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
@@ -825,6 +827,7 @@ test_4 (_mm512_mask_min_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h,
test_4 (_mm_mask_max_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8)
test_4 (_mm_mask_min_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8)
test_4 (_mm_mask_sqrt_round_sh, __m128h, __m128h, __mmask8, __m128h, __m128h, 8)
+test_4 (_mm512_mask_scalef_round_ph, __m512h, __m512h, __mmask32, __m512h, __m512h, 8)
/* avx512fp16vlintrin.h */
test_2 (_mm_cmp_ph_mask, __mmask8, __m128h, __m128h, 1)
@@ -721,6 +721,8 @@
#define __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vcmpsh_v8hf_mask_round(A, B, 1, D, 8)
#define __builtin_ia32_vsqrtph_v32hf_mask_round(C, A, B, D) __builtin_ia32_vsqrtph_v32hf_mask_round(C, A, B, 8)
#define __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, E) __builtin_ia32_vsqrtsh_v8hf_mask_round(D, C, A, B, 8)
+#define __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefph_v32hf_mask_round(A, B, C, D, 8)
+#define __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, E) __builtin_ia32_vscalefsh_v8hf_mask_round(A, B, C, D, 8)
/* avx512fp16vlintrin.h */
#define __builtin_ia32_vcmpph_v8hf_mask(A, B, C, D) __builtin_ia32_vcmpph_v8hf_mask(A, B, 1, D)