@@ -15082,6 +15082,50 @@ ix86_print_operand (FILE *file, rtx x, int code)
/* We do not want to print value of the operand. */
return;
+ case 'N':
+ /* Zero-masking modifier: print "{z}" only when operand X is zero. */
+ if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
+ fputs ("{z}", file);
+ return;
+
+ case 'R':
+ /* Embedded rounding / SAE modifier. Operand codes can also come from
+ asm templates, so diagnose a bad operand instead of aborting. */
+ if (!CONST_INT_P (x))
+ {
+ output_operand_lossage ("operand is not an integer, invalid operand code 'R'");
+ return;
+ }
+
+ if (ASSEMBLER_DIALECT == ASM_INTEL)
+ fputs (", ", file);
+
+ switch (INTVAL (x))
+ {
+ case ROUND_NEAREST_INT:
+ fputs ("{rn-sae}", file);
+ break;
+ case ROUND_NEG_INF:
+ fputs ("{rd-sae}", file);
+ break;
+ case ROUND_POS_INF:
+ fputs ("{ru-sae}", file);
+ break;
+ case ROUND_ZERO:
+ fputs ("{rz-sae}", file);
+ break;
+ case ROUND_SAE:
+ fputs ("{sae}", file);
+ break;
+ default:
+ output_operand_lossage ("operand is not a specific integer, invalid operand code 'R'");
+ }
+
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ fputs (", ", file);
+
+ return;
+
case '*':
if (ASSEMBLER_DIALECT == ASM_ATT)
putc ('*', file);
@@ -241,6 +241,16 @@
(ROUND_NO_EXC 0x8)
])
+;; Constants to represent AVX512F embedded rounding
+(define_constants
+ [(ROUND_NEAREST_INT 0) ;; printed as {rn-sae} by operand code 'R'
+ (ROUND_NEG_INF 1) ;; printed as {rd-sae}
+ (ROUND_POS_INF 2) ;; printed as {ru-sae}
+ (ROUND_ZERO 3) ;; printed as {rz-sae}
+ (NO_ROUND 4) ;; NOTE(review): has no case in the 'R' switch; presumably "no rounding operand" - confirm
+ (ROUND_SAE 5) ;; printed as {sae}
+ ])
+
;; Constants to represent pcomtrue/pcomfalse variants
(define_constants
[(PCOM_FALSE 0)
@@ -672,6 +672,16 @@
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 0, 3)")))
+;; Match a const_int whose value lies in the range 0 to 4.
+(define_predicate "const_0_to_4_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 4)")))
+
+;; Match a const_int whose value lies in the range 0 to 5.
+(define_predicate "const_0_to_5_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 5)")))
+
;; Match 0 to 7.
(define_predicate "const_0_to_7_operand"
(and (match_code "const_int")
@@ -629,6 +629,9 @@
(define_mode_attr bcstscalarsuff
[(V16SI "d") (V16SF "ss") (V8DI "q") (V8DF "sd")])
+;; Include define_subst patterns for instructions with mask
+(include "subst.md")
+
;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -768,6 +771,28 @@
]
(const_string "<sseinsnmode>")))])
+(define_insn "avx512f_load<mode>_mask" ;; Masked vector move into a register (vmova*/vmovdqa* under a mask).
+ [(set (match_operand:VI48F_512 0 "register_operand" "=v,v")
+ (vec_merge:VI48F_512 ;; per element: operand 1 where the mask bit is set, else operand 2
+ (match_operand:VI48F_512 1 "nonimmediate_operand" "v,m") ;; source: register or memory
+ (match_operand:VI48F_512 2 "vector_move_operand" "0C,0C") ;; fallback: tied to operand 0, or a "C" constant
+ (match_operand:<avx512fmaskmode> 3 "register_operand" "k,k")))] ;; mask register
+ "TARGET_AVX512F"
+{
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V8DF:
+ case MODE_V16SF:
+ return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"; /* %N2 appends {z} when operand 2 is zero */
+ default:
+ return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
+ }
+}
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "memory" "none,load")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn "avx512f_blendm<mode>"
[(set (match_operand:VI48F_512 0 "register_operand" "=v")
(vec_merge:VI48F_512
@@ -780,6 +805,28 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn "avx512f_store<mode>_mask" ;; Masked vector store to memory (vmova*/vmovdqa* under a mask).
+ [(set (match_operand:VI48F_512 0 "memory_operand" "=m")
+ (vec_merge:VI48F_512
+ (match_operand:VI48F_512 1 "register_operand" "v") ;; value stored where the mask bit is set
+ (match_dup 0) ;; elsewhere the destination memory keeps its current contents
+ (match_operand:<avx512fmaskmode> 2 "register_operand" "k")))] ;; mask register
+ "TARGET_AVX512F"
+{
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V8DF:
+ case MODE_V16SF:
+ return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
+ default:
+ return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
+ }
+}
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "memory" "store")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn "sse2_movq128"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(vec_concat:V2DI
@@ -871,21 +918,21 @@
DONE;
})
-(define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix>"
+(define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
[(set (match_operand:VF_AVX512F 0 "register_operand" "=v")
(unspec:VF_AVX512F
[(match_operand:VF_AVX512F 1 "nonimmediate_operand" "vm")]
UNSPEC_LOADU))]
- "TARGET_SSE"
+ "TARGET_SSE && <mask_mode512bit_condition>"
{
switch (get_attr_mode (insn))
{
case MODE_V16SF:
case MODE_V8SF:
case MODE_V4SF:
- return "%vmovups\t{%1, %0|%0, %1}";
+ return "%vmovups\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
default:
- return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
+ return "%vmovu<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
}
}
[(set_attr "type" "ssemov")
@@ -932,12 +979,36 @@
]
(const_string "<MODE>")))])
-(define_insn "<sse2_avx_avx512f>_loaddqu<mode>"
+(define_insn "avx512f_storeu<ssemodesuffix>512_mask" ;; Masked UNSPEC_STOREU float vector store (vmovups / vmovu*).
+ [(set (match_operand:VF_512 0 "memory_operand" "=m")
+ (vec_merge:VF_512
+ (unspec:VF_512
+ [(match_operand:VF_512 1 "register_operand" "v")] ;; value stored where the mask bit is set
+ UNSPEC_STOREU)
+ (match_dup 0) ;; elsewhere the destination memory keeps its current contents
+ (match_operand:<avx512fmaskmode> 2 "register_operand" "k")))] ;; mask register
+ "TARGET_AVX512F"
+{
+ switch (get_attr_mode (insn))
+ {
+ case MODE_V16SF:
+ return "vmovups\t{%1, %0%{%2%}|%0%{%2%}, %1}";
+ default:
+ return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
+ }
+}
+ [(set_attr "type" "ssemov")
+ (set_attr "movu" "1")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
[(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand" "=v")
(unspec:VI_UNALIGNED_LOADSTORE
[(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand" "vm")]
UNSPEC_LOADU))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
{
switch (get_attr_mode (insn))
{
@@ -946,9 +1017,9 @@
return "%vmovups\t{%1, %0|%0, %1}";
case MODE_XI:
if (<MODE>mode == V8DImode)
- return "vmovdqu64\t{%1, %0|%0, %1}";
+ return "vmovdqu64\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
else
- return "vmovdqu32\t{%1, %0|%0, %1}";
+ return "vmovdqu32\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
default:
return "%vmovdqu\t{%1, %0|%0, %1}";
}
@@ -1011,6 +1082,88 @@
]
(const_string "<sseinsnmode>")))])
+(define_insn "avx512f_storedqu<mode>_mask" ;; Masked UNSPEC_STOREU integer vector store (vmovdqu32 / vmovdqu64).
+ [(set (match_operand:VI48_512 0 "memory_operand" "=m")
+ (vec_merge:VI48_512
+ (unspec:VI48_512
+ [(match_operand:VI48_512 1 "register_operand" "v")] ;; value stored where the mask bit is set
+ UNSPEC_STOREU)
+ (match_dup 0) ;; elsewhere the destination memory keeps its current contents
+ (match_operand:<avx512fmaskmode> 2 "register_operand" "k")))] ;; mask register
+ "TARGET_AVX512F"
+{
+ if (<MODE>mode == V8DImode) /* V8DI uses the 64-bit form; every other mode the 32-bit form */
+ return "vmovdqu64\t{%1, %0%{%2%}|%0%{%2%}, %1}";
+ else
+ return "vmovdqu32\t{%1, %0%{%2%}|%0%{%2%}, %1}";
+}
+ [(set_attr "type" "ssemov")
+ (set_attr "movu" "1")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx512f_moves<mode>_mask" ;; Masked scalar register move (vmov<ssescalarmodesuffix>).
+ [(set (match_operand:VF_128 0 "register_operand" "=v")
+ (vec_merge:VF_128 ;; outer merge, selector 1: element 0 from the inner merge, the rest from operand 1
+ (vec_merge:VF_128 ;; inner merge: operand 2 where mask operand 4 is set, else operand 3
+ (match_operand:VF_128 2 "register_operand" "v")
+ (match_operand:VF_128 3 "vector_move_operand" "0C") ;; tied to operand 0, or a "C" constant
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k")) ;; mask register
+ (match_operand:VF_128 1 "register_operand" "v")
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "vmov<ssescalarmodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_expand "avx512f_loads<mode>_mask" ;; Expander for a masked scalar load from memory.
+ [(set (match_operand:VF_128 0 "register_operand")
+ (vec_merge:VF_128 ;; outer merge, selector 1: element 0 from the inner merge, the rest from operand 4
+ (vec_merge:VF_128 ;; inner merge: loaded scalar where mask operand 3 is set, else operand 2
+ (vec_duplicate:VF_128
+ (match_operand:<ssescalarmode> 1 "memory_operand"))
+ (match_operand:VF_128 2 "vector_move_operand")
+ (match_operand:<avx512fmaskmode> 3 "register_operand"))
+ (match_dup 4) ;; operand 4 is not supplied by the caller; it is set to the zero vector below
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "operands[4] = CONST0_RTX (<MODE>mode);")
+
+(define_insn "*avx512f_loads<mode>_mask" ;; Masked scalar load from memory (vmov<ssescalarmodesuffix>).
+ [(set (match_operand:VF_128 0 "register_operand" "=v")
+ (vec_merge:VF_128 ;; outer merge, selector 1: element 0 from the inner merge, the rest from operand 4
+ (vec_merge:VF_128 ;; inner merge: loaded scalar where mask operand 3 is set, else operand 2
+ (vec_duplicate:VF_128
+ (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
+ (match_operand:VF_128 2 "vector_move_operand" "0C") ;; tied to operand 0, or a "C" constant
+ (match_operand:<avx512fmaskmode> 3 "register_operand" "k")) ;; mask register
+ (match_operand:VF_128 4 "const0_operand") ;; must satisfy const0_operand; not printed in the template
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "vmov<ssescalarmodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "memory" "load")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx512f_stores<mode>_mask" ;; Masked scalar store of element 0 to memory.
+ [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
+ (vec_select:<ssescalarmode> ;; picks element 0 of the merged vector
+ (vec_merge:VF_128
+ (match_operand:VF_128 1 "register_operand" "v") ;; value stored where the mask bit is set
+ (vec_duplicate:VF_128
+ (match_dup 0)) ;; otherwise the current memory value is written back unchanged
+ (match_operand:<avx512fmaskmode> 2 "register_operand" "k")) ;; mask register
+ (parallel [(const_int 0)])))]
+ "TARGET_AVX512F"
+ "vmov<ssescalarmodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "memory" "store")
+ (set_attr "mode" "<ssescalarmode>")])
+
(define_insn "<sse3>_lddqu<avxsizesuffix>"
[(set (match_operand:VI1 0 "register_operand" "=x")
(unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
@@ -1138,83 +1291,83 @@
}
[(set_attr "isa" "noavx,noavx,avx,avx")])
-(define_expand "<plusminus_insn><mode>3"
+(define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
[(set (match_operand:VF_AVX512F 0 "register_operand")
(plusminus:VF_AVX512F
(match_operand:VF_AVX512F 1 "nonimmediate_operand")
(match_operand:VF_AVX512F 2 "nonimmediate_operand")))]
- "TARGET_SSE"
+ "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
-(define_insn "*<plusminus_insn><mode>3"
+(define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
[(set (match_operand:VF_AVX512F 0 "register_operand" "=x,v")
(plusminus:VF_AVX512F
(match_operand:VF_AVX512F 1 "nonimmediate_operand" "<comm>0,v")
- (match_operand:VF_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
- "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ (match_operand:VF_AVX512F 2 "nonimmediate_operand" "xm,<round_constraint>")))]
+ "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
- v<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseadd")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<mask_prefix3>")
(set_attr "mode" "<MODE>")])
-(define_insn "<sse>_vm<plusminus_insn><mode>3"
+(define_insn "<sse>_vm<plusminus_insn><mode>3<mask_scalar_name><round_name>"
[(set (match_operand:VF_128 0 "register_operand" "=x,v")
(vec_merge:VF_128
(plusminus:VF_128
(match_operand:VF_128 1 "register_operand" "0,v")
- (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm"))
+ (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
(match_dup 1)
(const_int 1)))]
"TARGET_SSE"
"@
<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
- v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
+ v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_mask_scalar_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_mask_scalar_op3>}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseadd")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<mask_scalar_prefix>")
(set_attr "mode" "<ssescalarmode>")])
-(define_expand "mul<mode>3"
+(define_expand "mul<mode>3<mask_name><round_name>"
[(set (match_operand:VF_AVX512F 0 "register_operand")
(mult:VF_AVX512F
(match_operand:VF_AVX512F 1 "nonimmediate_operand")
(match_operand:VF_AVX512F 2 "nonimmediate_operand")))]
- "TARGET_SSE"
+ "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
"ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
-(define_insn "*mul<mode>3"
+(define_insn "*mul<mode>3<mask_name><round_name>"
[(set (match_operand:VF_AVX512F 0 "register_operand" "=x,v")
(mult:VF_AVX512F
(match_operand:VF_AVX512F 1 "nonimmediate_operand" "%0,v")
- (match_operand:VF_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
- "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
+ (match_operand:VF_AVX512F 2 "nonimmediate_operand" "xm,<round_constraint>")))]
+ "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
mul<ssemodesuffix>\t{%2, %0|%0, %2}
- vmul<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "ssemul")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<mask_prefix3>")
(set_attr "btver2_decode" "direct,double")
(set_attr "mode" "<MODE>")])
-(define_insn "<sse>_vm<multdiv_mnemonic><mode>3"
+(define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_name>"
[(set (match_operand:VF_128 0 "register_operand" "=x,v")
(vec_merge:VF_128
(multdiv:VF_128
(match_operand:VF_128 1 "register_operand" "0,v")
- (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm"))
+ (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
(match_dup 1)
(const_int 1)))]
"TARGET_SSE"
"@
<multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
- v<multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
+ v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_mask_scalar_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_mask_scalar_op3>}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sse<multdiv_mnemonic>")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<mask_scalar_prefix>")
(set_attr "btver2_decode" "direct,double")
(set_attr "mode" "<ssescalarmode>")])
@@ -1246,18 +1399,18 @@
}
})
-(define_insn "<sse>_div<mode>3"
+(define_insn "<sse>_div<mode>3<mask_name><round_name>"
[(set (match_operand:VF_AVX512F 0 "register_operand" "=x,v")
(div:VF_AVX512F
(match_operand:VF_AVX512F 1 "register_operand" "0,v")
- (match_operand:VF_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
- "TARGET_SSE"
+ (match_operand:VF_AVX512F 2 "nonimmediate_operand" "xm,<round_constraint>")))]
+ "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
div<ssemodesuffix>\t{%2, %0|%0, %2}
- vdiv<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "ssediv")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<mask_prefix3>")
(set_attr "mode" "<MODE>")])
(define_insn "<sse>_rcp<mode>2"
@@ -1290,18 +1443,18 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "SF")])
-(define_insn "rcp14<mode>"
+(define_insn "<mask_codefor>rcp14<mode><mask_name>"
[(set (match_operand:VF_512 0 "register_operand" "=v")
(unspec:VF_512
[(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
UNSPEC_RCP14))]
"TARGET_AVX512F"
- "vrcp14<ssemodesuffix>\t{%1, %0|%0, %1}"
+ "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "sse")
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
-(define_insn "srcp14<mode>"
+(define_insn "<mask_scalar_codefor>srcp14<mode><mask_scalar_name>"
[(set (match_operand:VF_128 0 "register_operand" "=v")
(vec_merge:VF_128
(unspec:VF_128
@@ -1311,7 +1464,7 @@
(match_dup 1)
(const_int 1)))]
"TARGET_AVX512F"
- "vrcp14<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ "vrcp14<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2}"
[(set_attr "type" "sse")
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
@@ -1337,33 +1490,33 @@
}
})
-(define_insn "<sse>_sqrt<mode>2"
+(define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
[(set (match_operand:VF_AVX512F 0 "register_operand" "=v")
- (sqrt:VF_AVX512F (match_operand:VF_AVX512F 1 "nonimmediate_operand" "vm")))]
- "TARGET_SSE"
- "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
+ (sqrt:VF_AVX512F (match_operand:VF_AVX512F 1 "nonimmediate_operand" "<round_constraint>")))]
+ "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
+ "%vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
[(set_attr "type" "sse")
(set_attr "atom_sse_attr" "sqrt")
(set_attr "btver2_sse_attr" "sqrt")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
-(define_insn "<sse>_vmsqrt<mode>2"
+(define_insn "<sse>_vmsqrt<mode>2<mask_scalar_name><round_name>"
[(set (match_operand:VF_128 0 "register_operand" "=x,v")
(vec_merge:VF_128
(sqrt:VF_128
- (match_operand:VF_128 1 "nonimmediate_operand" "xm,vm"))
+ (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_constraint>"))
(match_operand:VF_128 2 "register_operand" "0,v")
(const_int 1)))]
"TARGET_SSE"
"@
sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
- vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
+ vsqrt<ssescalarmodesuffix>\t{<round_mask_scalar_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %<iptr>1<round_mask_scalar_op3>}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sse")
(set_attr "atom_sse_attr" "sqrt")
+ (set_attr "prefix" "<mask_scalar_prefix>")
(set_attr "btver2_sse_attr" "sqrt")
- (set_attr "prefix" "orig,vex")
(set_attr "mode" "<ssescalarmode>")])
(define_expand "rsqrt<mode>2"
@@ -1386,18 +1539,18 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
-(define_insn "rsqrt14<mode>"
+(define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
[(set (match_operand:VF_512 0 "register_operand" "=v")
(unspec:VF_512
[(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
UNSPEC_RSQRT14))]
"TARGET_AVX512F"
- "vrsqrt14<ssemodesuffix>\t{%1, %0|%0, %1}"
+ "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "sse")
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
-(define_insn "rsqrt14<mode>"
+(define_insn "<mask_scalar_codefor>rsqrt14<mode><mask_scalar_name>"
[(set (match_operand:VF_128 0 "register_operand" "=v")
(vec_merge:VF_128
(unspec:VF_128
@@ -1407,7 +1560,7 @@
(match_dup 1)
(const_int 1)))]
"TARGET_AVX512F"
- "vrsqrt14<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ "vrsqrt14<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2}"
[(set_attr "type" "sse")
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
@@ -1432,67 +1585,67 @@
;; isn't really correct, as those rtl operators aren't defined when
;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
-(define_expand "<code><mode>3"
+(define_expand "<code><mode>3<mask_name><round_saeonly_name>"
[(set (match_operand:VF_AVX512F 0 "register_operand")
(smaxmin:VF_AVX512F
(match_operand:VF_AVX512F 1 "nonimmediate_operand")
(match_operand:VF_AVX512F 2 "nonimmediate_operand")))]
- "TARGET_SSE"
+ "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
{
if (!flag_finite_math_only)
operands[1] = force_reg (<MODE>mode, operands[1]);
ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
})
-(define_insn "*<code><mode>3_finite"
+(define_insn "*<code><mode>3_finite<mask_name><round_saeonly_name>"
[(set (match_operand:VF_AVX512F 0 "register_operand" "=x,v")
(smaxmin:VF_AVX512F
(match_operand:VF_AVX512F 1 "nonimmediate_operand" "%0,v")
- (match_operand:VF_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
+ (match_operand:VF_AVX512F 2 "nonimmediate_operand" "xm,<round_saeonly_constraint>")))]
"TARGET_SSE && flag_finite_math_only
&& ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
- "
+ && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
"@
<maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
- v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseadd")
(set_attr "btver2_sse_attr" "maxmin")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<mask_prefix3>")
(set_attr "mode" "<MODE>")])
-(define_insn "*<code><mode>3"
+(define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
[(set (match_operand:VF_AVX512F 0 "register_operand" "=x,v")
(smaxmin:VF_AVX512F
(match_operand:VF_AVX512F 1 "register_operand" "0,v")
- (match_operand:VF_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
+ (match_operand:VF_AVX512F 2 "nonimmediate_operand" "xm,<round_saeonly_constraint>")))]
"TARGET_SSE && !flag_finite_math_only
- "
+ && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
"@
<maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
- v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseadd")
(set_attr "btver2_sse_attr" "maxmin")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<mask_prefix3>")
(set_attr "mode" "<MODE>")])
-(define_insn "<sse>_vm<code><mode>3"
+(define_insn "<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_name>"
[(set (match_operand:VF_128 0 "register_operand" "=x,v")
(vec_merge:VF_128
(smaxmin:VF_128
(match_operand:VF_128 1 "register_operand" "0,v")
- (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm"))
+ (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_saeonly_constraint>"))
(match_dup 1)
(const_int 1)))]
"TARGET_SSE"
"@
<maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
- v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
+ v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_mask_scalar_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_mask_scalar_op3>}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sse")
(set_attr "btver2_sse_attr" "maxmin")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<mask_scalar_prefix>")
(set_attr "mode" "<ssescalarmode>")])
;; These versions of the min/max patterns implement exactly the operations
@@ -2008,21 +2161,21 @@
[(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
(V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")])
-(define_insn "avx512f_cmp<mode>3"
+(define_insn "avx512f_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(unspec:<avx512fmaskmode>
[(match_operand:VI48F_512 1 "register_operand" "v")
- (match_operand:VI48F_512 2 "nonimmediate_operand" "vm")
+ (match_operand:VI48F_512 2 "nonimmediate_operand" "<round_saeonly_constraint>")
(match_operand:SI 3 "<cmp_imm_predicate>" "n")]
UNSPEC_PCMP))]
- "TARGET_AVX512F"
- "v<sseintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ "TARGET_AVX512F && <round_saeonly_mode512bit_condition_op1>"
+ "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
[(set_attr "type" "ssecmp")
(set_attr "length_immediate" "1")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_ucmp<mode>3"
+(define_insn "avx512f_ucmp<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(unspec:<avx512fmaskmode>
[(match_operand:VI48_512 1 "register_operand" "v")
@@ -2030,23 +2183,41 @@
(match_operand:SI 3 "const_0_to_7_operand" "n")]
UNSPEC_UNSIGNED_PCMP))]
"TARGET_AVX512F"
- "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
[(set_attr "type" "ssecmp")
(set_attr "length_immediate" "1")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_vmcmp<mode>3"
+(define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(and:<avx512fmaskmode>
(unspec:<avx512fmaskmode>
[(match_operand:VF_128 1 "register_operand" "v")
- (match_operand:VF_128 2 "nonimmediate_operand" "vm")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_saeonly_constraint>")
(match_operand:SI 3 "const_0_to_31_operand" "n")]
UNSPEC_PCMP)
(const_int 1)))]
"TARGET_AVX512F"
- "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<ssescalarmode>")])
+
+(define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+ (and:<avx512fmaskmode>
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VF_128 1 "register_operand" "v")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_saeonly_constraint>")
+ (match_operand:SI 3 "const_0_to_31_operand" "n")]
+ UNSPEC_PCMP)
+ (and:<avx512fmaskmode>
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k")
+ (const_int 1))))]
+ "TARGET_AVX512F"
+ "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_saeonly_op5>, %3}"
[(set_attr "type" "ssecmp")
(set_attr "length_immediate" "1")
(set_attr "prefix" "evex")
@@ -2064,17 +2235,17 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "<sse>_comi"
+(define_insn "<sse>_comi<round_saeonly_name>"
[(set (reg:CCFP FLAGS_REG)
(compare:CCFP
(vec_select:MODEF
(match_operand:<ssevecmode> 0 "register_operand" "v")
(parallel [(const_int 0)]))
(vec_select:MODEF
- (match_operand:<ssevecmode> 1 "nonimmediate_operand" "vm")
+ (match_operand:<ssevecmode> 1 "nonimmediate_operand" "<round_saeonly_constraint>")
(parallel [(const_int 0)]))))]
"SSE_FLOAT_MODE_P (<MODE>mode)"
- "%vcomi<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
+ "%vcomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
[(set_attr "type" "ssecomi")
(set_attr "prefix" "maybe_vex")
(set_attr "prefix_rep" "0")
@@ -2084,17 +2255,17 @@
(const_string "0")))
(set_attr "mode" "<MODE>")])
-(define_insn "<sse>_ucomi"
+(define_insn "<sse>_ucomi<round_saeonly_name>"
[(set (reg:CCFPU FLAGS_REG)
(compare:CCFPU
(vec_select:MODEF
(match_operand:<ssevecmode> 0 "register_operand" "v")
(parallel [(const_int 0)]))
(vec_select:MODEF
- (match_operand:<ssevecmode> 1 "nonimmediate_operand" "vm")
+ (match_operand:<ssevecmode> 1 "nonimmediate_operand" "<round_saeonly_constraint>")
(parallel [(const_int 0)]))))]
"SSE_FLOAT_MODE_P (<MODE>mode)"
- "%vucomi<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
+ "%vucomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
[(set_attr "type" "ssecomi")
(set_attr "prefix" "maybe_vex")
(set_attr "prefix_rep" "0")
@@ -2585,78 +2756,228 @@
(match_operand:FMAMODE 3 "nonimmediate_operand")))]
"")
-(define_insn "*fma_fmadd_<mode>"
+(define_expand "avx512f_fmadd_<mode>_maskz<round_expand_name>"  ;; zero-masking fmadd entry point; only forwards to the _maskz_1 insn generator
+ [(match_operand:VF_512 0 "register_operand")
+ (match_operand:VF_512 1 "<round_expand_predicate>")
+ (match_operand:VF_512 2 "<round_expand_predicate>")
+ (match_operand:VF_512 3 "<round_expand_predicate>")
+ (match_operand:<avx512fmaskmode> 4 "register_operand")]  ;; mask register
+ "TARGET_AVX512F"
+{
+ emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
+ operands[0], operands[1], operands[2], operands[3],
+ CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>)); /* an all-zero vector is passed ahead of the mask */
+ DONE;
+})
+
+(define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
[(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
(fma:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x")
- (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m")
- (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x")))]
- ""
- "@
- vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "<round_constraint>,v,<round_constraint>,x,m")
+ (match_operand:FMAMODE 3 "nonimmediate_operand" "v,<round_constraint>,0,xm,x")))]
+ "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
+ "@
+ vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
+ vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
+ vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "fma_fmsub_<mode>"
+(define_insn "avx512f_fmadd_<mode>_mask<round_name>"  ;; merge-masked op1 * op2 + op3
+ [(set (match_operand:VF_512 0 "register_operand" "=v,v")
+ (vec_merge:VF_512  ;; lanes selected by mask operand 4 take the fma result
+ (fma:VF_512
+ (match_operand:VF_512 1 "register_operand" "0,0")  ;; tied to the destination in both alternatives
+ (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>,v")
+ (match_operand:VF_512 3 "nonimmediate_operand" "v,<round_constraint>"))
+ (match_dup 1)  ;; remaining lanes keep operand 1
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
+ "TARGET_AVX512F"
+ "@
+ vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
+ vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
+ [(set_attr "isa" "fma_avx512f,fma_avx512f")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512f_fmadd_<mode>_mask3<round_name>"  ;; merge-masked op1 * op2 + op3, addend tied to the destination
+ [(set (match_operand:VF_512 0 "register_operand" "=v")  ;; "v" rather than "x", as in the other *_mask3 patterns
+ (vec_merge:VF_512  ;; lanes selected by mask operand 4 take the fma result
+ (fma:VF_512
+ (match_operand:VF_512 1 "register_operand" "v")
+ (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>")
+ (match_operand:VF_512 3 "register_operand" "0"))
+ (match_dup 3)  ;; remaining lanes keep operand 3
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
+ "TARGET_AVX512F"
+ "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
+ [(set_attr "isa" "fma_avx512f")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
[(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
(fma:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x")
- (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m")
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "<round_constraint>,v,<round_constraint>,x,m")
(neg:FMAMODE
- (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x"))))]
- ""
+ (match_operand:FMAMODE 3 "nonimmediate_operand" "v,<round_constraint>,0,xm,x"))))]
+ "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
- vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
+ vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
+ vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "fma_fnmadd_<mode>"
+(define_insn "avx512f_fmsub_<mode>_mask<round_name>"  ;; merge-masked op1 * op2 - op3
+ [(set (match_operand:VF_512 0 "register_operand" "=v,v")
+ (vec_merge:VF_512  ;; lanes selected by mask operand 4 take the fma result
+ (fma:VF_512
+ (match_operand:VF_512 1 "register_operand" "0,0")  ;; tied to the destination in both alternatives
+ (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>,v")
+ (neg:VF_512
+ (match_operand:VF_512 3 "nonimmediate_operand" "v,<round_constraint>")))
+ (match_dup 1)  ;; remaining lanes keep operand 1
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
+ "TARGET_AVX512F"
+ "@
+ vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
+ vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
+ [(set_attr "isa" "fma_avx512f,fma_avx512f")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512f_fmsub_<mode>_mask3<round_name>"  ;; merge-masked op1 * op2 - op3, operand 3 tied to the destination
+ [(set (match_operand:VF_512 0 "register_operand" "=v")
+ (vec_merge:VF_512  ;; lanes selected by mask operand 4 take the fma result
+ (fma:VF_512
+ (match_operand:VF_512 1 "register_operand" "v")
+ (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>")
+ (neg:VF_512
+ (match_operand:VF_512 3 "register_operand" "0")))
+ (match_dup 3)  ;; remaining lanes keep operand 3
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
+ "TARGET_AVX512F"
+ "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
+ [(set_attr "isa" "fma_avx512f")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
[(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
(fma:FMAMODE
(neg:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x"))
- (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m")
- (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x")))]
- ""
- "@
- vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "<round_constraint>,v,<round_constraint>,x,m")
+ (match_operand:FMAMODE 3 "nonimmediate_operand" "v,<round_constraint>,0,xm,x")))]
+ "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
+ "@
+ vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
+ vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
+ vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "*fma_fnmsub_<mode>"
+(define_insn "avx512f_fnmadd_<mode>_mask<round_name>"  ;; merge-masked -(op1) * op2 + op3
+ [(set (match_operand:VF_512 0 "register_operand" "=v,v")
+ (vec_merge:VF_512  ;; lanes selected by mask operand 4 take the fma result
+ (fma:VF_512
+ (neg:VF_512
+ (match_operand:VF_512 1 "register_operand" "0,0"))  ;; tied to the destination in both alternatives
+ (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>,v")
+ (match_operand:VF_512 3 "nonimmediate_operand" "v,<round_constraint>"))
+ (match_dup 1)  ;; remaining lanes keep operand 1
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
+ "TARGET_AVX512F"
+ "@
+ vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
+ vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
+ [(set_attr "isa" "fma_avx512f,fma_avx512f")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512f_fnmadd_<mode>_mask3<round_name>"  ;; merge-masked -(op1) * op2 + op3, operand 3 tied to the destination
+ [(set (match_operand:VF_512 0 "register_operand" "=v")
+ (vec_merge:VF_512  ;; lanes selected by mask operand 4 take the fma result
+ (fma:VF_512
+ (neg:VF_512
+ (match_operand:VF_512 1 "register_operand" "v"))
+ (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>")
+ (match_operand:VF_512 3 "register_operand" "0"))
+ (match_dup 3)  ;; remaining lanes keep operand 3
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
+ "TARGET_AVX512F"
+ "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
+ [(set_attr "isa" "fma_avx512f")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
[(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
(fma:FMAMODE
(neg:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x"))
- (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m")
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "<round_constraint>,v,<round_constraint>,x,m")
(neg:FMAMODE
- (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x"))))]
- ""
+ (match_operand:FMAMODE 3 "nonimmediate_operand" "v,<round_constraint>,0,xm,x"))))]
+ "<sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
- vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
+ vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
+ vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
+(define_insn "avx512f_fnmsub_<mode>_mask<round_name>"  ;; merge-masked -(op1) * op2 - op3
+ [(set (match_operand:VF_512 0 "register_operand" "=v,v")
+ (vec_merge:VF_512  ;; lanes selected by mask operand 4 take the fma result
+ (fma:VF_512
+ (neg:VF_512
+ (match_operand:VF_512 1 "register_operand" "0,0"))  ;; tied to the destination in both alternatives
+ (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>,v")
+ (neg:VF_512
+ (match_operand:VF_512 3 "nonimmediate_operand" "v,<round_constraint>")))
+ (match_dup 1)  ;; remaining lanes keep operand 1
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
+ "TARGET_AVX512F"
+ "@
+ vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
+ vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
+ [(set_attr "isa" "fma_avx512f,fma_avx512f")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512f_fnmsub_<mode>_mask3<round_name>"  ;; merge-masked -(op1) * op2 - op3, operand 3 tied to the destination
+ [(set (match_operand:VF_512 0 "register_operand" "=v")
+ (vec_merge:VF_512  ;; lanes selected by mask operand 4 take the fma result
+ (fma:VF_512
+ (neg:VF_512
+ (match_operand:VF_512 1 "register_operand" "v"))
+ (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>")
+ (neg:VF_512
+ (match_operand:VF_512 3 "register_operand" "0")))
+ (match_dup 3)  ;; remaining lanes keep operand 3
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
+ "TARGET_AVX512F"
+ "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
+ [(set_attr "isa" "fma_avx512f")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
;; FMA parallel floating point multiply addsub and subadd operations.
;; It would be possible to represent these without the UNSPEC as
@@ -2677,47 +2998,146 @@
UNSPEC_FMADDSUB))]
"TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
-(define_insn "*fma_fmaddsub_<mode>"
+(define_expand "avx512f_fmaddsub_<mode>_maskz<round_expand_name>"  ;; zero-masking fmaddsub entry point; only forwards to the _maskz_1 insn generator
+ [(match_operand:VF_512 0 "register_operand")
+ (match_operand:VF_512 1 "<round_expand_predicate>")
+ (match_operand:VF_512 2 "<round_expand_predicate>")
+ (match_operand:VF_512 3 "<round_expand_predicate>")
+ (match_operand:<avx512fmaskmode> 4 "register_operand")]  ;; mask register
+ "TARGET_AVX512F"
+{
+ emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
+ operands[0], operands[1], operands[2], operands[3],
+ CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>)); /* an all-zero vector is passed ahead of the mask */
+ DONE;
+})
+
+(define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
[(set (match_operand:VF_AVX512F 0 "register_operand" "=v,v,v,x,x")
(unspec:VF_AVX512F
- [(match_operand:VF_AVX512F 1 "nonimmediate_operand" "%0, 0, v, x,x")
- (match_operand:VF_AVX512F 2 "nonimmediate_operand" "vm, v,vm, x,m")
- (match_operand:VF_AVX512F 3 "nonimmediate_operand" " v,vm, 0,xm,x")]
+ [(match_operand:VF_AVX512F 1 "nonimmediate_operand" "%0,0,v,x,x")
+ (match_operand:VF_AVX512F 2 "nonimmediate_operand" "<round_constraint>,v,<round_constraint>,x,m")
+ (match_operand:VF_AVX512F 3 "nonimmediate_operand" "v,<round_constraint>,0,xm,x")]
UNSPEC_FMADDSUB))]
- "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)"
+ "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
- vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
+ vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
+ vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "*fma_fmsubadd_<mode>"
+(define_insn "avx512f_fmaddsub_<mode>_mask<round_name>"  ;; merge-masked UNSPEC_FMADDSUB of operands 1, 2, 3
+ [(set (match_operand:VF_512 0 "register_operand" "=v,v")
+ (vec_merge:VF_512  ;; lanes selected by mask operand 4 take the unspec result
+ (unspec:VF_512
+ [(match_operand:VF_512 1 "register_operand" "0,0")  ;; tied to the destination in both alternatives
+ (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>,v")
+ (match_operand:VF_512 3 "nonimmediate_operand" "v,<round_constraint>")]
+ UNSPEC_FMADDSUB)
+ (match_dup 1)  ;; remaining lanes keep operand 1
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
+ "TARGET_AVX512F"
+ "@
+ vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
+ vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
+ [(set_attr "isa" "fma_avx512f,fma_avx512f")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512f_fmaddsub_<mode>_mask3<round_name>"  ;; merge-masked UNSPEC_FMADDSUB, operand 3 tied to the destination
+ [(set (match_operand:VF_512 0 "register_operand" "=v")
+ (vec_merge:VF_512  ;; lanes selected by mask operand 4 take the unspec result
+ (unspec:VF_512
+ [(match_operand:VF_512 1 "register_operand" "v")
+ (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>")
+ (match_operand:VF_512 3 "register_operand" "0")]
+ UNSPEC_FMADDSUB)
+ (match_dup 3)  ;; remaining lanes keep operand 3
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
+ "TARGET_AVX512F"
+ "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
+ [(set_attr "isa" "fma_avx512f")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
[(set (match_operand:VF_AVX512F 0 "register_operand" "=v,v,v,x,x")
(unspec:VF_AVX512F
- [(match_operand:VF_AVX512F 1 "nonimmediate_operand" "%0, 0, v, x,x")
- (match_operand:VF_AVX512F 2 "nonimmediate_operand" "vm, v,vm, x,m")
+ [(match_operand:VF_AVX512F 1 "nonimmediate_operand" "%0,0,v,x,x")
+ (match_operand:VF_AVX512F 2 "nonimmediate_operand" "<round_constraint>,v,<round_constraint>,x,m")
(neg:VF_AVX512F
- (match_operand:VF_AVX512F 3 "nonimmediate_operand" " v,vm, 0,xm,x"))]
+ (match_operand:VF_AVX512F 3 "nonimmediate_operand" "v,<round_constraint>,0,xm,x"))]
UNSPEC_FMADDSUB))]
- "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)"
+ "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F) && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
- vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
+ vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
+ vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
+ vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
+(define_insn "avx512f_fmsubadd_<mode>_mask<round_name>"  ;; merge-masked fmsubadd: UNSPEC_FMADDSUB with operand 3 negated
+ [(set (match_operand:VF_512 0 "register_operand" "=v,v")
+ (vec_merge:VF_512  ;; lanes selected by mask operand 4 take the unspec result
+ (unspec:VF_512
+ [(match_operand:VF_512 1 "register_operand" "0,0")  ;; tied to the destination in both alternatives
+ (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>,v")
+ (neg:VF_512
+ (match_operand:VF_512 3 "nonimmediate_operand" "v,<round_constraint>"))]
+ UNSPEC_FMADDSUB)
+ (match_dup 1)  ;; remaining lanes keep operand 1
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k,k")))]
+ "TARGET_AVX512F"
+ "@
+ vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
+ vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
+ [(set_attr "isa" "fma_avx512f,fma_avx512f")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512f_fmsubadd_<mode>_mask3<round_name>"  ;; merge-masked fmsubadd, operand 3 tied to the destination
+ [(set (match_operand:VF_512 0 "register_operand" "=v")
+ (vec_merge:VF_512  ;; lanes selected by mask operand 4 take the unspec result
+ (unspec:VF_512
+ [(match_operand:VF_512 1 "register_operand" "v")
+ (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>")
+ (neg:VF_512
+ (match_operand:VF_512 3 "register_operand" "0"))]
+ UNSPEC_FMADDSUB)
+ (match_dup 3)  ;; remaining lanes keep operand 3
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
+ "TARGET_AVX512F"
+ "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
+ [(set_attr "isa" "fma_avx512f")
+ (set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
;; FMA3 floating point scalar intrinsics. These merge result with
;; high-order elements from the destination register.
-(define_expand "fmai_vmfmadd_<mode>"
+(define_expand "fmai_vmfmadd_<mode>_maskz<round_name>"  ;; scalar zero-masked op1 * op2 + op3
+ [(set (match_operand:VF_128 0 "register_operand")
+ (vec_merge:VF_128  ;; outer merge, selector (const_int 1): only element 0 comes from the inner merge
+ (vec_merge:VF_128  ;; inner merge: mask operand 4 chooses between the fma result and the dup'd operand
+ (fma:VF_128
+ (match_operand:VF_128 1 "nonimmediate_operand")
+ (match_operand:VF_128 2 "nonimmediate_operand")
+ (match_operand:VF_128 3 "nonimmediate_operand"))
+ (match_dup <round_opnum>)  ;; set to an all-zero vector by the preparation statement below
+ (match_operand:QI 4 "register_operand"))
+ (match_dup 1)  ;; elements other than element 0 come from operand 1
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "operands[<round_opnum>] = CONST0_RTX (<MODE>mode);")
+
+(define_expand "fmai_vmfmadd_<mode><round_name>"
[(set (match_operand:VF_128 0 "register_operand")
(vec_merge:VF_128
(fma:VF_128
@@ -2728,71 +3148,303 @@
(const_int 1)))]
"TARGET_FMA")
-(define_insn "*fmai_fmadd_<mode>"
+(define_insn "fmai_vmfmadd_<mode>_mask<round_name>"  ;; scalar merge-masked op1 * op2 + op3
+ [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+ (vec_merge:VF_128  ;; outer merge, selector (const_int 1): only element 0 comes from the inner merge
+ (vec_merge:VF_128
+ (fma:VF_128
+ (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v")
+ (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>"))
+ (match_dup 1)  ;; used in place of the fma result where mask operand 4 does not select it
+ (match_operand:QI 4 "register_operand" "k,k"))
+ (match_dup 1)  ;; elements other than element 0 come from operand 1
+ (const_int 1)))]
+ "TARGET_AVX512F"  ;; Intel-syntax operands below carry %<iptr>, as in the unmasked *fmai_fmadd_<mode> pattern
+ "@
+ vfmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
+ vfmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fmai_vmfmadd_<mode>_mask3<round_name>"  ;; scalar merge-masked op1 * op2 + op3, operand 3 tied to the destination
+ [(set (match_operand:VF_128 0 "register_operand" "=v")
+ (vec_merge:VF_128  ;; outer merge, selector (const_int 1): only element 0 comes from the inner merge
+ (vec_merge:VF_128
+ (fma:VF_128
+ (match_operand:VF_128 1 "nonimmediate_operand" "%v")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>")
+ (match_operand:VF_128 3 "register_operand" "0"))
+ (match_dup 3)  ;; used in place of the fma result where mask operand 4 does not select it
+ (match_operand:QI 4 "register_operand" "k"))
+ (match_dup 3)  ;; elements other than element 0 come from operand 3
+ (const_int 1)))]
+ "TARGET_AVX512F"  ;; Intel-syntax operands below carry %<iptr>, as in fmai_vmfmsub_<mode>_mask3
+ "vfmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>1, %<iptr>2<round_op5>}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_fmadd_<mode>_maskz<round_name>"  ;; scalar zero-masked op1 * op2 + op3
+ [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+ (vec_merge:VF_128  ;; outer merge, selector (const_int 1): only element 0 comes from the inner merge
+ (vec_merge:VF_128
+ (fma:VF_128
+ (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v")
+ (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>"))
+ (match_operand:VF_128 4 "const0_operand")  ;; zero vector used where mask operand 5 does not select the fma result
+ (match_operand:QI 5 "register_operand" "k,k"))
+ (match_dup 1)  ;; elements other than element 0 come from operand 1
+ (const_int 1)))]
+ "TARGET_AVX512F"  ;; %N4 emits "{z}" for the zero operand; Intel-syntax operands carry %<iptr> like the unmasked pattern
+ "@
+ vfmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>3, %<iptr>2<round_op6>}
+ vfmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>2, %<iptr>3<round_op6>}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_fmadd_<mode><round_name>"
[(set (match_operand:VF_128 0 "register_operand" "=v,v")
(vec_merge:VF_128
(fma:VF_128
- (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
- (match_operand:VF_128 2 "nonimmediate_operand" "vm, v")
- (match_operand:VF_128 3 "nonimmediate_operand" " v,vm"))
+ (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v")
+ (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>"))
(match_dup 1)
(const_int 1)))]
"TARGET_FMA || TARGET_AVX512F"
"@
- vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
- vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
+ vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
+ vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_fmsub_<mode>_mask<round_name>"  ;; scalar merge-masked op1 * op2 - op3
+ [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+ (vec_merge:VF_128  ;; outer merge, selector (const_int 1): only element 0 comes from the inner merge
+ (vec_merge:VF_128
+ (fma:VF_128
+ (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v")
+ (neg:VF_128
+ (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>")))
+ (match_dup 1)  ;; used in place of the fma result where mask operand 4 does not select it
+ (match_operand:QI 4 "register_operand" "k,k"))
+ (match_dup 1)  ;; elements other than element 0 come from operand 1
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "@
+ vfmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
+ vfmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fmai_vmfmsub_<mode>_mask3<round_name>"  ;; scalar merge-masked op1 * op2 - op3, operand 3 tied to the destination
+ [(set (match_operand:VF_128 0 "register_operand" "=v")
+ (vec_merge:VF_128  ;; outer merge, selector (const_int 1): only element 0 comes from the inner merge
+ (vec_merge:VF_128
+ (fma:VF_128
+ (match_operand:VF_128 1 "nonimmediate_operand" "%v")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>")
+ (neg:VF_128
+ (match_operand:VF_128 3 "register_operand" "0")))
+ (match_dup 3)  ;; used in place of the fma result where mask operand 4 does not select it
+ (match_operand:QI 4 "register_operand" "k"))
+ (match_dup 3)  ;; elements other than element 0 come from operand 3
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "vfmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>1, %<iptr>2<round_op5>}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_fmsub_<mode>_maskz<round_name>"  ;; scalar zero-masked op1 * op2 - op3
+ [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+ (vec_merge:VF_128  ;; outer merge, selector (const_int 1): only element 0 comes from the inner merge
+ (vec_merge:VF_128
+ (fma:VF_128
+ (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v")
+ (neg:VF_128
+ (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>")))
+ (match_operand:VF_128 4 "const0_operand")  ;; zero vector used where mask operand 5 does not select the fma result
+ (match_operand:QI 5 "register_operand" "k,k"))
+ (match_dup 1)  ;; elements other than element 0 come from operand 1
+ (const_int 1)))]
+ "TARGET_AVX512F"  ;; %N4 in the templates emits "{z}" for the zero operand
+ "@
+ vfmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>3, %<iptr>2<round_op6>}
+ vfmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>2, %<iptr>3<round_op6>}"
 [(set_attr "type" "ssemuladd")
 (set_attr "mode" "<MODE>")])
-(define_insn "*fmai_fmsub_<mode>"
+(define_insn "*fmai_vmfnmadd_<mode>_mask<round_name>"  ;; scalar merge-masked -(op2) * op1 + op3
+ [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+ (vec_merge:VF_128  ;; outer merge, selector (const_int 1): only element 0 comes from the inner merge
+ (vec_merge:VF_128
+ (fma:VF_128
+ (neg:VF_128
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v"))
+ (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+ (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>"))
+ (match_dup 1)  ;; used in place of the fma result where mask operand 4 does not select it
+ (match_operand:QI 4 "register_operand" "k,k"))
+ (match_dup 1)  ;; elements other than element 0 come from operand 1
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "@
+ vfnmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
+ vfnmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_vmfnmadd_<mode>_mask3<round_name>"  ;; scalar merge-masked -(op1) * op2 + op3, operand 3 tied to the destination
+ [(set (match_operand:VF_128 0 "register_operand" "=v")
+ (vec_merge:VF_128  ;; outer merge, selector (const_int 1): only element 0 comes from the inner merge
+ (vec_merge:VF_128
+ (fma:VF_128
+ (neg:VF_128
+ (match_operand:VF_128 1 "nonimmediate_operand" "%v"))
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>")
+ (match_operand:VF_128 3 "register_operand" "0"))
+ (match_dup 3)  ;; used in place of the fma result where mask operand 4 does not select it
+ (match_operand:QI 4 "register_operand" "k"))
+ (match_dup 3)  ;; elements other than element 0 come from operand 3
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "vfnmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>1, %<iptr>2<round_op5>}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_vmfnmadd_<mode>_maskz<round_name>"  ;; scalar zero-masked -(op2) * op1 + op3
+ [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+ (vec_merge:VF_128  ;; outer merge, selector (const_int 1): only element 0 comes from the inner merge
+ (vec_merge:VF_128
+ (fma:VF_128
+ (neg:VF_128
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v"))
+ (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+ (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>"))
+ (match_operand:VF_128 4 "const0_operand")  ;; zero vector used where mask operand 5 does not select the fma result
+ (match_operand:QI 5 "register_operand" "k,k"))
+ (match_dup 1)  ;; elements other than element 0 come from operand 1
+ (const_int 1)))]
+ "TARGET_AVX512F"  ;; %N4 in the templates emits "{z}" for the zero operand
+ "@
+ vfnmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>3, %<iptr>2<round_op6>}
+ vfnmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>2, %<iptr>3<round_op6>}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_fnmsub_<mode>_mask<round_name>"  ;; scalar merge-masked -(op2) * op1 - op3
+ [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+ (vec_merge:VF_128  ;; outer merge, selector (const_int 1): only element 0 comes from the inner merge
+ (vec_merge:VF_128
+ (fma:VF_128
+ (neg:VF_128
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v"))
+ (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+ (neg:VF_128
+ (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>")))
+ (match_dup 1)  ;; used in place of the fma result where mask operand 4 does not select it
+ (match_operand:QI 4 "register_operand" "k,k"))
+ (match_dup 1)  ;; elements other than element 0 come from operand 1
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "@
+ vfnmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
+ vfnmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_fnmsub_<mode>_mask3<round_name>"  ;; scalar merge-masked -(op1) * op2 - op3, operand 3 tied to the destination
+ [(set (match_operand:VF_128 0 "register_operand" "=v")
+ (vec_merge:VF_128  ;; outer merge, selector (const_int 1): only element 0 comes from the inner merge
+ (vec_merge:VF_128
+ (fma:VF_128
+ (neg:VF_128
+ (match_operand:VF_128 1 "nonimmediate_operand" "%v"))
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>")
+ (neg:VF_128
+ (match_operand:VF_128 3 "register_operand" "0")))
+ (match_dup 3)  ;; used in place of the fma result where mask operand 4 does not select it
+ (match_operand:QI 4 "register_operand" "k"))
+ (match_dup 3)  ;; elements other than element 0 come from operand 3
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "vfnmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>1, %<iptr>2<round_op5>}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_fnmsub_<mode>_maskz<round_name>"  ;; scalar zero-masked -(op2) * op1 - op3
+ [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+ (vec_merge:VF_128  ;; outer merge, selector (const_int 1): only element 0 comes from the inner merge
+ (vec_merge:VF_128
+ (fma:VF_128
+ (neg:VF_128
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v"))
+ (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+ (neg:VF_128
+ (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>")))
+ (match_operand:VF_128 4 "const0_operand")  ;; zero vector used where mask operand 5 does not select the fma result
+ (match_operand:QI 5 "register_operand" "k,k"))
+ (match_dup 1)  ;; elements other than element 0 come from operand 1
+ (const_int 1)))]
+ "TARGET_AVX512F"  ;; %N4 in the templates emits "{z}" for the zero operand
+ "@
+ vfnmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>3, %<iptr>2<round_op6>}
+ vfnmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>2, %<iptr>3<round_op6>}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_fmsub_<mode><round_name>"
[(set (match_operand:VF_128 0 "register_operand" "=v,v")
(vec_merge:VF_128
(fma:VF_128
- (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
- (match_operand:VF_128 2 "nonimmediate_operand" "vm, v")
+ (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v")
(neg:VF_128
- (match_operand:VF_128 3 "nonimmediate_operand" " v,vm")))
+ (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>")))
(match_dup 1)
(const_int 1)))]
"TARGET_FMA || TARGET_AVX512F"
"@
- vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
- vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
+ vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
+ vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "*fmai_fnmadd_<mode>"
+(define_insn "*fmai_fnmadd_<mode><round_name>"
[(set (match_operand:VF_128 0 "register_operand" "=v,v")
(vec_merge:VF_128
(fma:VF_128
(neg:VF_128
- (match_operand:VF_128 2 "nonimmediate_operand" "vm, v"))
- (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
- (match_operand:VF_128 3 "nonimmediate_operand" " v,vm"))
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v"))
+ (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+ (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>"))
(match_dup 1)
(const_int 1)))]
"TARGET_FMA || TARGET_AVX512F"
"@
- vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
- vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
+ vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
+ vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "*fmai_fnmsub_<mode>"
+(define_insn "*fmai_fnmsub_<mode><round_name>"
[(set (match_operand:VF_128 0 "register_operand" "=v,v")
(vec_merge:VF_128
(fma:VF_128
(neg:VF_128
- (match_operand:VF_128 2 "nonimmediate_operand" "vm, v"))
- (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>,v"))
+ (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
(neg:VF_128
- (match_operand:VF_128 3 "nonimmediate_operand" " v,vm")))
+ (match_operand:VF_128 3 "nonimmediate_operand" "v,<round_constraint>")))
(match_dup 1)
(const_int 1)))]
"TARGET_FMA || TARGET_AVX512F"
"@
- vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
- vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
+ vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
+ vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
@@ -2913,18 +3565,18 @@
(set_attr "prefix_rep" "0")
(set_attr "mode" "SF")])
-(define_insn "sse_cvtsi2ss"
+(define_insn "sse_cvtsi2ss<round_name>"
[(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
(vec_merge:V4SF
(vec_duplicate:V4SF
- (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
+ (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m,<round_constraint3>")))
(match_operand:V4SF 1 "register_operand" "0,0,v")
(const_int 1)))]
"TARGET_SSE"
"@
cvtsi2ss\t{%2, %0|%0, %2}
cvtsi2ss\t{%2, %0|%0, %2}
- vcvtsi2ss\t{%2, %1, %0|%0, %1, %2}"
+ vcvtsi2ss\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
[(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "vector,double,*")
@@ -2934,18 +3586,18 @@
(set_attr "prefix" "orig,orig,maybe_evex")
(set_attr "mode" "SF")])
-(define_insn "sse_cvtsi2ssq"
+(define_insn "sse_cvtsi2ssq<round_name>"
[(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
(vec_merge:V4SF
(vec_duplicate:V4SF
- (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
+ (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m,<round_constraint3>")))
(match_operand:V4SF 1 "register_operand" "0,0,v")
(const_int 1)))]
"TARGET_SSE && TARGET_64BIT"
"@
cvtsi2ssq\t{%2, %0|%0, %2}
cvtsi2ssq\t{%2, %0|%0, %2}
- vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
+ vcvtsi2ssq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
[(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "vector,double,*")
@@ -2957,15 +3609,15 @@
(set_attr "prefix" "orig,orig,maybe_evex")
(set_attr "mode" "SF")])
-(define_insn "sse_cvtss2si"
+(define_insn "sse_cvtss2si<round_name>"
[(set (match_operand:SI 0 "register_operand" "=r,r")
(unspec:SI
[(vec_select:SF
- (match_operand:V4SF 1 "nonimmediate_operand" "v,m")
+ (match_operand:V4SF 1 "nonimmediate_operand" "v,<round_constraint2>")
(parallel [(const_int 0)]))]
UNSPEC_FIX_NOTRUNC))]
"TARGET_SSE"
- "%vcvtss2si\t{%1, %0|%0, %k1}"
+ "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "bdver1_decode" "double,double")
@@ -2987,15 +3639,15 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
-(define_insn "sse_cvtss2siq"
+(define_insn "sse_cvtss2siq<round_name>"
[(set (match_operand:DI 0 "register_operand" "=r,r")
(unspec:DI
[(vec_select:SF
- (match_operand:V4SF 1 "nonimmediate_operand" "v,m")
+ (match_operand:V4SF 1 "nonimmediate_operand" "v,<round_constraint2>")
(parallel [(const_int 0)]))]
UNSPEC_FIX_NOTRUNC))]
"TARGET_SSE && TARGET_64BIT"
- "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
+ "%vcvtss2si{q}\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "bdver1_decode" "double,double")
@@ -3017,14 +3669,14 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "DI")])
-(define_insn "sse_cvttss2si"
+(define_insn "sse_cvttss2si<round_saeonly_name>"
[(set (match_operand:SI 0 "register_operand" "=r,r")
(fix:SI
(vec_select:SF
- (match_operand:V4SF 1 "nonimmediate_operand" "v,m")
+ (match_operand:V4SF 1 "nonimmediate_operand" "v,<round_saeonly_constraint2>")
(parallel [(const_int 0)]))))]
"TARGET_SSE"
- "%vcvttss2si\t{%1, %0|%0, %k1}"
+ "%vcvttss2si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "amdfam10_decode" "double,double")
@@ -3033,14 +3685,14 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
-(define_insn "sse_cvttss2siq"
+(define_insn "sse_cvttss2siq<round_saeonly_name>"
[(set (match_operand:DI 0 "register_operand" "=r,r")
(fix:DI
(vec_select:SF
- (match_operand:V4SF 1 "nonimmediate_operand" "v,vm")
+ (match_operand:V4SF 1 "nonimmediate_operand" "v,<round_saeonly_constraint>")
(parallel [(const_int 0)]))))]
"TARGET_SSE && TARGET_64BIT"
- "%vcvttss2si{q}\t{%1, %0|%0, %k1}"
+ "%vcvttss2si{q}\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "amdfam10_decode" "double,double")
@@ -3049,50 +3701,50 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "DI")])
-(define_insn "cvtusi2<ssescalarmodesuffix>32"
+(define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
[(set (match_operand:VF_128 0 "register_operand" "=v")
(vec_merge:VF_128
(vec_duplicate:VF_128
(unsigned_float:<ssescalarmode>
- (match_operand:SI 2 "nonimmediate_operand" "rm")))
+ (match_operand:SI 2 "nonimmediate_operand" "<round_constraint3>")))
(match_operand:VF_128 1 "register_operand" "v")
(const_int 1)))]
- "TARGET_AVX512F"
- "vcvtusi2<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX512F && <round_modev4sf_condition>"
+ "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
[(set_attr "type" "sseicvt")
(set_attr "prefix" "evex")
(set_attr "mode" "<ssescalarmode>")])
-(define_insn "cvtusi2<ssescalarmodesuffix>64"
+(define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
[(set (match_operand:VF_128 0 "register_operand" "=v")
(vec_merge:VF_128
(vec_duplicate:VF_128
(unsigned_float:<ssescalarmode>
- (match_operand:DI 2 "nonimmediate_operand" "rm")))
+ (match_operand:DI 2 "nonimmediate_operand" "<round_constraint3>")))
(match_operand:VF_128 1 "register_operand" "v")
(const_int 1)))]
"TARGET_AVX512F && TARGET_64BIT"
- "vcvtusi2<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ "vcvtusi2<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
[(set_attr "type" "sseicvt")
(set_attr "prefix" "evex")
(set_attr "mode" "<ssescalarmode>")])
-(define_insn "float<sseintvecmodelower><mode>2"
+(define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
[(set (match_operand:VF1_AVX512F 0 "register_operand" "=v")
(float:VF1_AVX512F
- (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "vm")))]
- "TARGET_SSE2"
- "%vcvtdq2ps\t{%1, %0|%0, %1}"
+ (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
+ "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
+ "%vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "ufloatv16siv16sf2"
+(define_insn "ufloatv16siv16sf2<mask_name><round_name>"
[(set (match_operand:V16SF 0 "register_operand" "=v")
(unsigned_float:V16SF
- (match_operand:V16SI 1 "nonimmediate_operand" "vm")))]
+ (match_operand:V16SI 1 "nonimmediate_operand" "<round_constraint>")))]
"TARGET_AVX512F"
- "vcvtudq2ps\t{%1, %0|%0, %1}"
+ "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
(set_attr "mode" "V16SF")])
@@ -3126,34 +3778,34 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_fix_notruncv16sfv16si"
+(define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
[(set (match_operand:V16SI 0 "register_operand" "=v")
(unspec:V16SI
- [(match_operand:V16SF 1 "nonimmediate_operand" "vm")]
+ [(match_operand:V16SF 1 "nonimmediate_operand" "<round_constraint>")]
UNSPEC_FIX_NOTRUNC))]
"TARGET_AVX512F"
- "vcvtps2dq\t{%1, %0|%0, %1}"
+ "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "avx512f_ufix_notruncv16sfv16si"
+(define_insn "<mask_codefor>avx512f_ufix_notruncv16sfv16si<mask_name><round_name>"
[(set (match_operand:V16SI 0 "register_operand" "=v")
(unspec:V16SI
- [(match_operand:V16SF 1 "nonimmediate_operand" "vm")]
+ [(match_operand:V16SF 1 "nonimmediate_operand" "<round_constraint>")]
UNSPEC_UNSIGNED_FIX_NOTRUNC))]
"TARGET_AVX512F"
- "vcvtps2udq\t{%1, %0|%0, %1}"
+ "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "<fixsuffix>fix_truncv16sfv16si2"
+(define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
[(set (match_operand:V16SI 0 "register_operand" "=v")
(any_fix:V16SI
- (match_operand:V16SF 1 "nonimmediate_operand" "vm")))]
+ (match_operand:V16SF 1 "nonimmediate_operand" "<round_saeonly_constraint>")))]
"TARGET_AVX512F"
- "vcvttps2<fixsuffix>dq\t{%1, %0|%0, %1}"
+ "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
@@ -3261,18 +3913,18 @@
(set_attr "prefix" "orig,orig,vex")
(set_attr "mode" "DF")])
-(define_insn "sse2_cvtsi2sdq"
+(define_insn "sse2_cvtsi2sdq<round_name>"
[(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
(vec_merge:V2DF
(vec_duplicate:V2DF
- (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
+ (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,<round_constraint3>")))
(match_operand:V2DF 1 "register_operand" "0,0,v")
(const_int 1)))]
"TARGET_SSE2 && TARGET_64BIT"
"@
cvtsi2sdq\t{%2, %0|%0, %2}
cvtsi2sdq\t{%2, %0|%0, %2}
- vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
+ vcvtsi2sdq\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
[(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,direct,*")
@@ -3283,115 +3935,115 @@
(set_attr "prefix" "orig,orig,maybe_evex")
(set_attr "mode" "DF")])
-(define_insn "avx512f_vcvtss2usi"
+(define_insn "avx512f_vcvtss2usi<round_name>"
[(set (match_operand:SI 0 "register_operand" "=r")
(unspec:SI
[(vec_select:SF
- (match_operand:V4SF 1 "nonimmediate_operand" "vm")
+ (match_operand:V4SF 1 "nonimmediate_operand" "<round_constraint>")
(parallel [(const_int 0)]))]
UNSPEC_UNSIGNED_FIX_NOTRUNC))]
"TARGET_AVX512F"
- "vcvtss2usi\t{%1, %0|%0, %1}"
+ "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
[(set_attr "type" "sseicvt")
(set_attr "prefix" "evex")
(set_attr "mode" "SI")])
-(define_insn "avx512f_vcvtss2usiq"
+(define_insn "avx512f_vcvtss2usiq<round_name>"
[(set (match_operand:DI 0 "register_operand" "=r")
(unspec:DI
[(vec_select:SF
- (match_operand:V4SF 1 "nonimmediate_operand" "vm")
+ (match_operand:V4SF 1 "nonimmediate_operand" "<round_constraint>")
(parallel [(const_int 0)]))]
UNSPEC_UNSIGNED_FIX_NOTRUNC))]
"TARGET_AVX512F && TARGET_64BIT"
- "vcvtss2usi\t{%1, %0|%0, %1}"
+ "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
[(set_attr "type" "sseicvt")
(set_attr "prefix" "evex")
(set_attr "mode" "DI")])
-(define_insn "avx512f_vcvttss2usi"
+(define_insn "avx512f_vcvttss2usi<round_saeonly_name>"
[(set (match_operand:SI 0 "register_operand" "=r")
(unsigned_fix:SI
(vec_select:SF
- (match_operand:V4SF 1 "nonimmediate_operand" "vm")
+ (match_operand:V4SF 1 "nonimmediate_operand" "<round_saeonly_constraint>")
(parallel [(const_int 0)]))))]
"TARGET_AVX512F"
- "vcvttss2usi\t{%1, %0|%0, %1}"
+ "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
[(set_attr "type" "sseicvt")
(set_attr "prefix" "evex")
(set_attr "mode" "SI")])
-(define_insn "avx512f_vcvttss2usiq"
+(define_insn "avx512f_vcvttss2usiq<round_saeonly_name>"
[(set (match_operand:DI 0 "register_operand" "=r")
(unsigned_fix:DI
(vec_select:SF
- (match_operand:V4SF 1 "nonimmediate_operand" "vm")
+ (match_operand:V4SF 1 "nonimmediate_operand" "<round_saeonly_constraint>")
(parallel [(const_int 0)]))))]
"TARGET_AVX512F && TARGET_64BIT"
- "vcvttss2usi\t{%1, %0|%0, %1}"
+ "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
[(set_attr "type" "sseicvt")
(set_attr "prefix" "evex")
(set_attr "mode" "DI")])
-(define_insn "avx512f_vcvtsd2usi"
+(define_insn "avx512f_vcvtsd2usi<round_name>"
[(set (match_operand:SI 0 "register_operand" "=r")
(unspec:SI
[(vec_select:DF
- (match_operand:V2DF 1 "nonimmediate_operand" "vm")
+ (match_operand:V2DF 1 "nonimmediate_operand" "<round_constraint>")
(parallel [(const_int 0)]))]
UNSPEC_UNSIGNED_FIX_NOTRUNC))]
"TARGET_AVX512F"
- "vcvtsd2usi\t{%1, %0|%0, %1}"
+ "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
[(set_attr "type" "sseicvt")
(set_attr "prefix" "evex")
(set_attr "mode" "SI")])
-(define_insn "avx512f_vcvtsd2usiq"
+(define_insn "avx512f_vcvtsd2usiq<round_name>"
[(set (match_operand:DI 0 "register_operand" "=r")
(unspec:DI
[(vec_select:DF
- (match_operand:V2DF 1 "nonimmediate_operand" "vm")
+ (match_operand:V2DF 1 "nonimmediate_operand" "<round_constraint>")
(parallel [(const_int 0)]))]
UNSPEC_UNSIGNED_FIX_NOTRUNC))]
"TARGET_AVX512F && TARGET_64BIT"
- "vcvtsd2usi\t{%1, %0|%0, %1}"
+ "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
[(set_attr "type" "sseicvt")
(set_attr "prefix" "evex")
(set_attr "mode" "DI")])
-(define_insn "avx512f_vcvttsd2usi"
+(define_insn "avx512f_vcvttsd2usi<round_saeonly_name>"
[(set (match_operand:SI 0 "register_operand" "=r")
(unsigned_fix:SI
(vec_select:DF
- (match_operand:V2DF 1 "nonimmediate_operand" "vm")
+ (match_operand:V2DF 1 "nonimmediate_operand" "<round_saeonly_constraint>")
(parallel [(const_int 0)]))))]
"TARGET_AVX512F"
- "vcvttsd2usi\t{%1, %0|%0, %1}"
+ "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
[(set_attr "type" "sseicvt")
(set_attr "prefix" "evex")
(set_attr "mode" "SI")])
-(define_insn "avx512f_vcvttsd2usiq"
+(define_insn "avx512f_vcvttsd2usiq<round_saeonly_name>"
[(set (match_operand:DI 0 "register_operand" "=r")
(unsigned_fix:DI
(vec_select:DF
- (match_operand:V2DF 1 "nonimmediate_operand" "vm")
+ (match_operand:V2DF 1 "nonimmediate_operand" "<round_saeonly_constraint>")
(parallel [(const_int 0)]))))]
"TARGET_AVX512F && TARGET_64BIT"
- "vcvttsd2usi\t{%1, %0|%0, %1}"
+ "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
[(set_attr "type" "sseicvt")
(set_attr "prefix" "evex")
(set_attr "mode" "DI")])
-(define_insn "sse2_cvtsd2si"
+(define_insn "sse2_cvtsd2si<round_name>"
[(set (match_operand:SI 0 "register_operand" "=r,r")
(unspec:SI
[(vec_select:DF
- (match_operand:V2DF 1 "nonimmediate_operand" "v,m")
+ (match_operand:V2DF 1 "nonimmediate_operand" "v,<round_constraint2>")
(parallel [(const_int 0)]))]
UNSPEC_FIX_NOTRUNC))]
"TARGET_SSE2"
- "%vcvtsd2si\t{%1, %0|%0, %q1}"
+ "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "bdver1_decode" "double,double")
@@ -3414,15 +4066,15 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
-(define_insn "sse2_cvtsd2siq"
+(define_insn "sse2_cvtsd2siq<round_name>"
[(set (match_operand:DI 0 "register_operand" "=r,r")
(unspec:DI
[(vec_select:DF
- (match_operand:V2DF 1 "nonimmediate_operand" "v,m")
+ (match_operand:V2DF 1 "nonimmediate_operand" "v,<round_constraint2>")
(parallel [(const_int 0)]))]
UNSPEC_FIX_NOTRUNC))]
"TARGET_SSE2 && TARGET_64BIT"
- "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
+ "%vcvtsd2si{q}\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "bdver1_decode" "double,double")
@@ -3444,14 +4096,14 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "DI")])
-(define_insn "sse2_cvttsd2si"
+(define_insn "sse2_cvttsd2si<round_saeonly_name>"
[(set (match_operand:SI 0 "register_operand" "=r,r")
(fix:SI
(vec_select:DF
- (match_operand:V2DF 1 "nonimmediate_operand" "v,m")
+ (match_operand:V2DF 1 "nonimmediate_operand" "v,<round_saeonly_constraint2>")
(parallel [(const_int 0)]))))]
"TARGET_SSE2"
- "%vcvttsd2si\t{%1, %0|%0, %q1}"
+ "%vcvttsd2si\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "amdfam10_decode" "double,double")
@@ -3461,14 +4113,14 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
-(define_insn "sse2_cvttsd2siq"
+(define_insn "sse2_cvttsd2siq<round_saeonly_name>"
[(set (match_operand:DI 0 "register_operand" "=r,r")
(fix:DI
(vec_select:DF
- (match_operand:V2DF 1 "nonimmediate_operand" "v,m")
+ (match_operand:V2DF 1 "nonimmediate_operand" "v,<round_saeonly_constraint2>")
(parallel [(const_int 0)]))))]
"TARGET_SSE2 && TARGET_64BIT"
- "%vcvttsd2si{q}\t{%1, %0|%0, %q1}"
+ "%vcvttsd2si{q}\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
(set_attr "amdfam10_decode" "double,double")
@@ -3483,20 +4135,21 @@
(define_mode_attr si2dfmodelower
[(V8DF "v8si") (V4DF "v4si")])
-(define_insn "float<si2dfmodelower><mode>2"
+(define_insn "float<si2dfmodelower><mode>2<mask_name>"
[(set (match_operand:VF2_512_256 0 "register_operand" "=v")
(float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
- "TARGET_AVX"
- "vcvtdq2pd\t{%1, %0|%0, %1}"
+ "TARGET_AVX && <mask_mode512bit_condition>"
+ "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
-(define_insn "ufloatv8siv8df"
+(define_insn "ufloatv8siv8df<mask_name>"
[(set (match_operand:V8DF 0 "register_operand" "=v")
- (unsigned_float:V8DF (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
+ (unsigned_float:V8DF
+ (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
"TARGET_AVX512F"
- "vcvtudq2pd\t{%1, %0|%0, %1}"
+ "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
(set_attr "mode" "V8DF")])
@@ -3541,12 +4194,13 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "V2DF")])
-(define_insn "avx512f_cvtpd2dq512"
+(define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name><round_name>"
[(set (match_operand:V8SI 0 "register_operand" "=v")
- (unspec:V8SI [(match_operand:V8DF 1 "nonimmediate_operand" "vm")]
- UNSPEC_FIX_NOTRUNC))]
+ (unspec:V8SI
+ [(match_operand:V8DF 1 "nonimmediate_operand" "<round_constraint>")]
+ UNSPEC_FIX_NOTRUNC))]
"TARGET_AVX512F"
- "vcvtpd2dq\t{%1, %0|%0, %1}"
+ "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
(set_attr "mode" "OI")])
@@ -3614,22 +4268,23 @@
(set_attr "athlon_decode" "vector")
(set_attr "bdver1_decode" "double")])
-(define_insn "avx512f_ufix_notruncv8dfv8si"
+(define_insn "avx512f_ufix_notruncv8dfv8si<mask_name><round_name>"
[(set (match_operand:V8SI 0 "register_operand" "=v")
(unspec:V8SI
- [(match_operand:V8DF 1 "nonimmediate_operand" "vm")]
+ [(match_operand:V8DF 1 "nonimmediate_operand" "<round_constraint>")]
UNSPEC_UNSIGNED_FIX_NOTRUNC))]
"TARGET_AVX512F"
- "vcvtpd2udq\t{%1, %0|%0, %1}"
+ "vcvtpd2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
(set_attr "mode" "OI")])
-(define_insn "<fixsuffix>fix_truncv8dfv8si2"
+(define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name><round_saeonly_name>"
[(set (match_operand:V8SI 0 "register_operand" "=v")
- (any_fix:V8SI (match_operand:V8DF 1 "nonimmediate_operand" "vm")))]
+ (any_fix:V8SI
+ (match_operand:V8DF 1 "nonimmediate_operand" "<round_saeonly_constraint>")))]
"TARGET_AVX512F"
- "vcvttpd2<fixsuffix>dq\t{%1, %0|%0, %1}"
+ "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
(set_attr "mode" "OI")])
@@ -3690,34 +4345,34 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
-(define_insn "sse2_cvtsd2ss"
+(define_insn "sse2_cvtsd2ss<mask_scalar_name><round_name>"
[(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
(vec_merge:V4SF
(vec_duplicate:V4SF
(float_truncate:V2SF
- (match_operand:V2DF 2 "nonimmediate_operand" "x,m,vm")))
+ (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
(match_operand:V4SF 1 "register_operand" "0,0,v")
(const_int 1)))]
"TARGET_SSE2"
"@
cvtsd2ss\t{%2, %0|%0, %2}
cvtsd2ss\t{%2, %0|%0, %q2}
- vcvtsd2ss\t{%2, %1, %0|%0, %1, %q2}"
+ vcvtsd2ss\t{<round_mask_scalar_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %q2<round_mask_scalar_op3>}"
[(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "ssecvt")
(set_attr "athlon_decode" "vector,double,*")
(set_attr "amdfam10_decode" "vector,double,*")
(set_attr "bdver1_decode" "direct,direct,*")
+ (set_attr "prefix" "orig,orig,<mask_scalar_prefix2>")
(set_attr "btver2_decode" "double,double,double")
- (set_attr "prefix" "orig,orig,vex")
(set_attr "mode" "SF")])
-(define_insn "sse2_cvtss2sd"
+(define_insn "sse2_cvtss2sd<mask_scalar_name><round_saeonly_name>"
[(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
(vec_merge:V2DF
(float_extend:V2DF
(vec_select:V2SF
- (match_operand:V4SF 2 "nonimmediate_operand" "x,m,vm")
+ (match_operand:V4SF 2 "nonimmediate_operand" "x,m,<round_saeonly_constraint>")
(parallel [(const_int 0) (const_int 1)])))
(match_operand:V2DF 1 "register_operand" "0,0,v")
(const_int 1)))]
@@ -3725,22 +4380,22 @@
"@
cvtss2sd\t{%2, %0|%0, %2}
cvtss2sd\t{%2, %0|%0, %k2}
- vcvtss2sd\t{%2, %1, %0|%0, %1, %k2}"
+ vcvtss2sd\t{<round_saeonly_mask_scalar_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %k2<round_saeonly_mask_scalar_op3>}"
[(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "ssecvt")
(set_attr "amdfam10_decode" "vector,double,*")
(set_attr "athlon_decode" "direct,direct,*")
(set_attr "bdver1_decode" "direct,direct,*")
(set_attr "btver2_decode" "double,double,double")
- (set_attr "prefix" "orig,orig,vex")
+ (set_attr "prefix" "orig,orig,<mask_scalar_prefix2>")
(set_attr "mode" "DF")])
-(define_insn "avx512f_cvtpd2ps512"
+(define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
[(set (match_operand:V8SF 0 "register_operand" "=v")
(float_truncate:V8SF
- (match_operand:V8DF 1 "nonimmediate_operand" "vm")))]
+ (match_operand:V8DF 1 "nonimmediate_operand" "<round_constraint>")))]
"TARGET_AVX512F"
- "vcvtpd2ps\t{%1, %0|%0, %1}"
+ "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
(set_attr "mode" "V8SF")])
@@ -3790,12 +4445,12 @@
(define_mode_attr sf2dfmode
[(V8DF "V8SF") (V4DF "V4SF")])
-(define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix>"
+(define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
[(set (match_operand:VF2_512_256 0 "register_operand" "=v")
(float_extend:VF2_512_256
- (match_operand:<sf2dfmode> 1 "nonimmediate_operand" "vm")))]
- "TARGET_AVX"
- "vcvtps2pd\t{%1, %0|%0, %1}"
+ (match_operand:<sf2dfmode> 1 "nonimmediate_operand" "<round_saeonly_constraint>")))]
+ "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
+ "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
@@ -4119,6 +4774,32 @@
DONE;
})
+(define_expand "vec_unpacku_float_hi_v16si"
+ [(match_operand:V8DF 0 "register_operand")
+ (match_operand:V16SI 1 "register_operand")]
+ "TARGET_AVX512F"
+{
+ REAL_VALUE_TYPE TWO32r;
+ rtx k, x, tmp[4];
+
+ real_ldexp (&TWO32r, &dconst1, 32);
+ x = const_double_from_real_value (TWO32r, DFmode);
+
+ tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
+ tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
+ tmp[2] = gen_reg_rtx (V8DFmode);
+ tmp[3] = gen_reg_rtx (V8SImode);
+ k = gen_reg_rtx (QImode);
+
+ emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
+ emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
+ emit_insn (gen_rtx_SET (VOIDmode, k,
+ gen_rtx_LT (QImode, tmp[2], tmp[0])));
+ emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
+ emit_move_insn (operands[0], tmp[2]);
+ DONE;
+})
+
(define_expand "vec_unpacku_float_lo_v8si"
[(match_operand:V4DF 0 "register_operand")
(match_operand:V8SI 1 "nonimmediate_operand")]
@@ -4144,6 +4825,30 @@
DONE;
})
+(define_expand "vec_unpacku_float_lo_v16si"
+ [(match_operand:V8DF 0 "register_operand")
+ (match_operand:V16SI 1 "nonimmediate_operand")]
+ "TARGET_AVX512F"
+{
+ REAL_VALUE_TYPE TWO32r;
+ rtx k, x, tmp[3];
+
+ real_ldexp (&TWO32r, &dconst1, 32);
+ x = const_double_from_real_value (TWO32r, DFmode);
+
+ tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
+ tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
+ tmp[2] = gen_reg_rtx (V8DFmode);
+ k = gen_reg_rtx (QImode);
+
+ emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
+ emit_insn (gen_rtx_SET (VOIDmode, k,
+ gen_rtx_LT (QImode, tmp[2], tmp[0])));
+ emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
+ emit_move_insn (operands[0], tmp[2]);
+ DONE;
+})
+
(define_expand "vec_pack_trunc_<mode>"
[(set (match_dup 3)
(float_truncate:<sf2dfmode>
@@ -4431,7 +5136,7 @@
(set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
(set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
-(define_insn "avx512f_unpckhps512"
+(define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
[(set (match_operand:V16SF 0 "register_operand" "=v")
(vec_select:V16SF
(vec_concat:V32SF
@@ -4446,7 +5151,7 @@
(const_int 14) (const_int 30)
(const_int 15) (const_int 31)])))]
"TARGET_AVX512F"
- "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
+ "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
(set_attr "mode" "V16SF")])
@@ -4519,7 +5224,7 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "V4SF")])
-(define_insn "avx512f_unpcklps512"
+(define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
[(set (match_operand:V16SF 0 "register_operand" "=v")
(vec_select:V16SF
(vec_concat:V32SF
@@ -4534,7 +5239,7 @@
(const_int 12) (const_int 28)
(const_int 13) (const_int 29)])))]
"TARGET_AVX512F"
- "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
+ "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
(set_attr "mode" "V16SF")])
@@ -4642,7 +5347,7 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "V4SF")])
-(define_insn "avx512f_movshdup512"
+(define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
[(set (match_operand:V16SF 0 "register_operand" "=v")
(vec_select:V16SF
(vec_concat:V32SF
@@ -4657,7 +5362,7 @@
(const_int 13) (const_int 13)
(const_int 15) (const_int 15)])))]
"TARGET_AVX512F"
- "vmovshdup\t{%1, %0|%0, %1}"
+ "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "sse")
(set_attr "prefix" "evex")
(set_attr "mode" "V16SF")])
@@ -4695,7 +5400,7 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "V4SF")])
-(define_insn "avx512f_movsldup512"
+(define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
[(set (match_operand:V16SF 0 "register_operand" "=v")
(vec_select:V16SF
(vec_concat:V32SF
@@ -4710,7 +5415,7 @@
(const_int 12) (const_int 12)
(const_int 14) (const_int 14)])))]
"TARGET_AVX512F"
- "vmovsldup\t{%1, %0|%0, %1}"
+ "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "sse")
(set_attr "prefix" "evex")
(set_attr "mode" "V16SF")])
@@ -5244,8 +5949,71 @@
operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
})
-(define_insn "avx512f_vextract<shuffletype>32x4_1"
- [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=vm")
+(define_expand "avx512f_vextract<shuffletype>32x4_mask"
+ [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
+ (match_operand:V16FI 1 "register_operand")
+ (match_operand:SI 2 "const_0_to_3_operand")
+ (match_operand:<ssequartermode> 3 "nonimmediate_operand")
+ (match_operand:QI 4 "register_operand")]
+ "TARGET_AVX512F"
+{
+ if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
+ operands[0] = force_reg (<ssequartermode>mode, operands[0]);
+ switch (INTVAL (operands[2]))
+ {
+ case 0:
+ emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
+ operands[1], GEN_INT (0), GEN_INT (1), GEN_INT (2),
+ GEN_INT (3), operands[3], operands[4]));
+ break;
+ case 1:
+ emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
+ operands[1], GEN_INT (4), GEN_INT (5), GEN_INT (6),
+ GEN_INT (7), operands[3], operands[4]));
+ break;
+ case 2:
+ emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
+ operands[1], GEN_INT (8), GEN_INT (9), GEN_INT (10),
+ GEN_INT (11), operands[3], operands[4]));
+ break;
+ case 3:
+ emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (operands[0],
+ operands[1], GEN_INT (12), GEN_INT (13), GEN_INT (14),
+ GEN_INT (15), operands[3], operands[4]));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ DONE;
+})
+
+(define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
+ [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
+ (vec_merge:<ssequartermode>
+ (vec_select:<ssequartermode>
+ (match_operand:V16FI 1 "register_operand" "v")
+ (parallel [(match_operand 2 "const_0_to_15_operand")
+ (match_operand 3 "const_0_to_15_operand")
+ (match_operand 4 "const_0_to_15_operand")
+ (match_operand 5 "const_0_to_15_operand")]))
+ (match_operand:<ssequartermode> 6 "memory_operand" "0")
+ (match_operand:QI 7 "register_operand" "k")))]
+ "TARGET_AVX512F && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)
+ && (INTVAL (operands[3]) == INTVAL (operands[4]) - 1)
+ && (INTVAL (operands[4]) == INTVAL (operands[5]) - 1)"
+{
+ operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
+ return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
+ [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
(vec_select:<ssequartermode>
(match_operand:V16FI 1 "register_operand" "v")
(parallel [(match_operand 2 "const_0_to_15_operand")
@@ -5257,7 +6025,7 @@
&& (INTVAL (operands[4]) = INTVAL (operands[5]) - 1)"
{
operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
- return "vextract<shuffletype>32x4\t{%2, %1, %0|%0, %1, %2}";
+ return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
}
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
@@ -5269,6 +6037,35 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_expand "avx512f_vextract<shuffletype>64x4_mask"
+ [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
+ (match_operand:V8FI 1 "register_operand")
+ (match_operand:SI 2 "const_0_to_1_operand")
+ (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
+ (match_operand:QI 4 "register_operand")]
+ "TARGET_AVX512F"
+{
+ rtx (*insn)(rtx, rtx, rtx, rtx);
+
+ if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
+ operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);
+
+ switch (INTVAL (operands[2]))
+ {
+ case 0:
+ insn = gen_vec_extract_lo_<mode>_mask;
+ break;
+ case 1:
+ insn = gen_vec_extract_hi_<mode>_mask;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
+ DONE;
+})
+
(define_split
[(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
(vec_select:<ssehalfvecmode>
@@ -5288,14 +6085,36 @@
DONE;
})
-(define_insn "vec_extract_lo_<mode>"
- [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm")
+(define_insn "vec_extract_lo_<mode>_maskm"
+ [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
+ (vec_merge:<ssehalfvecmode>
+ (vec_select:<ssehalfvecmode>
+ (match_operand:V8FI 1 "register_operand" "v")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))
+ (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
+ (match_operand:QI 3 "register_operand" "k")))]
+ "TARGET_AVX512F"
+"vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "vec_extract_lo_<mode><mask_name>"
+ [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
(vec_select:<ssehalfvecmode>
(match_operand:V8FI 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)])))]
"TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
- "#"
+{
+ if (<mask_applied>)
+ return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
+ else
+ return "#";
+}
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
@@ -5306,14 +6125,32 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "vec_extract_hi_<mode>"
- [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm")
+;; Masked store of the high half (elements 4-7) of a V8FI register.
+;; Elements whose bit in mask operand 3 is clear keep the old memory
+;; contents, which operand 2 stands for.  Operands 0 and 2 are both
+;; memory, and the "0" matching constraint cannot be satisfied by
+;; reloading one memory reference into another, so the insn condition
+;; requires them to be the same rtx already.
+(define_insn "vec_extract_hi_<mode>_maskm"
+  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
+        (vec_merge:<ssehalfvecmode>
+          (vec_select:<ssehalfvecmode>
+            (match_operand:V8FI 1 "register_operand" "v")
+            (parallel [(const_int 4) (const_int 5)
+                       (const_int 6) (const_int 7)]))
+          (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
+          (match_operand:QI 3 "register_operand" "k")))]
+  "TARGET_AVX512F
+   && rtx_equal_p (operands[2], operands[0])"
+  "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
+  [(set_attr "type" "sselog")
+   (set_attr "prefix_extra" "1")
+   (set_attr "length_immediate" "1")
+   (set_attr "memory" "store")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "vec_extract_hi_<mode><mask_name>"
+ [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
(vec_select:<ssehalfvecmode>
(match_operand:V8FI 1 "register_operand" "v")
(parallel [(const_int 4) (const_int 5)
(const_int 6) (const_int 7)])))]
"TARGET_AVX512F"
- "vextract<shuffletype>64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
+ "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
@@ -5665,7 +6502,7 @@
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(define_insn "avx512f_unpckhpd512"
+(define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
[(set (match_operand:V8DF 0 "register_operand" "=v")
(vec_select:V8DF
(vec_concat:V16DF
@@ -5676,7 +6513,7 @@
(const_int 5) (const_int 13)
(const_int 7) (const_int 15)])))]
"TARGET_AVX512F"
- "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
+ "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
(set_attr "mode" "V8DF")])
@@ -5761,7 +6598,7 @@
(set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
(set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
-(define_expand "avx512f_movddup512"
+(define_expand "avx512f_movddup512<mask_name>"
[(set (match_operand:V8DF 0 "register_operand")
(vec_select:V8DF
(vec_concat:V16DF
@@ -5773,7 +6610,7 @@
(const_int 6) (const_int 14)])))]
"TARGET_AVX512F")
-(define_expand "avx512f_unpcklpd512"
+(define_expand "avx512f_unpcklpd512<mask_name>"
[(set (match_operand:V8DF 0 "register_operand")
(vec_select:V8DF
(vec_concat:V16DF
@@ -5785,7 +6622,7 @@
(const_int 6) (const_int 14)])))]
"TARGET_AVX512F")
-(define_insn "*avx512f_unpcklpd512"
+(define_insn "*avx512f_unpcklpd512<mask_name>"
[(set (match_operand:V8DF 0 "register_operand" "=v,v")
(vec_select:V8DF
(vec_concat:V16DF
@@ -5797,8 +6634,8 @@
(const_int 6) (const_int 14)])))]
"TARGET_AVX512F"
"@
- vunpcklpd\t{%2, %1, %0|%0, %1, %2}
- vmovddup\t{%1, %0|%0, %1}"
+ vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
+ vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
(set_attr "mode" "V8DF")])
@@ -5935,30 +6772,47 @@
operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
})
-(define_insn "avx512f_vmscalef<mode>"
+;; Scalar scalef on 128-bit FP vectors: the vec_merge with constant mask 1
+;; takes element 0 from the UNSPEC_SCALEF result and every other element
+;; from operand 1.  The <mask_scalar_*> and <round_*> pieces are
+;; substitution attributes whose definitions are not visible in this hunk
+;; (presumably supplied by the included subst.md).
+(define_insn "<mask_scalar_codefor>avx512f_vmscalef<mode><mask_scalar_name><round_name>"
 [(set (match_operand:VF_128 0 "register_operand" "=v")
 (vec_merge:VF_128
- (unspec:VF_128 [(match_operand:VF_128 1 "register_operand" "v")
- (match_operand:VF_128 2 "nonimmediate_operand" "vm")]
- UNSPEC_SCALEF)
+ (unspec:VF_128
+ [(match_operand:VF_128 1 "register_operand" "v")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_constraint>")]
+ UNSPEC_SCALEF)
 (match_dup 1)
 (const_int 1)))]
 "TARGET_AVX512F"
- "%vscalef<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ "%vscalef<ssescalarmodesuffix>\t{<round_mask_scalar_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2<round_mask_scalar_op3>}"
 [(set_attr "prefix" "evex")
 (set_attr "mode" "<ssescalarmode>")])
-(define_insn "avx512f_scalef<mode>"
+;; Packed scalef on 512-bit FP vectors (UNSPEC_SCALEF of operands 1 and 2).
+;; The rounding slot is printed before %2 in the AT&T alternative and after
+;; %2 in the Intel alternative; the mask slot follows %0 in both.
+(define_insn "avx512f_scalef<mode><mask_name><round_name>"
 [(set (match_operand:VF_512 0 "register_operand" "=v")
- (unspec:VF_512 [(match_operand:VF_512 1 "register_operand" "v")
- (match_operand:VF_512 2 "nonimmediate_operand" "vm")]
- UNSPEC_SCALEF))]
+ (unspec:VF_512
+ [(match_operand:VF_512 1 "register_operand" "v")
+ (match_operand:VF_512 2 "nonimmediate_operand" "<round_constraint>")]
+ UNSPEC_SCALEF))]
 "TARGET_AVX512F"
- "%vscalef<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ "%vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
 [(set_attr "prefix" "evex")
 (set_attr "mode" "<MODE>")])
-(define_insn "avx512f_vternlog<mode>"
+;; Zero-masking entry point for vpternlog.  Takes destination, three
+;; sources, the 8-bit immediate and the mask register, and forwards them to
+;; avx512f_vternlog<mode>_maskz_1 with an all-zero vector inserted just
+;; before the mask.
+(define_expand "avx512f_vternlog<mode>_maskz"
+ [(match_operand:VI48_512 0 "register_operand")
+ (match_operand:VI48_512 1 "register_operand")
+ (match_operand:VI48_512 2 "register_operand")
+ (match_operand:VI48_512 3 "nonimmediate_operand")
+ (match_operand:SI 4 "const_0_to_255_operand")
+ (match_operand:<avx512fmaskmode> 5 "register_operand")]
+ "TARGET_AVX512F"
+{
+ /* CONST0_RTX supplies the extra zero-vector argument of the _maskz_1 generator.  */
+ emit_insn (gen_avx512f_vternlog<mode>_maskz_1 (
+ operands[0], operands[1], operands[2], operands[3],
+ operands[4], CONST0_RTX (<MODE>mode), operands[5]));
+ DONE;
+})
+
+(define_insn "avx512f_vternlog<mode><sd_maskz_name>"
[(set (match_operand:VI48_512 0 "register_operand" "=v")
(unspec:VI48_512
[(match_operand:VI48_512 1 "register_operand" "0")
@@ -5967,103 +6821,220 @@
(match_operand:SI 4 "const_0_to_255_operand")]
UNSPEC_VTERNLOG))]
"TARGET_AVX512F"
- "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0|%0, %2, %3, %4}"
+ "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+;; Merge-masking vpternlog: result elements come from UNSPEC_VTERNLOG where
+;; the corresponding bit of mask operand 5 is set and from operand 1
+;; otherwise.  Operand 1 is tied to the destination by constraint "0".
+;; The template prints the mask as {%5} right after the destination.
+(define_insn "avx512f_vternlog<mode>_mask"
+ [(set (match_operand:VI48_512 0 "register_operand" "=v")
+ (vec_merge:VI48_512
+ (unspec:VI48_512
+ [(match_operand:VI48_512 1 "register_operand" "0")
+ (match_operand:VI48_512 2 "register_operand" "v")
+ (match_operand:VI48_512 3 "nonimmediate_operand" "vm")
+ (match_operand:SI 4 "const_0_to_255_operand")]
+ UNSPEC_VTERNLOG)
+ (match_dup 1)
+ (match_operand:<avx512fmaskmode> 5 "register_operand" "k")))]
+ "TARGET_AVX512F"
+ "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
 [(set_attr "type" "sselog")
 (set_attr "prefix" "evex")
 (set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_getexp<mode>"
+;; Packed vgetexp on 512-bit FP vectors (UNSPEC_GETEXP of operand 1).
+;; <mask_name> and <round_saeonly_name> are substitution attributes whose
+;; definitions are not visible in this hunk (presumably from subst.md).
+;; The ';' after the output template starts an md comment, so it is inert.
+(define_insn "avx512f_getexp<mode><mask_name><round_saeonly_name>"
 [(set (match_operand:VF_512 0 "register_operand" "=v")
- (unspec:VF_512 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
+ (unspec:VF_512 [(match_operand:VF_512 1 "nonimmediate_operand" "<round_saeonly_constraint>")]
 UNSPEC_GETEXP))]
 "TARGET_AVX512F"
- "vgetexp<ssemodesuffix>\t{%1, %0|%0, %1}";
+ "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
 [(set_attr "prefix" "evex")
 (set_attr "mode" "<MODE>")])
-(define_insn "avx512f_sgetexp<mode>"
+;; Scalar vgetexp on 128-bit FP vectors: the vec_merge with constant mask 1
+;; takes element 0 from the UNSPEC_GETEXP result and every other element
+;; from operand 1.
+(define_insn "avx512f_sgetexp<mode><mask_scalar_name><round_saeonly_name>"
 [(set (match_operand:VF_128 0 "register_operand" "=v")
 (vec_merge:VF_128
- (unspec:VF_128 [(match_operand:VF_128 1 "register_operand" "v")
- (match_operand:VF_128 2 "nonimmediate_operand" "vm")]
- UNSPEC_GETEXP)
+ (unspec:VF_128
+ [(match_operand:VF_128 1 "register_operand" "v")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_saeonly_constraint>")]
+ UNSPEC_GETEXP)
 (match_dup 1)
 (const_int 1)))]
 "TARGET_AVX512F"
- "vgetexp<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
+ "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_mask_scalar_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2<round_saeonly_mask_scalar_op3>}";
 [(set_attr "prefix" "evex")
 (set_attr "mode" "<ssescalarmode>")])
-(define_insn "avx512f_align<mode>"
+;; valign on 512-bit integer vectors: UNSPEC_ALIGN of two sources and an
+;; 8-bit immediate (operand 3).  The mask slot <mask_operand4> follows the
+;; destination in both assembler dialects.
+(define_insn "<mask_codefor>avx512f_align<mode><mask_name>"
 [(set (match_operand:VI48_512 0 "register_operand" "=v")
 (unspec:VI48_512 [(match_operand:VI48_512 1 "register_operand" "v")
 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")
 (match_operand:SI 3 "const_0_to_255_operand")]
 UNSPEC_ALIGN))]
 "TARGET_AVX512F"
- "valign<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+ "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
 [(set_attr "prefix" "evex")
 (set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_fixupimm<mode>"
+;; Masked vshufps on V16SF.  Decodes the 8-bit immediate (operand 3) into
+;; the sixteen explicit element selectors that avx512f_shufps512_1_mask
+;; takes, then passes operand 4 and the HImode mask (operand 5) through.
+(define_expand "avx512f_shufps512_mask"
+  [(match_operand:V16SF 0 "register_operand")
+   (match_operand:V16SF 1 "register_operand")
+   (match_operand:V16SF 2 "nonimmediate_operand")
+   (match_operand:SI 3 "const_0_to_255_operand")
+   (match_operand:V16SF 4 "register_operand")
+   (match_operand:HI 5 "register_operand")]
+  "TARGET_AVX512F"
+{
+  /* The immediate packs four 2-bit selectors, lowest field first.  */
+  const int imm = INTVAL (operands[3]);
+  const int sel0 = imm & 3;
+  const int sel1 = (imm >> 2) & 3;
+  const int sel2 = (imm >> 4) & 3;
+  const int sel3 = (imm >> 6) & 3;
+
+  /* Each row below is one group of four result elements: the group base
+     (0, 4, 8, 12) is added to every selector, and the last two selectors
+     of a group carry an extra bias of 16.  */
+  emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
+    GEN_INT (sel0), GEN_INT (sel1), GEN_INT (sel2 + 16), GEN_INT (sel3 + 16),
+    GEN_INT (sel0 + 4), GEN_INT (sel1 + 4), GEN_INT (sel2 + 20), GEN_INT (sel3 + 20),
+    GEN_INT (sel0 + 8), GEN_INT (sel1 + 8), GEN_INT (sel2 + 24), GEN_INT (sel3 + 24),
+    GEN_INT (sel0 + 12), GEN_INT (sel1 + 12), GEN_INT (sel2 + 28), GEN_INT (sel3 + 28),
+    operands[4], operands[5]));
+  DONE;
+})
+
+
+;; Zero-masking entry point for packed vfixupimm.  Forwards its operands to
+;; the _maskz_1 generator with an all-zero vector inserted before the mask.
+;; <round_saeonly_expand_operand6> is pasted textually after the mask
+;; argument; its expansion is not visible in this hunk.
+(define_expand "avx512f_fixupimm<mode>_maskz<round_saeonly_expand_name5>"
+ [(match_operand:VF_512 0 "register_operand")
+ (match_operand:VF_512 1 "register_operand")
+ (match_operand:VF_512 2 "register_operand")
+ (match_operand:<ssefixupmode> 3 "<round_saeonly_expand_predicate5>")
+ (match_operand:SI 4 "const_0_to_255_operand")
+ (match_operand:<avx512fmaskmode> 5 "register_operand")]
+ "TARGET_AVX512F"
+{
+ emit_insn (gen_avx512f_fixupimm<mode>_maskz_1<round_saeonly_expand_name5> (
+ operands[0], operands[1], operands[2], operands[3],
+ operands[4], CONST0_RTX (<MODE>mode), operands[5]
+ <round_saeonly_expand_operand6>));
+ DONE;
+})
+
+;; Packed vfixupimm on 512-bit FP vectors.  Operand 1 is tied to the
+;; destination (constraint "0") and is therefore not printed; the template
+;; emits the immediate %4, the table operand %3 and the source %2.
+(define_insn "avx512f_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
 [(set (match_operand:VF_512 0 "register_operand" "=v")
 (unspec:VF_512
 [(match_operand:VF_512 1 "register_operand" "0")
 (match_operand:VF_512 2 "register_operand" "v")
- (match_operand:<ssefixupmode> 3 "nonimmediate_operand" "vm")
+ (match_operand:<ssefixupmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
 (match_operand:SI 4 "const_0_to_255_operand")]
 UNSPEC_FIXUPIMM))]
 "TARGET_AVX512F"
- "vfixupimm<ssemodesuffix>\t{%4, %3, %2, %0|%0, %2, %3, %4}";
+ "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
+;; Merge-masking packed vfixupimm: elements whose bit in mask operand 5 is
+;; set come from UNSPEC_FIXUPIMM, the rest keep the value of operand 1,
+;; which is tied to the destination by constraint "0".
+(define_insn "avx512f_fixupimm<mode>_mask<round_saeonly_name>"
+ [(set (match_operand:VF_512 0 "register_operand" "=v")
+ (vec_merge:VF_512
+ (unspec:VF_512
+ [(match_operand:VF_512 1 "register_operand" "0")
+ (match_operand:VF_512 2 "register_operand" "v")
+ (match_operand:<ssefixupmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
+ (match_operand:SI 4 "const_0_to_255_operand")]
+ UNSPEC_FIXUPIMM)
+ (match_dup 1)
+ (match_operand:<avx512fmaskmode> 5 "register_operand" "k")))]
+ "TARGET_AVX512F"
+ "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
 [(set_attr "prefix" "evex")
 (set_attr "mode" "<MODE>")])
-(define_insn "avx512f_sfixupimm<mode>"
+;; Zero-masking entry point for scalar vfixupimm.  Forwards its operands to
+;; the _maskz_1 generator with an all-zero vector inserted before the mask.
+;; <round_saeonly_expand_operand6> is pasted textually after the mask
+;; argument; its expansion is not visible in this hunk.
+(define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name5>"
+ [(match_operand:VF_128 0 "register_operand")
+ (match_operand:VF_128 1 "register_operand")
+ (match_operand:VF_128 2 "register_operand")
+ (match_operand:<ssefixupmode> 3 "<round_saeonly_expand_predicate5>")
+ (match_operand:SI 4 "const_0_to_255_operand")
+ (match_operand:<avx512fmaskmode> 5 "register_operand")]
+ "TARGET_AVX512F"
+{
+ emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name5> (
+ operands[0], operands[1], operands[2], operands[3],
+ operands[4], CONST0_RTX (<MODE>mode), operands[5]
+ <round_saeonly_expand_operand6>));
+ DONE;
+})
+
+;; Scalar vfixupimm on 128-bit FP vectors: the vec_merge with constant
+;; mask 1 takes element 0 from UNSPEC_FIXUPIMM and every other element from
+;; operand 1, which is tied to the destination by constraint "0".
+(define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
 [(set (match_operand:VF_128 0 "register_operand" "=v")
 (vec_merge:VF_128
 (unspec:VF_128
 [(match_operand:VF_128 1 "register_operand" "0")
 (match_operand:VF_128 2 "register_operand" "v")
- (match_operand:<ssefixupmode> 3 "nonimmediate_operand" "vm")
+ (match_operand:<ssefixupmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
 (match_operand:SI 4 "const_0_to_255_operand")]
 UNSPEC_FIXUPIMM)
 (match_dup 1)
 (const_int 1)))]
 "TARGET_AVX512F"
- "vfixupimm<ssescalarmodesuffix>\t{%4, %3, %2, %0|%0, %2, %3, %4}";
+ "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
 [(set_attr "prefix" "evex")
 (set_attr "mode" "<ssescalarmode>")])
-(define_insn "avx512f_rndscale<mode>"
+;; Merge-masking scalar vfixupimm, expressed as two nested vec_merges.
+;; The inner one (constant mask 1) takes only element 0 from
+;; UNSPEC_FIXUPIMM; the outer one then selects between that result and
+;; operand 1 under mask register operand 5.
+(define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
+ [(set (match_operand:VF_128 0 "register_operand" "=v")
+ (vec_merge:VF_128
+ (vec_merge:VF_128
+ (unspec:VF_128
+ [(match_operand:VF_128 1 "register_operand" "0")
+ (match_operand:VF_128 2 "register_operand" "v")
+ (match_operand:<ssefixupmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
+ (match_operand:SI 4 "const_0_to_255_operand")]
+ UNSPEC_FIXUPIMM)
+ (match_dup 1)
+ (const_int 1))
+ (match_dup 1)
+ (match_operand:<avx512fmaskmode> 5 "register_operand" "k")))]
+ "TARGET_AVX512F"
+ "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<ssescalarmode>")])
+
+;; Packed vrndscale on 512-bit FP vectors: UNSPEC_ROUND of source operand 1
+;; and the 8-bit immediate operand 2.  The SAE slot sits next to %1 and the
+;; mask slot follows %0.
+(define_insn "avx512f_rndscale<mode><mask_name><round_saeonly_name>"
 [(set (match_operand:VF_512 0 "register_operand" "=v")
 (unspec:VF_512
- [(match_operand:VF_512 1 "nonimmediate_operand" "vm")
+ [(match_operand:VF_512 1 "nonimmediate_operand" "<round_saeonly_constraint>")
 (match_operand:SI 2 "const_0_to_255_operand")]
 UNSPEC_ROUND))]
 "TARGET_AVX512F"
- "vrndscale<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
 [(set_attr "length_immediate" "1")
 (set_attr "prefix" "evex")
 (set_attr "mode" "<MODE>")])
-(define_insn "avx512f_rndscale<mode>"
+;; Scalar vrndscale on 128-bit FP vectors: the vec_merge with constant
+;; mask 1 takes element 0 from UNSPEC_ROUND (operands 1, 2 and immediate 3)
+;; and every other element from operand 1.
+(define_insn "<mask_scalar_codefor>avx512f_rndscale<mode><mask_scalar_name><round_saeonly_name>"
 [(set (match_operand:VF_128 0 "register_operand" "=v")
 (vec_merge:VF_128
 (unspec:VF_128
 [(match_operand:VF_128 1 "register_operand" "v")
- (match_operand:VF_128 2 "nonimmediate_operand" "vm")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_saeonly_constraint>")
 (match_operand:SI 3 "const_0_to_255_operand")]
 UNSPEC_ROUND)
 (match_dup 1)
 (const_int 1)))]
 "TARGET_AVX512F"
- "vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_mask_scalar_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %2<round_saeonly_mask_scalar_op4>, %3}"
 [(set_attr "length_immediate" "1")
 (set_attr "prefix" "evex")
 (set_attr "mode" "<MODE>")])
;; One bit in mask selects 2 elements.
-(define_insn "avx512f_shufps512_1"
+(define_insn "avx512f_shufps512_1<mask_name>"
[(set (match_operand:V16SF 0 "register_operand" "=v")
(vec_select:V16SF
(vec_concat:V32SF
@@ -6106,14 +7077,37 @@
mask |= (INTVAL (operands[6]) - 16) << 6;
operands[3] = GEN_INT (mask);
- return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+ return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
}
[(set_attr "type" "sselog")
(set_attr "length_immediate" "1")
(set_attr "prefix" "evex")
(set_attr "mode" "V16SF")])
-(define_insn "avx512f_shufpd512_1"
+;; Masked vshufpd on V8DF.  Decodes the 8-bit immediate (operand 3) into
+;; the eight explicit element selectors that avx512f_shufpd512_1_mask
+;; takes, then passes operand 4 and the QImode mask (operand 5) through.
+(define_expand "avx512f_shufpd512_mask"
+  [(match_operand:V8DF 0 "register_operand")
+   (match_operand:V8DF 1 "register_operand")
+   (match_operand:V8DF 2 "nonimmediate_operand")
+   (match_operand:SI 3 "const_0_to_255_operand")
+   (match_operand:V8DF 4 "register_operand")
+   (match_operand:QI 5 "register_operand")]
+  "TARGET_AVX512F"
+{
+  const int imm = INTVAL (operands[3]);
+
+  /* Selector I is a fixed base (0, 8, 2, 10, 4, 12, 6, 14) plus bit I of
+     the immediate.  */
+  emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
+    GEN_INT (imm & 1),
+    GEN_INT (8 + ((imm >> 1) & 1)),
+    GEN_INT (2 + ((imm >> 2) & 1)),
+    GEN_INT (10 + ((imm >> 3) & 1)),
+    GEN_INT (4 + ((imm >> 4) & 1)),
+    GEN_INT (12 + ((imm >> 5) & 1)),
+    GEN_INT (6 + ((imm >> 6) & 1)),
+    GEN_INT (14 + ((imm >> 7) & 1)),
+    operands[4], operands[5]));
+  DONE;
+})
+
+(define_insn "avx512f_shufpd512_1<mask_name>"
[(set (match_operand:V8DF 0 "register_operand" "=v")
(vec_select:V8DF
(vec_concat:V16DF
@@ -6140,7 +7134,7 @@
mask |= (INTVAL (operands[10]) - 14) << 7;
operands[3] = GEN_INT (mask);
- return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+ return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
}
[(set_attr "type" "sselog")
(set_attr "length_immediate" "1")
@@ -6220,7 +7214,7 @@
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
-(define_insn "avx512f_interleave_highv8di"
+(define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
[(set (match_operand:V8DI 0 "register_operand" "=v")
(vec_select:V8DI
(vec_concat:V16DI
@@ -6231,7 +7225,7 @@
(const_int 5) (const_int 13)
(const_int 7) (const_int 15)])))]
"TARGET_AVX512F"
- "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
+ "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
@@ -6270,7 +7264,7 @@
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
-(define_insn "avx512f_interleave_lowv8di"
+(define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
[(set (match_operand:V8DI 0 "register_operand" "=v")
(vec_select:V8DI
(vec_concat:V16DI
@@ -6281,7 +7275,7 @@
(const_int 4) (const_int 12)
(const_int 6) (const_int 14)])))]
"TARGET_AVX512F"
- "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
+ "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
@@ -6652,6 +7646,20 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+;; Masked truncating move (vpmov*).  Alternative 0 writes a register and
+;; merges with operand 2, which is either tied to the destination ("0") or
+;; matches constraint "C" (presumably the zero vector -- confirm in
+;; constraints.md).  Alternative 1 stores to memory with operand 2 tied to
+;; the destination.  %N2 prints "{z}" when operand 2 is the zero constant.
+(define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
+ [(set (match_operand:PMOV_DST_MODE 0 "nonimmediate_operand" "=v,m")
+ (vec_merge:PMOV_DST_MODE
+ (any_truncate:PMOV_DST_MODE
+ (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
+ (match_operand:PMOV_DST_MODE 2 "vector_move_operand" "0C,0")
+ (match_operand:<avx512fmaskmode> 3 "register_operand" "k,k")))]
+ "TARGET_AVX512F"
+ "vpmov<trunsuffix><pmov_suff>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "none,store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn "*avx512f_<code>v8div16qi2"
[(set (match_operand:V16QI 0 "register_operand" "=v")
(vec_concat:V16QI
@@ -6685,6 +7693,55 @@
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
+;; Masked V8DI -> V8QI truncation into a V16QI register.  The low eight
+;; bytes are the truncated values merged, under QImode mask operand 3,
+;; with the low eight bytes of operand 2; the high eight bytes are a
+;; constant zero vector.  %N2 prints "{z}" when operand 2 is zero.
+(define_insn "avx512f_<code>v8div16qi2_mask"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (vec_concat:V16QI
+ (vec_merge:V8QI
+ (any_truncate:V8QI
+ (match_operand:V8DI 1 "register_operand" "v"))
+ (vec_select:V8QI
+ (match_operand:V16QI 2 "vector_move_operand" "0C")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))
+ (match_operand:QI 3 "register_operand" "k"))
+ (const_vector:V8QI [(const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)
+ (const_int 0) (const_int 0)])))]
+ "TARGET_AVX512F"
+ "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+;; Memory-destination form of the masked V8DI -> V8QI truncation.  Both the
+;; masked-off bytes of the low half and the entire high half are taken
+;; from the destination itself (match_dup 0), so only low-half bytes whose
+;; bit in mask operand 2 is set change in memory.
+(define_insn "*avx512f_<code>v8div16qi2_store_mask"
+ [(set (match_operand:V16QI 0 "memory_operand" "=m")
+ (vec_concat:V16QI
+ (vec_merge:V8QI
+ (any_truncate:V8QI
+ (match_operand:V8DI 1 "register_operand" "v"))
+ (vec_select:V8QI
+ (match_dup 0)
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))
+ (match_operand:QI 2 "register_operand" "k"))
+ (vec_select:V8QI
+ (match_dup 0)
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)]))))]
+ "TARGET_AVX512F"
+ "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "memory" "store")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel integral arithmetic
@@ -6699,27 +7756,27 @@
"TARGET_SSE2"
"operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
-(define_expand "<plusminus_insn><mode>3"
+;; Vector integer add/subtract expander.  The preparation statement hands
+;; the operands to ix86_fixup_binary_operands_no_copy before the pattern is
+;; emitted.  <mask_mode512bit_condition> is a substitution attribute whose
+;; expansion is not visible in this hunk.
+(define_expand "<plusminus_insn><mode>3<mask_name>"
 [(set (match_operand:VI_AVX2 0 "register_operand")
 (plusminus:VI_AVX2
 (match_operand:VI_AVX2 1 "nonimmediate_operand")
 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
-(define_insn "*<plusminus_insn><mode>3"
+;; Vector integer add/subtract.  Alternative 0 is the two-operand non-AVX
+;; form with operand 1 tied to the destination; alternative 1 is the
+;; three-operand AVX form, the only one whose template carries the
+;; <mask_operand3> slot.
+(define_insn "*<plusminus_insn><mode>3<mask_name>"
 [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
 (plusminus:VI_AVX2
 (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,v")
 (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))]
- "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition>"
 "@
 p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
- vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
 [(set_attr "isa" "noavx,avx")
 (set_attr "type" "sseiadd")
 (set_attr "prefix_data16" "1,*")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<mask_prefix3>")
 (set_attr "mode" "<sseinsnmode>")])
(define_expand "<sse2_avx2>_<plusminus_insn><mode>3"
@@ -6809,7 +7866,7 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "vec_widen_umult_even_v16si"
+(define_expand "vec_widen_umult_even_v16si<mask_name>"
[(set (match_operand:V8DI 0 "register_operand")
(mult:V8DI
(zero_extend:V8DI
@@ -6829,7 +7886,7 @@
"TARGET_AVX512F"
"ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
-(define_insn "*vec_widen_umult_even_v16si"
+(define_insn "*vec_widen_umult_even_v16si<mask_name>"
[(set (match_operand:V8DI 0 "register_operand" "=v")
(mult:V8DI
(zero_extend:V8DI
@@ -6847,7 +7904,7 @@
(const_int 8) (const_int 10)
(const_int 12) (const_int 14)])))))]
"TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
- "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
+ "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "avx512f")
(set_attr "type" "sseimul")
(set_attr "prefix_extra" "1")
@@ -6924,7 +7981,7 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
-(define_expand "vec_widen_smult_even_v16si"
+(define_expand "vec_widen_smult_even_v16si<mask_name>"
[(set (match_operand:V8DI 0 "register_operand")
(mult:V8DI
(sign_extend:V8DI
@@ -6944,7 +8001,7 @@
"TARGET_AVX512F"
"ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
-(define_insn "*vec_widen_smult_even_v16si"
+(define_insn "*vec_widen_smult_even_v16si<mask_name>"
[(set (match_operand:V8DI 0 "register_operand" "=x")
(mult:V8DI
(sign_extend:V8DI
@@ -6962,7 +8019,7 @@
(const_int 8) (const_int 10)
(const_int 12) (const_int 14)])))))]
"TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
- "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
+ "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "avx512f")
(set_attr "type" "sseimul")
(set_attr "prefix_extra" "1")
@@ -7173,12 +8230,12 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
-(define_expand "mul<mode>3"
+(define_expand "mul<mode>3<mask_name>"
[(set (match_operand:VI4_AVX512F 0 "register_operand")
(mult:VI4_AVX512F
(match_operand:VI4_AVX512F 1 "general_vector_operand")
(match_operand:VI4_AVX512F 2 "general_vector_operand")))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && <mask_mode512bit_condition>"
{
if (TARGET_SSE4_1)
{
@@ -7195,19 +8252,19 @@
}
})
-(define_insn "*<sse4_1_avx2>_mul<mode>3"
+;; 32-bit element vector multiply (pmulld/vpmulld).  Alternative 0 is the
+;; two-operand non-AVX form; alternative 1 is the three-operand AVX form,
+;; the only one whose template carries the <mask_operand3> slot.
+(define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
 [(set (match_operand:VI4_AVX512F 0 "register_operand" "=x,v")
 (mult:VI4_AVX512F
 (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,v")
 (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
- "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>"
 "@
 pmulld\t{%2, %0|%0, %2}
- vpmulld\t{%2, %1, %0|%0, %1, %2}"
+ vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
 [(set_attr "isa" "noavx,avx")
 (set_attr "type" "sseimul")
 (set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<mask_prefix3>")
 (set_attr "btver2_decode" "vector,vector")
 (set_attr "mode" "<sseinsnmode>")])
@@ -7320,6 +8377,20 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "<sseinsnmode>")])
+;; Arithmetic right shift (vpsra) of 512-bit integer vectors by an SImode
+;; count.  Alternative 0 takes the count in a vector register with a
+;; register source; alternative 1 takes a constant count (constraint "N")
+;; and also allows a memory source.  length_immediate is 1 only when the
+;; count is a const_int.
+(define_insn "ashr<mode>3<mask_name>"
+ [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
+ (ashiftrt:VI48_512
+ (match_operand:VI48_512 1 "nonimmediate_operand" "v,vm")
+ (match_operand:SI 2 "nonmemory_operand" "v,N")))]
+ "TARGET_AVX512F && <mask_mode512bit_condition>"
+ "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ [(set_attr "type" "sseishft")
+ (set (attr "length_immediate")
+ (if_then_else (match_operand 2 "const_int_operand")
+ (const_string "1")
+ (const_string "0")))
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_insn "<shift_insn><mode>3"
[(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
(any_lshift:VI248_AVX2
@@ -7339,13 +8410,13 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "<shift_insn><mode>3"
+(define_insn "<shift_insn><mode>3<mask_name>"
[(set (match_operand:VI48_512 0 "register_operand" "=v,v")
(any_lshift:VI48_512
(match_operand:VI48_512 1 "register_operand" "v,m")
(match_operand:SI 2 "nonmemory_operand" "vN,N")))]
- "TARGET_AVX512F"
- "vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX512F && <mask_mode512bit_condition>"
+ "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "isa" "avx512f")
(set_attr "type" "sseishft")
(set (attr "length_immediate")
@@ -7355,6 +8426,7 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+
(define_expand "vec_shl_<mode>"
[(set (match_operand:VI_128 0 "register_operand")
(ashift:V1TI
@@ -7430,41 +8502,42 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_<rotate>v<mode>"
+;; Variable rotate of 512-bit integer vectors: the rotate count is itself a
+;; vector (operand 2, register or memory) of the same mode as the source.
+(define_insn "avx512f_<rotate>v<mode><mask_name>"
 [(set (match_operand:VI48_512 0 "register_operand" "=v")
 (any_rotate:VI48_512
 (match_operand:VI48_512 1 "register_operand" "v")
 (match_operand:VI48_512 2 "nonimmediate_operand" "vm")))]
 "TARGET_AVX512F"
- "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
 [(set_attr "prefix" "evex")
 (set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_<rotate><mode>"
+;; Rotate of 512-bit integer vectors by an immediate count (operand 2,
+;; accepted by const_0_to_255_operand); the source may be a register or
+;; memory.
+(define_insn "avx512f_<rotate><mode><mask_name>"
 [(set (match_operand:VI48_512 0 "register_operand" "=v")
 (any_rotate:VI48_512
 (match_operand:VI48_512 1 "nonimmediate_operand" "vm")
 (match_operand:SI 2 "const_0_to_255_operand")))]
 "TARGET_AVX512F"
- "vp<rotate><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
 [(set_attr "prefix" "evex")
 (set_attr "mode" "<sseinsnmode>")])
-(define_expand "<code><mode>3"
+;; Integer max/min expander for the VI124_256_48_512 modes.  The
+;; preparation statement hands the operands to
+;; ix86_fixup_binary_operands_no_copy.
+;; NOTE(review): this is an integer operation yet takes <round_name>;
+;; confirm an embedded-rounding variant is really intended here.
+(define_expand "<code><mode>3<mask_name><round_name>"
 [(set (match_operand:VI124_256_48_512 0 "register_operand")
 (maxmin:VI124_256_48_512
 (match_operand:VI124_256_48_512 1 "nonimmediate_operand")
 (match_operand:VI124_256_48_512 2 "nonimmediate_operand")))]
- "TARGET_AVX2"
+ "TARGET_AVX2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
-(define_insn "*avx2_<code><mode>3"
+(define_insn "*avx2_<code><mode>3<mask_name><round_name>"
[(set (match_operand:VI124_256_48_512 0 "register_operand" "=v")
(maxmin:VI124_256_48_512
(match_operand:VI124_256_48_512 1 "nonimmediate_operand" "%v")
- (match_operand:VI124_256_48_512 2 "nonimmediate_operand" "vm")))]
- "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
- "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ (match_operand:VI124_256_48_512 2 "nonimmediate_operand" "<round_constraint>")))]
+ "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+ && <mask_mode512bit_condition> && <round_mode512bit_condition>"
+ "vp<maxmin_int><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
[(set_attr "type" "sseiadd")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "maybe_evex")
@@ -7682,7 +8755,7 @@
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
-(define_expand "avx512f_eq<mode>3"
+(define_expand "avx512f_eq<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand")
(unspec:<avx512fmaskmode>
[(match_operand:VI48_512 1 "register_operand")
@@ -7691,14 +8764,14 @@
"TARGET_AVX512F"
"ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
-(define_insn "avx512f_eq<mode>3_1"
+(define_insn "avx512f_eq<mode>3<mask_scalar_merge_name>_1"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(unspec:<avx512fmaskmode>
[(match_operand:VI48_512 1 "register_operand" "%v")
(match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
UNSPEC_MASKED_EQ))]
"TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
- "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
[(set_attr "type" "ssecmp")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "evex")
@@ -7778,13 +8851,13 @@
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
-(define_insn "avx512f_gt<mode>3"
+(define_insn "avx512f_gt<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(unspec:<avx512fmaskmode>
[(match_operand:VI48_512 1 "register_operand" "v")
(match_operand:VI48_512 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
"TARGET_AVX512F"
- "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
[(set_attr "type" "ssecmp")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "evex")
@@ -8000,19 +9073,19 @@
operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
})
-(define_expand "<sse2_avx2>_andnot<mode>3"
+;; Vector and-not expander: computes (~operand 1) & operand 2.  It has no
+;; preparation statements, so the pattern is emitted as written.
+(define_expand "<sse2_avx2>_andnot<mode>3<mask_name>"
 [(set (match_operand:VI_AVX2 0 "register_operand")
 (and:VI_AVX2
 (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
 (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
- "TARGET_SSE2")
+ "TARGET_SSE2 && <mask_mode512bit_condition>")
-(define_insn "*andnot<mode>3"
+(define_insn "*andnot<mode>3<mask_name>"
[(set (match_operand:VI 0 "register_operand" "=x,v")
(and:VI
(not:VI (match_operand:VI 1 "register_operand" "0,v"))
(match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
- "TARGET_SSE "
+ "TARGET_SSE && <mask_mode512bit_condition>"
{
static char buf[64];
const char *ops;
@@ -8052,7 +9125,7 @@
ops = "%s\t{%%2, %%0|%%0, %%2}";
break;
case 1:
- ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
+ ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
break;
default:
gcc_unreachable ();
@@ -8069,7 +9142,7 @@
(eq_attr "mode" "TI"))
(const_string "1")
(const_string "*")))
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<mask_prefix3>")
(set (attr "mode")
(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(const_string "<ssePSmode>")
@@ -8097,12 +9170,12 @@
DONE;
})
-(define_insn "*<code><mode>3"
+(define_insn "<mask_codefor><code><mode>3<mask_name>"
[(set (match_operand:VI 0 "register_operand" "=x,v")
(any_logic:VI
(match_operand:VI 1 "nonimmediate_operand" "%0,v")
(match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
- "TARGET_SSE
+ "TARGET_SSE && <mask_mode512bit_condition>
&& ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
{
static char buf[64];
@@ -8145,7 +9218,7 @@
ops = "%s\t{%%2, %%0|%%0, %%2}";
break;
case 1:
- ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
+ ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
break;
default:
gcc_unreachable ();
@@ -8162,7 +9235,7 @@
(eq_attr "mode" "TI"))
(const_string "1")
(const_string "*")))
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<mask_prefix3>")
(set (attr "mode")
(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(const_string "<ssePSmode>")
@@ -8179,25 +9252,25 @@
]
(const_string "<sseinsnmode>")))])
-(define_insn "avx512f_testm<mode>3"
+;; vptestm: sets a mask register ("k") from two VI48_512 inputs.
+;; <mask_scalar_merge_name> / <mask_scalar_merge_operand3> are presumably
+;; subst attributes that append an optional mask operand after %0
+;; -- TODO confirm against subst.md.
+(define_insn "avx512f_testm<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(unspec:<avx512fmaskmode>
[(match_operand:VI48_512 1 "register_operand" "v")
(match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
UNSPEC_TESTM))]
"TARGET_AVX512F"
- "vptestm<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
[(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_testnm<mode>3"
+;; vptestnm: sets a mask register ("k") from two VI48_512 inputs.
+;; NOTE(review): this pattern is gated on TARGET_AVX512CD and uses the
+;; "%v" mnemonic prefix, whereas the sibling avx512f_testm pattern uses
+;; TARGET_AVX512F and a plain "v" mnemonic -- confirm which ISA flag is
+;; intended.
+(define_insn "avx512f_testnm<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(unspec:<avx512fmaskmode>
[(match_operand:VI48_512 1 "register_operand" "v")
(match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
UNSPEC_TESTNM))]
"TARGET_AVX512CD"
- "%vptestnm<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ "%vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
[(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
@@ -8470,7 +9543,7 @@
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
-(define_insn "avx512f_interleave_highv16si"
+(define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
[(set (match_operand:V16SI 0 "register_operand" "=v")
(vec_select:V16SI
(vec_concat:V32SI
@@ -8485,7 +9558,7 @@
(const_int 14) (const_int 30)
(const_int 15) (const_int 31)])))]
"TARGET_AVX512F"
- "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
+ "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
@@ -8525,7 +9598,7 @@
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
-(define_insn "avx512f_interleave_lowv16si"
+(define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
[(set (match_operand:V16SI 0 "register_operand" "=v")
(vec_select:V16SI
(vec_concat:V32SI
@@ -8540,7 +9613,7 @@
(const_int 12) (const_int 28)
(const_int 13) (const_int 29)])))]
"TARGET_AVX512F"
- "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
+ "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
@@ -8663,7 +9736,45 @@
(set_attr "prefix" "orig,orig,vex,vex")
(set_attr "mode" "TI")])
-(define_insn "avx512f_vinsert<shuffletype>32x4_1"
+;; Masked vinsert<shuffletype>32x4 expander.  Operand 3 is an immediate
+;; in 0..3; each value is translated into a fixed constant (0xFFF,
+;; 0xF0FF, 0xFF0F, 0xFFF0) that is passed as operand 3 of the
+;; ..._1_mask insn.  Operands 4 and 5 are forwarded unchanged
+;; (presumably the merge source and the write mask -- TODO confirm).
+;; NOTE(review): these constants presumably have to match the decoding
+;; done by the avx512f_vinsert<shuffletype>32x4_1 insn's output code;
+;; verify when changing either side.
+(define_expand "avx512f_vinsert<shuffletype>32x4_mask"
+ [(match_operand:V16FI 0 "register_operand")
+ (match_operand:V16FI 1 "register_operand")
+ (match_operand:<ssequartermode> 2 "nonimmediate_operand")
+ (match_operand:SI 3 "const_0_to_3_operand")
+ (match_operand:V16FI 4 "register_operand")
+ (match_operand:<avx512fmaskmode> 5 "register_operand")]
+ "TARGET_AVX512F"
+{
+ /* One case per legal immediate; the predicate on operand 3 rules out
+ anything else, hence gcc_unreachable in the default arm.  */
+ switch (INTVAL (operands[3]))
+ {
+ case 0:
+ emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
+ operands[1], operands[2], GEN_INT (0xFFF), operands[4],
+ operands[5]));
+ break;
+ case 1:
+ emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
+ operands[1], operands[2], GEN_INT (0xF0FF), operands[4],
+ operands[5]));
+ break;
+ case 2:
+ emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
+ operands[1], operands[2], GEN_INT (0xFF0F), operands[4],
+ operands[5]));
+ break;
+ case 3:
+ emit_insn (gen_avx512f_vinsert<shuffletype>32x4_1_mask (operands[0],
+ operands[1], operands[2], GEN_INT (0xFFF0), operands[4],
+ operands[5]));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ DONE;
+
+})
+
+(define_insn "<mask_codefor>avx512f_vinsert<shuffletype>32x4_1<mask_name>"
[(set (match_operand:V16FI 0 "register_operand" "=v")
(vec_merge:V16FI
(match_operand:V16FI 1 "register_operand" "v")
@@ -8686,14 +9797,35 @@
operands[3] = GEN_INT (mask);
- return "vinsert<shuffletype>32x4\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+ return "vinsert<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
}
[(set_attr "type" "sselog")
(set_attr "length_immediate" "1")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "vec_set_lo_<mode>"
+;; Masked vinsert<shuffletype>64x4 expander.  Operand 3 is a 0/1
+;; immediate that only selects which insn is emitted:
+;; 0 -> vec_set_lo_<mode>_mask, otherwise vec_set_hi_<mode>_mask.
+;; The immediate itself is not forwarded; operands 4 and 5 are passed
+;; through as the last two arguments (presumably merge source and
+;; write mask -- TODO confirm).
+(define_expand "avx512f_vinsert<shuffletype>64x4_mask"
+ [(match_operand:V8FI 0 "register_operand")
+ (match_operand:V8FI 1 "register_operand")
+ (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
+ (match_operand:SI 3 "const_0_to_1_operand")
+ (match_operand:V8FI 4 "register_operand")
+ (match_operand:<avx512fmaskmode> 5 "register_operand")]
+ "TARGET_AVX512F"
+{
+ int mask = INTVAL (operands[3]);
+ if (mask == 0)
+ emit_insn (gen_vec_set_lo_<mode>_mask
+ (operands[0], operands[1], operands[2],
+ operands[4], operands[5]));
+ else
+ emit_insn (gen_vec_set_hi_<mode>_mask
+ (operands[0], operands[1], operands[2],
+ operands[4], operands[5]));
+ DONE;
+})
+
+(define_insn "vec_set_lo_<mode><mask_name>"
[(set (match_operand:V8FI 0 "register_operand" "=v")
(vec_concat:V8FI
(match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
@@ -8702,13 +9834,13 @@
(parallel [(const_int 4) (const_int 5)
(const_int 6) (const_int 7)]))))]
"TARGET_AVX512F"
- "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0|%0, %1, %2, $0x0}"
+ "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x0}"
[(set_attr "type" "sselog")
(set_attr "length_immediate" "1")
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "vec_set_hi_<mode>"
+(define_insn "vec_set_hi_<mode><mask_name>"
[(set (match_operand:V8FI 0 "register_operand" "=v")
(vec_concat:V8FI
(match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
@@ -8717,13 +9849,37 @@
(parallel [(const_int 0) (const_int 1)
(const_int 2) (const_int 3)]))))]
"TARGET_AVX512F"
- "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0|%0, %1, %2, $0x1}"
+ "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, $0x1}"
[(set_attr "type" "sselog")
(set_attr "length_immediate" "1")
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "avx512f_shuf_<shuffletype>64x2_1"
+;; Masked vshuf<shuffletype>64x2 expander.  The 8-bit immediate
+;; (operand 3) is split into four 2-bit fields.  Each field f becomes
+;; the index pair (2*f, 2*f + 1); the pairs built from bits 4-5 and
+;; 6-7 are additionally offset by 8.  The eight indices are passed to
+;; the ..._1_mask insn, followed by operands 4 and 5 unchanged.
+(define_expand "avx512f_shuf_<shuffletype>64x2_mask"
+ [(match_operand:V8FI 0 "register_operand")
+ (match_operand:V8FI 1 "register_operand")
+ (match_operand:V8FI 2 "nonimmediate_operand")
+ (match_operand:SI 3 "const_0_to_255_operand")
+ (match_operand:V8FI 4 "register_operand")
+ (match_operand:QI 5 "register_operand")]
+ "TARGET_AVX512F"
+{
+ int mask = INTVAL (operands[3]);
+ emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
+ (operands[0], operands[1], operands[2],
+ GEN_INT (((mask >> 0) & 3) * 2),
+ GEN_INT (((mask >> 0) & 3) * 2 + 1),
+ GEN_INT (((mask >> 2) & 3) * 2),
+ GEN_INT (((mask >> 2) & 3) * 2 + 1),
+ GEN_INT (((mask >> 4) & 3) * 2 + 8),
+ GEN_INT (((mask >> 4) & 3) * 2 + 9),
+ GEN_INT (((mask >> 6) & 3) * 2 + 8),
+ GEN_INT (((mask >> 6) & 3) * 2 + 9),
+ operands[4], operands[5]));
+ DONE;
+})
+
+(define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
[(set (match_operand:V8FI 0 "register_operand" "=v")
(vec_select:V8FI
(vec_concat:<ssedoublemode>
@@ -8750,14 +9906,46 @@
mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
operands[3] = GEN_INT (mask);
- return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+ return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
}
[(set_attr "type" "sselog")
(set_attr "length_immediate" "1")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_shuf_<shuffletype>32x4_1"
+;; Masked vshuf<shuffletype>32x4 expander.  The 8-bit immediate
+;; (operand 3) is split into four 2-bit fields.  Each field f becomes
+;; the four consecutive indices 4*f .. 4*f + 3; the groups built from
+;; bits 4-5 and 6-7 are additionally offset by 16.  The sixteen indices
+;; are passed to the ..._1_mask insn, followed by operands 4 and 5
+;; unchanged.
+(define_expand "avx512f_shuf_<shuffletype>32x4_mask"
+ [(match_operand:V16FI 0 "register_operand")
+ (match_operand:V16FI 1 "register_operand")
+ (match_operand:V16FI 2 "nonimmediate_operand")
+ (match_operand:SI 3 "const_0_to_255_operand")
+ (match_operand:V16FI 4 "register_operand")
+ (match_operand:HI 5 "register_operand")]
+ "TARGET_AVX512F"
+{
+ int mask = INTVAL (operands[3]);
+ emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
+ (operands[0], operands[1], operands[2],
+ GEN_INT (((mask >> 0) & 3) * 4),
+ GEN_INT (((mask >> 0) & 3) * 4 + 1),
+ GEN_INT (((mask >> 0) & 3) * 4 + 2),
+ GEN_INT (((mask >> 0) & 3) * 4 + 3),
+ GEN_INT (((mask >> 2) & 3) * 4),
+ GEN_INT (((mask >> 2) & 3) * 4 + 1),
+ GEN_INT (((mask >> 2) & 3) * 4 + 2),
+ GEN_INT (((mask >> 2) & 3) * 4 + 3),
+ GEN_INT (((mask >> 4) & 3) * 4 + 16),
+ GEN_INT (((mask >> 4) & 3) * 4 + 17),
+ GEN_INT (((mask >> 4) & 3) * 4 + 18),
+ GEN_INT (((mask >> 4) & 3) * 4 + 19),
+ GEN_INT (((mask >> 6) & 3) * 4 + 16),
+ GEN_INT (((mask >> 6) & 3) * 4 + 17),
+ GEN_INT (((mask >> 6) & 3) * 4 + 18),
+ GEN_INT (((mask >> 6) & 3) * 4 + 19),
+ operands[4], operands[5]));
+ DONE;
+})
+
+(define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
[(set (match_operand:V16FI 0 "register_operand" "=v")
(vec_select:V16FI
(vec_concat:<ssedoublemode>
@@ -8800,14 +9988,44 @@
mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
operands[3] = GEN_INT (mask);
- return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+ return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
}
[(set_attr "type" "sselog")
(set_attr "length_immediate" "1")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_pshufd_1"
+;; Masked vpshufd expander for V16SI.  The 8-bit immediate (operand 2)
+;; is split into four 2-bit fields; the same four fields are emitted
+;; four times with offsets 0, 4, 8 and 12, giving the sixteen indices
+;; expected by avx512f_pshufd_1_mask.  Operands 3 and 4 are forwarded
+;; unchanged as the trailing arguments.
+(define_expand "avx512f_pshufdv3_mask"
+ [(match_operand:V16SI 0 "register_operand")
+ (match_operand:V16SI 1 "nonimmediate_operand")
+ (match_operand:SI 2 "const_0_to_255_operand")
+ (match_operand:V16SI 3 "register_operand")
+ (match_operand:HI 4 "register_operand")]
+ "TARGET_AVX512F"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT ((mask >> 4) & 3),
+ GEN_INT ((mask >> 6) & 3),
+ GEN_INT (((mask >> 0) & 3) + 4),
+ GEN_INT (((mask >> 2) & 3) + 4),
+ GEN_INT (((mask >> 4) & 3) + 4),
+ GEN_INT (((mask >> 6) & 3) + 4),
+ GEN_INT (((mask >> 0) & 3) + 8),
+ GEN_INT (((mask >> 2) & 3) + 8),
+ GEN_INT (((mask >> 4) & 3) + 8),
+ GEN_INT (((mask >> 6) & 3) + 8),
+ GEN_INT (((mask >> 0) & 3) + 12),
+ GEN_INT (((mask >> 2) & 3) + 12),
+ GEN_INT (((mask >> 4) & 3) + 12),
+ GEN_INT (((mask >> 6) & 3) + 12),
+ operands[3], operands[4]));
+ DONE;
+})
+
+(define_insn "avx512f_pshufd_1<mask_name>"
[(set (match_operand:V16SI 0 "register_operand" "=v")
(vec_select:V16SI
(match_operand:V16SI 1 "nonimmediate_operand" "vm")
@@ -8848,7 +10066,7 @@
mask |= INTVAL (operands[5]) << 6;
operands[2] = GEN_INT (mask);
- return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
+ return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
}
[(set_attr "type" "sselog1")
(set_attr "prefix" "evex")
@@ -10299,12 +11517,12 @@
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
(set_attr "mode" "DI")])
-(define_insn "abs<mode>2"
+(define_insn "abs<mode>2<mask_name>"
[(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand" "=v")
(abs:VI124_AVX2_48_AVX512F
(match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand" "vm")))]
- "TARGET_SSSE3"
- "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
+ "TARGET_SSSE3 && <mask_mode512bit_condition>"
+ "%vpabs<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "sselog1")
(set_attr "prefix_data16" "1")
(set_attr "prefix_extra" "1")
@@ -10645,12 +11863,12 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
-(define_insn "avx512f_<code>v16qiv16si2"
+;; Extension (any_extend) of a V16QI source to V16SI.  <mask_operand2>
+;; is presumably a subst attribute that appends the optional mask
+;; after %0 -- TODO confirm against subst.md.
+(define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
[(set (match_operand:V16SI 0 "register_operand" "=v")
(any_extend:V16SI
(match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
"TARGET_AVX512F"
- "vpmov<extsuffix>bd\t{%1, %0|%0, %q1}"
+ "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
@@ -10685,12 +11903,12 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
-(define_insn "avx512f_<code>v16hiv16si2"
+;; Extension (any_extend) of a V16HI source to V16SI.  <mask_operand2>
+;; is presumably a subst attribute that appends the optional mask
+;; after %0 -- TODO confirm against subst.md.
+(define_insn "avx512f_<code>v16hiv16si2<mask_name>"
[(set (match_operand:V16SI 0 "register_operand" "=v")
(any_extend:V16SI
(match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
"TARGET_AVX512F"
- "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
+ "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
@@ -10720,7 +11938,7 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
-(define_insn "avx512f_<code>v8qiv8di2"
+(define_insn "avx512f_<code>v8qiv8di2<mask_name>"
[(set (match_operand:V8DI 0 "register_operand" "=v")
(any_extend:V8DI
(vec_select:V8QI
@@ -10730,7 +11948,7 @@
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)]))))]
"TARGET_AVX512F"
- "vpmov<extsuffix>bq\t{%1, %0|%0, %k1}"
+ "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
@@ -10762,12 +11980,12 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
-(define_insn "avx512f_<code>v8hiv8di2"
+;; Extension (any_extend) of a V8HI source to V8DI.  <mask_operand2>
+;; is presumably a subst attribute that appends the optional mask
+;; after %0 -- TODO confirm against subst.md.
+(define_insn "avx512f_<code>v8hiv8di2<mask_name>"
[(set (match_operand:V8DI 0 "register_operand" "=v")
(any_extend:V8DI
(match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
"TARGET_AVX512F"
- "vpmov<extsuffix>wq\t{%1, %0|%0, %q1}"
+ "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
@@ -10799,12 +12017,12 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
-(define_insn "avx512f_<code>v8siv8di2"
+;; Extension (any_extend) of a V8SI source to V8DI.  <mask_operand2>
+;; is presumably a subst attribute that appends the optional mask
+;; after %0 -- TODO confirm against subst.md.
+(define_insn "avx512f_<code>v8siv8di2<mask_name>"
[(set (match_operand:V8DI 0 "register_operand" "=v")
(any_extend:V8DI
(match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
"TARGET_AVX512F"
- "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
+ "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
@@ -11587,33 +12805,33 @@
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
-(define_insn "*avx512er_exp2<mode>"
+;; vexp2 (UNSPEC_EXP2) on VF_512, gated on TARGET_AVX512ER.  The
+;; <round_*> attributes presumably come from the embedded-rounding
+;; subst and replace the fixed "vm" constraint -- TODO confirm.
+;; NOTE(review): the sibling rcp28/rsqrt28 patterns carry a
+;; <mask_codefor> name prefix and this one does not -- confirm intended.
+(define_insn "avx512er_exp2<mode><mask_name><round_name>"
[(set (match_operand:VF_512 0 "register_operand" "=v")
(unspec:VF_512
- [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
+ [(match_operand:VF_512 1 "nonimmediate_operand" "<round_constraint>")]
UNSPEC_EXP2))]
"TARGET_AVX512ER"
- "vexp2<ssemodesuffix>\t{%1, %0|%0, %1}"
+ "vexp2<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
[(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
-(define_insn "*avx512er_rcp28<mode>"
+;; vrcp28 (UNSPEC_RCP28) on VF_512, gated on TARGET_AVX512ER.  The
+;; <round_*> attributes presumably come from the embedded-rounding
+;; subst and replace the fixed "vm" constraint -- TODO confirm.
+(define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_name>"
[(set (match_operand:VF_512 0 "register_operand" "=v")
(unspec:VF_512
- [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
+ [(match_operand:VF_512 1 "nonimmediate_operand" "<round_constraint>")]
UNSPEC_RCP28))]
"TARGET_AVX512ER"
- "vrcp28<ssemodesuffix>\t{%1, %0|%0, %1}"
+ "vrcp28<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
[(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512er_rsqrt28<mode>"
+;; vrsqrt28 (UNSPEC_RSQRT28) on VF_512, gated on TARGET_AVX512ER.  The
+;; <round_*> attributes presumably come from the embedded-rounding
+;; subst and replace the fixed "vm" constraint -- TODO confirm.
+(define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_name>"
[(set (match_operand:VF_512 0 "register_operand" "=v")
(unspec:VF_512
- [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
+ [(match_operand:VF_512 1 "nonimmediate_operand" "<round_constraint>")]
UNSPEC_RSQRT28))]
"TARGET_AVX512ER"
- "vrsqrt28<ssemodesuffix>\t{%1, %0|%0, %1}"
+ "vrsqrt28<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
[(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
@@ -12663,16 +13881,16 @@
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "<avx2_avx512f>_permvar<mode>"
+;; Variable permute (UNSPEC_VPERMVAR): operand 1 is the data, operand 2
+;; the integer index vector.  Note the assembler template lists %1
+;; before %2 in AT&T order.  The "prefix" attribute is no longer a fixed
+;; "vex"; <mask_prefix2> is presumably chosen by the mask subst
+;; -- TODO confirm.
+(define_insn "<avx2_avx512f>_permvar<mode><mask_name>"
[(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
(unspec:VI48F_256_512
[(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
(match_operand:<sseintvecmode> 2 "register_operand" "v")]
UNSPEC_VPERMVAR))]
- "TARGET_AVX2"
- "vperm<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
[(set_attr "type" "sselog")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "<mask_prefix2>")
(set_attr "mode" "<sseinsnmode>")])
(define_expand "<avx2_avx512f>_perm<mode>"
@@ -12683,14 +13901,32 @@
{
int mask = INTVAL (operands[2]);
emit_insn (gen_<avx2_avx512f>_perm<mode>_1 (operands[0], operands[1],
- GEN_INT ((mask >> 0) & 3),
- GEN_INT ((mask >> 2) & 3),
- GEN_INT ((mask >> 4) & 3),
- GEN_INT ((mask >> 6) & 3)));
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT ((mask >> 4) & 3),
+ GEN_INT ((mask >> 6) & 3)));
+ DONE;
+})
+
+;; Masked immediate-permute expander for V8FI.  The 8-bit immediate
+;; (operand 2) is split into four 2-bit selectors that become operands
+;; 2..5 of the ..._perm<mode>_1_mask insn; operands 3 and 4 are forwarded
+;; as its trailing arguments (presumably merge source and write mask
+;; -- TODO confirm).
+(define_expand "avx512f_perm<mode>_mask"
+ [(match_operand:V8FI 0 "register_operand")
+ (match_operand:V8FI 1 "nonimmediate_operand")
+ (match_operand:SI 2 "const_0_to_255_operand")
+ (match_operand:V8FI 3 "vector_move_operand")
+ (match_operand:<avx512fmaskmode> 4 "register_operand")]
+ "TARGET_AVX512F"
+{
+ int mask = INTVAL (operands[2]);
+ emit_insn (gen_<avx2_avx512f>_perm<mode>_1_mask (operands[0], operands[1],
+ GEN_INT ((mask >> 0) & 3),
+ GEN_INT ((mask >> 2) & 3),
+ GEN_INT ((mask >> 4) & 3),
+ GEN_INT ((mask >> 6) & 3),
+ operands[3], operands[4]));
DONE;
})
-(define_insn "<avx2_avx512f>_perm<mode>_1"
+(define_insn "<avx2_avx512f>_perm<mode>_1<mask_name>"
[(set (match_operand:VI8F_256_512 0 "register_operand" "=v")
(vec_select:VI8F_256_512
(match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm")
@@ -12698,7 +13934,7 @@
(match_operand 3 "const_0_to_3_operand")
(match_operand 4 "const_0_to_3_operand")
(match_operand 5 "const_0_to_3_operand")])))]
- "TARGET_AVX2"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
{
int mask = 0;
mask |= INTVAL (operands[2]) << 0;
@@ -12706,10 +13942,10 @@
mask |= INTVAL (operands[4]) << 4;
mask |= INTVAL (operands[5]) << 6;
operands[2] = GEN_INT (mask);
- return "vperm<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
+ return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
}
[(set_attr "type" "sselog")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "<mask_prefix2>")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx2_permv2ti"
@@ -12756,58 +13992,58 @@
(set_attr "isa" "*,avx2,noavx2")
(set_attr "mode" "V8SF")])
-(define_insn "avx512f_vec_dup<mode>"
+;; Broadcast element 0 of operand 1 (vec_select with index 0) to every
+;; element of the VI48F_512 destination.  <mask_operand2> is presumably
+;; the optional mask suffix from the mask subst -- TODO confirm.
+(define_insn "<mask_codefor>avx512f_vec_dup<mode><mask_name>"
[(set (match_operand:VI48F_512 0 "register_operand" "=v")
(vec_duplicate:VI48F_512
(vec_select:<ssescalarmode>
(match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
(parallel [(const_int 0)]))))]
"TARGET_AVX512F"
- "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}"
+ "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_broadcast<mode>"
+;; Duplicate an <ssexmmmode> value across a V16FI register.  Two
+;; alternatives: a register source is emitted as vshuf...32x4 with
+;; immediate 0x0 and the source given twice (%g1); a memory source is
+;; emitted as vbroadcast...32x4.
+(define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
[(set (match_operand:V16FI 0 "register_operand" "=v,v")
(vec_duplicate:V16FI
(match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
"TARGET_AVX512F"
"@
- vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0|%0, %g1, %g1, 0x0}
- vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}"
+ vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
+ vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_broadcast<mode>"
+;; Duplicate an <ssehalfvecmode> value across a V8FI register.  Two
+;; alternatives: a register source is emitted as vshuf...64x2 with
+;; immediate 0x44 and the source given twice (%g1); a memory source is
+;; emitted as vbroadcast...64x4.
+(define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
[(set (match_operand:V8FI 0 "register_operand" "=v,v")
(vec_duplicate:V8FI
(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
"TARGET_AVX512F"
"@
- vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0|%0, %g1, %g1, 0x44}
- vbroadcast<shuffletype>64x4\t{%1, %0|%0, %1}"
+ vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
+ vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_vec_dup_gpr<mode>"
+;; Broadcast a scalar held in a general register ("r") to every element
+;; of a VI48_512 vector.  The V8DI form is only enabled when
+;; TARGET_64BIT holds.
+(define_insn "<mask_codefor>avx512f_vec_dup_gpr<mode><mask_name>"
[(set (match_operand:VI48_512 0 "register_operand" "=v")
(vec_duplicate:VI48_512
(match_operand:<ssescalarmode> 1 "register_operand" "r")))]
"TARGET_AVX512F && (<MODE>mode != V8DImode || TARGET_64BIT)"
- "vpbroadcast<bcstscalarsuff>\t{%1, %0|%0, %1}"
+ "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_vec_dup_mem<mode>"
+;; Broadcast a scalar taken from an SSE register or memory to every
+;; element of a VI48F_512 vector.
+;; NOTE(review): this pattern uses the "x" constraints ("=x", "xm")
+;; while the neighbouring AVX-512 broadcast patterns use "v" -- confirm
+;; the restriction is intended.
+(define_insn "<mask_codefor>avx512f_vec_dup_mem<mode><mask_name>"
[(set (match_operand:VI48F_512 0 "register_operand" "=x")
(vec_duplicate:VI48F_512
(match_operand:<ssescalarmode> 1 "nonimmediate_operand" "xm")))]
"TARGET_AVX512F"
- "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}"
+ "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
@@ -12947,12 +14183,12 @@
elt * GET_MODE_SIZE (<ssescalarmode>mode));
})
-(define_expand "<sse2_avx_avx512f>_vpermil<mode>"
+(define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
[(set (match_operand:VF2_AVX512F 0 "register_operand")
(vec_select:VF2_AVX512F
(match_operand:VF2_AVX512F 1 "nonimmediate_operand")
(match_operand:SI 2 "const_0_to_255_operand")))]
- "TARGET_AVX"
+ "TARGET_AVX && <mask_mode512bit_condition>"
{
int mask = INTVAL (operands[2]);
rtx perm[<ssescalarnum>];
@@ -12968,12 +14204,12 @@
= gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
})
-(define_expand "<sse2_avx_avx512f>_vpermil<mode>"
+(define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
[(set (match_operand:VF1_AVX512F 0 "register_operand")
(vec_select:VF1_AVX512F
(match_operand:VF1_AVX512F 1 "nonimmediate_operand")
(match_operand:SI 2 "const_0_to_255_operand")))]
- "TARGET_AVX"
+ "TARGET_AVX && <mask_mode512bit_condition>"
{
int mask = INTVAL (operands[2]);
rtx perm[<ssescalarnum>];
@@ -12991,40 +14227,54 @@
= gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
})
-(define_insn "*<sse2_avx_avx512f>_vpermilp<mode>"
+;; vpermilps/pd with an immediate derived from a vec_select parallel.
+;; avx_vpermilp_parallel is used both as the match condition (nonzero
+;; means the parallel is accepted) and, minus one, as the immediate
+;; that replaces operand 2 before output.
+(define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
[(set (match_operand:VF_AVX512F 0 "register_operand" "=v")
(vec_select:VF_AVX512F
(match_operand:VF_AVX512F 1 "nonimmediate_operand" "vm")
(match_parallel 2 ""
[(match_operand 3 "const_int_operand")])))]
- "TARGET_AVX
+ "TARGET_AVX && <mask_mode512bit_condition>
&& avx_vpermilp_parallel (operands[2], <MODE>mode)"
{
int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
operands[2] = GEN_INT (mask);
- return "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
+ return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
}
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "<mask_prefix>")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3"
+;; Variable vpermilps/pd (UNSPEC_VPERMIL): operand 1 is the data,
+;; operand 2 the integer control vector (register or memory).  The
+;; "prefix" attribute changes from a fixed "vex" to <mask_prefix>,
+;; presumably selected by the mask subst -- TODO confirm.
+(define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
[(set (match_operand:VF_AVX512F 0 "register_operand" "=v")
(unspec:VF_AVX512F
[(match_operand:VF_AVX512F 1 "register_operand" "v")
(match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
UNSPEC_VPERMIL))]
- "TARGET_AVX"
- "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX && <mask_mode512bit_condition>"
+ "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "btver2_decode" "vector")
- (set_attr "prefix" "vex")
+ (set_attr "prefix" "<mask_prefix>")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_vpermi2var<mode>3"
+;; Zero-masking expander for vpermi2.  Operands 0-3 are forwarded to the
+;; ..._maskz_1 insn, followed by an all-zero vector (CONST0_RTX) and the
+;; mask register (operand 4).  Operands carry predicates only:
+;; constraint strings have no effect in a define_expand.
+(define_expand "avx512f_vpermi2var<mode>3_maskz"
+ [(match_operand:VI48F_512 0 "register_operand")
+ (match_operand:VI48F_512 1 "register_operand")
+ (match_operand:<sseintvecmode> 2 "register_operand")
+ (match_operand:VI48F_512 3 "nonimmediate_operand")
+ (match_operand:<avx512fmaskmode> 4 "register_operand")]
+ "TARGET_AVX512F"
+{
+ emit_insn (gen_avx512f_vpermi2var<mode>3_maskz_1 (
+ operands[0], operands[1], operands[2], operands[3],
+ CONST0_RTX (<MODE>mode), operands[4]));
+ DONE;
+})
+
+(define_insn "avx512f_vpermi2var<mode>3<sd_maskz_name>"
[(set (match_operand:VI48F_512 0 "register_operand" "=v")
(unspec:VI48F_512
[(match_operand:VI48F_512 1 "register_operand" "v")
@@ -13032,12 +14282,42 @@
(match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
UNSPEC_VPERMI2))]
"TARGET_AVX512F"
- "vpermi2<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}"
+ "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+;; Merge-masking vpermi2: a vec_merge of the UNSPEC_VPERMI2_MASK result
+;; with the destination itself (match_dup 0) under mask operand 4.
+;; Operand 2 (the integer index vector) is tied to the output by the
+;; "0" constraint.  The template emits operand 4 after %0 via %{%4%},
+;; presumably printed inside literal braces -- TODO confirm.
+(define_insn "avx512f_vpermi2var<mode>3_mask"
+ [(set (match_operand:VI48F_512 0 "register_operand" "=v")
+ (vec_merge:VI48F_512
+ (unspec:VI48F_512
+ [(match_operand:VI48F_512 1 "register_operand" "v")
+ (match_operand:<sseintvecmode> 2 "register_operand" "0")
+ (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
+ UNSPEC_VPERMI2_MASK)
+ (match_dup 0)
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
+ "TARGET_AVX512F"
+ "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_vpermt2var<mode>3"
+;; Zero-masking expander for vpermt2.  Operands 0-3 are forwarded to the
+;; ..._maskz_1 insn, followed by an all-zero vector (CONST0_RTX) and the
+;; mask register (operand 4).  Operands carry predicates only:
+;; constraint strings have no effect in a define_expand.
+(define_expand "avx512f_vpermt2var<mode>3_maskz"
+ [(match_operand:VI48F_512 0 "register_operand")
+ (match_operand:<sseintvecmode> 1 "register_operand")
+ (match_operand:VI48F_512 2 "register_operand")
+ (match_operand:VI48F_512 3 "nonimmediate_operand")
+ (match_operand:<avx512fmaskmode> 4 "register_operand")]
+ "TARGET_AVX512F"
+{
+ emit_insn (gen_avx512f_vpermt2var<mode>3_maskz_1 (
+ operands[0], operands[1], operands[2], operands[3],
+ CONST0_RTX (<MODE>mode), operands[4]));
+ DONE;
+})
+
+(define_insn "avx512f_vpermt2var<mode>3<sd_maskz_name>"
[(set (match_operand:VI48F_512 0 "register_operand" "=v")
(unspec:VI48F_512
[(match_operand:<sseintvecmode> 1 "register_operand" "v")
@@ -13045,7 +14325,23 @@
(match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
UNSPEC_VPERMT2))]
"TARGET_AVX512F"
- "vpermt2<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}"
+ "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+;; Merge-masking vpermt2: a vec_merge of the UNSPEC_VPERMT2 result with
+;; operand 2 (match_dup 2) under mask operand 4.  Operand 2 is tied to
+;; the output by the "0" constraint; operand 1 is the integer index
+;; vector.  The template emits operand 4 after %0 via %{%4%}, presumably
+;; printed inside literal braces -- TODO confirm.
+(define_insn "avx512f_vpermt2var<mode>3_mask"
+ [(set (match_operand:VI48F_512 0 "register_operand" "=v")
+ (vec_merge:VI48F_512
+ (unspec:VI48F_512
+ [(match_operand:<sseintvecmode> 1 "register_operand" "v")
+ (match_operand:VI48F_512 2 "register_operand" "0")
+ (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
+ UNSPEC_VPERMT2)
+ (match_dup 2)
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k")))]
+ "TARGET_AVX512F"
+ "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
@@ -13440,24 +14736,24 @@
DONE;
})
-(define_insn "<avx2_avx512f>_ashrv<mode>"
+;; Per-element variable arithmetic right shift (ashiftrt) emitted as
+;; vpsrav.  <mask_name>, <mask_operand3> and <mask_mode512bit_condition>
+;; are presumably subst attributes adding a masked variant
+;; -- TODO confirm against subst.md.
+(define_insn "<avx2_avx512f>_ashrv<mode><mask_name>"
[(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
(ashiftrt:VI48_AVX512F
(match_operand:VI48_AVX512F 1 "register_operand" "v")
(match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
- "TARGET_AVX2"
- "vpsrav<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sseishft")
(set_attr "prefix" "maybe_evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "<avx2_avx512f>_<shift_insn>v<mode>"
+;; Per-element variable logical shifts (any_lshift) emitted as
+;; vp<vshift>v.  <mask_name>, <mask_operand3> and
+;; <mask_mode512bit_condition> are presumably subst attributes adding a
+;; masked variant -- TODO confirm against subst.md.
+(define_insn "<avx2_avx512f>_<shift_insn>v<mode><mask_name>"
[(set (match_operand:VI48_AVX2_48_AVX512F 0 "register_operand" "=v")
(any_lshift:VI48_AVX2_48_AVX512F
(match_operand:VI48_AVX2_48_AVX512F 1 "register_operand" "v")
(match_operand:VI48_AVX2_48_AVX512F 2 "nonimmediate_operand" "vm")))]
- "TARGET_AVX2"
- "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ "TARGET_AVX2 && <mask_mode512bit_condition>"
+ "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sseishft")
(set_attr "prefix" "maybe_evex")
(set_attr "mode" "<sseinsnmode>")])
@@ -13540,12 +14836,13 @@
(set_attr "btver2_decode" "double")
(set_attr "mode" "V8SF")])
-(define_insn "avx512f_vcvtph2ps512"
+;; vcvtph2ps: V16HI source to V16SF (UNSPEC_VCVTPH2PS).  The
+;; <round_saeonly_*> attributes presumably come from an SAE-only
+;; rounding subst and replace the fixed "vm" constraint -- TODO confirm.
+(define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
[(set (match_operand:V16SF 0 "register_operand" "=v")
- (unspec:V16SF [(match_operand:V16HI 1 "nonimmediate_operand" "vm")]
- UNSPEC_VCVTPH2PS))]
+ (unspec:V16SF
+ [(match_operand:V16HI 1 "nonimmediate_operand" "<round_saeonly_constraint>")]
+ UNSPEC_VCVTPH2PS))]
"TARGET_AVX512F"
- "vcvtph2ps\t{%1, %0|%0, %1}"
+ "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
(set_attr "mode" "V16SF")])
@@ -13596,13 +14893,14 @@
(set_attr "btver2_decode" "vector")
(set_attr "mode" "V8SF")])
-(define_insn "avx512f_vcvtps2ph512"
+;; vcvtps2ph: V16SF source to V16HI (UNSPEC_VCVTPS2PH) with an 8-bit
+;; immediate (operand 2).  The destination may be a register or memory
+;; ("=vm").
+(define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
[(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
- (unspec:V16HI [(match_operand:V16SF 1 "register_operand" "v")
- (match_operand:SI 2 "const_0_to_255_operand" "N")]
- UNSPEC_VCVTPS2PH))]
+ (unspec:V16HI
+ [(match_operand:V16SF 1 "register_operand" "v")
+ (match_operand:SI 2 "const_0_to_255_operand" "N")]
+ UNSPEC_VCVTPS2PH))]
"TARGET_AVX512F"
- "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
+ "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
(set_attr "mode" "V16SF")])
@@ -13993,49 +15291,100 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx512f_getmant<mode>"
+(define_insn "avx512f_compress<mode>_mask" ;; hand-written masked pattern (no define_subst): UNSPEC_COMPRESS over source, merge source and mask
+ [(set (match_operand:VI48F_512 0 "register_operand" "=v")
+ (unspec:VI48F_512
+ [(match_operand:VI48F_512 1 "register_operand" "v")
+ (match_operand:VI48F_512 2 "vector_move_operand" "0C") ;; merge source: tied to operand 0 ("0") or, presumably, the zero vector ("C") -- confirm
+ (match_operand:<avx512fmaskmode> 3 "register_operand" "k")]
+ UNSPEC_COMPRESS))]
+ "TARGET_AVX512F"
+ "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" ;; %{%3%} prints the mask register in braces; %N2 prints {z} when operand 2 is constant zero
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx512f_compressstore<mode>_mask" ;; memory-destination form of compress; emits the same mnemonic as avx512f_compress<mode>_mask
+ [(set (match_operand:VI48F_512 0 "memory_operand" "=m")
+ (unspec:VI48F_512
+ [(match_operand:VI48F_512 1 "register_operand" "x") ;; NOTE(review): "x" here while the sibling patterns use "v" -- confirm the narrower register class is intended
+ (match_dup 0) ;; the old memory contents are an input, so unselected elements are modelled as preserved
+ (match_operand:<avx512fmaskmode> 2 "register_operand" "k")]
+ UNSPEC_COMPRESS_STORE))]
+ "TARGET_AVX512F"
+ "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}" ;; mask only; no %N, so no {z} is ever printed for the store form
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "memory" "store")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_expand "avx512f_expand<mode>_maskz" ;; zero-masking entry point: same UNSPEC_EXPAND RTL shape as avx512f_expand<mode>_mask
+ [(set (match_operand:VI48F_512 0 "register_operand")
+ (unspec:VI48F_512
+ [(match_operand:VI48F_512 1 "nonimmediate_operand")
+ (match_operand:VI48F_512 2 "vector_move_operand")
+ (match_operand:<avx512fmaskmode> 3 "register_operand")]
+ UNSPEC_EXPAND))]
+ "TARGET_AVX512F"
+ "operands[2] = CONST0_RTX (<MODE>mode);") ;; overwrite whatever was passed as operand 2 with the zero vector before the pattern is emitted
+
+(define_insn "avx512f_expand<mode>_mask" ;; masked expand; alternative 0 takes a register source, alternative 1 a memory source
+ [(set (match_operand:VI48F_512 0 "register_operand" "=v,v")
+ (unspec:VI48F_512
+ [(match_operand:VI48F_512 1 "nonimmediate_operand" "v,m")
+ (match_operand:VI48F_512 2 "vector_move_operand" "0C,0C") ;; merge source: tied to operand 0 ("0") or, presumably, the zero vector ("C") -- confirm
+ (match_operand:<avx512fmaskmode> 3 "register_operand" "k,k")]
+ UNSPEC_EXPAND))]
+ "TARGET_AVX512F"
+ "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" ;; %{%3%} prints the mask in braces; %N2 prints {z} when operand 2 is constant zero
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "memory" "none,load") ;; one value per alternative, matching the "v,m" source constraint
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx512f_getmant<mode><mask_name><round_saeonly_name>" ;; packed getmant; subst attributes add "_mask" and/or "_round" variants
 [(set (match_operand:VF_512 0 "register_operand" "=v")
 (unspec:VF_512
- [(match_operand:VF_512 1 "nonimmediate_operand" "vm")
+ [(match_operand:VF_512 1 "nonimmediate_operand" "<round_saeonly_constraint>") ;; "vm" normally, "v" (register only) in the "_round" variant
 (match_operand:SI 2 "const_0_to_15_operand")]
 UNSPEC_GETMANT))]
 "TARGET_AVX512F"
- "vgetmant<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
+ "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"; the ';' after the string already starts an md comment; rounding text is %R3 unmasked, %R5 masked
 [(set_attr "prefix" "evex")
 (set_attr "mode" "<MODE>")])
-(define_insn "avx512f_getmant<mode>"
+(define_insn "avx512f_getmant<mode><mask_scalar_name><round_saeonly_name>" ;; scalar getmant; uses the "mask_scalar" subst because the source is a vec_merge with (const_int 1)
 [(set (match_operand:VF_128 0 "register_operand" "=v")
 (vec_merge:VF_128
 (unspec:VF_128
 [(match_operand:VF_128 1 "register_operand" "v")
- (match_operand:VF_128 2 "nonimmediate_operand" "vm")
+ (match_operand:VF_128 2 "nonimmediate_operand" "<round_saeonly_constraint>") ;; "vm" normally, "v" (register only) in the "_round" variant
 (match_operand:SI 3 "const_0_to_15_operand")]
 UNSPEC_GETMANT)
 (match_dup 1)
 (const_int 1)))]
 "TARGET_AVX512F"
- "vgetmant<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+ "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_mask_scalar_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %2<round_saeonly_mask_scalar_op4>, %3}"; the ';' after the string already starts an md comment; mask text is %{%5%}%N4, rounding text %R4 unmasked or %R6 masked
 [(set_attr "prefix" "evex")
 (set_attr "mode" "<ssescalarmode>")])
-(define_insn "clz<mode>2"
+(define_insn "clz<mode>2<mask_name>" ;; no <mask_codefor> here, so the unmasked name "clz<mode>2" is kept as-is; the mask subst adds "clz<mode>2_mask"
 [(set (match_operand:VI48_512 0 "register_operand" "=v")
 (clz:VI48_512
 (match_operand:VI48_512 1 "nonimmediate_operand" "vm")))]
 "TARGET_AVX512CD"
- "vplzcnt<ssemodesuffix>\t{%1, %0|%0, %1}"
+ "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" ;; masked variant appends %{%3%}%N2: operand 3 is the mask, operand 2 the merge source
 [(set_attr "type" "sse")
 (set_attr "prefix" "evex")
 (set_attr "mode" "<sseinsnmode>")])
-(define_insn "conflict<mode>"
+(define_insn "<mask_codefor>conflict<mode><mask_name>" ;; unmasked name becomes "*conflict<mode>"; the masked name is "conflict<mode>_mask"
 [(set (match_operand:VI48_512 0 "register_operand" "=v")
 (unspec:VI48_512
 [(match_operand:VI48_512 1 "nonimmediate_operand" "vm")]
 UNSPEC_CONFLICT))]
 "TARGET_AVX512CD"
- "vpconflict<ssemodesuffix>\t{%1, %0|%0, %1}"
+ "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}" ;; masked variant appends %{%3%}%N2: operand 3 is the mask, operand 2 the merge source
 [(set_attr "type" "sse")
 (set_attr "prefix" "evex")
 (set_attr "mode" "<sseinsnmode>")])
new file mode 100644
@@ -0,0 +1,222 @@
+;; GCC machine description for AVX512F instructions
+;; Copyright (C) 2013 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Some iterators for extending subst as much as possible
+;; All vectors (Use it for destination)
+(define_mode_iterator SUBST_V ;; vector modes matched by the mask, mask_scalar, sd and *_expand substs in this file
+ [V16QI
+ V16HI V8HI
+ V16SI V8SI V4SI
+ V8DI V4DI V2DI
+ V16SF V8SF V4SF
+ V8DF V4DF V2DF])
+
+(define_mode_iterator SUBST_S ;; scalar integer modes; used by mask_scalar_merge and for the last input operand of the *_expand substs
+ [QI HI SI DI])
+
+(define_mode_iterator SUBST_A ;; the SUBST_V vector modes plus scalar and CC modes; used by round, round_saeonly and operand 3 of round_saeonly_expand5
+ [V16QI
+ V16HI V8HI
+ V16SI V8SI V4SI
+ V8DI V4DI V2DI
+ V16SF V8SF V4SF
+ V8DF V4DF V2DF
+ QI HI SI DI SF DF
+ CCFP CCFPU])
+
+(define_subst_attr "mask_name" "mask" "" "_mask") ;; each attr: name, owning subst, text for the original insn, text for the substituted insn
+(define_subst_attr "mask_applied" "mask" "false" "true")
+(define_subst_attr "mask_operand2" "mask" "" "%{%3%}%N2") ;; mask_operandN: asm suffix for an insn whose own operands are 0..N-1; N is the merge source, N+1 the mask
+(define_subst_attr "mask_operand3" "mask" "" "%{%4%}%N3")
+(define_subst_attr "mask_operand3_1" "mask" "" "%%{%%4%%}%%N3") ;; for sprintf
+(define_subst_attr "mask_operand4" "mask" "" "%{%5%}%N4")
+(define_subst_attr "mask_operand6" "mask" "" "%{%7%}%N6")
+(define_subst_attr "mask_operand11" "mask" "" "%{%12%}%N11")
+(define_subst_attr "mask_operand18" "mask" "" "%{%19%}%N18")
+(define_subst_attr "mask_operand19" "mask" "" "%{%20%}%N19")
+(define_subst_attr "mask_codefor" "mask" "*" "") ;; "*" prefix on the unmasked insn name, nothing on the masked name
+(define_subst_attr "mask_mode512bit_condition" "mask" "1" "(GET_MODE_SIZE (GET_MODE (operands[0])) == 64)") ;; limits the masked variant to 64-byte destinations
+(define_subst_attr "store_mask_constraint" "mask" "vm" "v") ;; with the matching predicate below: register-only destination in the masked variant
+(define_subst_attr "store_mask_predicate" "mask" "nonimmediate_operand" "register_operand")
+(define_subst_attr "mask_prefix" "mask" "vex" "evex") ;; mask_prefix*: values for the "prefix" insn attribute; always "evex" once masked
+(define_subst_attr "mask_prefix2" "mask" "maybe_vex" "evex")
+(define_subst_attr "mask_prefix3" "mask" "orig,vex" "evex")
+
+(define_subst "mask" ;; rewrites (set op0 src) as (set op0 (vec_merge src op2 op3)) when TARGET_AVX512F
+ [(set (match_operand:SUBST_V 0)
+ (match_operand:SUBST_V 1))]
+ "TARGET_AVX512F"
+ [(set (match_dup 0)
+ (vec_merge:SUBST_V
+ (match_dup 1)
+ (match_operand:SUBST_V 2 "vector_move_operand" "0C") ;; merge source: tied to the destination ("0") or, presumably, constant zero ("C", printed as {z} by %N) -- confirm
+ (match_operand:<avx512fmaskmode> 3 "register_operand" "k")))]) ;; the mask, in the mask mode paired with the vector mode
+
+(define_subst_attr "mask_scalar_name" "mask_scalar" "" "_mask") ;; same "_mask" suffix as the "mask" subst, but owned by "mask_scalar"
+(define_subst_attr "mask_scalar_operand3" "mask_scalar" "" "%{%4%}%N3") ;; mask_scalar_operandN: N is the merge source, N+1 the mask
+(define_subst_attr "mask_scalar_operand4" "mask_scalar" "" "%{%5%}%N4")
+(define_subst_attr "mask_scalar_codefor" "mask_scalar" "*" "")
+(define_subst_attr "mask_scalar_prefix" "mask_scalar" "orig,vex" "evex")
+(define_subst_attr "mask_scalar_prefix2" "mask_scalar" "vex" "evex")
+
+(define_subst "mask_scalar" ;; for scalar insns shaped (vec_merge op1 op2 (const_int 1)): masks op1 inside the existing vec_merge
+ [(set (match_operand:SUBST_V 0)
+ (vec_merge:SUBST_V
+ (match_operand:SUBST_V 1)
+ (match_operand:SUBST_V 2)
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ [(set (match_dup 0)
+ (vec_merge:SUBST_V
+ (vec_merge:SUBST_V
+ (match_dup 1)
+ (match_operand:SUBST_V 4 "vector_move_operand" "0C") ;; NOTE(review): the new operands are numbered 4 and 5, skipping 3 -- confirm gensupport renumbers them to follow the insn's own operands
+ (match_operand:<avx512fmaskmode> 5 "register_operand" "k"))
+ (match_dup 2) ;; the outer merge with (const_int 1) is kept unchanged
+ (const_int 1)))])
+
+(define_subst_attr "mask_scalar_merge_name" "mask_scalar_merge" "" "_mask")
+(define_subst_attr "mask_scalar_merge_operand3" "mask_scalar_merge" "" "%{%3%}") ;; prints only the mask register in braces; there is no %N part, so never {z}
+(define_subst_attr "mask_scalar_merge_operand4" "mask_scalar_merge" "" "%{%4%}")
+
+(define_subst "mask_scalar_merge" ;; for scalar-integer results: rewrites (set op0 src) as (set op0 (and src op3))
+ [(set (match_operand:SUBST_S 0)
+ (match_operand:SUBST_S 1))]
+ "TARGET_AVX512F"
+ [(set (match_dup 0)
+ (and:SUBST_S
+ (match_dup 1)
+ (match_operand:SUBST_S 3 "register_operand" "k")))]) ;; the mask has the same scalar mode as the result; numbered 3 although the input uses only 0 and 1
+
+(define_subst_attr "sd_maskz_name" "sd" "" "_maskz_1") ;; name suffix for the variant produced by the "sd" subst
+(define_subst_attr "sd_mask_op4" "sd" "" "%{%5%}%N4") ;; sd_mask_opN: N is the zero merge source, N+1 the mask
+(define_subst_attr "sd_mask_op5" "sd" "" "%{%6%}%N5")
+(define_subst_attr "sd_mask_codefor" "sd" "*" "")
+(define_subst_attr "sd_mask_mode512bit_condition" "sd" "1" "(GET_MODE_SIZE (GET_MODE (operands[0])) == 64)")
+
+(define_subst "sd" ;; like "mask", but the merge source must satisfy const0_operand, i.e. a zero-masking-only variant
+ [(set (match_operand:SUBST_V 0)
+ (match_operand:SUBST_V 1))]
+ "" ;; empty condition: unlike the other substs here, no TARGET_AVX512F test is added
+ [(set (match_dup 0)
+ (vec_merge:SUBST_V
+ (match_dup 1)
+ (match_operand:SUBST_V 2 "const0_operand" "C")
+ (match_operand:<avx512fmaskmode> 3 "register_operand" "k")))
+])
+
+(define_subst_attr "round_name" "round" "" "_round") ;; name suffix for the embedded-rounding variant
+(define_subst_attr "round_mask_operand2" "mask" "%R2" "%R4") ;; owned by "mask", not "round": the rounding operand number shifts by two once mask operands are added
+(define_subst_attr "round_mask_operand3" "mask" "%R3" "%R5")
+(define_subst_attr "round_mask_scalar_operand3" "mask_scalar" "%R3" "%R5")
+(define_subst_attr "round_sd_mask_operand4" "sd" "%R4" "%R6")
+(define_subst_attr "round_op2" "round" "" "%R2") ;; round_opN: prints %RN only in the "_round" variant
+(define_subst_attr "round_op3" "round" "" "%R3")
+(define_subst_attr "round_op4" "round" "" "%R4")
+(define_subst_attr "round_op5" "round" "" "%R5")
+(define_subst_attr "round_op6" "round" "" "%R6")
+(define_subst_attr "round_mask_op2" "round" "" "<round_mask_operand2>") ;; in the "_round" variant, expands to the mask-owned attribute so both substs are accounted for
+(define_subst_attr "round_mask_op3" "round" "" "<round_mask_operand3>")
+(define_subst_attr "round_mask_scalar_op3" "round" "" "<round_mask_scalar_operand3>")
+(define_subst_attr "round_sd_mask_op4" "round" "" "<round_sd_mask_operand4>")
+(define_subst_attr "round_constraint" "round" "vm" "v") ;; round_constraint*: the "_round" variant uses a register-only constraint string
+(define_subst_attr "round_constraint2" "round" "m" "v")
+(define_subst_attr "round_constraint3" "round" "rm" "r")
+(define_subst_attr "round_mode512bit_condition" "round" "1" "(GET_MODE (operands[0]) == V16SFmode || GET_MODE (operands[0]) == V8DFmode)") ;; extra insn-condition term for the "_round" variant
+(define_subst_attr "round_modev4sf_condition" "round" "1" "(GET_MODE (operands[0]) == V4SFmode)")
+(define_subst_attr "round_codefor" "round" "*" "")
+(define_subst_attr "round_opnum" "round" "5" "6")
+
+(define_subst "round" ;; wraps (set op0 src) in a parallel together with an UNSPEC_EMBEDDED_ROUNDING that carries the rounding immediate
+ [(set (match_operand:SUBST_A 0)
+ (match_operand:SUBST_A 1))]
+ "TARGET_AVX512F"
+ [(parallel[
+ (set (match_dup 0)
+ (match_dup 1))
+ (unspec [(match_operand:SI 2 "const_0_to_4_operand")] UNSPEC_EMBEDDED_ROUNDING)])]) ;; NOTE(review): 4 is NO_ROUND, which the 'R' case of ix86_print_operand does not handle (gcc_unreachable) -- confirm it never reaches output
+
+(define_subst_attr "round_saeonly_name" "round_saeonly" "" "_round") ;; same "_round" suffix as the "round" subst, but owned by "round_saeonly"
+(define_subst_attr "round_saeonly_mask_operand2" "mask" "%R2" "%R4") ;; owned by the masking subst: the rounding operand number depends on whether mask operands were added
+(define_subst_attr "round_saeonly_mask_operand3" "mask" "%R3" "%R5")
+(define_subst_attr "round_saeonly_mask_scalar_operand3" "mask_scalar" "%R3" "%R5")
+(define_subst_attr "round_saeonly_mask_scalar_operand4" "mask_scalar" "%R4" "%R6")
+(define_subst_attr "round_saeonly_mask_scalar_merge_operand4" "mask_scalar_merge" "%R4" "%R5") ;; shifts by one only: mask_scalar_merge adds a single operand
+(define_subst_attr "round_saeonly_sd_mask_operand5" "sd" "%R5" "%R7")
+(define_subst_attr "round_saeonly_op2" "round_saeonly" "" "%R2") ;; round_saeonly_opN: prints %RN only in the "_round" variant
+(define_subst_attr "round_saeonly_op4" "round_saeonly" "" "%R4")
+(define_subst_attr "round_saeonly_op5" "round_saeonly" "" "%R5")
+(define_subst_attr "round_saeonly_op6" "round_saeonly" "" "%R6")
+(define_subst_attr "round_saeonly_mask_op2" "round_saeonly" "" "<round_saeonly_mask_operand2>") ;; in the "_round" variant, expands to the mask-owned attribute above
+(define_subst_attr "round_saeonly_mask_op3" "round_saeonly" "" "<round_saeonly_mask_operand3>")
+(define_subst_attr "round_saeonly_mask_scalar_op3" "round_saeonly" "" "<round_saeonly_mask_scalar_operand3>")
+(define_subst_attr "round_saeonly_mask_scalar_op4" "round_saeonly" "" "<round_saeonly_mask_scalar_operand4>")
+(define_subst_attr "round_saeonly_mask_scalar_merge_op4" "round_saeonly" "" "<round_saeonly_mask_scalar_merge_operand4>")
+(define_subst_attr "round_saeonly_sd_mask_op5" "round_saeonly" "" "<round_saeonly_sd_mask_operand5>")
+(define_subst_attr "round_saeonly_constraint" "round_saeonly" "vm" "v") ;; the "_round" variant uses a register-only constraint string
+(define_subst_attr "round_saeonly_constraint2" "round_saeonly" "m" "v")
+(define_subst_attr "round_saeonly_mode512bit_condition" "round_saeonly" "1" "(GET_MODE (operands[0]) == V16SFmode || GET_MODE (operands[0]) == V8DFmode)")
+(define_subst_attr "round_saeonly_mode512bit_condition_op1" "round_saeonly" "1" "(GET_MODE (operands[1]) == V16SFmode || GET_MODE (operands[1]) == V8DFmode)") ;; same test, applied to operand 1 instead of operand 0
+
+(define_subst "round_saeonly" ;; same rewrite as "round", but the immediate is restricted by const_4_to_5_operand
+ [(set (match_operand:SUBST_A 0)
+ (match_operand:SUBST_A 1))]
+ "TARGET_AVX512F"
+ [(parallel[
+ (set (match_dup 0)
+ (match_dup 1))
+ (unspec [(match_operand:SI 2 "const_4_to_5_operand")] UNSPEC_EMBEDDED_ROUNDING)])]) ;; NOTE(review): presumably 4 = NO_ROUND and 5 = ROUND_SAE; confirm const_4_to_5_operand is defined in predicates.md and that 4 never reaches %R
+
+(define_subst_attr "round_expand_name" "round_expand" "" "_round")
+(define_subst_attr "round_expand_predicate" "round_expand" "nonimmediate_operand" "register_operand") ;; the "_round" variant narrows the predicate to registers
+(define_subst_attr "round_expand_operand" "round_expand" "" ", operands[5]") ;; C text that appends the new operand 5, presumably to a gen_* call argument list -- confirm at the use sites
+
+(define_subst "round_expand" ;; matches a bare five-operand list (not a SET) and appends a rounding operand
+ [(match_operand:SUBST_V 0)
+ (match_operand:SUBST_V 1)
+ (match_operand:SUBST_V 2)
+ (match_operand:SUBST_V 3)
+ (match_operand:SUBST_S 4)]
+ "TARGET_AVX512F"
+ [(match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (match_dup 4)
+ (unspec [(match_operand:SI 5 "const_0_to_4_operand")] UNSPEC_EMBEDDED_ROUNDING)]) ;; operand 5 is the rounding immediate, accepted in the range 0..4
+
+(define_subst_attr "round_saeonly_expand_name5" "round_saeonly_expand5" "" "_round")
+(define_subst_attr "round_saeonly_expand_predicate5" "round_saeonly_expand5" "nonimmediate_operand" "register_operand") ;; the "_round" variant narrows the predicate to registers
+(define_subst_attr "round_saeonly_expand_operand6" "round_saeonly_expand5" "" ", operands[6]") ;; C text that appends the new operand 6, presumably to a gen_* call argument list -- confirm at the use sites
+
+(define_subst "round_saeonly_expand5" ;; matches a bare six-operand list (not a SET) and appends a rounding operand
+ [(match_operand:SUBST_V 0)
+ (match_operand:SUBST_V 1)
+ (match_operand:SUBST_V 2)
+ (match_operand:SUBST_A 3)
+ (match_operand:SI 4)
+ (match_operand:SUBST_S 5)]
+ "TARGET_AVX512F"
+ [(match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (match_dup 4)
+ (match_dup 5)
+ (unspec [(match_operand:SI 6 "const_4_to_5_operand")] UNSPEC_EMBEDDED_ROUNDING)]) ;; NOTE(review): const_4_to_5_operand is not among the predicates visible alongside this change -- confirm it is defined