@@ -1307,7 +1307,7 @@
(set_attr "prefix" "<mask_prefix3>")
(set_attr "mode" "<MODE>")])
-(define_insn "<sse>_vm<plusminus_insn><mode>3"
+(define_insn "<sse>_vm<plusminus_insn><mode>3<mask_scalar_name>"
[(set (match_operand:VF_128 0 "register_operand" "=x,v")
(vec_merge:VF_128
(plusminus:VF_128
@@ -1318,10 +1318,10 @@
"TARGET_SSE"
"@
<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
- v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
+ v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseadd")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<mask_scalar_prefix>")
(set_attr "mode" "<ssescalarmode>")])
(define_expand "mul<mode>3<mask_name>"
@@ -1347,7 +1347,7 @@
(set_attr "btver2_decode" "direct,double")
(set_attr "mode" "<MODE>")])
-(define_insn "<sse>_vm<multdiv_mnemonic><mode>3"
+(define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name>"
[(set (match_operand:VF_128 0 "register_operand" "=x,v")
(vec_merge:VF_128
(multdiv:VF_128
@@ -1358,10 +1358,10 @@
"TARGET_SSE"
"@
<multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
- v<multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
+ v<multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sse<multdiv_mnemonic>")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<mask_scalar_prefix>")
(set_attr "btver2_decode" "direct,double")
(set_attr "mode" "<ssescalarmode>")])
@@ -1446,7 +1446,7 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
-(define_insn "*srcp14<mode>"
+(define_insn "<mask_scalar_codefor>srcp14<mode><mask_scalar_name>"
[(set (match_operand:VF_128 0 "register_operand" "=v")
(vec_merge:VF_128
(unspec:VF_128
@@ -1456,7 +1456,7 @@
(match_dup 1)
(const_int 1)))]
"TARGET_AVX512F"
- "vrcp14<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ "vrcp14<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2}"
[(set_attr "type" "sse")
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
@@ -1493,7 +1493,7 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
-(define_insn "<sse>_vmsqrt<mode>2"
+(define_insn "<sse>_vmsqrt<mode>2<mask_scalar_name>"
[(set (match_operand:VF_128 0 "register_operand" "=x,v")
(vec_merge:VF_128
(sqrt:VF_128
@@ -1503,11 +1503,11 @@
"TARGET_SSE"
"@
sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
- vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
+ vsqrt<ssescalarmodesuffix>\t{%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %<iptr>1}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sse")
(set_attr "atom_sse_attr" "sqrt")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<mask_scalar_prefix>")
(set_attr "btver2_sse_attr" "sqrt")
(set_attr "mode" "<ssescalarmode>")])
@@ -1542,7 +1542,7 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
-(define_insn "*rsqrt14<mode>"
+(define_insn "<mask_scalar_codefor>rsqrt14<mode><mask_scalar_name>"
[(set (match_operand:VF_128 0 "register_operand" "=v")
(vec_merge:VF_128
(unspec:VF_128
@@ -1552,7 +1552,7 @@
(match_dup 1)
(const_int 1)))]
"TARGET_AVX512F"
- "vrsqrt14<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ "vrsqrt14<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2}"
[(set_attr "type" "sse")
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
@@ -1622,7 +1622,7 @@
(set_attr "prefix" "<mask_prefix3>")
(set_attr "mode" "<MODE>")])
-(define_insn "<sse>_vm<code><mode>3"
+(define_insn "<sse>_vm<code><mode>3<mask_scalar_name>"
[(set (match_operand:VF_128 0 "register_operand" "=x,v")
(vec_merge:VF_128
(smaxmin:VF_128
@@ -1633,11 +1633,11 @@
"TARGET_SSE"
"@
<maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
- v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
+ v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sse")
(set_attr "btver2_sse_attr" "maxmin")
- (set_attr "prefix" "orig,vex")
+ (set_attr "prefix" "<mask_scalar_prefix>")
(set_attr "mode" "<ssescalarmode>")])
;; These versions of the min/max patterns implement exactly the operations
@@ -2748,7 +2748,7 @@
(match_operand:FMAMODE 3 "nonimmediate_operand")))]
"")
-(define_insn "*fma_fmadd_<mode>"
+(define_insn "fma_fmadd_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
(fma:FMAMODE
(match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
@@ -2976,7 +2976,7 @@
UNSPEC_FMADDSUB))]
"TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
-(define_insn "*fma_fmaddsub_<mode>"
+(define_insn "fma_fmaddsub_<mode>"
[(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x")
(unspec:VF
[(match_operand:VF 1 "nonimmediate_operand" "%0,0,v,x,x")
@@ -3241,6 +3241,46 @@
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
+;; Merge-masked scalar fused multiply-subtract, enabled under TARGET_AVX512F.
+;; The inner vec_merge chooses between fma (op1, op2, -op3) and operand 1
+;; under the QImode mask register in operand 4; the outer vec_merge with
+;; (const_int 1) then merges that result with operand 1 again.
+;; Operand 1 is tied to the destination (constraint "0") in both alternatives.
+;; Alternative 0 lets operand 2 be in memory and emits vfmsub132;
+;; alternative 1 lets operand 3 be in memory and emits vfmsub213.
+(define_insn "*fmai_fmsub_<mode>_mask"
+ [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+ (vec_merge:VF_128
+ (vec_merge:VF_128
+ (fma:VF_128
+ (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+ (match_operand:VF_128 2 "nonimmediate_operand" "vm,v")
+ (neg:VF_128
+ (match_operand:VF_128 3 "nonimmediate_operand" "v,vm")))
+ (match_dup 1)
+ (match_operand:QI 4 "register_operand" "k,k"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "@
+ vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2}
+ vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; Variant of the masked scalar fused multiply-subtract in which the inner
+;; vec_merge takes its second arm from operand 4 (predicate const0_operand)
+;; instead of operand 1; the QImode mask register is operand 5.
+;; Enabled under TARGET_AVX512F.  The templates print the mask as %{%5%}
+;; followed by %N4 -- presumably the zero-masking marker; confirm against
+;; the i386 operand printer.
+;; NOTE(review): operand 4 has no constraint string although every other
+;; operand lists two alternatives -- confirm this is intended.
+(define_insn "*fmai_fmsub_<mode>_maskz"
+ [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+ (vec_merge:VF_128
+ (vec_merge:VF_128
+ (fma:VF_128
+ (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+ (match_operand:VF_128 2 "nonimmediate_operand" "vm,v")
+ (neg:VF_128
+ (match_operand:VF_128 3 "nonimmediate_operand" "v,vm")))
+ (match_operand:VF_128 4 "const0_operand")
+ (match_operand:QI 5 "register_operand" "k,k"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "@
+ vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>3, %<iptr>2}
+ vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>2, %<iptr>3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "*fmai_vmfnmadd_<mode>_mask"
[(set (match_operand:VF_128 0 "register_operand" "=v,v")
(vec_merge:VF_128
@@ -4310,7 +4350,7 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
-(define_insn "sse2_cvtsd2ss"
+(define_insn "sse2_cvtsd2ss<mask_scalar_name>"
[(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
(vec_merge:V4SF
(vec_duplicate:V4SF
@@ -4322,17 +4362,17 @@
"@
cvtsd2ss\t{%2, %0|%0, %2}
cvtsd2ss\t{%2, %0|%0, %q2}
- vcvtsd2ss\t{%2, %1, %0|%0, %1, %q2}"
+ vcvtsd2ss\t{%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %q2}"
[(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "ssecvt")
(set_attr "athlon_decode" "vector,double,*")
(set_attr "amdfam10_decode" "vector,double,*")
(set_attr "bdver1_decode" "direct,direct,*")
- (set_attr "prefix" "orig,orig,vex")
+ (set_attr "prefix" "orig,orig,<mask_scalar_prefix2>")
(set_attr "btver2_decode" "double,double,double")
(set_attr "mode" "SF")])
-(define_insn "sse2_cvtss2sd"
+(define_insn "sse2_cvtss2sd<mask_scalar_name>"
[(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
(vec_merge:V2DF
(float_extend:V2DF
@@ -4345,14 +4385,14 @@
"@
cvtss2sd\t{%2, %0|%0, %2}
cvtss2sd\t{%2, %0|%0, %k2}
- vcvtss2sd\t{%2, %1, %0|%0, %1, %k2}"
+ vcvtss2sd\t{%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %k2}"
[(set_attr "isa" "noavx,noavx,avx")
(set_attr "type" "ssecvt")
(set_attr "amdfam10_decode" "vector,double,*")
(set_attr "athlon_decode" "direct,direct,*")
(set_attr "bdver1_decode" "direct,direct,*")
(set_attr "btver2_decode" "double,double,double")
- (set_attr "prefix" "orig,orig,vex")
+ (set_attr "prefix" "orig,orig,<mask_scalar_prefix2>")
(set_attr "mode" "DF")])
(define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name>"
@@ -6737,7 +6777,7 @@
operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
})
-(define_insn "*avx512f_vmscalef<mode>"
+(define_insn "<mask_scalar_codefor>avx512f_vmscalef<mode><mask_scalar_name>"
[(set (match_operand:VF_128 0 "register_operand" "=v")
(vec_merge:VF_128
(unspec:VF_128
@@ -6747,7 +6787,7 @@
(match_dup 1)
(const_int 1)))]
"TARGET_AVX512F"
- "%vscalef<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ "%vscalef<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2}"
[(set_attr "prefix" "evex")
(set_attr "mode" "<ssescalarmode>")])
@@ -6802,7 +6842,7 @@
[(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_sgetexp<mode>"
+(define_insn "avx512f_sgetexp<mode><mask_scalar_name>"
[(set (match_operand:VF_128 0 "register_operand" "=v")
(vec_merge:VF_128
(unspec:VF_128
@@ -6812,7 +6852,7 @@
(match_dup 1)
(const_int 1)))]
"TARGET_AVX512F"
- "vgetexp<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
+ "vgetexp<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2}";
[(set_attr "prefix" "evex")
(set_attr "mode" "<ssescalarmode>")])
@@ -6934,7 +6974,7 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
-(define_insn "*avx512f_rndscale<mode>"
+(define_insn "<mask_scalar_codefor>avx512f_rndscale<mode><mask_scalar_name>"
[(set (match_operand:VF_128 0 "register_operand" "=v")
(vec_merge:VF_128
(unspec:VF_128
@@ -6945,7 +6985,7 @@
(match_dup 1)
(const_int 1)))]
"TARGET_AVX512F"
- "vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ "vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %2, %3}"
[(set_attr "length_immediate" "1")
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
@@ -15230,7 +15270,7 @@
[(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
-(define_insn "avx512f_getmant<mode>"
+(define_insn "avx512f_getmant<mode><mask_scalar_name>"
[(set (match_operand:VF_128 0 "register_operand" "=v")
(vec_merge:VF_128
(unspec:VF_128
@@ -15241,7 +15281,7 @@
(match_dup 1)
(const_int 1)))]
"TARGET_AVX512F"
- "vgetmant<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+ "vgetmant<ssescalarmodesuffix>\t{%3, %2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %2, %3}";
[(set_attr "prefix" "evex")
(set_attr "mode" "<ssescalarmode>")])
@@ -54,3 +54,26 @@
(match_dup 1)
(match_operand:SUBST_V 2 "vector_move_operand" "0C")
(match_operand:<avx512fmaskmode> 3 "register_operand" "k")))])
+
+;; Attributes tied to the "mask_scalar" subst.  For each one the first
+;; string is used in the pattern as written and the second string in the
+;; variant produced by applying the subst.
+;; Suffix appended to the insn name ("" or "_mask").
+(define_subst_attr "mask_scalar_name" "mask_scalar" "" "_mask")
+;; Text placed after %0 in output templates.  The "3" form refers to
+;; operands 3 and 4, the "4" form to operands 4 and 5.
+(define_subst_attr "mask_scalar_operand3" "mask_scalar" "" "%{%4%}%N3")
+(define_subst_attr "mask_scalar_operand4" "mask_scalar" "" "%{%5%}%N4")
+;; Name prefix: "*" on the unsubstituted pattern, nothing on the "_mask" one.
+(define_subst_attr "mask_scalar_codefor" "mask_scalar" "*" "")
+;; Values for the "prefix" insn attribute: one for patterns whose
+;; alternatives are "orig,vex", one for a lone "vex" alternative.
+(define_subst_attr "mask_scalar_prefix" "mask_scalar" "orig,vex" "evex")
+(define_subst_attr "mask_scalar_prefix2" "mask_scalar" "vex" "evex")
+
+;; Scalar masking subst, conditional on TARGET_AVX512F.  It matches
+;;   (set op0 (vec_merge op1 op2 (const_int 1)))
+;; and replaces the first vec_merge arm with an inner vec_merge of op1 and
+;; a new merge-source operand (constraint "0C": tied to operand 0, or
+;; satisfying "C") under a new mask register operand (constraint "k").
+;; The new operands are numbered 3 and 4, directly after the input
+;; template's operands 0-2.  This follows the "mask" subst earlier in this
+;; file and the order mask_scalar_operand3 expects (%N3 for the merge
+;; source, %{%4%} for the mask) on a three-operand insn.
+(define_subst "mask_scalar"
+ [(set (match_operand:SUBST_V 0)
+ (vec_merge:SUBST_V
+ (match_operand:SUBST_V 1)
+ (match_operand:SUBST_V 2)
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ [(set (match_dup 0)
+ (vec_merge:SUBST_V
+ (vec_merge:SUBST_V
+ (match_dup 1)
+ (match_operand:SUBST_V 3 "vector_move_operand" "0C")
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "k"))
+ (match_dup 2)
+ (const_int 1)))])