diff mbox

[i386,4/8,AVX512,2/n] Add substed patterns: mask scalar subst.

Message ID 20131113131802.GA13411@msticlxl57.ims.intel.com
State New
Headers show

Commit Message

Kirill Yukhin Nov. 13, 2013, 1:18 p.m. UTC
Hello,
> Is it ok to commit to main trunk?

I've moved to this patch few hunks from
[4/8] [1/n], which related to scalar insns.

Updated patch in the bottom.

Testing pass.

Could you pls comment on it?
I cannot imagine anything except double nested vec_merges.
We may introduce (non-substed) dedicated patterns of
such a kind:
(cond_exec (BI mode k reg)
	   (op:SF/DF (SF/DF operand1)
	   	     (SF/DF operand2)))
However this only applicable to merge variant of EVEX's
masking feature.
For zero-masking, when we need to put 0 with predicate is
false into destination (first element of vector) cond_exec
is not applicable. Maybe extend cond_exec? Maybe use 
if_then_else?

--
Thanks, K

---
 gcc/config/i386/sse.md   | 342 ++++++++++++++++++++++++++++++++++++++++++-----
 gcc/config/i386/subst.md |  23 ++++
 2 files changed, 333 insertions(+), 32 deletions(-)

Comments

Kirill Yukhin Nov. 15, 2013, 5:03 p.m. UTC | #1
Helllo,
On 13 Nov 16:18, Kirill Yukhin wrote:
> Testing pass.
Ping?

--
Thanks, K
Kirill Yukhin Nov. 19, 2013, 9:05 a.m. UTC | #2
Hello,
On 15 Nov 20:03, Kirill Yukhin wrote:
> Ping?
Ping?

--
Thanks, K
Kirill Yukhin Dec. 2, 2013, 1:07 p.m. UTC | #3
Hello,

On 19 Nov 12:05, Kirill Yukhin wrote:
> Hello,
> On 15 Nov 20:03, Kirill Yukhin wrote:
> > Ping?
> Ping?
Ping?

--
Thanks, K
Kirill Yukhin Dec. 24, 2013, 4:56 a.m. UTC | #4
Hello,
On 02 Dec 16:07, Kirill Yukhin wrote:
> Hello,
>
> On 19 Nov 12:05, Kirill Yukhin wrote:
> > Hello,
> > On 15 Nov 20:03, Kirill Yukhin wrote:
> > > Ping?
> > Ping?
> Ping?
>
> --
> Thanks, K

It seems that we have no solution of how scalar instructions
with EVEX's masking should look like. It seems that nested
vec_merge is not very welcome.

So, in order not to miss 4.9 I've stripped out `mask_scalar`
subst from the patch. It now contains only AVX-512 scalar
instructions, intrinsics and tests.

Tested as mentioned in the top post.

Patch attached.

Ok for trunk?

--
Thanks, K
Kirill Yukhin Dec. 24, 2013, 4:59 a.m. UTC | #5
> Patch attached.
>
> Ok for trunk?
>

Just noticed Uros's input about predicates. So, ok with fix of predicate?
diff mbox

Patch

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 6d6e16e..dddf907 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1097,6 +1097,67 @@ 
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn "avx512f_moves<mode>_mask"
+  [(set (match_operand:VF_128 0 "register_operand" "=v")
+	(vec_merge:VF_128
+	  (vec_merge:VF_128
+	    (match_operand:VF_128 2 "register_operand" "v")
+	    (match_operand:VF_128 3 "vector_move_operand" "0C")
+	    (match_operand:<avx512fmaskmode> 4 "register_operand" "k"))
+	  (match_operand:VF_128 1 "register_operand" "v")
+	  (const_int 1)))]
+  "TARGET_AVX512F"
+  "vmov<ssescalarmodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_expand "avx512f_loads<mode>_mask"
+  [(set (match_operand:VF_128 0 "register_operand")
+	(vec_merge:VF_128
+	  (vec_merge:VF_128
+	    (vec_duplicate:VF_128
+	      (match_operand:<ssescalarmode> 1 "memory_operand"))
+	    (match_operand:VF_128 2 "vector_move_operand")
+	    (match_operand:<avx512fmaskmode> 3 "register_operand"))
+	  (match_dup 4)
+	  (const_int 1)))]
+  "TARGET_AVX512F"
+  "operands[4] = CONST0_RTX (<MODE>mode);")
+
+(define_insn "*avx512f_loads<mode>_mask"
+  [(set (match_operand:VF_128 0 "register_operand" "=v")
+	(vec_merge:VF_128
+	  (vec_merge:VF_128
+	    (vec_duplicate:VF_128
+	      (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
+	    (match_operand:VF_128 2 "vector_move_operand" "0C")
+	    (match_operand:<avx512fmaskmode> 3 "register_operand" "k"))
+	  (match_operand:VF_128 4 "const0_operand")
+	  (const_int 1)))]
+  "TARGET_AVX512F"
+  "vmov<ssescalarmodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "memory" "load")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx512f_stores<mode>_mask"
+  [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
+	(vec_select:<ssescalarmode>
+	  (vec_merge:VF_128
+	    (match_operand:VF_128 1 "register_operand" "v")
+	    (vec_duplicate:VF_128
+	      (match_dup 0))
+	    (match_operand:<avx512fmaskmode> 2 "register_operand" "k"))
+	  (parallel [(const_int 0)])))]
+  "TARGET_AVX512F"
+  "vmov<ssescalarmodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "memory" "store")
+   (set_attr "mode" "<ssescalarmode>")])
+
 (define_insn "<sse3>_lddqu<avxsizesuffix>"
   [(set (match_operand:VI1 0 "register_operand" "=x")
 	(unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
@@ -1246,7 +1307,7 @@ 
    (set_attr "prefix" "<mask_prefix3>")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "<sse>_vm<plusminus_insn><mode>3"
+(define_insn "<sse>_vm<plusminus_insn><mode>3<mask_scalar_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=x,v")
 	(vec_merge:VF_128
 	  (plusminus:VF_128
@@ -1257,10 +1318,10 @@ 
   "TARGET_SSE"
   "@
    <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
-   v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
+   v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sseadd")
-   (set_attr "prefix" "orig,vex")
+   (set_attr "prefix" "<mask_scalar_prefix>")
    (set_attr "mode" "<ssescalarmode>")])
 
 (define_expand "mul<mode>3<mask_name>"
@@ -1286,7 +1347,7 @@ 
    (set_attr "btver2_decode" "direct,double")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "<sse>_vm<multdiv_mnemonic><mode>3"
+(define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=x,v")
 	(vec_merge:VF_128
 	  (multdiv:VF_128
@@ -1297,10 +1358,10 @@ 
   "TARGET_SSE"
   "@
    <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
-   v<multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
+   v<multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sse<multdiv_mnemonic>")
-   (set_attr "prefix" "orig,vex")
+   (set_attr "prefix" "<mask_scalar_prefix>")
    (set_attr "btver2_decode" "direct,double")
    (set_attr "mode" "<ssescalarmode>")])
 
@@ -1385,7 +1446,7 @@ 
    (set_attr "prefix" "evex")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*srcp14<mode>"
+(define_insn "<mask_scalar_codefor>srcp14<mode><mask_scalar_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=v")
 	(vec_merge:VF_128
 	  (unspec:VF_128
@@ -1395,7 +1456,7 @@ 
 	  (match_dup 1)
 	  (const_int 1)))]
   "TARGET_AVX512F"
-  "vrcp14<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  "vrcp14<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2}"
   [(set_attr "type" "sse")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<MODE>")])
@@ -1432,7 +1493,7 @@ 
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "<sse>_vmsqrt<mode>2"
+(define_insn "<sse>_vmsqrt<mode>2<mask_scalar_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=x,v")
 	(vec_merge:VF_128
 	  (sqrt:VF_128
@@ -1442,11 +1503,11 @@ 
   "TARGET_SSE"
   "@
    sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
-   vsqrt<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
+   vsqrt<ssescalarmodesuffix>\t{%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %<iptr>1}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sse")
    (set_attr "atom_sse_attr" "sqrt")
-   (set_attr "prefix" "orig,vex")
+   (set_attr "prefix" "<mask_scalar_prefix>")
    (set_attr "btver2_sse_attr" "sqrt")
    (set_attr "mode" "<ssescalarmode>")])
 
@@ -1481,7 +1542,7 @@ 
    (set_attr "prefix" "evex")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*rsqrt14<mode>"
+(define_insn "<mask_scalar_codefor>rsqrt14<mode><mask_scalar_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=v")
 	(vec_merge:VF_128
 	  (unspec:VF_128
@@ -1491,7 +1552,7 @@ 
 	  (match_dup 1)
 	  (const_int 1)))]
   "TARGET_AVX512F"
-  "vrsqrt14<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  "vrsqrt14<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2}"
   [(set_attr "type" "sse")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<MODE>")])
@@ -1561,7 +1622,7 @@ 
    (set_attr "prefix" "<mask_prefix3>")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "<sse>_vm<code><mode>3"
+(define_insn "<sse>_vm<code><mode>3<mask_scalar_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=x,v")
 	(vec_merge:VF_128
 	  (smaxmin:VF_128
@@ -1572,11 +1633,11 @@ 
   "TARGET_SSE"
   "@
    <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
-   v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
+   v<maxmin_float><ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2}"
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sse")
    (set_attr "btver2_sse_attr" "maxmin")
-   (set_attr "prefix" "orig,vex")
+   (set_attr "prefix" "<mask_scalar_prefix>")
    (set_attr "mode" "<ssescalarmode>")])
 
 ;; These versions of the min/max patterns implement exactly the operations
@@ -2691,7 +2752,7 @@ 
 	  (match_operand:FMAMODE 3 "nonimmediate_operand")))]
   "")
 
-(define_insn "*fma_fmadd_<mode>"
+(define_insn "fma_fmadd_<mode>"
   [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
 	(fma:FMAMODE
 	  (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x")
@@ -2919,7 +2980,7 @@ 
 	  UNSPEC_FMADDSUB))]
   "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
 
-(define_insn "*fma_fmaddsub_<mode>"
+(define_insn "fma_fmaddsub_<mode>"
   [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x")
 	(unspec:VF
 	  [(match_operand:VF 1 "nonimmediate_operand" "%0, 0, v, x,x")
@@ -3056,6 +3117,223 @@ 
   [(set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*fmai_fmsub_<mode>_mask"
+  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+	(vec_merge:VF_128
+	  (vec_merge:VF_128
+	    (fma:VF_128
+	      (match_operand:VF_128 1 "nonimmediate_operand" " 0, 0")
+	      (match_operand:VF_128 2 "nonimmediate_operand" "vm, v")
+          (neg:VF_128
+	        (match_operand:VF_128 3 "nonimmediate_operand" "v,vm")))
+	    (match_dup 1)
+	    (match_operand:QI 4 "register_operand" "k,k"))
+	  (match_dup 1)
+	  (const_int 1)))]
+  "TARGET_AVX512F"
+  "@
+   vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2}
+   vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "fmai_vmfmsub_<mode>_mask3"
+  [(set (match_operand:VF_128 0 "register_operand" "=v")
+	(vec_merge:VF_128
+	  (vec_merge:VF_128
+	    (fma:VF_128
+	      (match_operand:VF_128 1 "nonimmediate_operand" "%v")
+	      (match_operand:VF_128 2 "nonimmediate_operand" "vm")
+          (neg:VF_128
+	        (match_operand:VF_128 3 "register_operand" "0")))
+	    (match_dup 3)
+	    (match_operand:QI 4 "register_operand" "k"))
+	  (match_dup 3)
+	  (const_int 1)))]
+  "TARGET_AVX512F"
+  "vfmsub231<ssescalarmodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>1, %<iptr>2}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_fmsub_<mode>_maskz"
+  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+	(vec_merge:VF_128
+	  (vec_merge:VF_128
+	    (fma:VF_128
+	      (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+	      (match_operand:VF_128 2 "nonimmediate_operand" "vm,v")
+          (neg:VF_128
+	        (match_operand:VF_128 3 "nonimmediate_operand" "v,vm")))
+	    (match_operand:VF_128 4 "const0_operand")
+	    (match_operand:QI 5 "register_operand" "k,k"))
+	  (match_dup 1)
+	  (const_int 1)))]
+  "TARGET_AVX512F"
+  "@
+   vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>3, %<iptr>2}
+   vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>2, %<iptr>3}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_fmsub_<mode>_mask"
+  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+	(vec_merge:VF_128
+	  (vec_merge:VF_128
+	    (fma:VF_128
+	      (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+	      (match_operand:VF_128 2 "nonimmediate_operand" "vm,v")
+          (neg:VF_128
+	        (match_operand:VF_128 3 "nonimmediate_operand" "v,vm")))
+	    (match_dup 1)
+	    (match_operand:QI 4 "register_operand" "k,k"))
+	  (match_dup 1)
+	  (const_int 1)))]
+  "TARGET_AVX512F"
+  "@
+   vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2}
+   vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_fmsub_<mode>_maskz"
+  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+	(vec_merge:VF_128
+	  (vec_merge:VF_128
+	    (fma:VF_128
+	      (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+	      (match_operand:VF_128 2 "nonimmediate_operand" "vm,v")
+          (neg:VF_128
+	        (match_operand:VF_128 3 "nonimmediate_operand" "v,vm")))
+	    (match_operand:VF_128 4 "const0_operand")
+	    (match_operand:QI 5 "register_operand" "k,k"))
+	  (match_dup 1)
+	  (const_int 1)))]
+  "TARGET_AVX512F"
+  "@
+   vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>3, %<iptr>2}
+   vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>2, %<iptr>3}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_vmfnmadd_<mode>_mask"
+  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+	(vec_merge:VF_128
+	  (vec_merge:VF_128
+	    (fma:VF_128
+	      (neg:VF_128
+		(match_operand:VF_128 2 "nonimmediate_operand" "vm,v"))
+	      (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+	      (match_operand:VF_128 3 "nonimmediate_operand" "v,vm"))
+	    (match_dup 1)
+	    (match_operand:QI 4 "register_operand" "k,k"))
+	  (match_dup 1)
+	  (const_int 1)))]
+  "TARGET_AVX512F"
+  "@
+   vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2}
+   vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_vmfnmadd_<mode>_mask3"
+  [(set (match_operand:VF_128 0 "register_operand" "=v")
+	(vec_merge:VF_128
+	  (vec_merge:VF_128
+	    (fma:VF_128
+	      (neg:VF_128
+		(match_operand:VF_128 1 "nonimmediate_operand" "%v"))
+	      (match_operand:VF_128 2 "nonimmediate_operand" "vm")
+	      (match_operand:VF_128 3 "register_operand" "0"))
+	    (match_dup 3)
+	    (match_operand:QI 4 "register_operand" "k"))
+	  (match_dup 3)
+	  (const_int 1)))]
+  "TARGET_AVX512F"
+  "vfnmadd231<ssescalarmodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>1, %<iptr>2}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_vmfnmadd_<mode>_maskz"
+  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+	(vec_merge:VF_128
+	  (vec_merge:VF_128
+	    (fma:VF_128
+	      (neg:VF_128
+		(match_operand:VF_128 2 "nonimmediate_operand" "vm,v"))
+	      (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+	      (match_operand:VF_128 3 "nonimmediate_operand" "v,vm"))
+	    (match_operand:VF_128 4 "const0_operand")
+	    (match_operand:QI 5 "register_operand" "k,k"))
+	  (match_dup 1)
+	  (const_int 1)))]
+  "TARGET_AVX512F"
+  "@
+   vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>3, %<iptr>2}
+   vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>2, %<iptr>3}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_fnmsub_<mode>_mask"
+  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+	(vec_merge:VF_128
+	  (vec_merge:VF_128
+	    (fma:VF_128
+	      (neg:VF_128
+		(match_operand:VF_128 2 "nonimmediate_operand" "vm,v"))
+	      (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+	      (neg:VF_128
+		(match_operand:VF_128 3 "nonimmediate_operand" "v,vm")))
+	    (match_dup 1)
+	    (match_operand:QI 4 "register_operand" "k,k"))
+	  (match_dup 1)
+	  (const_int 1)))]
+  "TARGET_AVX512F"
+  "@
+   vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2}
+   vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_fnmsub_<mode>_mask3"
+  [(set (match_operand:VF_128 0 "register_operand" "=v")
+	(vec_merge:VF_128
+	  (vec_merge:VF_128
+	    (fma:VF_128
+	      (neg:VF_128
+		(match_operand:VF_128 1 "nonimmediate_operand" "%v"))
+	      (match_operand:VF_128 2 "nonimmediate_operand" "vm")
+	      (neg:VF_128
+	        (match_operand:VF_128 3 "register_operand" "0")))
+	    (match_dup 3)
+	    (match_operand:QI 4 "register_operand" "k"))
+	  (match_dup 3)
+	  (const_int 1)))]
+  "TARGET_AVX512F"
+  "vfnmsub231<ssescalarmodesuffix>\t{%2, %1, %0%{%4%}|%0%{%4%}, %<iptr>1, %<iptr>2}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fmai_fnmsub_<mode>_maskz"
+  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
+	(vec_merge:VF_128
+	  (vec_merge:VF_128
+	    (fma:VF_128
+	      (neg:VF_128
+		(match_operand:VF_128 2 "nonimmediate_operand" "vm,v"))
+	      (match_operand:VF_128 1 "nonimmediate_operand" "0,0")
+	      (neg:VF_128
+	        (match_operand:VF_128 3 "nonimmediate_operand" "v,vm")))
+	    (match_operand:VF_128 4 "const0_operand")
+	    (match_operand:QI 5 "register_operand" "k,k"))
+	  (match_dup 1)
+	  (const_int 1)))]
+  "TARGET_AVX512F"
+  "@
+   vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>3, %<iptr>2}
+   vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %<iptr>2, %<iptr>3}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*fmai_fmsub_<mode>"
   [(set (match_operand:VF_128 0 "register_operand" "=v,v")
         (vec_merge:VF_128
@@ -4006,7 +4284,7 @@ 
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "TI")])
 
-(define_insn "sse2_cvtsd2ss"
+(define_insn "sse2_cvtsd2ss<mask_scalar_name>"
   [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
 	(vec_merge:V4SF
 	  (vec_duplicate:V4SF
@@ -4018,17 +4296,17 @@ 
   "@
    cvtsd2ss\t{%2, %0|%0, %2}
    cvtsd2ss\t{%2, %0|%0, %q2}
-   vcvtsd2ss\t{%2, %1, %0|%0, %1, %q2}"
+   vcvtsd2ss\t{%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %q2}"
   [(set_attr "isa" "noavx,noavx,avx")
    (set_attr "type" "ssecvt")
    (set_attr "athlon_decode" "vector,double,*")
    (set_attr "amdfam10_decode" "vector,double,*")
    (set_attr "bdver1_decode" "direct,direct,*")
    (set_attr "btver2_decode" "double,double,double")
-   (set_attr "prefix" "orig,orig,vex")
+   (set_attr "prefix" "orig,orig,<mask_scalar_prefix2>")
    (set_attr "mode" "SF")])
 
-(define_insn "sse2_cvtss2sd"
+(define_insn "sse2_cvtss2sd<mask_scalar_name>"
   [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
 	(vec_merge:V2DF
 	  (float_extend:V2DF
@@ -4041,14 +4319,14 @@ 
   "@
    cvtss2sd\t{%2, %0|%0, %2}
    cvtss2sd\t{%2, %0|%0, %k2}
-   vcvtss2sd\t{%2, %1, %0|%0, %1, %k2}"
+   vcvtss2sd\t{%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %k2}"
   [(set_attr "isa" "noavx,noavx,avx")
    (set_attr "type" "ssecvt")
    (set_attr "amdfam10_decode" "vector,double,*")
    (set_attr "athlon_decode" "direct,direct,*")
    (set_attr "bdver1_decode" "direct,direct,*")
    (set_attr "btver2_decode" "double,double,double")
-   (set_attr "prefix" "orig,orig,vex")
+   (set_attr "prefix" "orig,orig,<mask_scalar_prefix2>")
    (set_attr "mode" "DF")])
 
 (define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name>"
@@ -6403,7 +6681,7 @@ 
   operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
 })
 
-(define_insn "*avx512f_vmscalef<mode>"
+(define_insn "<mask_scalar_codefor>avx512f_vmscalef<mode><mask_scalar_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=v")
 	(vec_merge:VF_128
 	  (unspec:VF_128
@@ -6413,7 +6691,7 @@ 
 	  (match_dup 1)
 	  (const_int 1)))]
   "TARGET_AVX512F"
-  "%vscalef<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  "%vscalef<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2}"
   [(set_attr "prefix" "evex")
    (set_attr "mode"  "<ssescalarmode>")])
 
@@ -6468,7 +6746,7 @@ 
     [(set_attr "prefix" "evex")
      (set_attr "mode" "<MODE>")])
 
-(define_insn "avx512f_sgetexp<mode>"
+(define_insn "avx512f_sgetexp<mode><mask_scalar_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=v")
 	(vec_merge:VF_128
 	  (unspec:VF_128
@@ -6478,7 +6756,7 @@ 
 	  (match_dup 1)
 	  (const_int 1)))]
    "TARGET_AVX512F"
-   "vgetexp<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
+   "vgetexp<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2}";
     [(set_attr "prefix" "evex")
      (set_attr "mode" "<ssescalarmode>")])
 
@@ -6600,7 +6878,7 @@ 
    (set_attr "prefix" "evex")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*avx512f_rndscale<mode>"
+(define_insn "<mask_scalar_codefor>avx512f_rndscale<mode><mask_scalar_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=v")
 	(vec_merge:VF_128
 	  (unspec:VF_128
@@ -6611,7 +6889,7 @@ 
 	  (match_dup 1)
 	  (const_int 1)))]
   "TARGET_AVX512F"
-  "vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  "vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %2, %3}"
   [(set_attr "length_immediate" "1")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<MODE>")])
@@ -14914,7 +15192,7 @@ 
   [(set_attr "prefix" "evex")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "avx512f_getmant<mode>"
+(define_insn "avx512f_getmant<mode><mask_scalar_name>"
   [(set (match_operand:VF_128 0 "register_operand" "=v")
 	(vec_merge:VF_128
 	  (unspec:VF_128
@@ -14925,7 +15203,7 @@ 
 	  (match_dup 1)
 	  (const_int 1)))]
    "TARGET_AVX512F"
-   "vgetmant<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+   "vgetmant<ssescalarmodesuffix>\t{%3, %2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %2, %3}";
    [(set_attr "prefix" "evex")
    (set_attr "mode" "<ssescalarmode>")])
 
diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md
index 6b45d05..532a3a1 100644
--- a/gcc/config/i386/subst.md
+++ b/gcc/config/i386/subst.md
@@ -54,3 +54,26 @@ 
 	  (match_dup 1)
 	  (match_operand:SUBST_V 2 "vector_move_operand" "0C")
 	  (match_operand:<avx512fmaskmode> 3 "register_operand" "k")))])
+
+(define_subst_attr "mask_scalar_name" "mask_scalar" "" "_mask")
+(define_subst_attr "mask_scalar_operand3" "mask_scalar" "" "%{%4%}%N3")
+(define_subst_attr "mask_scalar_operand4" "mask_scalar" "" "%{%5%}%N4")
+(define_subst_attr "mask_scalar_codefor" "mask_scalar" "*" "")
+(define_subst_attr "mask_scalar_prefix" "mask_scalar" "orig,vex" "evex")
+(define_subst_attr "mask_scalar_prefix2" "mask_scalar" "vex" "evex")
+
+(define_subst "mask_scalar"
+  [(set (match_operand:SUBST_V 0)
+	(vec_merge:SUBST_V
+	  (match_operand:SUBST_V 1)
+	  (match_operand:SUBST_V 2)
+	  (const_int 1)))]
+  "TARGET_AVX512F"
+  [(set (match_dup 0)
+        (vec_merge:SUBST_V
+	  (vec_merge:SUBST_V
+	    (match_dup 1)
+	    (match_operand:SUBST_V 4 "vector_move_operand" "0C")
+	    (match_operand:<avx512fmaskmode> 5 "register_operand" "k"))
+	  (match_dup 2)
+	  (const_int 1)))])