diff mbox

[i386,3/8,AVX512] Add AVX-512 patterns.

Message ID 20130814072638.GD52726@msticlxl57.ims.intel.com
State New
Headers show

Commit Message

Kirill Yukhin Aug. 14, 2013, 7:26 a.m. UTC
Hello,
Patch was rebased on top of trunk.

It is applicable on top of [2/8] (which was rebased on new trunk today).

Testing:
  1. Bootstrap pass.
  2. make check shows no regressions.
  3. Spec 2000 & 2006 build show no regressions both with and without -mavx512f option.
  4. Spec 2000 & 2006 run shows no regressions without -mavx512f option.

Thanks, K

---
 gcc/config/i386/i386.c        |   68 +-
 gcc/config/i386/i386.md       |   30 +-
 gcc/config/i386/predicates.md |   55 +
 gcc/config/i386/sse.md        | 3525 +++++++++++++++++++++++++++++++++++------
 4 files changed, 3157 insertions(+), 521 deletions(-)

Comments

Richard Henderson Aug. 19, 2013, 10:01 p.m. UTC | #1
>  ;; All vector modes including V?TImode, used in move patterns.
>  (define_mode_iterator V16
> -  [(V32QI "TARGET_AVX") V16QI
> -   (V16HI "TARGET_AVX") V8HI
> -   (V8SI "TARGET_AVX") V4SI
> -   (V4DI "TARGET_AVX") V2DI
> +  [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
> +   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
> +   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
> +   (V8DI "TARGET_AVX512F")  (V4DI "TARGET_AVX") V2DI
>     (V2TI "TARGET_AVX") V1TI
> -   (V8SF "TARGET_AVX") V4SF
> -   (V4DF "TARGET_AVX") V2DF])
> +   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
> +   (V8DF "TARGET_AVX512F")  (V4DF "TARGET_AVX") V2DF])

Let's rename this VMOVE, and apply only that change as a separate patch.

> +(define_mode_iterator VF_AVX512F
> +  [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
> +   (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
> +

Why aren't you modifying VF instead?  As written, this makes the comment for "VF"
incorrect, since it no longer contains "all vector float modes".

>  ;; All SFmode vector float modes
>  (define_mode_iterator VF1
>    [(V8SF "TARGET_AVX") V4SF])
>  
> +(define_mode_iterator VF1_AVX512F
> +  [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])

Likewise.

>  ;; All DFmode vector float modes
>  (define_mode_iterator VF2
>    [(V4DF "TARGET_AVX") V2DF])
>  
> +(define_mode_iterator VF2_AVX512F
> +  [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])

Likewise.

> +;; 128bit and 512bit float modes
> +(define_mode_iterator VF_128_512
> +  [V4SF V2DF V16SF V8DF])

Unused?

>  (define_expand "<code><mode>2"
> -  [(set (match_operand:VF 0 "register_operand")
> -	(absneg:VF
> -	  (match_operand:VF 1 "register_operand")))]
> +  [(set (match_operand:VF_AVX512F 0 "register_operand")
> +	(absneg:VF_AVX512F
> +	  (match_operand:VF_AVX512F 1 "register_operand")))]
>    "TARGET_SSE"
>    "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")

Fixing VF as I describe above appears to make a large portion
of the patch just go away.



r~
Richard Henderson Aug. 19, 2013, 10:02 p.m. UTC | #2
On 08/14/2013 12:26 AM, Kirill Yukhin wrote:
> Hello,
> Patch was rebased on top of trunk.
> 
> It is applicable on top of [2/8] (which was rebased on new trunk today).
> 
> Testing:
>   1. Bootstrap pass.
>   2. make check shows no regressions.
>   3. Spec 2000 & 2006 build show no regressions both with and without -mavx512f option.
>   4. Spec 2000 & 2006 run shows no regressions without -mavx512f option.
> 
> Thanks, K

This patch is still far too large.

I think you should split it up based on every single mode iterator that
you need to add or change.


r~
diff mbox

Patch

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 7412745..7851d12 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -16759,8 +16759,8 @@  ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
       gcc_unreachable ();
     case V32QImode:
       extract = gen_avx_vextractf128v32qi;
-      load_unaligned = gen_avx_loaddqu256;
-      store_unaligned = gen_avx_storedqu256;
+      load_unaligned = gen_avx_loaddquv32qi;
+      store_unaligned = gen_avx_storedquv32qi;
       mode = V16QImode;
       break;
     case V8SFmode:
@@ -16863,10 +16863,56 @@  void
 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
 {
   rtx op0, op1, m;
+  rtx (*load_unaligned) (rtx, rtx);
+  rtx (*store_unaligned) (rtx, rtx);
 
   op0 = operands[0];
   op1 = operands[1];
 
+  if (GET_MODE_SIZE (mode) == 64)
+    {
+      switch (GET_MODE_CLASS (mode))
+	{
+	case MODE_VECTOR_INT:
+	case MODE_INT:
+	  op0 = gen_lowpart (V16SImode, op0);
+	  op1 = gen_lowpart (V16SImode, op1);
+	  /* FALLTHRU */
+
+	case MODE_VECTOR_FLOAT:
+	  switch (GET_MODE (op0))
+	    {
+	    default:
+	      gcc_unreachable ();
+	    case V16SImode:
+	      load_unaligned = gen_avx512f_loaddquv16si;
+	      store_unaligned = gen_avx512f_storedquv16si;
+	      break;
+	    case V16SFmode:
+	      load_unaligned = gen_avx512f_loadups512;
+	      store_unaligned = gen_avx512f_storeups512;
+	      break;
+	    case V8DFmode:
+	      load_unaligned = gen_avx512f_loadupd512;
+	      store_unaligned = gen_avx512f_storeupd512;
+	      break;
+	    }
+
+	  if (MEM_P (op1))
+	    emit_insn (load_unaligned (op0, op1));
+	  else if (MEM_P (op0))
+	    emit_insn (store_unaligned (op0, op1));
+	  else
+	    gcc_unreachable ();
+	  break;
+
+	default:
+	  gcc_unreachable ();
+	}
+
+      return;
+    }
+
   if (TARGET_AVX
       && GET_MODE_SIZE (mode) == 32)
     {
@@ -16899,7 +16945,7 @@  ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
 	  op0 = gen_lowpart (V16QImode, op0);
 	  op1 = gen_lowpart (V16QImode, op1);
 	  /* We will eventually emit movups based on insn attributes.  */
-	  emit_insn (gen_sse2_loaddqu (op0, op1));
+	  emit_insn (gen_sse2_loaddquv16qi (op0, op1));
 	}
       else if (TARGET_SSE2 && mode == V2DFmode)
         {
@@ -16974,7 +17020,7 @@  ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
 	  op0 = gen_lowpart (V16QImode, op0);
 	  op1 = gen_lowpart (V16QImode, op1);
 	  /* We will eventually emit movups based on insn attributes.  */
-	  emit_insn (gen_sse2_storedqu (op0, op1));
+	  emit_insn (gen_sse2_storedquv16qi (op0, op1));
 	}
       else if (TARGET_SSE2 && mode == V2DFmode)
 	{
@@ -27694,13 +27740,13 @@  static const struct builtin_description bdesc_special_args[] =
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
   { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
 
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
@@ -27729,8 +27775,8 @@  static const struct builtin_description bdesc_special_args[] =
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
 
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
@@ -27999,7 +28045,7 @@  static const struct builtin_description bdesc_args[] =
   { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
   { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
 
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
 
@@ -28347,7 +28393,7 @@  static const struct builtin_description bdesc_args[] =
   { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
   { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
   { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
@@ -35025,7 +35071,7 @@  ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
 	rtx sub;
 
         gcc_assert (FLOAT_MODE_P (mode));
-        gcc_assert (TARGET_FMA || TARGET_FMA4);
+        gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
 
         /* ??? SSE scalar/vector cost should be used here.  */
         /* ??? Bald assumption that fma has the same cost as fmul.  */
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 630b87e..bffe748 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -751,6 +751,8 @@ 
 
 (define_code_iterator sat_plusminus [ss_plus us_plus ss_minus us_minus])
 
+(define_code_iterator multdiv [mult div])
+
 ;; Base name for define_insn
 (define_code_attr plusminus_insn
   [(plus "add") (ss_plus "ssadd") (us_plus "usadd")
@@ -762,6 +764,8 @@ 
    (minus "sub") (ss_minus "subs") (us_minus "subus")])
 (define_code_attr plusminus_carry_mnemonic
   [(plus "adc") (minus "sbb")])
+(define_code_attr multdiv_mnemonic
+  [(mult "mul") (div "div")])
 
 ;; Mark commutative operators as such in constraints.
 (define_code_attr comm [(plus "%") (ss_plus "%") (us_plus "%")
@@ -784,6 +788,7 @@ 
 ;; Mapping of logic operators
 (define_code_iterator any_logic [and ior xor])
 (define_code_iterator any_or [ior xor])
+(define_code_iterator fpint_logic [and xor])
 
 ;; Base name for insn mnemonic.
 (define_code_attr logic [(and "and") (ior "or") (xor "xor")])
@@ -831,6 +836,15 @@ 
 (define_code_attr s [(sign_extend "s") (zero_extend "u")])
 (define_code_attr u_bool [(sign_extend "false") (zero_extend "true")])
 
+;; Used in signed and unsigned truncations.
+(define_code_iterator any_truncate [ss_truncate truncate us_truncate])
+;; Instruction suffix for truncations.
+(define_code_attr trunsuffix [(ss_truncate "s") (truncate "") (us_truncate "us")])
+
+;; Used in signed and unsigned fix.
+(define_code_iterator any_fix [fix unsigned_fix])
+(define_code_attr fixsuffix [(fix "") (unsigned_fix "u")])
+
 ;; All integer modes.
 (define_mode_iterator SWI1248x [QI HI SI DI])
 
@@ -17438,7 +17452,7 @@ 
   [(prefetch (match_operand 0 "address_operand")
 	     (match_operand:SI 1 "const_int_operand")
 	     (match_operand:SI 2 "const_int_operand"))]
-  "TARGET_PREFETCH_SSE || TARGET_PRFCHW"
+  "TARGET_PREFETCH_SSE || TARGET_PRFCHW || TARGET_AVX512PF"
 {
   bool write = INTVAL (operands[1]) != 0;
   int locality = INTVAL (operands[2]);
@@ -17451,6 +17465,8 @@ 
      of locality.  */
   if (TARGET_PRFCHW && (write || !TARGET_PREFETCH_SSE))
     operands[2] = GEN_INT (3);
+  else if (TARGET_AVX512PF && (write || !TARGET_PREFETCH_SSE))
+    operands[2] = GEN_INT (1);
   else
     operands[1] = const0_rtx;
 })
@@ -17492,6 +17508,18 @@ 
 	(symbol_ref "memory_address_length (operands[0], false)"))
    (set_attr "memory" "none")])
 
+(define_insn "*prefetch_avx512pf_<mode>"
+  [(prefetch (match_operand:P 0 "address_operand" "p")
+	     (const_int 1)	; operand 1 of prefetch: nonzero means write
+	     (const_int 1))]	; operand 2 of prefetch: locality level 1
+  "TARGET_AVX512PF"
+  "prefetchwt1\t%a0"
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "evex")
+   (set (attr "length_address")
+	(symbol_ref "memory_address_length (operands[0], false)"))
+   (set_attr "memory" "none")])
+
 (define_expand "stack_protect_set"
   [(match_operand 0 "memory_operand")
    (match_operand 1 "memory_operand")]
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index b64ef69..c029fa8 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -737,16 +737,66 @@ 
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), 6, 7)")))
 
+;; Match 8 to 9.
+(define_predicate "const_8_to_9_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 8, 9)")))
+
 ;; Match 8 to 11.
 (define_predicate "const_8_to_11_operand"
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), 8, 11)")))
 
+;; Match 8 to 15.
+(define_predicate "const_8_to_15_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 8, 15)")))
+
+;; Match 10 to 11.
+(define_predicate "const_10_to_11_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 10, 11)")))
+
+;; Match 12 to 13.
+(define_predicate "const_12_to_13_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 12, 13)")))
+
 ;; Match 12 to 15.
 (define_predicate "const_12_to_15_operand"
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), 12, 15)")))
 
+;; Match 14 to 15.
+(define_predicate "const_14_to_15_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 14, 15)")))
+
+;; Match 16 to 19.
+(define_predicate "const_16_to_19_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 16, 19)")))
+
+;; Match 16 to 31.
+(define_predicate "const_16_to_31_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 16, 31)")))
+
+;; Match 20 to 23.
+(define_predicate "const_20_to_23_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 20, 23)")))
+
+;; Match 24 to 27.
+(define_predicate "const_24_to_27_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 24, 27)")))
+
+;; Match 28 to 31.
+(define_predicate "const_28_to_31_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 28, 31)")))
+
 ;; True if this is a constant appropriate for an increment or decrement.
 (define_predicate "incdec_operand"
   (match_code "const_int")
@@ -1317,3 +1367,8 @@ 
 (define_predicate "general_vector_operand"
   (ior (match_operand 0 "nonimmediate_operand")
        (match_code "const_vector")))
+
+;; Return true if OP is either a register or the constant -1.
+(define_predicate "register_or_constm1_operand"
+  (ior (match_operand 0 "register_operand")
+       (match_test "op == constm1_rtx")))
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 1dd7957..6802031 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -83,6 +83,43 @@ 
   UNSPEC_VPERMTI
   UNSPEC_GATHER
   UNSPEC_VSIBADDR
+
+  ;; For AVX512F support
+  UNSPEC_BLENDM
+  UNSPEC_VPERMI2
+  UNSPEC_VPERMI2_MASK
+  UNSPEC_VPERMT2
+  UNSPEC_UNSIGNED_FIX_NOTRUNC
+  UNSPEC_UNSIGNED_PCMP
+  UNSPEC_TESTM
+  UNSPEC_TESTNM
+  UNSPEC_SCATTER
+  UNSPEC_RCP14
+  UNSPEC_RSQRT14
+  UNSPEC_FIXUPIMM
+  UNSPEC_SCALEF
+  UNSPEC_VTERNLOG
+  UNSPEC_GETEXP
+  UNSPEC_GETMANT
+  UNSPEC_ALIGN
+  UNSPEC_CONFLICT
+  UNSPEC_COMPRESS
+  UNSPEC_COMPRESS_STORE
+  UNSPEC_EXPAND
+  UNSPEC_MASKED_EQ
+  UNSPEC_MASKED_GT
+
+  ;; For embedded rounding feature
+  UNSPEC_EMBEDDED_ROUNDING
+
+  ;; For AVX512PF support
+  UNSPEC_GATHER_PREFETCH
+  UNSPEC_SCATTER_PREFETCH
+
+  ;; For AVX512ER support
+  UNSPEC_EXP2
+  UNSPEC_RCP28
+  UNSPEC_RSQRT28
 ])
 
 (define_c_enum "unspecv" [
@@ -97,22 +134,22 @@ 
 
 ;; All vector modes including V?TImode, used in move patterns.
 (define_mode_iterator V16
-  [(V32QI "TARGET_AVX") V16QI
-   (V16HI "TARGET_AVX") V8HI
-   (V8SI "TARGET_AVX") V4SI
-   (V4DI "TARGET_AVX") V2DI
+  [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
+   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
+   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+   (V8DI "TARGET_AVX512F")  (V4DI "TARGET_AVX") V2DI
    (V2TI "TARGET_AVX") V1TI
-   (V8SF "TARGET_AVX") V4SF
-   (V4DF "TARGET_AVX") V2DF])
+   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+   (V8DF "TARGET_AVX512F")  (V4DF "TARGET_AVX") V2DF])
 
 ;; All vector modes
 (define_mode_iterator V
   [(V32QI "TARGET_AVX") V16QI
    (V16HI "TARGET_AVX") V8HI
-   (V8SI "TARGET_AVX") V4SI
-   (V4DI "TARGET_AVX") V2DI
-   (V8SF "TARGET_AVX") V4SF
-   (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
+   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+   (V8DI "TARGET_AVX512F")  (V4DI "TARGET_AVX") V2DI
+   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+   (V8DF "TARGET_AVX512F")  (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
 
 ;; All 128bit vector modes
 (define_mode_iterator V_128
@@ -122,19 +159,41 @@ 
 (define_mode_iterator V_256
   [V32QI V16HI V8SI V4DI V8SF V4DF])
 
+;; All 512bit vector modes
+(define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
+
+;; All 256bit and 512bit vector modes
+(define_mode_iterator V_256_512
+  [V32QI V16HI V8SI V4DI V8SF V4DF
+   (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
+   (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
+
 ;; All vector float modes
 (define_mode_iterator VF
   [(V8SF "TARGET_AVX") V4SF
    (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
 
+(define_mode_iterator VF_AVX512F
+  [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+   (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
+
 ;; All SFmode vector float modes
 (define_mode_iterator VF1
   [(V8SF "TARGET_AVX") V4SF])
 
+(define_mode_iterator VF1_AVX512F
+  [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
+
 ;; All DFmode vector float modes
 (define_mode_iterator VF2
   [(V4DF "TARGET_AVX") V2DF])
 
+(define_mode_iterator VF2_AVX512F
+  [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
+
+(define_mode_iterator VF2_512_256
+  [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")])
+
 ;; All 128bit vector float modes
 (define_mode_iterator VF_128
   [V4SF (V2DF "TARGET_SSE2")])
@@ -143,9 +202,18 @@ 
 (define_mode_iterator VF_256
   [V8SF V4DF])
 
+;; All 512bit vector float modes
+(define_mode_iterator VF_512
+  [V16SF V8DF])
+
+;; 128bit and 512bit float modes
+(define_mode_iterator VF_128_512
+  [V4SF V2DF V16SF V8DF])
+
 ;; All vector integer modes
 (define_mode_iterator VI
-  [(V32QI "TARGET_AVX") V16QI
+  [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
+   (V32QI "TARGET_AVX") V16QI
    (V16HI "TARGET_AVX") V8HI
    (V8SI "TARGET_AVX") V4SI
    (V4DI "TARGET_AVX") V2DI])
@@ -153,16 +221,20 @@ 
 (define_mode_iterator VI_AVX2
   [(V32QI "TARGET_AVX2") V16QI
    (V16HI "TARGET_AVX2") V8HI
-   (V8SI "TARGET_AVX2") V4SI
-   (V4DI "TARGET_AVX2") V2DI])
+   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
+   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
 
 ;; All QImode vector integer modes
 (define_mode_iterator VI1
   [(V32QI "TARGET_AVX") V16QI])
 
+(define_mode_iterator VI_UNALIGNED_LOADSTORE
+  [(V32QI "TARGET_AVX") V16QI
+   (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
+
 ;; All DImode vector integer modes
 (define_mode_iterator VI8
-  [(V4DI "TARGET_AVX") V2DI])
+  [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
 
 (define_mode_iterator VI1_AVX2
   [(V32QI "TARGET_AVX2") V16QI])
@@ -170,12 +242,36 @@ 
 (define_mode_iterator VI2_AVX2
   [(V16HI "TARGET_AVX2") V8HI])
 
+(define_mode_iterator VI2_AVX512F
+  [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
+
+(define_mode_iterator VI4_AVX
+  [(V8SI "TARGET_AVX") V4SI])
+
 (define_mode_iterator VI4_AVX2
   [(V8SI "TARGET_AVX2") V4SI])
 
+(define_mode_iterator VI4_AVX512F
+  [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
+
+(define_mode_iterator VI48_AVX512F
+  [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
+   (V8DI "TARGET_AVX512F")])
+
 (define_mode_iterator VI8_AVX2
   [(V4DI "TARGET_AVX2") V2DI])
 
+(define_mode_iterator VI8_AVX2_AVX512F
+  [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
+
+;; All V8D* modes
+(define_mode_iterator V8FI
+  [V8DF V8DI])
+
+;; All V16S* modes
+(define_mode_iterator V16FI
+  [V16SF V16SI])
+
 ;; ??? We should probably use TImode instead.
 (define_mode_iterator VIMAX_AVX2
   [(V2TI "TARGET_AVX2") V1TI])
@@ -192,6 +288,17 @@ 
   [(V16HI "TARGET_AVX2") V8HI
    (V8SI "TARGET_AVX2") V4SI])
 
+(define_mode_iterator VI124_AVX2_48_AVX512F
+  [(V32QI "TARGET_AVX2") V16QI
+   (V16HI "TARGET_AVX2") V8HI
+   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
+   (V8DI "TARGET_AVX512F")])
+
+(define_mode_iterator VI124_AVX512F
+  [(V32QI "TARGET_AVX2") V16QI
+   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
+   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
+
 (define_mode_iterator VI124_AVX2
   [(V32QI "TARGET_AVX2") V16QI
    (V16HI "TARGET_AVX2") V8HI
@@ -202,9 +309,14 @@ 
    (V8SI "TARGET_AVX2") V4SI
    (V4DI "TARGET_AVX2") V2DI])
 
-(define_mode_iterator VI48_AVX2
-  [(V8SI "TARGET_AVX2") V4SI
-   (V4DI "TARGET_AVX2") V2DI])
+(define_mode_iterator VI248_AVX2_8_AVX512F
+  [(V16HI "TARGET_AVX2") V8HI
+   (V8SI "TARGET_AVX2") V4SI
+   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
+
+(define_mode_iterator VI48_AVX2_48_AVX512F
+  [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
+   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
 
 (define_mode_iterator V48_AVX2
   [V4SF V2DF
@@ -212,11 +324,18 @@ 
    (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
    (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
 
+(define_mode_attr sse2_avx_avx512f
+  [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
+   (V4SI  "sse2") (V8SI  "avx") (V16SI "avx512f")
+   (V8DI "avx512f")
+   (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
+   (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
+
 (define_mode_attr sse2_avx2
   [(V16QI "sse2") (V32QI "avx2")
    (V8HI "sse2") (V16HI "avx2")
-   (V4SI "sse2") (V8SI "avx2")
-   (V2DI "sse2") (V4DI "avx2")
+   (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
+   (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
    (V1TI "sse2") (V2TI "avx2")])
 
 (define_mode_attr ssse3_avx2
@@ -229,7 +348,7 @@ 
 (define_mode_attr sse4_1_avx2
    [(V16QI "sse4_1") (V32QI "avx2")
     (V8HI "sse4_1") (V16HI "avx2")
-    (V4SI "sse4_1") (V8SI "avx2")
+    (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
     (V2DI "sse4_1") (V4DI "avx2")])
 
 (define_mode_attr avx_avx2
@@ -244,6 +363,12 @@ 
    (V4SI "vec") (V8SI "avx2")
    (V2DI "vec") (V4DI "avx2")])
 
+(define_mode_attr avx2_avx512f
+  [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
+   (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
+   (V8SF "avx2") (V16SF "avx512f")
+   (V4DF "avx2") (V8DF "avx512f")])
+
 (define_mode_attr shuffletype
   [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
   (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
@@ -251,12 +376,21 @@ 
   (V32QI "i") (V16HI "u") (V16QI "i") (V8HI "i")
   (V64QI "i") (V1TI "i") (V2TI "i")])
 
+(define_mode_attr ssequartermode
+  [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
+
 (define_mode_attr ssedoublemode
-  [(V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI")
+  [(V16SF "V32SF") (V16SI "V32SI") (V8DI "V16DI") (V8DF "V16DF")
+   (V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI")
    (V32QI "V32HI") (V16QI "V16HI")])
 
+(define_mode_attr ssefixupmode
+  [(V16SF "V16SI") (V4SF "V4SI") (V8DF "V8DI") (V2DF "V2DI")])
+
 (define_mode_attr ssebytemode
-  [(V4DI "V32QI") (V2DI "V16QI")])
+  [(V4DI "V32QI") (V2DI "V16QI")
+   (V8SI "V32QI") (V4SI "V16QI")
+   (V16HI "V32QI") (V8HI "V16QI")])
 
 ;; All 128bit vector integer modes
 (define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
@@ -264,7 +398,10 @@ 
 ;; All 256bit vector integer modes
 (define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
 
-;; Random 128bit vector integer mode combinations
+;; All 512bit vector integer modes
+(define_mode_iterator VI_512 [V64QI V32HI V16SI V8DI])
+
+;; Various 128bit vector integer mode combinations
 (define_mode_iterator VI12_128 [V16QI V8HI])
 (define_mode_iterator VI14_128 [V16QI V4SI])
 (define_mode_iterator VI124_128 [V16QI V8HI V4SI])
@@ -273,36 +410,49 @@ 
 (define_mode_iterator VI248_128 [V8HI V4SI V2DI])
 (define_mode_iterator VI48_128 [V4SI V2DI])
 
-;; Random 256bit vector integer mode combinations
-(define_mode_iterator VI124_256 [V32QI V16HI V8SI])
+;; Various 256bit and 512bit vector integer mode combinations
+(define_mode_iterator VI124_256_48_512
+  [V32QI V16HI V8SI (V8DI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")])
 (define_mode_iterator VI48_256 [V8SI V4DI])
+(define_mode_iterator VI48_512 [V16SI V8DI])
 
 ;; Int-float size matches
 (define_mode_iterator VI4F_128 [V4SI V4SF])
 (define_mode_iterator VI8F_128 [V2DI V2DF])
 (define_mode_iterator VI4F_256 [V8SI V8SF])
 (define_mode_iterator VI8F_256 [V4DI V4DF])
+(define_mode_iterator VI8F_256_512
+  [V4DI V4DF (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
+(define_mode_iterator VI48F_256_512
+  [V8SI V8SF
+  (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
+  (V8DI  "TARGET_AVX512F") (V8DF  "TARGET_AVX512F")])
+(define_mode_iterator VI48F_512 [V16SI V16SF V8DI V8DF])
 
 ;; Mapping from float mode to required SSE level
 (define_mode_attr sse
   [(SF "sse") (DF "sse2")
    (V4SF "sse") (V2DF "sse2")
-   (V8SF "avx") (V4DF "avx")])
+   (V16SF "avx512f") (V8SF "avx")
+   (V8DF "avx512f") (V4DF "avx")])
 
 (define_mode_attr sse2
-  [(V16QI "sse2") (V32QI "avx")
-   (V2DI "sse2") (V4DI "avx")])
+  [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
+   (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
 
 (define_mode_attr sse3
   [(V16QI "sse3") (V32QI "avx")])
 
 (define_mode_attr sse4_1
   [(V4SF "sse4_1") (V2DF "sse4_1")
-   (V8SF "avx") (V4DF "avx")])
+   (V8SF "avx") (V4DF "avx")
+   (V8DF "avx512f")])
 
 (define_mode_attr avxsizesuffix
-  [(V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
+  [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
+   (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
    (V16QI "") (V8HI "") (V4SI "") (V2DI "")
+   (V16SF "512") (V8DF "512")
    (V8SF "256") (V4DF "256")
    (V4SF "") (V2DF "")])
 
@@ -316,17 +466,29 @@ 
    (V4SF "V4SF") (V2DF "V2DF")
    (TI "TI")])
 
+;; Mapping of vector modes to corresponding mask size
+(define_mode_attr avx512fmaskmode
+  [(V16QI "HI")
+   (V16HI "HI") (V8HI  "QI")
+   (V16SI "HI") (V8SI  "QI") (V4SI  "QI")
+   (V8DI  "QI") (V4DI  "QI") (V2DI  "QI")
+   (V16SF "HI") (V8SF  "QI") (V4SF  "QI")
+   (V8DF  "QI") (V4DF  "QI") (V2DF  "QI")])
+
 ;; Mapping of vector float modes to an integer mode of the same size
 (define_mode_attr sseintvecmode
-  [(V8SF "V8SI") (V4DF "V4DI")
-   (V4SF "V4SI") (V2DF "V2DI")
-   (V8SI "V8SI") (V4DI "V4DI")
-   (V4SI "V4SI") (V2DI "V2DI")
-   (V16HI "V16HI") (V8HI "V8HI")
+  [(V16SF "V16SI") (V8DF  "V8DI")
+   (V8SF  "V8SI")  (V4DF  "V4DI")
+   (V4SF  "V4SI")  (V2DF  "V2DI")
+   (V16SI "V16SI") (V8DI  "V8DI")
+   (V8SI  "V8SI")  (V4DI  "V4DI")
+   (V4SI  "V4SI")  (V2DI  "V2DI")
+   (V16HI "V16HI") (V8HI  "V8HI")
    (V32QI "V32QI") (V16QI "V16QI")])
 
 (define_mode_attr sseintvecmodelower
-  [(V8SF "v8si") (V4DF "v4di")
+  [(V16SF "v16si")
+   (V8SF "v8si") (V4DF "v4di")
    (V4SF "v4si") (V2DF "v2di")
    (V8SI "v8si") (V4DI "v4di")
    (V4SI "v4si") (V2DI "v2di")
@@ -342,15 +504,20 @@ 
 
 ;; Mapping of vector modes to a vector mode of half size
 (define_mode_attr ssehalfvecmode
-  [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
-   (V16QI  "V8QI") (V8HI  "V4HI") (V4SI "V2SI")
-   (V8SF "V4SF") (V4DF "V2DF")
-   (V4SF "V2SF")])
+  [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI")
+   (V32QI "V16QI") (V16HI  "V8HI") (V8SI  "V4SI") (V4DI "V2DI")
+   (V16QI  "V8QI") (V8HI   "V4HI") (V4SI  "V2SI")
+   (V16SF "V8SF") (V8DF "V4DF")
+   (V8SF  "V4SF") (V4DF "V2DF")
+   (V4SF  "V2SF")])
 
 ;; Mapping of vector modes ti packed single mode of the same size
+;; TODO: It is questionable whether this should be extended to 512bit modes.
 (define_mode_attr ssePSmode
-  [(V32QI "V8SF") (V16QI "V4SF")
-   (V16HI "V8SF") (V8HI "V4SF")
+  [(V16SI "V16SF") (V8DF "V16SF")
+   (V16SF "V16SF") (V8DI "V16SF")
+   (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
+   (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
    (V8SI "V8SF") (V4SI "V4SF")
    (V4DI "V8SF") (V2DI "V4SF")
    (V2TI "V8SF") (V1TI "V4SF")
@@ -359,10 +526,21 @@ 
 
 ;; Mapping of vector modes back to the scalar modes
 (define_mode_attr ssescalarmode
-  [(V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI")
-   (V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI")
-   (V8SF "SF") (V4DF "DF")
-   (V4SF "SF") (V2DF "DF")])
+  [(V64QI "QI") (V32QI "QI") (V16QI "QI")
+   (V32HI "HI") (V16HI "HI") (V8HI "HI")
+   (V16SI "SI") (V8SI "SI")  (V4SI "SI")
+   (V8DI "DI")  (V4DI "DI")  (V2DI "DI")
+   (V16SF "SF") (V8SF "SF")  (V4SF "SF")
+   (V8DF "DF")  (V4DF "DF")  (V2DF "DF")])
+
+;; Mapping of vector modes to the 128bit modes
+(define_mode_attr ssexmmmode
+  [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
+   (V32HI "V8HI")  (V16HI "V8HI") (V8HI "V8HI")
+   (V16SI "V4SI")  (V8SI "V4SI")  (V4SI "V4SI")
+   (V8DI "V2DI")   (V4DI "V2DI")  (V2DI "V2DI")
+   (V16SF "V4SF")  (V8SF "V4SF")  (V4SF "V4SF")
+   (V8DF "V2DF")   (V4DF "V2DF")  (V2DF "V2DF")])
 
 ;; Pointer size override for scalar modes (Intel asm dialect)
 (define_mode_attr iptr
@@ -374,8 +552,10 @@ 
 
 ;; Number of scalar elements in each vector type
 (define_mode_attr ssescalarnum
-  [(V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
+  [(V64QI "64") (V16SI "16") (V8DI "8")
+   (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
    (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
+   (V16SF "16") (V8DF "8")
    (V8SF "8") (V4DF "4")
    (V4SF "4") (V2DF "2")])
 
@@ -386,12 +566,19 @@ 
    (V8SF "7") (V4DF "3")
    (V4SF "3") (V2DF "1")])
 
+;; Size in bits of one element, for the HI/SI/DI integer vector modes.
+;; NOTE(review): QI and floating-point vector modes have no entry here.
+(define_mode_attr ssescalarsize
+  [(V8DI  "64") (V4DI  "64") (V2DI  "64")
+   (V32HI "16") (V16HI "16") (V8HI "16")
+   (V16SI "32") (V8SI "32") (V4SI "32")])
+
 ;; SSE prefix for integer vector modes
 (define_mode_attr sseintprefix
-  [(V2DI "p") (V2DF "")
-   (V4DI "p") (V4DF "")
-   (V4SI "p") (V4SF "")
-   (V8SI "p") (V8SF "")])
+  [(V2DI  "p") (V2DF  "")
+   (V4DI  "p") (V4DF  "")
+   (V8DI  "p") (V8DF  "")
+   (V4SI  "p") (V4SF  "")
+   (V8SI  "p") (V8SF  "")
+   (V16SI "p") (V16SF "")])
 
 ;; SSE scalar suffix for vector modes
 (define_mode_attr ssescalarmodesuffix
@@ -399,16 +586,20 @@ 
    (V8SF "ss") (V4DF "sd")
    (V4SF "ss") (V2DF "sd")
    (V8SI "ss") (V4DI "sd")
+   (V16SF "ss") (V8DF "sd")
+   (V16SI "ss") (V8DI "sd")
    (V4SI "d")])
 
 ;; Pack/unpack vector modes
 (define_mode_attr sseunpackmode
   [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
-   (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")])
+   (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
+   (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
 
 (define_mode_attr ssepackmode
   [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
-   (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")])
+   (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
+   (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
 
 ;; Mapping of the max integer size for xop rotate immediate constraint
 (define_mode_attr sserotatemax
@@ -421,9 +612,11 @@ 
 (define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
 
 ;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
+;; i64x4 or f64x4 for 512bit modes.
 (define_mode_attr i128
-  [(V8SF "f128") (V4DF "f128") (V32QI "%~128") (V16HI "%~128")
-   (V8SI "%~128") (V4DI "%~128")])
+  [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
+   (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
+   (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
 
 ;; Mix-n-match
 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
@@ -432,6 +625,10 @@ 
 (define_mode_attr blendbits
   [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
 
+;; Mapping of 512-bit vector modes to the element suffix used for broadcast
+(define_mode_attr bcstscalarsuff
+  [(V16SI "d") (V16SF "ss") (V8DI "q") (V8DF "sd")])
+
 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -571,6 +768,18 @@ 
 	      ]
 	      (const_string "<sseinsnmode>")))])
 
+;; AVX-512F masked blend.  The result is a vec_merge of operand 2 (register
+;; or memory) and operand 1 (register) controlled by mask register
+;; operand 3.  The mnemonic is v[p]blendm<suffix>; operand 3 is printed in
+;; braces after the destination as its write mask.
+(define_insn "avx512f_blendm<mode>"
+  [(set (match_operand:VI48F_512 0 "register_operand" "=v")
+	(vec_merge:VI48F_512
+	  (match_operand:VI48F_512 2 "nonimmediate_operand" "vm")
+	  (match_operand:VI48F_512 1 "register_operand" "v")
+	  (match_operand:<avx512fmaskmode> 3 "register_operand" "k")))]
+  "TARGET_AVX512F"
+  "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
 (define_insn "sse2_movq128"
   [(set (match_operand:V2DI 0 "register_operand" "=x")
 	(vec_concat:V2DI
@@ -663,14 +872,15 @@ 
 })
 
 (define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix>"
-  [(set (match_operand:VF 0 "register_operand" "=v")
-	(unspec:VF
-	  [(match_operand:VF 1 "memory_operand" "vm")]
+  [(set (match_operand:VF_AVX512F 0 "register_operand" "=v")
+	(unspec:VF_AVX512F
+	  [(match_operand:VF_AVX512F 1 "nonimmediate_operand" "vm")]
 	  UNSPEC_LOADU))]
   "TARGET_SSE"
 {
   switch (get_attr_mode (insn))
     {
+    case MODE_V16SF:
     case MODE_V8SF:
     case MODE_V4SF:
       return "%vmovups\t{%1, %0|%0, %1}";
@@ -692,14 +902,15 @@ 
 	      (const_string "<MODE>")))])
 
 (define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
-  [(set (match_operand:VF 0 "memory_operand" "=m")
-	(unspec:VF
-	  [(match_operand:VF 1 "register_operand" "x")]
+  [(set (match_operand:VF_AVX512F 0 "memory_operand" "=m")
+	(unspec:VF_AVX512F
+	  [(match_operand:VF_AVX512F 1 "register_operand" "v")]
 	  UNSPEC_STOREU))]
   "TARGET_SSE"
 {
   switch (get_attr_mode (insn))
     {
+    case MODE_V16SF:
     case MODE_V8SF:
     case MODE_V4SF:
       return "%vmovups\t{%1, %0|%0, %1}";
@@ -721,10 +932,11 @@ 
 	      ]
 	      (const_string "<MODE>")))])
 
-(define_insn "<sse2>_loaddqu<avxsizesuffix>"
-  [(set (match_operand:VI1 0 "register_operand" "=v")
-	(unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
-		    UNSPEC_LOADU))]
+(define_insn "<sse2_avx_avx512f>_loaddqu<mode>"
+  [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "register_operand" "=v")
+	(unspec:VI_UNALIGNED_LOADSTORE
+	  [(match_operand:VI_UNALIGNED_LOADSTORE 1 "nonimmediate_operand" "vm")]
+	  UNSPEC_LOADU))]
   "TARGET_SSE2"
 {
   switch (get_attr_mode (insn))
@@ -732,6 +944,11 @@ 
     case MODE_V8SF:
     case MODE_V4SF:
       return "%vmovups\t{%1, %0|%0, %1}";
+    case MODE_XI:
+      if (<MODE>mode == V8DImode)
+	return "vmovdqu64\t{%1, %0|%0, %1}";
+      else
+	return "vmovdqu32\t{%1, %0|%0, %1}";
     default:
       return "%vmovdqu\t{%1, %0|%0, %1}";
     }
@@ -754,10 +971,11 @@ 
 	      ]
 	      (const_string "<sseinsnmode>")))])
 
-(define_insn "<sse2>_storedqu<avxsizesuffix>"
-  [(set (match_operand:VI1 0 "memory_operand" "=m")
-	(unspec:VI1 [(match_operand:VI1 1 "register_operand" "v")]
-		    UNSPEC_STOREU))]
+(define_insn "<sse2_avx_avx512f>_storedqu<mode>"
+  [(set (match_operand:VI_UNALIGNED_LOADSTORE 0 "memory_operand" "=m")
+	(unspec:VI_UNALIGNED_LOADSTORE
+	  [(match_operand:VI_UNALIGNED_LOADSTORE 1 "register_operand" "v")]
+	  UNSPEC_STOREU))]
   "TARGET_SSE2"
 {
   switch (get_attr_mode (insn))
@@ -765,6 +983,11 @@ 
     case MODE_V8SF:
     case MODE_V4SF:
       return "%vmovups\t{%1, %0|%0, %1}";
+    case MODE_XI:
+      if (<MODE>mode == V8DImode)
+	return "vmovdqu64\t{%1, %0|%0, %1}";
+      else
+	return "vmovdqu32\t{%1, %0|%0, %1}";
     default:
       return "%vmovdqu\t{%1, %0|%0, %1}";
     }
@@ -820,9 +1043,10 @@ 
    (set_attr "mode" "<MODE>")])
 
 (define_insn "<sse>_movnt<mode>"
-  [(set (match_operand:VF 0 "memory_operand" "=m")
-	(unspec:VF [(match_operand:VF 1 "register_operand" "x")]
-		   UNSPEC_MOVNT))]
+  [(set (match_operand:VF_AVX512F 0 "memory_operand" "=m")
+	(unspec:VF_AVX512F
+	  [(match_operand:VF_AVX512F 1 "register_operand" "v")]
+	  UNSPEC_MOVNT))]
   "TARGET_SSE"
   "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssemov")
@@ -831,7 +1055,7 @@ 
 
 (define_insn "<sse2>_movnt<mode>"
   [(set (match_operand:VI8 0 "memory_operand" "=m")
-	(unspec:VI8 [(match_operand:VI8 1 "register_operand" "x")]
+	(unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
 		    UNSPEC_MOVNT))]
   "TARGET_SSE2"
   "%vmovntdq\t{%1, %0|%0, %1}"
@@ -852,9 +1076,9 @@ 
 (define_mode_iterator STORENT_MODE
   [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
    (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
-   (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
-   (V8SF "TARGET_AVX") V4SF
-   (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
+   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
+   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+   (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
 
 (define_expand "storent<mode>"
   [(set (match_operand:STORENT_MODE 0 "memory_operand")
@@ -870,17 +1094,17 @@ 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (define_expand "<code><mode>2"
-  [(set (match_operand:VF 0 "register_operand")
-	(absneg:VF
-	  (match_operand:VF 1 "register_operand")))]
+  [(set (match_operand:VF_AVX512F 0 "register_operand")
+	(absneg:VF_AVX512F
+	  (match_operand:VF_AVX512F 1 "register_operand")))]
   "TARGET_SSE"
   "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
 
 (define_insn_and_split "*absneg<mode>2"
-  [(set (match_operand:VF 0 "register_operand" "=x,x,x,x")
-	(match_operator:VF 3 "absneg_operator"
-	  [(match_operand:VF 1 "nonimmediate_operand" "0, xm,x, m")]))
-   (use (match_operand:VF 2 "nonimmediate_operand"    "xm,0, xm,x"))]
+  [(set (match_operand:VF_AVX512F 0 "register_operand" "=x,x,v,v")
+	(match_operator:VF_AVX512F 3 "absneg_operator"
+	  [(match_operand:VF_AVX512F 1 "nonimmediate_operand" "0, xm, v, m")]))
+   (use (match_operand:VF_AVX512F 2 "nonimmediate_operand"    "xm, 0, vm,v"))]
   "TARGET_SSE"
   "#"
   "&& reload_completed"
@@ -915,18 +1139,18 @@ 
   [(set_attr "isa" "noavx,noavx,avx,avx")])
 
 (define_expand "<plusminus_insn><mode>3"
-  [(set (match_operand:VF 0 "register_operand")
-	(plusminus:VF
-	  (match_operand:VF 1 "nonimmediate_operand")
-	  (match_operand:VF 2 "nonimmediate_operand")))]
+  [(set (match_operand:VF_AVX512F 0 "register_operand")
+	(plusminus:VF_AVX512F
+	  (match_operand:VF_AVX512F 1 "nonimmediate_operand")
+	  (match_operand:VF_AVX512F 2 "nonimmediate_operand")))]
   "TARGET_SSE"
   "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
 
 (define_insn "*<plusminus_insn><mode>3"
-  [(set (match_operand:VF 0 "register_operand" "=x,v")
-	(plusminus:VF
-	  (match_operand:VF 1 "nonimmediate_operand" "<comm>0,v")
-	  (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
+  [(set (match_operand:VF_AVX512F 0 "register_operand" "=x,v")
+	(plusminus:VF_AVX512F
+	  (match_operand:VF_AVX512F 1 "nonimmediate_operand" "<comm>0,v")
+	  (match_operand:VF_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
   "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
   "@
    <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
@@ -954,18 +1178,18 @@ 
    (set_attr "mode" "<ssescalarmode>")])
 
 (define_expand "mul<mode>3"
-  [(set (match_operand:VF 0 "register_operand")
-	(mult:VF
-	  (match_operand:VF 1 "nonimmediate_operand")
-	  (match_operand:VF 2 "nonimmediate_operand")))]
+  [(set (match_operand:VF_AVX512F 0 "register_operand")
+	(mult:VF_AVX512F
+	  (match_operand:VF_AVX512F 1 "nonimmediate_operand")
+	  (match_operand:VF_AVX512F 2 "nonimmediate_operand")))]
   "TARGET_SSE"
   "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
 
 (define_insn "*mul<mode>3"
-  [(set (match_operand:VF 0 "register_operand" "=x,x")
-	(mult:VF
-	  (match_operand:VF 1 "nonimmediate_operand" "%0,x")
-	  (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
+  [(set (match_operand:VF_AVX512F 0 "register_operand" "=x,v")
+	(mult:VF_AVX512F
+	  (match_operand:VF_AVX512F 1 "nonimmediate_operand" "%0,v")
+	  (match_operand:VF_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
   "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
   "@
    mul<ssemodesuffix>\t{%2, %0|%0, %2}
@@ -976,34 +1200,37 @@ 
    (set_attr "btver2_decode" "direct,double")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "<sse>_vmmul<mode>3"
+(define_insn "<sse>_vm<multdiv_mnemonic><mode>3"
   [(set (match_operand:VF_128 0 "register_operand" "=x,v")
 	(vec_merge:VF_128
-	  (mult:VF_128
+	  (multdiv:VF_128
 	    (match_operand:VF_128 1 "register_operand" "0,v")
 	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm"))
 	  (match_dup 1)
 	  (const_int 1)))]
   "TARGET_SSE"
   "@
-   mul<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
-   vmul<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
+   <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
+   v<multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
   [(set_attr "isa" "noavx,avx")
-   (set_attr "type" "ssemul")
+   (set_attr "type" "sse<multdiv_mnemonic>")
    (set_attr "prefix" "orig,vex")
+   (set_attr "btver2_decode" "direct,double")
    (set_attr "mode" "<ssescalarmode>")])
 
 (define_expand "div<mode>3"
-  [(set (match_operand:VF2 0 "register_operand")
-	(div:VF2 (match_operand:VF2 1 "register_operand")
-		 (match_operand:VF2 2 "nonimmediate_operand")))]
+  [(set (match_operand:VF2_AVX512F 0 "register_operand")
+	(div:VF2_AVX512F
+	  (match_operand:VF2_AVX512F 1 "register_operand")
+	  (match_operand:VF2_AVX512F 2 "nonimmediate_operand")))]
   "TARGET_SSE2"
   "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
 
 (define_expand "div<mode>3"
-  [(set (match_operand:VF1 0 "register_operand")
-	(div:VF1 (match_operand:VF1 1 "register_operand")
-		 (match_operand:VF1 2 "nonimmediate_operand")))]
+  [(set (match_operand:VF1_AVX512F 0 "register_operand")
+	(div:VF1_AVX512F
+	  (match_operand:VF1_AVX512F 1 "register_operand")
+	  (match_operand:VF1_AVX512F 2 "nonimmediate_operand")))]
   "TARGET_SSE"
 {
   ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
@@ -1020,10 +1247,10 @@ 
 })
 
 (define_insn "<sse>_div<mode>3"
-  [(set (match_operand:VF 0 "register_operand" "=x,v")
-	(div:VF
-	  (match_operand:VF 1 "register_operand" "0,v")
-	  (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
+  [(set (match_operand:VF_AVX512F 0 "register_operand" "=x,v")
+	(div:VF_AVX512F
+	  (match_operand:VF_AVX512F 1 "register_operand" "0,v")
+	  (match_operand:VF_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
   "TARGET_SSE"
   "@
    div<ssemodesuffix>\t{%2, %0|%0, %2}
@@ -1033,24 +1260,6 @@ 
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "<sse>_vmdiv<mode>3"
-  [(set (match_operand:VF_128 0 "register_operand" "=x,v")
-	(vec_merge:VF_128
-	  (div:VF_128
-	    (match_operand:VF_128 1 "register_operand" "0,v")
-	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,vm"))
-	  (match_dup 1)
-	  (const_int 1)))]
-  "TARGET_SSE"
-  "@
-   div<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
-   vdiv<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
-  [(set_attr "isa" "noavx,avx")
-   (set_attr "type" "ssediv")
-   (set_attr "prefix" "orig,vex")
-   (set_attr "btver2_decode" "direct,double")
-   (set_attr "mode" "<ssescalarmode>")])
-
 (define_insn "<sse>_rcp<mode>2"
   [(set (match_operand:VF1 0 "register_operand" "=x")
 	(unspec:VF1
@@ -1081,14 +1290,40 @@ 
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "SF")])
 
+;; Packed vrcp14 on the VF_512 modes: operand 0 receives UNSPEC_RCP14 of
+;; operand 1 (register or memory).  Requires AVX-512F.
+(define_insn "rcp14<mode>"
+  [(set (match_operand:VF_512 0 "register_operand" "=v")
+	(unspec:VF_512
+	  [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
+	  UNSPEC_RCP14))]
+  "TARGET_AVX512F"
+  "vrcp14<ssemodesuffix>\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
+;; Scalar vrcp14 on the VF_128 modes.  The vec_merge with (const_int 1)
+;; takes element 0 from the UNSPEC_RCP14 result and the remaining elements
+;; from operand 1 (match_dup 1).  Operand 2 may be a register or memory.
+(define_insn "srcp14<mode>"
+  [(set (match_operand:VF_128 0 "register_operand" "=v")
+	(vec_merge:VF_128
+	  (unspec:VF_128
+	    [(match_operand:VF_128 1 "register_operand" "v")
+	     (match_operand:VF_128 2 "nonimmediate_operand" "vm")]
+	    UNSPEC_RCP14)
+	  (match_dup 1)
+	  (const_int 1)))]
+  "TARGET_AVX512F"
+  "vrcp14<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
 (define_expand "sqrt<mode>2"
-  [(set (match_operand:VF2 0 "register_operand")
-	(sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand")))]
+  [(set (match_operand:VF2_AVX512F 0 "register_operand")
+	(sqrt:VF2_AVX512F (match_operand:VF2_AVX512F 1 "nonimmediate_operand")))]
   "TARGET_SSE2")
 
 (define_expand "sqrt<mode>2"
-  [(set (match_operand:VF1 0 "register_operand")
-	(sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand")))]
+  [(set (match_operand:VF1_AVX512F 0 "register_operand")
+	(sqrt:VF1_AVX512F (match_operand:VF1_AVX512F 1 "nonimmediate_operand")))]
   "TARGET_SSE"
 {
   if (TARGET_SSE_MATH
@@ -1103,8 +1338,8 @@ 
 })
 
 (define_insn "<sse>_sqrt<mode>2"
-  [(set (match_operand:VF 0 "register_operand" "=v")
-	(sqrt:VF (match_operand:VF 1 "nonimmediate_operand" "vm")))]
+  [(set (match_operand:VF_AVX512F 0 "register_operand" "=v")
+	(sqrt:VF_AVX512F (match_operand:VF_AVX512F 1 "nonimmediate_operand" "vm")))]
   "TARGET_SSE"
   "%vsqrt<ssemodesuffix>\t{%1, %0|%0, %1}"
   [(set_attr "type" "sse")
@@ -1151,6 +1386,32 @@ 
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODE>")])
 
+;; Packed vrsqrt14 on the VF_512 modes: operand 0 receives UNSPEC_RSQRT14
+;; of operand 1 (register or memory).  Requires AVX-512F.
+(define_insn "rsqrt14<mode>"
+  [(set (match_operand:VF_512 0 "register_operand" "=v")
+	(unspec:VF_512
+	  [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
+	  UNSPEC_RSQRT14))]
+  "TARGET_AVX512F"
+  "vrsqrt14<ssemodesuffix>\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
+;; Scalar vrsqrt14 on the VF_128 modes.  The vec_merge with (const_int 1)
+;; takes element 0 from the UNSPEC_RSQRT14 result and the remaining
+;; elements from operand 1 (match_dup 1).
+;; NOTE(review): the scalar reciprocal pattern is named "srcp14<mode>",
+;; but this one has no "s" prefix and shares its name template with the
+;; packed VF_512 pattern -- confirm the naming is intended.
+(define_insn "rsqrt14<mode>"
+  [(set (match_operand:VF_128 0 "register_operand" "=v")
+	(vec_merge:VF_128
+	  (unspec:VF_128
+	    [(match_operand:VF_128 1 "register_operand" "v")
+	     (match_operand:VF_128 2 "nonimmediate_operand" "vm")]
+	    UNSPEC_RSQRT14)
+	  (match_dup 1)
+	  (const_int 1)))]
+  "TARGET_AVX512F"
+  "vrsqrt14<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "sse_vmrsqrtv4sf2"
   [(set (match_operand:V4SF 0 "register_operand" "=x,x")
 	(vec_merge:V4SF
@@ -1172,10 +1433,10 @@ 
 ;; applied to NaNs.  Hopefully the optimizers won't get too smart on us.
 
 (define_expand "<code><mode>3"
-  [(set (match_operand:VF 0 "register_operand")
-	(smaxmin:VF
-	  (match_operand:VF 1 "nonimmediate_operand")
-	  (match_operand:VF 2 "nonimmediate_operand")))]
+  [(set (match_operand:VF_AVX512F 0 "register_operand")
+	(smaxmin:VF_AVX512F
+	  (match_operand:VF_AVX512F 1 "nonimmediate_operand")
+	  (match_operand:VF_AVX512F 2 "nonimmediate_operand")))]
   "TARGET_SSE"
 {
   if (!flag_finite_math_only)
@@ -1184,12 +1445,13 @@ 
 })
 
 (define_insn "*<code><mode>3_finite"
-  [(set (match_operand:VF 0 "register_operand" "=x,v")
-	(smaxmin:VF
-	  (match_operand:VF 1 "nonimmediate_operand" "%0,v")
-	  (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
+  [(set (match_operand:VF_AVX512F 0 "register_operand" "=x,v")
+	(smaxmin:VF_AVX512F
+	  (match_operand:VF_AVX512F 1 "nonimmediate_operand" "%0,v")
+	  (match_operand:VF_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
   "TARGET_SSE && flag_finite_math_only
-   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+  "
   "@
    <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
    v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
@@ -1200,11 +1462,12 @@ 
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*<code><mode>3"
-  [(set (match_operand:VF 0 "register_operand" "=x,v")
-	(smaxmin:VF
-	  (match_operand:VF 1 "register_operand" "0,v")
-	  (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
-  "TARGET_SSE && !flag_finite_math_only"
+  [(set (match_operand:VF_AVX512F 0 "register_operand" "=x,v")
+	(smaxmin:VF_AVX512F
+	  (match_operand:VF_AVX512F 1 "register_operand" "0,v")
+	  (match_operand:VF_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
+  "TARGET_SSE && !flag_finite_math_only
+  "
   "@
    <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
    v<maxmin_float><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
@@ -1536,6 +1799,15 @@ 
    (set_attr "prefix_rep" "1,*")
    (set_attr "mode" "V4SF")])
 
+;; Sum reduction for V8DF.  Expansion is delegated to ix86_expand_reduc,
+;; which is handed the addv8df3 generator together with operands 0 and 1.
+(define_expand "reduc_splus_v8df"
+  [(match_operand:V8DF 0 "register_operand")
+   (match_operand:V8DF 1 "register_operand")]
+  "TARGET_AVX512F"
+{
+  ix86_expand_reduc (gen_addv8df3, operands[0], operands[1]);
+  DONE;
+})
+
 (define_expand "reduc_splus_v4df"
   [(match_operand:V4DF 0 "register_operand")
    (match_operand:V4DF 1 "register_operand")]
@@ -1558,6 +1830,15 @@ 
   DONE;
 })
 
+;; Sum reduction for V16SF.  Expansion is delegated to ix86_expand_reduc,
+;; which is handed the addv16sf3 generator together with operands 0 and 1.
+(define_expand "reduc_splus_v16sf"
+  [(match_operand:V16SF 0 "register_operand")
+   (match_operand:V16SF 1 "register_operand")]
+  "TARGET_AVX512F"
+{
+  ix86_expand_reduc (gen_addv16sf3, operands[0], operands[1]);
+  DONE;
+})
+
 (define_expand "reduc_splus_v8sf"
   [(match_operand:V8SF 0 "register_operand")
    (match_operand:V8SF 1 "register_operand")]
@@ -1593,7 +1874,9 @@ 
   [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
    (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
    (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
-   (V4SF "TARGET_SSE")])
+   (V4SF "TARGET_SSE") (V16SI "TARGET_AVX512F")
+   (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
+   (V8DF "TARGET_AVX512F")])
 
 (define_expand "reduc_<code>_<mode>"
   [(smaxmin:REDUC_SMINMAX_MODE
@@ -1606,6 +1889,16 @@ 
 })
 
 (define_expand "reduc_<code>_<mode>"
+  [(umaxmin:VI48_512
+     (match_operand:VI48_512 0 "register_operand")
+     (match_operand:VI48_512 1 "register_operand"))]
+  "TARGET_AVX512F"
+{
+  ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
+  DONE;
+})
+
+(define_expand "reduc_<code>_<mode>"
   [(umaxmin:VI_256
      (match_operand:VI_256 0 "register_operand")
      (match_operand:VI_256 1 "register_operand"))]
@@ -1711,14 +2004,74 @@ 
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "<ssescalarmode>")])
 
+;; Predicate for the comparison immediate of avx512f_cmp<mode>3:
+;; 0..31 for the 512-bit float modes, 0..7 for the 512-bit integer modes.
+(define_mode_attr cmp_imm_predicate
+  [(V16SF "const_0_to_31_operand") (V8DF "const_0_to_31_operand")
+  (V16SI "const_0_to_7_operand") (V8DI "const_0_to_7_operand")])
+
+;; 512-bit vector compare producing a mask-register result.  Operand 3 is
+;; the comparison immediate; its allowed range depends on the mode (see
+;; cmp_imm_predicate).  The mnemonic is vcmp<suffix> for float modes and
+;; vpcmp<suffix> for integer modes, selected through <sseintprefix>.
+(define_insn "avx512f_cmp<mode>3"
+  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+	(unspec:<avx512fmaskmode>
+	  [(match_operand:VI48F_512 1 "register_operand" "v")
+	   (match_operand:VI48F_512 2 "nonimmediate_operand" "vm")
+	   (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
+	  UNSPEC_PCMP))]
+  "TARGET_AVX512F"
+  "v<sseintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+;; 512-bit integer compare (vpcmpu<suffix>) producing a mask-register
+;; result, represented as UNSPEC_UNSIGNED_PCMP.  Operand 3 is the
+;; comparison immediate in the range 0..7.
+(define_insn "avx512f_ucmp<mode>3"
+  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+	(unspec:<avx512fmaskmode>
+	  [(match_operand:VI48_512 1 "register_operand" "v")
+	   (match_operand:VI48_512 2 "nonimmediate_operand" "vm")
+	   (match_operand:SI 3 "const_0_to_7_operand" "n")]
+	  UNSPEC_UNSIGNED_PCMP))]
+  "TARGET_AVX512F"
+  "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+;; Scalar float compare on the VF_128 modes producing a mask-register
+;; result.  The UNSPEC_PCMP value is ANDed with 1, so only bit 0 of the
+;; mask is kept.  Operand 3 is the comparison immediate, range 0..31.
+(define_insn "avx512f_vmcmp<mode>3"
+  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+	(and:<avx512fmaskmode>
+	  (unspec:<avx512fmaskmode>
+	    [(match_operand:VF_128 1 "register_operand" "v")
+	     (match_operand:VF_128 2 "nonimmediate_operand" "vm")
+	     (match_operand:SI 3 "const_0_to_31_operand" "n")]
+	    UNSPEC_PCMP)
+	  (const_int 1)))]
+  "TARGET_AVX512F"
+  "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<ssescalarmode>")])
+
+;; Floating-point compare producing a mask-register result, with the
+;; comparison given as an RTL operator (operand 3) instead of an
+;; immediate; %D3 prints it as the predicate part of the mnemonic.
+;; The destination is a "k" mask register and the encoding is EVEX, so
+;; the pattern is gated on AVX-512F like the other mask compares here,
+;; not on plain SSE.
+;; NOTE(review): VF_AVX512F also iterates over 128/256-bit modes; confirm
+;; that <avx512fmaskmode> and the target support them, or narrow the
+;; iterator.
+(define_insn "avx512f_maskcmp<mode>3"
+  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+	(match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
+	  [(match_operand:VF_AVX512F 1 "register_operand" "v")
+	   (match_operand:VF_AVX512F 2 "nonimmediate_operand" "vm")]))]
+  "TARGET_AVX512F"
+  "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
 (define_insn "<sse>_comi"
   [(set (reg:CCFP FLAGS_REG)
 	(compare:CCFP
 	  (vec_select:MODEF
-	    (match_operand:<ssevecmode> 0 "register_operand" "x")
+	    (match_operand:<ssevecmode> 0 "register_operand" "v")
 	    (parallel [(const_int 0)]))
 	  (vec_select:MODEF
-	    (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
+	    (match_operand:<ssevecmode> 1 "nonimmediate_operand" "vm")
 	    (parallel [(const_int 0)]))))]
   "SSE_FLOAT_MODE_P (<MODE>mode)"
   "%vcomi<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
@@ -1735,10 +2088,10 @@ 
   [(set (reg:CCFPU FLAGS_REG)
 	(compare:CCFPU
 	  (vec_select:MODEF
-	    (match_operand:<ssevecmode> 0 "register_operand" "x")
+	    (match_operand:<ssevecmode> 0 "register_operand" "v")
 	    (parallel [(const_int 0)]))
 	  (vec_select:MODEF
-	    (match_operand:<ssevecmode> 1 "nonimmediate_operand" "xm")
+	    (match_operand:<ssevecmode> 1 "nonimmediate_operand" "vm")
 	    (parallel [(const_int 0)]))))]
   "SSE_FLOAT_MODE_P (<MODE>mode)"
   "%vucomi<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
@@ -1751,6 +2104,23 @@ 
 		      (const_string "0")))
    (set_attr "mode" "<MODE>")])
 
+;; 512-bit vector conditional: operand 0 is set to operand 1 or operand 2
+;; depending on comparison operator 3 applied to the float vectors in
+;; operands 4 and 5.  Enabled only when the data mode and the compared
+;; float mode have the same number of elements.  Expansion is done by
+;; ix86_expand_fp_vcond, which is asserted to succeed.
+(define_expand "vcond<V_512:mode><VF_512:mode>"
+  [(set (match_operand:V_512 0 "register_operand")
+	(if_then_else:V_512
+	  (match_operator 3 ""
+	    [(match_operand:VF_512 4 "nonimmediate_operand")
+	     (match_operand:VF_512 5 "nonimmediate_operand")])
+	  (match_operand:V_512 1 "general_operand")
+	  (match_operand:V_512 2 "general_operand")))]
+  "TARGET_AVX512F
+   && (GET_MODE_NUNITS (<V_512:MODE>mode)
+       == GET_MODE_NUNITS (<VF_512:MODE>mode))"
+{
+  bool ok = ix86_expand_fp_vcond (operands);
+  gcc_assert (ok);
+  DONE;
+})
+
 (define_expand "vcond<V_256:mode><VF_256:mode>"
   [(set (match_operand:V_256 0 "register_operand")
 	(if_then_else:V_256
@@ -1792,11 +2162,11 @@ 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (define_insn "<sse>_andnot<mode>3"
-  [(set (match_operand:VF 0 "register_operand" "=x,x")
-	(and:VF
-	  (not:VF
-	    (match_operand:VF 1 "register_operand" "0,x"))
-	  (match_operand:VF 2 "nonimmediate_operand" "xm,xm")))]
+  [(set (match_operand:VF_AVX512F 0 "register_operand" "=x,v")
+	(and:VF_AVX512F
+	  (not:VF_AVX512F
+	    (match_operand:VF_AVX512F 1 "register_operand" "0,v"))
+	  (match_operand:VF_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
   "TARGET_SSE"
 {
   static char buf[32];
@@ -1825,12 +2195,19 @@ 
       gcc_unreachable ();
     }
 
+  /* AVX512F has no 512-bit vandnp[sd].  Use vpandnq instead.  */
+  if (GET_MODE_SIZE (<MODE>mode) == 64)
+    {
+      suffix = "q";
+      ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
+    }
+
   snprintf (buf, sizeof (buf), ops, suffix);
   return buf;
 }
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sselog")
-   (set_attr "prefix" "orig,vex")
+   (set_attr "prefix" "orig,maybe_evex")
    (set (attr "mode")
 	(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
 		 (const_string "<ssePSmode>")
@@ -1849,11 +2226,19 @@ 
   "TARGET_SSE"
   "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
 
+;; Expander for the fpint_logic codes on the 512-bit float modes (VF_512).
+;; Operands are legitimized by ix86_fixup_binary_operands_no_copy.
+(define_expand "<code><mode>3"
+  [(set (match_operand:VF_512 0 "register_operand")
+       (fpint_logic:VF_512
+         (match_operand:VF_512 1 "nonimmediate_operand")
+         (match_operand:VF_512 2 "nonimmediate_operand")))]
+  "TARGET_AVX512F"
+  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
 (define_insn "*<code><mode>3"
-  [(set (match_operand:VF 0 "register_operand" "=x,v")
-	(any_logic:VF
-	  (match_operand:VF 1 "nonimmediate_operand" "%0,v")
-	  (match_operand:VF 2 "nonimmediate_operand" "xm,vm")))]
+  [(set (match_operand:VF_AVX512F 0 "register_operand" "=x,v")
+	(any_logic:VF_AVX512F
+	  (match_operand:VF_AVX512F 1 "nonimmediate_operand" "%0,v")
+	  (match_operand:VF_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
   "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
 {
   static char buf[32];
@@ -1882,12 +2267,19 @@ 
       gcc_unreachable ();
     }
 
+  /* AVX512F has no 512-bit v<logic>p[sd].  Use vp<logic>q instead.  */
+  if (GET_MODE_SIZE (<MODE>mode) == 64)
+    {
+      suffix = "q";
+      ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
+    }
+
   snprintf (buf, sizeof (buf), ops, suffix);
   return buf;
 }
   [(set_attr "isa" "noavx,avx")
    (set_attr "type" "sselog")
-   (set_attr "prefix" "orig,vex")
+   (set_attr "prefix" "orig,maybe_evex")
    (set (attr "mode")
 	(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
 		 (const_string "<ssePSmode>")
@@ -1900,14 +2292,14 @@ 
 
 (define_expand "copysign<mode>3"
   [(set (match_dup 4)
-	(and:VF
-	  (not:VF (match_dup 3))
-	  (match_operand:VF 1 "nonimmediate_operand")))
+	(and:VF_AVX512F
+	  (not:VF_AVX512F (match_dup 3))
+	  (match_operand:VF_AVX512F 1 "nonimmediate_operand")))
    (set (match_dup 5)
-	(and:VF (match_dup 3)
-		(match_operand:VF 2 "nonimmediate_operand")))
-   (set (match_operand:VF 0 "register_operand")
-	(ior:VF (match_dup 4) (match_dup 5)))]
+	(and:VF_AVX512F (match_dup 3)
+		(match_operand:VF_AVX512F 2 "nonimmediate_operand")))
+   (set (match_operand:VF_AVX512F 0 "register_operand")
+	(ior:VF_AVX512F (match_dup 4) (match_dup 5)))]
   "TARGET_SSE"
 {
   operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
@@ -2105,6 +2497,23 @@ 
 	       ]
 	       (const_string "TI")))])
 
+;; AVX512F has no floating-point logic instructions for V16SF and V8DF,
+;; but we need xor for negation.  Use the integer forms of the logic
+;; instructions instead.  Maybe there could be a better way to do that.
+
+(define_mode_attr avx512flogicsuff
+  [(V16SF "d") (V8DF "q")])
+
+(define_insn "avx512f_<logic><mode>"
+  [(set (match_operand:VF_512 0 "register_operand" "=v")
+	(fpint_logic:VF_512
+	  (match_operand:VF_512 1 "register_operand" "v")
+	  (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
+  "TARGET_AVX512F"
+  "vp<logic><avx512flogicsuff>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "prefix" "evex")])
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; FMA floating point multiply/accumulate instructions.  These include
@@ -2113,9 +2522,18 @@ 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; The standard names for scalar FMA are only available with SSE math enabled.
-(define_mode_iterator FMAMODEM [(SF "TARGET_SSE_MATH")
-				(DF "TARGET_SSE_MATH")
-				V4SF V2DF V8SF V4DF])
+;; CPUID bit AVX512F enables EVEX-encoded scalar and 512-bit FMA regardless
+;; of the FMA bit, so we enable FMA for TARGET_AVX512F even when TARGET_FMA
+;; and TARGET_FMA4 are both false.
+(define_mode_iterator FMAMODEM
+  [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
+   (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
+   (V4SF "TARGET_FMA || TARGET_FMA4")
+   (V2DF "TARGET_FMA || TARGET_FMA4")
+   (V8SF "TARGET_FMA || TARGET_FMA4")
+   (V4DF "TARGET_FMA || TARGET_FMA4")
+   (V16SF "TARGET_AVX512F")
+   (V8DF "TARGET_AVX512F")])
 
 (define_expand "fma<mode>4"
   [(set (match_operand:FMAMODEM 0 "register_operand")
@@ -2123,7 +2541,7 @@ 
 	  (match_operand:FMAMODEM 1 "nonimmediate_operand")
 	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
 	  (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
-  "TARGET_FMA || TARGET_FMA4")
+  "")
 
 (define_expand "fms<mode>4"
   [(set (match_operand:FMAMODEM 0 "register_operand")
@@ -2131,7 +2549,7 @@ 
 	  (match_operand:FMAMODEM 1 "nonimmediate_operand")
 	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
 	  (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
-  "TARGET_FMA || TARGET_FMA4")
+  "")
 
 (define_expand "fnma<mode>4"
   [(set (match_operand:FMAMODEM 0 "register_operand")
@@ -2139,7 +2557,7 @@ 
 	  (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
 	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
 	  (match_operand:FMAMODEM 3 "nonimmediate_operand")))]
-  "TARGET_FMA || TARGET_FMA4")
+  "")
 
 (define_expand "fnms<mode>4"
   [(set (match_operand:FMAMODEM 0 "register_operand")
@@ -2147,10 +2565,17 @@ 
 	  (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
 	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
 	  (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))]
-  "TARGET_FMA || TARGET_FMA4")
+  "")
 
 ;; The builtins for intrinsics are not constrained by SSE math enabled.
-(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
+(define_mode_iterator FMAMODE [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
+			       (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
+			       (V4SF "TARGET_FMA || TARGET_FMA4")
+			       (V2DF "TARGET_FMA || TARGET_FMA4")
+			       (V8SF "TARGET_FMA || TARGET_FMA4")
+			       (V4DF "TARGET_FMA || TARGET_FMA4")
+			       (V16SF "TARGET_AVX512F")
+			       (V8DF "TARGET_AVX512F")])
 
 (define_expand "fma4i_fmadd_<mode>"
   [(set (match_operand:FMAMODE 0 "register_operand")
@@ -2158,7 +2583,7 @@ 
 	  (match_operand:FMAMODE 1 "nonimmediate_operand")
 	  (match_operand:FMAMODE 2 "nonimmediate_operand")
 	  (match_operand:FMAMODE 3 "nonimmediate_operand")))]
-  "TARGET_FMA || TARGET_FMA4")
+  "")
 
 (define_insn "*fma_fmadd_<mode>"
   [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
@@ -2166,50 +2591,50 @@ 
 	  (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x")
 	  (match_operand:FMAMODE 2 "nonimmediate_operand" "vm, v,vm, x,m")
 	  (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x")))]
-  "TARGET_FMA || TARGET_FMA4"
+  ""
   "@
    vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
    vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
    vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
    vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
    vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
+  [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
    (set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*fma_fmsub_<mode>"
+(define_insn "fma_fmsub_<mode>"
   [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
 	(fma:FMAMODE
 	  (match_operand:FMAMODE   1 "nonimmediate_operand" "%0, 0, v, x,x")
 	  (match_operand:FMAMODE   2 "nonimmediate_operand" "vm, v,vm, x,m")
 	  (neg:FMAMODE
 	    (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x"))))]
-  "TARGET_FMA || TARGET_FMA4"
+  ""
   "@
    vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
    vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
    vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
    vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
    vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
+  [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
    (set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*fma_fnmadd_<mode>"
+(define_insn "fma_fnmadd_<mode>"
   [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
 	(fma:FMAMODE
 	  (neg:FMAMODE
 	    (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0, v, x,x"))
 	  (match_operand:FMAMODE   2 "nonimmediate_operand" "vm, v,vm, x,m")
 	  (match_operand:FMAMODE   3 "nonimmediate_operand" " v,vm, 0,xm,x")))]
-  "TARGET_FMA || TARGET_FMA4"
+  ""
   "@
    vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
    vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
    vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
    vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
    vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
+  [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
    (set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
@@ -2221,14 +2646,14 @@ 
 	  (match_operand:FMAMODE   2 "nonimmediate_operand" "vm, v,vm, x,m")
 	  (neg:FMAMODE
 	    (match_operand:FMAMODE 3 "nonimmediate_operand" " v,vm, 0,xm,x"))))]
-  "TARGET_FMA || TARGET_FMA4"
+  ""
   "@
    vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
    vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
    vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
    vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
    vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
+  [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
    (set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
@@ -2244,48 +2669,48 @@ 
 ;; But this doesn't seem useful in practice.
 
 (define_expand "fmaddsub_<mode>"
-  [(set (match_operand:VF 0 "register_operand")
-	(unspec:VF
-	  [(match_operand:VF 1 "nonimmediate_operand")
-	   (match_operand:VF 2 "nonimmediate_operand")
-	   (match_operand:VF 3 "nonimmediate_operand")]
+  [(set (match_operand:VF_AVX512F 0 "register_operand")
+	(unspec:VF_AVX512F
+	  [(match_operand:VF_AVX512F 1 "nonimmediate_operand")
+	   (match_operand:VF_AVX512F 2 "nonimmediate_operand")
+	   (match_operand:VF_AVX512F 3 "nonimmediate_operand")]
 	  UNSPEC_FMADDSUB))]
-  "TARGET_FMA || TARGET_FMA4")
+  "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
 
 (define_insn "*fma_fmaddsub_<mode>"
-  [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x")
-	(unspec:VF
-	  [(match_operand:VF 1 "nonimmediate_operand" "%0, 0, v, x,x")
-	   (match_operand:VF 2 "nonimmediate_operand" "vm, v,vm, x,m")
-	   (match_operand:VF 3 "nonimmediate_operand" " v,vm, 0,xm,x")]
+  [(set (match_operand:VF_AVX512F 0 "register_operand" "=v,v,v,x,x")
+	(unspec:VF_AVX512F
+	  [(match_operand:VF_AVX512F 1 "nonimmediate_operand" "%0, 0, v, x,x")
+	   (match_operand:VF_AVX512F 2 "nonimmediate_operand" "vm, v,vm, x,m")
+	   (match_operand:VF_AVX512F 3 "nonimmediate_operand" " v,vm, 0,xm,x")]
 	  UNSPEC_FMADDSUB))]
-  "TARGET_FMA || TARGET_FMA4"
+  "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)"
   "@
    vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
    vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
    vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
    vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
    vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
+  [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
    (set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "*fma_fmsubadd_<mode>"
-  [(set (match_operand:VF 0 "register_operand" "=v,v,v,x,x")
-	(unspec:VF
-	  [(match_operand:VF   1 "nonimmediate_operand" "%0, 0, v, x,x")
-	   (match_operand:VF   2 "nonimmediate_operand" "vm, v,vm, x,m")
-	   (neg:VF
-	     (match_operand:VF 3 "nonimmediate_operand" " v,vm, 0,xm,x"))]
+  [(set (match_operand:VF_AVX512F 0 "register_operand" "=v,v,v,x,x")
+	(unspec:VF_AVX512F
+	  [(match_operand:VF_AVX512F   1 "nonimmediate_operand" "%0, 0, v, x,x")
+	   (match_operand:VF_AVX512F   2 "nonimmediate_operand" "vm, v,vm, x,m")
+	   (neg:VF_AVX512F
+	     (match_operand:VF_AVX512F 3 "nonimmediate_operand" " v,vm, 0,xm,x"))]
 	  UNSPEC_FMADDSUB))]
-  "TARGET_FMA || TARGET_FMA4"
+  "(TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)"
   "@
    vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
    vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
    vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
    vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
    vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
+  [(set_attr "isa" "fma_avx512f,fma_avx512f,fma_avx512f,fma4,fma4")
    (set_attr "type" "ssemuladd")
    (set_attr "mode" "<MODE>")])
 
@@ -2312,7 +2737,7 @@ 
 	    (match_operand:VF_128 3 "nonimmediate_operand" " v,vm"))
 	  (match_dup 1)
 	  (const_int 1)))]
-  "TARGET_FMA"
+  "TARGET_FMA || TARGET_AVX512F"
   "@
    vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
    vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
@@ -2329,7 +2754,7 @@ 
 	      (match_operand:VF_128 3 "nonimmediate_operand" " v,vm")))
 	  (match_dup 1)
 	  (const_int 1)))]
-  "TARGET_FMA"
+  "TARGET_FMA || TARGET_AVX512F"
   "@
    vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
    vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
@@ -2346,7 +2771,7 @@ 
 	    (match_operand:VF_128   3 "nonimmediate_operand" " v,vm"))
 	  (match_dup 1)
 	  (const_int 1)))]
-  "TARGET_FMA"
+  "TARGET_FMA || TARGET_AVX512F"
   "@
    vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
    vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
@@ -2364,7 +2789,7 @@ 
 	      (match_operand:VF_128 3 "nonimmediate_operand" " v,vm")))
 	  (match_dup 1)
 	  (const_int 1)))]
-  "TARGET_FMA"
+  "TARGET_FMA || TARGET_AVX512F"
   "@
    vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %<iptr>3, %<iptr>2}
    vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %<iptr>3}"
@@ -2506,7 +2931,7 @@ 
    (set_attr "amdfam10_decode" "vector,double,*")
    (set_attr "bdver1_decode" "double,direct,*")
    (set_attr "btver2_decode" "double,double,double")
-   (set_attr "prefix" "orig,orig,vex")
+   (set_attr "prefix" "orig,orig,maybe_evex")
    (set_attr "mode" "SF")])
 
 (define_insn "sse_cvtsi2ssq"
@@ -2529,7 +2954,7 @@ 
    (set_attr "btver2_decode" "double,double,double")
    (set_attr "length_vex" "*,*,4")
    (set_attr "prefix_rex" "1,1,*")
-   (set_attr "prefix" "orig,orig,vex")
+   (set_attr "prefix" "orig,orig,maybe_evex")
    (set_attr "mode" "SF")])
 
 (define_insn "sse_cvtss2si"
@@ -2580,7 +3005,7 @@ 
 
 (define_insn "sse_cvtss2siq_2"
   [(set (match_operand:DI 0 "register_operand" "=r,r")
-	(unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "x,m")]
+	(unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
 		   UNSPEC_FIX_NOTRUNC))]
   "TARGET_SSE && TARGET_64BIT"
   "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
@@ -2612,7 +3037,7 @@ 
   [(set (match_operand:DI 0 "register_operand" "=r,r")
 	(fix:DI
 	  (vec_select:SF
-	    (match_operand:V4SF 1 "nonimmediate_operand" "v,m")
+	    (match_operand:V4SF 1 "nonimmediate_operand" "v,vm")
 	    (parallel [(const_int 0)]))))]
   "TARGET_SSE && TARGET_64BIT"
   "%vcvttss2si{q}\t{%1, %0|%0, %k1}"
@@ -2624,9 +3049,37 @@ 
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "DI")])
 
+(define_insn "cvtusi2<ssescalarmodesuffix>32"
+  [(set (match_operand:VF_128 0 "register_operand" "=v")
+	(vec_merge:VF_128
+	  (vec_duplicate:VF_128
+	    (unsigned_float:<ssescalarmode>
+	      (match_operand:SI 2 "nonimmediate_operand" "rm")))
+	  (match_operand:VF_128 1 "register_operand" "v")
+	  (const_int 1)))]
+  "TARGET_AVX512F"
+  "vcvtusi2<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<ssescalarmode>")])
+
+(define_insn "cvtusi2<ssescalarmodesuffix>64"
+  [(set (match_operand:VF_128 0 "register_operand" "=v")
+	(vec_merge:VF_128
+	  (vec_duplicate:VF_128
+	    (unsigned_float:<ssescalarmode>
+	      (match_operand:DI 2 "nonimmediate_operand" "rm")))
+	  (match_operand:VF_128 1 "register_operand" "v")
+	  (const_int 1)))]
+  "TARGET_AVX512F && TARGET_64BIT"
+  "vcvtusi2<ssescalarmodesuffix>{q}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<ssescalarmode>")])
+
 (define_insn "float<sseintvecmodelower><mode>2"
-  [(set (match_operand:VF1 0 "register_operand" "=v")
-	(float:VF1
+  [(set (match_operand:VF1_AVX512F 0 "register_operand" "=v")
+	(float:VF1_AVX512F
 	  (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "vm")))]
   "TARGET_SSE2"
   "%vcvtdq2ps\t{%1, %0|%0, %1}"
@@ -2634,8 +3087,18 @@ 
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn "ufloatv16siv16sf2"
+  [(set (match_operand:V16SF 0 "register_operand" "=v")
+	(unsigned_float:V16SF
+	  (match_operand:V16SI 1 "nonimmediate_operand" "vm")))]
+  "TARGET_AVX512F"
+  "vcvtudq2ps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "V16SF")])
+
 (define_expand "floatuns<sseintvecmodelower><mode>2"
-  [(match_operand:VF1 0 "register_operand")
+  [(match_operand:VF1_AVX512F 0 "register_operand")
    (match_operand:<sseintvecmode> 1 "register_operand")]
   "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
 {
@@ -2643,20 +3106,15 @@ 
   DONE;
 })
 
-(define_insn "avx_cvtps2dq256"
-  [(set (match_operand:V8SI 0 "register_operand" "=x")
-	(unspec:V8SI [(match_operand:V8SF 1 "nonimmediate_operand" "xm")]
-		     UNSPEC_FIX_NOTRUNC))]
-  "TARGET_AVX"
-  "vcvtps2dq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "prefix" "vex")
-   (set_attr "mode" "OI")])
+;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
+(define_mode_attr sf2simodelower
+  [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
 
-(define_insn "sse2_cvtps2dq"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-	(unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
-		     UNSPEC_FIX_NOTRUNC))]
+(define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode>"
+  [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
+	(unspec:VI4_AVX
+	  [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "vm")]
+	  UNSPEC_FIX_NOTRUNC))]
   "TARGET_SSE2"
   "%vcvtps2dq\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssecvt")
@@ -2666,7 +3124,39 @@ 
      (const_string "*")
      (const_string "1")))
    (set_attr "prefix" "maybe_vex")
-   (set_attr "mode" "TI")])
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx512f_fix_notruncv16sfv16si"
+  [(set (match_operand:V16SI 0 "register_operand" "=v")
+	(unspec:V16SI
+	  [(match_operand:V16SF 1 "nonimmediate_operand" "vm")]
+	  UNSPEC_FIX_NOTRUNC))]
+  "TARGET_AVX512F"
+  "vcvtps2dq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "XI")])
+
+(define_insn "avx512f_ufix_notruncv16sfv16si"
+  [(set (match_operand:V16SI 0 "register_operand" "=v")
+	(unspec:V16SI
+	  [(match_operand:V16SF 1 "nonimmediate_operand" "vm")]
+	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
+  "TARGET_AVX512F"
+  "vcvtps2udq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "XI")])
+
+(define_insn "<fixsuffix>fix_truncv16sfv16si2"
+  [(set (match_operand:V16SI 0 "register_operand" "=v")
+	(any_fix:V16SI
+	  (match_operand:V16SF 1 "nonimmediate_operand" "vm")))]
+  "TARGET_AVX512F"
+  "vcvttps2<fixsuffix>dq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "XI")])
 
 (define_insn "fix_truncv8sfv8si2"
   [(set (match_operand:V8SI 0 "register_operand" "=x")
@@ -2699,7 +3189,7 @@ 
 
 (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
   [(match_operand:<sseintvecmode> 0 "register_operand")
-   (match_operand:VF1 1 "register_operand")]
+   (match_operand:VF1_AVX512F 1 "register_operand")]
   "TARGET_SSE2"
 {
   rtx tmp[3];
@@ -2772,11 +3262,11 @@ 
    (set_attr "mode" "DF")])
 
 (define_insn "sse2_cvtsi2sdq"
-  [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
+  [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
 	(vec_merge:V2DF
 	  (vec_duplicate:V2DF
 	    (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m,rm")))
-	  (match_operand:V2DF 1 "register_operand" "0,0,x")
+	  (match_operand:V2DF 1 "register_operand" "0,0,v")
 	  (const_int 1)))]
   "TARGET_SSE2 && TARGET_64BIT"
   "@
@@ -2790,59 +3280,159 @@ 
    (set_attr "bdver1_decode" "double,direct,*")
    (set_attr "length_vex" "*,*,4")
    (set_attr "prefix_rex" "1,1,*")
-   (set_attr "prefix" "orig,orig,vex")
+   (set_attr "prefix" "orig,orig,maybe_evex")
    (set_attr "mode" "DF")])
 
-(define_insn "sse2_cvtsd2si"
-  [(set (match_operand:SI 0 "register_operand" "=r,r")
+(define_insn "avx512f_vcvtss2usi"
+  [(set (match_operand:SI 0 "register_operand" "=r")
 	(unspec:SI
-	  [(vec_select:DF
-	     (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
+	  [(vec_select:SF
+	     (match_operand:V4SF 1 "nonimmediate_operand" "vm")
 	     (parallel [(const_int 0)]))]
-	  UNSPEC_FIX_NOTRUNC))]
-  "TARGET_SSE2"
-  "%vcvtsd2si\t{%1, %0|%0, %q1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "athlon_decode" "double,vector")
-   (set_attr "bdver1_decode" "double,double")
-   (set_attr "btver2_decode" "double,double")
-   (set_attr "prefix_rep" "1")
-   (set_attr "prefix" "maybe_vex")
-   (set_attr "mode" "SI")])
-
-(define_insn "sse2_cvtsd2si_2"
-  [(set (match_operand:SI 0 "register_operand" "=r,r")
-	(unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
-		   UNSPEC_FIX_NOTRUNC))]
-  "TARGET_SSE2"
-  "%vcvtsd2si\t{%1, %0|%0, %q1}"
+	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
+  "TARGET_AVX512F"
+  "vcvtss2usi\t{%1, %0|%0, %k1}"
   [(set_attr "type" "sseicvt")
-   (set_attr "athlon_decode" "double,vector")
-   (set_attr "amdfam10_decode" "double,double")
-   (set_attr "bdver1_decode" "double,double")
-   (set_attr "prefix_rep" "1")
-   (set_attr "prefix" "maybe_vex")
+   (set_attr "prefix" "evex")
    (set_attr "mode" "SI")])
 
-(define_insn "sse2_cvtsd2siq"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
+(define_insn "avx512f_vcvtss2usiq"
+  [(set (match_operand:DI 0 "register_operand" "=r")
 	(unspec:DI
-	  [(vec_select:DF
-	     (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
+	  [(vec_select:SF
+	     (match_operand:V4SF 1 "nonimmediate_operand" "vm")
 	     (parallel [(const_int 0)]))]
-	  UNSPEC_FIX_NOTRUNC))]
-  "TARGET_SSE2 && TARGET_64BIT"
-  "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
+	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
+  "TARGET_AVX512F && TARGET_64BIT"
+  "vcvtss2usi\t{%1, %0|%0, %k1}"
   [(set_attr "type" "sseicvt")
-   (set_attr "athlon_decode" "double,vector")
-   (set_attr "bdver1_decode" "double,double")
-   (set_attr "prefix_rep" "1")
-   (set_attr "prefix" "maybe_vex")
+   (set_attr "prefix" "evex")
    (set_attr "mode" "DI")])
 
-(define_insn "sse2_cvtsd2siq_2"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
-	(unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
+(define_insn "avx512f_vcvttss2usi"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unsigned_fix:SI
+	  (vec_select:SF
+	    (match_operand:V4SF 1 "nonimmediate_operand" "vm")
+	    (parallel [(const_int 0)]))))]
+  "TARGET_AVX512F"
+  "vcvttss2usi\t{%1, %0|%0, %k1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "SI")])
+
+(define_insn "avx512f_vcvttss2usiq"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unsigned_fix:DI
+	  (vec_select:SF
+	    (match_operand:V4SF 1 "nonimmediate_operand" "vm")
+	    (parallel [(const_int 0)]))))]
+  "TARGET_AVX512F && TARGET_64BIT"
+  "vcvttss2usi\t{%1, %0|%0, %k1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "DI")])
+
+(define_insn "avx512f_vcvtsd2usi"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI
+	  [(vec_select:DF
+	     (match_operand:V2DF 1 "nonimmediate_operand" "vm")
+	     (parallel [(const_int 0)]))]
+	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
+  "TARGET_AVX512F"
+  "vcvtsd2usi\t{%1, %0|%0, %q1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "SI")])
+
+(define_insn "avx512f_vcvtsd2usiq"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI
+	  [(vec_select:DF
+	     (match_operand:V2DF 1 "nonimmediate_operand" "vm")
+	     (parallel [(const_int 0)]))]
+	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
+  "TARGET_AVX512F && TARGET_64BIT"
+  "vcvtsd2usi\t{%1, %0|%0, %q1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "DI")])
+
+(define_insn "avx512f_vcvttsd2usi"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unsigned_fix:SI
+	  (vec_select:DF
+	    (match_operand:V2DF 1 "nonimmediate_operand" "vm")
+	    (parallel [(const_int 0)]))))]
+  "TARGET_AVX512F"
+  "vcvttsd2usi\t{%1, %0|%0, %q1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "SI")])
+
+(define_insn "avx512f_vcvttsd2usiq"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unsigned_fix:DI
+	  (vec_select:DF
+	    (match_operand:V2DF 1 "nonimmediate_operand" "vm")
+	    (parallel [(const_int 0)]))))]
+  "TARGET_AVX512F && TARGET_64BIT"
+  "vcvttsd2usi\t{%1, %0|%0, %q1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "DI")])
+
+(define_insn "sse2_cvtsd2si"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+	(unspec:SI
+	  [(vec_select:DF
+	     (match_operand:V2DF 1 "nonimmediate_operand" "v,m")
+	     (parallel [(const_int 0)]))]
+	  UNSPEC_FIX_NOTRUNC))]
+  "TARGET_SSE2"
+  "%vcvtsd2si\t{%1, %0|%0, %q1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "bdver1_decode" "double,double")
+   (set_attr "btver2_decode" "double,double")
+   (set_attr "prefix_rep" "1")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "SI")])
+
+(define_insn "sse2_cvtsd2si_2"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+	(unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
+		   UNSPEC_FIX_NOTRUNC))]
+  "TARGET_SSE2"
+  "%vcvtsd2si\t{%1, %0|%0, %q1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "amdfam10_decode" "double,double")
+   (set_attr "bdver1_decode" "double,double")
+   (set_attr "prefix_rep" "1")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "SI")])
+
+(define_insn "sse2_cvtsd2siq"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(unspec:DI
+	  [(vec_select:DF
+	     (match_operand:V2DF 1 "nonimmediate_operand" "v,m")
+	     (parallel [(const_int 0)]))]
+	  UNSPEC_FIX_NOTRUNC))]
+  "TARGET_SSE2 && TARGET_64BIT"
+  "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "bdver1_decode" "double,double")
+   (set_attr "prefix_rep" "1")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "DI")])
+
+(define_insn "sse2_cvtsd2siq_2"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
 		   UNSPEC_FIX_NOTRUNC))]
   "TARGET_SSE2 && TARGET_64BIT"
   "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
@@ -2858,7 +3448,7 @@ 
   [(set (match_operand:SI 0 "register_operand" "=r,r")
 	(fix:SI
 	  (vec_select:DF
-	    (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
+	    (match_operand:V2DF 1 "nonimmediate_operand" "v,m")
 	    (parallel [(const_int 0)]))))]
   "TARGET_SSE2"
   "%vcvttsd2si\t{%1, %0|%0, %q1}"
@@ -2875,7 +3465,7 @@ 
   [(set (match_operand:DI 0 "register_operand" "=r,r")
 	(fix:DI
 	  (vec_select:DF
-	    (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
+	    (match_operand:V2DF 1 "nonimmediate_operand" "v,m")
 	    (parallel [(const_int 0)]))))]
   "TARGET_SSE2 && TARGET_64BIT"
   "%vcvttsd2si{q}\t{%1, %0|%0, %q1}"
@@ -2887,14 +3477,44 @@ 
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "DI")])
 
-(define_insn "floatv4siv4df2"
-  [(set (match_operand:V4DF 0 "register_operand" "=x")
-	(float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
+;; For float<si2dfmodelower><mode>2 insn pattern
+(define_mode_attr si2dfmode
+  [(V8DF "V8SI") (V4DF "V4SI")])
+(define_mode_attr si2dfmodelower
+  [(V8DF "v8si") (V4DF "v4si")])
+
+(define_insn "float<si2dfmodelower><mode>2"
+  [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
+	(float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
   "TARGET_AVX"
   "vcvtdq2pd\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssecvt")
-   (set_attr "prefix" "vex")
-   (set_attr "mode" "V4DF")])
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "ufloatv8siv8df"
+  [(set (match_operand:V8DF 0 "register_operand" "=v")
+	(unsigned_float:V8DF (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
+  "TARGET_AVX512F"
+  "vcvtudq2pd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "V8DF")])
+
+(define_insn "avx512f_cvtdq2pd512_2"
+  [(set (match_operand:V8DF 0 "register_operand" "=v")
+	(float:V8DF
+	  (vec_select:V8SI
+	    (match_operand:V16SI 1 "nonimmediate_operand" "vm")
+	    (parallel [(const_int 0) (const_int 1)
+		       (const_int 2) (const_int 3)
+		       (const_int 4) (const_int 5)
+		       (const_int 6) (const_int 7)]))))]
+  "TARGET_AVX512F"
+  "vcvtdq2pd\t{%t1, %0|%0, %t1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "V8DF")])
 
 (define_insn "avx_cvtdq2pd256_2"
   [(set (match_operand:V4DF 0 "register_operand" "=x")
@@ -2921,6 +3541,16 @@ 
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "V2DF")])
 
+(define_insn "avx512f_cvtpd2dq512"
+  [(set (match_operand:V8SI 0 "register_operand" "=v")
+	(unspec:V8SI [(match_operand:V8DF 1 "nonimmediate_operand" "vm")]
+		     UNSPEC_FIX_NOTRUNC))]
+  "TARGET_AVX512F"
+  "vcvtpd2dq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "OI")])
+
 (define_insn "avx_cvtpd2dq256"
   [(set (match_operand:V4SI 0 "register_operand" "=x")
 	(unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
@@ -2984,6 +3614,26 @@ 
    (set_attr "athlon_decode" "vector")
    (set_attr "bdver1_decode" "double")])
 
+(define_insn "avx512f_ufix_notruncv8dfv8si"
+  [(set (match_operand:V8SI 0 "register_operand" "=v")
+	(unspec:V8SI
+	  [(match_operand:V8DF 1 "nonimmediate_operand" "vm")]
+	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
+  "TARGET_AVX512F"
+  "vcvtpd2udq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "OI")])
+
+(define_insn "<fixsuffix>fix_truncv8dfv8si2"
+  [(set (match_operand:V8SI 0 "register_operand" "=v")
+	(any_fix:V8SI (match_operand:V8DF 1 "nonimmediate_operand" "vm")))]
+  "TARGET_AVX512F"
+  "vcvttpd2<fixsuffix>dq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "OI")])
+
 (define_insn "fix_truncv4dfv4si2"
   [(set (match_operand:V4SI 0 "register_operand" "=x")
 	(fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
@@ -3085,6 +3735,16 @@ 
    (set_attr "prefix" "orig,orig,vex")
    (set_attr "mode" "DF")])
 
+(define_insn "avx512f_cvtpd2ps512"
+  [(set (match_operand:V8SF 0 "register_operand" "=v")
+	(float_truncate:V8SF
+	  (match_operand:V8DF 1 "nonimmediate_operand" "vm")))]
+  "TARGET_AVX512F"
+  "vcvtpd2ps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "V8SF")])
+
 (define_insn "avx_cvtpd2ps256"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
 	(float_truncate:V4SF
@@ -3126,15 +3786,19 @@ 
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "V4SF")])
 
-(define_insn "avx_cvtps2pd256"
-  [(set (match_operand:V4DF 0 "register_operand" "=x")
-	(float_extend:V4DF
-	  (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
+;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
+(define_mode_attr sf2dfmode
+  [(V8DF "V8SF") (V4DF "V4SF")])
+
+(define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix>"
+  [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
+	(float_extend:VF2_512_256
+	  (match_operand:<sf2dfmode> 1 "nonimmediate_operand" "vm")))]
   "TARGET_AVX"
   "vcvtps2pd\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssecvt")
-   (set_attr "prefix" "vex")
-   (set_attr "mode" "V4DF")])
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "*avx_cvtps2pd256_2"
   [(set (match_operand:V4DF 0 "register_operand" "=x")
@@ -3149,6 +3813,21 @@ 
    (set_attr "prefix" "vex")
    (set_attr "mode" "V4DF")])
 
+(define_insn "vec_unpacks_lo_v16sf"
+  [(set (match_operand:V8DF 0 "register_operand" "=v")
+	(float_extend:V8DF
+	  (vec_select:V8SF
+	    (match_operand:V16SF 1 "nonimmediate_operand" "vm")
+	    (parallel [(const_int 0) (const_int 1)
+		       (const_int 2) (const_int 3)
+		       (const_int 4) (const_int 5)
+		       (const_int 6) (const_int 7)]))))]
+  "TARGET_AVX512F"
+  "vcvtps2pd\t{%t1, %0|%0, %t1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "V8DF")])
+
 (define_insn "sse2_cvtps2pd"
   [(set (match_operand:V2DF 0 "register_operand" "=x")
 	(float_extend:V2DF
@@ -3193,6 +3872,20 @@ 
   "TARGET_AVX"
   "operands[2] = gen_reg_rtx (V4SFmode);")
 
+(define_expand "vec_unpacks_hi_v16sf"
+  [(set (match_dup 2)
+	(vec_select:V8SF
+	  (match_operand:V16SF 1 "nonimmediate_operand")
+	  (parallel [(const_int 8) (const_int 9)
+		     (const_int 10) (const_int 11)
+		     (const_int 12) (const_int 13)
+		     (const_int 14) (const_int 15)])))
+   (set (match_operand:V8DF 0 "register_operand")
+	(float_extend:V8DF
+	  (match_dup 2)))]
+  "TARGET_AVX512F"
+  "operands[2] = gen_reg_rtx (V8SFmode);")
+
 (define_expand "vec_unpacks_lo_v4sf"
   [(set (match_operand:V2DF 0 "register_operand")
 	(float_extend:V2DF
@@ -3211,11 +3904,12 @@ 
   "TARGET_AVX")
 
 (define_mode_attr sseunpackfltmode
-  [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF") (V8SI "V4DF")])
+  [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
+   (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
 
 (define_expand "vec_unpacks_float_hi_<mode>"
   [(match_operand:<sseunpackfltmode> 0 "register_operand")
-   (match_operand:VI2_AVX2 1 "register_operand")]
+   (match_operand:VI2_AVX512F 1 "register_operand")]
   "TARGET_SSE2"
 {
   rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
@@ -3228,7 +3922,7 @@ 
 
 (define_expand "vec_unpacks_float_lo_<mode>"
   [(match_operand:<sseunpackfltmode> 0 "register_operand")
-   (match_operand:VI2_AVX2 1 "register_operand")]
+   (match_operand:VI2_AVX512F 1 "register_operand")]
   "TARGET_SSE2"
 {
   rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
@@ -3241,7 +3935,7 @@ 
 
 (define_expand "vec_unpacku_float_hi_<mode>"
   [(match_operand:<sseunpackfltmode> 0 "register_operand")
-   (match_operand:VI2_AVX2 1 "register_operand")]
+   (match_operand:VI2_AVX512F 1 "register_operand")]
   "TARGET_SSE2"
 {
   rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
@@ -3254,7 +3948,7 @@ 
 
 (define_expand "vec_unpacku_float_lo_<mode>"
   [(match_operand:<sseunpackfltmode> 0 "register_operand")
-   (match_operand:VI2_AVX2 1 "register_operand")]
+   (match_operand:VI2_AVX512F 1 "register_operand")]
   "TARGET_SSE2"
 {
   rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
@@ -3308,6 +4002,31 @@ 
 		       (const_int 2) (const_int 3)]))))]
   "TARGET_AVX")
 
+(define_expand "vec_unpacks_float_hi_v16si"
+  [(set (match_dup 2)
+	(vec_select:V8SI
+	  (match_operand:V16SI 1 "nonimmediate_operand")
+	  (parallel [(const_int 8) (const_int 9)
+		     (const_int 10) (const_int 11)
+		     (const_int 12) (const_int 13)
+		     (const_int 14) (const_int 15)])))
+   (set (match_operand:V8DF 0 "register_operand")
+	(float:V8DF
+	  (match_dup 2)))]
+  "TARGET_AVX512F"
+  "operands[2] = gen_reg_rtx (V8SImode);")
+
+(define_expand "vec_unpacks_float_lo_v16si"
+  [(set (match_operand:V8DF 0 "register_operand")
+	(float:V8DF
+	  (vec_select:V8SI
+	    (match_operand:V16SI 1 "nonimmediate_operand")
+	    (parallel [(const_int 0) (const_int 1)
+		       (const_int 2) (const_int 3)
+		       (const_int 4) (const_int 5)
+		       (const_int 6) (const_int 7)]))))]
+  "TARGET_AVX512F")
+
 (define_expand "vec_unpacku_float_hi_v4si"
   [(set (match_dup 5)
 	(vec_select:V4SI
@@ -3425,21 +4144,21 @@ 
   DONE;
 })
 
-(define_expand "vec_pack_trunc_v4df"
+(define_expand "vec_pack_trunc_<mode>"
   [(set (match_dup 3)
-	(float_truncate:V4SF
-	  (match_operand:V4DF 1 "nonimmediate_operand")))
+	(float_truncate:<sf2dfmode>
+	  (match_operand:VF2_512_256 1 "nonimmediate_operand")))
    (set (match_dup 4)
-	(float_truncate:V4SF
-	  (match_operand:V4DF 2 "nonimmediate_operand")))
-   (set (match_operand:V8SF 0 "register_operand")
-	(vec_concat:V8SF
+	(float_truncate:<sf2dfmode>
+	  (match_operand:VF2_512_256 2 "nonimmediate_operand")))
+   (set (match_operand:<ssePSmode> 0 "register_operand")
+	(vec_concat:<ssePSmode>
 	  (match_dup 3)
 	  (match_dup 4)))]
   "TARGET_AVX"
 {
-  operands[3] = gen_reg_rtx (V4SFmode);
-  operands[4] = gen_reg_rtx (V4SFmode);
+  operands[3] = gen_reg_rtx (<sf2dfmode>mode);
+  operands[4] = gen_reg_rtx (<sf2dfmode>mode);
 })
 
 (define_expand "vec_pack_trunc_v2df"
@@ -3470,6 +4189,23 @@ 
   DONE;
 })
 
+(define_expand "vec_pack_sfix_trunc_v8df"
+  [(match_operand:V16SI 0 "register_operand")
+   (match_operand:V8DF 1 "nonimmediate_operand")
+   (match_operand:V8DF 2 "nonimmediate_operand")]
+  "TARGET_AVX512F"
+{
+  rtx r1, r2;
+
+  r1 = gen_reg_rtx (V8SImode);
+  r2 = gen_reg_rtx (V8SImode);
+
+  emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
+  emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
+  emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
+  DONE;
+})
+
 (define_expand "vec_pack_sfix_trunc_v4df"
   [(match_operand:V8SI 0 "register_operand")
    (match_operand:V4DF 1 "nonimmediate_operand")
@@ -3519,7 +4255,7 @@ 
 })
 
 (define_mode_attr ssepackfltmode
-  [(V4DF "V8SI") (V2DF "V4SI")])
+  [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
 
 (define_expand "vec_pack_ufix_trunc_<mode>"
   [(match_operand:<ssepackfltmode> 0 "register_operand")
@@ -3695,6 +4431,26 @@ 
    (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
    (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
 
+(define_insn "avx512f_unpckhps512"
+  [(set (match_operand:V16SF 0 "register_operand" "=v")
+	(vec_select:V16SF
+	  (vec_concat:V32SF
+	    (match_operand:V16SF 1 "register_operand" "v")
+	    (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
+	  (parallel [(const_int 2) (const_int 18)
+		     (const_int 3) (const_int 19)
+		     (const_int 6) (const_int 22)
+		     (const_int 7) (const_int 23)
+		     (const_int 10) (const_int 26)
+		     (const_int 11) (const_int 27)
+		     (const_int 14) (const_int 30)
+		     (const_int 15) (const_int 31)])))]
+  "TARGET_AVX512F"
+  "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "V16SF")])
+
 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
 (define_insn "avx_unpckhps256"
   [(set (match_operand:V8SF 0 "register_operand" "=x")
@@ -3763,6 +4519,26 @@ 
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "V4SF")])
 
+(define_insn "avx512f_unpcklps512"
+  [(set (match_operand:V16SF 0 "register_operand" "=v")
+	(vec_select:V16SF
+	  (vec_concat:V32SF
+	    (match_operand:V16SF 1 "register_operand" "v")
+	    (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
+	  (parallel [(const_int 0) (const_int 16)
+		     (const_int 1) (const_int 17)
+		     (const_int 4) (const_int 20)
+		     (const_int 5) (const_int 21)
+		     (const_int 8) (const_int 24)
+		     (const_int 9) (const_int 25)
+		     (const_int 12) (const_int 28)
+		     (const_int 13) (const_int 29)])))]
+  "TARGET_AVX512F"
+  "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "V16SF")])
+
 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
 (define_insn "avx_unpcklps256"
   [(set (match_operand:V8SF 0 "register_operand" "=x")
@@ -3866,6 +4642,26 @@ 
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "V4SF")])
 
+(define_insn "avx512f_movshdup512"
+  [(set (match_operand:V16SF 0 "register_operand" "=v")
+	(vec_select:V16SF
+	  (vec_concat:V32SF
+	    (match_operand:V16SF 1 "nonimmediate_operand" "vm")
+	    (match_dup 1))
+	  (parallel [(const_int 1) (const_int 1)
+		     (const_int 3) (const_int 3)
+		     (const_int 5) (const_int 5)
+		     (const_int 7) (const_int 7)
+		     (const_int 9) (const_int 9)
+		     (const_int 11) (const_int 11)
+		     (const_int 13) (const_int 13)
+		     (const_int 15) (const_int 15)])))]
+  "TARGET_AVX512F"
+  "vmovshdup\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "V16SF")])
+
 (define_insn "avx_movsldup256"
   [(set (match_operand:V8SF 0 "register_operand" "=x")
 	(vec_select:V8SF
@@ -3899,6 +4695,26 @@ 
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "V4SF")])
 
+(define_insn "avx512f_movsldup512"
+  [(set (match_operand:V16SF 0 "register_operand" "=v")
+	(vec_select:V16SF
+	  (vec_concat:V32SF
+	    (match_operand:V16SF 1 "nonimmediate_operand" "vm")
+	    (match_dup 1))
+	  (parallel [(const_int 0) (const_int 0)
+		     (const_int 2) (const_int 2)
+		     (const_int 4) (const_int 4)
+		     (const_int 6) (const_int 6)
+		     (const_int 8) (const_int 8)
+		     (const_int 10) (const_int 10)
+		     (const_int 12) (const_int 12)
+		     (const_int 14) (const_int 14)])))]
+  "TARGET_AVX512F"
+  "vmovsldup\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "V16SF")])
+
 (define_expand "avx_shufps256"
   [(match_operand:V8SF 0 "register_operand")
    (match_operand:V8SF 1 "register_operand")
@@ -4428,6 +5244,86 @@ 
   operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
 })
 
+(define_insn "avx512f_vextract<shuffletype>32x4_1"
+  [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand" "=vm")
+	(vec_select:<ssequartermode>
+	  (match_operand:V16FI 1 "register_operand" "v")
+	  (parallel [(match_operand 2  "const_0_to_15_operand")
+            (match_operand 3  "const_0_to_15_operand")
+            (match_operand 4  "const_0_to_15_operand")
+            (match_operand 5  "const_0_to_15_operand")])))]
+  "TARGET_AVX512F && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)
+  && (INTVAL (operands[3]) == INTVAL (operands[4]) - 1)
+  && (INTVAL (operands[4]) == INTVAL (operands[5]) - 1)"
+{
+  operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);  /* imm = first index / 4 */
+  return "vextract<shuffletype>32x4\t{%2, %1, %0|%0, %1, %2}";
+}
+  [(set_attr "type" "sselog")
+   (set_attr "prefix_extra" "1")
+   (set_attr "length_immediate" "1")
+   (set (attr "memory")
+      (if_then_else (match_test "MEM_P (operands[0])")
+	(const_string "store")
+	(const_string "none")))
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_split
+  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
+	(vec_select:<ssehalfvecmode>
+	  (match_operand:V8FI 1 "nonimmediate_operand")
+	  (parallel [(const_int 0) (const_int 1)
+            (const_int 2) (const_int 3)])))]
+  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
+  && reload_completed"
+  [(const_int 0)]
+{
+  rtx op1 = operands[1];
+  if (REG_P (op1))
+    op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
+  else
+    op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
+  emit_move_insn (operands[0], op1);
+  DONE;
+})
+
+(define_insn "vec_extract_lo_<mode>"
+  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm")
+	(vec_select:<ssehalfvecmode>
+	  (match_operand:V8FI 1 "nonimmediate_operand" "vm")
+	  (parallel [(const_int 0) (const_int 1)
+            (const_int 2) (const_int 3)])))]
+  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "#"
+  [(set_attr "type" "sselog")
+   (set_attr "prefix_extra" "1")
+   (set_attr "length_immediate" "1")
+   (set (attr "memory")
+      (if_then_else (match_test "MEM_P (operands[0])")
+	(const_string "store")
+	(const_string "none")))
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "vec_extract_hi_<mode>"
+  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=vm")
+	(vec_select:<ssehalfvecmode>
+	  (match_operand:V8FI 1 "register_operand" "v")
+	  (parallel [(const_int 4) (const_int 5)
+            (const_int 6) (const_int 7)])))]
+  "TARGET_AVX512F"
+  "vextract<shuffletype>64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
+  [(set_attr "type" "sselog")
+   (set_attr "prefix_extra" "1")
+   (set_attr "length_immediate" "1")
+   (set (attr "memory")
+      (if_then_else (match_test "MEM_P (operands[0])")
+	(const_string "store")
+	(const_string "none")))
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
 (define_expand "avx_vextractf128<mode>"
   [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
    (match_operand:V_256 1 "register_operand")
@@ -4453,6 +5349,45 @@ 
 })
 
 (define_insn_and_split "vec_extract_lo_<mode>"
+  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
+	(vec_select:<ssehalfvecmode>
+	  (match_operand:V16FI 1 "nonimmediate_operand" "vm,v")
+	  (parallel [(const_int 0) (const_int 1)
+                     (const_int 2) (const_int 3)
+                     (const_int 4) (const_int 5)
+                     (const_int 6) (const_int 7)])))]
+  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx op1 = operands[1];
+  if (REG_P (op1))
+    op1 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op1));
+  else
+    op1 = gen_lowpart (<ssehalfvecmode>mode, op1);
+  emit_move_insn (operands[0], op1);
+  DONE;
+})
+
+(define_insn "vec_extract_hi_<mode>"
+  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
+	(vec_select:<ssehalfvecmode>
+	  (match_operand:V16FI 1 "register_operand" "v,v")
+	  (parallel [(const_int 8) (const_int 9)
+		     (const_int 10) (const_int 11)
+		     (const_int 12) (const_int 13)
+		     (const_int 14) (const_int 15)])))]
+  "TARGET_AVX512F"
+  "vextract<shuffletype>64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
+  [(set_attr "type" "sselog")
+   (set_attr "prefix_extra" "1")
+   (set_attr "length_immediate" "1")
+   (set_attr "memory" "none,store")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn_and_split "vec_extract_lo_<mode>"
   [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=x,m")
 	(vec_select:<ssehalfvecmode>
 	  (match_operand:VI8F_256 1 "nonimmediate_operand" "xm,x")
@@ -4514,6 +5449,53 @@ 
    (set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn_and_split "vec_extract_lo_v32hi"
+  [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
+	(vec_select:V16HI
+	  (match_operand:V32HI 1 "nonimmediate_operand" "vm,v")
+	  (parallel [(const_int 0) (const_int 1)
+		     (const_int 2) (const_int 3)
+		     (const_int 4) (const_int 5)
+		     (const_int 6) (const_int 7)
+		     (const_int 8) (const_int 9)
+		     (const_int 10) (const_int 11)
+		     (const_int 12) (const_int 13)
+		     (const_int 14) (const_int 15)])))]
+  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx op1 = operands[1];
+  if (REG_P (op1))
+    op1 = gen_rtx_REG (V16HImode, REGNO (op1));
+  else
+    op1 = gen_lowpart (V16HImode, op1);
+  emit_move_insn (operands[0], op1);
+  DONE;
+})
+
+(define_insn "vec_extract_hi_v32hi"
+  [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
+	(vec_select:V16HI
+	  (match_operand:V32HI 1 "register_operand" "v,v")
+	  (parallel [(const_int 16) (const_int 17)
+		     (const_int 18) (const_int 19)
+		     (const_int 20) (const_int 21)
+		     (const_int 22) (const_int 23)
+		     (const_int 24) (const_int 25)
+		     (const_int 26) (const_int 27)
+		     (const_int 28) (const_int 29)
+		     (const_int 30) (const_int 31)])))]
+  "TARGET_AVX512F"
+  "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
+  [(set_attr "type" "sselog")
+   (set_attr "prefix_extra" "1")
+   (set_attr "length_immediate" "1")
+   (set_attr "memory" "none,store")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "XI")])
+
 (define_insn_and_split "vec_extract_lo_v16hi"
   [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
 	(vec_select:V8HI
@@ -4550,6 +5532,69 @@ 
    (set_attr "prefix" "vex")
    (set_attr "mode" "OI")])
 
+(define_insn_and_split "vec_extract_lo_v64qi"
+  [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
+	(vec_select:V32QI
+	  (match_operand:V64QI 1 "nonimmediate_operand" "vm,v")
+	  (parallel [(const_int 0) (const_int 1)
+		     (const_int 2) (const_int 3)
+		     (const_int 4) (const_int 5)
+		     (const_int 6) (const_int 7)
+		     (const_int 8) (const_int 9)
+		     (const_int 10) (const_int 11)
+		     (const_int 12) (const_int 13)
+		     (const_int 14) (const_int 15)
+		     (const_int 16) (const_int 17)
+		     (const_int 18) (const_int 19)
+		     (const_int 20) (const_int 21)
+		     (const_int 22) (const_int 23)
+		     (const_int 24) (const_int 25)
+		     (const_int 26) (const_int 27)
+		     (const_int 28) (const_int 29)
+		     (const_int 30) (const_int 31)])))]
+  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx op1 = operands[1];
+  if (REG_P (op1))
+    op1 = gen_rtx_REG (V32QImode, REGNO (op1));
+  else
+    op1 = gen_lowpart (V32QImode, op1);
+  emit_move_insn (operands[0], op1);
+  DONE;
+})
+
+(define_insn "vec_extract_hi_v64qi"
+  [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
+	(vec_select:V32QI
+	  (match_operand:V64QI 1 "register_operand" "v,v")
+	  (parallel [(const_int 32) (const_int 33)
+		     (const_int 34) (const_int 35)
+		     (const_int 36) (const_int 37)
+		     (const_int 38) (const_int 39)
+		     (const_int 40) (const_int 41)
+		     (const_int 42) (const_int 43)
+		     (const_int 44) (const_int 45)
+		     (const_int 46) (const_int 47)
+		     (const_int 48) (const_int 49)
+		     (const_int 50) (const_int 51)
+		     (const_int 52) (const_int 53)
+		     (const_int 54) (const_int 55)
+		     (const_int 56) (const_int 57)
+		     (const_int 58) (const_int 59)
+		     (const_int 60) (const_int 61)
+		     (const_int 62) (const_int 63)])))]
+  "TARGET_AVX512F"
+  "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
+  [(set_attr "type" "sselog")
+   (set_attr "prefix_extra" "1")
+   (set_attr "length_immediate" "1")
+   (set_attr "memory" "none,store")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "XI")])
+
 (define_insn_and_split "vec_extract_lo_v32qi"
   [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
 	(vec_select:V16QI
@@ -4598,10 +5643,10 @@ 
 (define_mode_iterator VEC_EXTRACT_MODE
   [(V32QI "TARGET_AVX") V16QI
    (V16HI "TARGET_AVX") V8HI
-   (V8SI "TARGET_AVX") V4SI
-   (V4DI "TARGET_AVX") V2DI
-   (V8SF "TARGET_AVX") V4SF
-   (V4DF "TARGET_AVX") V2DF])
+   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
+   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+   (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
 
 (define_expand "vec_extract<mode>"
   [(match_operand:<ssescalarmode> 0 "register_operand")
@@ -4620,6 +5665,22 @@ 
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
+(define_insn "avx512f_unpckhpd512"
+  [(set (match_operand:V8DF 0 "register_operand" "=v")
+	(vec_select:V8DF
+	  (vec_concat:V16DF
+	    (match_operand:V8DF 1 "register_operand" "v")
+	    (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
+	  (parallel [(const_int 1) (const_int 9)
+		     (const_int 3) (const_int 11)
+		     (const_int 5) (const_int 13)
+		     (const_int 7) (const_int 15)])))]
+  "TARGET_AVX512F"
+  "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "V8DF")])
+
 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
 (define_insn "avx_unpckhpd256"
   [(set (match_operand:V4DF 0 "register_operand" "=x")
@@ -4700,6 +5761,48 @@ 
    (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
    (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
 
+(define_expand "avx512f_movddup512"
+  [(set (match_operand:V8DF 0 "register_operand")
+	(vec_select:V8DF
+	  (vec_concat:V16DF
+	    (match_operand:V8DF 1 "nonimmediate_operand")
+	    (match_dup 1))
+	  (parallel [(const_int 0) (const_int 8)
+		     (const_int 2) (const_int 10)
+		     (const_int 4) (const_int 12)
+		     (const_int 6) (const_int 14)])))]
+  "TARGET_AVX512F")
+
+(define_expand "avx512f_unpcklpd512"
+  [(set (match_operand:V8DF 0 "register_operand")
+	(vec_select:V8DF
+	  (vec_concat:V16DF
+	    (match_operand:V8DF 1 "register_operand")
+	    (match_operand:V8DF 2 "nonimmediate_operand"))
+	  (parallel [(const_int 0) (const_int 8)
+		     (const_int 2) (const_int 10)
+		     (const_int 4) (const_int 12)
+		     (const_int 6) (const_int 14)])))]
+  "TARGET_AVX512F")
+
+(define_insn "*avx512f_unpcklpd512"
+  [(set (match_operand:V8DF 0 "register_operand" "=v,v")
+	(vec_select:V8DF
+	  (vec_concat:V16DF
+	    (match_operand:V8DF 1 "nonimmediate_operand" "v,vm")
+	    (match_operand:V8DF 2 "nonimmediate_operand" "vm,1"))
+	  (parallel [(const_int 0) (const_int 8)
+		     (const_int 2) (const_int 10)
+		     (const_int 4) (const_int 12)
+		     (const_int 6) (const_int 14)])))]
+  "TARGET_AVX512F"
+  "@
+   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
+   vmovddup\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sselog")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "V8DF")])
+
 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
 (define_expand "avx_movddup256"
   [(set (match_operand:V4DF 0 "register_operand")
@@ -4812,25 +5915,237 @@ 
   "TARGET_SSE3 && reload_completed"
   [(const_int 0)]
 {
-  rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
-  emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
-  emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
-  DONE;
-})
+  rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
+  emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
+  emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
+  DONE;
+})
+
+(define_split
+  [(set (match_operand:V2DF 0 "register_operand")
+	(vec_select:V2DF
+	  (vec_concat:V4DF
+	    (match_operand:V2DF 1 "memory_operand")
+	    (match_dup 1))
+	  (parallel [(match_operand:SI 2 "const_0_to_1_operand")
+		     (match_operand:SI 3 "const_int_operand")])))]
+  "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
+  [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
+{
+  operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
+})
+
+(define_insn "avx512f_vmscalef<mode>"
+  [(set (match_operand:VF_128 0 "register_operand" "=v")
+	(vec_merge:VF_128
+	  (unspec:VF_128 [(match_operand:VF_128 1 "register_operand" "v")
+			  (match_operand:VF_128 2 "nonimmediate_operand" "vm")]
+			 UNSPEC_SCALEF)
+	  (match_dup 1)
+	  (const_int 1)))]
+  "TARGET_AVX512F"
+  "%vscalef<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "prefix" "evex")
+   (set_attr "mode"  "<ssescalarmode>")])
+
+(define_insn "avx512f_scalef<mode>"
+  [(set (match_operand:VF_512 0 "register_operand" "=v")
+	(unspec:VF_512 [(match_operand:VF_512 1 "register_operand" "v")
+			(match_operand:VF_512 2 "nonimmediate_operand" "vm")]
+		       UNSPEC_SCALEF))]
+  "TARGET_AVX512F"
+  "%vscalef<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "prefix" "evex")
+   (set_attr "mode"  "<MODE>")])
+
+(define_insn "avx512f_vternlog<mode>"
+  [(set (match_operand:VI48_512 0 "register_operand" "=v")
+	(unspec:VI48_512
+	  [(match_operand:VI48_512 1 "register_operand" "0")
+	   (match_operand:VI48_512 2 "register_operand" "v")
+	   (match_operand:VI48_512 3 "nonimmediate_operand" "vm")
+	   (match_operand:SI 4 "const_0_to_255_operand")]
+	  UNSPEC_VTERNLOG))]
+  "TARGET_AVX512F"
+  "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0|%0, %2, %3, %4}"
+  [(set_attr "type" "sselog")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx512f_getexp<mode>"
+  [(set (match_operand:VF_512 0 "register_operand" "=v")
+        (unspec:VF_512 [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
+                        UNSPEC_GETEXP))]
+   "TARGET_AVX512F"
+   "vgetexp<ssemodesuffix>\t{%1, %0|%0, %1}";
+    [(set_attr "prefix" "evex")
+     (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512f_sgetexp<mode>"
+  [(set (match_operand:VF_128 0 "register_operand" "=v")
+	(vec_merge:VF_128
+	  (unspec:VF_128 [(match_operand:VF_128 1 "register_operand" "v")
+			  (match_operand:VF_128 2 "nonimmediate_operand" "vm")]
+			 UNSPEC_GETEXP)
+	  (match_dup 1)
+	  (const_int 1)))]
+   "TARGET_AVX512F"
+   "vgetexp<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
+    [(set_attr "prefix" "evex")
+     (set_attr "mode" "<ssescalarmode>")])
+
+(define_insn "avx512f_align<mode>"
+  [(set (match_operand:VI48_512 0 "register_operand" "=v")
+        (unspec:VI48_512 [(match_operand:VI48_512 1 "register_operand" "v")
+			  (match_operand:VI48_512 2 "nonimmediate_operand" "vm")
+			  (match_operand:SI 3 "const_0_to_255_operand")]
+			 UNSPEC_ALIGN))]
+  "TARGET_AVX512F"
+  "valign<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+  [(set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx512f_fixupimm<mode>"
+  [(set (match_operand:VF_512 0 "register_operand" "=v")
+        (unspec:VF_512
+          [(match_operand:VF_512 1 "register_operand" "0")
+	   (match_operand:VF_512 2 "register_operand" "v")
+           (match_operand:<ssefixupmode> 3 "nonimmediate_operand" "vm")
+           (match_operand:SI 4 "const_0_to_255_operand")]
+           UNSPEC_FIXUPIMM))]
+  "TARGET_AVX512F"
+  "vfixupimm<ssemodesuffix>\t{%4, %3, %2, %0|%0, %2, %3, %4}";
+  [(set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512f_sfixupimm<mode>"
+  [(set (match_operand:VF_128 0 "register_operand" "=v")
+	(vec_merge:VF_128
+          (unspec:VF_128
+            [(match_operand:VF_128 1 "register_operand" "0")
+	     (match_operand:VF_128 2 "register_operand" "v")
+	     (match_operand:<ssefixupmode> 3 "nonimmediate_operand" "vm")
+	     (match_operand:SI 4 "const_0_to_255_operand")]
+	    UNSPEC_FIXUPIMM)
+	  (match_dup 1)
+	  (const_int 1)))]
+   "TARGET_AVX512F"
+   "vfixupimm<ssescalarmodesuffix>\t{%4, %3, %2, %0|%0, %2, %3, %4}";
+   [(set_attr "prefix" "evex")
+   (set_attr "mode" "<ssescalarmode>")])
+
+(define_insn "avx512f_rndscale<mode>"
+  [(set (match_operand:VF_512 0 "register_operand" "=v")
+	(unspec:VF_512
+	  [(match_operand:VF_512 1 "nonimmediate_operand" "vm")
+	   (match_operand:SI 2 "const_0_to_255_operand")]
+	  UNSPEC_ROUND))]
+  "TARGET_AVX512F"
+  "vrndscale<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512f_rndscale<mode>"
+  [(set (match_operand:VF_128 0 "register_operand" "=v")
+	(vec_merge:VF_128
+	  (unspec:VF_128
+	    [(match_operand:VF_128 1 "register_operand" "v")
+	     (match_operand:VF_128 2 "nonimmediate_operand" "vm")
+	     (match_operand:SI 3 "const_0_to_255_operand")]
+	    UNSPEC_ROUND)
+	  (match_dup 1)
+	  (const_int 1)))]
+  "TARGET_AVX512F"
+  "vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
+;; One bit in mask selects 2 elements.
+(define_insn "avx512f_shufps512_1"
+  [(set (match_operand:V16SF 0 "register_operand" "=v")
+	(vec_select:V16SF
+	  (vec_concat:V32SF
+	    (match_operand:V16SF 1 "register_operand" "v")
+	    (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
+	  (parallel [(match_operand 3  "const_0_to_3_operand")
+		     (match_operand 4  "const_0_to_3_operand")
+		     (match_operand 5  "const_16_to_19_operand")
+		     (match_operand 6  "const_16_to_19_operand")
+		     (match_operand 7  "const_4_to_7_operand")
+		     (match_operand 8  "const_4_to_7_operand")
+		     (match_operand 9  "const_20_to_23_operand")
+		     (match_operand 10  "const_20_to_23_operand")
+		     (match_operand 11  "const_8_to_11_operand")
+		     (match_operand 12  "const_8_to_11_operand")
+		     (match_operand 13  "const_24_to_27_operand")
+		     (match_operand 14  "const_24_to_27_operand")
+		     (match_operand 15  "const_12_to_15_operand")
+		     (match_operand 16  "const_12_to_15_operand")
+		     (match_operand 17  "const_28_to_31_operand")
+		     (match_operand 18  "const_28_to_31_operand")])))]
+  "TARGET_AVX512F
+   && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
+       && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
+       && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
+       && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
+       && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
+       && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
+       && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
+       && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
+       && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
+       && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
+       && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
+       && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
+{
+  int mask;
+  mask = INTVAL (operands[3]);
+  mask |= INTVAL (operands[4]) << 2;
+  mask |= (INTVAL (operands[5]) - 16) << 4;
+  mask |= (INTVAL (operands[6]) - 16) << 6;
+  operands[3] = GEN_INT (mask);
 
-(define_split
-  [(set (match_operand:V2DF 0 "register_operand")
-	(vec_select:V2DF
-	  (vec_concat:V4DF
-	    (match_operand:V2DF 1 "memory_operand")
-	    (match_dup 1))
-	  (parallel [(match_operand:SI 2 "const_0_to_1_operand")
-		     (match_operand:SI 3 "const_int_operand")])))]
-  "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
-  [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
+  return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+  [(set_attr "type" "sselog")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "V16SF")])
+
+(define_insn "avx512f_shufpd512_1"
+  [(set (match_operand:V8DF 0 "register_operand" "=v")
+	(vec_select:V8DF
+	  (vec_concat:V16DF
+	    (match_operand:V8DF 1 "register_operand" "v")
+	    (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
+	  (parallel [(match_operand 3 "const_0_to_1_operand")
+		     (match_operand 4 "const_8_to_9_operand")
+		     (match_operand 5 "const_2_to_3_operand")
+		     (match_operand 6 "const_10_to_11_operand")
+		     (match_operand 7 "const_4_to_5_operand")
+		     (match_operand 8 "const_12_to_13_operand")
+		     (match_operand 9 "const_6_to_7_operand")
+		     (match_operand 10 "const_14_to_15_operand")])))]
+  "TARGET_AVX512F"
 {
-  operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
-})
+  int mask;
+  mask = INTVAL (operands[3]);
+  mask |= (INTVAL (operands[4]) - 8) << 1;
+  mask |= (INTVAL (operands[5]) - 2) << 2;
+  mask |= (INTVAL (operands[6]) - 10) << 3;
+  mask |= (INTVAL (operands[7]) - 4) << 4;
+  mask |= (INTVAL (operands[8]) - 12) << 5;
+  mask |= (INTVAL (operands[9]) - 6) << 6;
+  mask |= (INTVAL (operands[10]) - 14) << 7;
+  operands[3] = GEN_INT (mask);
+
+  return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+  [(set_attr "type" "sselog")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "V8DF")])
 
 (define_expand "avx_shufpd256"
   [(match_operand:V4DF 0 "register_operand")
@@ -4905,6 +6220,22 @@ 
    (set_attr "prefix" "vex")
    (set_attr "mode" "OI")])
 
+(define_insn "avx512f_interleave_highv8di"
+  [(set (match_operand:V8DI 0 "register_operand" "=v")
+	(vec_select:V8DI
+	  (vec_concat:V16DI
+	    (match_operand:V8DI 1 "register_operand" "v")
+	    (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
+	  (parallel [(const_int 1) (const_int 9)
+		     (const_int 3) (const_int 11)
+		     (const_int 5) (const_int 13)
+		     (const_int 7) (const_int 15)])))]
+  "TARGET_AVX512F"
+  "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "XI")])
+
 (define_insn "vec_interleave_highv2di"
   [(set (match_operand:V2DI 0 "register_operand" "=x,x")
 	(vec_select:V2DI
@@ -4939,6 +6270,22 @@ 
    (set_attr "prefix" "vex")
    (set_attr "mode" "OI")])
 
+(define_insn "avx512f_interleave_lowv8di"
+  [(set (match_operand:V8DI 0 "register_operand" "=v")
+	(vec_select:V8DI
+	  (vec_concat:V16DI
+	    (match_operand:V8DI 1 "register_operand" "v")
+	    (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
+	  (parallel [(const_int 0) (const_int 8)
+		     (const_int 2) (const_int 10)
+		     (const_int 4) (const_int 12)
+		     (const_int 6) (const_int 14)])))]
+  "TARGET_AVX512F"
+  "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "XI")])
+
 (define_insn "vec_interleave_lowv2di"
   [(set (match_operand:V2DI 0 "register_operand" "=x,x")
 	(vec_select:V2DI
@@ -5282,6 +6629,64 @@ 
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
+;; Parallel integer down-conversion operations
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_mode_iterator PMOV_DST_MODE [V16QI V16HI V8SI V8HI])
+(define_mode_attr pmov_src_mode
+  [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
+(define_mode_attr pmov_src_lower
+  [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
+(define_mode_attr pmov_suff
+  [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
+
+(define_insn "*avx512f_<code><pmov_src_lower><mode>2"
+  [(set (match_operand:PMOV_DST_MODE 0 "nonimmediate_operand" "=v,m")
+	(any_truncate:PMOV_DST_MODE
+	  (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
+  "TARGET_AVX512F"
+  "vpmov<trunsuffix><pmov_suff>\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "memory" "none,store")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "*avx512f_<code>v8div16qi2"
+  [(set (match_operand:V16QI 0 "register_operand" "=v")
+	(vec_concat:V16QI
+	  (any_truncate:V8QI
+	    (match_operand:V8DI 1 "register_operand" "v"))
+	  (const_vector:V8QI [(const_int 0) (const_int 0)
+			      (const_int 0) (const_int 0)
+			      (const_int 0) (const_int 0)
+			      (const_int 0) (const_int 0)])))]
+  "TARGET_AVX512F"
+  "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "TI")])
+
+(define_insn "*avx512f_<code>v8div16qi2_store"
+  [(set (match_operand:V16QI 0 "memory_operand" "=m")
+	(vec_concat:V16QI
+	  (any_truncate:V8QI
+	    (match_operand:V8DI 1 "register_operand" "v"))
+	  (vec_select:V8QI
+	    (match_dup 0)
+	    (parallel [(const_int 8) (const_int 9)
+		       (const_int 10) (const_int 11)
+		       (const_int 12) (const_int 13)
+		       (const_int 14) (const_int 15)]))))]
+  "TARGET_AVX512F"
+  "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "memory" "store")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "TI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
 ;; Parallel integral arithmetic
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -5358,9 +6763,9 @@ 
   "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
 
 (define_insn "*mul<mode>3"
-  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x")
-	(mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,x")
-		       (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,xm")))]
+  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
+	(mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v")
+		       (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))]
   "TARGET_SSE2 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
   "@
    pmullw\t{%2, %0|%0, %2}
@@ -5404,6 +6809,51 @@ 
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_expand "vec_widen_umult_even_v16si"
+  [(set (match_operand:V8DI 0 "register_operand")
+        (mult:V8DI
+          (zero_extend:V8DI
+            (vec_select:V8SI
+              (match_operand:V16SI 1 "nonimmediate_operand")
+              (parallel [(const_int 0) (const_int 2)
+                         (const_int 4) (const_int 6)
+                         (const_int 8) (const_int 10)
+                         (const_int 12) (const_int 14)])))
+          (zero_extend:V8DI
+            (vec_select:V8SI
+              (match_operand:V16SI 2 "nonimmediate_operand")
+              (parallel [(const_int 0) (const_int 2)
+                         (const_int 4) (const_int 6)
+                         (const_int 8) (const_int 10)
+                         (const_int 12) (const_int 14)])))))]
+  "TARGET_AVX512F"
+  "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
+
+(define_insn "*vec_widen_umult_even_v16si"
+  [(set (match_operand:V8DI 0 "register_operand" "=v")
+        (mult:V8DI
+          (zero_extend:V8DI
+            (vec_select:V8SI
+              (match_operand:V16SI 1 "nonimmediate_operand" "%v")
+              (parallel [(const_int 0) (const_int 2)
+                         (const_int 4) (const_int 6)
+                         (const_int 8) (const_int 10)
+                         (const_int 12) (const_int 14)])))
+          (zero_extend:V8DI
+            (vec_select:V8SI
+              (match_operand:V16SI 2 "nonimmediate_operand" "vm")
+              (parallel [(const_int 0) (const_int 2)
+                         (const_int 4) (const_int 6)
+                         (const_int 8) (const_int 10)
+                         (const_int 12) (const_int 14)])))))]
+  "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
+  "vpmuludq\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "avx512f")
+   (set_attr "type" "sseimul")
+   (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "XI")])
+
 (define_expand "vec_widen_umult_even_v8si"
   [(set (match_operand:V4DI 0 "register_operand")
 	(mult:V4DI
@@ -5474,6 +6924,51 @@ 
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "TI")])
 
+(define_expand "vec_widen_smult_even_v16si"  ; signed even-lane widening mul
+  [(set (match_operand:V8DI 0 "register_operand")
+        (mult:V8DI
+          (sign_extend:V8DI
+            (vec_select:V8SI
+              (match_operand:V16SI 1 "nonimmediate_operand")
+              (parallel [(const_int 0) (const_int 2)  ; even SI lanes of op 1
+                         (const_int 4) (const_int 6)
+                         (const_int 8) (const_int 10)
+                         (const_int 12) (const_int 14)])))
+          (sign_extend:V8DI
+            (vec_select:V8SI
+              (match_operand:V16SI 2 "nonimmediate_operand")
+              (parallel [(const_int 0) (const_int 2)  ; even SI lanes of op 2
+                         (const_int 4) (const_int 6)
+                         (const_int 8) (const_int 10)
+                         (const_int 12) (const_int 14)])))))]
+  "TARGET_AVX512F"
+  "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
+
+(define_insn "*vec_widen_smult_even_v16si"  ; signed even-lane widening mul
+  [(set (match_operand:V8DI 0 "register_operand" "=v")  ; any EVEX register
+        (mult:V8DI
+          (sign_extend:V8DI
+            (vec_select:V8SI
+              (match_operand:V16SI 1 "nonimmediate_operand" "%v")  ; commutative
+              (parallel [(const_int 0) (const_int 2)  ; even SI lanes of op 1
+                         (const_int 4) (const_int 6)
+                         (const_int 8) (const_int 10)
+                         (const_int 12) (const_int 14)])))
+          (sign_extend:V8DI
+            (vec_select:V8SI
+              (match_operand:V16SI 2 "nonimmediate_operand" "vm")
+              (parallel [(const_int 0) (const_int 2)  ; even SI lanes of op 2
+                         (const_int 4) (const_int 6)
+                         (const_int 8) (const_int 10)
+                         (const_int 12) (const_int 14)])))))]
+  "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
+  "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "avx512f")
+   (set_attr "type" "sseimul")
+   (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "XI")])
+
 (define_expand "vec_widen_smult_even_v8si"
   [(set (match_operand:V4DI 0 "register_operand")
 	(mult:V4DI
@@ -5679,10 +7174,10 @@ 
    (set_attr "mode" "TI")])
 
 (define_expand "mul<mode>3"
-  [(set (match_operand:VI4_AVX2 0 "register_operand")
-	(mult:VI4_AVX2
-	  (match_operand:VI4_AVX2 1 "general_vector_operand")
-	  (match_operand:VI4_AVX2 2 "general_vector_operand")))]
+  [(set (match_operand:VI4_AVX512F 0 "register_operand")
+	(mult:VI4_AVX512F
+	  (match_operand:VI4_AVX512F 1 "general_vector_operand")
+	  (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
   "TARGET_SSE2"
 {
   if (TARGET_SSE4_1)
@@ -5701,10 +7196,10 @@ 
 })
 
 (define_insn "*<sse4_1_avx2>_mul<mode>3"
-  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v")
-	(mult:VI4_AVX2
-	  (match_operand:VI4_AVX2 1 "nonimmediate_operand" "%0,v")
-	  (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm,vm")))]
+  [(set (match_operand:VI4_AVX512F 0 "register_operand" "=x,v")
+	(mult:VI4_AVX512F
+	  (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,v")
+	  (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
   "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
   "@
    pmulld\t{%2, %0|%0, %2}
@@ -5717,9 +7212,10 @@ 
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_expand "mul<mode>3"
-  [(set (match_operand:VI8_AVX2 0 "register_operand")
-	(mult:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand")
-		       (match_operand:VI8_AVX2 2 "register_operand")))]
+  [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
+	(mult:VI8_AVX2_AVX512F
+	  (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
+	  (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
   "TARGET_SSE2"
 {
   ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
@@ -5766,8 +7262,8 @@ 
 (define_expand "vec_widen_<s>mult_odd_<mode>"
   [(match_operand:<sseunpackmode> 0 "register_operand")
    (any_extend:<sseunpackmode>
-     (match_operand:VI4_AVX2 1 "general_vector_operand"))
-   (match_operand:VI4_AVX2 2 "general_vector_operand")]
+     (match_operand:VI4_AVX512F 1 "general_vector_operand"))
+   (match_operand:VI4_AVX512F 2 "general_vector_operand")]
   "TARGET_SSE2"
 {
   ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
@@ -5825,9 +7321,9 @@ 
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_insn "<shift_insn><mode>3"
-  [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,v")
+  [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
 	(any_lshift:VI248_AVX2
-	  (match_operand:VI248_AVX2 1 "register_operand" "0,v")
+	  (match_operand:VI248_AVX2 1 "register_operand" "0,x")
 	  (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
   "TARGET_SSE2"
   "@
@@ -5843,6 +7339,22 @@ 
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn "<shift_insn><mode>3"  ; 512-bit shift by a scalar/immediate count
+  [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
+	(any_lshift:VI48_512
+	  (match_operand:VI48_512 1 "nonimmediate_operand" "v,m")  ; mem only w/ imm
+	  (match_operand:SI 2 "nonmemory_operand" "vN,N")))]
+  "TARGET_AVX512F"
+  "vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "avx512f")
+   (set_attr "type" "sseishft")
+   (set (attr "length_immediate")  ; one immediate byte only for a constant count
+     (if_then_else (match_operand 2 "const_int_operand")
+       (const_string "1")
+       (const_string "0")))
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
 (define_expand "vec_shl_<mode>"
   [(set (match_operand:VI_128 0 "register_operand")
 	(ashift:V1TI
@@ -5918,25 +7430,44 @@ 
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn "avx512f_<rotate>v<mode>"  ; rotate with a vector of counts
+  [(set (match_operand:VI48_512 0 "register_operand" "=v")
+	(any_rotate:VI48_512
+	  (match_operand:VI48_512 1 "register_operand" "v")
+	  (match_operand:VI48_512 2 "nonimmediate_operand" "vm")))]  ; counts
+  "TARGET_AVX512F"
+  "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx512f_<rotate><mode>"  ; rotate by one constant count
+  [(set (match_operand:VI48_512 0 "register_operand" "=v")
+	(any_rotate:VI48_512
+	  (match_operand:VI48_512 1 "nonimmediate_operand" "vm")
+	  (match_operand:SI 2 "const_0_to_255_operand")))]  ; immediate count
+  "TARGET_AVX512F"
+  "vp<rotate><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
 
 (define_expand "<code><mode>3"
-  [(set (match_operand:VI124_256 0 "register_operand")
-	(maxmin:VI124_256
-	  (match_operand:VI124_256 1 "nonimmediate_operand")
-	  (match_operand:VI124_256 2 "nonimmediate_operand")))]
+  [(set (match_operand:VI124_256_48_512 0 "register_operand")
+	(maxmin:VI124_256_48_512
+	  (match_operand:VI124_256_48_512 1 "nonimmediate_operand")
+	  (match_operand:VI124_256_48_512 2 "nonimmediate_operand")))]
   "TARGET_AVX2"
   "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
 
 (define_insn "*avx2_<code><mode>3"
-  [(set (match_operand:VI124_256 0 "register_operand" "=v")
-	(maxmin:VI124_256
-	  (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
-	  (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
+  [(set (match_operand:VI124_256_48_512 0 "register_operand" "=v")
+	(maxmin:VI124_256_48_512
+	  (match_operand:VI124_256_48_512 1 "nonimmediate_operand" "%v")
+	  (match_operand:VI124_256_48_512 2 "nonimmediate_operand" "vm")))]
   "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
   "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sseiadd")
    (set_attr "prefix_extra" "1")
-   (set_attr "prefix" "vex")
+   (set_attr "prefix" "maybe_evex")
    (set_attr "mode" "OI")])
 
 (define_expand "<code><mode>3"
@@ -6151,6 +7682,28 @@ 
    (set_attr "prefix" "vex")
    (set_attr "mode" "OI")])
 
+(define_expand "avx512f_eq<mode>3"  ; equality compare, mask-mode result
+  [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
+	(unspec:<avx512fmaskmode>
+	  [(match_operand:VI48_512 1 "register_operand")
+	   (match_operand:VI48_512 2 "nonimmediate_operand")]
+	  UNSPEC_MASKED_EQ))]
+  "TARGET_AVX512F"
+  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
+
+(define_insn "avx512f_eq<mode>3_1"  ; vpcmpeq writing a mask register
+  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+	(unspec:<avx512fmaskmode>
+	  [(match_operand:VI48_512 1 "register_operand" "%v")  ; commutative
+	   (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
+	  UNSPEC_MASKED_EQ))]
+  "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
+  "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
 (define_insn "*sse4_1_eqv2di3"
   [(set (match_operand:V2DI 0 "register_operand" "=x,x")
 	(eq:V2DI
@@ -6225,6 +7778,18 @@ 
    (set_attr "prefix" "vex")
    (set_attr "mode" "OI")])
 
+(define_insn "avx512f_gt<mode>3"  ; vpcmpgt writing a mask register
+  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+	(unspec:<avx512fmaskmode>
+	  [(match_operand:VI48_512 1 "register_operand" "v")  ; not commutative
+	   (match_operand:VI48_512 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
+  "TARGET_AVX512F"
+  "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
 (define_insn "sse2_gt<mode>3"
   [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
 	(gt:VI124_128
@@ -6240,6 +7805,23 @@ 
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "TI")])
 
+(define_expand "vcond<V_512:mode><VI_512:mode>"  ; 512-bit vector conditional
+  [(set (match_operand:V_512 0 "register_operand")
+	(if_then_else:V_512
+	  (match_operator 3 ""  ; comparison code, any operator accepted here
+	    [(match_operand:VI_512 4 "nonimmediate_operand")
+	     (match_operand:VI_512 5 "general_operand")])
+	  (match_operand:V_512 1)  ; value when the comparison holds
+	  (match_operand:V_512 2)))]  ; value otherwise
+  "TARGET_AVX512F
+   && (GET_MODE_NUNITS (<V_512:MODE>mode)
+       == GET_MODE_NUNITS (<VI_512:MODE>mode))"  ; same element count required
+{
+  bool ok = ix86_expand_int_vcond (operands);  /* does the real expansion */
+  gcc_assert (ok);
+  DONE;
+})
+
 (define_expand "vcond<V_256:mode><VI_256:mode>"
   [(set (match_operand:V_256 0 "register_operand")
 	(if_then_else:V_256
@@ -6289,6 +7871,23 @@ 
   DONE;
 })
 
+(define_expand "vcondu<V_512:mode><VI_512:mode>"  ; 512-bit vector conditional
+  [(set (match_operand:V_512 0 "register_operand")
+	(if_then_else:V_512
+	  (match_operator 3 ""  ; comparison code, any operator accepted here
+	    [(match_operand:VI_512 4 "nonimmediate_operand")
+	     (match_operand:VI_512 5 "nonimmediate_operand")])
+	  (match_operand:V_512 1 "general_operand")  ; value when it holds
+	  (match_operand:V_512 2 "general_operand")))]  ; value otherwise
+  "TARGET_AVX512F
+   && (GET_MODE_NUNITS (<V_512:MODE>mode)
+       == GET_MODE_NUNITS (<VI_512:MODE>mode))"  ; same element count required
+{
+  bool ok = ix86_expand_int_vcond (operands);  /* does the real expansion */
+  gcc_assert (ok);
+  DONE;
+})
+
 (define_expand "vcondu<V_256:mode><VI_256:mode>"
   [(set (match_operand:V_256 0 "register_operand")
 	(if_then_else:V_256
@@ -6342,7 +7941,9 @@ 
   [V16QI V8HI V4SI V2DI V4SF V2DF
    (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
    (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
-   (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")])
+   (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
+   (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
+   (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
 
 (define_expand "vec_perm<mode>"
   [(match_operand:VEC_PERM_AVX2 0 "register_operand")
@@ -6361,7 +7962,9 @@ 
    (V16QI "TARGET_SSE2") (V8HI "TARGET_SSE2")
    (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
    (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
-   (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")])
+   (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
+   (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
+   (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
 
 (define_expand "vec_perm_const<mode>"
   [(match_operand:VEC_PERM_CONST 0 "register_operand")
@@ -6409,14 +8012,20 @@ 
 	(and:VI
 	  (not:VI (match_operand:VI 1 "register_operand" "0,v"))
 	  (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
-  "TARGET_SSE"
+  "TARGET_SSE "
 {
-  static char buf[32];
+  static char buf[64];
   const char *ops;
   const char *tmp;
 
   switch (get_attr_mode (insn))
     {
+    case MODE_XI:
+      gcc_assert (TARGET_AVX512F);
+
+      tmp = "pandn<ssemodesuffix>";
+      break;
+
     case MODE_OI:
       gcc_assert (TARGET_AVX2);
     case MODE_TI:
@@ -6496,12 +8105,18 @@ 
   "TARGET_SSE
    && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
 {
-  static char buf[32];
+  static char buf[64];
   const char *ops;
   const char *tmp;
 
   switch (get_attr_mode (insn))
     {
+
+    case MODE_XI:
+      gcc_assert (TARGET_AVX512F);
+      tmp = "p<logic><ssemodesuffix>";
+      break;
+
     case MODE_OI:
       gcc_assert (TARGET_AVX2);
     case MODE_TI:
@@ -6510,6 +8125,8 @@ 
       tmp = "p<logic>";
       break;
 
+   case MODE_V16SF:
+      gcc_assert (TARGET_AVX512F);
    case MODE_V8SF:
       gcc_assert (TARGET_AVX);
    case MODE_V4SF:
@@ -6562,6 +8179,28 @@ 
 	      ]
 	      (const_string "<sseinsnmode>")))])
 
+(define_insn "avx512f_testm<mode>3"  ; vptestm writing a mask register
+  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+	(unspec:<avx512fmaskmode>
+	 [(match_operand:VI48_512 1 "register_operand" "v")
+	  (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
+	 UNSPEC_TESTM))]
+  "TARGET_AVX512F"
+  "vptestm<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "prefix" "evex")
+   (set_attr "mode"  "<sseinsnmode>")])
+
+(define_insn "avx512f_testnm<mode>3"  ; vptestnm writing a mask register
+  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
+	(unspec:<avx512fmaskmode>
+	 [(match_operand:VI48_512 1 "register_operand" "v")
+	  (match_operand:VI48_512 2 "nonimmediate_operand" "vm")]
+	 UNSPEC_TESTNM))]
+  "TARGET_AVX512CD"
+  "vptestnm<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "prefix" "evex")
+   (set_attr "mode"  "<sseinsnmode>")])
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Parallel integral element swizzling
@@ -6570,8 +8209,8 @@ 
 
 (define_expand "vec_pack_trunc_<mode>"
   [(match_operand:<ssepackmode> 0 "register_operand")
-   (match_operand:VI248_AVX2 1 "register_operand")
-   (match_operand:VI248_AVX2 2 "register_operand")]
+   (match_operand:VI248_AVX2_8_AVX512F 1 "register_operand")
+   (match_operand:VI248_AVX2_8_AVX512F 2 "register_operand")]
   "TARGET_SSE2"
 {
   rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
@@ -6831,6 +8470,27 @@ 
    (set_attr "prefix" "vex")
    (set_attr "mode" "OI")])
 
+(define_insn "avx512f_interleave_highv16si"  ; vpunpckhdq on V16SI
+  [(set (match_operand:V16SI 0 "register_operand" "=v")
+	(vec_select:V16SI
+	  (vec_concat:V32SI  ; indices 0-15 pick from op 1, 16-31 from op 2
+	    (match_operand:V16SI 1 "register_operand" "v")
+	    (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
+	  (parallel [(const_int 2) (const_int 18)  ; upper two SI of each group of 4
+		     (const_int 3) (const_int 19)
+		     (const_int 6) (const_int 22)
+		     (const_int 7) (const_int 23)
+		     (const_int 10) (const_int 26)
+		     (const_int 11) (const_int 27)
+		     (const_int 14) (const_int 30)
+		     (const_int 15) (const_int 31)])))]
+  "TARGET_AVX512F"
+  "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "XI")])
+
+
 (define_insn "vec_interleave_highv4si"
   [(set (match_operand:V4SI 0 "register_operand" "=x,x")
 	(vec_select:V4SI
@@ -6865,6 +8525,26 @@ 
    (set_attr "prefix" "vex")
    (set_attr "mode" "OI")])
 
+(define_insn "avx512f_interleave_lowv16si"  ; vpunpckldq on V16SI
+  [(set (match_operand:V16SI 0 "register_operand" "=v")
+	(vec_select:V16SI
+	  (vec_concat:V32SI  ; indices 0-15 pick from op 1, 16-31 from op 2
+	    (match_operand:V16SI 1 "register_operand" "v")
+	    (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
+	  (parallel [(const_int 0) (const_int 16)  ; lower two SI of each group of 4
+		     (const_int 1) (const_int 17)
+		     (const_int 4) (const_int 20)
+		     (const_int 5) (const_int 21)
+		     (const_int 8) (const_int 24)
+		     (const_int 9) (const_int 25)
+		     (const_int 12) (const_int 28)
+		     (const_int 13) (const_int 29)])))]
+  "TARGET_AVX512F"
+  "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sselog")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "XI")])
+
 (define_insn "vec_interleave_lowv4si"
   [(set (match_operand:V4SI 0 "register_operand" "=x,x")
 	(vec_select:V4SI
@@ -6941,47 +8621,239 @@ 
 {
   operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
 
-  switch (which_alternative)
-    {
-    case 0:
-      if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
-	return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
-      /* FALLTHRU */
-    case 1:
-      return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
-    case 2:
-      if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
-	return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
-      /* FALLTHRU */
-    case 3:
-      return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
-    default:
-      gcc_unreachable ();
-    }
+  switch (which_alternative)
+    {
+    case 0:
+      if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
+	return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
+      /* FALLTHRU */
+    case 1:
+      return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
+    case 2:
+      if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
+	return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
+      /* FALLTHRU */
+    case 3:
+      return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "isa" "noavx,noavx,avx,avx")
+   (set_attr "type" "sselog")
+   (set (attr "prefix_rex")
+     (if_then_else
+       (and (not (match_test "TARGET_AVX"))
+	    (eq (const_string "<MODE>mode") (const_string "V2DImode")))
+       (const_string "1")
+       (const_string "*")))
+   (set (attr "prefix_data16")
+     (if_then_else
+       (and (not (match_test "TARGET_AVX"))
+	    (eq (const_string "<MODE>mode") (const_string "V8HImode")))
+       (const_string "1")
+       (const_string "*")))
+   (set (attr "prefix_extra")
+     (if_then_else
+       (and (not (match_test "TARGET_AVX"))
+	    (eq (const_string "<MODE>mode") (const_string "V8HImode")))
+       (const_string "*")
+       (const_string "1")))
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "orig,orig,vex,vex")
+   (set_attr "mode" "TI")])
+
+(define_insn "avx512f_vinsert<shuffletype>32x4_1"
+  [(set (match_operand:V16FI 0 "register_operand" "=v")
+	(vec_merge:V16FI
+	  (match_operand:V16FI 1 "register_operand" "v")
+	  (vec_duplicate:V16FI
+		(match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
+	  (match_operand:SI 3 "const_int_operand" "n")))]
+  "TARGET_AVX512F"
+{
+  /* Translate the vec_merge selector into the insn immediate (0..3).  */
+  int imm;
+  switch (INTVAL (operands[3]))
+    {
+    case 0xFFF:  imm = 0; break;
+    case 0xF0FF: imm = 1; break;
+    case 0xFF0F: imm = 2; break;
+    case 0xFFF0: imm = 3; break;
+    default:
+      gcc_unreachable ();
+    }
+
+  operands[3] = GEN_INT (imm);
+
+  return "vinsert<shuffletype>32x4\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+  [(set_attr "type" "sselog")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "vec_set_lo_<mode>"  ; replace the low half of op 1 with op 2
+  [(set (match_operand:V8FI 0 "register_operand" "=v")
+	(vec_concat:V8FI
+	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
+	  (vec_select:<ssehalfvecmode>
+	    (match_operand:V8FI 1 "register_operand" "v")
+	    (parallel [(const_int 4) (const_int 5)  ; keep op 1's high half
+              (const_int 6) (const_int 7)]))))]
+  "TARGET_AVX512F"
+  "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
+  [(set_attr "type" "sselog")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "XI")])
+
+(define_insn "vec_set_hi_<mode>"  ; replace the high half of op 1 with op 2
+  [(set (match_operand:V8FI 0 "register_operand" "=v")
+	(vec_concat:V8FI
+	  (vec_select:<ssehalfvecmode>
+	    (match_operand:V8FI 1 "register_operand" "v")
+	    (parallel [(const_int 0) (const_int 1)  ; keep op 1's low half
+	      (const_int 2) (const_int 3)]))
+	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
+  "TARGET_AVX512F"
+  "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
+  [(set_attr "type" "sselog")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "XI")])
+
+(define_insn "avx512f_shuf_<shuffletype>64x2_1"  ; four aligned element pairs
+  [(set (match_operand:V8FI 0 "register_operand" "=v")
+	(vec_select:V8FI
+	  (vec_concat:<ssedoublemode>
+	    (match_operand:V8FI 1 "register_operand" "v")
+	    (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
+	  (parallel [(match_operand 3  "const_0_to_7_operand")  ; pairs from op 1
+		     (match_operand 4  "const_0_to_7_operand")
+		     (match_operand 5  "const_0_to_7_operand")
+		     (match_operand 6  "const_0_to_7_operand")
+		     (match_operand 7  "const_8_to_15_operand")  ; pairs from op 2
+		     (match_operand 8  "const_8_to_15_operand")
+		     (match_operand 9  "const_8_to_15_operand")
+		     (match_operand 10  "const_8_to_15_operand")])))]
+  "TARGET_AVX512F
+   && ((INTVAL (operands[3]) | INTVAL (operands[5])
+	| INTVAL (operands[7]) | INTVAL (operands[9])) & 1) == 0
+   && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
+       && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1)
+       && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
+       && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))"
+{
+  int mask = INTVAL (operands[3]) / 2;  /* two immediate bits per pair */
+  mask |= INTVAL (operands[5]) / 2 << 2;
+  mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
+  mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
+  operands[3] = GEN_INT (mask);
+  return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+  [(set_attr "type" "sselog")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx512f_shuf_<shuffletype>32x4_1"  ; four aligned element quads
+  [(set (match_operand:V16FI 0 "register_operand" "=v")
+	(vec_select:V16FI
+	  (vec_concat:<ssedoublemode>
+	    (match_operand:V16FI 1 "register_operand" "v")
+	    (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
+	  (parallel [(match_operand 3  "const_0_to_15_operand")  ; quads from op 1
+		     (match_operand 4  "const_0_to_15_operand")
+		     (match_operand 5  "const_0_to_15_operand")
+		     (match_operand 6  "const_0_to_15_operand")
+		     (match_operand 7  "const_0_to_15_operand")
+		     (match_operand 8  "const_0_to_15_operand")
+		     (match_operand 9  "const_0_to_15_operand")
+		     (match_operand 10  "const_0_to_15_operand")
+		     (match_operand 11  "const_16_to_31_operand")  ; quads from op 2
+		     (match_operand 12  "const_16_to_31_operand")
+		     (match_operand 13  "const_16_to_31_operand")
+		     (match_operand 14  "const_16_to_31_operand")
+		     (match_operand 15  "const_16_to_31_operand")
+		     (match_operand 16  "const_16_to_31_operand")
+		     (match_operand 17  "const_16_to_31_operand")
+		     (match_operand 18  "const_16_to_31_operand")])))]
+  "TARGET_AVX512F
+   && ((INTVAL (operands[3]) | INTVAL (operands[7])
+	| INTVAL (operands[11]) | INTVAL (operands[15])) & 3) == 0
+   && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
+       && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
+       && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
+       && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
+       && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
+       && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3)
+       && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1)
+       && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2)
+       && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3)
+       && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1)
+       && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2)
+       && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))"
+{
+  int mask = INTVAL (operands[3]) / 4;  /* two immediate bits per quad */
+  mask |= INTVAL (operands[7]) / 4 << 2;
+  mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
+  mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
+  operands[3] = GEN_INT (mask);
+  return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+}
+  [(set_attr "type" "sselog")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx512f_pshufd_1"
+  [(set (match_operand:V16SI 0 "register_operand" "=v")
+	(vec_select:V16SI
+	  (match_operand:V16SI 1 "nonimmediate_operand" "vm")
+	  (parallel [(match_operand 2 "const_0_to_3_operand")
+		     (match_operand 3 "const_0_to_3_operand")
+		     (match_operand 4 "const_0_to_3_operand")
+		     (match_operand 5 "const_0_to_3_operand")
+		     (match_operand 6 "const_4_to_7_operand")
+		     (match_operand 7 "const_4_to_7_operand")
+		     (match_operand 8 "const_4_to_7_operand")
+		     (match_operand 9 "const_4_to_7_operand")
+		     (match_operand 10 "const_8_to_11_operand")
+		     (match_operand 11 "const_8_to_11_operand")
+		     (match_operand 12 "const_8_to_11_operand")
+		     (match_operand 13 "const_8_to_11_operand")
+		     (match_operand 14 "const_12_to_15_operand")
+		     (match_operand 15 "const_12_to_15_operand")
+		     (match_operand 16 "const_12_to_15_operand")
+		     (match_operand 17 "const_12_to_15_operand")])))]
+  "TARGET_AVX512F
+   && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
+   && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
+   && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
+   && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
+   && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
+   && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
+   && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
+   && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
+   && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
+   && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
+   && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
+   && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
+{
+  int mask = 0;
+  mask |= INTVAL (operands[2]) << 0;
+  mask |= INTVAL (operands[3]) << 2;
+  mask |= INTVAL (operands[4]) << 4;
+  mask |= INTVAL (operands[5]) << 6;
+  operands[2] = GEN_INT (mask);
+
+  return "vpshufd\t{%2, %1, %0|%0, %1, %2}";
 }
-  [(set_attr "isa" "noavx,noavx,avx,avx")
-   (set_attr "type" "sselog")
-   (set (attr "prefix_rex")
-     (if_then_else
-       (and (not (match_test "TARGET_AVX"))
-	    (eq (const_string "<MODE>mode") (const_string "V2DImode")))
-       (const_string "1")
-       (const_string "*")))
-   (set (attr "prefix_data16")
-     (if_then_else
-       (and (not (match_test "TARGET_AVX"))
-	    (eq (const_string "<MODE>mode") (const_string "V8HImode")))
-       (const_string "1")
-       (const_string "*")))
-   (set (attr "prefix_extra")
-     (if_then_else
-       (and (not (match_test "TARGET_AVX"))
-	    (eq (const_string "<MODE>mode") (const_string "V8HImode")))
-       (const_string "*")
-       (const_string "1")))
+  [(set_attr "type" "sselog1")
+   (set_attr "prefix" "evex")
    (set_attr "length_immediate" "1")
-   (set_attr "prefix" "orig,orig,vex,vex")
-   (set_attr "mode" "TI")])
+   (set_attr "mode" "XI")])
 
 (define_expand "avx2_pshufdv3"
   [(match_operand:V8SI 0 "register_operand")
@@ -7657,25 +9529,25 @@ 
 
 (define_expand "vec_unpacks_lo_<mode>"
   [(match_operand:<sseunpackmode> 0 "register_operand")
-   (match_operand:VI124_AVX2 1 "register_operand")]
+   (match_operand:VI124_AVX512F 1 "register_operand")]
   "TARGET_SSE2"
   "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
 
 (define_expand "vec_unpacks_hi_<mode>"
   [(match_operand:<sseunpackmode> 0 "register_operand")
-   (match_operand:VI124_AVX2 1 "register_operand")]
+   (match_operand:VI124_AVX512F 1 "register_operand")]
   "TARGET_SSE2"
   "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
 
 (define_expand "vec_unpacku_lo_<mode>"
   [(match_operand:<sseunpackmode> 0 "register_operand")
-   (match_operand:VI124_AVX2 1 "register_operand")]
+   (match_operand:VI124_AVX512F 1 "register_operand")]
   "TARGET_SSE2"
   "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
 
 (define_expand "vec_unpacku_hi_<mode>"
   [(match_operand:<sseunpackmode> 0 "register_operand")
-   (match_operand:VI124_AVX2 1 "register_operand")]
+   (match_operand:VI124_AVX512F 1 "register_operand")]
   "TARGET_SSE2"
   "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
 
@@ -8428,9 +10300,9 @@ 
    (set_attr "mode" "DI")])
 
 (define_insn "abs<mode>2"
-  [(set (match_operand:VI124_AVX2 0 "register_operand" "=v")
-	(abs:VI124_AVX2
-	  (match_operand:VI124_AVX2 1 "nonimmediate_operand" "vm")))]
+  [(set (match_operand:VI124_AVX2_48_AVX512F 0 "register_operand" "=v")
+	(abs:VI124_AVX2_48_AVX512F
+	  (match_operand:VI124_AVX2_48_AVX512F 1 "nonimmediate_operand" "vm")))]
   "TARGET_SSSE3"
   "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
   [(set_attr "type" "sselog1")
@@ -8773,6 +10645,16 @@ 
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "TI")])
 
+(define_insn "avx512f_<code>v16qiv16si2"  ; extend 16 QI elements to SI
+  [(set (match_operand:V16SI 0 "register_operand" "=v")
+	(any_extend:V16SI
+	  (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]  ; all 128 bits read
+  "TARGET_AVX512F"
+  "vpmov<extsuffix>bd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "XI")])
+
 (define_insn "avx2_<code>v8qiv8si2"
   [(set (match_operand:V8SI 0 "register_operand" "=x")
 	(any_extend:V8SI
@@ -8803,6 +10685,16 @@ 
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "TI")])
 
+(define_insn "avx512f_<code>v16hiv16si2"  ; extend 16 HI elements to SI
+  [(set (match_operand:V16SI 0 "register_operand" "=v")
+	(any_extend:V16SI
+	  (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
+  "TARGET_AVX512F"
+  "vpmov<extsuffix>wd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "XI")])
+
 (define_insn "avx2_<code>v8hiv8si2"
   [(set (match_operand:V8SI 0 "register_operand" "=x")
 	(any_extend:V8SI
@@ -8828,6 +10720,21 @@ 
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "TI")])
 
+(define_insn "avx512f_<code>v8qiv8di2"  ; extend the low 8 QI elements to DI
+  [(set (match_operand:V8DI 0 "register_operand" "=v")
+	(any_extend:V8DI
+	  (vec_select:V8QI
+	    (match_operand:V16QI 1 "nonimmediate_operand" "vm")
+	    (parallel [(const_int 0) (const_int 1)  ; only the low 64 bits are used
+		       (const_int 2) (const_int 3)
+		       (const_int 4) (const_int 5)
+		       (const_int 6) (const_int 7)]))))]
+  "TARGET_AVX512F"
+  "vpmov<extsuffix>bq\t{%1, %0|%0, %q1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "XI")])
+
 (define_insn "avx2_<code>v4qiv4di2"
   [(set (match_operand:V4DI 0 "register_operand" "=x")
 	(any_extend:V4DI
@@ -8855,6 +10762,16 @@ 
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "TI")])
 
+(define_insn "avx512f_<code>v8hiv8di2"  ; extend 8 HI elements to DI
+  [(set (match_operand:V8DI 0 "register_operand" "=v")
+	(any_extend:V8DI
+	  (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]  ; all 128 bits read
+  "TARGET_AVX512F"
+  "vpmov<extsuffix>wq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "XI")])
+
 (define_insn "avx2_<code>v4hiv4di2"
   [(set (match_operand:V4DI 0 "register_operand" "=x")
 	(any_extend:V4DI
@@ -8882,6 +10799,16 @@ 
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "TI")])
 
+(define_insn "avx512f_<code>v8siv8di2"  ; extend 8 SI elements to DI
+  [(set (match_operand:V8DI 0 "register_operand" "=v")
+	(any_extend:V8DI
+	  (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
+  "TARGET_AVX512F"
+  "vpmov<extsuffix>dq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "XI")])
+
 (define_insn "avx2_<code>v4siv4di2"
   [(set (match_operand:V4DI 0 "register_operand" "=x")
 	(any_extend:V4DI
@@ -8981,10 +10908,20 @@ 
   DONE;
 })
 
+;; V8DF rounding expander: forwards destination, source and the 0..15
+;; immediate unchanged to the avx512f_rndscalev8df generator.
+(define_expand "avx512f_roundpd512"
+  [(match_operand:V8DF 0 "register_operand")
+   (match_operand:V8DF 1 "nonimmediate_operand")
+   (match_operand:SI 2 "const_0_to_15_operand")]
+  "TARGET_AVX512F"
+{
+  rtx rndscale_pat
+    = gen_avx512f_rndscalev8df (operands[0], operands[1], operands[2]);
+
+  emit_insn (rndscale_pat);
+  DONE;
+})
+
 (define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
   [(match_operand:<ssepackfltmode> 0 "register_operand")
-   (match_operand:VF2 1 "nonimmediate_operand")
-   (match_operand:VF2 2 "nonimmediate_operand")
+   (match_operand:VF2_AVX512F 1 "nonimmediate_operand")
+   (match_operand:VF2_AVX512F 2 "nonimmediate_operand")
    (match_operand:SI 3 "const_0_to_15_operand")]
   "TARGET_ROUND"
 {
@@ -9042,11 +10979,11 @@ 
 
 (define_expand "round<mode>2"
   [(set (match_dup 4)
-	(plus:VF
-	  (match_operand:VF 1 "register_operand")
+	(plus:VF_AVX512F
+	  (match_operand:VF_AVX512F 1 "register_operand")
 	  (match_dup 3)))
-   (set (match_operand:VF 0 "register_operand")
-	(unspec:VF
+   (set (match_operand:VF_AVX512F 0 "register_operand")
+	(unspec:VF_AVX512F
 	  [(match_dup 4) (match_dup 5)]
 	  UNSPEC_ROUND))]
   "TARGET_ROUND && !flag_trapping_math"
@@ -9090,8 +11027,8 @@ 
 
 (define_expand "round<mode>2_vec_pack_sfix"
   [(match_operand:<ssepackfltmode> 0 "register_operand")
-   (match_operand:VF2 1 "register_operand")
-   (match_operand:VF2 2 "register_operand")]
+   (match_operand:VF2_AVX512F 1 "register_operand")
+   (match_operand:VF2_AVX512F 2 "register_operand")]
   "TARGET_ROUND && !flag_trapping_math"
 {
   rtx tmp0, tmp1;
@@ -9508,6 +11445,178 @@ 
    (set_attr "btver2_decode" "vector,vector,vector,vector")
    (set_attr "mode" "TI")])
 
+;; Gather-prefetch expander.  The preparation statement fills in
+;; operand 5 (the match_par_dup) with a Pmode UNSPEC_VSIBADDR built from
+;; the address (operand 2), the vector register (operand 1) and the
+;; 1/2/4/8 constant (operand 3).
+(define_expand "avx512pf_gatherpf<mode>"
+  [(unspec
+     [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
+      (mem:<ssescalarmode>
+	(match_par_dup 5
+	  [(match_operand 2 "vsib_address_operand")
+	   (match_operand:VI48_512 1 "register_operand")
+	   (match_operand:SI 3 "const1248_operand")]))
+      (match_operand:SI 4 "const_0_to_1_operand")]
+     UNSPEC_GATHER_PREFETCH)]
+  "TARGET_AVX512PF"
+{
+  rtvec vsib_parts = gen_rtvec (3, operands[2], operands[1], operands[3]);
+
+  operands[5] = gen_rtx_UNSPEC (Pmode, vsib_parts, UNSPEC_VSIBADDR);
+})
+
+;; Gather prefetch with a mask register (operand 0).  Operand 4 (0 or 1)
+;; chooses between the vgatherpf0 and vgatherpf1 mnemonics; operand 0 is
+;; printed in braces after the VSIB memory operand 5.
+(define_insn "*avx512pf_gatherpf<mode>_mask"
+  [(unspec
+     [(match_operand:<avx512fmaskmode> 0 "register_operand" "k")
+      (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
+	[(unspec:P
+	   [(match_operand:P 2 "vsib_address_operand" "p")
+	    (match_operand:VI48_512 1 "register_operand" "v")
+	    (match_operand:SI 3 "const1248_operand" "n")]
+	   UNSPEC_VSIBADDR)])
+      (match_operand:SI 4 "const_0_to_1_operand" "n")]
+     UNSPEC_GATHER_PREFETCH)]
+  "TARGET_AVX512PF"
+{
+  const HOST_WIDE_INT hint = INTVAL (operands[4]);
+
+  if (hint == 0)
+    return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
+  if (hint == 1)
+    return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
+
+  /* The const_0_to_1_operand predicate admits no other value.  */
+  gcc_unreachable ();
+}
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "XI")])
+
+;; Gather prefetch matched when the mask slot is (const_int -1).  Only
+;; the VSIB memory operand 4 is printed; operand 3 (0 or 1) chooses
+;; between the vgatherpf0 and vgatherpf1 mnemonics.
+(define_insn "*avx512pf_gatherpf<mode>"
+  [(unspec
+     [(const_int -1)
+      (match_operator:<ssescalarmode> 4 "vsib_mem_operator"
+	[(unspec:P
+	   [(match_operand:P 1 "vsib_address_operand" "p")
+	    (match_operand:VI48_512 0 "register_operand" "v")
+	    (match_operand:SI 2 "const1248_operand" "n")]
+	   UNSPEC_VSIBADDR)])
+      (match_operand:SI 3 "const_0_to_1_operand" "n")]
+     UNSPEC_GATHER_PREFETCH)]
+  "TARGET_AVX512PF"
+{
+  const HOST_WIDE_INT hint = INTVAL (operands[3]);
+
+  if (hint == 0)
+    return "vgatherpf0<ssemodesuffix>ps\t{%4|%4}";
+  if (hint == 1)
+    return "vgatherpf1<ssemodesuffix>ps\t{%4|%4}";
+
+  /* The const_0_to_1_operand predicate admits no other value.  */
+  gcc_unreachable ();
+}
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "XI")])
+
+;; Scatter-prefetch expander.  The preparation statement fills in
+;; operand 5 (the match_par_dup) with a Pmode UNSPEC_VSIBADDR built from
+;; the address (operand 2), the vector register (operand 1) and the
+;; 1/2/4/8 constant (operand 3).
+(define_expand "avx512pf_scatterpf<mode>"
+  [(unspec
+     [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
+      (mem:<ssescalarmode>
+	(match_par_dup 5
+	  [(match_operand 2 "vsib_address_operand")
+	   (match_operand:VI48_512 1 "register_operand")
+	   (match_operand:SI 3 "const1248_operand")]))
+      (match_operand:SI 4 "const_0_to_1_operand")]
+     UNSPEC_SCATTER_PREFETCH)]
+  "TARGET_AVX512PF"
+{
+  rtvec vsib_parts = gen_rtvec (3, operands[2], operands[1], operands[3]);
+
+  operands[5] = gen_rtx_UNSPEC (Pmode, vsib_parts, UNSPEC_VSIBADDR);
+})
+
+;; Scatter prefetch with a mask register (operand 0).  Operand 4 (0 or 1)
+;; chooses between the vscatterpf0 and vscatterpf1 mnemonics; operand 0
+;; is printed in braces after the VSIB memory operand 5.
+(define_insn "*avx512pf_scatterpf<mode>_mask"
+  [(unspec
+     [(match_operand:<avx512fmaskmode> 0 "register_operand" "k")
+      (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
+	[(unspec:P
+	   [(match_operand:P 2 "vsib_address_operand" "p")
+	    (match_operand:VI48_512 1 "register_operand" "v")
+	    (match_operand:SI 3 "const1248_operand" "n")]
+	   UNSPEC_VSIBADDR)])
+      (match_operand:SI 4 "const_0_to_1_operand" "n")]
+     UNSPEC_SCATTER_PREFETCH)]
+  "TARGET_AVX512PF"
+{
+  const HOST_WIDE_INT hint = INTVAL (operands[4]);
+
+  if (hint == 0)
+    return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
+  if (hint == 1)
+    return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
+
+  /* The const_0_to_1_operand predicate admits no other value.  */
+  gcc_unreachable ();
+}
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "XI")])
+
+;; Scatter prefetch matched when the mask slot is (const_int -1).  Only
+;; the VSIB memory operand 4 is printed; operand 3 (0 or 1) chooses
+;; between the vscatterpf0 and vscatterpf1 mnemonics.
+(define_insn "*avx512pf_scatterpf<mode>"
+  [(unspec
+     [(const_int -1)
+      (match_operator:<ssescalarmode> 4 "vsib_mem_operator"
+	[(unspec:P
+	   [(match_operand:P 1 "vsib_address_operand" "p")
+	    (match_operand:VI48_512 0 "register_operand" "v")
+	    (match_operand:SI 2 "const1248_operand" "n")]
+	   UNSPEC_VSIBADDR)])
+      (match_operand:SI 3 "const_0_to_1_operand" "n")]
+     UNSPEC_SCATTER_PREFETCH)]
+  "TARGET_AVX512PF"
+{
+  const HOST_WIDE_INT hint = INTVAL (operands[3]);
+
+  if (hint == 0)
+    return "vscatterpf0<ssemodesuffix>ps\t{%4|%4}";
+  if (hint == 1)
+    return "vscatterpf1<ssemodesuffix>ps\t{%4|%4}";
+
+  /* The const_0_to_1_operand predicate admits no other value.  */
+  gcc_unreachable ();
+}
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "XI")])
+
+;; UNSPEC_EXP2 on a 512-bit float vector (VF_512), emitted as vexp2.
+;; The "type" attribute is set explicitly so the insn does not fall back
+;; to the attribute's default value.
+(define_insn "*avx512er_exp2<mode>"
+  [(set (match_operand:VF_512 0 "register_operand" "=v")
+	(unspec:VF_512
+	  [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
+	  UNSPEC_EXP2))]
+  "TARGET_AVX512ER"
+  "vexp2<ssemodesuffix>\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
+;; UNSPEC_RCP28 on a 512-bit float vector (VF_512), emitted as vrcp28.
+;; The "type" attribute is set explicitly so the insn does not fall back
+;; to the attribute's default value.
+(define_insn "*avx512er_rcp28<mode>"
+  [(set (match_operand:VF_512 0 "register_operand" "=v")
+	(unspec:VF_512
+	  [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
+	  UNSPEC_RCP28))]
+  "TARGET_AVX512ER"
+  "vrcp28<ssemodesuffix>\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
+;; UNSPEC_RSQRT28 on a 512-bit float vector (VF_512), emitted as
+;; vrsqrt28.  The "type" attribute is set explicitly so the insn does not
+;; fall back to the attribute's default value.
+(define_insn "avx512er_rsqrt28<mode>"
+  [(set (match_operand:VF_512 0 "register_operand" "=v")
+	(unspec:VF_512
+	  [(match_operand:VF_512 1 "nonimmediate_operand" "vm")]
+	  UNSPEC_RSQRT28))]
+  "TARGET_AVX512ER"
+  "vrsqrt28<ssemodesuffix>\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; XOP instructions
@@ -10055,6 +12164,13 @@ 
 })
 
 (define_expand "vlshr<mode>3"
+  [(set (match_operand:VI48_512 0 "register_operand")
+	(lshiftrt:VI48_512
+	  (match_operand:VI48_512 1 "register_operand")
+	  (match_operand:VI48_512 2 "nonimmediate_operand")))]
+  "TARGET_AVX512F")
+
+(define_expand "vlshr<mode>3"
   [(set (match_operand:VI48_256 0 "register_operand")
 	(lshiftrt:VI48_256
 	  (match_operand:VI48_256 1 "register_operand")
@@ -10089,6 +12205,12 @@ 
     }
 })
 
+;; Per-element arithmetic right shift of a V16SI vector: operand 1 is
+;; shifted by the matching element of operand 2.  There is no
+;; preparation code, so the expander emits the RTL template as written.
+(define_expand "vashrv16si3"
+  [(set (match_operand:V16SI 0 "register_operand")
+	(ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
+		        (match_operand:V16SI 2 "nonimmediate_operand")))]
+  "TARGET_AVX512F")
+
 (define_expand "vashrv8si3"
   [(set (match_operand:V8SI 0 "register_operand")
 	(ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
@@ -10122,6 +12244,13 @@ 
 })
 
 (define_expand "vashl<mode>3"
+  [(set (match_operand:VI48_512 0 "register_operand")
+	(ashift:VI48_512
+	  (match_operand:VI48_512 1 "register_operand")
+	  (match_operand:VI48_512 2 "nonimmediate_operand")))]
+  "TARGET_AVX512F")
+
+(define_expand "vashl<mode>3"
   [(set (match_operand:VI48_256 0 "register_operand")
 	(ashift:VI48_256
 	  (match_operand:VI48_256 1 "register_operand")
@@ -10506,17 +12635,11 @@ 
    (set_attr "btver2_decode" "vector")
    (set_attr "mode" "OI")])
 
-(define_mode_attr AVXTOSSEMODE
-  [(V4DI "V2DI") (V2DI "V2DI")
-   (V8SI "V4SI") (V4SI "V4SI")
-   (V16HI "V8HI") (V8HI "V8HI")
-   (V32QI "V16QI") (V16QI "V16QI")])
-
 (define_insn "avx2_pbroadcast<mode>"
   [(set (match_operand:VI 0 "register_operand" "=x")
 	(vec_duplicate:VI
 	  (vec_select:<ssescalarmode>
-	    (match_operand:<AVXTOSSEMODE> 1 "nonimmediate_operand" "xm")
+	    (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm")
 	    (parallel [(const_int 0)]))))]
   "TARGET_AVX2"
   "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
@@ -10540,37 +12663,37 @@ 
    (set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "avx2_permvar<mode>"
-  [(set (match_operand:VI4F_256 0 "register_operand" "=v")
-	(unspec:VI4F_256
-	  [(match_operand:VI4F_256 1 "nonimmediate_operand" "vm")
-	   (match_operand:V8SI 2 "register_operand" "v")]
+(define_insn "<avx2_avx512f>_permvar<mode>"
+  [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
+	(unspec:VI48F_256_512
+	  [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
+	   (match_operand:<sseintvecmode> 2 "register_operand" "v")]
 	  UNSPEC_VPERMVAR))]
   "TARGET_AVX2"
   "vperm<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
   [(set_attr "type" "sselog")
    (set_attr "prefix" "vex")
-   (set_attr "mode" "OI")])
+   (set_attr "mode" "<sseinsnmode>")])
 
-(define_expand "avx2_perm<mode>"
-  [(match_operand:VI8F_256 0 "register_operand")
-   (match_operand:VI8F_256 1 "nonimmediate_operand")
+(define_expand "<avx2_avx512f>_perm<mode>"
+  [(match_operand:VI8F_256_512 0 "register_operand")
+   (match_operand:VI8F_256_512 1 "nonimmediate_operand")
    (match_operand:SI 2 "const_0_to_255_operand")]
   "TARGET_AVX2"
 {
   int mask = INTVAL (operands[2]);
-  emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1],
-				    GEN_INT ((mask >> 0) & 3),
-				    GEN_INT ((mask >> 2) & 3),
-				    GEN_INT ((mask >> 4) & 3),
-				    GEN_INT ((mask >> 6) & 3)));
+  emit_insn (gen_<avx2_avx512f>_perm<mode>_1 (operands[0], operands[1],
+					    GEN_INT ((mask >> 0) & 3),
+					    GEN_INT ((mask >> 2) & 3),
+					    GEN_INT ((mask >> 4) & 3),
+					    GEN_INT ((mask >> 6) & 3)));
   DONE;
 })
 
-(define_insn "avx2_perm<mode>_1"
-  [(set (match_operand:VI8F_256 0 "register_operand" "=v")
-	(vec_select:VI8F_256
-	  (match_operand:VI8F_256 1 "nonimmediate_operand" "vm")
+(define_insn "<avx2_avx512f>_perm<mode>_1"
+  [(set (match_operand:VI8F_256_512 0 "register_operand" "=v")
+	(vec_select:VI8F_256_512
+	  (match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm")
 	  (parallel [(match_operand 2 "const_0_to_3_operand")
 		     (match_operand 3 "const_0_to_3_operand")
 		     (match_operand 4 "const_0_to_3_operand")
@@ -10633,6 +12756,62 @@ 
    (set_attr "isa" "*,avx2,noavx2")
    (set_attr "mode" "V8SF")])
 
+;; Duplicate element 0 of operand 1 (a <ssexmmmode> register or memory
+;; operand) into every element of the 512-bit destination.
+(define_insn "avx512f_vec_dup<mode>"
+  [(set (match_operand:VI48F_512 0 "register_operand" "=v")
+	(vec_duplicate:VI48F_512
+	  (vec_select:<ssescalarmode>
+	    (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
+	    (parallel [(const_int 0)]))))]
+  "TARGET_AVX512F"
+  "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+;; Duplicate the <ssexmmmode> operand 1 across a V16FI destination.
+;; Alternative 0 (source in a register) is emitted as a 32x4 shuffle of
+;; the source with itself using immediate 0x0, printing the source with
+;; the %g modifier; alternative 1 (source in memory) is emitted as a
+;; 32x4 broadcast load.
+(define_insn "avx512f_broadcast<mode>"
+  [(set (match_operand:V16FI 0 "register_operand" "=v,v")
+	(vec_duplicate:V16FI
+	  (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
+  "TARGET_AVX512F"
+  "@
+   vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0|%0, %g1, %g1, 0x0}
+   vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+;; Duplicate the <ssehalfvecmode> operand 1 across a V8FI destination.
+;; Alternative 0 (source in a register) is emitted as a 64x2 shuffle of
+;; the source with itself using immediate 0x44, printing the source with
+;; the %g modifier; alternative 1 (source in memory) is emitted as a
+;; 64x4 broadcast load.
+(define_insn "avx512f_broadcast<mode>"
+  [(set (match_operand:V8FI 0 "register_operand" "=v,v")
+	(vec_duplicate:V8FI
+	  (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
+  "TARGET_AVX512F"
+  "@
+   vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0|%0, %g1, %g1, 0x44}
+   vbroadcast<shuffletype>64x4\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+;; Duplicate a scalar held in a general register ("r" constraint) into
+;; every element of a 512-bit integer vector.  The insn condition
+;; rejects V8DImode unless TARGET_64BIT is set.
+(define_insn "avx512f_vec_dup_gpr<mode>"
+  [(set (match_operand:VI48_512 0 "register_operand" "=v")
+	(vec_duplicate:VI48_512
+	  (match_operand:<ssescalarmode> 1 "register_operand" "r")))]
+  "TARGET_AVX512F && (<MODE>mode != V8DImode || TARGET_64BIT)"
+  "vpbroadcast<bcstscalarsuff>\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+;; Duplicate a scalar operand 1 (vector register or memory) into every
+;; element of the 512-bit destination.  The operands use the "v"
+;; constraints, matching the other EVEX-prefixed broadcast patterns,
+;; instead of the narrower "x" constraints.
+(define_insn "avx512f_vec_dup_mem<mode>"
+  [(set (match_operand:VI48F_512 0 "register_operand" "=v")
+	(vec_duplicate:VI48F_512
+	  (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm")))]
+  "TARGET_AVX512F"
+  "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
 (define_insn "avx2_vbroadcasti128_<mode>"
   [(set (match_operand:VI_256 0 "register_operand" "=x")
 	(vec_concat:VI_256
@@ -10672,6 +12851,28 @@ 
    (set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])
 
+;; Zero-extend the QImode operand 1 ("k" constraint) to DImode and
+;; duplicate it into all eight V8DI elements.
+(define_insn "avx512cd_maskb_vec_dupv8di"
+  [(set (match_operand:V8DI 0 "register_operand" "=v")
+	(vec_duplicate:V8DI
+	  (zero_extend:DI
+	    (match_operand:QI 1 "register_operand" "k"))))]
+  "TARGET_AVX512CD"
+  "vpbroadcastmb2q\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mskmov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "XI")])
+
+;; Zero-extend the HImode operand 1 ("k" constraint) to SImode and
+;; duplicate it into all sixteen V16SI elements.
+(define_insn "avx512cd_maskw_vec_dupv16si"
+  [(set (match_operand:V16SI 0 "register_operand" "=v")
+	(vec_duplicate:V16SI
+	  (zero_extend:SI
+	    (match_operand:HI 1 "register_operand" "k"))))]
+  "TARGET_AVX512CD"
+  "vpbroadcastmw2d\t{%1, %0|%0, %1}"
+  [(set_attr "type" "mskmov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "XI")])
+
 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
 ;; If it so happens that the input is in memory, use vbroadcast.
 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
@@ -10746,58 +12947,54 @@ 
 				elt * GET_MODE_SIZE (<ssescalarmode>mode));
 })
 
-(define_expand "avx_vpermil<mode>"
-  [(set (match_operand:VF2 0 "register_operand")
-	(vec_select:VF2
-	  (match_operand:VF2 1 "nonimmediate_operand")
+(define_expand "<sse2_avx_avx512f>_vpermil<mode>"
+  [(set (match_operand:VF2_AVX512F 0 "register_operand")
+	(vec_select:VF2_AVX512F
+	  (match_operand:VF2_AVX512F 1 "nonimmediate_operand")
 	  (match_operand:SI 2 "const_0_to_255_operand")))]
   "TARGET_AVX"
 {
   int mask = INTVAL (operands[2]);
   rtx perm[<ssescalarnum>];
 
-  perm[0] = GEN_INT (mask & 1);
-  perm[1] = GEN_INT ((mask >> 1) & 1);
-  if (<MODE>mode == V4DFmode)
+  int i;
+  for (i = 0; i < <ssescalarnum>; i = i + 2)
     {
-      perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
-      perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
+      perm[i]     = GEN_INT (((mask >> i)       & 1) + i);
+      perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
     }
 
   operands[2]
     = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
 })
 
-(define_expand "avx_vpermil<mode>"
-  [(set (match_operand:VF1 0 "register_operand")
-	(vec_select:VF1
-	  (match_operand:VF1 1 "nonimmediate_operand")
+(define_expand "<sse2_avx_avx512f>_vpermil<mode>"
+  [(set (match_operand:VF1_AVX512F 0 "register_operand")
+	(vec_select:VF1_AVX512F
+	  (match_operand:VF1_AVX512F 1 "nonimmediate_operand")
 	  (match_operand:SI 2 "const_0_to_255_operand")))]
   "TARGET_AVX"
 {
   int mask = INTVAL (operands[2]);
   rtx perm[<ssescalarnum>];
 
-  perm[0] = GEN_INT (mask & 3);
-  perm[1] = GEN_INT ((mask >> 2) & 3);
-  perm[2] = GEN_INT ((mask >> 4) & 3);
-  perm[3] = GEN_INT ((mask >> 6) & 3);
-  if (<MODE>mode == V8SFmode)
+  int i;
+  for (i = 0; i < <ssescalarnum>; i = i + 4)
     {
-      perm[4] = GEN_INT ((mask & 3) + 4);
-      perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
-      perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
-      perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
+      perm[i]     = GEN_INT (((mask >> 0) & 3) + i);
+      perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
+      perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
+      perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
     }
 
   operands[2]
     = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
 })
 
-(define_insn "*avx_vpermilp<mode>"
-  [(set (match_operand:VF 0 "register_operand" "=v")
-	(vec_select:VF
-	  (match_operand:VF 1 "nonimmediate_operand" "vm")
+(define_insn "*<sse2_avx_avx512f>_vpermilp<mode>"
+  [(set (match_operand:VF_AVX512F 0 "register_operand" "=v")
+	(vec_select:VF_AVX512F
+	  (match_operand:VF_AVX512F 1 "nonimmediate_operand" "vm")
 	  (match_parallel 2 ""
 	    [(match_operand 3 "const_int_operand")])))]
   "TARGET_AVX
@@ -10811,21 +13008,47 @@ 
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
    (set_attr "prefix" "vex")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "avx_vpermilvar<mode>3"
-  [(set (match_operand:VF 0 "register_operand" "=v")
-	(unspec:VF
-	  [(match_operand:VF 1 "register_operand" "v")
+(define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3"
+  [(set (match_operand:VF_AVX512F 0 "register_operand" "=v")
+	(unspec:VF_AVX512F
+	  [(match_operand:VF_AVX512F 1 "register_operand" "v")
 	   (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
 	  UNSPEC_VPERMIL))]
   "TARGET_AVX"
   "vpermil<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sselog")
    (set_attr "prefix_extra" "1")
-   (set_attr "prefix" "vex")
    (set_attr "btver2_decode" "vector")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+;; UNSPEC_VPERMI2 over two 512-bit data operands (1 and 3) and an
+;; integer-vector operand 2.  Operand 2 is tied to the destination by
+;; its "0" constraint, so the template prints only %3, %1 and %0.
+(define_insn "avx512f_vpermi2var<mode>3"
+  [(set (match_operand:VI48F_512 0 "register_operand" "=v")
+	(unspec:VI48F_512
+	  [(match_operand:VI48F_512 1 "register_operand" "v")
+	   (match_operand:<sseintvecmode> 2 "register_operand" "0")
+	   (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
+	  UNSPEC_VPERMI2))]
+  "TARGET_AVX512F"
+  "vpermi2<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}"
+  [(set_attr "type" "sselog")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+;; UNSPEC_VPERMT2 over an integer-vector operand 1 and two 512-bit data
+;; operands (2 and 3).  Data operand 2 is tied to the destination by its
+;; "0" constraint, so the template prints only %3, %1 and %0.
+(define_insn "avx512f_vpermt2var<mode>3"
+  [(set (match_operand:VI48F_512 0 "register_operand" "=v")
+	(unspec:VI48F_512
+	  [(match_operand:<sseintvecmode> 1 "register_operand" "v")
+	   (match_operand:VI48F_512 2 "register_operand" "0")
+	   (match_operand:VI48F_512 3 "nonimmediate_operand" "vm")]
+	  UNSPEC_VPERMT2))]
+  "TARGET_AVX512F"
+  "vpermt2<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}"
+  [(set_attr "type" "sselog")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
 
 (define_expand "avx_vperm2f128<mode>3"
   [(set (match_operand:AVX256MODE2P 0 "register_operand")
@@ -11159,6 +13382,15 @@ 
   DONE;
 })
 
+;; vec_init for the 512-bit VI48F_512 modes: hands the destination and
+;; the initializer operand to ix86_expand_vector_init, passing false as
+;; its first argument.
+(define_expand "vec_init<mode>"
+  [(match_operand:VI48F_512 0 "register_operand")
+   (match_operand 1)]
+  "TARGET_AVX512F"
+{
+  rtx target = operands[0];
+  rtx vals = operands[1];
+
+  ix86_expand_vector_init (false, target, vals);
+  DONE;
+})
+
 (define_expand "avx2_extracti128"
   [(match_operand:V2DI 0 "nonimmediate_operand")
    (match_operand:V4DI 1 "register_operand")
@@ -11208,31 +13440,36 @@ 
   DONE;
 })
 
-(define_insn "avx2_ashrv<mode>"
-  [(set (match_operand:VI4_AVX2 0 "register_operand" "=v")
-	(ashiftrt:VI4_AVX2
-	  (match_operand:VI4_AVX2 1 "register_operand" "v")
-	  (match_operand:VI4_AVX2 2 "nonimmediate_operand" "vm")))]
+(define_insn "<avx2_avx512f>_ashrv<mode>"
+  [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
+	(ashiftrt:VI48_AVX512F
+	  (match_operand:VI48_AVX512F 1 "register_operand" "v")
+	  (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
   "TARGET_AVX2"
-  "vpsravd\t{%2, %1, %0|%0, %1, %2}"
+  "vpsrav<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sseishft")
-   (set_attr "prefix" "vex")
+   (set_attr "prefix" "maybe_evex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "avx2_<shift_insn>v<mode>"
-  [(set (match_operand:VI48_AVX2 0 "register_operand" "=v")
-	(any_lshift:VI48_AVX2
-	  (match_operand:VI48_AVX2 1 "register_operand" "v")
-	  (match_operand:VI48_AVX2 2 "nonimmediate_operand" "vm")))]
+(define_insn "<avx2_avx512f>_<shift_insn>v<mode>"
+  [(set (match_operand:VI48_AVX2_48_AVX512F 0 "register_operand" "=v")
+	(any_lshift:VI48_AVX2_48_AVX512F
+	  (match_operand:VI48_AVX2_48_AVX512F 1 "register_operand" "v")
+	  (match_operand:VI48_AVX2_48_AVX512F 2 "nonimmediate_operand" "vm")))]
   "TARGET_AVX2"
   "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sseishft")
-   (set_attr "prefix" "vex")
+   (set_attr "prefix" "maybe_evex")
    (set_attr "mode" "<sseinsnmode>")])
 
+;; Operand-modifier letter substituted into the avx_vec_concat<mode>
+;; insn pattern as %<concat_tg_mode>1 when printing operand 1 of its
+;; vinsert alternative: "t" for the 256-bit vector modes and "g" for the
+;; 512-bit vector modes.
+(define_mode_attr concat_tg_mode
+  [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
+   (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
+
 (define_insn "avx_vec_concat<mode>"
-  [(set (match_operand:V_256 0 "register_operand" "=x,x")
-	(vec_concat:V_256
+  [(set (match_operand:V_256_512 0 "register_operand" "=x,x")
+	(vec_concat:V_256_512
 	  (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
 	  (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
   "TARGET_AVX"
@@ -11240,16 +13477,24 @@ 
   switch (which_alternative)
     {
     case 0:
-      return "vinsert<i128>\t{$0x1, %2, %t1, %0|%0, %t1, %2, 0x1}";
+      return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
     case 1:
       switch (get_attr_mode (insn))
 	{
+	case MODE_V16SF:
+	  return "vmovaps\t{%1, %t0|%t0, %1}";
+	case MODE_V8DF:
+	  return "vmovapd\t{%1, %t0|%t0, %1}";
 	case MODE_V8SF:
 	  return "vmovaps\t{%1, %x0|%x0, %1}";
 	case MODE_V4DF:
 	  return "vmovapd\t{%1, %x0|%x0, %1}";
-	default:
+	case MODE_XI:
+	  return "vmovdqa\t{%1, %t0|%t0, %1}";
+	case MODE_OI:
 	  return "vmovdqa\t{%1, %x0|%x0, %1}";
+	default:
+	  gcc_unreachable ();
 	}
     default:
       gcc_unreachable ();
@@ -11258,7 +13503,7 @@ 
   [(set_attr "type" "sselog,ssemov")
    (set_attr "prefix_extra" "1,*")
    (set_attr "length_immediate" "1,*")
-   (set_attr "prefix" "vex")
+   (set_attr "prefix" "maybe_evex")
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_insn "vcvtph2ps"
@@ -11295,6 +13540,16 @@ 
    (set_attr "btver2_decode" "double")
    (set_attr "mode" "V8SF")])
 
+;; UNSPEC_VCVTPH2PS from a V16HI source (register or memory) to a V16SF
+;; destination register.
+(define_insn "avx512f_vcvtph2ps512"
+  [(set (match_operand:V16SF 0 "register_operand" "=v")
+	(unspec:V16SF [(match_operand:V16HI 1 "nonimmediate_operand" "vm")]
+		      UNSPEC_VCVTPH2PS))]
+  "TARGET_AVX512F"
+  "vcvtph2ps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "V16SF")])
+
 (define_expand "vcvtps2ph"
   [(set (match_operand:V8HI 0 "register_operand")
 	(vec_concat:V8HI
@@ -11341,24 +13596,38 @@ 
    (set_attr "btver2_decode" "vector")
    (set_attr "mode" "V8SF")])
 
+;; UNSPEC_VCVTPS2PH from a V16SF register to a V16HI destination that may
+;; be a register or memory ("=vm").  Operand 2 is an immediate in the
+;; range 0..255 and is printed as the first AT&T operand.
+(define_insn "avx512f_vcvtps2ph512"
+  [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
+	(unspec:V16HI [(match_operand:V16SF 1 "register_operand" "v")
+		      (match_operand:SI 2 "const_0_to_255_operand" "N")]
+		     UNSPEC_VCVTPS2PH))]
+  "TARGET_AVX512F"
+  "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "V16SF")])
+
 ;; For gather* insn patterns
 (define_mode_iterator VEC_GATHER_MODE
 		      [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
+
 (define_mode_attr VEC_GATHER_IDXSI
-		      [(V2DI "V4SI") (V2DF "V4SI")
-		       (V4DI "V4SI") (V4DF "V4SI")
-		       (V4SI "V4SI") (V4SF "V4SI")
-		       (V8SI "V8SI") (V8SF "V8SI")])
+		      [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
+		       (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
+		       (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
+		       (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
+
 (define_mode_attr VEC_GATHER_IDXDI
-		      [(V2DI "V2DI") (V2DF "V2DI")
-		       (V4DI "V4DI") (V4DF "V4DI")
-		       (V4SI "V2DI") (V4SF "V2DI")
-		       (V8SI "V4DI") (V8SF "V4DI")])
+		      [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
+		       (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
+		       (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
+		       (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
+
 (define_mode_attr VEC_GATHER_SRCDI
-		      [(V2DI "V2DI") (V2DF "V2DF")
-		       (V4DI "V4DI") (V4DF "V4DF")
-		       (V4SI "V4SI") (V4SF "V4SF")
-		       (V8SI "V4SI") (V8SF "V4SF")])
+		      [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
+		       (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
+		       (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
+		       (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
 
 (define_expand "avx2_gathersi<mode>"
   [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
@@ -11532,3 +13801,241 @@ 
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])
+
+(define_expand "avx512f_gathersi<mode>"  ; gather expander using an SImode index vector (<VEC_GATHER_IDXSI>)
+  [(parallel [(set (match_operand:VI48F_512 0 "register_operand")
+		   (unspec:VI48F_512
+		     [(match_operand:VI48F_512 1 "register_operand")  ; input of the same mode as the destination
+		      (match_operand:<avx512fmaskmode> 4 "register_operand")  ; mask input
+		      (mem:<ssescalarmode>
+			(match_par_dup 6
+			  [(match_operand 2 "vsib_address_operand")
+			   (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
+			   (match_operand:SI 5 "const1248_operand")]))]
+		     UNSPEC_GATHER))
+	      (clobber (match_scratch:<avx512fmaskmode> 7))])]
+  "TARGET_AVX512F"
+{
+  operands[6]  /* address unspec built from operands 2, 3 and 5, consumed by match_par_dup 6 */
+    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
+					operands[5]), UNSPEC_VSIBADDR);
+})
+
+(define_insn "*avx512f_gathersi<mode>"  ; gather insn with SImode indices and a destination-tied input
+  [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
+	(unspec:VI48F_512
+	  [(match_operand:VI48F_512 1 "register_operand" "0")  ; constraint "0": same register as the output
+	   (match_operand:<avx512fmaskmode> 7 "register_operand" "2")  ; mask input, tied to scratch operand 2
+	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
+	     [(unspec:P
+		[(match_operand:P 4 "vsib_address_operand" "p")
+		 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
+		 (match_operand:SI 5 "const1248_operand" "n")]
+		UNSPEC_VSIBADDR)])]
+	  UNSPEC_GATHER))
+   (clobber (match_scratch:<avx512fmaskmode> 2 "=&k"))]  ; the mask register is clobbered by this insn
+  "TARGET_AVX512F"
+  "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %g6}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "*avx512f_gathersi<mode>_2"  ; same gather, but with (pc) where the other variant has its tied input
+  [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
+	(unspec:VI48F_512
+	  [(pc)  ; NOTE(review): presumably a placeholder for an unused previous destination value - confirm
+	   (match_operand:<avx512fmaskmode> 6 "register_operand" "1")  ; mask input, tied to scratch operand 1
+	   (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
+	     [(unspec:P
+		[(match_operand:P 3 "vsib_address_operand" "p")
+		 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
+		 (match_operand:SI 4 "const1248_operand" "n")]
+		UNSPEC_VSIBADDR)])]
+	  UNSPEC_GATHER))
+   (clobber (match_scratch:<avx512fmaskmode> 1 "=&k"))]  ; the mask register is clobbered by this insn
+  "TARGET_AVX512F"
+  "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+
+(define_expand "avx512f_gatherdi<mode>"  ; gather expander using a DImode index vector (<VEC_GATHER_IDXDI>)
+  [(parallel [(set (match_operand:VI48F_512 0 "register_operand")
+		   (unspec:VI48F_512
+		     [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")  ; input mode may differ from the destination mode
+		      (match_operand:QI 4 "register_operand")  ; mask input is always QImode here
+		      (mem:<ssescalarmode>
+			(match_par_dup 6
+			  [(match_operand 2 "vsib_address_operand")
+			   (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
+			   (match_operand:SI 5 "const1248_operand")]))]
+		     UNSPEC_GATHER))
+	      (clobber (match_scratch:QI 7))])]
+  "TARGET_AVX512F"
+{
+  operands[6]  /* address unspec built from operands 2, 3 and 5, consumed by match_par_dup 6 */
+    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
+					operands[5]), UNSPEC_VSIBADDR);
+})
+
+(define_insn "*avx512f_gatherdi<mode>"  ; gather insn with DImode indices and a destination-tied input
+  [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
+	(unspec:VI48F_512
+	  [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")  ; constraint "0": same register as the output
+	   (match_operand:QI 7 "register_operand" "2")  ; mask input, tied to scratch operand 2
+	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
+	     [(unspec:P
+		[(match_operand:P 4 "vsib_address_operand" "p")
+		 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
+		 (match_operand:SI 5 "const1248_operand" "n")]
+		UNSPEC_VSIBADDR)])]
+	  UNSPEC_GATHER))
+   (clobber (match_scratch:QI 2 "=&k"))]  ; template prints %1, not %0; NOTE(review): presumably to get the <VEC_GATHER_SRCDI>-sized register name - confirm
+  "TARGET_AVX512F"
+  "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "*avx512f_gatherdi<mode>_2"  ; DImode-index gather with (pc) in place of a tied input
+  [(set (match_operand:VI48F_512 0 "register_operand" "=&v")
+	(unspec:VI48F_512
+	  [(pc)  ; NOTE(review): presumably a placeholder for an unused previous destination value - confirm
+	   (match_operand:QI 6 "register_operand" "1")  ; mask input, tied to scratch operand 1
+	   (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
+	     [(unspec:P
+		[(match_operand:P 3 "vsib_address_operand" "p")
+		 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
+		 (match_operand:SI 4 "const1248_operand" "n")]
+		UNSPEC_VSIBADDR)])]
+	  UNSPEC_GATHER))
+   (clobber (match_scratch:QI 1 "=&k"))]  ; the mask register is clobbered by this insn
+  "TARGET_AVX512F"
+{
+  if (<MODE>mode != <VEC_GATHER_SRCDI>mode)  /* modes differ: print operand 0 with the %t modifier; NOTE(review): presumably the 256-bit register name - confirm */
+    return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}";
+  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}";
+}
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_expand "avx512f_scattersi<mode>"  ; scatter expander using an SImode index vector (<VEC_GATHER_IDXSI>)
+  [(parallel [(set (mem:VI48F_512
+		     (match_par_dup 5
+		       [(match_operand 0 "vsib_address_operand")
+			(match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
+			(match_operand:SI 4 "const1248_operand")]))
+		   (unspec:VI48F_512
+		     [(match_operand:<avx512fmaskmode> 1 "register_operand")  ; mask input
+		      (match_operand:VI48F_512 3 "register_operand")]  ; vector being stored
+		     UNSPEC_SCATTER))
+	      (clobber (match_scratch:<avx512fmaskmode> 6))])]
+  "TARGET_AVX512F"
+{
+  operands[5]  /* address unspec built from operands 0, 2 and 4, consumed by match_par_dup 5 */
+    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
+					operands[4]), UNSPEC_VSIBADDR);
+})
+
+(define_insn "*avx512f_scattersi<mode>"  ; scatter insn with SImode indices; the destination is the operator matched as operand 5
+  [(set (match_operator:VI48F_512 5 "vsib_mem_operator"
+	  [(unspec:P
+	     [(match_operand:P 0 "vsib_address_operand" "p")
+	      (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
+	      (match_operand:SI 4 "const1248_operand" "n")]
+	     UNSPEC_VSIBADDR)])
+	(unspec:VI48F_512
+	  [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")  ; mask input, tied to scratch operand 1
+	   (match_operand:VI48F_512 3 "register_operand" "v")]
+	  UNSPEC_SCATTER))
+   (clobber (match_scratch:<avx512fmaskmode> 1 "=&k"))]  ; the mask register is clobbered by this insn
+  "TARGET_AVX512F"
+  "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_expand "avx512f_scatterdi<mode>"  ; scatter expander using a V8DI index vector
+  [(parallel [(set (mem:VI48F_512
+		     (match_par_dup 5
+		       [(match_operand 0 "vsib_address_operand")
+			(match_operand:V8DI 2 "register_operand")  ; index mode is fixed rather than taken from <VEC_GATHER_IDXDI>
+			(match_operand:SI 4 "const1248_operand")]))
+		   (unspec:VI48F_512
+		     [(match_operand:QI 1 "register_operand")  ; mask input is always QImode here
+		      (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]  ; stored vector; mode may differ from <MODE>
+		     UNSPEC_SCATTER))
+	      (clobber (match_scratch:QI 6))])]
+  "TARGET_AVX512F"
+{
+  operands[5]  /* address unspec built from operands 0, 2 and 4, consumed by match_par_dup 5 */
+    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[0], operands[2],
+					operands[4]), UNSPEC_VSIBADDR);
+})
+
+(define_insn "*avx512f_scatterdi<mode>"  ; scatter insn with V8DI indices; the destination is the operator matched as operand 5
+  [(set (match_operator:VI48F_512 5 "vsib_mem_operator"
+	  [(unspec:P
+	     [(match_operand:P 0 "vsib_address_operand" "p")
+	      (match_operand:V8DI 2 "register_operand" "v")
+	      (match_operand:SI 4 "const1248_operand" "n")]
+	     UNSPEC_VSIBADDR)])
+	(unspec:VI48F_512
+	  [(match_operand:QI 6 "register_operand" "1")  ; mask input, tied to scratch operand 1
+	   (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]  ; stored vector; mode may differ from <MODE>
+	  UNSPEC_SCATTER))
+   (clobber (match_scratch:QI 1 "=&k"))]  ; the mask register is clobbered by this insn
+  "TARGET_AVX512F"
+  "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
+  [(set_attr "type" "ssemov")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "avx512f_getmant<mode>"  ; packed vgetmant over the VF_512 modes, kept opaque as UNSPEC_GETMANT
+  [(set (match_operand:VF_512 0 "register_operand" "=v")
+	(unspec:VF_512
+	  [(match_operand:VF_512 1 "nonimmediate_operand" "vm")  ; source may be a register or memory
+	   (match_operand:SI 2 "const_0_to_15_operand")]  ; immediate, emitted as-is via %2
+	  UNSPEC_GETMANT))]
+  "TARGET_AVX512F"
+  "vgetmant<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "prefix" "evex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "avx512f_getmant<mode>"  ; scalar vgetmant over the VF_128 modes
+  [(set (match_operand:VF_128 0 "register_operand" "=v")
+	(vec_merge:VF_128
+	  (unspec:VF_128
+	    [(match_operand:VF_128 1 "register_operand" "v")
+	     (match_operand:VF_128 2 "nonimmediate_operand" "vm")
+	     (match_operand:SI 3 "const_0_to_15_operand")]  ; immediate, emitted as-is via %3
+	    UNSPEC_GETMANT)
+	  (match_dup 1)  ; elements not selected by the merge mask come from operand 1
+	  (const_int 1)))]  ; merge mask 1: only element 0 takes the unspec result
+  "TARGET_AVX512F"
+  "vgetmant<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "prefix" "evex")
+   (set_attr "mode" "<ssescalarmode>")])
+
+(define_insn "clz<mode>2"  ; vector count-leading-zeros via vplzcnt; needs AVX512CD, not just AVX512F
+  [(set (match_operand:VI48_512 0 "register_operand" "=v")
+	(clz:VI48_512
+	  (match_operand:VI48_512 1 "nonimmediate_operand" "vm")))]  ; source may be a register or memory
+  "TARGET_AVX512CD"
+  "vplzcnt<ssemodesuffix>\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "conflict<mode>"  ; vpconflict, kept opaque as UNSPEC_CONFLICT; needs AVX512CD
+  [(set (match_operand:VI48_512 0 "register_operand" "=v")
+	(unspec:VI48_512
+	  [(match_operand:VI48_512 1 "nonimmediate_operand" "vm")]  ; source may be a register or memory
+	  UNSPEC_CONFLICT))]
+  "TARGET_AVX512CD"
+  "vpconflict<ssemodesuffix>\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "<sseinsnmode>")])