diff mbox

[i386,11/8,AVX512,2/2] Add missing packed PF gathers/scatters.

Message ID 20140127100900.GA65215@msticlxl57.ims.intel.com
State New
Headers show

Commit Message

Kirill Yukhin Jan. 27, 2014, 10:09 a.m. UTC
Hello,
On 23 Jan 14:22, Uros Bizjak wrote:
> > (define_expand "avx512pf_scatterpf<mode>df"
> >   [(unspec
> >      [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
> >       (mem:DF
> >        (match_par_dup 5
> >          [(match_operand 2 "vsib_address_operand")
> >           (match_operand:VI4_256_8_512 1 "register_operand")
> >           (match_operand:SI 3 "const1248_operand")]))
> >       (match_operand:SI 4 "const_0_to_1_operand")]
> >      UNSPEC_SCATTER_PREFETCH)]
> >
> > We have this correspondence between, say, main and index modes:
> >   SF -> (V16SI, V8DI)
> >   DF -> (V8SI , V8DI)
> 
> It looks to me that you should use V16SF and V8DF instead of SF and DF
> modes here.
I didn't find existing attributes with necessary mapping, so I invented new.

> Other than this, the patch looks OK to me. Please wait a day if Jakub
> has any remark here.

Patch in the bottom and I'll check it in this evening (MS time) if no objections.
(will update ChangeLog adding new mode attributes)

--
Thanks, K

 gcc/config/i386/avx512pfintrin.h                   | 113 +++++++++++--
 gcc/config/i386/i386-builtin-types.def             |   2 +
 gcc/config/i386/i386.c                             |  37 ++++-
 gcc/config/i386/sse.md                             | 176 +++++++++++++++++++--
 gcc/testsuite/gcc.target/i386/avx-1.c              |   4 +
 .../gcc.target/i386/avx512pf-vgatherpf0dpd-1.c     |  15 ++
 .../gcc.target/i386/avx512pf-vgatherpf0qpd-1.c     |  15 ++
 .../gcc.target/i386/avx512pf-vgatherpf1dpd-1.c     |  15 ++
 .../gcc.target/i386/avx512pf-vgatherpf1qpd-1.c     |  15 ++
 .../gcc.target/i386/avx512pf-vscatterpf0dpd-1.c    |  17 ++
 .../gcc.target/i386/avx512pf-vscatterpf0qpd-1.c    |  17 ++
 .../gcc.target/i386/avx512pf-vscatterpf1dpd-1.c    |  17 ++
 .../gcc.target/i386/avx512pf-vscatterpf1qpd-1.c    |  17 ++
 gcc/testsuite/gcc.target/i386/sse-14.c             |   4 +
 gcc/testsuite/gcc.target/i386/sse-22.c             |   5 +
 gcc/testsuite/gcc.target/i386/sse-23.c             |   4 +
 16 files changed, 442 insertions(+), 31 deletions(-)

Comments

Uros Bizjak Jan. 27, 2014, 10:25 a.m. UTC | #1
On Mon, Jan 27, 2014 at 11:09 AM, Kirill Yukhin <kirill.yukhin@gmail.com> wrote:

>> > (define_expand "avx512pf_scatterpf<mode>df"
>> >   [(unspec
>> >      [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
>> >       (mem:DF
>> >        (match_par_dup 5
>> >          [(match_operand 2 "vsib_address_operand")
>> >           (match_operand:VI4_256_8_512 1 "register_operand")
>> >           (match_operand:SI 3 "const1248_operand")]))
>> >       (match_operand:SI 4 "const_0_to_1_operand")]
>> >      UNSPEC_SCATTER_PREFETCH)]
>> >
>> > We have this correspondence between, say, main and index modes:
>> >   SF -> (V16SI, V8DI)
>> >   DF -> (V8SI , V8DI)
>>
>> It looks to me that you should use V16SF and V8DF instead of SF and DF
>> modes here.
> I didn't find existing attributes with necessary mapping, so I invented new.
>
>> Other than this, the patch looks OK to me. Please wait a day if Jakub
>> has any remark here.
>
> Patch in the bottom and I'll check it in this evening (MS time) if no objections.
> (will update ChangeLog adding new mode attributes)
>
> --
> Thanks, K
>
>  gcc/config/i386/avx512pfintrin.h                   | 113 +++++++++++--
>  gcc/config/i386/i386-builtin-types.def             |   2 +
>  gcc/config/i386/i386.c                             |  37 ++++-
>  gcc/config/i386/sse.md                             | 176 +++++++++++++++++++--
>  gcc/testsuite/gcc.target/i386/avx-1.c              |   4 +
>  .../gcc.target/i386/avx512pf-vgatherpf0dpd-1.c     |  15 ++
>  .../gcc.target/i386/avx512pf-vgatherpf0qpd-1.c     |  15 ++
>  .../gcc.target/i386/avx512pf-vgatherpf1dpd-1.c     |  15 ++
>  .../gcc.target/i386/avx512pf-vgatherpf1qpd-1.c     |  15 ++
>  .../gcc.target/i386/avx512pf-vscatterpf0dpd-1.c    |  17 ++
>  .../gcc.target/i386/avx512pf-vscatterpf0qpd-1.c    |  17 ++
>  .../gcc.target/i386/avx512pf-vscatterpf1dpd-1.c    |  17 ++
>  .../gcc.target/i386/avx512pf-vscatterpf1qpd-1.c    |  17 ++
>  gcc/testsuite/gcc.target/i386/sse-14.c             |   4 +
>  gcc/testsuite/gcc.target/i386/sse-22.c             |   5 +
>  gcc/testsuite/gcc.target/i386/sse-23.c             |   4 +
>  16 files changed, 442 insertions(+), 31 deletions(-)

> -(define_expand "avx512pf_gatherpf<mode>"
> +;; Packed float variants
> +(define_mode_attr GATHER_SCATTER_SF_MEM_MODE
> +                     [(V8DI "V8SF") (V16SI "V16SF")])
> +(define_mode_attr GATHER_SCATTER_DF_MEM_MODE
> +                     [(V8DI "V8DF") (V8SI "V8DF")])

You actually don't need this attribute, since it always declares V8DF.
Just use V8DF mode in the patterns instead.

(no need to repost the patch due to this trivial removal).

Uros.
diff mbox

Patch

diff --git a/gcc/config/i386/avx512pfintrin.h b/gcc/config/i386/avx512pfintrin.h
index b8c0110..bc7598e 100644
--- a/gcc/config/i386/avx512pfintrin.h
+++ b/gcc/config/i386/avx512pfintrin.h
@@ -48,74 +48,157 @@  typedef unsigned short __mmask16;
 #ifdef __OPTIMIZE__
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_prefetch_i32gather_pd (__m256i index, __mmask8 mask,
+				   void *addr, int scale, int hint)
+{
+  __builtin_ia32_gatherpfdpd (mask, (__v8si) index, (long long const *) addr,
+			      scale, hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_prefetch_i32gather_ps (__m512i index, __mmask16 mask,
-				   int const *addr, int scale, int hint)
+				   void *addr, int scale, int hint)
+{
+  __builtin_ia32_gatherpfdps (mask, (__v16si) index, (int const *) addr,
+			      scale, hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_prefetch_i64gather_pd (__m512i index, __mmask8 mask,
+				   void *addr, int scale, int hint)
 {
-  __builtin_ia32_gatherpfdps (mask, (__v16si) index, addr, scale, hint);
+  __builtin_ia32_gatherpfqpd (mask, (__v8di) index, (long long const *) addr,
+			      scale, hint);
 }
 
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_prefetch_i64gather_ps (__m512i index, __mmask8 mask,
-				   int const *addr, int scale, int hint)
+				   void *addr, int scale, int hint)
 {
-  __builtin_ia32_gatherpfqps (mask, (__v8di) index, addr, scale, hint);
+  __builtin_ia32_gatherpfqps (mask, (__v8di) index, (int const *) addr,
+			      scale, hint);
 }
 
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_prefetch_i32scatter_ps (int const *addr, __m512i index, int scale,
+_mm512_prefetch_i32scatter_pd (void *addr, __m256i index, int scale,
 			       int hint)
 {
-  __builtin_ia32_scatterpfdps ((__mmask16) 0xFFFF, (__v16si) index, addr, scale,
-			       hint);
+  __builtin_ia32_scatterpfdpd ((__mmask8) 0xFF, (__v8si) index, 
+			       (long long const *)addr, scale, hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_prefetch_i32scatter_ps (void *addr, __m512i index, int scale,
+			       int hint)
+{
+  __builtin_ia32_scatterpfdps ((__mmask16) 0xFFFF, (__v16si) index, (int const *) addr,
+			       scale, hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_prefetch_i32scatter_pd (void *addr, __mmask8 mask,
+				    __m256i index, int scale, int hint)
+{
+  __builtin_ia32_scatterpfdpd (mask, (__v8si) index, (long long const *) addr,
+			       scale, hint);
 }
 
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_prefetch_i32scatter_ps (int const *addr, __mmask16 mask,
+_mm512_mask_prefetch_i32scatter_ps (void *addr, __mmask16 mask,
 				    __m512i index, int scale, int hint)
 {
-  __builtin_ia32_scatterpfdps (mask, (__v16si) index, addr, scale, hint);
+  __builtin_ia32_scatterpfdps (mask, (__v16si) index, (int const *) addr,
+			       scale, hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_prefetch_i64scatter_pd (void *addr, __m512i index, int scale,
+			       int hint)
+{
+  __builtin_ia32_scatterpfqpd ((__mmask8) 0xFF, (__v8di) index, (long long const *) addr,
+			       scale, hint);
 }
 
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_prefetch_i64scatter_ps (int const *addr, __m512i index, int scale,
+_mm512_prefetch_i64scatter_ps (void *addr, __m512i index, int scale,
 			       int hint)
 {
-  __builtin_ia32_scatterpfqps ((__mmask8) 0xFF, (__v8di) index, addr, scale,
-			       hint);
+  __builtin_ia32_scatterpfqps ((__mmask8) 0xFF, (__v8di) index, (int const *) addr,
+			       scale, hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_prefetch_i64scatter_pd (void *addr, __mmask16 mask,
+				    __m512i index, int scale, int hint)
+{
+  __builtin_ia32_scatterpfqpd (mask, (__v8di) index, (long long const *) addr,
+			       scale, hint);
 }
 
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_prefetch_i64scatter_ps (int const *addr, __mmask16 mask,
+_mm512_mask_prefetch_i64scatter_ps (void *addr, __mmask16 mask,
 				    __m512i index, int scale, int hint)
 {
-  __builtin_ia32_scatterpfqps (mask, (__v8di) index, addr, scale, hint);
+  __builtin_ia32_scatterpfqps (mask, (__v8di) index, (int const *) addr,
+			       scale, hint);
 }
+
 #else
+#define _mm512_mask_prefetch_i32gather_pd(INDEX, MASK, ADDR, SCALE, HINT)    \
+  __builtin_ia32_gatherpfdpd ((__mmask8)MASK, (__v8si)(__m256i)INDEX,	     \
+			      (long long const *)ADDR, (int)SCALE, (int)HINT)
+
 #define _mm512_mask_prefetch_i32gather_ps(INDEX, MASK, ADDR, SCALE, HINT)    \
-  __builtin_ia32_gatherpfdps ((__mmask16)MASK, (__v16si)(__m512i)INDEX,	     \
+  __builtin_ia32_gatherpfdps ((__mmask16)MASK, (__v16si)(__m512i)INDEX,      \
 			      (int const *)ADDR, (int)SCALE, (int)HINT)
 
+#define _mm512_mask_prefetch_i64gather_pd(INDEX, MASK, ADDR, SCALE, HINT)    \
+  __builtin_ia32_gatherpfqpd ((__mmask8)MASK, (__v8di)(__m512i)INDEX,	     \
+			      (long long const *)ADDR, (int)SCALE, (int)HINT)
+
 #define _mm512_mask_prefetch_i64gather_ps(INDEX, MASK, ADDR, SCALE, HINT)    \
   __builtin_ia32_gatherpfqps ((__mmask8)MASK, (__v8di)(__m512i)INDEX,	     \
 			      (int const *)ADDR, (int)SCALE, (int)HINT)
 
+#define _mm512_prefetch_i32scatter_pd(ADDR, INDEX, SCALE, HINT)              \
+  __builtin_ia32_scatterpfdpd ((__mmask8)0xFF, (__v8si)(__m256i)INDEX,       \
+			       (long long const *)ADDR, (int)SCALE, (int)HINT)
+
 #define _mm512_prefetch_i32scatter_ps(ADDR, INDEX, SCALE, HINT)              \
   __builtin_ia32_scatterpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i)INDEX,   \
 			       (int const *)ADDR, (int)SCALE, (int)HINT)
 
+#define _mm512_mask_prefetch_i32scatter_pd(ADDR, MASK, INDEX, SCALE, HINT)   \
+  __builtin_ia32_scatterpfdpd ((__mmask8)MASK, (__v8si)(__m256i)INDEX,       \
+			       (long long const *)ADDR, (int)SCALE, (int)HINT)
+
 #define _mm512_mask_prefetch_i32scatter_ps(ADDR, MASK, INDEX, SCALE, HINT)   \
   __builtin_ia32_scatterpfdps ((__mmask16)MASK, (__v16si)(__m512i)INDEX,     \
 			       (int const *)ADDR, (int)SCALE, (int)HINT)
 
+#define _mm512_prefetch_i64scatter_pd(ADDR, INDEX, SCALE, HINT)              \
+  __builtin_ia32_scatterpfqpd ((__mmask8)0xFF, (__v8di)(__m512i)INDEX,	     \
+			       (long long const *)ADDR, (int)SCALE, (int)HINT)
+
 #define _mm512_prefetch_i64scatter_ps(ADDR, INDEX, SCALE, HINT)              \
   __builtin_ia32_scatterpfqps ((__mmask8)0xFF, (__v8di)(__m512i)INDEX,	     \
 			       (int const *)ADDR, (int)SCALE, (int)HINT)
 
+#define _mm512_mask_prefetch_i64scatter_pd(ADDR, MASK, INDEX, SCALE, HINT)   \
+  __builtin_ia32_scatterpfqpd ((__mmask8)MASK, (__v8di)(__m512i)INDEX,	     \
+			       (long long const *)ADDR, (int)SCALE, (int)HINT)
+
 #define _mm512_mask_prefetch_i64scatter_ps(ADDR, MASK, INDEX, SCALE, HINT)   \
   __builtin_ia32_scatterpfqps ((__mmask8)MASK, (__v8di)(__m512i)INDEX,	     \
 			       (int const *)ADDR, (int)SCALE, (int)HINT)
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index acf2f32..f3c658b 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -733,7 +733,9 @@  DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V8SI, V8DI, INT)
 DEF_FUNCTION_TYPE (VOID, PINT, QI, V8DI, V8SI, INT)
 DEF_FUNCTION_TYPE (VOID, PLONGLONG, QI, V8DI, V8DI, INT)
 
+DEF_FUNCTION_TYPE (VOID, QI, V8SI, PCINT64, INT, INT)
 DEF_FUNCTION_TYPE (VOID, HI, V16SI, PCINT, INT, INT)
+DEF_FUNCTION_TYPE (VOID, QI, V8DI, PCINT64, INT, INT)
 DEF_FUNCTION_TYPE (VOID, QI, V8DI, PCINT, INT, INT)
 
 DEF_FUNCTION_TYPE_ALIAS (V2DF_FTYPE_V2DF, ROUND)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 1a4d568..49e153c 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -28397,9 +28397,13 @@  enum ix86_builtins
   IX86_BUILTIN_SCATTERSIV8DI,
 
   /* AVX512PF */
+  IX86_BUILTIN_GATHERPFQPD,
   IX86_BUILTIN_GATHERPFDPS,
+  IX86_BUILTIN_GATHERPFDPD,
   IX86_BUILTIN_GATHERPFQPS,
+  IX86_BUILTIN_SCATTERPFDPD,
   IX86_BUILTIN_SCATTERPFDPS,
+  IX86_BUILTIN_SCATTERPFQPD,
   IX86_BUILTIN_SCATTERPFQPS,
 
   /* AVX-512ER */
@@ -30929,15 +30933,27 @@  ix86_init_mmx_sse_builtins (void)
 	       IX86_BUILTIN_SCATTERDIV8DI);
 
   /* AVX512PF */
+  def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
+	       VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
+	       IX86_BUILTIN_GATHERPFDPD);
   def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
 	       VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
 	       IX86_BUILTIN_GATHERPFDPS);
+  def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
+	       VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
+	       IX86_BUILTIN_GATHERPFQPD);
   def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
 	       VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
 	       IX86_BUILTIN_GATHERPFQPS);
+  def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
+	       VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
+	       IX86_BUILTIN_SCATTERPFDPD);
   def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
 	       VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
 	       IX86_BUILTIN_SCATTERPFDPS);
+  def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
+	       VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
+	       IX86_BUILTIN_SCATTERPFQPD);
   def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
 	       VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
 	       IX86_BUILTIN_SCATTERPFQPS);
@@ -35583,17 +35599,30 @@  addcarryx:
     case IX86_BUILTIN_SCATTERDIV8DI:
       icode = CODE_FOR_avx512f_scatterdiv8di;
       goto scatter_gen;
+
+    case IX86_BUILTIN_GATHERPFDPD:
+      icode = CODE_FOR_avx512pf_gatherpfv8sidf;
+      goto vec_prefetch_gen;
     case IX86_BUILTIN_GATHERPFDPS:
-      icode = CODE_FOR_avx512pf_gatherpfv16si;
+      icode = CODE_FOR_avx512pf_gatherpfv16sisf;
+      goto vec_prefetch_gen;
+    case IX86_BUILTIN_GATHERPFQPD:
+      icode = CODE_FOR_avx512pf_gatherpfv8didf;
       goto vec_prefetch_gen;
     case IX86_BUILTIN_GATHERPFQPS:
-      icode = CODE_FOR_avx512pf_gatherpfv8di;
+      icode = CODE_FOR_avx512pf_gatherpfv8disf;
+      goto vec_prefetch_gen;
+    case IX86_BUILTIN_SCATTERPFDPD:
+      icode = CODE_FOR_avx512pf_scatterpfv8sidf;
       goto vec_prefetch_gen;
     case IX86_BUILTIN_SCATTERPFDPS:
-      icode = CODE_FOR_avx512pf_scatterpfv16si;
+      icode = CODE_FOR_avx512pf_scatterpfv16sisf;
+      goto vec_prefetch_gen;
+    case IX86_BUILTIN_SCATTERPFQPD:
+      icode = CODE_FOR_avx512pf_scatterpfv8didf;
       goto vec_prefetch_gen;
     case IX86_BUILTIN_SCATTERPFQPS:
-      icode = CODE_FOR_avx512pf_scatterpfv8di;
+      icode = CODE_FOR_avx512pf_scatterpfv8disf;
       goto vec_prefetch_gen;
 
     gather_gen:
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 2e68fb6..24eec40 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -417,6 +417,7 @@ 
   [V32QI V16HI V8SI (V8DI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")])
 (define_mode_iterator VI48_256 [V8SI V4DI])
 (define_mode_iterator VI48_512 [V16SI V8DI])
+(define_mode_iterator VI4_256_8_512 [V8SI V8DI])
 
 ;; Int-float size matches
 (define_mode_iterator VI4F_128 [V4SI V4SF])
@@ -12495,10 +12496,16 @@ 
    (set_attr "btver2_decode" "vector,vector,vector,vector")
    (set_attr "mode" "TI")])
 
-(define_expand "avx512pf_gatherpf<mode>"
+;; Packed float variants
+(define_mode_attr GATHER_SCATTER_SF_MEM_MODE
+		      [(V8DI "V8SF") (V16SI "V16SF")])
+(define_mode_attr GATHER_SCATTER_DF_MEM_MODE
+		      [(V8DI "V8DF") (V8SI "V8DF")])
+
+(define_expand "avx512pf_gatherpf<mode>sf"
   [(unspec
      [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
-      (mem:<ssescalarmode>
+      (mem:<GATHER_SCATTER_SF_MEM_MODE>
 	(match_par_dup 5
 	  [(match_operand 2 "vsib_address_operand")
 	   (match_operand:VI48_512 1 "register_operand")
@@ -12512,10 +12519,10 @@ 
 					operands[3]), UNSPEC_VSIBADDR);
 })
 
-(define_insn "*avx512pf_gatherpf<mode>_mask"
+(define_insn "*avx512pf_gatherpf<mode>sf_mask"
   [(unspec
      [(match_operand:<avx512fmaskmode> 0 "register_operand" "k")
-      (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
+      (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
 	[(unspec:P
 	   [(match_operand:P 2 "vsib_address_operand" "Tv")
 	    (match_operand:VI48_512 1 "register_operand" "v")
@@ -12539,10 +12546,10 @@ 
    (set_attr "prefix" "evex")
    (set_attr "mode" "XI")])
 
-(define_insn "*avx512pf_gatherpf<mode>"
+(define_insn "*avx512pf_gatherpf<mode>sf"
   [(unspec
      [(const_int -1)
-      (match_operator:<ssescalarmode> 4 "vsib_mem_operator"
+      (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
 	[(unspec:P
 	   [(match_operand:P 1 "vsib_address_operand" "Tv")
 	    (match_operand:VI48_512 0 "register_operand" "v")
@@ -12566,10 +12573,83 @@ 
    (set_attr "prefix" "evex")
    (set_attr "mode" "XI")])
 
-(define_expand "avx512pf_scatterpf<mode>"
+;; Packed double variants
+(define_expand "avx512pf_gatherpf<mode>df"
+  [(unspec
+     [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
+      (mem:<GATHER_SCATTER_DF_MEM_MODE>
+	(match_par_dup 5
+	  [(match_operand 2 "vsib_address_operand")
+	   (match_operand:VI4_256_8_512 1 "register_operand")
+	   (match_operand:SI 3 "const1248_operand")]))
+      (match_operand:SI 4 "const_0_to_1_operand")]
+     UNSPEC_GATHER_PREFETCH)]
+  "TARGET_AVX512PF"
+{
+  operands[5]
+    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
+					operands[3]), UNSPEC_VSIBADDR);
+})
+
+(define_insn "*avx512pf_gatherpf<mode>df_mask"
+  [(unspec
+     [(match_operand:<avx512fmaskmode> 0 "register_operand" "k")
+      (match_operator:<GATHER_SCATTER_DF_MEM_MODE> 5 "vsib_mem_operator"
+	[(unspec:P
+	   [(match_operand:P 2 "vsib_address_operand" "Tv")
+	    (match_operand:VI4_256_8_512 1 "register_operand" "v")
+	    (match_operand:SI 3 "const1248_operand" "n")]
+	   UNSPEC_VSIBADDR)])
+      (match_operand:SI 4 "const_0_to_1_operand" "n")]
+     UNSPEC_GATHER_PREFETCH)]
+  "TARGET_AVX512PF"
+{
+  switch (INTVAL (operands[4]))
+    {
+    case 0:
+      return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
+    case 1:
+      return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "XI")])
+
+(define_insn "*avx512pf_gatherpf<mode>df"
+  [(unspec
+     [(const_int -1)
+      (match_operator:<GATHER_SCATTER_DF_MEM_MODE> 4 "vsib_mem_operator"
+	[(unspec:P
+	   [(match_operand:P 1 "vsib_address_operand" "Tv")
+	    (match_operand:VI4_256_8_512 0 "register_operand" "v")
+	    (match_operand:SI 2 "const1248_operand" "n")]
+	   UNSPEC_VSIBADDR)])
+      (match_operand:SI 3 "const_0_to_1_operand" "n")]
+     UNSPEC_GATHER_PREFETCH)]
+  "TARGET_AVX512PF"
+{
+  switch (INTVAL (operands[3]))
+    {
+    case 0:
+      return "vgatherpf0<ssemodesuffix>pd\t{%4|%4}";
+    case 1:
+      return "vgatherpf1<ssemodesuffix>pd\t{%4|%4}";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "XI")])
+
+;; Packed float variants
+(define_expand "avx512pf_scatterpf<mode>sf"
   [(unspec
      [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
-      (mem:<ssescalarmode>
+      (mem:<GATHER_SCATTER_SF_MEM_MODE>
 	(match_par_dup 5
 	  [(match_operand 2 "vsib_address_operand")
 	   (match_operand:VI48_512 1 "register_operand")
@@ -12583,10 +12663,10 @@ 
 					operands[3]), UNSPEC_VSIBADDR);
 })
 
-(define_insn "*avx512pf_scatterpf<mode>_mask"
+(define_insn "*avx512pf_scatterpf<mode>sf_mask"
   [(unspec
      [(match_operand:<avx512fmaskmode> 0 "register_operand" "k")
-      (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
+      (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
 	[(unspec:P
 	   [(match_operand:P 2 "vsib_address_operand" "Tv")
 	    (match_operand:VI48_512 1 "register_operand" "v")
@@ -12610,10 +12690,10 @@ 
    (set_attr "prefix" "evex")
    (set_attr "mode" "XI")])
 
-(define_insn "*avx512pf_scatterpf<mode>"
+(define_insn "*avx512pf_scatterpf<mode>sf"
   [(unspec
      [(const_int -1)
-      (match_operator:<ssescalarmode> 4 "vsib_mem_operator"
+      (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 4 "vsib_mem_operator"
 	[(unspec:P
 	   [(match_operand:P 1 "vsib_address_operand" "Tv")
 	    (match_operand:VI48_512 0 "register_operand" "v")
@@ -12637,6 +12717,78 @@ 
    (set_attr "prefix" "evex")
    (set_attr "mode" "XI")])
 
+;; Packed double variants
+(define_expand "avx512pf_scatterpf<mode>df"
+  [(unspec
+     [(match_operand:<avx512fmaskmode> 0 "register_or_constm1_operand")
+      (mem:<GATHER_SCATTER_DF_MEM_MODE>
+	(match_par_dup 5
+	  [(match_operand 2 "vsib_address_operand")
+	   (match_operand:VI4_256_8_512 1 "register_operand")
+	   (match_operand:SI 3 "const1248_operand")]))
+      (match_operand:SI 4 "const_0_to_1_operand")]
+     UNSPEC_SCATTER_PREFETCH)]
+  "TARGET_AVX512PF"
+{
+  operands[5]
+    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
+					operands[3]), UNSPEC_VSIBADDR);
+})
+
+(define_insn "*avx512pf_scatterpf<mode>df_mask"
+  [(unspec
+     [(match_operand:<avx512fmaskmode> 0 "register_operand" "k")
+      (match_operator:<GATHER_SCATTER_DF_MEM_MODE> 5 "vsib_mem_operator"
+	[(unspec:P
+	   [(match_operand:P 2 "vsib_address_operand" "Tv")
+	    (match_operand:VI4_256_8_512 1 "register_operand" "v")
+	    (match_operand:SI 3 "const1248_operand" "n")]
+	   UNSPEC_VSIBADDR)])
+      (match_operand:SI 4 "const_0_to_1_operand" "n")]
+     UNSPEC_SCATTER_PREFETCH)]
+  "TARGET_AVX512PF"
+{
+  switch (INTVAL (operands[4]))
+    {
+    case 0:
+      return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
+    case 1:
+      return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "XI")])
+
+(define_insn "*avx512pf_scatterpf<mode>df"
+  [(unspec
+     [(const_int -1)
+      (match_operator:<GATHER_SCATTER_DF_MEM_MODE> 4 "vsib_mem_operator"
+	[(unspec:P
+	   [(match_operand:P 1 "vsib_address_operand" "Tv")
+	    (match_operand:VI4_256_8_512 0 "register_operand" "v")
+	    (match_operand:SI 2 "const1248_operand" "n")]
+	   UNSPEC_VSIBADDR)])
+      (match_operand:SI 3 "const_0_to_1_operand" "n")]
+     UNSPEC_SCATTER_PREFETCH)]
+  "TARGET_AVX512PF"
+{
+  switch (INTVAL (operands[3]))
+    {
+    case 0:
+      return "vscatterpf0<ssemodesuffix>pd\t{%4|%4}";
+    case 1:
+      return "vscatterpf1<ssemodesuffix>pd\t{%4|%4}";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "XI")])
+
 (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
   [(set (match_operand:VF_512 0 "register_operand" "=v")
 	(unspec:VF_512
diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c
index 12674ad..8fb6fb88 100644
--- a/gcc/testsuite/gcc.target/i386/avx-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx-1.c
@@ -362,6 +362,10 @@ 
 #define __builtin_ia32_gatherpfqps(A, B, C, D, E) __builtin_ia32_gatherpfqps(A, B, C, 1, 1)
 #define __builtin_ia32_scatterpfdps(A, B, C, D, E) __builtin_ia32_scatterpfdps(A, B, C, 1, 1)
 #define __builtin_ia32_scatterpfqps(A, B, C, D, E) __builtin_ia32_scatterpfqps(A, B, C, 1, 1)
+#define __builtin_ia32_gatherpfdpd(A, B, C, D, E) __builtin_ia32_gatherpfdpd(A, B, C, 1, 1)
+#define __builtin_ia32_gatherpfqpd(A, B, C, D, E) __builtin_ia32_gatherpfqpd(A, B, C, 1, 1)
+#define __builtin_ia32_scatterpfdpd(A, B, C, D, E) __builtin_ia32_scatterpfdpd(A, B, C, 1, 1)
+#define __builtin_ia32_scatterpfqpd(A, B, C, D, E) __builtin_ia32_scatterpfqpd(A, B, C, 1, 1)
 
 /* shaintrin.h */
 #define __builtin_ia32_sha1rnds4(A, B, C) __builtin_ia32_sha1rnds4(A, B, 1)
diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dpd-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dpd-1.c
new file mode 100644
index 0000000..1368b7a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dpd-1.c
@@ -0,0 +1,15 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx512pf -O2" } */
+/* { dg-final { scan-assembler-times "vgatherpf0dpd\[ \\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i idx;
+volatile __mmask8 m8;
+void *base;
+
+void extern
+avx512pf_test (void)
+{
+  _mm512_mask_prefetch_i32gather_pd (idx, m8, base, 8, 0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qpd-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qpd-1.c
new file mode 100644
index 0000000..61a81bb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qpd-1.c
@@ -0,0 +1,15 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx512pf -O2" } */
+/* { dg-final { scan-assembler-times "vgatherpf0qpd\[ \\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i idx;
+volatile __mmask8 m8;
+int *base;
+
+void extern
+avx512pf_test (void)
+{
+  _mm512_mask_prefetch_i64gather_pd (idx, m8, base, 8, 0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dpd-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dpd-1.c
new file mode 100644
index 0000000..5bc7599
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dpd-1.c
@@ -0,0 +1,15 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx512pf -O2" } */
+/* { dg-final { scan-assembler-times "vgatherpf1dpd\[ \\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i idx;
+volatile __mmask8 m8;
+int *base;
+
+void extern
+avx512pf_test (void)
+{
+  _mm512_mask_prefetch_i32gather_pd (idx, m8, base, 8, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qpd-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qpd-1.c
new file mode 100644
index 0000000..96610db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qpd-1.c
@@ -0,0 +1,15 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx512pf -O2" } */
+/* { dg-final { scan-assembler-times "vgatherpf1qpd\[ \\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i idx;
+volatile __mmask8 m8;
+int *base;
+
+void extern
+avx512pf_test (void)
+{
+  _mm512_mask_prefetch_i64gather_pd (idx, m8, base, 8, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0dpd-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0dpd-1.c
new file mode 100644
index 0000000..83c31cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0dpd-1.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx512pf -O2" } */
+/* { dg-final { scan-assembler-times "vscatterpf0dpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterpf0dpd\[ \\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i idx;
+volatile __mmask8 m8;
+void *base;
+
+void extern
+avx512pf_test (void)
+{
+  _mm512_prefetch_i32scatter_pd (base, idx, 8, 0);
+  _mm512_mask_prefetch_i32scatter_pd (base, m8, idx, 8, 0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0qpd-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0qpd-1.c
new file mode 100644
index 0000000..31172f8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf0qpd-1.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx512pf -O2" } */
+/* { dg-final { scan-assembler-times "vscatterpf0qpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterpf0qpd\[ \\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i idx;
+volatile __mmask8 m8;
+void *base;
+
+void extern
+avx512pf_test (void)
+{
+  _mm512_prefetch_i64scatter_pd (base, idx, 8, 0);
+  _mm512_mask_prefetch_i64scatter_pd (base, m8, idx, 8, 0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1dpd-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1dpd-1.c
new file mode 100644
index 0000000..205505b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1dpd-1.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx512pf -O2" } */
+/* { dg-final { scan-assembler-times "vscatterpf1dpd\[ \\t\]+\[^\n\]*%ymm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterpf1dpd\[ \\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m256i idx;
+volatile __mmask8 m8;
+void *base;
+
+void extern
+avx512pf_test (void)
+{
+  _mm512_prefetch_i32scatter_pd (base, idx, 8, 1);
+  _mm512_mask_prefetch_i32scatter_pd (base, m8, idx, 8, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1qpd-1.c b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1qpd-1.c
new file mode 100644
index 0000000..64d7dfa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512pf-vscatterpf1qpd-1.c
@@ -0,0 +1,17 @@ 
+/* { dg-do compile } */
+/* { dg-options "-mavx512pf -O2" } */
+/* { dg-final { scan-assembler-times "vscatterpf1qpd\[ \\t\]+\[^\n\]*%zmm\[0-9\]" 2 } } */
+/* { dg-final { scan-assembler-times "vscatterpf1qpd\[ \\t\]+\[^\n\]*\{%k\[1-7\]" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m512i idx;
+volatile __mmask8 m8;
+int *base;
+
+void extern
+avx512pf_test (void)
+{
+  _mm512_prefetch_i64scatter_pd (base, idx, 8, 1);
+  _mm512_mask_prefetch_i64scatter_pd (base, m8, idx, 8, 1);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
index ad7ca76..643eb99 100644
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
@@ -523,6 +523,10 @@  test_3vx (_mm512_mask_prefetch_i32gather_ps, __m512i, __mmask16, void const *, 1
 test_3vx (_mm512_mask_prefetch_i32scatter_ps, void const *, __mmask16, __m512i, 1, 1)
 test_3vx (_mm512_mask_prefetch_i64gather_ps, __m512i, __mmask8, void const *, 1, 1)
 test_3vx (_mm512_mask_prefetch_i64scatter_ps, void const *, __mmask8, __m512i, 1, 1)
+test_3vx (_mm512_mask_prefetch_i32gather_pd, __m256i, __mmask8, void const *, 1, 1)
+test_3vx (_mm512_mask_prefetch_i32scatter_pd, void const *, __mmask8, __m256i, 1, 1)
+test_3vx (_mm512_mask_prefetch_i64gather_pd, __m512i, __mmask8, void const *, 1, 1)
+test_3vx (_mm512_mask_prefetch_i64scatter_pd, void const *, __mmask8, __m512i, 1, 1)
 
 /* avx512erintrin.h */
 test_1 (_mm512_exp2a23_round_pd, __m512d, __m512d, 5)
diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
index 630c952..7d68be1 100644
--- a/gcc/testsuite/gcc.target/i386/sse-22.c
+++ b/gcc/testsuite/gcc.target/i386/sse-22.c
@@ -646,6 +646,11 @@  test_3vx (_mm512_mask_prefetch_i32scatter_ps, void const *, __mmask16, __m512i,
 test_3vx (_mm512_mask_prefetch_i64gather_ps, __m512i, __mmask8, void const *, 1, 1)
 test_3vx (_mm512_mask_prefetch_i64scatter_ps, void const *, __mmask8, __m512i, 1, 1)
 
+test_3vx (_mm512_mask_prefetch_i32gather_pd, __m256i, __mmask8, void const *, 1, 1)
+test_3vx (_mm512_mask_prefetch_i32scatter_pd, void const *, __mmask8, __m256i, 1, 1)
+test_3vx (_mm512_mask_prefetch_i64gather_pd, __m512i, __mmask8, long long *, 1, 1)
+test_3vx (_mm512_mask_prefetch_i64scatter_pd, void const *, __mmask8, __m512i, 1, 1)
+
 /* avx512erintrin.h */
 test_1 (_mm512_exp2a23_round_pd, __m512d, __m512d, 5)
 test_1 (_mm512_exp2a23_round_ps, __m512, __m512, 5)
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index 309cd73..77c8d67 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -365,6 +365,10 @@ 
 #define __builtin_ia32_gatherpfqps(A, B, C, D, E) __builtin_ia32_gatherpfqps(A, B, C, 1, 1)
 #define __builtin_ia32_scatterpfdps(A, B, C, D, E) __builtin_ia32_scatterpfdps(A, B, C, 1, 1)
 #define __builtin_ia32_scatterpfqps(A, B, C, D, E) __builtin_ia32_scatterpfqps(A, B, C, 1, 1)
+#define __builtin_ia32_gatherpfdpd(A, B, C, D, E) __builtin_ia32_gatherpfdpd(A, B, C, 1, 1)
+#define __builtin_ia32_gatherpfqpd(A, B, C, D, E) __builtin_ia32_gatherpfqpd(A, B, C, 1, 1)
+#define __builtin_ia32_scatterpfdpd(A, B, C, D, E) __builtin_ia32_scatterpfdpd(A, B, C, 1, 1)
+#define __builtin_ia32_scatterpfqpd(A, B, C, D, E) __builtin_ia32_scatterpfqpd(A, B, C, 1, 1)
 
 /* avx512erintrin.h */
 #define __builtin_ia32_exp2pd_mask(A, B, C, D) __builtin_ia32_exp2pd_mask (A, B, C, 5)