Message ID | 20170213193554.GK1849@tucnak |
---|---|
State | New |
Headers | show |
On Mon, Feb 13, 2017 at 8:35 PM, Jakub Jelinek <jakub@redhat.com> wrote: > Hi! > > As mentioned in the PR, ICC as well as clang have these non-masked > gather prefetch intrinsics in addition to masked (and for scatter > even GCC has both masked and non-masked), but GCC does not (the > SDM actually doesn't mention those, only those for scatters). > > The following patch implements those, I think it is useful to have > them for compatibility with the other compilers as well for consistency. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2017-02-13 Jakub Jelinek <jakub@redhat.com> > > PR target/79481 > * config/i386/avx512pfintrin.h (_mm512_prefetch_i32gather_pd, > _mm512_prefetch_i32gather_ps, _mm512_prefetch_i64gather_pd, > _mm512_prefetch_i64gather_ps): New inline functions and macros. > > * gcc.target/i386/sse-14.c (test_2vx): Add void return type. > (test_3vx): Change return type from int to void. > (_mm512_prefetch_i32gather_ps, _mm512_prefetch_i32scatter_ps, > _mm512_prefetch_i64gather_ps, _mm512_prefetch_i64scatter_ps, > _mm512_prefetch_i32gather_pd, _mm512_prefetch_i32scatter_pd, > _mm512_prefetch_i64gather_pd, _mm512_prefetch_i64scatter_pd): New > tests. > * gcc.target/i386/sse-22.c (test_2vx): Add void return type. > (test_3vx): Change return type from int to void. > (_mm512_prefetch_i32gather_ps, _mm512_prefetch_i32scatter_ps, > _mm512_prefetch_i64gather_ps, _mm512_prefetch_i64scatter_ps, > _mm512_prefetch_i32gather_pd, _mm512_prefetch_i32scatter_pd, > _mm512_prefetch_i64gather_pd, _mm512_prefetch_i64scatter_pd): New > tests. > * gcc.target/i386/avx512pf-vgatherpf0dpd-1.c: Add non-masked > intrinsic. Change scan-assembler-times number from 1 to 2. > * gcc.target/i386/avx512pf-vgatherpf0dps-1.c: Likewise. > * gcc.target/i386/avx512pf-vgatherpf0qpd-1.c: Likewise. > * gcc.target/i386/avx512pf-vgatherpf0qps-1.c: Likewise. > * gcc.target/i386/avx512pf-vgatherpf1dpd-1.c: Likewise. > * gcc.target/i386/avx512pf-vgatherpf1dps-1.c: Likewise. > * gcc.target/i386/avx512pf-vgatherpf1qpd-1.c: Likewise. > * gcc.target/i386/avx512pf-vgatherpf1qps-1.c: Likewise. OK. Thanks, Uros. > --- gcc/config/i386/avx512pfintrin.h.jj 2017-01-17 18:40:59.000000000 +0100 > +++ gcc/config/i386/avx512pfintrin.h 2017-02-13 09:56:21.333303124 +0100 > @@ -48,6 +48,24 @@ typedef unsigned short __mmask16; > #ifdef __OPTIMIZE__ > extern __inline void > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_prefetch_i32gather_pd (__m256i __index, void const *__addr, > + int __scale, int __hint) > +{ > + __builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr, > + __scale, __hint); > +} > + > +extern __inline void > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_prefetch_i32gather_ps (__m512i __index, void const *__addr, > + int __scale, int __hint) > +{ > + __builtin_ia32_gatherpfdps ((__mmask16) 0xFFFF, (__v16si) __index, __addr, > + __scale, __hint); > +} > + > +extern __inline void > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > _mm512_mask_prefetch_i32gather_pd (__m256i __index, __mmask8 __mask, > void const *__addr, int __scale, int __hint) > { > @@ -66,6 +84,24 @@ _mm512_mask_prefetch_i32gather_ps (__m51 > > extern __inline void > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_prefetch_i64gather_pd (__m512i __index, void const *__addr, > + int __scale, int __hint) > +{ > + __builtin_ia32_gatherpfqpd ((__mmask8) 0xFF, (__v8di) __index, __addr, > + __scale, __hint); > +} > + > +extern __inline void > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > +_mm512_prefetch_i64gather_ps (__m512i __index, void const *__addr, > + int __scale, int __hint) > +{ > + __builtin_ia32_gatherpfqps ((__mmask8) 0xFF, (__v8di) __index, __addr, > + __scale, __hint); > +} > + > +extern __inline void > +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > _mm512_mask_prefetch_i64gather_pd (__m512i __index, __mmask8 __mask, > void const *__addr, int __scale, int __hint) > { > @@ -155,6 +191,14 @@ _mm512_mask_prefetch_i64scatter_ps (void > } > > #else > +#define _mm512_prefetch_i32gather_pd(INDEX, ADDR, SCALE, HINT) \ > + __builtin_ia32_gatherpfdpd ((__mmask8)0xFF, (__v8si)(__m256i)INDEX, \ > + (void const *)ADDR, (int)SCALE, (int)HINT) > + > +#define _mm512_prefetch_i32gather_ps(INDEX, ADDR, SCALE, HINT) \ > + __builtin_ia32_gatherpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i)INDEX, \ > + (void const *)ADDR, (int)SCALE, (int)HINT) > + > #define _mm512_mask_prefetch_i32gather_pd(INDEX, MASK, ADDR, SCALE, HINT) \ > __builtin_ia32_gatherpfdpd ((__mmask8)MASK, (__v8si)(__m256i)INDEX, \ > (void const *)ADDR, (int)SCALE, (int)HINT) > @@ -163,6 +207,14 @@ _mm512_mask_prefetch_i64scatter_ps (void > __builtin_ia32_gatherpfdps ((__mmask16)MASK, (__v16si)(__m512i)INDEX, \ > (void const *)ADDR, (int)SCALE, (int)HINT) > > +#define _mm512_prefetch_i64gather_pd(INDEX, ADDR, SCALE, HINT) \ > + __builtin_ia32_gatherpfqpd ((__mmask8)0xFF, (__v8di)(__m512i)INDEX, \ > + (void *)ADDR, (int)SCALE, (int)HINT) > + > +#define _mm512_prefetch_i64gather_ps(INDEX, ADDR, SCALE, HINT) \ > + __builtin_ia32_gatherpfqps ((__mmask8)0xFF, (__v8di)(__m512i)INDEX, \ > + (void *)ADDR, (int)SCALE, (int)HINT) > + > #define _mm512_mask_prefetch_i64gather_pd(INDEX, MASK, ADDR, SCALE, HINT) \ > __builtin_ia32_gatherpfqpd ((__mmask8)MASK, (__v8di)(__m512i)INDEX, \ > (void *)ADDR, (int)SCALE, (int)HINT) > --- gcc/testsuite/gcc.target/i386/sse-14.c.jj 2017-01-12 22:28:47.000000000 +0100 > +++ gcc/testsuite/gcc.target/i386/sse-14.c 2017-02-13 10:15:36.815163082 +0100 > @@ -50,7 +50,7 @@ > { return func (A, B, imm1, imm2, imm3); } > > #define test_2vx(func, op1_type, op2_type, imm1, imm2) \ > - _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L) \ > + void _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L) \ > { func (A, B, imm1, imm2); } > > #define test_3(func, type, op1_type, op2_type, op3_type, imm) \ > @@ -74,7 +74,7 @@ > { func (A, B, C, imm); } > > #define test_3vx(func, op1_type, op2_type, op3_type, imm1, imm2) \ > - int _CONCAT(_,func) (op1_type A, op2_type B, \ > + void _CONCAT(_,func) (op1_type A, op2_type B, \ > op3_type C, int const I, int const L) \ > { func (A, B, C, imm1, imm2); } > > @@ -520,6 +520,14 @@ test_4x (_mm_maskz_fixupimm_round_sd, __ > test_4x (_mm_maskz_fixupimm_round_ss, __m128, __mmask8, __m128, __m128, __m128i, 1, 8) > > /* avx512pfintrin.h */ > +test_2vx (_mm512_prefetch_i32gather_ps, __m512i, void const *, 1, _MM_HINT_T0) > +test_2vx (_mm512_prefetch_i32scatter_ps, void const *, __m512i, 1, _MM_HINT_T0) > +test_2vx (_mm512_prefetch_i64gather_ps, __m512i, void const *, 1, _MM_HINT_T0) > +test_2vx (_mm512_prefetch_i64scatter_ps, void const *, __m512i, 1, _MM_HINT_T0) > +test_2vx (_mm512_prefetch_i32gather_pd, __m256i, void const *, 1, _MM_HINT_T0) > +test_2vx (_mm512_prefetch_i32scatter_pd, void const *, __m256i, 1, _MM_HINT_T0) > +test_2vx (_mm512_prefetch_i64gather_pd, __m512i, void const *, 1, _MM_HINT_T0) > +test_2vx (_mm512_prefetch_i64scatter_pd, void const *, __m512i, 1, _MM_HINT_T0) > test_3vx (_mm512_mask_prefetch_i32gather_ps, __m512i, __mmask16, void const *, 1, _MM_HINT_T0) > test_3vx (_mm512_mask_prefetch_i32scatter_ps, void const *, __mmask16, __m512i, 1, _MM_HINT_T0) > test_3vx (_mm512_mask_prefetch_i64gather_ps, __m512i, __mmask8, void const *, 1, _MM_HINT_T0) > --- gcc/testsuite/gcc.target/i386/sse-22.c.jj 2017-01-12 22:28:47.000000000 +0100 > +++ gcc/testsuite/gcc.target/i386/sse-22.c 2017-02-13 10:15:54.781926974 +0100 > @@ -50,7 +50,7 @@ > { return func (A, B, imm1, imm2, imm3); } > > #define test_2vx(func, op1_type, op2_type, imm1, imm2) \ > - _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L) \ > + void _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L) \ > { func (A, B, imm1, imm2); } > > #define test_3(func, type, op1_type, op2_type, op3_type, imm) \ > @@ -74,7 +74,7 @@ > { func (A, B, C, imm); } > > #define test_3vx(func, op1_type, op2_type, op3_type, imm1, imm2) \ > - int _CONCAT(_,func) (op1_type A, op2_type B, \ > + void _CONCAT(_,func) (op1_type A, op2_type B, \ > op3_type C, int const I, int const L) \ > { func (A, B, C, imm1, imm2); } > > @@ -647,11 +647,18 @@ test_4x (_mm_maskz_fixupimm_round_sd, __ > test_4x (_mm_maskz_fixupimm_round_ss, __m128, __mmask8, __m128, __m128, __m128i, 1, 8) > > /* avx512pfintrin.h */ > +test_2vx (_mm512_prefetch_i32gather_ps, __m512i, void const *, 1, _MM_HINT_T0) > +test_2vx (_mm512_prefetch_i32scatter_ps, void const *, __m512i, 1, _MM_HINT_T0) > +test_2vx (_mm512_prefetch_i64gather_ps, __m512i, void const *, 1, _MM_HINT_T0) > +test_2vx (_mm512_prefetch_i64scatter_ps, void const *, __m512i, 1, _MM_HINT_T0) > +test_2vx (_mm512_prefetch_i32gather_pd, __m256i, void const *, 1, _MM_HINT_T0) > +test_2vx (_mm512_prefetch_i32scatter_pd, void const *, __m256i, 1, _MM_HINT_T0) > +test_2vx (_mm512_prefetch_i64gather_pd, __m512i, long long *, 1, _MM_HINT_T0) > +test_2vx (_mm512_prefetch_i64scatter_pd, void const *, __m512i, 1, _MM_HINT_T0) > test_3vx (_mm512_mask_prefetch_i32gather_ps, __m512i, __mmask16, void const *, 1, _MM_HINT_T0) > test_3vx (_mm512_mask_prefetch_i32scatter_ps, void const *, __mmask16, __m512i, 1, _MM_HINT_T0) > test_3vx (_mm512_mask_prefetch_i64gather_ps, __m512i, __mmask8, void const *, 1, _MM_HINT_T0) > test_3vx (_mm512_mask_prefetch_i64scatter_ps, void const *, __mmask8, __m512i, 1, _MM_HINT_T0) > - > test_3vx (_mm512_mask_prefetch_i32gather_pd, __m256i, __mmask8, void const *, 1, _MM_HINT_T0) > test_3vx (_mm512_mask_prefetch_i32scatter_pd, void const *, __mmask8, __m256i, 1, _MM_HINT_T0) > test_3vx (_mm512_mask_prefetch_i64gather_pd, __m512i, __mmask8, long long *, 1, _MM_HINT_T0) > --- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dpd-1.c.jj 2016-05-22 12:20:09.000000000 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dpd-1.c 2017-02-13 10:22:18.154888926 +0100 > @@ -1,6 +1,6 @@ > /* { dg-do compile } */ > /* { dg-options "-mavx512pf -O2" } */ > -/* { dg-final { scan-assembler-times "vgatherpf0dpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vgatherpf0dpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ > > #include <immintrin.h> > > @@ -11,5 +11,6 @@ void *base; > void extern > avx512pf_test (void) > { > + _mm512_prefetch_i32gather_pd (idx, base, 8, _MM_HINT_T0); > _mm512_mask_prefetch_i32gather_pd (idx, m8, base, 8, _MM_HINT_T0); > } > --- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dps-1.c.jj 2016-05-22 12:20:23.000000000 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dps-1.c 2017-02-13 10:22:21.992838490 +0100 > @@ -1,6 +1,6 @@ > /* { dg-do compile } */ > /* { dg-options "-mavx512pf -O2" } */ > -/* { dg-final { scan-assembler-times "vgatherpf0dps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vgatherpf0dps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ > > #include <immintrin.h> > > @@ -11,5 +11,6 @@ int *base; > void extern > avx512pf_test (void) > { > + _mm512_prefetch_i32gather_ps (idx, base, 8, _MM_HINT_T0); > _mm512_mask_prefetch_i32gather_ps (idx, m16, base, 8, _MM_HINT_T0); > } > --- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qpd-1.c.jj 2016-05-22 12:20:32.000000000 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qpd-1.c 2017-02-13 10:22:26.097784546 +0100 > @@ -1,6 +1,6 @@ > /* { dg-do compile } */ > /* { dg-options "-mavx512pf -O2" } */ > -/* { dg-final { scan-assembler-times "vgatherpf0qpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vgatherpf0qpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ > > #include <immintrin.h> > > @@ -11,5 +11,6 @@ int *base; > void extern > avx512pf_test (void) > { > + _mm512_prefetch_i64gather_pd (idx, base, 8, _MM_HINT_T0); > _mm512_mask_prefetch_i64gather_pd (idx, m8, base, 8, _MM_HINT_T0); > } > --- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qps-1.c.jj 2016-05-22 12:20:13.000000000 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qps-1.c 2017-02-13 10:22:29.952733887 +0100 > @@ -1,6 +1,6 @@ > /* { dg-do compile } */ > /* { dg-options "-mavx512pf -O2" } */ > -/* { dg-final { scan-assembler-times "vgatherpf0qps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vgatherpf0qps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ > > #include <immintrin.h> > > @@ -11,5 +11,6 @@ int *base; > void extern > avx512pf_test (void) > { > + _mm512_prefetch_i64gather_ps (idx, base, 8, _MM_HINT_T0); > _mm512_mask_prefetch_i64gather_ps (idx, m8, base, 8, _MM_HINT_T0); > } > --- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dpd-1.c.jj 2016-05-22 12:20:31.000000000 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dpd-1.c 2017-02-13 10:22:33.866682452 +0100 > @@ -1,6 +1,6 @@ > /* { dg-do compile } */ > /* { dg-options "-mavx512pf -O2" } */ > -/* { dg-final { scan-assembler-times "vgatherpf1dpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vgatherpf1dpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ > > #include <immintrin.h> > > @@ -11,5 +11,6 @@ int *base; > void extern > avx512pf_test (void) > { > + _mm512_prefetch_i32gather_pd (idx, base, 8, _MM_HINT_T1); > _mm512_mask_prefetch_i32gather_pd (idx, m8, base, 8, _MM_HINT_T1); > } > --- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dps-1.c.jj 2016-05-22 12:20:13.000000000 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dps-1.c 2017-02-13 10:22:37.749631425 +0100 > @@ -1,6 +1,6 @@ > /* { dg-do compile } */ > /* { dg-options "-mavx512pf -O2" } */ > -/* { dg-final { scan-assembler-times "vgatherpf1dps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vgatherpf1dps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ > > #include <immintrin.h> > > @@ -11,5 +11,6 @@ int *base; > void extern > avx512pf_test (void) > { > + _mm512_prefetch_i32gather_ps (idx, base, 8, _MM_HINT_T1); > _mm512_mask_prefetch_i32gather_ps (idx, m16, base, 8, _MM_HINT_T1); > } > --- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qpd-1.c.jj 2016-05-22 12:20:18.000000000 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qpd-1.c 2017-02-13 10:22:41.703579464 +0100 > @@ -1,6 +1,6 @@ > /* { dg-do compile } */ > /* { dg-options "-mavx512pf -O2" } */ > -/* { dg-final { scan-assembler-times "vgatherpf1qpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vgatherpf1qpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ > > #include <immintrin.h> > > @@ -11,5 +11,6 @@ int *base; > void extern > avx512pf_test (void) > { > + _mm512_prefetch_i64gather_pd (idx, base, 8, _MM_HINT_T1); > _mm512_mask_prefetch_i64gather_pd (idx, m8, base, 8, _MM_HINT_T1); > } > --- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qps-1.c.jj 2016-05-22 12:20:02.000000000 +0200 > +++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qps-1.c 2017-02-13 10:22:46.684514008 +0100 > @@ -1,6 +1,6 @@ > /* { dg-do compile } */ > /* { dg-options "-mavx512pf -O2" } */ > -/* { dg-final { scan-assembler-times "vgatherpf1qps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vgatherpf1qps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ > > #include <immintrin.h> > > @@ -11,5 +11,6 @@ int *base; > void extern > avx512pf_test (void) > { > + _mm512_prefetch_i64gather_ps (idx, base, 8, _MM_HINT_T1); > _mm512_mask_prefetch_i64gather_ps (idx, m8, base, 8, _MM_HINT_T1); > } > > Jakub
--- gcc/config/i386/avx512pfintrin.h.jj 2017-01-17 18:40:59.000000000 +0100 +++ gcc/config/i386/avx512pfintrin.h 2017-02-13 09:56:21.333303124 +0100 @@ -48,6 +48,24 @@ typedef unsigned short __mmask16; #ifdef __OPTIMIZE__ extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_prefetch_i32gather_pd (__m256i __index, void const *__addr, + int __scale, int __hint) +{ + __builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr, + __scale, __hint); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_prefetch_i32gather_ps (__m512i __index, void const *__addr, + int __scale, int __hint) +{ + __builtin_ia32_gatherpfdps ((__mmask16) 0xFFFF, (__v16si) __index, __addr, + __scale, __hint); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_prefetch_i32gather_pd (__m256i __index, __mmask8 __mask, void const *__addr, int __scale, int __hint) { @@ -66,6 +84,24 @@ _mm512_mask_prefetch_i32gather_ps (__m51 extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_prefetch_i64gather_pd (__m512i __index, void const *__addr, + int __scale, int __hint) +{ + __builtin_ia32_gatherpfqpd ((__mmask8) 0xFF, (__v8di) __index, __addr, + __scale, __hint); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_prefetch_i64gather_ps (__m512i __index, void const *__addr, + int __scale, int __hint) +{ + __builtin_ia32_gatherpfqps ((__mmask8) 0xFF, (__v8di) __index, __addr, + __scale, __hint); +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_prefetch_i64gather_pd (__m512i __index, __mmask8 __mask, void const *__addr, int __scale, int __hint) { @@ -155,6 +191,14 @@ _mm512_mask_prefetch_i64scatter_ps (void } #else +#define _mm512_prefetch_i32gather_pd(INDEX, ADDR, SCALE, HINT) \ + __builtin_ia32_gatherpfdpd ((__mmask8)0xFF, (__v8si)(__m256i)INDEX, \ + (void const *)ADDR, (int)SCALE, (int)HINT) + +#define _mm512_prefetch_i32gather_ps(INDEX, ADDR, SCALE, HINT) \ + __builtin_ia32_gatherpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i)INDEX, \ + (void const *)ADDR, (int)SCALE, (int)HINT) + #define _mm512_mask_prefetch_i32gather_pd(INDEX, MASK, ADDR, SCALE, HINT) \ __builtin_ia32_gatherpfdpd ((__mmask8)MASK, (__v8si)(__m256i)INDEX, \ (void const *)ADDR, (int)SCALE, (int)HINT) @@ -163,6 +207,14 @@ _mm512_mask_prefetch_i64scatter_ps (void __builtin_ia32_gatherpfdps ((__mmask16)MASK, (__v16si)(__m512i)INDEX, \ (void const *)ADDR, (int)SCALE, (int)HINT) +#define _mm512_prefetch_i64gather_pd(INDEX, ADDR, SCALE, HINT) \ + __builtin_ia32_gatherpfqpd ((__mmask8)0xFF, (__v8di)(__m512i)INDEX, \ + (void *)ADDR, (int)SCALE, (int)HINT) + +#define _mm512_prefetch_i64gather_ps(INDEX, ADDR, SCALE, HINT) \ + __builtin_ia32_gatherpfqps ((__mmask8)0xFF, (__v8di)(__m512i)INDEX, \ + (void *)ADDR, (int)SCALE, (int)HINT) + #define _mm512_mask_prefetch_i64gather_pd(INDEX, MASK, ADDR, SCALE, HINT) \ __builtin_ia32_gatherpfqpd ((__mmask8)MASK, (__v8di)(__m512i)INDEX, \ (void *)ADDR, (int)SCALE, (int)HINT) --- gcc/testsuite/gcc.target/i386/sse-14.c.jj 2017-01-12 22:28:47.000000000 +0100 +++ gcc/testsuite/gcc.target/i386/sse-14.c 2017-02-13 10:15:36.815163082 +0100 @@ -50,7 +50,7 @@ { return func (A, B, imm1, imm2, imm3); } #define test_2vx(func, op1_type, op2_type, imm1, imm2) \ - _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L) \ + void _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L) \ { func (A, B, imm1, imm2); } #define test_3(func, type, op1_type, op2_type, op3_type, imm) \ @@ -74,7 +74,7 @@ { func (A, B, C, imm); } #define test_3vx(func, op1_type, op2_type, op3_type, imm1, imm2) \ - int _CONCAT(_,func) (op1_type A, op2_type B, \ + void _CONCAT(_,func) (op1_type A, op2_type B, \ op3_type C, int const I, int const L) \ { func (A, B, C, imm1, imm2); } @@ -520,6 +520,14 @@ test_4x (_mm_maskz_fixupimm_round_sd, __ test_4x (_mm_maskz_fixupimm_round_ss, __m128, __mmask8, __m128, __m128, __m128i, 1, 8) /* avx512pfintrin.h */ +test_2vx (_mm512_prefetch_i32gather_ps, __m512i, void const *, 1, _MM_HINT_T0) +test_2vx (_mm512_prefetch_i32scatter_ps, void const *, __m512i, 1, _MM_HINT_T0) +test_2vx (_mm512_prefetch_i64gather_ps, __m512i, void const *, 1, _MM_HINT_T0) +test_2vx (_mm512_prefetch_i64scatter_ps, void const *, __m512i, 1, _MM_HINT_T0) +test_2vx (_mm512_prefetch_i32gather_pd, __m256i, void const *, 1, _MM_HINT_T0) +test_2vx (_mm512_prefetch_i32scatter_pd, void const *, __m256i, 1, _MM_HINT_T0) +test_2vx (_mm512_prefetch_i64gather_pd, __m512i, void const *, 1, _MM_HINT_T0) +test_2vx (_mm512_prefetch_i64scatter_pd, void const *, __m512i, 1, _MM_HINT_T0) test_3vx (_mm512_mask_prefetch_i32gather_ps, __m512i, __mmask16, void const *, 1, _MM_HINT_T0) test_3vx (_mm512_mask_prefetch_i32scatter_ps, void const *, __mmask16, __m512i, 1, _MM_HINT_T0) test_3vx (_mm512_mask_prefetch_i64gather_ps, __m512i, __mmask8, void const *, 1, _MM_HINT_T0) --- gcc/testsuite/gcc.target/i386/sse-22.c.jj 2017-01-12 22:28:47.000000000 +0100 +++ gcc/testsuite/gcc.target/i386/sse-22.c 2017-02-13 10:15:54.781926974 +0100 @@ -50,7 +50,7 @@ { return func (A, B, imm1, imm2, imm3); } #define test_2vx(func, op1_type, op2_type, imm1, imm2) \ - _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L) \ + void _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L) \ { func (A, B, imm1, imm2); } #define test_3(func, type, op1_type, op2_type, op3_type, imm) \ @@ -74,7 +74,7 @@ { func (A, B, C, imm); } #define test_3vx(func, op1_type, op2_type, op3_type, imm1, imm2) \ - int _CONCAT(_,func) (op1_type A, op2_type B, \ + void _CONCAT(_,func) (op1_type A, op2_type B, \ op3_type C, int const I, int const L) \ { func (A, B, C, imm1, imm2); } @@ -647,11 +647,18 @@ test_4x (_mm_maskz_fixupimm_round_sd, __ test_4x (_mm_maskz_fixupimm_round_ss, __m128, __mmask8, __m128, __m128, __m128i, 1, 8) /* avx512pfintrin.h */ +test_2vx (_mm512_prefetch_i32gather_ps, __m512i, void const *, 1, _MM_HINT_T0) +test_2vx (_mm512_prefetch_i32scatter_ps, void const *, __m512i, 1, _MM_HINT_T0) +test_2vx (_mm512_prefetch_i64gather_ps, __m512i, void const *, 1, _MM_HINT_T0) +test_2vx (_mm512_prefetch_i64scatter_ps, void const *, __m512i, 1, _MM_HINT_T0) +test_2vx (_mm512_prefetch_i32gather_pd, __m256i, void const *, 1, _MM_HINT_T0) +test_2vx (_mm512_prefetch_i32scatter_pd, void const *, __m256i, 1, _MM_HINT_T0) +test_2vx (_mm512_prefetch_i64gather_pd, __m512i, long long *, 1, _MM_HINT_T0) +test_2vx (_mm512_prefetch_i64scatter_pd, void const *, __m512i, 1, _MM_HINT_T0) test_3vx (_mm512_mask_prefetch_i32gather_ps, __m512i, __mmask16, void const *, 1, _MM_HINT_T0) test_3vx (_mm512_mask_prefetch_i32scatter_ps, void const *, __mmask16, __m512i, 1, _MM_HINT_T0) test_3vx (_mm512_mask_prefetch_i64gather_ps, __m512i, __mmask8, void const *, 1, _MM_HINT_T0) test_3vx (_mm512_mask_prefetch_i64scatter_ps, void const *, __mmask8, __m512i, 1, _MM_HINT_T0) - test_3vx (_mm512_mask_prefetch_i32gather_pd, __m256i, __mmask8, void const *, 1, _MM_HINT_T0) test_3vx (_mm512_mask_prefetch_i32scatter_pd, void const *, __mmask8, __m256i, 1, _MM_HINT_T0) test_3vx (_mm512_mask_prefetch_i64gather_pd, __m512i, __mmask8, long long *, 1, _MM_HINT_T0) --- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dpd-1.c.jj 2016-05-22 12:20:09.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dpd-1.c 2017-02-13 10:22:18.154888926 +0100 @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512pf -O2" } */ -/* { dg-final { scan-assembler-times "vgatherpf0dpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgatherpf0dpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ #include <immintrin.h> @@ -11,5 +11,6 @@ void *base; void extern avx512pf_test (void) { + _mm512_prefetch_i32gather_pd (idx, base, 8, _MM_HINT_T0); _mm512_mask_prefetch_i32gather_pd (idx, m8, base, 8, _MM_HINT_T0); } --- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dps-1.c.jj 2016-05-22 12:20:23.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dps-1.c 2017-02-13 10:22:21.992838490 +0100 @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512pf -O2" } */ -/* { dg-final { scan-assembler-times "vgatherpf0dps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgatherpf0dps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ #include <immintrin.h> @@ -11,5 +11,6 @@ int *base; void extern avx512pf_test (void) { + _mm512_prefetch_i32gather_ps (idx, base, 8, _MM_HINT_T0); _mm512_mask_prefetch_i32gather_ps (idx, m16, base, 8, _MM_HINT_T0); } --- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qpd-1.c.jj 2016-05-22 12:20:32.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qpd-1.c 2017-02-13 10:22:26.097784546 +0100 @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512pf -O2" } */ -/* { dg-final { scan-assembler-times "vgatherpf0qpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgatherpf0qpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ #include <immintrin.h> @@ -11,5 +11,6 @@ int *base; void extern avx512pf_test (void) { + _mm512_prefetch_i64gather_pd (idx, base, 8, _MM_HINT_T0); _mm512_mask_prefetch_i64gather_pd (idx, m8, base, 8, _MM_HINT_T0); } --- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qps-1.c.jj 2016-05-22 12:20:13.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qps-1.c 2017-02-13 10:22:29.952733887 +0100 @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512pf -O2" } */ -/* { dg-final { scan-assembler-times "vgatherpf0qps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgatherpf0qps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ #include <immintrin.h> @@ -11,5 +11,6 @@ int *base; void extern avx512pf_test (void) { + _mm512_prefetch_i64gather_ps (idx, base, 8, _MM_HINT_T0); _mm512_mask_prefetch_i64gather_ps (idx, m8, base, 8, _MM_HINT_T0); } --- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dpd-1.c.jj 2016-05-22 12:20:31.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dpd-1.c 2017-02-13 10:22:33.866682452 +0100 @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512pf -O2" } */ -/* { dg-final { scan-assembler-times "vgatherpf1dpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgatherpf1dpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ #include <immintrin.h> @@ -11,5 +11,6 @@ int *base; void extern avx512pf_test (void) { + _mm512_prefetch_i32gather_pd (idx, base, 8, _MM_HINT_T1); _mm512_mask_prefetch_i32gather_pd (idx, m8, base, 8, _MM_HINT_T1); } --- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dps-1.c.jj 2016-05-22 12:20:13.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dps-1.c 2017-02-13 10:22:37.749631425 +0100 @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512pf -O2" } */ -/* { dg-final { scan-assembler-times "vgatherpf1dps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgatherpf1dps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ #include <immintrin.h> @@ -11,5 +11,6 @@ int *base; void extern avx512pf_test (void) { + _mm512_prefetch_i32gather_ps (idx, base, 8, _MM_HINT_T1); _mm512_mask_prefetch_i32gather_ps (idx, m16, base, 8, _MM_HINT_T1); } --- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qpd-1.c.jj 2016-05-22 12:20:18.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qpd-1.c 2017-02-13 10:22:41.703579464 +0100 @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512pf -O2" } */ -/* { dg-final { scan-assembler-times "vgatherpf1qpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgatherpf1qpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ #include <immintrin.h> @@ -11,5 +11,6 @@ int *base; void extern avx512pf_test (void) { + _mm512_prefetch_i64gather_pd (idx, base, 8, _MM_HINT_T1); _mm512_mask_prefetch_i64gather_pd (idx, m8, base, 8, _MM_HINT_T1); } --- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qps-1.c.jj 2016-05-22 12:20:02.000000000 +0200 +++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qps-1.c 2017-02-13 10:22:46.684514008 +0100 @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-mavx512pf -O2" } */ -/* { dg-final { scan-assembler-times "vgatherpf1qps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vgatherpf1qps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */ #include <immintrin.h> @@ -11,5 +11,6 @@ int *base; void extern avx512pf_test (void) { + _mm512_prefetch_i64gather_ps (idx, base, 8, _MM_HINT_T1); _mm512_mask_prefetch_i64gather_ps (idx, m8, base, 8, _MM_HINT_T1); }