diff mbox

Add missing _mm512_prefetch_i{32,64}gather_{pd,ps} (PR target/79481)

Message ID 20170213193554.GK1849@tucnak
State New
Headers show

Commit Message

Jakub Jelinek Feb. 13, 2017, 7:35 p.m. UTC
Hi!

As mentioned in the PR, ICC as well as clang have these non-masked
gather prefetch intrinsics in addition to masked (and for scatter
even GCC has both masked and non-masked), but GCC does not (the
SDM actually doesn't mention those, only those for scatters).

The following patch implements those, I think it is useful to have
them for compatibility with the other compilers as well for consistency.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2017-02-13  Jakub Jelinek  <jakub@redhat.com>

	PR target/79481
	* config/i386/avx512pfintrin.h (_mm512_prefetch_i32gather_pd,
	_mm512_prefetch_i32gather_ps, _mm512_prefetch_i64gather_pd,
	_mm512_prefetch_i64gather_ps): New inline functions and macros.

	* gcc.target/i386/sse-14.c (test_2vx): Add void return type.
	(test_3vx): Change return type from int to void. 
	(_mm512_prefetch_i32gather_ps, _mm512_prefetch_i32scatter_ps,
	_mm512_prefetch_i64gather_ps, _mm512_prefetch_i64scatter_ps,
	_mm512_prefetch_i32gather_pd, _mm512_prefetch_i32scatter_pd,
	_mm512_prefetch_i64gather_pd, _mm512_prefetch_i64scatter_pd): New
	tests.
	* gcc.target/i386/sse-22.c (test_2vx): Add void return type.
	(test_3vx): Change return type from int to void.
	(_mm512_prefetch_i32gather_ps, _mm512_prefetch_i32scatter_ps,
	_mm512_prefetch_i64gather_ps, _mm512_prefetch_i64scatter_ps,
	_mm512_prefetch_i32gather_pd, _mm512_prefetch_i32scatter_pd,
	_mm512_prefetch_i64gather_pd, _mm512_prefetch_i64scatter_pd): New
	tests.
	* gcc.target/i386/avx512pf-vgatherpf0dpd-1.c: Add non-masked
	intrinsic.  Change scan-assembler-times number from 1 to 2.
	* gcc.target/i386/avx512pf-vgatherpf0dps-1.c: Likewise.
	* gcc.target/i386/avx512pf-vgatherpf0qpd-1.c: Likewise.
	* gcc.target/i386/avx512pf-vgatherpf0qps-1.c: Likewise.
	* gcc.target/i386/avx512pf-vgatherpf1dpd-1.c: Likewise.
	* gcc.target/i386/avx512pf-vgatherpf1dps-1.c: Likewise.
	* gcc.target/i386/avx512pf-vgatherpf1qpd-1.c: Likewise.
	* gcc.target/i386/avx512pf-vgatherpf1qps-1.c: Likewise.


	Jakub

Comments

Uros Bizjak Feb. 14, 2017, 9:20 a.m. UTC | #1
On Mon, Feb 13, 2017 at 8:35 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> As mentioned in the PR, ICC as well as clang have these non-masked
> gather prefetch intrinsics in addition to masked (and for scatter
> even GCC has both masked and non-masked), but GCC does not (the
> SDM actually doesn't mention those, only those for scatters).
>
> The following patch implements those, I think it is useful to have
> them for compatibility with the other compilers as well for consistency.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2017-02-13  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/79481
>         * config/i386/avx512pfintrin.h (_mm512_prefetch_i32gather_pd,
>         _mm512_prefetch_i32gather_ps, _mm512_prefetch_i64gather_pd,
>         _mm512_prefetch_i64gather_ps): New inline functions and macros.
>
>         * gcc.target/i386/sse-14.c (test_2vx): Add void return type.
>         (test_3vx): Change return type from int to void.
>         (_mm512_prefetch_i32gather_ps, _mm512_prefetch_i32scatter_ps,
>         _mm512_prefetch_i64gather_ps, _mm512_prefetch_i64scatter_ps,
>         _mm512_prefetch_i32gather_pd, _mm512_prefetch_i32scatter_pd,
>         _mm512_prefetch_i64gather_pd, _mm512_prefetch_i64scatter_pd): New
>         tests.
>         * gcc.target/i386/sse-22.c (test_2vx): Add void return type.
>         (test_3vx): Change return type from int to void.
>         (_mm512_prefetch_i32gather_ps, _mm512_prefetch_i32scatter_ps,
>         _mm512_prefetch_i64gather_ps, _mm512_prefetch_i64scatter_ps,
>         _mm512_prefetch_i32gather_pd, _mm512_prefetch_i32scatter_pd,
>         _mm512_prefetch_i64gather_pd, _mm512_prefetch_i64scatter_pd): New
>         tests.
>         * gcc.target/i386/avx512pf-vgatherpf0dpd-1.c: Add non-masked
>         intrinsic.  Change scan-assembler-times number from 1 to 2.
>         * gcc.target/i386/avx512pf-vgatherpf0dps-1.c: Likewise.
>         * gcc.target/i386/avx512pf-vgatherpf0qpd-1.c: Likewise.
>         * gcc.target/i386/avx512pf-vgatherpf0qps-1.c: Likewise.
>         * gcc.target/i386/avx512pf-vgatherpf1dpd-1.c: Likewise.
>         * gcc.target/i386/avx512pf-vgatherpf1dps-1.c: Likewise.
>         * gcc.target/i386/avx512pf-vgatherpf1qpd-1.c: Likewise.
>         * gcc.target/i386/avx512pf-vgatherpf1qps-1.c: Likewise.

OK.

Thanks,
Uros.

> --- gcc/config/i386/avx512pfintrin.h.jj 2017-01-17 18:40:59.000000000 +0100
> +++ gcc/config/i386/avx512pfintrin.h    2017-02-13 09:56:21.333303124 +0100
> @@ -48,6 +48,24 @@ typedef unsigned short __mmask16;
>  #ifdef __OPTIMIZE__
>  extern __inline void
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm512_prefetch_i32gather_pd (__m256i __index, void const *__addr,
> +                             int __scale, int __hint)
> +{
> +  __builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr,
> +                             __scale, __hint);
> +}
> +
> +extern __inline void
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm512_prefetch_i32gather_ps (__m512i __index, void const *__addr,
> +                             int __scale, int __hint)
> +{
> +  __builtin_ia32_gatherpfdps ((__mmask16) 0xFFFF, (__v16si) __index, __addr,
> +                             __scale, __hint);
> +}
> +
> +extern __inline void
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mask_prefetch_i32gather_pd (__m256i __index, __mmask8 __mask,
>                                    void const *__addr, int __scale, int __hint)
>  {
> @@ -66,6 +84,24 @@ _mm512_mask_prefetch_i32gather_ps (__m51
>
>  extern __inline void
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm512_prefetch_i64gather_pd (__m512i __index, void const *__addr,
> +                             int __scale, int __hint)
> +{
> +  __builtin_ia32_gatherpfqpd ((__mmask8) 0xFF, (__v8di) __index, __addr,
> +                             __scale, __hint);
> +}
> +
> +extern __inline void
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> +_mm512_prefetch_i64gather_ps (__m512i __index, void const *__addr,
> +                             int __scale, int __hint)
> +{
> +  __builtin_ia32_gatherpfqps ((__mmask8) 0xFF, (__v8di) __index, __addr,
> +                             __scale, __hint);
> +}
> +
> +extern __inline void
> +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mask_prefetch_i64gather_pd (__m512i __index, __mmask8 __mask,
>                                    void const *__addr, int __scale, int __hint)
>  {
> @@ -155,6 +191,14 @@ _mm512_mask_prefetch_i64scatter_ps (void
>  }
>
>  #else
> +#define _mm512_prefetch_i32gather_pd(INDEX, ADDR, SCALE, HINT)              \
> +  __builtin_ia32_gatherpfdpd ((__mmask8)0xFF, (__v8si)(__m256i)INDEX,       \
> +                             (void const *)ADDR, (int)SCALE, (int)HINT)
> +
> +#define _mm512_prefetch_i32gather_ps(INDEX, ADDR, SCALE, HINT)              \
> +  __builtin_ia32_gatherpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i)INDEX,    \
> +                             (void const *)ADDR, (int)SCALE, (int)HINT)
> +
>  #define _mm512_mask_prefetch_i32gather_pd(INDEX, MASK, ADDR, SCALE, HINT)    \
>    __builtin_ia32_gatherpfdpd ((__mmask8)MASK, (__v8si)(__m256i)INDEX,       \
>                               (void const *)ADDR, (int)SCALE, (int)HINT)
> @@ -163,6 +207,14 @@ _mm512_mask_prefetch_i64scatter_ps (void
>    __builtin_ia32_gatherpfdps ((__mmask16)MASK, (__v16si)(__m512i)INDEX,      \
>                               (void const *)ADDR, (int)SCALE, (int)HINT)
>
> +#define _mm512_prefetch_i64gather_pd(INDEX, ADDR, SCALE, HINT)              \
> +  __builtin_ia32_gatherpfqpd ((__mmask8)0xFF, (__v8di)(__m512i)INDEX,       \
> +                             (void *)ADDR, (int)SCALE, (int)HINT)
> +
> +#define _mm512_prefetch_i64gather_ps(INDEX, ADDR, SCALE, HINT)              \
> +  __builtin_ia32_gatherpfqps ((__mmask8)0xFF, (__v8di)(__m512i)INDEX,       \
> +                             (void *)ADDR, (int)SCALE, (int)HINT)
> +
>  #define _mm512_mask_prefetch_i64gather_pd(INDEX, MASK, ADDR, SCALE, HINT)    \
>    __builtin_ia32_gatherpfqpd ((__mmask8)MASK, (__v8di)(__m512i)INDEX,       \
>                               (void *)ADDR, (int)SCALE, (int)HINT)
> --- gcc/testsuite/gcc.target/i386/sse-14.c.jj   2017-01-12 22:28:47.000000000 +0100
> +++ gcc/testsuite/gcc.target/i386/sse-14.c      2017-02-13 10:15:36.815163082 +0100
> @@ -50,7 +50,7 @@
>    { return func (A, B, imm1, imm2, imm3); }
>
>  #define test_2vx(func, op1_type, op2_type, imm1, imm2)     \
> -  _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L) \
> +  void _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L) \
>    { func (A, B, imm1, imm2); }
>
>  #define test_3(func, type, op1_type, op2_type, op3_type, imm)          \
> @@ -74,7 +74,7 @@
>    { func (A, B, C, imm); }
>
>  #define test_3vx(func, op1_type, op2_type, op3_type, imm1, imm2)   \
> -  int _CONCAT(_,func) (op1_type A, op2_type B,                    \
> +  void _CONCAT(_,func) (op1_type A, op2_type B,                           \
>                        op3_type C, int const I, int const L)       \
>    { func (A, B, C, imm1, imm2); }
>
> @@ -520,6 +520,14 @@ test_4x (_mm_maskz_fixupimm_round_sd, __
>  test_4x (_mm_maskz_fixupimm_round_ss, __m128, __mmask8, __m128, __m128, __m128i, 1, 8)
>
>  /* avx512pfintrin.h */
> +test_2vx (_mm512_prefetch_i32gather_ps, __m512i, void const *, 1, _MM_HINT_T0)
> +test_2vx (_mm512_prefetch_i32scatter_ps, void const *, __m512i, 1, _MM_HINT_T0)
> +test_2vx (_mm512_prefetch_i64gather_ps, __m512i, void const *, 1, _MM_HINT_T0)
> +test_2vx (_mm512_prefetch_i64scatter_ps, void const *, __m512i, 1, _MM_HINT_T0)
> +test_2vx (_mm512_prefetch_i32gather_pd, __m256i, void const *, 1, _MM_HINT_T0)
> +test_2vx (_mm512_prefetch_i32scatter_pd, void const *, __m256i, 1, _MM_HINT_T0)
> +test_2vx (_mm512_prefetch_i64gather_pd, __m512i, void const *, 1, _MM_HINT_T0)
> +test_2vx (_mm512_prefetch_i64scatter_pd, void const *, __m512i, 1, _MM_HINT_T0)
>  test_3vx (_mm512_mask_prefetch_i32gather_ps, __m512i, __mmask16, void const *, 1, _MM_HINT_T0)
>  test_3vx (_mm512_mask_prefetch_i32scatter_ps, void const *, __mmask16, __m512i, 1, _MM_HINT_T0)
>  test_3vx (_mm512_mask_prefetch_i64gather_ps, __m512i, __mmask8, void const *, 1, _MM_HINT_T0)
> --- gcc/testsuite/gcc.target/i386/sse-22.c.jj   2017-01-12 22:28:47.000000000 +0100
> +++ gcc/testsuite/gcc.target/i386/sse-22.c      2017-02-13 10:15:54.781926974 +0100
> @@ -50,7 +50,7 @@
>    { return func (A, B, imm1, imm2, imm3); }
>
>  #define test_2vx(func, op1_type, op2_type, imm1, imm2)     \
> -  _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L) \
> +  void _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L) \
>    { func (A, B, imm1, imm2); }
>
>  #define test_3(func, type, op1_type, op2_type, op3_type, imm)          \
> @@ -74,7 +74,7 @@
>    { func (A, B, C, imm); }
>
>  #define test_3vx(func, op1_type, op2_type, op3_type, imm1, imm2)   \
> -  int _CONCAT(_,func) (op1_type A, op2_type B,                    \
> +  void _CONCAT(_,func) (op1_type A, op2_type B,                   \
>                        op3_type C, int const I, int const L)       \
>    { func (A, B, C, imm1, imm2); }
>
> @@ -647,11 +647,18 @@ test_4x (_mm_maskz_fixupimm_round_sd, __
>  test_4x (_mm_maskz_fixupimm_round_ss, __m128, __mmask8, __m128, __m128, __m128i, 1, 8)
>
>  /* avx512pfintrin.h */
> +test_2vx (_mm512_prefetch_i32gather_ps, __m512i, void const *, 1, _MM_HINT_T0)
> +test_2vx (_mm512_prefetch_i32scatter_ps, void const *, __m512i, 1, _MM_HINT_T0)
> +test_2vx (_mm512_prefetch_i64gather_ps, __m512i, void const *, 1, _MM_HINT_T0)
> +test_2vx (_mm512_prefetch_i64scatter_ps, void const *, __m512i, 1, _MM_HINT_T0)
> +test_2vx (_mm512_prefetch_i32gather_pd, __m256i, void const *, 1, _MM_HINT_T0)
> +test_2vx (_mm512_prefetch_i32scatter_pd, void const *, __m256i, 1, _MM_HINT_T0)
> +test_2vx (_mm512_prefetch_i64gather_pd, __m512i, long long *, 1, _MM_HINT_T0)
> +test_2vx (_mm512_prefetch_i64scatter_pd, void const *, __m512i, 1, _MM_HINT_T0)
>  test_3vx (_mm512_mask_prefetch_i32gather_ps, __m512i, __mmask16, void const *, 1, _MM_HINT_T0)
>  test_3vx (_mm512_mask_prefetch_i32scatter_ps, void const *, __mmask16, __m512i, 1, _MM_HINT_T0)
>  test_3vx (_mm512_mask_prefetch_i64gather_ps, __m512i, __mmask8, void const *, 1, _MM_HINT_T0)
>  test_3vx (_mm512_mask_prefetch_i64scatter_ps, void const *, __mmask8, __m512i, 1, _MM_HINT_T0)
> -
>  test_3vx (_mm512_mask_prefetch_i32gather_pd, __m256i, __mmask8, void const *, 1, _MM_HINT_T0)
>  test_3vx (_mm512_mask_prefetch_i32scatter_pd, void const *, __mmask8, __m256i, 1, _MM_HINT_T0)
>  test_3vx (_mm512_mask_prefetch_i64gather_pd, __m512i, __mmask8, long long *, 1, _MM_HINT_T0)
> --- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dpd-1.c.jj 2016-05-22 12:20:09.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dpd-1.c    2017-02-13 10:22:18.154888926 +0100
> @@ -1,6 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-mavx512pf -O2" } */
> -/* { dg-final { scan-assembler-times "vgatherpf0dpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vgatherpf0dpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
>
>  #include <immintrin.h>
>
> @@ -11,5 +11,6 @@ void *base;
>  void extern
>  avx512pf_test (void)
>  {
> +  _mm512_prefetch_i32gather_pd (idx, base, 8, _MM_HINT_T0);
>    _mm512_mask_prefetch_i32gather_pd (idx, m8, base, 8, _MM_HINT_T0);
>  }
> --- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dps-1.c.jj 2016-05-22 12:20:23.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dps-1.c    2017-02-13 10:22:21.992838490 +0100
> @@ -1,6 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-mavx512pf -O2" } */
> -/* { dg-final { scan-assembler-times "vgatherpf0dps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vgatherpf0dps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
>
>  #include <immintrin.h>
>
> @@ -11,5 +11,6 @@ int *base;
>  void extern
>  avx512pf_test (void)
>  {
> +  _mm512_prefetch_i32gather_ps (idx, base, 8, _MM_HINT_T0);
>    _mm512_mask_prefetch_i32gather_ps (idx, m16, base, 8, _MM_HINT_T0);
>  }
> --- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qpd-1.c.jj 2016-05-22 12:20:32.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qpd-1.c    2017-02-13 10:22:26.097784546 +0100
> @@ -1,6 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-mavx512pf -O2" } */
> -/* { dg-final { scan-assembler-times "vgatherpf0qpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vgatherpf0qpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
>
>  #include <immintrin.h>
>
> @@ -11,5 +11,6 @@ int *base;
>  void extern
>  avx512pf_test (void)
>  {
> +  _mm512_prefetch_i64gather_pd (idx, base, 8, _MM_HINT_T0);
>    _mm512_mask_prefetch_i64gather_pd (idx, m8, base, 8, _MM_HINT_T0);
>  }
> --- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qps-1.c.jj 2016-05-22 12:20:13.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qps-1.c    2017-02-13 10:22:29.952733887 +0100
> @@ -1,6 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-mavx512pf -O2" } */
> -/* { dg-final { scan-assembler-times "vgatherpf0qps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vgatherpf0qps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
>
>  #include <immintrin.h>
>
> @@ -11,5 +11,6 @@ int *base;
>  void extern
>  avx512pf_test (void)
>  {
> +  _mm512_prefetch_i64gather_ps (idx, base, 8, _MM_HINT_T0);
>    _mm512_mask_prefetch_i64gather_ps (idx, m8, base, 8, _MM_HINT_T0);
>  }
> --- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dpd-1.c.jj 2016-05-22 12:20:31.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dpd-1.c    2017-02-13 10:22:33.866682452 +0100
> @@ -1,6 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-mavx512pf -O2" } */
> -/* { dg-final { scan-assembler-times "vgatherpf1dpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vgatherpf1dpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
>
>  #include <immintrin.h>
>
> @@ -11,5 +11,6 @@ int *base;
>  void extern
>  avx512pf_test (void)
>  {
> +  _mm512_prefetch_i32gather_pd (idx, base, 8, _MM_HINT_T1);
>    _mm512_mask_prefetch_i32gather_pd (idx, m8, base, 8, _MM_HINT_T1);
>  }
> --- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dps-1.c.jj 2016-05-22 12:20:13.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dps-1.c    2017-02-13 10:22:37.749631425 +0100
> @@ -1,6 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-mavx512pf -O2" } */
> -/* { dg-final { scan-assembler-times "vgatherpf1dps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vgatherpf1dps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
>
>  #include <immintrin.h>
>
> @@ -11,5 +11,6 @@ int *base;
>  void extern
>  avx512pf_test (void)
>  {
> +  _mm512_prefetch_i32gather_ps (idx, base, 8, _MM_HINT_T1);
>    _mm512_mask_prefetch_i32gather_ps (idx, m16, base, 8, _MM_HINT_T1);
>  }
> --- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qpd-1.c.jj 2016-05-22 12:20:18.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qpd-1.c    2017-02-13 10:22:41.703579464 +0100
> @@ -1,6 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-mavx512pf -O2" } */
> -/* { dg-final { scan-assembler-times "vgatherpf1qpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vgatherpf1qpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
>
>  #include <immintrin.h>
>
> @@ -11,5 +11,6 @@ int *base;
>  void extern
>  avx512pf_test (void)
>  {
> +  _mm512_prefetch_i64gather_pd (idx, base, 8, _MM_HINT_T1);
>    _mm512_mask_prefetch_i64gather_pd (idx, m8, base, 8, _MM_HINT_T1);
>  }
> --- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qps-1.c.jj 2016-05-22 12:20:02.000000000 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qps-1.c    2017-02-13 10:22:46.684514008 +0100
> @@ -1,6 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-mavx512pf -O2" } */
> -/* { dg-final { scan-assembler-times "vgatherpf1qps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
> +/* { dg-final { scan-assembler-times "vgatherpf1qps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
>
>  #include <immintrin.h>
>
> @@ -11,5 +11,6 @@ int *base;
>  void extern
>  avx512pf_test (void)
>  {
> +  _mm512_prefetch_i64gather_ps (idx, base, 8, _MM_HINT_T1);
>    _mm512_mask_prefetch_i64gather_ps (idx, m8, base, 8, _MM_HINT_T1);
>  }
>
>         Jakub
diff mbox

Patch

--- gcc/config/i386/avx512pfintrin.h.jj	2017-01-17 18:40:59.000000000 +0100
+++ gcc/config/i386/avx512pfintrin.h	2017-02-13 09:56:21.333303124 +0100
@@ -48,6 +48,24 @@  typedef unsigned short __mmask16;
 #ifdef __OPTIMIZE__
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_prefetch_i32gather_pd (__m256i __index, void const *__addr,
+			      int __scale, int __hint)
+{
+  __builtin_ia32_gatherpfdpd ((__mmask8) 0xFF, (__v8si) __index, __addr,
+			      __scale, __hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_prefetch_i32gather_ps (__m512i __index, void const *__addr,
+			      int __scale, int __hint)
+{
+  __builtin_ia32_gatherpfdps ((__mmask16) 0xFFFF, (__v16si) __index, __addr,
+			      __scale, __hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_prefetch_i32gather_pd (__m256i __index, __mmask8 __mask,
 				   void const *__addr, int __scale, int __hint)
 {
@@ -66,6 +84,24 @@  _mm512_mask_prefetch_i32gather_ps (__m51
 
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_prefetch_i64gather_pd (__m512i __index, void const *__addr,
+			      int __scale, int __hint)
+{
+  __builtin_ia32_gatherpfqpd ((__mmask8) 0xFF, (__v8di) __index, __addr,
+			      __scale, __hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_prefetch_i64gather_ps (__m512i __index, void const *__addr,
+			      int __scale, int __hint)
+{
+  __builtin_ia32_gatherpfqps ((__mmask8) 0xFF, (__v8di) __index, __addr,
+			      __scale, __hint);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_prefetch_i64gather_pd (__m512i __index, __mmask8 __mask,
 				   void const *__addr, int __scale, int __hint)
 {
@@ -155,6 +191,14 @@  _mm512_mask_prefetch_i64scatter_ps (void
 }
 
 #else
+#define _mm512_prefetch_i32gather_pd(INDEX, ADDR, SCALE, HINT)		     \
+  __builtin_ia32_gatherpfdpd ((__mmask8)0xFF, (__v8si)(__m256i)INDEX,	     \
+			      (void const *)ADDR, (int)SCALE, (int)HINT)
+
+#define _mm512_prefetch_i32gather_ps(INDEX, ADDR, SCALE, HINT)		     \
+  __builtin_ia32_gatherpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i)INDEX,    \
+			      (void const *)ADDR, (int)SCALE, (int)HINT)
+
 #define _mm512_mask_prefetch_i32gather_pd(INDEX, MASK, ADDR, SCALE, HINT)    \
   __builtin_ia32_gatherpfdpd ((__mmask8)MASK, (__v8si)(__m256i)INDEX,	     \
 			      (void const *)ADDR, (int)SCALE, (int)HINT)
@@ -163,6 +207,14 @@  _mm512_mask_prefetch_i64scatter_ps (void
   __builtin_ia32_gatherpfdps ((__mmask16)MASK, (__v16si)(__m512i)INDEX,      \
 			      (void const *)ADDR, (int)SCALE, (int)HINT)
 
+#define _mm512_prefetch_i64gather_pd(INDEX, ADDR, SCALE, HINT)		     \
+  __builtin_ia32_gatherpfqpd ((__mmask8)0xFF, (__v8di)(__m512i)INDEX,	     \
+			      (void *)ADDR, (int)SCALE, (int)HINT)
+
+#define _mm512_prefetch_i64gather_ps(INDEX, ADDR, SCALE, HINT)		     \
+  __builtin_ia32_gatherpfqps ((__mmask8)0xFF, (__v8di)(__m512i)INDEX,	     \
+			      (void *)ADDR, (int)SCALE, (int)HINT)
+
 #define _mm512_mask_prefetch_i64gather_pd(INDEX, MASK, ADDR, SCALE, HINT)    \
   __builtin_ia32_gatherpfqpd ((__mmask8)MASK, (__v8di)(__m512i)INDEX,	     \
 			      (void *)ADDR, (int)SCALE, (int)HINT)
--- gcc/testsuite/gcc.target/i386/sse-14.c.jj	2017-01-12 22:28:47.000000000 +0100
+++ gcc/testsuite/gcc.target/i386/sse-14.c	2017-02-13 10:15:36.815163082 +0100
@@ -50,7 +50,7 @@ 
   { return func (A, B, imm1, imm2, imm3); }
 
 #define test_2vx(func, op1_type, op2_type, imm1, imm2)     \
-  _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L) \
+  void _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L) \
   { func (A, B, imm1, imm2); }
 
 #define test_3(func, type, op1_type, op2_type, op3_type, imm)		\
@@ -74,7 +74,7 @@ 
   { func (A, B, C, imm); }
 
 #define test_3vx(func, op1_type, op2_type, op3_type, imm1, imm2)   \
-  int _CONCAT(_,func) (op1_type A, op2_type B,			   \
+  void _CONCAT(_,func) (op1_type A, op2_type B,			   \
 		       op3_type C, int const I, int const L)       \
   { func (A, B, C, imm1, imm2); }
 
@@ -520,6 +520,14 @@  test_4x (_mm_maskz_fixupimm_round_sd, __
 test_4x (_mm_maskz_fixupimm_round_ss, __m128, __mmask8, __m128, __m128, __m128i, 1, 8)
 
 /* avx512pfintrin.h */
+test_2vx (_mm512_prefetch_i32gather_ps, __m512i, void const *, 1, _MM_HINT_T0)
+test_2vx (_mm512_prefetch_i32scatter_ps, void const *, __m512i, 1, _MM_HINT_T0)
+test_2vx (_mm512_prefetch_i64gather_ps, __m512i, void const *, 1, _MM_HINT_T0)
+test_2vx (_mm512_prefetch_i64scatter_ps, void const *, __m512i, 1, _MM_HINT_T0)
+test_2vx (_mm512_prefetch_i32gather_pd, __m256i, void const *, 1, _MM_HINT_T0)
+test_2vx (_mm512_prefetch_i32scatter_pd, void const *, __m256i, 1, _MM_HINT_T0)
+test_2vx (_mm512_prefetch_i64gather_pd, __m512i, void const *, 1, _MM_HINT_T0)
+test_2vx (_mm512_prefetch_i64scatter_pd, void const *, __m512i, 1, _MM_HINT_T0)
 test_3vx (_mm512_mask_prefetch_i32gather_ps, __m512i, __mmask16, void const *, 1, _MM_HINT_T0)
 test_3vx (_mm512_mask_prefetch_i32scatter_ps, void const *, __mmask16, __m512i, 1, _MM_HINT_T0)
 test_3vx (_mm512_mask_prefetch_i64gather_ps, __m512i, __mmask8, void const *, 1, _MM_HINT_T0)
--- gcc/testsuite/gcc.target/i386/sse-22.c.jj	2017-01-12 22:28:47.000000000 +0100
+++ gcc/testsuite/gcc.target/i386/sse-22.c	2017-02-13 10:15:54.781926974 +0100
@@ -50,7 +50,7 @@ 
   { return func (A, B, imm1, imm2, imm3); }
 
 #define test_2vx(func, op1_type, op2_type, imm1, imm2)     \
-  _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L) \
+  void _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L) \
   { func (A, B, imm1, imm2); }
 
 #define test_3(func, type, op1_type, op2_type, op3_type, imm)		\
@@ -74,7 +74,7 @@ 
   { func (A, B, C, imm); }
 
 #define test_3vx(func, op1_type, op2_type, op3_type, imm1, imm2)   \
-  int _CONCAT(_,func) (op1_type A, op2_type B,             	   \
+  void _CONCAT(_,func) (op1_type A, op2_type B,             	   \
 		       op3_type C, int const I, int const L)       \
   { func (A, B, C, imm1, imm2); }
 
@@ -647,11 +647,18 @@  test_4x (_mm_maskz_fixupimm_round_sd, __
 test_4x (_mm_maskz_fixupimm_round_ss, __m128, __mmask8, __m128, __m128, __m128i, 1, 8)
 
 /* avx512pfintrin.h */
+test_2vx (_mm512_prefetch_i32gather_ps, __m512i, void const *, 1, _MM_HINT_T0)
+test_2vx (_mm512_prefetch_i32scatter_ps, void const *, __m512i, 1, _MM_HINT_T0)
+test_2vx (_mm512_prefetch_i64gather_ps, __m512i, void const *, 1, _MM_HINT_T0)
+test_2vx (_mm512_prefetch_i64scatter_ps, void const *, __m512i, 1, _MM_HINT_T0)
+test_2vx (_mm512_prefetch_i32gather_pd, __m256i, void const *, 1, _MM_HINT_T0)
+test_2vx (_mm512_prefetch_i32scatter_pd, void const *, __m256i, 1, _MM_HINT_T0)
+test_2vx (_mm512_prefetch_i64gather_pd, __m512i, long long *, 1, _MM_HINT_T0)
+test_2vx (_mm512_prefetch_i64scatter_pd, void const *, __m512i, 1, _MM_HINT_T0)
 test_3vx (_mm512_mask_prefetch_i32gather_ps, __m512i, __mmask16, void const *, 1, _MM_HINT_T0)
 test_3vx (_mm512_mask_prefetch_i32scatter_ps, void const *, __mmask16, __m512i, 1, _MM_HINT_T0)
 test_3vx (_mm512_mask_prefetch_i64gather_ps, __m512i, __mmask8, void const *, 1, _MM_HINT_T0)
 test_3vx (_mm512_mask_prefetch_i64scatter_ps, void const *, __mmask8, __m512i, 1, _MM_HINT_T0)
-
 test_3vx (_mm512_mask_prefetch_i32gather_pd, __m256i, __mmask8, void const *, 1, _MM_HINT_T0)
 test_3vx (_mm512_mask_prefetch_i32scatter_pd, void const *, __mmask8, __m256i, 1, _MM_HINT_T0)
 test_3vx (_mm512_mask_prefetch_i64gather_pd, __m512i, __mmask8, long long *, 1, _MM_HINT_T0)
--- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dpd-1.c.jj	2016-05-22 12:20:09.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dpd-1.c	2017-02-13 10:22:18.154888926 +0100
@@ -1,6 +1,6 @@ 
 /* { dg-do compile } */
 /* { dg-options "-mavx512pf -O2" } */
-/* { dg-final { scan-assembler-times "vgatherpf0dpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgatherpf0dpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
 
 #include <immintrin.h>
 
@@ -11,5 +11,6 @@  void *base;
 void extern
 avx512pf_test (void)
 {
+  _mm512_prefetch_i32gather_pd (idx, base, 8, _MM_HINT_T0);
   _mm512_mask_prefetch_i32gather_pd (idx, m8, base, 8, _MM_HINT_T0);
 }
--- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dps-1.c.jj	2016-05-22 12:20:23.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0dps-1.c	2017-02-13 10:22:21.992838490 +0100
@@ -1,6 +1,6 @@ 
 /* { dg-do compile } */
 /* { dg-options "-mavx512pf -O2" } */
-/* { dg-final { scan-assembler-times "vgatherpf0dps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgatherpf0dps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
 
 #include <immintrin.h>
 
@@ -11,5 +11,6 @@  int *base;
 void extern
 avx512pf_test (void)
 {
+  _mm512_prefetch_i32gather_ps (idx, base, 8, _MM_HINT_T0);
   _mm512_mask_prefetch_i32gather_ps (idx, m16, base, 8, _MM_HINT_T0);
 }
--- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qpd-1.c.jj	2016-05-22 12:20:32.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qpd-1.c	2017-02-13 10:22:26.097784546 +0100
@@ -1,6 +1,6 @@ 
 /* { dg-do compile } */
 /* { dg-options "-mavx512pf -O2" } */
-/* { dg-final { scan-assembler-times "vgatherpf0qpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgatherpf0qpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
 
 #include <immintrin.h>
 
@@ -11,5 +11,6 @@  int *base;
 void extern
 avx512pf_test (void)
 {
+  _mm512_prefetch_i64gather_pd (idx, base, 8, _MM_HINT_T0);
   _mm512_mask_prefetch_i64gather_pd (idx, m8, base, 8, _MM_HINT_T0);
 }
--- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qps-1.c.jj	2016-05-22 12:20:13.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf0qps-1.c	2017-02-13 10:22:29.952733887 +0100
@@ -1,6 +1,6 @@ 
 /* { dg-do compile } */
 /* { dg-options "-mavx512pf -O2" } */
-/* { dg-final { scan-assembler-times "vgatherpf0qps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgatherpf0qps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
 
 #include <immintrin.h>
 
@@ -11,5 +11,6 @@  int *base;
 void extern
 avx512pf_test (void)
 {
+  _mm512_prefetch_i64gather_ps (idx, base, 8, _MM_HINT_T0);
   _mm512_mask_prefetch_i64gather_ps (idx, m8, base, 8, _MM_HINT_T0);
 }
--- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dpd-1.c.jj	2016-05-22 12:20:31.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dpd-1.c	2017-02-13 10:22:33.866682452 +0100
@@ -1,6 +1,6 @@ 
 /* { dg-do compile } */
 /* { dg-options "-mavx512pf -O2" } */
-/* { dg-final { scan-assembler-times "vgatherpf1dpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgatherpf1dpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
 
 #include <immintrin.h>
 
@@ -11,5 +11,6 @@  int *base;
 void extern
 avx512pf_test (void)
 {
+  _mm512_prefetch_i32gather_pd (idx, base, 8, _MM_HINT_T1);
   _mm512_mask_prefetch_i32gather_pd (idx, m8, base, 8, _MM_HINT_T1);
 }
--- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dps-1.c.jj	2016-05-22 12:20:13.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1dps-1.c	2017-02-13 10:22:37.749631425 +0100
@@ -1,6 +1,6 @@ 
 /* { dg-do compile } */
 /* { dg-options "-mavx512pf -O2" } */
-/* { dg-final { scan-assembler-times "vgatherpf1dps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgatherpf1dps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
 
 #include <immintrin.h>
 
@@ -11,5 +11,6 @@  int *base;
 void extern
 avx512pf_test (void)
 {
+  _mm512_prefetch_i32gather_ps (idx, base, 8, _MM_HINT_T1);
   _mm512_mask_prefetch_i32gather_ps (idx, m16, base, 8, _MM_HINT_T1);
 }
--- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qpd-1.c.jj	2016-05-22 12:20:18.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qpd-1.c	2017-02-13 10:22:41.703579464 +0100
@@ -1,6 +1,6 @@ 
 /* { dg-do compile } */
 /* { dg-options "-mavx512pf -O2" } */
-/* { dg-final { scan-assembler-times "vgatherpf1qpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgatherpf1qpd\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
 
 #include <immintrin.h>
 
@@ -11,5 +11,6 @@  int *base;
 void extern
 avx512pf_test (void)
 {
+  _mm512_prefetch_i64gather_pd (idx, base, 8, _MM_HINT_T1);
   _mm512_mask_prefetch_i64gather_pd (idx, m8, base, 8, _MM_HINT_T1);
 }
--- gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qps-1.c.jj	2016-05-22 12:20:02.000000000 +0200
+++ gcc/testsuite/gcc.target/i386/avx512pf-vgatherpf1qps-1.c	2017-02-13 10:22:46.684514008 +0100
@@ -1,6 +1,6 @@ 
 /* { dg-do compile } */
 /* { dg-options "-mavx512pf -O2" } */
-/* { dg-final { scan-assembler-times "vgatherpf1qps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vgatherpf1qps\[ \\t\]+\[^\{\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 2 } } */
 
 #include <immintrin.h>
 
@@ -11,5 +11,6 @@  int *base;
 void extern
 avx512pf_test (void)
 {
+  _mm512_prefetch_i64gather_ps (idx, base, 8, _MM_HINT_T1);
   _mm512_mask_prefetch_i64gather_ps (idx, m8, base, 8, _MM_HINT_T1);
 }