diff mbox series

x86: Fix -O0 intrinsic *gather*/*scatter* macros [PR94832]

Message ID 20200429110441.GC2424@tucnak
State New
Headers show
Series x86: Fix -O0 intrinsic *gather*/*scatter* macros [PR94832] | expand

Commit Message

Jakub Jelinek April 29, 2020, 11:04 a.m. UTC
Hi!

As reported in the PR, most intrinsic -O0 macro argument uses are
properly wrapped in ()s or used in a context where passing a complex
expression as the argument doesn't pose a problem (e.g. when the
macro argument use is in between commas, or between ( and comma, or
between comma and ) etc.).  The gather/scatter macros in particular
don't do this: if one passes e.g. x + y as an argument, the
corresponding inline function would cast the whole argument, but
the macro does (int) ARG, which expands to (int) x + y rather than
(int) (x + y).

The following patch fixes those issues in *gather/*scatter*.  Additionally,
the AVX2 macros were passing an incorrect mask, e.g.
(__v2df)_mm_set1_pd((double)(long long int) -1)
which is IMHO equivalent to
(__v2df){-1.0, -1.0}
when what is really wanted is a __v2df vector with all bits set.
I've used what the inline functions use for those cases.

Tested on x86_64-linux, ok for trunk if it passes full bootstrap/regtest
on x86_64-linux and i686-linux?

2020-04-29  Jakub Jelinek  <jakub@redhat.com>

	PR target/94832
	* config/i386/avx2intrin.h (_mm_mask_i32gather_pd,
	_mm256_mask_i32gather_pd, _mm_mask_i64gather_pd,
	_mm256_mask_i64gather_pd, _mm_mask_i32gather_ps,
	_mm256_mask_i32gather_ps, _mm_mask_i64gather_ps,
	_mm256_mask_i64gather_ps, _mm_i32gather_epi64,
	_mm_mask_i32gather_epi64, _mm256_i32gather_epi64,
	_mm256_mask_i32gather_epi64, _mm_i64gather_epi64,
	_mm_mask_i64gather_epi64, _mm256_i64gather_epi64,
	_mm256_mask_i64gather_epi64, _mm_i32gather_epi32,
	_mm_mask_i32gather_epi32, _mm256_i32gather_epi32,
	_mm256_mask_i32gather_epi32, _mm_i64gather_epi32,
	_mm_mask_i64gather_epi32, _mm256_i64gather_epi32,
	_mm256_mask_i64gather_epi32): Surround macro parameter uses with
	parens.
	(_mm_i32gather_pd, _mm256_i32gather_pd, _mm_i64gather_pd,
	_mm256_i64gather_pd, _mm_i32gather_ps, _mm256_i32gather_ps,
	_mm_i64gather_ps, _mm256_i64gather_ps): Likewise.  Don't use a
	vector containing -1.0 or -1.0f elts as the mask; instead use a
	vector with all bits set, built by comparing zero operands for
	equality with _mm_cmpeq_p? or _mm256_cmp_p? (_CMP_EQ_OQ).
	* config/i386/avx512fintrin.h (_mm512_i32gather_ps,
	_mm512_mask_i32gather_ps, _mm512_i32gather_pd,
	_mm512_mask_i32gather_pd, _mm512_i64gather_ps,
	_mm512_mask_i64gather_ps, _mm512_i64gather_pd,
	_mm512_mask_i64gather_pd, _mm512_i32gather_epi32,
	_mm512_mask_i32gather_epi32, _mm512_i32gather_epi64,
	_mm512_mask_i32gather_epi64, _mm512_i64gather_epi32,
	_mm512_mask_i64gather_epi32, _mm512_i64gather_epi64,
	_mm512_mask_i64gather_epi64, _mm512_i32scatter_ps,
	_mm512_mask_i32scatter_ps, _mm512_i32scatter_pd,
	_mm512_mask_i32scatter_pd, _mm512_i64scatter_ps,
	_mm512_mask_i64scatter_ps, _mm512_i64scatter_pd,
	_mm512_mask_i64scatter_pd, _mm512_i32scatter_epi32,
	_mm512_mask_i32scatter_epi32, _mm512_i32scatter_epi64,
	_mm512_mask_i32scatter_epi64, _mm512_i64scatter_epi32,
	_mm512_mask_i64scatter_epi32, _mm512_i64scatter_epi64,
	_mm512_mask_i64scatter_epi64): Surround macro parameter uses with
	parens.
	* config/i386/avx512pfintrin.h (_mm512_prefetch_i32gather_pd,
	_mm512_prefetch_i32gather_ps, _mm512_mask_prefetch_i32gather_pd,
	_mm512_mask_prefetch_i32gather_ps, _mm512_prefetch_i64gather_pd,
	_mm512_prefetch_i64gather_ps, _mm512_mask_prefetch_i64gather_pd,
	_mm512_mask_prefetch_i64gather_ps, _mm512_prefetch_i32scatter_pd,
	_mm512_prefetch_i32scatter_ps, _mm512_mask_prefetch_i32scatter_pd,
	_mm512_mask_prefetch_i32scatter_ps, _mm512_prefetch_i64scatter_pd,
	_mm512_prefetch_i64scatter_ps, _mm512_mask_prefetch_i64scatter_pd,
	_mm512_mask_prefetch_i64scatter_ps): Likewise.
	* config/i386/avx512vlintrin.h (_mm256_mmask_i32gather_ps,
	_mm_mmask_i32gather_ps, _mm256_mmask_i32gather_pd,
	_mm_mmask_i32gather_pd, _mm256_mmask_i64gather_ps,
	_mm_mmask_i64gather_ps, _mm256_mmask_i64gather_pd,
	_mm_mmask_i64gather_pd, _mm256_mmask_i32gather_epi32,
	_mm_mmask_i32gather_epi32, _mm256_mmask_i32gather_epi64,
	_mm_mmask_i32gather_epi64, _mm256_mmask_i64gather_epi32,
	_mm_mmask_i64gather_epi32, _mm256_mmask_i64gather_epi64,
	_mm_mmask_i64gather_epi64, _mm256_i32scatter_ps,
	_mm256_mask_i32scatter_ps, _mm_i32scatter_ps, _mm_mask_i32scatter_ps,
	_mm256_i32scatter_pd, _mm256_mask_i32scatter_pd, _mm_i32scatter_pd,
	_mm_mask_i32scatter_pd, _mm256_i64scatter_ps,
	_mm256_mask_i64scatter_ps, _mm_i64scatter_ps, _mm_mask_i64scatter_ps,
	_mm256_i64scatter_pd, _mm256_mask_i64scatter_pd, _mm_i64scatter_pd,
	_mm_mask_i64scatter_pd, _mm256_i32scatter_epi32,
	_mm256_mask_i32scatter_epi32, _mm_i32scatter_epi32,
	_mm_mask_i32scatter_epi32, _mm256_i32scatter_epi64,
	_mm256_mask_i32scatter_epi64, _mm_i32scatter_epi64,
	_mm_mask_i32scatter_epi64, _mm256_i64scatter_epi32,
	_mm256_mask_i64scatter_epi32, _mm_i64scatter_epi32,
	_mm_mask_i64scatter_epi32, _mm256_i64scatter_epi64,
	_mm256_mask_i64scatter_epi64, _mm_i64scatter_epi64,
	_mm_mask_i64scatter_epi64): Likewise.


	Jakub

Comments

Uros Bizjak April 29, 2020, 11:12 a.m. UTC | #1
On Wed, Apr 29, 2020 at 1:04 PM Jakub Jelinek <jakub@redhat.com> wrote:
>
> Hi!
>
> As reported in the PR, while most intrinsic -O0 macro argument uses
> are properly wrapped in ()s or used in context where having a complex
> expression passed as the argument doesn't pose a problem (e.g. when
> macro argument use is in between commas, or between ( and comma, or
> between comma and ) etc.), especially the gather/scatter macros don't do
> this and if one passes to some macro e.g. x + y as argument, the
> corresponding inline function would do cast on the argument, but
> the macro does (int) ARG, then it is (int) x + y rather than (int) (x + y).
>
> The following patch fixes those issues in *gather/*scatter*; additionally,
> the AVX2 macros were passing incorrect mask of e.g.
> (__v2df)_mm_set1_pd((double)(long long int) -1)
> which is IMHO equivalent to
> (__v2df){-1.0, -1.0}
> when it really wants to pass __v2df vector with all bits set.
> I've used what the inline functions use for those cases.
>
> Tested on x86_64-linux, ok for trunk if it passes full bootstrap/regtest
> on x86_64-linux and i686-linux?
>
> 2020-04-29  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/94832
>         * config/i386/avx2intrin.h (_mm_mask_i32gather_pd,
>         _mm256_mask_i32gather_pd, _mm_mask_i64gather_pd,
>         _mm256_mask_i64gather_pd, _mm_mask_i32gather_ps,
>         _mm256_mask_i32gather_ps, _mm_mask_i64gather_ps,
>         _mm256_mask_i64gather_ps, _mm_i32gather_epi64,
>         _mm_mask_i32gather_epi64, _mm256_i32gather_epi64,
>         _mm256_mask_i32gather_epi64, _mm_i64gather_epi64,
>         _mm_mask_i64gather_epi64, _mm256_i64gather_epi64,
>         _mm256_mask_i64gather_epi64, _mm_i32gather_epi32,
>         _mm_mask_i32gather_epi32, _mm256_i32gather_epi32,
>         _mm256_mask_i32gather_epi32, _mm_i64gather_epi32,
>         _mm_mask_i64gather_epi32, _mm256_i64gather_epi32,
>         _mm256_mask_i64gather_epi32): Surround macro parameter uses with
>         parens.
>         (_mm_i32gather_pd, _mm256_i32gather_pd, _mm_i64gather_pd,
>         _mm256_i64gather_pd, _mm_i32gather_ps, _mm256_i32gather_ps,
>         _mm_i64gather_ps, _mm256_i64gather_ps): Likewise.  Don't use
>         as mask vector containing -1.0 or -1.0f elts, but instead vector
>         with all bits set using _mm*_cmpeq_p? with zero operands.
>         * config/i386/avx512fintrin.h (_mm512_i32gather_ps,
>         _mm512_mask_i32gather_ps, _mm512_i32gather_pd,
>         _mm512_mask_i32gather_pd, _mm512_i64gather_ps,
>         _mm512_mask_i64gather_ps, _mm512_i64gather_pd,
>         _mm512_mask_i64gather_pd, _mm512_i32gather_epi32,
>         _mm512_mask_i32gather_epi32, _mm512_i32gather_epi64,
>         _mm512_mask_i32gather_epi64, _mm512_i64gather_epi32,
>         _mm512_mask_i64gather_epi32, _mm512_i64gather_epi64,
>         _mm512_mask_i64gather_epi64, _mm512_i32scatter_ps,
>         _mm512_mask_i32scatter_ps, _mm512_i32scatter_pd,
>         _mm512_mask_i32scatter_pd, _mm512_i64scatter_ps,
>         _mm512_mask_i64scatter_ps, _mm512_i64scatter_pd,
>         _mm512_mask_i64scatter_pd, _mm512_i32scatter_epi32,
>         _mm512_mask_i32scatter_epi32, _mm512_i32scatter_epi64,
>         _mm512_mask_i32scatter_epi64, _mm512_i64scatter_epi32,
>         _mm512_mask_i64scatter_epi32, _mm512_i64scatter_epi64,
>         _mm512_mask_i64scatter_epi64): Surround macro parameter uses with
>         parens.
>         * config/i386/avx512pfintrin.h (_mm512_prefetch_i32gather_pd,
>         _mm512_prefetch_i32gather_ps, _mm512_mask_prefetch_i32gather_pd,
>         _mm512_mask_prefetch_i32gather_ps, _mm512_prefetch_i64gather_pd,
>         _mm512_prefetch_i64gather_ps, _mm512_mask_prefetch_i64gather_pd,
>         _mm512_mask_prefetch_i64gather_ps, _mm512_prefetch_i32scatter_pd,
>         _mm512_prefetch_i32scatter_ps, _mm512_mask_prefetch_i32scatter_pd,
>         _mm512_mask_prefetch_i32scatter_ps, _mm512_prefetch_i64scatter_pd,
>         _mm512_prefetch_i64scatter_ps, _mm512_mask_prefetch_i64scatter_pd,
>         _mm512_mask_prefetch_i64scatter_ps): Likewise.
>         * config/i386/avx512vlintrin.h (_mm256_mmask_i32gather_ps,
>         _mm_mmask_i32gather_ps, _mm256_mmask_i32gather_pd,
>         _mm_mmask_i32gather_pd, _mm256_mmask_i64gather_ps,
>         _mm_mmask_i64gather_ps, _mm256_mmask_i64gather_pd,
>         _mm_mmask_i64gather_pd, _mm256_mmask_i32gather_epi32,
>         _mm_mmask_i32gather_epi32, _mm256_mmask_i32gather_epi64,
>         _mm_mmask_i32gather_epi64, _mm256_mmask_i64gather_epi32,
>         _mm_mmask_i64gather_epi32, _mm256_mmask_i64gather_epi64,
>         _mm_mmask_i64gather_epi64, _mm256_i32scatter_ps,
>         _mm256_mask_i32scatter_ps, _mm_i32scatter_ps, _mm_mask_i32scatter_ps,
>         _mm256_i32scatter_pd, _mm256_mask_i32scatter_pd, _mm_i32scatter_pd,
>         _mm_mask_i32scatter_pd, _mm256_i64scatter_ps,
>         _mm256_mask_i64scatter_ps, _mm_i64scatter_ps, _mm_mask_i64scatter_ps,
>         _mm256_i64scatter_pd, _mm256_mask_i64scatter_pd, _mm_i64scatter_pd,
>         _mm_mask_i64scatter_pd, _mm256_i32scatter_epi32,
>         _mm256_mask_i32scatter_epi32, _mm_i32scatter_epi32,
>         _mm_mask_i32scatter_epi32, _mm256_i32scatter_epi64,
>         _mm256_mask_i32scatter_epi64, _mm_i32scatter_epi64,
>         _mm_mask_i32scatter_epi64, _mm256_i64scatter_epi32,
>         _mm256_mask_i64scatter_epi32, _mm_i64scatter_epi32,
>         _mm_mask_i64scatter_epi32, _mm256_i64scatter_epi64,
>         _mm256_mask_i64scatter_epi64, _mm_i64scatter_epi64,
>         _mm_mask_i64scatter_epi64): Likewise.

LGTM.

Thanks,
Uros.

> --- gcc/config/i386/avx2intrin.h.jj     2020-03-05 19:44:14.969713607 +0100
> +++ gcc/config/i386/avx2intrin.h        2020-04-29 11:40:46.216509672 +0200
> @@ -1670,234 +1670,246 @@ _mm256_mask_i64gather_epi32 (__m128i __s
>  #else /* __OPTIMIZE__ */
>  #define _mm_i32gather_pd(BASE, INDEX, SCALE)                           \
>    (__m128d) __builtin_ia32_gathersiv2df ((__v2df) _mm_setzero_pd (),   \
> -                                        (double const *)BASE,          \
> -                                        (__v4si)(__m128i)INDEX,        \
> -                                        (__v2df)_mm_set1_pd(           \
> -                                          (double)(long long int) -1), \
> -                                        (int)SCALE)
> -
> -#define _mm_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE)    \
> -  (__m128d) __builtin_ia32_gathersiv2df ((__v2df)(__m128d)SRC,  \
> -                                        (double const *)BASE,   \
> -                                        (__v4si)(__m128i)INDEX, \
> -                                        (__v2df)(__m128d)MASK,  \
> -                                        (int)SCALE)
> +                                        (double const *) (BASE),       \
> +                                        (__v4si)(__m128i) (INDEX),     \
> +                                        (__v2df)                       \
> +                                        _mm_cmpeq_pd (_mm_setzero_pd (),\
> +                                                      _mm_setzero_pd ()),\
> +                                        (int) (SCALE))
> +
> +#define _mm_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE)           \
> +  (__m128d) __builtin_ia32_gathersiv2df ((__v2df)(__m128d) (SRC),      \
> +                                        (double const *) (BASE),       \
> +                                        (__v4si)(__m128i) (INDEX),     \
> +                                        (__v2df)(__m128d) (MASK),      \
> +                                        (int) (SCALE))
>
>  #define _mm256_i32gather_pd(BASE, INDEX, SCALE)                                \
>    (__m256d) __builtin_ia32_gathersiv4df ((__v4df) _mm256_setzero_pd (),        \
> -                                        (double const *)BASE,          \
> -                                        (__v4si)(__m128i)INDEX,        \
> -                                        (__v4df)_mm256_set1_pd(        \
> -                                          (double)(long long int) -1), \
> -                                        (int)SCALE)
> -
> -#define _mm256_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE)         \
> -  (__m256d) __builtin_ia32_gathersiv4df ((__v4df)(__m256d)SRC,  \
> -                                        (double const *)BASE,   \
> -                                        (__v4si)(__m128i)INDEX, \
> -                                        (__v4df)(__m256d)MASK,  \
> -                                        (int)SCALE)
> +                                        (double const *) (BASE),       \
> +                                        (__v4si)(__m128i) (INDEX),     \
> +                                        (__v4df)                       \
> +                                        _mm256_cmp_pd (_mm256_setzero_pd (),\
> +                                                       _mm256_setzero_pd (),\
> +                                                       _CMP_EQ_OQ),    \
> +                                        (int) (SCALE))
> +
> +#define _mm256_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE)                \
> +  (__m256d) __builtin_ia32_gathersiv4df ((__v4df)(__m256d) (SRC),      \
> +                                        (double const *) (BASE),       \
> +                                        (__v4si)(__m128i) (INDEX),     \
> +                                        (__v4df)(__m256d) (MASK),      \
> +                                        (int) (SCALE))
>
>  #define _mm_i64gather_pd(BASE, INDEX, SCALE)                           \
>    (__m128d) __builtin_ia32_gatherdiv2df ((__v2df) _mm_setzero_pd (),   \
> -                                        (double const *)BASE,          \
> -                                        (__v2di)(__m128i)INDEX,        \
> -                                        (__v2df)_mm_set1_pd(           \
> -                                          (double)(long long int) -1), \
> -                                        (int)SCALE)
> -
> -#define _mm_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE)    \
> -  (__m128d) __builtin_ia32_gatherdiv2df ((__v2df)(__m128d)SRC,  \
> -                                        (double const *)BASE,   \
> -                                        (__v2di)(__m128i)INDEX, \
> -                                        (__v2df)(__m128d)MASK,  \
> -                                        (int)SCALE)
> +                                        (double const *) (BASE),       \
> +                                        (__v2di)(__m128i) (INDEX),     \
> +                                        (__v2df)                       \
> +                                        _mm_cmpeq_pd (_mm_setzero_pd (),\
> +                                                      _mm_setzero_pd ()),\
> +                                        (int) (SCALE))
> +
> +#define _mm_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE)           \
> +  (__m128d) __builtin_ia32_gatherdiv2df ((__v2df)(__m128d) (SRC),      \
> +                                        (double const *) (BASE),       \
> +                                        (__v2di)(__m128i) (INDEX),     \
> +                                        (__v2df)(__m128d) (MASK),      \
> +                                        (int) (SCALE))
>
>  #define _mm256_i64gather_pd(BASE, INDEX, SCALE)                                \
>    (__m256d) __builtin_ia32_gatherdiv4df ((__v4df) _mm256_setzero_pd (),        \
> -                                        (double const *)BASE,          \
> -                                        (__v4di)(__m256i)INDEX,        \
> -                                        (__v4df)_mm256_set1_pd(        \
> -                                          (double)(long long int) -1), \
> -                                        (int)SCALE)
> -
> -#define _mm256_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE)         \
> -  (__m256d) __builtin_ia32_gatherdiv4df ((__v4df)(__m256d)SRC,  \
> -                                        (double const *)BASE,   \
> -                                        (__v4di)(__m256i)INDEX, \
> -                                        (__v4df)(__m256d)MASK,  \
> -                                        (int)SCALE)
> +                                        (double const *) (BASE),       \
> +                                        (__v4di)(__m256i) (INDEX),     \
> +                                        (__v4df)                       \
> +                                        _mm256_cmp_pd (_mm256_setzero_pd (),\
> +                                                       _mm256_setzero_pd (),\
> +                                                       _CMP_EQ_OQ),    \
> +                                        (int) (SCALE))
> +
> +#define _mm256_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE)                \
> +  (__m256d) __builtin_ia32_gatherdiv4df ((__v4df)(__m256d) (SRC),      \
> +                                        (double const *) (BASE),       \
> +                                        (__v4di)(__m256i) (INDEX),     \
> +                                        (__v4df)(__m256d) (MASK),      \
> +                                        (int) (SCALE))
>
>  #define _mm_i32gather_ps(BASE, INDEX, SCALE)                           \
>    (__m128) __builtin_ia32_gathersiv4sf ((__v4sf) _mm_setzero_ps (),    \
> -                                       (float const *)BASE,            \
> -                                       (__v4si)(__m128i)INDEX,         \
> -                                       _mm_set1_ps ((float)(int) -1),  \
> -                                       (int)SCALE)
> -
> -#define _mm_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE)    \
> -  (__m128) __builtin_ia32_gathersiv4sf ((__v4sf)(__m128)SRC,    \
> -                                       (float const *)BASE,     \
> -                                       (__v4si)(__m128i)INDEX,  \
> -                                       (__v4sf)(__m128)MASK,    \
> -                                       (int)SCALE)
> -
> -#define _mm256_i32gather_ps(BASE, INDEX, SCALE)                               \
> -  (__m256) __builtin_ia32_gathersiv8sf ((__v8sf) _mm256_setzero_ps (), \
> -                                       (float const *)BASE,           \
> -                                       (__v8si)(__m256i)INDEX,        \
> -                                       (__v8sf)_mm256_set1_ps (       \
> -                                         (float)(int) -1),            \
> -                                       (int)SCALE)
> -
> -#define _mm256_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE) \
> -  (__m256) __builtin_ia32_gathersiv8sf ((__v8sf)(__m256)SRC,   \
> -                                       (float const *)BASE,    \
> -                                       (__v8si)(__m256i)INDEX, \
> -                                       (__v8sf)(__m256)MASK,   \
> -                                       (int)SCALE)
> +                                       (float const *) (BASE),         \
> +                                       (__v4si)(__m128i) (INDEX),      \
> +                                       (__v4sf)                        \
> +                                       _mm_cmpeq_ps (_mm_setzero_ps (),\
> +                                                     _mm_setzero_ps ()),\
> +                                       (int) (SCALE))
> +
> +#define _mm_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE)           \
> +  (__m128) __builtin_ia32_gathersiv4sf ((__v4sf)(__m128) (SRC),                \
> +                                       (float const *) (BASE),         \
> +                                       (__v4si)(__m128i) (INDEX),      \
> +                                       (__v4sf)(__m128) (MASK),        \
> +                                       (int) (SCALE))
> +
> +#define _mm256_i32gather_ps(BASE, INDEX, SCALE)                                \
> +  (__m256) __builtin_ia32_gathersiv8sf ((__v8sf) _mm256_setzero_ps (), \
> +                                       (float const *) (BASE),         \
> +                                       (__v8si)(__m256i) (INDEX),      \
> +                                       (__v8sf)                        \
> +                                       _mm256_cmp_ps (_mm256_setzero_ps (),\
> +                                                      _mm256_setzero_ps (),\
> +                                                      _CMP_EQ_OQ),     \
> +                                       (int) (SCALE))
> +
> +#define _mm256_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE)                \
> +  (__m256) __builtin_ia32_gathersiv8sf ((__v8sf)(__m256) (SRC),                \
> +                                       (float const *) (BASE),         \
> +                                       (__v8si)(__m256i) (INDEX),      \
> +                                       (__v8sf)(__m256) (MASK),        \
> +                                       (int) (SCALE))
>
>  #define _mm_i64gather_ps(BASE, INDEX, SCALE)                           \
>    (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf) _mm_setzero_pd (),    \
> -                                       (float const *)BASE,            \
> -                                       (__v2di)(__m128i)INDEX,         \
> -                                       (__v4sf)_mm_set1_ps (           \
> -                                         (float)(int) -1),             \
> -                                       (int)SCALE)
> -
> -#define _mm_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE)    \
> -  (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)(__m128)SRC,    \
> -                                       (float const *)BASE,     \
> -                                       (__v2di)(__m128i)INDEX,  \
> -                                       (__v4sf)(__m128)MASK,    \
> -                                       (int)SCALE)
> +                                       (float const *) (BASE),         \
> +                                       (__v2di)(__m128i) (INDEX),      \
> +                                       (__v4sf)                        \
> +                                       _mm_cmpeq_ps (_mm_setzero_ps (),\
> +                                                     _mm_setzero_ps ()),\
> +                                       (int) (SCALE))
> +
> +#define _mm_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE)           \
> +  (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)(__m128) (SRC),                \
> +                                       (float const *) (BASE),         \
> +                                       (__v2di)(__m128i) (INDEX),      \
> +                                       (__v4sf)(__m128) (MASK),        \
> +                                       (int) (SCALE))
>
>  #define _mm256_i64gather_ps(BASE, INDEX, SCALE)                                \
>    (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf) _mm_setzero_ps (), \
> -                                          (float const *)BASE,         \
> -                                          (__v4di)(__m256i)INDEX,      \
> -                                          (__v4sf)_mm_set1_ps(         \
> -                                            (float)(int) -1),          \
> -                                          (int)SCALE)
> -
> -#define _mm256_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE)           \
> -  (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf)(__m128)SRC,           \
> -                                          (float const *)BASE,    \
> -                                          (__v4di)(__m256i)INDEX, \
> -                                          (__v4sf)(__m128)MASK,   \
> -                                          (int)SCALE)
> +                                          (float const *) (BASE),      \
> +                                          (__v4di)(__m256i) (INDEX),   \
> +                                          (__v4sf)                     \
> +                                          _mm_cmpeq_ps (_mm_setzero_ps (),\
> +                                                        _mm_setzero_ps ()),\
> +                                          (int) (SCALE))
> +
> +#define _mm256_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE)                \
> +  (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf)(__m128) (SRC),     \
> +                                          (float const *) (BASE),      \
> +                                          (__v4di)(__m256i) (INDEX),   \
> +                                          (__v4sf)(__m128) (MASK),     \
> +                                          (int) (SCALE))
>
>  #define _mm_i32gather_epi64(BASE, INDEX, SCALE)                                \
>    (__m128i) __builtin_ia32_gathersiv2di ((__v2di) _mm_setzero_si128 (), \
> -                                        (long long const *)BASE,       \
> -                                        (__v4si)(__m128i)INDEX,        \
> +                                        (long long const *) (BASE),    \
> +                                        (__v4si)(__m128i) (INDEX),     \
>                                          (__v2di)_mm_set1_epi64x (-1),  \
> -                                        (int)SCALE)
> +                                        (int) (SCALE))
>
> -#define _mm_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE)          \
> -  (__m128i) __builtin_ia32_gathersiv2di ((__v2di)(__m128i)SRC,   \
> -                                        (long long const *)BASE, \
> -                                        (__v4si)(__m128i)INDEX,  \
> -                                        (__v2di)(__m128i)MASK,   \
> -                                        (int)SCALE)
> +#define _mm_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE)                \
> +  (__m128i) __builtin_ia32_gathersiv2di ((__v2di)(__m128i) (SRC),      \
> +                                        (long long const *) (BASE),    \
> +                                        (__v4si)(__m128i) (INDEX),     \
> +                                        (__v2di)(__m128i) (MASK),      \
> +                                        (int) (SCALE))
>
>  #define _mm256_i32gather_epi64(BASE, INDEX, SCALE)                        \
>    (__m256i) __builtin_ia32_gathersiv4di ((__v4di) _mm256_setzero_si256 (), \
> -                                        (long long const *)BASE,          \
> -                                        (__v4si)(__m128i)INDEX,           \
> +                                        (long long const *) (BASE),       \
> +                                        (__v4si)(__m128i) (INDEX),        \
>                                          (__v4di)_mm256_set1_epi64x (-1),  \
> -                                        (int)SCALE)
> +                                        (int) (SCALE))
>
> -#define _mm256_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
> -  (__m256i) __builtin_ia32_gathersiv4di ((__v4di)(__m256i)SRC,    \
> -                                        (long long const *)BASE,  \
> -                                        (__v4si)(__m128i)INDEX,   \
> -                                        (__v4di)(__m256i)MASK,    \
> -                                        (int)SCALE)
> +#define _mm256_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE)     \
> +  (__m256i) __builtin_ia32_gathersiv4di ((__v4di)(__m256i) (SRC),      \
> +                                        (long long const *) (BASE),    \
> +                                        (__v4si)(__m128i) (INDEX),     \
> +                                        (__v4di)(__m256i) (MASK),      \
> +                                        (int) (SCALE))
>
>  #define _mm_i64gather_epi64(BASE, INDEX, SCALE)                                \
>    (__m128i) __builtin_ia32_gatherdiv2di ((__v2di) _mm_setzero_si128 (), \
> -                                        (long long const *)BASE,       \
> -                                        (__v2di)(__m128i)INDEX,        \
> +                                        (long long const *) (BASE),    \
> +                                        (__v2di)(__m128i) (INDEX),     \
>                                          (__v2di)_mm_set1_epi64x (-1),  \
> -                                        (int)SCALE)
> +                                        (int) (SCALE))
>
> -#define _mm_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE)          \
> -  (__m128i) __builtin_ia32_gatherdiv2di ((__v2di)(__m128i)SRC,   \
> -                                        (long long const *)BASE, \
> -                                        (__v2di)(__m128i)INDEX,  \
> -                                        (__v2di)(__m128i)MASK,   \
> -                                        (int)SCALE)
> +#define _mm_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE)                \
> +  (__m128i) __builtin_ia32_gatherdiv2di ((__v2di)(__m128i) (SRC),      \
> +                                        (long long const *) (BASE),    \
> +                                        (__v2di)(__m128i) (INDEX),     \
> +                                        (__v2di)(__m128i) (MASK),      \
> +                                        (int) (SCALE))
>
>  #define _mm256_i64gather_epi64(BASE, INDEX, SCALE)                        \
>    (__m256i) __builtin_ia32_gatherdiv4di ((__v4di) _mm256_setzero_si256 (), \
> -                                        (long long const *)BASE,          \
> -                                        (__v4di)(__m256i)INDEX,           \
> +                                        (long long const *) (BASE),       \
> +                                        (__v4di)(__m256i) (INDEX),        \
>                                          (__v4di)_mm256_set1_epi64x (-1),  \
> -                                        (int)SCALE)
> +                                        (int) (SCALE))
>
> -#define _mm256_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
> -  (__m256i) __builtin_ia32_gatherdiv4di ((__v4di)(__m256i)SRC,    \
> -                                        (long long const *)BASE,  \
> -                                        (__v4di)(__m256i)INDEX,   \
> -                                        (__v4di)(__m256i)MASK,    \
> -                                        (int)SCALE)
> +#define _mm256_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE)     \
> +  (__m256i) __builtin_ia32_gatherdiv4di ((__v4di)(__m256i) (SRC),      \
> +                                        (long long const *) (BASE),    \
> +                                        (__v4di)(__m256i) (INDEX),     \
> +                                        (__v4di)(__m256i) (MASK),      \
> +                                        (int) (SCALE))
>
>  #define _mm_i32gather_epi32(BASE, INDEX, SCALE)                                \
>    (__m128i) __builtin_ia32_gathersiv4si ((__v4si) _mm_setzero_si128 (),        \
> -                                        (int const *)BASE,             \
> -                                        (__v4si)(__m128i)INDEX,        \
> +                                        (int const *) (BASE),          \
> +                                        (__v4si)(__m128i) (INDEX),     \
>                                          (__v4si)_mm_set1_epi32 (-1),   \
> -                                        (int)SCALE)
> +                                        (int) (SCALE))
>
> -#define _mm_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
> -  (__m128i) __builtin_ia32_gathersiv4si ((__v4si)(__m128i)SRC, \
> -                                       (int const *)BASE,      \
> -                                       (__v4si)(__m128i)INDEX, \
> -                                       (__v4si)(__m128i)MASK,  \
> -                                       (int)SCALE)
> +#define _mm_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE)                \
> +  (__m128i) __builtin_ia32_gathersiv4si ((__v4si)(__m128i) (SRC),      \
> +                                       (int const *) (BASE),           \
> +                                       (__v4si)(__m128i) (INDEX),      \
> +                                       (__v4si)(__m128i) (MASK),       \
> +                                       (int) (SCALE))
>
>  #define _mm256_i32gather_epi32(BASE, INDEX, SCALE)                        \
>    (__m256i) __builtin_ia32_gathersiv8si ((__v8si) _mm256_setzero_si256 (), \
> -                                        (int const *)BASE,                \
> -                                        (__v8si)(__m256i)INDEX,           \
> +                                        (int const *) (BASE),             \
> +                                        (__v8si)(__m256i) (INDEX),        \
>                                          (__v8si)_mm256_set1_epi32 (-1),   \
> -                                        (int)SCALE)
> +                                        (int) (SCALE))
>
> -#define _mm256_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
> -  (__m256i) __builtin_ia32_gathersiv8si ((__v8si)(__m256i)SRC,    \
> -                                       (int const *)BASE,         \
> -                                       (__v8si)(__m256i)INDEX,    \
> -                                       (__v8si)(__m256i)MASK,     \
> -                                       (int)SCALE)
> +#define _mm256_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE)     \
> +  (__m256i) __builtin_ia32_gathersiv8si ((__v8si)(__m256i) (SRC),      \
> +                                       (int const *) (BASE),           \
> +                                       (__v8si)(__m256i) (INDEX),      \
> +                                       (__v8si)(__m256i) (MASK),       \
> +                                       (int) (SCALE))
>
>  #define _mm_i64gather_epi32(BASE, INDEX, SCALE)                                \
>    (__m128i) __builtin_ia32_gatherdiv4si ((__v4si) _mm_setzero_si128 (),        \
> -                                        (int const *)BASE,             \
> -                                        (__v2di)(__m128i)INDEX,        \
> +                                        (int const *) (BASE),          \
> +                                        (__v2di)(__m128i) (INDEX),     \
>                                          (__v4si)_mm_set1_epi32 (-1),   \
> -                                        (int)SCALE)
> +                                        (int) (SCALE))
>
> -#define _mm_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
> -  (__m128i) __builtin_ia32_gatherdiv4si ((__v4si)(__m128i)SRC, \
> -                                       (int const *)BASE,      \
> -                                       (__v2di)(__m128i)INDEX, \
> -                                       (__v4si)(__m128i)MASK,  \
> -                                       (int)SCALE)
> +#define _mm_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE)                \
> +  (__m128i) __builtin_ia32_gatherdiv4si ((__v4si)(__m128i) (SRC),      \
> +                                       (int const *) (BASE),           \
> +                                       (__v2di)(__m128i) (INDEX),      \
> +                                       (__v4si)(__m128i) (MASK),       \
> +                                       (int) (SCALE))
>
>  #define _mm256_i64gather_epi32(BASE, INDEX, SCALE)                        \
>    (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si) _mm_setzero_si128 (), \
> -                                           (int const *)BASE,             \
> -                                           (__v4di)(__m256i)INDEX,        \
> +                                           (int const *) (BASE),          \
> +                                           (__v4di)(__m256i) (INDEX),     \
>                                             (__v4si)_mm_set1_epi32(-1),    \
> -                                           (int)SCALE)
> +                                           (int) (SCALE))
>
> -#define _mm256_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
> -  (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si)(__m128i)SRC,  \
> -                                          (int const *)BASE,      \
> -                                          (__v4di)(__m256i)INDEX, \
> -                                          (__v4si)(__m128i)MASK,  \
> -                                          (int)SCALE)
> +#define _mm256_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE)     \
> +  (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si)(__m128i) (SRC),   \
> +                                          (int const *) (BASE),        \
> +                                          (__v4di)(__m256i) (INDEX),   \
> +                                          (__v4si)(__m128i) (MASK),    \
> +                                          (int) (SCALE))
>  #endif  /* __OPTIMIZE__ */
>
>  #ifdef __DISABLE_AVX2__
> --- gcc/config/i386/avx512fintrin.h.jj  2020-01-12 11:54:36.315414887 +0100
> +++ gcc/config/i386/avx512fintrin.h     2020-04-29 11:05:59.796380601 +0200
> @@ -10468,179 +10468,189 @@ _mm512_mask_i64scatter_epi64 (void *__ad
>  #else
>  #define _mm512_i32gather_ps(INDEX, ADDR, SCALE)                                \
>    (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(),\
> -                                        (void const *)ADDR,            \
> -                                        (__v16si)(__m512i)INDEX,       \
> -                                        (__mmask16)0xFFFF, (int)SCALE)
> +                                        (void const *) (ADDR),         \
> +                                        (__v16si)(__m512i) (INDEX),    \
> +                                        (__mmask16)0xFFFF,             \
> +                                        (int) (SCALE))
>
>  #define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)      \
> -  (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512)V1OLD,       \
> -                                        (void const *)ADDR,            \
> -                                        (__v16si)(__m512i)INDEX,       \
> -                                        (__mmask16)MASK, (int)SCALE)
> +  (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512) (V1OLD),    \
> +                                        (void const *) (ADDR),         \
> +                                        (__v16si)(__m512i) (INDEX),    \
> +                                        (__mmask16) (MASK),            \
> +                                        (int) (SCALE))
>
>  #define _mm512_i32gather_pd(INDEX, ADDR, SCALE)                                \
>    (__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(),        \
> -                                        (void const *)ADDR,            \
> -                                        (__v8si)(__m256i)INDEX,        \
> -                                        (__mmask8)0xFF, (int)SCALE)
> +                                        (void const *) (ADDR),         \
> +                                        (__v8si)(__m256i) (INDEX),     \
> +                                        (__mmask8)0xFF, (int) (SCALE))
>
>  #define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)      \
> -  (__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d)V1OLD,       \
> -                                        (void const *)ADDR,            \
> -                                        (__v8si)(__m256i)INDEX,        \
> -                                        (__mmask8)MASK, (int)SCALE)
> +  (__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d) (V1OLD),    \
> +                                        (void const *) (ADDR),         \
> +                                        (__v8si)(__m256i) (INDEX),     \
> +                                        (__mmask8) (MASK),             \
> +                                        (int) (SCALE))
>
>  #define _mm512_i64gather_ps(INDEX, ADDR, SCALE)                                \
>    (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(),        \
> -                                        (void const *)ADDR,            \
> -                                        (__v8di)(__m512i)INDEX,        \
> -                                        (__mmask8)0xFF, (int)SCALE)
> +                                        (void const *) (ADDR),         \
> +                                        (__v8di)(__m512i) (INDEX),     \
> +                                        (__mmask8)0xFF, (int) (SCALE))
>
>  #define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)      \
> -  (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256)V1OLD,                \
> -                                        (void const *)ADDR,            \
> -                                        (__v8di)(__m512i)INDEX,        \
> -                                        (__mmask8)MASK, (int)SCALE)
> +  (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256) (V1OLD),     \
> +                                        (void const *) (ADDR),         \
> +                                        (__v8di)(__m512i) (INDEX),     \
> +                                        (__mmask8) (MASK),             \
> +                                        (int) (SCALE))
>
>  #define _mm512_i64gather_pd(INDEX, ADDR, SCALE)                                \
>    (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(),        \
> -                                        (void const *)ADDR,            \
> -                                        (__v8di)(__m512i)INDEX,        \
> -                                        (__mmask8)0xFF, (int)SCALE)
> +                                        (void const *) (ADDR),         \
> +                                        (__v8di)(__m512i) (INDEX),     \
> +                                        (__mmask8)0xFF, (int) (SCALE))
>
>  #define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)      \
> -  (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d)V1OLD,       \
> -                                        (void const *)ADDR,            \
> -                                        (__v8di)(__m512i)INDEX,        \
> -                                        (__mmask8)MASK, (int)SCALE)
> +  (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d) (V1OLD),    \
> +                                        (void const *) (ADDR),         \
> +                                        (__v8di)(__m512i) (INDEX),     \
> +                                        (__mmask8) (MASK),             \
> +                                        (int) (SCALE))
>
>  #define _mm512_i32gather_epi32(INDEX, ADDR, SCALE)                     \
> -  (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_epi32 (),  \
> -                                         (void const *)ADDR,           \
> -                                         (__v16si)(__m512i)INDEX,      \
> -                                         (__mmask16)0xFFFF, (int)SCALE)
> +  (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_epi32 (),\
> +                                         (void const *) (ADDR),        \
> +                                         (__v16si)(__m512i) (INDEX),   \
> +                                         (__mmask16)0xFFFF,            \
> +                                         (int) (SCALE))
>
>  #define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)   \
> -  (__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i)V1OLD,     \
> -                                         (void const *)ADDR,           \
> -                                         (__v16si)(__m512i)INDEX,      \
> -                                         (__mmask16)MASK, (int)SCALE)
> +  (__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i) (V1OLD),  \
> +                                         (void const *) (ADDR),        \
> +                                         (__v16si)(__m512i) (INDEX),   \
> +                                         (__mmask16) (MASK),           \
> +                                         (int) (SCALE))
>
>  #define _mm512_i32gather_epi64(INDEX, ADDR, SCALE)                     \
> -  (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_epi32 (),    \
> -                                        (void const *)ADDR,            \
> -                                        (__v8si)(__m256i)INDEX,        \
> -                                        (__mmask8)0xFF, (int)SCALE)
> +  (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_epi32 (),\
> +                                        (void const *) (ADDR),         \
> +                                        (__v8si)(__m256i) (INDEX),     \
> +                                        (__mmask8)0xFF, (int) (SCALE))
>
>  #define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)   \
> -  (__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i)V1OLD,       \
> -                                        (void const *)ADDR,            \
> -                                        (__v8si)(__m256i)INDEX,        \
> -                                        (__mmask8)MASK, (int)SCALE)
> -
> -#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE)                       \
> -  (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(), \
> -                                         (void const *)ADDR,             \
> -                                         (__v8di)(__m512i)INDEX,         \
> -                                         (__mmask8)0xFF, (int)SCALE)
> +  (__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i) (V1OLD),    \
> +                                        (void const *) (ADDR),         \
> +                                        (__v8si)(__m256i) (INDEX),     \
> +                                        (__mmask8) (MASK),             \
> +                                        (int) (SCALE))
> +
> +#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE)                        \
> +  (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(),\
> +                                         (void const *) (ADDR),           \
> +                                         (__v8di)(__m512i) (INDEX),       \
> +                                         (__mmask8)0xFF, (int) (SCALE))
>
>  #define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)   \
> -  (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i)V1OLD,      \
> -                                         (void const *)ADDR,           \
> -                                         (__v8di)(__m512i)INDEX,       \
> -                                         (__mmask8)MASK, (int)SCALE)
> +  (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i) (V1OLD),   \
> +                                         (void const *) (ADDR),        \
> +                                         (__v8di)(__m512i) (INDEX),    \
> +                                         (__mmask8) (MASK),            \
> +                                         (int) (SCALE))
>
>  #define _mm512_i64gather_epi64(INDEX, ADDR, SCALE)                     \
> -  (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_epi32 (),    \
> -                                        (void const *)ADDR,            \
> -                                        (__v8di)(__m512i)INDEX,        \
> -                                        (__mmask8)0xFF, (int)SCALE)
> +  (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_epi32 (),\
> +                                        (void const *) (ADDR),         \
> +                                        (__v8di)(__m512i) (INDEX),     \
> +                                        (__mmask8)0xFF, (int) (SCALE))
>
>  #define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)   \
> -  (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i)V1OLD,       \
> -                                        (void const *)ADDR,            \
> -                                        (__v8di)(__m512i)INDEX,        \
> -                                        (__mmask8)MASK, (int)SCALE)
> +  (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i) (V1OLD),    \
> +                                        (void const *) (ADDR),         \
> +                                        (__v8di)(__m512i) (INDEX),     \
> +                                        (__mmask8) (MASK),             \
> +                                        (int) (SCALE))
>
>  #define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE)                   \
> -  __builtin_ia32_scattersiv16sf ((void *)ADDR, (__mmask16)0xFFFF,      \
> -                                (__v16si)(__m512i)INDEX,               \
> -                                (__v16sf)(__m512)V1, (int)SCALE)
> +  __builtin_ia32_scattersiv16sf ((void *) (ADDR), (__mmask16)0xFFFF,   \
> +                                (__v16si)(__m512i) (INDEX),            \
> +                                (__v16sf)(__m512) (V1), (int) (SCALE))
>
>  #define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE)                \
> -  __builtin_ia32_scattersiv16sf ((void *)ADDR, (__mmask16)MASK,                \
> -                                (__v16si)(__m512i)INDEX,               \
> -                                (__v16sf)(__m512)V1, (int)SCALE)
> +  __builtin_ia32_scattersiv16sf ((void *) (ADDR), (__mmask16) (MASK),  \
> +                                (__v16si)(__m512i) (INDEX),            \
> +                                (__v16sf)(__m512) (V1), (int) (SCALE))
>
>  #define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE)                   \
> -  __builtin_ia32_scattersiv8df ((void *)ADDR, (__mmask8)0xFF,          \
> -                               (__v8si)(__m256i)INDEX,                 \
> -                               (__v8df)(__m512d)V1, (int)SCALE)
> +  __builtin_ia32_scattersiv8df ((void *) (ADDR), (__mmask8)0xFF,       \
> +                               (__v8si)(__m256i) (INDEX),              \
> +                               (__v8df)(__m512d) (V1), (int) (SCALE))
>
>  #define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE)                \
> -  __builtin_ia32_scattersiv8df ((void *)ADDR, (__mmask8)MASK,          \
> -                               (__v8si)(__m256i)INDEX,                 \
> -                               (__v8df)(__m512d)V1, (int)SCALE)
> +  __builtin_ia32_scattersiv8df ((void *) (ADDR), (__mmask8) (MASK),    \
> +                               (__v8si)(__m256i) (INDEX),              \
> +                               (__v8df)(__m512d) (V1), (int) (SCALE))
>
>  #define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE)                   \
> -  __builtin_ia32_scatterdiv16sf ((void *)ADDR, (__mmask8)0xFF,         \
> -                                (__v8di)(__m512i)INDEX,                \
> -                                (__v8sf)(__m256)V1, (int)SCALE)
> +  __builtin_ia32_scatterdiv16sf ((void *) (ADDR), (__mmask8)0xFF,      \
> +                                (__v8di)(__m512i) (INDEX),             \
> +                                (__v8sf)(__m256) (V1), (int) (SCALE))
>
>  #define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE)                \
> -  __builtin_ia32_scatterdiv16sf ((void *)ADDR, (__mmask16)MASK,                \
> -                                (__v8di)(__m512i)INDEX,                \
> -                                (__v8sf)(__m256)V1, (int)SCALE)
> +  __builtin_ia32_scatterdiv16sf ((void *) (ADDR), (__mmask16) (MASK),  \
> +                                (__v8di)(__m512i) (INDEX),             \
> +                                (__v8sf)(__m256) (V1), (int) (SCALE))
>
>  #define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE)                   \
> -  __builtin_ia32_scatterdiv8df ((void *)ADDR, (__mmask8)0xFF,          \
> -                               (__v8di)(__m512i)INDEX,                 \
> -                               (__v8df)(__m512d)V1, (int)SCALE)
> +  __builtin_ia32_scatterdiv8df ((void *) (ADDR), (__mmask8)0xFF,       \
> +                               (__v8di)(__m512i) (INDEX),              \
> +                               (__v8df)(__m512d) (V1), (int) (SCALE))
>
>  #define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE)                \
> -  __builtin_ia32_scatterdiv8df ((void *)ADDR, (__mmask8)MASK,          \
> -                               (__v8di)(__m512i)INDEX,                 \
> -                               (__v8df)(__m512d)V1, (int)SCALE)
> +  __builtin_ia32_scatterdiv8df ((void *) (ADDR), (__mmask8) (MASK),    \
> +                               (__v8di)(__m512i) (INDEX),              \
> +                               (__v8df)(__m512d) (V1), (int) (SCALE))
>
>  #define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE)                        \
> -  __builtin_ia32_scattersiv16si ((void *)ADDR, (__mmask16)0xFFFF,      \
> -                                (__v16si)(__m512i)INDEX,               \
> -                                (__v16si)(__m512i)V1, (int)SCALE)
> +  __builtin_ia32_scattersiv16si ((void *) (ADDR), (__mmask16)0xFFFF,   \
> +                                (__v16si)(__m512i) (INDEX),            \
> +                                (__v16si)(__m512i) (V1), (int) (SCALE))
>
>  #define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)     \
> -  __builtin_ia32_scattersiv16si ((void *)ADDR, (__mmask16)MASK,                \
> -                                (__v16si)(__m512i)INDEX,               \
> -                                (__v16si)(__m512i)V1, (int)SCALE)
> +  __builtin_ia32_scattersiv16si ((void *) (ADDR), (__mmask16) (MASK),  \
> +                                (__v16si)(__m512i) (INDEX),            \
> +                                (__v16si)(__m512i) (V1), (int) (SCALE))
>
>  #define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE)                        \
> -  __builtin_ia32_scattersiv8di ((void *)ADDR, (__mmask8)0xFF,          \
> -                               (__v8si)(__m256i)INDEX,                 \
> -                               (__v8di)(__m512i)V1, (int)SCALE)
> +  __builtin_ia32_scattersiv8di ((void *) (ADDR), (__mmask8)0xFF,       \
> +                               (__v8si)(__m256i) (INDEX),              \
> +                               (__v8di)(__m512i) (V1), (int) (SCALE))
>
>  #define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)     \
> -  __builtin_ia32_scattersiv8di ((void *)ADDR, (__mmask8)MASK,          \
> -                               (__v8si)(__m256i)INDEX,                 \
> -                               (__v8di)(__m512i)V1, (int)SCALE)
> +  __builtin_ia32_scattersiv8di ((void *) (ADDR), (__mmask8) (MASK),    \
> +                               (__v8si)(__m256i) (INDEX),              \
> +                               (__v8di)(__m512i) (V1), (int) (SCALE))
>
>  #define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE)                        \
> -  __builtin_ia32_scatterdiv16si ((void *)ADDR, (__mmask8)0xFF,         \
> -                                (__v8di)(__m512i)INDEX,                \
> -                                (__v8si)(__m256i)V1, (int)SCALE)
> +  __builtin_ia32_scatterdiv16si ((void *) (ADDR), (__mmask8)0xFF,      \
> +                                (__v8di)(__m512i) (INDEX),             \
> +                                (__v8si)(__m256i) (V1), (int) (SCALE))
>
>  #define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)     \
> -  __builtin_ia32_scatterdiv16si ((void *)ADDR, (__mmask8)MASK,         \
> -                                (__v8di)(__m512i)INDEX,                \
> -                                (__v8si)(__m256i)V1, (int)SCALE)
> +  __builtin_ia32_scatterdiv16si ((void *) (ADDR), (__mmask8) (MASK),   \
> +                                (__v8di)(__m512i) (INDEX),             \
> +                                (__v8si)(__m256i) (V1), (int) (SCALE))
>
>  #define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE)                        \
> -  __builtin_ia32_scatterdiv8di ((void *)ADDR, (__mmask8)0xFF,          \
> -                               (__v8di)(__m512i)INDEX,                 \
> -                               (__v8di)(__m512i)V1, (int)SCALE)
> +  __builtin_ia32_scatterdiv8di ((void *) (ADDR), (__mmask8)0xFF,       \
> +                               (__v8di)(__m512i) (INDEX),              \
> +                               (__v8di)(__m512i) (V1), (int) (SCALE))
>
>  #define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)     \
> -  __builtin_ia32_scatterdiv8di ((void *)ADDR, (__mmask8)MASK,          \
> -                               (__v8di)(__m512i)INDEX,                 \
> -                               (__v8di)(__m512i)V1, (int)SCALE)
> +  __builtin_ia32_scatterdiv8di ((void *) (ADDR), (__mmask8) (MASK),    \
> +                               (__v8di)(__m512i) (INDEX),              \
> +                               (__v8di)(__m512i) (V1), (int) (SCALE))
>  #endif
>
>  extern __inline __m512d
> --- gcc/config/i386/avx512pfintrin.h.jj 2020-01-12 11:54:36.315414887 +0100
> +++ gcc/config/i386/avx512pfintrin.h    2020-04-29 11:10:11.272661182 +0200
> @@ -192,68 +192,73 @@ _mm512_mask_prefetch_i64scatter_ps (void
>
>  #else
>  #define _mm512_prefetch_i32gather_pd(INDEX, ADDR, SCALE, HINT)              \
> -  __builtin_ia32_gatherpfdpd ((__mmask8)0xFF, (__v8si)(__m256i)INDEX,       \
> -                             (void const *)ADDR, (int)SCALE, (int)HINT)
> +  __builtin_ia32_gatherpfdpd ((__mmask8)0xFF, (__v8si)(__m256i) (INDEX),     \
> +                             (void const *) (ADDR), (int) (SCALE),          \
> +                             (int) (HINT))
>
>  #define _mm512_prefetch_i32gather_ps(INDEX, ADDR, SCALE, HINT)              \
> -  __builtin_ia32_gatherpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i)INDEX,    \
> -                             (void const *)ADDR, (int)SCALE, (int)HINT)
> +  __builtin_ia32_gatherpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i) (INDEX), \
> +                             (void const *) (ADDR), (int) (SCALE),          \
> +                             (int) (HINT))
>
>  #define _mm512_mask_prefetch_i32gather_pd(INDEX, MASK, ADDR, SCALE, HINT)    \
> -  __builtin_ia32_gatherpfdpd ((__mmask8)MASK, (__v8si)(__m256i)INDEX,       \
> -                             (void const *)ADDR, (int)SCALE, (int)HINT)
> +  __builtin_ia32_gatherpfdpd ((__mmask8) (MASK), (__v8si)(__m256i) (INDEX),  \
> +                             (void const *) (ADDR), (int) (SCALE),          \
> +                             (int) (HINT))
>
>  #define _mm512_mask_prefetch_i32gather_ps(INDEX, MASK, ADDR, SCALE, HINT)    \
> -  __builtin_ia32_gatherpfdps ((__mmask16)MASK, (__v16si)(__m512i)INDEX,      \
> -                             (void const *)ADDR, (int)SCALE, (int)HINT)
> +  __builtin_ia32_gatherpfdps ((__mmask16) (MASK), (__v16si)(__m512i) (INDEX),\
> +                             (void const *) (ADDR), (int) (SCALE),          \
> +                             (int) (HINT))
>
>  #define _mm512_prefetch_i64gather_pd(INDEX, ADDR, SCALE, HINT)              \
> -  __builtin_ia32_gatherpfqpd ((__mmask8)0xFF, (__v8di)(__m512i)INDEX,       \
> -                             (void *)ADDR, (int)SCALE, (int)HINT)
> +  __builtin_ia32_gatherpfqpd ((__mmask8)0xFF, (__v8di)(__m512i) (INDEX),     \
> +                             (void *) (ADDR), (int) (SCALE), (int) (HINT))
>
>  #define _mm512_prefetch_i64gather_ps(INDEX, ADDR, SCALE, HINT)              \
> -  __builtin_ia32_gatherpfqps ((__mmask8)0xFF, (__v8di)(__m512i)INDEX,       \
> -                             (void *)ADDR, (int)SCALE, (int)HINT)
> +  __builtin_ia32_gatherpfqps ((__mmask8)0xFF, (__v8di)(__m512i) (INDEX),     \
> +                             (void *) (ADDR), (int) (SCALE), (int) (HINT))
>
>  #define _mm512_mask_prefetch_i64gather_pd(INDEX, MASK, ADDR, SCALE, HINT)    \
> -  __builtin_ia32_gatherpfqpd ((__mmask8)MASK, (__v8di)(__m512i)INDEX,       \
> -                             (void *)ADDR, (int)SCALE, (int)HINT)
> +  __builtin_ia32_gatherpfqpd ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX),  \
> +                             (void *) (ADDR), (int) (SCALE), (int) (HINT))
>
>  #define _mm512_mask_prefetch_i64gather_ps(INDEX, MASK, ADDR, SCALE, HINT)    \
> -  __builtin_ia32_gatherpfqps ((__mmask8)MASK, (__v8di)(__m512i)INDEX,       \
> -                             (void *)ADDR, (int)SCALE, (int)HINT)
> +  __builtin_ia32_gatherpfqps ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX),  \
> +                             (void *) (ADDR), (int) (SCALE), (int) (HINT))
>
>  #define _mm512_prefetch_i32scatter_pd(ADDR, INDEX, SCALE, HINT)              \
> -  __builtin_ia32_scatterpfdpd ((__mmask8)0xFF, (__v8si)(__m256i)INDEX,       \
> -                              (void *)ADDR, (int)SCALE, (int)HINT)
> +  __builtin_ia32_scatterpfdpd ((__mmask8)0xFF, (__v8si)(__m256i) (INDEX),    \
> +                              (void *) (ADDR), (int) (SCALE), (int) (HINT))
>
>  #define _mm512_prefetch_i32scatter_ps(ADDR, INDEX, SCALE, HINT)              \
> -  __builtin_ia32_scatterpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i)INDEX,   \
> -                              (void *)ADDR, (int)SCALE, (int)HINT)
> +  __builtin_ia32_scatterpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i) (INDEX),\
> +                              (void *) (ADDR), (int) (SCALE), (int) (HINT))
>
>  #define _mm512_mask_prefetch_i32scatter_pd(ADDR, MASK, INDEX, SCALE, HINT)   \
> -  __builtin_ia32_scatterpfdpd ((__mmask8)MASK, (__v8si)(__m256i)INDEX,       \
> -                              (void *)ADDR, (int)SCALE, (int)HINT)
> +  __builtin_ia32_scatterpfdpd ((__mmask8) (MASK), (__v8si)(__m256i) (INDEX), \
> +                              (void *) (ADDR), (int) (SCALE), (int) (HINT))
>
>  #define _mm512_mask_prefetch_i32scatter_ps(ADDR, MASK, INDEX, SCALE, HINT)   \
> -  __builtin_ia32_scatterpfdps ((__mmask16)MASK, (__v16si)(__m512i)INDEX,     \
> -                              (void *)ADDR, (int)SCALE, (int)HINT)
> +  __builtin_ia32_scatterpfdps ((__mmask16) (MASK),                          \
> +                              (__v16si)(__m512i) (INDEX),                   \
> +                              (void *) (ADDR), (int) (SCALE), (int) (HINT))
>
>  #define _mm512_prefetch_i64scatter_pd(ADDR, INDEX, SCALE, HINT)              \
> -  __builtin_ia32_scatterpfqpd ((__mmask8)0xFF, (__v8di)(__m512i)INDEX,      \
> -                              (void *)ADDR, (int)SCALE, (int)HINT)
> +  __builtin_ia32_scatterpfqpd ((__mmask8)0xFF, (__v8di)(__m512i) (INDEX),    \
> +                              (void *) (ADDR), (int) (SCALE), (int) (HINT))
>
>  #define _mm512_prefetch_i64scatter_ps(ADDR, INDEX, SCALE, HINT)              \
> -  __builtin_ia32_scatterpfqps ((__mmask8)0xFF, (__v8di)(__m512i)INDEX,      \
> -                              (void *)ADDR, (int)SCALE, (int)HINT)
> +  __builtin_ia32_scatterpfqps ((__mmask8)0xFF, (__v8di)(__m512i) (INDEX),    \
> +                              (void *) (ADDR), (int) (SCALE), (int) (HINT))
>
>  #define _mm512_mask_prefetch_i64scatter_pd(ADDR, MASK, INDEX, SCALE, HINT)   \
> -  __builtin_ia32_scatterpfqpd ((__mmask8)MASK, (__v8di)(__m512i)INDEX,      \
> -                              (void *)ADDR, (int)SCALE, (int)HINT)
> +  __builtin_ia32_scatterpfqpd ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX), \
> +                              (void *) (ADDR), (int) (SCALE), (int) (HINT))
>
>  #define _mm512_mask_prefetch_i64scatter_ps(ADDR, MASK, INDEX, SCALE, HINT)   \
> -  __builtin_ia32_scatterpfqps ((__mmask8)MASK, (__v8di)(__m512i)INDEX,      \
> -                              (void *)ADDR, (int)SCALE, (int)HINT)
> +  __builtin_ia32_scatterpfqps ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX), \
> +                              (void *) (ADDR), (int) (SCALE), (int) (HINT))
>  #endif
>
>  #ifdef __DISABLE_AVX512PF__
> --- gcc/config/i386/avx512vlintrin.h.jj 2020-01-12 11:54:36.316414872 +0100
> +++ gcc/config/i386/avx512vlintrin.h    2020-04-29 11:16:27.671094124 +0200
> @@ -13000,260 +13000,276 @@ _mm256_permutex_pd (__m256d __X, const i
>                                            (__mmask8)(U)))
>
>  #define _mm256_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)     \
> -  (__m256) __builtin_ia32_gather3siv8sf ((__v8sf)(__m256)V1OLD,                \
> -                                        (void const *)ADDR,            \
> -                                        (__v8si)(__m256i)INDEX,        \
> -                                        (__mmask8)MASK, (int)SCALE)
> +  (__m256) __builtin_ia32_gather3siv8sf ((__v8sf)(__m256) (V1OLD),     \
> +                                        (void const *) (ADDR),         \
> +                                        (__v8si)(__m256i) (INDEX),     \
> +                                        (__mmask8) (MASK),             \
> +                                        (int) (SCALE))
>
>  #define _mm_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)                \
> -  (__m128) __builtin_ia32_gather3siv4sf ((__v4sf)(__m128)V1OLD,                \
> -                                        (void const *)ADDR,            \
> -                                        (__v4si)(__m128i)INDEX,        \
> -                                        (__mmask8)MASK, (int)SCALE)
> +  (__m128) __builtin_ia32_gather3siv4sf ((__v4sf)(__m128) (V1OLD),     \
> +                                        (void const *) (ADDR),         \
> +                                        (__v4si)(__m128i) (INDEX),     \
> +                                        (__mmask8) (MASK),             \
> +                                        (int) (SCALE))
>
>  #define _mm256_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)     \
> -  (__m256d) __builtin_ia32_gather3siv4df ((__v4df)(__m256d)V1OLD,      \
> -                                         (void const *)ADDR,           \
> -                                         (__v4si)(__m128i)INDEX,       \
> -                                         (__mmask8)MASK, (int)SCALE)
> +  (__m256d) __builtin_ia32_gather3siv4df ((__v4df)(__m256d) (V1OLD),   \
> +                                         (void const *) (ADDR),        \
> +                                         (__v4si)(__m128i) (INDEX),    \
> +                                         (__mmask8) (MASK),            \
> +                                         (int) (SCALE))
>
>  #define _mm_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)                \
> -  (__m128d) __builtin_ia32_gather3siv2df ((__v2df)(__m128d)V1OLD,      \
> -                                         (void const *)ADDR,           \
> -                                         (__v4si)(__m128i)INDEX,       \
> -                                         (__mmask8)MASK, (int)SCALE)
> +  (__m128d) __builtin_ia32_gather3siv2df ((__v2df)(__m128d) (V1OLD),   \
> +                                         (void const *) (ADDR),        \
> +                                         (__v4si)(__m128i) (INDEX),    \
> +                                         (__mmask8) (MASK),            \
> +                                         (int) (SCALE))
>
>  #define _mm256_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)     \
> -  (__m128) __builtin_ia32_gather3div8sf ((__v4sf)(__m128)V1OLD,                \
> -                                        (void const *)ADDR,            \
> -                                        (__v4di)(__m256i)INDEX,        \
> -                                        (__mmask8)MASK, (int)SCALE)
> +  (__m128) __builtin_ia32_gather3div8sf ((__v4sf)(__m128) (V1OLD),     \
> +                                        (void const *) (ADDR),         \
> +                                        (__v4di)(__m256i) (INDEX),     \
> +                                        (__mmask8) (MASK),             \
> +                                        (int) (SCALE))
>
>  #define _mm_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)                \
> -  (__m128) __builtin_ia32_gather3div4sf ((__v4sf)(__m128)V1OLD,                \
> -                                        (void const *)ADDR,            \
> -                                        (__v2di)(__m128i)INDEX,        \
> -                                        (__mmask8)MASK, (int)SCALE)
> +  (__m128) __builtin_ia32_gather3div4sf ((__v4sf)(__m128) (V1OLD),     \
> +                                        (void const *) (ADDR),         \
> +                                        (__v2di)(__m128i) (INDEX),     \
> +                                        (__mmask8) (MASK),             \
> +                                        (int) (SCALE))
>
>  #define _mm256_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)     \
> -  (__m256d) __builtin_ia32_gather3div4df ((__v4df)(__m256d)V1OLD,      \
> -                                         (void const *)ADDR,           \
> -                                         (__v4di)(__m256i)INDEX,       \
> -                                         (__mmask8)MASK, (int)SCALE)
> +  (__m256d) __builtin_ia32_gather3div4df ((__v4df)(__m256d) (V1OLD),   \
> +                                         (void const *) (ADDR),        \
> +                                         (__v4di)(__m256i) (INDEX),    \
> +                                         (__mmask8) (MASK),            \
> +                                         (int) (SCALE))
>
>  #define _mm_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)                \
> -  (__m128d) __builtin_ia32_gather3div2df ((__v2df)(__m128d)V1OLD,      \
> -                                         (void const *)ADDR,           \
> -                                         (__v2di)(__m128i)INDEX,       \
> -                                         (__mmask8)MASK, (int)SCALE)
> +  (__m128d) __builtin_ia32_gather3div2df ((__v2df)(__m128d) (V1OLD),   \
> +                                         (void const *) (ADDR),        \
> +                                         (__v2di)(__m128i) (INDEX),    \
> +                                         (__mmask8) (MASK),            \
> +                                         (int) (SCALE))
>
>  #define _mm256_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)  \
> -  (__m256i) __builtin_ia32_gather3siv8si ((__v8si)(__m256i)V1OLD,      \
> -                                         (void const *)ADDR,           \
> -                                         (__v8si)(__m256i)INDEX,       \
> -                                         (__mmask8)MASK, (int)SCALE)
> +  (__m256i) __builtin_ia32_gather3siv8si ((__v8si)(__m256i) (V1OLD),   \
> +                                         (void const *) (ADDR),        \
> +                                         (__v8si)(__m256i) (INDEX),    \
> +                                         (__mmask8) (MASK),            \
> +                                         (int) (SCALE))
>
>  #define _mm_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)     \
> -  (__m128i) __builtin_ia32_gather3siv4si ((__v4si)(__m128i)V1OLD,      \
> -                                         (void const *)ADDR,           \
> -                                         (__v4si)(__m128i)INDEX,       \
> -                                         (__mmask8)MASK, (int)SCALE)
> +  (__m128i) __builtin_ia32_gather3siv4si ((__v4si)(__m128i) (V1OLD),   \
> +                                         (void const *) (ADDR),        \
> +                                         (__v4si)(__m128i) (INDEX),    \
> +                                         (__mmask8) (MASK),            \
> +                                         (int) (SCALE))
>
>  #define _mm256_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)  \
> -  (__m256i) __builtin_ia32_gather3siv4di ((__v4di)(__m256i)V1OLD,      \
> -                                         (void const *)ADDR,           \
> -                                         (__v4si)(__m128i)INDEX,       \
> -                                         (__mmask8)MASK, (int)SCALE)
> +  (__m256i) __builtin_ia32_gather3siv4di ((__v4di)(__m256i) (V1OLD),   \
> +                                         (void const *) (ADDR),        \
> +                                         (__v4si)(__m128i) (INDEX),    \
> +                                         (__mmask8) (MASK),            \
> +                                         (int) (SCALE))
>
>  #define _mm_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)     \
> -  (__m128i) __builtin_ia32_gather3siv2di ((__v2di)(__m128i)V1OLD,      \
> -                                         (void const *)ADDR,           \
> -                                         (__v4si)(__m128i)INDEX,       \
> -                                         (__mmask8)MASK, (int)SCALE)
> +  (__m128i) __builtin_ia32_gather3siv2di ((__v2di)(__m128i) (V1OLD),   \
> +                                         (void const *) (ADDR),        \
> +                                         (__v4si)(__m128i) (INDEX),    \
> +                                         (__mmask8) (MASK),            \
> +                                         (int) (SCALE))
>
>  #define _mm256_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)  \
> -  (__m128i) __builtin_ia32_gather3div8si ((__v4si)(__m128i)V1OLD,      \
> -                                         (void const *)ADDR,           \
> -                                         (__v4di)(__m256i)INDEX,       \
> -                                         (__mmask8)MASK, (int)SCALE)
> +  (__m128i) __builtin_ia32_gather3div8si ((__v4si)(__m128i) (V1OLD),   \
> +                                         (void const *) (ADDR),        \
> +                                         (__v4di)(__m256i) (INDEX),    \
> +                                         (__mmask8) (MASK),            \
> +                                         (int) (SCALE))
>
>  #define _mm_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)     \
> -  (__m128i) __builtin_ia32_gather3div4si ((__v4si)(__m128i)V1OLD,      \
> -                                         (void const *)ADDR,           \
> -                                         (__v2di)(__m128i)INDEX,       \
> -                                         (__mmask8)MASK, (int)SCALE)
> +  (__m128i) __builtin_ia32_gather3div4si ((__v4si)(__m128i) (V1OLD),   \
> +                                         (void const *) (ADDR),        \
> +                                         (__v2di)(__m128i) (INDEX),    \
> +                                         (__mmask8) (MASK),            \
> +                                         (int) (SCALE))
>
>  #define _mm256_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)  \
> -  (__m256i) __builtin_ia32_gather3div4di ((__v4di)(__m256i)V1OLD,      \
> -                                         (void const *)ADDR,           \
> -                                         (__v4di)(__m256i)INDEX,       \
> -                                         (__mmask8)MASK, (int)SCALE)
> +  (__m256i) __builtin_ia32_gather3div4di ((__v4di)(__m256i) (V1OLD),   \
> +                                         (void const *) (ADDR),        \
> +                                         (__v4di)(__m256i) (INDEX),    \
> +                                         (__mmask8) (MASK),            \
> +                                         (int) (SCALE))
>
>  #define _mm_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)     \
> -  (__m128i) __builtin_ia32_gather3div2di ((__v2di)(__m128i)V1OLD,      \
> -                                         (void const *)ADDR,           \
> -                                         (__v2di)(__m128i)INDEX,       \
> -                                         (__mmask8)MASK, (int)SCALE)
> +  (__m128i) __builtin_ia32_gather3div2di ((__v2di)(__m128i) (V1OLD),   \
> +                                         (void const *) (ADDR),        \
> +                                         (__v2di)(__m128i) (INDEX),    \
> +                                         (__mmask8) (MASK),            \
> +                                         (int) (SCALE))
>
>  #define _mm256_i32scatter_ps(ADDR, INDEX, V1, SCALE)                   \
> -  __builtin_ia32_scattersiv8sf ((void *)ADDR, (__mmask8)0xFF,          \
> -                               (__v8si)(__m256i)INDEX,                 \
> -                               (__v8sf)(__m256)V1, (int)SCALE)
> +  __builtin_ia32_scattersiv8sf ((void *) (ADDR), (__mmask8)0xFF,       \
> +                               (__v8si)(__m256i) (INDEX),              \
> +                               (__v8sf)(__m256) (V1), (int) (SCALE))
>
>  #define _mm256_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE)                \
> -  __builtin_ia32_scattersiv8sf ((void *)ADDR, (__mmask8)MASK,          \
> -                               (__v8si)(__m256i)INDEX,                 \
> -                               (__v8sf)(__m256)V1, (int)SCALE)
> +  __builtin_ia32_scattersiv8sf ((void *) (ADDR), (__mmask8) (MASK),    \
> +                               (__v8si)(__m256i) (INDEX),              \
> +                               (__v8sf)(__m256) (V1), (int) (SCALE))
>
>  #define _mm_i32scatter_ps(ADDR, INDEX, V1, SCALE)                      \
> -  __builtin_ia32_scattersiv4sf ((void *)ADDR, (__mmask8)0xFF,          \
> -                               (__v4si)(__m128i)INDEX,                 \
> -                               (__v4sf)(__m128)V1, (int)SCALE)
> +  __builtin_ia32_scattersiv4sf ((void *) (ADDR), (__mmask8)0xFF,       \
> +                               (__v4si)(__m128i) (INDEX),              \
> +                               (__v4sf)(__m128) (V1), (int) (SCALE))
>
>  #define _mm_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE)           \
> -  __builtin_ia32_scattersiv4sf ((void *)ADDR, (__mmask8)MASK,          \
> -                               (__v4si)(__m128i)INDEX,                 \
> -                               (__v4sf)(__m128)V1, (int)SCALE)
> +  __builtin_ia32_scattersiv4sf ((void *) (ADDR), (__mmask8) (MASK),    \
> +                               (__v4si)(__m128i) (INDEX),              \
> +                               (__v4sf)(__m128) (V1), (int) (SCALE))
>
>  #define _mm256_i32scatter_pd(ADDR, INDEX, V1, SCALE)                   \
> -  __builtin_ia32_scattersiv4df ((void *)ADDR, (__mmask8)0xFF,          \
> -                               (__v4si)(__m128i)INDEX,                 \
> -                               (__v4df)(__m256d)V1, (int)SCALE)
> +  __builtin_ia32_scattersiv4df ((void *) (ADDR), (__mmask8)0xFF,       \
> +                               (__v4si)(__m128i) (INDEX),              \
> +                               (__v4df)(__m256d) (V1), (int) (SCALE))
>
>  #define _mm256_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE)                \
> -  __builtin_ia32_scattersiv4df ((void *)ADDR, (__mmask8)MASK,          \
> -                               (__v4si)(__m128i)INDEX,                 \
> -                               (__v4df)(__m256d)V1, (int)SCALE)
> +  __builtin_ia32_scattersiv4df ((void *) (ADDR), (__mmask8) (MASK),    \
> +                               (__v4si)(__m128i) (INDEX),              \
> +                               (__v4df)(__m256d) (V1), (int) (SCALE))
>
>  #define _mm_i32scatter_pd(ADDR, INDEX, V1, SCALE)                      \
> -  __builtin_ia32_scattersiv2df ((void *)ADDR, (__mmask8)0xFF,          \
> -                               (__v4si)(__m128i)INDEX,                 \
> -                               (__v2df)(__m128d)V1, (int)SCALE)
> +  __builtin_ia32_scattersiv2df ((void *) (ADDR), (__mmask8)0xFF,       \
> +                               (__v4si)(__m128i) (INDEX),              \
> +                               (__v2df)(__m128d) (V1), (int) (SCALE))
>
>  #define _mm_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE)           \
> -  __builtin_ia32_scattersiv2df ((void *)ADDR, (__mmask8)MASK,          \
> -                               (__v4si)(__m128i)INDEX,                 \
> -                               (__v2df)(__m128d)V1, (int)SCALE)
> +  __builtin_ia32_scattersiv2df ((void *) (ADDR), (__mmask8) (MASK),    \
> +                               (__v4si)(__m128i) (INDEX),              \
> +                               (__v2df)(__m128d) (V1), (int) (SCALE))
>
>  #define _mm256_i64scatter_ps(ADDR, INDEX, V1, SCALE)                   \
> -  __builtin_ia32_scatterdiv8sf ((void *)ADDR, (__mmask8)0xFF,          \
> -                               (__v4di)(__m256i)INDEX,                 \
> -                               (__v4sf)(__m128)V1, (int)SCALE)
> +  __builtin_ia32_scatterdiv8sf ((void *) (ADDR), (__mmask8)0xFF,       \
> +                               (__v4di)(__m256i) (INDEX),              \
> +                               (__v4sf)(__m128) (V1), (int) (SCALE))
>
>  #define _mm256_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE)                \
> -  __builtin_ia32_scatterdiv8sf ((void *)ADDR, (__mmask8)MASK,          \
> -                               (__v4di)(__m256i)INDEX,                 \
> -                               (__v4sf)(__m128)V1, (int)SCALE)
> +  __builtin_ia32_scatterdiv8sf ((void *) (ADDR), (__mmask8) (MASK),    \
> +                               (__v4di)(__m256i) (INDEX),              \
> +                               (__v4sf)(__m128) (V1), (int) (SCALE))
>
>  #define _mm_i64scatter_ps(ADDR, INDEX, V1, SCALE)                      \
> -  __builtin_ia32_scatterdiv4sf ((void *)ADDR, (__mmask8)0xFF,          \
> -                               (__v2di)(__m128i)INDEX,                 \
> -                               (__v4sf)(__m128)V1, (int)SCALE)
> +  __builtin_ia32_scatterdiv4sf ((void *) (ADDR), (__mmask8)0xFF,       \
> +                               (__v2di)(__m128i) (INDEX),              \
> +                               (__v4sf)(__m128) (V1), (int) (SCALE))
>
>  #define _mm_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE)           \
> -  __builtin_ia32_scatterdiv4sf ((void *)ADDR, (__mmask8)MASK,          \
> -                               (__v2di)(__m128i)INDEX,                 \
> -                               (__v4sf)(__m128)V1, (int)SCALE)
> +  __builtin_ia32_scatterdiv4sf ((void *) (ADDR), (__mmask8) (MASK),    \
> +                               (__v2di)(__m128i) (INDEX),              \
> +                               (__v4sf)(__m128) (V1), (int) (SCALE))
>
>  #define _mm256_i64scatter_pd(ADDR, INDEX, V1, SCALE)                   \
> -  __builtin_ia32_scatterdiv4df ((void *)ADDR, (__mmask8)0xFF,          \
> -                               (__v4di)(__m256i)INDEX,                 \
> -                               (__v4df)(__m256d)V1, (int)SCALE)
> +  __builtin_ia32_scatterdiv4df ((void *) (ADDR), (__mmask8)0xFF,       \
> +                               (__v4di)(__m256i) (INDEX),              \
> +                               (__v4df)(__m256d) (V1), (int) (SCALE))
>
>  #define _mm256_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE)                \
> -  __builtin_ia32_scatterdiv4df ((void *)ADDR, (__mmask8)MASK,          \
> -                               (__v4di)(__m256i)INDEX,                 \
> -                               (__v4df)(__m256d)V1, (int)SCALE)
> +  __builtin_ia32_scatterdiv4df ((void *) (ADDR), (__mmask8) (MASK),    \
> +                               (__v4di)(__m256i) (INDEX),              \
> +                               (__v4df)(__m256d) (V1), (int) (SCALE))
>
>  #define _mm_i64scatter_pd(ADDR, INDEX, V1, SCALE)                      \
> -  __builtin_ia32_scatterdiv2df ((void *)ADDR, (__mmask8)0xFF,          \
> -                               (__v2di)(__m128i)INDEX,                 \
> -                               (__v2df)(__m128d)V1, (int)SCALE)
> +  __builtin_ia32_scatterdiv2df ((void *) (ADDR), (__mmask8)0xFF,       \
> +                               (__v2di)(__m128i) (INDEX),              \
> +                               (__v2df)(__m128d) (V1), (int) (SCALE))
>
>  #define _mm_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE)           \
> -  __builtin_ia32_scatterdiv2df ((void *)ADDR, (__mmask8)MASK,          \
> -                               (__v2di)(__m128i)INDEX,                 \
> -                               (__v2df)(__m128d)V1, (int)SCALE)
> +  __builtin_ia32_scatterdiv2df ((void *) (ADDR), (__mmask8) (MASK),    \
> +                               (__v2di)(__m128i) (INDEX),              \
> +                               (__v2df)(__m128d) (V1), (int) (SCALE))
>
>  #define _mm256_i32scatter_epi32(ADDR, INDEX, V1, SCALE)                        \
> -  __builtin_ia32_scattersiv8si ((void *)ADDR, (__mmask8)0xFF,          \
> -                               (__v8si)(__m256i)INDEX,                 \
> -                               (__v8si)(__m256i)V1, (int)SCALE)
> +  __builtin_ia32_scattersiv8si ((void *) (ADDR), (__mmask8)0xFF,       \
> +                               (__v8si)(__m256i) (INDEX),              \
> +                               (__v8si)(__m256i) (V1), (int) (SCALE))
>
>  #define _mm256_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)     \
> -  __builtin_ia32_scattersiv8si ((void *)ADDR, (__mmask8)MASK,          \
> -                               (__v8si)(__m256i)INDEX,                 \
> -                               (__v8si)(__m256i)V1, (int)SCALE)
> +  __builtin_ia32_scattersiv8si ((void *) (ADDR), (__mmask8) (MASK),    \
> +                               (__v8si)(__m256i) (INDEX),              \
> +                               (__v8si)(__m256i) (V1), (int) (SCALE))
>
>  #define _mm_i32scatter_epi32(ADDR, INDEX, V1, SCALE)                   \
> -  __builtin_ia32_scattersiv4si ((void *)ADDR, (__mmask8)0xFF,          \
> -                               (__v4si)(__m128i)INDEX,                 \
> -                               (__v4si)(__m128i)V1, (int)SCALE)
> +  __builtin_ia32_scattersiv4si ((void *) (ADDR), (__mmask8)0xFF,       \
> +                               (__v4si)(__m128i) (INDEX),              \
> +                               (__v4si)(__m128i) (V1), (int) (SCALE))
>
>  #define _mm_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)                \
> -  __builtin_ia32_scattersiv4si ((void *)ADDR, (__mmask8)MASK,          \
> -                               (__v4si)(__m128i)INDEX,                 \
> -                               (__v4si)(__m128i)V1, (int)SCALE)
> +  __builtin_ia32_scattersiv4si ((void *) (ADDR), (__mmask8) (MASK),    \
> +                               (__v4si)(__m128i) (INDEX),              \
> +                               (__v4si)(__m128i) (V1), (int) (SCALE))
>
>  #define _mm256_i32scatter_epi64(ADDR, INDEX, V1, SCALE)                        \
> -  __builtin_ia32_scattersiv4di ((void *)ADDR, (__mmask8)0xFF,          \
> -                               (__v4si)(__m128i)INDEX,                 \
> -                               (__v4di)(__m256i)V1, (int)SCALE)
> +  __builtin_ia32_scattersiv4di ((void *) (ADDR), (__mmask8)0xFF,       \
> +                               (__v4si)(__m128i) (INDEX),              \
> +                               (__v4di)(__m256i) (V1), (int) (SCALE))
>
>  #define _mm256_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)     \
> -  __builtin_ia32_scattersiv4di ((void *)ADDR, (__mmask8)MASK,          \
> -                               (__v4si)(__m128i)INDEX,                 \
> -                               (__v4di)(__m256i)V1, (int)SCALE)
> +  __builtin_ia32_scattersiv4di ((void *) (ADDR), (__mmask8) (MASK),    \
> +                               (__v4si)(__m128i) (INDEX),              \
> +                               (__v4di)(__m256i) (V1), (int) (SCALE))
>
>  #define _mm_i32scatter_epi64(ADDR, INDEX, V1, SCALE)                   \
> -  __builtin_ia32_scattersiv2di ((void *)ADDR, (__mmask8)0xFF,          \
> -                               (__v4si)(__m128i)INDEX,                 \
> -                               (__v2di)(__m128i)V1, (int)SCALE)
> +  __builtin_ia32_scattersiv2di ((void *) (ADDR), (__mmask8)0xFF,       \
> +                               (__v4si)(__m128i) (INDEX),              \
> +                               (__v2di)(__m128i) (V1), (int) (SCALE))
>
>  #define _mm_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)                \
> -  __builtin_ia32_scattersiv2di ((void *)ADDR, (__mmask8)MASK,          \
> -                               (__v4si)(__m128i)INDEX,                 \
> -                               (__v2di)(__m128i)V1, (int)SCALE)
> +  __builtin_ia32_scattersiv2di ((void *) (ADDR), (__mmask8) (MASK),    \
> +                               (__v4si)(__m128i) (INDEX),              \
> +                               (__v2di)(__m128i) (V1), (int) (SCALE))
>
>  #define _mm256_i64scatter_epi32(ADDR, INDEX, V1, SCALE)                        \
> -  __builtin_ia32_scatterdiv8si ((void *)ADDR, (__mmask8)0xFF,          \
> -                               (__v4di)(__m256i)INDEX,                 \
> -                               (__v4si)(__m128i)V1, (int)SCALE)
> +  __builtin_ia32_scatterdiv8si ((void *) (ADDR), (__mmask8)0xFF,       \
> +                               (__v4di)(__m256i) (INDEX),              \
> +                               (__v4si)(__m128i) (V1), (int) (SCALE))
>
>  #define _mm256_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)     \
> -  __builtin_ia32_scatterdiv8si ((void *)ADDR, (__mmask8)MASK,          \
> -                               (__v4di)(__m256i)INDEX,                 \
> -                               (__v4si)(__m128i)V1, (int)SCALE)
> +  __builtin_ia32_scatterdiv8si ((void *) (ADDR), (__mmask8) (MASK),    \
> +                               (__v4di)(__m256i) (INDEX),              \
> +                               (__v4si)(__m128i) (V1), (int) (SCALE))
>
>  #define _mm_i64scatter_epi32(ADDR, INDEX, V1, SCALE)                   \
> -  __builtin_ia32_scatterdiv4si ((void *)ADDR, (__mmask8)0xFF,          \
> -                               (__v2di)(__m128i)INDEX,                 \
> -                               (__v4si)(__m128i)V1, (int)SCALE)
> +  __builtin_ia32_scatterdiv4si ((void *) (ADDR), (__mmask8)0xFF,       \
> +                               (__v2di)(__m128i) (INDEX),              \
> +                               (__v4si)(__m128i) (V1), (int) (SCALE))
>
>  #define _mm_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)                \
> -  __builtin_ia32_scatterdiv4si ((void *)ADDR, (__mmask8)MASK,          \
> -                               (__v2di)(__m128i)INDEX,                 \
> -                               (__v4si)(__m128i)V1, (int)SCALE)
> +  __builtin_ia32_scatterdiv4si ((void *) (ADDR), (__mmask8) (MASK),    \
> +                               (__v2di)(__m128i) (INDEX),              \
> +                               (__v4si)(__m128i) (V1), (int) (SCALE))
>
>  #define _mm256_i64scatter_epi64(ADDR, INDEX, V1, SCALE)                        \
> -  __builtin_ia32_scatterdiv4di ((void *)ADDR, (__mmask8)0xFF,          \
> -                               (__v4di)(__m256i)INDEX,                 \
> -                               (__v4di)(__m256i)V1, (int)SCALE)
> +  __builtin_ia32_scatterdiv4di ((void *) (ADDR), (__mmask8)0xFF,       \
> +                               (__v4di)(__m256i) (INDEX),              \
> +                               (__v4di)(__m256i) (V1), (int) (SCALE))
>
>  #define _mm256_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)     \
> -  __builtin_ia32_scatterdiv4di ((void *)ADDR, (__mmask8)MASK,          \
> -                               (__v4di)(__m256i)INDEX,                 \
> -                               (__v4di)(__m256i)V1, (int)SCALE)
> +  __builtin_ia32_scatterdiv4di ((void *) (ADDR), (__mmask8) (MASK),    \
> +                               (__v4di)(__m256i) (INDEX),              \
> +                               (__v4di)(__m256i) (V1), (int) (SCALE))
>
>  #define _mm_i64scatter_epi64(ADDR, INDEX, V1, SCALE)                   \
> -  __builtin_ia32_scatterdiv2di ((void *)ADDR, (__mmask8)0xFF,          \
> -                               (__v2di)(__m128i)INDEX,                 \
> -                               (__v2di)(__m128i)V1, (int)SCALE)
> +  __builtin_ia32_scatterdiv2di ((void *) (ADDR), (__mmask8)0xFF,       \
> +                               (__v2di)(__m128i) (INDEX),              \
> +                               (__v2di)(__m128i) (V1), (int) (SCALE))
>
>  #define _mm_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)                \
> -  __builtin_ia32_scatterdiv2di ((void *)ADDR, (__mmask8)MASK,          \
> -                               (__v2di)(__m128i)INDEX,                 \
> -                               (__v2di)(__m128i)V1, (int)SCALE)
> +  __builtin_ia32_scatterdiv2di ((void *) (ADDR), (__mmask8) (MASK),    \
> +                               (__v2di)(__m128i) (INDEX),              \
> +                               (__v2di)(__m128i) (V1), (int) (SCALE))
>
>  #define _mm256_mask_shuffle_epi32(W, U, X, C)                                       \
>    ((__m256i)  __builtin_ia32_pshufd256_mask ((__v8si)(__m256i)(X), (int)(C),        \
>
>         Jakub
>
diff mbox series

Patch

--- gcc/config/i386/avx2intrin.h.jj	2020-03-05 19:44:14.969713607 +0100
+++ gcc/config/i386/avx2intrin.h	2020-04-29 11:40:46.216509672 +0200
@@ -1670,234 +1670,246 @@  _mm256_mask_i64gather_epi32 (__m128i __s
 #else /* __OPTIMIZE__ */
 #define _mm_i32gather_pd(BASE, INDEX, SCALE)				\
   (__m128d) __builtin_ia32_gathersiv2df ((__v2df) _mm_setzero_pd (),	\
-					 (double const *)BASE,		\
-					 (__v4si)(__m128i)INDEX,	\
-					 (__v2df)_mm_set1_pd(		\
-					   (double)(long long int) -1), \
-					 (int)SCALE)
-
-#define _mm_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE)	 \
-  (__m128d) __builtin_ia32_gathersiv2df ((__v2df)(__m128d)SRC,	 \
-					 (double const *)BASE,	 \
-					 (__v4si)(__m128i)INDEX, \
-					 (__v2df)(__m128d)MASK,	 \
-					 (int)SCALE)
+					 (double const *) (BASE),	\
+					 (__v4si)(__m128i) (INDEX),	\
+					 (__v2df)			\
+					 _mm_cmpeq_pd (_mm_setzero_pd (),\
+						       _mm_setzero_pd ()),\
+					 (int) (SCALE))
+
+#define _mm_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE)	 	\
+  (__m128d) __builtin_ia32_gathersiv2df ((__v2df)(__m128d) (SRC),	\
+					 (double const *) (BASE),	\
+					 (__v4si)(__m128i) (INDEX),	\
+					 (__v2df)(__m128d) (MASK),	\
+					 (int) (SCALE))
 
 #define _mm256_i32gather_pd(BASE, INDEX, SCALE)				\
   (__m256d) __builtin_ia32_gathersiv4df ((__v4df) _mm256_setzero_pd (),	\
-					 (double const *)BASE,		\
-					 (__v4si)(__m128i)INDEX,	\
-					 (__v4df)_mm256_set1_pd(	\
-					   (double)(long long int) -1), \
-					 (int)SCALE)
-
-#define _mm256_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE)	 \
-  (__m256d) __builtin_ia32_gathersiv4df ((__v4df)(__m256d)SRC,	 \
-					 (double const *)BASE,	 \
-					 (__v4si)(__m128i)INDEX, \
-					 (__v4df)(__m256d)MASK,	 \
-					 (int)SCALE)
+					 (double const *) (BASE),	\
+					 (__v4si)(__m128i) (INDEX),	\
+					 (__v4df)			\
+					 _mm256_cmp_pd (_mm256_setzero_pd (),\
+							_mm256_setzero_pd (),\
+							_CMP_EQ_OQ),	\
+					 (int) (SCALE))
+
+#define _mm256_mask_i32gather_pd(SRC, BASE, INDEX, MASK, SCALE)		\
+  (__m256d) __builtin_ia32_gathersiv4df ((__v4df)(__m256d) (SRC),	\
+					 (double const *) (BASE),	\
+					 (__v4si)(__m128i) (INDEX),	\
+					 (__v4df)(__m256d) (MASK),	\
+					 (int) (SCALE))
 
 #define _mm_i64gather_pd(BASE, INDEX, SCALE)				\
   (__m128d) __builtin_ia32_gatherdiv2df ((__v2df) _mm_setzero_pd (),	\
-					 (double const *)BASE,		\
-					 (__v2di)(__m128i)INDEX,	\
-					 (__v2df)_mm_set1_pd(		\
-					   (double)(long long int) -1), \
-					 (int)SCALE)
-
-#define _mm_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE)	 \
-  (__m128d) __builtin_ia32_gatherdiv2df ((__v2df)(__m128d)SRC,	 \
-					 (double const *)BASE,	 \
-					 (__v2di)(__m128i)INDEX, \
-					 (__v2df)(__m128d)MASK,	 \
-					 (int)SCALE)
+					 (double const *) (BASE),	\
+					 (__v2di)(__m128i) (INDEX),	\
+					 (__v2df)			\
+					 _mm_cmpeq_pd (_mm_setzero_pd (),\
+						       _mm_setzero_pd ()),\
+					 (int) (SCALE))
+
+#define _mm_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE)		\
+  (__m128d) __builtin_ia32_gatherdiv2df ((__v2df)(__m128d) (SRC),	\
+					 (double const *) (BASE),	\
+					 (__v2di)(__m128i) (INDEX),	\
+					 (__v2df)(__m128d) (MASK),	\
+					 (int) (SCALE))
 
 #define _mm256_i64gather_pd(BASE, INDEX, SCALE)				\
   (__m256d) __builtin_ia32_gatherdiv4df ((__v4df) _mm256_setzero_pd (),	\
-					 (double const *)BASE,		\
-					 (__v4di)(__m256i)INDEX,	\
-					 (__v4df)_mm256_set1_pd(	\
-					   (double)(long long int) -1), \
-					 (int)SCALE)
-
-#define _mm256_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE)	 \
-  (__m256d) __builtin_ia32_gatherdiv4df ((__v4df)(__m256d)SRC,	 \
-					 (double const *)BASE,	 \
-					 (__v4di)(__m256i)INDEX, \
-					 (__v4df)(__m256d)MASK,	 \
-					 (int)SCALE)
+					 (double const *) (BASE),	\
+					 (__v4di)(__m256i) (INDEX),	\
+					 (__v4df)			\
+					 _mm256_cmp_pd (_mm256_setzero_pd (),\
+							_mm256_setzero_pd (),\
+							_CMP_EQ_OQ),	\
+					 (int) (SCALE))
+
+#define _mm256_mask_i64gather_pd(SRC, BASE, INDEX, MASK, SCALE)	 	\
+  (__m256d) __builtin_ia32_gatherdiv4df ((__v4df)(__m256d) (SRC),	\
+					 (double const *) (BASE),	\
+					 (__v4di)(__m256i) (INDEX),	\
+					 (__v4df)(__m256d) (MASK),	\
+					 (int) (SCALE))
 
 #define _mm_i32gather_ps(BASE, INDEX, SCALE)				\
   (__m128) __builtin_ia32_gathersiv4sf ((__v4sf) _mm_setzero_ps (),	\
-					(float const *)BASE,		\
-					(__v4si)(__m128i)INDEX,		\
-					_mm_set1_ps ((float)(int) -1),	\
-					(int)SCALE)
-
-#define _mm_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE)	 \
-  (__m128) __builtin_ia32_gathersiv4sf ((__v4sf)(__m128)SRC,	 \
-					(float const *)BASE,	 \
-					(__v4si)(__m128i)INDEX,	 \
-					(__v4sf)(__m128)MASK,	 \
-					(int)SCALE)
-
-#define _mm256_i32gather_ps(BASE, INDEX, SCALE)			       \
-  (__m256) __builtin_ia32_gathersiv8sf ((__v8sf) _mm256_setzero_ps (), \
-					(float const *)BASE,	       \
-					(__v8si)(__m256i)INDEX,	       \
-					(__v8sf)_mm256_set1_ps (       \
-					  (float)(int) -1),	       \
-					(int)SCALE)
-
-#define _mm256_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE) \
-  (__m256) __builtin_ia32_gathersiv8sf ((__v8sf)(__m256)SRC,	\
-					(float const *)BASE,	\
-					(__v8si)(__m256i)INDEX, \
-					(__v8sf)(__m256)MASK,	\
-					(int)SCALE)
+					(float const *) (BASE),		\
+					(__v4si)(__m128i) (INDEX),	\
+					(__v4sf)			\
+					_mm_cmpeq_ps (_mm_setzero_ps (),\
+						      _mm_setzero_ps ()),\
+					(int) (SCALE))
+
+#define _mm_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE)	 	\
+  (__m128) __builtin_ia32_gathersiv4sf ((__v4sf)(__m128) (SRC),		\
+					(float const *) (BASE),		\
+					(__v4si)(__m128i) (INDEX),	\
+					(__v4sf)(__m128) (MASK),	\
+					(int) (SCALE))
+
+#define _mm256_i32gather_ps(BASE, INDEX, SCALE)				\
+  (__m256) __builtin_ia32_gathersiv8sf ((__v8sf) _mm256_setzero_ps (),	\
+					(float const *) (BASE),		\
+					(__v8si)(__m256i) (INDEX),	\
+					(__v8sf)			\
+					_mm256_cmp_ps (_mm256_setzero_ps (),\
+						       _mm256_setzero_ps (),\
+						       _CMP_EQ_OQ),	\
+					(int) (SCALE))
+
+#define _mm256_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE)		\
+  (__m256) __builtin_ia32_gathersiv8sf ((__v8sf)(__m256) (SRC),		\
+					(float const *) (BASE),		\
+					(__v8si)(__m256i) (INDEX),	\
+					(__v8sf)(__m256) (MASK),	\
+					(int) (SCALE))
 
 #define _mm_i64gather_ps(BASE, INDEX, SCALE)				\
   (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf) _mm_setzero_pd (),	\
-					(float const *)BASE,		\
-					(__v2di)(__m128i)INDEX,		\
-					(__v4sf)_mm_set1_ps (		\
-					  (float)(int) -1),		\
-					(int)SCALE)
-
-#define _mm_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE)	 \
-  (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)(__m128)SRC,	 \
-					(float const *)BASE,	 \
-					(__v2di)(__m128i)INDEX,	 \
-					(__v4sf)(__m128)MASK,	 \
-					(int)SCALE)
+					(float const *) (BASE),		\
+					(__v2di)(__m128i) (INDEX),	\
+					(__v4sf)			\
+					_mm_cmpeq_ps (_mm_setzero_ps (),\
+						      _mm_setzero_ps ()),\
+					(int) (SCALE))
+
+#define _mm_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE)		\
+  (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)(__m128) (SRC),		\
+					(float const *) (BASE),		\
+					(__v2di)(__m128i) (INDEX),	\
+					(__v4sf)(__m128) (MASK),	\
+					(int) (SCALE))
 
 #define _mm256_i64gather_ps(BASE, INDEX, SCALE)				\
   (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf) _mm_setzero_ps (),	\
-					   (float const *)BASE,		\
-					   (__v4di)(__m256i)INDEX,	\
-					   (__v4sf)_mm_set1_ps(		\
-					     (float)(int) -1),		\
-					   (int)SCALE)
-
-#define _mm256_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE)	   \
-  (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf)(__m128)SRC,	   \
-					   (float const *)BASE,	   \
-					   (__v4di)(__m256i)INDEX, \
-					   (__v4sf)(__m128)MASK,   \
-					   (int)SCALE)
+					   (float const *) (BASE),	\
+					   (__v4di)(__m256i) (INDEX),	\
+					   (__v4sf)			\
+					   _mm_cmpeq_ps (_mm_setzero_ps (),\
+							 _mm_setzero_ps ()),\
+					   (int) (SCALE))
+
+#define _mm256_mask_i64gather_ps(SRC, BASE, INDEX, MASK, SCALE)	   	\
+  (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf)(__m128) (SRC),	\
+					   (float const *) (BASE),	\
+					   (__v4di)(__m256i) (INDEX),	\
+					   (__v4sf)(__m128) (MASK),	\
+					   (int) (SCALE))
 
 #define _mm_i32gather_epi64(BASE, INDEX, SCALE)				\
   (__m128i) __builtin_ia32_gathersiv2di ((__v2di) _mm_setzero_si128 (), \
-					 (long long const *)BASE,	\
-					 (__v4si)(__m128i)INDEX,	\
+					 (long long const *) (BASE),	\
+					 (__v4si)(__m128i) (INDEX),	\
 					 (__v2di)_mm_set1_epi64x (-1),	\
-					 (int)SCALE)
+					 (int) (SCALE))
 
-#define _mm_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE)	  \
-  (__m128i) __builtin_ia32_gathersiv2di ((__v2di)(__m128i)SRC,	  \
-					 (long long const *)BASE, \
-					 (__v4si)(__m128i)INDEX,  \
-					 (__v2di)(__m128i)MASK,	  \
-					 (int)SCALE)
+#define _mm_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE)	  	\
+  (__m128i) __builtin_ia32_gathersiv2di ((__v2di)(__m128i) (SRC),	\
+					 (long long const *) (BASE),	\
+					 (__v4si)(__m128i) (INDEX),	\
+					 (__v2di)(__m128i) (MASK),	\
+					 (int) (SCALE))
 
 #define _mm256_i32gather_epi64(BASE, INDEX, SCALE)			   \
   (__m256i) __builtin_ia32_gathersiv4di ((__v4di) _mm256_setzero_si256 (), \
-					 (long long const *)BASE,	   \
-					 (__v4si)(__m128i)INDEX,	   \
+					 (long long const *) (BASE),	   \
+					 (__v4si)(__m128i) (INDEX),	   \
 					 (__v4di)_mm256_set1_epi64x (-1),  \
-					 (int)SCALE)
+					 (int) (SCALE))
 
-#define _mm256_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
-  (__m256i) __builtin_ia32_gathersiv4di ((__v4di)(__m256i)SRC,	   \
-					 (long long const *)BASE,  \
-					 (__v4si)(__m128i)INDEX,   \
-					 (__v4di)(__m256i)MASK,	   \
-					 (int)SCALE)
+#define _mm256_mask_i32gather_epi64(SRC, BASE, INDEX, MASK, SCALE)	\
+  (__m256i) __builtin_ia32_gathersiv4di ((__v4di)(__m256i) (SRC),	\
+					 (long long const *) (BASE),	\
+					 (__v4si)(__m128i) (INDEX),	\
+					 (__v4di)(__m256i) (MASK),	\
+					 (int) (SCALE))
 
 #define _mm_i64gather_epi64(BASE, INDEX, SCALE)				\
   (__m128i) __builtin_ia32_gatherdiv2di ((__v2di) _mm_setzero_si128 (), \
-					 (long long const *)BASE,	\
-					 (__v2di)(__m128i)INDEX,	\
+					 (long long const *) (BASE),	\
+					 (__v2di)(__m128i) (INDEX),	\
 					 (__v2di)_mm_set1_epi64x (-1),	\
-					 (int)SCALE)
+					 (int) (SCALE))
 
-#define _mm_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE)	  \
-  (__m128i) __builtin_ia32_gatherdiv2di ((__v2di)(__m128i)SRC,	  \
-					 (long long const *)BASE, \
-					 (__v2di)(__m128i)INDEX,  \
-					 (__v2di)(__m128i)MASK,	  \
-					 (int)SCALE)
+#define _mm_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE)		\
+  (__m128i) __builtin_ia32_gatherdiv2di ((__v2di)(__m128i) (SRC),	\
+					 (long long const *) (BASE),	\
+					 (__v2di)(__m128i) (INDEX),	\
+					 (__v2di)(__m128i) (MASK),	\
+					 (int) (SCALE))
 
 #define _mm256_i64gather_epi64(BASE, INDEX, SCALE)			   \
   (__m256i) __builtin_ia32_gatherdiv4di ((__v4di) _mm256_setzero_si256 (), \
-					 (long long const *)BASE,	   \
-					 (__v4di)(__m256i)INDEX,	   \
+					 (long long const *) (BASE),	   \
+					 (__v4di)(__m256i) (INDEX),	   \
 					 (__v4di)_mm256_set1_epi64x (-1),  \
-					 (int)SCALE)
+					 (int) (SCALE))
 
-#define _mm256_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE) \
-  (__m256i) __builtin_ia32_gatherdiv4di ((__v4di)(__m256i)SRC,	   \
-					 (long long const *)BASE,  \
-					 (__v4di)(__m256i)INDEX,   \
-					 (__v4di)(__m256i)MASK,	   \
-					 (int)SCALE)
+#define _mm256_mask_i64gather_epi64(SRC, BASE, INDEX, MASK, SCALE) 	\
+  (__m256i) __builtin_ia32_gatherdiv4di ((__v4di)(__m256i) (SRC),	\
+					 (long long const *) (BASE),	\
+					 (__v4di)(__m256i) (INDEX),	\
+					 (__v4di)(__m256i) (MASK),	\
+					 (int) (SCALE))
 
 #define _mm_i32gather_epi32(BASE, INDEX, SCALE)				\
   (__m128i) __builtin_ia32_gathersiv4si ((__v4si) _mm_setzero_si128 (),	\
-					 (int const *)BASE,		\
-					 (__v4si)(__m128i)INDEX,	\
+					 (int const *) (BASE),		\
+					 (__v4si)(__m128i) (INDEX),	\
 					 (__v4si)_mm_set1_epi32 (-1),	\
-					 (int)SCALE)
+					 (int) (SCALE))
 
-#define _mm_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
-  (__m128i) __builtin_ia32_gathersiv4si ((__v4si)(__m128i)SRC,	\
-					(int const *)BASE,	\
-					(__v4si)(__m128i)INDEX, \
-					(__v4si)(__m128i)MASK,	\
-					(int)SCALE)
+#define _mm_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE)		\
+  (__m128i) __builtin_ia32_gathersiv4si ((__v4si)(__m128i) (SRC),	\
+					(int const *) (BASE),		\
+					(__v4si)(__m128i) (INDEX),	\
+					(__v4si)(__m128i) (MASK),	\
+					(int) (SCALE))
 
 #define _mm256_i32gather_epi32(BASE, INDEX, SCALE)			   \
   (__m256i) __builtin_ia32_gathersiv8si ((__v8si) _mm256_setzero_si256 (), \
-					 (int const *)BASE,		   \
-					 (__v8si)(__m256i)INDEX,	   \
+					 (int const *) (BASE),		   \
+					 (__v8si)(__m256i) (INDEX),	   \
 					 (__v8si)_mm256_set1_epi32 (-1),   \
-					 (int)SCALE)
+					 (int) (SCALE))
 
-#define _mm256_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
-  (__m256i) __builtin_ia32_gathersiv8si ((__v8si)(__m256i)SRC,	   \
-					(int const *)BASE,	   \
-					(__v8si)(__m256i)INDEX,	   \
-					(__v8si)(__m256i)MASK,	   \
-					(int)SCALE)
+#define _mm256_mask_i32gather_epi32(SRC, BASE, INDEX, MASK, SCALE)	\
+  (__m256i) __builtin_ia32_gathersiv8si ((__v8si)(__m256i) (SRC),	\
+					(int const *) (BASE),	   	\
+					(__v8si)(__m256i) (INDEX),	\
+					(__v8si)(__m256i) (MASK),	\
+					(int) (SCALE))
 
 #define _mm_i64gather_epi32(BASE, INDEX, SCALE)				\
   (__m128i) __builtin_ia32_gatherdiv4si ((__v4si) _mm_setzero_si128 (),	\
-					 (int const *)BASE,		\
-					 (__v2di)(__m128i)INDEX,	\
+					 (int const *) (BASE),		\
+					 (__v2di)(__m128i) (INDEX),	\
 					 (__v4si)_mm_set1_epi32 (-1),	\
-					 (int)SCALE)
+					 (int) (SCALE))
 
-#define _mm_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
-  (__m128i) __builtin_ia32_gatherdiv4si ((__v4si)(__m128i)SRC,	\
-					(int const *)BASE,	\
-					(__v2di)(__m128i)INDEX, \
-					(__v4si)(__m128i)MASK,	\
-					(int)SCALE)
+#define _mm_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE)		\
+  (__m128i) __builtin_ia32_gatherdiv4si ((__v4si)(__m128i) (SRC),	\
+					(int const *) (BASE),		\
+					(__v2di)(__m128i) (INDEX),	\
+					(__v4si)(__m128i) (MASK),	\
+					(int) (SCALE))
 
 #define _mm256_i64gather_epi32(BASE, INDEX, SCALE)			   \
   (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si) _mm_setzero_si128 (), \
-					    (int const *)BASE,		   \
-					    (__v4di)(__m256i)INDEX,	   \
+					    (int const *) (BASE),	   \
+					    (__v4di)(__m256i) (INDEX),	   \
 					    (__v4si)_mm_set1_epi32(-1),	   \
-					    (int)SCALE)
+					    (int) (SCALE))
 
-#define _mm256_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE) \
-  (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si)(__m128i)SRC,  \
-					   (int const *)BASE,	   \
-					   (__v4di)(__m256i)INDEX, \
-					   (__v4si)(__m128i)MASK,  \
-					   (int)SCALE)
+#define _mm256_mask_i64gather_epi32(SRC, BASE, INDEX, MASK, SCALE)	\
+  (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si)(__m128i) (SRC),	\
+					   (int const *) (BASE),	\
+					   (__v4di)(__m256i) (INDEX),	\
+					   (__v4si)(__m128i) (MASK),	\
+					   (int) (SCALE))
 #endif  /* __OPTIMIZE__ */
 
 #ifdef __DISABLE_AVX2__
--- gcc/config/i386/avx512fintrin.h.jj	2020-01-12 11:54:36.315414887 +0100
+++ gcc/config/i386/avx512fintrin.h	2020-04-29 11:05:59.796380601 +0200
@@ -10468,179 +10468,189 @@  _mm512_mask_i64scatter_epi64 (void *__ad
 #else
 #define _mm512_i32gather_ps(INDEX, ADDR, SCALE)				\
   (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(),\
-					 (void const *)ADDR,		\
-					 (__v16si)(__m512i)INDEX,	\
-					 (__mmask16)0xFFFF, (int)SCALE)
+					 (void const *) (ADDR),		\
+					 (__v16si)(__m512i) (INDEX),	\
+					 (__mmask16)0xFFFF,		\
+					 (int) (SCALE))
 
 #define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512)V1OLD,	\
-					 (void const *)ADDR,		\
-					 (__v16si)(__m512i)INDEX,	\
-					 (__mmask16)MASK, (int)SCALE)
+  (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512) (V1OLD),	\
+					 (void const *) (ADDR),		\
+					 (__v16si)(__m512i) (INDEX),	\
+					 (__mmask16) (MASK),		\
+					 (int) (SCALE))
 
 #define _mm512_i32gather_pd(INDEX, ADDR, SCALE)				\
   (__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(),	\
-					 (void const *)ADDR,		\
-					 (__v8si)(__m256i)INDEX,	\
-					 (__mmask8)0xFF, (int)SCALE)
+					 (void const *) (ADDR),		\
+					 (__v8si)(__m256i) (INDEX),	\
+					 (__mmask8)0xFF, (int) (SCALE))
 
 #define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d)V1OLD,	\
-					 (void const *)ADDR,		\
-					 (__v8si)(__m256i)INDEX,	\
-					 (__mmask8)MASK, (int)SCALE)
+  (__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d) (V1OLD),	\
+					 (void const *) (ADDR),		\
+					 (__v8si)(__m256i) (INDEX),	\
+					 (__mmask8) (MASK),		\
+					 (int) (SCALE))
 
 #define _mm512_i64gather_ps(INDEX, ADDR, SCALE)				\
   (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(),	\
-					 (void const *)ADDR,		\
-					 (__v8di)(__m512i)INDEX,	\
-					 (__mmask8)0xFF, (int)SCALE)
+					 (void const *) (ADDR),		\
+					 (__v8di)(__m512i) (INDEX),	\
+					 (__mmask8)0xFF, (int) (SCALE))
 
 #define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256)V1OLD,		\
-					 (void const *)ADDR,		\
-					 (__v8di)(__m512i)INDEX,	\
-					 (__mmask8)MASK, (int)SCALE)
+  (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256) (V1OLD),	\
+					 (void const *) (ADDR),		\
+					 (__v8di)(__m512i) (INDEX),	\
+					 (__mmask8) (MASK),		\
+					 (int) (SCALE))
 
 #define _mm512_i64gather_pd(INDEX, ADDR, SCALE)				\
   (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(),	\
-					 (void const *)ADDR,		\
-					 (__v8di)(__m512i)INDEX,	\
-					 (__mmask8)0xFF, (int)SCALE)
+					 (void const *) (ADDR),		\
+					 (__v8di)(__m512i) (INDEX),	\
+					 (__mmask8)0xFF, (int) (SCALE))
 
 #define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d)V1OLD,	\
-					 (void const *)ADDR,		\
-					 (__v8di)(__m512i)INDEX,	\
-					 (__mmask8)MASK, (int)SCALE)
+  (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d) (V1OLD),	\
+					 (void const *) (ADDR),		\
+					 (__v8di)(__m512i) (INDEX),	\
+					 (__mmask8) (MASK),		\
+					 (int) (SCALE))
 
 #define _mm512_i32gather_epi32(INDEX, ADDR, SCALE)			\
-  (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_epi32 (),	\
-					  (void const *)ADDR,		\
-					  (__v16si)(__m512i)INDEX,	\
-					  (__mmask16)0xFFFF, (int)SCALE)
+  (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_epi32 (),\
+					  (void const *) (ADDR),	\
+					  (__v16si)(__m512i) (INDEX),	\
+					  (__mmask16)0xFFFF,		\
+					  (int) (SCALE))
 
 #define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i)V1OLD,	\
-					  (void const *)ADDR,		\
-					  (__v16si)(__m512i)INDEX,	\
-					  (__mmask16)MASK, (int)SCALE)
+  (__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i) (V1OLD),	\
+					  (void const *) (ADDR),	\
+					  (__v16si)(__m512i) (INDEX),	\
+					  (__mmask16) (MASK),		\
+					  (int) (SCALE))
 
 #define _mm512_i32gather_epi64(INDEX, ADDR, SCALE)			\
-  (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_epi32 (),	\
-					 (void const *)ADDR,		\
-					 (__v8si)(__m256i)INDEX,	\
-					 (__mmask8)0xFF, (int)SCALE)
+  (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_epi32 (),\
+					 (void const *) (ADDR),		\
+					 (__v8si)(__m256i) (INDEX),	\
+					 (__mmask8)0xFF, (int) (SCALE))
 
 #define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i)V1OLD,	\
-					 (void const *)ADDR,		\
-					 (__v8si)(__m256i)INDEX,	\
-					 (__mmask8)MASK, (int)SCALE)
-
-#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE)			  \
-  (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(), \
-					  (void const *)ADDR,		  \
-					  (__v8di)(__m512i)INDEX,	  \
-					  (__mmask8)0xFF, (int)SCALE)
+  (__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i) (V1OLD),	\
+					 (void const *) (ADDR),		\
+					 (__v8si)(__m256i) (INDEX),	\
+					 (__mmask8) (MASK),		\
+					 (int) (SCALE))
+
+#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE)			   \
+  (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(),\
+					  (void const *) (ADDR),	   \
+					  (__v8di)(__m512i) (INDEX),	   \
+					  (__mmask8)0xFF, (int) (SCALE))
 
 #define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i)V1OLD,	\
-					  (void const *)ADDR,		\
-					  (__v8di)(__m512i)INDEX,	\
-					  (__mmask8)MASK, (int)SCALE)
+  (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i) (V1OLD),	\
+					  (void const *) (ADDR),	\
+					  (__v8di)(__m512i) (INDEX),	\
+					  (__mmask8) (MASK),		\
+					  (int) (SCALE))
 
 #define _mm512_i64gather_epi64(INDEX, ADDR, SCALE)			\
-  (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_epi32 (),	\
-					 (void const *)ADDR,		\
-					 (__v8di)(__m512i)INDEX,	\
-					 (__mmask8)0xFF, (int)SCALE)
+  (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_epi32 (),\
+					 (void const *) (ADDR),		\
+					 (__v8di)(__m512i) (INDEX),	\
+					 (__mmask8)0xFF, (int) (SCALE))
 
 #define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i)V1OLD,	\
-					 (void const *)ADDR,		\
-					 (__v8di)(__m512i)INDEX,	\
-					 (__mmask8)MASK, (int)SCALE)
+  (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i) (V1OLD),	\
+					 (void const *) (ADDR),		\
+					 (__v8di)(__m512i) (INDEX),	\
+					 (__mmask8) (MASK),		\
+					 (int) (SCALE))
 
 #define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scattersiv16sf ((void *)ADDR, (__mmask16)0xFFFF,	\
-				 (__v16si)(__m512i)INDEX,		\
-				 (__v16sf)(__m512)V1, (int)SCALE)
+  __builtin_ia32_scattersiv16sf ((void *) (ADDR), (__mmask16)0xFFFF,	\
+				 (__v16si)(__m512i) (INDEX),		\
+				 (__v16sf)(__m512) (V1), (int) (SCALE))
 
 #define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scattersiv16sf ((void *)ADDR, (__mmask16)MASK,		\
-				 (__v16si)(__m512i)INDEX,		\
-				 (__v16sf)(__m512)V1, (int)SCALE)
+  __builtin_ia32_scattersiv16sf ((void *) (ADDR), (__mmask16) (MASK),	\
+				 (__v16si)(__m512i) (INDEX),		\
+				 (__v16sf)(__m512) (V1), (int) (SCALE))
 
 #define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scattersiv8df ((void *)ADDR, (__mmask8)0xFF,		\
-				(__v8si)(__m256i)INDEX,			\
-				(__v8df)(__m512d)V1, (int)SCALE)
+  __builtin_ia32_scattersiv8df ((void *) (ADDR), (__mmask8)0xFF,	\
+				(__v8si)(__m256i) (INDEX),		\
+				(__v8df)(__m512d) (V1), (int) (SCALE))
 
 #define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scattersiv8df ((void *)ADDR, (__mmask8)MASK,		\
-				(__v8si)(__m256i)INDEX,			\
-				(__v8df)(__m512d)V1, (int)SCALE)
+  __builtin_ia32_scattersiv8df ((void *) (ADDR), (__mmask8) (MASK),	\
+				(__v8si)(__m256i) (INDEX),		\
+				(__v8df)(__m512d) (V1), (int) (SCALE))
 
 #define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scatterdiv16sf ((void *)ADDR, (__mmask8)0xFF,		\
-				 (__v8di)(__m512i)INDEX,		\
-				 (__v8sf)(__m256)V1, (int)SCALE)
+  __builtin_ia32_scatterdiv16sf ((void *) (ADDR), (__mmask8)0xFF,	\
+				 (__v8di)(__m512i) (INDEX),		\
+				 (__v8sf)(__m256) (V1), (int) (SCALE))
 
 #define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scatterdiv16sf ((void *)ADDR, (__mmask16)MASK,		\
-				 (__v8di)(__m512i)INDEX,		\
-				 (__v8sf)(__m256)V1, (int)SCALE)
+  __builtin_ia32_scatterdiv16sf ((void *) (ADDR), (__mmask16) (MASK),	\
+				 (__v8di)(__m512i) (INDEX),		\
+				 (__v8sf)(__m256) (V1), (int) (SCALE))
 
 #define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scatterdiv8df ((void *)ADDR, (__mmask8)0xFF,		\
-				(__v8di)(__m512i)INDEX,			\
-				(__v8df)(__m512d)V1, (int)SCALE)
+  __builtin_ia32_scatterdiv8df ((void *) (ADDR), (__mmask8)0xFF,	\
+				(__v8di)(__m512i) (INDEX),		\
+				(__v8df)(__m512d) (V1), (int) (SCALE))
 
 #define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scatterdiv8df ((void *)ADDR, (__mmask8)MASK,		\
-				(__v8di)(__m512i)INDEX,			\
-				(__v8df)(__m512d)V1, (int)SCALE)
+  __builtin_ia32_scatterdiv8df ((void *) (ADDR), (__mmask8) (MASK),	\
+				(__v8di)(__m512i) (INDEX),		\
+				(__v8df)(__m512d) (V1), (int) (SCALE))
 
 #define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scattersiv16si ((void *)ADDR, (__mmask16)0xFFFF,	\
-				 (__v16si)(__m512i)INDEX,		\
-				 (__v16si)(__m512i)V1, (int)SCALE)
+  __builtin_ia32_scattersiv16si ((void *) (ADDR), (__mmask16)0xFFFF,	\
+				 (__v16si)(__m512i) (INDEX),		\
+				 (__v16si)(__m512i) (V1), (int) (SCALE))
 
 #define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)	\
-  __builtin_ia32_scattersiv16si ((void *)ADDR, (__mmask16)MASK,		\
-				 (__v16si)(__m512i)INDEX,		\
-				 (__v16si)(__m512i)V1, (int)SCALE)
+  __builtin_ia32_scattersiv16si ((void *) (ADDR), (__mmask16) (MASK),	\
+				 (__v16si)(__m512i) (INDEX),		\
+				 (__v16si)(__m512i) (V1), (int) (SCALE))
 
 #define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scattersiv8di ((void *)ADDR, (__mmask8)0xFF,		\
-				(__v8si)(__m256i)INDEX,			\
-				(__v8di)(__m512i)V1, (int)SCALE)
+  __builtin_ia32_scattersiv8di ((void *) (ADDR), (__mmask8)0xFF,	\
+				(__v8si)(__m256i) (INDEX),		\
+				(__v8di)(__m512i) (V1), (int) (SCALE))
 
 #define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)	\
-  __builtin_ia32_scattersiv8di ((void *)ADDR, (__mmask8)MASK,		\
-				(__v8si)(__m256i)INDEX,			\
-				(__v8di)(__m512i)V1, (int)SCALE)
+  __builtin_ia32_scattersiv8di ((void *) (ADDR), (__mmask8) (MASK),	\
+				(__v8si)(__m256i) (INDEX),		\
+				(__v8di)(__m512i) (V1), (int) (SCALE))
 
 #define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scatterdiv16si ((void *)ADDR, (__mmask8)0xFF,		\
-				 (__v8di)(__m512i)INDEX,		\
-				 (__v8si)(__m256i)V1, (int)SCALE)
+  __builtin_ia32_scatterdiv16si ((void *) (ADDR), (__mmask8)0xFF,	\
+				 (__v8di)(__m512i) (INDEX),		\
+				 (__v8si)(__m256i) (V1), (int) (SCALE))
 
 #define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)	\
-  __builtin_ia32_scatterdiv16si ((void *)ADDR, (__mmask8)MASK,		\
-				 (__v8di)(__m512i)INDEX,		\
-				 (__v8si)(__m256i)V1, (int)SCALE)
+  __builtin_ia32_scatterdiv16si ((void *) (ADDR), (__mmask8) (MASK),	\
+				 (__v8di)(__m512i) (INDEX),		\
+				 (__v8si)(__m256i) (V1), (int) (SCALE))
 
 #define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scatterdiv8di ((void *)ADDR, (__mmask8)0xFF,		\
-				(__v8di)(__m512i)INDEX,			\
-				(__v8di)(__m512i)V1, (int)SCALE)
+  __builtin_ia32_scatterdiv8di ((void *) (ADDR), (__mmask8)0xFF,	\
+				(__v8di)(__m512i) (INDEX),		\
+				(__v8di)(__m512i) (V1), (int) (SCALE))
 
 #define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)	\
-  __builtin_ia32_scatterdiv8di ((void *)ADDR, (__mmask8)MASK,		\
-				(__v8di)(__m512i)INDEX,			\
-				(__v8di)(__m512i)V1, (int)SCALE)
+  __builtin_ia32_scatterdiv8di ((void *) (ADDR), (__mmask8) (MASK),	\
+				(__v8di)(__m512i) (INDEX),		\
+				(__v8di)(__m512i) (V1), (int) (SCALE))
 #endif
 
 extern __inline __m512d
--- gcc/config/i386/avx512pfintrin.h.jj	2020-01-12 11:54:36.315414887 +0100
+++ gcc/config/i386/avx512pfintrin.h	2020-04-29 11:10:11.272661182 +0200
@@ -192,68 +192,73 @@  _mm512_mask_prefetch_i64scatter_ps (void
 
 #else
 #define _mm512_prefetch_i32gather_pd(INDEX, ADDR, SCALE, HINT)		     \
-  __builtin_ia32_gatherpfdpd ((__mmask8)0xFF, (__v8si)(__m256i)INDEX,	     \
-			      (void const *)ADDR, (int)SCALE, (int)HINT)
+  __builtin_ia32_gatherpfdpd ((__mmask8)0xFF, (__v8si)(__m256i) (INDEX),     \
+			      (void const *) (ADDR), (int) (SCALE),	     \
+			      (int) (HINT))
 
 #define _mm512_prefetch_i32gather_ps(INDEX, ADDR, SCALE, HINT)		     \
-  __builtin_ia32_gatherpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i)INDEX,    \
-			      (void const *)ADDR, (int)SCALE, (int)HINT)
+  __builtin_ia32_gatherpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i) (INDEX), \
+			      (void const *) (ADDR), (int) (SCALE),	     \
+			      (int) (HINT))
 
 #define _mm512_mask_prefetch_i32gather_pd(INDEX, MASK, ADDR, SCALE, HINT)    \
-  __builtin_ia32_gatherpfdpd ((__mmask8)MASK, (__v8si)(__m256i)INDEX,	     \
-			      (void const *)ADDR, (int)SCALE, (int)HINT)
+  __builtin_ia32_gatherpfdpd ((__mmask8) (MASK), (__v8si)(__m256i) (INDEX),  \
+			      (void const *) (ADDR), (int) (SCALE),	     \
+			      (int) (HINT))
 
 #define _mm512_mask_prefetch_i32gather_ps(INDEX, MASK, ADDR, SCALE, HINT)    \
-  __builtin_ia32_gatherpfdps ((__mmask16)MASK, (__v16si)(__m512i)INDEX,      \
-			      (void const *)ADDR, (int)SCALE, (int)HINT)
+  __builtin_ia32_gatherpfdps ((__mmask16) (MASK), (__v16si)(__m512i) (INDEX),\
+			      (void const *) (ADDR), (int) (SCALE),	     \
+			      (int) (HINT))
 
 #define _mm512_prefetch_i64gather_pd(INDEX, ADDR, SCALE, HINT)		     \
-  __builtin_ia32_gatherpfqpd ((__mmask8)0xFF, (__v8di)(__m512i)INDEX,	     \
-			      (void *)ADDR, (int)SCALE, (int)HINT)
+  __builtin_ia32_gatherpfqpd ((__mmask8)0xFF, (__v8di)(__m512i) (INDEX),     \
+			      (void *) (ADDR), (int) (SCALE), (int) (HINT))
 
 #define _mm512_prefetch_i64gather_ps(INDEX, ADDR, SCALE, HINT)		     \
-  __builtin_ia32_gatherpfqps ((__mmask8)0xFF, (__v8di)(__m512i)INDEX,	     \
-			      (void *)ADDR, (int)SCALE, (int)HINT)
+  __builtin_ia32_gatherpfqps ((__mmask8)0xFF, (__v8di)(__m512i) (INDEX),     \
+			      (void *) (ADDR), (int) (SCALE), (int) (HINT))
 
 #define _mm512_mask_prefetch_i64gather_pd(INDEX, MASK, ADDR, SCALE, HINT)    \
-  __builtin_ia32_gatherpfqpd ((__mmask8)MASK, (__v8di)(__m512i)INDEX,	     \
-			      (void *)ADDR, (int)SCALE, (int)HINT)
+  __builtin_ia32_gatherpfqpd ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX),  \
+			      (void *) (ADDR), (int) (SCALE), (int) (HINT))
 
 #define _mm512_mask_prefetch_i64gather_ps(INDEX, MASK, ADDR, SCALE, HINT)    \
-  __builtin_ia32_gatherpfqps ((__mmask8)MASK, (__v8di)(__m512i)INDEX,	     \
-			      (void *)ADDR, (int)SCALE, (int)HINT)
+  __builtin_ia32_gatherpfqps ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX),  \
+			      (void *) (ADDR), (int) (SCALE), (int) (HINT))
 
 #define _mm512_prefetch_i32scatter_pd(ADDR, INDEX, SCALE, HINT)              \
-  __builtin_ia32_scatterpfdpd ((__mmask8)0xFF, (__v8si)(__m256i)INDEX,       \
-			       (void *)ADDR, (int)SCALE, (int)HINT)
+  __builtin_ia32_scatterpfdpd ((__mmask8)0xFF, (__v8si)(__m256i) (INDEX),    \
+			       (void *) (ADDR), (int) (SCALE), (int) (HINT))
 
 #define _mm512_prefetch_i32scatter_ps(ADDR, INDEX, SCALE, HINT)              \
-  __builtin_ia32_scatterpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i)INDEX,   \
-			       (void *)ADDR, (int)SCALE, (int)HINT)
+  __builtin_ia32_scatterpfdps ((__mmask16)0xFFFF, (__v16si)(__m512i) (INDEX),\
+			       (void *) (ADDR), (int) (SCALE), (int) (HINT))
 
 #define _mm512_mask_prefetch_i32scatter_pd(ADDR, MASK, INDEX, SCALE, HINT)   \
-  __builtin_ia32_scatterpfdpd ((__mmask8)MASK, (__v8si)(__m256i)INDEX,       \
-			       (void *)ADDR, (int)SCALE, (int)HINT)
+  __builtin_ia32_scatterpfdpd ((__mmask8) (MASK), (__v8si)(__m256i) (INDEX), \
+			       (void *) (ADDR), (int) (SCALE), (int) (HINT))
 
 #define _mm512_mask_prefetch_i32scatter_ps(ADDR, MASK, INDEX, SCALE, HINT)   \
-  __builtin_ia32_scatterpfdps ((__mmask16)MASK, (__v16si)(__m512i)INDEX,     \
-			       (void *)ADDR, (int)SCALE, (int)HINT)
+  __builtin_ia32_scatterpfdps ((__mmask16) (MASK),			     \
+			       (__v16si)(__m512i) (INDEX),		     \
+			       (void *) (ADDR), (int) (SCALE), (int) (HINT))
 
 #define _mm512_prefetch_i64scatter_pd(ADDR, INDEX, SCALE, HINT)              \
-  __builtin_ia32_scatterpfqpd ((__mmask8)0xFF, (__v8di)(__m512i)INDEX,	     \
-			       (void *)ADDR, (int)SCALE, (int)HINT)
+  __builtin_ia32_scatterpfqpd ((__mmask8)0xFF, (__v8di)(__m512i) (INDEX),    \
+			       (void *) (ADDR), (int) (SCALE), (int) (HINT))
 
 #define _mm512_prefetch_i64scatter_ps(ADDR, INDEX, SCALE, HINT)              \
-  __builtin_ia32_scatterpfqps ((__mmask8)0xFF, (__v8di)(__m512i)INDEX,	     \
-			       (void *)ADDR, (int)SCALE, (int)HINT)
+  __builtin_ia32_scatterpfqps ((__mmask8)0xFF, (__v8di)(__m512i) (INDEX),    \
+			       (void *) (ADDR), (int) (SCALE), (int) (HINT))
 
 #define _mm512_mask_prefetch_i64scatter_pd(ADDR, MASK, INDEX, SCALE, HINT)   \
-  __builtin_ia32_scatterpfqpd ((__mmask8)MASK, (__v8di)(__m512i)INDEX,	     \
-			       (void *)ADDR, (int)SCALE, (int)HINT)
+  __builtin_ia32_scatterpfqpd ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX), \
+			       (void *) (ADDR), (int) (SCALE), (int) (HINT))
 
 #define _mm512_mask_prefetch_i64scatter_ps(ADDR, MASK, INDEX, SCALE, HINT)   \
-  __builtin_ia32_scatterpfqps ((__mmask8)MASK, (__v8di)(__m512i)INDEX,	     \
-			       (void *)ADDR, (int)SCALE, (int)HINT)
+  __builtin_ia32_scatterpfqps ((__mmask8) (MASK), (__v8di)(__m512i) (INDEX), \
+			       (void *) (ADDR), (int) (SCALE), (int) (HINT))
 #endif
 
 #ifdef __DISABLE_AVX512PF__
--- gcc/config/i386/avx512vlintrin.h.jj	2020-01-12 11:54:36.316414872 +0100
+++ gcc/config/i386/avx512vlintrin.h	2020-04-29 11:16:27.671094124 +0200
@@ -13000,260 +13000,276 @@  _mm256_permutex_pd (__m256d __X, const i
                                           (__mmask8)(U)))
 
 #define _mm256_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m256) __builtin_ia32_gather3siv8sf ((__v8sf)(__m256)V1OLD,		\
-					 (void const *)ADDR,		\
-					 (__v8si)(__m256i)INDEX,	\
-					 (__mmask8)MASK, (int)SCALE)
+  (__m256) __builtin_ia32_gather3siv8sf ((__v8sf)(__m256) (V1OLD),	\
+					 (void const *) (ADDR),		\
+					 (__v8si)(__m256i) (INDEX),	\
+					 (__mmask8) (MASK),		\
+					 (int) (SCALE))
 
 #define _mm_mmask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)		\
-  (__m128) __builtin_ia32_gather3siv4sf ((__v4sf)(__m128)V1OLD,		\
-					 (void const *)ADDR,		\
-					 (__v4si)(__m128i)INDEX,	\
-					 (__mmask8)MASK, (int)SCALE)
+  (__m128) __builtin_ia32_gather3siv4sf ((__v4sf)(__m128) (V1OLD),	\
+					 (void const *) (ADDR),		\
+					 (__v4si)(__m128i) (INDEX),	\
+					 (__mmask8) (MASK),		\
+					 (int) (SCALE))
 
 #define _mm256_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m256d) __builtin_ia32_gather3siv4df ((__v4df)(__m256d)V1OLD,	\
-					  (void const *)ADDR,		\
-					  (__v4si)(__m128i)INDEX,	\
-					  (__mmask8)MASK, (int)SCALE)
+  (__m256d) __builtin_ia32_gather3siv4df ((__v4df)(__m256d) (V1OLD),	\
+					  (void const *) (ADDR),	\
+					  (__v4si)(__m128i) (INDEX),	\
+					  (__mmask8) (MASK),		\
+					  (int) (SCALE))
 
 #define _mm_mmask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)		\
-  (__m128d) __builtin_ia32_gather3siv2df ((__v2df)(__m128d)V1OLD,	\
-					  (void const *)ADDR,		\
-					  (__v4si)(__m128i)INDEX,	\
-					  (__mmask8)MASK, (int)SCALE)
+  (__m128d) __builtin_ia32_gather3siv2df ((__v2df)(__m128d) (V1OLD),	\
+					  (void const *) (ADDR),	\
+					  (__v4si)(__m128i) (INDEX),	\
+					  (__mmask8) (MASK),		\
+					  (int) (SCALE))
 
 #define _mm256_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m128) __builtin_ia32_gather3div8sf ((__v4sf)(__m128)V1OLD,		\
-					 (void const *)ADDR,		\
-					 (__v4di)(__m256i)INDEX,	\
-					 (__mmask8)MASK, (int)SCALE)
+  (__m128) __builtin_ia32_gather3div8sf ((__v4sf)(__m128) (V1OLD),	\
+					 (void const *) (ADDR),		\
+					 (__v4di)(__m256i) (INDEX),	\
+					 (__mmask8) (MASK),		\
+					 (int) (SCALE))
 
 #define _mm_mmask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE)		\
-  (__m128) __builtin_ia32_gather3div4sf ((__v4sf)(__m128)V1OLD,		\
-					 (void const *)ADDR,		\
-					 (__v2di)(__m128i)INDEX,	\
-					 (__mmask8)MASK, (int)SCALE)
+  (__m128) __builtin_ia32_gather3div4sf ((__v4sf)(__m128) (V1OLD),	\
+					 (void const *) (ADDR),		\
+					 (__v2di)(__m128i) (INDEX),	\
+					 (__mmask8) (MASK),		\
+					 (int) (SCALE))
 
 #define _mm256_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m256d) __builtin_ia32_gather3div4df ((__v4df)(__m256d)V1OLD,	\
-					  (void const *)ADDR,		\
-					  (__v4di)(__m256i)INDEX,	\
-					  (__mmask8)MASK, (int)SCALE)
+  (__m256d) __builtin_ia32_gather3div4df ((__v4df)(__m256d) (V1OLD),	\
+					  (void const *) (ADDR),	\
+					  (__v4di)(__m256i) (INDEX),	\
+					  (__mmask8) (MASK),		\
+					  (int) (SCALE))
 
 #define _mm_mmask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE)		\
-  (__m128d) __builtin_ia32_gather3div2df ((__v2df)(__m128d)V1OLD,	\
-					  (void const *)ADDR,		\
-					  (__v2di)(__m128i)INDEX,	\
-					  (__mmask8)MASK, (int)SCALE)
+  (__m128d) __builtin_ia32_gather3div2df ((__v2df)(__m128d) (V1OLD),	\
+					  (void const *) (ADDR),	\
+					  (__v2di)(__m128i) (INDEX),	\
+					  (__mmask8) (MASK),		\
+					  (int) (SCALE))
 
 #define _mm256_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m256i) __builtin_ia32_gather3siv8si ((__v8si)(__m256i)V1OLD,	\
-					  (void const *)ADDR,		\
-					  (__v8si)(__m256i)INDEX,	\
-					  (__mmask8)MASK, (int)SCALE)
+  (__m256i) __builtin_ia32_gather3siv8si ((__v8si)(__m256i) (V1OLD),	\
+					  (void const *) (ADDR),	\
+					  (__v8si)(__m256i) (INDEX),	\
+					  (__mmask8) (MASK),		\
+					  (int) (SCALE))
 
 #define _mm_mmask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m128i) __builtin_ia32_gather3siv4si ((__v4si)(__m128i)V1OLD,	\
-					  (void const *)ADDR,		\
-					  (__v4si)(__m128i)INDEX,	\
-					  (__mmask8)MASK, (int)SCALE)
+  (__m128i) __builtin_ia32_gather3siv4si ((__v4si)(__m128i) (V1OLD),	\
+					  (void const *) (ADDR),	\
+					  (__v4si)(__m128i) (INDEX),	\
+					  (__mmask8) (MASK),		\
+					  (int) (SCALE))
 
 #define _mm256_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m256i) __builtin_ia32_gather3siv4di ((__v4di)(__m256i)V1OLD,	\
-					  (void const *)ADDR,		\
-					  (__v4si)(__m128i)INDEX,	\
-					  (__mmask8)MASK, (int)SCALE)
+  (__m256i) __builtin_ia32_gather3siv4di ((__v4di)(__m256i) (V1OLD),	\
+					  (void const *) (ADDR),	\
+					  (__v4si)(__m128i) (INDEX),	\
+					  (__mmask8) (MASK),		\
+					  (int) (SCALE))
 
 #define _mm_mmask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m128i) __builtin_ia32_gather3siv2di ((__v2di)(__m128i)V1OLD,	\
-					  (void const *)ADDR,		\
-					  (__v4si)(__m128i)INDEX,	\
-					  (__mmask8)MASK, (int)SCALE)
+  (__m128i) __builtin_ia32_gather3siv2di ((__v2di)(__m128i) (V1OLD),	\
+					  (void const *) (ADDR),	\
+					  (__v4si)(__m128i) (INDEX),	\
+					  (__mmask8) (MASK),		\
+					  (int) (SCALE))
 
 #define _mm256_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m128i) __builtin_ia32_gather3div8si ((__v4si)(__m128i)V1OLD,	\
-					  (void const *)ADDR,		\
-					  (__v4di)(__m256i)INDEX,	\
-					  (__mmask8)MASK, (int)SCALE)
+  (__m128i) __builtin_ia32_gather3div8si ((__v4si)(__m128i) (V1OLD),	\
+					  (void const *) (ADDR),	\
+					  (__v4di)(__m256i) (INDEX),	\
+					  (__mmask8) (MASK),		\
+					  (int) (SCALE))
 
 #define _mm_mmask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m128i) __builtin_ia32_gather3div4si ((__v4si)(__m128i)V1OLD,	\
-					  (void const *)ADDR,		\
-					  (__v2di)(__m128i)INDEX,	\
-					  (__mmask8)MASK, (int)SCALE)
+  (__m128i) __builtin_ia32_gather3div4si ((__v4si)(__m128i) (V1OLD),	\
+					  (void const *) (ADDR),	\
+					  (__v2di)(__m128i) (INDEX),	\
+					  (__mmask8) (MASK),		\
+					  (int) (SCALE))
 
 #define _mm256_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m256i) __builtin_ia32_gather3div4di ((__v4di)(__m256i)V1OLD,	\
-					  (void const *)ADDR,		\
-					  (__v4di)(__m256i)INDEX,	\
-					  (__mmask8)MASK, (int)SCALE)
+  (__m256i) __builtin_ia32_gather3div4di ((__v4di)(__m256i) (V1OLD),	\
+					  (void const *) (ADDR),	\
+					  (__v4di)(__m256i) (INDEX),	\
+					  (__mmask8) (MASK),		\
+					  (int) (SCALE))
 
 #define _mm_mmask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE)	\
-  (__m128i) __builtin_ia32_gather3div2di ((__v2di)(__m128i)V1OLD,	\
-					  (void const *)ADDR,		\
-					  (__v2di)(__m128i)INDEX,	\
-					  (__mmask8)MASK, (int)SCALE)
+  (__m128i) __builtin_ia32_gather3div2di ((__v2di)(__m128i) (V1OLD),	\
+					  (void const *) (ADDR),	\
+					  (__v2di)(__m128i) (INDEX),	\
+					  (__mmask8) (MASK),		\
+					  (int) (SCALE))
 
 #define _mm256_i32scatter_ps(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scattersiv8sf ((void *)ADDR, (__mmask8)0xFF,		\
-				(__v8si)(__m256i)INDEX,			\
-				(__v8sf)(__m256)V1, (int)SCALE)
+  __builtin_ia32_scattersiv8sf ((void *) (ADDR), (__mmask8)0xFF,	\
+				(__v8si)(__m256i) (INDEX),		\
+				(__v8sf)(__m256) (V1), (int) (SCALE))
 
 #define _mm256_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scattersiv8sf ((void *)ADDR, (__mmask8)MASK,		\
-				(__v8si)(__m256i)INDEX,			\
-				(__v8sf)(__m256)V1, (int)SCALE)
+  __builtin_ia32_scattersiv8sf ((void *) (ADDR), (__mmask8) (MASK),	\
+				(__v8si)(__m256i) (INDEX),		\
+				(__v8sf)(__m256) (V1), (int) (SCALE))
 
 #define _mm_i32scatter_ps(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scattersiv4sf ((void *)ADDR, (__mmask8)0xFF,		\
-				(__v4si)(__m128i)INDEX,			\
-				(__v4sf)(__m128)V1, (int)SCALE)
+  __builtin_ia32_scattersiv4sf ((void *) (ADDR), (__mmask8)0xFF,	\
+				(__v4si)(__m128i) (INDEX),		\
+				(__v4sf)(__m128) (V1), (int) (SCALE))
 
 #define _mm_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scattersiv4sf ((void *)ADDR, (__mmask8)MASK,		\
-				(__v4si)(__m128i)INDEX,			\
-				(__v4sf)(__m128)V1, (int)SCALE)
+  __builtin_ia32_scattersiv4sf ((void *) (ADDR), (__mmask8) (MASK),	\
+				(__v4si)(__m128i) (INDEX),		\
+				(__v4sf)(__m128) (V1), (int) (SCALE))
 
 #define _mm256_i32scatter_pd(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scattersiv4df ((void *)ADDR, (__mmask8)0xFF,		\
-				(__v4si)(__m128i)INDEX,			\
-				(__v4df)(__m256d)V1, (int)SCALE)
+  __builtin_ia32_scattersiv4df ((void *) (ADDR), (__mmask8)0xFF,	\
+				(__v4si)(__m128i) (INDEX),		\
+				(__v4df)(__m256d) (V1), (int) (SCALE))
 
 #define _mm256_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scattersiv4df ((void *)ADDR, (__mmask8)MASK,		\
-				(__v4si)(__m128i)INDEX,			\
-				(__v4df)(__m256d)V1, (int)SCALE)
+  __builtin_ia32_scattersiv4df ((void *) (ADDR), (__mmask8) (MASK),	\
+				(__v4si)(__m128i) (INDEX),		\
+				(__v4df)(__m256d) (V1), (int) (SCALE))
 
 #define _mm_i32scatter_pd(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scattersiv2df ((void *)ADDR, (__mmask8)0xFF,		\
-				(__v4si)(__m128i)INDEX,			\
-				(__v2df)(__m128d)V1, (int)SCALE)
+  __builtin_ia32_scattersiv2df ((void *) (ADDR), (__mmask8)0xFF,	\
+				(__v4si)(__m128i) (INDEX),		\
+				(__v2df)(__m128d) (V1), (int) (SCALE))
 
 #define _mm_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scattersiv2df ((void *)ADDR, (__mmask8)MASK,		\
-				(__v4si)(__m128i)INDEX,			\
-				(__v2df)(__m128d)V1, (int)SCALE)
+  __builtin_ia32_scattersiv2df ((void *) (ADDR), (__mmask8) (MASK),	\
+				(__v4si)(__m128i) (INDEX),		\
+				(__v2df)(__m128d) (V1), (int) (SCALE))
 
 #define _mm256_i64scatter_ps(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scatterdiv8sf ((void *)ADDR, (__mmask8)0xFF,		\
-				(__v4di)(__m256i)INDEX,			\
-				(__v4sf)(__m128)V1, (int)SCALE)
+  __builtin_ia32_scatterdiv8sf ((void *) (ADDR), (__mmask8)0xFF,	\
+				(__v4di)(__m256i) (INDEX),		\
+				(__v4sf)(__m128) (V1), (int) (SCALE))
 
 #define _mm256_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scatterdiv8sf ((void *)ADDR, (__mmask8)MASK,		\
-				(__v4di)(__m256i)INDEX,			\
-				(__v4sf)(__m128)V1, (int)SCALE)
+  __builtin_ia32_scatterdiv8sf ((void *) (ADDR), (__mmask8) (MASK),	\
+				(__v4di)(__m256i) (INDEX),		\
+				(__v4sf)(__m128) (V1), (int) (SCALE))
 
 #define _mm_i64scatter_ps(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scatterdiv4sf ((void *)ADDR, (__mmask8)0xFF,		\
-				(__v2di)(__m128i)INDEX,			\
-				(__v4sf)(__m128)V1, (int)SCALE)
+  __builtin_ia32_scatterdiv4sf ((void *) (ADDR), (__mmask8)0xFF,	\
+				(__v2di)(__m128i) (INDEX),		\
+				(__v4sf)(__m128) (V1), (int) (SCALE))
 
 #define _mm_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scatterdiv4sf ((void *)ADDR, (__mmask8)MASK,		\
-				(__v2di)(__m128i)INDEX,			\
-				(__v4sf)(__m128)V1, (int)SCALE)
+  __builtin_ia32_scatterdiv4sf ((void *) (ADDR), (__mmask8) (MASK),	\
+				(__v2di)(__m128i) (INDEX),		\
+				(__v4sf)(__m128) (V1), (int) (SCALE))
 
 #define _mm256_i64scatter_pd(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scatterdiv4df ((void *)ADDR, (__mmask8)0xFF,		\
-				(__v4di)(__m256i)INDEX,			\
-				(__v4df)(__m256d)V1, (int)SCALE)
+  __builtin_ia32_scatterdiv4df ((void *) (ADDR), (__mmask8)0xFF,	\
+				(__v4di)(__m256i) (INDEX),		\
+				(__v4df)(__m256d) (V1), (int) (SCALE))
 
 #define _mm256_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scatterdiv4df ((void *)ADDR, (__mmask8)MASK,		\
-				(__v4di)(__m256i)INDEX,			\
-				(__v4df)(__m256d)V1, (int)SCALE)
+  __builtin_ia32_scatterdiv4df ((void *) (ADDR), (__mmask8) (MASK),	\
+				(__v4di)(__m256i) (INDEX),		\
+				(__v4df)(__m256d) (V1), (int) (SCALE))
 
 #define _mm_i64scatter_pd(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scatterdiv2df ((void *)ADDR, (__mmask8)0xFF,		\
-				(__v2di)(__m128i)INDEX,			\
-				(__v2df)(__m128d)V1, (int)SCALE)
+  __builtin_ia32_scatterdiv2df ((void *) (ADDR), (__mmask8)0xFF,	\
+				(__v2di)(__m128i) (INDEX),		\
+				(__v2df)(__m128d) (V1), (int) (SCALE))
 
 #define _mm_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scatterdiv2df ((void *)ADDR, (__mmask8)MASK,		\
-				(__v2di)(__m128i)INDEX,			\
-				(__v2df)(__m128d)V1, (int)SCALE)
+  __builtin_ia32_scatterdiv2df ((void *) (ADDR), (__mmask8) (MASK),	\
+				(__v2di)(__m128i) (INDEX),		\
+				(__v2df)(__m128d) (V1), (int) (SCALE))
 
 #define _mm256_i32scatter_epi32(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scattersiv8si ((void *)ADDR, (__mmask8)0xFF,		\
-				(__v8si)(__m256i)INDEX,			\
-				(__v8si)(__m256i)V1, (int)SCALE)
+  __builtin_ia32_scattersiv8si ((void *) (ADDR), (__mmask8)0xFF,	\
+				(__v8si)(__m256i) (INDEX),		\
+				(__v8si)(__m256i) (V1), (int) (SCALE))
 
 #define _mm256_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)	\
-  __builtin_ia32_scattersiv8si ((void *)ADDR, (__mmask8)MASK,		\
-				(__v8si)(__m256i)INDEX,			\
-				(__v8si)(__m256i)V1, (int)SCALE)
+  __builtin_ia32_scattersiv8si ((void *) (ADDR), (__mmask8) (MASK),	\
+				(__v8si)(__m256i) (INDEX),		\
+				(__v8si)(__m256i) (V1), (int) (SCALE))
 
 #define _mm_i32scatter_epi32(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scattersiv4si ((void *)ADDR, (__mmask8)0xFF,		\
-				(__v4si)(__m128i)INDEX,			\
-				(__v4si)(__m128i)V1, (int)SCALE)
+  __builtin_ia32_scattersiv4si ((void *) (ADDR), (__mmask8)0xFF,	\
+				(__v4si)(__m128i) (INDEX),		\
+				(__v4si)(__m128i) (V1), (int) (SCALE))
 
 #define _mm_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scattersiv4si ((void *)ADDR, (__mmask8)MASK,		\
-				(__v4si)(__m128i)INDEX,			\
-				(__v4si)(__m128i)V1, (int)SCALE)
+  __builtin_ia32_scattersiv4si ((void *) (ADDR), (__mmask8) (MASK),	\
+				(__v4si)(__m128i) (INDEX),		\
+				(__v4si)(__m128i) (V1), (int) (SCALE))
 
 #define _mm256_i32scatter_epi64(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scattersiv4di ((void *)ADDR, (__mmask8)0xFF,		\
-				(__v4si)(__m128i)INDEX,			\
-				(__v4di)(__m256i)V1, (int)SCALE)
+  __builtin_ia32_scattersiv4di ((void *) (ADDR), (__mmask8)0xFF,	\
+				(__v4si)(__m128i) (INDEX),		\
+				(__v4di)(__m256i) (V1), (int) (SCALE))
 
 #define _mm256_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)	\
-  __builtin_ia32_scattersiv4di ((void *)ADDR, (__mmask8)MASK,		\
-				(__v4si)(__m128i)INDEX,			\
-				(__v4di)(__m256i)V1, (int)SCALE)
+  __builtin_ia32_scattersiv4di ((void *) (ADDR), (__mmask8) (MASK),	\
+				(__v4si)(__m128i) (INDEX),		\
+				(__v4di)(__m256i) (V1), (int) (SCALE))
 
 #define _mm_i32scatter_epi64(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scattersiv2di ((void *)ADDR, (__mmask8)0xFF,		\
-				(__v4si)(__m128i)INDEX,			\
-				(__v2di)(__m128i)V1, (int)SCALE)
+  __builtin_ia32_scattersiv2di ((void *) (ADDR), (__mmask8)0xFF,	\
+				(__v4si)(__m128i) (INDEX),		\
+				(__v2di)(__m128i) (V1), (int) (SCALE))
 
 #define _mm_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scattersiv2di ((void *)ADDR, (__mmask8)MASK,		\
-				(__v4si)(__m128i)INDEX,			\
-				(__v2di)(__m128i)V1, (int)SCALE)
+  __builtin_ia32_scattersiv2di ((void *) (ADDR), (__mmask8) (MASK),	\
+				(__v4si)(__m128i) (INDEX),		\
+				(__v2di)(__m128i) (V1), (int) (SCALE))
 
 #define _mm256_i64scatter_epi32(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scatterdiv8si ((void *)ADDR, (__mmask8)0xFF,		\
-				(__v4di)(__m256i)INDEX,			\
-				(__v4si)(__m128i)V1, (int)SCALE)
+  __builtin_ia32_scatterdiv8si ((void *) (ADDR), (__mmask8)0xFF,	\
+				(__v4di)(__m256i) (INDEX),		\
+				(__v4si)(__m128i) (V1), (int) (SCALE))
 
 #define _mm256_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)	\
-  __builtin_ia32_scatterdiv8si ((void *)ADDR, (__mmask8)MASK,		\
-				(__v4di)(__m256i)INDEX,			\
-				(__v4si)(__m128i)V1, (int)SCALE)
+  __builtin_ia32_scatterdiv8si ((void *) (ADDR), (__mmask8) (MASK),	\
+				(__v4di)(__m256i) (INDEX),		\
+				(__v4si)(__m128i) (V1), (int) (SCALE))
 
 #define _mm_i64scatter_epi32(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scatterdiv4si ((void *)ADDR, (__mmask8)0xFF,		\
-				(__v2di)(__m128i)INDEX,			\
-				(__v4si)(__m128i)V1, (int)SCALE)
+  __builtin_ia32_scatterdiv4si ((void *) (ADDR), (__mmask8)0xFF,	\
+				(__v2di)(__m128i) (INDEX),		\
+				(__v4si)(__m128i) (V1), (int) (SCALE))
 
 #define _mm_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scatterdiv4si ((void *)ADDR, (__mmask8)MASK,		\
-				(__v2di)(__m128i)INDEX,			\
-				(__v4si)(__m128i)V1, (int)SCALE)
+  __builtin_ia32_scatterdiv4si ((void *) (ADDR), (__mmask8) (MASK),	\
+				(__v2di)(__m128i) (INDEX),		\
+				(__v4si)(__m128i) (V1), (int) (SCALE))
 
 #define _mm256_i64scatter_epi64(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scatterdiv4di ((void *)ADDR, (__mmask8)0xFF,		\
-				(__v4di)(__m256i)INDEX,			\
-				(__v4di)(__m256i)V1, (int)SCALE)
+  __builtin_ia32_scatterdiv4di ((void *) (ADDR), (__mmask8)0xFF,	\
+				(__v4di)(__m256i) (INDEX),		\
+				(__v4di)(__m256i) (V1), (int) (SCALE))
 
 #define _mm256_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)	\
-  __builtin_ia32_scatterdiv4di ((void *)ADDR, (__mmask8)MASK,		\
-				(__v4di)(__m256i)INDEX,			\
-				(__v4di)(__m256i)V1, (int)SCALE)
+  __builtin_ia32_scatterdiv4di ((void *) (ADDR), (__mmask8) (MASK),	\
+				(__v4di)(__m256i) (INDEX),		\
+				(__v4di)(__m256i) (V1), (int) (SCALE))
 
 #define _mm_i64scatter_epi64(ADDR, INDEX, V1, SCALE)			\
-  __builtin_ia32_scatterdiv2di ((void *)ADDR, (__mmask8)0xFF,		\
-				(__v2di)(__m128i)INDEX,			\
-				(__v2di)(__m128i)V1, (int)SCALE)
+  __builtin_ia32_scatterdiv2di ((void *) (ADDR), (__mmask8)0xFF,	\
+				(__v2di)(__m128i) (INDEX),		\
+				(__v2di)(__m128i) (V1), (int) (SCALE))
 
 #define _mm_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE)		\
-  __builtin_ia32_scatterdiv2di ((void *)ADDR, (__mmask8)MASK,		\
-				(__v2di)(__m128i)INDEX,			\
-				(__v2di)(__m128i)V1, (int)SCALE)
+  __builtin_ia32_scatterdiv2di ((void *) (ADDR), (__mmask8) (MASK),	\
+				(__v2di)(__m128i) (INDEX),		\
+				(__v2di)(__m128i) (V1), (int) (SCALE))
 
 #define _mm256_mask_shuffle_epi32(W, U, X, C)                                       \
   ((__m256i)  __builtin_ia32_pshufd256_mask ((__v8si)(__m256i)(X), (int)(C),        \