diff mbox

Uglify inline argument and local var names in x86 intrinsics

Message ID 20160819105638.GS14857@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Aug. 19, 2016, 10:56 a.m. UTC
On Fri, Aug 19, 2016 at 11:48:16AM +0200, Uros Bizjak wrote:
> >         * config/i386/pkuintrin.h (_wrpkru): Likewise.  Add space after
> >         function name.
> >         (_rdpkru_u32): Add space after function name.
> 
> OK as obvious patch.

Thanks.  When changing this part, I've noticed the argument name key.
While most of the inline function arguments and local variables are properly
uglified (using __* names), some of them aren't, so if one e.g. does
#define key ({ ... })
#include <x86intrin.h>
or similar, it will fail to compile.
E.g. the glibc and libstdc++ headers try hard to uglify everything that
isn't part of the namespace reserved for what the header provides or for
the implementation, and IMNSHO the intrinsics headers should do the same.

I've used -g -dA -fno-merge-debug-strings -S -fkeep-inline-functions -D__always_inline__= -D__artificial__=
to compile sse-13.c (in addition to its dg-options) and then
grep -A3 'DW_TAG_formal_parameter\|DW_TAG_variable' sse-13.s
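and searched in there for the regexp "[^_ 	] (the leading double quote is
part of the pattern: a double quote followed by any character other than
an underscore, a space or a tab).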

Ok for trunk if testing succeeds?

2016-08-19  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/rdseedintrin.h (_rdseed16_step, _rdseed32_step,
	_rdseed64_step): Uglify argument names and/or local variable names
	in inline functions.
	* config/i386/rtmintrin.h (_xabort): Likewise.
	* config/i386/avx512vlintrin.h (_mm256_ternarylogic_epi64,
	_mm256_mask_ternarylogic_epi64, _mm256_maskz_ternarylogic_epi64,
	_mm256_ternarylogic_epi32, _mm256_mask_ternarylogic_epi32,
	_mm256_maskz_ternarylogic_epi32, _mm_ternarylogic_epi64,
	_mm_mask_ternarylogic_epi64, _mm_maskz_ternarylogic_epi64,
	_mm_ternarylogic_epi32, _mm_mask_ternarylogic_epi32,
	_mm_maskz_ternarylogic_epi32): Likewise.
	* config/i386/lwpintrin.h (__llwpcb, __lwpval32, __lwpval64,
	__lwpins32, __lwpins64): Likewise.
	* config/i386/avx2intrin.h (_mm_i32gather_pd, _mm_mask_i32gather_pd,
	_mm256_i32gather_pd, _mm256_mask_i32gather_pd, _mm_i64gather_pd,
	_mm_mask_i64gather_pd, _mm256_i64gather_pd, _mm256_mask_i64gather_pd,
	_mm_i32gather_ps, _mm_mask_i32gather_ps, _mm256_i32gather_ps,
	_mm256_mask_i32gather_ps, _mm_i64gather_ps, _mm_mask_i64gather_ps,
	_mm256_i64gather_ps, _mm256_mask_i64gather_ps, _mm_i32gather_epi64,
	_mm_mask_i32gather_epi64, _mm256_i32gather_epi64,
	_mm256_mask_i32gather_epi64, _mm_i64gather_epi64,
	_mm_mask_i64gather_epi64, _mm256_i64gather_epi64,
	_mm256_mask_i64gather_epi64, _mm_i32gather_epi32,
	_mm_mask_i32gather_epi32, _mm256_i32gather_epi32,
	_mm256_mask_i32gather_epi32, _mm_i64gather_epi32,
	_mm_mask_i64gather_epi32, _mm256_i64gather_epi32,
	_mm256_mask_i64gather_epi32): Likewise.
	* config/i386/pmm_malloc.h (_mm_malloc, _mm_free): Likewise.
	* config/i386/ia32intrin.h (__writeeflags): Likewise.
	* config/i386/pkuintrin.h (_wrpkru): Likewise.
	* config/i386/avx512pfintrin.h (_mm512_mask_prefetch_i32gather_pd,
	_mm512_mask_prefetch_i32gather_ps, _mm512_mask_prefetch_i64gather_pd,
	_mm512_mask_prefetch_i64gather_ps, _mm512_prefetch_i32scatter_pd,
	_mm512_prefetch_i32scatter_ps, _mm512_mask_prefetch_i32scatter_pd,
	_mm512_mask_prefetch_i32scatter_ps, _mm512_prefetch_i64scatter_pd,
	_mm512_prefetch_i64scatter_ps, _mm512_mask_prefetch_i64scatter_pd,
	_mm512_mask_prefetch_i64scatter_ps): Likewise.
	* config/i386/gmm_malloc.h (_mm_malloc, _mm_free): Likewise.
	* config/i386/avx512fintrin.h (_mm512_ternarylogic_epi64,
	_mm512_mask_ternarylogic_epi64, _mm512_maskz_ternarylogic_epi64,
	_mm512_ternarylogic_epi32, _mm512_mask_ternarylogic_epi32,
	_mm512_maskz_ternarylogic_epi32, _mm512_i32gather_ps,
	_mm512_mask_i32gather_ps, _mm512_i32gather_pd, _mm512_i64gather_ps,
	_mm512_i64gather_pd, _mm512_i32gather_epi32, _mm512_i32gather_epi64,
	_mm512_i64gather_epi32, _mm512_i64gather_epi64): Likewise.



	Jakub

Comments

Uros Bizjak Aug. 19, 2016, 11:10 a.m. UTC | #1
On Fri, Aug 19, 2016 at 12:56 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Fri, Aug 19, 2016 at 11:48:16AM +0200, Uros Bizjak wrote:
>> >         * config/i386/pkuintrin.h (_wrpkru): Likewise.  Add space after
>> >         function name.
>> >         (_rdpkru_u32): Add space after function name.
>>
>> OK as obvious patch.
>
> Thanks.  When changing this part, I've noticed the argument name key.
> While most of the inline function arguments and local variables are properly
> uglified (using __* names), some of them aren't, so if one e.g. does
> #define key ({ ... })
> #include <x86intrin.h>
> or similar, it will fail to compile.
> E.g. the glibc and libstdc++ headers try hard to uglify everything that
> isn't part of the namespace reserved for what the header provides and
> implementation and IMNSHO so should do the intrinsics.
>
> I've used -g -dA -fno-merge-debug-strings -S -fkeep-inline-functions -D__always_inline__= -D__artificial__=
> to compile sse-13.c (in addition to its dg-options) and then
> grep -A3 'DW_TAG_formal_parameter\|DW_TAG_variable' sse-13.s
> and searched in there for "[^_ 	] regexp.
>
> Ok for trunk if testing succeeds?

Yes, also OK.

Thanks,
Uros.

>
> 2016-08-19  Jakub Jelinek  <jakub@redhat.com>
>
>         * config/i386/rdseedintrin.h (_rdseed16_step, _rdseed32_step,
>         _rdseed64_step): Uglify argument names and/or local variable names
>         in inline functions.
>         * config/i386/rtmintrin.h (_xabort): Likewise.
>         * config/i386/avx512vlintrin.h (_mm256_ternarylogic_epi64,
>         _mm256_mask_ternarylogic_epi64, _mm256_maskz_ternarylogic_epi64,
>         _mm256_ternarylogic_epi32, _mm256_mask_ternarylogic_epi32,
>         _mm256_maskz_ternarylogic_epi32, _mm_ternarylogic_epi64,
>         _mm_mask_ternarylogic_epi64, _mm_maskz_ternarylogic_epi64,
>         _mm_ternarylogic_epi32, _mm_mask_ternarylogic_epi32,
>         _mm_maskz_ternarylogic_epi32): Likewise.
>         * config/i386/lwpintrin.h (__llwpcb, __lwpval32, __lwpval64,
>         __lwpins32, __lwpins64): Likewise.
>         * config/i386/avx2intrin.h (_mm_i32gather_pd, _mm_mask_i32gather_pd,
>         _mm256_i32gather_pd, _mm256_mask_i32gather_pd, _mm_i64gather_pd,
>         _mm_mask_i64gather_pd, _mm256_i64gather_pd, _mm256_mask_i64gather_pd,
>         _mm_i32gather_ps, _mm_mask_i32gather_ps, _mm256_i32gather_ps,
>         _mm256_mask_i32gather_ps, _mm_i64gather_ps, _mm_mask_i64gather_ps,
>         _mm256_i64gather_ps, _mm256_mask_i64gather_ps, _mm_i32gather_epi64,
>         _mm_mask_i32gather_epi64, _mm256_i32gather_epi64,
>         _mm256_mask_i32gather_epi64, _mm_i64gather_epi64,
>         _mm_mask_i64gather_epi64, _mm256_i64gather_epi64,
>         _mm256_mask_i64gather_epi64, _mm_i32gather_epi32,
>         _mm_mask_i32gather_epi32, _mm256_i32gather_epi32,
>         _mm256_mask_i32gather_epi32, _mm_i64gather_epi32,
>         _mm_mask_i64gather_epi32, _mm256_i64gather_epi32,
>         _mm256_mask_i64gather_epi32): Likewise.
>         * config/i386/pmm_malloc.h (_mm_malloc, _mm_free): Likewise.
>         * config/i386/ia32intrin.h (__writeeflags): Likewise.
>         * config/i386/pkuintrin.h (_wrpkru): Likewise.
>         * config/i386/avx512pfintrin.h (_mm512_mask_prefetch_i32gather_pd,
>         _mm512_mask_prefetch_i32gather_ps, _mm512_mask_prefetch_i64gather_pd,
>         _mm512_mask_prefetch_i64gather_ps, _mm512_prefetch_i32scatter_pd,
>         _mm512_prefetch_i32scatter_ps, _mm512_mask_prefetch_i32scatter_pd,
>         _mm512_mask_prefetch_i32scatter_ps, _mm512_prefetch_i64scatter_pd,
>         _mm512_prefetch_i64scatter_ps, _mm512_mask_prefetch_i64scatter_pd,
>         _mm512_mask_prefetch_i64scatter_ps): Likewise.
>         * config/i386/gmm_malloc.h (_mm_malloc, _mm_free): Likewise.
>         * config/i386/avx512fintrin.h (_mm512_ternarylogic_epi64,
>         _mm512_mask_ternarylogic_epi64, _mm512_maskz_ternarylogic_epi64,
>         _mm512_ternarylogic_epi32, _mm512_mask_ternarylogic_epi32,
>         _mm512_maskz_ternarylogic_epi32, _mm512_i32gather_ps,
>         _mm512_mask_i32gather_ps, _mm512_i32gather_pd, _mm512_i64gather_ps,
>         _mm512_i64gather_pd, _mm512_i32gather_epi32, _mm512_i32gather_epi64,
>         _mm512_i64gather_epi32, _mm512_i64gather_epi64): Likewise.
>
> --- gcc/config/i386/rdseedintrin.h.jj   2016-01-04 14:55:56.000000000 +0100
> +++ gcc/config/i386/rdseedintrin.h      2016-08-19 11:55:35.603707812 +0200
> @@ -37,24 +37,24 @@
>
>  extern __inline int
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> -_rdseed16_step (unsigned short *p)
> +_rdseed16_step (unsigned short *__p)
>  {
> -    return __builtin_ia32_rdseed_hi_step (p);
> +    return __builtin_ia32_rdseed_hi_step (__p);
>  }
>
>  extern __inline int
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> -_rdseed32_step (unsigned int *p)
> +_rdseed32_step (unsigned int *__p)
>  {
> -    return __builtin_ia32_rdseed_si_step (p);
> +    return __builtin_ia32_rdseed_si_step (__p);
>  }
>
>  #ifdef __x86_64__
>  extern __inline int
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> -_rdseed64_step (unsigned long long *p)
> +_rdseed64_step (unsigned long long *__p)
>  {
> -    return __builtin_ia32_rdseed_di_step (p);
> +    return __builtin_ia32_rdseed_di_step (__p);
>  }
>  #endif
>
> --- gcc/config/i386/rtmintrin.h.jj      2016-01-04 14:55:56.000000000 +0100
> +++ gcc/config/i386/rtmintrin.h 2016-08-19 11:58:16.043692136 +0200
> @@ -68,9 +68,9 @@ _xend (void)
>  #ifdef __OPTIMIZE__
>  extern __inline void
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> -_xabort (const unsigned int imm)
> +_xabort (const unsigned int __imm)
>  {
> -  __builtin_ia32_xabort (imm);
> +  __builtin_ia32_xabort (__imm);
>  }
>  #else
>  #define _xabort(N)  __builtin_ia32_xabort (N)
> --- gcc/config/i386/avx512vlintrin.h.jj 2016-08-15 17:01:04.000000000 +0200
> +++ gcc/config/i386/avx512vlintrin.h    2016-08-19 11:59:55.049448284 +0200
> @@ -9796,11 +9796,11 @@ _mm_maskz_srli_epi64 (__mmask8 __U, __m1
>  extern __inline __m256i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_ternarylogic_epi64 (__m256i __A, __m256i __B, __m256i __C,
> -                          const int imm)
> +                          const int __imm)
>  {
>    return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
>                                                      (__v4di) __B,
> -                                                    (__v4di) __C, imm,
> +                                                    (__v4di) __C, __imm,
>                                                      (__mmask8) -1);
>  }
>
> @@ -9808,11 +9808,11 @@ extern __inline __m256i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_mask_ternarylogic_epi64 (__m256i __A, __mmask8 __U,
>                                 __m256i __B, __m256i __C,
> -                               const int imm)
> +                               const int __imm)
>  {
>    return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
>                                                      (__v4di) __B,
> -                                                    (__v4di) __C, imm,
> +                                                    (__v4di) __C, __imm,
>                                                      (__mmask8) __U);
>  }
>
> @@ -9820,23 +9820,23 @@ extern __inline __m256i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_maskz_ternarylogic_epi64 (__mmask8 __U, __m256i __A,
>                                  __m256i __B, __m256i __C,
> -                                const int imm)
> +                                const int __imm)
>  {
>    return (__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di) __A,
>                                                       (__v4di) __B,
>                                                       (__v4di) __C,
> -                                                     imm,
> +                                                     __imm,
>                                                       (__mmask8) __U);
>  }
>
>  extern __inline __m256i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_ternarylogic_epi32 (__m256i __A, __m256i __B, __m256i __C,
> -                          const int imm)
> +                          const int __imm)
>  {
>    return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
>                                                      (__v8si) __B,
> -                                                    (__v8si) __C, imm,
> +                                                    (__v8si) __C, __imm,
>                                                      (__mmask8) -1);
>  }
>
> @@ -9844,11 +9844,11 @@ extern __inline __m256i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_mask_ternarylogic_epi32 (__m256i __A, __mmask8 __U,
>                                 __m256i __B, __m256i __C,
> -                               const int imm)
> +                               const int __imm)
>  {
>    return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
>                                                      (__v8si) __B,
> -                                                    (__v8si) __C, imm,
> +                                                    (__v8si) __C, __imm,
>                                                      (__mmask8) __U);
>  }
>
> @@ -9856,80 +9856,80 @@ extern __inline __m256i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm256_maskz_ternarylogic_epi32 (__mmask8 __U, __m256i __A,
>                                  __m256i __B, __m256i __C,
> -                                const int imm)
> +                                const int __imm)
>  {
>    return (__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si) __A,
>                                                       (__v8si) __B,
>                                                       (__v8si) __C,
> -                                                     imm,
> +                                                     __imm,
>                                                       (__mmask8) __U);
>  }
>
>  extern __inline __m128i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_ternarylogic_epi64 (__m128i __A, __m128i __B, __m128i __C,
> -                       const int imm)
> +                       const int __imm)
>  {
>    return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
>                                                      (__v2di) __B,
> -                                                    (__v2di) __C, imm,
> +                                                    (__v2di) __C, __imm,
>                                                      (__mmask8) -1);
>  }
>
>  extern __inline __m128i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_ternarylogic_epi64 (__m128i __A, __mmask8 __U,
> -                            __m128i __B, __m128i __C, const int imm)
> +                            __m128i __B, __m128i __C, const int __imm)
>  {
>    return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
>                                                      (__v2di) __B,
> -                                                    (__v2di) __C, imm,
> +                                                    (__v2di) __C, __imm,
>                                                      (__mmask8) __U);
>  }
>
>  extern __inline __m128i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_ternarylogic_epi64 (__mmask8 __U, __m128i __A,
> -                             __m128i __B, __m128i __C, const int imm)
> +                             __m128i __B, __m128i __C, const int __imm)
>  {
>    return (__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di) __A,
>                                                       (__v2di) __B,
>                                                       (__v2di) __C,
> -                                                     imm,
> +                                                     __imm,
>                                                       (__mmask8) __U);
>  }
>
>  extern __inline __m128i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_ternarylogic_epi32 (__m128i __A, __m128i __B, __m128i __C,
> -                       const int imm)
> +                       const int __imm)
>  {
>    return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
>                                                      (__v4si) __B,
> -                                                    (__v4si) __C, imm,
> +                                                    (__v4si) __C, __imm,
>                                                      (__mmask8) -1);
>  }
>
>  extern __inline __m128i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_mask_ternarylogic_epi32 (__m128i __A, __mmask8 __U,
> -                            __m128i __B, __m128i __C, const int imm)
> +                            __m128i __B, __m128i __C, const int __imm)
>  {
>    return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
>                                                      (__v4si) __B,
> -                                                    (__v4si) __C, imm,
> +                                                    (__v4si) __C, __imm,
>                                                      (__mmask8) __U);
>  }
>
>  extern __inline __m128i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm_maskz_ternarylogic_epi32 (__mmask8 __U, __m128i __A,
> -                             __m128i __B, __m128i __C, const int imm)
> +                             __m128i __B, __m128i __C, const int __imm)
>  {
>    return (__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si) __A,
>                                                       (__v4si) __B,
>                                                       (__v4si) __C,
> -                                                     imm,
> +                                                     __imm,
>                                                       (__mmask8) __U);
>  }
>
> --- gcc/config/i386/lwpintrin.h.jj      2016-01-04 14:55:56.000000000 +0100
> +++ gcc/config/i386/lwpintrin.h 2016-08-19 11:57:29.951271214 +0200
> @@ -35,9 +35,9 @@
>  #endif /* __LWP__ */
>
>  extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> -__llwpcb (void *pcbAddress)
> +__llwpcb (void *__pcbAddress)
>  {
> -  __builtin_ia32_llwpcb (pcbAddress);
> +  __builtin_ia32_llwpcb (__pcbAddress);
>  }
>
>  extern __inline void * __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> @@ -48,16 +48,17 @@ __slwpcb (void)
>
>  #ifdef __OPTIMIZE__
>  extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> -__lwpval32 (unsigned int data2, unsigned int data1, unsigned int flags)
> +__lwpval32 (unsigned int __data2, unsigned int __data1, unsigned int __flags)
>  {
> -  __builtin_ia32_lwpval32 (data2, data1, flags);
> +  __builtin_ia32_lwpval32 (__data2, __data1, __flags);
>  }
>
>  #ifdef __x86_64__
>  extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> -__lwpval64 (unsigned long long data2, unsigned int data1, unsigned int flags)
> +__lwpval64 (unsigned long long __data2, unsigned int __data1,
> +           unsigned int __flags)
>  {
> -  __builtin_ia32_lwpval64 (data2, data1, flags);
> +  __builtin_ia32_lwpval64 (__data2, __data1, __flags);
>  }
>  #endif
>  #else
> @@ -74,16 +75,17 @@ __lwpval64 (unsigned long long data2, un
>
>  #ifdef __OPTIMIZE__
>  extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> -__lwpins32 (unsigned int data2, unsigned int data1, unsigned int flags)
> +__lwpins32 (unsigned int __data2, unsigned int __data1, unsigned int __flags)
>  {
> -  return __builtin_ia32_lwpins32 (data2, data1, flags);
> +  return __builtin_ia32_lwpins32 (__data2, __data1, __flags);
>  }
>
>  #ifdef __x86_64__
>  extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> -__lwpins64 (unsigned long long data2, unsigned int data1, unsigned int flags)
> +__lwpins64 (unsigned long long __data2, unsigned int __data1,
> +           unsigned int __flags)
>  {
> -  return __builtin_ia32_lwpins64 (data2, data1, flags);
> +  return __builtin_ia32_lwpins64 (__data2, __data1, __flags);
>  }
>  #endif
>  #else
> --- gcc/config/i386/avx2intrin.h.jj     2016-01-04 14:55:55.000000000 +0100
> +++ gcc/config/i386/avx2intrin.h        2016-08-19 12:23:32.612588675 +0200
> @@ -1246,422 +1246,426 @@ _mm_srlv_epi64 (__m128i __X, __m128i __Y
>  #ifdef __OPTIMIZE__
>  extern __inline __m128d
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_i32gather_pd (double const *base, __m128i index, const int scale)
> +_mm_i32gather_pd (double const *__base, __m128i __index, const int __scale)
>  {
> -  __v2df zero = _mm_setzero_pd ();
> -  __v2df mask = _mm_cmpeq_pd (zero, zero);
> +  __v2df __zero = _mm_setzero_pd ();
> +  __v2df __mask = _mm_cmpeq_pd (__zero, __zero);
>
>    return (__m128d) __builtin_ia32_gathersiv2df (_mm_undefined_pd (),
> -                                               base,
> -                                               (__v4si)index,
> -                                               mask,
> -                                               scale);
> +                                               __base,
> +                                               (__v4si)__index,
> +                                               __mask,
> +                                               __scale);
>  }
>
>  extern __inline __m128d
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_i32gather_pd (__m128d src, double const *base, __m128i index,
> -                      __m128d mask, const int scale)
> +_mm_mask_i32gather_pd (__m128d __src, double const *__base, __m128i __index,
> +                      __m128d __mask, const int __scale)
>  {
> -  return (__m128d) __builtin_ia32_gathersiv2df ((__v2df)src,
> -                                               base,
> -                                               (__v4si)index,
> -                                               (__v2df)mask,
> -                                               scale);
> +  return (__m128d) __builtin_ia32_gathersiv2df ((__v2df)__src,
> +                                               __base,
> +                                               (__v4si)__index,
> +                                               (__v2df)__mask,
> +                                               __scale);
>  }
>
>  extern __inline __m256d
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_i32gather_pd (double const *base, __m128i index, const int scale)
> +_mm256_i32gather_pd (double const *__base, __m128i __index, const int __scale)
>  {
> -  __v4df zero = _mm256_setzero_pd ();
> -  __v4df mask = _mm256_cmp_pd (zero, zero, _CMP_EQ_OQ);
> +  __v4df __zero = _mm256_setzero_pd ();
> +  __v4df __mask = _mm256_cmp_pd (__zero, __zero, _CMP_EQ_OQ);
>
>    return (__m256d) __builtin_ia32_gathersiv4df (_mm256_undefined_pd (),
> -                                               base,
> -                                               (__v4si)index,
> -                                               mask,
> -                                               scale);
> +                                               __base,
> +                                               (__v4si)__index,
> +                                               __mask,
> +                                               __scale);
>  }
>
>  extern __inline __m256d
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_i32gather_pd (__m256d src, double const *base,
> -                         __m128i index, __m256d mask, const int scale)
> +_mm256_mask_i32gather_pd (__m256d __src, double const *__base,
> +                         __m128i __index, __m256d __mask, const int __scale)
>  {
> -  return (__m256d) __builtin_ia32_gathersiv4df ((__v4df)src,
> -                                               base,
> -                                               (__v4si)index,
> -                                               (__v4df)mask,
> -                                               scale);
> +  return (__m256d) __builtin_ia32_gathersiv4df ((__v4df)__src,
> +                                               __base,
> +                                               (__v4si)__index,
> +                                               (__v4df)__mask,
> +                                               __scale);
>  }
>
>  extern __inline __m128d
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_i64gather_pd (double const *base, __m128i index, const int scale)
> +_mm_i64gather_pd (double const *__base, __m128i __index, const int __scale)
>  {
> -  __v2df src = _mm_setzero_pd ();
> -  __v2df mask = _mm_cmpeq_pd (src, src);
> +  __v2df __src = _mm_setzero_pd ();
> +  __v2df __mask = _mm_cmpeq_pd (__src, __src);
>
> -  return (__m128d) __builtin_ia32_gatherdiv2df (src,
> -                                               base,
> -                                               (__v2di)index,
> -                                               mask,
> -                                               scale);
> +  return (__m128d) __builtin_ia32_gatherdiv2df (__src,
> +                                               __base,
> +                                               (__v2di)__index,
> +                                               __mask,
> +                                               __scale);
>  }
>
>  extern __inline __m128d
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_i64gather_pd (__m128d src, double const *base, __m128i index,
> -                      __m128d mask, const int scale)
> +_mm_mask_i64gather_pd (__m128d __src, double const *__base, __m128i __index,
> +                      __m128d __mask, const int __scale)
>  {
> -  return (__m128d) __builtin_ia32_gatherdiv2df ((__v2df)src,
> -                                               base,
> -                                               (__v2di)index,
> -                                               (__v2df)mask,
> -                                               scale);
> +  return (__m128d) __builtin_ia32_gatherdiv2df ((__v2df)__src,
> +                                               __base,
> +                                               (__v2di)__index,
> +                                               (__v2df)__mask,
> +                                               __scale);
>  }
>
>  extern __inline __m256d
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_i64gather_pd (double const *base, __m256i index, const int scale)
> +_mm256_i64gather_pd (double const *__base, __m256i __index, const int __scale)
>  {
> -  __v4df src = _mm256_setzero_pd ();
> -  __v4df mask = _mm256_cmp_pd (src, src, _CMP_EQ_OQ);
> +  __v4df __src = _mm256_setzero_pd ();
> +  __v4df __mask = _mm256_cmp_pd (__src, __src, _CMP_EQ_OQ);
>
> -  return (__m256d) __builtin_ia32_gatherdiv4df (src,
> -                                               base,
> -                                               (__v4di)index,
> -                                               mask,
> -                                               scale);
> +  return (__m256d) __builtin_ia32_gatherdiv4df (__src,
> +                                               __base,
> +                                               (__v4di)__index,
> +                                               __mask,
> +                                               __scale);
>  }
>
>  extern __inline __m256d
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_i64gather_pd (__m256d src, double const *base,
> -                         __m256i index, __m256d mask, const int scale)
> +_mm256_mask_i64gather_pd (__m256d __src, double const *__base,
> +                         __m256i __index, __m256d __mask, const int __scale)
>  {
> -  return (__m256d) __builtin_ia32_gatherdiv4df ((__v4df)src,
> -                                               base,
> -                                               (__v4di)index,
> -                                               (__v4df)mask,
> -                                               scale);
> +  return (__m256d) __builtin_ia32_gatherdiv4df ((__v4df)__src,
> +                                               __base,
> +                                               (__v4di)__index,
> +                                               (__v4df)__mask,
> +                                               __scale);
>  }
>
>  extern __inline __m128
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_i32gather_ps (float const *base, __m128i index, const int scale)
> +_mm_i32gather_ps (float const *__base, __m128i __index, const int __scale)
>  {
> -  __v4sf src = _mm_setzero_ps ();
> -  __v4sf mask = _mm_cmpeq_ps (src, src);
> +  __v4sf __src = _mm_setzero_ps ();
> +  __v4sf __mask = _mm_cmpeq_ps (__src, __src);
>
> -  return (__m128) __builtin_ia32_gathersiv4sf (src,
> -                                              base,
> -                                              (__v4si)index,
> -                                              mask,
> -                                              scale);
> +  return (__m128) __builtin_ia32_gathersiv4sf (__src,
> +                                              __base,
> +                                              (__v4si)__index,
> +                                              __mask,
> +                                              __scale);
>  }
>
>  extern __inline __m128
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_i32gather_ps (__m128 src, float const *base, __m128i index,
> -                      __m128 mask, const int scale)
> +_mm_mask_i32gather_ps (__m128 __src, float const *__base, __m128i __index,
> +                      __m128 __mask, const int __scale)
>  {
> -  return (__m128) __builtin_ia32_gathersiv4sf ((__v4sf)src,
> -                                              base,
> -                                              (__v4si)index,
> -                                              (__v4sf)mask,
> -                                              scale);
> +  return (__m128) __builtin_ia32_gathersiv4sf ((__v4sf)__src,
> +                                              __base,
> +                                              (__v4si)__index,
> +                                              (__v4sf)__mask,
> +                                              __scale);
>  }
>
>  extern __inline __m256
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_i32gather_ps (float const *base, __m256i index, const int scale)
> +_mm256_i32gather_ps (float const *__base, __m256i __index, const int __scale)
>  {
> -  __v8sf src = _mm256_setzero_ps ();
> -  __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
> +  __v8sf __src = _mm256_setzero_ps ();
> +  __v8sf __mask = _mm256_cmp_ps (__src, __src, _CMP_EQ_OQ);
>
> -  return (__m256) __builtin_ia32_gathersiv8sf (src,
> -                                              base,
> -                                              (__v8si)index,
> -                                              mask,
> -                                              scale);
> +  return (__m256) __builtin_ia32_gathersiv8sf (__src,
> +                                              __base,
> +                                              (__v8si)__index,
> +                                              __mask,
> +                                              __scale);
>  }
>
>  extern __inline __m256
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_i32gather_ps (__m256 src, float const *base,
> -                         __m256i index, __m256 mask, const int scale)
> +_mm256_mask_i32gather_ps (__m256 __src, float const *__base,
> +                         __m256i __index, __m256 __mask, const int __scale)
>  {
> -  return (__m256) __builtin_ia32_gathersiv8sf ((__v8sf)src,
> -                                              base,
> -                                              (__v8si)index,
> -                                              (__v8sf)mask,
> -                                              scale);
> +  return (__m256) __builtin_ia32_gathersiv8sf ((__v8sf)__src,
> +                                              __base,
> +                                              (__v8si)__index,
> +                                              (__v8sf)__mask,
> +                                              __scale);
>  }
>
>  extern __inline __m128
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_i64gather_ps (float const *base, __m128i index, const int scale)
> +_mm_i64gather_ps (float const *__base, __m128i __index, const int __scale)
>  {
> -  __v4sf src = _mm_setzero_ps ();
> -  __v4sf mask = _mm_cmpeq_ps (src, src);
> +  __v4sf __src = _mm_setzero_ps ();
> +  __v4sf __mask = _mm_cmpeq_ps (__src, __src);
>
> -  return (__m128) __builtin_ia32_gatherdiv4sf (src,
> -                                              base,
> -                                              (__v2di)index,
> -                                              mask,
> -                                              scale);
> +  return (__m128) __builtin_ia32_gatherdiv4sf (__src,
> +                                              __base,
> +                                              (__v2di)__index,
> +                                              __mask,
> +                                              __scale);
>  }
>
>  extern __inline __m128
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_i64gather_ps (__m128 src, float const *base, __m128i index,
> -                      __m128 mask, const int scale)
> +_mm_mask_i64gather_ps (__m128 __src, float const *__base, __m128i __index,
> +                      __m128 __mask, const int __scale)
>  {
> -  return (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)src,
> -                                               base,
> -                                               (__v2di)index,
> -                                               (__v4sf)mask,
> -                                               scale);
> +  return (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)__src,
> +                                               __base,
> +                                               (__v2di)__index,
> +                                               (__v4sf)__mask,
> +                                               __scale);
>  }
>
>  extern __inline __m128
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_i64gather_ps (float const *base, __m256i index, const int scale)
> +_mm256_i64gather_ps (float const *__base, __m256i __index, const int __scale)
>  {
> -  __v4sf src = _mm_setzero_ps ();
> -  __v4sf mask = _mm_cmpeq_ps (src, src);
> +  __v4sf __src = _mm_setzero_ps ();
> +  __v4sf __mask = _mm_cmpeq_ps (__src, __src);
>
> -  return (__m128) __builtin_ia32_gatherdiv4sf256 (src,
> -                                                 base,
> -                                                 (__v4di)index,
> -                                                 mask,
> -                                                 scale);
> +  return (__m128) __builtin_ia32_gatherdiv4sf256 (__src,
> +                                                 __base,
> +                                                 (__v4di)__index,
> +                                                 __mask,
> +                                                 __scale);
>  }
>
>  extern __inline __m128
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_i64gather_ps (__m128 src, float const *base,
> -                         __m256i index, __m128 mask, const int scale)
> +_mm256_mask_i64gather_ps (__m128 __src, float const *__base,
> +                         __m256i __index, __m128 __mask, const int __scale)
>  {
> -  return (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf)src,
> -                                                 base,
> -                                                 (__v4di)index,
> -                                                 (__v4sf)mask,
> -                                                 scale);
> +  return (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf)__src,
> +                                                 __base,
> +                                                 (__v4di)__index,
> +                                                 (__v4sf)__mask,
> +                                                 __scale);
>  }
>
>  extern __inline __m128i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_i32gather_epi64 (long long int const *base,
> -                    __m128i index, const int scale)
> +_mm_i32gather_epi64 (long long int const *__base,
> +                    __m128i __index, const int __scale)
>  {
> -  __v2di src = __extension__ (__v2di){ 0, 0 };
> -  __v2di mask = __extension__ (__v2di){ ~0, ~0 };
> +  __v2di __src = __extension__ (__v2di){ 0, 0 };
> +  __v2di __mask = __extension__ (__v2di){ ~0, ~0 };
>
> -  return (__m128i) __builtin_ia32_gathersiv2di (src,
> -                                               base,
> -                                               (__v4si)index,
> -                                               mask,
> -                                               scale);
> +  return (__m128i) __builtin_ia32_gathersiv2di (__src,
> +                                               __base,
> +                                               (__v4si)__index,
> +                                               __mask,
> +                                               __scale);
>  }
>
>  extern __inline __m128i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_i32gather_epi64 (__m128i src, long long int const *base,
> -                         __m128i index, __m128i mask, const int scale)
> +_mm_mask_i32gather_epi64 (__m128i __src, long long int const *__base,
> +                         __m128i __index, __m128i __mask, const int __scale)
>  {
> -  return (__m128i) __builtin_ia32_gathersiv2di ((__v2di)src,
> -                                               base,
> -                                               (__v4si)index,
> -                                               (__v2di)mask,
> -                                               scale);
> +  return (__m128i) __builtin_ia32_gathersiv2di ((__v2di)__src,
> +                                               __base,
> +                                               (__v4si)__index,
> +                                               (__v2di)__mask,
> +                                               __scale);
>  }
>
>  extern __inline __m256i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_i32gather_epi64 (long long int const *base,
> -                       __m128i index, const int scale)
> +_mm256_i32gather_epi64 (long long int const *__base,
> +                       __m128i __index, const int __scale)
>  {
> -  __v4di src = __extension__ (__v4di){ 0, 0, 0, 0 };
> -  __v4di mask = __extension__ (__v4di){ ~0, ~0, ~0, ~0 };
> +  __v4di __src = __extension__ (__v4di){ 0, 0, 0, 0 };
> +  __v4di __mask = __extension__ (__v4di){ ~0, ~0, ~0, ~0 };
>
> -  return (__m256i) __builtin_ia32_gathersiv4di (src,
> -                                               base,
> -                                               (__v4si)index,
> -                                               mask,
> -                                               scale);
> +  return (__m256i) __builtin_ia32_gathersiv4di (__src,
> +                                               __base,
> +                                               (__v4si)__index,
> +                                               __mask,
> +                                               __scale);
>  }
>
>  extern __inline __m256i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_i32gather_epi64 (__m256i src, long long int const *base,
> -                            __m128i index, __m256i mask, const int scale)
> -{
> -  return (__m256i) __builtin_ia32_gathersiv4di ((__v4di)src,
> -                                               base,
> -                                               (__v4si)index,
> -                                               (__v4di)mask,
> -                                               scale);
> +_mm256_mask_i32gather_epi64 (__m256i __src, long long int const *__base,
> +                            __m128i __index, __m256i __mask,
> +                            const int __scale)
> +{
> +  return (__m256i) __builtin_ia32_gathersiv4di ((__v4di)__src,
> +                                               __base,
> +                                               (__v4si)__index,
> +                                               (__v4di)__mask,
> +                                               __scale);
>  }
>
>  extern __inline __m128i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_i64gather_epi64 (long long int const *base,
> -                    __m128i index, const int scale)
> +_mm_i64gather_epi64 (long long int const *__base,
> +                    __m128i __index, const int __scale)
>  {
> -  __v2di src = __extension__ (__v2di){ 0, 0 };
> -  __v2di mask = __extension__ (__v2di){ ~0, ~0 };
> +  __v2di __src = __extension__ (__v2di){ 0, 0 };
> +  __v2di __mask = __extension__ (__v2di){ ~0, ~0 };
>
> -  return (__m128i) __builtin_ia32_gatherdiv2di (src,
> -                                               base,
> -                                               (__v2di)index,
> -                                               mask,
> -                                               scale);
> +  return (__m128i) __builtin_ia32_gatherdiv2di (__src,
> +                                               __base,
> +                                               (__v2di)__index,
> +                                               __mask,
> +                                               __scale);
>  }
>
>  extern __inline __m128i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_i64gather_epi64 (__m128i src, long long int const *base, __m128i index,
> -                         __m128i mask, const int scale)
> +_mm_mask_i64gather_epi64 (__m128i __src, long long int const *__base,
> +                         __m128i __index, __m128i __mask, const int __scale)
>  {
> -  return (__m128i) __builtin_ia32_gatherdiv2di ((__v2di)src,
> -                                               base,
> -                                               (__v2di)index,
> -                                               (__v2di)mask,
> -                                               scale);
> +  return (__m128i) __builtin_ia32_gatherdiv2di ((__v2di)__src,
> +                                               __base,
> +                                               (__v2di)__index,
> +                                               (__v2di)__mask,
> +                                               __scale);
>  }
>
>  extern __inline __m256i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_i64gather_epi64 (long long int const *base,
> -                       __m256i index, const int scale)
> +_mm256_i64gather_epi64 (long long int const *__base,
> +                       __m256i __index, const int __scale)
>  {
> -  __v4di src = __extension__ (__v4di){ 0, 0, 0, 0 };
> -  __v4di mask = __extension__ (__v4di){ ~0, ~0, ~0, ~0 };
> +  __v4di __src = __extension__ (__v4di){ 0, 0, 0, 0 };
> +  __v4di __mask = __extension__ (__v4di){ ~0, ~0, ~0, ~0 };
>
> -  return (__m256i) __builtin_ia32_gatherdiv4di (src,
> -                                               base,
> -                                               (__v4di)index,
> -                                               mask,
> -                                               scale);
> +  return (__m256i) __builtin_ia32_gatherdiv4di (__src,
> +                                               __base,
> +                                               (__v4di)__index,
> +                                               __mask,
> +                                               __scale);
>  }
>
>  extern __inline __m256i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_i64gather_epi64 (__m256i src, long long int const *base,
> -                            __m256i index, __m256i mask, const int scale)
> -{
> -  return (__m256i) __builtin_ia32_gatherdiv4di ((__v4di)src,
> -                                               base,
> -                                               (__v4di)index,
> -                                               (__v4di)mask,
> -                                               scale);
> +_mm256_mask_i64gather_epi64 (__m256i __src, long long int const *__base,
> +                            __m256i __index, __m256i __mask,
> +                            const int __scale)
> +{
> +  return (__m256i) __builtin_ia32_gatherdiv4di ((__v4di)__src,
> +                                               __base,
> +                                               (__v4di)__index,
> +                                               (__v4di)__mask,
> +                                               __scale);
>  }
>
>  extern __inline __m128i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_i32gather_epi32 (int const *base, __m128i index, const int scale)
> +_mm_i32gather_epi32 (int const *__base, __m128i __index, const int __scale)
>  {
> -  __v4si src = __extension__ (__v4si){ 0, 0, 0, 0 };
> -  __v4si mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };
> +  __v4si __src = __extension__ (__v4si){ 0, 0, 0, 0 };
> +  __v4si __mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };
>
> -  return (__m128i) __builtin_ia32_gathersiv4si (src,
> -                                              base,
> -                                              (__v4si)index,
> -                                              mask,
> -                                              scale);
> +  return (__m128i) __builtin_ia32_gathersiv4si (__src,
> +                                               __base,
> +                                               (__v4si)__index,
> +                                               __mask,
> +                                               __scale);
>  }
>
>  extern __inline __m128i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_i32gather_epi32 (__m128i src, int const *base, __m128i index,
> -                         __m128i mask, const int scale)
> +_mm_mask_i32gather_epi32 (__m128i __src, int const *__base, __m128i __index,
> +                         __m128i __mask, const int __scale)
>  {
> -  return (__m128i) __builtin_ia32_gathersiv4si ((__v4si)src,
> -                                               base,
> -                                               (__v4si)index,
> -                                               (__v4si)mask,
> -                                               scale);
> +  return (__m128i) __builtin_ia32_gathersiv4si ((__v4si)__src,
> +                                               __base,
> +                                               (__v4si)__index,
> +                                               (__v4si)__mask,
> +                                               __scale);
>  }
>
>  extern __inline __m256i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_i32gather_epi32 (int const *base, __m256i index, const int scale)
> +_mm256_i32gather_epi32 (int const *__base, __m256i __index, const int __scale)
>  {
> -  __v8si src = __extension__ (__v8si){ 0, 0, 0, 0, 0, 0, 0, 0 };
> -  __v8si mask = __extension__ (__v8si){ ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
> +  __v8si __src = __extension__ (__v8si){ 0, 0, 0, 0, 0, 0, 0, 0 };
> +  __v8si __mask = __extension__ (__v8si){ ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
>
> -  return (__m256i) __builtin_ia32_gathersiv8si (src,
> -                                               base,
> -                                               (__v8si)index,
> -                                               mask,
> -                                               scale);
> +  return (__m256i) __builtin_ia32_gathersiv8si (__src,
> +                                               __base,
> +                                               (__v8si)__index,
> +                                               __mask,
> +                                               __scale);
>  }
>
>  extern __inline __m256i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_i32gather_epi32 (__m256i src, int const *base,
> -                            __m256i index, __m256i mask, const int scale)
> -{
> -  return (__m256i) __builtin_ia32_gathersiv8si ((__v8si)src,
> -                                               base,
> -                                               (__v8si)index,
> -                                               (__v8si)mask,
> -                                               scale);
> +_mm256_mask_i32gather_epi32 (__m256i __src, int const *__base,
> +                            __m256i __index, __m256i __mask,
> +                            const int __scale)
> +{
> +  return (__m256i) __builtin_ia32_gathersiv8si ((__v8si)__src,
> +                                               __base,
> +                                               (__v8si)__index,
> +                                               (__v8si)__mask,
> +                                               __scale);
>  }
>
>  extern __inline __m128i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_i64gather_epi32 (int const *base, __m128i index, const int scale)
> +_mm_i64gather_epi32 (int const *__base, __m128i __index, const int __scale)
>  {
> -  __v4si src = __extension__ (__v4si){ 0, 0, 0, 0 };
> -  __v4si mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };
> +  __v4si __src = __extension__ (__v4si){ 0, 0, 0, 0 };
> +  __v4si __mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };
>
> -  return (__m128i) __builtin_ia32_gatherdiv4si (src,
> -                                               base,
> -                                               (__v2di)index,
> -                                               mask,
> -                                               scale);
> +  return (__m128i) __builtin_ia32_gatherdiv4si (__src,
> +                                               __base,
> +                                               (__v2di)__index,
> +                                               __mask,
> +                                               __scale);
>  }
>
>  extern __inline __m128i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm_mask_i64gather_epi32 (__m128i src, int const *base, __m128i index,
> -                         __m128i mask, const int scale)
> +_mm_mask_i64gather_epi32 (__m128i __src, int const *__base, __m128i __index,
> +                         __m128i __mask, const int __scale)
>  {
> -  return (__m128i) __builtin_ia32_gatherdiv4si ((__v4si)src,
> -                                               base,
> -                                               (__v2di)index,
> -                                               (__v4si)mask,
> -                                               scale);
> +  return (__m128i) __builtin_ia32_gatherdiv4si ((__v4si)__src,
> +                                               __base,
> +                                               (__v2di)__index,
> +                                               (__v4si)__mask,
> +                                               __scale);
>  }
>
>  extern __inline __m128i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_i64gather_epi32 (int const *base, __m256i index, const int scale)
> +_mm256_i64gather_epi32 (int const *__base, __m256i __index, const int __scale)
>  {
> -  __v4si src = __extension__ (__v4si){ 0, 0, 0, 0 };
> -  __v4si mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };
> +  __v4si __src = __extension__ (__v4si){ 0, 0, 0, 0 };
> +  __v4si __mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };
>
> -  return (__m128i) __builtin_ia32_gatherdiv4si256 (src,
> -                                                 base,
> -                                                 (__v4di)index,
> -                                                 mask,
> -                                                 scale);
> +  return (__m128i) __builtin_ia32_gatherdiv4si256 (__src,
> +                                                  __base,
> +                                                  (__v4di)__index,
> +                                                  __mask,
> +                                                  __scale);
>  }
>
>  extern __inline __m128i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm256_mask_i64gather_epi32 (__m128i src, int const *base,
> -                            __m256i index, __m128i mask, const int scale)
> -{
> -  return (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si)src,
> -                                                  base,
> -                                                  (__v4di)index,
> -                                                  (__v4si)mask,
> -                                                  scale);
> +_mm256_mask_i64gather_epi32 (__m128i __src, int const *__base,
> +                            __m256i __index, __m128i __mask,
> +                            const int __scale)
> +{
> +  return (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si)__src,
> +                                                  __base,
> +                                                  (__v4di)__index,
> +                                                  (__v4si)__mask,
> +                                                  __scale);
>  }
>  #else /* __OPTIMIZE__ */
>  #define _mm_i32gather_pd(BASE, INDEX, SCALE)                           \
> --- gcc/config/i386/pmm_malloc.h.jj     2016-01-04 14:55:56.000000000 +0100
> +++ gcc/config/i386/pmm_malloc.h        2016-08-19 12:37:07.701297173 +0200
> @@ -35,23 +35,23 @@ extern "C" int posix_memalign (void **,
>  #endif
>
>  static __inline void *
> -_mm_malloc (size_t size, size_t alignment)
> +_mm_malloc (size_t __size, size_t __alignment)
>  {
> -  void *ptr;
> -  if (alignment == 1)
> -    return malloc (size);
> -  if (alignment == 2 || (sizeof (void *) == 8 && alignment == 4))
> -    alignment = sizeof (void *);
> -  if (posix_memalign (&ptr, alignment, size) == 0)
> -    return ptr;
> +  void *__ptr;
> +  if (__alignment == 1)
> +    return malloc (__size);
> +  if (__alignment == 2 || (sizeof (void *) == 8 && __alignment == 4))
> +    __alignment = sizeof (void *);
> +  if (posix_memalign (&__ptr, __alignment, __size) == 0)
> +    return __ptr;
>    else
>      return NULL;
>  }
>
>  static __inline void
> -_mm_free (void * ptr)
> +_mm_free (void *__ptr)
>  {
> -  free (ptr);
> +  free (__ptr);
>  }
>
>  #endif /* _MM_MALLOC_H_INCLUDED */
> --- gcc/config/i386/ia32intrin.h.jj     2016-01-04 14:55:55.000000000 +0100
> +++ gcc/config/i386/ia32intrin.h        2016-08-19 12:24:15.836042925 +0200
> @@ -261,9 +261,9 @@ __readeflags (void)
>  /* Write flags register */
>  extern __inline void
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> -__writeeflags (unsigned long long X)
> +__writeeflags (unsigned long long __X)
>  {
> -  __builtin_ia32_writeeflags_u64 (X);
> +  __builtin_ia32_writeeflags_u64 (__X);
>  }
>
>  #define _bswap64(a)            __bswapq(a)
> @@ -281,9 +281,9 @@ __readeflags (void)
>  /* Write flags register */
>  extern __inline void
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> -__writeeflags (unsigned int X)
> +__writeeflags (unsigned int __X)
>  {
> -  __builtin_ia32_writeeflags_u32 (X);
> +  __builtin_ia32_writeeflags_u32 (__X);
>  }
>
>  #endif
> --- gcc/config/i386/pkuintrin.h.jj      2016-08-19 11:37:50.000000000 +0200
> +++ gcc/config/i386/pkuintrin.h 2016-08-19 11:55:00.695146383 +0200
> @@ -43,9 +43,9 @@ _rdpkru_u32 (void)
>
>  extern __inline void
>  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> -_wrpkru (unsigned int key)
> +_wrpkru (unsigned int __key)
>  {
> -  __builtin_ia32_wrpkru (key);
> +  __builtin_ia32_wrpkru (__key);
>  }
>
>  #ifdef __DISABLE_PKU__
> --- gcc/config/i386/avx512pfintrin.h.jj 2016-08-15 17:01:04.000000000 +0200
> +++ gcc/config/i386/avx512pfintrin.h    2016-08-19 12:04:56.039666815 +0200
> @@ -48,110 +48,110 @@ typedef unsigned short __mmask16;
>  #ifdef __OPTIMIZE__
>  extern __inline void
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm512_mask_prefetch_i32gather_pd (__m256i index, __mmask8 mask,
> -                                  void *addr, int scale, int hint)
> +_mm512_mask_prefetch_i32gather_pd (__m256i __index, __mmask8 __mask,
> +                                  void *__addr, int __scale, int __hint)
>  {
> -  __builtin_ia32_gatherpfdpd (mask, (__v8si) index, (long long const *) addr,
> -                             scale, hint);
> +  __builtin_ia32_gatherpfdpd (__mask, (__v8si) __index,
> +                             (long long const *) __addr, __scale, __hint);
>  }
>
>  extern __inline void
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm512_mask_prefetch_i32gather_ps (__m512i index, __mmask16 mask,
> -                                  void *addr, int scale, int hint)
> +_mm512_mask_prefetch_i32gather_ps (__m512i __index, __mmask16 __mask,
> +                                  void *__addr, int __scale, int __hint)
>  {
> -  __builtin_ia32_gatherpfdps (mask, (__v16si) index, (int const *) addr,
> -                             scale, hint);
> +  __builtin_ia32_gatherpfdps (__mask, (__v16si) __index, (int const *) __addr,
> +                             __scale, __hint);
>  }
>
>  extern __inline void
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm512_mask_prefetch_i64gather_pd (__m512i index, __mmask8 mask,
> -                                  void *addr, int scale, int hint)
> +_mm512_mask_prefetch_i64gather_pd (__m512i __index, __mmask8 __mask,
> +                                  void *__addr, int __scale, int __hint)
>  {
> -  __builtin_ia32_gatherpfqpd (mask, (__v8di) index, (long long const *) addr,
> -                             scale, hint);
> +  __builtin_ia32_gatherpfqpd (__mask, (__v8di) __index,
> +                             (long long const *) __addr, __scale, __hint);
>  }
>
>  extern __inline void
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm512_mask_prefetch_i64gather_ps (__m512i index, __mmask8 mask,
> -                                  void *addr, int scale, int hint)
> +_mm512_mask_prefetch_i64gather_ps (__m512i __index, __mmask8 __mask,
> +                                  void *__addr, int __scale, int __hint)
>  {
> -  __builtin_ia32_gatherpfqps (mask, (__v8di) index, (int const *) addr,
> -                             scale, hint);
> +  __builtin_ia32_gatherpfqps (__mask, (__v8di) __index, (int const *) __addr,
> +                             __scale, __hint);
>  }
>
>  extern __inline void
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm512_prefetch_i32scatter_pd (void *addr, __m256i index, int scale,
> -                              int hint)
> +_mm512_prefetch_i32scatter_pd (void *__addr, __m256i __index, int __scale,
> +                              int __hint)
>  {
> -  __builtin_ia32_scatterpfdpd ((__mmask8) 0xFF, (__v8si) index,
> -                              (long long const *)addr, scale, hint);
> +  __builtin_ia32_scatterpfdpd ((__mmask8) 0xFF, (__v8si) __index,
> +                              (long long const *) __addr, __scale, __hint);
>  }
>
>  extern __inline void
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm512_prefetch_i32scatter_ps (void *addr, __m512i index, int scale,
> -                              int hint)
> +_mm512_prefetch_i32scatter_ps (void *__addr, __m512i __index, int __scale,
> +                              int __hint)
>  {
> -  __builtin_ia32_scatterpfdps ((__mmask16) 0xFFFF, (__v16si) index, (int const *) addr,
> -                              scale, hint);
> +  __builtin_ia32_scatterpfdps ((__mmask16) 0xFFFF, (__v16si) __index,
> +                              (int const *) __addr, __scale, __hint);
>  }
>
>  extern __inline void
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm512_mask_prefetch_i32scatter_pd (void *addr, __mmask8 mask,
> -                                   __m256i index, int scale, int hint)
> +_mm512_mask_prefetch_i32scatter_pd (void *__addr, __mmask8 __mask,
> +                                   __m256i __index, int __scale, int __hint)
>  {
> -  __builtin_ia32_scatterpfdpd (mask, (__v8si) index, (long long const *) addr,
> -                              scale, hint);
> +  __builtin_ia32_scatterpfdpd (__mask, (__v8si) __index,
> +                              (long long const *) __addr, __scale, __hint);
>  }
>
>  extern __inline void
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm512_mask_prefetch_i32scatter_ps (void *addr, __mmask16 mask,
> -                                   __m512i index, int scale, int hint)
> +_mm512_mask_prefetch_i32scatter_ps (void *__addr, __mmask16 __mask,
> +                                   __m512i __index, int __scale, int __hint)
>  {
> -  __builtin_ia32_scatterpfdps (mask, (__v16si) index, (int const *) addr,
> -                              scale, hint);
> +  __builtin_ia32_scatterpfdps (__mask, (__v16si) __index, (int const *) __addr,
> +                              __scale, __hint);
>  }
>
>  extern __inline void
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm512_prefetch_i64scatter_pd (void *addr, __m512i index, int scale,
> -                              int hint)
> +_mm512_prefetch_i64scatter_pd (void *__addr, __m512i __index, int __scale,
> +                              int __hint)
>  {
> -  __builtin_ia32_scatterpfqpd ((__mmask8) 0xFF, (__v8di) index, (long long const *) addr,
> -                              scale, hint);
> +  __builtin_ia32_scatterpfqpd ((__mmask8) 0xFF, (__v8di) __index,
> +                              (long long const *) __addr, __scale, __hint);
>  }
>
>  extern __inline void
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm512_prefetch_i64scatter_ps (void *addr, __m512i index, int scale,
> -                              int hint)
> +_mm512_prefetch_i64scatter_ps (void *__addr, __m512i __index, int __scale,
> +                              int __hint)
>  {
> -  __builtin_ia32_scatterpfqps ((__mmask8) 0xFF, (__v8di) index, (int const *) addr,
> -                              scale, hint);
> +  __builtin_ia32_scatterpfqps ((__mmask8) 0xFF, (__v8di) __index,
> +                              (int const *) __addr, __scale, __hint);
>  }
>
>  extern __inline void
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm512_mask_prefetch_i64scatter_pd (void *addr, __mmask16 mask,
> -                                   __m512i index, int scale, int hint)
> +_mm512_mask_prefetch_i64scatter_pd (void *__addr, __mmask16 __mask,
> +                                   __m512i __index, int __scale, int __hint)
>  {
> -  __builtin_ia32_scatterpfqpd (mask, (__v8di) index, (long long const *) addr,
> -                              scale, hint);
> +  __builtin_ia32_scatterpfqpd (__mask, (__v8di) __index,
> +                              (long long const *) __addr, __scale, __hint);
>  }
>
>  extern __inline void
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm512_mask_prefetch_i64scatter_ps (void *addr, __mmask16 mask,
> -                                   __m512i index, int scale, int hint)
> +_mm512_mask_prefetch_i64scatter_ps (void *__addr, __mmask16 __mask,
> +                                   __m512i __index, int __scale, int __hint)
>  {
> -  __builtin_ia32_scatterpfqps (mask, (__v8di) index, (int const *) addr,
> -                              scale, hint);
> +  __builtin_ia32_scatterpfqps (__mask, (__v8di) __index, (int const *) __addr,
> +                              __scale, __hint);
>  }
>
>  #else
> --- gcc/config/i386/gmm_malloc.h.jj     2016-01-04 14:55:55.000000000 +0100
> +++ gcc/config/i386/gmm_malloc.h        2016-08-19 12:38:26.653300307 +0200
> @@ -27,48 +27,48 @@
>  #include <stdlib.h>
>  #include <errno.h>
>
> -static __inline__ void*
> -_mm_malloc (size_t size, size_t align)
> +static __inline__ void *
> +_mm_malloc (size_t __size, size_t __align)
>  {
> -  void * malloc_ptr;
> -  void * aligned_ptr;
> +  void * __malloc_ptr;
> +  void * __aligned_ptr;
>
>    /* Error if align is not a power of two.  */
> -  if (align & (align - 1))
> +  if (__align & (__align - 1))
>      {
>        errno = EINVAL;
> -      return ((void*) 0);
> +      return ((void *) 0);
>      }
>
> -  if (size == 0)
> +  if (__size == 0)
>      return ((void *) 0);
>
>   /* Assume malloc'd pointer is aligned at least to sizeof (void*).
>      If necessary, add another sizeof (void*) to store the value
>      returned by malloc. Effectively this enforces a minimum alignment
>      of sizeof double. */
> -    if (align < 2 * sizeof (void *))
> -      align = 2 * sizeof (void *);
> +    if (__align < 2 * sizeof (void *))
> +      __align = 2 * sizeof (void *);
>
> -  malloc_ptr = malloc (size + align);
> -  if (!malloc_ptr)
> +  __malloc_ptr = malloc (__size + __align);
> +  if (!__malloc_ptr)
>      return ((void *) 0);
>
>    /* Align  We have at least sizeof (void *) space below malloc'd ptr. */
> -  aligned_ptr = (void *) (((size_t) malloc_ptr + align)
> -                         & ~((size_t) (align) - 1));
> +  __aligned_ptr = (void *) (((size_t) __malloc_ptr + __align)
> +                           & ~((size_t) (__align) - 1));
>
>    /* Store the original pointer just before p.  */
> -  ((void **) aligned_ptr) [-1] = malloc_ptr;
> +  ((void **) __aligned_ptr)[-1] = __malloc_ptr;
>
> -  return aligned_ptr;
> +  return __aligned_ptr;
>  }
>
>  static __inline__ void
> -_mm_free (void * aligned_ptr)
> +_mm_free (void *__aligned_ptr)
>  {
> -  if (aligned_ptr)
> -    free (((void **) aligned_ptr) [-1]);
> +  if (__aligned_ptr)
> +    free (((void **) __aligned_ptr)[-1]);
>  }
>
>  #endif /* _MM_MALLOC_H_INCLUDED */
> --- gcc/config/i386/avx512fintrin.h.jj  2016-08-15 17:01:04.000000000 +0200
> +++ gcc/config/i386/avx512fintrin.h     2016-08-19 12:25:13.683312532 +0200
> @@ -1438,66 +1438,68 @@ _mm_sub_round_ss (__m128 __A, __m128 __B
>  #ifdef __OPTIMIZE__
>  extern __inline __m512i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C, const int imm)
> +_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C,
> +                          const int __imm)
>  {
>    return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
>                                                      (__v8di) __B,
> -                                                    (__v8di) __C, imm,
> +                                                    (__v8di) __C, __imm,
>                                                      (__mmask8) -1);
>  }
>
>  extern __inline __m512i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
> -                               __m512i __C, const int imm)
> +                               __m512i __C, const int __imm)
>  {
>    return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
>                                                      (__v8di) __B,
> -                                                    (__v8di) __C, imm,
> +                                                    (__v8di) __C, __imm,
>                                                      (__mmask8) __U);
>  }
>
>  extern __inline __m512i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
> -                                __m512i __C, const int imm)
> +                                __m512i __C, const int __imm)
>  {
>    return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
>                                                       (__v8di) __B,
>                                                       (__v8di) __C,
> -                                                     imm, (__mmask8) __U);
> +                                                     __imm, (__mmask8) __U);
>  }
>
>  extern __inline __m512i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C, const int imm)
> +_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C,
> +                          const int __imm)
>  {
>    return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
>                                                      (__v16si) __B,
>                                                      (__v16si) __C,
> -                                                    imm, (__mmask16) -1);
> +                                                    __imm, (__mmask16) -1);
>  }
>
>  extern __inline __m512i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
> -                               __m512i __C, const int imm)
> +                               __m512i __C, const int __imm)
>  {
>    return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
>                                                      (__v16si) __B,
>                                                      (__v16si) __C,
> -                                                    imm, (__mmask16) __U);
> +                                                    __imm, (__mmask16) __U);
>  }
>
>  extern __inline __m512i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
> -                                __m512i __C, const int imm)
> +                                __m512i __C, const int __imm)
>  {
>    return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
>                                                       (__v16si) __B,
>                                                       (__v16si) __C,
> -                                                     imm, (__mmask16) __U);
> +                                                     __imm, (__mmask16) __U);
>  }
>  #else
>  #define _mm512_ternarylogic_epi64(A, B, C, I)                          \
> @@ -9211,21 +9213,21 @@ extern __inline __m512
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_i32gather_ps (__m512i __index, float const *__addr, int __scale)
>  {
> -  __m512 v1_old = _mm512_undefined_ps ();
> -  __mmask16 mask = 0xFFFF;
> +  __m512 __v1_old = _mm512_undefined_ps ();
> +  __mmask16 __mask = 0xFFFF;
>
> -  return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
> +  return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
>                                                 __addr,
>                                                 (__v16si) __index,
> -                                               mask, __scale);
> +                                               __mask, __scale);
>  }
>
>  extern __inline __m512
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
> -_mm512_mask_i32gather_ps (__m512 v1_old, __mmask16 __mask,
> +_mm512_mask_i32gather_ps (__m512 __v1_old, __mmask16 __mask,
>                           __m512i __index, float const *__addr, int __scale)
>  {
> -  return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
> +  return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
>                                                 __addr,
>                                                 (__v16si) __index,
>                                                 __mask, __scale);
> @@ -9235,12 +9237,12 @@ extern __inline __m512d
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_i32gather_pd (__m256i __index, double const *__addr, int __scale)
>  {
> -  __m512d v1_old = _mm512_undefined_pd ();
> -  __mmask8 mask = 0xFF;
> +  __m512d __v1_old = _mm512_undefined_pd ();
> +  __mmask8 __mask = 0xFF;
>
> -  return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) v1_old,
> +  return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
>                                                 __addr,
> -                                               (__v8si) __index, mask,
> +                                               (__v8si) __index, __mask,
>                                                 __scale);
>  }
>
> @@ -9259,12 +9261,12 @@ extern __inline __m256
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_i64gather_ps (__m512i __index, float const *__addr, int __scale)
>  {
> -  __m256 v1_old = _mm256_undefined_ps ();
> -  __mmask8 mask = 0xFF;
> +  __m256 __v1_old = _mm256_undefined_ps ();
> +  __mmask8 __mask = 0xFF;
>
> -  return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) v1_old,
> +  return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
>                                                 __addr,
> -                                               (__v8di) __index, mask,
> +                                               (__v8di) __index, __mask,
>                                                 __scale);
>  }
>
> @@ -9283,12 +9285,12 @@ extern __inline __m512d
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_i64gather_pd (__m512i __index, double const *__addr, int __scale)
>  {
> -  __m512d v1_old = _mm512_undefined_pd ();
> -  __mmask8 mask = 0xFF;
> +  __m512d __v1_old = _mm512_undefined_pd ();
> +  __mmask8 __mask = 0xFF;
>
> -  return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) v1_old,
> +  return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
>                                                 __addr,
> -                                               (__v8di) __index, mask,
> +                                               (__v8di) __index, __mask,
>                                                 __scale);
>  }
>
> @@ -9307,13 +9309,13 @@ extern __inline __m512i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_i32gather_epi32 (__m512i __index, int const *__addr, int __scale)
>  {
> -  __m512i v1_old = _mm512_undefined_epi32 ();
> -  __mmask16 mask = 0xFFFF;
> +  __m512i __v1_old = _mm512_undefined_epi32 ();
> +  __mmask16 __mask = 0xFFFF;
>
> -  return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) v1_old,
> +  return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
>                                                  __addr,
>                                                  (__v16si) __index,
> -                                                mask, __scale);
> +                                                __mask, __scale);
>  }
>
>  extern __inline __m512i
> @@ -9331,12 +9333,12 @@ extern __inline __m512i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_i32gather_epi64 (__m256i __index, long long const *__addr, int __scale)
>  {
> -  __m512i v1_old = _mm512_undefined_epi32 ();
> -  __mmask8 mask = 0xFF;
> +  __m512i __v1_old = _mm512_undefined_epi32 ();
> +  __mmask8 __mask = 0xFF;
>
> -  return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) v1_old,
> +  return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
>                                                 __addr,
> -                                               (__v8si) __index, mask,
> +                                               (__v8si) __index, __mask,
>                                                 __scale);
>  }
>
> @@ -9356,13 +9358,13 @@ extern __inline __m256i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_i64gather_epi32 (__m512i __index, int const *__addr, int __scale)
>  {
> -  __m256i v1_old = _mm256_undefined_si256 ();
> -  __mmask8 mask = 0xFF;
> +  __m256i __v1_old = _mm256_undefined_si256 ();
> +  __mmask8 __mask = 0xFF;
>
> -  return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) v1_old,
> +  return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
>                                                  __addr,
>                                                  (__v8di) __index,
> -                                                mask, __scale);
> +                                                __mask, __scale);
>  }
>
>  extern __inline __m256i
> @@ -9380,12 +9382,12 @@ extern __inline __m512i
>  __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
>  _mm512_i64gather_epi64 (__m512i __index, long long const *__addr, int __scale)
>  {
> -  __m512i v1_old = _mm512_undefined_epi32 ();
> -  __mmask8 mask = 0xFF;
> +  __m512i __v1_old = _mm512_undefined_epi32 ();
> +  __mmask8 __mask = 0xFF;
>
> -  return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) v1_old,
> +  return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
>                                                 __addr,
> -                                               (__v8di) __index, mask,
> +                                               (__v8di) __index, __mask,
>                                                 __scale);
>  }
>
>
>
>         Jakub
diff mbox

Patch

--- gcc/config/i386/rdseedintrin.h.jj	2016-01-04 14:55:56.000000000 +0100
+++ gcc/config/i386/rdseedintrin.h	2016-08-19 11:55:35.603707812 +0200
@@ -37,24 +37,24 @@ 
 
 extern __inline int
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_rdseed16_step (unsigned short *p)
+_rdseed16_step (unsigned short *__p)
 {
-    return __builtin_ia32_rdseed_hi_step (p);
+    return __builtin_ia32_rdseed_hi_step (__p);
 }
 
 extern __inline int
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_rdseed32_step (unsigned int *p)
+_rdseed32_step (unsigned int *__p)
 {
-    return __builtin_ia32_rdseed_si_step (p);
+    return __builtin_ia32_rdseed_si_step (__p);
 }
 
 #ifdef __x86_64__
 extern __inline int
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_rdseed64_step (unsigned long long *p)
+_rdseed64_step (unsigned long long *__p)
 {
-    return __builtin_ia32_rdseed_di_step (p);
+    return __builtin_ia32_rdseed_di_step (__p);
 }
 #endif
 
--- gcc/config/i386/rtmintrin.h.jj	2016-01-04 14:55:56.000000000 +0100
+++ gcc/config/i386/rtmintrin.h	2016-08-19 11:58:16.043692136 +0200
@@ -68,9 +68,9 @@  _xend (void)
 #ifdef __OPTIMIZE__
 extern __inline void
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_xabort (const unsigned int imm)
+_xabort (const unsigned int __imm)
 {
-  __builtin_ia32_xabort (imm);
+  __builtin_ia32_xabort (__imm);
 }
 #else
 #define _xabort(N)  __builtin_ia32_xabort (N)
--- gcc/config/i386/avx512vlintrin.h.jj	2016-08-15 17:01:04.000000000 +0200
+++ gcc/config/i386/avx512vlintrin.h	2016-08-19 11:59:55.049448284 +0200
@@ -9796,11 +9796,11 @@  _mm_maskz_srli_epi64 (__mmask8 __U, __m1
 extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_ternarylogic_epi64 (__m256i __A, __m256i __B, __m256i __C,
-			   const int imm)
+			   const int __imm)
 {
   return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
 						     (__v4di) __B,
-						     (__v4di) __C, imm,
+						     (__v4di) __C, __imm,
 						     (__mmask8) -1);
 }
 
@@ -9808,11 +9808,11 @@  extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_ternarylogic_epi64 (__m256i __A, __mmask8 __U,
 				__m256i __B, __m256i __C,
-				const int imm)
+				const int __imm)
 {
   return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A,
 						     (__v4di) __B,
-						     (__v4di) __C, imm,
+						     (__v4di) __C, __imm,
 						     (__mmask8) __U);
 }
 
@@ -9820,23 +9820,23 @@  extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_ternarylogic_epi64 (__mmask8 __U, __m256i __A,
 				 __m256i __B, __m256i __C,
-				 const int imm)
+				 const int __imm)
 {
   return (__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di) __A,
 						      (__v4di) __B,
 						      (__v4di) __C,
-						      imm,
+						      __imm,
 						      (__mmask8) __U);
 }
 
 extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_ternarylogic_epi32 (__m256i __A, __m256i __B, __m256i __C,
-			   const int imm)
+			   const int __imm)
 {
   return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
 						     (__v8si) __B,
-						     (__v8si) __C, imm,
+						     (__v8si) __C, __imm,
 						     (__mmask8) -1);
 }
 
@@ -9844,11 +9844,11 @@  extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_ternarylogic_epi32 (__m256i __A, __mmask8 __U,
 				__m256i __B, __m256i __C,
-				const int imm)
+				const int __imm)
 {
   return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A,
 						     (__v8si) __B,
-						     (__v8si) __C, imm,
+						     (__v8si) __C, __imm,
 						     (__mmask8) __U);
 }
 
@@ -9856,80 +9856,80 @@  extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_ternarylogic_epi32 (__mmask8 __U, __m256i __A,
 				 __m256i __B, __m256i __C,
-				 const int imm)
+				 const int __imm)
 {
   return (__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si) __A,
 						      (__v8si) __B,
 						      (__v8si) __C,
-						      imm,
+						      __imm,
 						      (__mmask8) __U);
 }
 
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_ternarylogic_epi64 (__m128i __A, __m128i __B, __m128i __C,
-			const int imm)
+			const int __imm)
 {
   return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
 						     (__v2di) __B,
-						     (__v2di) __C, imm,
+						     (__v2di) __C, __imm,
 						     (__mmask8) -1);
 }
 
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_ternarylogic_epi64 (__m128i __A, __mmask8 __U,
-			     __m128i __B, __m128i __C, const int imm)
+			     __m128i __B, __m128i __C, const int __imm)
 {
   return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A,
 						     (__v2di) __B,
-						     (__v2di) __C, imm,
+						     (__v2di) __C, __imm,
 						     (__mmask8) __U);
 }
 
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_ternarylogic_epi64 (__mmask8 __U, __m128i __A,
-			      __m128i __B, __m128i __C, const int imm)
+			      __m128i __B, __m128i __C, const int __imm)
 {
   return (__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di) __A,
 						      (__v2di) __B,
 						      (__v2di) __C,
-						      imm,
+						      __imm,
 						      (__mmask8) __U);
 }
 
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_ternarylogic_epi32 (__m128i __A, __m128i __B, __m128i __C,
-			const int imm)
+			const int __imm)
 {
   return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
 						     (__v4si) __B,
-						     (__v4si) __C, imm,
+						     (__v4si) __C, __imm,
 						     (__mmask8) -1);
 }
 
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_ternarylogic_epi32 (__m128i __A, __mmask8 __U,
-			     __m128i __B, __m128i __C, const int imm)
+			     __m128i __B, __m128i __C, const int __imm)
 {
   return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A,
 						     (__v4si) __B,
-						     (__v4si) __C, imm,
+						     (__v4si) __C, __imm,
 						     (__mmask8) __U);
 }
 
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_ternarylogic_epi32 (__mmask8 __U, __m128i __A,
-			      __m128i __B, __m128i __C, const int imm)
+			      __m128i __B, __m128i __C, const int __imm)
 {
   return (__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si) __A,
 						      (__v4si) __B,
 						      (__v4si) __C,
-						      imm,
+						      __imm,
 						      (__mmask8) __U);
 }
 
--- gcc/config/i386/lwpintrin.h.jj	2016-01-04 14:55:56.000000000 +0100
+++ gcc/config/i386/lwpintrin.h	2016-08-19 11:57:29.951271214 +0200
@@ -35,9 +35,9 @@ 
 #endif /* __LWP__ */
 
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__llwpcb (void *pcbAddress)
+__llwpcb (void *__pcbAddress)
 {
-  __builtin_ia32_llwpcb (pcbAddress);
+  __builtin_ia32_llwpcb (__pcbAddress);
 }
 
 extern __inline void * __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -48,16 +48,17 @@  __slwpcb (void)
 
 #ifdef __OPTIMIZE__
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__lwpval32 (unsigned int data2, unsigned int data1, unsigned int flags)
+__lwpval32 (unsigned int __data2, unsigned int __data1, unsigned int __flags)
 {
-  __builtin_ia32_lwpval32 (data2, data1, flags);
+  __builtin_ia32_lwpval32 (__data2, __data1, __flags);
 }
 
 #ifdef __x86_64__
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__lwpval64 (unsigned long long data2, unsigned int data1, unsigned int flags)
+__lwpval64 (unsigned long long __data2, unsigned int __data1,
+	    unsigned int __flags)
 {
-  __builtin_ia32_lwpval64 (data2, data1, flags);
+  __builtin_ia32_lwpval64 (__data2, __data1, __flags);
 }
 #endif
 #else
@@ -74,16 +75,17 @@  __lwpval64 (unsigned long long data2, un
 
 #ifdef __OPTIMIZE__
 extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__lwpins32 (unsigned int data2, unsigned int data1, unsigned int flags)
+__lwpins32 (unsigned int __data2, unsigned int __data1, unsigned int __flags)
 {
-  return __builtin_ia32_lwpins32 (data2, data1, flags);
+  return __builtin_ia32_lwpins32 (__data2, __data1, __flags);
 }
 
 #ifdef __x86_64__
 extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__lwpins64 (unsigned long long data2, unsigned int data1, unsigned int flags)
+__lwpins64 (unsigned long long __data2, unsigned int __data1,
+	    unsigned int __flags)
 {
-  return __builtin_ia32_lwpins64 (data2, data1, flags);
+  return __builtin_ia32_lwpins64 (__data2, __data1, __flags);
 }
 #endif
 #else
--- gcc/config/i386/avx2intrin.h.jj	2016-01-04 14:55:55.000000000 +0100
+++ gcc/config/i386/avx2intrin.h	2016-08-19 12:23:32.612588675 +0200
@@ -1246,422 +1246,426 @@  _mm_srlv_epi64 (__m128i __X, __m128i __Y
 #ifdef __OPTIMIZE__
 extern __inline __m128d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_i32gather_pd (double const *base, __m128i index, const int scale)
+_mm_i32gather_pd (double const *__base, __m128i __index, const int __scale)
 {
-  __v2df zero = _mm_setzero_pd ();
-  __v2df mask = _mm_cmpeq_pd (zero, zero);
+  __v2df __zero = _mm_setzero_pd ();
+  __v2df __mask = _mm_cmpeq_pd (__zero, __zero);
 
   return (__m128d) __builtin_ia32_gathersiv2df (_mm_undefined_pd (),
-						base,
-						(__v4si)index,
-						mask,
-						scale);
+						__base,
+						(__v4si)__index,
+						__mask,
+						__scale);
 }
 
 extern __inline __m128d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_i32gather_pd (__m128d src, double const *base, __m128i index,
-		       __m128d mask, const int scale)
+_mm_mask_i32gather_pd (__m128d __src, double const *__base, __m128i __index,
+		       __m128d __mask, const int __scale)
 {
-  return (__m128d) __builtin_ia32_gathersiv2df ((__v2df)src,
-						base,
-						(__v4si)index,
-						(__v2df)mask,
-						scale);
+  return (__m128d) __builtin_ia32_gathersiv2df ((__v2df)__src,
+						__base,
+						(__v4si)__index,
+						(__v2df)__mask,
+						__scale);
 }
 
 extern __inline __m256d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_i32gather_pd (double const *base, __m128i index, const int scale)
+_mm256_i32gather_pd (double const *__base, __m128i __index, const int __scale)
 {
-  __v4df zero = _mm256_setzero_pd ();
-  __v4df mask = _mm256_cmp_pd (zero, zero, _CMP_EQ_OQ);
+  __v4df __zero = _mm256_setzero_pd ();
+  __v4df __mask = _mm256_cmp_pd (__zero, __zero, _CMP_EQ_OQ);
 
   return (__m256d) __builtin_ia32_gathersiv4df (_mm256_undefined_pd (),
-						base,
-						(__v4si)index,
-						mask,
-						scale);
+						__base,
+						(__v4si)__index,
+						__mask,
+						__scale);
 }
 
 extern __inline __m256d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_i32gather_pd (__m256d src, double const *base,
-			  __m128i index, __m256d mask, const int scale)
+_mm256_mask_i32gather_pd (__m256d __src, double const *__base,
+			  __m128i __index, __m256d __mask, const int __scale)
 {
-  return (__m256d) __builtin_ia32_gathersiv4df ((__v4df)src,
-						base,
-						(__v4si)index,
-						(__v4df)mask,
-						scale);
+  return (__m256d) __builtin_ia32_gathersiv4df ((__v4df)__src,
+						__base,
+						(__v4si)__index,
+						(__v4df)__mask,
+						__scale);
 }
 
 extern __inline __m128d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_i64gather_pd (double const *base, __m128i index, const int scale)
+_mm_i64gather_pd (double const *__base, __m128i __index, const int __scale)
 {
-  __v2df src = _mm_setzero_pd ();
-  __v2df mask = _mm_cmpeq_pd (src, src);
+  __v2df __src = _mm_setzero_pd ();
+  __v2df __mask = _mm_cmpeq_pd (__src, __src);
 
-  return (__m128d) __builtin_ia32_gatherdiv2df (src,
-						base,
-						(__v2di)index,
-						mask,
-						scale);
+  return (__m128d) __builtin_ia32_gatherdiv2df (__src,
+						__base,
+						(__v2di)__index,
+						__mask,
+						__scale);
 }
 
 extern __inline __m128d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_i64gather_pd (__m128d src, double const *base, __m128i index,
-		       __m128d mask, const int scale)
+_mm_mask_i64gather_pd (__m128d __src, double const *__base, __m128i __index,
+		       __m128d __mask, const int __scale)
 {
-  return (__m128d) __builtin_ia32_gatherdiv2df ((__v2df)src,
-						base,
-						(__v2di)index,
-						(__v2df)mask,
-						scale);
+  return (__m128d) __builtin_ia32_gatherdiv2df ((__v2df)__src,
+						__base,
+						(__v2di)__index,
+						(__v2df)__mask,
+						__scale);
 }
 
 extern __inline __m256d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_i64gather_pd (double const *base, __m256i index, const int scale)
+_mm256_i64gather_pd (double const *__base, __m256i __index, const int __scale)
 {
-  __v4df src = _mm256_setzero_pd ();
-  __v4df mask = _mm256_cmp_pd (src, src, _CMP_EQ_OQ);
+  __v4df __src = _mm256_setzero_pd ();
+  __v4df __mask = _mm256_cmp_pd (__src, __src, _CMP_EQ_OQ);
 
-  return (__m256d) __builtin_ia32_gatherdiv4df (src,
-						base,
-						(__v4di)index,
-						mask,
-						scale);
+  return (__m256d) __builtin_ia32_gatherdiv4df (__src,
+						__base,
+						(__v4di)__index,
+						__mask,
+						__scale);
 }
 
 extern __inline __m256d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_i64gather_pd (__m256d src, double const *base,
-			  __m256i index, __m256d mask, const int scale)
+_mm256_mask_i64gather_pd (__m256d __src, double const *__base,
+			  __m256i __index, __m256d __mask, const int __scale)
 {
-  return (__m256d) __builtin_ia32_gatherdiv4df ((__v4df)src,
-						base,
-						(__v4di)index,
-						(__v4df)mask,
-						scale);
+  return (__m256d) __builtin_ia32_gatherdiv4df ((__v4df)__src,
+						__base,
+						(__v4di)__index,
+						(__v4df)__mask,
+						__scale);
 }
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_i32gather_ps (float const *base, __m128i index, const int scale)
+_mm_i32gather_ps (float const *__base, __m128i __index, const int __scale)
 {
-  __v4sf src = _mm_setzero_ps ();
-  __v4sf mask = _mm_cmpeq_ps (src, src);
+  __v4sf __src = _mm_setzero_ps ();
+  __v4sf __mask = _mm_cmpeq_ps (__src, __src);
 
-  return (__m128) __builtin_ia32_gathersiv4sf (src,
-					       base,
-					       (__v4si)index,
-					       mask,
-					       scale);
+  return (__m128) __builtin_ia32_gathersiv4sf (__src,
+					       __base,
+					       (__v4si)__index,
+					       __mask,
+					       __scale);
 }
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_i32gather_ps (__m128 src, float const *base, __m128i index,
-		       __m128 mask, const int scale)
+_mm_mask_i32gather_ps (__m128 __src, float const *__base, __m128i __index,
+		       __m128 __mask, const int __scale)
 {
-  return (__m128) __builtin_ia32_gathersiv4sf ((__v4sf)src,
-					       base,
-					       (__v4si)index,
-					       (__v4sf)mask,
-					       scale);
+  return (__m128) __builtin_ia32_gathersiv4sf ((__v4sf)__src,
+					       __base,
+					       (__v4si)__index,
+					       (__v4sf)__mask,
+					       __scale);
 }
 
 extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_i32gather_ps (float const *base, __m256i index, const int scale)
+_mm256_i32gather_ps (float const *__base, __m256i __index, const int __scale)
 {
-  __v8sf src = _mm256_setzero_ps ();
-  __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
+  __v8sf __src = _mm256_setzero_ps ();
+  __v8sf __mask = _mm256_cmp_ps (__src, __src, _CMP_EQ_OQ);
 
-  return (__m256) __builtin_ia32_gathersiv8sf (src,
-					       base,
-					       (__v8si)index,
-					       mask,
-					       scale);
+  return (__m256) __builtin_ia32_gathersiv8sf (__src,
+					       __base,
+					       (__v8si)__index,
+					       __mask,
+					       __scale);
 }
 
 extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_i32gather_ps (__m256 src, float const *base,
-			  __m256i index, __m256 mask, const int scale)
+_mm256_mask_i32gather_ps (__m256 __src, float const *__base,
+			  __m256i __index, __m256 __mask, const int __scale)
 {
-  return (__m256) __builtin_ia32_gathersiv8sf ((__v8sf)src,
-					       base,
-					       (__v8si)index,
-					       (__v8sf)mask,
-					       scale);
+  return (__m256) __builtin_ia32_gathersiv8sf ((__v8sf)__src,
+					       __base,
+					       (__v8si)__index,
+					       (__v8sf)__mask,
+					       __scale);
 }
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_i64gather_ps (float const *base, __m128i index, const int scale)
+_mm_i64gather_ps (float const *__base, __m128i __index, const int __scale)
 {
-  __v4sf src = _mm_setzero_ps ();
-  __v4sf mask = _mm_cmpeq_ps (src, src);
+  __v4sf __src = _mm_setzero_ps ();
+  __v4sf __mask = _mm_cmpeq_ps (__src, __src);
 
-  return (__m128) __builtin_ia32_gatherdiv4sf (src,
-					       base,
-					       (__v2di)index,
-					       mask,
-					       scale);
+  return (__m128) __builtin_ia32_gatherdiv4sf (__src,
+					       __base,
+					       (__v2di)__index,
+					       __mask,
+					       __scale);
 }
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_i64gather_ps (__m128 src, float const *base, __m128i index,
-		       __m128 mask, const int scale)
+_mm_mask_i64gather_ps (__m128 __src, float const *__base, __m128i __index,
+		       __m128 __mask, const int __scale)
 {
-  return (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)src,
-						base,
-						(__v2di)index,
-						(__v4sf)mask,
-						scale);
+  return (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)__src,
+						__base,
+						(__v2di)__index,
+						(__v4sf)__mask,
+						__scale);
 }
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_i64gather_ps (float const *base, __m256i index, const int scale)
+_mm256_i64gather_ps (float const *__base, __m256i __index, const int __scale)
 {
-  __v4sf src = _mm_setzero_ps ();
-  __v4sf mask = _mm_cmpeq_ps (src, src);
+  __v4sf __src = _mm_setzero_ps ();
+  __v4sf __mask = _mm_cmpeq_ps (__src, __src);
 
-  return (__m128) __builtin_ia32_gatherdiv4sf256 (src,
-						  base,
-						  (__v4di)index,
-						  mask,
-						  scale);
+  return (__m128) __builtin_ia32_gatherdiv4sf256 (__src,
+						  __base,
+						  (__v4di)__index,
+						  __mask,
+						  __scale);
 }
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_i64gather_ps (__m128 src, float const *base,
-			  __m256i index, __m128 mask, const int scale)
+_mm256_mask_i64gather_ps (__m128 __src, float const *__base,
+			  __m256i __index, __m128 __mask, const int __scale)
 {
-  return (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf)src,
-						  base,
-						  (__v4di)index,
-						  (__v4sf)mask,
-						  scale);
+  return (__m128) __builtin_ia32_gatherdiv4sf256 ((__v4sf)__src,
+						  __base,
+						  (__v4di)__index,
+						  (__v4sf)__mask,
+						  __scale);
 }
 
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_i32gather_epi64 (long long int const *base,
-		     __m128i index, const int scale)
+_mm_i32gather_epi64 (long long int const *__base,
+		     __m128i __index, const int __scale)
 {
-  __v2di src = __extension__ (__v2di){ 0, 0 };
-  __v2di mask = __extension__ (__v2di){ ~0, ~0 };
+  __v2di __src = __extension__ (__v2di){ 0, 0 };
+  __v2di __mask = __extension__ (__v2di){ ~0, ~0 };
 
-  return (__m128i) __builtin_ia32_gathersiv2di (src,
-						base,
-						(__v4si)index,
-						mask,
-						scale);
+  return (__m128i) __builtin_ia32_gathersiv2di (__src,
+						__base,
+						(__v4si)__index,
+						__mask,
+						__scale);
 }
 
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_i32gather_epi64 (__m128i src, long long int const *base,
-			  __m128i index, __m128i mask, const int scale)
+_mm_mask_i32gather_epi64 (__m128i __src, long long int const *__base,
+			  __m128i __index, __m128i __mask, const int __scale)
 {
-  return (__m128i) __builtin_ia32_gathersiv2di ((__v2di)src,
-						base,
-						(__v4si)index,
-						(__v2di)mask,
-						scale);
+  return (__m128i) __builtin_ia32_gathersiv2di ((__v2di)__src,
+						__base,
+						(__v4si)__index,
+						(__v2di)__mask,
+						__scale);
 }
 
 extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_i32gather_epi64 (long long int const *base,
-			__m128i index, const int scale)
+_mm256_i32gather_epi64 (long long int const *__base,
+			__m128i __index, const int __scale)
 {
-  __v4di src = __extension__ (__v4di){ 0, 0, 0, 0 };
-  __v4di mask = __extension__ (__v4di){ ~0, ~0, ~0, ~0 };
+  __v4di __src = __extension__ (__v4di){ 0, 0, 0, 0 };
+  __v4di __mask = __extension__ (__v4di){ ~0, ~0, ~0, ~0 };
 
-  return (__m256i) __builtin_ia32_gathersiv4di (src,
-						base,
-						(__v4si)index,
-						mask,
-						scale);
+  return (__m256i) __builtin_ia32_gathersiv4di (__src,
+						__base,
+						(__v4si)__index,
+						__mask,
+						__scale);
 }
 
 extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_i32gather_epi64 (__m256i src, long long int const *base,
-			     __m128i index, __m256i mask, const int scale)
-{
-  return (__m256i) __builtin_ia32_gathersiv4di ((__v4di)src,
-						base,
-						(__v4si)index,
-						(__v4di)mask,
-						scale);
+_mm256_mask_i32gather_epi64 (__m256i __src, long long int const *__base,
+			     __m128i __index, __m256i __mask,
+			     const int __scale)
+{
+  return (__m256i) __builtin_ia32_gathersiv4di ((__v4di)__src,
+						__base,
+						(__v4si)__index,
+						(__v4di)__mask,
+						__scale);
 }
 
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_i64gather_epi64 (long long int const *base,
-		     __m128i index, const int scale)
+_mm_i64gather_epi64 (long long int const *__base,
+		     __m128i __index, const int __scale)
 {
-  __v2di src = __extension__ (__v2di){ 0, 0 };
-  __v2di mask = __extension__ (__v2di){ ~0, ~0 };
+  __v2di __src = __extension__ (__v2di){ 0, 0 };
+  __v2di __mask = __extension__ (__v2di){ ~0, ~0 };
 
-  return (__m128i) __builtin_ia32_gatherdiv2di (src,
-						base,
-						(__v2di)index,
-						mask,
-						scale);
+  return (__m128i) __builtin_ia32_gatherdiv2di (__src,
+						__base,
+						(__v2di)__index,
+						__mask,
+						__scale);
 }
 
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_i64gather_epi64 (__m128i src, long long int const *base, __m128i index,
-			  __m128i mask, const int scale)
+_mm_mask_i64gather_epi64 (__m128i __src, long long int const *__base,
+			  __m128i __index, __m128i __mask, const int __scale)
 {
-  return (__m128i) __builtin_ia32_gatherdiv2di ((__v2di)src,
-						base,
-						(__v2di)index,
-						(__v2di)mask,
-						scale);
+  return (__m128i) __builtin_ia32_gatherdiv2di ((__v2di)__src,
+						__base,
+						(__v2di)__index,
+						(__v2di)__mask,
+						__scale);
 }
 
 extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_i64gather_epi64 (long long int const *base,
-			__m256i index, const int scale)
+_mm256_i64gather_epi64 (long long int const *__base,
+			__m256i __index, const int __scale)
 {
-  __v4di src = __extension__ (__v4di){ 0, 0, 0, 0 };
-  __v4di mask = __extension__ (__v4di){ ~0, ~0, ~0, ~0 };
+  __v4di __src = __extension__ (__v4di){ 0, 0, 0, 0 };
+  __v4di __mask = __extension__ (__v4di){ ~0, ~0, ~0, ~0 };
 
-  return (__m256i) __builtin_ia32_gatherdiv4di (src,
-						base,
-						(__v4di)index,
-						mask,
-						scale);
+  return (__m256i) __builtin_ia32_gatherdiv4di (__src,
+						__base,
+						(__v4di)__index,
+						__mask,
+						__scale);
 }
 
 extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_i64gather_epi64 (__m256i src, long long int const *base,
-			     __m256i index, __m256i mask, const int scale)
-{
-  return (__m256i) __builtin_ia32_gatherdiv4di ((__v4di)src,
-						base,
-						(__v4di)index,
-						(__v4di)mask,
-						scale);
+_mm256_mask_i64gather_epi64 (__m256i __src, long long int const *__base,
+			     __m256i __index, __m256i __mask,
+			     const int __scale)
+{
+  return (__m256i) __builtin_ia32_gatherdiv4di ((__v4di)__src,
+						__base,
+						(__v4di)__index,
+						(__v4di)__mask,
+						__scale);
 }
 
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_i32gather_epi32 (int const *base, __m128i index, const int scale)
+_mm_i32gather_epi32 (int const *__base, __m128i __index, const int __scale)
 {
-  __v4si src = __extension__ (__v4si){ 0, 0, 0, 0 };
-  __v4si mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };
+  __v4si __src = __extension__ (__v4si){ 0, 0, 0, 0 };
+  __v4si __mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };
 
-  return (__m128i) __builtin_ia32_gathersiv4si (src,
-					       base,
-					       (__v4si)index,
-					       mask,
-					       scale);
+  return (__m128i) __builtin_ia32_gathersiv4si (__src,
+						__base,
+						(__v4si)__index,
+						__mask,
+						__scale);
 }
 
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_i32gather_epi32 (__m128i src, int const *base, __m128i index,
-			  __m128i mask, const int scale)
+_mm_mask_i32gather_epi32 (__m128i __src, int const *__base, __m128i __index,
+			  __m128i __mask, const int __scale)
 {
-  return (__m128i) __builtin_ia32_gathersiv4si ((__v4si)src,
-						base,
-						(__v4si)index,
-						(__v4si)mask,
-						scale);
+  return (__m128i) __builtin_ia32_gathersiv4si ((__v4si)__src,
+						__base,
+						(__v4si)__index,
+						(__v4si)__mask,
+						__scale);
 }
 
 extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_i32gather_epi32 (int const *base, __m256i index, const int scale)
+_mm256_i32gather_epi32 (int const *__base, __m256i __index, const int __scale)
 {
-  __v8si src = __extension__ (__v8si){ 0, 0, 0, 0, 0, 0, 0, 0 };
-  __v8si mask = __extension__ (__v8si){ ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
+  __v8si __src = __extension__ (__v8si){ 0, 0, 0, 0, 0, 0, 0, 0 };
+  __v8si __mask = __extension__ (__v8si){ ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 };
 
-  return (__m256i) __builtin_ia32_gathersiv8si (src,
-						base,
-						(__v8si)index,
-						mask,
-						scale);
+  return (__m256i) __builtin_ia32_gathersiv8si (__src,
+						__base,
+						(__v8si)__index,
+						__mask,
+						__scale);
 }
 
 extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_i32gather_epi32 (__m256i src, int const *base,
-			     __m256i index, __m256i mask, const int scale)
-{
-  return (__m256i) __builtin_ia32_gathersiv8si ((__v8si)src,
-						base,
-						(__v8si)index,
-						(__v8si)mask,
-						scale);
+_mm256_mask_i32gather_epi32 (__m256i __src, int const *__base,
+			     __m256i __index, __m256i __mask,
+			     const int __scale)
+{
+  return (__m256i) __builtin_ia32_gathersiv8si ((__v8si)__src,
+						__base,
+						(__v8si)__index,
+						(__v8si)__mask,
+						__scale);
 }
 
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_i64gather_epi32 (int const *base, __m128i index, const int scale)
+_mm_i64gather_epi32 (int const *__base, __m128i __index, const int __scale)
 {
-  __v4si src = __extension__ (__v4si){ 0, 0, 0, 0 };
-  __v4si mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };
+  __v4si __src = __extension__ (__v4si){ 0, 0, 0, 0 };
+  __v4si __mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };
 
-  return (__m128i) __builtin_ia32_gatherdiv4si (src,
-						base,
-						(__v2di)index,
-						mask,
-						scale);
+  return (__m128i) __builtin_ia32_gatherdiv4si (__src,
+						__base,
+						(__v2di)__index,
+						__mask,
+						__scale);
 }
 
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_i64gather_epi32 (__m128i src, int const *base, __m128i index,
-			  __m128i mask, const int scale)
+_mm_mask_i64gather_epi32 (__m128i __src, int const *__base, __m128i __index,
+			  __m128i __mask, const int __scale)
 {
-  return (__m128i) __builtin_ia32_gatherdiv4si ((__v4si)src,
-						base,
-						(__v2di)index,
-						(__v4si)mask,
-						scale);
+  return (__m128i) __builtin_ia32_gatherdiv4si ((__v4si)__src,
+						__base,
+						(__v2di)__index,
+						(__v4si)__mask,
+						__scale);
 }
 
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_i64gather_epi32 (int const *base, __m256i index, const int scale)
+_mm256_i64gather_epi32 (int const *__base, __m256i __index, const int __scale)
 {
-  __v4si src = __extension__ (__v4si){ 0, 0, 0, 0 };
-  __v4si mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };
+  __v4si __src = __extension__ (__v4si){ 0, 0, 0, 0 };
+  __v4si __mask = __extension__ (__v4si){ ~0, ~0, ~0, ~0 };
 
-  return (__m128i) __builtin_ia32_gatherdiv4si256 (src,
-						  base,
-						  (__v4di)index,
-						  mask,
-						  scale);
+  return (__m128i) __builtin_ia32_gatherdiv4si256 (__src,
+						   __base,
+						   (__v4di)__index,
+						   __mask,
+						   __scale);
 }
 
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_i64gather_epi32 (__m128i src, int const *base,
-			     __m256i index, __m128i mask, const int scale)
-{
-  return (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si)src,
-						   base,
-						   (__v4di)index,
-						   (__v4si)mask,
-						   scale);
+_mm256_mask_i64gather_epi32 (__m128i __src, int const *__base,
+			     __m256i __index, __m128i __mask,
+			     const int __scale)
+{
+  return (__m128i) __builtin_ia32_gatherdiv4si256 ((__v4si)__src,
+						   __base,
+						   (__v4di)__index,
+						   (__v4si)__mask,
+						   __scale);
 }
 #else /* __OPTIMIZE__ */
 #define _mm_i32gather_pd(BASE, INDEX, SCALE)				\
--- gcc/config/i386/pmm_malloc.h.jj	2016-01-04 14:55:56.000000000 +0100
+++ gcc/config/i386/pmm_malloc.h	2016-08-19 12:37:07.701297173 +0200
@@ -35,23 +35,23 @@  extern "C" int posix_memalign (void **,
 #endif
 
 static __inline void *
-_mm_malloc (size_t size, size_t alignment)
+_mm_malloc (size_t __size, size_t __alignment)
 {
-  void *ptr;
-  if (alignment == 1)
-    return malloc (size);
-  if (alignment == 2 || (sizeof (void *) == 8 && alignment == 4))
-    alignment = sizeof (void *);
-  if (posix_memalign (&ptr, alignment, size) == 0)
-    return ptr;
+  void *__ptr;
+  if (__alignment == 1)
+    return malloc (__size);
+  if (__alignment == 2 || (sizeof (void *) == 8 && __alignment == 4))
+    __alignment = sizeof (void *);
+  if (posix_memalign (&__ptr, __alignment, __size) == 0)
+    return __ptr;
   else
     return NULL;
 }
 
 static __inline void
-_mm_free (void * ptr)
+_mm_free (void *__ptr)
 {
-  free (ptr);
+  free (__ptr);
 }
 
 #endif /* _MM_MALLOC_H_INCLUDED */
--- gcc/config/i386/ia32intrin.h.jj	2016-01-04 14:55:55.000000000 +0100
+++ gcc/config/i386/ia32intrin.h	2016-08-19 12:24:15.836042925 +0200
@@ -261,9 +261,9 @@  __readeflags (void)
 /* Write flags register */
 extern __inline void
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__writeeflags (unsigned long long X)
+__writeeflags (unsigned long long __X)
 {
-  __builtin_ia32_writeeflags_u64 (X);
+  __builtin_ia32_writeeflags_u64 (__X);
 }
 
 #define _bswap64(a)		__bswapq(a)
@@ -281,9 +281,9 @@  __readeflags (void)
 /* Write flags register */
 extern __inline void
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-__writeeflags (unsigned int X)
+__writeeflags (unsigned int __X)
 {
-  __builtin_ia32_writeeflags_u32 (X);
+  __builtin_ia32_writeeflags_u32 (__X);
 }
 
 #endif
--- gcc/config/i386/pkuintrin.h.jj	2016-08-19 11:37:50.000000000 +0200
+++ gcc/config/i386/pkuintrin.h	2016-08-19 11:55:00.695146383 +0200
@@ -43,9 +43,9 @@  _rdpkru_u32 (void)
 
 extern __inline void
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_wrpkru (unsigned int key)
+_wrpkru (unsigned int __key)
 {
-  __builtin_ia32_wrpkru (key);
+  __builtin_ia32_wrpkru (__key);
 }
 
 #ifdef __DISABLE_PKU__
--- gcc/config/i386/avx512pfintrin.h.jj	2016-08-15 17:01:04.000000000 +0200
+++ gcc/config/i386/avx512pfintrin.h	2016-08-19 12:04:56.039666815 +0200
@@ -48,110 +48,110 @@  typedef unsigned short __mmask16;
 #ifdef __OPTIMIZE__
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_prefetch_i32gather_pd (__m256i index, __mmask8 mask,
-				   void *addr, int scale, int hint)
+_mm512_mask_prefetch_i32gather_pd (__m256i __index, __mmask8 __mask,
+				   void *__addr, int __scale, int __hint)
 {
-  __builtin_ia32_gatherpfdpd (mask, (__v8si) index, (long long const *) addr,
-			      scale, hint);
+  __builtin_ia32_gatherpfdpd (__mask, (__v8si) __index,
+			      (long long const *) __addr, __scale, __hint);
 }
 
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_prefetch_i32gather_ps (__m512i index, __mmask16 mask,
-				   void *addr, int scale, int hint)
+_mm512_mask_prefetch_i32gather_ps (__m512i __index, __mmask16 __mask,
+				   void *__addr, int __scale, int __hint)
 {
-  __builtin_ia32_gatherpfdps (mask, (__v16si) index, (int const *) addr,
-			      scale, hint);
+  __builtin_ia32_gatherpfdps (__mask, (__v16si) __index, (int const *) __addr,
+			      __scale, __hint);
 }
 
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_prefetch_i64gather_pd (__m512i index, __mmask8 mask,
-				   void *addr, int scale, int hint)
+_mm512_mask_prefetch_i64gather_pd (__m512i __index, __mmask8 __mask,
+				   void *__addr, int __scale, int __hint)
 {
-  __builtin_ia32_gatherpfqpd (mask, (__v8di) index, (long long const *) addr,
-			      scale, hint);
+  __builtin_ia32_gatherpfqpd (__mask, (__v8di) __index,
+			      (long long const *) __addr, __scale, __hint);
 }
 
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_prefetch_i64gather_ps (__m512i index, __mmask8 mask,
-				   void *addr, int scale, int hint)
+_mm512_mask_prefetch_i64gather_ps (__m512i __index, __mmask8 __mask,
+				   void *__addr, int __scale, int __hint)
 {
-  __builtin_ia32_gatherpfqps (mask, (__v8di) index, (int const *) addr,
-			      scale, hint);
+  __builtin_ia32_gatherpfqps (__mask, (__v8di) __index, (int const *) __addr,
+			      __scale, __hint);
 }
 
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_prefetch_i32scatter_pd (void *addr, __m256i index, int scale,
-			       int hint)
+_mm512_prefetch_i32scatter_pd (void *__addr, __m256i __index, int __scale,
+			       int __hint)
 {
-  __builtin_ia32_scatterpfdpd ((__mmask8) 0xFF, (__v8si) index, 
-			       (long long const *)addr, scale, hint);
+  __builtin_ia32_scatterpfdpd ((__mmask8) 0xFF, (__v8si) __index,
+			       (long long const *) __addr, __scale, __hint);
 }
 
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_prefetch_i32scatter_ps (void *addr, __m512i index, int scale,
-			       int hint)
+_mm512_prefetch_i32scatter_ps (void *__addr, __m512i __index, int __scale,
+			       int __hint)
 {
-  __builtin_ia32_scatterpfdps ((__mmask16) 0xFFFF, (__v16si) index, (int const *) addr,
-			       scale, hint);
+  __builtin_ia32_scatterpfdps ((__mmask16) 0xFFFF, (__v16si) __index,
+			       (int const *) __addr, __scale, __hint);
 }
 
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_prefetch_i32scatter_pd (void *addr, __mmask8 mask,
-				    __m256i index, int scale, int hint)
+_mm512_mask_prefetch_i32scatter_pd (void *__addr, __mmask8 __mask,
+				    __m256i __index, int __scale, int __hint)
 {
-  __builtin_ia32_scatterpfdpd (mask, (__v8si) index, (long long const *) addr,
-			       scale, hint);
+  __builtin_ia32_scatterpfdpd (__mask, (__v8si) __index,
+			       (long long const *) __addr, __scale, __hint);
 }
 
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_prefetch_i32scatter_ps (void *addr, __mmask16 mask,
-				    __m512i index, int scale, int hint)
+_mm512_mask_prefetch_i32scatter_ps (void *__addr, __mmask16 __mask,
+				    __m512i __index, int __scale, int __hint)
 {
-  __builtin_ia32_scatterpfdps (mask, (__v16si) index, (int const *) addr,
-			       scale, hint);
+  __builtin_ia32_scatterpfdps (__mask, (__v16si) __index, (int const *) __addr,
+			       __scale, __hint);
 }
 
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_prefetch_i64scatter_pd (void *addr, __m512i index, int scale,
-			       int hint)
+_mm512_prefetch_i64scatter_pd (void *__addr, __m512i __index, int __scale,
+			       int __hint)
 {
-  __builtin_ia32_scatterpfqpd ((__mmask8) 0xFF, (__v8di) index, (long long const *) addr,
-			       scale, hint);
+  __builtin_ia32_scatterpfqpd ((__mmask8) 0xFF, (__v8di) __index,
+			       (long long const *) __addr, __scale, __hint);
 }
 
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_prefetch_i64scatter_ps (void *addr, __m512i index, int scale,
-			       int hint)
+_mm512_prefetch_i64scatter_ps (void *__addr, __m512i __index, int __scale,
+			       int __hint)
 {
-  __builtin_ia32_scatterpfqps ((__mmask8) 0xFF, (__v8di) index, (int const *) addr,
-			       scale, hint);
+  __builtin_ia32_scatterpfqps ((__mmask8) 0xFF, (__v8di) __index,
+			       (int const *) __addr, __scale, __hint);
 }
 
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_prefetch_i64scatter_pd (void *addr, __mmask16 mask,
-				    __m512i index, int scale, int hint)
+_mm512_mask_prefetch_i64scatter_pd (void *__addr, __mmask16 __mask,
+				    __m512i __index, int __scale, int __hint)
 {
-  __builtin_ia32_scatterpfqpd (mask, (__v8di) index, (long long const *) addr,
-			       scale, hint);
+  __builtin_ia32_scatterpfqpd (__mask, (__v8di) __index,
+			       (long long const *) __addr, __scale, __hint);
 }
 
 extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_prefetch_i64scatter_ps (void *addr, __mmask16 mask,
-				    __m512i index, int scale, int hint)
+_mm512_mask_prefetch_i64scatter_ps (void *__addr, __mmask16 __mask,
+				    __m512i __index, int __scale, int __hint)
 {
-  __builtin_ia32_scatterpfqps (mask, (__v8di) index, (int const *) addr,
-			       scale, hint);
+  __builtin_ia32_scatterpfqps (__mask, (__v8di) __index, (int const *) __addr,
+			       __scale, __hint);
 }
 
 #else
--- gcc/config/i386/gmm_malloc.h.jj	2016-01-04 14:55:55.000000000 +0100
+++ gcc/config/i386/gmm_malloc.h	2016-08-19 12:38:26.653300307 +0200
@@ -27,48 +27,48 @@ 
 #include <stdlib.h>
 #include <errno.h>
 
-static __inline__ void* 
-_mm_malloc (size_t size, size_t align)
+static __inline__ void *
+_mm_malloc (size_t __size, size_t __align)
 {
-  void * malloc_ptr;
-  void * aligned_ptr;
+  void * __malloc_ptr;
+  void * __aligned_ptr;
 
   /* Error if align is not a power of two.  */
-  if (align & (align - 1))
+  if (__align & (__align - 1))
     {
       errno = EINVAL;
-      return ((void*) 0);
+      return ((void *) 0);
     }
 
-  if (size == 0)
+  if (__size == 0)
     return ((void *) 0);
 
  /* Assume malloc'd pointer is aligned at least to sizeof (void*).
     If necessary, add another sizeof (void*) to store the value
     returned by malloc. Effectively this enforces a minimum alignment
     of sizeof double. */     
-    if (align < 2 * sizeof (void *))
-      align = 2 * sizeof (void *);
+    if (__align < 2 * sizeof (void *))
+      __align = 2 * sizeof (void *);
 
-  malloc_ptr = malloc (size + align);
-  if (!malloc_ptr)
+  __malloc_ptr = malloc (__size + __align);
+  if (!__malloc_ptr)
     return ((void *) 0);
 
   /* Align  We have at least sizeof (void *) space below malloc'd ptr. */
-  aligned_ptr = (void *) (((size_t) malloc_ptr + align)
-			  & ~((size_t) (align) - 1));
+  __aligned_ptr = (void *) (((size_t) __malloc_ptr + __align)
+			    & ~((size_t) (__align) - 1));
 
   /* Store the original pointer just before p.  */	
-  ((void **) aligned_ptr) [-1] = malloc_ptr;
+  ((void **) __aligned_ptr)[-1] = __malloc_ptr;
 
-  return aligned_ptr;
+  return __aligned_ptr;
 }
 
 static __inline__ void
-_mm_free (void * aligned_ptr)
+_mm_free (void *__aligned_ptr)
 {
-  if (aligned_ptr)
-    free (((void **) aligned_ptr) [-1]);
+  if (__aligned_ptr)
+    free (((void **) __aligned_ptr)[-1]);
 }
 
 #endif /* _MM_MALLOC_H_INCLUDED */
--- gcc/config/i386/avx512fintrin.h.jj	2016-08-15 17:01:04.000000000 +0200
+++ gcc/config/i386/avx512fintrin.h	2016-08-19 12:25:13.683312532 +0200
@@ -1438,66 +1438,68 @@  _mm_sub_round_ss (__m128 __A, __m128 __B
 #ifdef __OPTIMIZE__
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C, const int imm)
+_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C,
+			   const int __imm)
 {
   return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
 						     (__v8di) __B,
-						     (__v8di) __C, imm,
+						     (__v8di) __C, __imm,
 						     (__mmask8) -1);
 }
 
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
-				__m512i __C, const int imm)
+				__m512i __C, const int __imm)
 {
   return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
 						     (__v8di) __B,
-						     (__v8di) __C, imm,
+						     (__v8di) __C, __imm,
 						     (__mmask8) __U);
 }
 
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
-				 __m512i __C, const int imm)
+				 __m512i __C, const int __imm)
 {
   return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
 						      (__v8di) __B,
 						      (__v8di) __C,
-						      imm, (__mmask8) __U);
+						      __imm, (__mmask8) __U);
 }
 
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C, const int imm)
+_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C,
+			   const int __imm)
 {
   return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
 						     (__v16si) __B,
 						     (__v16si) __C,
-						     imm, (__mmask16) -1);
+						     __imm, (__mmask16) -1);
 }
 
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
-				__m512i __C, const int imm)
+				__m512i __C, const int __imm)
 {
   return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
 						     (__v16si) __B,
 						     (__v16si) __C,
-						     imm, (__mmask16) __U);
+						     __imm, (__mmask16) __U);
 }
 
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
-				 __m512i __C, const int imm)
+				 __m512i __C, const int __imm)
 {
   return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
 						      (__v16si) __B,
 						      (__v16si) __C,
-						      imm, (__mmask16) __U);
+						      __imm, (__mmask16) __U);
 }
 #else
 #define _mm512_ternarylogic_epi64(A, B, C, I)				\
@@ -9211,21 +9213,21 @@  extern __inline __m512
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_i32gather_ps (__m512i __index, float const *__addr, int __scale)
 {
-  __m512 v1_old = _mm512_undefined_ps ();
-  __mmask16 mask = 0xFFFF;
+  __m512 __v1_old = _mm512_undefined_ps ();
+  __mmask16 __mask = 0xFFFF;
 
-  return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
+  return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
 						__addr,
 						(__v16si) __index,
-						mask, __scale);
+						__mask, __scale);
 }
 
 extern __inline __m512
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_i32gather_ps (__m512 v1_old, __mmask16 __mask,
+_mm512_mask_i32gather_ps (__m512 __v1_old, __mmask16 __mask,
 			  __m512i __index, float const *__addr, int __scale)
 {
-  return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old,
+  return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
 						__addr,
 						(__v16si) __index,
 						__mask, __scale);
@@ -9235,12 +9237,12 @@  extern __inline __m512d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_i32gather_pd (__m256i __index, double const *__addr, int __scale)
 {
-  __m512d v1_old = _mm512_undefined_pd ();
-  __mmask8 mask = 0xFF;
+  __m512d __v1_old = _mm512_undefined_pd ();
+  __mmask8 __mask = 0xFF;
 
-  return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) v1_old,
+  return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
 						__addr,
-						(__v8si) __index, mask,
+						(__v8si) __index, __mask,
 						__scale);
 }
 
@@ -9259,12 +9261,12 @@  extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_i64gather_ps (__m512i __index, float const *__addr, int __scale)
 {
-  __m256 v1_old = _mm256_undefined_ps ();
-  __mmask8 mask = 0xFF;
+  __m256 __v1_old = _mm256_undefined_ps ();
+  __mmask8 __mask = 0xFF;
 
-  return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) v1_old,
+  return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
 						__addr,
-						(__v8di) __index, mask,
+						(__v8di) __index, __mask,
 						__scale);
 }
 
@@ -9283,12 +9285,12 @@  extern __inline __m512d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_i64gather_pd (__m512i __index, double const *__addr, int __scale)
 {
-  __m512d v1_old = _mm512_undefined_pd ();
-  __mmask8 mask = 0xFF;
+  __m512d __v1_old = _mm512_undefined_pd ();
+  __mmask8 __mask = 0xFF;
 
-  return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) v1_old,
+  return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
 						__addr,
-						(__v8di) __index, mask,
+						(__v8di) __index, __mask,
 						__scale);
 }
 
@@ -9307,13 +9309,13 @@  extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_i32gather_epi32 (__m512i __index, int const *__addr, int __scale)
 {
-  __m512i v1_old = _mm512_undefined_epi32 ();
-  __mmask16 mask = 0xFFFF;
+  __m512i __v1_old = _mm512_undefined_epi32 ();
+  __mmask16 __mask = 0xFFFF;
 
-  return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) v1_old,
+  return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
 						 __addr,
 						 (__v16si) __index,
-						 mask, __scale);
+						 __mask, __scale);
 }
 
 extern __inline __m512i
@@ -9331,12 +9333,12 @@  extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_i32gather_epi64 (__m256i __index, long long const *__addr, int __scale)
 {
-  __m512i v1_old = _mm512_undefined_epi32 ();
-  __mmask8 mask = 0xFF;
+  __m512i __v1_old = _mm512_undefined_epi32 ();
+  __mmask8 __mask = 0xFF;
 
-  return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) v1_old,
+  return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
 						__addr,
-						(__v8si) __index, mask,
+						(__v8si) __index, __mask,
 						__scale);
 }
 
@@ -9356,13 +9358,13 @@  extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_i64gather_epi32 (__m512i __index, int const *__addr, int __scale)
 {
-  __m256i v1_old = _mm256_undefined_si256 ();
-  __mmask8 mask = 0xFF;
+  __m256i __v1_old = _mm256_undefined_si256 ();
+  __mmask8 __mask = 0xFF;
 
-  return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) v1_old,
+  return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
 						 __addr,
 						 (__v8di) __index,
-						 mask, __scale);
+						 __mask, __scale);
 }
 
 extern __inline __m256i
@@ -9380,12 +9382,12 @@  extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_i64gather_epi64 (__m512i __index, long long const *__addr, int __scale)
 {
-  __m512i v1_old = _mm512_undefined_epi32 ();
-  __mmask8 mask = 0xFF;
+  __m512i __v1_old = _mm512_undefined_epi32 ();
+  __mmask8 __mask = 0xFF;
 
-  return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) v1_old,
+  return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
 						__addr,
-						(__v8di) __index, mask,
+						(__v8di) __index, __mask,
 						__scale);
 }