diff mbox series

AVX512F: Add missing macro for mask(z?)_scalf_s[sd] [PR 105339]

Message ID 20220422081019.31897-1-hongyu.wang@intel.com
State New
Headers show
Series AVX512F: Add missing macro for mask(z?)_scalf_s[sd] [PR 105339] | expand

Commit Message

Hongyu Wang April 22, 2022, 8:10 a.m. UTC
Hi,

Add the missing macros under -O0 and adjust the macro format for the
scalef intrinsics.

Bootstrapped/regtested on x86_64-pc-linux-gnu{-m32,}.

Ok for master and backport to GCC 9/10/11?

gcc/ChangeLog:

	PR target/105339
	* config/i386/avx512fintrin.h (_mm512_scalef_round_pd):
	Add parentheses for parameters and adjust format.
	(_mm512_mask_scalef_round_pd): Ditto.
	(_mm512_maskz_scalef_round_pd): Ditto.
	(_mm512_scalef_round_ps): Ditto.
	(_mm512_mask_scalef_round_ps): Ditto.
	(_mm512_maskz_scalef_round_ps): Ditto.
	(_mm_scalef_round_sd): Use _mm_undefined_pd.
	(_mm_scalef_round_ss): Use _mm_undefined_ps.
	(_mm_mask_scalef_round_sd): New macro.
	(_mm_mask_scalef_round_ss): Ditto.
	(_mm_maskz_scalef_round_sd): Ditto.
	(_mm_maskz_scalef_round_ss): Ditto.
---
 gcc/config/i386/avx512fintrin.h | 76 ++++++++++++++++++++++++---------
 1 file changed, 56 insertions(+), 20 deletions(-)

Comments

Hongtao Liu April 22, 2022, 8:49 a.m. UTC | #1
On Fri, Apr 22, 2022 at 4:12 PM Hongyu Wang via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Hi,
>
> Add missing macro under O0 and adjust macro format for scalf
> intrinsics.
>
Please add the corresponding intrinsic test in sse-14.c.
> Bootstrapped/regtested on x86_64-pc-linux-gnu{-m32,}.
>
> Ok for master and backport to GCC 9/10/11?
>
> gcc/ChangeLog:
>
>         PR target/105339
>         * config/i386/avx512fintrin.h (_mm512_scalef_round_pd):
>         Add parentheses for parameters and djust format.
>         (_mm512_mask_scalef_round_pd): Ditto.
>         (_mm512_maskz_scalef_round_pd): Ditto.
>         (_mm512_scalef_round_ps): Ditto.
>         (_mm512_mask_scalef_round_ps): Ditto.
>         (_mm512_maskz_scalef_round_ps): Ditto.
>         (_mm_scalef_round_sd): Use _mm_undefined_pd.
>         (_mm_scalef_round_ss): Use _mm_undefined_ps.
>         (_mm_mask_scalef_round_sd): New macro.
>         (_mm_mask_scalef_round_ss): Ditto.
>         (_mm_maskz_scalef_round_sd): Ditto.
>         (_mm_maskz_scalef_round_ss): Ditto.
> ---
>  gcc/config/i386/avx512fintrin.h | 76 ++++++++++++++++++++++++---------
>  1 file changed, 56 insertions(+), 20 deletions(-)
>
> diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
> index 29511fd2831..6dc69ff0234 100644
> --- a/gcc/config/i386/avx512fintrin.h
> +++ b/gcc/config/i386/avx512fintrin.h
> @@ -3286,31 +3286,67 @@ _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
>                                                       (__mmask8) __U, __R);
>  }
>  #else
> -#define _mm512_scalef_round_pd(A, B, C)            \
> -    (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
> -
> -#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
> -    (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)
> -
> -#define _mm512_maskz_scalef_round_pd(U, A, B, C)   \
> -    (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
> +#define _mm512_scalef_round_pd(A, B, C)                                        \
> +  ((__m512d)                                                           \
> +   __builtin_ia32_scalefpd512_mask((A), (B),                           \
> +                                  (__v8df) _mm512_undefined_pd(),      \
> +                                  -1, (C)))
> +
> +#define _mm512_mask_scalef_round_pd(W, U, A, B, C)                     \
> +  ((__m512d) __builtin_ia32_scalefpd512_mask((A), (B), (W), (U), (C)))
> +
> +#define _mm512_maskz_scalef_round_pd(U, A, B, C)                       \
> +  ((__m512d)                                                           \
> +   __builtin_ia32_scalefpd512_mask((A), (B),                           \
> +                                  (__v8df) _mm512_setzero_pd(),        \
> +                                  (U), (C)))
> +
> +#define _mm512_scalef_round_ps(A, B, C)                                        \
> +  ((__m512)                                                            \
> +   __builtin_ia32_scalefps512_mask((A), (B),                           \
> +                                  (__v16sf) _mm512_undefined_ps(),     \
> +                                  -1, (C)))
> +
> +#define _mm512_mask_scalef_round_ps(W, U, A, B, C)                     \
> +  ((__m512) __builtin_ia32_scalefps512_mask((A), (B), (W), (U), (C)))
> +
> +#define _mm512_maskz_scalef_round_ps(U, A, B, C)                       \
> +  ((__m512)                                                            \
> +   __builtin_ia32_scalefps512_mask((A), (B),                           \
> +                                  (__v16sf) _mm512_setzero_ps(),       \
> +                                  (U), (C)))
> +
> +#define _mm_scalef_round_sd(A, B, C)                                   \
> +  ((__m128d)                                                           \
> +   __builtin_ia32_scalefsd_mask_round ((A), (B),                       \
> +                                      (__v2df) _mm_undefined_pd (),    \
> +                                      -1, (C)))
>
> -#define _mm512_scalef_round_ps(A, B, C)            \
> -    (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
> +#define _mm_scalef_round_ss(A, B, C)                                   \
> +  ((__m128)                                                            \
> +   __builtin_ia32_scalefss_mask_round ((A), (B),                       \
> +                                      (__v4sf) _mm_undefined_ps (),    \
> +                                      -1, (C)))
>
> -#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
> -    (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)
> +#define _mm_mask_scalef_round_sd(W, U, A, B, C)                                \
> +  ((__m128d)                                                           \
> +   __builtin_ia32_scalefsd_mask_round ((A), (B), (W), (U), (C)))
>
> -#define _mm512_maskz_scalef_round_ps(U, A, B, C)   \
> -    (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
> +#define _mm_mask_scalef_round_ss(W, U, A, B, C)                                \
> +  ((__m128)                                                            \
> +   __builtin_ia32_scalefss_mask_round ((A), (B), (W), (U), (C)))
>
> -#define _mm_scalef_round_sd(A, B, C)            \
> -    (__m128d)__builtin_ia32_scalefsd_mask_round (A, B, \
> -       (__v2df)_mm_setzero_pd (), -1, C)
> +#define _mm_maskz_scalef_round_sd(U, A, B, C)                          \
> +  ((__m128d)                                                           \
> +   __builtin_ia32_scalefsd_mask_round ((A), (B),                       \
> +                                      (__v2df) _mm_setzero_pd (),      \
> +                                      (U), (C)))
>
> -#define _mm_scalef_round_ss(A, B, C)            \
> -    (__m128)__builtin_ia32_scalefss_mask_round (A, B, \
> -       (__v4sf)_mm_setzero_ps (), -1, C)
> +#define _mm_maskz_scalef_round_ss(U, A, B, C)                          \
> +  ((__m128)                                                            \
> +   __builtin_ia32_scalefss_mask_round ((A), (B),                       \
> +                                      (__v4sf) _mm_setzero_ps (),      \
> +                                      (W), (U), (C)))
>  #endif
>
>  #define _mm_mask_scalef_sd(W, U, A, B) \
> --
> 2.18.1
>
Hongyu Wang April 22, 2022, 12:38 p.m. UTC | #2
> Please add the corresponding intrinsic test in sse-14.c

Sorry for forgetting this part. Updated patch. Thanks.

Hongtao Liu via Gcc-patches <gcc-patches@gcc.gnu.org> 于2022年4月22日周五 16:49写道:
>
> On Fri, Apr 22, 2022 at 4:12 PM Hongyu Wang via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > Hi,
> >
> > Add missing macro under O0 and adjust macro format for scalf
> > intrinsics.
> >
> Please add the corresponding intrinsic test in sse-14.c.
> > Bootstrapped/regtested on x86_64-pc-linux-gnu{-m32,}.
> >
> > Ok for master and backport to GCC 9/10/11?
> >
> > gcc/ChangeLog:
> >
> >         PR target/105339
> >         * config/i386/avx512fintrin.h (_mm512_scalef_round_pd):
> >         Add parentheses for parameters and djust format.
> >         (_mm512_mask_scalef_round_pd): Ditto.
> >         (_mm512_maskz_scalef_round_pd): Ditto.
> >         (_mm512_scalef_round_ps): Ditto.
> >         (_mm512_mask_scalef_round_ps): Ditto.
> >         (_mm512_maskz_scalef_round_ps): Ditto.
> >         (_mm_scalef_round_sd): Use _mm_undefined_pd.
> >         (_mm_scalef_round_ss): Use _mm_undefined_ps.
> >         (_mm_mask_scalef_round_sd): New macro.
> >         (_mm_mask_scalef_round_ss): Ditto.
> >         (_mm_maskz_scalef_round_sd): Ditto.
> >         (_mm_maskz_scalef_round_ss): Ditto.
> > ---
> >  gcc/config/i386/avx512fintrin.h | 76 ++++++++++++++++++++++++---------
> >  1 file changed, 56 insertions(+), 20 deletions(-)
> >
> > diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
> > index 29511fd2831..6dc69ff0234 100644
> > --- a/gcc/config/i386/avx512fintrin.h
> > +++ b/gcc/config/i386/avx512fintrin.h
> > @@ -3286,31 +3286,67 @@ _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
> >                                                       (__mmask8) __U, __R);
> >  }
> >  #else
> > -#define _mm512_scalef_round_pd(A, B, C)            \
> > -    (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
> > -
> > -#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
> > -    (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)
> > -
> > -#define _mm512_maskz_scalef_round_pd(U, A, B, C)   \
> > -    (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
> > +#define _mm512_scalef_round_pd(A, B, C)                                        \
> > +  ((__m512d)                                                           \
> > +   __builtin_ia32_scalefpd512_mask((A), (B),                           \
> > +                                  (__v8df) _mm512_undefined_pd(),      \
> > +                                  -1, (C)))
> > +
> > +#define _mm512_mask_scalef_round_pd(W, U, A, B, C)                     \
> > +  ((__m512d) __builtin_ia32_scalefpd512_mask((A), (B), (W), (U), (C)))
> > +
> > +#define _mm512_maskz_scalef_round_pd(U, A, B, C)                       \
> > +  ((__m512d)                                                           \
> > +   __builtin_ia32_scalefpd512_mask((A), (B),                           \
> > +                                  (__v8df) _mm512_setzero_pd(),        \
> > +                                  (U), (C)))
> > +
> > +#define _mm512_scalef_round_ps(A, B, C)                                        \
> > +  ((__m512)                                                            \
> > +   __builtin_ia32_scalefps512_mask((A), (B),                           \
> > +                                  (__v16sf) _mm512_undefined_ps(),     \
> > +                                  -1, (C)))
> > +
> > +#define _mm512_mask_scalef_round_ps(W, U, A, B, C)                     \
> > +  ((__m512) __builtin_ia32_scalefps512_mask((A), (B), (W), (U), (C)))
> > +
> > +#define _mm512_maskz_scalef_round_ps(U, A, B, C)                       \
> > +  ((__m512)                                                            \
> > +   __builtin_ia32_scalefps512_mask((A), (B),                           \
> > +                                  (__v16sf) _mm512_setzero_ps(),       \
> > +                                  (U), (C)))
> > +
> > +#define _mm_scalef_round_sd(A, B, C)                                   \
> > +  ((__m128d)                                                           \
> > +   __builtin_ia32_scalefsd_mask_round ((A), (B),                       \
> > +                                      (__v2df) _mm_undefined_pd (),    \
> > +                                      -1, (C)))
> >
> > -#define _mm512_scalef_round_ps(A, B, C)            \
> > -    (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
> > +#define _mm_scalef_round_ss(A, B, C)                                   \
> > +  ((__m128)                                                            \
> > +   __builtin_ia32_scalefss_mask_round ((A), (B),                       \
> > +                                      (__v4sf) _mm_undefined_ps (),    \
> > +                                      -1, (C)))
> >
> > -#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
> > -    (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)
> > +#define _mm_mask_scalef_round_sd(W, U, A, B, C)                                \
> > +  ((__m128d)                                                           \
> > +   __builtin_ia32_scalefsd_mask_round ((A), (B), (W), (U), (C)))
> >
> > -#define _mm512_maskz_scalef_round_ps(U, A, B, C)   \
> > -    (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
> > +#define _mm_mask_scalef_round_ss(W, U, A, B, C)                                \
> > +  ((__m128)                                                            \
> > +   __builtin_ia32_scalefss_mask_round ((A), (B), (W), (U), (C)))
> >
> > -#define _mm_scalef_round_sd(A, B, C)            \
> > -    (__m128d)__builtin_ia32_scalefsd_mask_round (A, B, \
> > -       (__v2df)_mm_setzero_pd (), -1, C)
> > +#define _mm_maskz_scalef_round_sd(U, A, B, C)                          \
> > +  ((__m128d)                                                           \
> > +   __builtin_ia32_scalefsd_mask_round ((A), (B),                       \
> > +                                      (__v2df) _mm_setzero_pd (),      \
> > +                                      (U), (C)))
> >
> > -#define _mm_scalef_round_ss(A, B, C)            \
> > -    (__m128)__builtin_ia32_scalefss_mask_round (A, B, \
> > -       (__v4sf)_mm_setzero_ps (), -1, C)
> > +#define _mm_maskz_scalef_round_ss(U, A, B, C)                          \
> > +  ((__m128)                                                            \
> > +   __builtin_ia32_scalefss_mask_round ((A), (B),                       \
> > +                                      (__v4sf) _mm_setzero_ps (),      \
> > +                                      (W), (U), (C)))
> >  #endif
> >
> >  #define _mm_mask_scalef_sd(W, U, A, B) \
> > --
> > 2.18.1
> >
>
>
> --
> BR,
> Hongtao
Hongtao Liu April 24, 2022, 2:35 a.m. UTC | #3
On Fri, Apr 22, 2022 at 8:43 PM Hongyu Wang <wwwhhhyyy333@gmail.com> wrote:
>
> > Please add the corresponding intrinsic test in sse-14.c
>
> Sorry for forgetting this part. Updated patch. Thanks.
>
LGTM.
> Hongtao Liu via Gcc-patches <gcc-patches@gcc.gnu.org> 于2022年4月22日周五 16:49写道:
> >
> > On Fri, Apr 22, 2022 at 4:12 PM Hongyu Wang via Gcc-patches
> > <gcc-patches@gcc.gnu.org> wrote:
> > >
> > > Hi,
> > >
> > > Add missing macro under O0 and adjust macro format for scalf
> > > intrinsics.
> > >
> > Please add the corresponding intrinsic test in sse-14.c.
> > > Bootstrapped/regtested on x86_64-pc-linux-gnu{-m32,}.
> > >
> > > Ok for master and backport to GCC 9/10/11?
> > >
> > > gcc/ChangeLog:
> > >
> > >         PR target/105339
> > >         * config/i386/avx512fintrin.h (_mm512_scalef_round_pd):
> > >         Add parentheses for parameters and djust format.
> > >         (_mm512_mask_scalef_round_pd): Ditto.
> > >         (_mm512_maskz_scalef_round_pd): Ditto.
> > >         (_mm512_scalef_round_ps): Ditto.
> > >         (_mm512_mask_scalef_round_ps): Ditto.
> > >         (_mm512_maskz_scalef_round_ps): Ditto.
> > >         (_mm_scalef_round_sd): Use _mm_undefined_pd.
> > >         (_mm_scalef_round_ss): Use _mm_undefined_ps.
> > >         (_mm_mask_scalef_round_sd): New macro.
> > >         (_mm_mask_scalef_round_ss): Ditto.
> > >         (_mm_maskz_scalef_round_sd): Ditto.
> > >         (_mm_maskz_scalef_round_ss): Ditto.
> > > ---
> > >  gcc/config/i386/avx512fintrin.h | 76 ++++++++++++++++++++++++---------
> > >  1 file changed, 56 insertions(+), 20 deletions(-)
> > >
> > > diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
> > > index 29511fd2831..6dc69ff0234 100644
> > > --- a/gcc/config/i386/avx512fintrin.h
> > > +++ b/gcc/config/i386/avx512fintrin.h
> > > @@ -3286,31 +3286,67 @@ _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
> > >                                                       (__mmask8) __U, __R);
> > >  }
> > >  #else
> > > -#define _mm512_scalef_round_pd(A, B, C)            \
> > > -    (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
> > > -
> > > -#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
> > > -    (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)
> > > -
> > > -#define _mm512_maskz_scalef_round_pd(U, A, B, C)   \
> > > -    (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
> > > +#define _mm512_scalef_round_pd(A, B, C)                                        \
> > > +  ((__m512d)                                                           \
> > > +   __builtin_ia32_scalefpd512_mask((A), (B),                           \
> > > +                                  (__v8df) _mm512_undefined_pd(),      \
> > > +                                  -1, (C)))
> > > +
> > > +#define _mm512_mask_scalef_round_pd(W, U, A, B, C)                     \
> > > +  ((__m512d) __builtin_ia32_scalefpd512_mask((A), (B), (W), (U), (C)))
> > > +
> > > +#define _mm512_maskz_scalef_round_pd(U, A, B, C)                       \
> > > +  ((__m512d)                                                           \
> > > +   __builtin_ia32_scalefpd512_mask((A), (B),                           \
> > > +                                  (__v8df) _mm512_setzero_pd(),        \
> > > +                                  (U), (C)))
> > > +
> > > +#define _mm512_scalef_round_ps(A, B, C)                                        \
> > > +  ((__m512)                                                            \
> > > +   __builtin_ia32_scalefps512_mask((A), (B),                           \
> > > +                                  (__v16sf) _mm512_undefined_ps(),     \
> > > +                                  -1, (C)))
> > > +
> > > +#define _mm512_mask_scalef_round_ps(W, U, A, B, C)                     \
> > > +  ((__m512) __builtin_ia32_scalefps512_mask((A), (B), (W), (U), (C)))
> > > +
> > > +#define _mm512_maskz_scalef_round_ps(U, A, B, C)                       \
> > > +  ((__m512)                                                            \
> > > +   __builtin_ia32_scalefps512_mask((A), (B),                           \
> > > +                                  (__v16sf) _mm512_setzero_ps(),       \
> > > +                                  (U), (C)))
> > > +
> > > +#define _mm_scalef_round_sd(A, B, C)                                   \
> > > +  ((__m128d)                                                           \
> > > +   __builtin_ia32_scalefsd_mask_round ((A), (B),                       \
> > > +                                      (__v2df) _mm_undefined_pd (),    \
> > > +                                      -1, (C)))
> > >
> > > -#define _mm512_scalef_round_ps(A, B, C)            \
> > > -    (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
> > > +#define _mm_scalef_round_ss(A, B, C)                                   \
> > > +  ((__m128)                                                            \
> > > +   __builtin_ia32_scalefss_mask_round ((A), (B),                       \
> > > +                                      (__v4sf) _mm_undefined_ps (),    \
> > > +                                      -1, (C)))
> > >
> > > -#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
> > > -    (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)
> > > +#define _mm_mask_scalef_round_sd(W, U, A, B, C)                                \
> > > +  ((__m128d)                                                           \
> > > +   __builtin_ia32_scalefsd_mask_round ((A), (B), (W), (U), (C)))
> > >
> > > -#define _mm512_maskz_scalef_round_ps(U, A, B, C)   \
> > > -    (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
> > > +#define _mm_mask_scalef_round_ss(W, U, A, B, C)                                \
> > > +  ((__m128)                                                            \
> > > +   __builtin_ia32_scalefss_mask_round ((A), (B), (W), (U), (C)))
> > >
> > > -#define _mm_scalef_round_sd(A, B, C)            \
> > > -    (__m128d)__builtin_ia32_scalefsd_mask_round (A, B, \
> > > -       (__v2df)_mm_setzero_pd (), -1, C)
> > > +#define _mm_maskz_scalef_round_sd(U, A, B, C)                          \
> > > +  ((__m128d)                                                           \
> > > +   __builtin_ia32_scalefsd_mask_round ((A), (B),                       \
> > > +                                      (__v2df) _mm_setzero_pd (),      \
> > > +                                      (U), (C)))
> > >
> > > -#define _mm_scalef_round_ss(A, B, C)            \
> > > -    (__m128)__builtin_ia32_scalefss_mask_round (A, B, \
> > > -       (__v4sf)_mm_setzero_ps (), -1, C)
> > > +#define _mm_maskz_scalef_round_ss(U, A, B, C)                          \
> > > +  ((__m128)                                                            \
> > > +   __builtin_ia32_scalefss_mask_round ((A), (B),                       \
> > > +                                      (__v4sf) _mm_setzero_ps (),      \
> > > +                                      (W), (U), (C)))
> > >  #endif
> > >
> > >  #define _mm_mask_scalef_sd(W, U, A, B) \
> > > --
> > > 2.18.1
> > >
> >
> >
> > --
> > BR,
> > Hongtao
diff mbox series

Patch

diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index 29511fd2831..6dc69ff0234 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -3286,31 +3286,67 @@  _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
 						      (__mmask8) __U, __R);
 }
 #else
-#define _mm512_scalef_round_pd(A, B, C)            \
-    (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
-
-#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
-    (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)
-
-#define _mm512_maskz_scalef_round_pd(U, A, B, C)   \
-    (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
+#define _mm512_scalef_round_pd(A, B, C)					\
+  ((__m512d)								\
+   __builtin_ia32_scalefpd512_mask((A), (B),				\
+				   (__v8df) _mm512_undefined_pd(),	\
+				   -1, (C)))
+
+#define _mm512_mask_scalef_round_pd(W, U, A, B, C)			\
+  ((__m512d) __builtin_ia32_scalefpd512_mask((A), (B), (W), (U), (C)))
+
+#define _mm512_maskz_scalef_round_pd(U, A, B, C)			\
+  ((__m512d)								\
+   __builtin_ia32_scalefpd512_mask((A), (B),				\
+				   (__v8df) _mm512_setzero_pd(),	\
+				   (U), (C)))
+
+#define _mm512_scalef_round_ps(A, B, C)					\
+  ((__m512)								\
+   __builtin_ia32_scalefps512_mask((A), (B),				\
+				   (__v16sf) _mm512_undefined_ps(),	\
+				   -1, (C)))
+
+#define _mm512_mask_scalef_round_ps(W, U, A, B, C)			\
+  ((__m512) __builtin_ia32_scalefps512_mask((A), (B), (W), (U), (C)))
+
+#define _mm512_maskz_scalef_round_ps(U, A, B, C)			\
+  ((__m512)								\
+   __builtin_ia32_scalefps512_mask((A), (B),				\
+				   (__v16sf) _mm512_setzero_ps(),	\
+				   (U), (C)))
+
+#define _mm_scalef_round_sd(A, B, C)					\
+  ((__m128d)								\
+   __builtin_ia32_scalefsd_mask_round ((A), (B),			\
+				       (__v2df) _mm_undefined_pd (),	\
+				       -1, (C)))
 
-#define _mm512_scalef_round_ps(A, B, C)            \
-    (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
+#define _mm_scalef_round_ss(A, B, C)					\
+  ((__m128)								\
+   __builtin_ia32_scalefss_mask_round ((A), (B),			\
+				       (__v4sf) _mm_undefined_ps (),	\
+				       -1, (C)))
 
-#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
-    (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)
+#define _mm_mask_scalef_round_sd(W, U, A, B, C)				\
+  ((__m128d)								\
+   __builtin_ia32_scalefsd_mask_round ((A), (B), (W), (U), (C)))
 
-#define _mm512_maskz_scalef_round_ps(U, A, B, C)   \
-    (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
+#define _mm_mask_scalef_round_ss(W, U, A, B, C)				\
+  ((__m128)								\
+   __builtin_ia32_scalefss_mask_round ((A), (B), (W), (U), (C)))
 
-#define _mm_scalef_round_sd(A, B, C)            \
-    (__m128d)__builtin_ia32_scalefsd_mask_round (A, B, \
-	(__v2df)_mm_setzero_pd (), -1, C)
+#define _mm_maskz_scalef_round_sd(U, A, B, C)				\
+  ((__m128d)								\
+   __builtin_ia32_scalefsd_mask_round ((A), (B),			\
+				       (__v2df) _mm_setzero_pd (),	\
+				       (U), (C)))
 
-#define _mm_scalef_round_ss(A, B, C)            \
-    (__m128)__builtin_ia32_scalefss_mask_round (A, B, \
-	(__v4sf)_mm_setzero_ps (), -1, C)
+#define _mm_maskz_scalef_round_ss(U, A, B, C)				\
+  ((__m128)								\
+   __builtin_ia32_scalefss_mask_round ((A), (B),			\
+				       (__v4sf) _mm_setzero_ps (),	\
+				       (U), (C)))
 #endif
 
 #define _mm_mask_scalef_sd(W, U, A, B) \