diff mbox series

x86: Use Yw constraint on *ssse3_pshufbv8qi3

Message ID 20220327181423.1030064-1-hjl.tools@gmail.com
State New
Headers show
Series x86: Use Yw constraint on *ssse3_pshufbv8qi3 | expand

Commit Message

H.J. Lu March 27, 2022, 6:14 p.m. UTC
Since AVX512VL and AVX512BW are required for AVX512 VPSHUFB, replace the
"Yv" register constraint with the "Yw" register constraint.

gcc/

	PR target/105068
	* config/i386/sse.md (*ssse3_pshufbv8qi3): Replace "Yv" with
	"Yw".

gcc/testsuite/

	PR target/105068
	* gcc.target/i386/pr105068.c: New test.
---
 gcc/config/i386/sse.md                   |  6 +--
 gcc/testsuite/gcc.target/i386/pr105068.c | 47 ++++++++++++++++++++++++
 2 files changed, 50 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr105068.c

Comments

Uros Bizjak March 27, 2022, 6:35 p.m. UTC | #1
On Sun, Mar 27, 2022 at 8:14 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> Since AVX512VL and AVX512BW are required for AVX512 VPSHUFB, replace the
> "Yv" register constraint with the "Yw" register constraint.

This is an obvious fix. As stated in https://gcc.gnu.org/gitwrite.html:

Obvious fixes can be committed without prior approval. Just check in
the fix and copy it to gcc-patches. A good test to determine whether a
fix is obvious: will the person who objects to my work the most be
able to find a fault with my fix? If the fix is later found to be
faulty, it can always be rolled back. We don't want to get overly
restrictive about checkin policies.

Thanks,
Uros.

>
> gcc/
>
>         PR target/105068
>         * config/i386/sse.md (*ssse3_pshufbv8qi3): Replace "Yv" with
>         "Yw".
>
> gcc/testsuite/
>
>         PR target/105068
>         * gcc.target/i386/pr105068.c: New test.
> ---
>  gcc/config/i386/sse.md                   |  6 +--
>  gcc/testsuite/gcc.target/i386/pr105068.c | 47 ++++++++++++++++++++++++
>  2 files changed, 50 insertions(+), 3 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr105068.c
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 33bd2c4768a..58d2bd972ed 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -20758,9 +20758,9 @@ (define_expand "ssse3_pshufbv8qi3"
>  })
>
>  (define_insn_and_split "*ssse3_pshufbv8qi3"
> -  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
> -       (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
> -                     (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")
> +  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
> +       (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yw")
> +                     (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")
>                       (match_operand:V4SI 4 "reg_or_const_vector_operand"
>                                           "i,3,3")]
>                      UNSPEC_PSHUFB))
> diff --git a/gcc/testsuite/gcc.target/i386/pr105068.c b/gcc/testsuite/gcc.target/i386/pr105068.c
> new file mode 100644
> index 00000000000..e5fb0338e3b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr105068.c
> @@ -0,0 +1,47 @@
> +/* { dg-do compile } */
> +/* { dg-options "-Og -march=x86-64 -mavx512vl -fsanitize=thread -fstack-protector-all" } */
> +
> +typedef char __attribute__((__vector_size__(8))) C;
> +typedef int __attribute__((__vector_size__(8))) U;
> +typedef int __attribute__((__vector_size__(16))) V;
> +typedef int __attribute__((__vector_size__(32))) W;
> +typedef long long __attribute__((__vector_size__(64))) L;
> +typedef _Float64 __attribute__((__vector_size__(16))) F;
> +typedef _Float64 __attribute__((__vector_size__(64))) G;
> +C c;
> +int i;
> +
> +U foo0( W v256u32_0,
> +           W v256s32_0,
> +           V v128u64_0,
> +           V v128s64_0,
> +           W v256u64_0,
> +           W v256s64_0,
> +           L v512s64_0,
> +           W v256u128_0,
> +           W v256s128_0,
> +           V v128f32_0,
> +           W v256f32_0,
> +           F F_0,
> +           W v256f64_0,
> +           G G_0) {
> +  C U_1 = __builtin_ia32_pshufb(c, c);
> +  G_0 += __builtin_convertvector(v512s64_0, G);
> +  F F_1 = __builtin_shufflevector(F_0, G_0, 2, 2);
> +  W W_r = v256u32_0 + v256s32_0 + v256u64_0 + v256s64_0 + v256u128_0 +
> +                    v256s128_0 + v256f32_0 + v256f64_0;
> +  V V_r = ((union {
> +                      W a;
> +                      V b;
> +                    })W_r)
> +                        .b +
> +                    i + v128u64_0 + v128s64_0 + v128f32_0 +
> +                    (V)F_1;
> +  U U_r = ((union {
> +                    V a;
> +                    U b;
> +                  })V_r)
> +                      .b +
> +                  (U)U_1;
> +  return U_r;
> +}
> --
> 2.35.1
>
H.J. Lu March 28, 2022, 1:38 p.m. UTC | #2
On Sun, Mar 27, 2022 at 11:35 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Sun, Mar 27, 2022 at 8:14 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > Since AVX512VL and AVX512BW are required for AVX512 VPSHUFB, replace the
> > "Yv" register constraint with the "Yw" register constraint.
>
> This is an obvious fix, as said in https://gcc.gnu.org/gitwrite.html :
>
> Obvious fixes can be committed without prior approval. Just check in
> the fix and copy it to gcc-patches. A good test to determine whether a
> fix is obvious: will the person who objects to my work the most be
> able to find a fault with my fix? If the fix is later found to be
> faulty, it can always be rolled back. We don't want to get overly
> restrictive about checkin policies.

I have checked this into the master branch and am backporting it to
the release branches.  I will drop the testcase on the release branches,
since __builtin_shufflevector is new in GCC 12.

> Thanks,
> Uros.
>
> >
> > gcc/
> >
> >         PR target/105068
> >         * config/i386/sse.md (*ssse3_pshufbv8qi3): Replace "Yv" with
> >         "Yw".
> >
> > gcc/testsuite/
> >
> >         PR target/105068
> >         * gcc.target/i386/pr105068.c: New test.
> > ---
> >  gcc/config/i386/sse.md                   |  6 +--
> >  gcc/testsuite/gcc.target/i386/pr105068.c | 47 ++++++++++++++++++++++++
> >  2 files changed, 50 insertions(+), 3 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr105068.c
> >
> > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> > index 33bd2c4768a..58d2bd972ed 100644
> > --- a/gcc/config/i386/sse.md
> > +++ b/gcc/config/i386/sse.md
> > @@ -20758,9 +20758,9 @@ (define_expand "ssse3_pshufbv8qi3"
> >  })
> >
> >  (define_insn_and_split "*ssse3_pshufbv8qi3"
> > -  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
> > -       (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
> > -                     (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")
> > +  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
> > +       (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yw")
> > +                     (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")
> >                       (match_operand:V4SI 4 "reg_or_const_vector_operand"
> >                                           "i,3,3")]
> >                      UNSPEC_PSHUFB))
> > diff --git a/gcc/testsuite/gcc.target/i386/pr105068.c b/gcc/testsuite/gcc.target/i386/pr105068.c
> > new file mode 100644
> > index 00000000000..e5fb0338e3b
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr105068.c
> > @@ -0,0 +1,47 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-Og -march=x86-64 -mavx512vl -fsanitize=thread -fstack-protector-all" } */
> > +
> > +typedef char __attribute__((__vector_size__(8))) C;
> > +typedef int __attribute__((__vector_size__(8))) U;
> > +typedef int __attribute__((__vector_size__(16))) V;
> > +typedef int __attribute__((__vector_size__(32))) W;
> > +typedef long long __attribute__((__vector_size__(64))) L;
> > +typedef _Float64 __attribute__((__vector_size__(16))) F;
> > +typedef _Float64 __attribute__((__vector_size__(64))) G;
> > +C c;
> > +int i;
> > +
> > +U foo0( W v256u32_0,
> > +           W v256s32_0,
> > +           V v128u64_0,
> > +           V v128s64_0,
> > +           W v256u64_0,
> > +           W v256s64_0,
> > +           L v512s64_0,
> > +           W v256u128_0,
> > +           W v256s128_0,
> > +           V v128f32_0,
> > +           W v256f32_0,
> > +           F F_0,
> > +           W v256f64_0,
> > +           G G_0) {
> > +  C U_1 = __builtin_ia32_pshufb(c, c);
> > +  G_0 += __builtin_convertvector(v512s64_0, G);
> > +  F F_1 = __builtin_shufflevector(F_0, G_0, 2, 2);
> > +  W W_r = v256u32_0 + v256s32_0 + v256u64_0 + v256s64_0 + v256u128_0 +
> > +                    v256s128_0 + v256f32_0 + v256f64_0;
> > +  V V_r = ((union {
> > +                      W a;
> > +                      V b;
> > +                    })W_r)
> > +                        .b +
> > +                    i + v128u64_0 + v128s64_0 + v128f32_0 +
> > +                    (V)F_1;
> > +  U U_r = ((union {
> > +                    V a;
> > +                    U b;
> > +                  })V_r)
> > +                      .b +
> > +                  (U)U_1;
> > +  return U_r;
> > +}
> > --
> > 2.35.1
> >
diff mbox series

Patch

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 33bd2c4768a..58d2bd972ed 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -20758,9 +20758,9 @@  (define_expand "ssse3_pshufbv8qi3"
 })
 
 (define_insn_and_split "*ssse3_pshufbv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
-	(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
-		      (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")
+  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
+	(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yw")
+		      (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")
 		      (match_operand:V4SI 4 "reg_or_const_vector_operand"
 					  "i,3,3")]
 		     UNSPEC_PSHUFB))
diff --git a/gcc/testsuite/gcc.target/i386/pr105068.c b/gcc/testsuite/gcc.target/i386/pr105068.c
new file mode 100644
index 00000000000..e5fb0338e3b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr105068.c
@@ -0,0 +1,47 @@ 
+/* { dg-do compile } */
+/* { dg-options "-Og -march=x86-64 -mavx512vl -fsanitize=thread -fstack-protector-all" } */
+
+typedef char __attribute__((__vector_size__(8))) C;
+typedef int __attribute__((__vector_size__(8))) U;
+typedef int __attribute__((__vector_size__(16))) V;
+typedef int __attribute__((__vector_size__(32))) W;
+typedef long long __attribute__((__vector_size__(64))) L;
+typedef _Float64 __attribute__((__vector_size__(16))) F;
+typedef _Float64 __attribute__((__vector_size__(64))) G;
+C c;
+int i;
+
+U foo0( W v256u32_0,
+           W v256s32_0,
+           V v128u64_0,
+           V v128s64_0,
+           W v256u64_0,
+           W v256s64_0,
+           L v512s64_0,
+           W v256u128_0,
+           W v256s128_0,
+           V v128f32_0,
+           W v256f32_0,
+           F F_0,
+           W v256f64_0,
+           G G_0) {
+  C U_1 = __builtin_ia32_pshufb(c, c);
+  G_0 += __builtin_convertvector(v512s64_0, G);
+  F F_1 = __builtin_shufflevector(F_0, G_0, 2, 2);
+  W W_r = v256u32_0 + v256s32_0 + v256u64_0 + v256s64_0 + v256u128_0 +
+                    v256s128_0 + v256f32_0 + v256f64_0;
+  V V_r = ((union {
+                      W a;
+                      V b;
+                    })W_r)
+                        .b +
+                    i + v128u64_0 + v128s64_0 + v128f32_0 +
+                    (V)F_1;
+  U U_r = ((union {
+                    V a;
+                    U b;
+                  })V_r)
+                      .b +
+                  (U)U_1;
+  return U_r;
+}