| Message ID | 20220327181423.1030064-1-hjl.tools@gmail.com | |
|---|---|---|
| State | New | |
| Headers | show | |
| Series | x86: Use Yw constraint on *ssse3_pshufbv8qi3 | expand |
On Sun, Mar 27, 2022 at 8:14 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > Since AVX512VL and AVX512BW are required for AVX512 VPSHUFB, replace the > "Yv" register constraint with the "Yw" register constraint. This is an obvious fix, as said in https://gcc.gnu.org/gitwrite.html : Obvious fixes can be committed without prior approval. Just check in the fix and copy it to gcc-patches. A good test to determine whether a fix is obvious: will the person who objects to my work the most be able to find a fault with my fix? If the fix is later found to be faulty, it can always be rolled back. We don't want to get overly restrictive about checkin policies. Thanks, Uros. > > gcc/ > > PR target/105068 > * config/i386/sse.md (*ssse3_pshufbv8qi3): Replace "Yv" with > "Yw". > > gcc/testsuite/ > > PR target/105068 > * gcc.target/i386/pr105068.c: New test. > --- > gcc/config/i386/sse.md | 6 +-- > gcc/testsuite/gcc.target/i386/pr105068.c | 47 ++++++++++++++++++++++++ > 2 files changed, 50 insertions(+), 3 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr105068.c > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > index 33bd2c4768a..58d2bd972ed 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -20758,9 +20758,9 @@ (define_expand "ssse3_pshufbv8qi3" > }) > > (define_insn_and_split "*ssse3_pshufbv8qi3" > - [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv") > - (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv") > - (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv") > + [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw") > + (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yw") > + (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw") > (match_operand:V4SI 4 "reg_or_const_vector_operand" > "i,3,3")] > UNSPEC_PSHUFB)) > diff --git a/gcc/testsuite/gcc.target/i386/pr105068.c b/gcc/testsuite/gcc.target/i386/pr105068.c > new file mode 100644 > index 00000000000..e5fb0338e3b > --- 
/dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr105068.c > @@ -0,0 +1,47 @@ > +/* { dg-do compile } */ > +/* { dg-options "-Og -march=x86-64 -mavx512vl -fsanitize=thread -fstack-protector-all" } */ > + > +typedef char __attribute__((__vector_size__(8))) C; > +typedef int __attribute__((__vector_size__(8))) U; > +typedef int __attribute__((__vector_size__(16))) V; > +typedef int __attribute__((__vector_size__(32))) W; > +typedef long long __attribute__((__vector_size__(64))) L; > +typedef _Float64 __attribute__((__vector_size__(16))) F; > +typedef _Float64 __attribute__((__vector_size__(64))) G; > +C c; > +int i; > + > +U foo0( W v256u32_0, > + W v256s32_0, > + V v128u64_0, > + V v128s64_0, > + W v256u64_0, > + W v256s64_0, > + L v512s64_0, > + W v256u128_0, > + W v256s128_0, > + V v128f32_0, > + W v256f32_0, > + F F_0, > + W v256f64_0, > + G G_0) { > + C U_1 = __builtin_ia32_pshufb(c, c); > + G_0 += __builtin_convertvector(v512s64_0, G); > + F F_1 = __builtin_shufflevector(F_0, G_0, 2, 2); > + W W_r = v256u32_0 + v256s32_0 + v256u64_0 + v256s64_0 + v256u128_0 + > + v256s128_0 + v256f32_0 + v256f64_0; > + V V_r = ((union { > + W a; > + V b; > + })W_r) > + .b + > + i + v128u64_0 + v128s64_0 + v128f32_0 + > + (V)F_1; > + U U_r = ((union { > + V a; > + U b; > + })V_r) > + .b + > + (U)U_1; > + return U_r; > +} > -- > 2.35.1 >
On Sun, Mar 27, 2022 at 11:35 AM Uros Bizjak <ubizjak@gmail.com> wrote: > > On Sun, Mar 27, 2022 at 8:14 PM H.J. Lu <hjl.tools@gmail.com> wrote: > > > > Since AVX512VL and AVX512BW are required for AVX512 VPSHUFB, replace the > > "Yv" register constraint with the "Yw" register constraint. > > This is an obvious fix, as said in https://gcc.gnu.org/gitwrite.html : > > Obvious fixes can be committed without prior approval. Just check in > the fix and copy it to gcc-patches. A good test to determine whether a > fix is obvious: will the person who objects to my work the most be > able to find a fault with my fix? If the fix is later found to be > faulty, it can always be rolled back. We don't want to get overly > restrictive about checkin policies. I checked this into the master branch. I am backporting it to release branches. I will drop the testcase for release branches since __builtin_shufflevector is new for GCC 12. > Thanks, > Uros. > > > > > gcc/ > > > > PR target/105068 > > * config/i386/sse.md (*ssse3_pshufbv8qi3): Replace "Yv" with > > "Yw". > > > > gcc/testsuite/ > > > > PR target/105068 > > * gcc.target/i386/pr105068.c: New test. 
> > --- > > gcc/config/i386/sse.md | 6 +-- > > gcc/testsuite/gcc.target/i386/pr105068.c | 47 ++++++++++++++++++++++++ > > 2 files changed, 50 insertions(+), 3 deletions(-) > > create mode 100644 gcc/testsuite/gcc.target/i386/pr105068.c > > > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > > index 33bd2c4768a..58d2bd972ed 100644 > > --- a/gcc/config/i386/sse.md > > +++ b/gcc/config/i386/sse.md > > @@ -20758,9 +20758,9 @@ (define_expand "ssse3_pshufbv8qi3" > > }) > > > > (define_insn_and_split "*ssse3_pshufbv8qi3" > > - [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv") > > - (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv") > > - (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv") > > + [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw") > > + (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yw") > > + (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw") > > (match_operand:V4SI 4 "reg_or_const_vector_operand" > > "i,3,3")] > > UNSPEC_PSHUFB)) > > diff --git a/gcc/testsuite/gcc.target/i386/pr105068.c b/gcc/testsuite/gcc.target/i386/pr105068.c > > new file mode 100644 > > index 00000000000..e5fb0338e3b > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr105068.c > > @@ -0,0 +1,47 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-Og -march=x86-64 -mavx512vl -fsanitize=thread -fstack-protector-all" } */ > > + > > +typedef char __attribute__((__vector_size__(8))) C; > > +typedef int __attribute__((__vector_size__(8))) U; > > +typedef int __attribute__((__vector_size__(16))) V; > > +typedef int __attribute__((__vector_size__(32))) W; > > +typedef long long __attribute__((__vector_size__(64))) L; > > +typedef _Float64 __attribute__((__vector_size__(16))) F; > > +typedef _Float64 __attribute__((__vector_size__(64))) G; > > +C c; > > +int i; > > + > > +U foo0( W v256u32_0, > > + W v256s32_0, > > + V v128u64_0, > > + V v128s64_0, > > + W v256u64_0, > > + W v256s64_0, > > + L v512s64_0, > > + W 
v256u128_0, > > + W v256s128_0, > > + V v128f32_0, > > + W v256f32_0, > > + F F_0, > > + W v256f64_0, > > + G G_0) { > > + C U_1 = __builtin_ia32_pshufb(c, c); > > + G_0 += __builtin_convertvector(v512s64_0, G); > > + F F_1 = __builtin_shufflevector(F_0, G_0, 2, 2); > > + W W_r = v256u32_0 + v256s32_0 + v256u64_0 + v256s64_0 + v256u128_0 + > > + v256s128_0 + v256f32_0 + v256f64_0; > > + V V_r = ((union { > > + W a; > > + V b; > > + })W_r) > > + .b + > > + i + v128u64_0 + v128s64_0 + v128f32_0 + > > + (V)F_1; > > + U U_r = ((union { > > + V a; > > + U b; > > + })V_r) > > + .b + > > + (U)U_1; > > + return U_r; > > +} > > -- > > 2.35.1 > >
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 33bd2c4768a..58d2bd972ed 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -20758,9 +20758,9 @@ (define_expand "ssse3_pshufbv8qi3" }) (define_insn_and_split "*ssse3_pshufbv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv") - (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv") - (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv") + [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw") + (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yw") + (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw") (match_operand:V4SI 4 "reg_or_const_vector_operand" "i,3,3")] UNSPEC_PSHUFB)) diff --git a/gcc/testsuite/gcc.target/i386/pr105068.c b/gcc/testsuite/gcc.target/i386/pr105068.c new file mode 100644 index 00000000000..e5fb0338e3b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr105068.c @@ -0,0 +1,47 @@ +/* { dg-do compile } */ +/* { dg-options "-Og -march=x86-64 -mavx512vl -fsanitize=thread -fstack-protector-all" } */ + +typedef char __attribute__((__vector_size__(8))) C; +typedef int __attribute__((__vector_size__(8))) U; +typedef int __attribute__((__vector_size__(16))) V; +typedef int __attribute__((__vector_size__(32))) W; +typedef long long __attribute__((__vector_size__(64))) L; +typedef _Float64 __attribute__((__vector_size__(16))) F; +typedef _Float64 __attribute__((__vector_size__(64))) G; +C c; +int i; + +U foo0( W v256u32_0, + W v256s32_0, + V v128u64_0, + V v128s64_0, + W v256u64_0, + W v256s64_0, + L v512s64_0, + W v256u128_0, + W v256s128_0, + V v128f32_0, + W v256f32_0, + F F_0, + W v256f64_0, + G G_0) { + C U_1 = __builtin_ia32_pshufb(c, c); + G_0 += __builtin_convertvector(v512s64_0, G); + F F_1 = __builtin_shufflevector(F_0, G_0, 2, 2); + W W_r = v256u32_0 + v256s32_0 + v256u64_0 + v256s64_0 + v256u128_0 + + v256s128_0 + v256f32_0 + v256f64_0; + V V_r = ((union { + W a; + V b; + })W_r) + .b + + i + v128u64_0 + v128s64_0 
+ v128f32_0 + + (V)F_1; + U U_r = ((union { + V a; + U b; + })V_r) + .b + + (U)U_1; + return U_r; +}