Message ID | 5D1484FD020000780023B737@prv1-mh.provo.novell.com |
---|---|
State | New |
Headers | show |
Series | x86: fix/improve vgf2p8affine*qb insns | expand |
On Thu, Jun 27, 2019 at 10:57 AM Jan Beulich <JBeulich@suse.com> wrote: > > - the affine transformations are not commutative (the two source > operands have entirely different meaning) > - there's no need for three alternatives > - the nonimmediate_operand/Bm combination can better be vector_operand/m > > gcc/ > 2019-06-27 Jan Beulich <jbeulich@suse.com> > > * config/i386/sse.md (vgf2p8affineinvqb_<mode><mask_name>, > vgf2p8affineqb_<mode><mask_name>): Drop % constraint modifier. > Eliminate redundant alternative. Use vector_operand plus "m" > constraint. Please just drop % modifier and use vector_operand here. IIRC, register allocator operates on constraints, it doesn't care for predicates. But predicates shouldn't be more constrained than constraints. So, having "m" instead of "Bm" is a bad idea with vector_operand. Uros. > > gcc/testsuite/ > 2019-06-27 Jan Beulich <jbeulich@suse.com> > > * gcc.target/i386/gfni-5.c: New. > > --- > On top of this (in further patches) it looks like the Bm constraint could be > dropped altogether. At the very least its combination with vector_operand is > pointless, because both resolve to the same vector_memory_operand. Same goes > for round{,_saeonly}_nimm_predicate, which too resolves to vector_operand. > > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -22072,56 +22072,53 @@ > "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}") > > (define_insn "vgf2p8affineinvqb_<mode><mask_name>" > - [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v") > + [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v") > (unspec:VI1_AVX512F > - [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v") > - (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm") > - (match_operand:QI 3 "const_0_to_255_operand" "n,n,n")] > + [(match_operand:VI1_AVX512F 1 "register_operand" "0,v") > + (match_operand:VI1_AVX512F 2 "vector_operand" "xm,vm") > + (match_operand:QI 3 "const_0_to_255_operand" "n,n")] > UNSPEC_GF2P8AFFINEINV))] > "TARGET_GFNI" > "@ > gf2p8affineinvqb\t{%3, %2, %0| %0, %2, %3} > - vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3} > vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}" > - [(set_attr "isa" "noavx,avx,avx512f") > - (set_attr "prefix_data16" "1,*,*") > + [(set_attr "isa" "noavx,avx") > + (set_attr "prefix_data16" "1,*") > (set_attr "prefix_extra" "1") > - (set_attr "prefix" "orig,maybe_evex,evex") > + (set_attr "prefix" "orig,maybe_evex") > (set_attr "mode" "<sseinsnmode>")]) > > (define_insn "vgf2p8affineqb_<mode><mask_name>" > - [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v") > + [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v") > (unspec:VI1_AVX512F > - [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v") > - (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm") > - (match_operand:QI 3 "const_0_to_255_operand" "n,n,n")] > + [(match_operand:VI1_AVX512F 1 "register_operand" "0,v") > + (match_operand:VI1_AVX512F 2 "vector_operand" "xm,vm") > + (match_operand:QI 3 "const_0_to_255_operand" "n,n")] > UNSPEC_GF2P8AFFINE))] > "TARGET_GFNI" > "@ > gf2p8affineqb\t{%3, %2, %0| %0, %2, %3} > - vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3} > vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}" > - [(set_attr "isa" "noavx,avx,avx512f") > - (set_attr "prefix_data16" "1,*,*") > + [(set_attr "isa" "noavx,avx") > + (set_attr "prefix_data16" "1,*") > (set_attr "prefix_extra" "1") > - (set_attr "prefix" "orig,maybe_evex,evex") > + (set_attr "prefix" "orig,maybe_evex") > (set_attr "mode" "<sseinsnmode>")]) > > (define_insn "vgf2p8mulb_<mode><mask_name>" > - [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v") > + [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v") > (unspec:VI1_AVX512F > - [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v") > - (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")] > + [(match_operand:VI1_AVX512F 1 "register_operand" "%0,v") > + (match_operand:VI1_AVX512F 2 "vector_operand" "xm,vm")] > UNSPEC_GF2P8MUL))] > "TARGET_GFNI" > "@ > gf2p8mulb\t{%2, %0| %0, %2} > - vgf2p8mulb\t{%2, %1, %0<mask_operand3>| %0<mask_operand3>, %1, %2} > vgf2p8mulb\t{%2, %1, %0<mask_operand3>| %0<mask_operand3>, %1, %2}" > - [(set_attr "isa" "noavx,avx,avx512f") > - (set_attr "prefix_data16" "1,*,*") > + [(set_attr "isa" "noavx,avx") > + (set_attr "prefix_data16" "1,*") > (set_attr "prefix_extra" "1") > - (set_attr "prefix" "orig,maybe_evex,evex") > + (set_attr "prefix" "orig,maybe_evex") > (set_attr "mode" "<sseinsnmode>")]) > > (define_insn "vpshrd_<mode><mask_name>" > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/gfni-5.c > @@ -0,0 +1,19 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -msse2 -mgfni" } */ > + > +typedef char __attribute__((vector_size(16))) v16qi_t; > + > +v16qi_t test16a (v16qi_t x, v16qi_t a) > +{ > + asm volatile ("" : "+m" (a)); > + return __builtin_ia32_vgf2p8affineqb_v16qi (x, a, 0); > +} > + > +v16qi_t test16b (v16qi_t x, v16qi_t a) > +{ > + asm volatile ("" : "+m" (x)); > + return __builtin_ia32_vgf2p8affineqb_v16qi (x, a, 0); > +} > + > +/* { dg-final { scan-assembler-times "gf2p8affineqb\[ \t].*\\(" 1 } } */ > +/* { dg-final { scan-assembler-times "gf2p8affineqb\[ \t].*%xmm.*%xmm" 1 } } */ > > >
>>> On 27.06.19 at 12:20, <ubizjak@gmail.com> wrote: > On Thu, Jun 27, 2019 at 10:57 AM Jan Beulich <JBeulich@suse.com> wrote: >> >> - the affine transformations are not commutative (the two source >> operands have entirely different meaning) >> - there's no need for three alternatives >> - the nonimmediate_operand/Bm combination can better be vector_operand/m >> >> gcc/ >> 2019-06-27 Jan Beulich <jbeulich@suse.com> >> >> * config/i386/sse.md (vgf2p8affineinvqb_<mode><mask_name>, >> vgf2p8affineqb_<mode><mask_name>): Drop % constraint modifier. >> Eliminate redundant alternative. Use vector_operand plus "m" >> constraint. > > Please just drop % modifier and use vector_operand here. IIRC, > register allocator operates on constraints, it doesn't care for > predicates. But predicates shouldn't be more constrained than > constraints. So, having "m" instead of "Bm" is a bad idea with > vector_operand. Well, putting back the Bm is easy (if that's really needed). But do you also mean me to put back to redundant 3rd alternative? Jan
On Thu, Jun 27, 2019 at 12:49 PM Jan Beulich <JBeulich@suse.com> wrote: > > >>> On 27.06.19 at 12:20, <ubizjak@gmail.com> wrote: > > On Thu, Jun 27, 2019 at 10:57 AM Jan Beulich <JBeulich@suse.com> wrote: > >> > >> - the affine transformations are not commutative (the two source > >> operands have entirely different meaning) > >> - there's no need for three alternatives > >> - the nonimmediate_operand/Bm combination can better be vector_operand/m > >> > >> gcc/ > >> 2019-06-27 Jan Beulich <jbeulich@suse.com> > >> > >> * config/i386/sse.md (vgf2p8affineinvqb_<mode><mask_name>, > >> vgf2p8affineqb_<mode><mask_name>): Drop % constraint modifier. > >> Eliminate redundant alternative. Use vector_operand plus "m" > >> constraint. > > > > Please just drop % modifier and use vector_operand here. IIRC, > > register allocator operates on constraints, it doesn't care for > > predicates. But predicates shouldn't be more constrained than > > constraints. So, having "m" instead of "Bm" is a bad idea with > > vector_operand. > > Well, putting back the Bm is easy (if that's really needed). But do > you also mean me to put back to redundant 3rd alternative? It is not redundant, "x" and "v" are different register constraints. Uros.
>>> On 27.06.19 at 12:58, <ubizjak@gmail.com> wrote: > On Thu, Jun 27, 2019 at 12:49 PM Jan Beulich <JBeulich@suse.com> wrote: >> >> >>> On 27.06.19 at 12:20, <ubizjak@gmail.com> wrote: >> > On Thu, Jun 27, 2019 at 10:57 AM Jan Beulich <JBeulich@suse.com> wrote: >> >> >> >> - the affine transformations are not commutative (the two source >> >> operands have entirely different meaning) >> >> - there's no need for three alternatives >> >> - the nonimmediate_operand/Bm combination can better be vector_operand/m >> >> >> >> gcc/ >> >> 2019-06-27 Jan Beulich <jbeulich@suse.com> >> >> >> >> * config/i386/sse.md (vgf2p8affineinvqb_<mode><mask_name>, >> >> vgf2p8affineqb_<mode><mask_name>): Drop % constraint modifier. >> >> Eliminate redundant alternative. Use vector_operand plus "m" >> >> constraint. >> > >> > Please just drop % modifier and use vector_operand here. IIRC, >> > register allocator operates on constraints, it doesn't care for >> > predicates. But predicates shouldn't be more constrained than >> > constraints. So, having "m" instead of "Bm" is a bad idea with >> > vector_operand. >> >> Well, putting back the Bm is easy (if that's really needed). But do >> you also mean me to put back to redundant 3rd alternative? > > It is not redundant, "x" and "v" are different register constraints. Well, yes, "v" is covering a wider set than "x". But only if AVX512F is enabled. The original combinations ("x", "avx") and ("v", "avx512f") are thus effectively the same as the new ("v", "avx"). But yes - I guess I'll split this from the actual bug fix. Jan
On Thu, Jun 27, 2019 at 1:39 PM Jan Beulich <JBeulich@suse.com> wrote: > > >>> On 27.06.19 at 12:58, <ubizjak@gmail.com> wrote: > > On Thu, Jun 27, 2019 at 12:49 PM Jan Beulich <JBeulich@suse.com> wrote: > >> > >> >>> On 27.06.19 at 12:20, <ubizjak@gmail.com> wrote: > >> > On Thu, Jun 27, 2019 at 10:57 AM Jan Beulich <JBeulich@suse.com> wrote: > >> >> > >> >> - the affine transformations are not commutative (the two source > >> >> operands have entirely different meaning) > >> >> - there's no need for three alternatives > >> >> - the nonimmediate_operand/Bm combination can better be vector_operand/m > >> >> > >> >> gcc/ > >> >> 2019-06-27 Jan Beulich <jbeulich@suse.com> > >> >> > >> >> * config/i386/sse.md (vgf2p8affineinvqb_<mode><mask_name>, > >> >> vgf2p8affineqb_<mode><mask_name>): Drop % constraint modifier. > >> >> Eliminate redundant alternative. Use vector_operand plus "m" > >> >> constraint. > >> > > >> > Please just drop % modifier and use vector_operand here. IIRC, > >> > register allocator operates on constraints, it doesn't care for > >> > predicates. But predicates shouldn't be more constrained than > >> > constraints. So, having "m" instead of "Bm" is a bad idea with > >> > vector_operand. > >> > >> Well, putting back the Bm is easy (if that's really needed). But do > >> you also mean me to put back to redundant 3rd alternative? > > > > It is not redundant, "x" and "v" are different register constraints. > > Well, yes, "v" is covering a wider set than "x". But only if AVX512F > is enabled. The original combinations ("x", "avx") and ("v", "avx512f") > are thus effectively the same as the new ("v", "avx"). But yes - I > guess I'll split this from the actual bug fix. No, you are correct. Please use "v", and remove the redundant alternative. Uros.
--- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -22072,56 +22072,53 @@ "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}") (define_insn "vgf2p8affineinvqb_<mode><mask_name>" - [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v") + [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v") (unspec:VI1_AVX512F - [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v") - (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm") - (match_operand:QI 3 "const_0_to_255_operand" "n,n,n")] + [(match_operand:VI1_AVX512F 1 "register_operand" "0,v") + (match_operand:VI1_AVX512F 2 "vector_operand" "xm,vm") + (match_operand:QI 3 "const_0_to_255_operand" "n,n")] UNSPEC_GF2P8AFFINEINV))] "TARGET_GFNI" "@ gf2p8affineinvqb\t{%3, %2, %0| %0, %2, %3} - vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3} vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}" - [(set_attr "isa" "noavx,avx,avx512f") - (set_attr "prefix_data16" "1,*,*") + [(set_attr "isa" "noavx,avx") + (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,maybe_evex,evex") + (set_attr "prefix" "orig,maybe_evex") (set_attr "mode" "<sseinsnmode>")]) (define_insn "vgf2p8affineqb_<mode><mask_name>" - [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v") + [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v") (unspec:VI1_AVX512F - [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v") - (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm") - (match_operand:QI 3 "const_0_to_255_operand" "n,n,n")] + [(match_operand:VI1_AVX512F 1 "register_operand" "0,v") + (match_operand:VI1_AVX512F 2 "vector_operand" "xm,vm") + (match_operand:QI 3 "const_0_to_255_operand" "n,n")] UNSPEC_GF2P8AFFINE))] "TARGET_GFNI" "@ gf2p8affineqb\t{%3, %2, %0| %0, %2, %3} - vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3} vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>| %0<mask_operand4>, %1, %2, %3}" - [(set_attr "isa" "noavx,avx,avx512f") - (set_attr "prefix_data16" "1,*,*") + [(set_attr "isa" "noavx,avx") + (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,maybe_evex,evex") + (set_attr "prefix" "orig,maybe_evex") (set_attr "mode" "<sseinsnmode>")]) (define_insn "vgf2p8mulb_<mode><mask_name>" - [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v") + [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v") (unspec:VI1_AVX512F - [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v") - (match_operand:VI1_AVX512F 2 "nonimmediate_operand" "xBm,xm,vm")] + [(match_operand:VI1_AVX512F 1 "register_operand" "%0,v") + (match_operand:VI1_AVX512F 2 "vector_operand" "xm,vm")] UNSPEC_GF2P8MUL))] "TARGET_GFNI" "@ gf2p8mulb\t{%2, %0| %0, %2} - vgf2p8mulb\t{%2, %1, %0<mask_operand3>| %0<mask_operand3>, %1, %2} vgf2p8mulb\t{%2, %1, %0<mask_operand3>| %0<mask_operand3>, %1, %2}" - [(set_attr "isa" "noavx,avx,avx512f") - (set_attr "prefix_data16" "1,*,*") + [(set_attr "isa" "noavx,avx") + (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,maybe_evex,evex") + (set_attr "prefix" "orig,maybe_evex") (set_attr "mode" "<sseinsnmode>")]) (define_insn "vpshrd_<mode><mask_name>" --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/gfni-5.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2 -mgfni" } */ + +typedef char __attribute__((vector_size(16))) v16qi_t; + +v16qi_t test16a (v16qi_t x, v16qi_t a) +{ + asm volatile ("" : "+m" (a)); + return __builtin_ia32_vgf2p8affineqb_v16qi (x, a, 0); +} + +v16qi_t test16b (v16qi_t x, v16qi_t a) +{ + asm volatile ("" : "+m" (x)); + return __builtin_ia32_vgf2p8affineqb_v16qi (x, a, 0); +} + +/* { dg-final { scan-assembler-times "gf2p8affineqb\[ \t].*\\(" 1 } } */ +/* { dg-final { scan-assembler-times "gf2p8affineqb\[ \t].*%xmm.*%xmm" 1 } } */