Message ID | 20160404174401.GI19207@tucnak.redhat.com |
---|---|
State | New |
Headers | show |
On Mon, Apr 4, 2016 at 7:44 PM, Jakub Jelinek <jakub@redhat.com> wrote: > Hi! > > This patch fixes various *andnot<mode>3* issues. There are two issues on > the ISA side that make stuff harder for andnot: there are no VPANDNB and > VPANDNW instructions, and while there used to be just VPANDN instruction > in AVX/AVX2, there is only VPANDND and VPANDNQ in EVEX. > The patch changes: > 1) simplifies asserts, TARGET_AVX512VL implies both TARGET_AVX2 and > TARGET_SSE2, so asserts like TARGET_AVX2 || TARGET_AVX512VL make no > sense > 2) for V32HImode/V64QImode it emits vpandnq instruction, rather than > vpandn that fails to assemble > 3) the *andnot<mode>3 pattern clearly wasn't expecting subst, but > as it used (copy-paste?) <mask_operand3_1> in the template, it actually > was substed, which is wrong - we can't implement V64QImode or V32HImode > masking of andnot (well, not in a single instruction); checked > this was the only case of <mask_operand3_1> used in define_insn > without <mask_name>; for V*[SD]Imode *andnot<mode>3_mask pattern > should DTRT > 4) the *andnot<mode>3_mask pattern makes no sense, for similar reasons > - VPANDNB and VPANDNW are not in the ISA, not even with AVX512-BW > 5) formatting fixes > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2016-04-04 Jakub Jelinek <jakub@redhat.com> > > PR target/70525 > * config/i386/sse.md (*andnot<mode>3): Simplify assertions. > Use vpandn<ssemodesuffix> for V16SI/V8DImode, vpandnq for > V32HI/V64QImode, don't use <mask_operand3_1>, fix up formatting. > (*andnot<mode>3_mask): Remove insn with VI12_AVX512VL iterator. > > * gcc.target/i386/pr70525.c: New test. OK. Thanks, Uros. 
> --- gcc/config/i386/sse.md.jj 2016-04-01 17:21:31.000000000 +0200 > +++ gcc/config/i386/sse.md 2016-04-04 14:42:06.296867515 +0200 > @@ -11377,45 +11377,46 @@ (define_insn "*andnot<mode>3" > case MODE_XI: > gcc_assert (TARGET_AVX512F); > case MODE_OI: > - gcc_assert (TARGET_AVX2 || TARGET_AVX512VL); > + gcc_assert (TARGET_AVX2); > case MODE_TI: > - gcc_assert (TARGET_SSE2 || TARGET_AVX512VL); > + gcc_assert (TARGET_SSE2); > switch (<MODE>mode) > - { > - case V16SImode: > - case V8DImode: > - if (TARGET_AVX512F) > - { > - tmp = "pandn<ssemodesuffix>"; > - break; > - } > - case V8SImode: > - case V4DImode: > - case V4SImode: > - case V2DImode: > - if (TARGET_AVX512VL) > - { > - tmp = "pandn<ssemodesuffix>"; > - break; > - } > - default: > - tmp = TARGET_AVX512VL ? "pandnq" : "pandn"; > - } > + { > + case V64QImode: > + case V32HImode: > + /* There is no vpandnb or vpandnw instruction, nor vpandn for > + 512-bit vectors. Use vpandnq instead. */ > + tmp = "pandnq"; > + break; > + case V16SImode: > + case V8DImode: > + tmp = "pandn<ssemodesuffix>"; > + break; > + case V8SImode: > + case V4DImode: > + case V4SImode: > + case V2DImode: > + tmp = TARGET_AVX512VL ? "pandn<ssemodesuffix>" : "pandn"; > + break; > + default: > + tmp = TARGET_AVX512VL ? 
"pandnq" : "pandn"; > + break; > + } > break; > > - case MODE_V16SF: > + case MODE_V16SF: > gcc_assert (TARGET_AVX512F); > - case MODE_V8SF: > + case MODE_V8SF: > gcc_assert (TARGET_AVX); > - case MODE_V4SF: > + case MODE_V4SF: > gcc_assert (TARGET_SSE); > > tmp = "andnps"; > break; > > - default: > + default: > gcc_unreachable (); > - } > + } > > switch (which_alternative) > { > @@ -11423,7 +11424,7 @@ (define_insn "*andnot<mode>3" > ops = "%s\t{%%2, %%0|%%0, %%2}"; > break; > case 1: > - ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}"; > + ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; > break; > default: > gcc_unreachable (); > @@ -11471,21 +11472,6 @@ (define_insn "*andnot<mode>3_mask" > "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"; > [(set_attr "type" "sselog") > (set_attr "prefix" "evex") > - (set_attr "mode" "<sseinsnmode>")]) > - > -(define_insn "*andnot<mode>3_mask" > - [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") > - (vec_merge:VI12_AVX512VL > - (and:VI12_AVX512VL > - (not:VI12_AVX512VL > - (match_operand:VI12_AVX512VL 1 "register_operand" "v")) > - (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")) > - (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C") > - (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] > - "TARGET_AVX512BW" > - "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"; > - [(set_attr "type" "sselog") > - (set_attr "prefix" "evex") > (set_attr "mode" "<sseinsnmode>")]) > > (define_expand "<code><mode>3" > --- gcc/testsuite/gcc.target/i386/pr70525.c.jj 2016-04-04 15:13:23.417615588 +0200 > +++ gcc/testsuite/gcc.target/i386/pr70525.c 2016-04-04 15:13:04.000000000 +0200 > @@ -0,0 +1,32 @@ > +/* PR target/70525 */ > +/* { dg-do assemble { target avx512bw } } */ > +/* { dg-options "-O2 -mavx512bw -mno-avx512vl" } */ > + > +typedef char v64qi __attribute__ ((vector_size (64))); > +typedef short v32hi __attribute__ ((vector_size (64))); > 
+typedef int v16si __attribute__ ((vector_size (64))); > +typedef long long v8di __attribute__ ((vector_size (64))); > + > +v64qi > +f1 (v64qi x, v64qi y) > +{ > + return x & ~y; > +} > + > +v32hi > +f2 (v32hi x, v32hi y) > +{ > + return x & ~y; > +} > + > +v16si > +f3 (v16si x, v16si y) > +{ > + return x & ~y; > +} > + > +v8di > +f4 (v8di x, v8di y) > +{ > + return x & ~y; > +} > > Jakub
--- gcc/config/i386/sse.md.jj 2016-04-01 17:21:31.000000000 +0200 +++ gcc/config/i386/sse.md 2016-04-04 14:42:06.296867515 +0200 @@ -11377,45 +11377,46 @@ (define_insn "*andnot<mode>3" case MODE_XI: gcc_assert (TARGET_AVX512F); case MODE_OI: - gcc_assert (TARGET_AVX2 || TARGET_AVX512VL); + gcc_assert (TARGET_AVX2); case MODE_TI: - gcc_assert (TARGET_SSE2 || TARGET_AVX512VL); + gcc_assert (TARGET_SSE2); switch (<MODE>mode) - { - case V16SImode: - case V8DImode: - if (TARGET_AVX512F) - { - tmp = "pandn<ssemodesuffix>"; - break; - } - case V8SImode: - case V4DImode: - case V4SImode: - case V2DImode: - if (TARGET_AVX512VL) - { - tmp = "pandn<ssemodesuffix>"; - break; - } - default: - tmp = TARGET_AVX512VL ? "pandnq" : "pandn"; - } + { + case V64QImode: + case V32HImode: + /* There is no vpandnb or vpandnw instruction, nor vpandn for + 512-bit vectors. Use vpandnq instead. */ + tmp = "pandnq"; + break; + case V16SImode: + case V8DImode: + tmp = "pandn<ssemodesuffix>"; + break; + case V8SImode: + case V4DImode: + case V4SImode: + case V2DImode: + tmp = TARGET_AVX512VL ? "pandn<ssemodesuffix>" : "pandn"; + break; + default: + tmp = TARGET_AVX512VL ? 
"pandnq" : "pandn"; + break; + } break; - case MODE_V16SF: + case MODE_V16SF: gcc_assert (TARGET_AVX512F); - case MODE_V8SF: + case MODE_V8SF: gcc_assert (TARGET_AVX); - case MODE_V4SF: + case MODE_V4SF: gcc_assert (TARGET_SSE); tmp = "andnps"; break; - default: + default: gcc_unreachable (); - } + } switch (which_alternative) { @@ -11423,7 +11424,7 @@ (define_insn "*andnot<mode>3" ops = "%s\t{%%2, %%0|%%0, %%2}"; break; case 1: - ops = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}"; + ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}"; break; default: gcc_unreachable (); @@ -11471,21 +11472,6 @@ (define_insn "*andnot<mode>3_mask" "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"; [(set_attr "type" "sselog") (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) - -(define_insn "*andnot<mode>3_mask" - [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") - (vec_merge:VI12_AVX512VL - (and:VI12_AVX512VL - (not:VI12_AVX512VL - (match_operand:VI12_AVX512VL 1 "register_operand" "v")) - (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")) - (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C") - (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))] - "TARGET_AVX512BW" - "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"; - [(set_attr "type" "sselog") - (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) (define_expand "<code><mode>3" --- gcc/testsuite/gcc.target/i386/pr70525.c.jj 2016-04-04 15:13:23.417615588 +0200 +++ gcc/testsuite/gcc.target/i386/pr70525.c 2016-04-04 15:13:04.000000000 +0200 @@ -0,0 +1,32 @@ +/* PR target/70525 */ +/* { dg-do assemble { target avx512bw } } */ +/* { dg-options "-O2 -mavx512bw -mno-avx512vl" } */ + +typedef char v64qi __attribute__ ((vector_size (64))); +typedef short v32hi __attribute__ ((vector_size (64))); +typedef int v16si __attribute__ ((vector_size (64))); +typedef long long v8di __attribute__ ((vector_size (64))); + +v64qi +f1 
(v64qi x, v64qi y) +{ + return x & ~y; +} + +v32hi +f2 (v32hi x, v32hi y) +{ + return x & ~y; +} + +v16si +f3 (v16si x, v16si y) +{ + return x & ~y; +} + +v8di +f4 (v8di x, v8di y) +{ + return x & ~y; +}