Message ID | 20200212063228.GW17695@tucnak |
---|---|
State | New |
Headers | show |
Series | i386: Fix k*shift* intrinsics [PR93673] | expand |
On Wed, Feb 12, 2020 at 7:33 AM Jakub Jelinek <jakub@redhat.com> wrote: > > Hi! > > As mentioned in the PR, the intrinsics allow counts from 0 to 255, but > we actually reject values from 128 to 255. That is because QImode > CONST_INTs can be only -128 to 127. Fixed by using const_0_to_255_operand > and adjusting the modes for those predicates to HImode instead of QImode > (the IL actually contains the CONST_INT which has VOIDmode). > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > Another option would be to drop the modes from match_operand with > const_0_to_255_operand. Please drop the mode; it is not necessary with predicates that limit their const_int operand by themselves. > > 2020-02-12 Jakub Jelinek <jakub@redhat.com> > > PR target/93673 > * config/i386/sse.md (k<code><mode>): Use HImode instead of QImode > for last operand and use const_0_to_255_operand predicate instead of > immediate_operand. > (avx512dq_fpclass<mode><mask_scalar_merge_name>, > avx512dq_vmfpclass<mode><mask_scalar_merge_name>, > vgf2p8affineinvqb_<mode><mask_name>, > vgf2p8affineqb_<mode><mask_name>): Use HImode instead of QImode > for const_0_to_255_operand predicated operands. > > * gcc.target/i386/avx512f-pr93673.c: New test. > * gcc.target/i386/avx512dq-pr93673.c: New test. > * gcc.target/i386/avx512bw-pr93673.c: New test. OK. Thanks, Uros. 
> > --- gcc/config/i386/sse.md.jj 2020-02-10 22:44:15.235205656 +0100 > +++ gcc/config/i386/sse.md 2020-02-11 13:35:37.768477721 +0100 > @@ -1650,7 +1650,7 @@ (define_insn "k<code><mode>" > [(set (match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "=k") > (any_lshift:SWI1248_AVX512BWDQ > (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k") > - (match_operand:QI 2 "immediate_operand" "n"))) > + (match_operand:HI 2 "const_0_to_255_operand" "n"))) > (unspec [(const_int 0)] UNSPEC_MASKOP)] > "TARGET_AVX512F" > "k<mshift><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}" > @@ -22016,7 +22016,7 @@ (define_insn "avx512dq_fpclass<mode><mas > [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k") > (unspec:<avx512fmaskmode> > [(match_operand:VF_AVX512VL 1 "vector_operand" "vm") > - (match_operand:QI 2 "const_0_to_255_operand" "n")] > + (match_operand:HI 2 "const_0_to_255_operand" "n")] > UNSPEC_FPCLASS))] > "TARGET_AVX512DQ" > "vfpclass<ssemodesuffix><vecmemsuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"; > @@ -22030,7 +22030,7 @@ (define_insn "avx512dq_vmfpclass<mode><m > (and:<avx512fmaskmode> > (unspec:<avx512fmaskmode> > [(match_operand:VF_128 1 "nonimmediate_operand" "vm") > - (match_operand:QI 2 "const_0_to_255_operand" "n")] > + (match_operand:HI 2 "const_0_to_255_operand" "n")] > UNSPEC_FPCLASS) > (const_int 1)))] > "TARGET_AVX512DQ" > @@ -22637,7 +22637,7 @@ (define_insn "vgf2p8affineinvqb_<mode><m > (unspec:VI1_AVX512F > [(match_operand:VI1_AVX512F 1 "register_operand" "0,v") > (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm") > - (match_operand:QI 3 "const_0_to_255_operand" "n,n")] > + (match_operand:HI 3 "const_0_to_255_operand" "n,n")] > UNSPEC_GF2P8AFFINEINV))] > "TARGET_GFNI" > "@ > @@ -22654,7 +22654,7 @@ (define_insn "vgf2p8affineqb_<mode><mask > (unspec:VI1_AVX512F > [(match_operand:VI1_AVX512F 1 "register_operand" "0,v") > (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm") > - 
(match_operand:QI 3 "const_0_to_255_operand" "n,n")] > + (match_operand:HI 3 "const_0_to_255_operand" "n,n")] > UNSPEC_GF2P8AFFINE))] > "TARGET_GFNI" > "@ > --- gcc/testsuite/gcc.target/i386/avx512f-pr93673.c.jj 2020-02-11 13:43:05.340775467 +0100 > +++ gcc/testsuite/gcc.target/i386/avx512f-pr93673.c 2020-02-11 13:43:47.202148358 +0100 > @@ -0,0 +1,20 @@ > +/* PR target/93673 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mavx512f" } */ > + > +#include <x86intrin.h> > + > +void > +foo (__mmask16 *b) > +{ > + b[0] = _kshiftli_mask16 (b[0], 0); > + b[1] = _kshiftri_mask16 (b[1], 0); > + b[2] = _kshiftli_mask16 (b[2], 1); > + b[3] = _kshiftri_mask16 (b[3], 1); > + b[4] = _kshiftli_mask16 (b[4], 15); > + b[5] = _kshiftri_mask16 (b[5], 15); > + b[6] = _kshiftli_mask16 (b[6], 0x7f); > + b[7] = _kshiftri_mask16 (b[7], 0x7f); > + b[8] = _kshiftli_mask16 (b[8], 0xff); > + b[9] = _kshiftri_mask16 (b[9], 0xff); > +} > --- gcc/testsuite/gcc.target/i386/avx512dq-pr93673.c.jj 2020-02-11 13:45:34.956534896 +0100 > +++ gcc/testsuite/gcc.target/i386/avx512dq-pr93673.c 2020-02-11 13:45:56.990205434 +0100 > @@ -0,0 +1,20 @@ > +/* PR target/93673 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mavx512dq" } */ > + > +#include <x86intrin.h> > + > +void > +foo (__mmask8 *a) > +{ > + a[0] = _kshiftli_mask8 (a[0], 0); > + a[1] = _kshiftri_mask8 (a[1], 0); > + a[2] = _kshiftli_mask8 (a[2], 1); > + a[3] = _kshiftri_mask8 (a[3], 1); > + a[4] = _kshiftli_mask8 (a[4], 7); > + a[5] = _kshiftri_mask8 (a[5], 7); > + a[6] = _kshiftli_mask8 (a[6], 0x7f); > + a[7] = _kshiftri_mask8 (a[7], 0x7f); > + a[8] = _kshiftli_mask8 (a[8], 0xff); > + a[9] = _kshiftri_mask8 (a[9], 0xff); > +} > --- gcc/testsuite/gcc.target/i386/avx512bw-pr93673.c.jj 2020-02-11 13:46:13.558957690 +0100 > +++ gcc/testsuite/gcc.target/i386/avx512bw-pr93673.c 2020-02-11 13:46:29.406720723 +0100 > @@ -0,0 +1,30 @@ > +/* PR target/93673 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mavx512bw" } */ > + > 
+#include <x86intrin.h> > + > +void > +foo (__mmask32 *c, __mmask64 *d) > +{ > + c[0] = _kshiftli_mask32 (c[0], 0); > + c[1] = _kshiftri_mask32 (c[1], 0); > + c[2] = _kshiftli_mask32 (c[2], 1); > + c[3] = _kshiftri_mask32 (c[3], 1); > + c[4] = _kshiftli_mask32 (c[4], 31); > + c[5] = _kshiftri_mask32 (c[5], 31); > + c[6] = _kshiftli_mask32 (c[6], 0x7f); > + c[7] = _kshiftri_mask32 (c[7], 0x7f); > + c[8] = _kshiftli_mask32 (c[8], 0xff); > + c[9] = _kshiftri_mask32 (c[9], 0xff); > + d[0] = _kshiftli_mask64 (d[0], 0); > + d[1] = _kshiftri_mask64 (d[1], 0); > + d[2] = _kshiftli_mask64 (d[2], 1); > + d[3] = _kshiftri_mask64 (d[3], 1); > + d[4] = _kshiftli_mask64 (d[4], 63); > + d[5] = _kshiftri_mask64 (d[5], 63); > + d[6] = _kshiftli_mask64 (d[6], 0x7f); > + d[7] = _kshiftri_mask64 (d[7], 0x7f); > + d[8] = _kshiftli_mask64 (d[8], 0xff); > + d[9] = _kshiftri_mask64 (d[9], 0xff); > +} > > Jakub >
--- gcc/config/i386/sse.md.jj 2020-02-10 22:44:15.235205656 +0100 +++ gcc/config/i386/sse.md 2020-02-11 13:35:37.768477721 +0100 @@ -1650,7 +1650,7 @@ (define_insn "k<code><mode>" [(set (match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "=k") (any_lshift:SWI1248_AVX512BWDQ (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k") - (match_operand:QI 2 "immediate_operand" "n"))) + (match_operand:HI 2 "const_0_to_255_operand" "n"))) (unspec [(const_int 0)] UNSPEC_MASKOP)] "TARGET_AVX512F" "k<mshift><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}" @@ -22016,7 +22016,7 @@ (define_insn "avx512dq_fpclass<mode><mas [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k") (unspec:<avx512fmaskmode> [(match_operand:VF_AVX512VL 1 "vector_operand" "vm") - (match_operand:QI 2 "const_0_to_255_operand" "n")] + (match_operand:HI 2 "const_0_to_255_operand" "n")] UNSPEC_FPCLASS))] "TARGET_AVX512DQ" "vfpclass<ssemodesuffix><vecmemsuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"; @@ -22030,7 +22030,7 @@ (define_insn "avx512dq_vmfpclass<mode><m (and:<avx512fmaskmode> (unspec:<avx512fmaskmode> [(match_operand:VF_128 1 "nonimmediate_operand" "vm") - (match_operand:QI 2 "const_0_to_255_operand" "n")] + (match_operand:HI 2 "const_0_to_255_operand" "n")] UNSPEC_FPCLASS) (const_int 1)))] "TARGET_AVX512DQ" @@ -22637,7 +22637,7 @@ (define_insn "vgf2p8affineinvqb_<mode><m (unspec:VI1_AVX512F [(match_operand:VI1_AVX512F 1 "register_operand" "0,v") (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm") - (match_operand:QI 3 "const_0_to_255_operand" "n,n")] + (match_operand:HI 3 "const_0_to_255_operand" "n,n")] UNSPEC_GF2P8AFFINEINV))] "TARGET_GFNI" "@ @@ -22654,7 +22654,7 @@ (define_insn "vgf2p8affineqb_<mode><mask (unspec:VI1_AVX512F [(match_operand:VI1_AVX512F 1 "register_operand" "0,v") (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm") - (match_operand:QI 3 "const_0_to_255_operand" "n,n")] + (match_operand:HI 3 "const_0_to_255_operand" 
"n,n")] UNSPEC_GF2P8AFFINE))] "TARGET_GFNI" "@ --- gcc/testsuite/gcc.target/i386/avx512f-pr93673.c.jj 2020-02-11 13:43:05.340775467 +0100 +++ gcc/testsuite/gcc.target/i386/avx512f-pr93673.c 2020-02-11 13:43:47.202148358 +0100 @@ -0,0 +1,20 @@ +/* PR target/93673 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512f" } */ + +#include <x86intrin.h> + +void +foo (__mmask16 *b) +{ + b[0] = _kshiftli_mask16 (b[0], 0); + b[1] = _kshiftri_mask16 (b[1], 0); + b[2] = _kshiftli_mask16 (b[2], 1); + b[3] = _kshiftri_mask16 (b[3], 1); + b[4] = _kshiftli_mask16 (b[4], 15); + b[5] = _kshiftri_mask16 (b[5], 15); + b[6] = _kshiftli_mask16 (b[6], 0x7f); + b[7] = _kshiftri_mask16 (b[7], 0x7f); + b[8] = _kshiftli_mask16 (b[8], 0xff); + b[9] = _kshiftri_mask16 (b[9], 0xff); +} --- gcc/testsuite/gcc.target/i386/avx512dq-pr93673.c.jj 2020-02-11 13:45:34.956534896 +0100 +++ gcc/testsuite/gcc.target/i386/avx512dq-pr93673.c 2020-02-11 13:45:56.990205434 +0100 @@ -0,0 +1,20 @@ +/* PR target/93673 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512dq" } */ + +#include <x86intrin.h> + +void +foo (__mmask8 *a) +{ + a[0] = _kshiftli_mask8 (a[0], 0); + a[1] = _kshiftri_mask8 (a[1], 0); + a[2] = _kshiftli_mask8 (a[2], 1); + a[3] = _kshiftri_mask8 (a[3], 1); + a[4] = _kshiftli_mask8 (a[4], 7); + a[5] = _kshiftri_mask8 (a[5], 7); + a[6] = _kshiftli_mask8 (a[6], 0x7f); + a[7] = _kshiftri_mask8 (a[7], 0x7f); + a[8] = _kshiftli_mask8 (a[8], 0xff); + a[9] = _kshiftri_mask8 (a[9], 0xff); +} --- gcc/testsuite/gcc.target/i386/avx512bw-pr93673.c.jj 2020-02-11 13:46:13.558957690 +0100 +++ gcc/testsuite/gcc.target/i386/avx512bw-pr93673.c 2020-02-11 13:46:29.406720723 +0100 @@ -0,0 +1,30 @@ +/* PR target/93673 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512bw" } */ + +#include <x86intrin.h> + +void +foo (__mmask32 *c, __mmask64 *d) +{ + c[0] = _kshiftli_mask32 (c[0], 0); + c[1] = _kshiftri_mask32 (c[1], 0); + c[2] = _kshiftli_mask32 (c[2], 1); + c[3] = _kshiftri_mask32 (c[3], 1); + c[4] 
= _kshiftli_mask32 (c[4], 31); + c[5] = _kshiftri_mask32 (c[5], 31); + c[6] = _kshiftli_mask32 (c[6], 0x7f); + c[7] = _kshiftri_mask32 (c[7], 0x7f); + c[8] = _kshiftli_mask32 (c[8], 0xff); + c[9] = _kshiftri_mask32 (c[9], 0xff); + d[0] = _kshiftli_mask64 (d[0], 0); + d[1] = _kshiftri_mask64 (d[1], 0); + d[2] = _kshiftli_mask64 (d[2], 1); + d[3] = _kshiftri_mask64 (d[3], 1); + d[4] = _kshiftli_mask64 (d[4], 63); + d[5] = _kshiftri_mask64 (d[5], 63); + d[6] = _kshiftli_mask64 (d[6], 0x7f); + d[7] = _kshiftri_mask64 (d[7], 0x7f); + d[8] = _kshiftli_mask64 (d[8], 0xff); + d[9] = _kshiftri_mask64 (d[9], 0xff); +}