diff mbox series

i386: Fix k*shift* intrinsics [PR93673]

Message ID 20200212063228.GW17695@tucnak
State New
Headers show
Series i386: Fix k*shift* intrinsics [PR93673] | expand

Commit Message

Jakub Jelinek Feb. 12, 2020, 6:32 a.m. UTC
Hi!

As mentioned in the PR, the intrinsics allow counts from 0 to 255, but
we actually reject values from 128 to 255.  That is because QImode
CONST_INTs can be only -128 to 127.  Fixed by using const_0_to_255_operand
and adjusting the modes for those predicates to HImode instead of QImode
(the IL actually contains the CONST_INT which has VOIDmode).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Another option would be to drop the modes from match_operand with
const_0_to_255_operand.

2020-02-12  Jakub Jelinek  <jakub@redhat.com>

	PR target/93673
	* config/i386/sse.md (k<code><mode>): Use HImode instead of QImode
	for last operand and use const_0_to_255_operand predicate instead of
	immediate_operand.
	(avx512dq_fpclass<mode><mask_scalar_merge_name>,
	avx512dq_vmfpclass<mode><mask_scalar_merge_name>,
	vgf2p8affineinvqb_<mode><mask_name>,
	vgf2p8affineqb_<mode><mask_name>): Use HImode instead of QImode
	for const_0_to_255_operand predicated operands.

	* gcc.target/i386/avx512f-pr93673.c: New test.
	* gcc.target/i386/avx512dq-pr93673.c: New test.
	* gcc.target/i386/avx512bw-pr93673.c: New test.


	Jakub

Comments

Uros Bizjak Feb. 12, 2020, 7:05 a.m. UTC | #1
On Wed, Feb 12, 2020 at 7:33 AM Jakub Jelinek <jakub@redhat.com> wrote:
>
> Hi!
>
> As mentioned in the PR, the intrinsics allow counts from 0 to 255, but
> we actually reject values from 128 to 255.  That is because QImode
> CONST_INTs can be only -128 to 127.  Fixed by using const_0_to_255_operand
> and adjusting the modes for those predicates to HImode instead of QImode
> (the IL actually contains the CONST_INT which has VOIDmode).
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> Another option would be to drop the modes from match_operand with
> const_0_to_255_operand.

Please drop the mode; it is not necessary with predicates that limit
their const_int operand by themselves.

>
> 2020-02-12  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/93673
>         * config/i386/sse.md (k<code><mode>): Use HImode instead of QImode
>         for last operand and use const_0_to_255_operand predicate instead of
>         immediate_operand.
>         (avx512dq_fpclass<mode><mask_scalar_merge_name>,
>         avx512dq_vmfpclass<mode><mask_scalar_merge_name>,
>         vgf2p8affineinvqb_<mode><mask_name>,
>         vgf2p8affineqb_<mode><mask_name>): Use HImode instead of QImode
>         for const_0_to_255_operand predicated operands.
>
>         * gcc.target/i386/avx512f-pr93673.c: New test.
>         * gcc.target/i386/avx512dq-pr93673.c: New test.
>         * gcc.target/i386/avx512bw-pr93673.c: New test.

OK.

Thanks,
Uros.

>
> --- gcc/config/i386/sse.md.jj   2020-02-10 22:44:15.235205656 +0100
> +++ gcc/config/i386/sse.md      2020-02-11 13:35:37.768477721 +0100
> @@ -1650,7 +1650,7 @@ (define_insn "k<code><mode>"
>    [(set (match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "=k")
>         (any_lshift:SWI1248_AVX512BWDQ
>           (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")
> -         (match_operand:QI 2 "immediate_operand" "n")))
> +         (match_operand:HI 2 "const_0_to_255_operand" "n")))
>     (unspec [(const_int 0)] UNSPEC_MASKOP)]
>    "TARGET_AVX512F"
>    "k<mshift><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
> @@ -22016,7 +22016,7 @@ (define_insn "avx512dq_fpclass<mode><mas
>    [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
>            (unspec:<avx512fmaskmode>
>              [(match_operand:VF_AVX512VL 1 "vector_operand" "vm")
> -             (match_operand:QI 2 "const_0_to_255_operand" "n")]
> +             (match_operand:HI 2 "const_0_to_255_operand" "n")]
>               UNSPEC_FPCLASS))]
>     "TARGET_AVX512DQ"
>     "vfpclass<ssemodesuffix><vecmemsuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
> @@ -22030,7 +22030,7 @@ (define_insn "avx512dq_vmfpclass<mode><m
>         (and:<avx512fmaskmode>
>           (unspec:<avx512fmaskmode>
>             [(match_operand:VF_128 1 "nonimmediate_operand" "vm")
> -             (match_operand:QI 2 "const_0_to_255_operand" "n")]
> +             (match_operand:HI 2 "const_0_to_255_operand" "n")]
>             UNSPEC_FPCLASS)
>           (const_int 1)))]
>     "TARGET_AVX512DQ"
> @@ -22637,7 +22637,7 @@ (define_insn "vgf2p8affineinvqb_<mode><m
>         (unspec:VI1_AVX512F
>           [(match_operand:VI1_AVX512F 1 "register_operand" "0,v")
>            (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")
> -          (match_operand:QI 3 "const_0_to_255_operand" "n,n")]
> +          (match_operand:HI 3 "const_0_to_255_operand" "n,n")]
>           UNSPEC_GF2P8AFFINEINV))]
>    "TARGET_GFNI"
>    "@
> @@ -22654,7 +22654,7 @@ (define_insn "vgf2p8affineqb_<mode><mask
>         (unspec:VI1_AVX512F
>           [(match_operand:VI1_AVX512F 1 "register_operand" "0,v")
>            (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")
> -          (match_operand:QI 3 "const_0_to_255_operand" "n,n")]
> +          (match_operand:HI 3 "const_0_to_255_operand" "n,n")]
>           UNSPEC_GF2P8AFFINE))]
>    "TARGET_GFNI"
>    "@
> --- gcc/testsuite/gcc.target/i386/avx512f-pr93673.c.jj  2020-02-11 13:43:05.340775467 +0100
> +++ gcc/testsuite/gcc.target/i386/avx512f-pr93673.c     2020-02-11 13:43:47.202148358 +0100
> @@ -0,0 +1,20 @@
> +/* PR target/93673 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx512f" } */
> +
> +#include <x86intrin.h>
> +
> +void
> +foo (__mmask16 *b)
> +{
> +  b[0] = _kshiftli_mask16 (b[0], 0);
> +  b[1] = _kshiftri_mask16 (b[1], 0);
> +  b[2] = _kshiftli_mask16 (b[2], 1);
> +  b[3] = _kshiftri_mask16 (b[3], 1);
> +  b[4] = _kshiftli_mask16 (b[4], 15);
> +  b[5] = _kshiftri_mask16 (b[5], 15);
> +  b[6] = _kshiftli_mask16 (b[6], 0x7f);
> +  b[7] = _kshiftri_mask16 (b[7], 0x7f);
> +  b[8] = _kshiftli_mask16 (b[8], 0xff);
> +  b[9] = _kshiftri_mask16 (b[9], 0xff);
> +}
> --- gcc/testsuite/gcc.target/i386/avx512dq-pr93673.c.jj 2020-02-11 13:45:34.956534896 +0100
> +++ gcc/testsuite/gcc.target/i386/avx512dq-pr93673.c    2020-02-11 13:45:56.990205434 +0100
> @@ -0,0 +1,20 @@
> +/* PR target/93673 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx512dq" } */
> +
> +#include <x86intrin.h>
> +
> +void
> +foo (__mmask8 *a)
> +{
> +  a[0] = _kshiftli_mask8 (a[0], 0);
> +  a[1] = _kshiftri_mask8 (a[1], 0);
> +  a[2] = _kshiftli_mask8 (a[2], 1);
> +  a[3] = _kshiftri_mask8 (a[3], 1);
> +  a[4] = _kshiftli_mask8 (a[4], 7);
> +  a[5] = _kshiftri_mask8 (a[5], 7);
> +  a[6] = _kshiftli_mask8 (a[6], 0x7f);
> +  a[7] = _kshiftri_mask8 (a[7], 0x7f);
> +  a[8] = _kshiftli_mask8 (a[8], 0xff);
> +  a[9] = _kshiftri_mask8 (a[9], 0xff);
> +}
> --- gcc/testsuite/gcc.target/i386/avx512bw-pr93673.c.jj 2020-02-11 13:46:13.558957690 +0100
> +++ gcc/testsuite/gcc.target/i386/avx512bw-pr93673.c    2020-02-11 13:46:29.406720723 +0100
> @@ -0,0 +1,30 @@
> +/* PR target/93673 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx512bw" } */
> +
> +#include <x86intrin.h>
> +
> +void
> +foo (__mmask32 *c, __mmask64 *d)
> +{
> +  c[0] = _kshiftli_mask32 (c[0], 0);
> +  c[1] = _kshiftri_mask32 (c[1], 0);
> +  c[2] = _kshiftli_mask32 (c[2], 1);
> +  c[3] = _kshiftri_mask32 (c[3], 1);
> +  c[4] = _kshiftli_mask32 (c[4], 31);
> +  c[5] = _kshiftri_mask32 (c[5], 31);
> +  c[6] = _kshiftli_mask32 (c[6], 0x7f);
> +  c[7] = _kshiftri_mask32 (c[7], 0x7f);
> +  c[8] = _kshiftli_mask32 (c[8], 0xff);
> +  c[9] = _kshiftri_mask32 (c[9], 0xff);
> +  d[0] = _kshiftli_mask64 (d[0], 0);
> +  d[1] = _kshiftri_mask64 (d[1], 0);
> +  d[2] = _kshiftli_mask64 (d[2], 1);
> +  d[3] = _kshiftri_mask64 (d[3], 1);
> +  d[4] = _kshiftli_mask64 (d[4], 63);
> +  d[5] = _kshiftri_mask64 (d[5], 63);
> +  d[6] = _kshiftli_mask64 (d[6], 0x7f);
> +  d[7] = _kshiftri_mask64 (d[7], 0x7f);
> +  d[8] = _kshiftli_mask64 (d[8], 0xff);
> +  d[9] = _kshiftri_mask64 (d[9], 0xff);
> +}
>
>         Jakub
>
diff mbox series

Patch

--- gcc/config/i386/sse.md.jj	2020-02-10 22:44:15.235205656 +0100
+++ gcc/config/i386/sse.md	2020-02-11 13:35:37.768477721 +0100
@@ -1650,7 +1650,7 @@  (define_insn "k<code><mode>"
   [(set (match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "=k")
 	(any_lshift:SWI1248_AVX512BWDQ
 	  (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")
-	  (match_operand:QI 2 "immediate_operand" "n")))
+	  (match_operand:HI 2 "const_0_to_255_operand" "n")))
    (unspec [(const_int 0)] UNSPEC_MASKOP)]
   "TARGET_AVX512F"
   "k<mshift><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
@@ -22016,7 +22016,7 @@  (define_insn "avx512dq_fpclass<mode><mas
   [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
           (unspec:<avx512fmaskmode>
             [(match_operand:VF_AVX512VL 1 "vector_operand" "vm")
-             (match_operand:QI 2 "const_0_to_255_operand" "n")]
+             (match_operand:HI 2 "const_0_to_255_operand" "n")]
              UNSPEC_FPCLASS))]
    "TARGET_AVX512DQ"
    "vfpclass<ssemodesuffix><vecmemsuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
@@ -22030,7 +22030,7 @@  (define_insn "avx512dq_vmfpclass<mode><m
 	(and:<avx512fmaskmode>
 	  (unspec:<avx512fmaskmode>
 	    [(match_operand:VF_128 1 "nonimmediate_operand" "vm")
-             (match_operand:QI 2 "const_0_to_255_operand" "n")]
+             (match_operand:HI 2 "const_0_to_255_operand" "n")]
 	    UNSPEC_FPCLASS)
 	  (const_int 1)))]
    "TARGET_AVX512DQ"
@@ -22637,7 +22637,7 @@  (define_insn "vgf2p8affineinvqb_<mode><m
 	(unspec:VI1_AVX512F
 	  [(match_operand:VI1_AVX512F 1 "register_operand" "0,v")
 	   (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")
-	   (match_operand:QI 3 "const_0_to_255_operand" "n,n")]
+	   (match_operand:HI 3 "const_0_to_255_operand" "n,n")]
 	  UNSPEC_GF2P8AFFINEINV))]
   "TARGET_GFNI"
   "@
@@ -22654,7 +22654,7 @@  (define_insn "vgf2p8affineqb_<mode><mask
 	(unspec:VI1_AVX512F
 	  [(match_operand:VI1_AVX512F 1 "register_operand" "0,v")
 	   (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")
-	   (match_operand:QI 3 "const_0_to_255_operand" "n,n")]
+	   (match_operand:HI 3 "const_0_to_255_operand" "n,n")]
 	  UNSPEC_GF2P8AFFINE))]
   "TARGET_GFNI"
   "@
--- gcc/testsuite/gcc.target/i386/avx512f-pr93673.c.jj	2020-02-11 13:43:05.340775467 +0100
+++ gcc/testsuite/gcc.target/i386/avx512f-pr93673.c	2020-02-11 13:43:47.202148358 +0100
@@ -0,0 +1,20 @@ 
+/* PR target/93673 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+
+#include <x86intrin.h>
+
+void
+foo (__mmask16 *b)
+{
+  b[0] = _kshiftli_mask16 (b[0], 0);
+  b[1] = _kshiftri_mask16 (b[1], 0);
+  b[2] = _kshiftli_mask16 (b[2], 1);
+  b[3] = _kshiftri_mask16 (b[3], 1);
+  b[4] = _kshiftli_mask16 (b[4], 15);
+  b[5] = _kshiftri_mask16 (b[5], 15);
+  b[6] = _kshiftli_mask16 (b[6], 0x7f);
+  b[7] = _kshiftri_mask16 (b[7], 0x7f);
+  b[8] = _kshiftli_mask16 (b[8], 0xff);
+  b[9] = _kshiftri_mask16 (b[9], 0xff);
+}
--- gcc/testsuite/gcc.target/i386/avx512dq-pr93673.c.jj	2020-02-11 13:45:34.956534896 +0100
+++ gcc/testsuite/gcc.target/i386/avx512dq-pr93673.c	2020-02-11 13:45:56.990205434 +0100
@@ -0,0 +1,20 @@ 
+/* PR target/93673 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512dq" } */
+
+#include <x86intrin.h>
+
+void
+foo (__mmask8 *a)
+{
+  a[0] = _kshiftli_mask8 (a[0], 0);
+  a[1] = _kshiftri_mask8 (a[1], 0);
+  a[2] = _kshiftli_mask8 (a[2], 1);
+  a[3] = _kshiftri_mask8 (a[3], 1);
+  a[4] = _kshiftli_mask8 (a[4], 7);
+  a[5] = _kshiftri_mask8 (a[5], 7);
+  a[6] = _kshiftli_mask8 (a[6], 0x7f);
+  a[7] = _kshiftri_mask8 (a[7], 0x7f);
+  a[8] = _kshiftli_mask8 (a[8], 0xff);
+  a[9] = _kshiftri_mask8 (a[9], 0xff);
+}
--- gcc/testsuite/gcc.target/i386/avx512bw-pr93673.c.jj	2020-02-11 13:46:13.558957690 +0100
+++ gcc/testsuite/gcc.target/i386/avx512bw-pr93673.c	2020-02-11 13:46:29.406720723 +0100
@@ -0,0 +1,30 @@ 
+/* PR target/93673 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bw" } */
+
+#include <x86intrin.h>
+
+void
+foo (__mmask32 *c, __mmask64 *d)
+{
+  c[0] = _kshiftli_mask32 (c[0], 0);
+  c[1] = _kshiftri_mask32 (c[1], 0);
+  c[2] = _kshiftli_mask32 (c[2], 1);
+  c[3] = _kshiftri_mask32 (c[3], 1);
+  c[4] = _kshiftli_mask32 (c[4], 31);
+  c[5] = _kshiftri_mask32 (c[5], 31);
+  c[6] = _kshiftli_mask32 (c[6], 0x7f);
+  c[7] = _kshiftri_mask32 (c[7], 0x7f);
+  c[8] = _kshiftli_mask32 (c[8], 0xff);
+  c[9] = _kshiftri_mask32 (c[9], 0xff);
+  d[0] = _kshiftli_mask64 (d[0], 0);
+  d[1] = _kshiftri_mask64 (d[1], 0);
+  d[2] = _kshiftli_mask64 (d[2], 1);
+  d[3] = _kshiftri_mask64 (d[3], 1);
+  d[4] = _kshiftli_mask64 (d[4], 63);
+  d[5] = _kshiftri_mask64 (d[5], 63);
+  d[6] = _kshiftli_mask64 (d[6], 0x7f);
+  d[7] = _kshiftri_mask64 (d[7], 0x7f);
+  d[8] = _kshiftli_mask64 (d[8], 0xff);
+  d[9] = _kshiftri_mask64 (d[9], 0xff);
+}