diff mbox

[i386]: Fix zero-extension optimizations from mask registers (PR target/77476)

Message ID 20160905170719.GT14857@tucnak.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Sept. 5, 2016, 5:07 p.m. UTC
Hi!

On Fri, Aug 05, 2016 at 02:22:39PM +0200, Uros Bizjak wrote:
> 2016-08-05  Uros Bizjak  <ubizjak@gmail.com>
> 
>     * config/i386/i386.md (*zero_extendsidi2): Add (*r,*k) alternative.
>     (zero_extend<mode>di2): Ditto.
>     (*zero_extend<mode>si2): Ditto.
>     (*zero_extendqihi2): Ditto.

As the PR says, unfortunately not all kmov instructions are available on every
AVX512F-capable target: kmovb requires AVX512DQ, kmovw requires AVX512F, and
kmovd and kmovq require AVX512BW.

Thus, the following patch enables those alternatives only when the
instructions are available.

Bootstrapped/regtested on x86_64-linux and i686-linux; the avx512f-* testcase
was also tested with sde -knl.  Ok for trunk?

2016-09-05  Jakub Jelinek  <jakub@redhat.com>

	PR target/77476
	* config/i386/i386.md (isa): Add x64_avx512bw.
	(*zero_extendsidi2): For alternative 11 use x64_avx512bw isa.
	(kmov_isa): New mode attr.
	(zero_extend<mode>di2): Use <kmov_isa> isa for the last alternative.
	(*zero_extend<mode>si2): Likewise.
	(*zero_extendqihi2): Use avx512dq isa for the last alternative.

	* gcc.target/i386/avx512f-pr77476.c: New test.
	* gcc.target/i386/avx512bw-pr77476.c: New test.
	* gcc.target/i386/avx512dq-pr77476.c: New test.



	Jakub

Comments

Uros Bizjak Sept. 5, 2016, 6:38 p.m. UTC | #1
On Mon, Sep 5, 2016 at 7:07 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> On Fri, Aug 05, 2016 at 02:22:39PM +0200, Uros Bizjak wrote:
>> 2016-08-05  Uros Bizjak  <ubizjak@gmail.com>
>>
>>     * config/i386/i386.md (*zero_extendsidi2): Add (*r,*k) alternative.
>>     (zero_extend<mode>di2): Ditto.
>>     (*zero_extend<mode>si2): Ditto.
>>     (*zero_extendqihi2): Ditto.
>
> As the PR says, unfortunately not all kmov instructions are available on every
> AVX512F-capable target: kmovb requires AVX512DQ, kmovw requires AVX512F, and
> kmovd and kmovq require AVX512BW.
>
> Thus, the following patch enables those alternatives only when the
> instructions are available.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux; the avx512f-* testcase
> was also tested with sde -knl.  Ok for trunk?
>
> 2016-09-05  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/77476
>         * config/i386/i386.md (isa): Add x64_avx512bw.
>         (*zero_extendsidi2): For alternative 11 use x64_avx512bw isa.
>         (kmov_isa): New mode attr.
>         (zero_extend<mode>di2): Use <kmov_isa> isa for the last alternative.
>         (*zero_extend<mode>si2): Likewise.
>         (*zero_extendqihi2): Use avx512dq isa for the last alternative.
>
>         * gcc.target/i386/avx512f-pr77476.c: New test.
>         * gcc.target/i386/avx512bw-pr77476.c: New test.
>         * gcc.target/i386/avx512dq-pr77476.c: New test.

OK.

Thanks,
Uros.

> --- gcc/config/i386/i386.md.jj  2016-08-29 12:17:41.000000000 +0200
> +++ gcc/config/i386/i386.md     2016-09-05 10:35:53.404313654 +0200
> @@ -799,7 +799,7 @@ (define_attr "isa" "base,x64,x64_sse4,x6
>                     sse2,sse2_noavx,sse3,sse4,sse4_noavx,avx,noavx,
>                     avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
>                     fma_avx512f,avx512bw,noavx512bw,avx512dq,noavx512dq,
> -                   avx512vl,noavx512vl,x64_avx512dq"
> +                   avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw"
>    (const_string "base"))
>
>  (define_attr "enabled" ""
> @@ -812,6 +812,8 @@ (define_attr "enabled" ""
>            (symbol_ref "TARGET_64BIT && TARGET_AVX")
>          (eq_attr "isa" "x64_avx512dq")
>            (symbol_ref "TARGET_64BIT && TARGET_AVX512DQ")
> +        (eq_attr "isa" "x64_avx512bw")
> +          (symbol_ref "TARGET_64BIT && TARGET_AVX512BW")
>          (eq_attr "isa" "nox64") (symbol_ref "!TARGET_64BIT")
>          (eq_attr "isa" "sse2") (symbol_ref "TARGET_SSE2")
>          (eq_attr "isa" "sse2_noavx")
> @@ -3735,12 +3737,14 @@ (define_insn "*zero_extendsidi2"
>    [(set (attr "isa")
>       (cond [(eq_attr "alternative" "0,1,2")
>               (const_string "nox64")
> -           (eq_attr "alternative" "3,7,11")
> +           (eq_attr "alternative" "3,7")
>               (const_string "x64")
>             (eq_attr "alternative" "8")
>               (const_string "x64_sse4")
>             (eq_attr "alternative" "10")
>               (const_string "sse2")
> +           (eq_attr "alternative" "11")
> +             (const_string "x64_avx512bw")
>            ]
>            (const_string "*")))
>     (set (attr "type")
> @@ -3804,6 +3808,9 @@ (define_split
>     (set (match_dup 4) (const_int 0))]
>    "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
>
> +(define_mode_attr kmov_isa
> +  [(QI "avx512dq") (HI "avx512f") (SI "avx512bw") (DI "avx512bw")])
> +
>  (define_insn "zero_extend<mode>di2"
>    [(set (match_operand:DI 0 "register_operand" "=r,*r")
>         (zero_extend:DI
> @@ -3812,7 +3819,8 @@ (define_insn "zero_extend<mode>di2"
>    "@
>     movz{<imodesuffix>l|x}\t{%1, %k0|%k0, %1}
>     kmov<mskmodesuffix>\t{%1, %k0|%k0, %1}"
> -  [(set_attr "type" "imovx,mskmov")
> +  [(set_attr "isa" "*,<kmov_isa>")
> +   (set_attr "type" "imovx,mskmov")
>     (set_attr "mode" "SI")])
>
>  (define_expand "zero_extend<mode>si2"
> @@ -3863,7 +3871,8 @@ (define_insn "*zero_extend<mode>si2"
>    "@
>     movz{<imodesuffix>l|x}\t{%1, %0|%0, %1}
>     kmov<mskmodesuffix>\t{%1, %0|%0, %1}"
> -  [(set_attr "type" "imovx,mskmov")
> +  [(set_attr "isa" "*,<kmov_isa>")
> +   (set_attr "type" "imovx,mskmov")
>     (set_attr "mode" "SI,<MODE>")])
>
>  (define_expand "zero_extendqihi2"
> @@ -3914,6 +3923,7 @@ (define_insn "*zero_extendqihi2"
>     movz{bl|x}\t{%1, %k0|%k0, %1}
>     kmovb\t{%1, %k0|%k0, %1}"
>    [(set_attr "type" "imovx,mskmov")
> +   (set_attr "isa" "*,avx512dq")
>     (set_attr "mode" "SI,QI")])
>
>  (define_insn_and_split "*zext<mode>_doubleword_and"
> --- gcc/testsuite/gcc.target/i386/avx512f-pr77476.c.jj  2016-09-05 10:23:42.108364379 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512f-pr77476.c     2016-09-05 10:23:26.000000000 +0200
> @@ -0,0 +1,76 @@
> +/* PR target/77476 */
> +/* { dg-do run } */
> +/* { dg-options "-O2 -mavx512f" } */
> +/* { dg-require-effective-target avx512f } */
> +
> +#ifndef PR77476_TEST
> +#include "avx512f-check.h"
> +#define PR77476_TEST avx512f_test
> +#endif
> +
> +unsigned short s;
> +unsigned int i;
> +unsigned long long l;
> +
> +void
> +f1 (void)
> +{
> +  unsigned char a = 0xff;
> +  asm volatile ("" : "+Yk" (a));
> +  s = a;
> +}
> +
> +void
> +f2 (void)
> +{
> +  unsigned char a = 0xff;
> +  asm volatile ("" : "+Yk" (a));
> +  i = a;
> +}
> +
> +void
> +f3 (void)
> +{
> +  unsigned char a = 0xff;
> +  asm volatile ("" : "+Yk" (a));
> +  l = a;
> +}
> +
> +void
> +f4 (void)
> +{
> +  unsigned short a = 0xffff;
> +  asm volatile ("" : "+Yk" (a));
> +  i = a;
> +}
> +
> +void
> +f5 (void)
> +{
> +  unsigned short a = 0xffff;
> +  asm volatile ("" : "+Yk" (a));
> +  l = a;
> +}
> +
> +#ifdef __AVX512BW__
> +void
> +f6 (void)
> +{
> +  unsigned int a = 0xffffffff;
> +  asm volatile ("" : "+Yk" (a));
> +  l = a;
> +}
> +#endif
> +
> +static void
> +PR77476_TEST ()
> +{
> +  f1 (); if (s != 0xff) __builtin_abort (); s = 0;
> +  f2 (); if (i != 0xff) __builtin_abort (); i = 0;
> +  f3 (); if (l != 0xff) __builtin_abort (); l = 0;
> +  f4 (); if (i != 0xffff) __builtin_abort (); i = 0;
> +  f5 (); if (l != 0xffff) __builtin_abort (); l = 0;
> +#ifdef __AVX512BW__
> +  f6 (); if (l != 0xffffffff) __builtin_abort (); l = 0;
> +#endif
> +}
> --- gcc/testsuite/gcc.target/i386/avx512bw-pr77476.c.jj 2016-09-05 10:24:07.078055576 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512bw-pr77476.c    2016-09-05 10:24:37.870674759 +0200
> @@ -0,0 +1,9 @@
> +/* PR target/77476 */
> +/* { dg-do run } */
> +/* { dg-options "-O2 -mavx512bw" } */
> +/* { dg-require-effective-target avx512bw } */
> +
> +#include "avx512bw-check.h"
> +#define PR77476_TEST avx512bw_test
> +
> +#include "avx512f-pr77476.c"
> --- gcc/testsuite/gcc.target/i386/avx512dq-pr77476.c.jj 2016-09-05 10:24:57.617430548 +0200
> +++ gcc/testsuite/gcc.target/i386/avx512dq-pr77476.c    2016-09-05 10:25:26.406074515 +0200
> @@ -0,0 +1,9 @@
> +/* PR target/77476 */
> +/* { dg-do run } */
> +/* { dg-options "-O2 -mavx512dq" } */
> +/* { dg-require-effective-target avx512dq } */
> +
> +#include "avx512dq-check.h"
> +#define PR77476_TEST avx512dq_test
> +
> +#include "avx512f-pr77476.c"
>
>
>         Jakub
diff mbox

Patch

--- gcc/config/i386/i386.md.jj	2016-08-29 12:17:41.000000000 +0200
+++ gcc/config/i386/i386.md	2016-09-05 10:35:53.404313654 +0200
@@ -799,7 +799,7 @@  (define_attr "isa" "base,x64,x64_sse4,x6
 		    sse2,sse2_noavx,sse3,sse4,sse4_noavx,avx,noavx,
 		    avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
 		    fma_avx512f,avx512bw,noavx512bw,avx512dq,noavx512dq,
-		    avx512vl,noavx512vl,x64_avx512dq"
+		    avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw"
   (const_string "base"))
 
 (define_attr "enabled" ""
@@ -812,6 +812,8 @@  (define_attr "enabled" ""
 	   (symbol_ref "TARGET_64BIT && TARGET_AVX")
 	 (eq_attr "isa" "x64_avx512dq")
 	   (symbol_ref "TARGET_64BIT && TARGET_AVX512DQ")
+	 (eq_attr "isa" "x64_avx512bw")
+	   (symbol_ref "TARGET_64BIT && TARGET_AVX512BW")
 	 (eq_attr "isa" "nox64") (symbol_ref "!TARGET_64BIT")
 	 (eq_attr "isa" "sse2") (symbol_ref "TARGET_SSE2")
 	 (eq_attr "isa" "sse2_noavx")
@@ -3735,12 +3737,14 @@  (define_insn "*zero_extendsidi2"
   [(set (attr "isa")
      (cond [(eq_attr "alternative" "0,1,2")
 	      (const_string "nox64")
-	    (eq_attr "alternative" "3,7,11")
+	    (eq_attr "alternative" "3,7")
 	      (const_string "x64")
 	    (eq_attr "alternative" "8")
 	      (const_string "x64_sse4")
 	    (eq_attr "alternative" "10")
 	      (const_string "sse2")
+	    (eq_attr "alternative" "11")
+	      (const_string "x64_avx512bw")
 	   ]
 	   (const_string "*")))
    (set (attr "type")
@@ -3804,6 +3808,9 @@  (define_split
    (set (match_dup 4) (const_int 0))]
   "split_double_mode (DImode, &operands[0], 1, &operands[3], &operands[4]);")
 
+(define_mode_attr kmov_isa
+  [(QI "avx512dq") (HI "avx512f") (SI "avx512bw") (DI "avx512bw")])
+
 (define_insn "zero_extend<mode>di2"
   [(set (match_operand:DI 0 "register_operand" "=r,*r")
 	(zero_extend:DI
@@ -3812,7 +3819,8 @@  (define_insn "zero_extend<mode>di2"
   "@
    movz{<imodesuffix>l|x}\t{%1, %k0|%k0, %1}
    kmov<mskmodesuffix>\t{%1, %k0|%k0, %1}"
-  [(set_attr "type" "imovx,mskmov")
+  [(set_attr "isa" "*,<kmov_isa>")
+   (set_attr "type" "imovx,mskmov")
    (set_attr "mode" "SI")])
 
 (define_expand "zero_extend<mode>si2"
@@ -3863,7 +3871,8 @@  (define_insn "*zero_extend<mode>si2"
   "@
    movz{<imodesuffix>l|x}\t{%1, %0|%0, %1}
    kmov<mskmodesuffix>\t{%1, %0|%0, %1}"
-  [(set_attr "type" "imovx,mskmov")
+  [(set_attr "isa" "*,<kmov_isa>")
+   (set_attr "type" "imovx,mskmov")
    (set_attr "mode" "SI,<MODE>")])
 
 (define_expand "zero_extendqihi2"
@@ -3914,6 +3923,7 @@  (define_insn "*zero_extendqihi2"
    movz{bl|x}\t{%1, %k0|%k0, %1}
    kmovb\t{%1, %k0|%k0, %1}"
   [(set_attr "type" "imovx,mskmov")
+   (set_attr "isa" "*,avx512dq")
    (set_attr "mode" "SI,QI")])
 
 (define_insn_and_split "*zext<mode>_doubleword_and"
--- gcc/testsuite/gcc.target/i386/avx512f-pr77476.c.jj	2016-09-05 10:23:42.108364379 +0200
+++ gcc/testsuite/gcc.target/i386/avx512f-pr77476.c	2016-09-05 10:23:26.000000000 +0200
@@ -0,0 +1,76 @@ 
+/* PR target/77476 */
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#ifndef PR77476_TEST
+#include "avx512f-check.h"
+#define PR77476_TEST avx512f_test
+#endif
+
+unsigned short s;
+unsigned int i;
+unsigned long long l;
+
+void
+f1 (void)
+{
+  unsigned char a = 0xff;
+  asm volatile ("" : "+Yk" (a));
+  s = a;
+}
+
+void
+f2 (void)
+{
+  unsigned char a = 0xff;
+  asm volatile ("" : "+Yk" (a));
+  i = a;
+}
+
+void
+f3 (void)
+{
+  unsigned char a = 0xff;
+  asm volatile ("" : "+Yk" (a));
+  l = a;
+}
+
+void
+f4 (void)
+{
+  unsigned short a = 0xffff;
+  asm volatile ("" : "+Yk" (a));
+  i = a;
+}
+
+void
+f5 (void)
+{
+  unsigned short a = 0xffff;
+  asm volatile ("" : "+Yk" (a));
+  l = a;
+}
+
+#ifdef __AVX512BW__
+void
+f6 (void)
+{
+  unsigned int a = 0xffffffff;
+  asm volatile ("" : "+Yk" (a));
+  l = a;
+}
+#endif
+
+static void
+PR77476_TEST ()
+{
+  f1 (); if (s != 0xff) __builtin_abort (); s = 0;
+  f2 (); if (i != 0xff) __builtin_abort (); i = 0;
+  f3 (); if (l != 0xff) __builtin_abort (); l = 0;
+  f4 (); if (i != 0xffff) __builtin_abort (); i = 0;
+  f5 (); if (l != 0xffff) __builtin_abort (); l = 0;
+#ifdef __AVX512BW__
+  f6 (); if (l != 0xffffffff) __builtin_abort (); l = 0;
+#endif
+}
--- gcc/testsuite/gcc.target/i386/avx512bw-pr77476.c.jj	2016-09-05 10:24:07.078055576 +0200
+++ gcc/testsuite/gcc.target/i386/avx512bw-pr77476.c	2016-09-05 10:24:37.870674759 +0200
@@ -0,0 +1,9 @@ 
+/* PR target/77476 */
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bw" } */
+/* { dg-require-effective-target avx512bw } */
+
+#include "avx512bw-check.h"
+#define PR77476_TEST avx512bw_test
+
+#include "avx512f-pr77476.c"
--- gcc/testsuite/gcc.target/i386/avx512dq-pr77476.c.jj	2016-09-05 10:24:57.617430548 +0200
+++ gcc/testsuite/gcc.target/i386/avx512dq-pr77476.c	2016-09-05 10:25:26.406074515 +0200
@@ -0,0 +1,9 @@ 
+/* PR target/77476 */
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512dq" } */
+/* { dg-require-effective-target avx512dq } */
+
+#include "avx512dq-check.h"
+#define PR77476_TEST avx512dq_test
+
+#include "avx512f-pr77476.c"