diff mbox series

Add tune option for integer mask cmov, enable this tune for m_CORE_AVX512

Message ID CAMZc-bwwD1XzubuLDBgVSMw6WLhkn4NAvvr+AE9r-NZbrST0Bg@mail.gmail.com
State New
Headers show
Series Add tune option for integer mask cmov, enable this tune for m_CORE_AVX512 | expand

Commit Message

Hongtao Liu Dec. 11, 2019, 10:18 a.m. UTC
Hi:
  This patch adds a tune option for integer mask cmov. Some targets
have both integer mask registers and SSE mask registers; this tune
indicates that the integer ones should be used. Currently it is on by
default for m_CORE_AVX512.

  Bootstrap is ok, regression test on i386/x86_64 backends is ok.
  ok for trunk?

Changelog
gcc/
        * config/i386/i386-expand.c (ix86_valid_mask_cmp_mode): Return
        false if the target does not prefer using integer mask cmov
        for 128/256-bit vectors under avx512f.
        * config/i386/i386.h (TARGET_PREFER_INTEGER_MASK_CMOV): New
        macro.
        * config/i386/x86-tune.def
        (X86_TUNE_PREFER_INTEGER_MASK_CMOV): New tune.

gcc/testsuite
        * gcc.target/i386/avx512bw-pr92686-movcc-1.c: Adjust test case.
        * gcc.target/i386/avx512bw-pr92686-movcc-2.c: Ditto.
        * gcc.target/i386/avx512bw-pr92686-vpcmp-1.c: Ditto.
        * gcc.target/i386/avx512bw-pr92686-vpcmp-2.c: Ditto.
        * gcc.target/i386/avx512vl-pr92686-movcc-1.c: Ditto.
        * gcc.target/i386/avx512vl-pr92686-movcc-2.c: Ditto.
        * gcc.target/i386/avx512vl-pr92686-vpcmp-1.c: Ditto.
        * gcc.target/i386/avx512vl-pr92686-vpcmp-2.c: Ditto.
        * gcc.target/i386/avx512vl-pr88547-1.c: Ditto.

Comments

Jakub Jelinek Dec. 11, 2019, 10:21 a.m. UTC | #1
On Wed, Dec 11, 2019 at 06:18:31PM +0800, Hongtao Liu wrote:
> Hi:
>   This patch is about to add tune option for integer mask cmov, for
> some targets has both integer mask register and sse mask register,
> this tune indicates to use integer one. Currently it's default on for
> m_CORE_AVX512.
> 
>   Bootstrap is ok, regression test on i386/x86_64 backends is ok.
>   ok for trunk?

I don't see the need for that right now, doesn't m_CORE_AVX512 include
all CPUs that support AVX512VL right now?  If yes, the whole effect of the
patch will be that the masked registers won't be used in generic tuning,
something most people actually use.

I think it is worth adding something like this only when some other
AVX512VL-capable CPUs appear and we know what will perform better on those.

	Jakub
diff mbox series

Patch

From 716bdede7f23ef035d93fb1d4f6917e19cef5f3e Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Wed, 11 Dec 2019 16:38:04 +0800
Subject: [PATCH] Add tune option for integer mask cmov, enable this tune for m_CORE_AVX512

Changelog
gcc/
	* config/i386/i386-expand.c (ix86_valid_mask_cmp_mode): Return
	false if the target does not prefer using integer mask cmov
	for 128/256-bit vectors under avx512f.
	* config/i386/i386.h (TARGET_PREFER_INTEGER_MASK_CMOV): New
	macro.
	* config/i386/x86-tune.def
	(X86_TUNE_PREFER_INTEGER_MASK_CMOV): New tune.

gcc/testsuite
	* gcc.target/i386/avx512bw-pr92686-movcc-1.c: Adjust test case.
	* gcc.target/i386/avx512bw-pr92686-movcc-2.c: Ditto.
	* gcc.target/i386/avx512bw-pr92686-vpcmp-1.c: Ditto.
	* gcc.target/i386/avx512bw-pr92686-vpcmp-2.c: Ditto.
	* gcc.target/i386/avx512vl-pr92686-movcc-1.c: Ditto.
	* gcc.target/i386/avx512vl-pr92686-movcc-2.c: Ditto.
	* gcc.target/i386/avx512vl-pr92686-vpcmp-1.c: Ditto.
	* gcc.target/i386/avx512vl-pr92686-vpcmp-2.c: Ditto.
	* gcc.target/i386/avx512vl-pr88547-1.c: Ditto.
---
 gcc/config/i386/i386-expand.c                      |    4 ++++
 gcc/config/i386/i386.h                             |    2 ++
 gcc/config/i386/x86-tune.def                       |   10 ++++++++++
 .../gcc.target/i386/avx512bw-pr92686-movcc-1.c     |    2 +-
 .../gcc.target/i386/avx512bw-pr92686-movcc-2.c     |    2 +-
 .../gcc.target/i386/avx512bw-pr92686-vpcmp-1.c     |    2 +-
 .../gcc.target/i386/avx512bw-pr92686-vpcmp-2.c     |    2 +-
 gcc/testsuite/gcc.target/i386/avx512vl-pr88547-1.c |    6 +++---
 .../gcc.target/i386/avx512vl-pr92686-movcc-1.c     |    2 +-
 .../gcc.target/i386/avx512vl-pr92686-movcc-2.c     |    2 +-
 .../gcc.target/i386/avx512vl-pr92686-vpcmp-1.c     |    2 +-
 .../gcc.target/i386/avx512vl-pr92686-vpcmp-2.c     |    2 +-
 12 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index cbf4eb7..a627642 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -3431,6 +3431,10 @@  ix86_valid_mask_cmp_mode (machine_mode mode)
   if (TARGET_XOP && !TARGET_AVX512F)
     return false;
 
+  /* For 512-bit vectors, only integer mask vcmp/vcmov is valid.  */
+  if (!TARGET_PREFER_INTEGER_MASK_CMOV && GET_MODE_SIZE (mode) != 64)
+    return false;
+
   /* AVX512F is needed for mask operation.  */
   if (!(TARGET_AVX512F && VECTOR_MODE_P (mode)))
     return false;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 2542cb3..23d796e 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -596,6 +596,8 @@  extern unsigned char ix86_tune_features[X86_TUNE_LAST];
 	ix86_tune_features[X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE]
 #define TARGET_EMIT_VZEROUPPER \
 	ix86_tune_features[X86_TUNE_EMIT_VZEROUPPER]
+#define TARGET_PREFER_INTEGER_MASK_CMOV \
+	ix86_tune_features[X86_TUNE_PREFER_INTEGER_MASK_CMOV]
 
 /* Feature tests against the various architecture variations.  */
 enum ix86_arch_indices {
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 328535d..e944f39 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -467,6 +467,16 @@  DEF_TUNE (X86_TUNE_AVX128_OPTIMAL, "avx128_optimal", m_BDVER | m_BTVER2
 DEF_TUNE (X86_TUNE_AVX256_OPTIMAL, "avx256_optimal", m_CORE_AVX512)
 
 /*****************************************************************************/
+/* AVX512 instruction selection tuning.				     */
+/*****************************************************************************/
+
+/* X86_TUNE_PREFER_INTEGER_MASK_CMOV: Use integer mask vcmov/vcmp for
+   128/256-bit vectors under avx512f; there are also instructions
+   using sse regs as mask under avx2 or xop.  */
+DEF_TUNE (X86_TUNE_PREFER_INTEGER_MASK_CMOV, "prefer_integer_mask_cmov",
+	 m_CORE_AVX512)
+
+/*****************************************************************************/
 /* Historical relics: tuning flags that helps a specific old CPU designs     */
 /*****************************************************************************/
 
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-1.c
index 2a89077..7afc37e 100644
--- a/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-1.c
@@ -1,6 +1,6 @@ 
 /* PR target/92686 */
 /* { dg-do compile } */
-/* { dg-options "-Ofast -mavx512bw -mno-avx512dq -mno-avx512vl -mno-xop -mprefer-vector-width=512" } */
+/* { dg-options "-Ofast -march=skylake-avx512 -mno-avx512dq -mno-avx512vl -mno-xop -mprefer-vector-width=512" } */
 /* { dg-final { scan-assembler-times "vpcmp\[bwdq\]\[\t ]" 8 } } */
 /* { dg-final { scan-assembler-times "vpcmpu\[bwdq\]\[\t ]" 8 } } */
 /* { dg-final { scan-assembler-times "vmovdq\[au\]8\[^\{\n\]*%zmm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-2.c
index 53a7da1..0386ea6 100644
--- a/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-movcc-2.c
@@ -1,7 +1,7 @@ 
 /* { dg-do run } */
 /* { dg-require-effective-target avx512bw } */
 /* { dg-require-effective-target avx512vl } */
-/* { dg-options "-Ofast -mavx512bw -mavx512vl -mprefer-vector-width=256" } */
+/* { dg-options "-Ofast -march=skylake-avx512 -mprefer-vector-width=256" } */
 
 #ifndef CHECK
 #define CHECK "avx512f-helper.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-1.c
index 4fd3b36..b94c19d 100644
--- a/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-1.c
@@ -1,6 +1,6 @@ 
 /* PR target/92686 */
 /* { dg-do compile } */
-/* { dg-options "-O2 -mavx512bw -mno-avx512dq -mno-avx512vl -mno-xop" } */
+/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512dq -mno-avx512vl -mno-xop" } */
 /* { dg-final { scan-assembler-times "vpcmp\[bwdq\]\[\t ]" 8 } } */
 /* { dg-final { scan-assembler-times "vpcmpu\[bwdq\]\[\t ]" 8 } } */
 /* { dg-final { scan-assembler-times "vpmovm2\[bw\]\[\t ]" 8 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-2.c
index 0ea5b56..6e31c09 100644
--- a/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr92686-vpcmp-2.c
@@ -1,6 +1,6 @@ 
 /* { dg-do run } */
 /* { dg-require-effective-target avx512bw } */
-/* { dg-options "-O2 -mavx512bw" } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
 
 #ifndef CHECK
 #define CHECK "avx512f-helper.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-1.c
index a3ffeca..432a19e 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr88547-1.c
@@ -6,7 +6,7 @@ 
 /* { dg-final { scan-assembler-times "vpminsb\[\t ]" 2 } } */
 /* { dg-final { scan-assembler-times "vpminuw\[\t ]" 2 } } */
 /* { dg-final { scan-assembler-times "vpminsw\[\t ]" 2 } } */
-/* { dg-final { scan-assembler-times "vpcmp\[dq\]\[\t ]" 4 } } */
-/* { dg-final { scan-assembler-times "vpcmpu\[dq\]\[\t ]" 4 } } */
-/* { dg-final { scan-assembler-times "vpternlog\[qd\]\[\t ]" 8 } } */
+/* { dg-final { scan-assembler-times "vpminud\[\t ]" 2 } } */
+/* { dg-final { scan-assembler-times "vpminsd\[\t ]" 2 } } */
+/* { dg-final { scan-assembler-times "vpminuq\[\t ]" 2 } } */
 #include "avx2-pr88547-1.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-1.c
index 1b9644a..722ba9f 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-1.c
@@ -1,6 +1,6 @@ 
 /* PR target/92686 */
 /* { dg-do compile } */
-/* { dg-options "-Ofast -mavx512bw -mavx512vl -mno-xop -mprefer-vector-width=256" } */
+/* { dg-options "-Ofast -march=skylake-avx512 -mno-xop -mprefer-vector-width=256" } */
 /* { dg-final { scan-assembler-times "vpcmp\[bwdq\]\[\t ]" 8 } } */
 /* { dg-final { scan-assembler-times "vpcmpu\[bwdq\]\[\t ]" 8 } } */
 /* { dg-final { scan-assembler-times "vmovdq\[au\]8\[^\{\n\]*%ymm0+\[^\n\]*\{%k\[1-7\]\}" 4 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-2.c
index 5f5562b..b9c1881 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-movcc-2.c
@@ -1,7 +1,7 @@ 
 /* { dg-do run } */
 /* { dg-require-effective-target avx512bw } */
 /* { dg-require-effective-target avx512vl } */
-/* { dg-options "-Ofast -mavx512bw -mavx512vl -mprefer-vector-width=256" } */
+/* { dg-options "-Ofast -march=skylake-avx512 -mprefer-vector-width=256" } */
 
 #ifndef CHECK
 #define CHECK "avx512f-helper.h"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-1.c
index 5b79d4d..5bf008e 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-1.c
@@ -1,6 +1,6 @@ 
 /* PR target/88547 */
 /* { dg-do compile } */
-/* { dg-options "-O2 -mavx512bw -mavx512vl -mno-avx512dq -mno-xop" } */
+/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512dq -mno-xop" } */
 /* { dg-final { scan-assembler-times "vpcmp\[bwdq\]\[\t ]" 8 } } */
 /* { dg-final { scan-assembler-times "vpcmpu\[bwdq\]\[\t ]" 8 } } */
 /* { dg-final { scan-assembler-times "vpmovm2\[bw\]\[\t ]" 8 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-2.c
index 6be24ff..f5cbdeb 100644
--- a/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr92686-vpcmp-2.c
@@ -1,7 +1,7 @@ 
 /* { dg-do run } */
 /* { dg-require-effective-target avx512bw } */
 /* { dg-require-effective-target avx512vl } */
-/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
 
 #ifndef CHECK
 #define CHECK "avx512f-helper.h"
-- 
1.7.0.7