Add abs pattern to handle {si,di} mode abs to avoid pmax/cmove conversion (PR92651)
diff mbox series

Message ID CA+OydWnvvvU3gaw3TKj5TNh2EDROjv6Ar=NEjdCGfYM3sbBSug@mail.gmail.com
State New
Headers show
Series
  • Add abs pattern to handle {si,di} mode abs to avoid pmax/cmove conversion (PR92651)
Related show

Commit Message

Hongyu Wang Dec. 11, 2019, 3:21 a.m. UTC
Hi:
  Currently smax/smin pattern added by r274481 cause some regression
in 525.x264_r by 8% with -O2 -march=corei7. The reason is some IA
backends (contain TARGET_SSE4_1) will do transform for simple abs
(using rshift, xor and sub) to pmax/pmin if smax/smin pattern exists,
which generate unnecessary sse instruction. This patch adds abs
patterns to generate simple abs for integer mode to recover the
regression.

  Bootstrap ok, regression test on i386 backend is ok.
  Ok for trunk?

Changelog
gcc/
    PR target/92651
    * config/i386/i386.h (TARGET_USE_SIMPLE_ABS_PATTERN): New macro.
    * config/i386/x86-tune.def (X86_TUNE_USE_SIMPLE_ABS_PATTERN): New
    * config/i386/i386.md (abs<SWI48x>2): New define_expand.

gcc/testsuite
    * gcc.target/i386/pr92651.c: New testcase.

Regards,
Hongyu, Wang

Comments

Uros Bizjak Dec. 16, 2019, 11:38 a.m. UTC | #1
On Wed, Dec 11, 2019 at 4:24 AM 玩还有 <wwwhhhyyy333@gmail.com> wrote:
>
> Hi:
>   Currently smax/smin pattern added by r274481 cause some regression
> in 525.x264_r by 8% with -O2 -march=corei7. The reason is some IA
> backends (contain TARGET_SSE4_1) will do transform for simple abs
> (using rshift, xor and sub) to pmax/pmin if smax/smin pattern exists,
> which generate unnecessary sse instruction. This patch adds abs
> patterns to generate simple abs for integer mode to recover the
> regression.
>
>   Bootstrap ok, regression test on i386 backend is ok.
>   Ok for trunk?
>
> Changelog
> gcc/
>     PR target/92651
>     * config/i386/i386.h (TARGET_USE_SIMPLE_ABS_PATTERN): New macro.
>     * config/i386/x86-tune.def (X86_TUNE_USE_SIMPLE_ABS_PATTERN): New
>     * config/i386/i386.md (abs<SWI48x>2): New define_expand.
>
> gcc/testsuite
>     * gcc.target/i386/pr92651.c: New testcase.

OK, but please name new macro TARGET_EXPAND_ABS and corresponding tune
X86_TUNE_EXPAND_ABS.

Thanks,
Uros.

> Regards,
> Hongyu, Wang

Patch
diff mbox series

From c0bf64efbcaa989349130b676880cc2ed49fca69 Mon Sep 17 00:00:00 2001
From: hongyuw1 <hongyuw1@intel.com>
Date: Thu, 28 Nov 2019 12:49:04 +0000
Subject: [PATCH] Add abs pattern to handle {si,di} mode abs to avoid
 pmax/cmove conversion.

    gcc/
	PR target/92651
	* config/i386/i386.h (TARGET_USE_SIMPLE_ABS_PATTERN): New macro.
	* config/i386/x86-tune.def (X86_TUNE_USE_SIMPLE_ABS_PATTERN): New.
	* config/i386/i386.md (abs<SWI48x>2): New define_expand.

    gcc/testsuite
	* gcc.target/i386/pr92651.c: New testcase.
---
 gcc/config/i386/i386.h                  |  2 ++
 gcc/config/i386/i386.md                 | 39 +++++++++++++++++++++++++
 gcc/config/i386/x86-tune.def            |  7 +++++
 gcc/testsuite/gcc.target/i386/pr92651.c | 16 ++++++++++
 4 files changed, 64 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr92651.c

diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 108456b14d4..ea27526e42e 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -596,6 +596,8 @@  extern unsigned char ix86_tune_features[X86_TUNE_LAST];
 	ix86_tune_features[X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE]
 #define TARGET_EMIT_VZEROUPPER \
 	ix86_tune_features[X86_TUNE_EMIT_VZEROUPPER]
+#define TARGET_USE_SIMPLE_ABS_PATTERN \
+	ix86_tune_features[X86_TUNE_USE_SIMPLE_ABS_PATTERN]
 
 /* Feature tests against the various architecture variations.  */
 enum ix86_arch_indices {
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 7ff5872ba43..8e5059aedec 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -9668,6 +9668,45 @@ 
   "#"
   [(set_attr "isa" "noavx,noavx,avx,avx")])
 
+;; Special expand pattern to handle integer mode abs
+
+(define_expand "abs<mode>2"
+  [(set (match_operand:SWI48x 0 "register_operand")
+    (abs:SWI48x
+      (match_operand:SWI48x 1 "register_operand")))]
+  "TARGET_USE_SIMPLE_ABS_PATTERN"
+  {
+    machine_mode mode = <MODE>mode;
+
+    /* Generate rtx abs using abs (x) = (((signed) x >> (W-1)) ^ x) -
+       ((signed) x >> (W-1)) */
+    rtx shift_amount = gen_int_shift_amount (mode,
+				       GET_MODE_PRECISION (mode)
+				       - 1);
+    shift_amount = convert_modes (E_QImode, GET_MODE (shift_amount),
+			    shift_amount, 1);
+    rtx shift_dst = gen_reg_rtx (mode);
+    rtx shift_op = gen_rtx_SET (shift_dst,
+			  gen_rtx_fmt_ee (ASHIFTRT, mode,
+					  operands[1], shift_amount));
+    rtx clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode,
+						    FLAGS_REG));
+    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, shift_op,
+						clobber)));
+
+    rtx xor_op = gen_rtx_SET (operands[0],
+			gen_rtx_fmt_ee (XOR, mode, shift_dst,
+					operands[1]));
+    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, xor_op, clobber)));
+
+    rtx minus_op = gen_rtx_SET (operands[0],
+			  gen_rtx_fmt_ee (MINUS, mode,
+					  operands[0], shift_dst));
+    emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, minus_op,
+						clobber)));
+    DONE;
+  })
+
 (define_expand "<code><mode>2"
   [(set (match_operand:X87MODEF 0 "register_operand")
 	(absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand")))]
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 328535d38d7..86ff24122e6 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -317,6 +317,13 @@  DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn",
 DEF_TUNE (X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE, "use_xchg_for_atomic_store",
 	 m_CORE_ALL | m_BDVER | m_ZNVER | m_GENERIC)
 
+/* X86_TUNE_USE_SIMPLE_ABS_PATTERN: This enables a new abs pattern by
+   generating instructions for abs (x) = (((signed) x >> (W-1) ^ x) -
+   (signed) x >> (W-1)) instead of cmove or SSE max/abs instructions.  */
+DEF_TUNE (X86_TUNE_USE_SIMPLE_ABS_PATTERN, "use_simple_abs_pattern",
+	  m_CORE_ALL | m_SILVERMONT | m_KNL | m_KNM | m_GOLDMONT
+	  | m_GOLDMONT_PLUS | m_TREMONT )
+
 /*****************************************************************************/
 /* 387 instruction selection tuning                                          */
 /*****************************************************************************/
diff --git a/gcc/testsuite/gcc.target/i386/pr92651.c b/gcc/testsuite/gcc.target/i386/pr92651.c
new file mode 100644
index 00000000000..3d0c3c7bf4e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr92651.c
@@ -0,0 +1,16 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=corei7" } */
+
+#include <stdlib.h>
+
+int foo(unsigned char a, unsigned char b)
+{
+    int isum=abs(a - b);
+    return isum;
+}
+
+/* { dg-final { scan-assembler-not "cmov*" } } */
+/* { dg-final { scan-assembler "(cltd|cdq|shr)" } } */
+/* { dg-final { scan-assembler-times "xor" 1 } } */
+/* { dg-final { scan-assembler-times "sub" 2 } } */
+
-- 
2.19.1