diff mbox

[RTL] Int div by constant compilation enhancement

Message ID 5457B4FA.5070204@arm.com
State New
Headers show

Commit Message

Alex Velenko Nov. 3, 2014, 5:01 p.m. UTC
Hi,
This patch adds a mid-end check to catch the division-by-constant
case and optimize it to generate one shift
instead of two.

A testcase to check the correct code generation for aarch64
is added. This check is not made generic, because the optimisation
implemented is not used by all targets.

Is it ok?

Thanks,
Alex

gcc/

2014-11-03  Alex Velenko  <Alex.Velenko@arm.com>

     * simplify-rtx.c (simplify_binary_operation_1): Div check added.
     * rtl.h (SUBREG_P): New macro added.

gcc/testsuite/

2014-11-03  Alex Velenko  <Alex.Velenko@arm.com>

     * gcc.dg/asr_div1.c: New testcase.

Comments

Jeff Law Nov. 4, 2014, 10:34 p.m. UTC | #1
On 11/03/14 10:01, Alex Velenko wrote:
> Hi,
> This patch adds a mid-end check to catch division by
> constant case and optimize it to generate one shift,
> instead of two.
>
> A testacase to check the correct codegeneration for aarch64
> is added. This check is not made generic, because the optimisation
> implemented is not used by all targets.
>
> Is it ok?
>
> Thanks,
> Alex
>
> gcc/
>
> 2014-11-03  Alex Velenko  <Alex.Velenko@arm.com>
>
>      * simplify-rtx.c (simplify_binary_operation_1): Div check added.
>      * rtl.h (SUBREG_P): New macro added.
>
> gcc/testsuite/
>
> 2014-11-03  Alex Velenko  <Alex.Velenko@arm.com>
>
>      * gcc.dg/asr-div1.c : New testcase.
OK for the trunk.

Thanks,
Jeff
Eric Botcazou Nov. 5, 2014, 12:22 p.m. UTC | #2
> 2014-11-03  Alex Velenko  <Alex.Velenko@arm.com>
> 
>      * simplify-rtx.c (simplify_binary_operation_1): Div check added.
>      * rtl.h (SUBREG_P): New macro added.

Present tense in Change entries:

	* rtl.h (SUBREG_P): New macro.
	* simplify-rtx.c (simplify_binary_operation_1): Simplify consecutive
	right shifts in combination with a low-part operation.

Can't the 'c1 == size (M2) - size (M1)' condition be relaxed?
H.J. Lu Nov. 13, 2014, 3:56 a.m. UTC | #3
On Mon, Nov 3, 2014 at 9:01 AM, Alex Velenko <Alex.Velenko@arm.com> wrote:
> Hi,
> This patch adds a mid-end check to catch division by
> constant case and optimize it to generate one shift,
> instead of two.
>
> A testacase to check the correct codegeneration for aarch64
> is added. This check is not made generic, because the optimisation
> implemented is not used by all targets.
>
> Is it ok?
>
> Thanks,
> Alex
>
> gcc/
>
> 2014-11-03  Alex Velenko  <Alex.Velenko@arm.com>
>
>     * simplify-rtx.c (simplify_binary_operation_1): Div check added.
>     * rtl.h (SUBREG_P): New macro added.
>
> gcc/testsuite/
>
> 2014-11-03  Alex Velenko  <Alex.Velenko@arm.com>
>
>     * gcc.dg/asr-div1.c : New testcase.

This caused:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63843
diff mbox

Patch

From 471fbb2057b4d338d01bb403f0973adbed33a31d Mon Sep 17 00:00:00 2001
From: Alex Velenko <alex.velenko@arm.com>
Date: Mon, 6 Oct 2014 15:33:36 +0100
Subject: [PATCH] [RTL, Patch] Int div by constant compilation enhancement

This patch adds a mid-end check to catch the division-by-constant
case and optimize it to generate one shift
instead of two.

A testcase to check the correct code generation for aarch64
is added. This check is not made generic, because the optimisation
implemented is not used by all targets.
Thanks,
Alex

gcc/

2014-11-03  Alex Velenko  <Alex.Velenko@arm.com>

	* simplify-rtx.c (simplify_binary_operation_1): Div check added.
	* rtl.h (SUBREG_P): New macro added.

gcc/testsuite/

2014-11-03  Alex Velenko  <Alex.Velenko@arm.com>

	* gcc.dg/asr_div1.c: New testcase.
---
 gcc/rtl.h                       |  3 +++
 gcc/simplify-rtx.c              | 38 ++++++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/asr_div1.c | 56 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 97 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/asr_div1.c

diff --git a/gcc/rtl.h b/gcc/rtl.h
index ddd39c9..6222817 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -789,6 +789,9 @@  struct GTY(()) rtvec_def {
 /* Predicate yielding nonzero iff X is a data for a jump table.  */
 #define JUMP_TABLE_DATA_P(INSN) (GET_CODE (INSN) == JUMP_TABLE_DATA)
 
+/* Predicate yielding nonzero iff RTX is a subreg.  */
+#define SUBREG_P(RTX) (GET_CODE (RTX) == SUBREG)
+
 template <>
 template <>
 inline bool
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index d783c22..e01fba7 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -3102,6 +3102,44 @@  simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode,
 	  && UINTVAL (trueop0) == GET_MODE_MASK (mode)
 	  && ! side_effects_p (op1))
 	return op0;
+      /* Given:
+         scalar modes M1, M2
+         scalar constants c1, c2
+         size (M2) > size (M1)
+         c1 == size (M2) - size (M1)
+         optimize:
+         (ashiftrt:M1 (subreg:M1 (lshiftrt:M2 (reg:M2)
+                                              (const_int <c1>))
+                                  <low_part>)
+                      (const_int <c2>))
+         to:
+         (subreg:M1 (ashiftrt:M2 (reg:M2)
+                                 (const_int <c1 + c2>))
+          <low_part>).  */
+      if (!VECTOR_MODE_P (mode)
+          && SUBREG_P (op0)
+          && CONST_INT_P (op1)
+          && (GET_CODE (SUBREG_REG (op0)) == LSHIFTRT)
+          && !VECTOR_MODE_P (GET_MODE (SUBREG_REG (op0)))
+          && CONST_INT_P (XEXP (SUBREG_REG (op0), 1))
+          && (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op0)))
+              > GET_MODE_BITSIZE (mode))
+          && (INTVAL (XEXP (SUBREG_REG (op0), 1))
+              == (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op0)))
+                  - GET_MODE_BITSIZE (mode)))
+          && subreg_lowpart_p (op0))
+        {
+          rtx tmp = GEN_INT (INTVAL (XEXP (SUBREG_REG (op0), 1))
+                             + INTVAL (op1));
+          machine_mode inner_mode = GET_MODE (SUBREG_REG (op0));
+          tmp = simplify_gen_binary (ASHIFTRT,
+                                     GET_MODE (SUBREG_REG (op0)),
+                                     XEXP (SUBREG_REG (op0), 0),
+                                     tmp);
+          return simplify_gen_subreg (mode, tmp, inner_mode,
+                                      subreg_lowpart_offset (mode,
+                                                             inner_mode));
+        }
     canonicalize_shift:
       if (SHIFT_COUNT_TRUNCATED && CONST_INT_P (op1))
 	{
diff --git a/gcc/testsuite/gcc.dg/asr_div1.c b/gcc/testsuite/gcc.dg/asr_div1.c
new file mode 100644
index 0000000..61430ca
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asr_div1.c
@@ -0,0 +1,56 @@ 
+/* Test division by const int generates only one shift.  */
+/* { dg-do run } */
+/* { dg-options "-O2 -fdump-rtl-combine-all" } */
+
+extern void abort (void);
+
+#define NOINLINE __attribute__((noinline))
+
+static NOINLINE int
+f1 (int n)
+{
+  return n / 33;
+}
+
+static NOINLINE int
+f2 (int n)
+{
+  return n / 77;
+}
+
+int
+main ()
+{
+  int a = 0xaaaaaaaa;
+  int b = 0x55555555;
+  int c;
+  c = f1 (a);
+  if (c != 0xfd6a052c)
+    abort ();
+  c = f1 (b);
+  if (c != 0x295FAD4)
+    abort ();
+  c = f2 (a);
+  if (c != 0xfee44b5c)
+    abort ();
+  c = f2 (b);
+  if (c != 0x11bb4a4)
+    abort ();
+  return 0;
+}
+
+/* Following replacement pattern of integer division by constant, GCC is expected
+   to generate MULT and (x)SHIFTRT.  This test checks that considering division
+   by const 33, gcc generates a single ASHIFTRT by 35, instead of two - LSHIFTRT
+   by 32 and ASHIFTRT by 3.  */
+
+/* { dg-final { scan-rtl-dump "\\(set \\(subreg:DI \\(reg:SI" "combine" { target aarch64*-*-* } } } */
+/* { dg-final { scan-rtl-dump "\\(ashiftrt:DI \\(reg:DI" "combine" { target aarch64*-*-* } } } */
+/* { dg-final { scan-rtl-dump "\\(const_int 35 " "combine" { target aarch64*-*-* } } } */
+
+/* Similarly, considering division by const 77, gcc generates a single ASHIFTRT
+   by 36, instead of two - LSHIFTRT by 32 and ASHIFTRT by 4.  */
+
+/* { dg-final { scan-rtl-dump "\\(const_int 36 " "combine" { target aarch64*-*-* } } } */
+
+/* { dg-final { cleanup-rtl-dump "combine" } } */
-- 
1.8.1.2