From 471fbb2057b4d338d01bb403f0973adbed33a31d Mon Sep 17 00:00:00 2001
From: Alex Velenko <alex.velenko@arm.com>
Date: Mon, 6 Oct 2014 15:33:36 +0100
Subject: [PATCH] [RTL, Patch] Int div by constant compilation enhancement
This patch adds a mid-end check to catch the division-by-constant
case and optimize it to generate one shift
instead of two.
A testcase to check the correct code generation for aarch64
is added. This check is not made generic, because the optimisation
implemented is not used by all targets.
Thanks,
Alex
gcc/
2014-11-03 Alex Velenko <Alex.Velenko@arm.com>
* simplify-rtx.c (simplify_binary_operation_1): Div check added.
* rtl.h (SUBREG_P): New macro added.
gcc/testsuite/
2014-11-03 Alex Velenko <Alex.Velenko@arm.com>
* gcc.dg/asr_div1.c: New testcase.
---
gcc/rtl.h | 3 +++
gcc/simplify-rtx.c | 38 ++++++++++++++++++++++++++++
gcc/testsuite/gcc.dg/asr_div1.c | 56 +++++++++++++++++++++++++++++++++++++++++
3 files changed, 97 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/asr_div1.c
@@ -789,6 +789,9 @@ struct GTY(()) rtvec_def {
/* Predicate yielding nonzero iff X is a data for a jump table. */
#define JUMP_TABLE_DATA_P(INSN) (GET_CODE (INSN) == JUMP_TABLE_DATA)
+/* Predicate yielding nonzero iff the rtx code of RTX is SUBREG. */
+#define SUBREG_P(RTX) (GET_CODE (RTX) == SUBREG)
+
template <>
template <>
inline bool
@@ -3102,6 +3102,44 @@ simplify_binary_operation_1 (enum rtx_code code, enum machine_mode mode,
&& UINTVAL (trueop0) == GET_MODE_MASK (mode)
&& ! side_effects_p (op1))
return op0;
+ /* Given:
+ scalar modes M1, M2
+ scalar constants c1, c2
+ size (M2) > size (M1)
+ c1 == size (M2) - size (M1)
+ optimize:
+ (ashiftrt:M1 (subreg:M1 (lshiftrt:M2 (reg:M2)
+ (const_int <c1>))
+ <low_part>)
+ (const_int <c2>))
+ to:
+ (subreg:M1 (ashiftrt:M2 (reg:M2)
+ (const_int <c1 + c2>))
+ <low_part>). */
+ /* The replacement is an ASHIFTRT, so CODE must be ASHIFTRT too.  */
+ if (code == ASHIFTRT
+ && !VECTOR_MODE_P (mode)
+ && SUBREG_P (op0)
+ && CONST_INT_P (op1)
+ && GET_CODE (SUBREG_REG (op0)) == LSHIFTRT
+ && !VECTOR_MODE_P (GET_MODE (SUBREG_REG (op0)))
+ && CONST_INT_P (XEXP (SUBREG_REG (op0), 1))
+ && (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op0)))
+ > GET_MODE_BITSIZE (mode))
+ && (INTVAL (XEXP (SUBREG_REG (op0), 1))
+ == (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op0)))
+ - GET_MODE_BITSIZE (mode)))
+ && subreg_lowpart_p (op0))
+ {
+ machine_mode inner_mode = GET_MODE (SUBREG_REG (op0));
+ /* Fold both shift counts into the single count c1 + c2.  */
+ rtx tmp = GEN_INT (INTVAL (XEXP (SUBREG_REG (op0), 1))
+ + INTVAL (op1));
+ tmp = simplify_gen_binary (ASHIFTRT, inner_mode,
+ XEXP (SUBREG_REG (op0), 0), tmp);
+ return simplify_gen_subreg (mode, tmp, inner_mode,
+ subreg_lowpart_offset (mode, inner_mode));
+ }
canonicalize_shift:
if (SHIFT_COUNT_TRUNCATED && CONST_INT_P (op1))
{
new file mode 100644
@@ -0,0 +1,56 @@
+/* Test division by const int generates only one shift. */
+/* { dg-do run } */
+/* { dg-options "-O2 -fdump-rtl-combine-all" } */
+
+extern void abort (void);
+
+#define NOINLINE __attribute__((noinline))
+
+static NOINLINE int
+f1 (int n)
+{
+ return n / 33; /* Signed n / 33; aarch64 dump scan expects shift count 35.  */
+}
+
+static NOINLINE int
+f2 (int n)
+{
+ return n / 77; /* Signed n / 77; aarch64 dump scan expects shift count 36.  */
+}
+
+int
+main ()
+{ /* Run-time check of f1 and f2 against precomputed quotients.  */
+ int a = 0xaaaaaaaa; /* Presumably negative once converted to a 32-bit int.  */
+ int b = 0x55555555; /* Positive dividend.  */
+ int c;
+ c = f1 (a);
+ if (c != 0xfd6a052c) /* a / 33, written as a 32-bit bit pattern.  */
+ abort ();
+ c = f1 (b);
+ if (c != 0x295FAD4) /* b / 33.  */
+ abort ();
+ c = f2 (a);
+ if (c != 0xfee44b5c) /* a / 77, written as a 32-bit bit pattern.  */
+ abort ();
+ c = f2 (b);
+ if (c != 0x11bb4a4) /* b / 77.  */
+ abort ();
+ return 0;
+}
+
+/* Following replacement pattern of integer division by constant, GCC is expected
+ to generate MULT and (x)SHIFTRT. This test checks that considering division
+ by const 33, gcc generates a single ASHIFTRT by 35, instead of two - LSHIFTRT
+ by 32 and ASHIFTRT by 3. */
+
+/* { dg-final { scan-rtl-dump "\\(set \\(subreg:DI \\(reg:SI" "combine" { target aarch64*-*-* } } } */
+/* { dg-final { scan-rtl-dump "\\(ashiftrt:DI \\(reg:DI" "combine" { target aarch64*-*-* } } } */
+/* { dg-final { scan-rtl-dump "\\(const_int 35 " "combine" { target aarch64*-*-* } } } */
+
+/* Similarly, considering division by const 77, gcc generates a single ASHIFTRT
+ by 36, instead of two - LSHIFTRT by 32 and ASHIFTRT by 4. */
+
+/* { dg-final { scan-rtl-dump "\\(const_int 36 " "combine" { target aarch64*-*-* } } } */
+
+/* { dg-final { cleanup-rtl-dump "combine" } } */
--
1.8.1.2