diff mbox

[AArch64,2/5] Implement adrp+add fusion

Message ID 546B2088.2050806@arm.com
State New
Headers show

Commit Message

Kyrylo Tkachov Nov. 18, 2014, 10:33 a.m. UTC
Hi all,

This patch is just rebased on top of the changes from the previous patch 
in the series.
Otherwise it's the same as 
https://gcc.gnu.org/ml/gcc-patches/2014-11/msg01263.html with some style 
cleanup.

There can be cases where we miss fusion of adrp+add because, although 
they are generated together (in aarch64_load_symref_appropriately),
combine can sometimes combine the losym part with the instruction after 
it, and we end up with an instruction stream where there is an insn between 
the two, preventing the fusion in sched1.
We still catch enough cases to make this approach worthwhile, and the 
above-mentioned exceptions can be mitigated in the future (for example, 
by somehow delaying the generation of the adrp,add RTL until after combine 
but before sched1).

Tested and bootstrapped on aarch64-none-linux-gnu.
Ok for trunk?

2014-11-18  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     * config/aarch64/aarch64.c: Include tm-constrs.h.
     (AARCH64_FUSE_ADRP_ADD): Define.
     (cortexa57_tunings): Add AARCH64_FUSE_ADRP_ADD to fuseable_ops.
     (cortexa53_tunings): Likewise.
     (aarch_macro_fusion_pair_p): Handle AARCH64_FUSE_ADRP_ADD.

Comments

Marcus Shawcroft Nov. 21, 2014, 4:55 p.m. UTC | #1
On 18 November 2014 10:33, Kyrill Tkachov <kyrylo.tkachov@arm.com> wrote:

> 2014-11-18  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>
>     * config/aarch64/aarch64.c: Include tm-constrs.h
>     (AARCH64_FUSE_ADRP_ADD): Define.
>     (cortexa57_tunings): Add AARCH64_FUSE_ADRP_ADD to fuseable_ops.
>     (cortexa53_tunings): Likewise.
>     (aarch_macro_fusion_pair_p): Handle AARCH64_FUSE_ADRP_ADD.

OK /Marcus
diff mbox

Patch

commit 248ec70cfac6cb552a427b4336a3340bb25a5e53
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date:   Thu Nov 6 12:05:26 2014 +0000

    [AArch64] Fuse ADRP+ADD

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 502ba6d..03ae7c4 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -77,6 +77,7 @@ 
 #include "dumpfile.h"
 #include "builtins.h"
 #include "rtl-iter.h"
+#include "tm-constrs.h"
 
 /* Defined for convenience.  */
 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
@@ -304,6 +305,7 @@  static const struct cpu_vector_cost cortexa57_vector_cost =
 
 #define AARCH64_FUSE_NOTHING	(0)
 #define AARCH64_FUSE_MOV_MOVK	(1 << 0)
+#define AARCH64_FUSE_ADRP_ADD	(1 << 1)
 
 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 __extension__
@@ -327,7 +329,7 @@  static const struct tune_params cortexa53_tunings =
   &generic_vector_cost,
   NAMED_PARAM (memmov_cost, 4),
   NAMED_PARAM (issue_rate, 2),
-  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_MOV_MOVK)
+  NAMED_PARAM (fuseable_ops, (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD))
 };
 
 static const struct tune_params cortexa57_tunings =
@@ -338,7 +340,7 @@  static const struct tune_params cortexa57_tunings =
   &cortexa57_vector_cost,
   NAMED_PARAM (memmov_cost, 4),
   NAMED_PARAM (issue_rate, 3),
-  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_MOV_MOVK)
+  NAMED_PARAM (fuseable_ops, (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD))
 };
 
 static const struct tune_params thunderx_tunings =
@@ -10037,6 +10039,32 @@  aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
         }
     }
 
+  if (simple_sets_p
+      && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_ADD))
+    {
+
+      /*  We're trying to match:
+          prev (adrp) == (set (reg r1)
+                              (high (symbol_ref ("SYM"))))
+          curr (add) == (set (reg r0)
+                             (lo_sum (reg r1)
+                                     (symbol_ref ("SYM"))))
+          Note that r0 need not necessarily be the same as r1, especially
+          during pre-regalloc scheduling.  */
+
+      if (satisfies_constraint_Ush (SET_SRC (prev_set))
+          && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
+        {
+          if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
+              && REG_P (XEXP (SET_SRC (curr_set), 0))
+              && REGNO (XEXP (SET_SRC (curr_set), 0))
+                 == REGNO (SET_DEST (prev_set))
+              && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
+                              XEXP (SET_SRC (curr_set), 1)))
+            return true;
+        }
+    }
+
   return false;
 }