diff mbox

[AArch64] Implement adrp+add fusion

Message ID 54638968.3060803@arm.com
State New
Headers show

Commit Message

Kyrylo Tkachov Nov. 12, 2014, 4:23 p.m. UTC
Hi all,

This patch implements fusion of adrp and add instructions that appear in 
RTL as consecutive HIGH and LO_SUM SETs.
This fusion is enabled by default for Cortex-A57 and Cortex-A53.
This depends on the previous macro fusion patch posted at 
https://gcc.gnu.org/ml/gcc-patches/2014-11/msg00956.html


Bootstrapped and tested on aarch64-none-linux-gnu.

Ok for trunk after the prerequisite?

Thanks,
Kyrill

2014-11-12  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     * config/aarch64/aarch64.c: Include tm-constrs.h.
     (AARCH64_FUSE_ADRP_ADD): Define.
     (cortexa57_tunings): Add AARCH64_FUSE_ADRP_ADD to fuseable_ops.
     (cortexa53_tunings): Likewise.
     (aarch_macro_fusion_pair_p): Handle AARCH64_FUSE_ADRP_ADD.
diff mbox

Patch

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 71265de..c9499ee 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -77,6 +77,7 @@ 
 #include "dumpfile.h"
 #include "builtins.h"
 #include "rtl-iter.h"
+#include "tm-constrs.h"
 
 /* Defined for convenience.  */
 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
@@ -301,6 +302,7 @@  static const struct cpu_vector_cost cortexa57_vector_cost =
 
 #define AARCH64_FUSE_NOTHING	(0)
 #define AARCH64_FUSE_MOV_MOVK	(1 << 0)
+#define AARCH64_FUSE_ADRP_ADD	(1 << 1)
 
 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 __extension__
@@ -324,7 +326,7 @@  static const struct tune_params cortexa53_tunings =
   &generic_vector_cost,
   NAMED_PARAM (memmov_cost, 4),
   NAMED_PARAM (issue_rate, 2),
-  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_MOV_MOVK)
+  NAMED_PARAM (fuseable_ops, (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD))
 };
 
 static const struct tune_params cortexa57_tunings =
@@ -335,7 +337,7 @@  static const struct tune_params cortexa57_tunings =
   &cortexa57_vector_cost,
   NAMED_PARAM (memmov_cost, 4),
   NAMED_PARAM (issue_rate, 3),
-  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_MOV_MOVK)
+  NAMED_PARAM (fuseable_ops, (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD))
 };
 
 static const struct tune_params thunderx_tunings =
@@ -10055,12 +10057,39 @@  aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
 
       set_dest = SET_DEST (curr_set);
 
-      return GET_CODE (set_dest) == ZERO_EXTRACT
+      if (GET_CODE (set_dest) == ZERO_EXTRACT
              && CONST_INT_P (SET_SRC (curr_set))
              && CONST_INT_P (SET_SRC (prev_set))
              && REG_P (XEXP (set_dest, 0))
              && REG_P (SET_DEST (prev_set))
-             && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set));
+             && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
+        return true;
+    }
+
+  if (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_ADD)
+    {
+
+      /*  We're trying to match:
+          prev (adrp) == (set (reg r1)
+                              (high (symbol_ref ("SYM"))))
+          curr (add) == (set (reg r0)
+                             (lo_sum (reg r1)
+                                     (symbol_ref ("SYM"))))
+          Note that r0 need not necessarily be the same as r1, especially
+          during pre-regalloc scheduling.  */
+
+      if (satisfies_constraint_Ush (SET_SRC (prev_set))
+          && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
+        {
+          if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
+              && REG_P (XEXP (SET_SRC (curr_set), 0))
+              && REGNO (XEXP (SET_SRC (curr_set), 0))
+                 == REGNO (SET_DEST (prev_set))
+              && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
+                              XEXP (SET_SRC (curr_set), 1)))
+            return true;
+
+        }
     }
 
   return false;