diff mbox

[AArch64] Handle function literal pools according to function size

Message ID 564665DF.70507@samsung.com
State New
Headers show

Commit Message

Evandro Menezes Nov. 13, 2015, 10:36 p.m. UTC
[AArch64] Handle function literal pools according to function size

    gcc/

        PR target/63304
        * config/aarch64/aarch64-protos.h
        (aarch64_nopcrelative_literal_loads):
        Move to module scope in "aarch64.c".
        (aarch64_may_load_literal_pcrel): New function.
        * config/aarch64/aarch64.c (aarch64_nopcrelative_literal_loads):
        Change
        scope to module.
        (aarch64_may_load_literal_pcrel): New function that replaces the
        global
        variable "aarch64_nopcrelative_literal_loads" in most cases.
        (aarch64_current_func_size): New function.
        * config/aarch64/aarch64.h (machine_function): Add new member
        "size".
        * config/aarch64/aarch64.md
        (aarch64_reload_movcp<GPF_TF:mode><P:mode>):
        Use "aarch64_may_load_literal_pcrel".
        (aarch64_reload_movcp<VALL:mode><P:mode>): Likewise.

Since defaulting to always using a global literal pool results in 
possible performance degradation on targets without insn fusion of the 
resulting insns, this tentative patch reverts to per function literal 
pool when the function size allows it or to the global literal pool 
otherwise.

Though the global literal pool promotes reuse of constants with positive 
impact in text size, it comes at the cost of increased I-cache pressure, 
since it then takes a pair of insns to access a literal.  Conversely, 
the per function literal pools limit reuse of constants, but reduce 
I-cache pressure due to then just a PC-relative load being used to 
access a literal.  I hope to have data quantifying such analysis soon.

Bootstrapped in aarch64 and arm.

Feedback is welcome.
diff mbox

Patch

From d0fa78c4c29a15964467276493280efa091fbd64 Mon Sep 17 00:00:00 2001
From: Evandro Menezes <e.menezes@samsung.com>
Date: Fri, 13 Nov 2015 15:55:45 -0600
Subject: [PATCH] [AArch64] Handle function literal pools according to function
 size

gcc/
	PR target/63304
	* config/aarch64/aarch64-protos.h (aarch64_nopcrelative_literal_loads):
	Move to module scope in "aarch64.c".
	(aarch64_may_load_literal_pcrel): New function.
	* config/aarch64/aarch64.c (aarch64_nopcrelative_literal_loads): Change
	scope to module.
	(aarch64_may_load_literal_pcrel): New function that replaces the global
	variable "aarch64_nopcrelative_literal_loads" in most cases.
	(aarch64_current_func_size): New function.
	* config/aarch64/aarch64.h (machine_function): Add new member "size".
	* config/aarch64/aarch64.md (aarch64_reload_movcp<GPF_TF:mode><P:mode>):
	Use "aarch64_may_load_literal_pcrel".
	(aarch64_reload_movcp<VALL:mode><P:mode>): Likewise.
---
 gcc/config/aarch64/aarch64-protos.h |  4 ++-
 gcc/config/aarch64/aarch64.c        | 49 ++++++++++++++++++++++++++++++++-----
 gcc/config/aarch64/aarch64.h        |  7 +++++-
 gcc/config/aarch64/aarch64.md       |  4 +--
 4 files changed, 54 insertions(+), 10 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 9000d67..57868b7 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -426,5 +426,7 @@  int aarch64_ccmp_mode_to_code (enum machine_mode mode);
 bool extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset);
 bool aarch64_operands_ok_for_ldpstp (rtx *, bool, enum machine_mode);
 bool aarch64_operands_adjust_ok_for_ldpstp (rtx *, bool, enum machine_mode);
-extern bool aarch64_nopcrelative_literal_loads;
+
+extern bool aarch64_may_load_literal_pcrel (void);
+
 #endif /* GCC_AARCH64_PROTOS_H */
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 5ec7f08..71f8331 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -139,6 +139,7 @@  static bool aarch64_vector_mode_supported_p (machine_mode);
 static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
 						 const unsigned char *sel);
 static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
+static unsigned long aarch64_current_func_size (void);
 
 /* Major revision number of the ARM Architecture implemented by the target.  */
 unsigned aarch64_architecture_version;
@@ -150,7 +151,7 @@  enum aarch64_processor aarch64_tune = cortexa53;
 unsigned long aarch64_tune_flags = 0;
 
 /* Global flag for PC relative loads.  */
-bool aarch64_nopcrelative_literal_loads;
+static bool aarch64_nopcrelative_literal_loads;
 
 /* Support for command line parsing of boolean flags in the tuning
    structures.  */
@@ -1558,7 +1559,7 @@  aarch64_expand_mov_immediate (rtx dest, rtx imm)
 	     we need to expand the literal pool access carefully.
 	     This is something that needs to be done in a number
 	     of places, so could well live as a separate function.  */
-	  if (aarch64_nopcrelative_literal_loads)
+	  if (!aarch64_may_load_literal_pcrel ())
 	    {
 	      gcc_assert (can_create_pseudo_p ());
 	      base = gen_reg_rtx (ptr_mode);
@@ -3698,7 +3699,7 @@  aarch64_classify_address (struct aarch64_address_info *info,
 	  return ((GET_CODE (sym) == LABEL_REF
 		   || (GET_CODE (sym) == SYMBOL_REF
 		       && CONSTANT_POOL_ADDRESS_P (sym)
-		       && !aarch64_nopcrelative_literal_loads)));
+		       && aarch64_may_load_literal_pcrel ())));
 	}
       return false;
 
@@ -4929,7 +4930,7 @@  aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
   if (MEM_P (x) && GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)
       && (SCALAR_FLOAT_MODE_P (GET_MODE (x))
 	  || targetm.vector_mode_supported_p (GET_MODE (x)))
-      && aarch64_nopcrelative_literal_loads)
+      && !aarch64_may_load_literal_pcrel ())
     {
       sri->icode = aarch64_constant_pool_reload_icode (mode);
       return NO_REGS;
@@ -5256,6 +5257,22 @@  aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
   return 0;
 }
 
+/* Return true if PC-relative loads may be used to access the per function
+   literal pool.  */
+
+bool
+aarch64_may_load_literal_pcrel (void)
+{
+  /* The range for PC-relative loads is +/-1MB.  To stay on the safe side,
+     the function size limit is set lower to half of the range.  */
+  const unsigned long max_func_size = 1048576UL / 2;
+
+  if (aarch64_nopcrelative_literal_loads)
+    return false;
+
+  return (aarch64_current_func_size () < max_func_size);
+}
+
 /* Constant pools are per function only when PC relative
    literal loads are true or we are in the large memory
    model.  */
@@ -5263,7 +5280,7 @@  aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
 static inline bool
 aarch64_can_use_per_function_literal_pools_p (void)
 {
-  return (!aarch64_nopcrelative_literal_loads
+  return (aarch64_may_load_literal_pcrel ()
 	  || aarch64_cmodel == AARCH64_CMODEL_LARGE);
 }
 
@@ -7181,6 +7198,26 @@  aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
   return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
 }
 
+/* Return the size of the current function in bytes.  */
+
+unsigned long
+aarch64_current_func_size (void)
+{
+  unsigned long func_size;
+  rtx_insn *insn;
+
+  if (cfun->machine->size)
+    return cfun->machine->size;
+
+  if (reload_in_progress)
+    return 0;
+
+  for (func_size = 0, insn = get_insns (); insn; insn = NEXT_INSN (insn))
+    func_size += get_attr_length (insn);
+
+  cfun->machine->size = func_size;
+  return func_size;
+}
 
 /* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD as
    autopref_multipass_dfa_lookahead_guard from haifa-sched.c.  It only
@@ -8966,7 +9003,7 @@  aarch64_classify_symbol (rtx x, rtx offset)
 	  /* This is alright even in PIC code as the constant
 	     pool reference is always PC relative and within
 	     the same translation unit.  */
-	  if (nopcrelative_literal_loads
+	  if (!aarch64_may_load_literal_pcrel ()
 	      && CONSTANT_POOL_ADDRESS_P (x))
 	    return SYMBOL_SMALL_ABSOLUTE;
 	  else
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 8834c9b..85afdb8 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -546,12 +546,17 @@  struct GTY (()) aarch64_frame
 
   bool laid_out;
 };
+#endif
 
 typedef struct GTY (()) machine_function
 {
+  /* Approximate size of the current function in bytes.
+     Only valid after reload and if non-zero.  */
+  unsigned long size;
+#ifdef HOST_WIDE_INT
   struct aarch64_frame frame;
-} machine_function;
 #endif
+} machine_function;
 
 /* Which ABI to use.  */
 enum aarch64_abi_type
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 1586256..d992273 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4535,7 +4535,7 @@ 
  [(set (match_operand:GPF_TF 0 "register_operand" "=w")
        (mem:GPF_TF (match_operand 1 "aarch64_constant_pool_symref" "S")))
   (clobber (match_operand:P 2 "register_operand" "=&r"))]
- "TARGET_FLOAT && aarch64_nopcrelative_literal_loads"
+ "TARGET_FLOAT && !aarch64_may_load_literal_pcrel ()"
  {
    aarch64_expand_mov_immediate (operands[2], XEXP (operands[1], 0));
    emit_move_insn (operands[0], gen_rtx_MEM (<GPF_TF:MODE>mode, operands[2]));
@@ -4548,7 +4548,7 @@ 
  [(set (match_operand:VALL 0 "register_operand" "=w")
        (mem:VALL (match_operand 1 "aarch64_constant_pool_symref" "S")))
   (clobber (match_operand:P 2 "register_operand" "=&r"))]
- "TARGET_FLOAT && aarch64_nopcrelative_literal_loads"
+ "TARGET_FLOAT && !aarch64_may_load_literal_pcrel ()"
  {
    aarch64_expand_mov_immediate (operands[2], XEXP (operands[1], 0));
    emit_move_insn (operands[0], gen_rtx_MEM (<VALL:MODE>mode, operands[2]));
-- 
2.1.0.243.g30d45f7