[RFC,AArch64] Cleanup frame pointer usage

Submitted by Wilco Dijkstra on April 20, 2017, 4:07 p.m.

Details

Message ID AM5PR0802MB26107EC0DFD996C8FF4953F5831B0@AM5PR0802MB2610.eurprd08.prod.outlook.com
State New
Headers show

Commit Message

Wilco Dijkstra April 20, 2017, 4:07 p.m.
ping

From: Wilco Dijkstra
Sent: 31 October 2016 18:29
To: GCC Patches
Cc: nd
Subject: [RFC][PATCH][AArch64] Cleanup frame pointer usage
    
This patch cleans up all code related to the frame pointer.  On AArch64 we
emit a frame chain even in cases where the frame pointer is not required.
So make this explicit by introducing a boolean emit_frame_chain in
aarch64_frame record.

When the frame pointer is enabled but not strictly required (eg. no use of
alloca), we emit a frame chain in non-leaf functions, but continue to use the
stack pointer to access locals.  This results in smaller code and unwind info.

Also simplify the complex logic in aarch64_override_options_after_change_1
and compute whether the frame chain is required in aarch64_layout_frame
instead.  As a result aarch64_frame_pointer_required is now redundant and
aarch64_can_eliminate can be greatly simplified.

Finally convert all callee save/restore functions to use gen_frame_mem.

Bootstrap OK. Any comments?

ChangeLog:
2016-10-31  Wilco Dijkstra  <wdijkstr@arm.com>

    gcc/
        * config/aarch64/aarch64.h (aarch64_frame):
         Add emit_frame_chain boolean.
        * config/aarch64/aarch64.c (aarch64_frame_pointer_required)
        Remove.
        (aarch64_layout_frame): Initialise emit_frame_chain.
        (aarch64_pushwb_single_reg): Use gen_frame_mem.
        (aarch64_pop_regs): Likewise.
        (aarch64_gen_load_pair): Likewise.
        (aarch64_save_callee_saves): Likewise.
        (aarch64_restore_callee_saves): Likewise.
        (aarch64_expand_prologue): Use emit_frame_chain.
        (aarch64_can_eliminate): Simplify. When FP needed or outgoing
        arguments are large, eliminate to FP, otherwise SP.
        (aarch64_override_options_after_change_1): Simplify.
        (TARGET_FRAME_POINTER_REQUIRED): Remove define.

--

Patch hide | download patch | download mbox

diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index fa81e4b853dafcccc08842955288861ec7e7acca..6e32dc9f6f171dde0c182fdd7857230251f71712 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -583,6 +583,9 @@  struct GTY (()) aarch64_frame
   /* The size of the stack adjustment after saving callee-saves.  */
   HOST_WIDE_INT final_adjust;
 
+  /* Store FP,LR and setup a frame pointer.  */
+  bool emit_frame_chain;
+
   unsigned wb_candidate1;
   unsigned wb_candidate2;
 
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index f07d771ea343803e054e03f59c8c1efb698bf474..6c06ac18d16f8afa7ee1cc5e8530e285a60e2b0f 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2728,24 +2728,6 @@  aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
   return "";
 }
 
-static bool
-aarch64_frame_pointer_required (void)
-{
-  /* In aarch64_override_options_after_change
-     flag_omit_leaf_frame_pointer turns off the frame pointer by
-     default.  Turn it back on now if we've not got a leaf
-     function.  */
-  if (flag_omit_leaf_frame_pointer
-      && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
-    return true;
-
-  /* Force a frame pointer for EH returns so the return address is at FP+8.  */
-  if (crtl->calls_eh_return)
-    return true;
-
-  return false;
-}
-
 /* Mark the registers that need to be saved by the callee and calculate
    the size of the callee-saved registers area and frame record (both FP
    and LR may be omitted).  */
@@ -2758,6 +2740,18 @@  aarch64_layout_frame (void)
   if (reload_completed && cfun->machine->frame.laid_out)
     return;
 
+  /* Force a frame chain for EH returns so the return address is at FP+8.  */
+  cfun->machine->frame.emit_frame_chain
+    = frame_pointer_needed || crtl->calls_eh_return;
+
+  /* Emit a frame chain if the frame pointer is enabled.
+     If -momit-leaf-frame-pointer is used, do not use a frame chain
+     in leaf functions which do not use LR.  */
+  if (flag_omit_frame_pointer == 2
+      && !(flag_omit_leaf_frame_pointer && crtl->is_leaf
+          && !df_regs_ever_live_p (LR_REGNUM)))
+    cfun->machine->frame.emit_frame_chain = true;
+
 #define SLOT_NOT_REQUIRED (-2)
 #define SLOT_REQUIRED     (-1)
 
@@ -2789,7 +2783,7 @@  aarch64_layout_frame (void)
         && !call_used_regs[regno])
       cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
 
-  if (frame_pointer_needed)
+  if (cfun->machine->frame.emit_frame_chain)
     {
       /* FP and LR are placed in the linkage record.  */
       cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
@@ -2937,7 +2931,7 @@  aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
   reg = gen_rtx_REG (mode, regno);
   mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
                             plus_constant (Pmode, base_rtx, -adjustment));
-  mem = gen_rtx_MEM (mode, mem);
+  mem = gen_frame_mem (mode, mem);
 
   insn = emit_move_insn (mem, reg);
   RTX_FRAME_RELATED_P (insn) = 1;
@@ -3011,7 +3005,7 @@  aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment,
     {
       rtx mem = plus_constant (Pmode, stack_pointer_rtx, adjustment);
       mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
-      emit_move_insn (reg1, gen_rtx_MEM (mode, mem));
+      emit_move_insn (reg1, gen_frame_mem (mode, mem));
     }
   else
     {
@@ -3062,8 +3056,6 @@  aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
                            unsigned start, unsigned limit, bool skip_wb)
 {
   rtx_insn *insn;
-  rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
-                                                ? gen_frame_mem : gen_rtx_MEM);
   unsigned regno;
   unsigned regno2;
 
@@ -3081,8 +3073,8 @@  aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
 
       reg = gen_rtx_REG (mode, regno);
       offset = start_offset + cfun->machine->frame.reg_offset[regno];
-      mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
-                                             offset));
+      mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
+                                               offset));
 
       regno2 = aarch64_next_callee_save (regno + 1, limit);
 
@@ -3095,8 +3087,8 @@  aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
           rtx mem2;
 
           offset = start_offset + cfun->machine->frame.reg_offset[regno2];
-         mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
-                                                  offset));
+         mem2 = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx,
+                                                    offset));
           insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
                                                     reg2));
 
@@ -3120,8 +3112,6 @@  aarch64_restore_callee_saves (machine_mode mode,
                               unsigned limit, bool skip_wb, rtx *cfi_ops)
 {
   rtx base_rtx = stack_pointer_rtx;
-  rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
-                                                ? gen_frame_mem : gen_rtx_MEM);
   unsigned regno;
   unsigned regno2;
   HOST_WIDE_INT offset;
@@ -3139,7 +3129,7 @@  aarch64_restore_callee_saves (machine_mode mode,
 
       reg = gen_rtx_REG (mode, regno);
       offset = start_offset + cfun->machine->frame.reg_offset[regno];
-      mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
+      mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
 
       regno2 = aarch64_next_callee_save (regno + 1, limit);
 
@@ -3151,7 +3141,7 @@  aarch64_restore_callee_saves (machine_mode mode,
           rtx mem2;
 
           offset = start_offset + cfun->machine->frame.reg_offset[regno2];
-         mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
+         mem2 = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
           emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
 
           *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
@@ -3217,6 +3207,7 @@  aarch64_expand_prologue (void)
   HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset;
   unsigned reg1 = cfun->machine->frame.wb_candidate1;
   unsigned reg2 = cfun->machine->frame.wb_candidate2;
+  bool emit_frame_chain = cfun->machine->frame.emit_frame_chain;
   rtx_insn *insn;
 
   if (flag_stack_usage_info)
@@ -3239,7 +3230,7 @@  aarch64_expand_prologue (void)
   if (callee_adjust != 0)
     aarch64_push_regs (reg1, reg2, callee_adjust);
 
-  if (frame_pointer_needed)
+  if (emit_frame_chain)
     {
       if (callee_adjust == 0)
         aarch64_save_callee_saves (DImode, callee_offset, R29_REGNUM,
@@ -3247,12 +3238,12 @@  aarch64_expand_prologue (void)
       insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
                                        stack_pointer_rtx,
                                        GEN_INT (callee_offset)));
-      RTX_FRAME_RELATED_P (insn) = 1;
+      RTX_FRAME_RELATED_P (insn) = frame_pointer_needed;
       emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
     }
 
   aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
-                            callee_adjust != 0 || frame_pointer_needed);
+                            callee_adjust != 0 || emit_frame_chain);
   aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
                              callee_adjust != 0 || frame_pointer_needed);
   aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed);
@@ -5157,36 +5148,13 @@  aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
 }
 
 static bool
-aarch64_can_eliminate (const int from, const int to)
+aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
 {
-  /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
-     HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */
-
-  if (frame_pointer_needed)
-    {
-      if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
-       return true;
-      if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
-       return false;
-      if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
-         && !cfun->calls_alloca)
-       return true;
-      if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
-       return true;
-
-      return false;
-    }
-  else
-    {
-      /* If we decided that we didn't need a leaf frame pointer but then used
-        LR in the function, then we'll want a frame pointer after all, so
-        prevent this elimination to ensure a frame pointer is used.  */
-      if (to == STACK_POINTER_REGNUM
-         && flag_omit_leaf_frame_pointer
-         && df_regs_ever_live_p (LR_REGNUM))
-       return false;
-    }
-
+  /* If we need a frame pointer, eliminate to HARD_FRAME_POINTER_REGNUM.
+     Use FP as well as with a large number of outgoing arguments so
+     that stack offsets are smaller - this may generate better code.  */
+  if (frame_pointer_needed || crtl->outgoing_args_size > 64)
+    return to == HARD_FRAME_POINTER_REGNUM;
   return true;
 }
 
@@ -8112,24 +8080,13 @@  aarch64_parse_override_string (const char* input_string,
 static void
 aarch64_override_options_after_change_1 (struct gcc_options *opts)
 {
-  /* The logic here is that if we are disabling all frame pointer generation
-     then we do not need to disable leaf frame pointer generation as a
-     separate operation.  But if we are *only* disabling leaf frame pointer
-     generation then we set flag_omit_frame_pointer to true, but in
-     aarch64_frame_pointer_required we return false only for leaf functions.
-
-     PR 70044: We have to be careful about being called multiple times for the
-     same function.  Once we have decided to set flag_omit_frame_pointer just
-     so that we can omit leaf frame pointers, we must then not interpret a
-     second call as meaning that all frame pointer generation should be
-     omitted.  We do this by setting flag_omit_frame_pointer to a special,
-     non-zero value.  */
-  if (opts->x_flag_omit_frame_pointer == 2)
-    opts->x_flag_omit_frame_pointer = 0;
-
-  if (opts->x_flag_omit_frame_pointer)
-    opts->x_flag_omit_leaf_frame_pointer = false;
-  else if (opts->x_flag_omit_leaf_frame_pointer)
+  /* PR 70044: We have to be careful about being called multiple times for the
+     same function.  This means all changes should be repeatable.  */
+
+  /* If the frame pointer is enabled, set the flag to a special value.
+     To implement -momit-leaf-frame-pointer this special value is checked in
+     aarch64_layout_frame.  The frame chain is emitted only when required.  */
+  if (opts->x_flag_omit_frame_pointer == 0)
     opts->x_flag_omit_frame_pointer = 2;
 
   /* If not optimizing for size, set the default
@@ -14126,9 +14083,6 @@  aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
 #undef TARGET_FUNCTION_VALUE_REGNO_P
 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
 
-#undef TARGET_FRAME_POINTER_REQUIRED
-#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
-
 #undef TARGET_GIMPLE_FOLD_BUILTIN
 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin