diff mbox

[ARM,5/8] Epilogue in RTL: expand

Message ID 001201cd3f35$a5c6acd0$f1540670$@Yorsh@arm.com
State New
Headers show

Commit Message

Greta Yorsh May 31, 2012, 1:59 p.m. UTC
The main function for epilogue RTL generation, used by expand epilogue
patterns.

ChangeLog:

gcc

2012-05-31  Ian Bolton  <ian.bolton@arm.com>
            Sameera Deshpande  <sameera.deshpande@arm.com>
            Greta Yorsh  <greta.yorsh@arm.com>

        * config/arm/arm-protos.h (arm_expand_epilogue): New declaration.
        * config/arm/arm.c (arm_expand_epilogue): New function.
        * config/arm/arm.md (epilogue): Update condition and code.
        (sibcall_epilogue): Likewise.

Comments

Richard Earnshaw June 15, 2012, 10:43 a.m. UTC | #1
On 31/05/12 14:59, Greta Yorsh wrote:
> The main function for epilogue RTL generation, used by expand epilogue
> patterns.
> 
> ChangeLog:
> 
> gcc
> 
> 2012-05-31  Ian Bolton  <ian.bolton@arm.com>
>             Sameera Deshpande  <sameera.deshpande@arm.com>
>             Greta Yorsh  <greta.yorsh@arm.com>
> 
>         * config/arm/arm-protos.h (arm_expand_epilogue): New declaration.
>         * config/arm/arm.c (arm_expand_epilogue): New function.
>         * config/arm/arm.md (epilogue): Update condition and code.
>         (sibcall_epilogue): Likewise.
> 

Same as last patch, OK once the FPA support has been stripped out.

R.
diff mbox

Patch

diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 7b25e37..f61feef 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -30,6 +30,7 @@  extern void arm_load_pic_register (unsigned long);
 extern int arm_volatile_func (void);
 extern const char *arm_output_epilogue (rtx);
 extern void arm_expand_prologue (void);
+extern void arm_expand_epilogue (bool);
 extern const char *arm_strip_name_encoding (const char *);
 extern void arm_asm_output_labelref (FILE *, const char *);
 extern void thumb2_asm_output_opcode (FILE *);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index d6b4c2e..c8642e2 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -23122,6 +23122,326 @@  arm_expand_epilogue_apcs_frame (bool really_return)
   emit_jump_insn (simple_return_rtx);
 }
 
+/* Generate RTL to represent the ARM epilogue.  REALLY_RETURN is true if the
+   function is not a sibcall; if false, the main path emits no return jump.  */
+void
+arm_expand_epilogue (bool really_return)
+{
+  unsigned long func_type;
+  unsigned long saved_regs_mask;
+  int num_regs = 0;
+  int i;
+  int amount;
+  int floats_from_frame = 0;
+  arm_stack_offsets *offsets;
+
+  func_type = arm_current_func_type ();
+
+  /* Naked functions don't have an epilogue.  Hence, generate return pattern,
+     and let output_return_instruction handle instruction emission, if any.  */
+  if (IS_NAKED (func_type)
+      || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
+    {
+      emit_jump_insn (simple_return_rtx);
+      return;
+    }
+
+  /* If we are throwing an exception, then we really must be doing a
+     return, so we can't tail-call.  */
+  gcc_assert (!crtl->calls_eh_return || really_return);
+
+  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
+    {
+      arm_expand_epilogue_apcs_frame (really_return);
+      return;
+    }
+
+  /* Get frame offsets for ARM.  */
+  offsets = arm_get_frame_offsets ();
+  saved_regs_mask = offsets->saved_regs_mask;
+
+  /* Find offset of floating point registers from frame pointer, starting
+     from a value that accounts for frame pointer and static-chain register,
+     if stored.  NOTE(review): floats_from_frame is never used afterwards.  */
+  floats_from_frame = offsets->saved_args - offsets->frame;
+  /* Count the saved ARM registers and how far away the floats will be.  */
+  for (i = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1 << i))
+      {
+        num_regs++;
+        floats_from_frame += 4;
+      }
+
+  if (frame_pointer_needed)
+    {
+      /* Restore stack pointer if necessary.  */
+      if (TARGET_ARM)
+        {
+          /* In ARM mode, frame pointer points to first saved register.
+             Restore stack pointer to last saved register.  */
+          amount = offsets->frame - offsets->saved_regs;
+
+          /* Force out any pending memory operations that reference stacked data
+             before stack de-allocation occurs.  */
+          emit_insn (gen_blockage ());
+          emit_insn (gen_addsi3 (stack_pointer_rtx,
+                                 hard_frame_pointer_rtx,
+                                 GEN_INT (amount)));
+
+          /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
+             deleted.  */
+          emit_insn (gen_prologue_use (stack_pointer_rtx));
+        }
+      else
+        {
+          /* In Thumb-2 mode, the frame pointer points to the last saved
+             register.  */
+          amount = offsets->locals_base - offsets->saved_regs;
+          if (amount)
+            emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
+                                   hard_frame_pointer_rtx,
+                                   GEN_INT (amount)));
+
+          /* Force out any pending memory operations that reference stacked data
+             before stack de-allocation occurs.  */
+          emit_insn (gen_blockage ());
+          emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
+          /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
+             deleted.  */
+          emit_insn (gen_prologue_use (stack_pointer_rtx));
+        }
+    }
+  else
+    {
+      /* Pop off outgoing args and local frame to adjust stack pointer to
+         last saved register.  */
+      amount = offsets->outgoing_args - offsets->saved_regs;
+      if (amount)
+        {
+          /* Force out any pending memory operations that reference stacked data
+             before stack de-allocation occurs.  */
+          emit_insn (gen_blockage ());
+          emit_insn (gen_addsi3 (stack_pointer_rtx,
+                                 stack_pointer_rtx,
+                                 GEN_INT (amount)));
+          /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
+             not deleted.  */
+          emit_insn (gen_prologue_use (stack_pointer_rtx));
+        }
+    }
+
+  if (TARGET_HARD_FLOAT && TARGET_VFP)
+    {
+      /* Generate VFP register multi-pop.  */
+      int end_reg = LAST_VFP_REGNUM + 1;
+
+      /* Scan the registers in reverse order.  We need to match
+         any groupings made in the prologue and generate matching
+         vldm operations.  The need to match groups is because,
+         unlike pop, vldm can only do consecutive regs.  */
+      for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
+        /* Look for a case where a reg does not need restoring.  */
+        if ((!df_regs_ever_live_p (i) || call_used_regs[i])
+            && (!df_regs_ever_live_p (i + 1)
+                || call_used_regs[i + 1]))
+          {
+            /* Restore the regs discovered so far (from reg+2 to
+               end_reg).  */
+            if (end_reg > i + 2)
+              arm_emit_vfp_multi_reg_pop (i + 2,
+                                          (end_reg - (i + 2)) / 2,
+                                          stack_pointer_rtx);
+            end_reg = i;
+          }
+
+      /* Restore the remaining regs that we have discovered (or possibly
+         even all of them, if the conditional in the for loop never
+         fired).  */
+      if (end_reg > i + 2)
+        arm_emit_vfp_multi_reg_pop (i + 2,
+                                    (end_reg - (i + 2)) / 2,
+                                    stack_pointer_rtx);
+    }
+  else if (TARGET_FPA_EMU2)
+    {
+      for (i = FIRST_FPA_REGNUM; i <= LAST_FPA_REGNUM; i++)
+        if (df_regs_ever_live_p (i) && !call_used_regs[i])
+          {
+            /* Generate memory reference with write-back to SP.  */
+            rtx insn;
+            rtx addr = gen_rtx_MEM (XFmode,
+                                    gen_rtx_POST_INC (SImode,
+                                                      stack_pointer_rtx));
+            set_mem_alias_set (addr, get_frame_alias_set ());
+            insn = emit_insn (gen_movxf (gen_rtx_REG (XFmode, i), addr));
+            REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
+                                               gen_rtx_REG (XFmode, i),
+                                               NULL_RTX);
+          }
+    }
+  else
+    {
+      int idx = 0;
+      rtx load_seq[5];
+      rtx par;
+      rtx tmp;
+      rtx dwarf = NULL_RTX;
+
+      for (i = FIRST_FPA_REGNUM; i <= LAST_FPA_REGNUM; i++)
+        {
+          if (idx == 4)
+            {
+              load_seq[0] = gen_rtx_SET (VOIDmode,
+                                         stack_pointer_rtx,
+                                         plus_constant (stack_pointer_rtx,
+                                                        12 * idx));
+              tmp = gen_rtx_PARALLEL (VOIDmode,
+                                      gen_rtvec_v (idx + 1, load_seq));
+              par = emit_insn (tmp);
+              REG_NOTES (par) = dwarf;
+              dwarf = NULL_RTX;
+              idx = 0;
+            }
+
+          if (df_regs_ever_live_p (i) && !call_used_regs[i])
+            {
+              tmp = gen_frame_mem (XFmode,
+                                   plus_constant (stack_pointer_rtx, 12 * idx));
+              load_seq[idx + 1] = gen_rtx_SET (VOIDmode,
+                                               gen_rtx_REG (XFmode, i),
+                                               tmp);
+              dwarf = alloc_reg_note (REG_CFA_RESTORE,
+                                      gen_rtx_REG (XFmode, i),
+                                      dwarf);
+              idx++;
+            }
+          else
+            {
+              if (idx)
+                {
+                  /* Create parallel and emit.  */
+                  load_seq[0] = gen_rtx_SET (VOIDmode,
+                                             stack_pointer_rtx,
+                                             plus_constant (stack_pointer_rtx,
+                                                            12 * idx));
+                  par = emit_insn (gen_rtx_PARALLEL (VOIDmode,
+                                                     gen_rtvec_v (idx + 1,
+                                                                  load_seq)));
+                  REG_NOTES (par) = dwarf;
+                  dwarf = NULL_RTX;
+                  idx = 0;
+                }
+            }
+        }
+
+      if (idx)
+        {
+          load_seq[0] = gen_rtx_SET (VOIDmode,
+                                     stack_pointer_rtx,
+                                     plus_constant (stack_pointer_rtx,
+                                                    12 * idx));
+          par = emit_insn (gen_rtx_PARALLEL (VOIDmode,
+                                             gen_rtvec_v (idx + 1, load_seq)));
+          REG_NOTES (par) = dwarf;
+          dwarf = NULL_RTX;
+          idx = 0;
+        }
+    }
+
+  if (TARGET_IWMMXT)
+    for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
+      if (df_regs_ever_live_p (i) && !call_used_regs[i])
+        {
+          rtx insn;
+          rtx addr = gen_rtx_MEM (V2SImode,
+                                  gen_rtx_POST_INC (SImode,
+                                                    stack_pointer_rtx));
+          set_mem_alias_set (addr, get_frame_alias_set ());
+          insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
+          REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
+                                             gen_rtx_REG (V2SImode, i),
+                                             NULL_RTX);
+        }
+
+  if (saved_regs_mask)
+    {
+      rtx insn;
+      bool return_in_pc = false;
+
+      if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
+          && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
+          && !IS_STACKALIGN (func_type)
+          && really_return
+          && crtl->args.pretend_args_size == 0
+          && saved_regs_mask & (1 << LR_REGNUM)
+          && !crtl->calls_eh_return)
+        {
+          saved_regs_mask &= ~(1 << LR_REGNUM);
+          saved_regs_mask |= (1 << PC_REGNUM);
+          return_in_pc = true;
+        }
+
+      if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
+        {
+          for (i = 0; i <= LAST_ARM_REGNUM; i++)
+            if (saved_regs_mask & (1 << i))
+              {
+                rtx addr = gen_rtx_MEM (SImode,
+                                        gen_rtx_POST_INC (SImode,
+                                                          stack_pointer_rtx));
+                set_mem_alias_set (addr, get_frame_alias_set ());
+
+                if (i == PC_REGNUM)
+                  {
+                    insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+                    XVECEXP (insn, 0, 0) = ret_rtx;
+                    XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
+                                                        gen_rtx_REG (SImode, i),
+                                                        addr);
+                    RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
+                    insn = emit_jump_insn (insn);
+                  }
+                else
+                  {
+                    insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
+                                                 addr));
+                    REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
+                                                       gen_rtx_REG (SImode, i),
+                                                       NULL_RTX);
+                  }
+              }
+        }
+      else
+        {
+          arm_emit_multi_reg_pop (saved_regs_mask);
+        }
+
+      if (return_in_pc == true)
+        return;
+    }
+
+  if (crtl->args.pretend_args_size)
+    emit_insn (gen_addsi3 (stack_pointer_rtx,
+                           stack_pointer_rtx,
+                           GEN_INT (crtl->args.pretend_args_size)));
+
+  if (!really_return)
+    return;
+
+  if (crtl->calls_eh_return)
+    emit_insn (gen_addsi3 (stack_pointer_rtx,
+                           stack_pointer_rtx,
+                           gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
+
+  if (IS_STACKALIGN (func_type))
+    /* Restore the original stack pointer.  Before prologue, the stack was
+       realigned and the original stack pointer saved in r0.  For details,
+       see comment in arm_expand_prologue.  */
+    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
+
+  emit_jump_insn (simple_return_rtx);
+}
+
 /* Implementation of insn prologue_thumb1_interwork.  This is the first
    "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
 
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 3a237c8..d1c1894 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -10625,14 +10625,21 @@ 
   if (crtl->calls_eh_return)
     emit_insn (gen_prologue_use (gen_rtx_REG (Pmode, 2)));
   if (TARGET_THUMB1)
-    thumb1_expand_epilogue ();
-  else if (USE_RETURN_INSN (FALSE))
-    {
-      emit_jump_insn (gen_return ());
-      DONE;
-    }
-  emit_jump_insn (gen_rtx_UNSPEC_VOLATILE (VOIDmode,
-	gen_rtvec (1, ret_rtx), VUNSPEC_EPILOGUE));
+   {
+     thumb1_expand_epilogue ();
+     emit_jump_insn (gen_rtx_UNSPEC_VOLATILE (VOIDmode,
+                     gen_rtvec (1, ret_rtx), VUNSPEC_EPILOGUE));
+   }
+  else if (HAVE_return)
+   {
+     /* HAVE_return is testing for USE_RETURN_INSN (FALSE).  Hence,
+        no need for explicit testing again.  */
+     emit_jump_insn (gen_return ());
+   }
+  else if (TARGET_32BIT)
+   {
+    arm_expand_epilogue (true);
+   }
   DONE;
   "
 )
@@ -10649,22 +10656,14 @@ 
 ;; to add an unspec of the link register to ensure that flow
 ;; does not think that it is unused by the sibcall branch that
 ;; will replace the standard function epilogue.
-(define_insn "sibcall_epilogue"
-  [(parallel [(unspec:SI [(reg:SI LR_REGNUM)] UNSPEC_PROLOGUE_USE)
-              (unspec_volatile [(return)] VUNSPEC_EPILOGUE)])]
-  "TARGET_32BIT"
-  "*
-  if (use_return_insn (FALSE, next_nonnote_insn (insn)))
-    return output_return_instruction (const_true_rtx, FALSE, FALSE);
-  return arm_output_epilogue (next_nonnote_insn (insn));
-  "
-;; Length is absolute worst case
-  [(set_attr "length" "44")
-   (set_attr "type" "block")
-   ;; We don't clobber the conditions, but the potential length of this
-   ;; operation is sufficient to make conditionalizing the sequence 
-   ;; unlikely to be profitable.
-   (set_attr "conds" "clob")]
+(define_expand "sibcall_epilogue"
+   [(parallel [(unspec:SI [(reg:SI LR_REGNUM)] UNSPEC_PROLOGUE_USE)
+               (unspec_volatile [(return)] VUNSPEC_EPILOGUE)])]
+   "TARGET_32BIT"
+   "
+   arm_expand_epilogue (false);
+   DONE;
+   "
 )
 
 (define_insn "*epilogue_insns"