diff mbox

[ARM,4/8] Epilogue in RTL: expand epilogue for apcs frame

Message ID 000e01cd3f35$6bf83e60$43e8bb20$@Yorsh@arm.com
State New
Headers show

Commit Message

Greta Yorsh May 31, 2012, 1:58 p.m. UTC
Helper function for epilogue expansion. Emit RTL for APCS frame epilogue
(when -mapcs-frame command line option is specified).
This function is used by a later patch.

For APCS frame epilogue, the compiler currently generates LDM with SP as
both the base register and one of the destination registers. For example:

@ APCS_FRAME epilogue
ldmfd   sp, {r4, fp, sp, pc}

@ non-APCS_FRAME epilogue
ldmfd     sp!, {r4, fp, pc}

The use of SP in LDM register list is deprecated, but this patch does not
address the problem.

To generate the epilogue for APCS frame in RTL, this patch adds a new
alternative to the *arm_addsi3 insn, in ARM mode only, to generate "sub sp, fp,
#imm". Previously, there was no pattern to generate sub with SP as the
destination register and a register other than SP as the source operand.


ChangeLog:

gcc

2012-05-31  Ian Bolton  <ian.bolton@arm.com>
            Sameera Deshpande  <sameera.deshpande@arm.com>
            Greta Yorsh  <greta.yorsh@arm.com>

        * config/arm/arm.c (arm_expand_epilogue_apcs_frame): New function.
        * config/arm/arm.md (*arm_addsi3): Add an alternative.

Comments

Richard Earnshaw June 15, 2012, 10:40 a.m. UTC | #1
On 31/05/12 14:58, Greta Yorsh wrote:
> Helper function for epilogue expansion. Emit RTL for APCS frame epilogue
> (when -mapcs-frame command line option is specified).
> This function is used by a later patch.
> 
> For APCS frame epilogue, the compiler currently generates LDM with SP as
> both the base register
> and one of the destination registers. For example:
> 
> @ APCS_FRAME epilogue
> ldmfd   sp, {r4, fp, sp, pc}
> 
> @ non-APCS_FRAME epilogue
> ldmfd     sp!, {r4, fp, pc}
> 
> The use of SP in LDM register list is deprecated, but this patch does not
> address the problem.
> 
> To generate the epilogue for APCS frame in RTL, this patch adds a new
> alternative to arm_addsi2 insn in ARM mode only to generate "sub sp, fp,
> #imm". Previously, there was no pattern to generate sub with SP as the
> destination register and not SP as the operand register.
> 
> 
> ChangeLog:
> 
> gcc
> 
> 2012-05-31  Ian Bolton  <ian.bolton@arm.com>
>             Sameera Deshpande  <sameera.deshpande@arm.com>
>             Greta Yorsh  <greta.yorsh@arm.com>
> 
>         * config/arm/arm.c (arm_expand_epilogue_apcs_frame): New function.
>         * config/arm/arm.md (arm_addsi3) Add an alternative.
> 

The FPA support is now obsolete.  Please remove that.

OK with that change.

R.
diff mbox

Patch

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 491ffea..d6b4c2e 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -22896,6 +22896,232 @@  thumb1_expand_epilogue (void)
     emit_use (gen_rtx_REG (SImode, LR_REGNUM));
 }
 
+/* Emit RTL for the APCS frame epilogue; return only if REALLY_RETURN.  */
+static void
+arm_expand_epilogue_apcs_frame (bool really_return)
+{
+  unsigned long func_type;
+  unsigned long saved_regs_mask;
+  int num_regs = 0;
+  int i;
+  int floats_from_frame = 0;
+  arm_stack_offsets *offsets;
+
+  gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
+  func_type = arm_current_func_type ();
+
+  /* Get frame offsets for ARM.  */
+  offsets = arm_get_frame_offsets ();
+  saved_regs_mask = offsets->saved_regs_mask;
+
+  /* Find the offset of the floating-point save area in the frame.  */
+  floats_from_frame = offsets->saved_args - offsets->frame;
+
+  /* Count the saved core registers and how far away the floats are.  */
+  for (i = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1 << i))
+      {
+        num_regs++;
+        floats_from_frame += 4;
+      }
+
+  if (TARGET_HARD_FLOAT && TARGET_VFP)
+    {
+      int start_reg;
+
+      /* The offset is from IP_REGNUM.  */
+      int saved_size = arm_get_vfp_saved_size ();
+      if (saved_size > 0)
+        {
+          floats_from_frame += saved_size;
+          emit_insn (gen_addsi3 (gen_rtx_REG (SImode, IP_REGNUM),
+                                 hard_frame_pointer_rtx,
+                                 GEN_INT (-floats_from_frame)));
+        }
+
+      /* Generate VFP register multi-pop.  */
+      start_reg = FIRST_VFP_REGNUM;
+
+      for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
+        /* Look for a case where a reg does not need restoring.  */
+        if ((!df_regs_ever_live_p (i) || call_used_regs[i])
+            && (!df_regs_ever_live_p (i + 1)
+                || call_used_regs[i + 1]))
+          {
+            if (start_reg != i)
+              arm_emit_vfp_multi_reg_pop (start_reg,
+                                          (i - start_reg) / 2,
+                                          gen_rtx_REG (SImode,
+                                                       IP_REGNUM));
+            start_reg = i + 2;
+          }
+
+      /* Restore the remaining regs that we have discovered (or possibly
+         even all of them, if the conditional in the for loop never
+         fired).  */
+      if (start_reg != i)
+        arm_emit_vfp_multi_reg_pop (start_reg,
+                                    (i - start_reg) / 2,
+                                    gen_rtx_REG (SImode, IP_REGNUM));
+    }
+  else if (TARGET_FPA_EMU2)
+    {
+      for (i = LAST_FPA_REGNUM; i >= FIRST_FPA_REGNUM; i--)
+        if (df_regs_ever_live_p (i) && !call_used_regs[i])
+          {
+            rtx addr;
+            rtx insn;
+            floats_from_frame += 12;
+            addr = gen_rtx_MEM (XFmode,
+                                gen_rtx_PLUS (SImode,
+                                              hard_frame_pointer_rtx,
+                                              GEN_INT (- floats_from_frame)));
+            set_mem_alias_set (addr, get_frame_alias_set ());
+            insn = emit_insn (gen_rtx_SET (XFmode,
+                                           gen_rtx_REG (XFmode, i),
+                                           addr));
+            REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
+                                               gen_rtx_REG (XFmode, i),
+                                               NULL_RTX);
+          }
+    }
+  else
+    {
+      int idx = 0;
+      rtx load_seq[4];
+      rtx dwarf = NULL_RTX;
+      rtx par;
+      rtx frame_mem;
+
+      for (i = LAST_FPA_REGNUM; i >= FIRST_FPA_REGNUM; i--)
+        {
+          /* We can't unstack more than four registers at once.  */
+          if (idx == 4)
+            {
+              par = emit_insn (gen_rtx_PARALLEL (VOIDmode,
+                                                 gen_rtvec_v (idx, load_seq)));
+              REG_NOTES (par) = dwarf;
+              dwarf = NULL_RTX;
+              idx = 0;
+            }
+
+          if (df_regs_ever_live_p (i) && !call_used_regs[i])
+            {
+              floats_from_frame += 12;
+
+              frame_mem = gen_frame_mem (XFmode,
+                                         plus_constant (hard_frame_pointer_rtx,
+                                                        - floats_from_frame));
+              load_seq[idx] = gen_rtx_SET (VOIDmode, gen_rtx_REG (XFmode, i),
+                                           frame_mem);
+              dwarf = alloc_reg_note (REG_CFA_RESTORE, gen_rtx_REG (XFmode, i),
+                                      dwarf);
+              idx++;
+            }
+          else if (idx)
+            {
+              /* Registers must be consecutive.  */
+              par = emit_insn (gen_rtx_PARALLEL (VOIDmode,
+                                                 gen_rtvec_v (idx, load_seq)));
+              REG_NOTES (par) = dwarf;
+              dwarf = NULL_RTX;
+              idx = 0;
+            }
+        }
+
+      /* Pop the last registers.  */
+      if (idx)
+        {
+          par = emit_insn (gen_rtx_PARALLEL (VOIDmode,
+                                             gen_rtvec_v (idx, load_seq)));
+          REG_NOTES (par) = dwarf;
+        }
+    }
+
+  if (TARGET_IWMMXT)
+    {
+      /* The frame pointer is guaranteed to be non-double-word aligned, as
+         it is set to double-word-aligned old_stack_pointer - 4.  */
+      rtx insn;
+      int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
+
+      for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
+        if (df_regs_ever_live_p (i) && !call_used_regs[i])
+          {
+            rtx addr = gen_frame_mem (V2SImode,
+                                 plus_constant (hard_frame_pointer_rtx,
+                                                - lrm_count * 4));
+            insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
+            REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
+                                               gen_rtx_REG (V2SImode, i),
+                                               NULL_RTX);
+            lrm_count += 2;
+          }
+    }
+
+  /* saved_regs_mask should contain IP which contains old stack pointer
+     at the time of activation creation.  Since SP and IP are adjacent registers,
+     we can restore the value directly into SP.  */
+  gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
+  saved_regs_mask &= ~(1 << IP_REGNUM);
+  saved_regs_mask |= (1 << SP_REGNUM);
+
+  /* There are two registers left in saved_regs_mask - LR and PC.  We
+     only need to restore LR (the return address), but to
+     save time we can load it directly into PC, unless we need a
+     special function exit sequence, or we are not really returning.  */
+  if (really_return
+      && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
+      && !crtl->calls_eh_return)
+    /* Delete LR from the register mask, so that LR on
+       the stack is loaded into the PC in the register mask.  */
+    saved_regs_mask &= ~(1 << LR_REGNUM);
+  else
+    saved_regs_mask &= ~(1 << PC_REGNUM);
+
+  num_regs = bit_count (saved_regs_mask);
+  if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
+    {
+      /* Unwind the stack to just below the saved registers.  */
+      emit_insn (gen_addsi3 (stack_pointer_rtx,
+                             hard_frame_pointer_rtx,
+                             GEN_INT (- 4 * num_regs)));
+    }
+
+  arm_emit_multi_reg_pop (saved_regs_mask);
+
+  if (IS_INTERRUPT (func_type))
+    {
+      /* Interrupt handlers will have pushed the
+         IP onto the stack, so restore it now.  */
+      rtx insn;
+      rtx addr = gen_rtx_MEM (SImode,
+                              gen_rtx_POST_INC (SImode,
+                              stack_pointer_rtx));
+      set_mem_alias_set (addr, get_frame_alias_set ());
+      insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
+      REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
+                                         gen_rtx_REG (SImode, IP_REGNUM),
+                                         NULL_RTX);
+    }
+
+  if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
+    return;
+
+  if (crtl->calls_eh_return)
+    /* Add the EH adjustment register, not its number as an immediate.  */
+    emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+                           gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
+
+  if (IS_STACKALIGN (func_type))
+    /* Restore the original stack pointer.  Before prologue, the stack was
+       realigned and the original stack pointer saved in r0.  For details,
+       see comment in arm_expand_prologue.  */
+    emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
+
+  emit_jump_insn (simple_return_rtx);
+}
+
 /* Implementation of insn prologue_thumb1_interwork.  This is the first
    "instruction" of a function called in ARM mode.  Swap to thumb mode.  */
 
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 98387fa..3a237c8 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -718,9 +718,9 @@ 
 ;;  (plus (reg rN) (reg sp)) into (reg rN).  In this case reload will
 ;; put the duplicated register first, and not try the commutative version.
 (define_insn_and_split "*arm_addsi3"
-  [(set (match_operand:SI          0 "s_register_operand" "=r, k,r,r, k, r, k,r, k, r")
-	(plus:SI (match_operand:SI 1 "s_register_operand" "%rk,k,r,rk,k, rk,k,rk,k, rk")
-		 (match_operand:SI 2 "reg_or_int_operand" "rI,rI,k,Pj,Pj,L, L,PJ,PJ,?n")))]
+  [(set (match_operand:SI          0 "s_register_operand" "=r, k,r,r, k, r, k,k,r, k, r")
+	(plus:SI (match_operand:SI 1 "s_register_operand" "%rk,k,r,rk,k, rk,k,r,rk,k, rk")
+		 (match_operand:SI 2 "reg_or_int_operand" "rI,rI,k,Pj,Pj,L, L,L,PJ,PJ,?n")))]
   "TARGET_32BIT"
   "@
    add%?\\t%0, %1, %2
@@ -730,6 +730,7 @@ 
    addw%?\\t%0, %1, %2
    sub%?\\t%0, %1, #%n2
    sub%?\\t%0, %1, #%n2
+   sub%?\\t%0, %1, #%n2
    subw%?\\t%0, %1, #%n2
    subw%?\\t%0, %1, #%n2
    #"
@@ -744,9 +745,9 @@ 
 		      operands[1], 0);
   DONE;
   "
-  [(set_attr "length" "4,4,4,4,4,4,4,4,4,16")
+  [(set_attr "length" "4,4,4,4,4,4,4,4,4,4,16")
    (set_attr "predicable" "yes")
-   (set_attr "arch" "*,*,*,t2,t2,*,*,t2,t2,*")]
+   (set_attr "arch" "*,*,*,t2,t2,*,*,a,t2,t2,*")]
 )
 
 (define_insn_and_split "*thumb1_addsi3"