Patchwork [ARM,3/4] Epilogue using LDRD in Thumb mode

login
register
mail settings
Submitter Greta Yorsh
Date Oct. 10, 2012, 3:03 p.m.
Message ID <001f01cda6f8$6ab6af70$40240e50$@yorsh@arm.com>
Download mbox | patch
Permalink /patch/190669/
State New
Headers show

Comments

Greta Yorsh - Oct. 10, 2012, 3:03 p.m.
Generate epilogue using LDRD in Thumb mode when prefer_ldrd_strd is set in
tune_params.

ChangeLog

gcc/

2012-09-13  Sameera Deshpande  <sameera.deshpande@arm.com>
            Greta Yorsh  <Greta.Yorsh@arm.com>

    * config/arm/arm.c (thumb2_emit_ldrd_pop): New function.
      (arm_expand_epilogue): Use the new function.
Richard Earnshaw - Oct. 19, 2012, 2:42 p.m.
On 10/10/12 16:03, Greta Yorsh wrote:
> Generate epilogue using LDRD in Thumb mode when prefer_ldrd_strd is set in
> tune_params.
>
> ChangeLog
>
> gcc/
>
> 2012-09-13  Sameera Deshpande  <sameera.deshpande@arm.com>
>              Greta Yorsh  <Greta.Yorsh@arm.com>
>
>      * config/arm/arm.c (thumb2_emit_ldrd_pop): New function.
>        (arm_expand_epilogue): Use the new function.
>

This is OK, apart from:


>
> +
> +  /* Var j iterates over all the registers to gather all the registers in
> +     saved_regs_mask.  Var i gives index of saved registers in stack frame.
> +     A PARALLEL RTX of register-pair is created here, so that pattern for
> +     LDRD can be matched.  As PC is always last register to be popped, and
> +     we have already decremented num_regs if PC, we don't have to worry
> +     about PC in this loop.  */
> +  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
> +    if (saved_regs_mask & (1 << j))
> +      {
> +        gcc_assert (j != SP_REGNUM);

Please move the assert outside of the loop.

R.

Patch

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 1212a93..f330da3 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -16150,6 +16150,143 @@  arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
   REG_NOTES (par) = dwarf;
 }
 
+/* Generate and emit a pattern that will be recognized as LDRD pattern.  If even
+   number of registers are being popped, multiple LDRD patterns are created for
+   all register pairs.  If odd number of registers are popped, last register is
+   loaded by using LDR pattern.  */
+static void
+thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
+{
+  int num_regs = 0;
+  int i, j;
+  rtx par = NULL_RTX;
+  rtx dwarf = NULL_RTX;
+  rtx tmp, reg, tmp1;
+  bool return_in_pc;
+
+  return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
+  for (i = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1 << i))
+      num_regs++;
+
+  gcc_assert (num_regs && num_regs <= 16);
+
+  /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
+     to be popped.  So, if num_regs is even, now it will become odd,
+     and we can generate pop with PC.  If num_regs is odd, it will be
+     even now, and ldr with return can be generated for PC.  */
+  if (return_in_pc)
+    num_regs--;
+
+  /* Var j iterates over all the registers to gather all the registers in
+     saved_regs_mask.  Var i gives index of saved registers in stack frame.
+     A PARALLEL RTX of register-pair is created here, so that pattern for
+     LDRD can be matched.  As PC is always last register to be popped, and
+     we have already decremented num_regs if PC, we don't have to worry
+     about PC in this loop.  */
+  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
+    if (saved_regs_mask & (1 << j))
+      {
+        gcc_assert (j != SP_REGNUM);
+
+        /* Create RTX for memory load.  */
+        reg = gen_rtx_REG (SImode, j);
+        tmp = gen_rtx_SET (SImode,
+                           reg,
+                           gen_frame_mem (SImode,
+                               plus_constant (Pmode,
+                                              stack_pointer_rtx, 4 * i)));
+        RTX_FRAME_RELATED_P (tmp) = 1;
+
+        if (i % 2 == 0)
+          {
+            /* When saved-register index (i) is even, the RTX to be emitted is
+               yet to be created.  Hence create it first.  The LDRD pattern we
+               are generating is :
+               [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
+                 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
+               where target registers need not be consecutive.  */
+            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+            dwarf = NULL_RTX;
+          }
+
+        /* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
+           added as 0th element and if i is odd, reg_i is added as 1st element
+           of LDRD pattern shown above.  */
+        XVECEXP (par, 0, (i % 2)) = tmp;
+        dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
+
+        if ((i % 2) == 1)
+          {
+            /* When saved-register index (i) is odd, RTXs for both the registers
+               to be loaded are generated in above given LDRD pattern, and the
+               pattern can be emitted now.  */
+            par = emit_insn (par);
+            REG_NOTES (par) = dwarf;
+          }
+
+        i++;
+      }
+
+  /* If the number of registers pushed is odd AND return_in_pc is false OR
+     number of registers are even AND return_in_pc is true, last register is
+     popped using LDR.  It can be PC as well.  Hence, adjust the stack first and
+     then LDR with post increment.  */
+
+  /* Increment the stack pointer, based on there being
+     num_regs 4-byte registers to restore.  */
+  tmp = gen_rtx_SET (VOIDmode,
+                     stack_pointer_rtx,
+                     plus_constant (Pmode, stack_pointer_rtx, 4 * i));
+  RTX_FRAME_RELATED_P (tmp) = 1;
+  emit_insn (tmp);
+
+  dwarf = NULL_RTX;
+
+  if (((num_regs % 2) == 1 && !return_in_pc)
+      || ((num_regs % 2) == 0 && return_in_pc))
+    {
+      /* Scan for the single register to be popped.  Skip until the saved
+         register is found.  */
+      for (; (saved_regs_mask & (1 << j)) == 0; j++);
+
+      /* Gen LDR with post increment here.  */
+      tmp1 = gen_rtx_MEM (SImode,
+                          gen_rtx_POST_INC (SImode,
+                                            stack_pointer_rtx));
+      set_mem_alias_set (tmp1, get_frame_alias_set ());
+
+      reg = gen_rtx_REG (SImode, j);
+      tmp = gen_rtx_SET (SImode, reg, tmp1);
+      RTX_FRAME_RELATED_P (tmp) = 1;
+      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
+
+      if (return_in_pc)
+        {
+          /* If return_in_pc, j must be PC_REGNUM.  */
+          gcc_assert (j == PC_REGNUM);
+          par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+          XVECEXP (par, 0, 0) = ret_rtx;
+          XVECEXP (par, 0, 1) = tmp;
+          par = emit_jump_insn (par);
+        }
+      else
+        {
+          par = emit_insn (tmp);
+        }
+
+      REG_NOTES (par) = dwarf;
+    }
+  else if ((num_regs % 2) == 1 && return_in_pc)
+    {
+      /* There are 2 registers to be popped.  So, generate the pattern
+         pop_multiple_with_stack_update_and_return to pop in PC.  */
+      arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
+    }
+
+  return;
+}
+
 /* Calculate the size of the return value that is passed in registers.  */
 static unsigned
 arm_size_return_regs (void)
@@ -23102,7 +23239,16 @@  arm_expand_epilogue (bool really_return)
         }
       else
         {
-          arm_emit_multi_reg_pop (saved_regs_mask);
+          if (current_tune->prefer_ldrd_strd
+              && !optimize_function_for_size_p (cfun))
+            {
+              if (TARGET_THUMB2)
+                thumb2_emit_ldrd_pop (saved_regs_mask);
+              else
+                arm_emit_multi_reg_pop (saved_regs_mask);
+            }
+          else
+            arm_emit_multi_reg_pop (saved_regs_mask);
         }
 
       if (return_in_pc == true)