Patchwork [RFA/ARM,05/05] : LDRD generation instead of POP in A15 ARM epilogue.

login
register
mail settings
Submitter Sameera Deshpande
Date Dec. 30, 2011, 12:41 p.m.
Message ID <1325248900.20655.204.camel@e102549-lin.cambridge.arm.com>
Download mbox | patch
Permalink /patch/133642/
State New
Headers show

Comments

Sameera Deshpande - Dec. 30, 2011, 12:41 p.m.
Hi Ramana,

Please find attached the revised LDRD generation patch for A15 ARM mode.

Because of the major rework in the ARM RTL epilogue patch, this patch has
undergone some changes.

The patch is tested with check-gcc, bootstrap and check-gdb without
regression.

Ok for trunk?

--

Patch

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index d5c651c..46becfb 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -16101,6 +16101,135 @@  bad_reg_pair_for_thumb_ldrd_strd (rtx src1, rtx src2)
           || (REGNO (src2) == SP_REGNUM));
 }
 
+/* Emit the ARM-mode epilogue pop sequence for SAVED_REGS_MASK, using LDRD
+   where possible.  LDRD in ARM mode needs an even/odd pair of consecutive
+   registers to be loaded.  This function keeps accumulating registers that
+   cannot be paired until the first such pair is found.  It then generates a
+   multi-reg POP for all accumulated registers, followed by an LDRD with
+   write-back for the pair.  This process is repeated until all the registers
+   are loaded from the stack.  Multi-register POP takes care of lone registers
+   as well.  However, LDRD cannot be generated for PC, as results are
+   unpredictable.  Hence, if PC is in SAVED_REGS_MASK (which requires
+   REALLY_RETURN), generate multi-reg POP with RETURN or LDR with RETURN
+   depending upon the number of registers in REGS_TO_BE_POPPED_MASK.  */
+static void
+arm_emit_ldrd_pop (unsigned long saved_regs_mask, bool really_return)
+{
+  int num_regs = 0;
+  int i, j;
+  rtx par = NULL_RTX;
+  rtx insn = NULL_RTX;
+  rtx dwarf = NULL_RTX;
+  rtx tmp;
+  unsigned long regs_to_be_popped_mask = 0;
+  bool pc_in_list = false;
+
+  for (i = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1 << i))
+      num_regs++;
+
+  gcc_assert (num_regs && num_regs <= 16);
+
+  /* I counts the saved registers visited so far; J is the number of the
+     register being examined.  */
+  for (i = 0, j = 0; i < num_regs; j++)
+    if (saved_regs_mask & (1 << j))
+      {
+        i++;
+        if ((j % 2) == 0
+            && (saved_regs_mask & (1 << (j + 1)))
+            && (j + 1) != SP_REGNUM
+            && (j + 1) != PC_REGNUM
+            && regs_to_be_popped_mask)
+          {
+            /* Current register and next register form a register pair for
+               which LDRD can be generated.  Emit a POP for the registers
+               accumulated so far and reset regs_to_be_popped_mask; the pair
+               itself is loaded by the LDRD emitted when the odd register is
+               reached.  SP is excluded from pairing as the results are
+               unpredictable if a loaded register is the same as the base
+               register.  PC is always popped last, so it is never here.  */
+            arm_emit_multi_reg_pop (regs_to_be_popped_mask, pc_in_list);
+            pc_in_list = false;
+            regs_to_be_popped_mask = 0;
+            continue;
+          }
+
+        if (j == PC_REGNUM)
+          {
+            gcc_assert (really_return);
+            pc_in_list = true;
+          }
+
+        regs_to_be_popped_mask |= (1 << j);
+
+        if ((j % 2) == 1
+            && (saved_regs_mask & (1 << (j - 1)))
+            && j != SP_REGNUM
+            && j != PC_REGNUM)
+          {
+            /* Generate a LDRD for register pair R_<j-1>, R_<j>.  The pattern
+               generated here is
+               [(SET SP, (PLUS SP, 8))
+                (SET R_<j-1>, (MEM SP))
+                (SET R_<j>, (MEM (PLUS SP, 4)))].  */
+            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
+
+            tmp = gen_rtx_SET (VOIDmode,
+                               stack_pointer_rtx,
+                               plus_constant (stack_pointer_rtx, 8));
+            RTX_FRAME_RELATED_P (tmp) = 1;
+            XVECEXP (par, 0, 0) = tmp;
+
+            tmp = gen_rtx_SET (VOIDmode,
+                               gen_rtx_REG (SImode, j - 1),
+                               gen_frame_mem (SImode, stack_pointer_rtx));
+            RTX_FRAME_RELATED_P (tmp) = 1;
+            XVECEXP (par, 0, 1) = tmp;
+            dwarf = alloc_reg_note (REG_CFA_RESTORE,
+                                    gen_rtx_REG (SImode, j - 1),
+                                    dwarf);
+
+            tmp = gen_rtx_SET (VOIDmode,
+                               gen_rtx_REG (SImode, j),
+                               gen_frame_mem (SImode,
+                                      plus_constant (stack_pointer_rtx, 4)));
+            RTX_FRAME_RELATED_P (tmp) = 1;
+            XVECEXP (par, 0, 2) = tmp;
+            dwarf = alloc_reg_note (REG_CFA_RESTORE,
+                                    gen_rtx_REG (SImode, j),
+                                    dwarf);
+
+            insn = emit_insn (par);
+            REG_NOTES (insn) = dwarf;
+            pc_in_list = false;
+            regs_to_be_popped_mask = 0;
+            dwarf = NULL_RTX;
+          }
+      }
+
+  if (regs_to_be_popped_mask)
+    {
+      /* A single PC pop can happen here.  Take care of that.  */
+      if (pc_in_list && (regs_to_be_popped_mask == (1 << PC_REGNUM)))
+        {
+          /* Only PC is to be popped.  */
+          par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+          XVECEXP (par, 0, 0) = ret_rtx;
+          tmp = gen_rtx_SET (VOIDmode,
+                             gen_rtx_REG (SImode, PC_REGNUM),
+                             gen_frame_mem (SImode,
+                                            gen_rtx_POST_INC (SImode,
+                                                         stack_pointer_rtx)));
+          RTX_FRAME_RELATED_P (tmp) = 1;
+          XVECEXP (par, 0, 1) = tmp;
+          emit_jump_insn (par);
+        }
+      else
+        arm_emit_multi_reg_pop (regs_to_be_popped_mask, pc_in_list);
+    }
+}
+
 /* Generate and emit a pattern that will be recognized as LDRD pattern.  If even
    number of registers are being popped, multiple LDRD patterns are created for
    all register pairs.  If odd number of registers are popped, last register is
@@ -23019,12 +23148,14 @@  arm_expand_epilogue (bool really_return)
             else
               {
                 if (!current_tune->prefer_ldrd_strd
-                    || optimize_function_for_size_p (cfun)
-                    || TARGET_ARM)
+                    || optimize_function_for_size_p (cfun))
                   arm_emit_multi_reg_pop (saved_regs_mask, return_in_pc);
                 else
                   /* Generate LDRD pattern instead of POP pattern.  */
-                  thumb2_emit_ldrd_pop (saved_regs_mask, return_in_pc);
+                  if (TARGET_THUMB2)
+                    thumb2_emit_ldrd_pop (saved_regs_mask, return_in_pc);
+                  else
+                    arm_emit_ldrd_pop (saved_regs_mask, return_in_pc);
               }
 
             if (return_in_pc == true)
diff --git a/gcc/config/arm/ldmstm.md b/gcc/config/arm/ldmstm.md
index ffa675d..149fd8b 100644
--- a/gcc/config/arm/ldmstm.md
+++ b/gcc/config/arm/ldmstm.md
@@ -109,6 +109,54 @@ 
   "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));"
 )
 
+(define_insn "*arm_ldrd_base_update"  ; ARM-mode LDRD, post-indexed by 8.
+  [(set (match_operand:SI 0 "arm_hard_register_operand" "+rk")  ; Base reg.
+        (plus:SI (match_dup 0)
+                 (const_int 8)))
+   (set (match_operand:SI 1 "arm_hard_register_operand" "=r")  ; From [base].
+        (mem:SI (match_dup 0)))
+   (set (match_operand:SI 2 "arm_hard_register_operand" "=r")  ; From [base+4].
+        (mem:SI (plus:SI (match_dup 0)
+                         (const_int 4))))]
+  "(TARGET_ARM && current_tune->prefer_ldrd_strd
+     && (!bad_reg_pair_for_arm_ldrd_strd (operands[1], operands[2]))
+     && (REGNO (operands[1]) != REGNO (operands[0]))
+     && (REGNO (operands[2]) != REGNO (operands[0])))"  ; Dest regs != base.
+  "ldr%(d%)\t%1, %2, [%0], #8"
+  [(set_attr "type" "load2")
+   (set_attr "predicable" "yes")])
+
+(define_peephole2  ; Rewrite the 3-set parallel as one DImode post_inc load.
+  [(parallel
+    [(set (match_operand:SI 0 "arm_hard_register_operand" "")  ; Base reg.
+        (plus:SI (match_dup 0)
+                 (const_int 8)))
+     (set (match_operand:SI 1 "arm_hard_register_operand" "")  ; From [base].
+          (mem:SI (match_dup 0)))
+     (set (match_operand:SI 2 "arm_hard_register_operand" "")  ; From [base+4].
+          (mem:SI (plus:SI (match_dup 0)
+                           (const_int 4))))])]
+  "(TARGET_ARM && current_tune->prefer_ldrd_strd
+     && (!bad_reg_pair_for_arm_ldrd_strd (operands[1], operands[2]))
+     && (REGNO (operands[1]) != REGNO (operands[0]))
+     && (REGNO (operands[2]) != REGNO (operands[0])))"  ; Dest regs != base.
+  [(set (match_dup 1)
+        (mem:DI (post_inc:SI (match_dup 0))))]
+  "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));"  ; Same regno, DImode.
+)
+
+(define_insn "*arm_ldr_with_update"  ; ARM-mode LDR, post-indexed by 4.
+  [(parallel
+    [(set (match_operand:SI 0 "arm_hard_register_operand" "")  ; Base reg.
+        (plus:SI (match_dup 0)
+                 (const_int 4)))
+     (set (match_operand:SI 1 "arm_hard_register_operand" "")  ; From [base].
+          (mem:SI (match_dup 0)))])]
+  "(TARGET_ARM && current_tune->prefer_ldrd_strd)"
+  "ldr%?\t%1, [%0], #4"
+  [(set_attr "type" "load1")
+  (set_attr "predicable" "yes")])
+
 (define_insn "*ldm4_ia"
   [(match_parallel 0 "load_multiple_operation"
     [(set (match_operand:SI 1 "arm_hard_register_operand" "")