Patchwork [RFA/ARM,05/05] : LDRD generation instead of POP in A15 ARM epilogue.

login
register
mail settings
Submitter Sameera Deshpande
Date Oct. 11, 2011, 9:37 a.m.
Message ID <1318325869.2186.67.camel@e102549-lin.cambridge.arm.com>
Download mbox | patch
Permalink /patch/118892/
State New
Headers show

Comments

Sameera Deshpande - Oct. 11, 2011, 9:37 a.m.
Hi!

This patch generates LDRD instead of POP in the epilogue for A15 in ARM mode.

When optimizing for size (optimize_size), the original epilogue is still
generated for A15.  The work involves defining new functions, predicates and patterns.

In this patch we keep accumulating non-consecutive registers until a
register pair that can be loaded with LDRD is found. We then POP all the
accumulated registers first, followed by an LDRD with post-indexed stack update
for the register pair. We repeat this until all the registers in the
register list have been popped.

The patch has been tested with check-gcc, check-gdb and bootstrap, with no
regressions.
 
Changelog entry for Patch to emit LDRD for ARM epilogue in A15:

2011-10-11  Sameera Deshpande
<sameera.deshpande@arm.com>                                             
                                                                           
        * config/arm/arm.c (arm_emit_ldrd_pop): New static function.  
          (arm_expand_epilogue): Update. 
        * config/arm/ldmstm.md (arm_ldrd_base): New pattern.
          (arm_ldr_with_update): Likewise. 
--
Ramana Radhakrishnan - Oct. 21, 2011, 12:45 p.m.
> 2011-10-11  Sameera Deshpande
> <sameera.deshpande@arm.com>
>
>        * config/arm/arm.c (arm_emit_ldrd_pop): New static function.
>          (arm_expand_epilogue): Update.
>        * config/arm/ldmstm.md (arm_ldrd_base): New pattern.
>          (arm_ldr_with_update): Likewise.

rth's comment about REG_CFA_RESTORE applies here as well. Please
change that. Other than that this patch looks OK and please watch out
for stylistic issues from the previous patch.

Ramana

> --
>
>
>

Patch

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 08fa0d5..0b9fd93 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -967,7 +967,7 @@  const struct tune_params arm_cortex_a9_tune =
   ARM_PREFETCH_BENEFICIAL(4,32,32),
   false,					/* Prefer constant pool.  */
   arm_default_branch_cost,
-  false                                         /* Prefer LDRD/STRD.  */
+  true                                          /* Prefer LDRD/STRD.  */
 };
 
 const struct tune_params arm_fa726te_tune =
@@ -15664,6 +15664,145 @@  bad_reg_pair_for_thumb_ldrd_strd (rtx src1, rtx src2)
           || (REGNO (src2) == SP_REGNUM));
 }
 
+/* LDRD in ARM mode needs a consecutive register pair whose first register is
+   even-numbered.  This function keeps accumulating registers until the first
+   such pair is found.  It then generates a multi-reg POP for all accumulated
+   registers, followed by an LDRD with write-back for the register pair.  This
+   process is repeated until all the registers are loaded from the stack.
+   Multi-reg POP takes care of lone registers as well.  However, LDRD cannot
+   be generated for PC, as results are unpredictable.  Hence, if PC is in
+   SAVED_REGS_MASK, generate multi-reg POP with RETURN or LDR with RETURN
+   depending upon number of registers in REGS_TO_BE_POPPED_MASK.  */
+static void
+arm_emit_ldrd_pop (unsigned long saved_regs_mask, bool really_return)
+{
+  int num_regs = 0;
+  int i, j;
+  rtx par = NULL_RTX;
+  rtx insn = NULL_RTX;
+  rtx dwarf = NULL_RTX;
+  rtx tmp, tmp1;
+  unsigned long regs_to_be_popped_mask = 0;
+  bool pc_in_list = false;
+
+  for (i = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1 << i))
+      num_regs++;
+
+  gcc_assert (num_regs && num_regs <= 16);
+
+  for (i = 0, j = 0; i < num_regs; j++)
+    if (saved_regs_mask & (1 << j))
+      {
+        i++;
+        if ((j % 2) == 0
+            && (saved_regs_mask & (1 << (j + 1)))
+            && (j + 1) != SP_REGNUM
+            && (j + 1) != PC_REGNUM
+            && regs_to_be_popped_mask)
+          {
+            /* Current register and next register form a register pair for
+               which LDRD can be generated.  Generate POP for the accumulated
+               registers and reset regs_to_be_popped_mask.  SP is excluded here
+               as the results are unpredictable if a register being loaded is
+               the same as the base register (in this case, SP).  PC is always
+               the last register being popped.  Hence, we don't have to worry
+               about PC here.  */
+            arm_emit_multi_reg_pop (regs_to_be_popped_mask, pc_in_list);
+            pc_in_list = false;
+            regs_to_be_popped_mask = 0;
+            continue;
+          }
+
+        if (j == PC_REGNUM)
+          {
+            gcc_assert (really_return);
+            pc_in_list = 1;
+          }
+
+        regs_to_be_popped_mask |= (1 << j);
+
+        if ((j % 2) == 1
+            && (saved_regs_mask & (1 << (j - 1)))
+            && j != SP_REGNUM
+            && j != PC_REGNUM)
+          {
+             /* Generate an LDRD for register pair R_<j-1>, R_<j>.  The pattern
+                generated here is
+                [(SET SP, (PLUS SP, 8))
+                 (SET R_<j-1>, (MEM SP))
+                 (SET R_<j>, (MEM (PLUS SP, 4)))].  */
+             par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
+             dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));
+
+             tmp = gen_rtx_SET (VOIDmode,
+                                stack_pointer_rtx,
+                                plus_constant (stack_pointer_rtx, 8));
+             tmp1 = gen_rtx_SET (VOIDmode,
+                                 stack_pointer_rtx,
+                                 plus_constant (stack_pointer_rtx, 8));
+             RTX_FRAME_RELATED_P (tmp) = 1;
+             RTX_FRAME_RELATED_P (tmp1) = 1;
+             XVECEXP (par, 0, 0) = tmp;
+             XVECEXP (dwarf, 0, 0) = tmp1;
+
+             tmp = gen_rtx_SET (SImode,
+                                gen_rtx_REG (SImode, j - 1),
+                                gen_frame_mem (SImode, stack_pointer_rtx));
+             RTX_FRAME_RELATED_P (tmp) = 1;
+             tmp1 = gen_rtx_SET (SImode,
+                                gen_rtx_REG (SImode, j - 1),
+                                gen_frame_mem (SImode, stack_pointer_rtx));
+             RTX_FRAME_RELATED_P (tmp1) = 1;
+             XVECEXP (par, 0, 1) = tmp;
+             XVECEXP (dwarf, 0, 1) = tmp1;
+
+             tmp = gen_rtx_SET (SImode,
+                                 gen_rtx_REG (SImode, j),
+                                 gen_frame_mem (SImode,
+                                       plus_constant (stack_pointer_rtx, 4)));
+             RTX_FRAME_RELATED_P (tmp) = 1;
+             tmp1 = gen_rtx_SET (SImode,
+                                 gen_rtx_REG (SImode, j),
+                                 gen_frame_mem (SImode,
+                                       plus_constant (stack_pointer_rtx, 4)));
+             RTX_FRAME_RELATED_P (tmp1) = 1;
+             XVECEXP (par, 0, 2) = tmp;
+             XVECEXP (dwarf, 0, 2) = tmp1;
+
+             insn = emit_insn (par);
+             add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
+             pc_in_list = false;
+             regs_to_be_popped_mask = 0;
+          }
+      }
+
+  if (regs_to_be_popped_mask)
+    {
+      /* A single PC pop can happen here.  Take care of that.  */
+      if (pc_in_list && (regs_to_be_popped_mask == (1 << PC_REGNUM)))
+        {
+          /* Only PC is to be popped.  */
+          par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+          XVECEXP (par, 0, 0) = ret_rtx;
+          tmp = gen_rtx_SET (SImode,
+                             gen_rtx_REG (SImode, PC_REGNUM),
+                             gen_frame_mem (SImode,
+                                            gen_rtx_POST_INC (SImode,
+                                                         stack_pointer_rtx)));
+          RTX_FRAME_RELATED_P (tmp) = 1;
+          XVECEXP (par, 0, 1) = tmp;
+          emit_jump_insn (par);
+        }
+      else
+        {
+          arm_emit_multi_reg_pop (regs_to_be_popped_mask, pc_in_list);
+        }
+    }
+
+  return;
+}
+
 /* Generate and emit a pattern that will be recognized as LDRD pattern.  If even
    number of registers are being popped, multiple LDRD patterns are created for
    all register pairs.  If odd number of registers are popped, last register is
@@ -22488,8 +22627,13 @@  arm_expand_epilogue (bool really_return)
                     saved_regs_mask |=   (1 << PC_REGNUM);
                     return_in_pc = true;
                   }
-        
-                arm_emit_multi_reg_pop (saved_regs_mask, return_in_pc);
+
+                if (!current_tune->prefer_ldrd_strd || optimize_size)
+                  arm_emit_multi_reg_pop (saved_regs_mask, return_in_pc);
+                else
+                  /* Generate LDRD pattern instead of POP pattern.  */
+                  arm_emit_ldrd_pop (saved_regs_mask, return_in_pc);
+
                 if (return_in_pc == true)
                   return;
               }
diff --git a/gcc/config/arm/ldmstm.md b/gcc/config/arm/ldmstm.md
index 3c729bb..7d69b0b 100644
--- a/gcc/config/arm/ldmstm.md
+++ b/gcc/config/arm/ldmstm.md
@@ -109,6 +109,54 @@ 
   "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));"
 )
 
+(define_insn "*arm_ldrd_base"
+  [(set (match_operand:SI 0 "arm_hard_register_operand" "+rk")
+        (plus:SI (match_dup 0)
+                 (const_int 8)))
+   (set (match_operand:SI 1 "arm_hard_register_operand" "=r")
+        (mem:SI (match_dup 0)))
+   (set (match_operand:SI 2 "arm_hard_register_operand" "=r")
+        (mem:SI (plus:SI (match_dup 0)
+                         (const_int 4))))]
+  "(TARGET_ARM && current_tune->prefer_ldrd_strd
+     && (!bad_reg_pair_for_arm_ldrd_strd (operands[1], operands[2]))
+     && (REGNO (operands[1]) != REGNO (operands[0]))
+     && (REGNO (operands[2]) != REGNO (operands[0])))"
+  "ldr%(d%)\t%1, %2, [%0], #8"
+  [(set_attr "type" "load2")
+   (set_attr "predicable" "yes")])
+
+(define_peephole2
+  [(parallel
+    [(set (match_operand:SI 0 "arm_hard_register_operand" "")
+        (plus:SI (match_dup 0)
+                 (const_int 8)))
+     (set (match_operand:SI 1 "arm_hard_register_operand" "")
+          (mem:SI (match_dup 0)))
+     (set (match_operand:SI 2 "arm_hard_register_operand" "")
+          (mem:SI (plus:SI (match_dup 0)
+                           (const_int 4))))])]
+  "(TARGET_ARM && current_tune->prefer_ldrd_strd
+     && (!bad_reg_pair_for_arm_ldrd_strd (operands[1], operands[2]))
+     && (REGNO (operands[1]) != REGNO (operands[0]))
+     && (REGNO (operands[2]) != REGNO (operands[0])))"
+  [(set (match_dup 1)
+        (mem:DI (post_inc:SI (match_dup 0))))]
+  "operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));"
+)
+
+(define_insn "*arm_ldr_with_update"
+  [(parallel
+    [(set (match_operand:SI 0 "arm_hard_register_operand" "")
+        (plus:SI (match_dup 0)
+                 (const_int 4)))
+     (set (match_operand:SI 1 "arm_hard_register_operand" "")
+          (mem:SI (match_dup 0)))])]
+  "(TARGET_ARM && current_tune->prefer_ldrd_strd)"
+  "ldr%?\t%1, [%0], #4"
+  [(set_attr "type" "load1")
+  (set_attr "predicable" "yes")])
+
 (define_insn "*ldm4_ia"
   [(match_parallel 0 "load_multiple_operation"
     [(set (match_operand:SI 1 "arm_hard_register_operand" "")