Patchwork [RFA/ARM,02/05] : LDRD generation instead of POP in A15 Thumb2 epilogue.

login
register
mail settings
Submitter Sameera Deshpande
Date Dec. 30, 2011, 12:39 p.m.
Message ID <1325248740.20655.201.camel@e102549-lin.cambridge.arm.com>
Download mbox | patch
Permalink /patch/133641/
State New
Headers show

Comments

Sameera Deshpande - Dec. 30, 2011, 12:39 p.m.
Hi!

Please find attached revised LDRD generation patch for A15 Thumb-2 mode.

Because of the major rework in ARM and Thumb-2 RTL epilogue patches,
this patch has undergone some changes.

The patch is tested with check-gcc, bootstrap and check-gdb without
regression.

Ok for trunk?

--

Patch

diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 64d5993..49aae52 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -201,6 +201,7 @@  extern void thumb_reload_in_hi (rtx *);
 extern void thumb_set_return_address (rtx, rtx);
 extern const char *thumb1_output_casesi (rtx *);
 extern const char *thumb2_output_casesi (rtx *);
+extern bool bad_reg_pair_for_thumb_ldrd_strd (rtx, rtx);
 #endif
 
 /* Defined in pe.c.  */
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index d671281..6d008c5 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -15847,6 +15847,154 @@  arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
   REG_NOTES (par) = dwarf;
 }
 
+bool
+bad_reg_pair_for_thumb_ldrd_strd (rtx src1, rtx src2)
+{
+  return (GET_CODE (src1) != REG
+          || GET_CODE (src2) != REG
+          || (REGNO (src1) == PC_REGNUM)
+          || (REGNO (src1) == SP_REGNUM)
+          || (REGNO (src1) == REGNO (src2))
+          || (REGNO (src2) == PC_REGNUM)
+          || (REGNO (src2) == SP_REGNUM));
+}
+
+/* Generate and emit a pattern that will be recognized as LDRD pattern.  If even
+   number of registers are being popped, multiple LDRD patterns are created for
+   all register pairs.  If odd number of registers are popped, last register is
+   loaded by using LDR pattern.  */
+static void
+thumb2_emit_ldrd_pop (unsigned long saved_regs_mask, bool really_return)
+{
+  int num_regs = 0;
+  int i, j;
+  rtx par = NULL_RTX;
+  rtx dwarf = NULL_RTX;
+  rtx tmp, reg, tmp1;
+
+  for (i = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1 << i))
+      num_regs++;
+
+  gcc_assert (num_regs && num_regs <= 16);
+  gcc_assert (really_return || ((saved_regs_mask & (1 << PC_REGNUM)) == 0));
+
+  /* We cannot generate ldrd for PC.  Hence, reduce the count if PC is
+     to be popped.  So, if num_regs is even, now it will become odd,
+     and we can generate pop with PC.  If num_regs is odd, it will be
+     even now, and ldr with return can be generated for PC.  */
+  if (really_return && (saved_regs_mask & (1 << PC_REGNUM)))
+    num_regs--;
+
+  /* Var j iterates over all the registers to gather all the registers in
+     saved_regs_mask.  Var i gives index of saved registers in stack frame.
+     A PARALLEL RTX of register-pair is created here, so that pattern for
+     LDRD can be matched.  As PC is always last register to be popped, and
+     we have already decremented num_regs if PC, we don't have to worry
+     about PC in this loop.  */
+  for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
+    if (saved_regs_mask & (1 << j))
+      {
+        gcc_assert (j != SP_REGNUM);
+
+        /* Create RTX for memory load.  */
+        reg = gen_rtx_REG (SImode, j);
+        tmp = gen_rtx_SET (SImode,
+                           reg,
+                           gen_frame_mem (SImode,
+                               plus_constant (stack_pointer_rtx, 4 * i)));
+        RTX_FRAME_RELATED_P (tmp) = 1;
+
+        if (i % 2 == 0)
+          {
+            /* When saved-register index (i) is even, the RTX to be emitted is
+               yet to be created.  Hence create it first.  The LDRD pattern we
+               are generating is :
+               [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
+                 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
+               where target registers need not be consecutive.  */
+            par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+            dwarf = NULL_RTX;
+          }
+
+        /* ith register is added in PARALLEL RTX.  If i is even, the reg_i is
+           added as 0th element and if i is odd, reg_i is added as 1st element
+           of LDRD pattern shown above.  */
+        XVECEXP (par, 0, (i % 2)) = tmp;
+        dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
+
+        if ((i % 2) == 1)
+          {
+            /* When saved-register index (i) is odd, RTXs for both the registers
+               to be loaded are generated in above given LDRD pattern, and the
+               pattern can be emitted now.  */
+            par = emit_insn (par);
+            REG_NOTES (par) = dwarf;
+          }
+
+        i++;
+      }
+
+  /* If the number of registers pushed is odd AND really_return is false OR
+     number of registers are even AND really_return is true, last register is
+     popped using LDR.  It can be PC as well.  Hence, adjust the stack first and
+     then LDR with post increment.  */
+
+  /* Increment the stack pointer, based on there being
+     num_regs 4-byte registers to restore.  */
+  tmp = gen_rtx_SET (VOIDmode,
+                     stack_pointer_rtx,
+                     plus_constant (stack_pointer_rtx, 4 * i));
+  RTX_FRAME_RELATED_P (tmp) = 1;
+  emit_insn (tmp);
+
+  dwarf = NULL_RTX;
+
+  if (((num_regs % 2) == 1 && !really_return)
+      || ((num_regs % 2) == 0 && really_return))
+    {
+      /* Scan for the single register to be popped.  Skip until the saved
+         register is found.  */
+      for (; (saved_regs_mask & (1 << j)) == 0; j++);
+
+      /* Gen LDR with post increment here.  */
+      tmp1 = gen_rtx_MEM (SImode,
+                          gen_rtx_POST_INC (SImode,
+                                            stack_pointer_rtx));
+      set_mem_alias_set (tmp1, get_frame_alias_set ());
+
+      reg = gen_rtx_REG (SImode, j);
+      tmp = gen_rtx_SET (SImode, reg, tmp1);
+      RTX_FRAME_RELATED_P (tmp) = 1;
+      dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
+
+      if (really_return)
+        {
+          /* If really_return, j must be PC_REGNUM.  */
+          gcc_assert (j == PC_REGNUM);
+          par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+          XVECEXP (par, 0, 0) = ret_rtx;
+          XVECEXP (par, 0, 1) = tmp;
+          par = emit_jump_insn (par);
+        }
+      else
+        {
+          par = emit_insn (tmp);
+        }
+
+      REG_NOTES (par) = dwarf;
+    }
+  else if ((num_regs % 2) == 1 && really_return)
+    {
+      /* There are 2 registers to be popped.  So, generate the pattern
+         pop_multiple_with_stack_update_and_return to pop in PC.  */
+      arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)),
+                              really_return);
+    }
+
+  return;
+}
+
 /* Calculate the size of the return value that is passed in registers.  */
 static unsigned
 arm_size_return_regs (void)
@@ -22615,7 +22763,13 @@  arm_expand_epilogue (bool really_return)
               }
             else
               {
-                arm_emit_multi_reg_pop (saved_regs_mask, return_in_pc);
+                if (!current_tune->prefer_ldrd_strd
+                    || optimize_function_for_size_p (cfun)
+                    || TARGET_ARM)
+                  arm_emit_multi_reg_pop (saved_regs_mask, return_in_pc);
+                else
+                  /* Generate LDRD pattern instead of POP pattern.  */
+                  thumb2_emit_ldrd_pop (saved_regs_mask, return_in_pc);
               }
 
             if (return_in_pc == true)
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index 7d0269a..e33eff2 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -207,6 +207,12 @@ 
   (and (match_code "const_int")
        (match_test "TARGET_THUMB2 && ival >= 0 && ival <= 255")))
 
+(define_constraint "Pz"
+  "@internal In Thumb-2 state a constant in the range -1020 to 1020"
+  (and (match_code "const_int")
+       (match_test "TARGET_THUMB2 && ival >= -1020 && ival <= 1020
+                    && ival % 4 == 0")))
+
 (define_constraint "G"
  "In ARM/Thumb-2 state a valid FPA immediate constant."
  (and (match_code "const_double")
diff --git a/gcc/config/arm/ldmstm.md b/gcc/config/arm/ldmstm.md
index 5db4a32..21d2815 100644
--- a/gcc/config/arm/ldmstm.md
+++ b/gcc/config/arm/ldmstm.md
@@ -21,6 +21,32 @@ 
    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
+(define_insn "*thumb2_ldrd_base"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (mem:SI (match_operand:SI 1 "s_register_operand" "rk")))
+   (set (match_operand:SI 2 "register_operand" "=r")
+        (mem:SI (plus:SI (match_dup 1)
+                         (const_int 4))))]
+  "(TARGET_THUMB2 && current_tune->prefer_ldrd_strd
+     && (!bad_reg_pair_for_thumb_ldrd_strd (operands[0], operands[2])))"
+  "ldrd%?\t%0, %2, [%1]"
+  [(set_attr "type" "load2")
+   (set_attr "predicable" "yes")])
+
+(define_insn "*thumb2_ldrd"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (mem:SI (plus:SI (match_operand:SI 1 "s_register_operand" "rk")
+                         (match_operand:SI 2 "ldrd_immediate_operand" "Pz"))))
+   (set (match_operand:SI 3 "register_operand" "=r")
+        (mem:SI (plus:SI (match_dup 1)
+                         (match_operand:SI 4 "const_int_operand" ""))))]
+  "(TARGET_THUMB2 && current_tune->prefer_ldrd_strd
+     && ((INTVAL (operands[2]) + 4) == INTVAL (operands[4]))
+     && (!bad_reg_pair_for_thumb_ldrd_strd (operands[0], operands[3])))"
+  "ldrd%?\t%0, %3, [%1, %2]"
+  [(set_attr "type" "load2")
+   (set_attr "predicable" "yes")])
+
 (define_insn "*ldm4_ia"
   [(match_parallel 0 "load_multiple_operation"
     [(set (match_operand:SI 1 "arm_hard_register_operand" "")
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index 85a112e..881c0b0 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -209,6 +209,10 @@ 
 	    (match_test "(GET_CODE (op) != CONST_INT
 			  || (INTVAL (op) < 4096 && INTVAL (op) > -4096))"))))
 
+(define_predicate "ldrd_immediate_operand"
+  (and (match_operand 0 "const_int_operand")
+  (match_test "(INTVAL (op) < 1020 && INTVAL (op) > -1020)")))
+
 ;; True for operators that can be combined with a shift in ARM state.
 (define_special_predicate "shiftable_operator"
   (and (match_code "plus,minus,ior,xor,and")