Patchwork [ARM,1/3] Prologue using STRD in ARM mode

login
register
mail settings
Submitter Greta Yorsh
Date Oct. 10, 2012, 2:32 p.m.
Message ID <000901cda6f4$06ae9c30$140bd490$@yorsh@arm.com>
Download mbox | patch
Permalink /patch/190655/
State New
Headers show

Comments

Greta Yorsh - Oct. 10, 2012, 2:32 p.m.
Emit prologue using STRD in ARM mode when tune parameter prefer_ldrd_strd is
set.

ChangeLog

gcc/

2012-09-13  Sameera Deshpande  <sameera.deshpande at arm.com>
            Greta Yorsh  <Greta.Yorsh at arm.com>

        * config/arm/arm.c (emit_multi_reg_push): New declaration
        for an existing function.
        (arm_emit_strd_push): New function.
        (arm_expand_prologue): Use arm_emit_strd_push in ARM mode.
        (arm_get_frame_offsets): Do not select r3 unless TARGET_THUMB2
        or prefer_ldrd_strd is not set.

Patch

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 84f099f..3522da7 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -174,6 +174,7 @@  static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
 static tree arm_builtin_decl (unsigned, bool);
 static void emit_constant_insn (rtx cond, rtx pattern);
 static rtx emit_set_insn (rtx, rtx);
+static rtx emit_multi_reg_push (unsigned long);
 static int arm_arg_partial_bytes (cumulative_args_t, enum machine_mode,
 				  tree, bool);
 static rtx arm_function_arg (cumulative_args_t, enum machine_mode,
@@ -15906,6 +15907,108 @@  thumb2_emit_strd_push (unsigned long saved_regs_mask)
   return;
 }
 
+/* Emit the ARM-mode prologue stores for the registers in SAVED_REGS_MASK,
+   using STRD where possible.  STRD in ARM mode needs a consecutive register
+   pair starting at an even register.  Registers are scanned from
+   LAST_ARM_REGNUM downwards; registers that cannot be paired are accumulated
+   and flushed with a multi register PUSH just before the next pair, and each
+   pair is then stored with a pre-decrement (write-back) STRD.  Any registers
+   still accumulated when the scan ends are pushed last.  SAVED_REGS_MASK must
+   be non-empty and must not contain SP or PC.  */
+static void
+arm_emit_strd_push (unsigned long saved_regs_mask)
+{
+  int num_regs = 0;
+  int i, j;
+  rtx dwarf = NULL_RTX;
+  rtx insn = NULL_RTX;
+  rtx tmp, tmp1;
+  unsigned long regs_to_be_pushed_mask;
+
+  for (i = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1 << i))
+      num_regs++;
+
+  gcc_assert (num_regs && num_regs <= 16);
+
+  /* Var j walks the register numbers from LAST_ARM_REGNUM downwards.  Var i
+     counts the registers of saved_regs_mask seen so far, so the loop stops
+     once all of them have been handled.  regs_to_be_pushed_mask accumulates
+     registers that are pushed using multi register PUSH before the next
+     STRD is generated.  */
+  for (i = 0, j = LAST_ARM_REGNUM, regs_to_be_pushed_mask = 0; i < num_regs; j--)
+    if (saved_regs_mask & (1 << j))
+      {
+        gcc_assert (j != SP_REGNUM);
+        gcc_assert (j != PC_REGNUM);
+        i++;
+
+        if ((j % 2 == 1)
+            && (saved_regs_mask & (1 << (j - 1)))
+            && regs_to_be_pushed_mask)
+          {
+            /* J and J - 1 form a pair that the next iteration stores with
+               STRD, so J is not accumulated.  First PUSH the registers
+               accumulated so far and reset regs_to_be_pushed_mask.  */
+            insn = emit_multi_reg_push (regs_to_be_pushed_mask);
+            regs_to_be_pushed_mask = 0;
+            RTX_FRAME_RELATED_P (insn) = 1;
+            continue;
+          }
+
+        regs_to_be_pushed_mask |= (1 << j);
+
+        if ((j % 2) == 0 && (saved_regs_mask & (1 << (j + 1))))
+          {
+            /* J is even and J + 1 is also saved: store both with a single
+               pre-decrement STRD.  Anything accumulated before the pair was
+               pushed above, so the mask (which may hold J and J + 1) is
+               simply cleared afterwards.  INSN is the emitted STRD.  */
+            tmp = gen_rtx_SET (DImode,
+                               gen_frame_mem (DImode,
+                                              gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx)),
+                               gen_rtx_REG (DImode, j));
+            insn = emit_insn (tmp);
+            RTX_FRAME_RELATED_P (insn) = 1;
+
+            /* Dwarf info: SP -= 8, then one SImode store per register.  */
+            dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));
+            tmp = gen_rtx_SET (VOIDmode,
+                               stack_pointer_rtx,
+                               plus_constant (Pmode, stack_pointer_rtx, -8));
+            RTX_FRAME_RELATED_P (tmp) = 1;
+            XVECEXP (dwarf, 0, 0) = tmp;
+
+            tmp = gen_rtx_SET (SImode,
+                               gen_frame_mem (SImode, stack_pointer_rtx),
+                               gen_rtx_REG (SImode, j));
+            tmp1 = gen_rtx_SET (SImode,
+                                gen_frame_mem (SImode,
+                                               plus_constant (Pmode,
+                                                              stack_pointer_rtx,
+                                                              4)),
+                                gen_rtx_REG (SImode, j + 1));
+            RTX_FRAME_RELATED_P (tmp) = 1;
+            RTX_FRAME_RELATED_P (tmp1) = 1;
+            XVECEXP (dwarf, 0, 1) = tmp;
+            XVECEXP (dwarf, 0, 2) = tmp1;
+
+            add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
+            regs_to_be_pushed_mask = 0;
+          }
+      }
+
+  /* Registers accumulated after the last pair (or when no pair was found at
+     all) still have to be stored: emit one final multi register PUSH.  */
+  if (regs_to_be_pushed_mask)
+    {
+      insn = emit_multi_reg_push (regs_to_be_pushed_mask);
+      RTX_FRAME_RELATED_P (insn) = 1;
+    }
+
+  return;
+}
+
 /* Generate and emit an insn that we will recognize as a push_multi.
    Unfortunately, since this insn does not reflect very well the actual
    semantics of the operation, we need to annotate the insn for the benefit
@@ -16547,7 +16650,8 @@  arm_get_frame_offsets (void)
 	     use 32-bit push/pop instructions.  */
  	  if (! any_sibcall_uses_r3 ()
 	      && arm_size_return_regs () <= 12
-	      && (offsets->saved_regs_mask & (1 << 3)) == 0)
+	      && (offsets->saved_regs_mask & (1 << 3)) == 0
+              && (TARGET_THUMB2 || !current_tune->prefer_ldrd_strd))
 	    {
 	      reg = 3;
 	    }
@@ -16979,6 +17083,10 @@  arm_expand_prologue (void)
             {
               thumb2_emit_strd_push (live_regs_mask);
             }
+          else if (TARGET_ARM)
+            {
+              arm_emit_strd_push (live_regs_mask);
+            }
           else
             {
               insn = emit_multi_reg_push (live_regs_mask);