diff mbox

[ARM,2/4] Prologue using STRD in Thumb mode

Message ID 001b01cda6f8$65c98be0$315ca3a0$@yorsh@arm.com
State New
Headers show

Commit Message

Greta Yorsh Oct. 10, 2012, 3:03 p.m. UTC
Generate prologue using STRD when prefer_ldrd_strd is set in tune_params.

ChangeLog

gcc/

2012-09-13  Sameera Deshpande  <sameera.deshpande@arm.com>
            Greta Yorsh  <Greta.Yorsh@arm.com>

        * config/arm/arm.c (thumb2_emit_strd_push): New function.
       (arm_expand_prologue): Use the new function.

Comments

Richard Earnshaw Oct. 18, 2012, 1:54 p.m. UTC | #1
On 10/10/12 16:03, Greta Yorsh wrote:
> Generate prologue using STRD when prefer_ldrd_strd is set in tune_params.
>
> ChangeLog
>
> gcc/
>
> 2012-09-13  Sameera Deshpande  <sameera.deshpande@arm.com>
>              Greta Yorsh  <Greta.Yorsh@arm.com>
>
>          * config/arm/arm.c (thumb2_emit_strd_push): New function.
>         (arm_expand_prologue): Use the new function.
>
>
> 2-thumb-prolog-strd.patch.txt
>
>
> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> index b3a3774..1212a93 100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c
> @@ -15737,6 +15737,126 @@ arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
>       }
>   }
>
> +/* Generate and emit a pattern that will be recognized as STRD pattern.  If even
> +   number of registers are being pushed, multiple STRD patterns are created for
> +   all register pairs.  If odd number of registers are pushed, emit a
> +   combination of STRDs and STR for the prologue saves.  */
> +static void
> +thumb2_emit_strd_push (unsigned long saved_regs_mask)
> +{
> +  int num_regs = 0;
> +  int i, j;
> +  rtx par = NULL_RTX;
> +  rtx insn = NULL_RTX;
> +  rtx dwarf = NULL_RTX;
> +  rtx tmp, reg, tmp1;
> +
> +  for (i = 0; i <= LAST_ARM_REGNUM; i++)
> +    if (saved_regs_mask & (1 << i))
> +      num_regs++;
> +
> +  gcc_assert (num_regs && num_regs <= 16);
> +
> +  /* Pre-decrement the stack pointer, based on there being num_regs 4-byte
> +     registers to push.  */
> +  tmp = gen_rtx_SET (VOIDmode,
> +                     stack_pointer_rtx,
> +                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
> +  RTX_FRAME_RELATED_P (tmp) = 1;
> +  insn = emit_insn (tmp);
> +
> +  /* Create sequence for DWARF info.  */
> +  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
> +
> +  /* RTLs cannot be shared, hence create new copy for dwarf.  */
> +  tmp1 = gen_rtx_SET (VOIDmode,
> +                     stack_pointer_rtx,
> +                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
> +  RTX_FRAME_RELATED_P (tmp1) = 1;
> +  XVECEXP (dwarf, 0, 0) = tmp1;
> +
> +  /* Var j iterates over all the registers to gather all the registers in
> +     saved_regs_mask.  Var i gives index of register R_j in stack frame.
> +     A PARALLEL RTX of register-pair is created here, so that pattern for
> +     STRD can be matched.  If num_regs is odd, 1st register will be pushed
> +     using STR and remaining registers will be pushed with STRD in pairs.
> +     If num_regs is even, all registers are pushed with STRD in pairs.
> +     Hence, skip first element for odd num_regs.  */
> +  for (i = num_regs - 1, j = LAST_ARM_REGNUM; i >= (num_regs % 2); j--)
> +    if (saved_regs_mask & (1 << j))
> +      {
> +        gcc_assert (j != SP_REGNUM);
> +        gcc_assert (j != PC_REGNUM);

It would be better to assert at the head of the function that 
saved_regs_mask does not contain SP or PC, rather than checking every 
iteration of the loop.

> +
> +        /* Create RTX for store.  New RTX is created for dwarf as
> +           they are not sharable.  */
> +        reg = gen_rtx_REG (SImode, j);
> +        tmp = gen_rtx_SET (SImode,
> +                           gen_frame_mem
> +                           (SImode,
> +                            plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
> +                           reg);
> +
> +        tmp1 = gen_rtx_SET (SImode,
> +                           gen_frame_mem
> +                           (SImode,
> +                            plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
> +                           reg);
> +        RTX_FRAME_RELATED_P (tmp) = 1;
> +        RTX_FRAME_RELATED_P (tmp1) = 1;
> +
> +        if (((i - (num_regs % 2)) % 2) == 1)
> +          /* When (i - (num_regs % 2)) is odd, the RTX to be emitted is yet to
> +             be created.  Hence create it first.  The STRD pattern we are
> +             generating is :
> +             [ (SET (MEM (PLUS (SP) (NUM))) (reg_t1))
> +               (SET (MEM (PLUS (SP) (NUM + 4))) (reg_t2)) ]
> +             were target registers need not be consecutive.  */

s/were/where the/

> +          par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
> +
> +        /* Register R_j is added in PARALLEL RTX.  If (i - (num_regs % 2)) is
> +           even, the reg_j is added as 0th element and if it is odd, reg_i is
> +           added as 1st element of STRD pattern shown above.  */
> +        XVECEXP (par, 0, ((i - (num_regs % 2)) % 2)) = tmp;
> +        XVECEXP (dwarf, 0, (i + 1)) = tmp1;
> +
> +        if (((i - (num_regs % 2)) % 2) == 0)
> +          /* When (i - (num_regs % 2)) is even, RTXs for both the registers
> +             to be loaded are generated in above given STRD pattern, and the
> +             pattern can be emitted now.  */
> +          emit_insn (par);
> +
> +        i--;
> +      }
> +
> +  if ((num_regs % 2) == 1)
> +    {
> +      /* If odd number of registers are pushed, generate STR pattern to store
> +         lone register.  */
> +      for (; (saved_regs_mask & (1 << j)) == 0; j--);
> +
> +      tmp1 = gen_frame_mem (SImode, plus_constant (Pmode,
> +                                                   stack_pointer_rtx, 4 * i));
> +      reg = gen_rtx_REG (SImode, j);
> +      tmp = gen_rtx_SET (SImode, tmp1, reg);
> +      RTX_FRAME_RELATED_P (tmp) = 1;
> +
> +      emit_insn (tmp);
> +
> +      tmp1 = gen_rtx_SET (SImode,
> +                         gen_frame_mem
> +                         (SImode,
> +                          plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
> +                          reg);
> +      RTX_FRAME_RELATED_P (tmp1) = 1;
> +      XVECEXP (dwarf, 0, (i + 1)) = tmp1;
> +    }
> +
> +  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
> +  RTX_FRAME_RELATED_P (insn) = 1;
> +  return;
> +}
> +
>   /* Generate and emit an insn that we will recognize as a push_multi.
>      Unfortunately, since this insn does not reflect very well the actual
>      semantics of the operation, we need to annotate the insn for the benefit
> @@ -16661,8 +16781,25 @@ arm_expand_prologue (void)
>   	      saved_regs += frame;
>   	    }
>   	}
> -      insn = emit_multi_reg_push (live_regs_mask);
> -      RTX_FRAME_RELATED_P (insn) = 1;
> +
> +      if (current_tune->prefer_ldrd_strd
> +          && !optimize_function_for_size_p (cfun))
> +        {
> +          if (TARGET_THUMB2)
> +            {
> +              thumb2_emit_strd_push (live_regs_mask);
> +            }
> +          else
> +            {
> +              insn = emit_multi_reg_push (live_regs_mask);
> +              RTX_FRAME_RELATED_P (insn) = 1;
> +            }
> +        }
> +      else
> +        {
> +          insn = emit_multi_reg_push (live_regs_mask);
> +          RTX_FRAME_RELATED_P (insn) = 1;
> +        }
>       }
>
>     if (! IS_VOLATILE (func_type))
>

Otherwise OK.

R.
diff mbox

Patch

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index b3a3774..1212a93 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -15737,6 +15737,126 @@  arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
     }
 }
 
+/* Generate and emit patterns that will be recognized as STRD, pushing the
+   registers in SAVED_REGS_MASK, which must not contain SP or PC.  If the number
+   of registers is even, they are all pushed in pairs with STRD.  If it is odd,
+   the lowest-numbered register is pushed with STR and the rest with STRD.  */
+static void
+thumb2_emit_strd_push (unsigned long saved_regs_mask)
+{
+  int num_regs = 0;
+  int i, j;
+  rtx par = NULL_RTX;
+  rtx insn = NULL_RTX;
+  rtx dwarf = NULL_RTX;
+  rtx tmp, reg, tmp1;
+
+  for (i = 0; i <= LAST_ARM_REGNUM; i++)
+    if (saved_regs_mask & (1 << i))
+      num_regs++;
+
+  gcc_assert (num_regs && num_regs <= 16);
+  /* SP and PC must never be pushed here; check once, not per register.  */
+  gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
+  gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
+
+  /* Pre-decrement the stack pointer, based on there being num_regs 4-byte
+     registers to push.  */
+  tmp = gen_rtx_SET (VOIDmode,
+                     stack_pointer_rtx,
+                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
+  RTX_FRAME_RELATED_P (tmp) = 1;
+  insn = emit_insn (tmp);
+
+  /* Create sequence for DWARF info.  */
+  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
+
+  /* RTLs cannot be shared, hence create new copy for dwarf.  */
+  tmp1 = gen_rtx_SET (VOIDmode,
+                     stack_pointer_rtx,
+                     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
+  RTX_FRAME_RELATED_P (tmp1) = 1;
+  XVECEXP (dwarf, 0, 0) = tmp1;
+
+  /* Var j iterates over all the registers to gather all the registers in
+     saved_regs_mask.  Var i gives index of register R_j in stack frame.
+     A PARALLEL RTX of register-pair is created here, so that pattern for
+     STRD can be matched.  If num_regs is odd, 1st register will be pushed
+     using STR and remaining registers will be pushed with STRD in pairs.
+     If num_regs is even, all registers are pushed with STRD in pairs.
+     Hence, skip first element for odd num_regs.  */
+  for (i = num_regs - 1, j = LAST_ARM_REGNUM; i >= (num_regs % 2); j--)
+    if (saved_regs_mask & (1 << j))
+      {
+        /* Create RTX for store.  New RTX is created for dwarf as
+           they are not sharable.  */
+        reg = gen_rtx_REG (SImode, j);
+        tmp = gen_rtx_SET (SImode,
+                           gen_frame_mem
+                           (SImode,
+                            plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
+                           reg);
+
+        tmp1 = gen_rtx_SET (SImode,
+                           gen_frame_mem
+                           (SImode,
+                            plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
+                           reg);
+        RTX_FRAME_RELATED_P (tmp) = 1;
+        RTX_FRAME_RELATED_P (tmp1) = 1;
+
+        if (((i - (num_regs % 2)) % 2) == 1)
+          /* When (i - (num_regs % 2)) is odd, the RTX to be emitted is yet to
+             be created.  Hence create it first.  The STRD pattern we are
+             generating is :
+             [ (SET (MEM (PLUS (SP) (NUM))) (reg_t1))
+               (SET (MEM (PLUS (SP) (NUM + 4))) (reg_t2)) ]
+             where the target registers need not be consecutive.  */
+          par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+
+        /* Register R_j is added in PARALLEL RTX.  If (i - (num_regs % 2)) is
+           even, the reg_j is added as 0th element and if it is odd, reg_j is
+           added as 1st element of STRD pattern shown above.  */
+        XVECEXP (par, 0, ((i - (num_regs % 2)) % 2)) = tmp;
+        XVECEXP (dwarf, 0, (i + 1)) = tmp1;
+
+        if (((i - (num_regs % 2)) % 2) == 0)
+          /* When (i - (num_regs % 2)) is even, RTXs for both the registers
+             to be stored are generated in above given STRD pattern, and the
+             pattern can be emitted now.  */
+          emit_insn (par);
+
+        i--;
+      }
+
+  if ((num_regs % 2) == 1)
+    {
+      /* If odd number of registers are pushed, generate STR pattern to store
+         lone register.  */
+      for (; (saved_regs_mask & (1 << j)) == 0; j--);
+
+      tmp1 = gen_frame_mem (SImode, plus_constant (Pmode,
+                                                   stack_pointer_rtx, 4 * i));
+      reg = gen_rtx_REG (SImode, j);
+      tmp = gen_rtx_SET (SImode, tmp1, reg);
+      RTX_FRAME_RELATED_P (tmp) = 1;
+
+      emit_insn (tmp);
+
+      tmp1 = gen_rtx_SET (SImode,
+                         gen_frame_mem
+                         (SImode,
+                          plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
+                          reg);
+      RTX_FRAME_RELATED_P (tmp1) = 1;
+      XVECEXP (dwarf, 0, (i + 1)) = tmp1;
+    }
+
+  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
+  RTX_FRAME_RELATED_P (insn) = 1;
+  return;
+}
+
 /* Generate and emit an insn that we will recognize as a push_multi.
    Unfortunately, since this insn does not reflect very well the actual
    semantics of the operation, we need to annotate the insn for the benefit
@@ -16661,8 +16781,25 @@  arm_expand_prologue (void)
 	      saved_regs += frame;
 	    }
 	}
-      insn = emit_multi_reg_push (live_regs_mask);
-      RTX_FRAME_RELATED_P (insn) = 1;
+
+      if (current_tune->prefer_ldrd_strd
+          && !optimize_function_for_size_p (cfun))
+        {
+          if (TARGET_THUMB2)
+            {
+              thumb2_emit_strd_push (live_regs_mask);
+            }
+          else
+            {
+              insn = emit_multi_reg_push (live_regs_mask);
+              RTX_FRAME_RELATED_P (insn) = 1;
+            }
+        }
+      else
+        {
+          insn = emit_multi_reg_push (live_regs_mask);
+          RTX_FRAME_RELATED_P (insn) = 1;
+        }
     }
 
   if (! IS_VOLATILE (func_type))