===================================================================
@@ -533,6 +533,7 @@ Objective-C and Objective-C++ Dialects}.
-mcpu=@var{name} -march=@var{name} -mfpu=@var{name} @gol
-mstructure-size-boundary=@var{n} @gol
-mabort-on-noreturn @gol
+-malign-saved-fp-regs @gol
-mlong-calls -mno-long-calls @gol
-msingle-pic-base -mno-single-pic-base @gol
-mpic-register=@var{reg} @gol
@@ -12861,6 +12862,13 @@ Generate a call to the function @code{ab
@code{noreturn} function. It is executed if the function tries to
return.
+@item -malign-saved-fp-regs
+@opindex malign-saved-fp-regs
+Add extra padding to align saved VFP coprocessor register values
+on a 64-bit boundary in function prologues.
+This can provide better performance on targets optimized for
+aligned memory accesses.
+
@item -mlong-calls
@itemx -mno-long-calls
@opindex mlong-calls
===================================================================
@@ -1509,6 +1509,9 @@ typedef struct GTY(()) arm_stack_offsets
int locals_base; /* THUMB_HARD_FRAME_POINTER_REGNUM. */
int outgoing_args; /* STACK_POINTER_REGNUM. */
unsigned int saved_regs_mask;
+ /* Extra padding inserted before saved VFP regs to keep them
+ doubleword-aligned. See TARGET_ALIGN_SAVED_FP_REGS. */
+ int fp_regs_padding;
}
arm_stack_offsets;
===================================================================
@@ -58,6 +58,10 @@ mabort-on-noreturn
Target Report Mask(ABORT_NORETURN)
Generate a call to abort if a noreturn function returns
+malign-saved-fp-regs
+Target Report Mask(ALIGN_SAVED_FP_REGS)
+Save floating-point registers on stack in naturally-aligned locations
+
mapcs
Target RejectNegative Mask(APCS_FRAME) Undocumented
===================================================================
@@ -20721,6 +20721,67 @@ any_sibcall_could_use_r3 (void)
return false;
}
+/* Attempt to add a dummy register push to OFFSETS for alignment purposes.
+ Returns true and adjusts OFFSETS if successful, otherwise returns
+ false if the alignment must be accomplished in some other way. */
+static bool
+add_dummy_register_save_p (struct arm_stack_offsets *offsets)
+{
+ int reg = -1;
+ int i;
+
+ /* Register r3 is caller-saved. Normally it does not need to be
+ saved on entry by the prologue. However if we choose to save
+ it for padding then we may confuse the compiler into thinking
+ a prologue sequence is required when in fact it is not. This
+ will occur when shrink-wrapping if r3 is used as a scratch
+ register and there are no other callee-saved writes.
+
+ This situation can be avoided when other callee-saved registers
+ are available and r3 is not mandatory if we choose a callee-saved
+ register for padding. */
+ bool prefer_callee_reg_p = false;
+
+ /* If it is safe to use r3, then do so. This sometimes
+ generates better code on Thumb-2 by avoiding the need to
+ use 32-bit push/pop instructions. */
+ if (! any_sibcall_could_use_r3 ()
+ && arm_size_return_regs () <= 12
+ && (offsets->saved_regs_mask & (1 << 3)) == 0
+ && (TARGET_THUMB2
+ || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
+ {
+ reg = 3;
+ if (!TARGET_THUMB2)
+ prefer_callee_reg_p = true;
+ }
+ if (reg == -1
+ || prefer_callee_reg_p)
+ {
+ for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
+ {
+ /* Avoid fixed registers; they may be changed at
+ arbitrary times so it's unsafe to restore them
+ during the epilogue. */
+ if (!fixed_regs[i]
+ && (offsets->saved_regs_mask & (1 << i)) == 0)
+ {
+ reg = i;
+ break;
+ }
+ }
+ }
+
+ if (reg != -1)
+ {
+ offsets->saved_regs += 4;
+ offsets->saved_regs_mask |= (1 << reg);
+ offsets->soft_frame += 4;
+ return true;
+ }
+ return false;
+}
+
/* Compute the distance from register FROM to register TO.
These can be the arg pointer (26), the soft frame pointer (25),
@@ -20784,7 +20845,8 @@ arm_get_frame_offsets (void)
int saved;
int core_saved;
HOST_WIDE_INT frame_size;
- int i;
+ bool need_fp_align = false;
+ bool need_fp_padding = false;
offsets = &cfun->machine->stack_offsets;
@@ -20816,9 +20878,12 @@ arm_get_frame_offsets (void)
+ arm_compute_static_chain_stack_bytes ()
+ (frame_pointer_needed ? 4 : 0));
+ offsets->fp_regs_padding = 0;
+
if (TARGET_32BIT)
{
unsigned int regno;
+ int coproc_regs_saved = 0;
offsets->saved_regs_mask = arm_compute_save_reg_mask ();
core_saved = bit_count (offsets->saved_regs_mask) * 4;
@@ -20835,14 +20900,28 @@ arm_get_frame_offsets (void)
regno <= LAST_IWMMXT_REGNUM;
regno++)
if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
- saved += 8;
+ coproc_regs_saved += 8;
}
func_type = arm_current_func_type ();
/* Space for saved VFP registers. */
if (! IS_VOLATILE (func_type)
&& TARGET_HARD_FLOAT && TARGET_VFP)
- saved += arm_get_vfp_saved_size ();
+ coproc_regs_saved += arm_get_vfp_saved_size ();
+
+ /* Check if extra padding is required to align saved FP regs
+ on a doubleword boundary. */
+ if (TARGET_ALIGN_SAVED_FP_REGS && coproc_regs_saved)
+ {
+ need_fp_align = true;
+ if (saved % 8)
+ {
+ gcc_assert (! (saved % 4));
+ need_fp_padding = true;
+ }
+ }
+
+ saved += coproc_regs_saved;
}
else /* TARGET_THUMB1 */
{
@@ -20858,6 +20937,15 @@ arm_get_frame_offsets (void)
= offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
+ /* If we need to align the saved FP registers, see if we can do that
+ by pushing an extra reg. Otherwise insert some padding explicitly. */
+ if (need_fp_padding && !add_dummy_register_save_p (offsets))
+ {
+ offsets->fp_regs_padding = 4;
+ offsets->saved_regs += 4;
+ offsets->soft_frame += 4;
+ }
+
/* A leaf function does not need any stack alignment if it has nothing
on the stack. */
if (leaf && frame_size == 0
@@ -20874,62 +20962,15 @@ arm_get_frame_offsets (void)
if (ARM_DOUBLEWORD_ALIGN
&& (offsets->soft_frame & 7))
{
- offsets->soft_frame += 4;
/* Try to align stack by pushing an extra reg. Don't bother doing this
when there is a stack frame as the alignment will be rolled into
- the normal stack adjustment. */
- if (frame_size + crtl->outgoing_args_size == 0)
- {
- int reg = -1;
-
- /* Register r3 is caller-saved. Normally it does not need to be
- saved on entry by the prologue. However if we choose to save
- it for padding then we may confuse the compiler into thinking
- a prologue sequence is required when in fact it is not. This
- will occur when shrink-wrapping if r3 is used as a scratch
- register and there are no other callee-saved writes.
-
- This situation can be avoided when other callee-saved registers
- are available and r3 is not mandatory if we choose a callee-saved
- register for padding. */
- bool prefer_callee_reg_p = false;
-
- /* If it is safe to use r3, then do so. This sometimes
- generates better code on Thumb-2 by avoiding the need to
- use 32-bit push/pop instructions. */
- if (! any_sibcall_could_use_r3 ()
- && arm_size_return_regs () <= 12
- && (offsets->saved_regs_mask & (1 << 3)) == 0
- && (TARGET_THUMB2
- || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
- {
- reg = 3;
- if (!TARGET_THUMB2)
- prefer_callee_reg_p = true;
- }
- if (reg == -1
- || prefer_callee_reg_p)
- {
- for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
- {
- /* Avoid fixed registers; they may be changed at
- arbitrary times so it's unsafe to restore them
- during the epilogue. */
- if (!fixed_regs[i]
- && (offsets->saved_regs_mask & (1 << i)) == 0)
- {
- reg = i;
- break;
- }
- }
- }
-
- if (reg != -1)
- {
- offsets->saved_regs += 4;
- offsets->saved_regs_mask |= (1 << reg);
- }
- }
+ the normal stack adjustment. We also cannot do this if we need
+ to maintain the alignment of the saved FP registers. */
+ if (! (frame_size + crtl->outgoing_args_size == 0
+ && !need_fp_align
+ && add_dummy_register_save_p (offsets)))
+ /* We must accomplish the desired alignment explicitly. */
+ offsets->soft_frame += 4;
}
offsets->locals_base = offsets->soft_frame + frame_size;
@@ -21378,6 +21419,17 @@ arm_expand_prologue (void)
}
}
+ /* Add explicit padding for alignment of saved FP regs, if necessary. */
+ if (offsets->fp_regs_padding)
+ {
+ gcc_assert (offsets->fp_regs_padding == 4);
+ amount = GEN_INT (-offsets->fp_regs_padding);
+ insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx, amount));
+ RTX_FRAME_RELATED_P (insn) = 1;
+ saved_regs += offsets->fp_regs_padding;
+ }
+
if (! IS_VOLATILE (func_type))
saved_regs += arm_save_coproc_regs ();
@@ -27922,6 +27974,16 @@ arm_expand_epilogue (bool really_return)
stack_pointer_rtx, stack_pointer_rtx);
}
+ /* Adjust for any extra padding added to align saved FP registers. */
+ if (offsets->fp_regs_padding)
+ {
+ rtx tmp;
+ gcc_assert (offsets->fp_regs_padding == 4);
+ tmp = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (offsets->fp_regs_padding)));
+ RTX_FRAME_RELATED_P (tmp) = 1;
+ }
+
if (saved_regs_mask)
{
rtx insn;