@@ -12783,6 +12783,24 @@ choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
return len;
}
+/* Return true if the stack pointer can address CFA_OFFSET in the frame.  */
+
+static inline bool sp_valid_at (HOST_WIDE_INT cfa_offset)
+{
+  const struct machine_frame_state &state = cfun->machine->fs;
+  return state.sp_valid && (!state.sp_realigned
+                            || cfa_offset >= state.sp_realigned_offset);
+}
+
+/* Return true if the frame pointer can address CFA_OFFSET in the frame.  */
+
+static inline bool fp_valid_at (HOST_WIDE_INT cfa_offset)
+{
+  const struct machine_frame_state &state = cfun->machine->fs;
+  return state.fp_valid && (!state.sp_valid || !state.sp_realigned
+                            || cfa_offset < state.sp_realigned_offset);
+}
+
/* Return an RTX that points to CFA_OFFSET within the stack frame.
The valid base registers are taken from CFUN->MACHINE->FS. */
@@ -13081,15 +13099,18 @@ pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
{
HOST_WIDE_INT ooffset = m->fs.sp_offset;
bool valid = m->fs.sp_valid;
+ bool realigned = m->fs.sp_realigned;
if (src == hard_frame_pointer_rtx)
{
valid = m->fs.fp_valid;
+ realigned = false;
ooffset = m->fs.fp_offset;
}
else if (src == crtl->drap_reg)
{
valid = m->fs.drap_valid;
+ realigned = false;
ooffset = 0;
}
else
@@ -13103,6 +13124,7 @@ pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
m->fs.sp_offset = ooffset - INTVAL (offset);
m->fs.sp_valid = valid;
+ m->fs.sp_realigned = realigned;
}
}
@@ -13852,6 +13874,7 @@ ix86_expand_prologue (void)
this is fudged; we're interested to offsets within the local frame. */
m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
m->fs.sp_valid = true;
+ m->fs.sp_realigned = false;
ix86_compute_frame_layout (&frame);
@@ -14068,11 +14091,10 @@ ix86_expand_prologue (void)
that we must allocate the size of the register save area before
performing the actual alignment. Otherwise we cannot guarantee
that there's enough storage above the realignment point. */
- if (m->fs.sp_offset != frame.sse_reg_save_offset)
+ allocate = frame.stack_realign_allocate_offset - m->fs.sp_offset;
+ if (allocate)
pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
- GEN_INT (m->fs.sp_offset
- - frame.sse_reg_save_offset),
- -1, false);
+ GEN_INT (-allocate), -1, false);
/* Align the stack. */
insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
@@ -14080,11 +14102,19 @@ ix86_expand_prologue (void)
GEN_INT (-align_bytes)));
/* For the purposes of register save area addressing, the stack
- pointer is no longer valid. As for the value of sp_offset,
- see ix86_compute_frame_layout, which we need to match in order
- to pass verification of stack_pointer_offset at the end. */
+ pointer can no longer be used to access anything in the frame
+ below m->fs.sp_realigned_offset and the frame pointer cannot be
+ used for anything at or above. */
m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
- m->fs.sp_valid = false;
+ m->fs.sp_realigned = true;
+ m->fs.sp_realigned_offset = m->fs.sp_offset - frame.nsseregs * 16;
+ gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
+ /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
+ is needed to describe where a register is saved using a realigned
+ stack pointer, so we need to invalidate the stack pointer for that
+ target. */
+ if (TARGET_SEH)
+ m->fs.sp_valid = false;
}
allocate = frame.stack_pointer_offset - m->fs.sp_offset;
@@ -14423,6 +14453,7 @@ ix86_emit_leave (void)
gcc_assert (m->fs.fp_valid);
m->fs.sp_valid = true;
+ m->fs.sp_realigned = false;
m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
m->fs.fp_valid = false;
@@ -14523,9 +14554,10 @@ ix86_expand_epilogue (int style)
ix86_finalize_stack_realign_flags ();
ix86_compute_frame_layout (&frame);
- m->fs.sp_valid = (!frame_pointer_needed
- || (crtl->sp_is_unchanging
- && !stack_realign_fp));
+ m->fs.sp_realigned = stack_realign_fp;
+ m->fs.sp_valid = stack_realign_fp
+ || !frame_pointer_needed
+ || crtl->sp_is_unchanging;
gcc_assert (!m->fs.sp_valid
|| m->fs.sp_offset == frame.stack_pointer_offset);
@@ -14575,10 +14607,10 @@ ix86_expand_epilogue (int style)
/* SEH requires the use of pops to identify the epilogue. */
else if (TARGET_SEH)
restore_regs_via_mov = false;
- /* If we're only restoring one register and sp is not valid then
+ /* If we're only restoring one register and sp cannot be used then
using a move instruction to restore the register since it's
less work than reloading sp and popping the register. */
- else if (!m->fs.sp_valid && frame.nregs <= 1)
+ else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
restore_regs_via_mov = true;
else if (TARGET_EPILOGUE_USING_MOVE
&& cfun->machine->use_fast_prologue_epilogue
@@ -14603,7 +14635,7 @@ ix86_expand_epilogue (int style)
the stack pointer, if we will restore via sp. */
if (TARGET_64BIT
&& m->fs.sp_offset > 0x7fffffff
- && !(m->fs.fp_valid || m->fs.drap_valid)
+ && !(fp_valid_at (frame.stack_realign_offset) || m->fs.drap_valid)
&& (frame.nsseregs + frame.nregs) != 0)
{
pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
@@ -14689,6 +14721,7 @@ ix86_expand_epilogue (int style)
}
m->fs.sp_offset = UNITS_PER_WORD;
m->fs.sp_valid = true;
+ m->fs.sp_realigned = false;
}
}
else
@@ -14710,10 +14743,11 @@ ix86_expand_epilogue (int style)
}
/* First step is to deallocate the stack frame so that we can
- pop the registers. Also do it on SEH target for very large
- frame as the emitted instructions aren't allowed by the ABI in
- epilogues. */
- if (!m->fs.sp_valid
+ pop the registers. If the stack pointer was realigned, it needs
+ to be restored now. Also do it on SEH target for very large
+ frame as the emitted instructions aren't allowed by the ABI
+ in epilogues. */
+ if (!m->fs.sp_valid || m->fs.sp_realigned
|| (TARGET_SEH
&& (m->fs.sp_offset - frame.reg_save_offset
>= SEH_MAX_FRAME_SIZE)))
@@ -14741,7 +14775,8 @@ ix86_expand_epilogue (int style)
{
/* If the stack pointer is valid and pointing at the frame
pointer store address, then we only need a pop. */
- if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
+ if (sp_valid_at (frame.hfp_save_offset)
+ && m->fs.sp_offset == frame.hfp_save_offset)
ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
/* Leave results in shorter dependency chains on CPUs that are
able to grok it fast. */
@@ -14795,6 +14830,7 @@ ix86_expand_epilogue (int style)
be possible to merge the local stack deallocation with the
deallocation forced by ix86_static_chain_on_stack. */
gcc_assert (m->fs.sp_valid);
+ gcc_assert (!m->fs.sp_realigned);
gcc_assert (!m->fs.fp_valid);
gcc_assert (!m->fs.realigned);
if (m->fs.sp_offset != UNITS_PER_WORD)
@@ -2482,6 +2482,17 @@ struct GTY(()) machine_frame_state
set, the SP/FP offsets above are relative to the aligned frame
and not the CFA. */
BOOL_BITFIELD realigned : 1;
+
+ /* Indicates whether the stack pointer has been re-aligned. When set,
+ SP/FP continue to be relative to the CFA, but the stack pointer
+ should only be used for offsets >= sp_realigned_offset, while
+ the frame pointer should be used for offsets < sp_realigned_offset.
+ The flags realigned and sp_realigned are mutually exclusive. */
+ BOOL_BITFIELD sp_realigned : 1;
+
+ /* If sp_realigned is set, this is the offset from the CFA that the
+ stack pointer was realigned to. */
+ HOST_WIDE_INT sp_realigned_offset;
};
/* Private to winnt.c. */
Add the fields sp_realigned and sp_realigned_offset to struct machine_frame_state. We now have the concept of the stack pointer being re-aligned rather than invalid. The inline functions sp_valid_at and fp_valid_at are added to test if a given location relative to the CFA can be accessed with the stack or frame pointer, respectively. Stack allocation prior to re-alignment is modified so that we allocate what is needed, and do not allocate unneeded space when no SSE registers are saved but frame.sse_reg_save_offset has nevertheless been increased for alignment. As this change only alters how SSE registers are saved, moving the re-alignment AND instruction should not hinder parallelization of int register saves. Signed-off-by: Daniel Santos <daniel.santos@pobox.com> --- gcc/config/i386/i386.c | 74 +++++++++++++++++++++++++++++++++++++------------- gcc/config/i386/i386.h | 11 ++++++++ 2 files changed, 66 insertions(+), 19 deletions(-)