@@ -12960,6 +12960,14 @@ ix86_compute_frame_layout (void)
gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
gcc_assert (preferred_alignment <= stack_alignment_needed);
+ /* The only ABI saving SSE regs should be 64-bit ms_abi. */
+ gcc_assert (TARGET_64BIT || !frame->nsseregs);
+ if (TARGET_64BIT && m->call_ms2sysv)
+ {
+ gcc_assert (stack_alignment_needed >= 16);
+ gcc_assert (!frame->nsseregs);
+ }
+
/* For SEH we have to limit the amount of code movement into the prologue.
At present we do this via a BLOCKAGE, at which point there's very little
scheduling that can be done, which means that there's very little point
@@ -13022,54 +13030,88 @@ ix86_compute_frame_layout (void)
if (TARGET_SEH)
frame->hard_frame_pointer_offset = offset;
- /* When re-aligning the stack frame, but not saving SSE registers, this
- is the offset we want adjust the stack pointer to. */
- frame->stack_realign_allocate_offset = offset;
+ /* Calculate the size of the va-arg area (not including padding, if any). */
+ frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
- /* The re-aligned stack starts here. Values before this point are not
- directly comparable with values below this point. Use sp_valid_at
- to determine if the stack pointer is valid for a given offset and
- fp_valid_at for the frame pointer. */
if (stack_realign_fp)
- offset = ROUND_UP (offset, stack_alignment_needed);
- frame->stack_realign_offset = offset;
-
- if (TARGET_64BIT && m->call_ms2sysv)
{
- gcc_assert (stack_alignment_needed >= 16);
- gcc_assert (!frame->nsseregs);
+ /* We may need a 16-byte aligned stack for the remainder of the
+ register save area, but the stack frame for the local function
+ may require a greater alignment if using AVX/2/512. In order
+ to avoid wasting space, we first calculate the space needed for
+ the rest of the register saves, add that to the stack pointer,
+ and then realign the stack to the boundary of the start of the
+ frame for the local function. */
+ HOST_WIDE_INT space_needed = 0;
+ HOST_WIDE_INT sse_reg_space_needed = 0;
- m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
- offset += xlogue_layout::get_instance ().get_stack_space_used ();
- }
+ if (TARGET_64BIT)
+ {
+ if (m->call_ms2sysv)
+ {
+ m->call_ms2sysv_pad_in = 0;
+ space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
+ }
- /* Align and set SSE register save area. */
- else if (frame->nsseregs)
- {
- /* The only ABI that has saved SSE registers (Win64) also has a
- 16-byte aligned default stack. However, many programs violate
- the ABI, and Wine64 forces stack realignment to compensate.
+ else if (frame->nsseregs)
+ /* The only ABI that has saved SSE registers (Win64) also has a
+ 16-byte aligned default stack. However, many programs violate
+ the ABI, and Wine64 forces stack realignment to compensate. */
+ space_needed = frame->nsseregs * 16;
+
+ sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
+
+ /* 64-bit frame->va_arg_size should always be a multiple of 16, but
+ rounding to be pedantic. */
+ space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
+ }
+ else
+ space_needed = frame->va_arg_size;
+
+ /* Record the allocation size required prior to the realignment AND. */
+ frame->stack_realign_allocate = space_needed;
+
+ /* The re-aligned stack starts at frame->stack_realign_offset. Values
+ before this point are not directly comparable with values below
+ this point. Use sp_valid_at to determine if the stack pointer is
+ valid for a given offset, fp_valid_at for the frame pointer, or
+ choose_baseaddr to have a base register chosen for you.
- If the incoming stack boundary is at least 16 bytes, or DRAP is
- required and the DRAP re-alignment boundary is at least 16 bytes,
- then we want the SSE register save area properly aligned. */
- if (ix86_incoming_stack_boundary >= 128
- || (stack_realign_drap && stack_alignment_needed >= 16))
- offset = ROUND_UP (offset, 16);
- offset += frame->nsseregs * 16;
- frame->stack_realign_allocate_offset = offset;
+ Note that the result of (frame->stack_realign_offset
+ & (stack_alignment_needed - 1)) may not equal zero. */
+ offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
+ frame->stack_realign_offset = offset - space_needed;
+ frame->sse_reg_save_offset = frame->stack_realign_offset
+ + sse_reg_space_needed;
}
+ else
+ {
+ frame->stack_realign_offset = offset;
- frame->sse_reg_save_offset = offset;
+ if (TARGET_64BIT && m->call_ms2sysv)
+ {
+ m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
+ offset += xlogue_layout::get_instance ().get_stack_space_used ();
+ }
- /* Va-arg area */
- frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
- offset += frame->va_arg_size;
+ /* Align and set SSE register save area. */
+ else if (frame->nsseregs)
+ {
+ /* If the incoming stack boundary is at least 16 bytes, or DRAP is
+ required and the DRAP re-alignment boundary is at least 16 bytes,
+ then we want the SSE register save area properly aligned. */
+ if (ix86_incoming_stack_boundary >= 128
+ || (stack_realign_drap && stack_alignment_needed >= 16))
+ offset = ROUND_UP (offset, 16);
+ offset += frame->nsseregs * 16;
+ }
+ frame->sse_reg_save_offset = offset;
+ offset += frame->va_arg_size;
+ }
/* Align start of frame for local function. */
- if (stack_realign_fp
- || m->call_ms2sysv
- || offset != frame->sse_reg_save_offset
+ if (m->call_ms2sysv
+ || frame->va_arg_size != 0
|| size != 0
|| !crtl->is_leaf
|| cfun->calls_alloca
@@ -2503,7 +2503,7 @@ struct GTY(()) ix86_frame
HOST_WIDE_INT stack_pointer_offset;
HOST_WIDE_INT hfp_save_offset;
HOST_WIDE_INT reg_save_offset;
- HOST_WIDE_INT stack_realign_allocate_offset;
+ HOST_WIDE_INT stack_realign_allocate;
HOST_WIDE_INT stack_realign_offset;
HOST_WIDE_INT sse_reg_save_offset;
These changes affect how the stack frame is calculated from the region starting at frame.reg_save_offset until frame.frame_pointer_offset, which includes either the stub save area or the (inline) SSE register save area and the va_args register save area. The calculation used when not realigning the stack pointer is the same, but when realigning we calculate the 16-byte aligned space needed in reverse, so that the stack realignment boundary at frame.stack_realign_offset may not necessarily be a multiple of stack_alignment_needed, but the value of frame.frame_pointer_offset will be. This results in a properly aligned stack for the function body and avoids wasting stack space. Signed-off-by: Daniel Santos <daniel.santos@pobox.com> --- gcc/config/i386/i386.c | 116 +++++++++++++++++++++++++++++++++---------------- gcc/config/i386/i386.h | 2 +- 2 files changed, 80 insertions(+), 38 deletions(-)