@@ -10999,6 +10999,107 @@ pass_s390_early_mach::execute (function *fun)
} // anon namespace
+#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
+
+/* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
+   if necessary.  LAST_PROBE_OFFSET contains the offset of the closest
+   probe relative to the stack pointer.
+
+   Note that SIZE is negative.
+
+   TEMP_REG_IS_LIVE indicates that TEMP_REG actually holds a live
+   value and must be restored if we clobber it.
+
+   Three cases are handled:
+   - stack clash protection is on and the allocation stays within
+     PROBE_INTERVAL of the last probe: only the frame info is dumped
+     and the plain allocation below is used;
+   - stack clash protection is on and the allocation reaches
+     PROBE_INTERVAL: the stack is allocated in PROBE_INTERVAL sized
+     steps, each followed by a probe, then the residual is allocated;
+   - otherwise: the stack pointer is adjusted by a single insn.  */
+static void
+allocate_stack_space (rtx size, HOST_WIDE_INT last_probe_offset,
+                      rtx temp_reg, bool temp_reg_is_live)
+{
+  rtx insn;
+
+  /* If we are emitting stack probes and a SIZE allocation would cross
+     the PROBE_INTERVAL boundary, then we need significantly different
+     sequences to allocate and probe the stack.  */
+  if (flag_stack_clash_protection
+      && last_probe_offset + -INTVAL (size) < PROBE_INTERVAL)
+    dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
+  else if (flag_stack_clash_protection)
+    {
+      rtx memref;
+
+      /* The part of the request that is a whole multiple of
+         PROBE_INTERVAL; it is allocated and probed step by step.  */
+      HOST_WIDE_INT rounded_size = -INTVAL (size) & -PROBE_INTERVAL;
+
+      /* TEMP_REG holds the probe offset from the new stack pointer and
+         also serves as the value stored by the probe.  */
+      emit_move_insn (temp_reg, GEN_INT (PROBE_INTERVAL - 8));
+
+      /* We really should have a runtime loop version as well.  The
+         counter has the same type as ROUNDED_SIZE so that it cannot
+         wrap around for very large frames.  */
+      for (HOST_WIDE_INT i = 0; i < rounded_size; i += PROBE_INTERVAL)
+        {
+          insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
+                                           GEN_INT (-PROBE_INTERVAL)));
+          RTX_FRAME_RELATED_P (insn) = 1;
+
+          /* We just allocated PROBE_INTERVAL bytes of stack space.  Thus,
+             a probe is mandatory here, but LAST_PROBE_OFFSET does not
+             change.  */
+          memref = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, temp_reg,
+                                                     stack_pointer_rtx));
+          /* The store exists only for its side effect of touching the
+             page, so mark it volatile.  */
+          MEM_VOLATILE_P (memref) = 1;
+          emit_move_insn (memref, temp_reg);
+        }
+
+      /* Handle any residual allocation request.  */
+      HOST_WIDE_INT residual = -INTVAL (size) - rounded_size;
+      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
+                                       GEN_INT (-residual)));
+      RTX_FRAME_RELATED_P (insn) = 1;
+      last_probe_offset += residual;
+      if (last_probe_offset >= PROBE_INTERVAL)
+        {
+          emit_move_insn (temp_reg, GEN_INT (residual
+                                             - GET_MODE_SIZE (word_mode)));
+          memref = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, temp_reg,
+                                                     stack_pointer_rtx));
+          MEM_VOLATILE_P (memref) = 1;
+          emit_move_insn (memref, temp_reg);
+        }
+
+      /* We clobbered TEMP_REG, but it really isn't a temporary at this point,
+         restore its value.  -SIZE plus the new stack pointer is the value
+         the stack pointer had on entry to this function.  */
+      if (temp_reg_is_live)
+        {
+          emit_move_insn (temp_reg, GEN_INT (-INTVAL (size)));
+          emit_insn (gen_add2_insn (temp_reg, stack_pointer_rtx));
+        }
+
+      dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
+      emit_insn (gen_blockage ());
+      return;
+    }
+
+  /* Subtract frame size from stack pointer.  */
+
+  if (DISP_IN_RANGE (INTVAL (size)))
+    {
+      insn = gen_rtx_SET (stack_pointer_rtx,
+                          gen_rtx_PLUS (Pmode, stack_pointer_rtx, size));
+      insn = emit_insn (insn);
+    }
+  else
+    {
+      /* Constants that do not satisfy CONST_OK_FOR_K are placed in
+         the literal pool.  */
+      if (!CONST_OK_FOR_K (INTVAL (size)))
+        size = force_const_mem (Pmode, size);
+
+      insn = emit_insn (gen_add2_insn (stack_pointer_rtx, size));
+      annotate_constant_pool_refs (&PATTERN (insn));
+    }
+
+  /* Attach a note expressing the adjustment as a plain
+     "sp = sp + constant", whichever form was emitted above.  */
+  RTX_FRAME_RELATED_P (insn) = 1;
+  rtx real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
+  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+                gen_rtx_SET (stack_pointer_rtx,
+                             gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+                                           real_frame_off)));
+}
+
/* Expand the prologue into a bunch of separate insns. */
void
@@ -11023,6 +11124,16 @@ s390_emit_prologue (void)
else
temp_reg = gen_rtx_REG (Pmode, 1);
+ /* When probing for stack-clash mitigation, we have to track the distance
+ between the stack pointer and closest known reference.
+
+ Most of the time we have to make a worst case assumption. The
+ only exception is when TARGET_BACKCHAIN is active, in which case
+ we know *sp (offset 0) was written. */
+ HOST_WIDE_INT last_probe_offset
+ = (TARGET_BACKCHAIN
+ ? 0 : PROBE_INTERVAL - (STACK_BOUNDARY / UNITS_PER_WORD));
+
s390_save_gprs_to_fprs ();
/* Save call saved gprs. */
@@ -11034,6 +11145,14 @@ s390_emit_prologue (void)
- cfun_frame_layout.first_save_gpr_slot),
cfun_frame_layout.first_save_gpr,
cfun_frame_layout.last_save_gpr);
+
+ /* This is not 100% correct. If we have more than one register saved,
+ then LAST_PROBE_OFFSET can move even closer to sp. */
+ last_probe_offset
+ = (cfun_frame_layout.gprs_offset +
+ UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
+ - cfun_frame_layout.first_save_gpr_slot));
+
emit_insn (insn);
}
@@ -11050,6 +11169,8 @@ s390_emit_prologue (void)
if (cfun_fpr_save_p (i))
{
save_fpr (stack_pointer_rtx, offset, i);
+ if (offset < last_probe_offset)
+ last_probe_offset = offset;
offset += 8;
}
else if (!TARGET_PACKED_STACK || cfun->stdarg)
@@ -11063,6 +11184,8 @@ s390_emit_prologue (void)
if (cfun_fpr_save_p (i))
{
insn = save_fpr (stack_pointer_rtx, offset, i);
+ if (offset < last_probe_offset)
+ last_probe_offset = offset;
offset += 8;
/* If f4 and f6 are call clobbered they are saved due to
@@ -11085,6 +11208,8 @@ s390_emit_prologue (void)
if (cfun_fpr_save_p (i))
{
insn = save_fpr (stack_pointer_rtx, offset, i);
+ if (offset < last_probe_offset)
+ last_probe_offset = offset;
RTX_FRAME_RELATED_P (insn) = 1;
offset -= 8;
@@ -11104,7 +11229,6 @@ s390_emit_prologue (void)
if (cfun_frame_layout.frame_size > 0)
{
rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
- rtx real_frame_off;
if (s390_stack_size)
{
@@ -11177,31 +11301,8 @@ s390_emit_prologue (void)
if (TARGET_BACKCHAIN || next_fpr)
insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx));
- /* Subtract frame size from stack pointer. */
-
- if (DISP_IN_RANGE (INTVAL (frame_off)))
- {
- insn = gen_rtx_SET (stack_pointer_rtx,
- gen_rtx_PLUS (Pmode, stack_pointer_rtx,
- frame_off));
- insn = emit_insn (insn);
- }
- else
- {
- if (!CONST_OK_FOR_K (INTVAL (frame_off)))
- frame_off = force_const_mem (Pmode, frame_off);
-
- insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off));
- annotate_constant_pool_refs (&PATTERN (insn));
- }
-
- RTX_FRAME_RELATED_P (insn) = 1;
- real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
- add_reg_note (insn, REG_FRAME_RELATED_EXPR,
- gen_rtx_SET (stack_pointer_rtx,
- gen_rtx_PLUS (Pmode, stack_pointer_rtx,
- real_frame_off)));
-
+ allocate_stack_space (frame_off, last_probe_offset, temp_reg,
+ TARGET_BACKCHAIN || next_fpr);
/* Set backchain. */
if (TARGET_BACKCHAIN)
@@ -11225,6 +11326,8 @@ s390_emit_prologue (void)
emit_clobber (addr);
}
}
+ else if (flag_stack_clash_protection)
+ dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
/* Save fprs 8 - 15 (64 bit ABI). */
@@ -45,8 +45,8 @@ f7 (void)
foo (buf);
}
-/* { dg-final { scan-rtl-dump-times "Stack clash inline probes" 2 "pro_and_epilogue" } } */
-/* { dg-final { scan-rtl-dump-times "Stack clash probe loop" 2 "pro_and_epilogue" } } */
+/* { dg-final { scan-rtl-dump-times "Stack clash inline probes" 2 "pro_and_epilogue" { xfail s390*-*-*} } } */
+/* { dg-final { scan-rtl-dump-times "Stack clash probe loop" 2 "pro_and_epilogue" { xfail s390*-*-*} } } */
/* { dg-final { scan-rtl-dump-times "Stack clash residual allocation in prologue" 4 "pro_and_epilogue" } } */
/* { dg-final { scan-rtl-dump-times "Stack clash not noreturn" 4 "pro_and_epilogue" } } */