@@ -3676,12 +3676,14 @@ aarch64_expand_prologue (void)
{
if (crtl->is_leaf && !cfun->calls_alloca)
{
- if (frame_size > PROBE_INTERVAL && frame_size > STACK_CHECK_PROTECT)
- aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT,
- frame_size - STACK_CHECK_PROTECT);
+ if (frame_size > PROBE_INTERVAL
+ && frame_size > get_stack_check_protect ())
+ aarch64_emit_probe_stack_range (get_stack_check_protect (),
+ (frame_size
+ - get_stack_check_protect ()));
}
else if (frame_size > 0)
- aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT, frame_size);
+ aarch64_emit_probe_stack_range (get_stack_check_protect (), frame_size);
}
aarch64_sub_sp (IP0_REGNUM, initial_adjust, true);
@@ -7741,7 +7741,7 @@ alpha_expand_prologue (void)
probed_size = frame_size;
if (flag_stack_check)
- probed_size += STACK_CHECK_PROTECT;
+ probed_size += get_stack_check_protect ();
if (probed_size <= 32768)
{
@@ -21680,13 +21680,13 @@ arm_expand_prologue (void)
if (crtl->is_leaf && !cfun->calls_alloca)
{
- if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
- arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
- size - STACK_CHECK_PROTECT,
+ if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
+ arm_emit_probe_stack_range (get_stack_check_protect (),
+ size - get_stack_check_protect (),
regno, live_regs_mask);
}
else if (size > 0)
- arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
+ arm_emit_probe_stack_range (get_stack_check_protect (), size,
regno, live_regs_mask);
}
@@ -27854,7 +27854,7 @@ arm_frame_pointer_required (void)
{
/* We don't have the final size of the frame so adjust. */
size += 32 * UNITS_PER_WORD;
- if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
+ if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
return true;
}
else
@@ -14638,7 +14638,7 @@ ix86_expand_prologue (void)
HOST_WIDE_INT size = allocate;
if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
- size = 0x80000000 - STACK_CHECK_PROTECT - 1;
+ size = 0x80000000 - get_stack_check_protect () - 1;
if (TARGET_STACK_PROBE)
{
@@ -14648,18 +14648,20 @@ ix86_expand_prologue (void)
ix86_emit_probe_stack_range (0, size);
}
else
- ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
+ ix86_emit_probe_stack_range (0,
+ size + get_stack_check_protect ());
}
else
{
if (crtl->is_leaf && !cfun->calls_alloca)
{
- if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
- ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
- size - STACK_CHECK_PROTECT);
+ if (size > PROBE_INTERVAL
+ && size > get_stack_check_protect ())
+ ix86_emit_probe_stack_range (get_stack_check_protect (),
+ size - get_stack_check_protect ());
}
else
- ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
+ ix86_emit_probe_stack_range (get_stack_check_protect (), size);
}
}
}
@@ -3481,15 +3481,16 @@ ia64_expand_prologue (void)
if (crtl->is_leaf && !cfun->calls_alloca)
{
- if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
- ia64_emit_probe_stack_range (STACK_CHECK_PROTECT,
- size - STACK_CHECK_PROTECT,
+ if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
+ ia64_emit_probe_stack_range (get_stack_check_protect (),
+ size - get_stack_check_protect (),
bs_size);
- else if (size + bs_size > STACK_CHECK_PROTECT)
- ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, 0, bs_size);
+ else if (size + bs_size > get_stack_check_protect ())
+ ia64_emit_probe_stack_range (get_stack_check_protect (),
+ 0, bs_size);
}
else if (size + bs_size > 0)
- ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, size, bs_size);
+ ia64_emit_probe_stack_range (get_stack_check_protect (), size, bs_size);
}
if (dump_file)
@@ -12081,12 +12081,12 @@ mips_expand_prologue (void)
{
if (crtl->is_leaf && !cfun->calls_alloca)
{
- if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
- mips_emit_probe_stack_range (STACK_CHECK_PROTECT,
- size - STACK_CHECK_PROTECT);
+ if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
+ mips_emit_probe_stack_range (get_stack_check_protect (),
+ size - get_stack_check_protect ());
}
else if (size > 0)
- mips_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
+ mips_emit_probe_stack_range (get_stack_check_protect (), size);
}
/* Save the registers. Allocate up to MIPS_MAX_FIRST_STACK_STEP
@@ -29597,12 +29597,12 @@ rs6000_emit_prologue (void)
if (crtl->is_leaf && !cfun->calls_alloca)
{
- if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
- rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
- size - STACK_CHECK_PROTECT);
+ if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
+ rs6000_emit_probe_stack_range (get_stack_check_protect (),
+ size - get_stack_check_protect ());
}
else if (size > 0)
- rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
+ rs6000_emit_probe_stack_range (get_stack_check_protect (), size);
}
if (TARGET_FIX_AND_CONTINUE)
@@ -26895,12 +26895,12 @@ rs6000_emit_prologue (void)
if (crtl->is_leaf && !cfun->calls_alloca)
{
- if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
- rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
- size - STACK_CHECK_PROTECT);
+ if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
+ rs6000_emit_probe_stack_range (get_stack_check_protect (),
+ size - get_stack_check_protect ());
}
else if (size > 0)
- rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
+ rs6000_emit_probe_stack_range (get_stack_check_protect (), size);
}
if (TARGET_FIX_AND_CONTINUE)
@@ -5552,12 +5552,12 @@ sparc_expand_prologue (void)
{
if (crtl->is_leaf && !cfun->calls_alloca)
{
- if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
- sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
- size - STACK_CHECK_PROTECT);
+ if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
+ sparc_emit_probe_stack_range (get_stack_check_protect (),
+ size - get_stack_check_protect ());
}
else if (size > 0)
- sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
+ sparc_emit_probe_stack_range (get_stack_check_protect (), size);
}
if (size == 0)
@@ -5663,12 +5663,12 @@ sparc_flat_expand_prologue (void)
{
if (crtl->is_leaf && !cfun->calls_alloca)
{
- if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
- sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
- size - STACK_CHECK_PROTECT);
+ if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
+ sparc_emit_probe_stack_range (get_stack_check_protect (),
+ size - get_stack_check_protect ());
}
else if (size > 0)
- sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
+ sparc_emit_probe_stack_range (get_stack_check_protect (), size);
}
if (sparc_save_local_in_regs_p)
@@ -1408,8 +1408,11 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#endif
-/* The default is not to move the stack pointer.  */
+/* The default is not to move the stack pointer, unless we are using
+ stack clash prevention stack checking. */
#ifndef STACK_CHECK_MOVING_SP
-#define STACK_CHECK_MOVING_SP 0
+#define STACK_CHECK_MOVING_SP \
+ (flag_stack_check == STACK_CLASH_BUILTIN_STACK_CHECK)
#endif
/* This is a kludge to try to capture the discrepancy between the old
@@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. If not see
#include "output.h"
static rtx break_out_memory_refs (rtx);
+static void anti_adjust_stack_and_probe_stack_clash (rtx);
/* Truncate and perhaps sign-extend C as appropriate for MODE. */
@@ -1272,6 +1273,25 @@ get_dynamic_stack_size (rtx *psize, unsigned size_align,
*psize = size;
}
+/* Return the number of bytes to protect on the stack for -fstack-check.
+
+ The default is to protect STACK_CHECK_PROTECT bytes which should be
+ enough to handle a signal.
+
+ When mitigating stack clash style attacks we do not save enough
+ space to handle a signal, so we protect zero bytes.
+
+ The distinction is important because it determines both how far beyond
+ current need we probe the stack and how many bytes are assumed to have
+ already been checked by prior callers in the call chain.  */
+HOST_WIDE_INT
+get_stack_check_protect (void)
+{
+ if (flag_stack_check == STACK_CLASH_BUILTIN_STACK_CHECK)
+ return 0;
+ return STACK_CHECK_PROTECT;
+}
+
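+/* A typical backend use of get_stack_check_protect, distilled from the
+   port changes above (a sketch only; emit_probe_stack_range stands in
+   for each port's own probing routine):
+
+     if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
+       emit_probe_stack_range (get_stack_check_protect (),
+                               size - get_stack_check_protect ());
+
+   Under -fstack-check=clash the protect amount is zero, so in this
+   pattern any frame larger than PROBE_INTERVAL is probed in full,
+   starting at offset zero.  */
+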
/* Return an rtx representing the address of an area of memory dynamically
pushed on the stack.
@@ -1430,7 +1450,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
probe_stack_range (STACK_OLD_CHECK_PROTECT + STACK_CHECK_MAX_FRAME_SIZE,
size);
else if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
- probe_stack_range (STACK_CHECK_PROTECT, size);
+ probe_stack_range (get_stack_check_protect (), size);
/* Don't let anti_adjust_stack emit notes. */
suppress_reg_args_size = true;
@@ -1482,7 +1502,12 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
saved_stack_pointer_delta = stack_pointer_delta;
if (flag_stack_check && STACK_CHECK_MOVING_SP)
- anti_adjust_stack_and_probe (size, false);
+ {
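+      /* Under -fstack-check=clash every byte of the allocation must be
+         probed, whereas the generic variant may skip probes it assumes
+         prior code in the call chain has already performed.  */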
+ if (flag_stack_check == STACK_CLASH_BUILTIN_STACK_CHECK)
+ anti_adjust_stack_and_probe_stack_clash (size);
+ else
+ anti_adjust_stack_and_probe (size, false);
+ }
else
anti_adjust_stack (size);
@@ -1760,6 +1785,126 @@ probe_stack_range (HOST_WIDE_INT first, rtx size)
/* Adjust the stack pointer by minus SIZE (an rtx for a number of bytes)
while probing it. This pushes when SIZE is positive. SIZE need not
+ be constant.
+
+ This is subtly different from anti_adjust_stack_and_probe to try and
+ prevent attacks that jump the stack guard.
+
+ 1. It assumes the prologue did not probe any residual stack allocation.
+ Thus the stack pointer could currently be in the guard page and if
+ this call results in any allocation, it must be probed.
+
+ 2. It never skips probes, whereas anti_adjust_stack_and_probe will
+ skip probes on the first couple of PROBE_INTERVALs on the assumption
+ they're done elsewhere.
+
+ 3. It only allocates and probes SIZE bytes; it does not need to
+ allocate/probe beyond that because this probing style does not
+ guarantee signal handling capability if the guard is hit.
+
+ 4. It does not bother handling constant allocations. They do not
+ happen in practice here. */
+
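+/* For example, assuming PROBE_INTERVAL is 4096 and a runtime SIZE of
+   20000 bytes: ROUNDED_SIZE is 16384, so the loop below allocates and
+   probes four 4096-byte chunks, after which the 3616 residual bytes
+   are allocated and probed separately.  */
+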
+static void
+anti_adjust_stack_and_probe_stack_clash (rtx size)
+{
+ /* First ensure SIZE is Pmode. */
+ if (GET_MODE (size) != VOIDmode && GET_MODE (size) != Pmode)
+ size = convert_to_mode (Pmode, size, 1);
+
+ /* We can get here with a constant size on some targets. But it is not
+ worth having paths for small, medium and large allocations.
+
+ We detect the cases where we can avoid the loop entirely or avoid the
+ residuals entirely. */
+
+ /* Step 1: round SIZE to the previous multiple of the interval. */
+
+ /* ROUNDED_SIZE = SIZE & -PROBE_INTERVAL */
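+  /* This relies on PROBE_INTERVAL being a power of two: e.g. with a
+     4096-byte interval the AND clears the low 12 bits, yielding the
+     largest multiple of the interval not exceeding SIZE.  */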
+ rtx rounded_size
+ = simplify_gen_binary (AND, Pmode, size, GEN_INT (-PROBE_INTERVAL));
+ rtx rounded_size_op = force_operand (rounded_size, NULL_RTX);
+
+ if (rounded_size != CONST0_RTX (Pmode))
+ {
+ /* Step 2: compute final value of the loop counter. */
+
+ /* LAST_ADDR = SP + ROUNDED_SIZE. */
+ rtx last_addr = force_operand (gen_rtx_fmt_ee (STACK_GROW_OP, Pmode,
+ stack_pointer_rtx,
+ rounded_size_op),
+ NULL_RTX);
+
+ /* Step 3: the loop
+
+ while (SP != LAST_ADDR)
+ {
+ SP = SP + PROBE_INTERVAL
+ probe at SP - PROBE_INTERVAL + small constant
+ }
+
+ adjusts SP and probes into the newly allocated space as closely
+ to the original SP as possible, iterating until LAST_ADDR is hit. */
+
+ rtx loop_lab = gen_label_rtx ();
+ rtx end_lab = gen_label_rtx ();
+
+ emit_label (loop_lab);
+
+ /* Jump to END_LAB if SP == LAST_ADDR. */
+ emit_cmp_and_jump_insns (stack_pointer_rtx, last_addr, EQ, NULL_RTX,
+ Pmode, 1, end_lab);
+
+ /* SP = SP + PROBE_INTERVAL and probe at SP. */
+ anti_adjust_stack (GEN_INT (PROBE_INTERVAL));
+
+ /* The prologue does not probe residuals.  Thus the offset here is
+ chosen to probe just beyond what the prologue had already allocated.  */
+ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
+ (PROBE_INTERVAL
+ - GET_MODE_SIZE (word_mode))));
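+      /* On a downward-growing stack that lands one word below the SP
+         value this iteration started with, i.e. within the chunk that
+         was just allocated.  */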
+
+ emit_jump (loop_lab);
+
+ emit_label (end_lab);
+
+ /* This is primarily to make writing tests easy. */
+ if (dump_file)
+ fprintf (dump_file,
+ "Stack clash dynamic allocation and probing in loop.\n");
+ }
+ else if (dump_file)
+ fprintf (dump_file,
+ "Stack clash skipped dynamic allocation "
+ "and probing loop.\n");
+
+ /* Step 4: adjust SP if we cannot assert at compile-time that
+ SIZE is equal to ROUNDED_SIZE. */
+
+ /* TEMP = SIZE - ROUNDED_SIZE. */
+ rtx temp = simplify_gen_binary (MINUS, Pmode, size, rounded_size);
+ if (temp != const0_rtx)
+ {
+ /* Manual CSE if the difference is not known at compile-time. */
+ if (GET_CODE (temp) != CONST_INT)
+ temp = gen_rtx_MINUS (Pmode, size, rounded_size_op);
+ anti_adjust_stack (temp);
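+      /* As in the loop above, probe one word below where SP stood
+         before this residual adjustment (assuming a downward-growing
+         stack).  */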
+ rtx x = force_reg (Pmode, plus_constant (Pmode, temp,
+ -GET_MODE_SIZE (word_mode)));
+ emit_stack_probe (gen_rtx_PLUS (Pmode, stack_pointer_rtx, x));
+
+ /* This is primarily to make writing tests easy. */
+ if (dump_file)
+ fprintf (dump_file,
+ "Stack clash dynamic allocation and probing residuals.\n");
+ }
+}
+
+/* Adjust the stack pointer by minus SIZE (an rtx for a number of bytes)
+ while probing it. This pushes when SIZE is positive. SIZE need not
be constant. If ADJUST_BACK is true, adjust back the stack pointer
by plus SIZE at the end. */
@@ -2703,6 +2703,7 @@ get_full_set_src_cost (rtx x, machine_mode mode, struct full_rtx_costs *c)
/* In explow.c */
extern HOST_WIDE_INT trunc_int_for_mode (HOST_WIDE_INT, machine_mode);
extern rtx plus_constant (machine_mode, rtx, HOST_WIDE_INT, bool = false);
+extern HOST_WIDE_INT get_stack_check_protect (void);
/* In rtl.c */
extern rtx rtx_alloc_stat (RTX_CODE MEM_STAT_DECL);
new file mode 100644
@@ -0,0 +1,75 @@
+/* The goal here is to ensure that dynamic allocations via vlas or
+ alloca calls receive probing.
+
+ Scanning the RTL or assembly code seems like insanity here as does
+ checking for particular allocation sizes and probe offsets. For
+ now we just verify that there's an allocation + probe loop and
+ residual allocation + probe for f?. */
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-check=clash -fdump-rtl-expand -fno-optimize-sibling-calls" } */
+/* { dg-require-effective-target stack_clash_protected } */
+
+__attribute__((noinline, noclone)) void
+foo (char *p)
+{
+ asm volatile ("" : : "r" (p) : "memory");
+}
+
+/* Simple VLA, no other locals. */
+__attribute__((noinline, noclone)) void
+f0 (int x)
+{
+ char vla[x];
+ foo (vla);
+}
+
+/* Simple VLA, small local frame. */
+__attribute__((noinline, noclone)) void
+f1 (int x)
+{
+ char locals[128];
+ char vla[x];
+ foo (vla);
+}
+
+/* Small constant alloca, no other locals. */
+__attribute__((noinline, noclone)) void
+f2 (int x)
+{
+ char *vla = __builtin_alloca (128);
+ foo (vla);
+}
+
+/* Big constant alloca, small local frame. */
+__attribute__((noinline, noclone)) void
+f3 (int x)
+{
+ char locals[128];
+ char *vla = __builtin_alloca (16384);
+ foo (vla);
+}
+
+/* Nonconstant alloca, no other locals. */
+__attribute__((noinline, noclone)) void
+f4 (int x)
+{
+ char *vla = __builtin_alloca (x);
+ foo (vla);
+}
+
+/* Nonconstant alloca, small local frame. */
+__attribute__((noinline, noclone)) void
+f5 (int x)
+{
+ char locals[128];
+ char *vla = __builtin_alloca (x);
+ foo (vla);
+}
+
+/* { dg-final { scan-rtl-dump-times "allocation and probing residuals" 6 "expand" } } */
+
+/* { dg-final { scan-rtl-dump-times "allocation and probing in loop" 6 "expand" { target callee_realigns_stack } } } */
+/* { dg-final { scan-rtl-dump-times "allocation and probing in loop" 5 "expand" { target { ! callee_realigns_stack } } } } */
+/* { dg-final { scan-rtl-dump-times "skipped dynamic allocation and probing loop" 1 "expand" { target { ! callee_realigns_stack } } } } */