From patchwork Tue Aug 2 19:19:00 2011
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Thomas Klein
X-Patchwork-Id: 107976
Return-Path:
X-Original-To: incoming@patchwork.ozlabs.org
Delivered-To: patchwork-incoming@bilbo.ozlabs.org
Received: from sourceware.org (server1.sourceware.org [209.132.180.131])
by ozlabs.org (Postfix) with SMTP id A18E4B71C5
for ;
Wed, 3 Aug 2011 03:22:40 +1000 (EST)
Received: (qmail 15252 invoked by alias); 2 Aug 2011 17:22:36 -0000
Received: (qmail 15233 invoked by uid 22791); 2 Aug 2011 17:22:33 -0000
X-SWARE-Spam-Status: No, hits=-1.0 required=5.0 tests=AWL, BAYES_00,
FREEMAIL_FROM, KAM_STOCKGEN, RCVD_IN_DNSWL_NONE,
RP_MATCHES_RCVD, T_TO_NO_BRKTS_FREEMAIL
X-Spam-Check-By: sourceware.org
Received: from fmmailgate02.web.de (HELO fmmailgate02.web.de)
(217.72.192.227) by sourceware.org (qpsmtpd/0.43rc1) with
ESMTP; Tue, 02 Aug 2011 17:22:16 +0000
Received: from smtp04.web.de ( [172.20.0.225]) by fmmailgate02.web.de
(Postfix) with ESMTP id DC5CA1A79A61D for
; Tue, 2 Aug 2011 19:22:14 +0200 (CEST)
Received: from [80.138.240.32] (helo=[80.138.240.32]) by smtp04.web.de with
asmtp (TLSv1:AES256-SHA:256) (WEB.DE 4.110 #2) id
1QoIfV-0007Cj-00 for gcc-patches@gcc.gnu.org;
Tue, 02 Aug 2011 19:22:14 +0200
Message-ID: <4E384DA4.40500@web.de>
Date: Tue, 02 Aug 2011 19:19:00 +0000
From: Thomas Klein
User-Agent: Mozilla/5.0 (X11; U; FreeBSD amd64; de-DE;
rv:1.9.1.16) Gecko/20110503 Thunderbird/3.0.11
MIME-Version: 1.0
To: gcc-patches@gcc.gnu.org
Subject: Re: Ping: C-family stack check for threads
References: <4E123DE2.1090504@web.de> <4E1336D1.9030608@redhat.com>
In-Reply-To: <4E1336D1.9030608@redhat.com>
X-Sender: th.r.klein@web.de
Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm
Precedence: bulk
List-Id:
List-Unsubscribe:
List-Archive:
List-Post:
List-Help:
Sender: gcc-patches-owner@gcc.gnu.org
Delivered-To: mailing list gcc-patches@gcc.gnu.org
Hello
Here is my next try to put the stack check into rtl at prologue stage.
To me, it was not as easy as I hoped.
I had some minor problems getting push/pop and the compare/jump working.
Hoping the way I chose is acceptable.
With rtl no extra pool to hold pointer or size values is required any more.
That's fine.
So this movement to rtl does make sense.
Regards
Thomas Klein
;; being inserted into the upper 16 bits of the register.
(define_insn "*arm_movtas_ze"
Index: gcc/opts.c
===================================================================
--- gcc/opts.c (revision 176974)
+++ gcc/opts.c (working copy)
@@ -1644,6 +1644,12 @@ common_handle_option (struct gcc_options *opts,
: STACK_CHECK_STATIC_BUILTIN
? STATIC_BUILTIN_STACK_CHECK
: GENERIC_STACK_CHECK;
+ else if (!strcmp (arg, "indirect"))
+ /* This is an other stack checking method. */
+ opts->x_flag_stack_check = INDIRECT_STACK_CHECK;
+ else if (!strcmp (arg, "direct"))
+ /* This is an other stack checking method. */
+ opts->x_flag_stack_check = DIRECT_STACK_CHECK;
else
warning_at (loc, 0, "unknown stack check parameter \"%s\"", arg);
break;
Index: gcc/flag-types.h
===================================================================
--- gcc/flag-types.h (revision 176974)
+++ gcc/flag-types.h (working copy)
@@ -153,7 +153,15 @@ enum stack_check_type
/* Check the stack and entirely rely on the target configuration
files, i.e. do not use the generic mechanism at all. */
- FULL_BUILTIN_STACK_CHECK
+ FULL_BUILTIN_STACK_CHECK,
+
+ /* Check the stack (if possible) before allocation of local variables at
+ each function entry. The stack limit is directly given e.g. by address
+ of a symbol */
+ DIRECT_STACK_CHECK,
+ /* Check the stack (if possible) before allocation of local variables at
+ each function entry. The stack limit is given by global variable. */
+ INDIRECT_STACK_CHECK
};
/* Names for the different levels of -Wstrict-overflow=N. The numeric
Index: gcc/explow.c
===================================================================
--- gcc/explow.c (revision 176974)
+++ gcc/explow.c (working copy)
@@ -1358,7 +1358,12 @@ allocate_dynamic_stack_space (rtx size, unsigned s
/* If needed, check that we have the required amount of stack. Take
into
account what has already been checked. */
- if (STACK_CHECK_MOVING_SP)
+ if ( STACK_CHECK_MOVING_SP
+#ifdef HAVE_generic_limit_check_stack
+ || crtl->limit_stack
+#endif
+ || flag_stack_check == DIRECT_STACK_CHECK
+ || flag_stack_check == INDIRECT_STACK_CHECK)
;
else if (flag_stack_check == GENERIC_STACK_CHECK)
probe_stack_range (STACK_OLD_CHECK_PROTECT +
STACK_CHECK_MAX_FRAME_SIZE,
@@ -1392,19 +1397,32 @@ allocate_dynamic_stack_space (rtx size, unsigned s
/* Check stack bounds if necessary. */
if (crtl->limit_stack)
{
+ rtx limit_rtx;
rtx available;
rtx space_available = gen_label_rtx ();
+ if ( GET_CODE (stack_limit_rtx) == SYMBOL_REF
+ && flag_stack_check == INDIRECT_STACK_CHECK)
+ limit_rtx = expand_unop (Pmode, mov_optab,
+ gen_rtx_MEM (Pmode, stack_limit_rtx),
+ NULL_RTX, 1);
+ else
+ limit_rtx = stack_limit_rtx;
#ifdef STACK_GROWS_DOWNWARD
available = expand_binop (Pmode, sub_optab,
- stack_pointer_rtx, stack_limit_rtx,
+ stack_pointer_rtx, limit_rtx,
NULL_RTX, 1, OPTAB_WIDEN);
#else
available = expand_binop (Pmode, sub_optab,
- stack_limit_rtx, stack_pointer_rtx,
+ limit_rtx, stack_pointer_rtx,
NULL_RTX, 1, OPTAB_WIDEN);
#endif
emit_cmp_and_jump_insns (available, size, GEU, NULL_RTX, Pmode, 1,
space_available);
+#ifdef HAVE_stack_failure
+ if (HAVE_stack_failure)
+ emit_insn (gen_stack_failure ());
+ else
+#endif
#ifdef HAVE_trap
if (HAVE_trap)
emit_insn (gen_trap ());
@@ -1547,6 +1565,13 @@ probe_stack_range (HOST_WIDE_INT first, rtx size)
return;
}
#endif
+#ifdef HAVE_generic_limit_check_stack
+ else if (HAVE_generic_limit_check_stack)
+ {
+ rtx addr = memory_address (Pmode,stack_pointer_rtx);
+ emit_insn (gen_generic_limit_check_stack (addr));
+ }
+#endif
/* Otherwise we have to generate explicit probes. If we have a constant
small number of them to generate, that's the easy case. */
Index: gcc/config/arm/arm.c
===================================================================
--- gcc/config/arm/arm.c (revision 176974)
+++ gcc/config/arm/arm.c (working copy)
@@ -15809,6 +15809,299 @@ thumb_set_frame_pointer (arm_stack_offsets *offset
RTX_FRAME_RELATED_P (insn) = 1;
}
+/*search for possible work registers for stack-check operation at prologue
+ return the number of register that can be used without extra push/pop */
+
+static int
+stack_check_work_registers (rtx *workreg)
+{
+ int reg, i, k, n, nregs;
+
+ if (crtl->args.info.pcs_variant <= ARM_PCS_AAPCS_LOCAL)
+ {
+ nregs = crtl->args.info.aapcs_next_ncrn;
+ }
+ else
+ nregs = crtl->args.info.nregs;
+
+
+ n = 0;
+ i = 0;
+ /* check if we can use one of the argument registers r0..r3 as long
as they
+ * not holding data*/
+ for (reg = 0; reg <= LAST_ARG_REGNUM && i < 2; reg++)
+ {
+ if ( !df_regs_ever_live_p (reg)
+ || (cfun->machine->uses_anonymous_args &&
crtl->args.pretend_args_size
+ > (LAST_ARG_REGNUM - reg) * UNITS_PER_WORD)
+ || (!cfun->machine->uses_anonymous_args && nregs < reg + 1)
+ )
+ {
+ workreg[i++] = gen_rtx_REG (SImode, reg);
+ n = (reg + 1) % 4;
+ }
+ }
+
+ /* otherwise try to use r4..r7*/
+ for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM && i < 2; reg++)
+ {
+ if ( df_regs_ever_live_p (reg)
+ && !fixed_regs[reg]
+ && reg != FP_REGNUM )
+ {
+ workreg[i++] = gen_rtx_REG (SImode, reg);
+ }
+ }
+
+ if (TARGET_32BIT)
+ {
+ /* ARM and Thumb-2 can use high regs. */
+ for (reg = FIRST_HI_REGNUM; reg <= LAST_HI_REGNUM && i < 2; reg ++)
+ if ( df_regs_ever_live_p (reg)
+ && !fixed_regs[reg]
+ && reg != FP_REGNUM )
+ {
+ workreg[i++] = gen_rtx_REG (SImode, reg);
+ }
+ }
+
+ k = i;
+ /* if not enough found to be uses without extra push,
+ * collect next from r0..r4*/
+ for ( ; i<2; i++)
+ workreg[i] = gen_rtx_REG (SImode, n++);
+
+ /* only if k==0, two register will be pushed later
+ * only in this case the registers are guaranteed to be sorted */
+ return k;
+}
+
+/* push some registers to stack */
+static void
+emit_push_regs(int num_to_push, rtx *reg)
+{
+ int i;
+ rtvec tmpvec;
+ rtx par[16], dwarf, tmp, insn;
+
+ if (num_to_push > 15 || num_to_push < 0)
+ return;
+
+ tmpvec = gen_rtvec (1, reg[0]);
+ par[0] = gen_rtx_UNSPEC (BLKmode, tmpvec, UNSPEC_PUSH_MULT);
+ for (i=1; i 15 || num_to_pop < 0)
+ return;
+
+ tmpvec = gen_rtvec (1, reg[0]);
+ par[0] = gen_rtx_UNSPEC (BLKmode, tmpvec, UNSPEC_PUSH_MULT);
+ for (i=1; i= 2));
+ is_thumb2_hi_reg[0] = (TARGET_THUMB2 && INTVAL(reg[0])>7);
+ is_thumb2_hi_reg[1] = (TARGET_THUMB2 && INTVAL(reg[1])>7);
+
+ /* push as many as needed */
+ if (issym && amount_needsreg) /*need two temp regs for limit and amount*/
+ {
+ if (numregs >= 2)
+ ; /*have 2 regs => no need to push*/
+ else if (numregs == 1)
+ {
+ /*have one reg but need two regs => push temp reg for amount*/
+ emit_push_regs (1, ®[1]); /*push {reg1}*/
+ /*due to additional push try to correct amount*/
+ if (amount >= 4)
+ amount -= 4;
+ }
+ else
+ {
+ /*have no reg but need two => push temp regs for limit and
amount*/
+ emit_push_regs (2, ®[0]); /*push {reg0,reg1}*/
+ /*due to additional push try to correct amount*/
+ if (amount >= 8)
+ amount -= 8;
+ }
+ }
+ else if ((issym || amount_needsreg) && numregs == 0)
+ { /*push temp reg either for limit or amount*/
+ emit_push_regs (1, ®[0]); /*push {reg0}*/
+ /*due to additional push try to correct amount*/
+ if (amount >= 4)
+ {
+ if (amount_const_ok)
+ {
+ if (TARGET_THUMB1 || const_ok_for_arm(amount - 4))
+ amount -= 4;
+ /*on Thumb2 or ARM may not corrected; shouldn't hurt*/
+ }
+ else /*will be loaded from pool*/
+ amount -= 4;
+ }
+ }
+
+ amount_rtx = GEN_INT (amount);
+
+ /* move limit plus amount to cmp_reg e.g. reg[0] */
+ if (issym)
+ {
+ if (is_non_opt_thumb2 || is_thumb2_hi_reg[0])
+ arm_emit_movpair(reg[0], stack_limit_rtx);
+ else
+ emit_move_insn(reg[0], stack_limit_rtx);
+
+ if (flag_stack_check == INDIRECT_STACK_CHECK)
+ emit_insn (gen_movsi (reg[0], gen_rtx_MEM (SImode, reg[0])));
+ if (amount)
+ {
+ if (amount_const_ok)
+ emit_insn(gen_addsi3(reg[0], reg[0], amount_rtx));
+ else
+ {
+ if (is_non_opt_thumb2 || is_thumb2_hi_reg[1])
+ arm_emit_movpair(reg[1], amount_rtx);
+ else
+ emit_insn (gen_movsi (reg[1], amount_rtx));
+ emit_insn(gen_addsi3(reg[0], reg[0], reg[1]));
+ }
+ }
+ cmp_reg = reg[0];
+ }
+ else if (amount)
+ {
+ if (amount_const_ok)
+ emit_move_insn(reg[0], amount_rtx);
+ else
+ {
+ if (is_non_opt_thumb2 || is_thumb2_hi_reg[0])
+ arm_emit_movpair(reg[0], amount_rtx);
+ else
+ emit_insn (gen_movsi (reg[0], amount_rtx));
+ }
+ emit_insn(gen_addsi3(reg[0], reg[0], stack_limit_rtx));
+ cmp_reg = reg[0];
+ }
+ else
+ cmp_reg = stack_limit_rtx;
+
+ /*compare and jump*/
+ emit_insn (gen_blockage ());
+ label = gen_label_rtx ();
+ do_compare_rtx_and_jump (stack_pointer_rtx, cmp_reg, GEU, 1, Pmode,
+ NULL_RTX, NULL_RTX, label, -1);
+ jump = get_last_insn ();
+ gcc_assert (JUMP_P (jump));
+ JUMP_LABEL (jump) = label;
+ LABEL_NUSES (label)++;
+ if (lr_not_yet_pushed) /*push LR if not already done*/
+ {
+ rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
+ emit_push_regs (1, &lr);
+ }
+ insn = emit_insn (gen_stack_failure ());
+ if (lr_not_yet_pushed)
+ {
+ /*the trap will not come back; but tell it has restored the stack*/
+ tmp = plus_constant (stack_pointer_rtx, 4);
+ dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
+ }
+ emit_label (label);
+
+ /*restore registers*/
+ if (issym && amount_needsreg) /*pop temp regs used by limit and amount*/
+ {
+ if (numregs >= 2)
+ ; /*no need to pop*/
+ else if (numregs == 1)
+ emit_pop_regs (1, ®[1]); /*pop {reg1}*/
+ else
+ emit_pop_regs (2, ®[0]); /*pop {reg0, reg1}*/
+ }
+ else if ((issym || amount_needsreg) && numregs == 0)
+ { /*pop temp reg used by limit or amount*/
+ emit_pop_regs (1, ®[0]); /*pop {reg0}*/
+ }
+
+ return;
+}
+
/* Generate the prologue instructions for entry into an ARM or Thumb-2
function. */
void
@@ -16060,6 +16353,17 @@ arm_expand_prologue (void)
current_function_static_stack_size
= offsets->outgoing_args - offsets->saved_args;
+ if ( crtl->limit_stack
+ && !(IS_INTERRUPT (func_type))
+ && ( flag_stack_check == DIRECT_STACK_CHECK
+ || flag_stack_check == INDIRECT_STACK_CHECK)
+ && (offsets->outgoing_args - offsets->saved_args) > 0
+ )
+ {
+ emit_stack_check_insns (offsets->outgoing_args - saved_regs
+ - offsets->saved_args, !(live_regs_mask & (1<outgoing_args != offsets->saved_args + saved_regs)
{
/* This add can produce multiple insns for a large constant, so we
@@ -21261,6 +21565,16 @@ thumb1_expand_prologue (void)
amount = offsets->outgoing_args - offsets->saved_regs;
amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
+
+ if( crtl->limit_stack
+ && ( flag_stack_check == DIRECT_STACK_CHECK
+ || flag_stack_check == INDIRECT_STACK_CHECK)
+ && (offsets->outgoing_args - offsets->saved_args)
+ )
+ {
+ emit_stack_check_insns (amount, !(l_mask & (1<limit_stack
+ && flag_stack_check != DIRECT_STACK_CHECK
+ && flag_stack_check != INDIRECT_STACK_CHECK"
+{
+ rtx label = gen_label_rtx ();
+ rtx addr = copy_rtx (operands[0]);
+ addr = gen_rtx_fmt_ee (MINUS, Pmode, addr, GEN_INT (0));
+ addr = force_operand (addr, NULL_RTX);
+ emit_insn (gen_blockage ());
+ emit_cmp_and_jump_insns (stack_limit_rtx, addr, LEU, NULL_RTX, Pmode, 1,
+ label);
+ emit_insn (gen_stack_failure ());
+ emit_label (label);
+ emit_insn (gen_blockage ());
+ DONE;
+}
+)
+
+(define_insn "stack_failure"
+ [(trap_if (const_int 1) (const_int 0))
+ (clobber (reg:SI LR_REGNUM))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_EITHER"
+ "*
+ {
+ if (TARGET_ARM)
+ output_asm_insn (\"bl\\t__arm_stack_failure\\t%@ trap call\",
operands);
+ else
+ output_asm_insn (\"bl\\t__thumb_stack_failure\\t%@ trap call\",
operands);
+ }
+ return \"\";
+ "
+ [(set_attr "conds" "clob")
+ (set_attr "length" "8")]
+)
+
;; We only care about the lower 16 bits of the constant