From patchwork Tue Aug 2 19:19:00 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Thomas Klein X-Patchwork-Id: 107976 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id A18E4B71C5 for ; Wed, 3 Aug 2011 03:22:40 +1000 (EST) Received: (qmail 15252 invoked by alias); 2 Aug 2011 17:22:36 -0000 Received: (qmail 15233 invoked by uid 22791); 2 Aug 2011 17:22:33 -0000 X-SWARE-Spam-Status: No, hits=-1.0 required=5.0 tests=AWL, BAYES_00, FREEMAIL_FROM, KAM_STOCKGEN, RCVD_IN_DNSWL_NONE, RP_MATCHES_RCVD, T_TO_NO_BRKTS_FREEMAIL X-Spam-Check-By: sourceware.org Received: from fmmailgate02.web.de (HELO fmmailgate02.web.de) (217.72.192.227) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Tue, 02 Aug 2011 17:22:16 +0000 Received: from smtp04.web.de ( [172.20.0.225]) by fmmailgate02.web.de (Postfix) with ESMTP id DC5CA1A79A61D for ; Tue, 2 Aug 2011 19:22:14 +0200 (CEST) Received: from [80.138.240.32] (helo=[80.138.240.32]) by smtp04.web.de with asmtp (TLSv1:AES256-SHA:256) (WEB.DE 4.110 #2) id 1QoIfV-0007Cj-00 for gcc-patches@gcc.gnu.org; Tue, 02 Aug 2011 19:22:14 +0200 Message-ID: <4E384DA4.40500@web.de> Date: Tue, 02 Aug 2011 19:19:00 +0000 From: Thomas Klein User-Agent: Mozilla/5.0 (X11; U; FreeBSD amd64; de-DE; rv:1.9.1.16) Gecko/20110503 Thunderbird/3.0.11 MIME-Version: 1.0 To: gcc-patches@gcc.gnu.org Subject: Re: Ping: C-family stack check for threads References: <4E123DE2.1090504@web.de> <4E1336D1.9030608@redhat.com> In-Reply-To: <4E1336D1.9030608@redhat.com> X-Sender: th.r.klein@web.de Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Hello Here is my next try 
to put the stack check into RTL at the prologue stage. To me, it was not as easy as I hoped. I had a few small problems getting push/pop and the compare/jump working. I hope the way I chose is acceptable. With RTL, no extra pool to hold pointer or size values is required any more. That's fine. So this movement to RTL does make sense. Regards Thomas Klein ;; being inserted into the upper 16 bits of the register. (define_insn "*arm_movtas_ze" Index: gcc/opts.c =================================================================== --- gcc/opts.c (revision 176974) +++ gcc/opts.c (working copy) @@ -1644,6 +1644,12 @@ common_handle_option (struct gcc_options *opts, : STACK_CHECK_STATIC_BUILTIN ? STATIC_BUILTIN_STACK_CHECK : GENERIC_STACK_CHECK; + else if (!strcmp (arg, "indirect")) + /* This is another stack checking method. */ + opts->x_flag_stack_check = INDIRECT_STACK_CHECK; + else if (!strcmp (arg, "direct")) + /* This is another stack checking method. */ + opts->x_flag_stack_check = DIRECT_STACK_CHECK; else warning_at (loc, 0, "unknown stack check parameter \"%s\"", arg); break; Index: gcc/flag-types.h =================================================================== --- gcc/flag-types.h (revision 176974) +++ gcc/flag-types.h (working copy) @@ -153,7 +153,15 @@ enum stack_check_type /* Check the stack and entirely rely on the target configuration files, i.e. do not use the generic mechanism at all. */ - FULL_BUILTIN_STACK_CHECK + FULL_BUILTIN_STACK_CHECK, + + /* Check the stack (if possible) before allocation of local variables at + each function entry. The stack limit is directly given, e.g. by the + address of a symbol. */ + DIRECT_STACK_CHECK, + /* Check the stack (if possible) before allocation of local variables at + each function entry. The stack limit is given by a global variable. */ + INDIRECT_STACK_CHECK }; /* Names for the different levels of -Wstrict-overflow=N. 
The numeric Index: gcc/explow.c =================================================================== --- gcc/explow.c (revision 176974) +++ gcc/explow.c (working copy) @@ -1358,7 +1358,12 @@ allocate_dynamic_stack_space (rtx size, unsigned s /* If needed, check that we have the required amount of stack. Take into account what has already been checked. */ - if (STACK_CHECK_MOVING_SP) + if ( STACK_CHECK_MOVING_SP +#ifdef HAVE_generic_limit_check_stack + || crtl->limit_stack +#endif + || flag_stack_check == DIRECT_STACK_CHECK + || flag_stack_check == INDIRECT_STACK_CHECK) ; else if (flag_stack_check == GENERIC_STACK_CHECK) probe_stack_range (STACK_OLD_CHECK_PROTECT + STACK_CHECK_MAX_FRAME_SIZE, @@ -1392,19 +1397,32 @@ allocate_dynamic_stack_space (rtx size, unsigned s /* Check stack bounds if necessary. */ if (crtl->limit_stack) { + rtx limit_rtx; rtx available; rtx space_available = gen_label_rtx (); + if ( GET_CODE (stack_limit_rtx) == SYMBOL_REF + && flag_stack_check == INDIRECT_STACK_CHECK) + limit_rtx = expand_unop (Pmode, mov_optab, + gen_rtx_MEM (Pmode, stack_limit_rtx), + NULL_RTX, 1); + else + limit_rtx = stack_limit_rtx; #ifdef STACK_GROWS_DOWNWARD available = expand_binop (Pmode, sub_optab, - stack_pointer_rtx, stack_limit_rtx, + stack_pointer_rtx, limit_rtx, NULL_RTX, 1, OPTAB_WIDEN); #else available = expand_binop (Pmode, sub_optab, - stack_limit_rtx, stack_pointer_rtx, + limit_rtx, stack_pointer_rtx, NULL_RTX, 1, OPTAB_WIDEN); #endif emit_cmp_and_jump_insns (available, size, GEU, NULL_RTX, Pmode, 1, space_available); +#ifdef HAVE_stack_failure + if (HAVE_stack_failure) + emit_insn (gen_stack_failure ()); + else +#endif #ifdef HAVE_trap if (HAVE_trap) emit_insn (gen_trap ()); @@ -1547,6 +1565,13 @@ probe_stack_range (HOST_WIDE_INT first, rtx size) return; } #endif +#ifdef HAVE_generic_limit_check_stack + else if (HAVE_generic_limit_check_stack) + { + rtx addr = memory_address (Pmode,stack_pointer_rtx); + emit_insn (gen_generic_limit_check_stack (addr)); + 
} +#endif /* Otherwise we have to generate explicit probes. If we have a constant small number of them to generate, that's the easy case. */ Index: gcc/config/arm/arm.c =================================================================== --- gcc/config/arm/arm.c (revision 176974) +++ gcc/config/arm/arm.c (working copy) @@ -15809,6 +15809,299 @@ thumb_set_frame_pointer (arm_stack_offsets *offset RTX_FRAME_RELATED_P (insn) = 1; } +/*search for possible work registers for stack-check operation at prologue + return the number of register that can be used without extra push/pop */ + +static int +stack_check_work_registers (rtx *workreg) +{ + int reg, i, k, n, nregs; + + if (crtl->args.info.pcs_variant <= ARM_PCS_AAPCS_LOCAL) + { + nregs = crtl->args.info.aapcs_next_ncrn; + } + else + nregs = crtl->args.info.nregs; + + + n = 0; + i = 0; + /* check if we can use one of the argument registers r0..r3 as long as they + * not holding data*/ + for (reg = 0; reg <= LAST_ARG_REGNUM && i < 2; reg++) + { + if ( !df_regs_ever_live_p (reg) + || (cfun->machine->uses_anonymous_args && crtl->args.pretend_args_size + > (LAST_ARG_REGNUM - reg) * UNITS_PER_WORD) + || (!cfun->machine->uses_anonymous_args && nregs < reg + 1) + ) + { + workreg[i++] = gen_rtx_REG (SImode, reg); + n = (reg + 1) % 4; + } + } + + /* otherwise try to use r4..r7*/ + for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM && i < 2; reg++) + { + if ( df_regs_ever_live_p (reg) + && !fixed_regs[reg] + && reg != FP_REGNUM ) + { + workreg[i++] = gen_rtx_REG (SImode, reg); + } + } + + if (TARGET_32BIT) + { + /* ARM and Thumb-2 can use high regs. 
*/ + for (reg = FIRST_HI_REGNUM; reg <= LAST_HI_REGNUM && i < 2; reg ++) + if ( df_regs_ever_live_p (reg) + && !fixed_regs[reg] + && reg != FP_REGNUM ) + { + workreg[i++] = gen_rtx_REG (SImode, reg); + } + } + + k = i; + /* if not enough found to be uses without extra push, + * collect next from r0..r4*/ + for ( ; i<2; i++) + workreg[i] = gen_rtx_REG (SImode, n++); + + /* only if k==0, two register will be pushed later + * only in this case the registers are guaranteed to be sorted */ + return k; +} + +/* push some registers to stack */ +static void +emit_push_regs(int num_to_push, rtx *reg) +{ + int i; + rtvec tmpvec; + rtx par[16], dwarf, tmp, insn; + + if (num_to_push > 15 || num_to_push < 0) + return; + + tmpvec = gen_rtvec (1, reg[0]); + par[0] = gen_rtx_UNSPEC (BLKmode, tmpvec, UNSPEC_PUSH_MULT); + for (i=1; i 15 || num_to_pop < 0) + return; + + tmpvec = gen_rtvec (1, reg[0]); + par[0] = gen_rtx_UNSPEC (BLKmode, tmpvec, UNSPEC_PUSH_MULT); + for (i=1; i= 2)); + is_thumb2_hi_reg[0] = (TARGET_THUMB2 && INTVAL(reg[0])>7); + is_thumb2_hi_reg[1] = (TARGET_THUMB2 && INTVAL(reg[1])>7); + + /* push as many as needed */ + if (issym && amount_needsreg) /*need two temp regs for limit and amount*/ + { + if (numregs >= 2) + ; /*have 2 regs => no need to push*/ + else if (numregs == 1) + { + /*have one reg but need two regs => push temp reg for amount*/ + emit_push_regs (1, ®[1]); /*push {reg1}*/ + /*due to additional push try to correct amount*/ + if (amount >= 4) + amount -= 4; + } + else + { + /*have no reg but need two => push temp regs for limit and amount*/ + emit_push_regs (2, ®[0]); /*push {reg0,reg1}*/ + /*due to additional push try to correct amount*/ + if (amount >= 8) + amount -= 8; + } + } + else if ((issym || amount_needsreg) && numregs == 0) + { /*push temp reg either for limit or amount*/ + emit_push_regs (1, ®[0]); /*push {reg0}*/ + /*due to additional push try to correct amount*/ + if (amount >= 4) + { + if (amount_const_ok) + { + if (TARGET_THUMB1 || 
const_ok_for_arm(amount - 4)) + amount -= 4; + /*on Thumb2 or ARM may not corrected; shouldn't hurt*/ + } + else /*will be loaded from pool*/ + amount -= 4; + } + } + + amount_rtx = GEN_INT (amount); + + /* move limit plus amount to cmp_reg e.g. reg[0] */ + if (issym) + { + if (is_non_opt_thumb2 || is_thumb2_hi_reg[0]) + arm_emit_movpair(reg[0], stack_limit_rtx); + else + emit_move_insn(reg[0], stack_limit_rtx); + + if (flag_stack_check == INDIRECT_STACK_CHECK) + emit_insn (gen_movsi (reg[0], gen_rtx_MEM (SImode, reg[0]))); + if (amount) + { + if (amount_const_ok) + emit_insn(gen_addsi3(reg[0], reg[0], amount_rtx)); + else + { + if (is_non_opt_thumb2 || is_thumb2_hi_reg[1]) + arm_emit_movpair(reg[1], amount_rtx); + else + emit_insn (gen_movsi (reg[1], amount_rtx)); + emit_insn(gen_addsi3(reg[0], reg[0], reg[1])); + } + } + cmp_reg = reg[0]; + } + else if (amount) + { + if (amount_const_ok) + emit_move_insn(reg[0], amount_rtx); + else + { + if (is_non_opt_thumb2 || is_thumb2_hi_reg[0]) + arm_emit_movpair(reg[0], amount_rtx); + else + emit_insn (gen_movsi (reg[0], amount_rtx)); + } + emit_insn(gen_addsi3(reg[0], reg[0], stack_limit_rtx)); + cmp_reg = reg[0]; + } + else + cmp_reg = stack_limit_rtx; + + /*compare and jump*/ + emit_insn (gen_blockage ()); + label = gen_label_rtx (); + do_compare_rtx_and_jump (stack_pointer_rtx, cmp_reg, GEU, 1, Pmode, + NULL_RTX, NULL_RTX, label, -1); + jump = get_last_insn (); + gcc_assert (JUMP_P (jump)); + JUMP_LABEL (jump) = label; + LABEL_NUSES (label)++; + if (lr_not_yet_pushed) /*push LR if not already done*/ + { + rtx lr = gen_rtx_REG (SImode, LR_REGNUM); + emit_push_regs (1, &lr); + } + insn = emit_insn (gen_stack_failure ()); + if (lr_not_yet_pushed) + { + /*the trap will not come back; but tell it has restored the stack*/ + tmp = plus_constant (stack_pointer_rtx, 4); + dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); + } + 
emit_label (label); + + /*restore registers*/ + if (issym && amount_needsreg) /*pop temp regs used by limit and amount*/ + { + if (numregs >= 2) + ; /*no need to pop*/ + else if (numregs == 1) + emit_pop_regs (1, ®[1]); /*pop {reg1}*/ + else + emit_pop_regs (2, ®[0]); /*pop {reg0, reg1}*/ + } + else if ((issym || amount_needsreg) && numregs == 0) + { /*pop temp reg used by limit or amount*/ + emit_pop_regs (1, ®[0]); /*pop {reg0}*/ + } + + return; +} + /* Generate the prologue instructions for entry into an ARM or Thumb-2 function. */ void @@ -16060,6 +16353,17 @@ arm_expand_prologue (void) current_function_static_stack_size = offsets->outgoing_args - offsets->saved_args; + if ( crtl->limit_stack + && !(IS_INTERRUPT (func_type)) + && ( flag_stack_check == DIRECT_STACK_CHECK + || flag_stack_check == INDIRECT_STACK_CHECK) + && (offsets->outgoing_args - offsets->saved_args) > 0 + ) + { + emit_stack_check_insns (offsets->outgoing_args - saved_regs + - offsets->saved_args, !(live_regs_mask & (1<outgoing_args != offsets->saved_args + saved_regs) { /* This add can produce multiple insns for a large constant, so we @@ -21261,6 +21565,16 @@ thumb1_expand_prologue (void) amount = offsets->outgoing_args - offsets->saved_regs; amount -= 4 * thumb1_extra_regs_pushed (offsets, true); + + if( crtl->limit_stack + && ( flag_stack_check == DIRECT_STACK_CHECK + || flag_stack_check == INDIRECT_STACK_CHECK) + && (offsets->outgoing_args - offsets->saved_args) + ) + { + emit_stack_check_insns (amount, !(l_mask & (1<limit_stack + && flag_stack_check != DIRECT_STACK_CHECK + && flag_stack_check != INDIRECT_STACK_CHECK" +{ + rtx label = gen_label_rtx (); + rtx addr = copy_rtx (operands[0]); + addr = gen_rtx_fmt_ee (MINUS, Pmode, addr, GEN_INT (0)); + addr = force_operand (addr, NULL_RTX); + emit_insn (gen_blockage ()); + emit_cmp_and_jump_insns (stack_limit_rtx, addr, LEU, NULL_RTX, Pmode, 1, + label); + emit_insn (gen_stack_failure ()); + emit_label (label); + emit_insn (gen_blockage ()); + 
DONE; +} +) + +(define_insn "stack_failure" + [(trap_if (const_int 1) (const_int 0)) + (clobber (reg:SI LR_REGNUM)) + (clobber (reg:CC CC_REGNUM))] + "TARGET_EITHER" + "* + { + if (TARGET_ARM) + output_asm_insn (\"bl\\t__arm_stack_failure\\t%@ trap call\", operands); + else + output_asm_insn (\"bl\\t__thumb_stack_failure\\t%@ trap call\", operands); + } + return \"\"; + " + [(set_attr "conds" "clob") + (set_attr "length" "8")] +) + ;; We only care about the lower 16 bits of the constant