diff mbox

Ping: C-family stack check for threads

Message ID 4E78D800.20004@web.de
State New
Headers show

Commit Message

Thomas Klein Sept. 20, 2011, 6:14 p.m. UTC
ping

references
http://gcc.gnu.org/ml/gcc-patches/2011-09/msg00310.html
http://gcc.gnu.org/ml/gcc-patches/2011-08/msg00216.html
http://gcc.gnu.org/ml/gcc-patches/2011-07/msg00281.html
http://gcc.gnu.org/ml/gcc-patches/2011-07/msg00149.html
http://gcc.gnu.org/ml/gcc-patches/2011-06/msg01872.html
http://gcc.gnu.org/ml/gcc-patches/2011-03/msg01226.html

gcc/ChangeLog

2011-09-20  Thomas Klein <th.r.klein@web.de>
     * opts.c (common_handle_option): introduce new parameters "direct" and
     "indirect"
     * flag-types.h (enum stack_check_type): Likewise

     * explow.c (allocate_dynamic_stack_space):
     - suppress stack probing if parameter "direct", "indirect" or if a
     stack-limit is given
     - do additional read of limit value if parameter "indirect" and a
     stack-limit symbol is given
     - emit a call to a stack_failure function [as an alternative to a trap
     call]
     (function probe_stack_range): if allowed to override the range porbe
     emit generic_limit_check_stack

     * config/arm/arm.c
     (stack_check_work_registers): new function to find possible working
     registers [only used by "stack check"]
     (emit_push_regs): add push RTL instruction without keeping regnumber
     and frame memory in mind.
     (emit_pop_regs): add pop RTL instruction to revert the above push
     (emit_stack_check_insns): new function to write RTL instructions for
     stack check at prologue stage.
     (arm_expand_prologue): stack check integration for ARM and Thumb-2
     (thumb1_output_function_prologue): stack check integration for Thumb-1

     * config/arm/arm.md
     (cbranchsi4_insn): allow compare and branch using stack pointer
     register [at thumb mode]
     (arm_cmpsi_insn): allow comparing using stack pointer register [at 
arm]
     (probe_stack): do not emit code when parameters "direct" or "indirect"
     is given, emit move code way same as in gcc/explow.c [function
     emit_stack_probe]
     (probe_stack_done): dummy to make sure probe_stack insns are not
     optimized away
     (generic_limit_check_stack): if stack-limit and parameter "generic" is
     given use the limit the same way as in function
     allocate_dynamic_stack_space
     (stack_failure): failure call used in stack check functions
     emit_stack_check_insns, generic_limit_check_stack or
     allocate_dynamic_stack_space [similar to a trap but avoid conflict 
with
     builtin_trap]

Comments

Joseph Myers Sept. 20, 2011, 8:41 p.m. UTC | #1
Please post your pings under a more meaningful subject line that indicates 
that this is an ARM back-end and middle-end patch.  It's not a C-family 
patch, C-family maintainers can't give it any useful review.
diff mbox

Patch

Index: gcc/opts.c
===================================================================
--- gcc/opts.c	(revision 179007)
+++ gcc/opts.c	(working copy)
@@ -1644,6 +1644,12 @@  common_handle_option (struct gcc_options *opts,
 			   : STACK_CHECK_STATIC_BUILTIN
 			     ? STATIC_BUILTIN_STACK_CHECK
 			     : GENERIC_STACK_CHECK;
+      else if (!strcmp (arg, "indirect"))
+	/* This is an other stack checking method.  */
+	opts->x_flag_stack_check = INDIRECT_STACK_CHECK;
+      else if (!strcmp (arg, "direct"))
+	/* This is an other stack checking method.  */
+	opts->x_flag_stack_check = DIRECT_STACK_CHECK;
       else
 	warning_at (loc, 0, "unknown stack check parameter \"%s\"", arg);
       break;
Index: gcc/flag-types.h
===================================================================
--- gcc/flag-types.h	(revision 179007)
+++ gcc/flag-types.h	(working copy)
@@ -153,7 +153,15 @@  enum stack_check_type
 
   /* Check the stack and entirely rely on the target configuration
      files, i.e. do not use the generic mechanism at all.  */
-  FULL_BUILTIN_STACK_CHECK
+  FULL_BUILTIN_STACK_CHECK,
+
+  /* Check the stack (if possible) before allocation of local variables at
+     each function entry. The stack limit is directly given e.g. by address
+     of a symbol */
+  DIRECT_STACK_CHECK,
+  /* Check the stack (if possible) before allocation of local variables at
+     each function entry. The stack limit is given by global variable. */
+  INDIRECT_STACK_CHECK
 };
 
 /* Names for the different levels of -Wstrict-overflow=N.  The numeric
Index: gcc/explow.c
===================================================================
--- gcc/explow.c	(revision 179007)
+++ gcc/explow.c	(working copy)
@@ -1386,7 +1386,12 @@  allocate_dynamic_stack_space (rtx size, unsigned s
 
   /* If needed, check that we have the required amount of stack.  Take into
      account what has already been checked.  */
-  if (STACK_CHECK_MOVING_SP)
+  if (  STACK_CHECK_MOVING_SP 
+#ifdef HAVE_generic_limit_check_stack    
+     || crtl->limit_stack
+#endif
+     || flag_stack_check == DIRECT_STACK_CHECK
+     || flag_stack_check == INDIRECT_STACK_CHECK)
     ;
   else if (flag_stack_check == GENERIC_STACK_CHECK)
     probe_stack_range (STACK_OLD_CHECK_PROTECT + STACK_CHECK_MAX_FRAME_SIZE,
@@ -1423,19 +1428,32 @@  allocate_dynamic_stack_space (rtx size, unsigned s
       /* Check stack bounds if necessary.  */
       if (crtl->limit_stack)
 	{
+          rtx limit_rtx;
 	  rtx available;
 	  rtx space_available = gen_label_rtx ();
+          if (  GET_CODE (stack_limit_rtx) == SYMBOL_REF
+             && flag_stack_check == INDIRECT_STACK_CHECK)
+            limit_rtx = expand_unop (Pmode, mov_optab,
+				    gen_rtx_MEM (Pmode, stack_limit_rtx),
+				    NULL_RTX, 1);
+          else
+            limit_rtx = stack_limit_rtx;
 #ifdef STACK_GROWS_DOWNWARD
 	  available = expand_binop (Pmode, sub_optab,
-				    stack_pointer_rtx, stack_limit_rtx,
+				    stack_pointer_rtx, limit_rtx,
 				    NULL_RTX, 1, OPTAB_WIDEN);
 #else
 	  available = expand_binop (Pmode, sub_optab,
-				    stack_limit_rtx, stack_pointer_rtx,
+				    limit_rtx, stack_pointer_rtx,
 				    NULL_RTX, 1, OPTAB_WIDEN);
 #endif
 	  emit_cmp_and_jump_insns (available, size, GEU, NULL_RTX, Pmode, 1,
 				   space_available);
+#ifdef HAVE_stack_failure
+	  if (HAVE_stack_failure)
+	    emit_insn (gen_stack_failure ());
+	  else
+#endif
 #ifdef HAVE_trap
 	  if (HAVE_trap)
 	    emit_insn (gen_trap ());
@@ -1582,6 +1600,13 @@  probe_stack_range (HOST_WIDE_INT first, rtx size)
 	return;
     }
 #endif
+#ifdef HAVE_generic_limit_check_stack
+  else if (HAVE_generic_limit_check_stack)
+    {
+      rtx addr = memory_address (Pmode,stack_pointer_rtx);
+      emit_insn (gen_generic_limit_check_stack (addr));
+    }
+#endif
 
   /* Otherwise we have to generate explicit probes.  If we have a constant
      small number of them to generate, that's the easy case.  */
Index: gcc/config/arm/arm.c
===================================================================
--- gcc/config/arm/arm.c	(revision 179007)
+++ gcc/config/arm/arm.c	(working copy)
@@ -16285,6 +16285,307 @@  thumb_set_frame_pointer (arm_stack_offsets *offset
   RTX_FRAME_RELATED_P (insn) = 1;
 }
 
+/*search for possible work registers for stack-check operation at prologue
+ return the number of register that can be used without extra push/pop */
+
+static int
+stack_check_work_registers (rtx *workreg)
+{
+  int reg, i, k, n, nregs;
+  
+  if (crtl->args.info.pcs_variant <= ARM_PCS_AAPCS_LOCAL)
+    {
+      nregs = crtl->args.info.aapcs_next_ncrn;
+    }
+  else
+    {
+      nregs = crtl->args.info.nregs;
+    }
+
+
+  n = 0;
+  i = 0;
+  /* check if we can use one of the argument registers r0..r3 as long as they
+   * not holding data*/
+  for (reg = 0; reg <= LAST_ARG_REGNUM && i < 2; reg++)
+    {
+      if (  !df_regs_ever_live_p (reg)
+         || (cfun->machine->uses_anonymous_args && crtl->args.pretend_args_size
+                  > (LAST_ARG_REGNUM - reg) * UNITS_PER_WORD)
+         || (!cfun->machine->uses_anonymous_args && nregs < reg + 1)
+         )
+        {
+          workreg[i++] = gen_rtx_REG (SImode, reg);
+          /*if only one register can be used without push
+           *keep the next register in mind (either r1,r2,r3 or r0) that 
+           *might be pushed later*/
+          if (i<2)
+            {
+              n = (reg + 1) % NUM_ARG_REGS;
+            }
+        }
+    }
+
+  /* otherwise try to use r4..r7*/
+  for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM && i < 2; reg++)
+    {
+      if (  df_regs_ever_live_p (reg)
+         && !fixed_regs[reg]
+         && reg != FP_REGNUM )
+        {
+          workreg[i++] = gen_rtx_REG (SImode, reg);
+        }
+    }
+
+  if (TARGET_32BIT)
+    {
+      /* ARM and Thumb-2 can use high regs.  */
+      for (reg = FIRST_HI_REGNUM; reg <= LAST_HI_REGNUM && i < 2; reg ++)
+        if (  df_regs_ever_live_p (reg)
+           && !fixed_regs[reg]
+           && reg != FP_REGNUM )
+          {
+            workreg[i++] = gen_rtx_REG (SImode, reg);
+          }
+    }
+
+  k = i;
+  /* if not enough found to be uses without extra push,
+   * collect next from r0..r3*/
+  for ( ; i<2; i++)
+    workreg[i] = gen_rtx_REG (SImode, n++);
+
+  /* only if k==0, two register will be pushed later
+   * only in this case the registers are guaranteed to be sorted */
+  return k;
+}
+
+/* push some registers to stack */
+static void
+emit_push_regs(int num_to_push, rtx *reg)
+{
+  int i;
+  rtvec tmpvec;
+  rtx par[16], dwarf, tmp, insn;
+
+  if (num_to_push > 15 || num_to_push < 0)
+    return;
+
+  tmpvec = gen_rtvec (1, reg[0]);
+  par[0] = gen_rtx_UNSPEC (BLKmode, tmpvec, UNSPEC_PUSH_MULT);
+  for (i=1; i<num_to_push; i++)
+    par[i] = gen_rtx_USE (VOIDmode, reg[i]);
+
+  tmp = plus_constant (stack_pointer_rtx, -4 * num_to_push);
+  dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
+  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
+  tmp = gen_frame_mem (BLKmode, tmp);
+  par[0]= gen_rtx_SET (VOIDmode, tmp, par[0]);
+  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (num_to_push, par));
+  insn = emit_insn (tmp);
+  RTX_FRAME_RELATED_P (insn) = 1;
+  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
+  return;
+}
+
+/* pop some registers from stack */
+static void
+emit_pop_regs(const int num_to_pop, rtx *reg)
+{
+  int i;
+  rtvec tmpvec;
+  rtx par[16], dwarf, tmp, insn;
+
+  if (num_to_pop > 15 || num_to_pop < 0)
+    return;
+
+  tmpvec = gen_rtvec (1, reg[0]);
+  par[0] = gen_rtx_UNSPEC (BLKmode, tmpvec, UNSPEC_PUSH_MULT);
+  for (i=1; i<num_to_pop; i++)
+    par[i] = gen_rtx_USE (VOIDmode, reg[i]);
+  tmp = plus_constant (stack_pointer_rtx, 4 * num_to_pop);
+  dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
+  tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
+  tmp = gen_frame_mem (BLKmode, tmp);
+  par[0] = gen_rtx_SET (VOIDmode, tmp, par[0]);
+  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (num_to_pop, par));
+  insn = emit_insn (tmp);
+  RTX_FRAME_RELATED_P (insn) = 1;
+  add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
+  return;
+}
+
+/*
+ * Emit RTL instructions for stack check at prologue stage.
+ * For Thumb this may look like this:
+ *   push {rsym,ramn}
+ *   ldr rsym, =symbol_addr_of(stack_limit_rtx)
+ *   ldr rsym, [rsym]
+ *   ldr ramn, =lenght_of(amount)
+ *   add rsym, rsym, ramn
+ *   cmp sp, rsym
+ *   bhs .LSPCHK0
+ *   push {lr}
+ *   bl __thumb_stack_failure
+ * .LSPCHK0:
+ *   pop {rsym,ramn}
+ */
+static void
+emit_stack_check_insns (HOST_WIDE_INT amount, int lr_not_yet_pushed)
+{
+  unsigned numregs;
+  unsigned amount_needsreg;
+  bool amount_const_ok, is_non_opt_thumb2, is_thumb2_hi_reg[2];
+  bool issym=false;
+  rtx reg[2], cmp_reg, amount_rtx;
+  rtx dwarf, tmp, insn;
+  rtx jump, label;
+
+  numregs = stack_check_work_registers(reg);
+
+  if (TARGET_THUMB1)
+    amount_const_ok = (amount < 256);
+  else
+    amount_const_ok = const_ok_for_arm (amount);
+
+  if (GET_CODE (stack_limit_rtx) == SYMBOL_REF) /*stack_limit_rtx*/
+    {
+      issym = true;
+      amount_needsreg = !amount_const_ok;
+    }
+  else
+    amount_needsreg = (amount != 0);
+
+  is_non_opt_thumb2 = (TARGET_THUMB2 && !(optimize_size || optimize >= 2));
+  is_thumb2_hi_reg[0] = (TARGET_THUMB2 && INTVAL(reg[0])>7);
+  is_thumb2_hi_reg[1] = (TARGET_THUMB2 && INTVAL(reg[1])>7);
+
+  /* push as many as needed */
+  if (issym && amount_needsreg) /*need two temp regs for limit and amount*/
+    {
+      if (numregs >= 2)
+        ; /*have 2 regs => no need to push*/
+      else if (numregs == 1)
+        {
+          /*have one reg but need two regs => push temp reg for amount*/
+    	  emit_push_regs (1, &reg[1]); /*push {reg1}*/
+          /*due to additional push try to correct amount*/
+          if (amount >= 4)
+            amount -= 4;
+        }
+      else
+        {
+          /*have no reg but need two => push temp regs for limit and amount*/
+    	  emit_push_regs (2, &reg[0]); /*push {reg0,reg1}*/
+          /*due to additional push try to correct amount*/
+          if (amount >= 8)
+            amount -= 8;
+        }
+    }
+  else if ((issym || amount_needsreg) && numregs == 0)
+    { /*push temp reg either for limit or amount*/
+      emit_push_regs (1, &reg[0]); /*push {reg0}*/
+      /*due to additional push try to correct amount*/
+      if (amount >= 4)
+        {
+          if (amount_const_ok)
+            {
+              if (TARGET_THUMB1 || const_ok_for_arm(amount - 4))
+                amount -= 4;
+              /*on Thumb2 or ARM may not corrected; shouldn't hurt*/
+            }
+          else /*will be loaded from pool*/
+            amount -= 4;
+        }
+    }
+
+  amount_rtx = GEN_INT (amount);
+
+  /* move limit plus amount to cmp_reg e.g. reg[0] */
+  if (issym)
+    {
+      if (is_non_opt_thumb2 || is_thumb2_hi_reg[0])
+        arm_emit_movpair(reg[0], stack_limit_rtx);
+      else
+        emit_move_insn(reg[0], stack_limit_rtx);
+
+      if (flag_stack_check == INDIRECT_STACK_CHECK)
+        emit_insn (gen_movsi (reg[0], gen_rtx_MEM (SImode, reg[0])));
+      if (amount)
+        {
+          if (amount_const_ok)
+            emit_insn(gen_addsi3(reg[0], reg[0], amount_rtx));
+          else
+            {
+              if (is_non_opt_thumb2 || is_thumb2_hi_reg[1])
+                arm_emit_movpair(reg[1], amount_rtx);
+              else
+                emit_insn (gen_movsi (reg[1], amount_rtx));
+              emit_insn(gen_addsi3(reg[0], reg[0], reg[1]));
+            }
+        }
+      cmp_reg = reg[0];
+    }
+  else if (amount)
+    {
+      if (amount_const_ok)
+        emit_move_insn(reg[0], amount_rtx);
+      else
+        {
+          if (is_non_opt_thumb2 || is_thumb2_hi_reg[0])
+            arm_emit_movpair(reg[0], amount_rtx);
+          else
+            emit_insn (gen_movsi (reg[0], amount_rtx));
+        }
+      emit_insn(gen_addsi3(reg[0], reg[0], stack_limit_rtx));
+      cmp_reg = reg[0];
+    }
+  else
+    cmp_reg = stack_limit_rtx;
+
+  /*compare and jump*/
+  emit_insn (gen_blockage ());
+  label = gen_label_rtx ();
+  do_compare_rtx_and_jump (stack_pointer_rtx, cmp_reg, GEU, 1, Pmode,
+		  NULL_RTX, NULL_RTX, label, -1);
+  jump = get_last_insn ();
+  gcc_assert (JUMP_P (jump));
+  JUMP_LABEL (jump) = label;
+  LABEL_NUSES (label)++;
+  if (lr_not_yet_pushed) /*push LR if not already done*/
+    {
+      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
+      emit_push_regs (1, &lr);
+    }
+  insn = emit_insn (gen_stack_failure ());
+  if (lr_not_yet_pushed)
+    {
+      /*the trap will not come back; but tell it has restored the stack*/
+      tmp = plus_constant (stack_pointer_rtx, 4);
+      dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
+      RTX_FRAME_RELATED_P (insn) = 1;
+      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
+    }
+  emit_label (label);
+
+  /*restore registers*/
+  if (issym && amount_needsreg) /*pop temp regs used by limit and amount*/
+    {
+      if (numregs >= 2)
+        ; /*no need to pop*/
+      else if (numregs == 1)
+        emit_pop_regs (1, &reg[1]); /*pop {reg1}*/
+      else
+        emit_pop_regs (2, &reg[0]); /*pop {reg0, reg1}*/
+    }
+  else if ((issym || amount_needsreg) && numregs == 0)
+    { /*pop temp reg used by limit or amount*/
+      emit_pop_regs (1, &reg[0]); /*pop {reg0}*/
+    }
+
+  return;
+}
+
 /* Generate the prologue instructions for entry into an ARM or Thumb-2
    function.  */
 void
@@ -16536,6 +16837,17 @@  arm_expand_prologue (void)
     current_function_static_stack_size
       = offsets->outgoing_args - offsets->saved_args;
 
+  if (  crtl->limit_stack
+     && !(IS_INTERRUPT (func_type))
+     && (  flag_stack_check == DIRECT_STACK_CHECK 
+        || flag_stack_check == INDIRECT_STACK_CHECK)
+     && (offsets->outgoing_args - offsets->saved_args) > 0
+     )
+    {
+      emit_stack_check_insns (offsets->outgoing_args - saved_regs
+                  - offsets->saved_args, !(live_regs_mask & (1<<LR_REGNUM)) );
+    }
+    
   if (offsets->outgoing_args != offsets->saved_args + saved_regs)
     {
       /* This add can produce multiple insns for a large constant, so we
@@ -21753,6 +22065,16 @@  thumb1_expand_prologue (void)
 
   amount = offsets->outgoing_args - offsets->saved_regs;
   amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
+  
+  if(  crtl->limit_stack
+    && (  flag_stack_check == DIRECT_STACK_CHECK
+       || flag_stack_check == INDIRECT_STACK_CHECK)
+    && (offsets->outgoing_args - offsets->saved_args)
+    )
+    {
+      emit_stack_check_insns (amount, !(l_mask & (1<<LR_REGNUM)));
+    }
+  
   if (amount)
     {
       if (amount < 512)
@@ -21912,6 +22234,7 @@  thumb1_output_interwork (void)
   asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
 
   return "";
+    
 }
 
 /* Handle the case of a double word load into a low register from
Index: gcc/config/arm/arm.md
===================================================================
--- gcc/config/arm/arm.md	(revision 179007)
+++ gcc/config/arm/arm.md	(working copy)
@@ -112,6 +112,7 @@ 
   UNSPEC_SYMBOL_OFFSET  ; The offset of the start of the symbol from
                         ; another symbolic address.
   UNSPEC_MEMORY_BARRIER ; Represent a memory barrier.
+  UNSPEC_PROBE_STACK    ; probe stack memory reference
   UNSPEC_UNALIGNED_LOAD	; Used to represent ldr/ldrh instructions that access
 			; unaligned locations, on architectures which support
 			; that.
@@ -6908,8 +6909,8 @@ 
 (define_insn "cbranchsi4_insn"
   [(set (pc) (if_then_else
 	      (match_operator 0 "arm_comparison_operator"
-	       [(match_operand:SI 1 "s_register_operand" "l,l*h")
-	        (match_operand:SI 2 "thumb1_cmp_operand" "lI*h,*r")])
+	       [(match_operand:SI 1 "s_register_operand" "l,l*h,k")
+	        (match_operand:SI 2 "thumb1_cmp_operand" "lI*h,*r,r")])
 	      (label_ref (match_operand 3 "" ""))
 	      (pc)))]
   "TARGET_THUMB1"
@@ -7455,17 +7456,18 @@ 
 
 (define_insn "*arm_cmpsi_insn"
   [(set (reg:CC CC_REGNUM)
-	(compare:CC (match_operand:SI 0 "s_register_operand" "l,r,r,r")
-		    (match_operand:SI 1 "arm_add_operand"    "Py,r,rI,L")))]
+	(compare:CC (match_operand:SI 0 "s_register_operand" "l,r,k,r,r")
+		    (match_operand:SI 1 "arm_add_operand"    "Py,r,r,rI,L")))]
   "TARGET_32BIT"
   "@
    cmp%?\\t%0, %1
    cmp%?\\t%0, %1
    cmp%?\\t%0, %1
+   cmp%?\\t%0, %1
    cmn%?\\t%0, #%n1"
   [(set_attr "conds" "set")
-   (set_attr "arch" "t2,t2,any,any")
-   (set_attr "length" "2,2,4,4")]
+   (set_attr "arch" "t2,t2,any,any,any")
+   (set_attr "length" "2,2,4,4,4")]
 )
 
 (define_insn "*cmpsi_shiftsi"
@@ -11180,6 +11182,69 @@ 
 
 ;;
 
+(define_expand "probe_stack"
+  [(match_operand 0 "memory_operand" "")]
+  "TARGET_EITHER"
+{
+  if (  flag_stack_check == DIRECT_STACK_CHECK
+     || flag_stack_check == INDIRECT_STACK_CHECK)
+    ;
+  else
+    {
+      emit_move_insn (operands[0], const0_rtx);
+      emit_insn (gen_probe_stack_done ());
+      emit_insn (gen_blockage ());
+    }
+  DONE;
+}
+)
+
+(define_insn "probe_stack_done"
+  [(unspec_volatile [(const_int 0)] UNSPEC_PROBE_STACK)]
+  "TARGET_EITHER"
+  {return \"@ probe stack done\";}
+  [(set_attr "type" "store1")
+   (set_attr "length" "0")]
+)
+
+(define_expand "generic_limit_check_stack"
+  [(match_operand 0 "memory_operand" "")]
+  "crtl->limit_stack 
+  && flag_stack_check != DIRECT_STACK_CHECK 
+  && flag_stack_check != INDIRECT_STACK_CHECK"
+{
+  rtx label = gen_label_rtx ();
+  rtx addr = copy_rtx (operands[0]);
+  addr = gen_rtx_fmt_ee (MINUS, Pmode, addr, GEN_INT (0));
+  addr = force_operand (addr, NULL_RTX);
+  emit_insn (gen_blockage ());
+  emit_cmp_and_jump_insns (stack_limit_rtx, addr, LEU, NULL_RTX, Pmode, 1,
+                           label);
+  emit_insn (gen_stack_failure ());
+  emit_label (label);
+  emit_insn (gen_blockage ());
+  DONE;
+}
+)
+
+(define_insn "stack_failure"
+  [(trap_if (const_int 1) (const_int 0))
+   (clobber (reg:SI LR_REGNUM))
+   (clobber (reg:CC CC_REGNUM))]
+  "TARGET_EITHER"
+  "*
+  {
+    if (TARGET_ARM)
+      output_asm_insn (\"bl\\t__arm_stack_failure\\t%@ trap call\", operands);
+    else
+      output_asm_insn (\"bl\\t__thumb_stack_failure\\t%@ trap call\", operands);
+  }
+  return \"\";
+  "
+  [(set_attr "conds" "clob")
+    (set_attr "length" "8")]
+)
+
 ;; We only care about the lower 16 bits of the constant 
 ;; being inserted into the upper 16 bits of the register.
 (define_insn "*arm_movtas_ze"