Patchwork optimize stack checking for leaf functions

login
register
mail settings
Submitter Eric Botcazou
Date Oct. 31, 2013, 3:18 p.m.
Message ID <53247467.cfRpzTFy4U@polaris>
Download mbox | patch
Permalink /patch/287515/
State New
Headers show

Comments

Eric Botcazou - Oct. 31, 2013, 3:18 p.m.
Given that we maintain a protection area of STACK_CHECK_PROTECT bytes during 
stack checking, there is no point in checking for leaf functions if:
  1. the static frame size is lower than STACK_CHECK_PROTECT bytes, and
  2. there is no dynamic frame allocation.

The attached patch does that for the back-ends which implement static stack 
checking modulo Alpha.  Tested on a bunch of platforms covering the various 
cases, any objections (given that I wrote essentially all the code here)?


2013-10-31  Eric Botcazou  <ebotcazou@adacore.com>

	* config/i386/i386.c (ix86_expand_prologue): Optimize stack checking for
	leaf functions without dynamic stack allocation.
	* config/ia64/ia64.c (ia64_emit_probe_stack_range): Adjust.
	(ia64_expand_prologue): Likewise.
	* config/mips/mips.c (mips_expand_prologue): Likewise.
	* config/rs6000/rs6000.c (rs6000_emit_prologue): Likewise.
	* config/sparc/sparc.c (sparc_expand_prologue): Likewise.
	(sparc_flat_expand_prologue): Likewise.

Patch

Index: config/sparc/sparc.c
===================================================================
--- config/sparc/sparc.c	(revision 204201)
+++ config/sparc/sparc.c	(working copy)
@@ -5362,8 +5362,17 @@  sparc_expand_prologue (void)
   if (flag_stack_usage_info)
     current_function_static_stack_size = size;
 
-  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
-    sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
+  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
+    {
+      if (crtl->is_leaf && !cfun->calls_alloca)
+	{
+	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
+	    sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
+					  size - STACK_CHECK_PROTECT);
+	}
+      else if (size > 0)
+	sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
+    }
 
   if (size == 0)
     ; /* do nothing.  */
@@ -5464,8 +5473,17 @@  sparc_flat_expand_prologue (void)
   if (flag_stack_usage_info)
     current_function_static_stack_size = size;
 
-  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
-    sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
+  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
+    {
+      if (crtl->is_leaf && !cfun->calls_alloca)
+	{
+	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
+	    sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
+					  size - STACK_CHECK_PROTECT);
+	}
+      else if (size > 0)
+	sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
+    }
 
   if (sparc_save_local_in_regs_p)
     emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 204201)
+++ config/i386/i386.c	(working copy)
@@ -10653,8 +10653,12 @@  ix86_expand_prologue (void)
 
       if (STACK_CHECK_MOVING_SP)
 	{
-	  ix86_adjust_stack_and_probe (allocate);
-	  allocate = 0;
+	  if (!(crtl->is_leaf && !cfun->calls_alloca
+		&& allocate <= PROBE_INTERVAL))
+	    {
+	      ix86_adjust_stack_and_probe (allocate);
+	      allocate = 0;
+	    }
 	}
       else
 	{
@@ -10664,9 +10668,26 @@  ix86_expand_prologue (void)
 	    size = 0x80000000 - STACK_CHECK_PROTECT - 1;
 
 	  if (TARGET_STACK_PROBE)
-	    ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
+	    {
+	      if (crtl->is_leaf && !cfun->calls_alloca)
+		{
+		  if (size > PROBE_INTERVAL)
+		    ix86_emit_probe_stack_range (0, size);
+		}
+	      else
+		ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
+	    }
 	  else
-	    ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
+	    {
+	      if (crtl->is_leaf && !cfun->calls_alloca)
+		{
+		  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
+		    ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
+						 size - STACK_CHECK_PROTECT);
+		}
+	      else
+		ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
+	    }
 	}
     }
 
Index: config/ia64/ia64.c
===================================================================
--- config/ia64/ia64.c	(revision 204201)
+++ config/ia64/ia64.c	(working copy)
@@ -3206,61 +3206,54 @@  gen_fr_restore_x (rtx dest, rtx src, rtx
 #define BACKING_STORE_SIZE(N) ((N) > 0 ? ((N) + (N)/63 + 1) * 8 : 0)
 
 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
-   inclusive.  These are offsets from the current stack pointer.  SOL is the
-   size of local registers.  ??? This clobbers r2 and r3.  */
+   inclusive.  These are offsets from the current stack pointer.  BS_SIZE
+   is the size of the backing store.  ??? This clobbers r2 and r3.  */
 
 static void
-ia64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size, int sol)
+ia64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
+			     int bs_size)
 {
- /* On the IA-64 there is a second stack in memory, namely the Backing Store
-    of the Register Stack Engine.  We also need to probe it after checking
-    that the 2 stacks don't overlap.  */
-  const int bs_size = BACKING_STORE_SIZE (sol);
   rtx r2 = gen_rtx_REG (Pmode, GR_REG (2));
   rtx r3 = gen_rtx_REG (Pmode, GR_REG (3));
+  rtx p6 = gen_rtx_REG (BImode, PR_REG (6));
 
-  /* Detect collision of the 2 stacks if necessary.  */
-  if (bs_size > 0 || size > 0)
-    {
-      rtx p6 = gen_rtx_REG (BImode, PR_REG (6));
-
-      emit_insn (gen_bsp_value (r3));
-      emit_move_insn (r2, GEN_INT (-(first + size)));
-
-      /* Compare current value of BSP and SP registers.  */
-      emit_insn (gen_rtx_SET (VOIDmode, p6,
-			      gen_rtx_fmt_ee (LTU, BImode,
-					      r3, stack_pointer_rtx)));
-
-      /* Compute the address of the probe for the Backing Store (which grows
-	 towards higher addresses).  We probe only at the first offset of
-	 the next page because some OS (eg Linux/ia64) only extend the
-	 backing store when this specific address is hit (but generate a SEGV
-	 on other address).  Page size is the worst case (4KB).  The reserve
-	 size is at least 4096 - (96 + 2) * 8 = 3312 bytes, which is enough.
-	 Also compute the address of the last probe for the memory stack
-	 (which grows towards lower addresses).  */
-      emit_insn (gen_rtx_SET (VOIDmode, r3, plus_constant (Pmode, r3, 4095)));
-      emit_insn (gen_rtx_SET (VOIDmode, r2,
-			      gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
-
-      /* Compare them and raise SEGV if the former has topped the latter.  */
-      emit_insn (gen_rtx_COND_EXEC (VOIDmode,
-				    gen_rtx_fmt_ee (NE, VOIDmode, p6,
-						    const0_rtx),
-				    gen_rtx_SET (VOIDmode, p6,
-						 gen_rtx_fmt_ee (GEU, BImode,
-								 r3, r2))));
-      emit_insn (gen_rtx_SET (VOIDmode,
-			      gen_rtx_ZERO_EXTRACT (DImode, r3, GEN_INT (12),
-						    const0_rtx),
-			      const0_rtx));
-      emit_insn (gen_rtx_COND_EXEC (VOIDmode,
-				    gen_rtx_fmt_ee (NE, VOIDmode, p6,
-						    const0_rtx),
-				    gen_rtx_TRAP_IF (VOIDmode, const1_rtx,
-						     GEN_INT (11))));
-    }
+  /* On the IA-64 there is a second stack in memory, namely the Backing Store
+     of the Register Stack Engine.  We also need to probe it after checking
+     that the 2 stacks don't overlap.  */
+  emit_insn (gen_bsp_value (r3));
+  emit_move_insn (r2, GEN_INT (-(first + size)));
+
+  /* Compare current value of BSP and SP registers.  */
+  emit_insn (gen_rtx_SET (VOIDmode, p6,
+			  gen_rtx_fmt_ee (LTU, BImode,
+					  r3, stack_pointer_rtx)));
+
+  /* Compute the address of the probe for the Backing Store (which grows
+     towards higher addresses).  We probe only at the first offset of
+     the next page because some OS (eg Linux/ia64) only extend the
+     backing store when this specific address is hit (but generate a SEGV
+     on other address).  Page size is the worst case (4KB).  The reserve
+     size is at least 4096 - (96 + 2) * 8 = 3312 bytes, which is enough.
+     Also compute the address of the last probe for the memory stack
+     (which grows towards lower addresses).  */
+  emit_insn (gen_rtx_SET (VOIDmode, r3, plus_constant (Pmode, r3, 4095)));
+  emit_insn (gen_rtx_SET (VOIDmode, r2,
+			  gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
+
+  /* Compare them and raise SEGV if the former has topped the latter.  */
+  emit_insn (gen_rtx_COND_EXEC (VOIDmode,
+				gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
+				gen_rtx_SET (VOIDmode, p6,
+					     gen_rtx_fmt_ee (GEU, BImode,
+							     r3, r2))));
+  emit_insn (gen_rtx_SET (VOIDmode,
+			  gen_rtx_ZERO_EXTRACT (DImode, r3, GEN_INT (12),
+						const0_rtx),
+			  const0_rtx));
+  emit_insn (gen_rtx_COND_EXEC (VOIDmode,
+				gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
+				gen_rtx_TRAP_IF (VOIDmode, const1_rtx,
+						 GEN_INT (11))));
 
   /* Probe the Backing Store if necessary.  */
   if (bs_size > 0)
@@ -3444,10 +3437,23 @@  ia64_expand_prologue (void)
     current_function_static_stack_size = current_frame_info.total_size;
 
   if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
-    ia64_emit_probe_stack_range (STACK_CHECK_PROTECT,
-				 current_frame_info.total_size,
-				 current_frame_info.n_input_regs
-				   + current_frame_info.n_local_regs);
+    {
+      HOST_WIDE_INT size = current_frame_info.total_size;
+      int bs_size = BACKING_STORE_SIZE (current_frame_info.n_input_regs
+					  + current_frame_info.n_local_regs);
+
+      if (crtl->is_leaf && !cfun->calls_alloca)
+	{
+	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
+	    ia64_emit_probe_stack_range (STACK_CHECK_PROTECT,
+					 size - STACK_CHECK_PROTECT,
+					 bs_size);
+	  else if (size + bs_size > STACK_CHECK_PROTECT)
+	    ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, 0, bs_size);
+	}
+      else if (size + bs_size > 0)
+	ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, size, bs_size);
+    }
 
   if (dump_file) 
     {
Index: config/rs6000/rs6000.c
===================================================================
--- config/rs6000/rs6000.c	(revision 204201)
+++ config/rs6000/rs6000.c	(working copy)
@@ -21526,8 +21526,19 @@  rs6000_emit_prologue (void)
   if (flag_stack_usage_info)
     current_function_static_stack_size = info->total_size;
 
-  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && info->total_size)
-    rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, info->total_size);
+  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
+    {
+      HOST_WIDE_INT size = info->total_size;
+
+      if (crtl->is_leaf && !cfun->calls_alloca)
+	{
+	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
+	    rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
+					   size - STACK_CHECK_PROTECT);
+	}
+      else if (size > 0)
+	rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
+    }
 
   if (TARGET_FIX_AND_CONTINUE)
     {
Index: config/mips/mips.c
===================================================================
--- config/mips/mips.c	(revision 204201)
+++ config/mips/mips.c	(working copy)
@@ -10994,8 +10994,17 @@  mips_expand_prologue (void)
   if (flag_stack_usage_info)
     current_function_static_stack_size = size;
 
-  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
-    mips_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
+  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
+    {
+      if (crtl->is_leaf && !cfun->calls_alloca)
+	{
+	  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
+	    mips_emit_probe_stack_range (STACK_CHECK_PROTECT,
+					 size - STACK_CHECK_PROTECT);
+	}
+      else if (size > 0)
+	mips_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
+    }
 
   /* Save the registers.  Allocate up to MIPS_MAX_FIRST_STACK_STEP
      bytes beforehand; this is enough to cover the register save area