Patchwork [google] Updated patch (issue4438079)

login
register
mail settings
Submitter Sharad Singhai
Date April 29, 2011, 12:02 a.m.
Message ID <20110429000237.E346B15C1AB@nabu.mtv.corp.google.com>
Download mbox | patch
Permalink /patch/93354/
State New
Headers show

Comments

Sharad Singhai - April 29, 2011, 12:02 a.m.
I have fixed documentation so that new params are documented
separately along with default values. Also used profile_status
variable.

I experimented with scaling defaults but that was not always possible
as some parameters need to be more indpependent.

Thanks,
Sharad

Google Ref 40474

2011-04-28  Sharad Singhai  <singhai@google.com>

	gcc/ChangeLog.google-main
	* params.def: Add new parameters to control peeling.
	* tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Use
	different peeling parameters when profile feedback is available.
	* loop-unroll.c (decide_peel_once_rolling): Ditto.
	(decide_peel_completely): Ditto.
	* doc/invoke.texi: Document new peeling parameters.

	testsuite/ChangeLog.google-main
	* gcc.dg/vect/O3-vect-pr34223.c: Add extra peeling
	parameters.
	* gcc.dg/vect/vect.exp: Allow reading flags in individual
	tests.


--
This patch is available for review at http://codereview.appspot.com/4438079

Patch

Index: doc/invoke.texi
===================================================================
--- doc/invoke.texi	(revision 173140)
+++ doc/invoke.texi	(working copy)
@@ -8634,9 +8634,29 @@ 
 @item max-completely-peeled-insns
 The maximum number of insns of a completely peeled loop.
 
+@item max-completely-peeled-insns-feedback
+The maximum number of insns of a completely peeled loop when profile
+feedback is available and the loop is hot. Because of the real
+profiles, this value may set to be larger for hot loops. Its default
+value is 600.
+
+@item max-once-peeled-insns
+The maximum number of insns of a peeled loop that rolls only once.
+
+@item max-once-peeled-insns-feedback
+The maximum number of insns of a peeled loop when profile feedback is
+available and the loop is hot. Because of the real profiles, this
+value may set to be larger for hot loops. The default value is 600.
+
 @item max-completely-peel-times
 The maximum number of iterations of a loop to be suitable for complete peeling.
 
+@item max-completely-peel-times-feedback
+The maximum number of iterations of a loop to be suitable for complete
+peeling when profile feedback is available and the loop is
+hot. Because of the real profiles, this value may set to be larger for
+hot loops. Its default value is 16.
+
 @item max-completely-peel-loop-nest-depth
 The maximum depth of a loop nest suitable for complete peeling.
 
Index: testsuite/gcc.dg/vect/O3-vect-pr34223.c
===================================================================
--- testsuite/gcc.dg/vect/O3-vect-pr34223.c	(revision 173140)
+++ testsuite/gcc.dg/vect/O3-vect-pr34223.c	(working copy)
@@ -1,4 +1,5 @@ 
 /* { dg-require-effective-target vect_int } */
+/* { dg-options "[vect_cflags] --param max-completely-peel-times=16" } */
 
 #include "tree-vect.h"
 
Index: testsuite/gcc.dg/vect/vect.exp
===================================================================
--- testsuite/gcc.dg/vect/vect.exp	(revision 173140)
+++ testsuite/gcc.dg/vect/vect.exp	(working copy)
@@ -24,6 +24,12 @@ 
 global DEFAULT_VECTCFLAGS
 set DEFAULT_VECTCFLAGS ""
 
+# So that we can read flags in individual tests.
+proc vect_cflags { } {
+  global DEFAULT_VECTCFLAGS
+  return $DEFAULT_VECTCFLAGS
+}
+
 # If the target system supports vector instructions, the default action
 # for a test is 'run', otherwise it's 'compile'.  Save current default.
 # Executing vector instructions on a system without hardware vector support
Index: tree-ssa-loop-ivcanon.c
===================================================================
--- tree-ssa-loop-ivcanon.c	(revision 173140)
+++ tree-ssa-loop-ivcanon.c	(working copy)
@@ -326,6 +326,7 @@ 
 			    enum unroll_level ul)
 {
   unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
+  unsigned HOST_WIDE_INT max_peeled_insns;
   gimple cond;
   struct loop_size size;
 
@@ -336,9 +337,22 @@ 
     return false;
   n_unroll = tree_low_cst (niter, 1);
 
-  max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
+  if (profile_status == PROFILE_READ
+      && optimize_loop_for_speed_p (loop))
+    max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES_FEEDBACK);
+  else
+    max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
+
   if (n_unroll > max_unroll)
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	{
+	  fprintf (dump_file, "  Not unrolling loop %d limited by max unroll"
+                   " (%d > %d)\n",
+                   loop->num, (int) n_unroll, (int) max_unroll);
+        }
     return false;
+  }
 
   if (n_unroll)
     {
@@ -356,14 +370,20 @@ 
 		   (int) unr_insns);
 	}
 
-      if (unr_insns > ninsns
-	  && (unr_insns
-	      > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)))
+      if (profile_status == PROFILE_READ
+          && optimize_loop_for_speed_p (loop))
+        max_peeled_insns =
+          PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS_FEEDBACK);
+      else
+        max_peeled_insns = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS);
+
+      if (unr_insns > max_peeled_insns)
 	{
 	  if (dump_file && (dump_flags & TDF_DETAILS))
 	    fprintf (dump_file, "Not unrolling loop %d "
-		     "(--param max-completely-peeled-insns limit reached).\n",
-		     loop->num);
+		     "(--param max-completely-peeled-insns(-feedback) limit. "
+                     "(%u > %u)).\n",
+                     loop->num, (unsigned) unr_insns, (unsigned) max_peeled_insns);
 	  return false;
 	}
 
@@ -371,7 +391,8 @@ 
 	  && unr_insns > ninsns)
 	{
 	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "Not unrolling loop %d.\n", loop->num);
+	    fprintf (dump_file, "Not unrolling loop %d (NO_GROWTH %d > %d).\n",
+                     loop->num, (int) unr_insns, (int) ninsns);
 	  return false;
 	}
     }
@@ -418,8 +439,9 @@ 
   update_stmt (cond);
   update_ssa (TODO_update_ssa);
 
-  if (dump_file && (dump_flags & TDF_DETAILS))
-    fprintf (dump_file, "Unrolled loop %d completely.\n", loop->num);
+  if (dump_file)
+    fprintf (dump_file, "Unrolled loop %d completely by factor %d.\n",
+             loop->num, (int) n_unroll);
 
   return true;
 }
Index: loop-unroll.c
===================================================================
--- loop-unroll.c	(revision 173140)
+++ loop-unroll.c	(working copy)
@@ -324,15 +324,23 @@ 
 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 {
   struct niter_desc *desc;
+  unsigned max_peeled_insns;
 
+  if (profile_status == PROFILE_READ)
+    max_peeled_insns =
+      (unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS_FEEDBACK);
+  else
+    max_peeled_insns = (unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS);
+
   if (dump_file)
     fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
 
   /* Is the loop small enough?  */
-  if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
+  if (max_peeled_insns < loop->ninsns)
     {
       if (dump_file)
-	fprintf (dump_file, ";; Not considering loop, is too big\n");
+	fprintf (dump_file, ";; Not considering loop, is too big (%d > %u)\n",
+                 loop->ninsns, max_peeled_insns);
       return;
     }
 
@@ -362,7 +370,7 @@ 
 static void
 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
 {
-  unsigned npeel;
+  unsigned npeel, max_insns, max_peel;
   struct niter_desc *desc;
 
   if (dump_file)
@@ -393,16 +401,30 @@ 
       return;
     }
 
+  if (profile_status == PROFILE_READ)
+    {
+      max_insns =
+        (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS_FEEDBACK);
+      max_peel =
+        (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES_FEEDBACK);
+    }
+  else
+    {
+      max_insns = (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS);
+      max_peel = (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
+    }
+
   /* npeel = number of iterations to peel.  */
-  npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
-  if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
-    npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
+  npeel = max_insns / loop->ninsns;
+  if (npeel > max_peel)
+    npeel = max_peel;
 
   /* Is the loop small enough?  */
   if (!npeel)
     {
       if (dump_file)
-	fprintf (dump_file, ";; Not considering loop, is too big\n");
+	fprintf (dump_file, ";; Not considering loop, is too big, npeel=%u.\n",
+                 npeel);
       return;
     }
 
@@ -435,7 +457,7 @@ 
 
   /* Success.  */
   if (dump_file)
-    fprintf (dump_file, ";; Decided to peel loop completely\n");
+    fprintf (dump_file, ";; Decided to peel loop completely npeel %u\n", npeel);
   loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
 }
 
Index: params.def
===================================================================
--- params.def	(revision 173140)
+++ params.def	(working copy)
@@ -299,16 +299,37 @@ 
 	"max-completely-peeled-insns",
 	"The maximum number of insns of a completely peeled loop",
 	400, 0, 0)
+/* The maximum number of insns of a peeled loop, when feedback
+   information is available.  */
+DEFPARAM(PARAM_MAX_COMPLETELY_PEELED_INSNS_FEEDBACK,
+	"max-completely-peeled-insns-feedback",
+	"The maximum number of insns of a completely peeled loop when profile "
+         "feedback is available",
+	600, 0, 0)
 /* The maximum number of peelings of a single loop that is peeled completely.  */
 DEFPARAM(PARAM_MAX_COMPLETELY_PEEL_TIMES,
 	"max-completely-peel-times",
 	"The maximum number of peelings of a single loop that is peeled completely",
-	16, 0, 0)
+	8, 0, 0)
+/* The maximum number of peelings of a single loop that is peeled
+   completely, when feedback information is available.  */
+DEFPARAM(PARAM_MAX_COMPLETELY_PEEL_TIMES_FEEDBACK,
+	"max-completely-peel-times-feedback",
+	"The maximum number of peelings of a single loop that is peeled "
+         "completely, when profile feedback is available",
+ 	16, 0, 0)
 /* The maximum number of insns of a peeled loop that rolls only once.  */
 DEFPARAM(PARAM_MAX_ONCE_PEELED_INSNS,
 	"max-once-peeled-insns",
 	"The maximum number of insns of a peeled loop that rolls only once",
 	400, 0, 0)
+/* The maximum number of insns of a peeled loop that rolls only once,
+   when feedback information is available.  */
+DEFPARAM(PARAM_MAX_ONCE_PEELED_INSNS_FEEDBACK,
+	"max-once-peeled-insns-feedback",
+	"The maximum number of insns of a peeled loop that rolls only once, "
+         "when profile feedback is available",
+         600, 0, 0)
 /* The maximum depth of a loop nest we completely peel.  */
 DEFPARAM(PARAM_MAX_UNROLL_ITERATIONS,
 	 "max-completely-peel-loop-nest-depth",