===================================================================
@@ -8523,11 +8523,28 @@
The maximum number of peelings of a single loop.
@item max-completely-peeled-insns
+@item max-completely-peeled-insns-feedback
The maximum number of insns of a completely peeled loop.
+The @option{max-completely-peeled-insns-feedback} is used only when profile
+feedback is available and the loop is hot. Because of the real profiles, this
+value may set to be larger for hot loops.
+
+@item max-once-peeled-insns
+@item max-once-peeled-insns-feedback
+The maximum number of insns of a peeled loop that rolls only once.
+The @option{max-once-peeled-insns-feedback} is used only when profile feedback
+is available and the loop is hot. Because of the real profiles, this value
+may set to be larger for hot loops.
+
@item max-completely-peel-times
+@item max-completely-peel-times-feedback
The maximum number of iterations of a loop to be suitable for complete peeling.
+The @option{max-completely-peel-times-feedback} is used only when profile feedback
+is available and the loop is hot. Because of the real profiles, this value may
+set to be larger for hot loops.
+
@item max-completely-peel-loop-nest-depth
The maximum depth of a loop nest suitable for complete peeling.
===================================================================
@@ -1,4 +1,5 @@
/* { dg-require-effective-target vect_int } */
+/* { dg-options "[vect_cflags] --param max-completely-peel-times=16" } */
#include "tree-vect.h"
===================================================================
@@ -24,6 +24,12 @@
global DEFAULT_VECTCFLAGS
set DEFAULT_VECTCFLAGS ""
+# So that we can read flags in individual tests.
+proc vect_cflags { } {
+ global DEFAULT_VECTCFLAGS
+ return $DEFAULT_VECTCFLAGS
+}
+
# If the target system supports vector instructions, the default action
# for a test is 'run', otherwise it's 'compile'. Save current default.
# Executing vector instructions on a system without hardware vector support
===================================================================
@@ -326,6 +326,7 @@
enum unroll_level ul)
{
unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
+ unsigned HOST_WIDE_INT max_peeled_insns;
gimple cond;
struct loop_size size;
@@ -336,9 +337,23 @@
return false;
n_unroll = tree_low_cst (niter, 1);
- max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
+ if (profile_info
+ && flag_branch_probabilities
+ && optimize_loop_for_speed_p (loop))
+ max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES_FEEDBACK);
+ else
+ max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
+
if (n_unroll > max_unroll)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, " Not unrolling loop %d limited by max unroll"
+ " (%d > %d)\n",
+ loop->num, (int) n_unroll, (int) max_unroll);
+ }
return false;
+ }
if (n_unroll)
{
@@ -356,14 +371,21 @@
(int) unr_insns);
}
- if (unr_insns > ninsns
- && (unr_insns
- > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)))
+ if (profile_info
+ && flag_branch_probabilities
+ && optimize_loop_for_speed_p (loop))
+ max_peeled_insns =
+ PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS_FEEDBACK);
+ else
+ max_peeled_insns = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS);
+
+ if (unr_insns > max_peeled_insns)
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Not unrolling loop %d "
- "(--param max-completely-peeled-insns limit reached).\n",
- loop->num);
+ "(--param max-completely-peeled-insns(-feedback) limit. "
+ "(%u > %u)).\n",
+ loop->num, (unsigned) unr_insns, (unsigned) max_peeled_insns);
return false;
}
@@ -371,7 +393,8 @@
&& unr_insns > ninsns)
{
if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Not unrolling loop %d.\n", loop->num);
+ fprintf (dump_file, "Not unrolling loop %d (NO_GROWTH %d > %d).\n",
+ loop->num, (int) unr_insns, (int) ninsns);
return false;
}
}
@@ -418,8 +441,9 @@
update_stmt (cond);
update_ssa (TODO_update_ssa);
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Unrolled loop %d completely.\n", loop->num);
+ if (dump_file)
+ fprintf (dump_file, "Unrolled loop %d completely by factor %d.\n",
+ loop->num, (int) n_unroll);
return true;
}
===================================================================
@@ -324,15 +324,23 @@
decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
{
struct niter_desc *desc;
+ unsigned max_peeled_insns;
+ if (profile_info && flag_branch_probabilities)
+ max_peeled_insns =
+ (unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS_FEEDBACK);
+ else
+ max_peeled_insns = (unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS);
+
if (dump_file)
fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
/* Is the loop small enough? */
- if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
+ if (max_peeled_insns < loop->ninsns)
{
if (dump_file)
- fprintf (dump_file, ";; Not considering loop, is too big\n");
+ fprintf (dump_file, ";; Not considering loop, is too big (%d > %u)\n",
+ loop->ninsns, max_peeled_insns);
return;
}
@@ -362,7 +370,7 @@
static void
decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
{
- unsigned npeel;
+ unsigned npeel, max_insns, max_peel;
struct niter_desc *desc;
if (dump_file)
@@ -393,16 +401,30 @@
return;
}
+ if (profile_info && flag_branch_probabilities)
+ {
+ max_insns =
+ (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS_FEEDBACK);
+ max_peel =
+ (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES_FEEDBACK);
+ }
+ else
+ {
+ max_insns = (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS);
+ max_peel = (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
+ }
+
/* npeel = number of iterations to peel. */
- npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
- if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
- npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
+ npeel = max_insns / loop->ninsns;
+ if (npeel > max_peel)
+ npeel = max_peel;
/* Is the loop small enough? */
if (!npeel)
{
if (dump_file)
- fprintf (dump_file, ";; Not considering loop, is too big\n");
+ fprintf (dump_file, ";; Not considering loop, is too big, npeel=%u.\n",
+ npeel);
return;
}
@@ -435,7 +457,7 @@
/* Success. */
if (dump_file)
- fprintf (dump_file, ";; Decided to peel loop completely\n");
+ fprintf (dump_file, ";; Decided to peel loop completely npeel %u\n", npeel);
loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
}
===================================================================
@@ -299,16 +299,37 @@
"max-completely-peeled-insns",
"The maximum number of insns of a completely peeled loop",
400, 0, 0)
+/* The maximum number of insns of a peeled loop, when feedback
+ information is available. */
+DEFPARAM(PARAM_MAX_COMPLETELY_PEELED_INSNS_FEEDBACK,
+ "max-completely-peeled-insns-feedback",
+ "The maximum number of insns of a completely peeled loop when profile "
+ "feedback is available",
+ 600, 0, 0)
/* The maximum number of peelings of a single loop that is peeled completely. */
DEFPARAM(PARAM_MAX_COMPLETELY_PEEL_TIMES,
"max-completely-peel-times",
"The maximum number of peelings of a single loop that is peeled completely",
- 16, 0, 0)
+ 8, 0, 0)
+/* The maximum number of peelings of a single loop that is peeled
+ completely, when feedback information is available. */
+DEFPARAM(PARAM_MAX_COMPLETELY_PEEL_TIMES_FEEDBACK,
+ "max-completely-peel-times-feedback",
+ "The maximum number of peelings of a single loop that is peeled "
+ "completely, when profile feedback is available",
+ 16, 0, 0)
/* The maximum number of insns of a peeled loop that rolls only once. */
DEFPARAM(PARAM_MAX_ONCE_PEELED_INSNS,
"max-once-peeled-insns",
"The maximum number of insns of a peeled loop that rolls only once",
400, 0, 0)
+/* The maximum number of insns of a peeled loop that rolls only once,
+ when feedback information is available. */
+DEFPARAM(PARAM_MAX_ONCE_PEELED_INSNS_FEEDBACK,
+ "max-once-peeled-insns-feedback",
+ "The maximum number of insns of a peeled loop that rolls only once, "
+ "when profile feedback is available",
+ 600, 0, 0)
/* The maximum depth of a loop nest we completely peel. */
DEFPARAM(PARAM_MAX_UNROLL_ITERATIONS,
"max-completely-peel-loop-nest-depth",