===================================================================
@@ -400,7 +400,8 @@ Objective-C and Objective-C++ Dialects}.
-freorder-blocks-and-partition -freorder-functions @gol
-frerun-cse-after-loop -freschedule-modulo-scheduled-loops @gol
-fripa -fripa-disallow-asm-modules -fripa-disallow-opt-mismatch @gol
--fripa-no-promote-always-inline-func -fripa-verbose -frounding-math @gol
+-fripa-no-promote-always-inline-func -fripa-verbose @gol
+-fripa-peel-size-limit -fripa-unroll-size-limit -frounding-math @gol
-fsched2-use-superblocks -fsched-pressure @gol
-fsched-spec-load -fsched-spec-load-dangerous @gol
-fsched-stalled-insns-dep[=@var{n}] -fsched-stalled-insns[=@var{n}] @gol
@@ -8267,6 +8268,20 @@ Do not promote static functions with alw
Enable printing of verbose information about dynamic inter-procedural optimizations.
This is used in conjunction with the @option{-fripa}.
+@item -fripa-peel-size-limit
+@opindex fripa-peel-size-limit
+Limit loop peeling of non-const non-FP loops in a LIPO compilation under estimates
+of a large code footprint. Enabled by default under @option{-fripa}. Code size
+estimation and thresholds are controlled by the @option{codesize-hotness-threshold}
+and @option{unrollpeel-codesize-threshold} parameters.
+
+@item -fripa-unroll-size-limit
+@opindex fripa-unroll-size-limit
+Limit loop unrolling of non-const non-FP loops in a LIPO compilation under estimates
+of a large code footprint. Enabled by default under @option{-fripa}. Code size
+estimation and thresholds are controlled by the @option{codesize-hotness-threshold}
+and @option{unrollpeel-codesize-threshold} parameters.
+
@item -fcallgraph-profiles-sections
@opindex fcallgraph-profiles-sections
Emit call graph edge profile counts in .note.callgraph.text sections. This is
@@ -8991,6 +9006,13 @@ hot loops. Its default value is 16.
@item max-completely-peel-loop-nest-depth
The maximum depth of a loop nest suitable for complete peeling.
+@item codesize-hotness-threshold
+The minimum profile count of basic blocks to look at when estimating
+the code size footprint of the call graph in a LIPO compile.
+
+@item unrollpeel-codesize-threshold
+Maximum LIPO code size footprint estimate for loop unrolling and peeling.
+
@item max-unswitch-insns
The maximum number of insns of an unswitched loop.
===================================================================
@@ -181,6 +181,81 @@ report_unroll_peel(struct loop *loop, lo
"" : iter_str);
}
+/* Determine whether LOOP contains floating-point computation. */
+static bool
+loop_has_FP_comp(struct loop *loop)
+{
+ rtx set, dest;
+ basic_block *body, bb;
+ unsigned i;
+ rtx insn;
+
+ body = get_loop_body (loop);
+ for (i = 0; i < loop->num_nodes; i++)
+ {
+ bb = body[i];
+
+ FOR_BB_INSNS (bb, insn)
+ {
+ set = single_set (insn);
+ if (!set)
+ continue;
+
+ dest = SET_DEST (set);
+ if (FLOAT_MODE_P (GET_MODE (dest)))
+ {
+ free (body);
+ return true;
+ }
+ }
+ }
+ free (body);
+ return false;
+}
+
+/* This returns a bit vector */
+typedef enum {
+ NO_LIMIT = 0,
+ LIMIT_UNROLL = 0x1,
+ LIMIT_PEEL = 0x2,
+ LIMIT_BOTH = 0x3
+} limit_type;
+
+extern int cgraph_codesize_estimate;
+
+/* Determine whether LOOP unrolling/peeling should be constrained based
+ on code footprint estimates. */
+static limit_type
+limit_code_size(struct loop *loop)
+{
+ unsigned size_threshold;
+ limit_type result = NO_LIMIT;
+ int result_int = 0;
+
+ if (!flag_dyn_ipa)
+ return NO_LIMIT;
+
+ gcc_assert (cgraph_codesize_estimate >= 0);
+
+ /* Ignore FP loops, which are more likely to benefit heavily from
+ unrolling. */
+ if (loop_has_FP_comp(loop))
+ return NO_LIMIT;
+
+ size_threshold = PARAM_VALUE (PARAM_UNROLLPEEL_CODESIZE_THRESHOLD);
+ if (cgraph_codesize_estimate <= (int)size_threshold)
+ return NO_LIMIT;
+
+ if (flag_ripa_peel_size_limit)
+ result_int |= LIMIT_PEEL;
+
+ if (flag_ripa_unroll_size_limit)
+ result_int |= LIMIT_UNROLL;
+
+ result = (limit_type)result_int;
+ return result;
+}
+
/* Unroll and/or peel (depending on FLAGS) LOOPS. */
void
unroll_and_peel_loops (int flags)
@@ -307,6 +382,7 @@ decide_unrolling_and_peeling (int flags)
struct loop *loop;
loop_iterator li;
location_t locus;
+ limit_type limit;
/* Scan the loops, inner ones first. */
FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
@@ -347,19 +423,42 @@ decide_unrolling_and_peeling (int flags)
loop->ninsns = num_loop_insns (loop);
loop->av_ninsns = average_num_loop_insns (loop);
+ /* Determine whether to limit code size growth from unrolling and
+ peeling. This is currently enabled only under LIPO (dynamic IPA)
+ where we have a partial call graph. It is not applied to loops
+ with constant trip counts, as it is easier to determine the
+ profitability of unrolling and peeling such loops. */
+ limit = limit_code_size(loop);
+ if (limit != NO_LIMIT)
+ {
+ if (dump_file)
+ {
+ fprintf (dump_file, ";; Due to large code size footprint estimate, limit ");
+ if (limit == (LIMIT_UNROLL|LIMIT_PEEL))
+ fprintf (dump_file, "unrolling and peeling\n");
+ else if (limit == LIMIT_UNROLL)
+ fprintf (dump_file, "unrolling\n");
+ else
+ fprintf (dump_file, "peeling\n");
+ }
+ }
+
/* Try transformations one by one in decreasing order of
priority. */
decide_unroll_constant_iterations (loop, flags);
- if (loop->lpt_decision.decision == LPT_NONE)
+ if (loop->lpt_decision.decision == LPT_NONE
+ && !(limit & LIMIT_UNROLL))
decide_unroll_runtime_iterations (loop, flags);
- if (loop->lpt_decision.decision == LPT_NONE)
+ if (loop->lpt_decision.decision == LPT_NONE
+ && !(limit & LIMIT_UNROLL))
decide_unroll_stupid (loop, flags);
- if (loop->lpt_decision.decision == LPT_NONE)
+ if (loop->lpt_decision.decision == LPT_NONE
+ && !(limit & LIMIT_PEEL))
decide_peel_simple (loop, flags);
- if (flag_opt_info >= OPT_INFO_MIN &&
- loop->lpt_decision.decision != LPT_NONE)
+ if (flag_opt_info >= OPT_INFO_MIN
+ && loop->lpt_decision.decision != LPT_NONE)
{
report_unroll_peel(loop, locus);
}
===================================================================
@@ -1129,6 +1129,14 @@ Common Report Var(flag_ripa_no_promote_a
Don't promote always inline static functions assuming they
will be inlined and no copy is needed.
+fripa-peel-size-limit
+Common Report Var(flag_ripa_peel_size_limit) Init(1) Optimization
+Limit non-const non-FP loop peeling under dynamic IPA estimates of large code footprint
+
+fripa-unroll-size-limit
+Common Report Var(flag_ripa_unroll_size_limit) Init(1) Optimization
+Limit non-const non-FP loop unrolling under dynamic IPA estimates of large code footprint
+
fearly-inlining
Common Report Var(flag_early_inlining) Init(1) Optimization
Perform early inlining
===================================================================
@@ -47,6 +47,7 @@ along with GCC; see the file COPYING3.
#include "except.h"
#include "plugin.h"
#include "regset.h" /* FIXME: For reg_obstack. */
+#include "params.h"
/* Gate: execute, or not, all of the non-trivial optimizations. */
@@ -149,6 +150,48 @@ struct gimple_opt_pass pass_all_early_op
}
};
+int cgraph_codesize_estimate = -1;
+
+/* Estimate the code size from the dynamic IPA call graph. */
+static void
+compute_codesize_estimate(void)
+{
+ struct cgraph_node *node;
+ basic_block bb;
+ gimple_stmt_iterator bsi;
+ struct function *my_function;
+
+ if (!flag_dyn_ipa)
+ return;
+
+ if (cgraph_codesize_estimate >= 0)
+ return;
+ cgraph_codesize_estimate = 0;
+
+ for (node = cgraph_nodes; node; node = node->next)
+ {
+ if (node->count == 0)
+ continue;
+
+ if (!gimple_has_body_p(node->decl))
+ continue;
+
+ my_function = DECL_STRUCT_FUNCTION (node->decl);
+
+ FOR_EACH_BB_FN (bb, my_function)
+ {
+ if (bb->count < PARAM_VALUE (PARAM_CODESIZE_HOTNESS_THRESHOLD))
+ continue;
+ for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
+ {
+ gimple stmt = gsi_stmt (bsi);
+ cgraph_codesize_estimate += estimate_num_insns (stmt, &eni_size_weights);
+ }
+ }
+ }
+ if (dump_file)
+ fprintf (dump_file, "Code size estimate from cgraph: %d\n", cgraph_codesize_estimate);
+}
/* Pass: cleanup the CFG just before expanding trees to RTL.
This is just a round of label cleanups and case node grouping
@@ -158,6 +201,8 @@ struct gimple_opt_pass pass_all_early_op
static unsigned int
execute_cleanup_cfg_post_optimizing (void)
{
+ /* Estimate the code footprint for hot BBs before we enter RTL */
+ compute_codesize_estimate();
cleanup_tree_cfg ();
cleanup_dead_labels ();
group_case_labels ();
===================================================================
@@ -337,6 +337,21 @@ DEFPARAM(PARAM_MAX_UNROLL_ITERATIONS,
"max-completely-peel-loop-nest-depth",
"The maximum depth of a loop nest we completely peel",
8, 0, 0)
+/* The minimum profile count of basic blocks to look at when estimating
+ * the code size footprint of the call graph in a dynamic IPA compile. */
+DEFPARAM(PARAM_CODESIZE_HOTNESS_THRESHOLD,
+ "codesize-hotness-threshold",
+ "Minimum profile count of basic blocks counted towards dynamic IPA "
+ "code size footprint estimate",
+ 10000, 0, 0)
+/* The maximum code size estimate under which loop unrolling and peeling
+ * is allowed in a dynamic IPA compile. This currently applies to loops with
+ * non-constant iteration counts and no floating point computations. */
+DEFPARAM(PARAM_UNROLLPEEL_CODESIZE_THRESHOLD,
+ "unrollpeel-codesize-threshold",
+ "Maximum dynamic IPA code size footprint estimate for loop unrolling "
+ "and peeling",
+ 10000, 0, 0)
/* The maximum number of insns of an unswitched loop. */
DEFPARAM(PARAM_MAX_UNSWITCH_INSNS,