===================================================================
@@ -456,6 +456,42 @@ want_early_inline_function_p (struct cgr
return want_inline;
}
+/* Compute time of the edge->caller + edge->callee execution when inlining
+ does not happen. */
+
+inline int
+compute_uninlined_call_time (struct inline_summary *callee_info,
+ struct cgraph_edge *edge)
+{
+ int uninlined_call_time =
+ RDIV ((gcov_type)callee_info->time * MAX (edge->frequency, 1),
+ CGRAPH_FREQ_BASE);
+ int caller_time = inline_summary (edge->caller->global.inlined_to
+ ? edge->caller->global.inlined_to
+ : edge->caller)->time;
+ return uninlined_call_time + caller_time;
+}
+
+/* Same as compute_uninlined_call_time but compute time when inlining
+ does happen. */
+
+inline gcov_type
+compute_inlined_call_time (struct cgraph_edge *edge,
+ int edge_time)
+{
+ int caller_time = inline_summary (edge->caller->global.inlined_to
+ ? edge->caller->global.inlined_to
+ : edge->caller)->time;
+ int time = caller_time + RDIV ((edge_time - inline_edge_summary (edge)->call_stmt_time)
+ * MAX (edge->frequency, 1),
+ CGRAPH_FREQ_BASE);
+  /* Possibly one roundoff error, but watch for overflows. */
+ gcc_checking_assert (time >= INT_MIN / 2);
+ if (time < 0)
+ time = 0;
+ return time;
+}
+
/* Return true if we are interested in inlining small function.
When REPORT is true, report reason to dump file. */
@@ -724,31 +760,41 @@ want_inline_function_to_all_callers_p (s
return true;
}
+#define RELATIVE_TIME_BENEFIT_RANGE (INT_MAX / 64)
/* Return relative time improvement for inlining EDGE in range
- 1...2^9. */
+   1...RELATIVE_TIME_BENEFIT_RANGE. */
static inline int
relative_time_benefit (struct inline_summary *callee_info,
struct cgraph_edge *edge,
- int time_growth)
+ int edge_time)
{
int relbenefit;
- gcov_type uninlined_call_time;
+ int uninlined_call_time = compute_uninlined_call_time (callee_info, edge);
+ int inlined_call_time = compute_inlined_call_time (edge, edge_time);
+
+ /* Inlining into extern inline function is not a win. */
+ if (DECL_EXTERNAL (edge->caller->global.inlined_to
+ ? edge->caller->global.inlined_to->symbol.decl
+ : edge->caller->symbol.decl))
+ return 1;
+
+ /* Watch overflows. */
+ gcc_checking_assert (uninlined_call_time >= 0);
+ gcc_checking_assert (inlined_call_time >= 0);
+ gcc_checking_assert (uninlined_call_time >= inlined_call_time);
- uninlined_call_time =
- ((gcov_type)
- (callee_info->time
- + inline_edge_summary (edge)->call_stmt_time) * edge->frequency
- + CGRAPH_FREQ_BASE / 2) / CGRAPH_FREQ_BASE;
/* Compute relative time benefit, i.e. how much the call becomes faster.
??? perhaps computing how much the caller+calle together become faster
would lead to more realistic results. */
if (!uninlined_call_time)
uninlined_call_time = 1;
relbenefit =
- (uninlined_call_time - time_growth) * 256 / (uninlined_call_time);
- relbenefit = MIN (relbenefit, 512);
+ RDIV (((gcov_type)uninlined_call_time - inlined_call_time) * RELATIVE_TIME_BENEFIT_RANGE,
+ uninlined_call_time);
+ relbenefit = MIN (relbenefit, RELATIVE_TIME_BENEFIT_RANGE);
+ gcc_checking_assert (relbenefit >= 0);
relbenefit = MAX (relbenefit, 1);
return relbenefit;
}
@@ -764,7 +810,7 @@ static int
edge_badness (struct cgraph_edge *edge, bool dump)
{
gcov_type badness;
- int growth, time_growth;
+ int growth, edge_time;
struct cgraph_node *callee = cgraph_function_or_thunk_node (edge->callee,
NULL);
struct inline_summary *callee_info = inline_summary (callee);
@@ -774,17 +820,20 @@ edge_badness (struct cgraph_edge *edge,
return INT_MIN;
growth = estimate_edge_growth (edge);
- time_growth = estimate_edge_time (edge);
+ edge_time = estimate_edge_time (edge);
hints = estimate_edge_hints (edge);
+ gcc_checking_assert (edge_time >= 0);
+ gcc_checking_assert (edge_time <= callee_info->time);
+ gcc_checking_assert (growth <= callee_info->size);
if (dump)
{
fprintf (dump_file, " Badness calculation for %s -> %s\n",
xstrdup (cgraph_node_name (edge->caller)),
xstrdup (cgraph_node_name (callee)));
- fprintf (dump_file, " size growth %i, time growth %i ",
+ fprintf (dump_file, " size growth %i, time %i ",
growth,
- time_growth);
+ edge_time);
dump_inline_hints (dump_file, hints);
fprintf (dump_file, "\n");
}
@@ -802,7 +851,7 @@ edge_badness (struct cgraph_edge *edge,
relative_edge_count * relative_time_benefit
goodness = -------------------------------------------
- edge_growth
+ growth_f_caller
badness = -goodness
The fraction is upside down, because on edge counts and time beneits
@@ -810,11 +859,11 @@ edge_badness (struct cgraph_edge *edge,
else if (max_count)
{
- int relbenefit = relative_time_benefit (callee_info, edge, time_growth);
+ int relbenefit = relative_time_benefit (callee_info, edge, edge_time);
badness =
((int)
- ((double) edge->count * INT_MIN / 2 / max_count / 512) *
- relative_time_benefit (callee_info, edge, time_growth)) / growth;
+ ((double) edge->count * INT_MIN / 2 / max_count / RELATIVE_TIME_BENEFIT_RANGE) *
+ relbenefit) / growth;
/* Be sure that insanity of the profile won't lead to increasing counts
in the scalling and thus to overflow in the computation above. */
@@ -826,73 +875,53 @@ edge_badness (struct cgraph_edge *edge,
" * Relative benefit %f\n",
(int) badness, (double) badness / INT_MIN,
(double) edge->count / max_count,
- relbenefit * 100 / 256.0);
+ relbenefit * 100.0 / RELATIVE_TIME_BENEFIT_RANGE);
}
}
/* When function local profile is available. Compute badness as:
-
- growth_of_callee
- badness = -------------------------------------- + growth_for-all
- relative_time_benefit * edge_frequency
+ relative_time_benefit
+ goodness = ---------------------------------
+ growth_of_caller * overall_growth
+ badness = - goodness
+
+ compensated by the inline hints.
*/
else if (flag_guess_branch_prob)
{
- int div = edge->frequency * (1<<10) / CGRAPH_FREQ_MAX;
-
- div = MAX (div, 1);
- gcc_checking_assert (edge->frequency <= CGRAPH_FREQ_MAX);
- div *= relative_time_benefit (callee_info, edge, time_growth);
-
- /* frequency is normalized in range 1...2^10.
- relbenefit in range 1...2^9
- DIV should be in range 1....2^19. */
- gcc_checking_assert (div >= 1 && div <= (1<<19));
-
- /* Result must be integer in range 0...INT_MAX.
- Set the base of fixed point calculation so we don't lose much of
- precision for small bandesses (those are interesting) yet we don't
- overflow for growths that are still in interesting range.
-
- Fixed point arithmetic with point at 6th bit. */
- badness = ((gcov_type)growth) * (1<<(19+6));
- badness = (badness + div / 2) / div;
-
- /* Overall growth of inlining all calls of function matters: we want to
- inline so offline copy of function is no longer needed.
-
- Additionally functions that can be fully inlined without much of
- effort are better inline candidates than functions that can be fully
- inlined only after noticeable overall unit growths. The latter
- are better in a sense compressing of code size by factoring out common
- code into separate function shared by multiple code paths.
-
- We might mix the valud into the fraction by taking into account
- relative growth of the unit, but for now just add the number
- into resulting fraction. */
- if (badness > INT_MAX / 8)
- {
- badness = INT_MAX / 8;
- if (dump)
- fprintf (dump_file, "Badness overflow\n");
- }
- if (hints & (INLINE_HINT_indirect_call
- | INLINE_HINT_loop_iterations
- | INLINE_HINT_loop_stride))
- badness /= 8;
+ badness = (relative_time_benefit (callee_info, edge, edge_time)
+ * (INT_MIN / 16 / RELATIVE_TIME_BENEFIT_RANGE));
+ badness /= (growth * MAX (1, callee_info->growth));
+ gcc_checking_assert (badness <=0 && badness >= INT_MIN / 16);
+ if ((hints & (INLINE_HINT_indirect_call
+ | INLINE_HINT_loop_iterations
+ | INLINE_HINT_loop_stride))
+ || callee_info->growth <= 0)
+ badness *= 8;
if (hints & (INLINE_HINT_same_scc))
- badness *= 4;
- if (hints & (INLINE_HINT_in_scc))
- badness *= 2;
+ badness /= 16;
+ else if (hints & (INLINE_HINT_in_scc))
+ badness /= 8;
+ else if (hints & (INLINE_HINT_cross_module))
+ badness /= 2;
+ gcc_checking_assert (badness <= 0 && badness >= INT_MIN / 2);
+ if ((hints & INLINE_HINT_declared_inline) && badness >= INT_MIN / 32)
+ badness *= 16;
if (dump)
{
fprintf (dump_file,
" %i: guessed profile. frequency %f,"
- " benefit %f%%, divisor %i\n",
+ " benefit %f%%, time w/o inlining %i, time w inlining %i"
+ " overall growth %i (current) %i (original)\n",
(int) badness, (double)edge->frequency / CGRAPH_FREQ_BASE,
- relative_time_benefit (callee_info, edge, time_growth) * 100 / 256.0, div);
+ relative_time_benefit (callee_info, edge, edge_time) * 100.0
+ / RELATIVE_TIME_BENEFIT_RANGE,
+ compute_uninlined_call_time (callee_info, edge),
+ (int)compute_inlined_call_time (edge, edge_time),
+ estimate_growth (callee),
+ callee_info->growth);
}
}
/* When function local profile is not available or it does not give
@@ -1371,6 +1400,7 @@ inline_small_functions (void)
if (!DECL_EXTERNAL (node->symbol.decl))
initial_size += info->size;
+ info->growth = estimate_growth (node);
if (dfs && dfs->next_cycle)
{
struct cgraph_node *n2;
===================================================================
@@ -49,7 +49,9 @@ enum inline_hints_vals {
INLINE_HINT_loop_iterations = 2,
INLINE_HINT_loop_stride = 4,
INLINE_HINT_same_scc = 8,
- INLINE_HINT_in_scc = 16
+ INLINE_HINT_in_scc = 16,
+ INLINE_HINT_declared_inline = 32,
+ INLINE_HINT_cross_module = 64
};
typedef int inline_hints;
@@ -129,6 +131,12 @@ struct GTY(()) inline_summary
/* Predicate on when some loop in the function becomes to have known
stride. */
struct predicate * GTY((skip)) loop_stride;
+ /* Estimated growth for inlining all copies of the function before start
+ of small functions inlining.
+ This value will get out of date as the callers are duplicated, but
+     using an up-to-date value in the badness metric means a lot of extra
+ expenses. */
+ int growth;
/* Number of SCC on the beggining of inlining process. */
int scc_no;
};
===================================================================
@@ -649,6 +649,16 @@ dump_inline_hints (FILE *f, inline_hints
hints &= ~INLINE_HINT_in_scc;
fprintf (f, " in_scc");
}
+ if (hints & INLINE_HINT_cross_module)
+ {
+ hints &= ~INLINE_HINT_cross_module;
+ fprintf (f, " cross_module");
+ }
+ if (hints & INLINE_HINT_declared_inline)
+ {
+ hints &= ~INLINE_HINT_declared_inline;
+ fprintf (f, " declared_inline");
+ }
gcc_assert (!hints);
}
@@ -983,6 +993,7 @@ reset_inline_summary (struct cgraph_node
info->stack_frame_offset = 0;
info->size = 0;
info->time = 0;
+ info->growth = 0;
info->scc_no = 0;
if (info->loop_iterations)
{
@@ -1375,6 +1386,9 @@ dump_inline_summary (FILE * f, struct cg
(int) s->estimated_self_stack_size);
fprintf (f, " global stack: %i\n",
(int) s->estimated_stack_size);
+ if (s->growth)
+ fprintf (f, " estimated growth:%i\n",
+ (int) s->growth);
if (s->scc_no)
fprintf (f, " In SCC: %i\n",
(int) s->scc_no);
@@ -1977,10 +1991,11 @@ will_be_nonconstant_predicate (struct ip
return p;
/* Stores will stay anyway. */
- if (gimple_vdef (stmt))
+ if (gimple_store_p (stmt))
return p;
- is_load = gimple_vuse (stmt) != NULL;
+ is_load = gimple_assign_load_p (stmt);
+
/* Loads can be optimized when the value is known. */
if (is_load)
{
@@ -2857,6 +2872,8 @@ estimate_node_size_and_time (struct cgra
hints |=INLINE_HINT_loop_stride;
if (info->scc_no)
hints |= INLINE_HINT_in_scc;
+ if (DECL_DECLARED_INLINE_P (node->symbol.decl))
+ hints |= INLINE_HINT_declared_inline;
estimate_calls_size_and_time (node, &size, &time, &hints, possible_truths,
known_vals, known_binfos, known_aggs);
@@ -2865,7 +2882,6 @@ estimate_node_size_and_time (struct cgra
time = RDIV (time, INLINE_TIME_SCALE);
size = RDIV (size, INLINE_SIZE_SCALE);
-
if (dump_file
&& (dump_flags & TDF_DETAILS))
fprintf (dump_file, "\n size:%i time:%i\n", (int)size, (int)time);
@@ -3315,6 +3331,26 @@ inline_update_overall_summary (struct cg
info->size = (info->size + INLINE_SIZE_SCALE / 2) / INLINE_SIZE_SCALE;
}
+/* Return hints derived from EDGE. */
+int
+simple_edge_hints (struct cgraph_edge *edge)
+{
+ int hints = 0;
+ struct cgraph_node *to = (edge->caller->global.inlined_to
+ ? edge->caller->global.inlined_to
+ : edge->caller);
+ if (inline_summary (to)->scc_no
+ && inline_summary (to)->scc_no == inline_summary (edge->callee)->scc_no
+ && !cgraph_edge_recursive_p (edge))
+ hints |= INLINE_HINT_same_scc;
+
+ if (to->symbol.lto_file_data && edge->callee->symbol.lto_file_data
+ && to->symbol.lto_file_data != edge->callee->symbol.lto_file_data)
+ hints |= INLINE_HINT_cross_module;
+
+ return hints;
+}
+
/* Estimate the time cost for the caller when inlining EDGE.
Only to be called via estimate_edge_time, that handles the
caching mechanism.
@@ -3328,7 +3364,6 @@ do_estimate_edge_time (struct cgraph_edg
int time;
int size;
inline_hints hints;
- gcov_type ret;
struct cgraph_node *callee;
clause_t clause;
VEC (tree, heap) *known_vals;
@@ -3347,33 +3382,26 @@ do_estimate_edge_time (struct cgraph_edg
VEC_free (tree, heap, known_vals);
VEC_free (tree, heap, known_binfos);
VEC_free (ipa_agg_jump_function_p, heap, known_aggs);
-
- ret = RDIV ((gcov_type)time * edge->frequency,
- CGRAPH_FREQ_BASE);
+ gcc_checking_assert (size >= 0);
+ gcc_checking_assert (time >= 0);
/* When caching, update the cache entry. */
if (edge_growth_cache)
{
- struct cgraph_node *to = (edge->caller->global.inlined_to
- ? edge->caller->global.inlined_to
- : edge->caller);
if ((int)VEC_length (edge_growth_cache_entry, edge_growth_cache)
<= edge->uid)
VEC_safe_grow_cleared (edge_growth_cache_entry, heap, edge_growth_cache,
cgraph_edge_max_uid);
VEC_index (edge_growth_cache_entry, edge_growth_cache, edge->uid).time
- = ret + (ret >= 0);
+ = time + (time >= 0);
VEC_index (edge_growth_cache_entry, edge_growth_cache, edge->uid).size
= size + (size >= 0);
- if (inline_summary (to)->scc_no
- && inline_summary (to)->scc_no == inline_summary (callee)->scc_no
- && !cgraph_edge_recursive_p (edge))
- hints |= INLINE_HINT_same_scc;
+ hints |= simple_edge_hints (edge);
VEC_index (edge_growth_cache_entry, edge_growth_cache, edge->uid).hints
= hints + 1;
}
- return ret;
+ return time;
}
@@ -3430,9 +3458,6 @@ do_estimate_edge_hints (struct cgraph_ed
VEC (tree, heap) *known_vals;
VEC (tree, heap) *known_binfos;
VEC (ipa_agg_jump_function_p, heap) *known_aggs;
- struct cgraph_node *to = (edge->caller->global.inlined_to
- ? edge->caller->global.inlined_to
- : edge->caller);
/* When we do caching, use do_estimate_edge_time to populate the entry. */
@@ -3458,10 +3483,7 @@ do_estimate_edge_hints (struct cgraph_ed
VEC_free (tree, heap, known_vals);
VEC_free (tree, heap, known_binfos);
VEC_free (ipa_agg_jump_function_p, heap, known_aggs);
- if (inline_summary (to)->scc_no
- && inline_summary (to)->scc_no == inline_summary (callee)->scc_no
- && !cgraph_edge_recursive_p (edge))
- hints |= INLINE_HINT_same_scc;
+ hints |= simple_edge_hints (edge);
return hints;
}
@@ -3549,10 +3571,11 @@ do_estimate_growth (struct cgraph_node *
return zero or negative growths. */
if (d.self_recursive)
d.growth = d.growth < info->size ? info->size : d.growth;
+ else if (DECL_EXTERNAL (node->symbol.decl))
+ ;
else
{
- if (!DECL_EXTERNAL (node->symbol.decl)
- && cgraph_will_be_removed_from_program_if_no_direct_calls (node))
+ if (cgraph_will_be_removed_from_program_if_no_direct_calls (node))
d.growth -= info->size;
/* COMDAT functions are very often not shared across multiple units
since they come from various template instantiations.