diff mbox series

Free inline summaries for inline clones

Message ID 20191024222306.zm2ac2jndbinsgxd@kam.mff.cuni.cz
State New
Headers show
Series Free inline summaries for inline clones | expand

Commit Message

Jan Hubicka Oct. 24, 2019, 10:23 p.m. UTC
Hi,
most of the IPA summaries we maintain actually need to be kept only for
offline functions.  This patch releases the fnsummary and call summary
for inline clones.  This needs a bit of refactoring since we need to keep
the size info for clones and for LTO partitioning, so I split it out into
a separate size summary.

Bootstrapped/regtested x86_64-linux, committed.

Honza

	* cgraphunit.c (symbol_table::process_new_functions): Call
	ipa_free_size_summary.
	* ipa-cp.c (ipcp_cloning_candidate_p): Update.
	(devirtualization_time_bonus): Update.
	(ipcp_propagate_stage): Update.
	* ipa-fnsummary.c (ipa_size_summaries): New.
	(ipa_fn_summary_alloc): Alloc size summary.
	(dump_ipa_call_summary): Update.
	(ipa_dump_fn_summary): Update.
	(analyze_function_body): Update.
	(compute_fn_summary): Likewise.
	(ipa_get_stack_frame_offset): New function.
	(inline_update_callee_summaries): Do not update frame offsets.
	(ipa_merge_fn_summary_after_inlining): Update frame offsets here;
	remove call and function summary.
	(ipa_update_overall_fn_summary): Update.
	(inline_read_section): Update.
	(ipa_fn_summary_write): Update.
	(ipa_free_fn_summary): Do not remove summaries.
	(ipa_free_size_summary): New.
	(release summary pass): Also run at WPA.
	* ipa-fnsummary.h (ipa_size_summary): Declare.
	(ipa_fn_summary): Remove size, self_size, stack_frame_offset,
	estimated_self_stack_size.
	(ipa_size_summary_t): New type.
	(ipa_size_summaries): Declare.
	(ipa_free_size_summary): Declare.
	(ipa_get_stack_frame_offset): Declare.
	* ipa-icf.c (sem_function::merge): Update.
	* ipa-inline-analysis.c (estimate_size_after_inlining): Update.
	(estimate_growth): Update.
	(growth_likely_positive): Update.
	* ipa-inline-transform.c (clone_inlined_nodes): Update.
	(inline_call): Update.
	* ipa-inline.c (caller_growth_limits): Update.
	(edge_badness): Update.
	(recursive_inlining): Update.
	(inline_small_functions): Update.
	(inline_to_all_callers_1): Update.
	* ipa-prop.h (ipa_edge_args_sum_t): Update comment.
	* lto-partition.c (add_symbol_to_partition_1): Update.
	(undo_partition): Update.
diff mbox series

Patch

Index: cgraphunit.c
===================================================================
--- cgraphunit.c	(revision 277423)
+++ cgraphunit.c	(working copy)
@@ -340,7 +340,10 @@  symbol_table::process_new_functions (voi
 		 and splitting.  This is redundant for functions added late.
 		 Just throw away whatever it did.  */
 	      if (!summaried_computed)
-		ipa_free_fn_summary ();
+		{
+		  ipa_free_fn_summary ();
+		  ipa_free_size_summary ();
+		}
 	    }
 	  else if (ipa_fn_summaries != NULL)
 	    compute_fn_summary (node, true);
Index: ipa-cp.c
===================================================================
--- ipa-cp.c	(revision 277423)
+++ ipa-cp.c	(working copy)
@@ -731,7 +731,7 @@  ipcp_cloning_candidate_p (struct cgraph_
   init_caller_stats (&stats);
   node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false);
 
-  if (ipa_fn_summaries->get (node)->self_size < stats.n_calls)
+  if (ipa_size_summaries->get (node)->self_size < stats.n_calls)
     {
       if (dump_file)
 	fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
@@ -2629,13 +2629,14 @@  devirtualization_time_bonus (struct cgra
       if (!isummary->inlinable)
 	continue;
 
+      int size = ipa_size_summaries->get (callee)->size;
       /* FIXME: The values below need re-considering and perhaps also
 	 integrating into the cost metrics, at lest in some very basic way.  */
-      if (isummary->size <= MAX_INLINE_INSNS_AUTO / 4)
+      if (size <= MAX_INLINE_INSNS_AUTO / 4)
 	res += 31 / ((int)speculative + 1);
-      else if (isummary->size <= MAX_INLINE_INSNS_AUTO / 2)
+      else if (size <= MAX_INLINE_INSNS_AUTO / 2)
 	res += 15 / ((int)speculative + 1);
-      else if (isummary->size <= MAX_INLINE_INSNS_AUTO
+      else if (size <= MAX_INLINE_INSNS_AUTO
 	       || DECL_DECLARED_INLINE_P (callee->decl))
 	res += 7 / ((int)speculative + 1);
     }
@@ -3334,7 +3335,7 @@  ipcp_propagate_stage (class ipa_topo_inf
 				   ipa_get_param_count (info));
 	initialize_node_lattices (node);
       }
-    ipa_fn_summary *s = ipa_fn_summaries->get (node);
+    ipa_size_summary *s = ipa_size_summaries->get (node);
     if (node->definition && !node->alias && s != NULL)
       overall_size += s->self_size;
     max_count = max_count.max (node->count.ipa ());
Index: ipa-fnsummary.c
===================================================================
--- ipa-fnsummary.c	(revision 277423)
+++ ipa-fnsummary.c	(working copy)
@@ -86,6 +86,7 @@  along with GCC; see the file COPYING3.
 
 /* Summaries.  */
 fast_function_summary <ipa_fn_summary *, va_gc> *ipa_fn_summaries;
+fast_function_summary <ipa_size_summary *, va_heap> *ipa_size_summaries;
 fast_call_summary <ipa_call_summary *, va_heap> *ipa_call_summaries;
 
 /* Edge predicates goes here.  */
@@ -552,6 +553,8 @@  ipa_fn_summary_alloc (void)
 {
   gcc_checking_assert (!ipa_fn_summaries);
   ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
+  ipa_size_summaries = new fast_function_summary <ipa_size_summary *, va_heap>
+							 (symtab);
   ipa_call_summaries = new ipa_call_summary_t (symtab);
 }
 
@@ -791,9 +794,10 @@  dump_ipa_call_summary (FILE *f, int inde
 	       es->call_stmt_size, es->call_stmt_time);
 
       ipa_fn_summary *s = ipa_fn_summaries->get (callee);
+      ipa_size_summary *ss = ipa_size_summaries->get (callee);
       if (s != NULL)
-	fprintf (f, "callee size:%2i stack:%2i",
-		 (int) (s->size / ipa_fn_summary::size_scale),
+	fprintf (f, " callee size:%2i stack:%2i",
+		 (int) (ss->size / ipa_fn_summary::size_scale),
 		 (int) s->estimated_stack_size);
 
       if (es->predicate)
@@ -817,13 +821,11 @@  dump_ipa_call_summary (FILE *f, int inde
 	  }
       if (!edge->inline_failed)
 	{
-	  ipa_fn_summary *s = ipa_fn_summaries->get (callee);
-	  fprintf (f, "%*sStack frame offset %i, callee self size %i,"
-		   " callee size %i\n",
+	  ipa_size_summary *ss = ipa_size_summaries->get (callee);
+	  fprintf (f, "%*sStack frame offset %i, callee self size %i\n",
 		   indent + 2, "",
-		   (int) s->stack_frame_offset,
-		   (int) s->estimated_self_stack_size,
-		   (int) s->estimated_stack_size);
+		   (int) ipa_get_stack_frame_offset (callee),
+		   (int) ss->estimated_self_stack_size);
 	  dump_ipa_call_summary (f, indent + 2, callee, info);
 	}
     }
@@ -853,6 +855,7 @@  ipa_dump_fn_summary (FILE *f, struct cgr
   if (node->definition)
     {
       class ipa_fn_summary *s = ipa_fn_summaries->get (node);
+      class ipa_size_summary *ss = ipa_size_summaries->get (node);
       if (s != NULL)
 	{
 	  size_time_entry *e;
@@ -865,11 +868,11 @@  ipa_dump_fn_summary (FILE *f, struct cgr
 	  if (s->fp_expressions)
 	    fprintf (f, " fp_expression");
 	  fprintf (f, "\n  global time:     %f\n", s->time.to_double ());
-	  fprintf (f, "  self size:       %i\n", s->self_size);
-	  fprintf (f, "  global size:     %i\n", s->size);
+	  fprintf (f, "  self size:       %i\n", ss->self_size);
+	  fprintf (f, "  global size:     %i\n", ss->size);
 	  fprintf (f, "  min size:       %i\n", s->min_size);
 	  fprintf (f, "  self stack:      %i\n",
-		   (int) s->estimated_self_stack_size);
+		   (int) ss->estimated_self_stack_size);
 	  fprintf (f, "  global stack:    %i\n", (int) s->estimated_stack_size);
 	  if (s->growth)
 	    fprintf (f, "  estimated growth:%i\n", (int) s->growth);
@@ -2655,8 +2658,9 @@  analyze_function_body (struct cgraph_nod
 	}
     }
   ipa_fn_summary *s = ipa_fn_summaries->get (node);
+  ipa_size_summary *ss = ipa_size_summaries->get (node);
   s->time = time;
-  s->self_size = size;
+  ss->self_size = size;
   nonconstant_names.release ();
   ipa_release_body_info (&fbi);
   if (opt_for_fn (node->decl, optimize))
@@ -2684,7 +2688,6 @@  compute_fn_summary (struct cgraph_node *
 {
   HOST_WIDE_INT self_stack_size;
   struct cgraph_edge *e;
-  class ipa_fn_summary *info;
 
   gcc_assert (!node->global.inlined_to);
 
@@ -2694,14 +2697,14 @@  compute_fn_summary (struct cgraph_node *
   /* Create a new ipa_fn_summary.  */
   ((ipa_fn_summary_t *)ipa_fn_summaries)->remove_callees (node);
   ipa_fn_summaries->remove (node);
-  info = ipa_fn_summaries->get_create (node);
+  class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
+  class ipa_size_summary *size_info = ipa_size_summaries->get_create (node);
 
   /* Estimate the stack size for the function if we're optimizing.  */
   self_stack_size = optimize && !node->thunk.thunk_p
 		    ? estimated_stack_frame_size (node) : 0;
-  info->estimated_self_stack_size = self_stack_size;
+  size_info->estimated_self_stack_size = self_stack_size;
   info->estimated_stack_size = self_stack_size;
-  info->stack_frame_offset = 0;
 
   if (node->thunk.thunk_p)
     {
@@ -2719,7 +2722,7 @@  compute_fn_summary (struct cgraph_node *
       t = predicate::not_inlined ();
       info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t);
       ipa_update_overall_fn_summary (node);
-      info->self_size = info->size;
+      size_info->self_size = size_info->size;
       if (stdarg_p (TREE_TYPE (node->decl)))
 	{
 	  info->inlinable = false;
@@ -2775,16 +2778,15 @@  compute_fn_summary (struct cgraph_node *
   node->calls_comdat_local = (e != NULL);
 
   /* Inlining characteristics are maintained by the cgraph_mark_inline.  */
-  info->size = info->self_size;
-  info->stack_frame_offset = 0;
-  info->estimated_stack_size = info->estimated_self_stack_size;
+  size_info->size = size_info->self_size;
+  info->estimated_stack_size = size_info->estimated_self_stack_size;
 
   /* Code above should compute exactly the same result as
      ipa_update_overall_fn_summary but because computation happens in
      different order the roundoff errors result in slight changes.  */
   ipa_update_overall_fn_summary (node);
   /* In LTO mode we may have speculative edges set.  */
-  gcc_assert (in_lto_p || info->size == info->self_size);
+  gcc_assert (in_lto_p || size_info->size == size_info->self_size);
 }
 
 
@@ -3104,6 +3106,26 @@  estimate_ipcp_clone_size_and_time (struc
 			       ret_nonspec_time, hints, vNULL);
 }
 
+/* Compute the offset at which the stack frame of NODE starts within the
+   frame of the function NODE is inlined into.  The result is 0 when NODE
+   is not an inline clone.  */
+
+HOST_WIDE_INT
+ipa_get_stack_frame_offset (struct cgraph_node *node)
+{
+  HOST_WIDE_INT offset = 0;
+
+  /* Climb the chain of callers up to the function everything is inlined
+     into, adding the self stack size of each function passed on the way.
+     A NODE that is not inlined never enters the loop.  */
+  while (node->global.inlined_to)
+    {
+      node = node->callers->caller;
+      offset += ipa_size_summaries->get (node)->estimated_self_stack_size;
+    }
+  return offset;
+}
+
 
 /* Update summary information of inline clones after inlining.
    Compute peak stack usage.  */
@@ -3112,19 +3134,7 @@  static void
 inline_update_callee_summaries (struct cgraph_node *node, int depth)
 {
   struct cgraph_edge *e;
-  ipa_fn_summary *callee_info = ipa_fn_summaries->get (node);
-  ipa_fn_summary *caller_info = ipa_fn_summaries->get (node->callers->caller);
-  HOST_WIDE_INT peak;
-
-  callee_info->stack_frame_offset
-    = caller_info->stack_frame_offset
-    + caller_info->estimated_self_stack_size;
-  peak = callee_info->stack_frame_offset
-    + callee_info->estimated_self_stack_size;
-
-  ipa_fn_summary *s = ipa_fn_summaries->get (node->global.inlined_to);
-  if (s->estimated_stack_size < peak)
-    s->estimated_stack_size = peak;
+
   ipa_propagate_frequency (node);
   for (e = node->callees; e; e = e->next_callee)
     {
@@ -3284,11 +3294,10 @@  ipa_merge_fn_summary_after_inlining (str
   class ipa_fn_summary *info = ipa_fn_summaries->get (to);
   clause_t clause = 0;	/* not_inline is known to be false.  */
   size_time_entry *e;
-  vec<int> operand_map = vNULL;
-  vec<int> offset_map = vNULL;
+  auto_vec<int, 8> operand_map;
+  auto_vec<int, 8> offset_map;
   int i;
   predicate toplev_predicate;
-  predicate true_p = true;
   class ipa_call_summary *es = ipa_call_summaries->get (edge);
 
   if (es->predicate)
@@ -3375,39 +3384,43 @@  ipa_merge_fn_summary_after_inlining (str
 			&callee_info->loop_stride,
 			operand_map, offset_map, clause, &toplev_predicate);
 
-  ipa_call_summary *s = ipa_call_summaries->get (edge);
-  inline_update_callee_summaries (edge->callee, s->loop_depth);
+  HOST_WIDE_INT stack_frame_offset = ipa_get_stack_frame_offset (edge->callee);
+  HOST_WIDE_INT peak = stack_frame_offset + callee_info->estimated_stack_size;
 
-  /* We do not maintain predicates of inlined edges, free it.  */
-  edge_set_predicate (edge, &true_p);
-  /* Similarly remove param summaries.  */
-  es->param.release ();
-  operand_map.release ();
-  offset_map.release ();
+  if (info->estimated_stack_size < peak)
+    info->estimated_stack_size = peak;
+
+  inline_update_callee_summaries (edge->callee, es->loop_depth);
+
+  /* Free summaries that are not maintained for inline clones/edges.  */
+  ipa_call_summaries->remove (edge);
+  ipa_fn_summaries->remove (edge->callee);
 }
 
-/* For performance reasons ipa_merge_fn_summary_after_inlining is not updating overall size
-   and time.  Recompute it.  */
+/* For performance reasons ipa_merge_fn_summary_after_inlining is not updating
+   overall size and time.  Recompute it.  */
 
 void
 ipa_update_overall_fn_summary (struct cgraph_node *node)
 {
   class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
+  class ipa_size_summary *size_info = ipa_size_summaries->get_create (node);
   size_time_entry *e;
   int i;
 
-  info->size = 0;
+  size_info->size = 0;
   info->time = 0;
   for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
     {
-      info->size += e->size;
+      size_info->size += e->size;
       info->time += e->time;
     }
-  estimate_calls_size_and_time (node, &info->size, &info->min_size,
+  estimate_calls_size_and_time (node, &size_info->size, &info->min_size,
 				&info->time, NULL,
 				~(clause_t) (1 << predicate::false_condition),
 				vNULL, vNULL, vNULL);
-  info->size = (info->size + ipa_fn_summary::size_scale / 2) / ipa_fn_summary::size_scale;
+  size_info->size = (size_info->size + ipa_fn_summary::size_scale / 2)
+		    / ipa_fn_summary::size_scale;
 }
 
 
@@ -3558,6 +3571,7 @@  inline_read_section (struct lto_file_dec
       unsigned int index;
       struct cgraph_node *node;
       class ipa_fn_summary *info;
+      class ipa_size_summary *size_info;
       lto_symtab_encoder_t encoder;
       struct bitpack_d bp;
       struct cgraph_edge *e;
@@ -3568,6 +3582,8 @@  inline_read_section (struct lto_file_dec
       node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
 								index));
       info = node->prevailing_p () ? ipa_fn_summaries->get_create (node) : NULL;
+      size_info = node->prevailing_p ()
+		  ? ipa_size_summaries->get_create (node) : NULL;
 
       int stack_size = streamer_read_uhwi (&ib);
       int size = streamer_read_uhwi (&ib);
@@ -3576,8 +3592,8 @@  inline_read_section (struct lto_file_dec
       if (info)
 	{
 	  info->estimated_stack_size
-	    = info->estimated_self_stack_size = stack_size;
-	  info->size = info->self_size = size;
+	    = size_info->estimated_self_stack_size = stack_size;
+	  size_info->size = size_info->self_size = size;
 	  info->time = time;
 	}
 
@@ -3768,6 +3784,7 @@  ipa_fn_summary_write (void)
       if (cnode && cnode->definition && !cnode->alias)
 	{
 	  class ipa_fn_summary *info = ipa_fn_summaries->get (cnode);
+	  class ipa_size_summary *size_info = ipa_size_summaries->get (cnode);
 	  struct bitpack_d bp;
 	  struct cgraph_edge *edge;
 	  int i;
@@ -3775,8 +3792,8 @@  ipa_fn_summary_write (void)
 	  struct condition *c;
 
 	  streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
-	  streamer_write_hwi (ob, info->estimated_self_stack_size);
-	  streamer_write_hwi (ob, info->self_size);
+	  streamer_write_hwi (ob, size_info->estimated_self_stack_size);
+	  streamer_write_hwi (ob, size_info->self_size);
 	  info->time.stream_out (ob);
 	  bp = bitpack_create (ob->main_stream);
 	  bp_pack_value (&bp, info->inlinable, 1);
@@ -3846,23 +3863,33 @@  ipa_fn_summary_write (void)
 }
 
 
-/* Release inline summary.  */
+/* Release function summary.  */
 
 void
 ipa_free_fn_summary (void)
 {
-  struct cgraph_node *node;
   if (!ipa_call_summaries)
     return;
-  FOR_EACH_DEFINED_FUNCTION (node)
-    if (!node->alias)
-      ipa_fn_summaries->remove (node);
   ipa_fn_summaries->release ();
   ipa_fn_summaries = NULL;
   ipa_call_summaries->release ();
   delete ipa_call_summaries;
   ipa_call_summaries = NULL;
   edge_predicate_pool.release ();
+  /* During IPA this is one of largest datastructures to release.  */
+  if (flag_wpa)
+    ggc_trim ();
+}
+
+/* Release size summary and delete its heap-allocated container.  */
+
+void
+ipa_free_size_summary (void)
+{
+  if (ipa_size_summaries)
+    ipa_size_summaries->release ();
+  delete ipa_size_summaries;	/* No-op when nothing was allocated.  */
+  ipa_size_summaries = NULL;
+}
 
 namespace {
@@ -3937,10 +3964,12 @@  public:
       gcc_assert (n == 0);
       small_p = param;
     }
-  virtual bool gate (function *) { return small_p || !flag_wpa; }
+  virtual bool gate (function *) { return true; }
   virtual unsigned int execute (function *)
     {
       ipa_free_fn_summary ();
+      if (!flag_wpa)
+	ipa_free_size_summary ();
       return 0;
     }
 
Index: ipa-fnsummary.h
===================================================================
--- ipa-fnsummary.h	(revision 277423)
+++ ipa-fnsummary.h	(working copy)
@@ -82,6 +82,30 @@  public:
   sreal GTY((skip)) time;
 };
 
+/* Summary about function and stack frame sizes.  We keep this info 
+   for inline clones and also for WPA streaming. For this reason this is not
+   part of ipa_fn_summary which exists only for offline functions.  */
+class ipa_size_summary
+{
+public:
+  /* Estimated stack frame consumption by the function.  */
+  HOST_WIDE_INT estimated_self_stack_size;
+  /* Size of the function body.  */
+  int self_size;
+  /* Estimated size of the function after inlining.  */
+  int size;
+
+  ipa_size_summary ()
+  : estimated_self_stack_size (0), self_size (0), size (0)
+  {}
+  /* Copy constructor.  Every field is copied, including the self stack
+     size, so a duplicated summary keeps the frame size of its source.  */
+  ipa_size_summary (const ipa_size_summary &s)
+  : estimated_self_stack_size (s.estimated_self_stack_size),
+    self_size (s.self_size), size (s.size)
+  {}
+};
+
 /* Function inlining information.  */
 class GTY(()) ipa_fn_summary
 {
@@ -89,10 +113,10 @@  public:
   /* Keep all field empty so summary dumping works during its computation.
      This is useful for debugging.  */
   ipa_fn_summary ()
-    : estimated_self_stack_size (0), self_size (0), min_size (0),
+    : min_size (0),
       inlinable (false), single_caller (false),
       fp_expressions (false), estimated_stack_size (false),
-      stack_frame_offset (false), time (0), size (0), conds (NULL),
+      time (0), conds (NULL),
       size_time_table (NULL), loop_iterations (NULL), loop_stride (NULL),
       growth (0), scc_no (0)
   {
@@ -100,13 +124,11 @@  public:
 
   /* Copy constructor.  */
   ipa_fn_summary (const ipa_fn_summary &s)
-    : estimated_self_stack_size (s.estimated_self_stack_size),
-    self_size (s.self_size), min_size (s.min_size),
+    : min_size (s.min_size),
     inlinable (s.inlinable), single_caller (s.single_caller),
     fp_expressions (s.fp_expressions),
     estimated_stack_size (s.estimated_stack_size),
-    stack_frame_offset (s.stack_frame_offset), time (s.time), size (s.size),
-    conds (s.conds), size_time_table (s.size_time_table),
+    time (s.time), conds (s.conds), size_time_table (s.size_time_table),
     loop_iterations (s.loop_iterations), loop_stride (s.loop_stride),
     growth (s.growth), scc_no (s.scc_no)
   {}
@@ -116,10 +138,6 @@  public:
 
   /* Information about the function body itself.  */
 
-  /* Estimated stack frame consumption by the function.  */
-  HOST_WIDE_INT estimated_self_stack_size;
-  /* Size of the function body.  */
-  int self_size;
   /* Minimal size increase after inlining.  */
   int min_size;
 
@@ -137,11 +155,8 @@  public:
 
   /* Estimated stack frame consumption by the function.  */
   HOST_WIDE_INT estimated_stack_size;
-  /* Expected offset of the stack frame of function.  */
-  HOST_WIDE_INT stack_frame_offset;
-  /* Estimated size of the function after inlining.  */
+  /* Estimated runtime of function after inlining.  */
   sreal GTY((skip)) time;
-  int size;
 
   /* Conditional size/time information.  The summaries are being
      merged during inlining.  */
@@ -201,6 +216,24 @@  public:
 extern GTY(()) fast_function_summary <ipa_fn_summary *, va_gc>
   *ipa_fn_summaries;
 
+class ipa_size_summary_t:
+  public fast_function_summary <ipa_size_summary *, va_gc>
+{
+public:
+  ipa_size_summary_t (symbol_table *symtab):
+    fast_function_summary <ipa_size_summary *, va_gc> (symtab) {}
+  /* Allocate the summary with ggc_alloc and disable its insertion hook.  */
+  static ipa_size_summary_t *create_ggc (symbol_table *symtab)
+  {
+    void *mem = ggc_alloc <ipa_size_summary_t> ();
+    ipa_size_summary_t *result = new (mem) ipa_size_summary_t (symtab);
+    result->disable_insertion_hook ();
+    return result;
+  }
+};
+extern fast_function_summary <ipa_size_summary *, va_heap>
+  *ipa_size_summaries;
+
 /* Information kept about callgraph edges.  */
 class ipa_call_summary
 {
@@ -256,6 +289,7 @@  void ipa_dump_fn_summaries (FILE *f);
 void ipa_dump_fn_summary (FILE *f, struct cgraph_node *node);
 void ipa_dump_hints (FILE *f, ipa_hints);
 void ipa_free_fn_summary (void);
+void ipa_free_size_summary (void);
 void inline_analyze_function (struct cgraph_node *node);
 void estimate_ipcp_clone_size_and_time (struct cgraph_node *,
 					vec<tree>,
@@ -289,5 +323,6 @@  void estimate_node_size_and_time (struct
 				  inline_param_summary);
 
 void ipa_fnsummary_c_finalize (void);
+HOST_WIDE_INT ipa_get_stack_frame_offset (struct cgraph_node *node);
 
 #endif /* GCC_IPA_FNSUMMARY_H */
Index: ipa-icf.c
===================================================================
--- ipa-icf.c	(revision 277423)
+++ ipa-icf.c	(working copy)
@@ -1142,8 +1142,8 @@  sem_function::merge (sem_item *alias_ite
 			 "cannot create wrapper of stdarg function.\n");
 	}
       else if (ipa_fn_summaries
-	       && ipa_fn_summaries->get (alias) != NULL
-	       && ipa_fn_summaries->get (alias)->self_size <= 2)
+	       && ipa_size_summaries->get (alias) != NULL
+	       && ipa_size_summaries->get (alias)->self_size <= 2)
 	{
 	  if (dump_enabled_p ())
 	    dump_printf (MSG_MISSED_OPTIMIZATION, "Wrapper creation is not "
Index: ipa-inline-analysis.c
===================================================================
--- ipa-inline-analysis.c	(revision 277423)
+++ ipa-inline-analysis.c	(working copy)
@@ -161,7 +161,8 @@  do_estimate_edge_time (struct cgraph_edg
   /* When caching, update the cache entry.  */
   if (edge_growth_cache != NULL)
     {
-      ipa_fn_summaries->get_create (edge->callee)->min_size = min_size;
+      ipa_fn_summaries->get (edge->callee->function_symbol ())->min_size
+	 = min_size;
       edge_growth_cache_entry *entry
 	= edge_growth_cache->get_create (edge);
       entry->time = time;
@@ -265,7 +266,7 @@  estimate_size_after_inlining (struct cgr
 			      struct cgraph_edge *edge)
 {
   class ipa_call_summary *es = ipa_call_summaries->get (edge);
-  ipa_fn_summary *s = ipa_fn_summaries->get (node);
+  ipa_size_summary *s = ipa_size_summaries->get (node);
   if (!es->predicate || *es->predicate != false)
     {
       int size = s->size + estimate_edge_growth (edge);
@@ -321,7 +322,7 @@  int
 estimate_growth (struct cgraph_node *node)
 {
   struct growth_data d = { node, false, false, 0 };
-  class ipa_fn_summary *info = ipa_fn_summaries->get (node);
+  class ipa_size_summary *info = ipa_size_summaries->get (node);
 
   node->call_for_symbol_and_aliases (do_estimate_growth_1, &d, true);
 
@@ -396,7 +397,7 @@  growth_likely_positive (struct cgraph_no
       || node->address_taken)
     return true;
 
-  max_callers = ipa_fn_summaries->get (node)->size * 4 / edge_growth + 2;
+  max_callers = ipa_size_summaries->get (node)->size * 4 / edge_growth + 2;
 
   for (e = node->callers; e; e = e->next_caller)
     {
Index: ipa-inline-transform.c
===================================================================
--- ipa-inline-transform.c	(revision 277423)
+++ ipa-inline-transform.c	(working copy)
@@ -200,7 +200,7 @@  clone_inlined_nodes (struct cgraph_edge
 	    {
 	      gcc_assert (!e->callee->alias);
 	      if (overall_size)
-		*overall_size -= ipa_fn_summaries->get (e->callee)->size;
+		*overall_size -= ipa_size_summaries->get (e->callee)->size;
 	      nfunctions_inlined++;
 	    }
 	  duplicate = false;
@@ -478,7 +478,7 @@  inline_call (struct cgraph_edge *e, bool
 
   gcc_assert (curr->callee->global.inlined_to == to);
 
-  old_size = ipa_fn_summaries->get (to)->size;
+  old_size = ipa_size_summaries->get (to)->size;
   ipa_merge_fn_summary_after_inlining (e);
   if (e->in_polymorphic_cdtor)
     mark_all_inlined_calls_cdtor (e->callee);
@@ -492,8 +492,8 @@  inline_call (struct cgraph_edge *e, bool
        work for further inlining into this function.  Before inlining
        the function we inlined to again we expect the caller to update
        the overall summary.  */
-    ipa_fn_summaries->get (to)->size += estimated_growth;
-  new_size = ipa_fn_summaries->get (to)->size;
+    ipa_size_summaries->get (to)->size += estimated_growth;
+  new_size = ipa_size_summaries->get (to)->size;
 
   if (callee->calls_comdat_local)
     to->calls_comdat_local = true;
Index: ipa-inline.c
===================================================================
--- ipa-inline.c	(revision 277423)
+++ ipa-inline.c	(working copy)
@@ -150,8 +150,7 @@  caller_growth_limits (struct cgraph_edge
   int newsize;
   int limit = 0;
   HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
-  ipa_fn_summary *info, *what_info;
-  ipa_fn_summary *outer_info = ipa_fn_summaries->get (to);
+  ipa_size_summary *outer_info = ipa_size_summaries->get (to);
 
   /* Look for function e->caller is inlined to.  While doing
      so work out the largest function body on the way.  As
@@ -163,28 +162,29 @@  caller_growth_limits (struct cgraph_edge
      too much in order to prevent compiler from exploding".  */
   while (true)
     {
-      info = ipa_fn_summaries->get (to);
-      if (limit < info->self_size)
-	limit = info->self_size;
-      if (stack_size_limit < info->estimated_self_stack_size)
-	stack_size_limit = info->estimated_self_stack_size;
+      ipa_size_summary *size_info = ipa_size_summaries->get (to);
+      if (limit < size_info->self_size)
+	limit = size_info->self_size;
+      if (stack_size_limit < size_info->estimated_self_stack_size)
+	stack_size_limit = size_info->estimated_self_stack_size;
       if (to->global.inlined_to)
         to = to->callers->caller;
       else
 	break;
     }
 
-  what_info = ipa_fn_summaries->get (what);
+  ipa_fn_summary *what_info = ipa_fn_summaries->get (what);
+  ipa_size_summary *what_size_info = ipa_size_summaries->get (what);
 
-  if (limit < what_info->self_size)
-    limit = what_info->self_size;
+  if (limit < what_size_info->self_size)
+    limit = what_size_info->self_size;
 
   limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
 
   /* Check the size after inlining against the function limits.  But allow
      the function to shrink if it went over the limits by forced inlining.  */
   newsize = estimate_size_after_inlining (to, e);
-  if (newsize >= info->size
+  if (newsize >= ipa_size_summaries->get (what)->size
       && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
       && newsize > limit)
     {
@@ -203,7 +203,7 @@  caller_growth_limits (struct cgraph_edge
   stack_size_limit += ((gcov_type)stack_size_limit
 		       * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);
 
-  inlined_stack = (outer_info->stack_frame_offset
+  inlined_stack = (ipa_get_stack_frame_offset (to)
 		   + outer_info->estimated_self_stack_size
 		   + what_info->estimated_stack_size);
   /* Check new stack consumption with stack consumption at the place
@@ -213,7 +213,7 @@  caller_growth_limits (struct cgraph_edge
 	 inline call, we can inline, too.
 	 This bit overoptimistically assume that we are good at stack
 	 packing.  */
-      && inlined_stack > info->estimated_stack_size
+      && inlined_stack > ipa_fn_summaries->get (to)->estimated_stack_size
       && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
     {
       e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
@@ -1115,7 +1115,7 @@  edge_badness (struct cgraph_edge *edge,
   gcc_checking_assert ((edge_time * 100
 			- callee_info->time * 101).to_int () <= 0
 			|| callee->count.ipa ().initialized_p ());
-  gcc_checking_assert (growth <= callee_info->size);
+  gcc_checking_assert (growth <= ipa_size_summaries->get (callee)->size);
 
   if (dump)
     {
@@ -1219,7 +1219,7 @@  edge_badness (struct cgraph_edge *edge,
 	     and it is not called once.  */
 	  if (!caller_info->single_caller && overall_growth < caller_growth
 	      && caller_info->inlinable
-	      && caller_info->size
+	      && ipa_size_summaries->get (caller)->size
 		 < (DECL_DECLARED_INLINE_P (caller->decl)
 		    ? inline_insns_single (caller, false)
 		    : inline_insns_auto (caller, false)))
@@ -1243,7 +1243,7 @@  edge_badness (struct cgraph_edge *edge,
 	    overall_growth += 256 * 256 - 256;
 	  denominator *= overall_growth;
         }
-      denominator *= ipa_fn_summaries->get (caller)->size + growth;
+      denominator *= ipa_size_summaries->get (caller)->size + growth;
 
       badness = - numerator / denominator;
 
@@ -1646,8 +1646,8 @@  recursive_inlining (struct cgraph_edge *
     dump_printf_loc (MSG_NOTE, edge->call_stmt,
 		     "\n   Inlined %i times, "
 		     "body grown from size %i to %i, time %f to %f\n", n,
-		     ipa_fn_summaries->get (master_clone)->size,
-		     ipa_fn_summaries->get (node)->size,
+		     ipa_size_summaries->get (master_clone)->size,
+		     ipa_size_summaries->get (node)->size,
 		     ipa_fn_summaries->get (master_clone)->time.to_double (),
 		     ipa_fn_summaries->get (node)->time.to_double ());
 
@@ -1871,7 +1871,7 @@  inline_small_functions (void)
 	    /* Do not account external functions, they will be optimized out
 	       if not inlined.  Also only count the non-cold portion of program.  */
 	    if (inline_account_function_p (node))
-	      initial_size += info->size;
+	      initial_size += ipa_size_summaries->get (node)->size;
 	    info->growth = estimate_growth (node);
 
 	    int num_calls = 0;
@@ -1887,7 +1887,8 @@  inline_small_functions (void)
 		     n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
 		  if (opt_for_fn (n2->decl, optimize))
 		    {
-		      ipa_fn_summary *info2 = ipa_fn_summaries->get (n2);
+		      ipa_fn_summary *info2 = ipa_fn_summaries->get
+			 (n2->global.inlined_to ? n2->global.inlined_to : n2);
 		      if (info2->scc_no)
 			break;
 		      info2->scc_no = id;
@@ -2048,7 +2049,7 @@  inline_small_functions (void)
 	  fprintf (dump_file,
 		   "\nConsidering %s with %i size\n",
 		   callee->dump_name (),
-		   ipa_fn_summaries->get (callee)->size);
+		   ipa_size_summaries->get (callee)->size);
 	  fprintf (dump_file,
 		   " to be inlined into %s in %s:%i\n"
 		   " Estimated badness is %f, frequency %.2f.\n",
@@ -2174,7 +2175,7 @@  inline_small_functions (void)
 
       if (dump_enabled_p ())
 	{
-	  ipa_fn_summary *s = ipa_fn_summaries->get (edge->caller);
+	  ipa_fn_summary *s = ipa_fn_summaries->get (where);
 
 	  /* dump_printf can't handle %+i.  */
 	  char buf_net_change[100];
@@ -2185,7 +2186,9 @@  inline_small_functions (void)
 			   " Inlined %C into %C which now has time %f and "
 			   "size %i, net change of %s.\n",
 			   edge->callee, edge->caller,
-			   s->time.to_double (), s->size, buf_net_change);
+			   s->time.to_double (),
+			   ipa_size_summaries->get (edge->caller)->size,
+			   buf_net_change);
 	}
       if (min_size > overall_size)
 	{
@@ -2322,11 +2325,11 @@  inline_to_all_callers_1 (struct cgraph_n
 	  fprintf (dump_file,
 		   "\nInlining %s size %i.\n",
 		   ultimate->name (),
-		   ipa_fn_summaries->get (ultimate)->size);
+		   ipa_size_summaries->get (ultimate)->size);
 	  fprintf (dump_file,
 		   " Called once from %s %i insns.\n",
 		   node->callers->caller->name (),
-		   ipa_fn_summaries->get (node->callers->caller)->size);
+		   ipa_size_summaries->get (node->callers->caller)->size);
 	}
 
       /* Remember which callers we inlined to, delaying updating the
@@ -2337,7 +2340,7 @@  inline_to_all_callers_1 (struct cgraph_n
 	fprintf (dump_file,
 		 " Inlined into %s which now has %i size\n",
 		 caller->name (),
-		 ipa_fn_summaries->get (caller)->size);
+		 ipa_size_summaries->get (caller)->size);
       if (!(*num_calls)--)
 	{
 	  if (dump_file)
Index: ipa-prop.h
===================================================================
--- ipa-prop.h	(revision 277423)
+++ ipa-prop.h	(working copy)
@@ -641,7 +641,7 @@  class GTY((user)) ipa_edge_args_sum_t :
   ipa_edge_args_sum_t (symbol_table *table, bool ggc)
     : call_summary<ipa_edge_args *> (table, ggc) { }
 
-  /* Hook that is called by summary when an edge is duplicated.  */
+  /* Hook that is called by summary when an edge is removed.  */
   virtual void remove (cgraph_edge *cs, ipa_edge_args *args);
   /* Hook that is called by summary when an edge is duplicated.  */
   virtual void duplicate (cgraph_edge *src,
Index: lto/lto-partition.c
===================================================================
--- lto/lto-partition.c	(revision 277423)
+++ lto/lto-partition.c	(working copy)
@@ -171,7 +171,7 @@  add_symbol_to_partition_1 (ltrans_partit
     {
       struct cgraph_edge *e;
       if (!node->alias && c == SYMBOL_PARTITION)
-	part->insns += ipa_fn_summaries->get (cnode)->size;
+	part->insns += ipa_size_summaries->get (cnode)->size;
 
       /* Add all inline clones and callees that are duplicated.  */
       for (e = cnode->callees; e; e = e->next_callee)
@@ -291,7 +291,7 @@  undo_partition (ltrans_partition partiti
 
       if (!node->alias && (cnode = dyn_cast <cgraph_node *> (node))
           && node->get_partitioning_class () == SYMBOL_PARTITION)
-	partition->insns -= ipa_fn_summaries->get (cnode)->size;
+	partition->insns -= ipa_size_summaries->get (cnode)->size;
       lto_symtab_encoder_delete_node (partition->encoder, node);
       node->aux = (void *)((size_t)node->aux - 1);
     }
@@ -528,7 +528,7 @@  lto_balanced_map (int n_lto_partitions,
 	else
 	  order.safe_push (node);
 	if (!node->alias)
-	  total_size += ipa_fn_summaries->get (node)->size;
+	  total_size += ipa_size_summaries->get (node)->size;
       }
 
   original_total_size = total_size;
Index: lto/lto.c
===================================================================
--- lto/lto.c	(revision 277423)
+++ lto/lto.c	(working copy)
@@ -498,9 +498,9 @@  do_whole_program_analysis (void)
   else
     gcc_unreachable ();
 
-  /* Inline summaries are needed for balanced partitioning.  Free them now so
+  /* Size summaries are needed for balanced partitioning.  Free them now so
      the memory can be used for streamer caches.  */
-  ipa_free_fn_summary ();
+  ipa_free_size_summary ();
 
   /* AUX pointers are used by partitioning code to bookkeep number of
      partitions symbol is in.  This is no longer needed.  */