Index: ipa-inline.c
===================================================================
--- ipa-inline.c	(revision 193157)
+++ ipa-inline.c	(working copy)
@@ -456,6 +456,42 @@ want_early_inline_function_p (struct cgr
   return want_inline;
 }
 
+/* Compute time of the edge->caller + edge->callee execution when inlining
+   does not happen.  */
+
+inline int
+compute_uninlined_call_time (struct inline_summary *callee_info,
+			     struct cgraph_edge *edge)
+{
+  int uninlined_call_time =
+    RDIV ((gcov_type)callee_info->time * MAX (edge->frequency, 1),
+	  CGRAPH_FREQ_BASE);
+  int caller_time = inline_summary (edge->caller->global.inlined_to
+				    ? edge->caller->global.inlined_to
+				    : edge->caller)->time;
+  return uninlined_call_time + caller_time;
+}
+
+/* Same as compute_uinlined_call_time but compute time when inlining
+   does happen.  */
+
+inline gcov_type
+compute_inlined_call_time (struct cgraph_edge *edge,
+			   int edge_time)
+{
+  int caller_time = inline_summary (edge->caller->global.inlined_to
+				    ? edge->caller->global.inlined_to
+				    : edge->caller)->time;
+  int time = caller_time + RDIV ((edge_time - inline_edge_summary (edge)->call_stmt_time)
+			         * MAX (edge->frequency, 1),
+				 CGRAPH_FREQ_BASE);
+  /* Possible one roundoff error, but watch for overflows.  */
+  gcc_checking_assert (time >= INT_MIN / 2);
+  if (time < 0)
+    time = 0;
+  return time;
+}
+
 /* Return true if we are interested in inlining small function.
    When REPORT is true, report reason to dump file.  */
 
@@ -724,31 +760,41 @@ want_inline_function_to_all_callers_p (s
    return true;
 }
 
+#define RELATIVE_TIME_BENEFIT_RANGE (INT_MAX / 64)
 
 /* Return relative time improvement for inlining EDGE in range
-   1...2^9.  */
+   1...RELATIVE_TIME_BENEFIT_RANGE  */
 
 static inline int
 relative_time_benefit (struct inline_summary *callee_info,
 		       struct cgraph_edge *edge,
-		       int time_growth)
+		       int edge_time)
 {
   int relbenefit;
-  gcov_type uninlined_call_time;
+  int uninlined_call_time = compute_uninlined_call_time (callee_info, edge);
+  int inlined_call_time = compute_inlined_call_time (edge, edge_time);
+
+  /* Inlining into extern inline function is not a win.  */
+  if (DECL_EXTERNAL (edge->caller->global.inlined_to
+		     ? edge->caller->global.inlined_to->symbol.decl
+		     : edge->caller->symbol.decl))
+    return 1;
+
+  /* Watch overflows.  */
+  gcc_checking_assert (uninlined_call_time >= 0);
+  gcc_checking_assert (inlined_call_time >= 0);
+  gcc_checking_assert (uninlined_call_time >= inlined_call_time);
 
-  uninlined_call_time =
-    ((gcov_type)
-     (callee_info->time
-      + inline_edge_summary (edge)->call_stmt_time) * edge->frequency
-     + CGRAPH_FREQ_BASE / 2) / CGRAPH_FREQ_BASE;
   /* Compute relative time benefit, i.e. how much the call becomes faster.
      ??? perhaps computing how much the caller+calle together become faster
      would lead to more realistic results.  */
   if (!uninlined_call_time)
     uninlined_call_time = 1;
   relbenefit =
-    (uninlined_call_time - time_growth) * 256 / (uninlined_call_time);
-  relbenefit = MIN (relbenefit, 512);
+    RDIV (((gcov_type)uninlined_call_time - inlined_call_time) * RELATIVE_TIME_BENEFIT_RANGE,
+	  uninlined_call_time);
+  relbenefit = MIN (relbenefit, RELATIVE_TIME_BENEFIT_RANGE);
+  gcc_checking_assert (relbenefit >= 0);
   relbenefit = MAX (relbenefit, 1);
   return relbenefit;
 }
@@ -764,7 +810,7 @@ static int
 edge_badness (struct cgraph_edge *edge, bool dump)
 {
   gcov_type badness;
-  int growth, time_growth;
+  int growth, edge_time;
   struct cgraph_node *callee = cgraph_function_or_thunk_node (edge->callee,
 							      NULL);
   struct inline_summary *callee_info = inline_summary (callee);
@@ -774,17 +820,20 @@ edge_badness (struct cgraph_edge *edge, 
     return INT_MIN;
 
   growth = estimate_edge_growth (edge);
-  time_growth = estimate_edge_time (edge);
+  edge_time = estimate_edge_time (edge);
   hints = estimate_edge_hints (edge);
+  gcc_checking_assert (edge_time >= 0);
+  gcc_checking_assert (edge_time <= callee_info->time);
+  gcc_checking_assert (growth <= callee_info->size);
 
   if (dump)
     {
       fprintf (dump_file, "    Badness calculation for %s -> %s\n",
 	       xstrdup (cgraph_node_name (edge->caller)),
 	       xstrdup (cgraph_node_name (callee)));
-      fprintf (dump_file, "      size growth %i, time growth %i ",
+      fprintf (dump_file, "      size growth %i, time %i ",
 	       growth,
-	       time_growth);
+	       edge_time);
       dump_inline_hints (dump_file, hints);
       fprintf (dump_file, "\n");
     }
@@ -802,7 +851,7 @@ edge_badness (struct cgraph_edge *edge, 
 
 	        relative_edge_count * relative_time_benefit
      goodness = -------------------------------------------
-		edge_growth
+		growth_f_caller
      badness = -goodness  
 
     The fraction is upside down, because on edge counts and time beneits
@@ -810,11 +859,11 @@ edge_badness (struct cgraph_edge *edge, 
 
   else if (max_count)
     {
-      int relbenefit = relative_time_benefit (callee_info, edge, time_growth);
+      int relbenefit = relative_time_benefit (callee_info, edge, edge_time);
       badness =
 	((int)
-	 ((double) edge->count * INT_MIN / 2 / max_count / 512) *
-	 relative_time_benefit (callee_info, edge, time_growth)) / growth;
+	 ((double) edge->count * INT_MIN / 2 / max_count / RELATIVE_TIME_BENEFIT_RANGE) *
+	 relbenefit) / growth;
       
       /* Be sure that insanity of the profile won't lead to increasing counts
 	 in the scalling and thus to overflow in the computation above.  */
@@ -826,73 +875,53 @@ edge_badness (struct cgraph_edge *edge, 
 		   " * Relative benefit %f\n",
 		   (int) badness, (double) badness / INT_MIN,
 		   (double) edge->count / max_count,
-		   relbenefit * 100 / 256.0);
+		   relbenefit * 100.0 / RELATIVE_TIME_BENEFIT_RANGE);
 	}
     }
 
   /* When function local profile is available. Compute badness as:
-
      
-               growth_of_callee
-     badness = -------------------------------------- + growth_for-all
-	       relative_time_benefit * edge_frequency
+                 relative_time_benefit
+     goodness =  ---------------------------------
+	         growth_of_caller * overall_growth
 
+     badness = - goodness
+
+     compensated by the inline hints.
   */
   else if (flag_guess_branch_prob)
     {
-      int div = edge->frequency * (1<<10) / CGRAPH_FREQ_MAX;
-
-      div = MAX (div, 1);
-      gcc_checking_assert (edge->frequency <= CGRAPH_FREQ_MAX);
-      div *= relative_time_benefit (callee_info, edge, time_growth);
-
-      /* frequency is normalized in range 1...2^10.
-         relbenefit in range 1...2^9
-	 DIV should be in range 1....2^19.  */
-      gcc_checking_assert (div >= 1 && div <= (1<<19));
-
-      /* Result must be integer in range 0...INT_MAX.
-	 Set the base of fixed point calculation so we don't lose much of
-	 precision for small bandesses (those are interesting) yet we don't
-	 overflow for growths that are still in interesting range.
-
-	 Fixed point arithmetic with point at 6th bit. */
-      badness = ((gcov_type)growth) * (1<<(19+6));
-      badness = (badness + div / 2) / div;
-
-      /* Overall growth of inlining all calls of function matters: we want to
-	 inline so offline copy of function is no longer needed.
-
-	 Additionally functions that can be fully inlined without much of
-	 effort are better inline candidates than functions that can be fully
-	 inlined only after noticeable overall unit growths. The latter
-	 are better in a sense compressing of code size by factoring out common
-	 code into separate function shared by multiple code paths.
-
-	 We might mix the valud into the fraction by taking into account
-	 relative growth of the unit, but for now just add the number
-	 into resulting fraction.  */
-      if (badness > INT_MAX / 8)
-	{
-	  badness = INT_MAX / 8;
-	  if (dump)
-	    fprintf (dump_file, "Badness overflow\n");
-	}
-      if (hints & (INLINE_HINT_indirect_call
-		   | INLINE_HINT_loop_iterations
-		   | INLINE_HINT_loop_stride))
-	badness /= 8;
+      badness = (relative_time_benefit (callee_info, edge, edge_time)
+		 * (INT_MIN / 16 / RELATIVE_TIME_BENEFIT_RANGE));
+      badness /= (growth * MAX (1, callee_info->growth));
+      gcc_checking_assert (badness <=0 && badness >= INT_MIN / 16);
+      if ((hints & (INLINE_HINT_indirect_call
+		    | INLINE_HINT_loop_iterations
+		    | INLINE_HINT_loop_stride))
+	  || callee_info->growth <= 0)
+	badness *= 8;
       if (hints & (INLINE_HINT_same_scc))
-	badness *= 4;
-      if (hints & (INLINE_HINT_in_scc))
-	badness *= 2;
+	badness /= 16;
+      else if (hints & (INLINE_HINT_in_scc))
+	badness /= 8;
+      else if (hints & (INLINE_HINT_cross_module))
+	badness /= 2;
+      gcc_checking_assert (badness <= 0 && badness >= INT_MIN / 2);
+      if ((hints & INLINE_HINT_declared_inline) && badness >= INT_MIN / 32)
+	badness *= 16;
       if (dump)
 	{
 	  fprintf (dump_file,
 		   "      %i: guessed profile. frequency %f,"
-		   " benefit %f%%, divisor %i\n",
+		   " benefit %f%%, time w/o inlining %i, time w inlining %i"
+		   " overall growth %i (current) %i (original)\n",
 		   (int) badness, (double)edge->frequency / CGRAPH_FREQ_BASE,
-		   relative_time_benefit (callee_info, edge, time_growth) * 100 / 256.0, div);
+		   relative_time_benefit (callee_info, edge, edge_time) * 100.0
+		   / RELATIVE_TIME_BENEFIT_RANGE, 
+		   compute_uninlined_call_time (callee_info, edge),
+		   (int)compute_inlined_call_time (edge, edge_time),
+		   estimate_growth (callee),
+		   callee_info->growth);
 	}
     }
   /* When function local profile is not available or it does not give
@@ -1371,6 +1400,7 @@ inline_small_functions (void)
 
 	    if (!DECL_EXTERNAL (node->symbol.decl))
 	      initial_size += info->size;
+	    info->growth = estimate_growth (node);
 	    if (dfs && dfs->next_cycle)
 	      {
 		struct cgraph_node *n2;
Index: ipa-inline.h
===================================================================
--- ipa-inline.h	(revision 193155)
+++ ipa-inline.h	(working copy)
@@ -49,7 +49,9 @@ enum inline_hints_vals {
   INLINE_HINT_loop_iterations = 2,
   INLINE_HINT_loop_stride = 4,
   INLINE_HINT_same_scc = 8,
-  INLINE_HINT_in_scc = 16
+  INLINE_HINT_in_scc = 16,
+  INLINE_HINT_declared_inline = 32,
+  INLINE_HINT_cross_module = 64
 };
 typedef int inline_hints;
 
@@ -129,6 +131,12 @@ struct GTY(()) inline_summary
   /* Predicate on when some loop in the function becomes to have known
      stride.   */
   struct predicate * GTY((skip)) loop_stride;
+  /* Estimated growth for inlining all copies of the function before start
+     of small functions inlining.
+     This value will get out of date as the callers are duplicated, but
+     using up-to-date value in the badness metric mean a lot of extra
+     expenses.  */
+  int growth;
   /* Number of SCC on the beggining of inlining process.  */
   int scc_no;
 };
Index: ipa-inline-analysis.c
===================================================================
--- ipa-inline-analysis.c	(revision 193155)
+++ ipa-inline-analysis.c	(working copy)
@@ -649,6 +649,16 @@ dump_inline_hints (FILE *f, inline_hints
       hints &= ~INLINE_HINT_in_scc;
       fprintf (f, " in_scc");
     }
+  if (hints & INLINE_HINT_cross_module)
+    {
+      hints &= ~INLINE_HINT_cross_module;
+      fprintf (f, " cross_module");
+    }
+  if (hints & INLINE_HINT_declared_inline)
+    {
+      hints &= ~INLINE_HINT_declared_inline;
+      fprintf (f, " declared_inline");
+    }
   gcc_assert (!hints);
 }
 
@@ -983,6 +993,7 @@ reset_inline_summary (struct cgraph_node
   info->stack_frame_offset = 0;
   info->size = 0;
   info->time = 0;
+  info->growth = 0;
   info->scc_no = 0;
   if (info->loop_iterations)
     {
@@ -1375,6 +1386,9 @@ dump_inline_summary (FILE * f, struct cg
 	       (int) s->estimated_self_stack_size);
       fprintf (f, "  global stack:    %i\n",
 	       (int) s->estimated_stack_size);
+      if (s->growth)
+        fprintf (f, "  estimated growth:%i\n",
+	         (int) s->growth);
       if (s->scc_no)
         fprintf (f, "  In SCC:          %i\n",
 	         (int) s->scc_no);
@@ -1977,10 +1991,11 @@ will_be_nonconstant_predicate (struct ip
     return p;
 
   /* Stores will stay anyway.  */
-  if (gimple_vdef (stmt))
+  if (gimple_store_p (stmt))
     return p;
 
-  is_load = gimple_vuse (stmt) != NULL;
+  is_load = gimple_assign_load_p (stmt);
+
   /* Loads can be optimized when the value is known.  */
   if (is_load)
     {
@@ -2857,6 +2872,8 @@ estimate_node_size_and_time (struct cgra
     hints |=INLINE_HINT_loop_stride;
   if (info->scc_no)
     hints |= INLINE_HINT_in_scc;
+  if (DECL_DECLARED_INLINE_P (node->symbol.decl))
+    hints |= INLINE_HINT_declared_inline;
 
   estimate_calls_size_and_time (node, &size, &time, &hints, possible_truths,
 				known_vals, known_binfos, known_aggs);
@@ -2865,7 +2882,6 @@ estimate_node_size_and_time (struct cgra
   time = RDIV (time, INLINE_TIME_SCALE);
   size = RDIV (size, INLINE_SIZE_SCALE);
 
-
   if (dump_file
       && (dump_flags & TDF_DETAILS))
     fprintf (dump_file, "\n   size:%i time:%i\n", (int)size, (int)time);
@@ -3315,6 +3331,26 @@ inline_update_overall_summary (struct cg
   info->size = (info->size + INLINE_SIZE_SCALE / 2) / INLINE_SIZE_SCALE;
 }
 
+/* Return hints derrived from EDGE.   */
+int
+simple_edge_hints (struct cgraph_edge *edge)
+{
+  int hints = 0;
+  struct cgraph_node *to = (edge->caller->global.inlined_to
+			    ? edge->caller->global.inlined_to
+			    : edge->caller);
+  if (inline_summary (to)->scc_no
+      && inline_summary (to)->scc_no == inline_summary (edge->callee)->scc_no
+      && !cgraph_edge_recursive_p (edge))
+    hints |= INLINE_HINT_same_scc;
+
+  if (to->symbol.lto_file_data && edge->callee->symbol.lto_file_data
+      && to->symbol.lto_file_data != edge->callee->symbol.lto_file_data)
+    hints |= INLINE_HINT_cross_module;
+
+  return hints;
+}
+
 /* Estimate the time cost for the caller when inlining EDGE.
    Only to be called via estimate_edge_time, that handles the
    caching mechanism.
@@ -3328,7 +3364,6 @@ do_estimate_edge_time (struct cgraph_edg
   int time;
   int size;
   inline_hints hints;
-  gcov_type ret;
   struct cgraph_node *callee;
   clause_t clause;
   VEC (tree, heap) *known_vals;
@@ -3347,33 +3382,26 @@ do_estimate_edge_time (struct cgraph_edg
   VEC_free (tree, heap, known_vals);
   VEC_free (tree, heap, known_binfos);
   VEC_free (ipa_agg_jump_function_p, heap, known_aggs);
-
-  ret = RDIV ((gcov_type)time * edge->frequency,
-	      CGRAPH_FREQ_BASE);
+  gcc_checking_assert (size >= 0);
+  gcc_checking_assert (time >= 0);
 
   /* When caching, update the cache entry.  */
   if (edge_growth_cache)
     {
-      struct cgraph_node *to = (edge->caller->global.inlined_to
-			        ? edge->caller->global.inlined_to
-				: edge->caller);
       if ((int)VEC_length (edge_growth_cache_entry, edge_growth_cache)
 	  <= edge->uid)
 	VEC_safe_grow_cleared (edge_growth_cache_entry, heap, edge_growth_cache,
 			       cgraph_edge_max_uid);
       VEC_index (edge_growth_cache_entry, edge_growth_cache, edge->uid).time
-	= ret + (ret >= 0);
+	= time + (time >= 0);
 
       VEC_index (edge_growth_cache_entry, edge_growth_cache, edge->uid).size
 	= size + (size >= 0);
-      if (inline_summary (to)->scc_no
-	  && inline_summary (to)->scc_no == inline_summary (callee)->scc_no
-	  && !cgraph_edge_recursive_p (edge))
-	hints |= INLINE_HINT_same_scc;
+      hints |= simple_edge_hints (edge);
       VEC_index (edge_growth_cache_entry, edge_growth_cache, edge->uid).hints
 	= hints + 1;
     }
-  return ret;
+  return time;
 }
 
 
@@ -3430,9 +3458,6 @@ do_estimate_edge_hints (struct cgraph_ed
   VEC (tree, heap) *known_vals;
   VEC (tree, heap) *known_binfos;
   VEC (ipa_agg_jump_function_p, heap) *known_aggs;
-  struct cgraph_node *to = (edge->caller->global.inlined_to
-		            ? edge->caller->global.inlined_to
-			    : edge->caller);
 
   /* When we do caching, use do_estimate_edge_time to populate the entry.  */
 
@@ -3458,10 +3483,7 @@ do_estimate_edge_hints (struct cgraph_ed
   VEC_free (tree, heap, known_vals);
   VEC_free (tree, heap, known_binfos);
   VEC_free (ipa_agg_jump_function_p, heap, known_aggs);
-  if (inline_summary (to)->scc_no
-      && inline_summary (to)->scc_no == inline_summary (callee)->scc_no
-      && !cgraph_edge_recursive_p (edge))
-    hints |= INLINE_HINT_same_scc;
+  hints |= simple_edge_hints (edge);
   return hints;
 }
 
@@ -3549,10 +3571,11 @@ do_estimate_growth (struct cgraph_node *
      return zero or negative growths. */
   if (d.self_recursive)
     d.growth = d.growth < info->size ? info->size : d.growth;
+  else if (DECL_EXTERNAL (node->symbol.decl))
+    ;
   else
     {
-      if (!DECL_EXTERNAL (node->symbol.decl)
-	  && cgraph_will_be_removed_from_program_if_no_direct_calls (node))
+      if (cgraph_will_be_removed_from_program_if_no_direct_calls (node))
 	d.growth -= info->size;
       /* COMDAT functions are very often not shared across multiple units
 	 since they come from various template instantiations.
