diff mbox

Make profile scaling during cloning more precise

Message ID 20170610230840.GB11824@kam.mff.cuni.cz
State New
Headers show

Commit Message

Jan Hubicka June 10, 2017, 11:08 p.m. UTC
Hi,
this patch makes profile scaling use the same logic in tree-inline and cgraphclones.
This reduces roundoff errors and makes it easier to propagate more info.
When compiling tramp3d with profile feedback, the number of mismatches after
inlining goes down from 248 to 95.

Honza

	* cgraph.h (cgraph_edge::clone): Update prototype.
	* cgraphclones.c (cgraph_edge::clone): Update profile scaling.
	(cgraph_node::create_clone): Update.
	(cgraph_node::create_version_clone): Update.
	* tree-inline.c (copy_bb): Update.
	(expand_call_inline): Update.
diff mbox

Patch

Index: cgraph.h
===================================================================
--- cgraph.h	(revision 249092)
+++ cgraph.h	(working copy)
@@ -1649,7 +1649,7 @@  struct GTY((chain_next ("%h.next_caller"
   /* Create clone of edge in the node N represented
      by CALL_EXPR the callgraph.  */
   cgraph_edge * clone (cgraph_node *n, gcall *call_stmt, unsigned stmt_uid,
-		       gcov_type count_scale, int freq_scale,
+		       profile_count num, profile_count den, int freq_scale,
 		       bool update_original);
 
   /* Verify edge count and frequency.  */
Index: cgraphclones.c
===================================================================
--- cgraphclones.c	(revision 249092)
+++ cgraphclones.c	(working copy)
@@ -86,10 +86,13 @@  along with GCC; see the file COPYING3.
 
 cgraph_edge *
 cgraph_edge::clone (cgraph_node *n, gcall *call_stmt, unsigned stmt_uid,
-		    gcov_type count_scale, int freq_scale, bool update_original)
+		    profile_count num, profile_count den,
+		    int freq_scale, bool update_original)
 {
   cgraph_edge *new_edge;
-  profile_count gcov_count = count.apply_scale (count_scale, REG_BR_PROB_BASE);
+  profile_count gcov_count
+	 = (num == profile_count::zero () || den > 0)
+	   ? count.apply_scale (num, den) : count;
   gcov_type freq;
 
   /* We do not want to ignore loop nest after frequency drops to 0.  */
@@ -116,7 +119,7 @@  cgraph_edge::clone (cgraph_node *n, gcal
 	{
 	  new_edge = n->create_indirect_edge (call_stmt,
 					      indirect_info->ecf_flags,
-					      count, freq, false);
+					      gcov_count, freq, false);
 	  *new_edge->indirect_info = *indirect_info;
 	}
     }
@@ -428,7 +431,6 @@  cgraph_node::create_clone (tree new_decl
 {
   cgraph_node *new_node = symtab->create_empty ();
   cgraph_edge *e;
-  gcov_type count_scale;
   unsigned i;
 
   if (new_inlined_to)
@@ -453,7 +455,6 @@  cgraph_node::create_clone (tree new_decl
   new_node->global = global;
   new_node->global.inlined_to = new_inlined_to;
   new_node->rtl = rtl;
-  new_node->count = count;
   new_node->frequency = frequency;
   new_node->tp_first_run = tp_first_run;
   new_node->tm_clone = tm_clone;
@@ -475,18 +476,6 @@  cgraph_node::create_clone (tree new_decl
   else
     new_node->clone.combined_args_to_skip = args_to_skip;
 
-  if (count.initialized_p ())
-    {
-      if (new_node->count > count)
-        count_scale = REG_BR_PROB_BASE;
-      else
-	count_scale = new_node->count.probability_in (count);
-    }
-  else
-    count_scale = 0;
-  if (update_original)
-    count -= prof_count;
-
   FOR_EACH_VEC_ELT (redirect_callers, i, e)
     {
       /* Redirect calls to the old version node to point to its new
@@ -500,12 +489,12 @@  cgraph_node::create_clone (tree new_decl
   new_node->expand_all_artificial_thunks ();
 
   for (e = callees;e; e=e->next_callee)
-    e->clone (new_node, e->call_stmt, e->lto_stmt_uid, count_scale,
+    e->clone (new_node, e->call_stmt, e->lto_stmt_uid, new_node->count, count,
 	      freq, update_original);
 
   for (e = indirect_calls; e; e = e->next_callee)
     e->clone (new_node, e->call_stmt, e->lto_stmt_uid,
-	      count_scale, freq, update_original);
+	      new_node->count, count, freq, update_original);
   new_node->clone_references (this);
 
   new_node->next_sibling_clone = clones;
@@ -514,6 +503,9 @@  cgraph_node::create_clone (tree new_decl
   clones = new_node;
   new_node->clone_of = this;
 
+  if (update_original)
+    count -= prof_count;
+
   if (call_duplication_hook)
     symtab->call_cgraph_duplication_hooks (this, new_node);
 
@@ -911,14 +903,14 @@  cgraph_node::create_version_clone (tree
      if (!bbs_to_copy
 	 || bitmap_bit_p (bbs_to_copy, gimple_bb (e->call_stmt)->index))
        e->clone (new_version, e->call_stmt,
-		 e->lto_stmt_uid, REG_BR_PROB_BASE,
+		 e->lto_stmt_uid, count, count,
 		 CGRAPH_FREQ_BASE,
 		 true);
    for (e = indirect_calls; e; e=e->next_callee)
      if (!bbs_to_copy
 	 || bitmap_bit_p (bbs_to_copy, gimple_bb (e->call_stmt)->index))
        e->clone (new_version, e->call_stmt,
-		 e->lto_stmt_uid, REG_BR_PROB_BASE,
+		 e->lto_stmt_uid, count, count,
 		 CGRAPH_FREQ_BASE,
 		 true);
    FOR_EACH_VEC_ELT (redirect_callers, i, e)
Index: tree-inline.c
===================================================================
--- tree-inline.c	(revision 249092)
+++ tree-inline.c	(working copy)
@@ -2009,7 +2009,9 @@  copy_bb (copy_body_data *id, basic_block
 		      struct cgraph_edge *old_edge = edge;
 		      edge = edge->clone (id->dst_node, call_stmt,
 					  gimple_uid (stmt),
-					  REG_BR_PROB_BASE, CGRAPH_FREQ_BASE,
+					  profile_count::one (),
+					  profile_count::one (),
+					  CGRAPH_FREQ_BASE,
 					  true);
 		      /* We could also just rescale the frequency, but
 		         doing so would introduce roundoff errors and make
@@ -2028,7 +2030,9 @@  copy_bb (copy_body_data *id, basic_block
 			  old_edge->speculative_call_info (direct, indirect, ref);
 			  indirect = indirect->clone (id->dst_node, call_stmt,
 						      gimple_uid (stmt),
-						      REG_BR_PROB_BASE, CGRAPH_FREQ_BASE,
+						      profile_count::one (),
+						      profile_count::one (),
+						      CGRAPH_FREQ_BASE,
 						      true);
 			  if (old_edge->frequency + indirect->frequency)
 			    {
@@ -4509,7 +4522,9 @@  expand_call_inline (basic_block bb, gimp
       cg_edge->remove ();
       edge = id->src_node->callees->clone (id->dst_node, call_stmt,
 		   		           gimple_uid (stmt),
-				   	   REG_BR_PROB_BASE, CGRAPH_FREQ_BASE,
+				   	   profile_count::one (),
+					   profile_count::one (),
+					   CGRAPH_FREQ_BASE,
 				           true);
       edge->frequency = freq;
       edge->count = count;