diff mbox series

Fix profile updating bug in ipa-split

Message ID 20171113175859.GA49629@kam.mff.cuni.cz
State New
Headers show
Series Fix profile updating bug in ipa-split | expand

Commit Message

Jan Hubicka Nov. 13, 2017, 5:58 p.m. UTC
Hi,
this patch fixes bug I noticed while looking for profile mismatches in tramp3d.
When split part starts with the loop, the frequency of call to split function
is not the same as frequency of this block.  This also affects the heuristics
which should not count the loopback edges.

Patch aslo gets rid of frequencies and uses counts consistently.

Bootstrapped/regtested x86_64-linux, will commit it shortly.

	* ipa-split.c (struct split_point): Add count.
	(consider_split): Do not compute incoming frequency; compute incoming
	count and store it to split_point.
	(split_function): Set count of the call to split part correctly.

	* testsuite/gcc.dg/tree-ssa/fnsplit-2.c: New testcase.
diff mbox series

Patch

Index: ipa-split.c
===================================================================
--- ipa-split.c	(revision 254700)
+++ ipa-split.c	(working copy)
@@ -129,6 +129,10 @@  struct split_point
   /* Basic block where we split (that will become entry point of new function.  */
   basic_block entry_bb;
 
+  /* Count for entering the split part.
+     This is not count of the entry_bb because it may be in loop.  */
+  profile_count count;
+
   /* Basic blocks we are splitting away.  */
   bitmap split_bbs;
 
@@ -426,7 +430,6 @@  consider_split (struct split_point *curr
   edge_iterator ei;
   gphi_iterator bsi;
   unsigned int i;
-  int incoming_freq = 0;
   tree retval;
   tree retbnd;
   bool back_edge = false;
@@ -434,18 +437,21 @@  consider_split (struct split_point *curr
   if (dump_file && (dump_flags & TDF_DETAILS))
     dump_split_point (dump_file, current);
 
+  current->count = profile_count::zero ();
   FOR_EACH_EDGE (e, ei, current->entry_bb->preds)
     {
       if (e->flags & EDGE_DFS_BACK)
 	back_edge = true;
       if (!bitmap_bit_p (current->split_bbs, e->src->index))
-        incoming_freq += EDGE_FREQUENCY (e);
+	current->count += e->count ();
     }
 
-  /* Do not split when we would end up calling function anyway.  */
-  if (incoming_freq
-      >= (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun)
-	  * PARAM_VALUE (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY) / 100))
+  /* Do not split when we would end up calling function anyway.
+     Compares are three state, use !(...<...) to also give up when outcome
+     is unknown.  */
+  if (!(current->count
+       < (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.apply_scale
+	   (PARAM_VALUE (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY), 100))))
     {
       /* When profile is guessed, we can not expect it to give us
 	 realistic estimate on likelyness of function taking the
@@ -454,14 +460,17 @@  consider_split (struct split_point *curr
 	 is likely noticeable win.  */
       if (back_edge
 	  && profile_status_for_fn (cfun) != PROFILE_READ
-	  && incoming_freq
-		 < ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun))
+	  && current->count
+		 < ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
 	{
 	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file,
-		     "  Split before loop, accepting despite low frequencies %i %i.\n",
-		     incoming_freq,
-		     ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun));
+	    {
+	      fprintf (dump_file,
+		       "  Split before loop, accepting despite low counts");
+	      current->count.dump (dump_file);
+	      fprintf (dump_file, " ");
+	      ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.dump (dump_file);
+	    }
 	}
       else
 	{
@@ -711,14 +720,13 @@  consider_split (struct split_point *curr
   if (dump_file && (dump_flags & TDF_DETAILS))
     fprintf (dump_file, "  Accepted!\n");
 
-  /* At the moment chose split point with lowest frequency and that leaves
+  /* At the moment chose split point with lowest count and that leaves
      out smallest size of header.
      In future we might re-consider this heuristics.  */
   if (!best_split_point.split_bbs
-      || best_split_point.entry_bb->count.to_frequency (cfun)
-	 > current->entry_bb->count.to_frequency (cfun)
-      || (best_split_point.entry_bb->count.to_frequency (cfun)
-	  == current->entry_bb->count.to_frequency (cfun)
+      || best_split_point.count
+	 > current->count
+      || (best_split_point.count == current->count 
 	  && best_split_point.split_size < current->split_size))
 	
     {
@@ -1446,6 +1454,7 @@  split_function (basic_block return_bb, s
       }
     else
       break;
+  call_bb->count = split_point->count;
   e = split_block (split_point->entry_bb, last_stmt);
   remove_edge (e);
 
Index: testsuite/gcc.dg/tree-ssa/fnsplit-2.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/fnsplit-2.c	(nonexistent)
+++ testsuite/gcc.dg/tree-ssa/fnsplit-2.c	(working copy)
@@ -0,0 +1,32 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-fnsplit-blocks-details" } */
+int b;
+void test (void);
+void
+split_me (int *a)
+{
+  if (__builtin_expect (a==0, 0))
+    do
+    {
+      test();
+      test();
+      test();
+      test();
+      test();
+    }
+    while (b);
+  else
+    q();
+}
+
+int
+main(void)
+{
+  int i;
+  for (i = 0; i < 1000; i++)
+    split_me(&i);
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Splitting function at:" 1 "fnsplit"} } */
+/* { dg-final { scan-tree-dump-times "Invalid sum" 0 "fnsplit"} } */