[PR tree-optimization/67755] Fix profile insanity adjustments

Message ID 5695D005.7040508@redhat.com

Commit Message

Jeff Law Jan. 13, 2016, 4:18 a.m. UTC
tree-ssa-threadupdate.c has code to compensate for situations where
threading will produce "profile insanities".  Specifically, if we have
multiple jump thread paths through a common joiner to different final
targets, then we can end up increasing the counts on one path to
compensate for counts that would otherwise be lost when threading the
later path.
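
To make that concrete with made-up numbers (not taken from the BZ):
suppose the joiner's final outgoing edge E carries a count of 80, and
that 30 of the counts entering the joiner arrive on edges that are not
on any thread path.  If the first threaded path is computed to carry
only 40 through E, then after threading E would be left with
80 - 40 = 40, even though at most 30 can legitimately remain.  So the
existing code bumps the path's count up to 80 - 30 = 50; that is
precisely the path_out_count < elast->count - nonpath_count test in
compute_path_counts that this patch gates.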

I feel that code is papering over the real issue elsewhere in the
threader's profile update code, but I'm not sure we can fix that
without some significant revamping, which is probably riskier than
we'd like at this stage.

So this patch detects more accurately when those adjustments may be
needed, rather than blindly applying them all the time.
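
(My reading of the BZ failure: when every thread path through the
joiner reaches the same destination, that adjustment can pull
essentially all of elast's count onto the threaded path, leaving the
conditional's remaining outgoing edge pair with a 0%/100% split; that
is exactly what the new test below checks for.)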

The net result is more accurate counts/probabilities in cases where
there's a single destination for one or more jump threads through a
common join block, as is the case in the BZ.  I also ran the patch
over some older GCC .i files and saw it make similar fixes in a
half-dozen or so of them.
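
For reference, the kind of dump line the new test guards against looks
roughly like this (from -fdump-tree-dom2-details-blocks; the exact set
of edge flags will vary):

  ;;   succ:       4 [100.0%]  (TRUE_VALUE,EXECUTABLE)

i.e. a conditional whose outgoing edge claims a 100% probability even
though the branch is only predicted, not certain.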

Additionally, I verified that there are no new "Invalid sum of..."
messages across a blob of .i files I had lying around, with the patch
installed, for both the vrp and dom passes.
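
(To repeat that check: compile the .i files with dump flags along the
lines of the test's -fdump-tree-dom2-details-blocks and grep the
resulting dumps for "Invalid sum"; those are the messages the dumper
emits when a block's incoming or outgoing profile counts don't add up.)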

And, of course, the usual bootstrap and regression testing on
x86_64-linux-gnu, plus verification of the testcase using a ppc64
cross compiler.

Installed on the trunk.

Jeff
commit f1af94e0b32743de707f7f872975034ff1a56b64
Author: law <law@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Wed Jan 13 04:17:36 2016 +0000

    [PATCH][PR tree-optimization/67755] Fix profile insanity adjustments
    
    	PR tree-optimization/67755
    	* tree-ssa-threadupdate.c (struct ssa_local_info_t): Add new field
    	"need_profile_correction".
    	(thread_block_1): Initialize new field to false by default.  If we
    	have multiple thread paths through a common joiner to different
    	final targets, then set new field to true.
    	(compute_path_counts): Only do count adjustment when it's really
    	needed.
    
    	PR tree-optimization/67755
    	* gcc.dg/tree-ssa/pr67755.c: New test.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@232313 138bc75d-0d04-0410-961f-82ee72b054a4

Patch

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 6423a37..f8f818b 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@ 
+2016-01-12  Jeff Law  <law@redhat.com>
+
+	PR tree-optimization/67755
+	* tree-ssa-threadupdate.c (struct ssa_local_info_t): Add new field
+	"need_profile_correction".
+	(thread_block_1): Initialize new field to false by default.  If we
+	have multiple thread paths through a common joiner to different
+	final targets, then set new field to true.
+	(compute_path_counts): Only do count adjustment when it's really
+	needed.
+
 2016-01-12  Sandra Loosemore <sandra@codesourcery.com>
 
 	* doc/invoke.texi (Spec Files): Move section down in file, past
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 11f3b0c..240fae7 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,4 +1,9 @@ 
-2015-01-13  Thomas Preud'homme  <thomas.preudhomme@arm.com>
+2016-01-13  Jeff Law  <law@redhat.com>
+
+	PR tree-optimization/67755
+	* gcc.dg/tree-ssa/pr67755.c: New test.
+
+2016-01-13  Thomas Preud'homme  <thomas.preudhomme@arm.com>
 
 	* gcc.c-torture/unsorted/dump-noaddr.x (dump_compare): Replace static
 	pass number in output by a star.
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr67755.c b/gcc/testsuite/gcc.dg/tree-ssa/pr67755.c
new file mode 100644
index 0000000..64ffd0b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr67755.c
@@ -0,0 +1,25 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-dom2-details-blocks" } */
+/* We want to verify no outgoing edge from a conditional
+   has a probability of 100%.  */
+/* { dg-final { scan-tree-dump-not "succ:\[ \]+. .100.0%.  .\(TRUE|FALSE\)_VALUE" "dom2"} } */
+
+
+void (*zend_block_interruptions) (void);
+
+int * _zend_mm_alloc_int (int * heap, long int size)
+{
+  int *best_fit;
+  long int true_size = (size < 15 ? 32 : size);
+
+  if (zend_block_interruptions)
+    zend_block_interruptions ();
+
+  if (__builtin_expect ((true_size < 543), 1))
+    best_fit = heap + 2;
+  else
+    best_fit = heap;
+
+  return best_fit;
+}
+
diff --git a/gcc/tree-ssa-threadupdate.c b/gcc/tree-ssa-threadupdate.c
index 1bf9ae6..4783c4b 100644
--- a/gcc/tree-ssa-threadupdate.c
+++ b/gcc/tree-ssa-threadupdate.c
@@ -239,6 +239,11 @@  struct ssa_local_info_t
 
   /* Blocks duplicated for the thread.  */
   bitmap duplicate_blocks;
+
+  /* When we have multiple paths through a joiner which reach different
+     final destinations, then we may need to correct for potential
+     profile insanities.  */
+  bool need_profile_correction;
 };
 
 /* Passes which use the jump threading code register jump threading
@@ -826,7 +831,8 @@  compute_path_counts (struct redirection_data *rd,
      So ensure that this path's path_out_count is at least the
      difference between elast->count and nonpath_count.  Otherwise the edge
      counts after threading will not be sane.  */
-  if (has_joiner && path_out_count < elast->count - nonpath_count)
+  if (local_info->need_profile_correction
+      && has_joiner && path_out_count < elast->count - nonpath_count)
     {
       path_out_count = elast->count - nonpath_count;
       /* But neither can we go above the minimum count along the path
@@ -1492,6 +1498,7 @@  thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
   ssa_local_info_t local_info;
 
   local_info.duplicate_blocks = BITMAP_ALLOC (NULL);
+  local_info.need_profile_correction = false;
 
   /* To avoid scanning a linear array for the element we need we instead
      use a hash table.  For normal code there should be no noticeable
@@ -1502,6 +1509,7 @@  thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
 
   /* Record each unique threaded destination into a hash table for
      efficient lookups.  */
+  edge last = NULL;
   FOR_EACH_EDGE (e, ei, bb->preds)
     {
       if (e->aux == NULL)
@@ -1555,6 +1563,17 @@  thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
       /* Insert the outgoing edge into the hash table if it is not
 	 already in the hash table.  */
       lookup_redirection_data (e, INSERT);
+
+      /* When we have thread paths through a common joiner with different
+	 final destinations, then we may need corrections to deal with
+	 profile insanities.  See the big comment before compute_path_counts.  */
+      if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
+	{
+	  if (!last)
+	    last = e2;
+	  else if (e2 != last)
+	    local_info.need_profile_correction = true;
+	}
     }
 
   /* We do not update dominance info.  */