[committed,PR,tree-optimization/36550] Thread through blocks with real statements if they're all dead with -Os

Message ID fc9389ee-b76d-f517-15f1-d38a5c0baf47@redhat.com

Commit Message

Jeff Law Dec. 15, 2017, 10:36 p.m. UTC
Phew.  Got this one fixed before it hit its 10th birthday...

pr36550 is a false positive from -Wuninitialized at -Os.

When optimizing for size, jump threading is severely limited.
Essentially we only thread through blocks that transfer control with no
other side effects.
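
For illustration, here is a hand-written sketch (mine, not from the
patch; the function is invented) of the kind of request the -Os
heuristic rejects.  Threading the second test from the then-arm of the
first requires copying the join block, which carries a PHI for "r" and
the compare, and that copy costs code size:

/* Illustrative sketch only.  */
extern int work (int);

int
f (int cond, int x)
{
  int r = x;
  if (cond)
    r = work (x);	/* real statement feeding a PHI in the join */
  if (cond)		/* re-test of the same predicate: a jump
			   threading candidate from the then-arm */
    return r + 1;	/* uses r, so the copied PHI stays live */
  return r;
}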

This unnecessarily rejects a block where all the real statements will be
dead after jump threading.  With Alex's recent work we can easily detect
that case and thread through such blocks, which fixes the BZ.
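
Concretely (an invented sketch in the same spirit as the new testcase
below): once the second test is threaded from the then-arm, the copied
join block's compare has a known outcome and its PHI result is unused
on that path, so every copied statement is dead and the thread is free
even at -Os:

/* Illustrative sketch only; see gcc.dg/tree-ssa/pr36550.c in the
   patch for the original false-positive case.  */
int
g (int cond, int x)
{
  int r = 0;
  if (cond)
    r = x;
  if (cond)		/* threaded: outcome known on the copied path */
    return 1;		/* r unused here, so the copied PHI is dead too */
  return r;
}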

While looking at this I realized the loop that prunes jump threading
requests requiring statement copying was wrong in a multitude of ways,
and it was probably most ineffective in the cases that matter most for
code size (joiner blocks).

I totally rewrote that chunk of code to reflect reality.  Most
importantly, it has to walk down the jump threading path looking for
EDGE_COPY_SRC_JOINER_BLOCK or EDGE_COPY_SRC_BLOCK nodes.  We can ignore
the latter if all the statements in the block are going to be dead.
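
Condensed, the new walk looks like this (a restatement of the loop in
the patch below, using only identifiers that appear there; not
standalone code):

/* For each registered path, scan the blocks that would be copied.  */
unsigned int j;
for (j = 1; j < path->length (); j++)
  {
    basic_block bb = (*path)[j]->e->src;
    if (redirection_block_p (bb))
      continue;		/* pure control transfer: free to copy */
    if ((*path)[j]->type == EDGE_COPY_SRC_JOINER_BLOCK)
      break;		/* joiner blocks are always copied: cancel at -Os */
    if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK
	&& (count_stmts_and_phis_in_block (bb)
	    != estimate_threading_killed_stmts (bb)))
      break;		/* some copied statements survive: cancel at -Os */
  }
/* j != path->length () means the walk stopped early: delete the
   jump thread path.  Otherwise mark its blocks for threading.  */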

There is a nonzero chance this may trigger -Wuninitialized issues with
PGO bootstraps, particularly since we would happily thread in many cases
where we should not have.  We'll deal with those if/when they arise.

Bootstrapped and regression tested on x86_64.  Installing on the trunk.

Jeff
commit 8491227c04432e1c87f276bd25b6d8999e9e23c5
Author: Jeff Law <law@torsion.usersys.redhat.com>
Date:   Fri Dec 15 17:21:28 2017 -0500

            PR tree-optimization/36550
            * tree-ssa-threadupdate.c (count_stmts_and_phis_in_block): New.
            (mark_threaded_blocks): Rewrite code to avoid block copying when
            optimizing for size.  Don't pessimize blocks which will be
            copied, but all the statements will be dead.
    
            PR tree-optimization/36550
            * gcc.dg/tree-ssa/pr36550.c: New test.
Patch

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e5ab1b1a4c7..4528b6db5f2 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@ 
+2017-12-15  Jeff Law  <law@redhat.com>
+
+	PR tree-optimization/36550
+	* tree-ssa-threadupdate.c (count_stmts_and_phis_in_block): New.
+	(mark_threaded_blocks): Rewrite code to avoid block copying when
+	optimizing for size.  Don't pessimize blocks which will be
+	copied, but all the statements will be dead.
+
 2017-12-15  Alexandre Oliva <aoliva@redhat.com>
 
 	PR tree-optimization/81165
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index bc005b8de26..5c91661c71b 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@ 
+2017-12-15  Jeff Law  <law@redhat.com>
+
+	PR tree-optimization/36550
+	* gcc.dg/tree-ssa/pr36550.c: New test.
+
 2017-12-15  Alexandre Oliva  <aoliva@redhat.com>
 
 	PR tree-optimization/81165
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr36550.c b/gcc/testsuite/gcc.dg/tree-ssa/pr36550.c
new file mode 100644
index 00000000000..3eda5081c9b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr36550.c
@@ -0,0 +1,25 @@ 
+/* { dg-do compile } */
+/* { dg-options "-Os -Wuninitialized" } */
+void bail(void) __attribute__((noreturn));
+unsigned once(void);
+int pr(char**argv)
+{
+	char *bug;
+	unsigned check = once();
+	if (check) {
+		if (*argv)
+			bug = *++argv;
+	} else {
+		bug = *argv++;
+		if (!*argv)
+			bail();
+	}
+	/* now bug is set except if (check && !*argv) */
+	if (check) {
+		if (!*argv)
+			return 0;
+	}
+	/* if we ever get here then bug is set */
+	return *bug != 'X';
+}
+
diff --git a/gcc/tree-ssa-threadupdate.c b/gcc/tree-ssa-threadupdate.c
index b29ffe195c8..7b823d130fa 100644
--- a/gcc/tree-ssa-threadupdate.c
+++ b/gcc/tree-ssa-threadupdate.c
@@ -1737,6 +1737,31 @@  phi_args_equal_on_edges (edge e1, edge e2)
   return true;
 }
 
+/* Return the number of non-debug statements and non-virtual PHIs in a
+   block.  */
+
+static unsigned int
+count_stmts_and_phis_in_block (basic_block bb)
+{
+  unsigned int num_stmts = 0;
+
+  gphi_iterator gpi;
+  for (gpi = gsi_start_phis (bb); !gsi_end_p (gpi); gsi_next (&gpi))
+    if (!virtual_operand_p (PHI_RESULT (gpi.phi ())))
+      num_stmts++;
+
+  gimple_stmt_iterator gsi;
+  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      gimple *stmt = gsi_stmt (gsi);
+      if (!is_gimple_debug (stmt))
+        num_stmts++;
+    }
+
+  return num_stmts;
+}
+
+
 /* Walk through the registered jump threads and convert them into a
    form convenient for this pass.
 
@@ -1856,28 +1881,51 @@  mark_threaded_blocks (bitmap threaded_blocks)
 	}
     }
 
-  /* If optimizing for size, only thread through block if we don't have
-     to duplicate it or it's an otherwise empty redirection block.  */
+  /* When optimizing for size, prune all thread paths where statement
+     duplication is necessary.
+
+     We walk the jump thread path looking for copied blocks.  There's
+     two types of copied blocks.
+
+       EDGE_COPY_SRC_JOINER_BLOCK is always copied and thus we will
+       cancel the jump threading request when optimizing for size.
+
+       EDGE_COPY_SRC_BLOCK which is copied, but some of its statements
+       will be killed by threading.  If threading does not kill all of
+       its statements, then we should cancel the jump threading request
+       when optimizing for size.  */
   if (optimize_function_for_size_p (cfun))
     {
       EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
 	{
-	  bb = BASIC_BLOCK_FOR_FN (cfun, i);
-	  if (EDGE_COUNT (bb->preds) > 1
-	      && !redirection_block_p (bb))
-	    {
-	      FOR_EACH_EDGE (e, ei, bb->preds)
-		{
-		  if (e->aux)
-		    {
-		      vec<jump_thread_edge *> *path = THREAD_PATH (e);
-		      delete_jump_thread_path (path);
-		      e->aux = NULL;
-		    }
-		}
-	    }
-	  else
-	    bitmap_set_bit (threaded_blocks, i);
+	  FOR_EACH_EDGE (e, ei, BASIC_BLOCK_FOR_FN (cfun, i)->preds)
+	    if (e->aux)
+	      {
+		vec<jump_thread_edge *> *path = THREAD_PATH (e);
+
+		unsigned int j;
+		for (j = 1; j < path->length (); j++)
+		  {
+		    bb = (*path)[j]->e->src;
+		    if (redirection_block_p (bb))
+		      ;
+		    else if ((*path)[j]->type == EDGE_COPY_SRC_JOINER_BLOCK
+			     || ((*path)[j]->type == EDGE_COPY_SRC_BLOCK
+			         && (count_stmts_and_phis_in_block (bb)
+				     != estimate_threading_killed_stmts (bb))))
+		      break;
+		  }
+
+		if (j != path->length ())
+		  {
+		    if (dump_file && (dump_flags & TDF_DETAILS))
+		      dump_jump_thread_path (dump_file, *path, 0);
+		    delete_jump_thread_path (path);
+		    e->aux = NULL;
+		  }
+		else
+		  bitmap_set_bit (threaded_blocks, i);
+	      }
 	}
     }
   else