diff mbox

[lto/66752] Fix missed FSM jump thread

Message ID 55E08B52.70903@redhat.com
State New
Headers show

Commit Message

Jeff Law Aug. 28, 2015, 4:24 p.m. UTC
It's taken a month to get back to this, but it's time to re-install this 
patch onto the trunk with a minor update.

This patch gives the FSM jump threading code the opportunity to find 
known values when we have a condition like (x != 0).  Previously it just 
allowed a naked SSA_NAME (which is what appears in a SWITCH_EXPR).  By 
handling (x != 0) the FSM bits can thread through some COND_EXPRs.

Basically given (x != 0), we just ask the FSM bits to do their thing on 
(x) and the right things just happen.

Which brings us to what changed between this and the prior version of 
this patch.  When we ask the FSM bits to lookup the value of an SSA_NAME 
that appears in a COND_EXPR or SWITCH_EXPR, we must ask for the SSA_NAME 
from the *original* expression -- without any 
simplifications/substitutions.  The reason for this is those 
simplifications/substitutions may have been made using a context 
sensitive equivalence that is not guaranteed to hold on whatever paths 
the FSM threader finds.

The SWITCH_EXPR handling code handled this correctly, and I can recall 
discussing the issue with Sebastian.  However, the issue slipped my mind 
when I did the extension to handle COND_EXPRs.   This led to the PPC 
bootstrap failures we saw when this patch was originally installed.

Sadly, I've made multiple attempts to reduce the bootstrap failure to a 
reasonable testcase for the regression suite without success.   GCC 
sources are getting harder and harder to turn into execution tests ;(

Anyway, this patch includes a fix for that issue.  In fact, recovering 
the original operands for the comparison is the only change I've made.

Bootstrapped and regression tested on x86_64-linux and ppc64-linux. 
Installed on the trunk.


Jeff
diff mbox

Patch

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 09d4a6d..de7f367 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,15 @@ 
+2015-08-28  Jeff Law  <law@redhat.com>
+
+	PR lto/66752
+	* tree-ssa-threadedge.c (simplify_conrol_stmt_condition): If we are
+	unable to find X NE 0 in the tables, return X as the simplified
+	condition.
+	(fsm_find_control_statement_thread_paths): If nodes in NEXT_PATH are
+	in VISISTED_BBS, then return failure.  Else add nodes from NEXT_PATH
+	to VISISTED_BBS.
+	* tree-ssa-threadupdate.c (duplicate_thread_path): Fix up edge flags
+	after removing the control flow statement and unnecessary edges.
+
 2015-08-28  Alan Lawrence  <alan.lawrence@arm.com>
 
 	Revert:
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 388417a..8d4c3f6 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@ 
+2015-08-28  Jeff Law  <law@redhat.com>
+
+	PR lto/66752
+	* gcc.dg/tree-ssa/pr66752-2.c: New test.
+	* gcc.dg/torture/pr66752-1.c: New test
+	* g++.dg/torture/pr66752-2.C: New test.
+
 2015-08-28  Alan Lawrence  <alan.lawrence@arm.com>
 	Revert:
 	2015-08-27  Alan Lawrence  <alan.lawrence@arm.com>
diff --git a/gcc/testsuite/g++.dg/torture/pr66752-2.C b/gcc/testsuite/g++.dg/torture/pr66752-2.C
new file mode 100644
index 0000000..96d3fe9
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr66752-2.C
@@ -0,0 +1,60 @@ 
+/* { dg-do compile } */
+extern "C"
+{
+  typedef struct _IO_FILE FILE;
+  extern int fprintf (FILE * __restrict __stream,
+		      const char *__restrict __format, ...);
+}
+typedef union tree_node *tree;
+class ipa_polymorphic_call_context
+{
+};
+class ipcp_value_base
+{
+};
+template < typename valtype > class ipcp_value:public ipcp_value_base
+{
+public:valtype value;
+  ipcp_value *next;
+};
+
+template < typename valtype > class ipcp_lattice
+{
+public:ipcp_value < valtype > *values;
+  void print (FILE * f, bool dump_sources, bool dump_benefits);
+};
+
+class ipcp_param_lattices
+{
+public:ipcp_lattice < tree > itself;
+  ipcp_lattice < ipa_polymorphic_call_context > ctxlat;
+};
+template < typename valtype > void ipcp_lattice < valtype >::print (FILE * f,
+								    bool
+								    dump_sources,
+								    bool
+								    dump_benefits)
+{
+  ipcp_value < valtype > *val;
+  bool prev = false;
+  for (val = values; val; val = val->next)
+    {
+      if (dump_benefits && prev)
+	fprintf (f, "               ");
+      else if (!dump_benefits && prev)
+	fprintf (f, ", ");
+      else
+	prev = true;
+      if (dump_sources)
+	fprintf (f, "]");
+      if (dump_benefits)
+	fprintf (f, "shit");
+    }
+}
+
+void
+print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
+{
+  struct ipcp_param_lattices *plats;
+  plats->ctxlat.print (f, dump_sources, dump_benefits);
+}
diff --git a/gcc/testsuite/gcc.dg/torture/pr66752-1.c b/gcc/testsuite/gcc.dg/torture/pr66752-1.c
new file mode 100644
index 0000000..a742555
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr66752-1.c
@@ -0,0 +1,27 @@ 
+/* { dg-do compile } */
+
+typedef unsigned int size_t;
+struct fde_vector
+{
+  size_t count;
+  const struct dwarf_fde *array[];
+};
+struct object;
+typedef struct dwarf_fde fde;
+typedef int (*fde_compare_t) (struct object *, const fde *, const fde *);
+void
+fde_merge (struct object *ob, fde_compare_t fde_compare,
+	   struct fde_vector *v1, struct fde_vector *v2)
+{
+  size_t i1, i2;
+  const fde *fde2;
+  do
+    {
+      i2--;
+      while (i1 > 0 && fde_compare (ob, v1->array[i1 - 1], fde2) > 0)
+	{
+	  i1--;
+	}
+    }
+  while (i2 > 0);
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr66752-3.c b/gcc/testsuite/gcc.dg/tree-ssa/pr66752-3.c
new file mode 100644
index 0000000..f15b598
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr66752-3.c
@@ -0,0 +1,39 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-dom1-details -fdump-tree-optimized" } */
+
+extern int status, pt;
+extern int count;
+void
+foo (int N, int c, int b, int *a)
+{
+  int i, flag;
+  i = b -1;
+  flag = 1;
+  if (status && i < N && a[i] == b) {
+    N--;
+    flag = 0;
+   if (pt)
+     count++;
+  }
+  else    
+    for (i = -1, flag = 1; ++i < N && flag;)
+      if (a[i] == b)
+        {
+          --N;
+          flag = 0;
+          if (i < N)
+            a[i] = a[N];
+           else
+            a[i] = 0;
+          if (pt)
+            count++;
+        }
+ if(status && flag)
+   pt--;
+}
+
+/* There are 3 FSM jump threading opportunities.  */
+/* { dg-final { scan-tree-dump-times "FSM" 3 "dom1"} } */
+
+/* There should be no assignments or references to FLAG.  */
+/* { dg-final { scan-tree-dump-not "flag" "optimized"} } */
diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
index 7164122..451dc2e 100644
--- a/gcc/tree-ssa-threadedge.c
+++ b/gcc/tree-ssa-threadedge.c
@@ -553,6 +553,24 @@  simplify_control_stmt_condition (edge e,
           || !is_gimple_min_invariant (cached_lhs))
         cached_lhs = (*simplify) (dummy_cond, stmt);
 
+      /* If we were just testing that an integral type was != 0, and that
+	 failed, just return the first operand.  This gives the FSM code a
+	 chance to optimize the path.  */
+      if (cached_lhs == NULL
+	  && cond_code == NE_EXPR)
+	{
+	  /* Recover the original operands.  They may have been simplified
+	     using context sensitive equivalences.  Those context sensitive
+	     equivalences may not be valid on paths found by the FSM optimizer.  */
+	  tree op0 = gimple_cond_lhs (stmt);
+	  tree op1 = gimple_cond_rhs (stmt);
+
+	  if (INTEGRAL_TYPE_P (TREE_TYPE (op0))
+	      && TREE_CODE (op0) == SSA_NAME
+	      && integer_zerop (op1))
+	    return op0;
+	}
+
       return cached_lhs;
     }
 
@@ -974,6 +992,21 @@  fsm_find_control_statement_thread_paths (tree expr,
 	  return;
 	}
 
+      /* Make sure we haven't already visited any of the nodes in
+	 NEXT_PATH.  Don't add them here to avoid pollution.  */
+      for (unsigned int i = 0; i < next_path->length () - 1; i++)
+	{
+	  if (visited_bbs->contains ((*next_path)[i]))
+	    {
+	      vec_free (next_path);
+	      return;
+	    }
+	}
+
+      /* Now add the nodes to VISISTED_BBS.  */
+      for (unsigned int i = 0; i < next_path->length () - 1; i++)
+	visited_bbs->add ((*next_path)[i]);
+
       /* Append all the nodes from NEXT_PATH to PATH.  */
       vec_safe_splice (path, next_path);
       next_path_length = next_path->length ();
diff --git a/gcc/tree-ssa-threadupdate.c b/gcc/tree-ssa-threadupdate.c
index 5a5f8df..f4d3fdc 100644
--- a/gcc/tree-ssa-threadupdate.c
+++ b/gcc/tree-ssa-threadupdate.c
@@ -2493,6 +2493,12 @@  duplicate_thread_path (edge entry, edge exit,
 
   /* Remove the last branch in the jump thread path.  */
   remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);
+
+  /* And fixup the flags on the single remaining edge.  */
+  edge fix_e = find_edge (region_copy[n_region - 1], exit->dest);
+  fix_e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
+  fix_e->flags |= EDGE_FALLTHRU;
+
   edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
 
   if (e) {