diff mbox

Improving jump-thread pass for PR 54742

Message ID 20141204142956.GD18834@f1.c.bardezibar.internal
State New
Headers show

Commit Message

Sebastian Pop Dec. 4, 2014, 2:29 p.m. UTC
Sebastian Pop wrote:
> Jeff Law wrote:
> > I'm a bit worried about compile-time impacts of the all the
> > recursion
> 
> I will also restrict the recursion to the loop in which we look for the FSM
> thread.

The attached patch includes this change.  It passed bootstrap and regression
test on x86_64-linux.  Ok to commit?

Thanks,
Sebastian

Comments

Jeff Law Dec. 5, 2014, 8:12 p.m. UTC | #1
On 12/04/14 07:29, Sebastian Pop wrote:
> Sebastian Pop wrote:
>> Jeff Law wrote:
>>> I'm a bit worried about compile-time impacts of the all the
>>> recursion
>>
>> I will also restrict the recursion to the loop in which we look for the FSM
>> thread.
>
> The attached patch includes this change.  It passed bootstrap and regression
> test on x86_64-linux.  Ok to commit?
OK to commit.  Thanks for your patience.

Can you follow-up with a change which throttles this optimization when 
-Os is in effect.  You can check optimize_function_for_size_p (cfun) and 
simply avoid the backward traversal or you could allow it in that case 
if the amount of copying is suitably small.  Your call.



jeff
diff mbox

Patch

From 0e3312921c07c0e0dd5c1bf5f24050b2336475ef Mon Sep 17 00:00:00 2001
From: Sebastian Pop <s.pop@samsung.com>
Date: Fri, 26 Sep 2014 14:54:20 -0500
Subject: [PATCH] extend jump thread for finite state automata PR 54742

Adapted from a patch from James Greenhalgh.

	* params.def (max-fsm-thread-path-insns, max-fsm-thread-length,
	max-fsm-thread-paths): New.

	* doc/invoke.texi (max-fsm-thread-path-insns, max-fsm-thread-length,
	max-fsm-thread-paths): Documented.

	* tree-cfg.c (split_edge_bb_loc): Export.
	* tree-cfg.h (split_edge_bb_loc): Declared extern.

	* tree-ssa-threadedge.c (simplify_control_stmt_condition): Restore the
	original value of cond when simplification fails.
	(fsm_find_thread_path): New.
	(fsm_find_control_statement_thread_paths): New.
	(fsm_thread_through_normal_block): Call
	find_control_statement_thread_paths.

	* tree-ssa-threadupdate.c (dump_jump_thread_path): Pretty print
	EDGE_FSM_THREAD.
	(verify_seme): New.
	(duplicate_seme_region): New.
	(thread_through_all_blocks): Generate code for EDGE_FSM_THREAD edges
	calling duplicate_seme_region.

	* tree-ssa-threadupdate.h (jump_thread_edge_type): Add EDGE_FSM_THREAD.

	* testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c: New.
	* testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c: New.
---
 gcc/doc/invoke.texi                              |   12 +
 gcc/params.def                                   |   15 ++
 gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c |   43 ++++
 gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c |  127 ++++++++++
 gcc/tree-cfg.c                                   |    2 +-
 gcc/tree-cfg.h                                   |    1 +
 gcc/tree-ssa-threadedge.c                        |  277 +++++++++++++++++++++-
 gcc/tree-ssa-threadupdate.c                      |  203 +++++++++++++++-
 gcc/tree-ssa-threadupdate.h                      |    1 +
 9 files changed, 678 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 89edddb..074183f 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -10624,6 +10624,18 @@  large and significantly increase compile time at optimization level
 @option{-O1} and higher.  This parameter is a maximum nubmer of statements
 in a single generated constructor.  Default value is 5000.
 
+@item max-fsm-thread-path-insns
+Maximum number of instructions to copy when duplicating blocks on a
+finite state automaton jump thread path.  The default is 100.
+
+@item max-fsm-thread-length
+Maximum number of basic blocks on a finite state automaton jump thread
+path.  The default is 10.
+
+@item max-fsm-thread-paths
+Maximum number of new jump thread paths to create for a finite state
+automaton.  The default is 50.
+
 @end table
 @end table
 
diff --git a/gcc/params.def b/gcc/params.def
index 9b21c07..edf3f53 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -1140,6 +1140,21 @@  DEFPARAM (PARAM_CHKP_MAX_CTOR_SIZE,
 	  "Maximum number of statements to be included into a single static "
 	  "constructor generated by Pointer Bounds Checker",
 	  5000, 100, 0)
+
+DEFPARAM (PARAM_MAX_FSM_THREAD_PATH_INSNS,
+	  "max-fsm-thread-path-insns",
+	  "Maximum number of instructions to copy when duplicating blocks on a finite state automaton jump thread path",
+	  100, 1, 999999)
+
+DEFPARAM (PARAM_MAX_FSM_THREAD_LENGTH,
+	  "max-fsm-thread-length",
+	  "Maximum number of basic blocks on a finite state automaton jump thread path",
+	  10, 1, 999999)
+
+DEFPARAM (PARAM_MAX_FSM_THREAD_PATHS,
+	  "max-fsm-thread-paths",
+	  "Maximum number of new jump thread paths to create for a finite state automaton",
+	  50, 1, 999999)
 /*
 
 Local variables:
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c
new file mode 100644
index 0000000..bb34a74
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-6.c
@@ -0,0 +1,43 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-dom1-details" } */
+/* { dg-final { scan-tree-dump-times "FSM" 6 "dom1" } } */
+/* { dg-final { cleanup-tree-dump "dom1" } } */
+
+int sum0, sum1, sum2, sum3;
+int foo (char *s, char **ret)
+{
+  int state=0;
+  char c;
+
+  for (; *s && state != 4; s++)
+    {
+      c = *s;
+      if (c == '*')
+	{
+	  s++;
+	  break;
+	}
+      switch (state)
+	{
+	case 0:
+	  if (c == '+')
+	    state = 1;
+	  else if (c != '-')
+	    sum0+=c;
+	  break;
+	case 1:
+	  if (c == '+')
+	    state = 2;
+	  else if (c == '-')
+	    state = 0;
+	  else
+	    sum1+=c;
+	  break;
+	default:
+	  break;
+	}
+
+    }
+  *ret = s;
+  return state;
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c
new file mode 100644
index 0000000..21474f0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c
@@ -0,0 +1,127 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-dom1-details" } */
+/* { dg-final { scan-tree-dump-times "FSM" 19 "dom1" } } */
+/* { dg-final { cleanup-tree-dump "dom1" } } */
+
+enum STATE {
+  S0=0,
+  SI,
+  S1,
+  S2,
+  S3,
+  S4,
+  S5,
+  S6
+};
+
+int bar (enum STATE s);
+
+enum STATE foo (unsigned char **y, unsigned *c)
+{
+  unsigned char *x = *y;
+  unsigned char n;
+  enum STATE s = S0;
+
+  for( ; *x && s != SI; x++ )
+    {
+      n = *x;
+      if (n == 'x')
+	{
+	  x++;
+	  break;
+	}
+      switch(s)
+	{
+	case S0:
+	  if(bar(n))
+	    s = S3;
+	  else if( n == 'a' || n == 'b' )
+	    s = S1;
+	  else if( n == 'c' )
+	    s = S4;
+	  else
+	    {
+	      s = SI;
+	      c[SI]++;
+	    }
+	  c[S0]++;
+	  break;
+	case S1:
+	  if(bar(n))
+	    {
+	      s = S3;
+	      c[S1]++;
+	    }
+	  else if( n == 'c' )
+	    {
+	      s = S4;
+	      c[S1]++;
+	    }
+	  else
+	    {
+	      s = SI;
+	      c[S1]++;
+	    }
+	  break;
+	case S3:
+	  if( n == 'c' )
+	    {
+	      s = S4;
+	      c[S3]++;
+	    }
+	  else if(!bar(n))
+	    {
+	      s = SI;
+	      c[S3]++;
+	    }
+	  break;
+	case S4:
+	  if( n == 'E' || n == 'e' )
+	    {
+	      s = S2;
+	      c[S4]++;
+	    }
+	  else if(!bar(n))
+	    {
+	      s = SI;
+	      c[S4]++;
+	    }
+	  break;
+	case S2:
+	  if( n == 'a' || n == 'b' )
+	    {
+	      s = S5;
+	      c[S2]++;
+	    }
+	  else
+	    {
+	      s = SI;
+	      c[S2]++;
+	    }
+	  break;
+	case S5:
+	  if(bar(n))
+	    {
+	      s = S6;
+	      c[S5]++;
+	    }
+	  else
+	    {
+	      s = SI;
+	      c[S5]++;
+	    }
+	  break;
+	case S6:
+	  if(!bar(n))
+	    {
+	      s = SI;
+	      c[SI]++;
+	    }
+	  break;
+	default:
+	  break;
+	}
+    }
+  *y=x;
+  return s;
+}
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index 0a8d7a9..a4ac9d8 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -2666,7 +2666,7 @@  reinstall_phi_args (edge new_edge, edge old_edge)
    near its "logical" location.  This is of most help to humans looking
    at debugging dumps.  */
 
-static basic_block
+basic_block
 split_edge_bb_loc (edge edge_in)
 {
   basic_block dest = edge_in->dest;
diff --git a/gcc/tree-cfg.h b/gcc/tree-cfg.h
index d35e5ba..834fa71 100644
--- a/gcc/tree-cfg.h
+++ b/gcc/tree-cfg.h
@@ -67,6 +67,7 @@  extern void verify_gimple_in_cfg (struct function *, bool);
 extern tree gimple_block_label (basic_block);
 extern void add_phi_args_after_copy_bb (basic_block);
 extern void add_phi_args_after_copy (basic_block *, unsigned, edge);
+extern basic_block split_edge_bb_loc (edge);
 extern bool gimple_duplicate_sese_region (edge, edge, basic_block *, unsigned,
 					basic_block *, bool);
 extern bool gimple_duplicate_sese_tail (edge, edge, basic_block *, unsigned,
diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
index 8b0b7b8..29b20c8 100644
--- a/gcc/tree-ssa-threadedge.c
+++ b/gcc/tree-ssa-threadedge.c
@@ -56,6 +56,8 @@  along with GCC; see the file COPYING3.  If not see
 #include "params.h"
 #include "tree-ssa-threadedge.h"
 #include "builtins.h"
+#include "cfg.h"
+#include "cfganal.h"
 
 /* To avoid code explosion due to jump threading, we limit the
    number of statements we are going to copy.  This variable
@@ -661,6 +663,7 @@  simplify_control_stmt_condition (edge e,
      rather than use a relational operator.  These are simpler to handle.  */
   if (TREE_CODE (cond) == SSA_NAME)
     {
+      tree original_lhs = cond;
       cached_lhs = cond;
 
       /* Get the variable's current value from the equivalence chains.
@@ -689,6 +692,12 @@  simplify_control_stmt_condition (edge e,
 	 pass specific callback to try and simplify it further.  */
       if (cached_lhs && ! is_gimple_min_invariant (cached_lhs))
         cached_lhs = (*simplify) (stmt, stmt);
+
+      /* We couldn't find an invariant.  But, callers of this
+	 function may be able to do something useful with the
+	 unmodified destination.  */
+      if (!cached_lhs)
+	cached_lhs = original_lhs;
     }
   else
     cached_lhs = NULL;
@@ -948,6 +957,249 @@  thread_around_empty_blocks (edge taken_edge,
   return false;
 }
 
+/* Return true if the CFG contains at least one path from START_BB to END_BB.
+   When a path is found, record in PATH the blocks from END_BB to START_BB.
+   VISITED_BBS is used to make sure we don't fall into an infinite loop.  Bound
+   the recursion to basic blocks belonging to LOOP.  */
+
+static bool
+fsm_find_thread_path (basic_block start_bb, basic_block end_bb,
+		      vec<basic_block, va_gc> *&path,
+		      hash_set<basic_block> *visited_bbs, loop_p loop)
+{
+  if (loop != start_bb->loop_father)
+    return false;
+
+  if (start_bb == end_bb)
+    {
+      vec_safe_push (path, start_bb);
+      return true;
+    }
+
+  if (!visited_bbs->add (start_bb))
+    {
+      edge e;
+      edge_iterator ei;
+      FOR_EACH_EDGE (e, ei, start_bb->succs)
+	if (fsm_find_thread_path (e->dest, end_bb, path, visited_bbs, loop))
+	  {
+	    vec_safe_push (path, start_bb);
+	    return true;
+	  }
+    }
+
+  return false;
+}
+
+static int max_threaded_paths;
+
+/* We trace the value of the variable EXPR back through any phi nodes looking
+   for places where it gets a constant value and save the path.  Stop after
+   having recorded MAX_PATHS jump threading paths.  */
+
+static void
+fsm_find_control_statement_thread_paths (tree expr,
+					 hash_set<gimple> *visited_phis,
+					 vec<basic_block, va_gc> *&path)
+{
+  tree var = SSA_NAME_VAR (expr);
+  gimple def_stmt = SSA_NAME_DEF_STMT (expr);
+  basic_block var_bb = gimple_bb (def_stmt);
+
+  if (var == NULL || var_bb == NULL)
+    return;
+
+  /* For the moment we assume that an SSA chain only contains phi nodes, and
+     eventually one of the phi arguments will be an integer constant.  In the
+     future, this could be extended to also handle simple assignments of
+     arithmetic operations.  */
+  if (gimple_code (def_stmt) != GIMPLE_PHI)
+    return;
+
+  /* Avoid infinite recursion.  */
+  if (visited_phis->add (def_stmt))
+    return;
+
+  gphi *phi = as_a <gphi *> (def_stmt);
+  int next_path_length = 0;
+  basic_block last_bb_in_path = path->last ();
+
+  /* Following the chain of SSA_NAME definitions, we jumped from a definition in
+     LAST_BB_IN_PATH to a definition in VAR_BB.  When these basic blocks are
+     different, append to PATH the blocks from LAST_BB_IN_PATH to VAR_BB.  */
+  if (var_bb != last_bb_in_path)
+    {
+      edge e;
+      int e_count = 0;
+      edge_iterator ei;
+      vec<basic_block, va_gc> *next_path;
+      vec_alloc (next_path, n_basic_blocks_for_fn (cfun));
+
+      FOR_EACH_EDGE (e, ei, last_bb_in_path->preds)
+	{
+	  hash_set<basic_block> *visited_bbs = new hash_set<basic_block>;
+
+	  if (fsm_find_thread_path (var_bb, e->src, next_path, visited_bbs,
+				    e->src->loop_father))
+	    ++e_count;
+
+	  delete visited_bbs;
+
+	  /* If there is more than one path, stop.  */
+	  if (e_count > 1)
+	    {
+	      vec_free (next_path);
+	      return;
+	    }
+	}
+
+      /* Stop if we have not found a path: this could occur when the recursion
+	 is stopped by one of the bounds.  */
+      if (e_count == 0)
+	{
+	  vec_free (next_path);
+	  return;
+	}
+
+      /* Append all the nodes from NEXT_PATH to PATH.  */
+      vec_safe_splice (path, next_path);
+      next_path_length = next_path->length ();
+      vec_free (next_path);
+    }
+
+  gcc_assert (path->last () == var_bb);
+
+  /* Iterate over the arguments of PHI.  */
+  unsigned int i;
+  for (i = 0; i < gimple_phi_num_args (phi); i++)
+    {
+      tree arg = gimple_phi_arg_def (phi, i);
+      basic_block bbi = gimple_phi_arg_edge (phi, i)->src;
+
+      /* Skip edges pointing outside the current loop.  */
+      if (!arg || var_bb->loop_father != bbi->loop_father)
+	continue;
+
+      if (TREE_CODE (arg) == SSA_NAME)
+	{
+	  vec_safe_push (path, bbi);
+	  /* Recursively follow SSA_NAMEs looking for a constant definition.  */
+	  fsm_find_control_statement_thread_paths (arg, visited_phis, path);
+	  path->pop ();
+	  continue;
+	}
+
+      if (TREE_CODE (arg) != INTEGER_CST)
+	continue;
+
+      int path_length = path->length ();
+      /* A path with less than 2 basic blocks should not be jump-threaded.  */
+      if (path_length < 2)
+	continue;
+
+      if (path_length > PARAM_VALUE (PARAM_MAX_FSM_THREAD_LENGTH))
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, "FSM jump-thread path not considered: "
+		     "the number of basic blocks on the path "
+		     "exceeds PARAM_MAX_FSM_THREAD_LENGTH.\n");
+	  continue;
+	}
+
+      if (max_threaded_paths <= 0)
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, "FSM jump-thread path not considered: "
+		     "the number of previously recorded FSM paths to thread "
+		     "exceeds PARAM_MAX_FSM_THREAD_PATHS.\n");
+	  continue;
+	}
+
+      /* Add BBI to the path.  */
+      vec_safe_push (path, bbi);
+      ++path_length;
+
+      int n_insns = 0;
+      gimple_stmt_iterator gsi;
+      int j;
+      loop_p loop = (*path)[0]->loop_father;
+      bool path_crosses_loops = false;
+
+      /* Count the number of instructions on the path: as these instructions
+	 will have to be duplicated, we will not record the path if there are
+	 too many instructions on the path.  Also check that all the blocks in
+	 the path belong to a single loop.  */
+      for (j = 1; j < path_length - 1; j++)
+	{
+	  basic_block bb = (*path)[j];
+
+	  if (bb->loop_father != loop)
+	    {
+	      path_crosses_loops = true;
+	      break;
+	    }
+
+	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+	    {
+	      gimple stmt = gsi_stmt (gsi);
+	      /* Do not count empty statements and labels.  */
+	      if (gimple_code (stmt) != GIMPLE_NOP
+		  && gimple_code (stmt) != GIMPLE_LABEL
+		  && !is_gimple_debug (stmt))
+		++n_insns;
+	    }
+	}
+
+      if (path_crosses_loops)
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, "FSM jump-thread path not considered: "
+		     "the path crosses loops.\n");
+	  path->pop ();
+	  continue;
+	}
+
+      if (n_insns >= PARAM_VALUE (PARAM_MAX_FSM_THREAD_PATH_INSNS))
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, "FSM jump-thread path not considered: "
+		     "the number of instructions on the path "
+		     "exceeds PARAM_MAX_FSM_THREAD_PATH_INSNS.\n");
+	  path->pop ();
+	  continue;
+	}
+
+      vec<jump_thread_edge *> *jump_thread_path
+	= new vec<jump_thread_edge *> ();
+
+      /* Record the edges between the blocks in PATH.  */
+      for (j = 0; j < path_length - 1; j++)
+	{
+	  edge e = find_edge ((*path)[path_length - j - 1],
+			      (*path)[path_length - j - 2]);
+	  gcc_assert (e);
+	  jump_thread_edge *x = new jump_thread_edge (e, EDGE_FSM_THREAD);
+	  jump_thread_path->safe_push (x);
+	}
+
+      /* Add the edge taken when the control variable has value ARG.  */
+      edge taken_edge = find_taken_edge ((*path)[0], arg);
+      jump_thread_edge *x
+	= new jump_thread_edge (taken_edge, EDGE_NO_COPY_SRC_BLOCK);
+      jump_thread_path->safe_push (x);
+
+      register_jump_thread (jump_thread_path);
+      --max_threaded_paths;
+
+      /* Remove BBI from the path.  */
+      path->pop ();
+    }
+
+  /* Remove all the nodes that we added from NEXT_PATH.  */
+  if (next_path_length)
+    vec_safe_truncate (path, (path->length () - next_path_length));
+}
+
 /* We are exiting E->src, see if E->dest ends with a conditional
    jump which has a known value when reached via E.
 
@@ -1033,7 +1285,10 @@  thread_through_normal_block (edge e,
       cond = simplify_control_stmt_condition (e, stmt, dummy_cond, simplify,
 					      handle_dominating_asserts);
 
-      if (cond && is_gimple_min_invariant (cond))
+      if (!cond)
+	return 0;
+
+      if (is_gimple_min_invariant (cond))
 	{
 	  edge taken_edge = find_taken_edge (e->dest, cond);
 	  basic_block dest = (taken_edge ? taken_edge->dest : NULL);
@@ -1079,6 +1334,26 @@  thread_through_normal_block (edge e,
 				      backedge_seen_p);
 	  return 1;
 	}
+
+      if (!flag_expensive_optimizations
+	  || TREE_CODE (cond) != SSA_NAME
+	  || e->dest->loop_father != e->src->loop_father
+	  || loop_depth (e->dest->loop_father) == 0)
+	return 0;
+
+      /* When COND cannot be simplified, try to find paths from a control
+	 statement back through the PHI nodes which would affect that control
+	 statement.  */
+      vec<basic_block, va_gc> *bb_path;
+      vec_alloc (bb_path, n_basic_blocks_for_fn (cfun));
+      vec_safe_push (bb_path, e->dest);
+      hash_set<gimple> *visited_phis = new hash_set<gimple>;
+
+      max_threaded_paths = PARAM_VALUE (PARAM_MAX_FSM_THREAD_PATHS);
+      fsm_find_control_statement_thread_paths (cond, visited_phis, bb_path);
+
+      delete visited_phis;
+      vec_free (bb_path);
     }
   return 0;
 }
diff --git a/gcc/tree-ssa-threadupdate.c b/gcc/tree-ssa-threadupdate.c
index ca0b8bf..4f83a2e 100644
--- a/gcc/tree-ssa-threadupdate.c
+++ b/gcc/tree-ssa-threadupdate.c
@@ -167,8 +167,9 @@  dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path,
 		       bool registering)
 {
   fprintf (dump_file,
-	   "  %s jump thread: (%d, %d) incoming edge; ",
+	   "  %s%s jump thread: (%d, %d) incoming edge; ",
 	   (registering ? "Registering" : "Cancelling"),
+	   (path[0]->type == EDGE_FSM_THREAD ? " FSM": ""),
 	   path[0]->e->src->index, path[0]->e->dest->index);
 
   for (unsigned int i = 1; i < path.length (); i++)
@@ -2317,6 +2318,155 @@  bb_ends_with_multiway_branch (basic_block bb ATTRIBUTE_UNUSED)
   return false;
 }
 
+/* Verify that the REGION is a Single Entry Multiple Exits region: make sure no
+   edge other than ENTRY is entering the REGION.  */
+
+DEBUG_FUNCTION void
+verify_seme (edge entry, basic_block *region, unsigned n_region)
+{
+  bitmap bbs = BITMAP_ALLOC (NULL);
+
+  for (unsigned i = 0; i < n_region; i++)
+    bitmap_set_bit (bbs, region[i]->index);
+
+  for (unsigned i = 0; i < n_region; i++)
+    {
+      edge e;
+      edge_iterator ei;
+      basic_block bb = region[i];
+
+      /* All predecessors other than ENTRY->src should be in the region.  */
+      for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei)); ei_next (&ei))
+	if (e != entry)
+	  gcc_assert (bitmap_bit_p (bbs, e->src->index));
+    }
+
+  BITMAP_FREE (bbs);
+}
+
+/* Duplicates a Single Entry Multiple Exit REGION (set of N_REGION basic
+   blocks).  The ENTRY edge is redirected to the duplicate of the region.  If
+   REGION is not a Single Entry region, ignore any incoming edges other than
+   ENTRY: this makes the copied region a Single Entry region.
+
+   Remove the last conditional statement in the last basic block in the REGION,
+   and create a single fallthru edge pointing to the same destination as the
+   EXIT edge.
+
+   The new basic blocks are stored to REGION_COPY in the same order as they had
+   in REGION, provided that REGION_COPY is not NULL.
+
+   Returns false if it is unable to copy the region, true otherwise.  */
+
+static bool
+duplicate_seme_region (edge entry, edge exit,
+		       basic_block *region, unsigned n_region,
+		       basic_block *region_copy)
+{
+  unsigned i;
+  bool free_region_copy = false, copying_header = false;
+  struct loop *loop = entry->dest->loop_father;
+  edge exit_copy;
+  edge redirected;
+  int total_freq = 0, entry_freq = 0;
+  gcov_type total_count = 0, entry_count = 0;
+
+  if (!can_copy_bbs_p (region, n_region))
+    return false;
+
+  /* Some sanity checking.  Note that we do not check for all possible
+     missuses of the functions.  I.e. if you ask to copy something weird,
+     it will work, but the state of structures probably will not be
+     correct.  */
+  for (i = 0; i < n_region; i++)
+    {
+      /* We do not handle subloops, i.e. all the blocks must belong to the
+	 same loop.  */
+      if (region[i]->loop_father != loop)
+	return false;
+    }
+
+  initialize_original_copy_tables ();
+
+  if (copying_header)
+    set_loop_copy (loop, loop_outer (loop));
+  else
+    set_loop_copy (loop, loop);
+
+  if (!region_copy)
+    {
+      region_copy = XNEWVEC (basic_block, n_region);
+      free_region_copy = true;
+    }
+
+  if (entry->dest->count)
+    {
+      total_count = entry->dest->count;
+      entry_count = entry->count;
+      /* Fix up corner cases, to avoid division by zero or creation of negative
+	 frequencies.  */
+      if (entry_count > total_count)
+	entry_count = total_count;
+    }
+  else
+    {
+      total_freq = entry->dest->frequency;
+      entry_freq = EDGE_FREQUENCY (entry);
+      /* Fix up corner cases, to avoid division by zero or creation of negative
+	 frequencies.  */
+      if (total_freq == 0)
+	total_freq = 1;
+      else if (entry_freq > total_freq)
+	entry_freq = total_freq;
+    }
+
+  copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
+	    split_edge_bb_loc (entry), 0);
+  if (total_count)
+    {
+      scale_bbs_frequencies_gcov_type (region, n_region,
+				       total_count - entry_count,
+				       total_count);
+      scale_bbs_frequencies_gcov_type (region_copy, n_region, entry_count,
+				       total_count);
+    }
+  else
+    {
+      scale_bbs_frequencies_int (region, n_region, total_freq - entry_freq,
+				 total_freq);
+      scale_bbs_frequencies_int (region_copy, n_region, entry_freq, total_freq);
+    }
+
+#ifdef ENABLE_CHECKING
+  /* Make sure no edge other than ENTRY is entering the copied region.  */
+  verify_seme (entry, region_copy, n_region);
+#endif
+
+  /* Remove the last branch in the jump thread path.  */
+  remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);
+  edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
+
+  if (e) {
+    rescan_loop_exit (e, true, false);
+    e->probability = REG_BR_PROB_BASE;
+    e->count = region_copy[n_region - 1]->count;
+  }
+
+  /* Redirect the entry and add the phi node arguments.  */
+  redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
+  gcc_assert (redirected != NULL);
+  flush_pending_stmts (entry);
+
+  /* Add the other PHI node arguments.  */
+  add_phi_args_after_copy (region_copy, n_region, NULL);
+
+  if (free_region_copy)
+    free (region_copy);
+
+  free_original_copy_tables ();
+  return true;
+}
+
 /* Walk through all blocks and thread incoming edges to the appropriate
    outgoing edge for each edge pair recorded in THREADED_EDGES.
 
@@ -2343,6 +2493,57 @@  thread_through_all_blocks (bool may_peel_loop_headers)
   threaded_blocks = BITMAP_ALLOC (NULL);
   memset (&thread_stats, 0, sizeof (thread_stats));
 
+  /* Jump-thread all FSM threads before other jump-threads.  */
+  for (i = 0; i < paths.length ();)
+    {
+      vec<jump_thread_edge *> *path = paths[i];
+      edge entry = (*path)[0]->e;
+
+      if ((*path)[0]->type != EDGE_FSM_THREAD
+	  /* Do not jump-thread twice from the same block.  */
+	  || bitmap_bit_p (threaded_blocks, entry->src->index)) {
+	i++;
+	continue;
+      }
+
+      unsigned len = path->length ();
+      edge exit = (*path)[len - 1]->e;
+      basic_block *region = XNEWVEC (basic_block, len - 1);
+
+      for (unsigned int j = 0; j < len - 1; j++)
+	region[j] = (*path)[j]->e->dest;
+
+      if (duplicate_seme_region (entry, exit, region, len - 1, NULL))
+	{
+	  /* We do not update dominance info.  */
+	  free_dominance_info (CDI_DOMINATORS);
+	  bitmap_set_bit (threaded_blocks, entry->src->index);
+	  retval = true;
+	}
+
+      delete_jump_thread_path (path);
+      paths.unordered_remove (i);
+    }
+
+  /* Remove from PATHS all the jump-threads starting with an edge already
+     jump-threaded.  */
+  for (i = 0; i < paths.length ();)
+    {
+      vec<jump_thread_edge *> *path = paths[i];
+      edge entry = (*path)[0]->e;
+
+      /* Do not jump-thread twice from the same block.  */
+      if (bitmap_bit_p (threaded_blocks, entry->src->index))
+	{
+	  delete_jump_thread_path (path);
+	  paths.unordered_remove (i);
+	}
+      else
+	i++;
+    }
+
+  bitmap_clear (threaded_blocks);
+
   mark_threaded_blocks (threaded_blocks);
 
   initialize_original_copy_tables ();
diff --git a/gcc/tree-ssa-threadupdate.h b/gcc/tree-ssa-threadupdate.h
index 426aca5..22c5bce 100644
--- a/gcc/tree-ssa-threadupdate.h
+++ b/gcc/tree-ssa-threadupdate.h
@@ -26,6 +26,7 @@  extern bool thread_through_all_blocks (bool);
 enum jump_thread_edge_type
 {
   EDGE_START_JUMP_THREAD,
+  EDGE_FSM_THREAD,
   EDGE_COPY_SRC_BLOCK,
   EDGE_COPY_SRC_JOINER_BLOCK,
   EDGE_NO_COPY_SRC_BLOCK
-- 
1.7.10.4