diff mbox

Switch elimination pass for PR 54742

Message ID 1408480796.31355.7.camel@ubuntu-sellcey
State New
Headers show

Commit Message

Steve Ellcey Aug. 19, 2014, 8:39 p.m. UTC
Here is an official submission for the switch optimization described in
PR 54742.  I have addressed the formatting/comment issues that were raised
and also added a test case based on comment #27 from PR 54742 and I fixed a
bug I found while doing benchmarking with SPEC2006 (the perl benchmark was
generating an ICE in a routine with multiple switch statements).

I ran the benchmarking to see if I could find any more tests that are
helped like coremark is and while I found a number of benchmarks in
SPEC 2006 and EEMBC where the optimization is triggered, this optimization
generally didn't affect the performance of those benchmarks.  The biggest
impact I could find was on the perl benchmark in SPEC where I saw around
a 0.4% improvement on a MIPS 74k.  Not huge, but not nothing.

So, OK to checkin?

Steve Ellcey
sellcey@mips.com


2014-08-12  Steve Ellcey  <sellcey@mips.com>

	PR tree-opt/54742
	* Makefile.in (OBJS): Add tree-switch-shortcut.o.
	* common.opt (ftree-switch-shortcut): New.
	* opts.c (default_options_table): Add OPT_ftree_switch_shortcut.
	* params.def (PARAM_MAX_SWITCH_INSNS): New.
	(PARAM_MAX_SWITCH_PATHS): New.
	* passes.def (pass_tree_switch_shortcut): New.
	* timevar.def (TV_TREE_SWITCH_SHORTCUT): New.
	* tree-pass.h (make_pass_tree_switch_shortcut): New.
	* tree-switch-shortcut.c: New.


2014-08-12  Steve Ellcey  <sellcey@mips.com>

	PR tree-opt/54742
	* gcc.dg/pr54742.c: New test.

Comments

James Greenhalgh Aug. 20, 2014, 5:04 p.m. UTC | #1
On Tue, Aug 19, 2014 at 09:39:56PM +0100, Steve Ellcey wrote:
> Here is an official submission for the switch optimization described in
> PR 54742.  I have addressed the formatting/comment issues that were raised
> and also added a test case based on comment #27 from PR 54742 and I fixed a
> bug I found while doing benchmarking with SPEC2006 (the perl benchmark was
> generating an ICE in a routine with multiple switch statements).
> 
> I ran the benchmarking to see if I could find any more tests that are
> helped like coremark is and while I found a number of benchmarks in
> SPEC 2006 and EEMBC where the optimization is triggered, this optimization
> generally didn't affect the performance of those benchmarks.  The biggest
> impact I could find was on the perl benchmark in SPEC where I saw around
> a 0.4% improvement on a MIPS 74k.  Not huge, but not nothing.

For what it is worth, I see a nice (~4%) improvement in Crafty from
SPEC 2000. I haven't investigated too deeply, but at a first glance the
number of branch mispredictions has dropped just over 1%, as you
might hope from this optimisation.

I can also attest to there being a number of places the optimisation is
triggered (with high enough parameters; I was running with
--param max-switch-paths=1000 --param max-switch-insns=10000), but like
you I don't see much measurable change in execution time.

Thanks,
James

> 
> So, OK to checkin?
> 
> Steve Ellcey
> sellcey@mips.com
> 
> 
> 2014-08-12  Steve Ellcey  <sellcey@mips.com>
> 
> 	PR tree-opt/54742
> 	* Makefile.in (OBJS): Add tree-switch-shortcut.o.
> 	* common.opt (ftree-switch-shortcut): New.
> 	* opts.c (default_options_table): Add OPT_ftree_switch_shortcut.
> 	* params.def (PARAM_MAX_SWITCH_INSNS): New.
> 	(PARAM_MAX_SWITCH_PATHS): New.
> 	* passes.def (pass_tree_switch_shortcut): New.
> 	* timevar.def (TV_TREE_SWITCH_SHORTCUT): New.
> 	* tree-pass.h (make_pass_tree_switch_shortcut): New.
> 	* tree-switch-shortcut.c: New.
> 
> 
> 2014-08-12  Steve Ellcey  <sellcey@mips.com>
> 
> 	PR tree-opt/54742
> 	* gcc.dg/pr54742.c: New test.

> diff --git a/gcc/testsuite/gcc.dg/pr54742.c b/gcc/testsuite/gcc.dg/pr54742.c
> new file mode 100644
> index 0000000..77aa8ba
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr54742.c
> @@ -0,0 +1,50 @@
> +/* PR tree-optimization/54742
> +   Verify that the tree-optimization-shortcut pass completely removes
> +   the switch statement.  */
> +
> +/* { dg-do compile } */
> +/* { dg-options "-O3" } */
> +
> +int sum0, sum1, sum2, sum3;
> +int foo(char * s, char** ret)
> +{
> +  int state=0;
> +  char c;
> +
> +  for (; *s && state != 4; s++)
> +    {
> +      c = *s;
> +      if (c == '*')
> +	{
> +	  s++;
> +	  break;
> +	}
> +      switch (state) {
> +	case 0:
> +	  if (c == '+') state = 1;
> +	  else if (c != '-') sum0+=c;
> +	  break;
> +	case 1:
> +	  if (c == '+') state = 2;
> +	  else if (c == '-') state = 0;
> +	  else sum1+=c;
> +	  break;
> +	case 2:
> +	  if (c == '+') state = 3;
> +	  else if (c == '-') state = 1;
> +	  else sum2+=c;
> +	  break;
> +	case 3:
> +	  if (c == '-') state = 2;
> +	  else if (c == 'x') state = 4;
> +	  break;
> +	default:
> +	  break;
> +      }
> +    }
> +  *ret = s;
> +  return state;
> +}
> +
> +/* { dg-final { scan-tree-dump-not "switch" "optimized" } } */
> +/* { dg-final { cleanup-tree-dump "optimized" } } */
> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> index 31c1f4d..94e8ec4 100644
> --- a/gcc/Makefile.in
> +++ b/gcc/Makefile.in
> @@ -1411,6 +1411,7 @@ OBJS = \
>  	tree-scalar-evolution.o \
>  	tree-sra.o \
>  	tree-switch-conversion.o \
> +	tree-switch-shortcut.o \
>  	tree-ssa-address.o \
>  	tree-ssa-alias.o \
>  	tree-ssa-ccp.o \
> diff --git a/gcc/common.opt b/gcc/common.opt
> index 0c4f86b..fe0664a 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -2249,6 +2249,10 @@ ftree-sra
>  Common Report Var(flag_tree_sra) Optimization
>  Perform scalar replacement of aggregates
>  
> +ftree-switch-shortcut
> +Common Report Var(flag_tree_switch_shortcut) Init(0) Optimization
> +Convert jumps to switch statements into jumps to case statement.
> +
>  ftree-ter
>  Common Report Var(flag_tree_ter) Optimization
>  Replace temporary expressions in the SSA->normal pass
> diff --git a/gcc/opts.c b/gcc/opts.c
> index be1867c..f1ac2e5 100644
> --- a/gcc/opts.c
> +++ b/gcc/opts.c
> @@ -514,6 +514,7 @@ static const struct default_options default_options_table[] =
>      { OPT_LEVELS_3_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_DYNAMIC },
>      { OPT_LEVELS_3_PLUS, OPT_fipa_cp_clone, NULL, 1 },
>      { OPT_LEVELS_3_PLUS, OPT_ftree_partial_pre, NULL, 1 },
> +    { OPT_LEVELS_3_PLUS, OPT_ftree_switch_shortcut, NULL, 1 },
>  
>      /* -Ofast adds optimizations to -O3.  */
>      { OPT_LEVELS_FAST, OPT_ffast_math, NULL, 1 },
> diff --git a/gcc/params.def b/gcc/params.def
> index cad00e2..65377d3 100644
> --- a/gcc/params.def
> +++ b/gcc/params.def
> @@ -1058,6 +1058,20 @@ DEFPARAM (PARAM_MAX_SLSR_CANDIDATE_SCAN,
>  	  "strength reduction",
>  	  50, 1, 999999)
>  
> +/* Maximum number of instructions to duplicate when shortcutting a switch.  */
> +DEFPARAM (PARAM_MAX_SWITCH_INSNS,
> +	  "max-switch-insns",
> +	  "Maximum number of instructions to duplicate when "
> +	  "shortcutting a switch statement",
> +	  100, 1, 999999)
> +
> +/* Maximum number of paths to duplicate when shortcutting a switch.  */
> +DEFPARAM (PARAM_MAX_SWITCH_PATHS,
> +	  "max-switch-paths",
> +	  "Maximum number of new paths to create when"
> +	  " shortcutting a switch statement",
> +	  50, 1, 999999)
> +
>  DEFPARAM (PARAM_ASAN_STACK,
>           "asan-stack",
>           "Enable asan stack protection",
> diff --git a/gcc/passes.def b/gcc/passes.def
> index f13df6c..8bbf2d0 100644
> --- a/gcc/passes.def
> +++ b/gcc/passes.def
> @@ -157,6 +157,7 @@ along with GCC; see the file COPYING3.  If not see
>        NEXT_PASS (pass_cselim);
>        NEXT_PASS (pass_copy_prop);
>        NEXT_PASS (pass_tree_ifcombine);
> +      NEXT_PASS (pass_tree_switch_shortcut);
>        NEXT_PASS (pass_phiopt);
>        NEXT_PASS (pass_tail_recursion);
>        NEXT_PASS (pass_ch);
> diff --git a/gcc/timevar.def b/gcc/timevar.def
> index a04d05c..d9ee915 100644
> --- a/gcc/timevar.def
> +++ b/gcc/timevar.def
> @@ -170,6 +170,7 @@ DEFTIMEVAR (TV_TREE_LOOP_IVCANON     , "tree canonical iv")
>  DEFTIMEVAR (TV_SCEV_CONST            , "scev constant prop")
>  DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH    , "tree loop unswitching")
>  DEFTIMEVAR (TV_COMPLETE_UNROLL       , "complete unrolling")
> +DEFTIMEVAR (TV_TREE_SWITCH_SHORTCUT  , "switch statement shortcuts")
>  DEFTIMEVAR (TV_TREE_PARALLELIZE_LOOPS, "tree parallelize loops")
>  DEFTIMEVAR (TV_TREE_VECTORIZATION    , "tree vectorization")
>  DEFTIMEVAR (TV_TREE_SLP_VECTORIZATION, "tree slp vectorization")
> diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
> index 1477d1f..f898e27 100644
> --- a/gcc/tree-pass.h
> +++ b/gcc/tree-pass.h
> @@ -575,6 +575,7 @@ extern gimple_opt_pass *make_pass_early_inline (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_inline_parameters (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_update_address_taken (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_convert_switch (gcc::context *ctxt);
> +extern gimple_opt_pass *make_pass_tree_switch_shortcut (gcc::context *ctxt);
>  
>  /* Current optimization pass.  */
>  extern opt_pass *current_pass;
> diff --git a/gcc/tree-switch-shortcut.c b/gcc/tree-switch-shortcut.c
> new file mode 100644
> index 0000000..4518f79
> --- /dev/null
> +++ b/gcc/tree-switch-shortcut.c
> @@ -0,0 +1,438 @@
> +/* Switch shortcutting optimization for GNU C
> +   Copyright (C) 2013 Free Software Foundation, Inc.
> +   Contributed by Steve Ellcey (steve.ellcey@imgtec.com).
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify it under
> +the terms of the GNU General Public License as published by the Free
> +Software Foundation; either version 3, or (at your option) any later
> +version.
> +
> +GCC is distributed in the hope that it will be useful, but WITHOUT ANY
> +WARRANTY; without even the implied warranty of MERCHANTABILITY or
> +FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
> +for more details.
> +
> +You should have received a copy of the GNU General Public License
> +along with GCC; see the file COPYING3.  If not see
> +<http://www.gnu.org/licenses/>.  */
> +
> +/* This file implements an optimization where, when a variable is set
> +   to a constant value and there is a path that leads from that definition
> +   to a switch statement that uses that variable as its controlling expression
> +   we duplicate the blocks on this path and change the jump to the switch
> +   statement with a direct jump to the label of the switch block that control
> +   would goto based on the value of the variable.  This can come up in
> +   loops/switch statements that implement state machines.
> +
> +   Example (modified from PR 54742):
> +
> +   foo(char *str) {
> +     int sum=0;
> +     int state=0;
> +     char *s=str;
> +     for (; *s; s++) {
> +       char c=*s;
> +       <CODE BLOCK 1>
> +       switch (state) {
> +         case 0:
> +           if (c == '+')       { state = 1; sum += 9; }
> +           else if (c != '-')  { state = 2; sum += 3; }
> +           break;
> +         case 1:
> +           if (c == '+')       { state = 2; sum += 4; }
> +           else if (c == '-')  { state = 0; sum += 7; }
> +           break;
> +         case 2:
> +           if (c == '+')       { state = 0; sum += 8; }
> +           else if (c == '-')  { state = 1; sum += 2; }
> +           break;
> +       }
> +       <CODE BLOCK 2>
> +     }
> +     return state;
> +   }
> +
> +  This pass will convert the code inside 'case 0' to something like:
> +
> +    case 0:
> +      if (c == '+')      { state = 1; sum += 9;
> +                           <CODE BLOCK 2>
> +                           s++; if (!s) goto loop_exit;
> +                           <CODE BLOCK 1>
> +                           goto case_1; }
> +      else if (c != '-') { state = 2; sum += 3;
> +                           <CODE BLOCK 2>
> +                           s++; if (!s) goto loop_exit;
> +                           <CODE BLOCK 1>
> +                           goto case_2; }
> +      else               { <CODE BLOCK 2>
> +			   s++; if (!s) goto exit;
> +                           <CODE BLOCK 1>
> +                           goto case_0; }
> +
> +Similar transformations would apply to the other parts of the switch
> +statement.  This obviously can lead to a lot of code duplication but
> +it can also result in faster code since we are replacing two jumps
> +(one indirect) with a single direct jump.  */
> +
> +#include "config.h"
> +#include "system.h"
> +#include "coretypes.h"
> +#include "tm.h"
> +#include "params.h"
> +#include "flags.h"
> +#include "tree.h"
> +#include "tree-pass.h"
> +#include "basic-block.h"
> +#include "function.h"
> +#include "hash-table.h"
> +#include "tree-ssa-alias.h"
> +#include "tree-cfg.h"
> +#include "tree-ssa-operands.h"
> +#include "tree-inline.h"
> +#include "gimple-expr.h"
> +#include "is-a.h"
> +#include "gimple.h"
> +#include "tree-phinodes.h"
> +#include "gimple-iterator.h"
> +#include "gimple-ssa.h"
> +#include "ssa-iterators.h"
> +#include "tree-into-ssa.h"
> +#include "cfgloop.h"
> +
> +/* Helper function for find_path, visited_bbs is used to make sure we don't
> +   fall into an infinite loop.  */
> +
> +static int
> +find_path_1 (basic_block start_bb, basic_block end_bb,
> +	     hash_set<basic_block> *visited_bbs)
> +{
> +  edge_iterator ei;
> +  edge e;
> +
> +  if (start_bb == end_bb) return 1;
> +
> +  if (!visited_bbs->add (start_bb))
> +    {
> +      FOR_EACH_EDGE (e, ei, start_bb->succs)
> +	if (find_path_1 (e->dest, end_bb, visited_bbs))
> +	  return 1;
> +    }
> +  return 0;
> +}
> +
> +/* Return 1 if there is a path from start_bb to end_bb and 0 if there
> +   is not.  There may be multiple paths from start_bb to end_bb.  */
> +
> +static int
> +find_path (basic_block start_bb, basic_block end_bb)
> +{
> +  edge_iterator ei;
> +  edge e;
> +  hash_set<basic_block> visited_bbs;
> +  int p = 0;
> +
> +  if (start_bb == end_bb) return 1;
> +
> +  if (!visited_bbs.add (start_bb))
> +    {
> +      FOR_EACH_EDGE (e, ei, start_bb->succs)
> +	if (find_path_1 (e->dest, end_bb, &visited_bbs))
> +	  {
> +	    p = 1;
> +	    break;
> +	  }
> +    }
> +  return p;
> +}
> +
> +
> +/* We save the paths we want to copy in bbs_list_array.  n_bbs_list is the
> +   number of paths saved, bbs_list_array[i] is the list of basic blocks in
> +   one path.  Each path starts with the block where a variable is assigned
> +   a constant value (bbs_list_array[i][0]) and ends with the switch statement
> +   block (bbs_list_array[i][bbs_list_size[i]-2]) followed by the block that
> +   the switch statement is going to go to given the constant value of the
> +   variable (bbs_list_array[i][bbs_list_size[i]-1]).  */
> +
> +struct path_info
> +{
> +  basic_block **bbs_list_array;
> +  int *val_array;
> +  int *bbs_list_size;
> +  int max_path_count;
> +  int max_insn_count;
> +  int n_bbs_list;
> +};
> +
> +/* bbs_list[0] is the block with the switch statement,
> +   bbs_list[n-1] is the block where the switch statement variable is assigned
> +     a constant value,
> +   The entries in between make a (reverse) path between the two.
> +
> +   We don't want to change bb_list, we want to leave that alone and
> +   and copy the path to bbs_list_array so that we wind up with a list (array)
> +   of paths that we want to update.  We also want to add the block that the
> +   switch is going to go to on to the list so that we know which exit from
> +   the switch statement is important.  */
> +
> +static void
> +save_new_path (basic_block *bbs_list, int n, tree val, path_info *pi)
> +{
> +  int i;
> +  int insn_count;
> +  basic_block bb;
> +  edge switch_taken_edge;
> +  gimple_stmt_iterator gsi;
> +
> +  if (n <= 1) return;
> +
> +  if (pi->n_bbs_list >= pi->max_path_count)
> +    return;
> +
> +  /* Put the blocks in 'correct' order and add in where we want to go after
> +     the switch statement, We want to leave bbs_list untouched for future
> +     calls.  */
> +
> +  pi->bbs_list_array[pi->n_bbs_list] = XNEWVEC (basic_block, n+1);
> +  for (i = 0; i < n; i++)
> +    pi->bbs_list_array[pi->n_bbs_list][i] = bbs_list[n-i-1];
> +
> +  switch_taken_edge = find_taken_edge (bbs_list[0], val);
> +  pi->bbs_list_array[pi->n_bbs_list][n] = switch_taken_edge->dest;
> +
> +  pi->bbs_list_size[pi->n_bbs_list] = n + 1;
> +  pi->val_array[pi->n_bbs_list] = (int) TREE_INT_CST_LOW (val);
> +
> +  /* Count how many instructions are in the blocks we are going to
> +     duplicate and if there are too many do not save this path
> +     (return without incrementing n_bbs_list).  */
> +
> +  insn_count = 0;
> +  for (i = 1; i < n; i++)
> +    {
> +      bb = pi->bbs_list_array[pi->n_bbs_list][i];
> +      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> +	insn_count += estimate_num_insns (gsi_stmt (gsi), &eni_size_weights);
> +    }
> +
> +  if (insn_count > pi->max_insn_count)
> +    return;
> +
> +  pi->n_bbs_list = pi->n_bbs_list + 1;
> +}
> +
> +/* switch_stmt is a switch statement whose switch index expression
> +   is the variable expr.  We trace the value of the variable back
> +   through any phi nodes looking for places where it gets a constant
> +   value and save the path in bbs_list.  Then we call save_new_path
> +   to create a list of such paths.  */
> +
> +static void
> +process_switch (tree expr, gimple switch_stmt,
> +		hash_set<gimple> *visited_phis,
> +	        basic_block *bbs_list, int n,
> +		path_info *pi)
> +{
> +  gimple def_stmt;
> +  tree var;
> +  unsigned int i;
> +  edge e;
> +  edge_iterator ei;
> +  basic_block bbx;
> +  basic_block var_bb;
> +  int e_count;
> +
> +  gcc_assert (gimple_code (switch_stmt) == GIMPLE_SWITCH);
> +  var = SSA_NAME_VAR (expr);
> +  def_stmt = SSA_NAME_DEF_STMT (expr);
> +  var_bb = gimple_bb (def_stmt);
> +
> +  if (var == NULL || var_bb == NULL) return;
> +
> +  /* We have a variable definition (var) that is defined in var_bb,
> +     We want to put the path from var_bb to the current bb into the
> +     bbs_list.  If there is more then one path, skip this and don't
> +     try to do the optimization.  */
> +
> +  bbx = bbs_list[n-1];
> +  while (bbx != var_bb)
> +    {
> +      e_count = 0;
> +      FOR_EACH_EDGE (e, ei, bbx->preds)
> +	if (find_path (var_bb, e->src))
> +	  {
> +	    bbs_list[n] = e->src;
> +	    n = n + 1;
> +	    e_count = e_count + 1;
> +	  }
> +      if (e_count != 1) return;
> +      bbx = bbs_list[n-1];
> +    }
> +
> +  if (gimple_code (def_stmt) == GIMPLE_PHI
> +      && !visited_phis->add (def_stmt))
> +    {
> +      for (i = 0; i < gimple_phi_num_args (def_stmt); i++)
> +	{
> +	  tree arg = gimple_phi_arg_def (def_stmt, i);
> +	  if (arg && TREE_CODE (arg) == INTEGER_CST)
> +	    {
> +	      /* const char *name = IDENTIFIER_POINTER (DECL_NAME (var)); */
> +	      bbs_list[n] = gimple_phi_arg_edge (def_stmt, i)->src;
> +	      save_new_path (bbs_list, n + 1, arg, pi);
> +	    }
> +	  else if (arg && TREE_CODE (arg) == SSA_NAME)
> +	    {
> +	      bbs_list[n] = gimple_phi_arg_edge (def_stmt, i)->src;
> +	      process_switch (arg, switch_stmt, visited_phis, bbs_list, n+1, pi);
> +	    }
> +	}
> +    }
> +}
> +
> +/* Find paths that lead from blocks where a variable is assigned a constant
> +   value to a switch statement where that variable is used as the switch
> +   index.  Save the paths in bbs_list_array so that they can be processed
> +   by copy_switch_paths.  */
> +
> +static unsigned int
> +find_switch_shortcuts (function *fun, path_info *pi)
> +{
> +  basic_block bb;
> +  hash_set<gimple> visited_phis;
> +  basic_block *bbs_list;
> +  int n = 1;
> +
> +  bbs_list = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
> +  FOR_EACH_BB_FN (bb, fun)
> +    {
> +      gimple stmt = last_stmt (bb);
> +      if (stmt && gimple_code (stmt) == GIMPLE_SWITCH)
> +	{
> +	  tree op = gimple_switch_index (stmt);
> +	  tree var = SSA_NAME_VAR (op);
> +	  if (var)
> +	    {
> +	      bbs_list[0] = bb;
> +	      process_switch (op, stmt, &visited_phis, bbs_list, n, pi);
> +	    }
> +	}
> +    }
> +  XDELETEVEC (bbs_list);
> +  return 0;
> +}
> +
> +/* Call gimple_duplicate_sese_region to douplicate the blocks in bb_list.
> +   We free and recalculate all ssa and dominance information afterwords
> +   because the region being copied is not really SESE and so we cannot
> +   trust gimple_duplicate_sese_region to correctly update the dataflow
> +   information.  */
> +
> +static void
> +duplicate_blocks (basic_block *bb_list, int bb_count)
> +{
> +  edge orig_edge, exit_edge;
> +  loop_p loop;
> +
> +  orig_edge = find_edge (bb_list[0], bb_list[1]);
> +  exit_edge = find_edge (bb_list[bb_count-2], bb_list[bb_count-1]);
> +  /* Earlier block duplications may have removed the path that we
> +     saved earlier and are trying to duplicate here.  */
> +  if (orig_edge != NULL && exit_edge != NULL)
> +    {
> +      gimple_duplicate_sese_region (orig_edge, exit_edge, &bb_list[1],
> +				    bb_count-2, NULL, false);
> +      free_dominance_info (CDI_DOMINATORS);
> +      update_ssa (TODO_update_ssa);
> +      calculate_dominance_info (CDI_DOMINATORS);
> +      loops_state_set (LOOPS_NEED_FIXUP);
> +    }
> +}
> +
> +/* Go through the paths saved in bbs_list_array and make copies of them.  */
> +
> +static void
> +copy_switch_paths (path_info *pi)
> +{
> +  int i;
> +
> +  /* Process each path in bbs_list_size.  */
> +  for (i = 0; i < pi->n_bbs_list; i++)
> +    {
> +    /* For each path in bbs_list_size loop through and copy each block in
> +       the path (except the first on where the constant is assigned and
> +       the final one where the switch statement goes to.  */
> +
> +    if (!single_pred_p (pi->bbs_list_array[i][1]))
> +      duplicate_blocks (pi->bbs_list_array[i], pi->bbs_list_size[i]);
> +    }
> +}
> +
> +
> +/* Main entry for the tree if-conversion pass.  */
> +
> +namespace {
> +
> +const pass_data pass_data_tree_switch_shortcut =
> +{
> +  GIMPLE_PASS, /* type */
> +  "switch_shortcut", /* name */
> +  OPTGROUP_NONE, /* optinfo_flags */
> +  TV_TREE_SWITCH_SHORTCUT, /* tv_id */
> +  ( PROP_cfg | PROP_ssa ), /* properties_required */
> +  0, /* properties_provided */
> +  0, /* properties_destroyed */
> +  0, /* todo_flags_start */
> +  TODO_update_ssa, /* todo_flags_finish */
> +};
> +
> +class pass_tree_switch_shortcut : public gimple_opt_pass
> +{
> +public:
> +  pass_tree_switch_shortcut (gcc::context *ctxt)
> +    : gimple_opt_pass (pass_data_tree_switch_shortcut, ctxt)
> +  {}
> +
> +  /* opt_pass methods: */
> +  virtual bool gate (function *)
> +    {
> +      return flag_tree_switch_shortcut;
> +    }
> +  virtual unsigned int execute (function *);
> +
> +}; // class pass_tree_switch_shortcut
> +
> +unsigned int
> +pass_tree_switch_shortcut::execute (function *fun)
> +{
> +  int i;
> +  path_info *pi;
> +
> +  pi = XNEW (path_info);
> +  pi->n_bbs_list = 0;
> +  pi->max_insn_count = PARAM_VALUE (PARAM_MAX_SWITCH_INSNS);
> +  pi->max_path_count = PARAM_VALUE (PARAM_MAX_SWITCH_PATHS);
> +  pi->val_array = XNEWVEC (int, pi->max_path_count);
> +  pi->bbs_list_size = XNEWVEC (int, pi->max_path_count);
> +  pi->bbs_list_array = XNEWVEC (basic_block *, pi->max_path_count);
> +  find_switch_shortcuts (fun, pi);
> +  copy_switch_paths (pi);
> +  XDELETEVEC (pi->val_array);
> +  XDELETEVEC (pi->bbs_list_size);
> +  for (i = 0; i < pi->n_bbs_list; i++)
> +    XDELETEVEC (pi->bbs_list_array[i]);
> +  XDELETEVEC (pi->bbs_list_array);
> +  XDELETE (pi);
> +  return 0;
> +}
> +
> +} // anon namespace
> +
> +gimple_opt_pass *
> +make_pass_tree_switch_shortcut (gcc::context *ctxt)
> +{
> +  return new pass_tree_switch_shortcut (ctxt);
> +}
Sebastian Pop Aug. 20, 2014, 8:29 p.m. UTC | #2
James Greenhalgh wrote:
> On Tue, Aug 19, 2014 at 09:39:56PM +0100, Steve Ellcey wrote:
> > Here is an official submission for the switch optimization described in
> > PR 54742.  I have addressed the formatting/comment issues that were raised
> > and also added a test case based on comment #27 from PR 54742 and I fixed a
> > bug I found while doing benchmarking with SPEC2006 (the perl benchmark was
> > generating an ICE in a routine with multiple switch statements).
> > 
> > I ran the benchmarking to see if I could find any more tests that are
> > helped like coremark is and while I found a number of benchmarks in
> > SPEC 2006 and EEMBC where the optimization is triggered, this optimization
> > generally didn't affect the performance of those benchmarks.  The biggest
> > impact I could find was on the perl benchmark in SPEC where I saw around
> > a 0.4% improvement on a MIPS 74k.  Not huge, but not nothing.
> 
> For what it is worth, I see a nice (~4%) improvement in Crafty from
> SPEC 2000. I haven't investigated too deeply, but at a first glance the
> number of branch mispredictions has dropped just over 1%, as you
> might hope from this optimisation.
> 
> I can also attest to there being a number of places the optimisation is
> triggered (with high enough parameters; I was running with
> --param max-switch-paths=1000 --param max-switch-insns=10000), but like
> you I don't see much measurable change in execution time.

Without change to the default params, I see the switch shortcut having a
performance impact on both png and jpeg, compress and decompress mode.

I think that's enough to remove the "benchmarketing" label from the switch
shortcut transform.

Sebastian
Richard Biener Aug. 21, 2014, 8:53 a.m. UTC | #3
On Wed, Aug 20, 2014 at 10:29 PM, Sebastian Pop <sebpop@gmail.com> wrote:
> James Greenhalgh wrote:
>> On Tue, Aug 19, 2014 at 09:39:56PM +0100, Steve Ellcey wrote:
>> > Here is an official submission for the switch optimization described in
>> > PR 54742.  I have addressed the formatting/comment issues that were raised
>> > and also added a test case based on comment #27 from PR 54742 and I fixed a
>> > bug I found while doing benchmarking with SPEC2006 (the perl benchmark was
>> > generating an ICE in a routine with multiple switch statements).
>> >
>> > I ran the benchmarking to see if I could find any more tests that are
>> > helped like coremark is and while I found a number of benchmarks in
>> > SPEC 2006 and EEMBC where the optimization is triggered, this optimization
>> > generally didn't affect the performance of those benchmarks.  The biggest
>> > impact I could find was on the perl benchmark in SPEC where I saw around
>> > a 0.4% improvement on a MIPS 74k.  Not huge, but not nothing.
>>
>> For what it is worth, I see a nice (~4%) improvement in Crafty from
>> SPEC 2000. I haven't investigated too deeply, but at a first glance the
>> number of branch mispredictions has dropped just over 1%, as you
>> might hope from this optimisation.
>>
>> I can also attest to there being a number of places the optimisation is
>> triggered (with high enough parameters; I was running with
>> --param max-switch-paths=1000 --param max-switch-insns=10000), but like
>> you I don't see much measurable change in execution time.
>
> Without change to the default params, I see the switch shortcut having a
> performance impact on both png and jpeg, compress and decompress mode.
>
> I think that's enough to remove the "benchmarketing" label from the switch
> shortcut transform.

Did you look at the actual code transformation the pass does to these?
(what is 'png' and 'jpeg'?)  What's the code size impact?

Richard.

> Sebastian
Richard Biener Aug. 21, 2014, 8:57 a.m. UTC | #4
On Tue, Aug 19, 2014 at 10:39 PM, Steve Ellcey <sellcey@mips.com> wrote:
> Here is an official submission for the switch optimization described in
> PR 54742.  I have addressed the formatting/comment issues that were raised
> and also added a test case based on comment #27 from PR 54742 and I fixed a
> bug I found while doing benchmarking with SPEC2006 (the perl benchmark was
> generating an ICE in a routine with multiple switch statements).
>
> I ran the benchmarking to see if I could find any more tests that are
> helped like coremark is and while I found a number of benchmarks in
> SPEC 2006 and EEMBC where the optimization is triggered, this optimization
> generally didn't affect the performance of those benchmarks.  The biggest
> impact I could find was on the perl benchmark in SPEC where I saw around
> a 0.4% improvement on a MIPS 74k.  Not huge, but not nothing.
>
> So, OK to checkin?

Without looking at the patch in detail what is the rationale for the
pass placement (looks quite early)?  I would have guessed that
the pass could benefit from value-range analysis.

Jeff, Steve is it possible to trigger the transform by simply
"manually forcing" the right "path" jump-threads from
inside VRP?  That is, basically integrate the transform part
with the existing jump threading framework but do an
alternate discovery pass?

Thanks,
Richard.

> Steve Ellcey
> sellcey@mips.com
>
>
> 2014-08-12  Steve Ellcey  <sellcey@mips.com>
>
>         PR tree-opt/54742
>         * Makefile.in (OBJS): Add tree-switch-shortcut.o.
>         * common.opt (ftree-switch-shortcut): New.
>         * opts.c (default_options_table): Add OPT_ftree_switch_shortcut.
>         * params.def (PARAM_MAX_SWITCH_INSNS): New.
>         (PARAM_MAX_SWITCH_PATHS): New.
>         * passes.def (pass_tree_switch_shortcut): New.
>         * timevar.def (TV_TREE_SWITCH_SHORTCUT): New.
>         * tree-pass.h (make_pass_tree_switch_shortcut): New.
>         * tree-switch-shortcut.c: New.
>
>
> 2014-08-12  Steve Ellcey  <sellcey@mips.com>
>
>         PR tree-opt/54742
>         * gcc.dg/pr54742.c: New test.
>
James Greenhalgh Aug. 21, 2014, 9:41 a.m. UTC | #5
On Thu, Aug 21, 2014 at 09:57:56AM +0100, Richard Biener wrote:
> On Tue, Aug 19, 2014 at 10:39 PM, Steve Ellcey <sellcey@mips.com> wrote:
> > Here is an official submission for the switch optimization described in
> > PR 54742.  I have addressed the formatting/comment issues that were raised
> > and also added a test case based on comment #27 from PR 54742 and I fixed a
> > bug I found while doing benchmarking with SPEC2006 (the perl benchmark was
> > generating an ICE in a routine with multiple switch statements).
> >
> > I ran the benchmarking to see if I could find any more tests that are
> > helped like coremark is and while I found a number of benchmarks in
> > SPEC 2006 and EEMBC where the optimization is triggered, this optimization
> > generally didn't affect the performance of those benchmarks.  The biggest
> > impact I could find was on the perl benchmark in SPEC where I saw around
> > a 0.4% improvement on a MIPS 74k.  Not huge, but not nothing.
> >
> > So, OK to checkin?
> 
> Without looking at the patch in detail what is the rationale for the
> pass placement (looks quite early)?  I would have guessed that
> the pass could benefit from value-range analysis.
> 
> Jeff, Steve is it possible to trigger the transform by simply
> "manually forcing" the right "path" jump-threads from
> inside VRP?  That is, basically integrate the transform part
> with the existing jump threading framework but do an
> alternate discovery pass?

This seems like what I tried to do last year with:

  https://gcc.gnu.org/ml/gcc-patches/2013-06/msg01121.html

It turns Jeff's jump-threading code in to a strange franken-pass of bits and
pieces of detection and optimisation, and would need some substantial
reworking to fit in with Jeff's changes last Autumn, but if it is more
likely to be acceptable for trunk then perhaps we could look to revive it.
It would be nice to reuse the path copy code Jeff added last year, but I
don't have much intuition as to how feasible that is.

Was this the sort of thing that you were imagining? 

Steve, Jeff?

James

> 
> Thanks,
> Richard.
> 
> > Steve Ellcey
> > sellcey@mips.com
> >
> >
> > 2014-08-12  Steve Ellcey  <sellcey@mips.com>
> >
> >         PR tree-opt/54742
> >         * Makefile.in (OBJS): Add tree-switch-shortcut.o.
> >         * common.opt (ftree-switch-shortcut): New.
> >         * opts.c (default_options_table): Add OPT_ftree_switch_shortcut.
> >         * params.def (PARAM_MAX_SWITCH_INSNS): New.
> >         (PARAM_MAX_SWITCH_PATHS): New.
> >         * passes.def (pass_tree_switch_shortcut): New.
> >         * timevar.def (TV_TREE_SWITCH_SHORTCUT): New.
> >         * tree-pass.h (make_pass_tree_switch_shortcut): New.
> >         * tree-switch-shortcut.c: New.
> >
> >
> > 2014-08-12  Steve Ellcey  <sellcey@mips.com>
> >
> >         PR tree-opt/54742
> >         * gcc.dg/pr54742.c: New test.
> >
>
Richard Biener Aug. 21, 2014, 10:30 a.m. UTC | #6
On Thu, Aug 21, 2014 at 11:41 AM, James Greenhalgh
<james.greenhalgh@arm.com> wrote:
> On Thu, Aug 21, 2014 at 09:57:56AM +0100, Richard Biener wrote:
>> On Tue, Aug 19, 2014 at 10:39 PM, Steve Ellcey <sellcey@mips.com> wrote:
>> > Here is an official submission for the switch optimization described in
>> > PR 54742.  I have addressed the formatting/comment issues that were raised
>> > and also added a test case based on comment #27 from PR 54742 and I fixed a
>> > bug I found while doing benchmarking with SPEC2006 (the perl benchmark was
>> > generating an ICE in a routine with multiple switch statements).
>> >
>> > I ran the benchmarking to see if I could find any more tests that are
>> > helped like coremark is and while I found a number of benchmarks in
>> > SPEC 2006 and EEMBC where the optimization is triggered, this optimization
>> > generally didn't affect the performance of those benchmarks.  The biggest
>> > impact I could find was on the perl benchmark in SPEC where I saw around
>> > a 0.4% improvement on a MIPS 74k.  Not huge, but not nothing.
>> >
>> > So, OK to checkin?
>>
>> Without looking at the patch in detail what is the rationale for the
>> pass placement (looks quite early)?  I would have guessed that
>> the pass could benefit from value-range analysis.
>>
>> Jeff, Steve is it possible to trigger the transform by simply
>> "manually forcing" the right "path" jump-threads from
>> inside VRP?  That is, basically integrate the transform part
>> with the existing jump threading framework but do an
>> alternate discovery pass?
>
> This seems like what I tried to do last year with:
>
>   https://gcc.gnu.org/ml/gcc-patches/2013-06/msg01121.html
>
> It turns Jeff's jump-threading code in to a strange franken-pass of bits and
> pieces of detection and optimisation, and would need some substantial
> reworking to fit in with Jeff's changes last Autumn, but if it is more
> likely to be acceptable for trunk then perhaps we could look to revive it.
> It would be nice to reuse the path copy code Jeff added last year, but I
> don't have much intuition as to how feasible that is.
>
> Was this the sort of thing that you were imagining?

Yeah, didn't look too closely though.

Richard.

> Steve, Jeff?
>
> James
>
>>
>> Thanks,
>> Richard.
>>
>> > Steve Ellcey
>> > sellcey@mips.com
>> >
>> >
>> > 2014-08-12  Steve Ellcey  <sellcey@mips.com>
>> >
>> >         PR tree-opt/54742
>> >         * Makefile.in (OBJS): Add tree-switch-shortcut.o.
>> >         * common.opt (ftree-switch-shortcut): New.
>> >         * opts.c (default_options_table): Add OPT_ftree_switch_shortcut.
>> >         * params.def (PARAM_MAX_SWITCH_INSNS): New.
>> >         (PARAM_MAX_SWITCH_PATHS): New.
>> >         * passes.def (pass_tree_switch_shortcut): New.
>> >         * timevar.def (TV_TREE_SWITCH_SHORTCUT): New.
>> >         * tree-pass.h (make_pass_tree_switch_shortcut): New.
>> >         * tree-switch-shortcut.c: New.
>> >
>> >
>> > 2014-08-12  Steve Ellcey  <sellcey@mips.com>
>> >
>> >         PR tree-opt/54742
>> >         * gcc.dg/pr54742.c: New test.
>> >
>>
Sebastian Pop Aug. 22, 2014, 8:13 p.m. UTC | #7
Richard Biener wrote:
> On Wed, Aug 20, 2014 at 10:29 PM, Sebastian Pop <sebpop@gmail.com> wrote:
> > James Greenhalgh wrote:
> >> On Tue, Aug 19, 2014 at 09:39:56PM +0100, Steve Ellcey wrote:
> >> > Here is an official submission for the switch optimization described in
> >> > PR 54742.  I have addressed the formatting/comment issues that were raised
> >> > and also added a test case based on comment #27 from PR 54742 and I fixed a
> >> > bug I found while doing benchmarking with SPEC2006 (the perl benchmark was
> >> > generating an ICE in a routine with multiple switch statements).
> >> >
> >> > I ran the benchmarking to see if I could find any more tests that are
> >> > helped like coremark is and while I found a number of benchmarks in
> >> > SPEC 2006 and EEMBC where the optimization is triggered, this optimization
> >> > generally didn't affect the performance of those benchmarks.  The biggest
> >> > impact I could find was on the perl benchmark in SPEC where I saw around
> >> > a 0.4% improvement on a MIPS 74k.  Not huge, but not nothing.
> >>
> >> For what it is worth, I see a nice (~4%) improvement in Crafty from
> >> SPEC 2000. I haven't investigated too deeply, but at a first glance the
> >> number of branch mispredictions has dropped just over 1%, as you
> >> might hope from this optimisation.
> >>
> >> I can also attest to there being a number of places the optimisation is
> >> triggered (with high enough parameters; I was running with
> >> --param max-switch-paths=1000 --param max-switch-insns=10000), but like
> >> you I don't see much measurable change in execution time.
> >
> > Without change to the default params, I see the switch shortcut having a
> > performance impact on both png and jpeg, compress and decompress mode.
> >
> > I think that's enough to remove the "benchmarketing" label from the switch
> > shortcut transform.
> 
> Did you look at the actual code transformation the pass does to these?
> (what is 'png' and 'jpeg'?)  What's the code size impact?

google("jddctmgr.c") the start_pass function contains a for loop with a switch
stmt that is optimized by Steve's pass.

The png one occurs in google("pngread.c png_image_read_colormap")

There is not much code duplicated in both cases.

Sebastian
Jeff Law Aug. 25, 2014, 5:34 p.m. UTC | #8
On 08/21/14 04:30, Richard Biener wrote:
>> It turns Jeff's jump-threading code in to a strange franken-pass of bits and
>> pieces of detection and optimisation, and would need some substantial
>> reworking to fit in with Jeff's changes last Autumn, but if it is more
>> likely to be acceptable for trunk then perhaps we could look to revive it.
>> It would be nice to reuse the path copy code Jeff added last year, but I
>> don't have much intuition as to how feasible that is.
>>
>> Was this the sort of thing that you were imagining?
>
> Yeah, didn't look too closely though.
It'd be pretty ugly I suspect.  But it's probably worth pondering since 
that approach would eliminate the concerns about the cost of detection 
(which is problematical for the jump threader) by using Steve's code for 
that.

On the update side, I suspect most, if not all of the framework is in 
place to handle this kind of update if the right threading paths were 
passed to the updater.  I can probably cobble together that by-hand and 
see what the tree-ssa-threadupdate does with it.  But it'll be a week or 
so before I could look at it.

jeff
diff mbox

Patch

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 31c1f4d..94e8ec4 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1411,6 +1411,7 @@  OBJS = \
 	tree-scalar-evolution.o \
 	tree-sra.o \
 	tree-switch-conversion.o \
+	tree-switch-shortcut.o \
 	tree-ssa-address.o \
 	tree-ssa-alias.o \
 	tree-ssa-ccp.o \
diff --git a/gcc/common.opt b/gcc/common.opt
index 0c4f86b..fe0664a 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2249,6 +2249,10 @@  ftree-sra
 Common Report Var(flag_tree_sra) Optimization
 Perform scalar replacement of aggregates
 
+ftree-switch-shortcut
+Common Report Var(flag_tree_switch_shortcut) Init(0) Optimization
+Convert jumps to switch statements into jumps to case statement.
+
 ftree-ter
 Common Report Var(flag_tree_ter) Optimization
 Replace temporary expressions in the SSA->normal pass
diff --git a/gcc/opts.c b/gcc/opts.c
index be1867c..f1ac2e5 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -514,6 +514,7 @@  static const struct default_options default_options_table[] =
     { OPT_LEVELS_3_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_DYNAMIC },
     { OPT_LEVELS_3_PLUS, OPT_fipa_cp_clone, NULL, 1 },
     { OPT_LEVELS_3_PLUS, OPT_ftree_partial_pre, NULL, 1 },
+    { OPT_LEVELS_3_PLUS, OPT_ftree_switch_shortcut, NULL, 1 },
 
     /* -Ofast adds optimizations to -O3.  */
     { OPT_LEVELS_FAST, OPT_ffast_math, NULL, 1 },
diff --git a/gcc/params.def b/gcc/params.def
index cad00e2..65377d3 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -1058,6 +1058,20 @@  DEFPARAM (PARAM_MAX_SLSR_CANDIDATE_SCAN,
 	  "strength reduction",
 	  50, 1, 999999)
 
+/* Maximum number of instructions to duplicate when shortcutting a switch.  */
+DEFPARAM (PARAM_MAX_SWITCH_INSNS,
+	  "max-switch-insns",
+	  "Maximum number of instructions to duplicate when "
+	  "shortcutting a switch statement",
+	  100, 1, 999999)
+
+/* Maximum number of paths to duplicate when shortcutting a switch.  */
+DEFPARAM (PARAM_MAX_SWITCH_PATHS,
+	  "max-switch-paths",
+	  "Maximum number of new paths to create when"
+	  " shortcutting a switch statement",
+	  50, 1, 999999)
+
 DEFPARAM (PARAM_ASAN_STACK,
          "asan-stack",
          "Enable asan stack protection",
diff --git a/gcc/passes.def b/gcc/passes.def
index f13df6c..8bbf2d0 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -157,6 +157,7 @@  along with GCC; see the file COPYING3.  If not see
       NEXT_PASS (pass_cselim);
       NEXT_PASS (pass_copy_prop);
       NEXT_PASS (pass_tree_ifcombine);
+      NEXT_PASS (pass_tree_switch_shortcut);
       NEXT_PASS (pass_phiopt);
       NEXT_PASS (pass_tail_recursion);
       NEXT_PASS (pass_ch);
diff --git a/gcc/timevar.def b/gcc/timevar.def
index a04d05c..d9ee915 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -170,6 +170,7 @@  DEFTIMEVAR (TV_TREE_LOOP_IVCANON     , "tree canonical iv")
 DEFTIMEVAR (TV_SCEV_CONST            , "scev constant prop")
 DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH    , "tree loop unswitching")
 DEFTIMEVAR (TV_COMPLETE_UNROLL       , "complete unrolling")
+DEFTIMEVAR (TV_TREE_SWITCH_SHORTCUT  , "switch statement shortcuts")
 DEFTIMEVAR (TV_TREE_PARALLELIZE_LOOPS, "tree parallelize loops")
 DEFTIMEVAR (TV_TREE_VECTORIZATION    , "tree vectorization")
 DEFTIMEVAR (TV_TREE_SLP_VECTORIZATION, "tree slp vectorization")
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 1477d1f..f898e27 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -575,6 +575,7 @@  extern gimple_opt_pass *make_pass_early_inline (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_inline_parameters (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_update_address_taken (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_convert_switch (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_tree_switch_shortcut (gcc::context *ctxt);
 
 /* Current optimization pass.  */
 extern opt_pass *current_pass;
diff --git a/gcc/tree-switch-shortcut.c b/gcc/tree-switch-shortcut.c
new file mode 100644
index 0000000..4518f79
--- /dev/null
+++ b/gcc/tree-switch-shortcut.c
@@ -0,0 +1,438 @@ 
+/* Switch shortcutting optimization for GNU C
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   Contributed by Steve Ellcey (steve.ellcey@imgtec.com).
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+/* This file implements an optimization where, when a variable is set
+   to a constant value and there is a path that leads from that definition
+   to a switch statement that uses that variable as its controlling expression
+   we duplicate the blocks on this path and change the jump to the switch
+   statement with a direct jump to the label of the switch block that control
+   would goto based on the value of the variable.  This can come up in
+   loops/switch statements that implement state machines.
+
+   Example (modified from PR 54742):
+
+   foo(char *str) {
+     int sum=0;
+     int state=0;
+     char *s=str;
+     for (; *s; s++) {
+       char c=*s;
+       <CODE BLOCK 1>
+       switch (state) {
+         case 0:
+           if (c == '+')       { state = 1; sum += 9; }
+           else if (c != '-')  { state = 2; sum += 3; }
+           break;
+         case 1:
+           if (c == '+')       { state = 2; sum += 4; }
+           else if (c == '-')  { state = 0; sum += 7; }
+           break;
+         case 2:
+           if (c == '+')       { state = 0; sum += 8; }
+           else if (c == '-')  { state = 1; sum += 2; }
+           break;
+       }
+       <CODE BLOCK 2>
+     }
+     return state;
+   }
+
+  This pass will convert the code inside 'case 0' to something like:
+
+    case 0:
+      if (c == '+')      { state = 1; sum += 9;
+                           <CODE BLOCK 2>
+                           s++; if (!s) goto loop_exit;
+                           <CODE BLOCK 1>
+                           goto case_1; }
+      else if (c != '-') { state = 2; sum += 3;
+                           <CODE BLOCK 2>
+                           s++; if (!s) goto loop_exit;
+                           <CODE BLOCK 1>
+                           goto case_2; }
+      else               { <CODE BLOCK 2>
+			   s++; if (!s) goto exit;
+                           <CODE BLOCK 1>
+                           goto case_0; }
+
+Similar transformations would apply to the other parts of the switch
+statement.  This obviously can lead to a lot of code duplication but
+it can also result in faster code since we are replacing two jumps
+(one indirect) with a single direct jump.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "params.h"
+#include "flags.h"
+#include "tree.h"
+#include "tree-pass.h"
+#include "basic-block.h"
+#include "function.h"
+#include "hash-table.h"
+#include "tree-ssa-alias.h"
+#include "tree-cfg.h"
+#include "tree-ssa-operands.h"
+#include "tree-inline.h"
+#include "gimple-expr.h"
+#include "is-a.h"
+#include "gimple.h"
+#include "tree-phinodes.h"
+#include "gimple-iterator.h"
+#include "gimple-ssa.h"
+#include "ssa-iterators.h"
+#include "tree-into-ssa.h"
+#include "cfgloop.h"
+
+/* Helper function for find_path, visited_bbs is used to make sure we don't
+   fall into an infinite loop.  */
+
+static int
+find_path_1 (basic_block start_bb, basic_block end_bb,
+	     hash_set<basic_block> *visited_bbs)
+{
+  edge_iterator ei;
+  edge e;
+
+  if (start_bb == end_bb) return 1;
+
+  if (!visited_bbs->add (start_bb))
+    {
+      FOR_EACH_EDGE (e, ei, start_bb->succs)
+	if (find_path_1 (e->dest, end_bb, visited_bbs))
+	  return 1;
+    }
+  return 0;
+}
+
+/* Return 1 if there is a path from start_bb to end_bb and 0 if there
+   is not.  There may be multiple paths from start_bb to end_bb.  */
+
+static int
+find_path (basic_block start_bb, basic_block end_bb)
+{
+  edge_iterator ei;
+  edge e;
+  hash_set<basic_block> visited_bbs;
+  int p = 0;
+
+  if (start_bb == end_bb) return 1;
+
+  if (!visited_bbs.add (start_bb))
+    {
+      FOR_EACH_EDGE (e, ei, start_bb->succs)
+	if (find_path_1 (e->dest, end_bb, &visited_bbs))
+	  {
+	    p = 1;
+	    break;
+	  }
+    }
+  return p;
+}
+
+
+/* We save the paths we want to copy in bbs_list_array.  n_bbs_list is the
+   number of paths saved, bbs_list_array[i] is the list of basic blocks in
+   one path.  Each path starts with the block where a variable is assigned
+   a constant value (bbs_list_array[i][0]) and ends with the switch statement
+   block (bbs_list_array[i][bbs_list_size[i]-2]) followed by the block that
+   the switch statement is going to go to given the constant value of the
+   variable (bbs_list_array[i][bbs_list_size[i]-1]).  */
+
+struct path_info
+{
+  basic_block **bbs_list_array;
+  int *val_array;
+  int *bbs_list_size;
+  int max_path_count;
+  int max_insn_count;
+  int n_bbs_list;
+};
+
+/* bbs_list[0] is the block with the switch statement,
+   bbs_list[n-1] is the block where the switch statement variable is assigned
+     a constant value,
+   The entries in between make a (reverse) path between the two.
+
+   We don't want to change bb_list, we want to leave that alone and
+   and copy the path to bbs_list_array so that we wind up with a list (array)
+   of paths that we want to update.  We also want to add the block that the
+   switch is going to go to on to the list so that we know which exit from
+   the switch statement is important.  */
+
+static void
+save_new_path (basic_block *bbs_list, int n, tree val, path_info *pi)
+{
+  int i;
+  int insn_count;
+  basic_block bb;
+  edge switch_taken_edge;
+  gimple_stmt_iterator gsi;
+
+  if (n <= 1) return;
+
+  if (pi->n_bbs_list >= pi->max_path_count)
+    return;
+
+  /* Put the blocks in 'correct' order and add in where we want to go after
+     the switch statement, We want to leave bbs_list untouched for future
+     calls.  */
+
+  pi->bbs_list_array[pi->n_bbs_list] = XNEWVEC (basic_block, n+1);
+  for (i = 0; i < n; i++)
+    pi->bbs_list_array[pi->n_bbs_list][i] = bbs_list[n-i-1];
+
+  switch_taken_edge = find_taken_edge (bbs_list[0], val);
+  pi->bbs_list_array[pi->n_bbs_list][n] = switch_taken_edge->dest;
+
+  pi->bbs_list_size[pi->n_bbs_list] = n + 1;
+  pi->val_array[pi->n_bbs_list] = (int) TREE_INT_CST_LOW (val);
+
+  /* Count how many instructions are in the blocks we are going to
+     duplicate and if there are too many do not save this path
+     (return without incrementing n_bbs_list).  */
+
+  insn_count = 0;
+  for (i = 1; i < n; i++)
+    {
+      bb = pi->bbs_list_array[pi->n_bbs_list][i];
+      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+	insn_count += estimate_num_insns (gsi_stmt (gsi), &eni_size_weights);
+    }
+
+  if (insn_count > pi->max_insn_count)
+    return;
+
+  pi->n_bbs_list = pi->n_bbs_list + 1;
+}
+
+/* switch_stmt is a switch statement whose switch index expression
+   is the variable expr.  We trace the value of the variable back
+   through any phi nodes looking for places where it gets a constant
+   value and save the path in bbs_list.  Then we call save_new_path
+   to create a list of such paths.  */
+
+static void
+process_switch (tree expr, gimple switch_stmt,
+		hash_set<gimple> *visited_phis,
+	        basic_block *bbs_list, int n,
+		path_info *pi)
+{
+  gimple def_stmt;
+  tree var;
+  unsigned int i;
+  edge e;
+  edge_iterator ei;
+  basic_block bbx;
+  basic_block var_bb;
+  int e_count;
+
+  gcc_assert (gimple_code (switch_stmt) == GIMPLE_SWITCH);
+  var = SSA_NAME_VAR (expr);
+  def_stmt = SSA_NAME_DEF_STMT (expr);
+  var_bb = gimple_bb (def_stmt);
+
+  if (var == NULL || var_bb == NULL) return;
+
+  /* We have a variable definition (var) that is defined in var_bb,
+     We want to put the path from var_bb to the current bb into the
+     bbs_list.  If there is more then one path, skip this and don't
+     try to do the optimization.  */
+
+  bbx = bbs_list[n-1];
+  while (bbx != var_bb)
+    {
+      e_count = 0;
+      FOR_EACH_EDGE (e, ei, bbx->preds)
+	if (find_path (var_bb, e->src))
+	  {
+	    bbs_list[n] = e->src;
+	    n = n + 1;
+	    e_count = e_count + 1;
+	  }
+      if (e_count != 1) return;
+      bbx = bbs_list[n-1];
+    }
+
+  if (gimple_code (def_stmt) == GIMPLE_PHI
+      && !visited_phis->add (def_stmt))
+    {
+      for (i = 0; i < gimple_phi_num_args (def_stmt); i++)
+	{
+	  tree arg = gimple_phi_arg_def (def_stmt, i);
+	  if (arg && TREE_CODE (arg) == INTEGER_CST)
+	    {
+	      /* const char *name = IDENTIFIER_POINTER (DECL_NAME (var)); */
+	      bbs_list[n] = gimple_phi_arg_edge (def_stmt, i)->src;
+	      save_new_path (bbs_list, n + 1, arg, pi);
+	    }
+	  else if (arg && TREE_CODE (arg) == SSA_NAME)
+	    {
+	      bbs_list[n] = gimple_phi_arg_edge (def_stmt, i)->src;
+	      process_switch (arg, switch_stmt, visited_phis, bbs_list, n+1, pi);
+	    }
+	}
+    }
+}
+
+/* Find paths that lead from blocks where a variable is assigned a constant
+   value to a switch statement where that variable is used as the switch
+   index.  Save the paths in bbs_list_array so that they can be processed
+   by copy_switch_paths.  */
+
+static unsigned int
+find_switch_shortcuts (function *fun, path_info *pi)
+{
+  basic_block bb;
+  hash_set<gimple> visited_phis;
+  basic_block *bbs_list;
+  int n = 1;
+
+  bbs_list = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
+  FOR_EACH_BB_FN (bb, fun)
+    {
+      gimple stmt = last_stmt (bb);
+      if (stmt && gimple_code (stmt) == GIMPLE_SWITCH)
+	{
+	  tree op = gimple_switch_index (stmt);
+	  tree var = SSA_NAME_VAR (op);
+	  if (var)
+	    {
+	      bbs_list[0] = bb;
+	      process_switch (op, stmt, &visited_phis, bbs_list, n, pi);
+	    }
+	}
+    }
+  XDELETEVEC (bbs_list);
+  return 0;
+}
+
+/* Call gimple_duplicate_sese_region to douplicate the blocks in bb_list.
+   We free and recalculate all ssa and dominance information afterwords
+   because the region being copied is not really SESE and so we cannot
+   trust gimple_duplicate_sese_region to correctly update the dataflow
+   information.  */
+
+static void
+duplicate_blocks (basic_block *bb_list, int bb_count)
+{
+  edge orig_edge, exit_edge;
+  loop_p loop;
+
+  orig_edge = find_edge (bb_list[0], bb_list[1]);
+  exit_edge = find_edge (bb_list[bb_count-2], bb_list[bb_count-1]);
+  /* Earlier block duplications may have removed the path that we
+     saved earlier and are trying to duplicate here.  */
+  if (orig_edge != NULL && exit_edge != NULL)
+    {
+      gimple_duplicate_sese_region (orig_edge, exit_edge, &bb_list[1],
+				    bb_count-2, NULL, false);
+      free_dominance_info (CDI_DOMINATORS);
+      update_ssa (TODO_update_ssa);
+      calculate_dominance_info (CDI_DOMINATORS);
+      loops_state_set (LOOPS_NEED_FIXUP);
+    }
+}
+
+/* Go through the paths saved in bbs_list_array and make copies of them.  */
+
+static void
+copy_switch_paths (path_info *pi)
+{
+  int i;
+
+  /* Process each path in bbs_list_size.  */
+  for (i = 0; i < pi->n_bbs_list; i++)
+    {
+    /* For each path in bbs_list_size loop through and copy each block in
+       the path (except the first on where the constant is assigned and
+       the final one where the switch statement goes to.  */
+
+    if (!single_pred_p (pi->bbs_list_array[i][1]))
+      duplicate_blocks (pi->bbs_list_array[i], pi->bbs_list_size[i]);
+    }
+}
+
+
+/* Main entry for the tree if-conversion pass.  */
+
+namespace {
+
+const pass_data pass_data_tree_switch_shortcut =
+{
+  GIMPLE_PASS, /* type */
+  "switch_shortcut", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_TREE_SWITCH_SHORTCUT, /* tv_id */
+  ( PROP_cfg | PROP_ssa ), /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  TODO_update_ssa, /* todo_flags_finish */
+};
+
+class pass_tree_switch_shortcut : public gimple_opt_pass
+{
+public:
+  pass_tree_switch_shortcut (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_tree_switch_shortcut, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *)
+    {
+      return flag_tree_switch_shortcut;
+    }
+  virtual unsigned int execute (function *);
+
+}; // class pass_tree_switch_shortcut
+
+unsigned int
+pass_tree_switch_shortcut::execute (function *fun)
+{
+  int i;
+  path_info *pi;
+
+  pi = XNEW (path_info);
+  pi->n_bbs_list = 0;
+  pi->max_insn_count = PARAM_VALUE (PARAM_MAX_SWITCH_INSNS);
+  pi->max_path_count = PARAM_VALUE (PARAM_MAX_SWITCH_PATHS);
+  pi->val_array = XNEWVEC (int, pi->max_path_count);
+  pi->bbs_list_size = XNEWVEC (int, pi->max_path_count);
+  pi->bbs_list_array = XNEWVEC (basic_block *, pi->max_path_count);
+  find_switch_shortcuts (fun, pi);
+  copy_switch_paths (pi);
+  XDELETEVEC (pi->val_array);
+  XDELETEVEC (pi->bbs_list_size);
+  for (i = 0; i < pi->n_bbs_list; i++)
+    XDELETEVEC (pi->bbs_list_array[i]);
+  XDELETEVEC (pi->bbs_list_array);
+  XDELETE (pi);
+  return 0;
+}
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_tree_switch_shortcut (gcc::context *ctxt)
+{
+  return new pass_tree_switch_shortcut (ctxt);
+}