diff mbox series

[V3] Loop split upon semi-invariant condition (PR tree-optimization/89134)

Message ID BYAPR01MB4869701BB6BAB25FEFCB8384F7B00@BYAPR01MB4869.prod.exchangelabs.com
State New
Headers show
Series [V3] Loop split upon semi-invariant condition (PR tree-optimization/89134) | expand

Commit Message

Feng Xue OS Sept. 12, 2019, 10:23 a.m. UTC
---

Comments

Philipp Tomsich Oct. 15, 2019, 3:49 p.m. UTC | #1
Feng,

This looks good from our side and has shown useful (combined with the other 2 patches) in
our testing with SPEC2017.
Given that this looks final: what is the plan for getting this merged?

Thanks,
Philipp.

> On 12.09.2019, at 12:23, Feng Xue OS <fxue at os dot amperecomputing dot com> wrote:
> 
> ---
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 1391a562c35..28981fa1048 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -11418,6 +11418,19 @@ The maximum number of branches unswitched in a single loop.
> @item lim-expensive
> The minimum cost of an expensive expression in the loop invariant motion.
> 
> +@item max-cond-loop-split-insns
> +In a loop, if a branch of a conditional statement is selected since certain
> +loop iteration, any operand that contributes to computation of the conditional
> +expression remains unchanged in all following iterations, the statement is
> +semi-invariant, upon which we can do a kind of loop split transformation.
> +@option{max-cond-loop-split-insns} controls maximum number of insns to be
> +added due to loop split on semi-invariant conditional statement.
> +
> +@item min-cond-loop-split-prob
> +When FDO profile information is available, @option{min-cond-loop-split-prob}
> +specifies minimum threshold for probability of semi-invariant condition
> +statement to trigger loop split.
> +
> @item iv-consider-all-candidates-bound
> Bound on number of candidates for induction variables, below which
> all candidates are considered for each use in induction variable
> diff --git a/gcc/params.def b/gcc/params.def
> index 13001a7bb2d..12bc8c26c9e 100644
> --- a/gcc/params.def
> +++ b/gcc/params.def
> @@ -386,6 +386,20 @@ DEFPARAM(PARAM_MAX_UNSWITCH_LEVEL,
> 	"The maximum number of unswitchings in a single loop.",
> 	3, 0, 0)
> 
> +/* The maximum number of increased insns due to loop split on semi-invariant
> +   condition statement.  */
> +DEFPARAM(PARAM_MAX_COND_LOOP_SPLIT_INSNS,
> +	"max-cond-loop-split-insns",
> +	"The maximum number of insns to be added due to loop split on "
> +	"semi-invariant condition statement.",
> +	100, 0, 0)
> +
> +DEFPARAM(PARAM_MIN_COND_LOOP_SPLIT_PROB,
> +	"min-cond-loop-split-prob",
> +	"The minimum threshold for probability of semi-invariant condition "
> +	"statement to trigger loop split.",
> +	30, 0, 100)
> +
> /* The maximum number of insns in loop header duplicated by the copy loop
>    headers pass.  */
> DEFPARAM(PARAM_MAX_LOOP_HEADER_INSNS,
> 
> diff --git a/gcc/testsuite/g++.dg/tree-ssa/loop-cond-split-1.C b/gcc/testsuite/g++.dg/tree-ssa/loop-cond-split-1.C
> new file mode 100644
> index 00000000000..51f9da22fc7
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/tree-ssa/loop-cond-split-1.C
> @@ -0,0 +1,33 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -fdump-tree-lsplit-details" } */
> +
> +#include <string>
> +#include <map>
> +
> +using namespace std;
> +
> +class  A
> +{
> +public:
> +  bool empty;
> +  void set (string s);
> +};
> +
> +class  B
> +{
> +  map<int, string> m;
> +  void f ();
> +};
> +
> +extern A *ga;
> +
> +void B::f ()
> +{
> +  for (map<int, string>::iterator iter = m.begin (); iter != m.end (); ++iter)
> +    {
> +      if (ga->empty)
> +        ga->set (iter->second);
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump-times "split loop 1 at branch" 1 "lsplit" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-cond-split-1.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-cond-split-1.c
> new file mode 100644
> index 00000000000..bbd522d6bcd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-cond-split-1.c
> @@ -0,0 +1,23 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -fdump-tree-lsplit-details" } */
> +
> +__attribute__((pure)) __attribute__((noinline)) int inc (int i)
> +{
> +  return i + 1;
> +}
> +
> +extern int do_something (void);
> +extern int b;
> +
> +void test(int n)
> +{
> +  int i;
> +
> +  for (i = 0; i < n; i = inc (i))
> +    {
> +      if (b)
> +        b = do_something();
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump-times "split loop 1 at branch" 1 "lsplit" } } */
> diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
> index f5f083384bc..e4a1b6d2019 100644
> --- a/gcc/tree-ssa-loop-split.c
> +++ b/gcc/tree-ssa-loop-split.c
> @@ -32,7 +32,10 @@ along with GCC; see the file COPYING3.  If not see
> #include "tree-ssa-loop.h"
> #include "tree-ssa-loop-manip.h"
> #include "tree-into-ssa.h"
> +#include "tree-inline.h"
> +#include "tree-cfgcleanup.h"
> #include "cfgloop.h"
> +#include "params.h"
> #include "tree-scalar-evolution.h"
> #include "gimple-iterator.h"
> #include "gimple-pretty-print.h"
> @@ -40,7 +43,9 @@ along with GCC; see the file COPYING3.  If not see
> #include "gimple-fold.h"
> #include "gimplify-me.h"
> 
> -/* This file implements loop splitting, i.e. transformation of loops like
> +/* This file implements two kinds of loop splitting.
> +
> +   One transformation of loops like:
> 
>    for (i = 0; i < 100; i++)
>      {
> @@ -612,6 +617,722 @@ split_loop (class loop *loop1, class tree_niter_desc *niter)
>   return changed;
> }
> 
> +/* Another transformation of loops like:
> +
> +   for (i = INIT (); CHECK (i); i = NEXT ())
> +     {
> +       if (expr (a_1, a_2, ..., a_n))  // expr is pure
> +         a_j = ...;  // change at least one a_j
> +       else
> +         S;          // not change any a_j
> +     }
> +
> +   into:
> +
> +   for (i = INIT (); CHECK (i); i = NEXT ())
> +     {
> +       if (expr (a_1, a_2, ..., a_n))
> +         a_j = ...;
> +       else
> +         {
> +           S;
> +           i = NEXT ();
> +           break;
> +         }
> +     }
> +
> +   for (; CHECK (i); i = NEXT ())
> +     {
> +       S;
> +     }
> +
> +   */
> +
> +/* Data structure to hold temporary information during loop split upon
> +   semi-invariant conditional statement.  */
> +class split_info {
> +public:
> +  /* Array of all basic blocks in a loop, returned by get_loop_body().  */
> +  basic_block *bbs;
> +
> +  /* All memory store/clobber statements in a loop.  */
> +  auto_vec<gimple *> memory_stores;
> +
> +  /* Whether above memory stores vector has been filled.  */
> +  int need_init;
> +
> +  split_info () : bbs (NULL),  need_init (true) { }
> +
> +  ~split_info ()
> +    {
> +      if (bbs)
> +	free (bbs);
> +    }
> +};
> +
> +/* Find all statements with memory-write effect in LOOP, including memory
> +   store and non-pure function call, and keep those in a vector.  This work
> +   is only done one time, for the vector should be constant during analysis
> +   stage of semi-invariant condition.  */
> +
> +static void
> +find_vdef_in_loop (struct loop *loop)
> +{
> +  split_info *info = (split_info *) loop->aux;
> +  gphi *vphi = get_virtual_phi (loop->header);
> +
> +  /* Indicate memory store vector has been filled.  */
> +  info->need_init = false;
> +
> +  /* If loop contains memory operation, there must be a virtual PHI node in
> +     loop header basic block.  */
> +  if (vphi == NULL)
> +    return;
> +
> +  /* All virtual SSA names inside the loop are connected to be a cyclic
> +     graph via virtual PHI nodes.  The virtual PHI node in loop header just
> +     links the first and the last virtual SSA names, by using the last as
> +     PHI operand to define the first.  */
> +  const edge latch = loop_latch_edge (loop);
> +  const tree first = gimple_phi_result (vphi);
> +  const tree last = PHI_ARG_DEF_FROM_EDGE (vphi, latch);
> +
> +  /* The virtual SSA cyclic graph might consist of only one SSA name, who
> +     is defined by itself.
> +
> +       .MEM_1 = PHI <.MEM_2(loop entry edge), .MEM_1(latch edge)>
> +
> +     This means the loop contains only memory loads, so we can skip it.  */
> +  if (first == last)
> +    return;
> +
> +  auto_vec<gimple *> other_stores;
> +  auto_vec<tree> worklist;
> +  auto_bitmap visited;
> +
> +  bitmap_set_bit (visited, SSA_NAME_VERSION (first));
> +  bitmap_set_bit (visited, SSA_NAME_VERSION (last));
> +  worklist.safe_push (last);
> +
> +  do
> +    {
> +      tree vuse = worklist.pop ();
> +      gimple *stmt = SSA_NAME_DEF_STMT (vuse);
> +
> +      /* We mark the first and last SSA names as visited at the beginning,
> +	 and reversely start the process from the last SSA name towards the
> +	 first, which ensures that this do-while will not touch SSA names
> +	 defined outside of the loop.  */
> +      gcc_assert (gimple_bb (stmt)
> +		  && flow_bb_inside_loop_p (loop, gimple_bb (stmt)));
> +
> +      if (gimple_code (stmt) == GIMPLE_PHI)
> +	{
> +	  gphi *phi = as_a <gphi *> (stmt);
> +
> +	  for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
> +	    {
> +	      tree arg = gimple_phi_arg_def (stmt, i);
> +
> +	      if (bitmap_set_bit (visited, SSA_NAME_VERSION (arg)))
> +		worklist.safe_push (arg);
> +	    }
> +	}
> +      else
> +	{
> +	  tree prev = gimple_vuse (stmt);
> +
> +	  /* Non-pure call statement is conservatively assumed to impact all
> +	     memory locations.  So place call statements ahead of other memory
> +	     stores in the vector with an idea of of using them as shortcut
> +	     terminators to memory alias analysis.  */
> +	  if (gimple_code (stmt) == GIMPLE_CALL)
> +	    info->memory_stores.safe_push (stmt);
> +	  else
> +	    other_stores.safe_push (stmt);
> +
> +	  if (bitmap_set_bit (visited, SSA_NAME_VERSION (prev)))
> +	    worklist.safe_push (prev);
> +	}
> +    } while (!worklist.is_empty ());
> +
> +    info->memory_stores.safe_splice (other_stores);
> +}
> +
> +
> +/* Given STMT, memory load or pure call statement, check whether it is impacted
> +   by some memory store in LOOP, excluding trace starting from SKIP_HEAD (the
> +   trace is composed of SKIP_HEAD and those basic block dominated by it, always
> +   corresponds to one branch of a conditional statement).  If SKIP_HEAD is
> +   NULL, all basic blocks of LOOP are checked.  */
> +
> +static bool
> +vuse_semi_invariant_p (struct loop *loop, gimple *stmt,
> +		       const_basic_block skip_head)
> +{
> +  split_info *info = (split_info *) loop->aux;
> +
> +  /* Collect memory store/clobber statements if have not do that.  */
> +  if (info->need_init)
> +    find_vdef_in_loop (loop);
> +
> +  tree rhs = is_gimple_assign (stmt) ? gimple_assign_rhs1 (stmt) : NULL_TREE;
> +  ao_ref ref;
> +  gimple *store;
> +  unsigned i;
> +
> +  ao_ref_init (&ref, rhs);
> +
> +  FOR_EACH_VEC_ELT (info->memory_stores, i, store)
> +    {
> +      /* Skip basic blocks dominated by SKIP_HEAD, if non-NULL.  */
> +      if (skip_head
> +	  && dominated_by_p (CDI_DOMINATORS, gimple_bb (store), skip_head))
> +	continue;
> +
> +      if (!ref.ref || stmt_may_clobber_ref_p_1 (store, &ref))
> +	return false;
> +    }
> +
> +  return true;
> +}
> +
> +/* Forward declaration.  */
> +
> +static bool
> +stmt_semi_invariant_p (struct loop *loop, gimple *stmt,
> +		       const_basic_block skip_head);
> +
> +/* Suppose one condition branch, led by SKIP_HEAD, is not executed since
> +   certain iteration of LOOP, check whether an SSA name (NAME) remains
> +   unchanged in next interation.  We call this characterisic as semi-
> +   invariantness.  SKIP_HEAD might be NULL, if so, nothing excluded, all
> +   basic blocks and control flows in the loop will be considered.  If non-
> +   NULL, SSA name to check is supposed to be defined before SKIP_HEAD.  */
> +
> +static bool
> +ssa_semi_invariant_p (struct loop *loop, const tree name,
> +		      const_basic_block skip_head)
> +{
> +  gimple *def = SSA_NAME_DEF_STMT (name);
> +  const_basic_block def_bb = gimple_bb (def);
> +
> +  /* An SSA name defined outside a loop is definitely semi-invariant.  */
> +  if (!def_bb || !flow_bb_inside_loop_p (loop, def_bb))
> +    return true;
> +
> +  if (gimple_code (def) == GIMPLE_PHI)
> +    {
> +      /* For PHI node that is not in loop header, its source operands should
> +	 be defined inside the loop, which are seen as loop variant.  */
> +      if (def_bb != loop->header || !skip_head)
> +	return false;
> +
> +      const_edge latch = loop_latch_edge (loop);
> +      tree from = PHI_ARG_DEF_FROM_EDGE (as_a <gphi *> (def), latch);
> +
> +      /* A PHI node in loop header contains two source operands, one is
> +	 initial value, the other is the copy of last iteration through loop
> +	 latch, we call it latch value.  From the PHI node to definition
> +	 of latch value, if excluding branch trace from SKIP_HEAD, there
> +	 is no definition of other version of same variable, SSA name defined
> +	 by the PHI node is semi-invariant.
> +
> +                         loop entry
> +                              |     .--- latch ---.
> +                              |     |             |
> +                              v     v             |
> +                  x_1 = PHI <x_0,  x_3>           |
> +                           |                      |
> +                           v                      |
> +              .------- if (cond) -------.         |
> +              |                         |         |
> +              |                     [ SKIP ]      |
> +              |                         |         |
> +              |                     x_2 = ...     |
> +              |                         |         |
> +              '---- T ---->.<---- F ----'         |
> +                           |                      |
> +                           v                      |
> +                  x_3 = PHI <x_1, x_2>            |
> +                           |                      |
> +                           '----------------------'
> +
> +	Suppose in certain iteration, execution flow in above graph goes
> +	through true branch, which means that one source value to define
> +	x_3 in false branch (x2) is skipped, x_3 only comes from x_1, and
> +	x_1 in next iterations is defined by x_3, we know that x_1 will
> +	never changed if COND always chooses true branch from then on.  */
> +
> +      while (from != name)
> +	{
> +	  /* A new value comes from a CONSTANT.  */
> +	  if (TREE_CODE (from) != SSA_NAME)
> +	    return false;
> +
> +	  gimple *stmt = SSA_NAME_DEF_STMT (from);
> +	  const_basic_block bb = gimple_bb (stmt);
> +
> +	  /* A new value comes from outside of loop.  */
> +	  if (!bb || !flow_bb_inside_loop_p (loop, bb))
> +	    return false;
> +
> +	  from = NULL_TREE;
> +
> +	  if (gimple_code (stmt) == GIMPLE_PHI)
> +	    {
> +	      gphi *phi = as_a <gphi *> (stmt);
> +
> +	      for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
> +		{
> +		  const_edge e = gimple_phi_arg_edge (phi, i);
> +
> +		  /* Not consider redefinitions in excluded basic blocks.  */
> +		  if (!dominated_by_p (CDI_DOMINATORS, e->src, skip_head))
> +		    {
> +		      /* There are more than one source operands that can
> +			 provide value to the SSA name, it is variant.  */
> +		      if (from)
> +			return false;
> +
> +		      from = gimple_phi_arg_def (phi, i);
> +		    }
> +		}
> +	    }
> +	  else if (gimple_code (stmt) == GIMPLE_ASSIGN)
> +	    {
> +	      /* For simple value copy, check its rhs instead.  */
> +	      if (gimple_assign_ssa_name_copy_p (stmt))
> +		from = gimple_assign_rhs1 (stmt);
> +	    }
> +
> +	  /* Any other kind of definition is deemed to introduce a new value
> +	     to the SSA name.  */
> +	  if (!from)
> +	    return false;
> +	}
> +	return true;
> +    }
> +
> +  /* Value originated from volatile memory load or return of normal (non-
> +     const/pure) call should not be treated as constant in each iteration.  */
> +  if (gimple_has_side_effects (def))
> +    return false;
> +
> +  /* Check if any memory store may kill memory load at this place.  */
> +  if (gimple_vuse (def) && !vuse_semi_invariant_p (loop, def, skip_head))
> +    return false;
> +
> +  /* Check operands of definition statement of the SSA name.  */
> +  return stmt_semi_invariant_p (loop, def, skip_head);
> +}
> +
> +/* Check whether STMT is semi-invariant in LOOP, iff all its operands are
> +   semi-invariant.  Trace composed of basic block SKIP_HEAD and basic blocks
> +   dominated by it are excluded from the loop.  */
> +
> +static bool
> +stmt_semi_invariant_p (struct loop *loop, gimple *stmt,
> +		       const_basic_block skip_head)
> +{
> +  ssa_op_iter iter;
> +  tree use;
> +
> +  /* Although operand of a statement might be SSA name, CONSTANT or VARDECL,
> +     here we only need to check SSA name operands.  This is because check on
> +     VARDECL operands, which involve memory loads, must have been done
> +     prior to invocation of this function in vuse_semi_invariant_p.  */
> +  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
> +    {
> +      if (!ssa_semi_invariant_p (loop, use, skip_head))
> +	return false;
> +    }
> +
> +  return true;
> +}
> +
> +/* Determine when conditional statement never transfers execution to one of its
> +   branch, whether we can remove the branch's leading basic block (BRANCH_BB)
> +   and those basic blocks dominated by BRANCH_BB.  */
> +
> +static bool
> +branch_removable_p (basic_block branch_bb)
> +{
> +  if (single_pred_p (branch_bb))
> +    return true;
> +
> +  edge e;
> +  edge_iterator ei;
> +
> +  FOR_EACH_EDGE (e, ei, branch_bb->preds)
> +    {
> +      if (dominated_by_p (CDI_DOMINATORS, e->src, branch_bb))
> +	continue;
> +
> +      if (dominated_by_p (CDI_DOMINATORS, branch_bb, e->src))
> +	continue;
> +
> +       /* The branch can be reached from opposite branch, or from some
> +	  statement not dominated by the conditional statement.  */
> +      return false;
> +    }
> +
> +  return true;
> +}
> +
> +/* Find out which branch of a conditional statement (COND) is invariant in the
> +   execution context of LOOP.  That is: once the branch is selected in certain
> +   iteration of the loop, any operand that contributes to computation of the
> +   conditional statement remains unchanged in all following iterations.  */
> +
> +static edge
> +get_cond_invariant_branch (struct loop *loop, gcond *cond)
> +{
> +  basic_block cond_bb = gimple_bb (cond);
> +  basic_block targ_bb[2];
> +  bool invar[2];
> +  unsigned invar_checks;
> +
> +  for (unsigned i = 0; i < 2; i++)
> +    {
> +      targ_bb[i] = EDGE_SUCC (cond_bb, i)->dest;
> +
> +      /* One branch directs to loop exit, no need to perform loop split upon
> +	 this conditional statement.  Firstly, it is trivial if the exit branch
> +	 is semi-invariant, for the statement is just to break loop.  Secondly,
> +	 if the opposite branch is semi-invariant, it means that the statement
> +	 is real loop-invariant, which is covered by loop unswitch.  */
> +      if (!flow_bb_inside_loop_p (loop, targ_bb[i]))
> +	return NULL;
> +    }
> +
> +  invar_checks = 0;
> +
> +  for (unsigned i = 0; i < 2; i++)
> +    {
> +      invar[!i] = false;
> +
> +      if (!branch_removable_p (targ_bb[i]))
> +	continue;
> +
> +      /* Given a semi-invariant branch, if its opposite branch dominates
> +	 loop latch, it and its following trace will only be executed in
> +	 final iteration of loop, namely it is not part of repeated body
> +	 of the loop.  Similar to the above case that the branch is loop
> +	 exit, no need to split loop.  */
> +      if (dominated_by_p (CDI_DOMINATORS, loop->latch, targ_bb[i]))
> +	continue;
> +
> +      invar[!i] = stmt_semi_invariant_p (loop, cond, targ_bb[i]);
> +      invar_checks++;
> +    }
> +
> +  /* With both branches being invariant (handled by loop unswitch) or
> +     variant is not what we want.  */
> +  if (invar[0] ^ !invar[1])
> +    return NULL;
> +
> +  /* Found a real loop-invariant condition, do nothing.  */
> +  if (invar_checks < 2 && stmt_semi_invariant_p (loop, cond, NULL))
> +    return NULL;
> +
> +  return EDGE_SUCC (cond_bb, (unsigned) invar[1]);
> +}
> +
> +/* Calculate increased code size measured by estimated insn number if applying
> +   loop split upon certain branch (BRANCH_EDGE) of a conditional statement.  */
> +
> +static int
> +compute_added_num_insns (struct loop *loop, const_edge branch_edge)
> +{
> +  basic_block cond_bb = branch_edge->src;
> +  unsigned branch = EDGE_SUCC (cond_bb, 1) == branch_edge;
> +  basic_block opposite_bb = EDGE_SUCC (cond_bb, !branch)->dest;
> +  basic_block *bbs = ((split_info *) loop->aux)->bbs;
> +  int num = 0;
> +
> +  for (unsigned i = 0; i < loop->num_nodes; i++)
> +    {
> +      /* Do no count basic blocks only in opposite branch.  */
> +      if (dominated_by_p (CDI_DOMINATORS, bbs[i], opposite_bb))
> +	continue;
> +
> +      num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights);
> +    }
> +
> +  /* It is unnecessary to evaluate expression of the conditional statement
> +     in new loop that contains only invariant branch.  This expresion should
> +     be constant value (either true or false).  Exclude code size of insns
> +     that contribute to computation of the expression.  */
> +
> +  auto_vec<gimple *> worklist;
> +  hash_set<gimple *> removed;
> +  gimple *stmt = last_stmt (cond_bb);
> +
> +  worklist.safe_push (stmt);
> +  removed.add (stmt);
> +  num -= estimate_num_insns (stmt, &eni_size_weights);
> +
> +  do
> +    {
> +      ssa_op_iter opnd_iter;
> +      use_operand_p opnd_p;
> +
> +      stmt = worklist.pop ();
> +      FOR_EACH_PHI_OR_STMT_USE (opnd_p, stmt, opnd_iter, SSA_OP_USE)
> +	{
> +	  tree opnd = USE_FROM_PTR (opnd_p);
> +
> +	  if (TREE_CODE (opnd) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (opnd))
> +	    continue;
> +
> +	  gimple *opnd_stmt = SSA_NAME_DEF_STMT (opnd);
> +	  use_operand_p use_p;
> +	  imm_use_iterator use_iter;
> +
> +	  if (removed.contains (opnd_stmt)
> +	      || !flow_bb_inside_loop_p (loop, gimple_bb (opnd_stmt)))
> +	    continue;
> +
> +	  FOR_EACH_IMM_USE_FAST (use_p, use_iter, opnd)
> +	    {
> +              gimple *use_stmt = USE_STMT (use_p);
> +
> +	      if (!is_gimple_debug (use_stmt) && !removed.contains (use_stmt))
> +		{
> +		  opnd_stmt = NULL;
> +		  break;
> +		}
> +	    }
> +
> +	  if (opnd_stmt)
> +	    {
> +	      worklist.safe_push (opnd_stmt);
> +	      removed.add (opnd_stmt);
> +	      num -= estimate_num_insns (opnd_stmt, &eni_size_weights);
> +	    }
> +	}
> +    } while (!worklist.is_empty ());
> +
> +  gcc_assert (num >= 0);
> +  return num;
> +}
> +
> +/* Find out loop-invariant branch of a conditional statement (COND) if it has,
> +   and check whether it is eligible and profitable to perform loop split upon
> +   this branch in LOOP.  */
> +
> +static edge
> +get_cond_branch_to_split_loop (struct loop *loop, gcond *cond)
> +{
> +  edge invar_branch = get_cond_invariant_branch (loop, cond);
> +
> +  if (!invar_branch)
> +    return NULL;
> +
> +  profile_probability prob = invar_branch->probability;
> +
> +  /* When accurate profile information is available, and execution
> +     frequency of the branch is too low, just let it go.  */
> +  if (prob.reliable_p ())
> +    {
> +      int thres = PARAM_VALUE (PARAM_MIN_COND_LOOP_SPLIT_PROB);
> +
> +      if (prob < profile_probability::always ().apply_scale (thres, 100))
> +	return NULL;
> +    }
> +
> +  /* Add a threshold for increased code size to disable loop split.  */
> +  if (compute_added_num_insns (loop, invar_branch)
> +      > PARAM_VALUE (PARAM_MAX_COND_LOOP_SPLIT_INSNS))
> +    return NULL;
> +
> +  return invar_branch;
> +}
> +
> +/* Given a loop (LOOP1) with a loop-invariant branch (INVAR_BRANCH) of some
> +   conditional statement, perform loop split transformation illustrated
> +   as the following graph.
> +
> +               .-------T------ if (true) ------F------.
> +               |                    .---------------. |
> +               |                    |               | |
> +               v                    |               v v
> +          pre-header                |            pre-header
> +               | .------------.     |                 | .------------.
> +               | |            |     |                 | |            |
> +               | v            |     |                 | v            |
> +             header           |     |               header           |
> +               |              |     |                 |              |
> +       [ bool r = cond; ]     |     |                 |              |
> +               |              |     |                 |              |
> +      .---- if (r) -----.     |     |        .--- if (true) ---.     |
> +      |                 |     |     |        |                 |     |
> +  invariant             |     |     |    invariant             |     |
> +      |                 |     |     |        |                 |     |
> +      '---T--->.<---F---'     |     |        '---T--->.<---F---'     |
> +               |              |    /                  |              |
> +             stmts            |   /                 stmts            |
> +               |              |  /                    |              |
> +              / \             | /                    / \             |
> +     .-------*   *       [ if (!r) ]        .-------*   *            |
> +     |           |            |             |           |            |
> +     |         latch          |             |         latch          |
> +     |           |            |             |           |            |
> +     |           '------------'             |           '------------'
> +     '------------------------. .-----------'
> +             loop1            | |                   loop2
> +                              v v
> +                             exits
> +
> +   In the graph, loop1 represents the part derived from original one, and
> +   loop2 is duplicated using loop_version (), which corresponds to the part
> +   of original one being splitted out.  In loop1, a new bool temporary (r)
> +   is introduced to keep value of the condition result.  In original latch
> +   edge of loop1, we insert a new conditional statement whose value comes
> +   from previous temporary (r), one of its branch goes back to loop1 header
> +   as a latch edge, and the other branch goes to loop2 pre-header as an entry
> +   edge.  And also in loop2, we abandon the variant branch of the conditional
> +   statement candidate by setting a constant bool condition, based on which
> +   branch is semi-invariant.  */
> +
> +static bool
> +do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
> +{
> +  basic_block cond_bb = invar_branch->src;
> +  bool true_invar = !!(invar_branch->flags & EDGE_TRUE_VALUE);
> +  gcond *cond = as_a <gcond *> (last_stmt (cond_bb));
> +
> +  gcc_assert (cond_bb->loop_father == loop1);
> +
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +   {
> +     fprintf (dump_file, "In %s(), split loop %d at branch<%s>, BB %d\n",
> +	      current_function_name (), loop1->num,
> +	      true_invar ? "T" : "F", cond_bb->index);
> +     print_gimple_stmt (dump_file, cond, 0, TDF_SLIM | TDF_VOPS);
> +   }
> +
> +  initialize_original_copy_tables ();
> +
> +  struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
> +				     profile_probability::always (),
> +				     profile_probability::never (),
> +				     profile_probability::always (),
> +				     profile_probability::always (),
> +				     true);
> +  if (!loop2)
> +    {
> +      free_original_copy_tables ();
> +      return false;
> +    }
> +
> +  /* Generate a bool type temporary to hold result of the condition.  */
> +  tree tmp = make_ssa_name (boolean_type_node);
> +  gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
> +  gimple *stmt = gimple_build_assign (tmp,
> +				      gimple_cond_code (cond),
> +				      gimple_cond_lhs (cond),
> +				      gimple_cond_rhs (cond));
> +
> +  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
> +  gimple_cond_set_condition (cond, EQ_EXPR, tmp, boolean_true_node);
> +  update_stmt (cond);
> +
> +  basic_block cond_bb_copy = get_bb_copy (cond_bb);
> +  gcond *cond_copy = as_a<gcond *> (last_stmt (cond_bb_copy));
> +
> +  /* Replace the condition in loop2 with a bool constant to let PassManager
> +     remove the variant branch after current pass completes.  */
> +  if (true_invar)
> +    gimple_cond_make_true (cond_copy);
> +  else
> +    gimple_cond_make_false (cond_copy);
> +
> +  update_stmt (cond_copy);
> +
> +  /* Insert a new conditional statement on latch edge of loop1.  This
> +     statement acts as a switch to transfer execution from loop1 to loop2,
> +     when loop1 enters into invariant state.  */
> +  basic_block latch_bb = split_edge (loop_latch_edge (loop1));
> +  basic_block break_bb = split_edge (single_pred_edge (latch_bb));
> +  gimple *break_cond = gimple_build_cond (EQ_EXPR, tmp, boolean_true_node,
> +					  NULL_TREE, NULL_TREE);
> +
> +  gsi = gsi_last_bb (break_bb);
> +  gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT);
> +
> +  edge to_loop1 = single_succ_edge (break_bb);
> +  edge to_loop2 = make_edge (break_bb, loop_preheader_edge (loop2)->src, 0);
> +
> +  to_loop1->flags &= ~EDGE_FALLTHRU;
> +  to_loop1->flags |= true_invar ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE;
> +  to_loop2->flags |= true_invar ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
> +
> +  update_ssa (TODO_update_ssa);
> +
> +  /* Due to introduction of a control flow edge from loop1 latch to loop2
> +     pre-header, we should update PHIs in loop2 to reflect this connection
> +     between loop1 and loop2.  */
> +  connect_loop_phis (loop1, loop2, to_loop2);
> +
> +  free_original_copy_tables ();
> +
> +  rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop1);
> +
> +  return true;
> +}
> +
> +/* Traverse all conditional statements in LOOP, to find out a good candidate
> +   upon which we can do loop split.  */
> +
> +static bool
> +split_loop_on_cond (struct loop *loop)
> +{
> +  split_info *info = new split_info ();
> +  basic_block *bbs = info->bbs = get_loop_body (loop);
> +  bool do_split = false;
> +
> +  /* Allocate an area to keep temporary info, and associate its address
> +     with loop aux field.  */
> +  loop->aux = info;
> +
> +  for (unsigned i = 0; i < loop->num_nodes; i++)
> +    {
> +      basic_block bb = bbs[i];
> +
> +      /* We only consider conditional statement, which be executed at most once
> +	 in each iteration of the loop.  So skip statements in inner loops.  */
> +      if ((bb->loop_father != loop) || (bb->flags & BB_IRREDUCIBLE_LOOP))
> +	continue;
> +
> +      /* Actually this check is not a must constraint. With it, we can ensure
> +	 conditional statement will always be executed in each iteration. */
> +      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
> +	continue;
> +
> +      gimple *last = last_stmt (bb);
> +
> +      if (!last || gimple_code (last) != GIMPLE_COND)
> +	continue;
> +
> +      gcond *cond = as_a <gcond *> (last);
> +      edge branch_edge = get_cond_branch_to_split_loop (loop, cond);
> +
> +      if (branch_edge)
> +	{
> +	  do_split_loop_on_cond (loop, branch_edge);
> +	  do_split = true;
> +	  break;
> +	}
> +    }
> +
> +  delete info;
> +  loop->aux = NULL;
> +
> +  return do_split;
> +}
> +
> /* Main entry point.  Perform loop splitting on all suitable loops.  */
> 
> static unsigned int
> @@ -662,6 +1383,32 @@ tree_ssa_split_loops (void)
> 	}
>     }
> 
> +  if (changed)
> +    {
> +      cleanup_tree_cfg ();
> +      changed = false;
> +    }
> +
> +  /* Perform loop splitting for suitable if-conditions in all loops.  */
> +  FOR_EACH_LOOP (loop, LI_INCLUDE_ROOT)
> +    loop->aux = NULL;
> +
> +  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
> +    {
> +      if (loop->aux)
> +        {
> +	  loop_outer (loop)->aux = loop;
> +	  continue;
> +	}
> +
> +      if (!optimize_loop_for_size_p (loop)
> +	  && split_loop_on_cond (loop))
> +	{
> +	  loop_outer (loop)->aux = loop;
> +	  changed = true;
> +	}
> +    }
> +
>   FOR_EACH_LOOP (loop, LI_INCLUDE_ROOT)
>     loop->aux = NULL;
> 
> -- 
> 2.17.1
>
Michael Matz Oct. 15, 2019, 4:01 p.m. UTC | #2
Hi,

On Tue, 15 Oct 2019, Philipp Tomsich wrote:

> This looks good from our side and has shown useful (combined with the other 2 patches) in
> our testing with SPEC2017.
> Given that this looks final: what is the plan for getting this merged?

I'll get to review this v3 version this week.


Ciao,
Michael.

> 
> Thanks,
> Philipp.
> 
> > On 12.09.2019, at 12:23, Feng Xue OS <fxue at os dot amperecomputing dot com> wrote:
> > 
> > ---
> > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> > index 1391a562c35..28981fa1048 100644
> > --- a/gcc/doc/invoke.texi
> > +++ b/gcc/doc/invoke.texi
> > @@ -11418,6 +11418,19 @@ The maximum number of branches unswitched in a single loop.
> > @item lim-expensive
> > The minimum cost of an expensive expression in the loop invariant motion.
> > 
> > +@item max-cond-loop-split-insns
> > +In a loop, if a branch of a conditional statement is selected since certain
> > +loop iteration, any operand that contributes to computation of the conditional
> > +expression remains unchanged in all following iterations, the statement is
> > +semi-invariant, upon which we can do a kind of loop split transformation.
> > +@option{max-cond-loop-split-insns} controls maximum number of insns to be
> > +added due to loop split on semi-invariant conditional statement.
> > +
> > +@item min-cond-loop-split-prob
> > +When FDO profile information is available, @option{min-cond-loop-split-prob}
> > +specifies minimum threshold for probability of semi-invariant condition
> > +statement to trigger loop split.
> > +
> > @item iv-consider-all-candidates-bound
> > Bound on number of candidates for induction variables, below which
> > all candidates are considered for each use in induction variable
> > diff --git a/gcc/params.def b/gcc/params.def
> > index 13001a7bb2d..12bc8c26c9e 100644
> > --- a/gcc/params.def
> > +++ b/gcc/params.def
> > @@ -386,6 +386,20 @@ DEFPARAM(PARAM_MAX_UNSWITCH_LEVEL,
> > 	"The maximum number of unswitchings in a single loop.",
> > 	3, 0, 0)
> > 
> > +/* The maximum number of increased insns due to loop split on semi-invariant
> > +   condition statement.  */
> > +DEFPARAM(PARAM_MAX_COND_LOOP_SPLIT_INSNS,
> > +	"max-cond-loop-split-insns",
> > +	"The maximum number of insns to be added due to loop split on "
> > +	"semi-invariant condition statement.",
> > +	100, 0, 0)
> > +
> > +DEFPARAM(PARAM_MIN_COND_LOOP_SPLIT_PROB,
> > +	"min-cond-loop-split-prob",
> > +	"The minimum threshold for probability of semi-invariant condition "
> > +	"statement to trigger loop split.",
> > +	30, 0, 100)
> > +
> > /* The maximum number of insns in loop header duplicated by the copy loop
> >    headers pass.  */
> > DEFPARAM(PARAM_MAX_LOOP_HEADER_INSNS,
> > 
> > diff --git a/gcc/testsuite/g++.dg/tree-ssa/loop-cond-split-1.C b/gcc/testsuite/g++.dg/tree-ssa/loop-cond-split-1.C
> > new file mode 100644
> > index 00000000000..51f9da22fc7
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/tree-ssa/loop-cond-split-1.C
> > @@ -0,0 +1,33 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O3 -fdump-tree-lsplit-details" } */
> > +
> > +#include <string>
> > +#include <map>
> > +
> > +using namespace std;
> > +
> > +class  A
> > +{
> > +public:
> > +  bool empty;
> > +  void set (string s);
> > +};
> > +
> > +class  B
> > +{
> > +  map<int, string> m;
> > +  void f ();
> > +};
> > +
> > +extern A *ga;
> > +
> > +void B::f ()
> > +{
> > +  for (map<int, string>::iterator iter = m.begin (); iter != m.end (); ++iter)
> > +    {
> > +      if (ga->empty)
> > +        ga->set (iter->second);
> > +    }
> > +}
> > +
> > +/* { dg-final { scan-tree-dump-times "split loop 1 at branch" 1 "lsplit" } } */
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-cond-split-1.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-cond-split-1.c
> > new file mode 100644
> > index 00000000000..bbd522d6bcd
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-cond-split-1.c
> > @@ -0,0 +1,23 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O3 -fdump-tree-lsplit-details" } */
> > +
> > +__attribute__((pure)) __attribute__((noinline)) int inc (int i)
> > +{
> > +  return i + 1;
> > +}
> > +
> > +extern int do_something (void);
> > +extern int b;
> > +
> > +void test(int n)
> > +{
> > +  int i;
> > +
> > +  for (i = 0; i < n; i = inc (i))
> > +    {
> > +      if (b)
> > +        b = do_something();
> > +    }
> > +}
> > +
> > +/* { dg-final { scan-tree-dump-times "split loop 1 at branch" 1 "lsplit" } } */
> > diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
> > index f5f083384bc..e4a1b6d2019 100644
> > --- a/gcc/tree-ssa-loop-split.c
> > +++ b/gcc/tree-ssa-loop-split.c
> > @@ -32,7 +32,10 @@ along with GCC; see the file COPYING3.  If not see
> > #include "tree-ssa-loop.h"
> > #include "tree-ssa-loop-manip.h"
> > #include "tree-into-ssa.h"
> > +#include "tree-inline.h"
> > +#include "tree-cfgcleanup.h"
> > #include "cfgloop.h"
> > +#include "params.h"
> > #include "tree-scalar-evolution.h"
> > #include "gimple-iterator.h"
> > #include "gimple-pretty-print.h"
> > @@ -40,7 +43,9 @@ along with GCC; see the file COPYING3.  If not see
> > #include "gimple-fold.h"
> > #include "gimplify-me.h"
> > 
> > -/* This file implements loop splitting, i.e. transformation of loops like
> > +/* This file implements two kinds of loop splitting.
> > +
> > +   One transformation of loops like:
> > 
> >    for (i = 0; i < 100; i++)
> >      {
> > @@ -612,6 +617,722 @@ split_loop (class loop *loop1, class tree_niter_desc *niter)
> >   return changed;
> > }
> > 
> > +/* Another transformation of loops like:
> > +
> > +   for (i = INIT (); CHECK (i); i = NEXT ())
> > +     {
> > +       if (expr (a_1, a_2, ..., a_n))  // expr is pure
> > +         a_j = ...;  // change at least one a_j
> > +       else
> > +         S;          // not change any a_j
> > +     }
> > +
> > +   into:
> > +
> > +   for (i = INIT (); CHECK (i); i = NEXT ())
> > +     {
> > +       if (expr (a_1, a_2, ..., a_n))
> > +         a_j = ...;
> > +       else
> > +         {
> > +           S;
> > +           i = NEXT ();
> > +           break;
> > +         }
> > +     }
> > +
> > +   for (; CHECK (i); i = NEXT ())
> > +     {
> > +       S;
> > +     }
> > +
> > +   */
> > +
> > +/* Data structure to hold temporary information during loop split upon
> > +   semi-invariant conditional statement.  */
> > +class split_info {
> > +public:
> > +  /* Array of all basic blocks in a loop, returned by get_loop_body().  */
> > +  basic_block *bbs;
> > +
> > +  /* All memory store/clobber statements in a loop.  */
> > +  auto_vec<gimple *> memory_stores;
> > +
> > +  /* Whether above memory stores vector has been filled.  */
> > +  int need_init;
> > +
> > +  split_info () : bbs (NULL),  need_init (true) { }
> > +
> > +  ~split_info ()
> > +    {
> > +      if (bbs)
> > +	free (bbs);
> > +    }
> > +};
> > +
> > +/* Find all statements with memory-write effect in LOOP, including memory
> > +   store and non-pure function call, and keep those in a vector.  This work
> > +   is only done one time, for the vector should be constant during analysis
> > +   stage of semi-invariant condition.  */
> > +
> > +static void
> > +find_vdef_in_loop (struct loop *loop)
> > +{
> > +  split_info *info = (split_info *) loop->aux;
> > +  gphi *vphi = get_virtual_phi (loop->header);
> > +
> > +  /* Indicate memory store vector has been filled.  */
> > +  info->need_init = false;
> > +
> > +  /* If loop contains memory operation, there must be a virtual PHI node in
> > +     loop header basic block.  */
> > +  if (vphi == NULL)
> > +    return;
> > +
> > +  /* All virtual SSA names inside the loop are connected to be a cyclic
> > +     graph via virtual PHI nodes.  The virtual PHI node in loop header just
> > +     links the first and the last virtual SSA names, by using the last as
> > +     PHI operand to define the first.  */
> > +  const edge latch = loop_latch_edge (loop);
> > +  const tree first = gimple_phi_result (vphi);
> > +  const tree last = PHI_ARG_DEF_FROM_EDGE (vphi, latch);
> > +
> > +  /* The virtual SSA cyclic graph might consist of only one SSA name, who
> > +     is defined by itself.
> > +
> > +       .MEM_1 = PHI <.MEM_2(loop entry edge), .MEM_1(latch edge)>
> > +
> > +     This means the loop contains only memory loads, so we can skip it.  */
> > +  if (first == last)
> > +    return;
> > +
> > +  auto_vec<gimple *> other_stores;
> > +  auto_vec<tree> worklist;
> > +  auto_bitmap visited;
> > +
> > +  bitmap_set_bit (visited, SSA_NAME_VERSION (first));
> > +  bitmap_set_bit (visited, SSA_NAME_VERSION (last));
> > +  worklist.safe_push (last);
> > +
> > +  do
> > +    {
> > +      tree vuse = worklist.pop ();
> > +      gimple *stmt = SSA_NAME_DEF_STMT (vuse);
> > +
> > +      /* We mark the first and last SSA names as visited at the beginning,
> > +	 and reversely start the process from the last SSA name towards the
> > +	 first, which ensures that this do-while will not touch SSA names
> > +	 defined outside of the loop.  */
> > +      gcc_assert (gimple_bb (stmt)
> > +		  && flow_bb_inside_loop_p (loop, gimple_bb (stmt)));
> > +
> > +      if (gimple_code (stmt) == GIMPLE_PHI)
> > +	{
> > +	  gphi *phi = as_a <gphi *> (stmt);
> > +
> > +	  for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
> > +	    {
> > +	      tree arg = gimple_phi_arg_def (stmt, i);
> > +
> > +	      if (bitmap_set_bit (visited, SSA_NAME_VERSION (arg)))
> > +		worklist.safe_push (arg);
> > +	    }
> > +	}
> > +      else
> > +	{
> > +	  tree prev = gimple_vuse (stmt);
> > +
> > +	  /* Non-pure call statement is conservatively assumed to impact all
> > +	     memory locations.  So place call statements ahead of other memory
> > +	     stores in the vector with an idea of of using them as shortcut
> > +	     terminators to memory alias analysis.  */
> > +	  if (gimple_code (stmt) == GIMPLE_CALL)
> > +	    info->memory_stores.safe_push (stmt);
> > +	  else
> > +	    other_stores.safe_push (stmt);
> > +
> > +	  if (bitmap_set_bit (visited, SSA_NAME_VERSION (prev)))
> > +	    worklist.safe_push (prev);
> > +	}
> > +    } while (!worklist.is_empty ());
> > +
> > +    info->memory_stores.safe_splice (other_stores);
> > +}
> > +
> > +
> > +/* Given STMT, memory load or pure call statement, check whether it is impacted
> > +   by some memory store in LOOP, excluding trace starting from SKIP_HEAD (the
> > +   trace is composed of SKIP_HEAD and those basic block dominated by it, always
> > +   corresponds to one branch of a conditional statement).  If SKIP_HEAD is
> > +   NULL, all basic blocks of LOOP are checked.  */
> > +
> > +static bool
> > +vuse_semi_invariant_p (struct loop *loop, gimple *stmt,
> > +		       const_basic_block skip_head)
> > +{
> > +  split_info *info = (split_info *) loop->aux;
> > +
> > +  /* Collect memory store/clobber statements if have not do that.  */
> > +  if (info->need_init)
> > +    find_vdef_in_loop (loop);
> > +
> > +  tree rhs = is_gimple_assign (stmt) ? gimple_assign_rhs1 (stmt) : NULL_TREE;
> > +  ao_ref ref;
> > +  gimple *store;
> > +  unsigned i;
> > +
> > +  ao_ref_init (&ref, rhs);
> > +
> > +  FOR_EACH_VEC_ELT (info->memory_stores, i, store)
> > +    {
> > +      /* Skip basic blocks dominated by SKIP_HEAD, if non-NULL.  */
> > +      if (skip_head
> > +	  && dominated_by_p (CDI_DOMINATORS, gimple_bb (store), skip_head))
> > +	continue;
> > +
> > +      if (!ref.ref || stmt_may_clobber_ref_p_1 (store, &ref))
> > +	return false;
> > +    }
> > +
> > +  return true;
> > +}
> > +
> > +/* Forward declaration.  */
> > +
> > +static bool
> > +stmt_semi_invariant_p (struct loop *loop, gimple *stmt,
> > +		       const_basic_block skip_head);
> > +
> > +/* Suppose one condition branch, led by SKIP_HEAD, is not executed since
> > +   certain iteration of LOOP, check whether an SSA name (NAME) remains
> > +   unchanged in next interation.  We call this characterisic as semi-
> > +   invariantness.  SKIP_HEAD might be NULL, if so, nothing excluded, all
> > +   basic blocks and control flows in the loop will be considered.  If non-
> > +   NULL, SSA name to check is supposed to be defined before SKIP_HEAD.  */
> > +
> > +static bool
> > +ssa_semi_invariant_p (struct loop *loop, const tree name,
> > +		      const_basic_block skip_head)
> > +{
> > +  gimple *def = SSA_NAME_DEF_STMT (name);
> > +  const_basic_block def_bb = gimple_bb (def);
> > +
> > +  /* An SSA name defined outside a loop is definitely semi-invariant.  */
> > +  if (!def_bb || !flow_bb_inside_loop_p (loop, def_bb))
> > +    return true;
> > +
> > +  if (gimple_code (def) == GIMPLE_PHI)
> > +    {
> > +      /* For PHI node that is not in loop header, its source operands should
> > +	 be defined inside the loop, which are seen as loop variant.  */
> > +      if (def_bb != loop->header || !skip_head)
> > +	return false;
> > +
> > +      const_edge latch = loop_latch_edge (loop);
> > +      tree from = PHI_ARG_DEF_FROM_EDGE (as_a <gphi *> (def), latch);
> > +
> > +      /* A PHI node in loop header contains two source operands, one is
> > +	 initial value, the other is the copy of last iteration through loop
> > +	 latch, we call it latch value.  From the PHI node to definition
> > +	 of latch value, if excluding branch trace from SKIP_HEAD, there
> > +	 is no definition of other version of same variable, SSA name defined
> > +	 by the PHI node is semi-invariant.
> > +
> > +                         loop entry
> > +                              |     .--- latch ---.
> > +                              |     |             |
> > +                              v     v             |
> > +                  x_1 = PHI <x_0,  x_3>           |
> > +                           |                      |
> > +                           v                      |
> > +              .------- if (cond) -------.         |
> > +              |                         |         |
> > +              |                     [ SKIP ]      |
> > +              |                         |         |
> > +              |                     x_2 = ...     |
> > +              |                         |         |
> > +              '---- T ---->.<---- F ----'         |
> > +                           |                      |
> > +                           v                      |
> > +                  x_3 = PHI <x_1, x_2>            |
> > +                           |                      |
> > +                           '----------------------'
> > +
> > +	Suppose in certain iteration, execution flow in above graph goes
> > +	through true branch, which means that one source value to define
> > +	x_3 in false branch (x2) is skipped, x_3 only comes from x_1, and
> > +	x_1 in next iterations is defined by x_3, we know that x_1 will
> > +	never changed if COND always chooses true branch from then on.  */
> > +
> > +      while (from != name)
> > +	{
> > +	  /* A new value comes from a CONSTANT.  */
> > +	  if (TREE_CODE (from) != SSA_NAME)
> > +	    return false;
> > +
> > +	  gimple *stmt = SSA_NAME_DEF_STMT (from);
> > +	  const_basic_block bb = gimple_bb (stmt);
> > +
> > +	  /* A new value comes from outside of loop.  */
> > +	  if (!bb || !flow_bb_inside_loop_p (loop, bb))
> > +	    return false;
> > +
> > +	  from = NULL_TREE;
> > +
> > +	  if (gimple_code (stmt) == GIMPLE_PHI)
> > +	    {
> > +	      gphi *phi = as_a <gphi *> (stmt);
> > +
> > +	      for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
> > +		{
> > +		  const_edge e = gimple_phi_arg_edge (phi, i);
> > +
> > +		  /* Not consider redefinitions in excluded basic blocks.  */
> > +		  if (!dominated_by_p (CDI_DOMINATORS, e->src, skip_head))
> > +		    {
> > +		      /* There are more than one source operands that can
> > +			 provide value to the SSA name, it is variant.  */
> > +		      if (from)
> > +			return false;
> > +
> > +		      from = gimple_phi_arg_def (phi, i);
> > +		    }
> > +		}
> > +	    }
> > +	  else if (gimple_code (stmt) == GIMPLE_ASSIGN)
> > +	    {
> > +	      /* For simple value copy, check its rhs instead.  */
> > +	      if (gimple_assign_ssa_name_copy_p (stmt))
> > +		from = gimple_assign_rhs1 (stmt);
> > +	    }
> > +
> > +	  /* Any other kind of definition is deemed to introduce a new value
> > +	     to the SSA name.  */
> > +	  if (!from)
> > +	    return false;
> > +	}
> > +	return true;
> > +    }
> > +
> > +  /* Value originated from volatile memory load or return of normal (non-
> > +     const/pure) call should not be treated as constant in each iteration.  */
> > +  if (gimple_has_side_effects (def))
> > +    return false;
> > +
> > +  /* Check if any memory store may kill memory load at this place.  */
> > +  if (gimple_vuse (def) && !vuse_semi_invariant_p (loop, def, skip_head))
> > +    return false;
> > +
> > +  /* Check operands of definition statement of the SSA name.  */
> > +  return stmt_semi_invariant_p (loop, def, skip_head);
> > +}
> > +
> > +/* Check whether STMT is semi-invariant in LOOP, iff all its operands are
> > +   semi-invariant.  Trace composed of basic block SKIP_HEAD and basic blocks
> > +   dominated by it are excluded from the loop.  */
> > +
> > +static bool
> > +stmt_semi_invariant_p (struct loop *loop, gimple *stmt,
> > +		       const_basic_block skip_head)
> > +{
> > +  ssa_op_iter iter;
> > +  tree use;
> > +
> > +  /* Although operand of a statement might be SSA name, CONSTANT or VARDECL,
> > +     here we only need to check SSA name operands.  This is because check on
> > +     VARDECL operands, which involve memory loads, must have been done
> > +     prior to invocation of this function in vuse_semi_invariant_p.  */
> > +  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
> > +    {
> > +      if (!ssa_semi_invariant_p (loop, use, skip_head))
> > +	return false;
> > +    }
> > +
> > +  return true;
> > +}
> > +
> > +/* Determine when conditional statement never transfers execution to one of its
> > +   branch, whether we can remove the branch's leading basic block (BRANCH_BB)
> > +   and those basic blocks dominated by BRANCH_BB.  */
> > +
> > +static bool
> > +branch_removable_p (basic_block branch_bb)
> > +{
> > +  if (single_pred_p (branch_bb))
> > +    return true;
> > +
> > +  edge e;
> > +  edge_iterator ei;
> > +
> > +  FOR_EACH_EDGE (e, ei, branch_bb->preds)
> > +    {
> > +      if (dominated_by_p (CDI_DOMINATORS, e->src, branch_bb))
> > +	continue;
> > +
> > +      if (dominated_by_p (CDI_DOMINATORS, branch_bb, e->src))
> > +	continue;
> > +
> > +       /* The branch can be reached from opposite branch, or from some
> > +	  statement not dominated by the conditional statement.  */
> > +      return false;
> > +    }
> > +
> > +  return true;
> > +}
> > +
> > +/* Find out which branch of a conditional statement (COND) is invariant in the
> > +   execution context of LOOP.  That is: once the branch is selected in certain
> > +   iteration of the loop, any operand that contributes to computation of the
> > +   conditional statement remains unchanged in all following iterations.  */
> > +
> > +static edge
> > +get_cond_invariant_branch (struct loop *loop, gcond *cond)
> > +{
> > +  basic_block cond_bb = gimple_bb (cond);
> > +  basic_block targ_bb[2];
> > +  bool invar[2];
> > +  unsigned invar_checks;
> > +
> > +  for (unsigned i = 0; i < 2; i++)
> > +    {
> > +      targ_bb[i] = EDGE_SUCC (cond_bb, i)->dest;
> > +
> > +      /* One branch directs to loop exit, no need to perform loop split upon
> > +	 this conditional statement.  Firstly, it is trivial if the exit branch
> > +	 is semi-invariant, for the statement is just to break loop.  Secondly,
> > +	 if the opposite branch is semi-invariant, it means that the statement
> > +	 is real loop-invariant, which is covered by loop unswitch.  */
> > +      if (!flow_bb_inside_loop_p (loop, targ_bb[i]))
> > +	return NULL;
> > +    }
> > +
> > +  invar_checks = 0;
> > +
> > +  for (unsigned i = 0; i < 2; i++)
> > +    {
> > +      invar[!i] = false;
> > +
> > +      if (!branch_removable_p (targ_bb[i]))
> > +	continue;
> > +
> > +      /* Given a semi-invariant branch, if its opposite branch dominates
> > +	 loop latch, it and its following trace will only be executed in
> > +	 final iteration of loop, namely it is not part of repeated body
> > +	 of the loop.  Similar to the above case that the branch is loop
> > +	 exit, no need to split loop.  */
> > +      if (dominated_by_p (CDI_DOMINATORS, loop->latch, targ_bb[i]))
> > +	continue;
> > +
> > +      invar[!i] = stmt_semi_invariant_p (loop, cond, targ_bb[i]);
> > +      invar_checks++;
> > +    }
> > +
> > +  /* With both branches being invariant (handled by loop unswitch) or
> > +     variant is not what we want.  */
> > +  if (invar[0] ^ !invar[1])
> > +    return NULL;
> > +
> > +  /* Found a real loop-invariant condition, do nothing.  */
> > +  if (invar_checks < 2 && stmt_semi_invariant_p (loop, cond, NULL))
> > +    return NULL;
> > +
> > +  return EDGE_SUCC (cond_bb, (unsigned) invar[1]);
> > +}
> > +
> > +/* Calculate increased code size measured by estimated insn number if applying
> > +   loop split upon certain branch (BRANCH_EDGE) of a conditional statement.  */
> > +
> > +static int
> > +compute_added_num_insns (struct loop *loop, const_edge branch_edge)
> > +{
> > +  basic_block cond_bb = branch_edge->src;
> > +  unsigned branch = EDGE_SUCC (cond_bb, 1) == branch_edge;
> > +  basic_block opposite_bb = EDGE_SUCC (cond_bb, !branch)->dest;
> > +  basic_block *bbs = ((split_info *) loop->aux)->bbs;
> > +  int num = 0;
> > +
> > +  for (unsigned i = 0; i < loop->num_nodes; i++)
> > +    {
> > +      /* Do no count basic blocks only in opposite branch.  */
> > +      if (dominated_by_p (CDI_DOMINATORS, bbs[i], opposite_bb))
> > +	continue;
> > +
> > +      num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights);
> > +    }
> > +
> > +  /* It is unnecessary to evaluate expression of the conditional statement
> > +     in new loop that contains only invariant branch.  This expresion should
> > +     be constant value (either true or false).  Exclude code size of insns
> > +     that contribute to computation of the expression.  */
> > +
> > +  auto_vec<gimple *> worklist;
> > +  hash_set<gimple *> removed;
> > +  gimple *stmt = last_stmt (cond_bb);
> > +
> > +  worklist.safe_push (stmt);
> > +  removed.add (stmt);
> > +  num -= estimate_num_insns (stmt, &eni_size_weights);
> > +
> > +  do
> > +    {
> > +      ssa_op_iter opnd_iter;
> > +      use_operand_p opnd_p;
> > +
> > +      stmt = worklist.pop ();
> > +      FOR_EACH_PHI_OR_STMT_USE (opnd_p, stmt, opnd_iter, SSA_OP_USE)
> > +	{
> > +	  tree opnd = USE_FROM_PTR (opnd_p);
> > +
> > +	  if (TREE_CODE (opnd) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (opnd))
> > +	    continue;
> > +
> > +	  gimple *opnd_stmt = SSA_NAME_DEF_STMT (opnd);
> > +	  use_operand_p use_p;
> > +	  imm_use_iterator use_iter;
> > +
> > +	  if (removed.contains (opnd_stmt)
> > +	      || !flow_bb_inside_loop_p (loop, gimple_bb (opnd_stmt)))
> > +	    continue;
> > +
> > +	  FOR_EACH_IMM_USE_FAST (use_p, use_iter, opnd)
> > +	    {
> > +              gimple *use_stmt = USE_STMT (use_p);
> > +
> > +	      if (!is_gimple_debug (use_stmt) && !removed.contains (use_stmt))
> > +		{
> > +		  opnd_stmt = NULL;
> > +		  break;
> > +		}
> > +	    }
> > +
> > +	  if (opnd_stmt)
> > +	    {
> > +	      worklist.safe_push (opnd_stmt);
> > +	      removed.add (opnd_stmt);
> > +	      num -= estimate_num_insns (opnd_stmt, &eni_size_weights);
> > +	    }
> > +	}
> > +    } while (!worklist.is_empty ());
> > +
> > +  gcc_assert (num >= 0);
> > +  return num;
> > +}
> > +
> > +/* Find out loop-invariant branch of a conditional statement (COND) if it has,
> > +   and check whether it is eligible and profitable to perform loop split upon
> > +   this branch in LOOP.  */
> > +
> > +static edge
> > +get_cond_branch_to_split_loop (struct loop *loop, gcond *cond)
> > +{
> > +  edge invar_branch = get_cond_invariant_branch (loop, cond);
> > +
> > +  if (!invar_branch)
> > +    return NULL;
> > +
> > +  profile_probability prob = invar_branch->probability;
> > +
> > +  /* When accurate profile information is available, and execution
> > +     frequency of the branch is too low, just let it go.  */
> > +  if (prob.reliable_p ())
> > +    {
> > +      int thres = PARAM_VALUE (PARAM_MIN_COND_LOOP_SPLIT_PROB);
> > +
> > +      if (prob < profile_probability::always ().apply_scale (thres, 100))
> > +	return NULL;
> > +    }
> > +
> > +  /* Add a threshold for increased code size to disable loop split.  */
> > +  if (compute_added_num_insns (loop, invar_branch)
> > +      > PARAM_VALUE (PARAM_MAX_COND_LOOP_SPLIT_INSNS))
> > +    return NULL;
> > +
> > +  return invar_branch;
> > +}
> > +
> > +/* Given a loop (LOOP1) with a loop-invariant branch (INVAR_BRANCH) of some
> > +   conditional statement, perform loop split transformation illustrated
> > +   as the following graph.
> > +
> > +               .-------T------ if (true) ------F------.
> > +               |                    .---------------. |
> > +               |                    |               | |
> > +               v                    |               v v
> > +          pre-header                |            pre-header
> > +               | .------------.     |                 | .------------.
> > +               | |            |     |                 | |            |
> > +               | v            |     |                 | v            |
> > +             header           |     |               header           |
> > +               |              |     |                 |              |
> > +       [ bool r = cond; ]     |     |                 |              |
> > +               |              |     |                 |              |
> > +      .---- if (r) -----.     |     |        .--- if (true) ---.     |
> > +      |                 |     |     |        |                 |     |
> > +  invariant             |     |     |    invariant             |     |
> > +      |                 |     |     |        |                 |     |
> > +      '---T--->.<---F---'     |     |        '---T--->.<---F---'     |
> > +               |              |    /                  |              |
> > +             stmts            |   /                 stmts            |
> > +               |              |  /                    |              |
> > +              / \             | /                    / \             |
> > +     .-------*   *       [ if (!r) ]        .-------*   *            |
> > +     |           |            |             |           |            |
> > +     |         latch          |             |         latch          |
> > +     |           |            |             |           |            |
> > +     |           '------------'             |           '------------'
> > +     '------------------------. .-----------'
> > +             loop1            | |                   loop2
> > +                              v v
> > +                             exits
> > +
> > +   In the graph, loop1 represents the part derived from original one, and
> > +   loop2 is duplicated using loop_version (), which corresponds to the part
> > +   of original one being splitted out.  In loop1, a new bool temporary (r)
> > +   is introduced to keep value of the condition result.  In original latch
> > +   edge of loop1, we insert a new conditional statement whose value comes
> > +   from previous temporary (r), one of its branch goes back to loop1 header
> > +   as a latch edge, and the other branch goes to loop2 pre-header as an entry
> > +   edge.  And also in loop2, we abandon the variant branch of the conditional
> > +   statement candidate by setting a constant bool condition, based on which
> > +   branch is semi-invariant.  */
> > +
> > +static bool
> > +do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
> > +{
> > +  basic_block cond_bb = invar_branch->src;
> > +  bool true_invar = !!(invar_branch->flags & EDGE_TRUE_VALUE);
> > +  gcond *cond = as_a <gcond *> (last_stmt (cond_bb));
> > +
> > +  gcc_assert (cond_bb->loop_father == loop1);
> > +
> > +  if (dump_file && (dump_flags & TDF_DETAILS))
> > +   {
> > +     fprintf (dump_file, "In %s(), split loop %d at branch<%s>, BB %d\n",
> > +	      current_function_name (), loop1->num,
> > +	      true_invar ? "T" : "F", cond_bb->index);
> > +     print_gimple_stmt (dump_file, cond, 0, TDF_SLIM | TDF_VOPS);
> > +   }
> > +
> > +  initialize_original_copy_tables ();
> > +
> > +  struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
> > +				     profile_probability::always (),
> > +				     profile_probability::never (),
> > +				     profile_probability::always (),
> > +				     profile_probability::always (),
> > +				     true);
> > +  if (!loop2)
> > +    {
> > +      free_original_copy_tables ();
> > +      return false;
> > +    }
> > +
> > +  /* Generate a bool type temporary to hold result of the condition.  */
> > +  tree tmp = make_ssa_name (boolean_type_node);
> > +  gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
> > +  gimple *stmt = gimple_build_assign (tmp,
> > +				      gimple_cond_code (cond),
> > +				      gimple_cond_lhs (cond),
> > +				      gimple_cond_rhs (cond));
> > +
> > +  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
> > +  gimple_cond_set_condition (cond, EQ_EXPR, tmp, boolean_true_node);
> > +  update_stmt (cond);
> > +
> > +  basic_block cond_bb_copy = get_bb_copy (cond_bb);
> > +  gcond *cond_copy = as_a<gcond *> (last_stmt (cond_bb_copy));
> > +
> > +  /* Replace the condition in loop2 with a bool constant to let PassManager
> > +     remove the variant branch after current pass completes.  */
> > +  if (true_invar)
> > +    gimple_cond_make_true (cond_copy);
> > +  else
> > +    gimple_cond_make_false (cond_copy);
> > +
> > +  update_stmt (cond_copy);
> > +
> > +  /* Insert a new conditional statement on latch edge of loop1.  This
> > +     statement acts as a switch to transfer execution from loop1 to loop2,
> > +     when loop1 enters into invariant state.  */
> > +  basic_block latch_bb = split_edge (loop_latch_edge (loop1));
> > +  basic_block break_bb = split_edge (single_pred_edge (latch_bb));
> > +  gimple *break_cond = gimple_build_cond (EQ_EXPR, tmp, boolean_true_node,
> > +					  NULL_TREE, NULL_TREE);
> > +
> > +  gsi = gsi_last_bb (break_bb);
> > +  gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT);
> > +
> > +  edge to_loop1 = single_succ_edge (break_bb);
> > +  edge to_loop2 = make_edge (break_bb, loop_preheader_edge (loop2)->src, 0);
> > +
> > +  to_loop1->flags &= ~EDGE_FALLTHRU;
> > +  to_loop1->flags |= true_invar ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE;
> > +  to_loop2->flags |= true_invar ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
> > +
> > +  update_ssa (TODO_update_ssa);
> > +
> > +  /* Due to introduction of a control flow edge from loop1 latch to loop2
> > +     pre-header, we should update PHIs in loop2 to reflect this connection
> > +     between loop1 and loop2.  */
> > +  connect_loop_phis (loop1, loop2, to_loop2);
> > +
> > +  free_original_copy_tables ();
> > +
> > +  rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop1);
> > +
> > +  return true;
> > +}
> > +
> > +/* Traverse all conditional statements in LOOP, to find out a good candidate
> > +   upon which we can do loop split.  */
> > +
> > +static bool
> > +split_loop_on_cond (struct loop *loop)
> > +{
> > +  split_info *info = new split_info ();
> > +  basic_block *bbs = info->bbs = get_loop_body (loop);
> > +  bool do_split = false;
> > +
> > +  /* Allocate an area to keep temporary info, and associate its address
> > +     with loop aux field.  */
> > +  loop->aux = info;
> > +
> > +  for (unsigned i = 0; i < loop->num_nodes; i++)
> > +    {
> > +      basic_block bb = bbs[i];
> > +
> > +      /* We only consider conditional statement, which be executed at most once
> > +	 in each iteration of the loop.  So skip statements in inner loops.  */
> > +      if ((bb->loop_father != loop) || (bb->flags & BB_IRREDUCIBLE_LOOP))
> > +	continue;
> > +
> > +      /* Actually this check is not a must constraint. With it, we can ensure
> > +	 conditional statement will always be executed in each iteration. */
> > +      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
> > +	continue;
> > +
> > +      gimple *last = last_stmt (bb);
> > +
> > +      if (!last || gimple_code (last) != GIMPLE_COND)
> > +	continue;
> > +
> > +      gcond *cond = as_a <gcond *> (last);
> > +      edge branch_edge = get_cond_branch_to_split_loop (loop, cond);
> > +
> > +      if (branch_edge)
> > +	{
> > +	  do_split_loop_on_cond (loop, branch_edge);
> > +	  do_split = true;
> > +	  break;
> > +	}
> > +    }
> > +
> > +  delete info;
> > +  loop->aux = NULL;
> > +
> > +  return do_split;
> > +}
> > +
> > /* Main entry point.  Perform loop splitting on all suitable loops.  */
> > 
> > static unsigned int
> > @@ -662,6 +1383,32 @@ tree_ssa_split_loops (void)
> > 	}
> >     }
> > 
> > +  if (changed)
> > +    {
> > +      cleanup_tree_cfg ();
> > +      changed = false;
> > +    }
> > +
> > +  /* Perform loop splitting for suitable if-conditions in all loops.  */
> > +  FOR_EACH_LOOP (loop, LI_INCLUDE_ROOT)
> > +    loop->aux = NULL;
> > +
> > +  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
> > +    {
> > +      if (loop->aux)
> > +        {
> > +	  loop_outer (loop)->aux = loop;
> > +	  continue;
> > +	}
> > +
> > +      if (!optimize_loop_for_size_p (loop)
> > +	  && split_loop_on_cond (loop))
> > +	{
> > +	  loop_outer (loop)->aux = loop;
> > +	  changed = true;
> > +	}
> > +    }
> > +
> >   FOR_EACH_LOOP (loop, LI_INCLUDE_ROOT)
> >     loop->aux = NULL;
> > 
> > -- 
> > 2.17.1
> > 
>
Feng Xue OS Oct. 16, 2019, 1:54 a.m. UTC | #3
Hi Philipp,

   This is an updated patch based on comments form Michael, and if he think this is ok, we will merge it into trunk. Thanks,

Feng
Feng Xue OS Oct. 22, 2019, 10:06 a.m. UTC | #4
Hi, Michael,

  Since gcc 10 release is coming, that will be good if we can add this patch before that. Thanks

Feng.
Michael Matz Oct. 22, 2019, 11:15 a.m. UTC | #5
Hello,

I've only noticed a couple typos, and one minor remark.  From my 
perspective it's okay, but you still need the okay of a proper reviewer, 
for which you might want to state the testing/regression state of this 
patch relative to trunk.  The remarks follow:

On Tue, 22 Oct 2019, Feng Xue OS wrote:

> > > +/* Suppose one condition branch, led by SKIP_HEAD, is not executed since
> > > +   certain iteration of LOOP, check whether an SSA name (NAME) remains
> > > +   unchanged in next interation.  We call this characterisic as semi-

"iteration", "characteristic", remove "as"

> > > +             /* Not consider redefinitions in excluded basic blocks.  */

"Don't consider"

> > > +  /* It is unnecessary to evaluate expression of the conditional statement
> > > +     in new loop that contains only invariant branch.  This expresion should

"expression"

> > > @@ -662,6 +1383,32 @@ tree_ssa_split_loops (void)
> > >     }
> > >     }
> > >
> > > +  if (changed)
> > > +    {
> > > +      cleanup_tree_cfg ();
> > > +      changed = false;
> > > +    }
> > > +
> > > +  /* Perform loop splitting for suitable if-conditions in all loops.  */
> > > +  FOR_EACH_LOOP (loop, LI_INCLUDE_ROOT)
> > > +    loop->aux = NULL;
> > > +
> > > +  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
> > > +    {
> > > +      if (loop->aux)
> > > +        {
> > > +     loop_outer (loop)->aux = loop;
> > > +     continue;
> > > +   }
> > > +
> > > +      if (!optimize_loop_for_size_p (loop)
> > > +     && split_loop_on_cond (loop))
> > > +   {
> > > +     loop_outer (loop)->aux = loop;
> > > +     changed = true;
> > > +   }
> > > +    }
> > > +
> > >   FOR_EACH_LOOP (loop, LI_INCLUDE_ROOT)
> > >     loop->aux = NULL;

I just wonder why you duplicated these three loops instead of integrating 
the real body into the existing LI_FROM_INNERMOST loop.  I would have 
expected your "if (!optimize_loop_for_size_p && split_loop_on_cond)" block 
to simply be the else block of the existing
"if (... conditions for normal loop splitting ...)" block.

Either way it's okay with me.


Ciao,
Michael.
Feng Xue OS Oct. 23, 2019, 3:36 a.m. UTC | #6
Michael,

> I've only noticed a couple typos, and one minor remark. 
Typos corrected.

> I just wonder why you duplicated these three loops instead of integrating
> the real body into the existing LI_FROM_INNERMOST loop.  I would have
> expected your "if (!optimize_loop_for_size_p && split_loop_on_cond)" block
> to simply be the else block of the existing
> "if (... conditions for normal loop splitting ...)" block.
Adjusted to do two kinds of loop-split in same LI_FROM_INNERMOST loop.

> From my perspective it's okay, but you still need the okay of a proper reviewer,
> for which you might want to state the testing/regression state of this
> patch relative to trunk. 

Richard,
  
  Is it ok to commit this patch? Bootstrap and regression test passed. And for
performance, we can get about 7% improvement on spec2017 omnetpp with this
patch.

Thanks,
Feng

---
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 1407d019d14..d41e5aa0215 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -11481,6 +11481,19 @@ The maximum number of branches unswitched in a single loop.
 @item lim-expensive
 The minimum cost of an expensive expression in the loop invariant motion.
 
+@item max-loop-cond-split-insns
+In a loop, if a branch of a conditional statement is selected since certain
+loop iteration, any operand that contributes to computation of the conditional
+expression remains unchanged in all following iterations, the statement is
+semi-invariant, upon which we can do a kind of loop split transformation.
+@option{max-loop-cond-split-insns} controls maximum number of insns to be
+added due to loop split on semi-invariant conditional statement.
+
+@item min-loop-cond-split-prob
+When FDO profile information is available, @option{min-loop-cond-split-prob}
+specifies minimum threshold for probability of semi-invariant condition
+statement to trigger loop split.
+
 @item iv-consider-all-candidates-bound
 Bound on number of candidates for induction variables, below which
 all candidates are considered for each use in induction variable
diff --git a/gcc/params.def b/gcc/params.def
index 322c37f8b96..73b59f7465e 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -415,6 +415,20 @@ DEFPARAM(PARAM_MAX_UNSWITCH_LEVEL,
 	"The maximum number of unswitchings in a single loop.",
 	3, 0, 0)
 
+/* The maximum number of increased insns due to loop split on semi-invariant
+   condition statement.  */
+DEFPARAM(PARAM_MAX_LOOP_COND_SPLIT_INSNS,
+	"max-loop-cond-split-insns",
+	"The maximum number of insns to be added due to loop split on "
+	"semi-invariant condition statement.",
+	100, 0, 0)
+
+DEFPARAM(PARAM_MIN_LOOP_COND_SPLIT_PROB,
+	"min-loop-cond-split-prob",
+	"The minimum threshold for probability of semi-invariant condition "
+	"statement to trigger loop split.",
+	30, 0, 100)
+
 /* The maximum number of insns in loop header duplicated by the copy loop
    headers pass.  */
 DEFPARAM(PARAM_MAX_LOOP_HEADER_INSNS,

diff --git a/gcc/testsuite/g++.dg/tree-ssa/loop-cond-split-1.C b/gcc/testsuite/g++.dg/tree-ssa/loop-cond-split-1.C
new file mode 100644
index 00000000000..51f9da22fc7
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/loop-cond-split-1.C
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-lsplit-details" } */
+
+#include <string>
+#include <map>
+
+using namespace std;
+
+class  A
+{
+public:
+  bool empty;
+  void set (string s);
+};
+
+class  B
+{
+  map<int, string> m;
+  void f ();
+};
+
+extern A *ga;
+
+void B::f ()
+{
+  for (map<int, string>::iterator iter = m.begin (); iter != m.end (); ++iter)
+    {
+      if (ga->empty)
+        ga->set (iter->second);
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "split loop 1 at branch" 1 "lsplit" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-cond-split-1.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-cond-split-1.c
new file mode 100644
index 00000000000..bbd522d6bcd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-cond-split-1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-lsplit-details" } */
+
+__attribute__((pure)) __attribute__((noinline)) int inc (int i)
+{
+  return i + 1;
+}
+
+extern int do_something (void);
+extern int b;
+
+void test(int n)
+{
+  int i;
+
+  for (i = 0; i < n; i = inc (i))
+    {
+      if (b)
+        b = do_something();
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "split loop 1 at branch" 1 "lsplit" } } */
diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
index f5f083384bc..5cffd4bb508 100644
--- a/gcc/tree-ssa-loop-split.c
+++ b/gcc/tree-ssa-loop-split.c
@@ -32,7 +32,10 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-ssa-loop.h"
 #include "tree-ssa-loop-manip.h"
 #include "tree-into-ssa.h"
+#include "tree-inline.h"
+#include "tree-cfgcleanup.h"
 #include "cfgloop.h"
+#include "params.h"
 #include "tree-scalar-evolution.h"
 #include "gimple-iterator.h"
 #include "gimple-pretty-print.h"
@@ -40,7 +43,9 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple-fold.h"
 #include "gimplify-me.h"
 
-/* This file implements loop splitting, i.e. transformation of loops like
+/* This file implements two kinds of loop splitting.
+
+   One transformation of loops like:
 
    for (i = 0; i < 100; i++)
      {
@@ -487,8 +492,9 @@ compute_new_first_bound (gimple_seq *stmts, class tree_niter_desc *niter,
    single exit of LOOP.  */
 
 static bool
-split_loop (class loop *loop1, class tree_niter_desc *niter)
+split_loop (class loop *loop1)
 {
+  class tree_niter_desc niter;
   basic_block *bbs;
   unsigned i;
   bool changed = false;
@@ -496,8 +502,28 @@ split_loop (class loop *loop1, class tree_niter_desc *niter)
   tree border = NULL_TREE;
   affine_iv iv;
 
+  if (!single_exit (loop1)
+      /* ??? We could handle non-empty latches when we split the latch edge
+         (not the exit edge), and put the new exit condition in the new block.
+	 OTOH this executes some code unconditionally that might have been
+	 skipped by the original exit before.  */
+      || !empty_block_p (loop1->latch)
+      || !easy_exit_values (loop1)
+      || !number_of_iterations_exit (loop1, single_exit (loop1), &niter,
+				     false, true)
+      || niter.cmp == ERROR_MARK
+      /* We can't yet handle loops controlled by a != predicate.  */
+      || niter.cmp == NE_EXPR)
+    return false;
+
   bbs = get_loop_body (loop1);
 
+  if (!can_copy_bbs_p (bbs, loop1->num_nodes))
+    {
+      free (bbs);
+      return false;
+    }
+
   /* Find a splitting opportunity.  */
   for (i = 0; i < loop1->num_nodes; i++)
     if ((guard_iv = split_at_bb_p (loop1, bbs[i], &border, &iv)))
@@ -505,8 +531,8 @@ split_loop (class loop *loop1, class tree_niter_desc *niter)
 	/* Handling opposite steps is not implemented yet.  Neither
 	   is handling different step sizes.  */
 	if ((tree_int_cst_sign_bit (iv.step)
-	     != tree_int_cst_sign_bit (niter->control.step))
-	    || !tree_int_cst_equal (iv.step, niter->control.step))
+	     != tree_int_cst_sign_bit (niter.control.step))
+	    || !tree_int_cst_equal (iv.step, niter.control.step))
 	  continue;
 
 	/* Find a loop PHI node that defines guard_iv directly,
@@ -575,7 +601,7 @@ split_loop (class loop *loop1, class tree_niter_desc *niter)
 	   Compute the new bound for the guarding IV and patch the
 	   loop exit to use it instead of original IV and bound.  */
 	gimple_seq stmts = NULL;
-	tree newend = compute_new_first_bound (&stmts, niter, border,
+	tree newend = compute_new_first_bound (&stmts, &niter, border,
 					       guard_code, guard_init);
 	if (stmts)
 	  gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
@@ -612,6 +638,722 @@ split_loop (class loop *loop1, class tree_niter_desc *niter)
   return changed;
 }
 
+/* Another transformation of loops like:
+
+   for (i = INIT (); CHECK (i); i = NEXT ())
+     {
+       if (expr (a_1, a_2, ..., a_n))  // expr is pure
+         a_j = ...;  // change at least one a_j
+       else
+         S;          // not change any a_j
+     }
+
+   into:
+
+   for (i = INIT (); CHECK (i); i = NEXT ())
+     {
+       if (expr (a_1, a_2, ..., a_n))
+         a_j = ...;
+       else
+         {
+           S;
+           i = NEXT ();
+           break;
+         }
+     }
+
+   for (; CHECK (i); i = NEXT ())
+     {
+       S;
+     }
+
+   */
+
+/* Data structure to hold temporary information during loop split upon
+   semi-invariant conditional statement.  */
+class split_info {
+public:
+  /* Array of all basic blocks in a loop, returned by get_loop_body().  */
+  basic_block *bbs;
+
+  /* All memory store/clobber statements in a loop.  */
+  auto_vec<gimple *> memory_stores;
+
+  /* Whether above memory stores vector has been filled.  */
+  int need_init;
+
+  split_info () : bbs (NULL),  need_init (true) { }
+
+  ~split_info ()
+    {
+      if (bbs)
+	free (bbs);
+    }
+};
+
+/* Find all statements with memory-write effect in LOOP, including memory
+   store and non-pure function call, and keep those in a vector.  This work
+   is only done one time, for the vector should be constant during analysis
+   stage of semi-invariant condition.  */
+
+static void
+find_vdef_in_loop (struct loop *loop)
+{
+  split_info *info = (split_info *) loop->aux;
+  gphi *vphi = get_virtual_phi (loop->header);
+
+  /* Indicate memory store vector has been filled.  */
+  info->need_init = false;
+
+  /* If loop contains memory operation, there must be a virtual PHI node in
+     loop header basic block.  */
+  if (vphi == NULL)
+    return;
+
+  /* All virtual SSA names inside the loop are connected to be a cyclic
+     graph via virtual PHI nodes.  The virtual PHI node in loop header just
+     links the first and the last virtual SSA names, by using the last as
+     PHI operand to define the first.  */
+  const edge latch = loop_latch_edge (loop);
+  const tree first = gimple_phi_result (vphi);
+  const tree last = PHI_ARG_DEF_FROM_EDGE (vphi, latch);
+
+  /* The virtual SSA cyclic graph might consist of only one SSA name, who
+     is defined by itself.
+
+       .MEM_1 = PHI <.MEM_2(loop entry edge), .MEM_1(latch edge)>
+
+     This means the loop contains only memory loads, so we can skip it.  */
+  if (first == last)
+    return;
+
+  auto_vec<gimple *> other_stores;
+  auto_vec<tree> worklist;
+  auto_bitmap visited;
+
+  bitmap_set_bit (visited, SSA_NAME_VERSION (first));
+  bitmap_set_bit (visited, SSA_NAME_VERSION (last));
+  worklist.safe_push (last);
+
+  do
+    {
+      tree vuse = worklist.pop ();
+      gimple *stmt = SSA_NAME_DEF_STMT (vuse);
+
+      /* We mark the first and last SSA names as visited at the beginning,
+	 and reversely start the process from the last SSA name towards the
+	 first, which ensures that this do-while will not touch SSA names
+	 defined outside of the loop.  */
+      gcc_assert (gimple_bb (stmt)
+		  && flow_bb_inside_loop_p (loop, gimple_bb (stmt)));
+
+      if (gimple_code (stmt) == GIMPLE_PHI)
+	{
+	  gphi *phi = as_a <gphi *> (stmt);
+
+	  for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
+	    {
+	      tree arg = gimple_phi_arg_def (stmt, i);
+
+	      if (bitmap_set_bit (visited, SSA_NAME_VERSION (arg)))
+		worklist.safe_push (arg);
+	    }
+	}
+      else
+	{
+	  tree prev = gimple_vuse (stmt);
+
+	  /* Non-pure call statement is conservatively assumed to impact all
+	     memory locations.  So place call statements ahead of other memory
+	     stores in the vector with an idea of of using them as shortcut
+	     terminators to memory alias analysis.  */
+	  if (gimple_code (stmt) == GIMPLE_CALL)
+	    info->memory_stores.safe_push (stmt);
+	  else
+	    other_stores.safe_push (stmt);
+
+	  if (bitmap_set_bit (visited, SSA_NAME_VERSION (prev)))
+	    worklist.safe_push (prev);
+	}
+    } while (!worklist.is_empty ());
+
+    info->memory_stores.safe_splice (other_stores);
+}
+
+
+/* Given STMT, memory load or pure call statement, check whether it is impacted
+   by some memory store in LOOP, excluding trace starting from SKIP_HEAD (the
+   trace is composed of SKIP_HEAD and those basic block dominated by it, always
+   corresponds to one branch of a conditional statement).  If SKIP_HEAD is
+   NULL, all basic blocks of LOOP are checked.  */
+
+static bool
+vuse_semi_invariant_p (struct loop *loop, gimple *stmt,
+		       const_basic_block skip_head)
+{
+  split_info *info = (split_info *) loop->aux;
+
+  /* Collect memory store/clobber statements if have not do that.  */
+  if (info->need_init)
+    find_vdef_in_loop (loop);
+
+  tree rhs = is_gimple_assign (stmt) ? gimple_assign_rhs1 (stmt) : NULL_TREE;
+  ao_ref ref;
+  gimple *store;
+  unsigned i;
+
+  ao_ref_init (&ref, rhs);
+
+  FOR_EACH_VEC_ELT (info->memory_stores, i, store)
+    {
+      /* Skip basic blocks dominated by SKIP_HEAD, if non-NULL.  */
+      if (skip_head
+	  && dominated_by_p (CDI_DOMINATORS, gimple_bb (store), skip_head))
+	continue;
+
+      if (!ref.ref || stmt_may_clobber_ref_p_1 (store, &ref))
+	return false;
+    }
+
+  return true;
+}
+
+/* Forward declaration.  */
+
+static bool
+stmt_semi_invariant_p (struct loop *loop, gimple *stmt,
+		       const_basic_block skip_head);
+
+/* Suppose one condition branch, led by SKIP_HEAD, is not executed since
+   certain iteration of LOOP, check whether an SSA name (NAME) remains
+   unchanged in next iteration.  We call this characteristic semi-
+   invariantness.  SKIP_HEAD might be NULL, if so, nothing excluded, all
+   basic blocks and control flows in the loop will be considered.  If non-
+   NULL, SSA name to check is supposed to be defined before SKIP_HEAD.  */
+
+static bool
+ssa_semi_invariant_p (struct loop *loop, const tree name,
+		      const_basic_block skip_head)
+{
+  gimple *def = SSA_NAME_DEF_STMT (name);
+  const_basic_block def_bb = gimple_bb (def);
+
+  /* An SSA name defined outside a loop is definitely semi-invariant.  */
+  if (!def_bb || !flow_bb_inside_loop_p (loop, def_bb))
+    return true;
+
+  if (gimple_code (def) == GIMPLE_PHI)
+    {
+      /* For PHI node that is not in loop header, its source operands should
+	 be defined inside the loop, which are seen as loop variant.  */
+      if (def_bb != loop->header || !skip_head)
+	return false;
+
+      const_edge latch = loop_latch_edge (loop);
+      tree from = PHI_ARG_DEF_FROM_EDGE (as_a <gphi *> (def), latch);
+
+      /* A PHI node in loop header contains two source operands, one is
+	 initial value, the other is the copy of last iteration through loop
+	 latch, we call it latch value.  From the PHI node to definition
+	 of latch value, if excluding branch trace from SKIP_HEAD, there
+	 is no definition of other version of same variable, SSA name defined
+	 by the PHI node is semi-invariant.
+
+                         loop entry
+                              |     .--- latch ---.
+                              |     |             |
+                              v     v             |
+                  x_1 = PHI <x_0,  x_3>           |
+                           |                      |
+                           v                      |
+              .------- if (cond) -------.         |
+              |                         |         |
+              |                     [ SKIP ]      |
+              |                         |         |
+              |                     x_2 = ...     |
+              |                         |         |
+              '---- T ---->.<---- F ----'         |
+                           |                      |
+                           v                      |
+                  x_3 = PHI <x_1, x_2>            |
+                           |                      |
+                           '----------------------'
+
+	Suppose in certain iteration, execution flow in above graph goes
+	through true branch, which means that one source value to define
+	x_3 in false branch (x2) is skipped, x_3 only comes from x_1, and
+	x_1 in next iterations is defined by x_3, we know that x_1 will
+	never changed if COND always chooses true branch from then on.  */
+
+      while (from != name)
+	{
+	  /* A new value comes from a CONSTANT.  */
+	  if (TREE_CODE (from) != SSA_NAME)
+	    return false;
+
+	  gimple *stmt = SSA_NAME_DEF_STMT (from);
+	  const_basic_block bb = gimple_bb (stmt);
+
+	  /* A new value comes from outside of loop.  */
+	  if (!bb || !flow_bb_inside_loop_p (loop, bb))
+	    return false;
+
+	  from = NULL_TREE;
+
+	  if (gimple_code (stmt) == GIMPLE_PHI)
+	    {
+	      gphi *phi = as_a <gphi *> (stmt);
+
+	      for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
+		{
+		  const_edge e = gimple_phi_arg_edge (phi, i);
+
+		  /* Don't consider redefinitions in excluded basic blocks.  */
+		  if (!dominated_by_p (CDI_DOMINATORS, e->src, skip_head))
+		    {
+		      /* There are more than one source operands that can
+			 provide value to the SSA name, it is variant.  */
+		      if (from)
+			return false;
+
+		      from = gimple_phi_arg_def (phi, i);
+		    }
+		}
+	    }
+	  else if (gimple_code (stmt) == GIMPLE_ASSIGN)
+	    {
+	      /* For simple value copy, check its rhs instead.  */
+	      if (gimple_assign_ssa_name_copy_p (stmt))
+		from = gimple_assign_rhs1 (stmt);
+	    }
+
+	  /* Any other kind of definition is deemed to introduce a new value
+	     to the SSA name.  */
+	  if (!from)
+	    return false;
+	}
+	return true;
+    }
+
+  /* Value originated from volatile memory load or return of normal (non-
+     const/pure) call should not be treated as constant in each iteration.  */
+  if (gimple_has_side_effects (def))
+    return false;
+
+  /* Check if any memory store may kill memory load at this place.  */
+  if (gimple_vuse (def) && !vuse_semi_invariant_p (loop, def, skip_head))
+    return false;
+
+  /* Check operands of definition statement of the SSA name.  */
+  return stmt_semi_invariant_p (loop, def, skip_head);
+}
+
+/* Check whether STMT is semi-invariant in LOOP, iff all its operands are
+   semi-invariant.  Trace composed of basic block SKIP_HEAD and basic blocks
+   dominated by it are excluded from the loop.  */
+
+static bool
+stmt_semi_invariant_p (struct loop *loop, gimple *stmt,
+		       const_basic_block skip_head)
+{
+  ssa_op_iter iter;
+  tree use;
+
+  /* Although operand of a statement might be SSA name, CONSTANT or VARDECL,
+     here we only need to check SSA name operands.  This is because check on
+     VARDECL operands, which involve memory loads, must have been done
+     prior to invocation of this function in vuse_semi_invariant_p.  */
+  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
+    {
+      if (!ssa_semi_invariant_p (loop, use, skip_head))
+	return false;
+    }
+
+  return true;
+}
+
+/* Determine when conditional statement never transfers execution to one of its
+   branch, whether we can remove the branch's leading basic block (BRANCH_BB)
+   and those basic blocks dominated by BRANCH_BB.  */
+
+static bool
+branch_removable_p (basic_block branch_bb)
+{
+  if (single_pred_p (branch_bb))
+    return true;
+
+  edge e;
+  edge_iterator ei;
+
+  FOR_EACH_EDGE (e, ei, branch_bb->preds)
+    {
+      if (dominated_by_p (CDI_DOMINATORS, e->src, branch_bb))
+	continue;
+
+      if (dominated_by_p (CDI_DOMINATORS, branch_bb, e->src))
+	continue;
+
+       /* The branch can be reached from opposite branch, or from some
+	  statement not dominated by the conditional statement.  */
+      return false;
+    }
+
+  return true;
+}
+
+/* Find out which branch of a conditional statement (COND) is invariant in the
+   execution context of LOOP.  That is: once the branch is selected in certain
+   iteration of the loop, any operand that contributes to computation of the
+   conditional statement remains unchanged in all following iterations.  */
+
+static edge
+get_cond_invariant_branch (struct loop *loop, gcond *cond)
+{
+  basic_block cond_bb = gimple_bb (cond);
+  basic_block targ_bb[2];
+  bool invar[2];
+  unsigned invar_checks;
+
+  for (unsigned i = 0; i < 2; i++)
+    {
+      targ_bb[i] = EDGE_SUCC (cond_bb, i)->dest;
+
+      /* One branch directs to loop exit, no need to perform loop split upon
+	 this conditional statement.  Firstly, it is trivial if the exit branch
+	 is semi-invariant, for the statement is just to break loop.  Secondly,
+	 if the opposite branch is semi-invariant, it means that the statement
+	 is real loop-invariant, which is covered by loop unswitch.  */
+      if (!flow_bb_inside_loop_p (loop, targ_bb[i]))
+	return NULL;
+    }
+
+  invar_checks = 0;
+
+  for (unsigned i = 0; i < 2; i++)
+    {
+      invar[!i] = false;
+
+      if (!branch_removable_p (targ_bb[i]))
+	continue;
+
+      /* Given a semi-invariant branch, if its opposite branch dominates
+	 loop latch, it and its following trace will only be executed in
+	 final iteration of loop, namely it is not part of repeated body
+	 of the loop.  Similar to the above case that the branch is loop
+	 exit, no need to split loop.  */
+      if (dominated_by_p (CDI_DOMINATORS, loop->latch, targ_bb[i]))
+	continue;
+
+      invar[!i] = stmt_semi_invariant_p (loop, cond, targ_bb[i]);
+      invar_checks++;
+    }
+
+  /* With both branches being invariant (handled by loop unswitch) or
+     variant is not what we want.  */
+  if (invar[0] ^ !invar[1])
+    return NULL;
+
+  /* Found a real loop-invariant condition, do nothing.  */
+  if (invar_checks < 2 && stmt_semi_invariant_p (loop, cond, NULL))
+    return NULL;
+
+  return EDGE_SUCC (cond_bb, (unsigned) invar[1]);
+}
+
+/* Calculate increased code size measured by estimated insn number if applying
+   loop split upon certain branch (BRANCH_EDGE) of a conditional statement.  */
+
+static int
+compute_added_num_insns (struct loop *loop, const_edge branch_edge)
+{
+  basic_block cond_bb = branch_edge->src;
+  unsigned branch = EDGE_SUCC (cond_bb, 1) == branch_edge;
+  basic_block opposite_bb = EDGE_SUCC (cond_bb, !branch)->dest;
+  basic_block *bbs = ((split_info *) loop->aux)->bbs;
+  int num = 0;
+
+  for (unsigned i = 0; i < loop->num_nodes; i++)
+    {
+      /* Do no count basic blocks only in opposite branch.  */
+      if (dominated_by_p (CDI_DOMINATORS, bbs[i], opposite_bb))
+	continue;
+
+      num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights);
+    }
+
+  /* It is unnecessary to evaluate expression of the conditional statement
+     in new loop that contains only invariant branch.  This expression should
+     be constant value (either true or false).  Exclude code size of insns
+     that contribute to computation of the expression.  */
+
+  auto_vec<gimple *> worklist;
+  hash_set<gimple *> removed;
+  gimple *stmt = last_stmt (cond_bb);
+
+  worklist.safe_push (stmt);
+  removed.add (stmt);
+  num -= estimate_num_insns (stmt, &eni_size_weights);
+
+  do
+    {
+      ssa_op_iter opnd_iter;
+      use_operand_p opnd_p;
+
+      stmt = worklist.pop ();
+      FOR_EACH_PHI_OR_STMT_USE (opnd_p, stmt, opnd_iter, SSA_OP_USE)
+	{
+	  tree opnd = USE_FROM_PTR (opnd_p);
+
+	  if (TREE_CODE (opnd) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (opnd))
+	    continue;
+
+	  gimple *opnd_stmt = SSA_NAME_DEF_STMT (opnd);
+	  use_operand_p use_p;
+	  imm_use_iterator use_iter;
+
+	  if (removed.contains (opnd_stmt)
+	      || !flow_bb_inside_loop_p (loop, gimple_bb (opnd_stmt)))
+	    continue;
+
+	  FOR_EACH_IMM_USE_FAST (use_p, use_iter, opnd)
+	    {
+	      gimple *use_stmt = USE_STMT (use_p);
+
+	      if (!is_gimple_debug (use_stmt) && !removed.contains (use_stmt))
+		{
+		  opnd_stmt = NULL;
+		  break;
+		}
+	    }
+
+	  if (opnd_stmt)
+	    {
+	      worklist.safe_push (opnd_stmt);
+	      removed.add (opnd_stmt);
+	      num -= estimate_num_insns (opnd_stmt, &eni_size_weights);
+	    }
+	}
+    } while (!worklist.is_empty ());
+
+  gcc_assert (num >= 0);
+  return num;
+}
+
+/* Find out loop-invariant branch of a conditional statement (COND) if it has,
+   and check whether it is eligible and profitable to perform loop split upon
+   this branch in LOOP.  */
+
+static edge
+get_cond_branch_to_split_loop (struct loop *loop, gcond *cond)
+{
+  edge invar_branch = get_cond_invariant_branch (loop, cond);
+
+  if (!invar_branch)
+    return NULL;
+
+  profile_probability prob = invar_branch->probability;
+
+  /* When accurate profile information is available, and execution
+     frequency of the branch is too low, just let it go.  */
+  if (prob.reliable_p ())
+    {
+      int thres = PARAM_VALUE (PARAM_MIN_LOOP_COND_SPLIT_PROB);
+
+      if (prob < profile_probability::always ().apply_scale (thres, 100))
+	return NULL;
+    }
+
+  /* Add a threshold for increased code size to disable loop split.  */
+  if (compute_added_num_insns (loop, invar_branch)
+      > PARAM_VALUE (PARAM_MAX_LOOP_COND_SPLIT_INSNS))
+    return NULL;
+
+  return invar_branch;
+}
+
+/* Given a loop (LOOP1) with a loop-invariant branch (INVAR_BRANCH) of some
+   conditional statement, perform loop split transformation illustrated
+   as the following graph.
+
+               .-------T------ if (true) ------F------.
+               |                    .---------------. |
+               |                    |               | |
+               v                    |               v v
+          pre-header                |            pre-header
+               | .------------.     |                 | .------------.
+               | |            |     |                 | |            |
+               | v            |     |                 | v            |
+             header           |     |               header           |
+               |              |     |                 |              |
+       [ bool r = cond; ]     |     |                 |              |
+               |              |     |                 |              |
+      .---- if (r) -----.     |     |        .--- if (true) ---.     |
+      |                 |     |     |        |                 |     |
+  invariant             |     |     |    invariant             |     |
+      |                 |     |     |        |                 |     |
+      '---T--->.<---F---'     |     |        '---T--->.<---F---'     |
+               |              |    /                  |              |
+             stmts            |   /                 stmts            |
+               |              |  /                    |              |
+              / \             | /                    / \             |
+     .-------*   *       [ if (!r) ]        .-------*   *            |
+     |           |            |             |           |            |
+     |         latch          |             |         latch          |
+     |           |            |             |           |            |
+     |           '------------'             |           '------------'
+     '------------------------. .-----------'
+             loop1            | |                   loop2
+                              v v
+                             exits
+
+   In the graph, loop1 represents the part derived from original one, and
+   loop2 is duplicated using loop_version (), which corresponds to the part
+   of original one being splitted out.  In loop1, a new bool temporary (r)
+   is introduced to keep value of the condition result.  In original latch
+   edge of loop1, we insert a new conditional statement whose value comes
+   from previous temporary (r), one of its branch goes back to loop1 header
+   as a latch edge, and the other branch goes to loop2 pre-header as an entry
+   edge.  And also in loop2, we abandon the variant branch of the conditional
+   statement candidate by setting a constant bool condition, based on which
+   branch is semi-invariant.  */
+
+static bool
+do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
+{
+  basic_block cond_bb = invar_branch->src;
+  bool true_invar = !!(invar_branch->flags & EDGE_TRUE_VALUE);
+  gcond *cond = as_a <gcond *> (last_stmt (cond_bb));
+
+  gcc_assert (cond_bb->loop_father == loop1);
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+   {
+     fprintf (dump_file, "In %s(), split loop %d at branch<%s>, BB %d\n",
+	      current_function_name (), loop1->num,
+	      true_invar ? "T" : "F", cond_bb->index);
+     print_gimple_stmt (dump_file, cond, 0, TDF_SLIM | TDF_VOPS);
+   }
+
+  initialize_original_copy_tables ();
+
+  struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
+				     profile_probability::always (),
+				     profile_probability::never (),
+				     profile_probability::always (),
+				     profile_probability::always (),
+				     true);
+  if (!loop2)
+    {
+      free_original_copy_tables ();
+      return false;
+    }
+
+  /* Generate a bool type temporary to hold result of the condition.  */
+  tree tmp = make_ssa_name (boolean_type_node);
+  gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
+  gimple *stmt = gimple_build_assign (tmp,
+				      gimple_cond_code (cond),
+				      gimple_cond_lhs (cond),
+				      gimple_cond_rhs (cond));
+
+  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
+  gimple_cond_set_condition (cond, EQ_EXPR, tmp, boolean_true_node);
+  update_stmt (cond);
+
+  basic_block cond_bb_copy = get_bb_copy (cond_bb);
+  gcond *cond_copy = as_a<gcond *> (last_stmt (cond_bb_copy));
+
+  /* Replace the condition in loop2 with a bool constant to let PassManager
+     remove the variant branch after current pass completes.  */
+  if (true_invar)
+    gimple_cond_make_true (cond_copy);
+  else
+    gimple_cond_make_false (cond_copy);
+
+  update_stmt (cond_copy);
+
+  /* Insert a new conditional statement on latch edge of loop1.  This
+     statement acts as a switch to transfer execution from loop1 to loop2,
+     when loop1 enters into invariant state.  */
+  basic_block latch_bb = split_edge (loop_latch_edge (loop1));
+  basic_block break_bb = split_edge (single_pred_edge (latch_bb));
+  gimple *break_cond = gimple_build_cond (EQ_EXPR, tmp, boolean_true_node,
+					  NULL_TREE, NULL_TREE);
+
+  gsi = gsi_last_bb (break_bb);
+  gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT);
+
+  edge to_loop1 = single_succ_edge (break_bb);
+  edge to_loop2 = make_edge (break_bb, loop_preheader_edge (loop2)->src, 0);
+
+  to_loop1->flags &= ~EDGE_FALLTHRU;
+  to_loop1->flags |= true_invar ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE;
+  to_loop2->flags |= true_invar ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
+
+  update_ssa (TODO_update_ssa);
+
+  /* Due to introduction of a control flow edge from loop1 latch to loop2
+     pre-header, we should update PHIs in loop2 to reflect this connection
+     between loop1 and loop2.  */
+  connect_loop_phis (loop1, loop2, to_loop2);
+
+  free_original_copy_tables ();
+
+  rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop1);
+
+  return true;
+}
+
+/* Traverse all conditional statements in LOOP, to find out a good candidate
+   upon which we can do loop split.  */
+
+static bool
+split_loop_on_cond (struct loop *loop)
+{
+  split_info *info = new split_info ();
+  basic_block *bbs = info->bbs = get_loop_body (loop);
+  bool do_split = false;
+
+  /* Allocate an area to keep temporary info, and associate its address
+     with loop aux field.  */
+  loop->aux = info;
+
+  for (unsigned i = 0; i < loop->num_nodes; i++)
+    {
+      basic_block bb = bbs[i];
+
+      /* We only consider conditional statement, which be executed at most once
+	 in each iteration of the loop.  So skip statements in inner loops.  */
+      if ((bb->loop_father != loop) || (bb->flags & BB_IRREDUCIBLE_LOOP))
+	continue;
+
+      /* Actually this check is not a must constraint.  With it, we can ensure
+	 conditional statement will always be executed in each iteration.  */
+      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
+	continue;
+
+      gimple *last = last_stmt (bb);
+
+      if (!last || gimple_code (last) != GIMPLE_COND)
+	continue;
+
+      gcond *cond = as_a <gcond *> (last);
+      edge branch_edge = get_cond_branch_to_split_loop (loop, cond);
+
+      if (branch_edge)
+	{
+	  do_split_loop_on_cond (loop, branch_edge);
+	  do_split = true;
+	  break;
+	}
+    }
+
+  delete info;
+  loop->aux = NULL;
+
+  return do_split;
+}
+
 /* Main entry point.  Perform loop splitting on all suitable loops.  */
 
 static unsigned int
@@ -627,7 +1369,6 @@ tree_ssa_split_loops (void)
   /* Go through all loops starting from innermost.  */
   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
     {
-      class tree_niter_desc niter;
       if (loop->aux)
 	{
 	  /* If any of our inner loops was split, don't split us,
@@ -636,29 +1377,14 @@ tree_ssa_split_loops (void)
 	  continue;
 	}
 
-      if (single_exit (loop)
-	  /* ??? We could handle non-empty latches when we split
-	     the latch edge (not the exit edge), and put the new
-	     exit condition in the new block.  OTOH this executes some
-	     code unconditionally that might have been skipped by the
-	     original exit before.  */
-	  && empty_block_p (loop->latch)
-	  && !optimize_loop_for_size_p (loop)
-	  && easy_exit_values (loop)
-	  && number_of_iterations_exit (loop, single_exit (loop), &niter,
-					false, true)
-	  && niter.cmp != ERROR_MARK
-	  /* We can't yet handle loops controlled by a != predicate.  */
-	  && niter.cmp != NE_EXPR
-	  && can_duplicate_loop_p (loop))
+      if (optimize_loop_for_size_p (loop))
+        continue;
+
+      if (split_loop (loop) || split_loop_on_cond (loop))
 	{
-	  if (split_loop (loop, &niter))
-	    {
-	      /* Mark our containing loop as having had some split inner
-	         loops.  */
-	      loop_outer (loop)->aux = loop;
-	      changed = true;
-	    }
+	  /* Mark our containing loop as having had some split inner loops.  */
+	  loop_outer (loop)->aux = loop;
+	  changed = true;
 	}
     }
Richard Biener Oct. 23, 2019, 9:04 a.m. UTC | #7
On Wed, Oct 23, 2019 at 5:36 AM Feng Xue OS <fxue@os.amperecomputing.com> wrote:
>
> Michael,
>
> > I've only noticed a couple typos, and one minor remark.
> Typos corrected.
>
> > I just wonder why you duplicated these three loops instead of integrating
> > the real body into the existing LI_FROM_INNERMOST loop.  I would have
> > expected your "if (!optimize_loop_for_size_p && split_loop_on_cond)" block
> > to simply be the else block of the existing
> > "if (... conditions for normal loop splitting ...)" block.
> Adjusted to do two kinds of loop-split in same LI_FROM_INNERMOST loop.
>
> > From my perspective it's okay, but you still need the okay of a proper reviewer,
> > for which you might want to state the testing/regression state of this
> > patch relative to trunk.
>
> Richard,
>
>   Is it ok to commit this patch? Bootstrap and regression test passed. And for
> performance, we can get about 7% improvement on spec2017 omnetpp with this
> patch.

Can you please provide the corresponding ChangeLog entries as well and
attach the patch?  It seems to be garbled by some encoding.

Thanks,
Richard.

> Thanks,
> Feng
>
> ---
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 1407d019d14..d41e5aa0215 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -11481,6 +11481,19 @@ The maximum number of branches unswitched in a single loop.
>  @item lim-expensive
>  The minimum cost of an expensive expression in the loop invariant motion.
>
> +@item max-loop-cond-split-insns
> +In a loop, if a branch of a conditional statement is selected since certain
> +loop iteration, any operand that contributes to computation of the conditional
> +expression remains unchanged in all following iterations, the statement is
> +semi-invariant, upon which we can do a kind of loop split transformation.
> +@option{max-loop-cond-split-insns} controls maximum number of insns to be
> +added due to loop split on semi-invariant conditional statement.
> +
> +@item min-loop-cond-split-prob
> +When FDO profile information is available, @option{min-loop-cond-split-prob}
> +specifies minimum threshold for probability of semi-invariant condition
> +statement to trigger loop split.
> +
>  @item iv-consider-all-candidates-bound
>  Bound on number of candidates for induction variables, below which
>  all candidates are considered for each use in induction variable
> diff --git a/gcc/params.def b/gcc/params.def
> index 322c37f8b96..73b59f7465e 100644
> --- a/gcc/params.def
> +++ b/gcc/params.def
> @@ -415,6 +415,20 @@ DEFPARAM(PARAM_MAX_UNSWITCH_LEVEL,
>         "The maximum number of unswitchings in a single loop.",
>         3, 0, 0)
>
> +/* The maximum number of increased insns due to loop split on semi-invariant
> +   condition statement.  */
> +DEFPARAM(PARAM_MAX_LOOP_COND_SPLIT_INSNS,
> +       "max-loop-cond-split-insns",
> +       "The maximum number of insns to be added due to loop split on "
> +       "semi-invariant condition statement.",
> +       100, 0, 0)
> +
> +DEFPARAM(PARAM_MIN_LOOP_COND_SPLIT_PROB,
> +       "min-loop-cond-split-prob",
> +       "The minimum threshold for probability of semi-invariant condition "
> +       "statement to trigger loop split.",
> +       30, 0, 100)
> +
>  /* The maximum number of insns in loop header duplicated by the copy loop
>     headers pass.  */
>  DEFPARAM(PARAM_MAX_LOOP_HEADER_INSNS,
>
> diff --git a/gcc/testsuite/g++.dg/tree-ssa/loop-cond-split-1.C b/gcc/testsuite/g++.dg/tree-ssa/loop-cond-split-1.C
> new file mode 100644
> index 00000000000..51f9da22fc7
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/tree-ssa/loop-cond-split-1.C
> @@ -0,0 +1,33 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -fdump-tree-lsplit-details" } */
> +
> +#include <string>
> +#include <map>
> +
> +using namespace std;
> +
> +class  A
> +{
> +public:
> +  bool empty;
> +  void set (string s);
> +};
> +
> +class  B
> +{
> +  map<int, string> m;
> +  void f ();
> +};
> +
> +extern A *ga;
> +
> +void B::f ()
> +{
> +  for (map<int, string>::iterator iter = m.begin (); iter != m.end (); ++iter)
> +    {
> +      if (ga->empty)
> +        ga->set (iter->second);
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump-times "split loop 1 at branch" 1 "lsplit" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-cond-split-1.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-cond-split-1.c
> new file mode 100644
> index 00000000000..bbd522d6bcd
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-cond-split-1.c
> @@ -0,0 +1,23 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -fdump-tree-lsplit-details" } */
> +
> +__attribute__((pure)) __attribute__((noinline)) int inc (int i)
> +{
> +  return i + 1;
> +}
> +
> +extern int do_something (void);
> +extern int b;
> +
> +void test(int n)
> +{
> +  int i;
> +
> +  for (i = 0; i < n; i = inc (i))
> +    {
> +      if (b)
> +        b = do_something();
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump-times "split loop 1 at branch" 1 "lsplit" } } */
> diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
> index f5f083384bc..5cffd4bb508 100644
> --- a/gcc/tree-ssa-loop-split.c
> +++ b/gcc/tree-ssa-loop-split.c
> @@ -32,7 +32,10 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-ssa-loop.h"
>  #include "tree-ssa-loop-manip.h"
>  #include "tree-into-ssa.h"
> +#include "tree-inline.h"
> +#include "tree-cfgcleanup.h"
>  #include "cfgloop.h"
> +#include "params.h"
>  #include "tree-scalar-evolution.h"
>  #include "gimple-iterator.h"
>  #include "gimple-pretty-print.h"
> @@ -40,7 +43,9 @@ along with GCC; see the file COPYING3.  If not see
>  #include "gimple-fold.h"
>  #include "gimplify-me.h"
>
> -/* This file implements loop splitting, i.e. transformation of loops like
> +/* This file implements two kinds of loop splitting.
> +
> +   One transformation of loops like:
>
>     for (i = 0; i < 100; i++)
>       {
> @@ -487,8 +492,9 @@ compute_new_first_bound (gimple_seq *stmts, class tree_niter_desc *niter,
>     single exit of LOOP.  */
>
>  static bool
> -split_loop (class loop *loop1, class tree_niter_desc *niter)
> +split_loop (class loop *loop1)
>  {
> +  class tree_niter_desc niter;
>    basic_block *bbs;
>    unsigned i;
>    bool changed = false;
> @@ -496,8 +502,28 @@ split_loop (class loop *loop1, class tree_niter_desc *niter)
>    tree border = NULL_TREE;
>    affine_iv iv;
>
> +  if (!single_exit (loop1)
> +      /* ??? We could handle non-empty latches when we split the latch edge
> +         (not the exit edge), and put the new exit condition in the new block.
> +        OTOH this executes some code unconditionally that might have been
> +        skipped by the original exit before.  */
> +      || !empty_block_p (loop1->latch)
> +      || !easy_exit_values (loop1)
> +      || !number_of_iterations_exit (loop1, single_exit (loop1), &niter,
> +                                    false, true)
> +      || niter.cmp == ERROR_MARK
> +      /* We can't yet handle loops controlled by a != predicate.  */
> +      || niter.cmp == NE_EXPR)
> +    return false;
> +
>    bbs = get_loop_body (loop1);
>
> +  if (!can_copy_bbs_p (bbs, loop1->num_nodes))
> +    {
> +      free (bbs);
> +      return false;
> +    }
> +
>    /* Find a splitting opportunity.  */
>    for (i = 0; i < loop1->num_nodes; i++)
>      if ((guard_iv = split_at_bb_p (loop1, bbs[i], &border, &iv)))
> @@ -505,8 +531,8 @@ split_loop (class loop *loop1, class tree_niter_desc *niter)
>         /* Handling opposite steps is not implemented yet.  Neither
>            is handling different step sizes.  */
>         if ((tree_int_cst_sign_bit (iv.step)
> -            != tree_int_cst_sign_bit (niter->control.step))
> -           || !tree_int_cst_equal (iv.step, niter->control.step))
> +            != tree_int_cst_sign_bit (niter.control.step))
> +           || !tree_int_cst_equal (iv.step, niter.control.step))
>           continue;
>
>         /* Find a loop PHI node that defines guard_iv directly,
> @@ -575,7 +601,7 @@ split_loop (class loop *loop1, class tree_niter_desc *niter)
>            Compute the new bound for the guarding IV and patch the
>            loop exit to use it instead of original IV and bound.  */
>         gimple_seq stmts = NULL;
> -       tree newend = compute_new_first_bound (&stmts, niter, border,
> +       tree newend = compute_new_first_bound (&stmts, &niter, border,
>                                                guard_code, guard_init);
>         if (stmts)
>           gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
> @@ -612,6 +638,722 @@ split_loop (class loop *loop1, class tree_niter_desc *niter)
>    return changed;
>  }
>
> +/* Another transformation of loops like:
> +
> +   for (i = INIT (); CHECK (i); i = NEXT ())
> +     {
> +       if (expr (a_1, a_2, ..., a_n))  // expr is pure
> +         a_j = ...;  // change at least one a_j
> +       else
> +         S;          // not change any a_j
> +     }
> +
> +   into:
> +
> +   for (i = INIT (); CHECK (i); i = NEXT ())
> +     {
> +       if (expr (a_1, a_2, ..., a_n))
> +         a_j = ...;
> +       else
> +         {
> +           S;
> +           i = NEXT ();
> +           break;
> +         }
> +     }
> +
> +   for (; CHECK (i); i = NEXT ())
> +     {
> +       S;
> +     }
> +
> +   */
> +
> +/* Data structure to hold temporary information during loop split upon
> +   semi-invariant conditional statement.  */
> +class split_info {
> +public:
> +  /* Array of all basic blocks in a loop, returned by get_loop_body().  */
> +  basic_block *bbs;
> +
> +  /* All memory store/clobber statements in a loop.  */
> +  auto_vec<gimple *> memory_stores;
> +
> +  /* Whether above memory stores vector has been filled.  */
> +  int need_init;
> +
> +  split_info () : bbs (NULL),  need_init (true) { }
> +
> +  ~split_info ()
> +    {
> +      if (bbs)
> +       free (bbs);
> +    }
> +};
> +
> +/* Find all statements with memory-write effect in LOOP, including memory
> +   store and non-pure function call, and keep those in a vector.  This work
> +   is only done one time, for the vector should be constant during analysis
> +   stage of semi-invariant condition.  */
> +
> +static void
> +find_vdef_in_loop (struct loop *loop)
> +{
> +  split_info *info = (split_info *) loop->aux;
> +  gphi *vphi = get_virtual_phi (loop->header);
> +
> +  /* Indicate memory store vector has been filled.  */
> +  info->need_init = false;
> +
> +  /* If loop contains memory operation, there must be a virtual PHI node in
> +     loop header basic block.  */
> +  if (vphi == NULL)
> +    return;
> +
> +  /* All virtual SSA names inside the loop are connected to be a cyclic
> +     graph via virtual PHI nodes.  The virtual PHI node in loop header just
> +     links the first and the last virtual SSA names, by using the last as
> +     PHI operand to define the first.  */
> +  const edge latch = loop_latch_edge (loop);
> +  const tree first = gimple_phi_result (vphi);
> +  const tree last = PHI_ARG_DEF_FROM_EDGE (vphi, latch);
> +
> +  /* The virtual SSA cyclic graph might consist of only one SSA name, who
> +     is defined by itself.
> +
> +       .MEM_1 = PHI <.MEM_2(loop entry edge), .MEM_1(latch edge)>
> +
> +     This means the loop contains only memory loads, so we can skip it.  */
> +  if (first == last)
> +    return;
> +
> +  auto_vec<gimple *> other_stores;
> +  auto_vec<tree> worklist;
> +  auto_bitmap visited;
> +
> +  bitmap_set_bit (visited, SSA_NAME_VERSION (first));
> +  bitmap_set_bit (visited, SSA_NAME_VERSION (last));
> +  worklist.safe_push (last);
> +
> +  do
> +    {
> +      tree vuse = worklist.pop ();
> +      gimple *stmt = SSA_NAME_DEF_STMT (vuse);
> +
> +      /* We mark the first and last SSA names as visited at the beginning,
> +        and reversely start the process from the last SSA name towards the
> +        first, which ensures that this do-while will not touch SSA names
> +        defined outside of the loop.  */
> +      gcc_assert (gimple_bb (stmt)
> +                 && flow_bb_inside_loop_p (loop, gimple_bb (stmt)));
> +
> +      if (gimple_code (stmt) == GIMPLE_PHI)
> +       {
> +         gphi *phi = as_a <gphi *> (stmt);
> +
> +         for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
> +           {
> +             tree arg = gimple_phi_arg_def (stmt, i);
> +
> +             if (bitmap_set_bit (visited, SSA_NAME_VERSION (arg)))
> +               worklist.safe_push (arg);
> +           }
> +       }
> +      else
> +       {
> +         tree prev = gimple_vuse (stmt);
> +
> +         /* Non-pure call statement is conservatively assumed to impact all
> +            memory locations.  So place call statements ahead of other memory
> +            stores in the vector with an idea of of using them as shortcut
> +            terminators to memory alias analysis.  */
> +         if (gimple_code (stmt) == GIMPLE_CALL)
> +           info->memory_stores.safe_push (stmt);
> +         else
> +           other_stores.safe_push (stmt);
> +
> +         if (bitmap_set_bit (visited, SSA_NAME_VERSION (prev)))
> +           worklist.safe_push (prev);
> +       }
> +    } while (!worklist.is_empty ());
> +
> +    info->memory_stores.safe_splice (other_stores);
> +}
> +
> +
> +/* Given STMT, memory load or pure call statement, check whether it is impacted
> +   by some memory store in LOOP, excluding trace starting from SKIP_HEAD (the
> +   trace is composed of SKIP_HEAD and those basic block dominated by it, always
> +   corresponds to one branch of a conditional statement).  If SKIP_HEAD is
> +   NULL, all basic blocks of LOOP are checked.  */
> +
> +static bool
> +vuse_semi_invariant_p (struct loop *loop, gimple *stmt,
> +                      const_basic_block skip_head)
> +{
> +  split_info *info = (split_info *) loop->aux;
> +
> +  /* Collect memory store/clobber statements if have not do that.  */
> +  if (info->need_init)
> +    find_vdef_in_loop (loop);
> +
> +  tree rhs = is_gimple_assign (stmt) ? gimple_assign_rhs1 (stmt) : NULL_TREE;
> +  ao_ref ref;
> +  gimple *store;
> +  unsigned i;
> +
> +  ao_ref_init (&ref, rhs);
> +
> +  FOR_EACH_VEC_ELT (info->memory_stores, i, store)
> +    {
> +      /* Skip basic blocks dominated by SKIP_HEAD, if non-NULL.  */
> +      if (skip_head
> +         && dominated_by_p (CDI_DOMINATORS, gimple_bb (store), skip_head))
> +       continue;
> +
> +      if (!ref.ref || stmt_may_clobber_ref_p_1 (store, &ref))
> +       return false;
> +    }
> +
> +  return true;
> +}
> +
> +/* Forward declaration.  */
> +
> +static bool
> +stmt_semi_invariant_p (struct loop *loop, gimple *stmt,
> +                      const_basic_block skip_head);
> +
> +/* Suppose one condition branch, led by SKIP_HEAD, is not executed since
> +   certain iteration of LOOP, check whether an SSA name (NAME) remains
> +   unchanged in next iteration.  We call this characteristic semi-
> +   invariantness.  SKIP_HEAD might be NULL, if so, nothing excluded, all
> +   basic blocks and control flows in the loop will be considered.  If non-
> +   NULL, SSA name to check is supposed to be defined before SKIP_HEAD.  */
> +
> +static bool
> +ssa_semi_invariant_p (struct loop *loop, const tree name,
> +                     const_basic_block skip_head)
> +{
> +  gimple *def = SSA_NAME_DEF_STMT (name);
> +  const_basic_block def_bb = gimple_bb (def);
> +
> +  /* An SSA name defined outside a loop is definitely semi-invariant.  */
> +  if (!def_bb || !flow_bb_inside_loop_p (loop, def_bb))
> +    return true;
> +
> +  if (gimple_code (def) == GIMPLE_PHI)
> +    {
> +      /* For PHI node that is not in loop header, its source operands should
> +        be defined inside the loop, which are seen as loop variant.  */
> +      if (def_bb != loop->header || !skip_head)
> +       return false;
> +
> +      const_edge latch = loop_latch_edge (loop);
> +      tree from = PHI_ARG_DEF_FROM_EDGE (as_a <gphi *> (def), latch);
> +
> +      /* A PHI node in loop header contains two source operands, one is
> +        initial value, the other is the copy of last iteration through loop
> +        latch, we call it latch value.  From the PHI node to definition
> +        of latch value, if excluding branch trace from SKIP_HEAD, there
> +        is no definition of other version of same variable, SSA name defined
> +        by the PHI node is semi-invariant.
> +
> +                         loop entry
> +                              |     .--- latch ---.
> +                              |     |             |
> +                              v     v             |
> +                  x_1 = PHI <x_0,  x_3>           |
> +                           |                      |
> +                           v                      |
> +              .------- if (cond) -------.         |
> +              |                         |         |
> +              |                     [ SKIP ]      |
> +              |                         |         |
> +              |                     x_2 = ...     |
> +              |                         |         |
> +              '---- T ---->.<---- F ----'         |
> +                           |                      |
> +                           v                      |
> +                  x_3 = PHI <x_1, x_2>            |
> +                           |                      |
> +                           '----------------------'
> +
> +       Suppose in certain iteration, execution flow in above graph goes
> +       through true branch, which means that one source value to define
> +       x_3 in false branch (x2) is skipped, x_3 only comes from x_1, and
> +       x_1 in next iterations is defined by x_3, we know that x_1 will
> +       never changed if COND always chooses true branch from then on.  */
> +
> +      while (from != name)
> +       {
> +         /* A new value comes from a CONSTANT.  */
> +         if (TREE_CODE (from) != SSA_NAME)
> +           return false;
> +
> +         gimple *stmt = SSA_NAME_DEF_STMT (from);
> +         const_basic_block bb = gimple_bb (stmt);
> +
> +         /* A new value comes from outside of loop.  */
> +         if (!bb || !flow_bb_inside_loop_p (loop, bb))
> +           return false;
> +
> +         from = NULL_TREE;
> +
> +         if (gimple_code (stmt) == GIMPLE_PHI)
> +           {
> +             gphi *phi = as_a <gphi *> (stmt);
> +
> +             for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
> +               {
> +                 const_edge e = gimple_phi_arg_edge (phi, i);
> +
> +                 /* Don't consider redefinitions in excluded basic blocks.  */
> +                 if (!dominated_by_p (CDI_DOMINATORS, e->src, skip_head))
> +                   {
> +                     /* There are more than one source operands that can
> +                        provide value to the SSA name, it is variant.  */
> +                     if (from)
> +                       return false;
> +
> +                     from = gimple_phi_arg_def (phi, i);
> +                   }
> +               }
> +           }
> +         else if (gimple_code (stmt) == GIMPLE_ASSIGN)
> +           {
> +             /* For simple value copy, check its rhs instead.  */
> +             if (gimple_assign_ssa_name_copy_p (stmt))
> +               from = gimple_assign_rhs1 (stmt);
> +           }
> +
> +         /* Any other kind of definition is deemed to introduce a new value
> +            to the SSA name.  */
> +         if (!from)
> +           return false;
> +       }
> +       return true;
> +    }
> +
> +  /* Value originated from volatile memory load or return of normal (non-
> +     const/pure) call should not be treated as constant in each iteration.  */
> +  if (gimple_has_side_effects (def))
> +    return false;
> +
> +  /* Check if any memory store may kill memory load at this place.  */
> +  if (gimple_vuse (def) && !vuse_semi_invariant_p (loop, def, skip_head))
> +    return false;
> +
> +  /* Check operands of definition statement of the SSA name.  */
> +  return stmt_semi_invariant_p (loop, def, skip_head);
> +}
> +
> +/* Check whether STMT is semi-invariant in LOOP, iff all its operands are
> +   semi-invariant.  Trace composed of basic block SKIP_HEAD and basic blocks
> +   dominated by it are excluded from the loop.  */
> +
> +static bool
> +stmt_semi_invariant_p (struct loop *loop, gimple *stmt,
> +                      const_basic_block skip_head)
> +{
> +  ssa_op_iter iter;
> +  tree use;
> +
> +  /* Although operand of a statement might be SSA name, CONSTANT or VARDECL,
> +     here we only need to check SSA name operands.  This is because check on
> +     VARDECL operands, which involve memory loads, must have been done
> +     prior to invocation of this function in vuse_semi_invariant_p.  */
> +  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
> +    {
> +      if (!ssa_semi_invariant_p (loop, use, skip_head))
> +       return false;
> +    }
> +
> +  return true;
> +}
> +
> +/* Determine when conditional statement never transfers execution to one of its
> +   branch, whether we can remove the branch's leading basic block (BRANCH_BB)
> +   and those basic blocks dominated by BRANCH_BB.  */
> +
> +static bool
> +branch_removable_p (basic_block branch_bb)
> +{
> +  if (single_pred_p (branch_bb))
> +    return true;
> +
> +  edge e;
> +  edge_iterator ei;
> +
> +  FOR_EACH_EDGE (e, ei, branch_bb->preds)
> +    {
> +      if (dominated_by_p (CDI_DOMINATORS, e->src, branch_bb))
> +       continue;
> +
> +      if (dominated_by_p (CDI_DOMINATORS, branch_bb, e->src))
> +       continue;
> +
> +       /* The branch can be reached from opposite branch, or from some
> +         statement not dominated by the conditional statement.  */
> +      return false;
> +    }
> +
> +  return true;
> +}
> +
> +/* Find out which branch of a conditional statement (COND) is invariant in the
> +   execution context of LOOP.  That is: once the branch is selected in certain
> +   iteration of the loop, any operand that contributes to computation of the
> +   conditional statement remains unchanged in all following iterations.  */
> +
> +static edge
> +get_cond_invariant_branch (struct loop *loop, gcond *cond)
> +{
> +  basic_block cond_bb = gimple_bb (cond);
> +  basic_block targ_bb[2];
> +  bool invar[2];
> +  unsigned invar_checks;
> +
> +  for (unsigned i = 0; i < 2; i++)
> +    {
> +      targ_bb[i] = EDGE_SUCC (cond_bb, i)->dest;
> +
> +      /* One branch directs to loop exit, no need to perform loop split upon
> +        this conditional statement.  Firstly, it is trivial if the exit branch
> +        is semi-invariant, for the statement is just to break loop.  Secondly,
> +        if the opposite branch is semi-invariant, it means that the statement
> +        is real loop-invariant, which is covered by loop unswitch.  */
> +      if (!flow_bb_inside_loop_p (loop, targ_bb[i]))
> +       return NULL;
> +    }
> +
> +  invar_checks = 0;
> +
> +  for (unsigned i = 0; i < 2; i++)
> +    {
> +      invar[!i] = false;
> +
> +      if (!branch_removable_p (targ_bb[i]))
> +       continue;
> +
> +      /* Given a semi-invariant branch, if its opposite branch dominates
> +        loop latch, it and its following trace will only be executed in
> +        final iteration of loop, namely it is not part of repeated body
> +        of the loop.  Similar to the above case that the branch is loop
> +        exit, no need to split loop.  */
> +      if (dominated_by_p (CDI_DOMINATORS, loop->latch, targ_bb[i]))
> +       continue;
> +
> +      invar[!i] = stmt_semi_invariant_p (loop, cond, targ_bb[i]);
> +      invar_checks++;
> +    }
> +
> +  /* With both branches being invariant (handled by loop unswitch) or
> +     variant is not what we want.  */
> +  if (invar[0] ^ !invar[1])
> +    return NULL;
> +
> +  /* Found a real loop-invariant condition, do nothing.  */
> +  if (invar_checks < 2 && stmt_semi_invariant_p (loop, cond, NULL))
> +    return NULL;
> +
> +  return EDGE_SUCC (cond_bb, (unsigned) invar[1]);
> +}
> +
> +/* Calculate increased code size measured by estimated insn number if applying
> +   loop split upon certain branch (BRANCH_EDGE) of a conditional statement.  */
> +
> +static int
> +compute_added_num_insns (struct loop *loop, const_edge branch_edge)
> +{
> +  basic_block cond_bb = branch_edge->src;
> +  unsigned branch = EDGE_SUCC (cond_bb, 1) == branch_edge;
> +  basic_block opposite_bb = EDGE_SUCC (cond_bb, !branch)->dest;
> +  basic_block *bbs = ((split_info *) loop->aux)->bbs;
> +  int num = 0;
> +
> +  for (unsigned i = 0; i < loop->num_nodes; i++)
> +    {
> +      /* Do no count basic blocks only in opposite branch.  */
> +      if (dominated_by_p (CDI_DOMINATORS, bbs[i], opposite_bb))
> +       continue;
> +
> +      num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights);
> +    }
> +
> +  /* It is unnecessary to evaluate expression of the conditional statement
> +     in new loop that contains only invariant branch.  This expression should
> +     be constant value (either true or false).  Exclude code size of insns
> +     that contribute to computation of the expression.  */
> +
> +  auto_vec<gimple *> worklist;
> +  hash_set<gimple *> removed;
> +  gimple *stmt = last_stmt (cond_bb);
> +
> +  worklist.safe_push (stmt);
> +  removed.add (stmt);
> +  num -= estimate_num_insns (stmt, &eni_size_weights);
> +
> +  do
> +    {
> +      ssa_op_iter opnd_iter;
> +      use_operand_p opnd_p;
> +
> +      stmt = worklist.pop ();
> +      FOR_EACH_PHI_OR_STMT_USE (opnd_p, stmt, opnd_iter, SSA_OP_USE)
> +       {
> +         tree opnd = USE_FROM_PTR (opnd_p);
> +
> +         if (TREE_CODE (opnd) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (opnd))
> +           continue;
> +
> +         gimple *opnd_stmt = SSA_NAME_DEF_STMT (opnd);
> +         use_operand_p use_p;
> +         imm_use_iterator use_iter;
> +
> +         if (removed.contains (opnd_stmt)
> +             || !flow_bb_inside_loop_p (loop, gimple_bb (opnd_stmt)))
> +           continue;
> +
> +         FOR_EACH_IMM_USE_FAST (use_p, use_iter, opnd)
> +           {
> +             gimple *use_stmt = USE_STMT (use_p);
> +
> +             if (!is_gimple_debug (use_stmt) && !removed.contains (use_stmt))
> +               {
> +                 opnd_stmt = NULL;
> +                 break;
> +               }
> +           }
> +
> +         if (opnd_stmt)
> +           {
> +             worklist.safe_push (opnd_stmt);
> +             removed.add (opnd_stmt);
> +             num -= estimate_num_insns (opnd_stmt, &eni_size_weights);
> +           }
> +       }
> +    } while (!worklist.is_empty ());
> +
> +  gcc_assert (num >= 0);
> +  return num;
> +}
> +
> +/* Find out loop-invariant branch of a conditional statement (COND) if it has,
> +   and check whether it is eligible and profitable to perform loop split upon
> +   this branch in LOOP.  */
> +
> +static edge
> +get_cond_branch_to_split_loop (struct loop *loop, gcond *cond)
> +{
> +  edge invar_branch = get_cond_invariant_branch (loop, cond);
> +
> +  if (!invar_branch)
> +    return NULL;
> +
> +  profile_probability prob = invar_branch->probability;
> +
> +  /* When accurate profile information is available, and execution
> +     frequency of the branch is too low, just let it go.  */
> +  if (prob.reliable_p ())
> +    {
> +      int thres = PARAM_VALUE (PARAM_MIN_LOOP_COND_SPLIT_PROB);
> +
> +      if (prob < profile_probability::always ().apply_scale (thres, 100))
> +       return NULL;
> +    }
> +
> +  /* Add a threshold for increased code size to disable loop split.  */
> +  if (compute_added_num_insns (loop, invar_branch)
> +      > PARAM_VALUE (PARAM_MAX_LOOP_COND_SPLIT_INSNS))
> +    return NULL;
> +
> +  return invar_branch;
> +}
> +
> +/* Given a loop (LOOP1) with a loop-invariant branch (INVAR_BRANCH) of some
> +   conditional statement, perform loop split transformation illustrated
> +   as the following graph.
> +
> +               .-------T------ if (true) ------F------.
> +               |                    .---------------. |
> +               |                    |               | |
> +               v                    |               v v
> +          pre-header                |            pre-header
> +               | .------------.     |                 | .------------.
> +               | |            |     |                 | |            |
> +               | v            |     |                 | v            |
> +             header           |     |               header           |
> +               |              |     |                 |              |
> +       [ bool r = cond; ]     |     |                 |              |
> +               |              |     |                 |              |
> +      .---- if (r) -----.     |     |        .--- if (true) ---.     |
> +      |                 |     |     |        |                 |     |
> +  invariant             |     |     |    invariant             |     |
> +      |                 |     |     |        |                 |     |
> +      '---T--->.<---F---'     |     |        '---T--->.<---F---'     |
> +               |              |    /                  |              |
> +             stmts            |   /                 stmts            |
> +               |              |  /                    |              |
> +              / \             | /                    / \             |
> +     .-------*   *       [ if (!r) ]        .-------*   *            |
> +     |           |            |             |           |            |
> +     |         latch          |             |         latch          |
> +     |           |            |             |           |            |
> +     |           '------------'             |           '------------'
> +     '------------------------. .-----------'
> +             loop1            | |                   loop2
> +                              v v
> +                             exits
> +
> +   In the graph, loop1 represents the part derived from original one, and
> +   loop2 is duplicated using loop_version (), which corresponds to the part
> +   of original one being splitted out.  In loop1, a new bool temporary (r)
> +   is introduced to keep value of the condition result.  In original latch
> +   edge of loop1, we insert a new conditional statement whose value comes
> +   from previous temporary (r), one of its branch goes back to loop1 header
> +   as a latch edge, and the other branch goes to loop2 pre-header as an entry
> +   edge.  And also in loop2, we abandon the variant branch of the conditional
> +   statement candidate by setting a constant bool condition, based on which
> +   branch is semi-invariant.  */
> +
> +static bool
> +do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
> +{
> +  basic_block cond_bb = invar_branch->src;
> +  bool true_invar = !!(invar_branch->flags & EDGE_TRUE_VALUE);
> +  gcond *cond = as_a <gcond *> (last_stmt (cond_bb));
> +
> +  gcc_assert (cond_bb->loop_father == loop1);
> +
> +  if (dump_file && (dump_flags & TDF_DETAILS))
> +   {
> +     fprintf (dump_file, "In %s(), split loop %d at branch<%s>, BB %d\n",
> +             current_function_name (), loop1->num,
> +             true_invar ? "T" : "F", cond_bb->index);
> +     print_gimple_stmt (dump_file, cond, 0, TDF_SLIM | TDF_VOPS);
> +   }
> +
> +  initialize_original_copy_tables ();
> +
> +  struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
> +                                    profile_probability::always (),
> +                                    profile_probability::never (),
> +                                    profile_probability::always (),
> +                                    profile_probability::always (),
> +                                    true);
> +  if (!loop2)
> +    {
> +      free_original_copy_tables ();
> +      return false;
> +    }
> +
> +  /* Generate a bool type temporary to hold result of the condition.  */
> +  tree tmp = make_ssa_name (boolean_type_node);
> +  gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
> +  gimple *stmt = gimple_build_assign (tmp,
> +                                     gimple_cond_code (cond),
> +                                     gimple_cond_lhs (cond),
> +                                     gimple_cond_rhs (cond));
> +
> +  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
> +  gimple_cond_set_condition (cond, EQ_EXPR, tmp, boolean_true_node);
> +  update_stmt (cond);
> +
> +  basic_block cond_bb_copy = get_bb_copy (cond_bb);
> +  gcond *cond_copy = as_a<gcond *> (last_stmt (cond_bb_copy));
> +
> +  /* Replace the condition in loop2 with a bool constant to let PassManager
> +     remove the variant branch after current pass completes.  */
> +  if (true_invar)
> +    gimple_cond_make_true (cond_copy);
> +  else
> +    gimple_cond_make_false (cond_copy);
> +
> +  update_stmt (cond_copy);
> +
> +  /* Insert a new conditional statement on latch edge of loop1.  This
> +     statement acts as a switch to transfer execution from loop1 to loop2,
> +     when loop1 enters into invariant state.  */
> +  basic_block latch_bb = split_edge (loop_latch_edge (loop1));
> +  basic_block break_bb = split_edge (single_pred_edge (latch_bb));
> +  gimple *break_cond = gimple_build_cond (EQ_EXPR, tmp, boolean_true_node,
> +                                         NULL_TREE, NULL_TREE);
> +
> +  gsi = gsi_last_bb (break_bb);
> +  gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT);
> +
> +  edge to_loop1 = single_succ_edge (break_bb);
> +  edge to_loop2 = make_edge (break_bb, loop_preheader_edge (loop2)->src, 0);
> +
> +  to_loop1->flags &= ~EDGE_FALLTHRU;
> +  to_loop1->flags |= true_invar ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE;
> +  to_loop2->flags |= true_invar ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
> +
> +  update_ssa (TODO_update_ssa);
> +
> +  /* Due to introduction of a control flow edge from loop1 latch to loop2
> +     pre-header, we should update PHIs in loop2 to reflect this connection
> +     between loop1 and loop2.  */
> +  connect_loop_phis (loop1, loop2, to_loop2);
> +
> +  free_original_copy_tables ();
> +
> +  rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop1);
> +
> +  return true;
> +}
> +
> +/* Traverse all conditional statements in LOOP, to find out a good candidate
> +   upon which we can do loop split.  */
> +
> +static bool
> +split_loop_on_cond (struct loop *loop)
> +{
> +  split_info *info = new split_info ();
> +  basic_block *bbs = info->bbs = get_loop_body (loop);
> +  bool do_split = false;
> +
> +  /* Allocate an area to keep temporary info, and associate its address
> +     with loop aux field.  */
> +  loop->aux = info;
> +
> +  for (unsigned i = 0; i < loop->num_nodes; i++)
> +    {
> +      basic_block bb = bbs[i];
> +
> +      /* We only consider conditional statement, which be executed at most once
> +        in each iteration of the loop.  So skip statements in inner loops.  */
> +      if ((bb->loop_father != loop) || (bb->flags & BB_IRREDUCIBLE_LOOP))
> +       continue;
> +
> +      /* Actually this check is not a must constraint.  With it, we can ensure
> +        conditional statement will always be executed in each iteration.  */
> +      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
> +       continue;
> +
> +      gimple *last = last_stmt (bb);
> +
> +      if (!last || gimple_code (last) != GIMPLE_COND)
> +       continue;
> +
> +      gcond *cond = as_a <gcond *> (last);
> +      edge branch_edge = get_cond_branch_to_split_loop (loop, cond);
> +
> +      if (branch_edge)
> +       {
> +         do_split_loop_on_cond (loop, branch_edge);
> +         do_split = true;
> +         break;
> +       }
> +    }
> +
> +  delete info;
> +  loop->aux = NULL;
> +
> +  return do_split;
> +}
> +
>  /* Main entry point.  Perform loop splitting on all suitable loops.  */
>
>  static unsigned int
> @@ -627,7 +1369,6 @@ tree_ssa_split_loops (void)
>    /* Go through all loops starting from innermost.  */
>    FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
>      {
> -      class tree_niter_desc niter;
>        if (loop->aux)
>         {
>           /* If any of our inner loops was split, don't split us,
> @@ -636,29 +1377,14 @@ tree_ssa_split_loops (void)
>           continue;
>         }
>
> -      if (single_exit (loop)
> -         /* ??? We could handle non-empty latches when we split
> -            the latch edge (not the exit edge), and put the new
> -            exit condition in the new block.  OTOH this executes some
> -            code unconditionally that might have been skipped by the
> -            original exit before.  */
> -         && empty_block_p (loop->latch)
> -         && !optimize_loop_for_size_p (loop)
> -         && easy_exit_values (loop)
> -         && number_of_iterations_exit (loop, single_exit (loop), &niter,
> -                                       false, true)
> -         && niter.cmp != ERROR_MARK
> -         /* We can't yet handle loops controlled by a != predicate.  */
> -         && niter.cmp != NE_EXPR
> -         && can_duplicate_loop_p (loop))
> +      if (optimize_loop_for_size_p (loop))
> +        continue;
> +
> +      if (split_loop (loop) || split_loop_on_cond (loop))
>         {
> -         if (split_loop (loop, &niter))
> -           {
> -             /* Mark our containing loop as having had some split inner
> -                loops.  */
> -             loop_outer (loop)->aux = loop;
> -             changed = true;
> -           }
> +         /* Mark our containing loop as having had some split inner loops.  */
> +         loop_outer (loop)->aux = loop;
> +         changed = true;
>         }
>      }
>
> --
> 2.17.1
Feng Xue OS Oct. 23, 2019, 9:11 a.m. UTC | #8
Patch attached.

Feng
Richard Biener Oct. 23, 2019, 10:28 a.m. UTC | #9
On Wed, Oct 23, 2019 at 11:11 AM Feng Xue OS
<fxue@os.amperecomputing.com> wrote:
>
> Patch attached.

+      /* For PHI node that is not in loop header, its source operands should
+        be defined inside the loop, which are seen as loop variant.  */
+      if (def_bb != loop->header || !skip_head)
+       return false;

so if we have

 for (;;)
  {
     if (x)
       a = ..;
     else
       a = ...;
     if (cond-to-split-on dependent on a)
...
  }

the above is too restrictive in case 'x' is semi-invariant as well, correct?

+         /* A new value comes from outside of loop.  */
+         if (!bb || !flow_bb_inside_loop_p (loop, bb))
+           return false;

but that means starting from the second iteration the value is invariant.

+                 /* Don't consider redefinitions in excluded basic blocks.  */
+                 if (!dominated_by_p (CDI_DOMINATORS, e->src, skip_head))
+                   {
+                     /* There are more than one source operands that can
+                        provide value to the SSA name, it is variant.  */
+                     if (from)
+                       return false;

they might be the same though, for PHIs with > 2 arguments.

In the cycle handling you are not recursing via stmt_semi_invariant_p
but only handle SSA name copies - any particular reason for that?

+static bool
+branch_removable_p (basic_block branch_bb)
+{
+  if (single_pred_p (branch_bb))
+    return true;

I'm not sure what this function tests - at least the single_pred_p check
looks odd to me given the dominator checks later.  The single predecessor
could simply be a forwarder.  I wonder if you are looking for branches forming
an irreducible loop?  I think you can then check EDGE_IRREDUCIBLE_LOOP
or BB_IRREDUCIBLE_LOOP on the condition block (btw, I don't see
testcases covering the appearant special-cases in the patch - refering to
existing ones via a comment often helps understanding the code).

+
+  return EDGE_SUCC (cond_bb, (unsigned) invar[1]);
+}

magic ensures that invar[1] is always the invariant edge?  Oh, it's a bool.
Ick.  I wonder if logic with int invariant_edge = -1; and the loop setting
it to either 0 or 1 would be easier to follow...

Note your stmt_semi_invariant_p check is exponential for a condition
like

   _1 = 1;
   _2 = _1 + _1;
   _3 = _2 + _2;
   if (_3 != param_4(D))

because you don't track ops you already proved semi-invariant.  We've
run into such situation repeatedly in SCEV analysis so I doubt it can be
disregarded as irrelevant in practice.  A worklist approach could then
also get rid of the recursion.  You are already computing the stmts
forming the condition in compute_added_num_insns so another option
is to re-use that.

Btw, I wonder if we can simply re-use PARAM_MAX_PEELED_INSNS
instead of adding yet another param (it also happens to have the same
size).  Because we are "peeling" the loop.

+  edge invar_branch = get_cond_invariant_branch (loop, cond);
+
+  if (!invar_branch)
+    return NULL;

extra vertical space is unwanted in such cases.

+  if (dump_file && (dump_flags & TDF_DETAILS))
+   {
+     fprintf (dump_file, "In %s(), split loop %d at branch<%s>, BB %d\n",
+             current_function_name (), loop1->num,
+             true_invar ? "T" : "F", cond_bb->index);
+     print_gimple_stmt (dump_file, cond, 0, TDF_SLIM | TDF_VOPS);
+   }

can you please use sth like

  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
                             cond, "loop split on semi-invariant condition");

so -fopt-info-loop will show it?

+  /* Generate a bool type temporary to hold result of the condition.  */
+  tree tmp = make_ssa_name (boolean_type_node);
+  gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
+  gimple *stmt = gimple_build_assign (tmp,
+                                     gimple_cond_code (cond),
+                                     gimple_cond_lhs (cond),
+                                     gimple_cond_rhs (cond));

shorter is

   gimple_seq stmts = NULL;
   tree tmp = gimple_build (&stmts, gimple_cond_code (cond),
                                      boolean_type_node,
gimple_cond_lhs (cond), gimple_cond_rhs (cond));
   gsi_insert_seq_before (&gsi, stmts, GSI_NEW_STMT);

+  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
+  gimple_cond_set_condition (cond, EQ_EXPR, tmp, boolean_true_node);

but I wonder what's the point here to move the condition computation to
a temporary?  Why not just build the original condition again for break_cond?

in split_loop_on_cond you'll find the first semi-invariant condition
to split on,
but we'll not visit the split loop again (also for original splitting I guess).
Don't we eventually want to recurse on that?

Otherwise the patch looks reasonable.  Sorry for the many bits above and the
late real review from me...

Thanks,
Richard.


> Feng
>
> ________________________________________
> From: Richard Biener <richard.guenther@gmail.com>
> Sent: Wednesday, October 23, 2019 5:04 PM
> To: Feng Xue OS
> Cc: Michael Matz; Philipp Tomsich; gcc-patches@gcc.gnu.org; Christoph Müllner; erick.ochoa@theobroma-systems.com
> Subject: Re: [PATCH V3] Loop split upon semi-invariant condition (PR tree-optimization/89134)
>
> On Wed, Oct 23, 2019 at 5:36 AM Feng Xue OS <fxue@os.amperecomputing.com> wrote:
> >
> > Michael,
> >
> > > I've only noticed a couple typos, and one minor remark.
> > Typos corrected.
> >
> > > I just wonder why you duplicated these three loops instead of integrating
> > > the real body into the existing LI_FROM_INNERMOST loop.  I would have
> > > expected your "if (!optimize_loop_for_size_p && split_loop_on_cond)" block
> > > to simply be the else block of the existing
> > > "if (... conditions for normal loop splitting ...)" block.
> > Adjusted to do two kinds of loop-split in same LI_FROM_INNERMOST loop.
> >
> > > From my perspective it's okay, but you still need the okay of a proper reviewer,
> > > for which you might want to state the testing/regression state of this
> > > patch relative to trunk.
> >
> > Richard,
> >
> >   Is it ok to commit this patch? Bootstrap and regression test passed. And for
> > performance, we can get about 7% improvement on spec2017 omnetpp with this
> > patch.
>
> Can you please provide the corresponding ChangeLog entries as well and
> attach the patch?  It seems to be garbled by some encoding.
>
> Thanks,
> Richard.
>
> > Thanks,
> > Feng
> >
> > ---
> > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> > index 1407d019d14..d41e5aa0215 100644
> > --- a/gcc/doc/invoke.texi
> > +++ b/gcc/doc/invoke.texi
> > @@ -11481,6 +11481,19 @@ The maximum number of branches unswitched in a single loop.
> >  @item lim-expensive
> >  The minimum cost of an expensive expression in the loop invariant motion.
> >
> > +@item max-loop-cond-split-insns
> > +In a loop, if a branch of a conditional statement is selected since certain
> > +loop iteration, any operand that contributes to computation of the conditional
> > +expression remains unchanged in all following iterations, the statement is
> > +semi-invariant, upon which we can do a kind of loop split transformation.
> > +@option{max-loop-cond-split-insns} controls maximum number of insns to be
> > +added due to loop split on semi-invariant conditional statement.
> > +
> > +@item min-loop-cond-split-prob
> > +When FDO profile information is available, @option{min-loop-cond-split-prob}
> > +specifies minimum threshold for probability of semi-invariant condition
> > +statement to trigger loop split.
> > +
> >  @item iv-consider-all-candidates-bound
> >  Bound on number of candidates for induction variables, below which
> >  all candidates are considered for each use in induction variable
> > diff --git a/gcc/params.def b/gcc/params.def
> > index 322c37f8b96..73b59f7465e 100644
> > --- a/gcc/params.def
> > +++ b/gcc/params.def
> > @@ -415,6 +415,20 @@ DEFPARAM(PARAM_MAX_UNSWITCH_LEVEL,
> >         "The maximum number of unswitchings in a single loop.",
> >         3, 0, 0)
> >
> > +/* The maximum number of increased insns due to loop split on semi-invariant
> > +   condition statement.  */
> > +DEFPARAM(PARAM_MAX_LOOP_COND_SPLIT_INSNS,
> > +       "max-loop-cond-split-insns",
> > +       "The maximum number of insns to be added due to loop split on "
> > +       "semi-invariant condition statement.",
> > +       100, 0, 0)
> > +
> > +DEFPARAM(PARAM_MIN_LOOP_COND_SPLIT_PROB,
> > +       "min-loop-cond-split-prob",
> > +       "The minimum threshold for probability of semi-invariant condition "
> > +       "statement to trigger loop split.",
> > +       30, 0, 100)
> > +
> >  /* The maximum number of insns in loop header duplicated by the copy loop
> >     headers pass.  */
> >  DEFPARAM(PARAM_MAX_LOOP_HEADER_INSNS,
> >
> > diff --git a/gcc/testsuite/g++.dg/tree-ssa/loop-cond-split-1.C b/gcc/testsuite/g++.dg/tree-ssa/loop-cond-split-1.C
> > new file mode 100644
> > index 00000000000..51f9da22fc7
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/tree-ssa/loop-cond-split-1.C
> > @@ -0,0 +1,33 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O3 -fdump-tree-lsplit-details" } */
> > +
> > +#include <string>
> > +#include <map>
> > +
> > +using namespace std;
> > +
> > +class  A
> > +{
> > +public:
> > +  bool empty;
> > +  void set (string s);
> > +};
> > +
> > +class  B
> > +{
> > +  map<int, string> m;
> > +  void f ();
> > +};
> > +
> > +extern A *ga;
> > +
> > +void B::f ()
> > +{
> > +  for (map<int, string>::iterator iter = m.begin (); iter != m.end (); ++iter)
> > +    {
> > +      if (ga->empty)
> > +        ga->set (iter->second);
> > +    }
> > +}
> > +
> > +/* { dg-final { scan-tree-dump-times "split loop 1 at branch" 1 "lsplit" } } */
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-cond-split-1.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-cond-split-1.c
> > new file mode 100644
> > index 00000000000..bbd522d6bcd
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-cond-split-1.c
> > @@ -0,0 +1,23 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O3 -fdump-tree-lsplit-details" } */
> > +
> > +__attribute__((pure)) __attribute__((noinline)) int inc (int i)
> > +{
> > +  return i + 1;
> > +}
> > +
> > +extern int do_something (void);
> > +extern int b;
> > +
> > +void test(int n)
> > +{
> > +  int i;
> > +
> > +  for (i = 0; i < n; i = inc (i))
> > +    {
> > +      if (b)
> > +        b = do_something();
> > +    }
> > +}
> > +
> > +/* { dg-final { scan-tree-dump-times "split loop 1 at branch" 1 "lsplit" } } */
> > diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
> > index f5f083384bc..5cffd4bb508 100644
> > --- a/gcc/tree-ssa-loop-split.c
> > +++ b/gcc/tree-ssa-loop-split.c
> > @@ -32,7 +32,10 @@ along with GCC; see the file COPYING3.  If not see
> >  #include "tree-ssa-loop.h"
> >  #include "tree-ssa-loop-manip.h"
> >  #include "tree-into-ssa.h"
> > +#include "tree-inline.h"
> > +#include "tree-cfgcleanup.h"
> >  #include "cfgloop.h"
> > +#include "params.h"
> >  #include "tree-scalar-evolution.h"
> >  #include "gimple-iterator.h"
> >  #include "gimple-pretty-print.h"
> > @@ -40,7 +43,9 @@ along with GCC; see the file COPYING3.  If not see
> >  #include "gimple-fold.h"
> >  #include "gimplify-me.h"
> >
> > -/* This file implements loop splitting, i.e. transformation of loops like
> > +/* This file implements two kinds of loop splitting.
> > +
> > +   One transformation of loops like:
> >
> >     for (i = 0; i < 100; i++)
> >       {
> > @@ -487,8 +492,9 @@ compute_new_first_bound (gimple_seq *stmts, class tree_niter_desc *niter,
> >     single exit of LOOP.  */
> >
> >  static bool
> > -split_loop (class loop *loop1, class tree_niter_desc *niter)
> > +split_loop (class loop *loop1)
> >  {
> > +  class tree_niter_desc niter;
> >    basic_block *bbs;
> >    unsigned i;
> >    bool changed = false;
> > @@ -496,8 +502,28 @@ split_loop (class loop *loop1, class tree_niter_desc *niter)
> >    tree border = NULL_TREE;
> >    affine_iv iv;
> >
> > +  if (!single_exit (loop1)
> > +      /* ??? We could handle non-empty latches when we split the latch edge
> > +         (not the exit edge), and put the new exit condition in the new block.
> > +        OTOH this executes some code unconditionally that might have been
> > +        skipped by the original exit before.  */
> > +      || !empty_block_p (loop1->latch)
> > +      || !easy_exit_values (loop1)
> > +      || !number_of_iterations_exit (loop1, single_exit (loop1), &niter,
> > +                                    false, true)
> > +      || niter.cmp == ERROR_MARK
> > +      /* We can't yet handle loops controlled by a != predicate.  */
> > +      || niter.cmp == NE_EXPR)
> > +    return false;
> > +
> >    bbs = get_loop_body (loop1);
> >
> > +  if (!can_copy_bbs_p (bbs, loop1->num_nodes))
> > +    {
> > +      free (bbs);
> > +      return false;
> > +    }
> > +
> >    /* Find a splitting opportunity.  */
> >    for (i = 0; i < loop1->num_nodes; i++)
> >      if ((guard_iv = split_at_bb_p (loop1, bbs[i], &border, &iv)))
> > @@ -505,8 +531,8 @@ split_loop (class loop *loop1, class tree_niter_desc *niter)
> >         /* Handling opposite steps is not implemented yet.  Neither
> >            is handling different step sizes.  */
> >         if ((tree_int_cst_sign_bit (iv.step)
> > -            != tree_int_cst_sign_bit (niter->control.step))
> > -           || !tree_int_cst_equal (iv.step, niter->control.step))
> > +            != tree_int_cst_sign_bit (niter.control.step))
> > +           || !tree_int_cst_equal (iv.step, niter.control.step))
> >           continue;
> >
> >         /* Find a loop PHI node that defines guard_iv directly,
> > @@ -575,7 +601,7 @@ split_loop (class loop *loop1, class tree_niter_desc *niter)
> >            Compute the new bound for the guarding IV and patch the
> >            loop exit to use it instead of original IV and bound.  */
> >         gimple_seq stmts = NULL;
> > -       tree newend = compute_new_first_bound (&stmts, niter, border,
> > +       tree newend = compute_new_first_bound (&stmts, &niter, border,
> >                                                guard_code, guard_init);
> >         if (stmts)
> >           gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
> > @@ -612,6 +638,722 @@ split_loop (class loop *loop1, class tree_niter_desc *niter)
> >    return changed;
> >  }
> >
> > +/* Another transformation of loops like:
> > +
> > +   for (i = INIT (); CHECK (i); i = NEXT ())
> > +     {
> > +       if (expr (a_1, a_2, ..., a_n))  // expr is pure
> > +         a_j = ...;  // change at least one a_j
> > +       else
> > +         S;          // not change any a_j
> > +     }
> > +
> > +   into:
> > +
> > +   for (i = INIT (); CHECK (i); i = NEXT ())
> > +     {
> > +       if (expr (a_1, a_2, ..., a_n))
> > +         a_j = ...;
> > +       else
> > +         {
> > +           S;
> > +           i = NEXT ();
> > +           break;
> > +         }
> > +     }
> > +
> > +   for (; CHECK (i); i = NEXT ())
> > +     {
> > +       S;
> > +     }
> > +
> > +   */
> > +
> > +/* Data structure to hold temporary information during loop split upon
> > +   semi-invariant conditional statement.  */
> > +class split_info {
> > +public:
> > +  /* Array of all basic blocks in a loop, returned by get_loop_body().  */
> > +  basic_block *bbs;
> > +
> > +  /* All memory store/clobber statements in a loop.  */
> > +  auto_vec<gimple *> memory_stores;
> > +
> > +  /* Whether above memory stores vector has been filled.  */
> > +  int need_init;
> > +
> > +  split_info () : bbs (NULL),  need_init (true) { }
> > +
> > +  ~split_info ()
> > +    {
> > +      if (bbs)
> > +       free (bbs);
> > +    }
> > +};
> > +
> > +/* Find all statements with memory-write effect in LOOP, including memory
> > +   store and non-pure function call, and keep those in a vector.  This work
> > +   is only done one time, for the vector should be constant during analysis
> > +   stage of semi-invariant condition.  */
> > +
> > +static void
> > +find_vdef_in_loop (struct loop *loop)
> > +{
> > +  split_info *info = (split_info *) loop->aux;
> > +  gphi *vphi = get_virtual_phi (loop->header);
> > +
> > +  /* Indicate memory store vector has been filled.  */
> > +  info->need_init = false;
> > +
> > +  /* If loop contains memory operation, there must be a virtual PHI node in
> > +     loop header basic block.  */
> > +  if (vphi == NULL)
> > +    return;
> > +
> > +  /* All virtual SSA names inside the loop are connected to be a cyclic
> > +     graph via virtual PHI nodes.  The virtual PHI node in loop header just
> > +     links the first and the last virtual SSA names, by using the last as
> > +     PHI operand to define the first.  */
> > +  const edge latch = loop_latch_edge (loop);
> > +  const tree first = gimple_phi_result (vphi);
> > +  const tree last = PHI_ARG_DEF_FROM_EDGE (vphi, latch);
> > +
> > +  /* The virtual SSA cyclic graph might consist of only one SSA name, who
> > +     is defined by itself.
> > +
> > +       .MEM_1 = PHI <.MEM_2(loop entry edge), .MEM_1(latch edge)>
> > +
> > +     This means the loop contains only memory loads, so we can skip it.  */
> > +  if (first == last)
> > +    return;
> > +
> > +  auto_vec<gimple *> other_stores;
> > +  auto_vec<tree> worklist;
> > +  auto_bitmap visited;
> > +
> > +  bitmap_set_bit (visited, SSA_NAME_VERSION (first));
> > +  bitmap_set_bit (visited, SSA_NAME_VERSION (last));
> > +  worklist.safe_push (last);
> > +
> > +  do
> > +    {
> > +      tree vuse = worklist.pop ();
> > +      gimple *stmt = SSA_NAME_DEF_STMT (vuse);
> > +
> > +      /* We mark the first and last SSA names as visited at the beginning,
> > +        and reversely start the process from the last SSA name towards the
> > +        first, which ensures that this do-while will not touch SSA names
> > +        defined outside of the loop.  */
> > +      gcc_assert (gimple_bb (stmt)
> > +                 && flow_bb_inside_loop_p (loop, gimple_bb (stmt)));
> > +
> > +      if (gimple_code (stmt) == GIMPLE_PHI)
> > +       {
> > +         gphi *phi = as_a <gphi *> (stmt);
> > +
> > +         for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
> > +           {
> > +             tree arg = gimple_phi_arg_def (stmt, i);
> > +
> > +             if (bitmap_set_bit (visited, SSA_NAME_VERSION (arg)))
> > +               worklist.safe_push (arg);
> > +           }
> > +       }
> > +      else
> > +       {
> > +         tree prev = gimple_vuse (stmt);
> > +
> > +         /* Non-pure call statement is conservatively assumed to impact all
> > +            memory locations.  So place call statements ahead of other memory
> > +            stores in the vector with an idea of of using them as shortcut
> > +            terminators to memory alias analysis.  */
> > +         if (gimple_code (stmt) == GIMPLE_CALL)
> > +           info->memory_stores.safe_push (stmt);
> > +         else
> > +           other_stores.safe_push (stmt);
> > +
> > +         if (bitmap_set_bit (visited, SSA_NAME_VERSION (prev)))
> > +           worklist.safe_push (prev);
> > +       }
> > +    } while (!worklist.is_empty ());
> > +
> > +    info->memory_stores.safe_splice (other_stores);
> > +}
> > +
> > +
> > +/* Given STMT, memory load or pure call statement, check whether it is impacted
> > +   by some memory store in LOOP, excluding trace starting from SKIP_HEAD (the
> > +   trace is composed of SKIP_HEAD and those basic block dominated by it, always
> > +   corresponds to one branch of a conditional statement).  If SKIP_HEAD is
> > +   NULL, all basic blocks of LOOP are checked.  */
> > +
> > +static bool
> > +vuse_semi_invariant_p (struct loop *loop, gimple *stmt,
> > +                      const_basic_block skip_head)
> > +{
> > +  split_info *info = (split_info *) loop->aux;
> > +
> > +  /* Collect memory store/clobber statements if have not do that.  */
> > +  if (info->need_init)
> > +    find_vdef_in_loop (loop);
> > +
> > +  tree rhs = is_gimple_assign (stmt) ? gimple_assign_rhs1 (stmt) : NULL_TREE;
> > +  ao_ref ref;
> > +  gimple *store;
> > +  unsigned i;
> > +
> > +  ao_ref_init (&ref, rhs);
> > +
> > +  FOR_EACH_VEC_ELT (info->memory_stores, i, store)
> > +    {
> > +      /* Skip basic blocks dominated by SKIP_HEAD, if non-NULL.  */
> > +      if (skip_head
> > +         && dominated_by_p (CDI_DOMINATORS, gimple_bb (store), skip_head))
> > +       continue;
> > +
> > +      if (!ref.ref || stmt_may_clobber_ref_p_1 (store, &ref))
> > +       return false;
> > +    }
> > +
> > +  return true;
> > +}
> > +
> > +/* Forward declaration.  */
> > +
> > +static bool
> > +stmt_semi_invariant_p (struct loop *loop, gimple *stmt,
> > +                      const_basic_block skip_head);
> > +
> > +/* Suppose one condition branch, led by SKIP_HEAD, is not executed since
> > +   certain iteration of LOOP, check whether an SSA name (NAME) remains
> > +   unchanged in next iteration.  We call this characteristic semi-
> > +   invariantness.  SKIP_HEAD might be NULL, if so, nothing excluded, all
> > +   basic blocks and control flows in the loop will be considered.  If non-
> > +   NULL, SSA name to check is supposed to be defined before SKIP_HEAD.  */
> > +
> > +static bool
> > +ssa_semi_invariant_p (struct loop *loop, const tree name,
> > +                     const_basic_block skip_head)
> > +{
> > +  gimple *def = SSA_NAME_DEF_STMT (name);
> > +  const_basic_block def_bb = gimple_bb (def);
> > +
> > +  /* An SSA name defined outside a loop is definitely semi-invariant.  */
> > +  if (!def_bb || !flow_bb_inside_loop_p (loop, def_bb))
> > +    return true;
> > +
> > +  if (gimple_code (def) == GIMPLE_PHI)
> > +    {
> > +      /* For PHI node that is not in loop header, its source operands should
> > +        be defined inside the loop, which are seen as loop variant.  */
> > +      if (def_bb != loop->header || !skip_head)
> > +       return false;
> > +
> > +      const_edge latch = loop_latch_edge (loop);
> > +      tree from = PHI_ARG_DEF_FROM_EDGE (as_a <gphi *> (def), latch);
> > +
> > +      /* A PHI node in loop header contains two source operands, one is
> > +        initial value, the other is the copy of last iteration through loop
> > +        latch, we call it latch value.  From the PHI node to definition
> > +        of latch value, if excluding branch trace from SKIP_HEAD, there
> > +        is no definition of other version of same variable, SSA name defined
> > +        by the PHI node is semi-invariant.
> > +
> > +                         loop entry
> > +                              |     .--- latch ---.
> > +                              |     |             |
> > +                              v     v             |
> > +                  x_1 = PHI <x_0,  x_3>           |
> > +                           |                      |
> > +                           v                      |
> > +              .------- if (cond) -------.         |
> > +              |                         |         |
> > +              |                     [ SKIP ]      |
> > +              |                         |         |
> > +              |                     x_2 = ...     |
> > +              |                         |         |
> > +              '---- T ---->.<---- F ----'         |
> > +                           |                      |
> > +                           v                      |
> > +                  x_3 = PHI <x_1, x_2>            |
> > +                           |                      |
> > +                           '----------------------'
> > +
> > +       Suppose in certain iteration, execution flow in above graph goes
> > +       through true branch, which means that one source value to define
> > +       x_3 in false branch (x2) is skipped, x_3 only comes from x_1, and
> > +       x_1 in next iterations is defined by x_3, we know that x_1 will
> > +       never changed if COND always chooses true branch from then on.  */
> > +
> > +      while (from != name)
> > +       {
> > +         /* A new value comes from a CONSTANT.  */
> > +         if (TREE_CODE (from) != SSA_NAME)
> > +           return false;
> > +
> > +         gimple *stmt = SSA_NAME_DEF_STMT (from);
> > +         const_basic_block bb = gimple_bb (stmt);
> > +
> > +         /* A new value comes from outside of loop.  */
> > +         if (!bb || !flow_bb_inside_loop_p (loop, bb))
> > +           return false;
> > +
> > +         from = NULL_TREE;
> > +
> > +         if (gimple_code (stmt) == GIMPLE_PHI)
> > +           {
> > +             gphi *phi = as_a <gphi *> (stmt);
> > +
> > +             for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
> > +               {
> > +                 const_edge e = gimple_phi_arg_edge (phi, i);
> > +
> > +                 /* Don't consider redefinitions in excluded basic blocks.  */
> > +                 if (!dominated_by_p (CDI_DOMINATORS, e->src, skip_head))
> > +                   {
> > +                     /* There are more than one source operands that can
> > +                        provide value to the SSA name, it is variant.  */
> > +                     if (from)
> > +                       return false;
> > +
> > +                     from = gimple_phi_arg_def (phi, i);
> > +                   }
> > +               }
> > +           }
> > +         else if (gimple_code (stmt) == GIMPLE_ASSIGN)
> > +           {
> > +             /* For simple value copy, check its rhs instead.  */
> > +             if (gimple_assign_ssa_name_copy_p (stmt))
> > +               from = gimple_assign_rhs1 (stmt);
> > +           }
> > +
> > +         /* Any other kind of definition is deemed to introduce a new value
> > +            to the SSA name.  */
> > +         if (!from)
> > +           return false;
> > +       }
> > +       return true;
> > +    }
> > +
> > +  /* Value originated from volatile memory load or return of normal (non-
> > +     const/pure) call should not be treated as constant in each iteration.  */
> > +  if (gimple_has_side_effects (def))
> > +    return false;
> > +
> > +  /* Check if any memory store may kill memory load at this place.  */
> > +  if (gimple_vuse (def) && !vuse_semi_invariant_p (loop, def, skip_head))
> > +    return false;
> > +
> > +  /* Check operands of definition statement of the SSA name.  */
> > +  return stmt_semi_invariant_p (loop, def, skip_head);
> > +}
> > +
> > +/* Check whether STMT is semi-invariant in LOOP, iff all its operands are
> > +   semi-invariant.  Trace composed of basic block SKIP_HEAD and basic blocks
> > +   dominated by it are excluded from the loop.  */
> > +
> > +static bool
> > +stmt_semi_invariant_p (struct loop *loop, gimple *stmt,
> > +                      const_basic_block skip_head)
> > +{
> > +  ssa_op_iter iter;
> > +  tree use;
> > +
> > +  /* Although operand of a statement might be SSA name, CONSTANT or VARDECL,
> > +     here we only need to check SSA name operands.  This is because check on
> > +     VARDECL operands, which involve memory loads, must have been done
> > +     prior to invocation of this function in vuse_semi_invariant_p.  */
> > +  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
> > +    {
> > +      if (!ssa_semi_invariant_p (loop, use, skip_head))
> > +       return false;
> > +    }
> > +
> > +  return true;
> > +}
> > +
> > +/* Determine when conditional statement never transfers execution to one of its
> > +   branch, whether we can remove the branch's leading basic block (BRANCH_BB)
> > +   and those basic blocks dominated by BRANCH_BB.  */
> > +
> > +static bool
> > +branch_removable_p (basic_block branch_bb)
> > +{
> > +  if (single_pred_p (branch_bb))
> > +    return true;
> > +
> > +  edge e;
> > +  edge_iterator ei;
> > +
> > +  FOR_EACH_EDGE (e, ei, branch_bb->preds)
> > +    {
> > +      if (dominated_by_p (CDI_DOMINATORS, e->src, branch_bb))
> > +       continue;
> > +
> > +      if (dominated_by_p (CDI_DOMINATORS, branch_bb, e->src))
> > +       continue;
> > +
> > +       /* The branch can be reached from opposite branch, or from some
> > +         statement not dominated by the conditional statement.  */
> > +      return false;
> > +    }
> > +
> > +  return true;
> > +}
> > +
> > +/* Find out which branch of a conditional statement (COND) is invariant in the
> > +   execution context of LOOP.  That is: once the branch is selected in certain
> > +   iteration of the loop, any operand that contributes to computation of the
> > +   conditional statement remains unchanged in all following iterations.  */
> > +
> > +static edge
> > +get_cond_invariant_branch (struct loop *loop, gcond *cond)
> > +{
> > +  basic_block cond_bb = gimple_bb (cond);
> > +  basic_block targ_bb[2];
> > +  bool invar[2];
> > +  unsigned invar_checks;
> > +
> > +  for (unsigned i = 0; i < 2; i++)
> > +    {
> > +      targ_bb[i] = EDGE_SUCC (cond_bb, i)->dest;
> > +
> > +      /* One branch directs to loop exit, no need to perform loop split upon
> > +        this conditional statement.  Firstly, it is trivial if the exit branch
> > +        is semi-invariant, for the statement is just to break loop.  Secondly,
> > +        if the opposite branch is semi-invariant, it means that the statement
> > +        is real loop-invariant, which is covered by loop unswitch.  */
> > +      if (!flow_bb_inside_loop_p (loop, targ_bb[i]))
> > +       return NULL;
> > +    }
> > +
> > +  invar_checks = 0;
> > +
> > +  for (unsigned i = 0; i < 2; i++)
> > +    {
> > +      invar[!i] = false;
> > +
> > +      if (!branch_removable_p (targ_bb[i]))
> > +       continue;
> > +
> > +      /* Given a semi-invariant branch, if its opposite branch dominates
> > +        loop latch, it and its following trace will only be executed in
> > +        final iteration of loop, namely it is not part of repeated body
> > +        of the loop.  Similar to the above case that the branch is loop
> > +        exit, no need to split loop.  */
> > +      if (dominated_by_p (CDI_DOMINATORS, loop->latch, targ_bb[i]))
> > +       continue;
> > +
> > +      invar[!i] = stmt_semi_invariant_p (loop, cond, targ_bb[i]);
> > +      invar_checks++;
> > +    }
> > +
> > +  /* With both branches being invariant (handled by loop unswitch) or
> > +     variant is not what we want.  */
> > +  if (invar[0] ^ !invar[1])
> > +    return NULL;
> > +
> > +  /* Found a real loop-invariant condition, do nothing.  */
> > +  if (invar_checks < 2 && stmt_semi_invariant_p (loop, cond, NULL))
> > +    return NULL;
> > +
> > +  return EDGE_SUCC (cond_bb, (unsigned) invar[1]);
> > +}
> > +
> > +/* Calculate increased code size measured by estimated insn number if applying
> > +   loop split upon certain branch (BRANCH_EDGE) of a conditional statement.  */
> > +
> > +static int
> > +compute_added_num_insns (struct loop *loop, const_edge branch_edge)
> > +{
> > +  basic_block cond_bb = branch_edge->src;
> > +  unsigned branch = EDGE_SUCC (cond_bb, 1) == branch_edge;
> > +  basic_block opposite_bb = EDGE_SUCC (cond_bb, !branch)->dest;
> > +  basic_block *bbs = ((split_info *) loop->aux)->bbs;
> > +  int num = 0;
> > +
> > +  for (unsigned i = 0; i < loop->num_nodes; i++)
> > +    {
> > +      /* Do no count basic blocks only in opposite branch.  */
> > +      if (dominated_by_p (CDI_DOMINATORS, bbs[i], opposite_bb))
> > +       continue;
> > +
> > +      num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights);
> > +    }
> > +
> > +  /* It is unnecessary to evaluate expression of the conditional statement
> > +     in new loop that contains only invariant branch.  This expression should
> > +     be constant value (either true or false).  Exclude code size of insns
> > +     that contribute to computation of the expression.  */
> > +
> > +  auto_vec<gimple *> worklist;
> > +  hash_set<gimple *> removed;
> > +  gimple *stmt = last_stmt (cond_bb);
> > +
> > +  worklist.safe_push (stmt);
> > +  removed.add (stmt);
> > +  num -= estimate_num_insns (stmt, &eni_size_weights);
> > +
> > +  do
> > +    {
> > +      ssa_op_iter opnd_iter;
> > +      use_operand_p opnd_p;
> > +
> > +      stmt = worklist.pop ();
> > +      FOR_EACH_PHI_OR_STMT_USE (opnd_p, stmt, opnd_iter, SSA_OP_USE)
> > +       {
> > +         tree opnd = USE_FROM_PTR (opnd_p);
> > +
> > +         if (TREE_CODE (opnd) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (opnd))
> > +           continue;
> > +
> > +         gimple *opnd_stmt = SSA_NAME_DEF_STMT (opnd);
> > +         use_operand_p use_p;
> > +         imm_use_iterator use_iter;
> > +
> > +         if (removed.contains (opnd_stmt)
> > +             || !flow_bb_inside_loop_p (loop, gimple_bb (opnd_stmt)))
> > +           continue;
> > +
> > +         FOR_EACH_IMM_USE_FAST (use_p, use_iter, opnd)
> > +           {
> > +             gimple *use_stmt = USE_STMT (use_p);
> > +
> > +             if (!is_gimple_debug (use_stmt) && !removed.contains (use_stmt))
> > +               {
> > +                 opnd_stmt = NULL;
> > +                 break;
> > +               }
> > +           }
> > +
> > +         if (opnd_stmt)
> > +           {
> > +             worklist.safe_push (opnd_stmt);
> > +             removed.add (opnd_stmt);
> > +             num -= estimate_num_insns (opnd_stmt, &eni_size_weights);
> > +           }
> > +       }
> > +    } while (!worklist.is_empty ());
> > +
> > +  gcc_assert (num >= 0);
> > +  return num;
> > +}
> > +
> > +/* Find out loop-invariant branch of a conditional statement (COND) if it has,
> > +   and check whether it is eligible and profitable to perform loop split upon
> > +   this branch in LOOP.  */
> > +
> > +static edge
> > +get_cond_branch_to_split_loop (struct loop *loop, gcond *cond)
> > +{
> > +  edge invar_branch = get_cond_invariant_branch (loop, cond);
> > +
> > +  if (!invar_branch)
> > +    return NULL;
> > +
> > +  profile_probability prob = invar_branch->probability;
> > +
> > +  /* When accurate profile information is available, and execution
> > +     frequency of the branch is too low, just let it go.  */
> > +  if (prob.reliable_p ())
> > +    {
> > +      int thres = PARAM_VALUE (PARAM_MIN_LOOP_COND_SPLIT_PROB);
> > +
> > +      if (prob < profile_probability::always ().apply_scale (thres, 100))
> > +       return NULL;
> > +    }
> > +
> > +  /* Add a threshold for increased code size to disable loop split.  */
> > +  if (compute_added_num_insns (loop, invar_branch)
> > +      > PARAM_VALUE (PARAM_MAX_LOOP_COND_SPLIT_INSNS))
> > +    return NULL;
> > +
> > +  return invar_branch;
> > +}
> > +
> > +/* Given a loop (LOOP1) with a loop-invariant branch (INVAR_BRANCH) of some
> > +   conditional statement, perform loop split transformation illustrated
> > +   as the following graph.
> > +
> > +               .-------T------ if (true) ------F------.
> > +               |                    .---------------. |
> > +               |                    |               | |
> > +               v                    |               v v
> > +          pre-header                |            pre-header
> > +               | .------------.     |                 | .------------.
> > +               | |            |     |                 | |            |
> > +               | v            |     |                 | v            |
> > +             header           |     |               header           |
> > +               |              |     |                 |              |
> > +       [ bool r = cond; ]     |     |                 |              |
> > +               |              |     |                 |              |
> > +      .---- if (r) -----.     |     |        .--- if (true) ---.     |
> > +      |                 |     |     |        |                 |     |
> > +  invariant             |     |     |    invariant             |     |
> > +      |                 |     |     |        |                 |     |
> > +      '---T--->.<---F---'     |     |        '---T--->.<---F---'     |
> > +               |              |    /                  |              |
> > +             stmts            |   /                 stmts            |
> > +               |              |  /                    |              |
> > +              / \             | /                    / \             |
> > +     .-------*   *       [ if (!r) ]        .-------*   *            |
> > +     |           |            |             |           |            |
> > +     |         latch          |             |         latch          |
> > +     |           |            |             |           |            |
> > +     |           '------------'             |           '------------'
> > +     '------------------------. .-----------'
> > +             loop1            | |                   loop2
> > +                              v v
> > +                             exits
> > +
> > +   In the graph, loop1 represents the part derived from original one, and
> > +   loop2 is duplicated using loop_version (), which corresponds to the part
> > +   of original one being splitted out.  In loop1, a new bool temporary (r)
> > +   is introduced to keep value of the condition result.  In original latch
> > +   edge of loop1, we insert a new conditional statement whose value comes
> > +   from previous temporary (r), one of its branch goes back to loop1 header
> > +   as a latch edge, and the other branch goes to loop2 pre-header as an entry
> > +   edge.  And also in loop2, we abandon the variant branch of the conditional
> > +   statement candidate by setting a constant bool condition, based on which
> > +   branch is semi-invariant.  */
> > +
> > +static bool
> > +do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
> > +{
> > +  basic_block cond_bb = invar_branch->src;
> > +  bool true_invar = !!(invar_branch->flags & EDGE_TRUE_VALUE);
> > +  gcond *cond = as_a <gcond *> (last_stmt (cond_bb));
> > +
> > +  gcc_assert (cond_bb->loop_father == loop1);
> > +
> > +  if (dump_file && (dump_flags & TDF_DETAILS))
> > +   {
> > +     fprintf (dump_file, "In %s(), split loop %d at branch<%s>, BB %d\n",
> > +             current_function_name (), loop1->num,
> > +             true_invar ? "T" : "F", cond_bb->index);
> > +     print_gimple_stmt (dump_file, cond, 0, TDF_SLIM | TDF_VOPS);
> > +   }
> > +
> > +  initialize_original_copy_tables ();
> > +
> > +  struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
> > +                                    profile_probability::always (),
> > +                                    profile_probability::never (),
> > +                                    profile_probability::always (),
> > +                                    profile_probability::always (),
> > +                                    true);
> > +  if (!loop2)
> > +    {
> > +      free_original_copy_tables ();
> > +      return false;
> > +    }
> > +
> > +  /* Generate a bool type temporary to hold result of the condition.  */
> > +  tree tmp = make_ssa_name (boolean_type_node);
> > +  gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
> > +  gimple *stmt = gimple_build_assign (tmp,
> > +                                     gimple_cond_code (cond),
> > +                                     gimple_cond_lhs (cond),
> > +                                     gimple_cond_rhs (cond));
> > +
> > +  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
> > +  gimple_cond_set_condition (cond, EQ_EXPR, tmp, boolean_true_node);
> > +  update_stmt (cond);
> > +
> > +  basic_block cond_bb_copy = get_bb_copy (cond_bb);
> > +  gcond *cond_copy = as_a<gcond *> (last_stmt (cond_bb_copy));
> > +
> > +  /* Replace the condition in loop2 with a bool constant to let PassManager
> > +     remove the variant branch after current pass completes.  */
> > +  if (true_invar)
> > +    gimple_cond_make_true (cond_copy);
> > +  else
> > +    gimple_cond_make_false (cond_copy);
> > +
> > +  update_stmt (cond_copy);
> > +
> > +  /* Insert a new conditional statement on latch edge of loop1.  This
> > +     statement acts as a switch to transfer execution from loop1 to loop2,
> > +     when loop1 enters into invariant state.  */
> > +  basic_block latch_bb = split_edge (loop_latch_edge (loop1));
> > +  basic_block break_bb = split_edge (single_pred_edge (latch_bb));
> > +  gimple *break_cond = gimple_build_cond (EQ_EXPR, tmp, boolean_true_node,
> > +                                         NULL_TREE, NULL_TREE);
> > +
> > +  gsi = gsi_last_bb (break_bb);
> > +  gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT);
> > +
> > +  edge to_loop1 = single_succ_edge (break_bb);
> > +  edge to_loop2 = make_edge (break_bb, loop_preheader_edge (loop2)->src, 0);
> > +
> > +  to_loop1->flags &= ~EDGE_FALLTHRU;
> > +  to_loop1->flags |= true_invar ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE;
> > +  to_loop2->flags |= true_invar ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
> > +
> > +  update_ssa (TODO_update_ssa);
> > +
> > +  /* Due to introduction of a control flow edge from loop1 latch to loop2
> > +     pre-header, we should update PHIs in loop2 to reflect this connection
> > +     between loop1 and loop2.  */
> > +  connect_loop_phis (loop1, loop2, to_loop2);
> > +
> > +  free_original_copy_tables ();
> > +
> > +  rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop1);
> > +
> > +  return true;
> > +}
> > +
> > +/* Traverse all conditional statements in LOOP, to find out a good candidate
> > +   upon which we can do loop split.  */
> > +
> > +static bool
> > +split_loop_on_cond (struct loop *loop)
> > +{
> > +  split_info *info = new split_info ();
> > +  basic_block *bbs = info->bbs = get_loop_body (loop);
> > +  bool do_split = false;
> > +
> > +  /* Allocate an area to keep temporary info, and associate its address
> > +     with loop aux field.  */
> > +  loop->aux = info;
> > +
> > +  for (unsigned i = 0; i < loop->num_nodes; i++)
> > +    {
> > +      basic_block bb = bbs[i];
> > +
> > +      /* We only consider conditional statement, which be executed at most once
> > +        in each iteration of the loop.  So skip statements in inner loops.  */
> > +      if ((bb->loop_father != loop) || (bb->flags & BB_IRREDUCIBLE_LOOP))
> > +       continue;
> > +
> > +      /* Actually this check is not a must constraint.  With it, we can ensure
> > +        conditional statement will always be executed in each iteration.  */
> > +      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
> > +       continue;
> > +
> > +      gimple *last = last_stmt (bb);
> > +
> > +      if (!last || gimple_code (last) != GIMPLE_COND)
> > +       continue;
> > +
> > +      gcond *cond = as_a <gcond *> (last);
> > +      edge branch_edge = get_cond_branch_to_split_loop (loop, cond);
> > +
> > +      if (branch_edge)
> > +       {
> > +         do_split_loop_on_cond (loop, branch_edge);
> > +         do_split = true;
> > +         break;
> > +       }
> > +    }
> > +
> > +  delete info;
> > +  loop->aux = NULL;
> > +
> > +  return do_split;
> > +}
> > +
> >  /* Main entry point.  Perform loop splitting on all suitable loops.  */
> >
> >  static unsigned int
> > @@ -627,7 +1369,6 @@ tree_ssa_split_loops (void)
> >    /* Go through all loops starting from innermost.  */
> >    FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
> >      {
> > -      class tree_niter_desc niter;
> >        if (loop->aux)
> >         {
> >           /* If any of our inner loops was split, don't split us,
> > @@ -636,29 +1377,14 @@ tree_ssa_split_loops (void)
> >           continue;
> >         }
> >
> > -      if (single_exit (loop)
> > -         /* ??? We could handle non-empty latches when we split
> > -            the latch edge (not the exit edge), and put the new
> > -            exit condition in the new block.  OTOH this executes some
> > -            code unconditionally that might have been skipped by the
> > -            original exit before.  */
> > -         && empty_block_p (loop->latch)
> > -         && !optimize_loop_for_size_p (loop)
> > -         && easy_exit_values (loop)
> > -         && number_of_iterations_exit (loop, single_exit (loop), &niter,
> > -                                       false, true)
> > -         && niter.cmp != ERROR_MARK
> > -         /* We can't yet handle loops controlled by a != predicate.  */
> > -         && niter.cmp != NE_EXPR
> > -         && can_duplicate_loop_p (loop))
> > +      if (optimize_loop_for_size_p (loop))
> > +        continue;
> > +
> > +      if (split_loop (loop) || split_loop_on_cond (loop))
> >         {
> > -         if (split_loop (loop, &niter))
> > -           {
> > -             /* Mark our containing loop as having had some split inner
> > -                loops.  */
> > -             loop_outer (loop)->aux = loop;
> > -             changed = true;
> > -           }
> > +         /* Mark our containing loop as having had some split inner loops.  */
> > +         loop_outer (loop)->aux = loop;
> > +         changed = true;
> >         }
> >      }
> >
> > --
> > 2.17.1
Feng Xue OS Oct. 25, 2019, 3:43 a.m. UTC | #10
Richard,

    Thanks for your comments. 

>+      /* For PHI node that is not in loop header, its source operands should
>+        be defined inside the loop, which are seen as loop variant.  */
>+      if (def_bb != loop->header || !skip_head)
>+       return false;

> so if we have
>
> for (;;)
>  {
>     if (x)
>       a = ..;
>     else
>       a = ...;
>     if (cond-to-split-on dependent on a)
> ...
>  }
>
> the above is too restrictive in case 'x' is semi-invariant as well, correct?
In above case, cond-on-a will not be identified as semi-invariant, in that
a is defined by PHI with real multi-sources. To handle it,  besides each
source value, we should add extra check on each source's control
dependence node (x in the case), which might have not a little code expansion.
Anyway, I'll have a try.


>+         /* A new value comes from outside of loop.  */
>+         if (!bb || !flow_bb_inside_loop_p (loop, bb))
>+           return false;

> but that means starting from the second iteration the value is invariant.
No. Traversal direction is reverse to loop execution. In the following,
start from "x_1 = ", extract latch value x_3, and get x_3 definition, and
finally reach "x_1 =".

Loop:
      x_1 = PHI (x_0, x_3)
      ... 
      x_3 = 
      ...
      goto Loop;


>+                 /* Don't consider redefinitions in excluded basic blocks.  */
>+                 if (!dominated_by_p (CDI_DOMINATORS, e->src, skip_head))
>+                   {
>+                     /* There are more than one source operands that can
>+                        provide value to the SSA name, it is variant.  */
>+                     if (from)
>+                       return false;
>
> they might be the same though, for PHIs with > 2 arguments.
OK. Will add value equivalence check.


> In the cycle handling you are not recursing via stmt_semi_invariant_p
> but only handle SSA name copies - any particular reason for that?
The cycle handling is specified for ssa that crosses iteration. It is
semi-invariant if it remains unchanged after certain iteration, which
means its value in previous iteration (coming from latch edge) is just
a copy of its self,  nothing else. So, recursion via stmt_semi_invariant_p
is unnecessary.

Loop:
      x_1 = PHI (x_0, x_3);
      x_2 = PHI(x_1, value defined in excluded branch);
      x_3 = x_2;
      goto Loop;


>+static bool
>+branch_removable_p (basic_block branch_bb)
>+{
>+  if (single_pred_p (branch_bb))
>+    return true;
>
> I'm not sure what this function tests - at least the single_pred_p check
> looks odd to me given the dominator checks later.  The single predecessor
> could simply be a forwarder.  I wonder if you are looking for branches forming
> an irreducible loop?  I think you can then check EDGE_IRREDUCIBLE_LOOP
> or BB_IRREDUCIBLE_LOOP on the condition block (btw, I don't see
> testcases covering the appearant special-cases in the patch - refering to
> existing ones via a comment often helps understanding the code).

Upon condition evaluation, if a branch is not selected,  
This function test a branch is reachable from other place other than its
conditional statement. This ensure that when the branch is not selected
upon condition evaluation, trace path led by the branch will never
be executed so that it can be excluded  during semi-invariantness analysis.

If single_pred_p, only condition statement can reach the branch.

If not, consider a half diamond condition control graph, with a back-edge to
true branch.

            condition
               |  \
               |   \
               |  false branch
   .--->----.  |   /
   |        |  |  /
 other    true branch
   |        |
   '---<----'

If there is an edge from false branch, true branch can not be excluded even it
is not selected.  And back edge from "other" (dominated by true branch) does
not have any impact.


>+
>+  return EDGE_SUCC (cond_bb, (unsigned) invar[1]);
>+}
>
> magic ensures that invar[1] is always the invariant edge?  Oh, it's a bool.
> Ick.  I wonder if logic with int invariant_edge = -1; and the loop setting
> it to either 0 or 1 would be easier to follow...
OK.


> Note your stmt_semi_invariant_p check is exponential for a condition
> like
>
>   _1 = 1;
>   _2 = _1 + _1;
>   _3 = _2 + _2;
>   if (_3 != param_4(D))
>
> because you don't track ops you already proved semi-invariant.  We've
> run into such situation repeatedly in SCEV analysis so I doubt it can be
> disregarded as irrelevant in practice.  A worklist approach could then
> also get rid of the recursion.  You are already computing the stmts
> forming the condition in compute_added_num_insns so another option
> is to re-use that.
OK.


> Btw, I wonder if we can simply re-use PARAM_MAX_PEELED_INSNS
> instead of adding yet another param (it also happens to have the same
> size).  Because we are "peeling" the loop.
I'll check that.

>+  edge invar_branch = get_cond_invariant_branch (loop, cond);
>+
>+  if (!invar_branch)
>+    return NULL;
>
> extra vertical space is unwanted in such cases.
OK.

>+  if (dump_file && (dump_flags & TDF_DETAILS))
>+   {
>+     fprintf (dump_file, "In %s(), split loop %d at branch<%s>, BB %d\n",
>+             current_function_name (), loop1->num,
>+             true_invar ? "T" : "F", cond_bb->index);
>+     print_gimple_stmt (dump_file, cond, 0, TDF_SLIM | TDF_VOPS);
>+   }
>
> can you please use sth like
>
>  if (dump_enabled_p ())
>    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
>                             cond, "loop split on semi-invariant condition");
>
> so -fopt-info-loop will show it?
OK.


>+  /* Generate a bool type temporary to hold result of the condition.  */
>+  tree tmp = make_ssa_name (boolean_type_node);
>+  gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
>+  gimple *stmt = gimple_build_assign (tmp,
>+                                     gimple_cond_code (cond),
>+                                     gimple_cond_lhs (cond),
>+                                     gimple_cond_rhs (cond));
>
> shorter is
>
>   gimple_seq stmts = NULL;
>   tree tmp = gimple_build (&stmts, gimple_cond_code (cond),
>                                      boolean_type_node,
>  gimple_cond_lhs (cond), gimple_cond_rhs (cond));
>   gsi_insert_seq_before (&gsi, stmts, GSI_NEW_STMT);
OK.


>+  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
>+  gimple_cond_set_condition (cond, EQ_EXPR, tmp, boolean_true_node);
>
> but I wonder what's the point here to move the condition computation to
> a temporary?  Why not just build the original condition again for break_cond?
OK.


> in split_loop_on_cond you'll find the first semi-invariant condition
> to split on,
> but we'll not visit the split loop again (also for original splitting I guess).
> Don't we eventually want to recurse on that?
Currently, we only do a round of loop-split. It is a TODO to enable more than
one loop-splits on a loop.
Feng Xue OS Oct. 31, 2019, 2:38 p.m. UTC | #11
Hi, Richard

   This is a new patch to support more generalized semi-invariant condition, which uses
control dependence analysis.

Thanks,
Feng
Richard Biener Nov. 5, 2019, 2:04 p.m. UTC | #12
On Thu, Oct 31, 2019 at 3:38 PM Feng Xue OS <fxue@os.amperecomputing.com> wrote:
>
> Hi, Richard
>
>    This is a new patch to support more generalized semi-invariant condition, which uses
> control dependence analysis.

Uh.  Note it's not exactly helpful to change algorithms between
reviews, that makes it
just harder :/

Btw, I notice you use post-dominance info.  Note that we generally do
not keep that
up-to-date with CFG manipulations (and for dominators fast queries are
disabled).
Probably the way we walk & transform loops makes this safe but it's something to
remember when extending that.  Possibly doing analysis of all candidates first
and then applying the transform for all wanted cases would avoid this (and maybe
also can reduce the number of update_ssa calls).  I guess this can be done as
followup.

The patch is OK.

Thanks,
Richard.



> Thanks,
> Feng
>
> ________________________________________
> From: Feng Xue OS <fxue@os.amperecomputing.com>
> Sent: Friday, October 25, 2019 11:43 AM
> To: Richard Biener
> Cc: Michael Matz; Philipp Tomsich; gcc-patches@gcc.gnu.org; Christoph Müllner; erick.ochoa@theobroma-systems.com
> Subject: Re: [PATCH V3] Loop split upon semi-invariant condition (PR tree-optimization/89134)
>
> Richard,
>
>     Thanks for your comments.
>
> >+      /* For PHI node that is not in loop header, its source operands should
> >+        be defined inside the loop, which are seen as loop variant.  */
> >+      if (def_bb != loop->header || !skip_head)
> >+       return false;
>
> > so if we have
> >
> > for (;;)
> >  {
> >     if (x)
> >       a = ..;
> >     else
> >       a = ...;
> >     if (cond-to-split-on dependent on a)
> > ...
> >  }
> >
> > the above is too restrictive in case 'x' is semi-invariant as well, correct?
> In above case, cond-on-a will not be identified as semi-invariant, in that
> a is defined by PHI with real multi-sources. To handle it,  besides each
> source value, we should add extra check on each source's control
> dependence node (x in the case), which might have not a little code expansion.
> Anyway, I'll have a try.
>
>
> >+         /* A new value comes from outside of loop.  */
> >+         if (!bb || !flow_bb_inside_loop_p (loop, bb))
> >+           return false;
>
> > but that means starting from the second iteration the value is invariant.
> No. Traversal direction is reverse to loop execution. In the following,
> start from "x_1 = ", extract latch value x_3, and get x_3 definition, and
> finally reach "x_1 =".
>
> Loop:
>       x_1 = PHI (x_0, x_3)
>       ...
>       x_3 =
>       ...
>       goto Loop;
>
>
> >+                 /* Don't consider redefinitions in excluded basic blocks.  */
> >+                 if (!dominated_by_p (CDI_DOMINATORS, e->src, skip_head))
> >+                   {
> >+                     /* There are more than one source operands that can
> >+                        provide value to the SSA name, it is variant.  */
> >+                     if (from)
> >+                       return false;
> >
> > they might be the same though, for PHIs with > 2 arguments.
> OK. Will add value equivalence check.
>
>
> > In the cycle handling you are not recursing via stmt_semi_invariant_p
> > but only handle SSA name copies - any particular reason for that?
> The cycle handling is specified for ssa that crosses iteration. It is
> semi-invariant if it remains unchanged after certain iteration, which
> means its value in previous iteration (coming from latch edge) is just
> a copy of its self,  nothing else. So, recursion via stmt_semi_invariant_p
> is unnecessary.
>
> Loop:
>       x_1 = PHI (x_0, x_3);
>       x_2 = PHI(x_1, value defined in excluded branch);
>       x_3 = x_2;
>       goto Loop;
>
>
> >+static bool
> >+branch_removable_p (basic_block branch_bb)
> >+{
> >+  if (single_pred_p (branch_bb))
> >+    return true;
> >
> > I'm not sure what this function tests - at least the single_pred_p check
> > looks odd to me given the dominator checks later.  The single predecessor
> > could simply be a forwarder.  I wonder if you are looking for branches forming
> > an irreducible loop?  I think you can then check EDGE_IRREDUCIBLE_LOOP
> > or BB_IRREDUCIBLE_LOOP on the condition block (btw, I don't see
> > testcases covering the appearant special-cases in the patch - refering to
> > existing ones via a comment often helps understanding the code).
>
> Upon condition evaluation, if a branch is not selected,
> This function test a branch is reachable from other place other than its
> conditional statement. This ensure that when the branch is not selected
> upon condition evaluation, trace path led by the branch will never
> be executed so that it can be excluded  during semi-invariantness analysis.
>
> If single_pred_p, only condition statement can reach the branch.
>
> If not, consider a half diamond condition control graph, with a back-edge to
> true branch.
>
>             condition
>                |  \
>                |   \
>                |  false branch
>    .--->----.  |   /
>    |        |  |  /
>  other    true branch
>    |        |
>    '---<----'
>
> If there is an edge from false branch, true branch can not be excluded even it
> is not selected.  And back edge from "other" (dominated by true branch) does
> not have any impact.
>
>
> >+
> >+  return EDGE_SUCC (cond_bb, (unsigned) invar[1]);
> >+}
> >
> > magic ensures that invar[1] is always the invariant edge?  Oh, it's a bool.
> > Ick.  I wonder if logic with int invariant_edge = -1; and the loop setting
> > it to either 0 or 1 would be easier to follow...
> OK.
>
>
> > Note your stmt_semi_invariant_p check is exponential for a condition
> > like
> >
> >   _1 = 1;
> >   _2 = _1 + _1;
> >   _3 = _2 + _2;
> >   if (_3 != param_4(D))
> >
> > because you don't track ops you already proved semi-invariant.  We've
> > run into such situation repeatedly in SCEV analysis so I doubt it can be
> > disregarded as irrelevant in practice.  A worklist approach could then
> > also get rid of the recursion.  You are already computing the stmts
> > forming the condition in compute_added_num_insns so another option
> > is to re-use that.
> OK.
>
>
> > Btw, I wonder if we can simply re-use PARAM_MAX_PEELED_INSNS
> > instead of adding yet another param (it also happens to have the same
> > size).  Because we are "peeling" the loop.
> I'll check that.
>
> >+  edge invar_branch = get_cond_invariant_branch (loop, cond);
> >+
> >+  if (!invar_branch)
> >+    return NULL;
> >
> > extra vertical space is unwanted in such cases.
> OK.
>
> >+  if (dump_file && (dump_flags & TDF_DETAILS))
> >+   {
> >+     fprintf (dump_file, "In %s(), split loop %d at branch<%s>, BB %d\n",
> >+             current_function_name (), loop1->num,
> >+             true_invar ? "T" : "F", cond_bb->index);
> >+     print_gimple_stmt (dump_file, cond, 0, TDF_SLIM | TDF_VOPS);
> >+   }
> >
> > can you please use sth like
> >
> >  if (dump_enabled_p ())
> >    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
> >                             cond, "loop split on semi-invariant condition");
> >
> > so -fopt-info-loop will show it?
> OK.
>
>
> >+  /* Generate a bool type temporary to hold result of the condition.  */
> >+  tree tmp = make_ssa_name (boolean_type_node);
> >+  gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
> >+  gimple *stmt = gimple_build_assign (tmp,
> >+                                     gimple_cond_code (cond),
> >+                                     gimple_cond_lhs (cond),
> >+                                     gimple_cond_rhs (cond));
> >
> > shorter is
> >
> >   gimple_seq stmts = NULL;
> >   tree tmp = gimple_build (&stmts, gimple_cond_code (cond),
> >                                      boolean_type_node,
> >  gimple_cond_lhs (cond), gimple_cond_rhs (cond));
> >   gsi_insert_seq_before (&gsi, stmts, GSI_NEW_STMT);
> OK.
>
>
> >+  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
> >+  gimple_cond_set_condition (cond, EQ_EXPR, tmp, boolean_true_node);
> >
> > but I wonder what's the point here to move the condition computation to
> > a temporary?  Why not just build the original condition again for break_cond?
> OK.
>
>
> > in split_loop_on_cond you'll find the first semi-invariant condition
> > to split on,
> > but we'll not visit the split loop again (also for original splitting I guess).
> > Don't we eventually want to recurse on that?
> Currently, we only do a round of loop-split. It is a TODO to enable more than
> one loop-splits on a loop.
Feng Xue OS Nov. 6, 2019, 7:13 a.m. UTC | #13
> Uh.  Note it's not exactly helpful to change algorithms between
> reviews, that makes it
> just harder :/
>
> Btw, I notice you use post-dominance info.  Note that we generally do
> not keep that
> up-to-date with CFG manipulations (and for dominators fast queries are
> disabled).
> Probably the way we walk & transform loops makes this safe but it's something to
> remember when extending that.  Possibly doing analysis of all candidates first
> and then applying the transform for all wanted cases would avoid this (and maybe
> also can reduce the number of update_ssa calls).  I guess this can be done as
> followup.

Ok. Thanks for the suggestion.

Feng
diff mbox series

Patch

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 1391a562c35..28981fa1048 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -11418,6 +11418,19 @@  The maximum number of branches unswitched in a single loop.
 @item lim-expensive
 The minimum cost of an expensive expression in the loop invariant motion.
 
+@item max-cond-loop-split-insns
+In a loop, if a branch of a conditional statement is selected since certain
+loop iteration, any operand that contributes to computation of the conditional
+expression remains unchanged in all following iterations, the statement is
+semi-invariant, upon which we can do a kind of loop split transformation.
+@option{max-cond-loop-split-insns} controls maximum number of insns to be
+added due to loop split on semi-invariant conditional statement.
+
+@item min-cond-loop-split-prob
+When FDO profile information is available, @option{min-cond-loop-split-prob}
+specifies minimum threshold for probability of semi-invariant condition
+statement to trigger loop split.
+
 @item iv-consider-all-candidates-bound
 Bound on number of candidates for induction variables, below which
 all candidates are considered for each use in induction variable
diff --git a/gcc/params.def b/gcc/params.def
index 13001a7bb2d..12bc8c26c9e 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -386,6 +386,20 @@  DEFPARAM(PARAM_MAX_UNSWITCH_LEVEL,
 	"The maximum number of unswitchings in a single loop.",
 	3, 0, 0)
 
+/* The maximum number of increased insns due to loop split on semi-invariant
+   condition statement.  */
+DEFPARAM(PARAM_MAX_COND_LOOP_SPLIT_INSNS,
+	"max-cond-loop-split-insns",
+	"The maximum number of insns to be added due to loop split on "
+	"semi-invariant condition statement.",
+	100, 0, 0)
+
+DEFPARAM(PARAM_MIN_COND_LOOP_SPLIT_PROB,
+	"min-cond-loop-split-prob",
+	"The minimum threshold for probability of semi-invariant condition "
+	"statement to trigger loop split.",
+	30, 0, 100)
+
 /* The maximum number of insns in loop header duplicated by the copy loop
    headers pass.  */
 DEFPARAM(PARAM_MAX_LOOP_HEADER_INSNS,

diff --git a/gcc/testsuite/g++.dg/tree-ssa/loop-cond-split-1.C b/gcc/testsuite/g++.dg/tree-ssa/loop-cond-split-1.C
new file mode 100644
index 00000000000..51f9da22fc7
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/loop-cond-split-1.C
@@ -0,0 +1,33 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-lsplit-details" } */
+
+#include <string>
+#include <map>
+
+using namespace std;
+
+class  A
+{
+public:
+  bool empty;
+  void set (string s);
+};
+
+class  B
+{
+  map<int, string> m;
+  void f ();
+};
+
+extern A *ga;
+
+void B::f ()
+{
+  for (map<int, string>::iterator iter = m.begin (); iter != m.end (); ++iter)
+    {
+      if (ga->empty)
+        ga->set (iter->second);
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "split loop 1 at branch" 1 "lsplit" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-cond-split-1.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-cond-split-1.c
new file mode 100644
index 00000000000..bbd522d6bcd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-cond-split-1.c
@@ -0,0 +1,23 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-lsplit-details" } */
+
+__attribute__((pure)) __attribute__((noinline)) int inc (int i)
+{
+  return i + 1;
+}
+
+extern int do_something (void);
+extern int b;
+
+void test(int n)
+{
+  int i;
+
+  for (i = 0; i < n; i = inc (i))
+    {
+      if (b)
+        b = do_something();
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "split loop 1 at branch" 1 "lsplit" } } */
diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
index f5f083384bc..e4a1b6d2019 100644
--- a/gcc/tree-ssa-loop-split.c
+++ b/gcc/tree-ssa-loop-split.c
@@ -32,7 +32,10 @@  along with GCC; see the file COPYING3.  If not see
 #include "tree-ssa-loop.h"
 #include "tree-ssa-loop-manip.h"
 #include "tree-into-ssa.h"
+#include "tree-inline.h"
+#include "tree-cfgcleanup.h"
 #include "cfgloop.h"
+#include "params.h"
 #include "tree-scalar-evolution.h"
 #include "gimple-iterator.h"
 #include "gimple-pretty-print.h"
@@ -40,7 +43,9 @@  along with GCC; see the file COPYING3.  If not see
 #include "gimple-fold.h"
 #include "gimplify-me.h"
 
-/* This file implements loop splitting, i.e. transformation of loops like
+/* This file implements two kinds of loop splitting.
+
+   One transformation of loops like:
 
    for (i = 0; i < 100; i++)
      {
@@ -612,6 +617,722 @@  split_loop (class loop *loop1, class tree_niter_desc *niter)
   return changed;
 }
 
+/* Another transformation of loops like:
+
+   for (i = INIT (); CHECK (i); i = NEXT ())
+     {
+       if (expr (a_1, a_2, ..., a_n))  // expr is pure
+         a_j = ...;  // change at least one a_j
+       else
+         S;          // not change any a_j
+     }
+
+   into:
+
+   for (i = INIT (); CHECK (i); i = NEXT ())
+     {
+       if (expr (a_1, a_2, ..., a_n))
+         a_j = ...;
+       else
+         {
+           S;
+           i = NEXT ();
+           break;
+         }
+     }
+
+   for (; CHECK (i); i = NEXT ())
+     {
+       S;
+     }
+
+   */
+
+/* Data structure to hold temporary information during loop split upon
+   semi-invariant conditional statement.  */
+class split_info {
+public:
+  /* Array of all basic blocks in a loop, returned by get_loop_body().  */
+  basic_block *bbs;
+
+  /* All memory store/clobber statements in a loop.  */
+  auto_vec<gimple *> memory_stores;
+
+  /* Whether above memory stores vector has been filled.  */
+  int need_init;
+
+  split_info () : bbs (NULL),  need_init (true) { }
+
+  ~split_info ()
+    {
+      if (bbs)
+	free (bbs);
+    }
+};
+
+/* Find all statements with memory-write effect in LOOP, including memory
+   store and non-pure function call, and keep those in a vector.  This work
+   is only done one time, for the vector should be constant during analysis
+   stage of semi-invariant condition.  */
+
+static void
+find_vdef_in_loop (struct loop *loop)
+{
+  split_info *info = (split_info *) loop->aux;
+  gphi *vphi = get_virtual_phi (loop->header);
+
+  /* Indicate memory store vector has been filled.  */
+  info->need_init = false;
+
+  /* If loop contains memory operation, there must be a virtual PHI node in
+     loop header basic block.  */
+  if (vphi == NULL)
+    return;
+
+  /* All virtual SSA names inside the loop are connected to be a cyclic
+     graph via virtual PHI nodes.  The virtual PHI node in loop header just
+     links the first and the last virtual SSA names, by using the last as
+     PHI operand to define the first.  */
+  const edge latch = loop_latch_edge (loop);
+  const tree first = gimple_phi_result (vphi);
+  const tree last = PHI_ARG_DEF_FROM_EDGE (vphi, latch);
+
+  /* The virtual SSA cyclic graph might consist of only one SSA name, who
+     is defined by itself.
+
+       .MEM_1 = PHI <.MEM_2(loop entry edge), .MEM_1(latch edge)>
+
+     This means the loop contains only memory loads, so we can skip it.  */
+  if (first == last)
+    return;
+
+  auto_vec<gimple *> other_stores;
+  auto_vec<tree> worklist;
+  auto_bitmap visited;
+
+  bitmap_set_bit (visited, SSA_NAME_VERSION (first));
+  bitmap_set_bit (visited, SSA_NAME_VERSION (last));
+  worklist.safe_push (last);
+
+  do
+    {
+      tree vuse = worklist.pop ();
+      gimple *stmt = SSA_NAME_DEF_STMT (vuse);
+
+      /* We mark the first and last SSA names as visited at the beginning,
+	 and reversely start the process from the last SSA name towards the
+	 first, which ensures that this do-while will not touch SSA names
+	 defined outside of the loop.  */
+      gcc_assert (gimple_bb (stmt)
+		  && flow_bb_inside_loop_p (loop, gimple_bb (stmt)));
+
+      if (gimple_code (stmt) == GIMPLE_PHI)
+	{
+	  gphi *phi = as_a <gphi *> (stmt);
+
+	  for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
+	    {
+	      tree arg = gimple_phi_arg_def (stmt, i);
+
+	      if (bitmap_set_bit (visited, SSA_NAME_VERSION (arg)))
+		worklist.safe_push (arg);
+	    }
+	}
+      else
+	{
+	  tree prev = gimple_vuse (stmt);
+
+	  /* Non-pure call statement is conservatively assumed to impact all
+	     memory locations.  So place call statements ahead of other memory
+	     stores in the vector with an idea of of using them as shortcut
+	     terminators to memory alias analysis.  */
+	  if (gimple_code (stmt) == GIMPLE_CALL)
+	    info->memory_stores.safe_push (stmt);
+	  else
+	    other_stores.safe_push (stmt);
+
+	  if (bitmap_set_bit (visited, SSA_NAME_VERSION (prev)))
+	    worklist.safe_push (prev);
+	}
+    } while (!worklist.is_empty ());
+
+    info->memory_stores.safe_splice (other_stores);
+}
+
+
+/* Given STMT, memory load or pure call statement, check whether it is impacted
+   by some memory store in LOOP, excluding trace starting from SKIP_HEAD (the
+   trace is composed of SKIP_HEAD and those basic block dominated by it, always
+   corresponds to one branch of a conditional statement).  If SKIP_HEAD is
+   NULL, all basic blocks of LOOP are checked.  */
+
+static bool
+vuse_semi_invariant_p (struct loop *loop, gimple *stmt,
+		       const_basic_block skip_head)
+{
+  split_info *info = (split_info *) loop->aux;
+
+  /* Collect memory store/clobber statements if have not do that.  */
+  if (info->need_init)
+    find_vdef_in_loop (loop);
+
+  tree rhs = is_gimple_assign (stmt) ? gimple_assign_rhs1 (stmt) : NULL_TREE;
+  ao_ref ref;
+  gimple *store;
+  unsigned i;
+
+  ao_ref_init (&ref, rhs);
+
+  FOR_EACH_VEC_ELT (info->memory_stores, i, store)
+    {
+      /* Skip basic blocks dominated by SKIP_HEAD, if non-NULL.  */
+      if (skip_head
+	  && dominated_by_p (CDI_DOMINATORS, gimple_bb (store), skip_head))
+	continue;
+
+      if (!ref.ref || stmt_may_clobber_ref_p_1 (store, &ref))
+	return false;
+    }
+
+  return true;
+}
+
+/* Forward declaration.  */
+
+static bool
+stmt_semi_invariant_p (struct loop *loop, gimple *stmt,
+		       const_basic_block skip_head);
+
+/* Suppose one condition branch, led by SKIP_HEAD, is not executed since
+   certain iteration of LOOP, check whether an SSA name (NAME) remains
+   unchanged in next interation.  We call this characterisic as semi-
+   invariantness.  SKIP_HEAD might be NULL, if so, nothing excluded, all
+   basic blocks and control flows in the loop will be considered.  If non-
+   NULL, SSA name to check is supposed to be defined before SKIP_HEAD.  */
+
+static bool
+ssa_semi_invariant_p (struct loop *loop, const tree name,
+		      const_basic_block skip_head)
+{
+  gimple *def = SSA_NAME_DEF_STMT (name);
+  const_basic_block def_bb = gimple_bb (def);
+
+  /* An SSA name defined outside a loop is definitely semi-invariant.  */
+  if (!def_bb || !flow_bb_inside_loop_p (loop, def_bb))
+    return true;
+
+  if (gimple_code (def) == GIMPLE_PHI)
+    {
+      /* For PHI node that is not in loop header, its source operands should
+	 be defined inside the loop, which are seen as loop variant.  */
+      if (def_bb != loop->header || !skip_head)
+	return false;
+
+      const_edge latch = loop_latch_edge (loop);
+      tree from = PHI_ARG_DEF_FROM_EDGE (as_a <gphi *> (def), latch);
+
+      /* A PHI node in loop header contains two source operands, one is
+	 initial value, the other is the copy of last iteration through loop
+	 latch, we call it latch value.  From the PHI node to definition
+	 of latch value, if excluding branch trace from SKIP_HEAD, there
+	 is no definition of other version of same variable, SSA name defined
+	 by the PHI node is semi-invariant.
+
+                         loop entry
+                              |     .--- latch ---.
+                              |     |             |
+                              v     v             |
+                  x_1 = PHI <x_0,  x_3>           |
+                           |                      |
+                           v                      |
+              .------- if (cond) -------.         |
+              |                         |         |
+              |                     [ SKIP ]      |
+              |                         |         |
+              |                     x_2 = ...     |
+              |                         |         |
+              '---- T ---->.<---- F ----'         |
+                           |                      |
+                           v                      |
+                  x_3 = PHI <x_1, x_2>            |
+                           |                      |
+                           '----------------------'
+
+	Suppose in certain iteration, execution flow in above graph goes
+	through true branch, which means that one source value to define
+	x_3 in false branch (x2) is skipped, x_3 only comes from x_1, and
+	x_1 in next iterations is defined by x_3, we know that x_1 will
+	never changed if COND always chooses true branch from then on.  */
+
+      while (from != name)
+	{
+	  /* A new value comes from a CONSTANT.  */
+	  if (TREE_CODE (from) != SSA_NAME)
+	    return false;
+
+	  gimple *stmt = SSA_NAME_DEF_STMT (from);
+	  const_basic_block bb = gimple_bb (stmt);
+
+	  /* A new value comes from outside of loop.  */
+	  if (!bb || !flow_bb_inside_loop_p (loop, bb))
+	    return false;
+
+	  from = NULL_TREE;
+
+	  if (gimple_code (stmt) == GIMPLE_PHI)
+	    {
+	      gphi *phi = as_a <gphi *> (stmt);
+
+	      for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
+		{
+		  const_edge e = gimple_phi_arg_edge (phi, i);
+
+		  /* Not consider redefinitions in excluded basic blocks.  */
+		  if (!dominated_by_p (CDI_DOMINATORS, e->src, skip_head))
+		    {
+		      /* There are more than one source operands that can
+			 provide value to the SSA name, it is variant.  */
+		      if (from)
+			return false;
+
+		      from = gimple_phi_arg_def (phi, i);
+		    }
+		}
+	    }
+	  else if (gimple_code (stmt) == GIMPLE_ASSIGN)
+	    {
+	      /* For simple value copy, check its rhs instead.  */
+	      if (gimple_assign_ssa_name_copy_p (stmt))
+		from = gimple_assign_rhs1 (stmt);
+	    }
+
+	  /* Any other kind of definition is deemed to introduce a new value
+	     to the SSA name.  */
+	  if (!from)
+	    return false;
+	}
+	return true;
+    }
+
+  /* Value originated from volatile memory load or return of normal (non-
+     const/pure) call should not be treated as constant in each iteration.  */
+  if (gimple_has_side_effects (def))
+    return false;
+
+  /* Check if any memory store may kill memory load at this place.  */
+  if (gimple_vuse (def) && !vuse_semi_invariant_p (loop, def, skip_head))
+    return false;
+
+  /* Check operands of definition statement of the SSA name.  */
+  return stmt_semi_invariant_p (loop, def, skip_head);
+}
+
+/* Check whether STMT is semi-invariant in LOOP, iff all its operands are
+   semi-invariant.  Trace composed of basic block SKIP_HEAD and basic blocks
+   dominated by it are excluded from the loop.  */
+
+static bool
+stmt_semi_invariant_p (struct loop *loop, gimple *stmt,
+		       const_basic_block skip_head)
+{
+  ssa_op_iter iter;
+  tree use;
+
+  /* Although operand of a statement might be SSA name, CONSTANT or VARDECL,
+     here we only need to check SSA name operands.  This is because check on
+     VARDECL operands, which involve memory loads, must have been done
+     prior to invocation of this function in vuse_semi_invariant_p.  */
+  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
+    {
+      if (!ssa_semi_invariant_p (loop, use, skip_head))
+	return false;
+    }
+
+  return true;
+}
+
+/* Determine when conditional statement never transfers execution to one of its
+   branch, whether we can remove the branch's leading basic block (BRANCH_BB)
+   and those basic blocks dominated by BRANCH_BB.  */
+
+static bool
+branch_removable_p (basic_block branch_bb)
+{
+  if (single_pred_p (branch_bb))
+    return true;
+
+  edge e;
+  edge_iterator ei;
+
+  FOR_EACH_EDGE (e, ei, branch_bb->preds)
+    {
+      if (dominated_by_p (CDI_DOMINATORS, e->src, branch_bb))
+	continue;
+
+      if (dominated_by_p (CDI_DOMINATORS, branch_bb, e->src))
+	continue;
+
+       /* The branch can be reached from opposite branch, or from some
+	  statement not dominated by the conditional statement.  */
+      return false;
+    }
+
+  return true;
+}
+
+/* Find out which branch of a conditional statement (COND) is invariant in the
+   execution context of LOOP.  That is: once the branch is selected in certain
+   iteration of the loop, any operand that contributes to computation of the
+   conditional statement remains unchanged in all following iterations.  */
+
+static edge
+get_cond_invariant_branch (struct loop *loop, gcond *cond)
+{
+  basic_block cond_bb = gimple_bb (cond);
+  basic_block targ_bb[2];
+  bool invar[2];
+  unsigned invar_checks;
+
+  for (unsigned i = 0; i < 2; i++)
+    {
+      targ_bb[i] = EDGE_SUCC (cond_bb, i)->dest;
+
+      /* One branch directs to loop exit, no need to perform loop split upon
+	 this conditional statement.  Firstly, it is trivial if the exit branch
+	 is semi-invariant, for the statement is just to break loop.  Secondly,
+	 if the opposite branch is semi-invariant, it means that the statement
+	 is real loop-invariant, which is covered by loop unswitch.  */
+      if (!flow_bb_inside_loop_p (loop, targ_bb[i]))
+	return NULL;
+    }
+
+  invar_checks = 0;
+
+  for (unsigned i = 0; i < 2; i++)
+    {
+      invar[!i] = false;
+
+      if (!branch_removable_p (targ_bb[i]))
+	continue;
+
+      /* Given a semi-invariant branch, if its opposite branch dominates
+	 loop latch, it and its following trace will only be executed in
+	 final iteration of loop, namely it is not part of repeated body
+	 of the loop.  Similar to the above case that the branch is loop
+	 exit, no need to split loop.  */
+      if (dominated_by_p (CDI_DOMINATORS, loop->latch, targ_bb[i]))
+	continue;
+
+      invar[!i] = stmt_semi_invariant_p (loop, cond, targ_bb[i]);
+      invar_checks++;
+    }
+
+  /* With both branches being invariant (handled by loop unswitch) or
+     variant is not what we want.  */
+  if (invar[0] ^ !invar[1])
+    return NULL;
+
+  /* Found a real loop-invariant condition, do nothing.  */
+  if (invar_checks < 2 && stmt_semi_invariant_p (loop, cond, NULL))
+    return NULL;
+
+  return EDGE_SUCC (cond_bb, (unsigned) invar[1]);
+}
+
+/* Calculate increased code size measured by estimated insn number if applying
+   loop split upon certain branch (BRANCH_EDGE) of a conditional statement.  */
+
+static int
+compute_added_num_insns (struct loop *loop, const_edge branch_edge)
+{
+  basic_block cond_bb = branch_edge->src;
+  unsigned branch = EDGE_SUCC (cond_bb, 1) == branch_edge;
+  basic_block opposite_bb = EDGE_SUCC (cond_bb, !branch)->dest;
+  basic_block *bbs = ((split_info *) loop->aux)->bbs;
+  int num = 0;
+
+  for (unsigned i = 0; i < loop->num_nodes; i++)
+    {
+      /* Do no count basic blocks only in opposite branch.  */
+      if (dominated_by_p (CDI_DOMINATORS, bbs[i], opposite_bb))
+	continue;
+
+      num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights);
+    }
+
+  /* It is unnecessary to evaluate expression of the conditional statement
+     in new loop that contains only invariant branch.  This expresion should
+     be constant value (either true or false).  Exclude code size of insns
+     that contribute to computation of the expression.  */
+
+  auto_vec<gimple *> worklist;
+  hash_set<gimple *> removed;
+  gimple *stmt = last_stmt (cond_bb);
+
+  worklist.safe_push (stmt);
+  removed.add (stmt);
+  num -= estimate_num_insns (stmt, &eni_size_weights);
+
+  do
+    {
+      ssa_op_iter opnd_iter;
+      use_operand_p opnd_p;
+
+      stmt = worklist.pop ();
+      FOR_EACH_PHI_OR_STMT_USE (opnd_p, stmt, opnd_iter, SSA_OP_USE)
+	{
+	  tree opnd = USE_FROM_PTR (opnd_p);
+
+	  if (TREE_CODE (opnd) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (opnd))
+	    continue;
+
+	  gimple *opnd_stmt = SSA_NAME_DEF_STMT (opnd);
+	  use_operand_p use_p;
+	  imm_use_iterator use_iter;
+
+	  if (removed.contains (opnd_stmt)
+	      || !flow_bb_inside_loop_p (loop, gimple_bb (opnd_stmt)))
+	    continue;
+
+	  FOR_EACH_IMM_USE_FAST (use_p, use_iter, opnd)
+	    {
+              gimple *use_stmt = USE_STMT (use_p);
+
+	      if (!is_gimple_debug (use_stmt) && !removed.contains (use_stmt))
+		{
+		  opnd_stmt = NULL;
+		  break;
+		}
+	    }
+
+	  if (opnd_stmt)
+	    {
+	      worklist.safe_push (opnd_stmt);
+	      removed.add (opnd_stmt);
+	      num -= estimate_num_insns (opnd_stmt, &eni_size_weights);
+	    }
+	}
+    } while (!worklist.is_empty ());
+
+  gcc_assert (num >= 0);
+  return num;
+}
+
+/* Find out loop-invariant branch of a conditional statement (COND) if it has,
+   and check whether it is eligible and profitable to perform loop split upon
+   this branch in LOOP.  */
+
+static edge
+get_cond_branch_to_split_loop (struct loop *loop, gcond *cond)
+{
+  edge invar_branch = get_cond_invariant_branch (loop, cond);
+
+  if (!invar_branch)
+    return NULL;
+
+  profile_probability prob = invar_branch->probability;
+
+  /* When accurate profile information is available, and execution
+     frequency of the branch is too low, just let it go.  */
+  if (prob.reliable_p ())
+    {
+      int thres = PARAM_VALUE (PARAM_MIN_COND_LOOP_SPLIT_PROB);
+
+      if (prob < profile_probability::always ().apply_scale (thres, 100))
+	return NULL;
+    }
+
+  /* Add a threshold for increased code size to disable loop split.  */
+  if (compute_added_num_insns (loop, invar_branch)
+      > PARAM_VALUE (PARAM_MAX_COND_LOOP_SPLIT_INSNS))
+    return NULL;
+
+  return invar_branch;
+}
+
+/* Given a loop (LOOP1) with a loop-invariant branch (INVAR_BRANCH) of some
+   conditional statement, perform loop split transformation illustrated
+   as the following graph.
+
+               .-------T------ if (true) ------F------.
+               |                    .---------------. |
+               |                    |               | |
+               v                    |               v v
+          pre-header                |            pre-header
+               | .------------.     |                 | .------------.
+               | |            |     |                 | |            |
+               | v            |     |                 | v            |
+             header           |     |               header           |
+               |              |     |                 |              |
+       [ bool r = cond; ]     |     |                 |              |
+               |              |     |                 |              |
+      .---- if (r) -----.     |     |        .--- if (true) ---.     |
+      |                 |     |     |        |                 |     |
+  invariant             |     |     |    invariant             |     |
+      |                 |     |     |        |                 |     |
+      '---T--->.<---F---'     |     |        '---T--->.<---F---'     |
+               |              |    /                  |              |
+             stmts            |   /                 stmts            |
+               |              |  /                    |              |
+              / \             | /                    / \             |
+     .-------*   *       [ if (!r) ]        .-------*   *            |
+     |           |            |             |           |            |
+     |         latch          |             |         latch          |
+     |           |            |             |           |            |
+     |           '------------'             |           '------------'
+     '------------------------. .-----------'
+             loop1            | |                   loop2
+                              v v
+                             exits
+
+   In the graph, loop1 represents the part derived from original one, and
+   loop2 is duplicated using loop_version (), which corresponds to the part
+   of original one being splitted out.  In loop1, a new bool temporary (r)
+   is introduced to keep value of the condition result.  In original latch
+   edge of loop1, we insert a new conditional statement whose value comes
+   from previous temporary (r), one of its branch goes back to loop1 header
+   as a latch edge, and the other branch goes to loop2 pre-header as an entry
+   edge.  And also in loop2, we abandon the variant branch of the conditional
+   statement candidate by setting a constant bool condition, based on which
+   branch is semi-invariant.  */
+
+static bool
+do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
+{
+  basic_block cond_bb = invar_branch->src;
+  bool true_invar = !!(invar_branch->flags & EDGE_TRUE_VALUE);
+  gcond *cond = as_a <gcond *> (last_stmt (cond_bb));
+
+  gcc_assert (cond_bb->loop_father == loop1);
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+   {
+     fprintf (dump_file, "In %s(), split loop %d at branch<%s>, BB %d\n",
+	      current_function_name (), loop1->num,
+	      true_invar ? "T" : "F", cond_bb->index);
+     print_gimple_stmt (dump_file, cond, 0, TDF_SLIM | TDF_VOPS);
+   }
+
+  initialize_original_copy_tables ();
+
+  struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
+				     profile_probability::always (),
+				     profile_probability::never (),
+				     profile_probability::always (),
+				     profile_probability::always (),
+				     true);
+  if (!loop2)
+    {
+      free_original_copy_tables ();
+      return false;
+    }
+
+  /* Generate a bool type temporary to hold result of the condition.  */
+  tree tmp = make_ssa_name (boolean_type_node);
+  gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
+  gimple *stmt = gimple_build_assign (tmp,
+				      gimple_cond_code (cond),
+				      gimple_cond_lhs (cond),
+				      gimple_cond_rhs (cond));
+
+  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
+  gimple_cond_set_condition (cond, EQ_EXPR, tmp, boolean_true_node);
+  update_stmt (cond);
+
+  basic_block cond_bb_copy = get_bb_copy (cond_bb);
+  gcond *cond_copy = as_a<gcond *> (last_stmt (cond_bb_copy));
+
+  /* Replace the condition in loop2 with a bool constant to let PassManager
+     remove the variant branch after current pass completes.  */
+  if (true_invar)
+    gimple_cond_make_true (cond_copy);
+  else
+    gimple_cond_make_false (cond_copy);
+
+  update_stmt (cond_copy);
+
+  /* Insert a new conditional statement on latch edge of loop1.  This
+     statement acts as a switch to transfer execution from loop1 to loop2,
+     when loop1 enters into invariant state.  */
+  basic_block latch_bb = split_edge (loop_latch_edge (loop1));
+  basic_block break_bb = split_edge (single_pred_edge (latch_bb));
+  gimple *break_cond = gimple_build_cond (EQ_EXPR, tmp, boolean_true_node,
+					  NULL_TREE, NULL_TREE);
+
+  gsi = gsi_last_bb (break_bb);
+  gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT);
+
+  edge to_loop1 = single_succ_edge (break_bb);
+  edge to_loop2 = make_edge (break_bb, loop_preheader_edge (loop2)->src, 0);
+
+  to_loop1->flags &= ~EDGE_FALLTHRU;
+  to_loop1->flags |= true_invar ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE;
+  to_loop2->flags |= true_invar ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
+
+  update_ssa (TODO_update_ssa);
+
+  /* Due to introduction of a control flow edge from loop1 latch to loop2
+     pre-header, we should update PHIs in loop2 to reflect this connection
+     between loop1 and loop2.  */
+  connect_loop_phis (loop1, loop2, to_loop2);
+
+  free_original_copy_tables ();
+
+  rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop1);
+
+  return true;
+}
+
+/* Traverse all conditional statements in LOOP, to find out a good candidate
+   upon which we can do loop split.  */
+
+static bool
+split_loop_on_cond (struct loop *loop)
+{
+  split_info *info = new split_info ();
+  basic_block *bbs = info->bbs = get_loop_body (loop);
+  bool do_split = false;
+
+  /* Allocate an area to keep temporary info, and associate its address
+     with loop aux field.  */
+  loop->aux = info;
+
+  for (unsigned i = 0; i < loop->num_nodes; i++)
+    {
+      basic_block bb = bbs[i];
+
+      /* We only consider conditional statement, which be executed at most once
+	 in each iteration of the loop.  So skip statements in inner loops.  */
+      if ((bb->loop_father != loop) || (bb->flags & BB_IRREDUCIBLE_LOOP))
+	continue;
+
+      /* Actually this check is not a must constraint. With it, we can ensure
+	 conditional statement will always be executed in each iteration. */
+      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
+	continue;
+
+      gimple *last = last_stmt (bb);
+
+      if (!last || gimple_code (last) != GIMPLE_COND)
+	continue;
+
+      gcond *cond = as_a <gcond *> (last);
+      edge branch_edge = get_cond_branch_to_split_loop (loop, cond);
+
+      if (branch_edge)
+	{
+	  do_split_loop_on_cond (loop, branch_edge);
+	  do_split = true;
+	  break;
+	}
+    }
+
+  delete info;
+  loop->aux = NULL;
+
+  return do_split;
+}
+
 /* Main entry point.  Perform loop splitting on all suitable loops.  */
 
 static unsigned int
@@ -662,6 +1383,32 @@  tree_ssa_split_loops (void)
 	}
     }
 
+  if (changed)
+    {
+      cleanup_tree_cfg ();
+      changed = false;
+    }
+
+  /* Perform loop splitting for suitable if-conditions in all loops.  */
+  FOR_EACH_LOOP (loop, LI_INCLUDE_ROOT)
+    loop->aux = NULL;
+
+  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
+    {
+      if (loop->aux)
+        {
+	  loop_outer (loop)->aux = loop;
+	  continue;
+	}
+
+      if (!optimize_loop_for_size_p (loop)
+	  && split_loop_on_cond (loop))
+	{
+	  loop_outer (loop)->aux = loop;
+	  changed = true;
+	}
+    }
+
   FOR_EACH_LOOP (loop, LI_INCLUDE_ROOT)
     loop->aux = NULL;