Patchwork Non-dominating loop bounds in tree-ssa-loop-niter 3/4

login
register
mail settings
Submitter Jan Hubicka
Date Oct. 31, 2012, 10:39 a.m.
Message ID <20121031103949.GD19020@kam.mff.cuni.cz>
Download mbox | patch
Permalink /patch/195829/
State New
Headers show

Comments

Jan Hubicka - Oct. 31, 2012, 10:39 a.m.
Hi,
this patch implements the logic to remove statements that are known to be
undefined and thus expected to not be executed after unrolling.  It also
removes redundant exits that I originally tried to do at once, but it
does not fly, since the peeling confuse number_of_iterations_exit
and it no longer understands the ivs.

So now we
1) always remove exits that are known to be redundant by the bounds found
2) try to peel/unroll
3) if success remove statemnts from the last iteration

This silence the array-bounds warnings in my testcase and many cases of
-O3 bootstrap (I am running it now).
Still if one construct testcase touching out of bound in more than one
iteration we will produce false warning, I will do that incrementally
by similar logic in loop copying.

Bootstrapped/regtested x86_64-linux, OK?

Honza

	* tree-ssa-loop-niter.c (free_loop_bounds): Break out from ...
	(free_numbers_of_iterations_estimates_loop): ... here.
	* tree-ssa-loop-ivcanon.c (remove_exits_and_undefined_stmts): New
	function.
	(remove_redundant_iv_test): New function.
	(try_unroll_loop_completely): Pass in MAXITER; use
	remove_exits_and_undefined_stmts
	(canonicalize_loop_induction_variables): Compute MAXITER;
	use remove_redundant_iv_test.
	* cfgloop.h (free_loop_bounds): New function.

	* gcc.dg/tree-ssa/cunroll-10.c: New testcase.
	* gcc.dg/tree-ssa/cunroll-9.c: New testcase.
Index: tree-ssa-loop-niter.c
===================================================================
--- tree-ssa-loop-niter.c	(revision 192989)
@@ -3505,15 +3737,11 @@ scev_probably_wraps_p (tree base, tree s
   return true;
 }
 
-/* Frees the information on upper bounds on numbers of iterations of LOOP.  */
-
 void
-free_numbers_of_iterations_estimates_loop (struct loop *loop)
+free_loop_bounds (struct loop *loop)
 {
   struct nb_iter_bound *bound, *next;
 
-  loop->nb_iterations = NULL;
-  loop->estimate_state = EST_NOT_COMPUTED;
   for (bound = loop->bounds; bound; bound = next)
     {
       next = bound->next;
@@ -3523,6 +3751,16 @@ free_numbers_of_iterations_estimates_loo
   loop->bounds = NULL;
 }
 
+/* Frees the information on upper bounds on numbers of iterations of LOOP.  */
+
+void
+free_numbers_of_iterations_estimates_loop (struct loop *loop)
+{
+  loop->nb_iterations = NULL;
+  loop->estimate_state = EST_NOT_COMPUTED;
+  free_loop_bounds (loop);
+}
+
 /* Frees the information on upper bounds on numbers of iterations of loops.  */
 
 void
Richard Guenther - Oct. 31, 2012, 11:01 a.m.
On Wed, 31 Oct 2012, Jan Hubicka wrote:

> Hi,
> this patch implements the logic to remove statements that are known to be
> undefined and thus expected to not be executed after unrolling.  It also
> removes redundant exits that I originally tried to do at once, but it
> does not fly, since the peeling confuse number_of_iterations_exit
> and it no longer understands the ivs.
> 
> So now we
> 1) always remove exits that are known to be redundant by the bounds found
> 2) try to peel/unroll
> 3) if success remove statemnts from the last iteration
> 
> This silence the array-bounds warnings in my testcase and many cases of
> -O3 bootstrap (I am running it now).
> Still if one construct testcase touching out of bound in more than one
> iteration we will produce false warning, I will do that incrementally
> by similar logic in loop copying.
> 
> Bootstrapped/regtested x86_64-linux, OK?
> 
> Honza
> 
> 	* tree-ssa-loop-niter.c (free_loop_bounds): Break out from ...
> 	(free_numbers_of_iterations_estimates_loop): ... here.
> 	* tree-ssa-loop-ivcanon.c (remove_exits_and_undefined_stmts): New
> 	function.
> 	(remove_redundant_iv_test): New function.
> 	(try_unroll_loop_completely): Pass in MAXITER; use
> 	remove_exits_and_undefined_stmts
> 	(canonicalize_loop_induction_variables): Compute MAXITER;
> 	use remove_redundant_iv_test.
> 	* cfgloop.h (free_loop_bounds): New function.
> 
> 	* gcc.dg/tree-ssa/cunroll-10.c: New testcase.
> 	* gcc.dg/tree-ssa/cunroll-9.c: New testcase.
> Index: tree-ssa-loop-niter.c
> ===================================================================
> --- tree-ssa-loop-niter.c	(revision 192989)
> @@ -3505,15 +3737,11 @@ scev_probably_wraps_p (tree base, tree s
>    return true;
>  }
>  
> -/* Frees the information on upper bounds on numbers of iterations of LOOP.  */
> -

Needs a comment.

>  void
> -free_numbers_of_iterations_estimates_loop (struct loop *loop)
> +free_loop_bounds (struct loop *loop)
>  {
>    struct nb_iter_bound *bound, *next;
>  
> -  loop->nb_iterations = NULL;
> -  loop->estimate_state = EST_NOT_COMPUTED;
>    for (bound = loop->bounds; bound; bound = next)
>      {
>        next = bound->next;
> @@ -3523,6 +3751,16 @@ free_numbers_of_iterations_estimates_loo
>    loop->bounds = NULL;
>  }
>  
> +/* Frees the information on upper bounds on numbers of iterations of LOOP.  */
> +
> +void
> +free_numbers_of_iterations_estimates_loop (struct loop *loop)
> +{
> +  loop->nb_iterations = NULL;
> +  loop->estimate_state = EST_NOT_COMPUTED;
> +  free_loop_bounds (loop);
> +}
> +
>  /* Frees the information on upper bounds on numbers of iterations of loops.  */
>  
>  void
> Index: tree-ssa-loop-ivcanon.c
> ===================================================================
> --- tree-ssa-loop-ivcanon.c	(revision 192989)
> +++ tree-ssa-loop-ivcanon.c	(working copy)
> @@ -389,6 +389,116 @@ loop_edge_to_cancel (struct loop *loop)
>    return NULL;
>  }
>  
> +/* Remove all tests for exits that are known to be taken after LOOP was
> +   peeled NPEELED times. Put gcc_unreachable before every statement
> +   known to not be executed.  */
> +
> +static bool
> +remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled)
> +{
> +  struct nb_iter_bound *elt;
> +  bool changed = false;
> +
> +  for (elt = loop->bounds; elt; elt = elt->next)
> +    {
> +      /* If statement is known to be undefined after peeling, turn it
> +	 into unreachable (or trap when debugging experience is supposed
> +	 to be good).  */
> +      if (!elt->is_exit
> +	  && elt->bound.ule (double_int::from_uhwi (npeeled)))
> +	{
> +	  gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
> +	  gimple stmt = gimple_build_call
> +	      (builtin_decl_implicit (optimize_debug
> +				      ? BUILT_IN_TRAP : BUILT_IN_UNREACHABLE),

I'm not sure we should do different things for -Og here.  In fact there
is no unrolling done for -Og at all, so just use unreachable.

> +	       0);
> +
> +	  gimple_set_location (stmt, gimple_location (elt->stmt));
> +	  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
> +	  changed = true;
> +	  if (dump_file && (dump_flags & TDF_DETAILS))
> +	    {
> +	      fprintf (dump_file, "Forced statement unreachable: ");
> +	      print_gimple_stmt (dump_file, elt->stmt, 0, 0);
> +	    }
> +	}
> +      /* If we know the exit will be taken after peeling, update.  */
> +      else if (elt->is_exit
> +	       && elt->bound.ule (double_int::from_uhwi (npeeled)))
> +	{
> +	  basic_block bb = gimple_bb (elt->stmt);
> +	  edge exit_edge = EDGE_SUCC (bb, 0);
> +
> +	  if (dump_file && (dump_flags & TDF_DETAILS))
> +	    {
> +	      fprintf (dump_file, "Forced exit to be taken: ");
> +	      print_gimple_stmt (dump_file, elt->stmt, 0, 0);
> +	    }
> +	  if (!loop_exit_edge_p (loop, exit_edge))
> +	    exit_edge = EDGE_SUCC (bb, 1);
> +	  if (exit_edge->flags & EDGE_TRUE_VALUE)

I think we can have abnormal/eh exit edges.  So I'm not sure you
can, without checking, assume the block ends in a GIMPLE_COND.

> +	    gimple_cond_make_true (elt->stmt);
> +	  else
> +	    gimple_cond_make_false (elt->stmt);
> +	  update_stmt (elt->stmt);
> +	  changed = true;
> +	}
> +    }
> +  return changed;
> +}
> +
> +/* Remove all exits that are known to be never taken because of the loop bound
> +   discovered.  */
> +
> +static bool
> +remove_redundant_iv_test (struct loop *loop)
> +{
> +  struct nb_iter_bound *elt;
> +  bool changed = false;
> +
> +  if (!loop->any_upper_bound)
> +    return false;
> +  for (elt = loop->bounds; elt; elt = elt->next)
> +    {
> +      /* Exit is pointless if it won't be taken before loop reaches
> +	 upper bound.  */
> +      if (elt->is_exit && loop->any_upper_bound
> +          && loop->nb_iterations_upper_bound.ult (elt->bound))
> +	{
> +	  basic_block bb = gimple_bb (elt->stmt);
> +	  edge exit_edge = EDGE_SUCC (bb, 0);
> +	  struct tree_niter_desc niter;
> +	  
> +	  if (!loop_exit_edge_p (loop, exit_edge))
> +	    exit_edge = EDGE_SUCC (bb, 1);
> +
> +	  /* Only when we know the actual number of iterations, not
> +	     just a bound, we can remove the exit.  */
> +	  if (!number_of_iterations_exit (loop, exit_edge,
> +					  &niter, false, false))
> +	    gcc_unreachable ();
> +	  if (!integer_onep (niter.assumptions)
> +	      || !integer_zerop (niter.may_be_zero)
> +	      || !niter.niter
> +	      || TREE_CODE (niter.niter) != INTEGER_CST)
> +	    continue;
> +
> +	  if (dump_file && (dump_flags & TDF_DETAILS))
> +	    {
> +	      fprintf (dump_file, "Removed pointless exit: ");
> +	      print_gimple_stmt (dump_file, elt->stmt, 0, 0);
> +	    }
> +	  if (exit_edge->flags & EDGE_TRUE_VALUE)
> +	    gimple_cond_make_false (elt->stmt);
> +	  else
> +	    gimple_cond_make_true (elt->stmt);

See above.  Otherwise the overall idea sounds fine.

Richard.

> +	  update_stmt (elt->stmt);
> +	  changed = true;
> +	}
> +    }
> +  return changed;
> +}
> +
>  /* Tries to unroll LOOP completely, i.e. NITER times.
>     UL determines which loops we are allowed to unroll.
>     EXIT is the exit of the loop that should be eliminated.  
> @@ -396,20 +511,22 @@ loop_edge_to_cancel (struct loop *loop)
>     irreducible regions may become invalid as a result
>     of the transformation.  
>     LOOP_CLOSED_SSA_INVALIDATED is used to bookkepp the case
> -   when we need to go into loop closed SSA form.  */
> +   when we need to go into loop closed SSA form. 
> +   MAXITER specfy bound on number of iterations, -1 if it is
> +   not known or too large for HOST_WIDE_INT.  */
>  
>  static bool
>  try_unroll_loop_completely (struct loop *loop,
>  			    edge exit, tree niter,
>  			    enum unroll_level ul,
>  			    bool *irred_invalidated,
> -			    bitmap loop_closed_ssa_invalidated)
> +			    bitmap loop_closed_ssa_invalidated,
> +			    HOST_WIDE_INT maxiter)
>  {
>    unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
>    gimple cond;
>    struct loop_size size;
>    bool n_unroll_found = false;
> -  HOST_WIDE_INT maxiter;
>    basic_block latch;
>    edge latch_edge;
>    location_t locus;
> @@ -445,7 +562,6 @@ try_unroll_loop_completely (struct loop 
>      exit = NULL;
>  
>    /* See if we can improve our estimate by using recorded loop bounds.  */
> -  maxiter = max_loop_iterations_int (loop);
>    if (maxiter >= 0
>        && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
>      {
> @@ -545,6 +661,8 @@ try_unroll_loop_completely (struct loop 
>        free_original_copy_tables ();
>      }
>  
> +  remove_exits_and_undefined_stmts (loop, n_unroll);
> +
>    /* Remove the conditional from the last copy of the loop.  */
>    if (edge_to_cancel)
>      {
> @@ -627,6 +745,8 @@ canonicalize_loop_induction_variables (s
>  {
>    edge exit = NULL;
>    tree niter;
> +  HOST_WIDE_INT maxiter;
> +  bool modified = false;
>  
>    niter = number_of_latch_executions (loop);
>    if (TREE_CODE (niter) == INTEGER_CST)
> @@ -657,6 +777,8 @@ canonicalize_loop_induction_variables (s
>    if (niter && TREE_CODE (niter) == INTEGER_CST)
>      record_niter_bound (loop, tree_to_double_int (niter), false, true);
>  
> +  maxiter = max_loop_iterations_int (loop);
> +
>    if (dump_file && (dump_flags & TDF_DETAILS)
>        && TREE_CODE (niter) == INTEGER_CST)
>      {
> @@ -665,21 +787,28 @@ canonicalize_loop_induction_variables (s
>        fprintf (dump_file, " times.\n");
>      }
>    if (dump_file && (dump_flags & TDF_DETAILS)
> -      && max_loop_iterations_int (loop) >= 0)
> +      && maxiter >= 0)
>      {
>        fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
> -	       (int)max_loop_iterations_int (loop));
> +	       (int)maxiter);
>      }
>  
> +  /* Remove exits that are known to be never taken based on loop bound.
> +     Needs to be called after compilation of max_loop_iterations_int that
> +     populates the loop bounds.  */
> +  modified |= remove_redundant_iv_test (loop);
> +
>    if (try_unroll_loop_completely (loop, exit, niter, ul, irred_invalidated,
> -				  loop_closed_ssa_invalidated))
> +				  loop_closed_ssa_invalidated, maxiter))
>      return true;
>  
>    if (create_iv
>        && niter && !chrec_contains_undetermined (niter))
>      create_canonical_iv (loop, exit, niter);
>  
> -  return false;
> +  if (modified)
> +    free_loop_bounds (loop);
> +  return modified;
>  }
>  
>  /* The main entry point of the pass.  Adds canonical induction variables
> Index: cfgloop.h
> ===================================================================
> --- cfgloop.h	(revision 192989)
> +++ cfgloop.h	(working copy)
> @@ -286,6 +286,7 @@ extern unsigned expected_loop_iterations
>  extern rtx doloop_condition_get (rtx);
>  
>  void estimate_numbers_of_iterations_loop (struct loop *);
> +void free_loop_bounds (struct loop *);
>  void record_niter_bound (struct loop *, double_int, bool, bool);
>  bool estimated_loop_iterations (struct loop *, double_int *);
>  bool max_loop_iterations (struct loop *, double_int *);
> Index: testsuite/gcc.dg/tree-ssa/cunroll-10.c
> ===================================================================
> --- testsuite/gcc.dg/tree-ssa/cunroll-10.c	(revision 0)
> +++ testsuite/gcc.dg/tree-ssa/cunroll-10.c	(revision 0)
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -Warray-bounds -fdump-tree-cunroll" } */
> +int a[3];
> +int b[4];
> +main()
> +{
> +  int i;
> +  for (i=0;i<4;i++)
> +    if (b[i]==2)
> +     a[i]++;
> +}
> +/* { dg-final { scan-tree-dump-times 2 "Forced statement unreachable:" "cunroll" } } */
> +/* { dg-final { cleanup-tree-dump "cunroll" } } */
> Index: testsuite/gcc.dg/tree-ssa/cunroll-9.c
> ===================================================================
> --- testsuite/gcc.dg/tree-ssa/cunroll-9.c	(revision 0)
> +++ testsuite/gcc.dg/tree-ssa/cunroll-9.c	(revision 0)
> @@ -0,0 +1,21 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-cunrolli" } */
> +int a[10];
> +int b[11];
> +t (int n)
> +{
> +  int i;
> +  int sum = 0;
> +  for (i = 0; i < n; i++)
> +    {
> +      if (i > 1000)
> +	abort ();
> +      if (q ())
> +	sum += a[i];
> +      else
> +	sum += b[i];
> +    }
> +  return sum;
> +}
> +/* { dg-final { scan-tree-dump-times 1 "Removed pointless exit:" "cunroli" } } */
> +/* { dg-final { cleanup-tree-dump "cunroli" } } */
> 
>
Jan Hubicka - Oct. 31, 2012, 12:03 p.m.
> > Index: tree-ssa-loop-niter.c
> > ===================================================================
> > --- tree-ssa-loop-niter.c	(revision 192989)
> > @@ -3505,15 +3737,11 @@ scev_probably_wraps_p (tree base, tree s
> >    return true;
> >  }
> >  
> > -/* Frees the information on upper bounds on numbers of iterations of LOOP.  */
> > -
> 
> Needs a comment.

Yep,
also the reason I export it is because I use in other patch.
(I think we should not hook the bounds into the loop structure and keep them dead,
so i want the max_iter*friends to free them unless they are asked to preserve and
explicitely free in the two users of them).
> > +static bool
> > +remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled)
> > +{
> > +  struct nb_iter_bound *elt;
> > +  bool changed = false;
> > +
> > +  for (elt = loop->bounds; elt; elt = elt->next)
> > +    {
> > +      /* If statement is known to be undefined after peeling, turn it
> > +	 into unreachable (or trap when debugging experience is supposed
> > +	 to be good).  */
> > +      if (!elt->is_exit
> > +	  && elt->bound.ule (double_int::from_uhwi (npeeled)))
> > +	{
> > +	  gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
> > +	  gimple stmt = gimple_build_call
> > +	      (builtin_decl_implicit (optimize_debug
> > +				      ? BUILT_IN_TRAP : BUILT_IN_UNREACHABLE),
> 
> I'm not sure we should do different things for -Og here.  In fact there
> is no unrolling done for -Og at all, so just use unreachable.

OK, I was thinking about BUILT_IN_TRAP for -O1 too, but I am not sure either.
i guess we will gather some experience on how much are users confused.

Why we do ont inline at -Og?
> > +      /* If we know the exit will be taken after peeling, update.  */
> > +      else if (elt->is_exit
> > +	       && elt->bound.ule (double_int::from_uhwi (npeeled)))
> > +	{
> > +	  basic_block bb = gimple_bb (elt->stmt);
> > +	  edge exit_edge = EDGE_SUCC (bb, 0);
> > +
> > +	  if (dump_file && (dump_flags & TDF_DETAILS))
> > +	    {
> > +	      fprintf (dump_file, "Forced exit to be taken: ");
> > +	      print_gimple_stmt (dump_file, elt->stmt, 0, 0);
> > +	    }
> > +	  if (!loop_exit_edge_p (loop, exit_edge))
> > +	    exit_edge = EDGE_SUCC (bb, 1);
> > +	  if (exit_edge->flags & EDGE_TRUE_VALUE)
> 
> I think we can have abnormal/eh exit edges.  So I'm not sure you
> can, without checking, assume the block ends in a GIMPLE_COND.

We can't - those are only edges that are found by the IV analysis and they
always test IV with some bound. (it is all done by number_of_iterations_exit)
> 
> See above.  Otherwise the overall idea sounds fine.

Similarly here, simple exits are always conditionals. 


Thanks a lot!
Honza
Richard Guenther - Oct. 31, 2012, 12:17 p.m.
On Wed, 31 Oct 2012, Jan Hubicka wrote:

> > > Index: tree-ssa-loop-niter.c
> > > ===================================================================
> > > --- tree-ssa-loop-niter.c	(revision 192989)
> > > @@ -3505,15 +3737,11 @@ scev_probably_wraps_p (tree base, tree s
> > >    return true;
> > >  }
> > >  
> > > -/* Frees the information on upper bounds on numbers of iterations of LOOP.  */
> > > -
> > 
> > Needs a comment.
> 
> Yep,
> also the reason I export it is because I use in other patch.
> (I think we should not hook the bounds into the loop structure and keep them dead,
> so i want the max_iter*friends to free them unless they are asked to preserve and
> explicitely free in the two users of them).
> > > +static bool
> > > +remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled)
> > > +{
> > > +  struct nb_iter_bound *elt;
> > > +  bool changed = false;
> > > +
> > > +  for (elt = loop->bounds; elt; elt = elt->next)
> > > +    {
> > > +      /* If statement is known to be undefined after peeling, turn it
> > > +	 into unreachable (or trap when debugging experience is supposed
> > > +	 to be good).  */
> > > +      if (!elt->is_exit
> > > +	  && elt->bound.ule (double_int::from_uhwi (npeeled)))
> > > +	{
> > > +	  gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
> > > +	  gimple stmt = gimple_build_call
> > > +	      (builtin_decl_implicit (optimize_debug
> > > +				      ? BUILT_IN_TRAP : BUILT_IN_UNREACHABLE),
> > 
> > I'm not sure we should do different things for -Og here.  In fact there
> > is no unrolling done for -Og at all, so just use unreachable.
> 
> OK, I was thinking about BUILT_IN_TRAP for -O1 too, but I am not sure either.
> i guess we will gather some experience on how much are users confused.
> 
> Why we do ont inline at -Og?

unroll you mean.  Because unrolling mutates the CFG too much.
Well - it was just a starting point, populating -Og with as little
as possible and 100% profitable transforms (in both debug and speed
metric).  In late opts we only do (early opt queue is shared):

  NEXT_PASS (pass_all_optimizations_g);
    {
      struct opt_pass **p = &pass_all_optimizations_g.pass.sub;
      NEXT_PASS (pass_remove_cgraph_callee_edges);
      NEXT_PASS (pass_strip_predict_hints);
      /* Lower remaining pieces of GIMPLE.  */
      NEXT_PASS (pass_lower_complex);
      NEXT_PASS (pass_lower_vector_ssa);
      /* Perform simple scalar cleanup which is constant/copy propagation.  
*/
      NEXT_PASS (pass_ccp);
      NEXT_PASS (pass_object_sizes);
      /* Copy propagation also copy-propagates constants, this is 
necessary
         to forward object-size results properly.  */
      NEXT_PASS (pass_copy_prop);
      NEXT_PASS (pass_rename_ssa_copies);
      NEXT_PASS (pass_dce);
      /* Fold remaining builtins.  */
      NEXT_PASS (pass_fold_builtins);
      /* ???  We do want some kind of loop invariant motion, but we 
possibly
         need to adjust LIM to be more friendly towards preserving 
accurate
         debug information here.  */
      NEXT_PASS (pass_late_warn_uninitialized);
      NEXT_PASS (pass_uncprop);
      NEXT_PASS (pass_local_pure_const);
    }

> > > +      /* If we know the exit will be taken after peeling, update.  */
> > > +      else if (elt->is_exit
> > > +	       && elt->bound.ule (double_int::from_uhwi (npeeled)))
> > > +	{
> > > +	  basic_block bb = gimple_bb (elt->stmt);
> > > +	  edge exit_edge = EDGE_SUCC (bb, 0);
> > > +
> > > +	  if (dump_file && (dump_flags & TDF_DETAILS))
> > > +	    {
> > > +	      fprintf (dump_file, "Forced exit to be taken: ");
> > > +	      print_gimple_stmt (dump_file, elt->stmt, 0, 0);
> > > +	    }
> > > +	  if (!loop_exit_edge_p (loop, exit_edge))
> > > +	    exit_edge = EDGE_SUCC (bb, 1);
> > > +	  if (exit_edge->flags & EDGE_TRUE_VALUE)
> > 
> > I think we can have abnormal/eh exit edges.  So I'm not sure you
> > can, without checking, assume the block ends in a GIMPLE_COND.
> 
> We can't - those are only edges that are found by the IV analysis and they
> always test IV with some bound. (it is all done by number_of_iterations_exit)
> > 
> > See above.  Otherwise the overall idea sounds fine.
> 
> Similarly here, simple exits are always conditionals. 

I see.  Then the patch is ok (with the comment added).

Thanks,
Richard.
Jan Hubicka - Oct. 31, 2012, 12:22 p.m.
> 
> unroll you mean.  Because unrolling mutates the CFG too much.
> Well - it was just a starting point, populating -Og with as little
> as possible and 100% profitable transforms (in both debug and speed
> metric).  In late opts we only do (early opt queue is shared):

Well, and what about early cunrolli?
> 
>   NEXT_PASS (pass_all_optimizations_g);
>     {
>       struct opt_pass **p = &pass_all_optimizations_g.pass.sub;
>       NEXT_PASS (pass_remove_cgraph_callee_edges);
>       NEXT_PASS (pass_strip_predict_hints);
>       /* Lower remaining pieces of GIMPLE.  */
>       NEXT_PASS (pass_lower_complex);
>       NEXT_PASS (pass_lower_vector_ssa);
>       /* Perform simple scalar cleanup which is constant/copy propagation.  
> */
>       NEXT_PASS (pass_ccp);
>       NEXT_PASS (pass_object_sizes);
>       /* Copy propagation also copy-propagates constants, this is 
> necessary
>          to forward object-size results properly.  */
>       NEXT_PASS (pass_copy_prop);
>       NEXT_PASS (pass_rename_ssa_copies);
>       NEXT_PASS (pass_dce);
>       /* Fold remaining builtins.  */
>       NEXT_PASS (pass_fold_builtins);
>       /* ???  We do want some kind of loop invariant motion, but we 
> possibly
>          need to adjust LIM to be more friendly towards preserving 
> accurate
>          debug information here.  */
>       NEXT_PASS (pass_late_warn_uninitialized);
>       NEXT_PASS (pass_uncprop);
>       NEXT_PASS (pass_local_pure_const);
>     }
> 
> > > > +      /* If we know the exit will be taken after peeling, update.  */
> > > > +      else if (elt->is_exit
> > > > +	       && elt->bound.ule (double_int::from_uhwi (npeeled)))
> > > > +	{
> > > > +	  basic_block bb = gimple_bb (elt->stmt);
> > > > +	  edge exit_edge = EDGE_SUCC (bb, 0);
> > > > +
> > > > +	  if (dump_file && (dump_flags & TDF_DETAILS))
> > > > +	    {
> > > > +	      fprintf (dump_file, "Forced exit to be taken: ");
> > > > +	      print_gimple_stmt (dump_file, elt->stmt, 0, 0);
> > > > +	    }
> > > > +	  if (!loop_exit_edge_p (loop, exit_edge))
> > > > +	    exit_edge = EDGE_SUCC (bb, 1);
> > > > +	  if (exit_edge->flags & EDGE_TRUE_VALUE)
> > > 
> > > I think we can have abnormal/eh exit edges.  So I'm not sure you
> > > can, without checking, assume the block ends in a GIMPLE_COND.
> > 
> > We can't - those are only edges that are found by the IV analysis and they
> > always test IV with some bound. (it is all done by number_of_iterations_exit)
> > > 
> > > See above.  Otherwise the overall idea sounds fine.
> > 
> > Similarly here, simple exits are always conditionals. 
> 
> I see.  Then the patch is ok (with the comment added).

Thanks, i will ad comment and privatize free_loop_bounds for now.

Honza
> 
> Thanks,
> Richard.
Richard Guenther - Oct. 31, 2012, 12:30 p.m.
On Wed, 31 Oct 2012, Jan Hubicka wrote:

> > 
> > unroll you mean.  Because unrolling mutates the CFG too much.
> > Well - it was just a starting point, populating -Og with as little
> > as possible and 100% profitable transforms (in both debug and speed
> > metric).  In late opts we only do (early opt queue is shared):
> 
> Well, and what about early cunrolli?

It's not there (and I would disable it for -Og).

Richard.
Jakub Jelinek - Oct. 31, 2012, 12:34 p.m.
On Wed, Oct 31, 2012 at 01:30:02PM +0100, Richard Biener wrote:
> On Wed, 31 Oct 2012, Jan Hubicka wrote:
> > > unroll you mean.  Because unrolling mutates the CFG too much.
> > > Well - it was just a starting point, populating -Og with as little
> > > as possible and 100% profitable transforms (in both debug and speed
> > > metric).  In late opts we only do (early opt queue is shared):
> > 
> > Well, and what about early cunrolli?
> 
> It's not there (and I would disable it for -Og).

Generally, most of the loop transforms are undesirable for -Og.

	Jakub
Richard Guenther - Oct. 31, 2012, 12:35 p.m.
On Wed, 31 Oct 2012, Jakub Jelinek wrote:

> On Wed, Oct 31, 2012 at 01:30:02PM +0100, Richard Biener wrote:
> > On Wed, 31 Oct 2012, Jan Hubicka wrote:
> > > > unroll you mean.  Because unrolling mutates the CFG too much.
> > > > Well - it was just a starting point, populating -Og with as little
> > > > as possible and 100% profitable transforms (in both debug and speed
> > > > metric).  In late opts we only do (early opt queue is shared):
> > > 
> > > Well, and what about early cunrolli?
> > 
> > It's not there (and I would disable it for -Og).
> 
> Generally, most of the loop transforms are undesirable for -Og.

Maybe with the exception of un-looping once rolling loops.  But
yes, even loop header copying is undesirable in general.

Richard.
Jan Hubicka - Oct. 31, 2012, 2:05 p.m.
> On Wed, Oct 31, 2012 at 01:30:02PM +0100, Richard Biener wrote:
> > On Wed, 31 Oct 2012, Jan Hubicka wrote:
> > > > unroll you mean.  Because unrolling mutates the CFG too much.
> > > > Well - it was just a starting point, populating -Og with as little
> > > > as possible and 100% profitable transforms (in both debug and speed
> > > > metric).  In late opts we only do (early opt queue is shared):
> > > 
> > > Well, and what about early cunrolli?
> > 
> > It's not there (and I would disable it for -Og).
> 
> Generally, most of the loop transforms are undesirable for -Og.

Hmm, do we have some guidelines what is and is not desirable for -Og?
Unrolling/header copying being code duplicating optimization do not really
throw away any info by themself only require debugger to understand that some
code may get duplicated, but it is not different from i.e. inlining. Of course
the subsequent const prop will make it impossible to write into the iv
variable, but do we want to support reliable writting into user vars (i.e.
disable cprop on user vars) at -Og?

Honza
> 
> 	Jakub
Richard Guenther - Oct. 31, 2012, 2:06 p.m.
On Wed, 31 Oct 2012, Jan Hubicka wrote:

> > On Wed, Oct 31, 2012 at 01:30:02PM +0100, Richard Biener wrote:
> > > On Wed, 31 Oct 2012, Jan Hubicka wrote:
> > > > > unroll you mean.  Because unrolling mutates the CFG too much.
> > > > > Well - it was just a starting point, populating -Og with as little
> > > > > as possible and 100% profitable transforms (in both debug and speed
> > > > > metric).  In late opts we only do (early opt queue is shared):
> > > > 
> > > > Well, and what about early cunrolli?
> > > 
> > > It's not there (and I would disable it for -Og).
> > 
> > Generally, most of the loop transforms are undesirable for -Og.
> 
> Hmm, do we have some guidelines what is and is not desirable for -Og?
> Unrolling/header copying being code duplicating optimization do not really
> throw away any info by themself only require debugger to understand that some
> code may get duplicated, but it is not different from i.e. inlining. Of course
> the subsequent const prop will make it impossible to write into the iv
> variable, but do we want to support reliable writting into user vars (i.e.
> disable cprop on user vars) at -Og?

No, we don't support reliable writing.

We should first sort out issues with the current passes and debug info,
look at compile-time and runtime performance and then add new passes.

Richard.
H.J. Lu - Dec. 1, 2012, 7:22 p.m.
On Wed, Oct 31, 2012 at 3:39 AM, Jan Hubicka <hubicka@ucw.cz> wrote:
> Hi,
> this patch implements the logic to remove statements that are known to be
> undefined and thus expected to not be executed after unrolling.  It also
> removes redundant exits that I originally tried to do at once, but it
> does not fly, since the peeling confuse number_of_iterations_exit
> and it no longer understands the ivs.
>
> So now we
> 1) always remove exits that are known to be redundant by the bounds found
> 2) try to peel/unroll
> 3) if success remove statemnts from the last iteration
>
> This silence the array-bounds warnings in my testcase and many cases of
> -O3 bootstrap (I am running it now).
> Still if one construct testcase touching out of bound in more than one
> iteration we will produce false warning, I will do that incrementally
> by similar logic in loop copying.
>
> Bootstrapped/regtested x86_64-linux, OK?
>
> Honza
>
>         * tree-ssa-loop-niter.c (free_loop_bounds): Break out from ...
>         (free_numbers_of_iterations_estimates_loop): ... here.
>         * tree-ssa-loop-ivcanon.c (remove_exits_and_undefined_stmts): New
>         function.
>         (remove_redundant_iv_test): New function.
>         (try_unroll_loop_completely): Pass in MAXITER; use
>         remove_exits_and_undefined_stmts
>         (canonicalize_loop_induction_variables): Compute MAXITER;
>         use remove_redundant_iv_test.
>         * cfgloop.h (free_loop_bounds): New function.
>
>         * gcc.dg/tree-ssa/cunroll-10.c: New testcase.
>         * gcc.dg/tree-ssa/cunroll-9.c: New testcase.

This caused:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55555

Patch

Index: tree-ssa-loop-ivcanon.c
===================================================================
--- tree-ssa-loop-ivcanon.c	(revision 192989)
+++ tree-ssa-loop-ivcanon.c	(working copy)
@@ -389,6 +389,116 @@  loop_edge_to_cancel (struct loop *loop)
   return NULL;
 }
 
+/* Remove all tests for exits that are known to be taken after LOOP was
+   peeled NPEELED times. Put gcc_unreachable before every statement
+   known to not be executed.  */
+
+static bool
+remove_exits_and_undefined_stmts (struct loop *loop, unsigned int npeeled)
+{
+  struct nb_iter_bound *elt;
+  bool changed = false;
+
+  for (elt = loop->bounds; elt; elt = elt->next)
+    {
+      /* If statement is known to be undefined after peeling, turn it
+	 into unreachable (or trap when debugging experience is supposed
+	 to be good).  */
+      if (!elt->is_exit
+	  && elt->bound.ule (double_int::from_uhwi (npeeled)))
+	{
+	  gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
+	  gimple stmt = gimple_build_call
+	      (builtin_decl_implicit (optimize_debug
+				      ? BUILT_IN_TRAP : BUILT_IN_UNREACHABLE),
+	       0);
+
+	  gimple_set_location (stmt, gimple_location (elt->stmt));
+	  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
+	  changed = true;
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    {
+	      fprintf (dump_file, "Forced statement unreachable: ");
+	      print_gimple_stmt (dump_file, elt->stmt, 0, 0);
+	    }
+	}
+      /* If we know the exit will be taken after peeling, update.  */
+      else if (elt->is_exit
+	       && elt->bound.ule (double_int::from_uhwi (npeeled)))
+	{
+	  basic_block bb = gimple_bb (elt->stmt);
+	  edge exit_edge = EDGE_SUCC (bb, 0);
+
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    {
+	      fprintf (dump_file, "Forced exit to be taken: ");
+	      print_gimple_stmt (dump_file, elt->stmt, 0, 0);
+	    }
+	  if (!loop_exit_edge_p (loop, exit_edge))
+	    exit_edge = EDGE_SUCC (bb, 1);
+	  if (exit_edge->flags & EDGE_TRUE_VALUE)
+	    gimple_cond_make_true (elt->stmt);
+	  else
+	    gimple_cond_make_false (elt->stmt);
+	  update_stmt (elt->stmt);
+	  changed = true;
+	}
+    }
+  return changed;
+}
+
+/* Remove all exits that are known to be never taken because of the loop bound
+   discovered.  */
+
+static bool
+remove_redundant_iv_test (struct loop *loop)
+{
+  struct nb_iter_bound *elt;
+  bool changed = false;
+
+  if (!loop->any_upper_bound)
+    return false;
+  for (elt = loop->bounds; elt; elt = elt->next)
+    {
+      /* Exit is pointless if it won't be taken before loop reaches
+	 upper bound.  */
+      if (elt->is_exit && loop->any_upper_bound
+          && loop->nb_iterations_upper_bound.ult (elt->bound))
+	{
+	  basic_block bb = gimple_bb (elt->stmt);
+	  edge exit_edge = EDGE_SUCC (bb, 0);
+	  struct tree_niter_desc niter;
+	  
+	  if (!loop_exit_edge_p (loop, exit_edge))
+	    exit_edge = EDGE_SUCC (bb, 1);
+
+	  /* Only when we know the actual number of iterations, not
+	     just a bound, we can remove the exit.  */
+	  if (!number_of_iterations_exit (loop, exit_edge,
+					  &niter, false, false))
+	    gcc_unreachable ();
+	  if (!integer_onep (niter.assumptions)
+	      || !integer_zerop (niter.may_be_zero)
+	      || !niter.niter
+	      || TREE_CODE (niter.niter) != INTEGER_CST)
+	    continue;
+
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    {
+	      fprintf (dump_file, "Removed pointless exit: ");
+	      print_gimple_stmt (dump_file, elt->stmt, 0, 0);
+	    }
+	  if (exit_edge->flags & EDGE_TRUE_VALUE)
+	    gimple_cond_make_false (elt->stmt);
+	  else
+	    gimple_cond_make_true (elt->stmt);
+	  update_stmt (elt->stmt);
+	  changed = true;
+	}
+    }
+  return changed;
+}
+
 /* Tries to unroll LOOP completely, i.e. NITER times.
    UL determines which loops we are allowed to unroll.
    EXIT is the exit of the loop that should be eliminated.  
@@ -396,20 +511,22 @@  loop_edge_to_cancel (struct loop *loop)
    irreducible regions may become invalid as a result
    of the transformation.  
    LOOP_CLOSED_SSA_INVALIDATED is used to bookkepp the case
-   when we need to go into loop closed SSA form.  */
+   when we need to go into loop closed SSA form. 
+   MAXITER specfy bound on number of iterations, -1 if it is
+   not known or too large for HOST_WIDE_INT.  */
 
 static bool
 try_unroll_loop_completely (struct loop *loop,
 			    edge exit, tree niter,
 			    enum unroll_level ul,
 			    bool *irred_invalidated,
-			    bitmap loop_closed_ssa_invalidated)
+			    bitmap loop_closed_ssa_invalidated,
+			    HOST_WIDE_INT maxiter)
 {
   unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
   gimple cond;
   struct loop_size size;
   bool n_unroll_found = false;
-  HOST_WIDE_INT maxiter;
   basic_block latch;
   edge latch_edge;
   location_t locus;
@@ -445,7 +562,6 @@  try_unroll_loop_completely (struct loop 
     exit = NULL;
 
   /* See if we can improve our estimate by using recorded loop bounds.  */
-  maxiter = max_loop_iterations_int (loop);
   if (maxiter >= 0
       && (!n_unroll_found || (unsigned HOST_WIDE_INT)maxiter < n_unroll))
     {
@@ -545,6 +661,8 @@  try_unroll_loop_completely (struct loop 
       free_original_copy_tables ();
     }
 
+  remove_exits_and_undefined_stmts (loop, n_unroll);
+
   /* Remove the conditional from the last copy of the loop.  */
   if (edge_to_cancel)
     {
@@ -627,6 +745,8 @@  canonicalize_loop_induction_variables (s
 {
   edge exit = NULL;
   tree niter;
+  HOST_WIDE_INT maxiter;
+  bool modified = false;
 
   niter = number_of_latch_executions (loop);
   if (TREE_CODE (niter) == INTEGER_CST)
@@ -657,6 +777,8 @@  canonicalize_loop_induction_variables (s
   if (niter && TREE_CODE (niter) == INTEGER_CST)
     record_niter_bound (loop, tree_to_double_int (niter), false, true);
 
+  maxiter = max_loop_iterations_int (loop);
+
   if (dump_file && (dump_flags & TDF_DETAILS)
       && TREE_CODE (niter) == INTEGER_CST)
     {
@@ -665,21 +787,28 @@  canonicalize_loop_induction_variables (s
       fprintf (dump_file, " times.\n");
     }
   if (dump_file && (dump_flags & TDF_DETAILS)
-      && max_loop_iterations_int (loop) >= 0)
+      && maxiter >= 0)
     {
       fprintf (dump_file, "Loop %d iterates at most %i times.\n", loop->num,
-	       (int)max_loop_iterations_int (loop));
+	       (int)maxiter);
     }
 
+  /* Remove exits that are known to be never taken based on loop bound.
+     Needs to be called after compilation of max_loop_iterations_int that
+     populates the loop bounds.  */
+  modified |= remove_redundant_iv_test (loop);
+
   if (try_unroll_loop_completely (loop, exit, niter, ul, irred_invalidated,
-				  loop_closed_ssa_invalidated))
+				  loop_closed_ssa_invalidated, maxiter))
     return true;
 
   if (create_iv
       && niter && !chrec_contains_undetermined (niter))
     create_canonical_iv (loop, exit, niter);
 
-  return false;
+  if (modified)
+    free_loop_bounds (loop);
+  return modified;
 }
 
 /* The main entry point of the pass.  Adds canonical induction variables
Index: cfgloop.h
===================================================================
--- cfgloop.h	(revision 192989)
+++ cfgloop.h	(working copy)
@@ -286,6 +286,7 @@  extern unsigned expected_loop_iterations
 extern rtx doloop_condition_get (rtx);
 
 void estimate_numbers_of_iterations_loop (struct loop *);
+void free_loop_bounds (struct loop *);
 void record_niter_bound (struct loop *, double_int, bool, bool);
 bool estimated_loop_iterations (struct loop *, double_int *);
 bool max_loop_iterations (struct loop *, double_int *);
Index: testsuite/gcc.dg/tree-ssa/cunroll-10.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/cunroll-10.c	(revision 0)
+++ testsuite/gcc.dg/tree-ssa/cunroll-10.c	(revision 0)
@@ -0,0 +1,13 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -Warray-bounds -fdump-tree-cunroll" } */
+int a[3];
+int b[4];
+main()
+{
+  int i;
+  for (i=0;i<4;i++)
+    if (b[i]==2)
+     a[i]++;
+}
+/* { dg-final { scan-tree-dump-times 2 "Forced statement unreachable:" "cunroll" } } */
+/* { dg-final { cleanup-tree-dump "cunroll" } } */
Index: testsuite/gcc.dg/tree-ssa/cunroll-9.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/cunroll-9.c	(revision 0)
+++ testsuite/gcc.dg/tree-ssa/cunroll-9.c	(revision 0)
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cunrolli" } */
+int a[10];
+int b[11];
+t (int n)
+{
+  int i;
+  int sum = 0;
+  for (i = 0; i < n; i++)
+    {
+      if (i > 1000)
+	abort ();
+      if (q ())
+	sum += a[i];
+      else
+	sum += b[i];
+    }
+  return sum;
+}
+/* { dg-final { scan-tree-dump-times 1 "Removed pointless exit:" "cunroli" } } */
+/* { dg-final { cleanup-tree-dump "cunroli" } } */